refine notation

266b8eeb · peterzhang2029 · e89af968 · 266b8eeb · 266b8eeb · 266b8eeb
5 changed files
--- a/nested_sequence/text_classification/README.md
+++ b/nested_sequence/text_classification/README.md
@@ -76,7 +76,7 @@ python train.py
 ### 预测
 训练结束后模型将存储在指定目录当中（默认models目录），在终端执行：
 ```bash
-python infer.py
+python infer.py --model_path 'models/params_pass_00000.tar.gz'
 ```
 默认情况下，预测脚本将加载训练一个pass的模型对 `imdb的测试集` 进行测试。
@@ -139,20 +139,21 @@ def train_reader(data_dir, word_dict):
 `train.py`训练脚本中包含以下参数：
 ```
 Options:
-  --train_data_dir TEXT  path of training dataset (default: None). if this
+  --train_data_dir TEXT  The path of training dataset (default: None). If this
                         parameter is not set, imdb dataset will be used.
-  --test_data_dir TEXT   path of testing dataset (default: None). if this
+  --test_data_dir TEXT   The path of testing dataset (default: None). If this
                         parameter is not set, imdb dataset will be used.
-  --word_dict_path TEXT  path of word dictionary (default: None).if this
+  --word_dict_path TEXT  The path of word dictionary (default: None). If this
-                         parameter is not set, imdb dataset will be used.if
+                         parameter is not set, imdb dataset will be used. If
                         this parameter is set, but the file does not exist,
                          word dictionary will be built from the training data
                         automatically.
-  --class_num INTEGER    class number (default: 2).
+  --class_num INTEGER    The class number (default: 2).
-  --batch_size INTEGER   the number of training examples in one batch
+  --batch_size INTEGER   The number of training examples in one batch
                         (default: 32).
-  --num_passes INTEGER   number of passes to train (default: 10).
+  --num_passes INTEGER   The number of passes to train (default: 10).
-  --model_save_dir TEXT  path to save the trained models (default: 'models').
+  --model_save_dir TEXT  The path to save the trained models (default:
+                         'models').
  --help                 Show this message and exit.
 ```
@@ -170,20 +171,20 @@ python train.py --train_data_dir 'data/train_data' --test_data_dir 'data/test_da
 ```
 Options:
-  --data_path TEXT       path of data for inference (default: None). if this
+  --data_path TEXT       The path of data for inference (default: None). If
-                         parameter is not set, imdb test dataset will be used.
+                         this parameter is not set, imdb test dataset will be
-  --model_path TEXT      path of saved model. (default:
+                         used.
-                         'models/params_pass_00000.tar.gz')
+  --model_path TEXT      The path of saved model.  [required]
-  --word_dict_path TEXT  path of word dictionary (default: None).if this
+  --word_dict_path TEXT  The path of word dictionary (default: None). If this
                         parameter is not set, imdb dataset will be used.
-  --class_num INTEGER    class number (default: 2).
+  --class_num INTEGER    The class number (default: 2).
-  --batch_size INTEGER   the number of examples in one batch (default: 32).
+  --batch_size INTEGER   The number of examples in one batch (default: 32).
  --help                 Show this message and exit.
 ```
 2.以`data`目录下的示例数据为例，在终端执行：
 ```bash
-python infer.py --data_path 'data/infer.txt' --word_dict_path 'dict.txt'
+python infer.py --data_path 'data/infer.txt' --word_dict_path 'dict.txt' --model_path 'models/params_pass_00000.tar.gz'
 ```
 即可对样例数据进行预测。
--- a/nested_sequence/text_classification/index.html
+++ b/nested_sequence/text_classification/index.html
@@ -118,7 +118,7 @@ python train.py
 ### 预测
 训练结束后模型将存储在指定目录当中（默认models目录），在终端执行：
 ```bash
-python infer.py
+python infer.py --model_path 'models/params_pass_00000.tar.gz'
 ```
 默认情况下，预测脚本将加载训练一个pass的模型对 `imdb的测试集` 进行测试。
@@ -181,20 +181,21 @@ def train_reader(data_dir, word_dict):
 `train.py`训练脚本中包含以下参数：
 ```
 Options:
-  --train_data_dir TEXT  path of training dataset (default: None). if this
+  --train_data_dir TEXT  The path of training dataset (default: None). If this
                         parameter is not set, imdb dataset will be used.
-  --test_data_dir TEXT   path of testing dataset (default: None). if this
+  --test_data_dir TEXT   The path of testing dataset (default: None). If this
                         parameter is not set, imdb dataset will be used.
-  --word_dict_path TEXT  path of word dictionary (default: None).if this
+  --word_dict_path TEXT  The path of word dictionary (default: None). If this
-                         parameter is not set, imdb dataset will be used.if
+                         parameter is not set, imdb dataset will be used. If
                         this parameter is set, but the file does not exist,
                          word dictionary will be built from the training data
                         automatically.
-  --class_num INTEGER    class number (default: 2).
+  --class_num INTEGER    The class number (default: 2).
-  --batch_size INTEGER   the number of training examples in one batch
+  --batch_size INTEGER   The number of training examples in one batch
                         (default: 32).
-  --num_passes INTEGER   number of passes to train (default: 10).
+  --num_passes INTEGER   The number of passes to train (default: 10).
-  --model_save_dir TEXT  path to save the trained models (default: 'models').
+  --model_save_dir TEXT  The path to save the trained models (default:
+                         'models').
  --help                 Show this message and exit.
 ```
@@ -212,20 +213,20 @@ python train.py --train_data_dir 'data/train_data' --test_data_dir 'data/test_da
 ```
 Options:
-  --data_path TEXT       path of data for inference (default: None). if this
+  --data_path TEXT       The path of data for inference (default: None). If
-                         parameter is not set, imdb test dataset will be used.
+                         this parameter is not set, imdb test dataset will be
-  --model_path TEXT      path of saved model. (default:
+                         used.
-                         'models/params_pass_00000.tar.gz')
+  --model_path TEXT      The path of saved model.  [required]
-  --word_dict_path TEXT  path of word dictionary (default: None).if this
+  --word_dict_path TEXT  The path of word dictionary (default: None). If this
                         parameter is not set, imdb dataset will be used.
-  --class_num INTEGER    class number (default: 2).
+  --class_num INTEGER    The class number (default: 2).
-  --batch_size INTEGER   the number of examples in one batch (default: 32).
+  --batch_size INTEGER   The number of examples in one batch (default: 32).
  --help                 Show this message and exit.
 ```
 2.以`data`目录下的示例数据为例，在终端执行：
 ```bash
-python infer.py --data_path 'data/infer.txt' --word_dict_path 'dict.txt'
+python infer.py --data_path 'data/infer.txt' --word_dict_path 'dict.txt' --model_path 'models/params_pass_00000.tar.gz'
 ```
 即可对样例数据进行预测。

--- a/nested_sequence/text_classification/infer.py
+++ b/nested_sequence/text_classification/infer.py
@@ -14,28 +14,24 @@ from utils import logger, load_dict
 @click.option(
    "--data_path",
    default=None,
-    help=("path of data for inference (default: None). "
+    help=("The path of data for inference (default: None). "
-          "if this parameter is not set, "
+          "If this parameter is not set, "
          "imdb test dataset will be used."))
 @click.option(
-    "--model_path",
+    "--model_path", type=str, required=True, help="The path of saved model.")
-    type=str,
-    default='models/params_pass_00000.tar.gz',
-    help=("path of saved model. "
-          "(default: 'models/params_pass_00000.tar.gz')"))
 @click.option(
    "--word_dict_path",
    type=str,
    default=None,
-    help=("path of word dictionary (default: None)."
+    help=("The path of word dictionary (default: None). "
-          "if this parameter is not set, imdb dataset will be used."))
+          "If this parameter is not set, imdb dataset will be used."))
 @click.option(
-    "--class_num", type=int, default=2, help="class number (default: 2).")
+    "--class_num", type=int, default=2, help="The class number (default: 2).")
 @click.option(
    "--batch_size",
    type=int,
    default=32,
-    help="the number of examples in one batch (default: 32).")
+    help="The number of examples in one batch (default: 32).")
 def infer(data_path, model_path, word_dict_path, batch_size, class_num):
    def _infer_a_batch(inferer, test_batch, ids_2_word):
        probs = inferer.infer(input=test_batch, field=["value"])
@@ -49,8 +45,8 @@ def infer(data_path, model_path, word_dict_path, batch_size, class_num):
                                  " ".join(["{:0.4f}".format(p)
                                            for p in prob]), word_text))
-    assert os.path.exists(model_path), "the trained model does not exist."
+    assert os.path.exists(model_path), "The trained model does not exist."
-    logger.info("begin to predict...")
+    logger.info("Begin to predict...")
    use_default_data = (data_path is None)
    if use_default_data:
@@ -61,7 +57,7 @@ def infer(data_path, model_path, word_dict_path, batch_size, class_num):
        class_num = 2
    else:
        assert os.path.exists(
-            word_dict_path), "the word dictionary file does not exist"
+            word_dict_path), "The word dictionary file does not exist"
        word_dict = load_dict(word_dict_path)
        word_reverse_dict = dict((value, key)

--- a/nested_sequence/text_classification/network_conf.py
+++ b/nested_sequence/text_classification/network_conf.py
@@ -7,8 +7,6 @@ def cnn_cov_group(group_input, hidden_size):
    conv4 = paddle.networks.sequence_conv_pool(
        input=group_input, context_len=4, hidden_size=hidden_size)
-    #output_group = paddle.layer.concat(input=[conv3, conv4])
    output_group = paddle.layer.fc(
        input=[conv3, conv4],
        size=hidden_size,

--- a/nested_sequence/text_classification/train.py
+++ b/nested_sequence/text_classification/train.py
@@ -14,42 +14,42 @@ from utils import build_dict, load_dict, logger
 @click.option(
    "--train_data_dir",
    default=None,
-    help=("path of training dataset (default: None). "
+    help=("The path of training dataset (default: None). "
-          "if this parameter is not set, "
+          "If this parameter is not set, "
          "imdb dataset will be used."))
 @click.option(
    "--test_data_dir",
    default=None,
-    help=("path of testing dataset (default: None). "
+    help=("The path of testing dataset (default: None). "
-          "if this parameter is not set, "
+          "If this parameter is not set, "
          "imdb dataset will be used."))
 @click.option(
    "--word_dict_path",
    type=str,
    default=None,
-    help=("path of word dictionary (default: None)."
+    help=("The path of word dictionary (default: None). "
-          "if this parameter is not set, imdb dataset will be used."
+          "If this parameter is not set, imdb dataset will be used. "
-          "if this parameter is set, but the file does not exist, "
+          "If this parameter is set, but the file does not exist, "
          "word dictionay will be built from "
          "the training data automatically."))
 @click.option(
-    "--class_num", type=int, default=2, help="class number (default: 2).")
+    "--class_num", type=int, default=2, help="The class number (default: 2).")
 @click.option(
    "--batch_size",
    type=int,
    default=32,
-    help=("the number of training examples in one batch "
+    help=("The number of training examples in one batch "
          "(default: 32)."))
 @click.option(
    "--num_passes",
    type=int,
    default=10,
-    help="number of passes to train (default: 10).")
+    help="The number of passes to train (default: 10).")
 @click.option(
    "--model_save_dir",
    type=str,
    default="models",
-    help="path to save the trained models (default: 'models').")
+    help="The path to save the trained models (default: 'models').")
 def train(train_data_dir, test_data_dir, word_dict_path, class_num,
          model_save_dir, batch_size, num_passes):
    """
@@ -70,7 +70,7 @@ def train(train_data_dir, test_data_dir, word_dict_path, class_num,
    :type num_pass: int
    """
    if train_data_dir is not None:
-        assert word_dict_path, ("the parameter train_data_dir, word_dict_path "
+        assert word_dict_path, ("The parameter train_data_dir, word_dict_path "
                                "should be set at the same time.")
    if not os.path.exists(model_save_dir):
@@ -81,7 +81,7 @@ def train(train_data_dir, test_data_dir, word_dict_path, class_num,
    if use_default_data:
        logger.info(("No training data are porivided, "
                     "use imdb to train the model."))
-        logger.info("please wait to build the word dictionary ...")
+        logger.info("Please wait to build the word dictionary ...")
        word_dict = reader.imdb_word_dict()
@@ -94,7 +94,7 @@ def train(train_data_dir, test_data_dir, word_dict_path, class_num,
        class_num = 2
    else:
        if word_dict_path is None or not os.path.exists(word_dict_path):
-            logger.info(("word dictionary is not given, the dictionary "
+            logger.info(("Word dictionary is not given, the dictionary "
                         "is automatically built from the training data."))
            # build the word dictionary to map the original string-typed
@@ -107,7 +107,7 @@ def train(train_data_dir, test_data_dir, word_dict_path, class_num,
        word_dict = load_dict(word_dict_path)
        class_num = class_num
-        logger.info("class number is : %d." % class_num)
+        logger.info("Class number is : %d." % class_num)
        train_reader = paddle.batch(
            paddle.reader.shuffle(
@@ -129,7 +129,7 @@ def train(train_data_dir, test_data_dir, word_dict_path, class_num,
    emb_size = 28
    hidden_size = 128
-    logger.info("length of word dictionary is : %d." % (dict_dim))
+    logger.info("Length of word dictionary is : %d." % (dict_dim))
    paddle.init(use_gpu=True, trainer_count=4)