refine comments of DSSM.

adee3cf1 · caoying03 · f3b1bb5a · adee3cf1 · adee3cf1 · adee3cf1
6 changed file
--- a/dssm/README.cn.md
+++ b/dssm/README.cn.md
--- a/dssm/README.md
+++ b/dssm/README.md
@@ -65,10 +65,11 @@ In below, we describe how to train DSSM model in PaddlePaddle. All the codes are
 ### Create a word vector table for the text
 ```python
 def create_embedding(self, input, prefix=''):
-    '''
-    Create an embedding table whose name has a `prefix`.
-    '''
-    logger.info("create embedding table [%s] which dimention is %d" %
+    """
+    Create word embedding. The `prefix` is added in front of the name of
+    embedding's learnable parameter.
+    """
+    logger.info("Create embedding table [%s] whose dimention is %d" %
                (prefix, self.dnn_dims[0]))
    emb = paddle.layer.embedding(
        input=input,
@@ -82,14 +83,15 @@ Since the input (embedding table) is a list of the IDs of the words correspondin
 ### CNN implementation
 ```python
 def create_cnn(self, emb, prefix=''):
-    '''
+
+    """
    A multi-layer CNN.
+    :param emb: The word embedding.
+    :type emb: paddle.layer
+    :param prefix: The prefix will be added to the layers' names.
+    :type prefix: str
+    """

-    @emb: paddle.layer
-        output of the embedding layer
-    @prefix: str
-        prefix of layers' names, used to share parameters between more than one `cnn` parts.
-    '''
    def create_conv(context_len, hidden_size, prefix):
        key = "%s_%d_%d" % (prefix, context_len, hidden_size)
        conv = paddle.networks.sequence_conv_pool(
@@ -97,15 +99,13 @@ def create_cnn(self, emb, prefix=''):
            context_len=context_len,
            hidden_size=hidden_size,
            # set parameter attr for parameter sharing
-            context_proj_param_attr=ParamAttr(name=key + 'contex_proj.w'),
-            fc_param_attr=ParamAttr(name=key + '_fc.w'),
-            fc_bias_attr=ParamAttr(name=key + '_fc.b'),
-            pool_bias_attr=ParamAttr(name=key + '_pool.b'))
+            context_proj_param_attr=ParamAttr(name=key + "contex_proj.w"),
+            fc_param_attr=ParamAttr(name=key + "_fc.w"),
+            fc_bias_attr=ParamAttr(name=key + "_fc.b"),
+            pool_bias_attr=ParamAttr(name=key + "_pool.b"))
        return conv

-    logger.info('create a sequence_conv_pool which context width is 3')
    conv_3 = create_conv(3, self.dnn_dims[1], "cnn")
-    logger.info('create a sequence_conv_pool which context width is 4')
    conv_4 = create_conv(4, self.dnn_dims[1], "cnn")
    return conv_3, conv_4
 ```
@@ -118,9 +118,9 @@ RNN is suitable for learning variable length of the information

 ```python
 def create_rnn(self, emb, prefix=''):
-    '''
+    """
    A GRU sentence vector learner.
-    '''
+    """
    gru = paddle.networks.simple_gru(
        input=emb,
        size=self.dnn_dims[1],
@@ -136,14 +136,15 @@ def create_rnn(self, emb, prefix=''):

 ```python
 def create_fc(self, emb, prefix=''):
-    '''
+
+    """
    A multi-layer fully connected neural networks.
+    :param emb: The output of the embedding layer
+    :type emb: paddle.layer
+    :param prefix: A prefix will be added to the layers' names.
+    :type prefix: str
+    """

-    @emb: paddle.layer
-        output of the embedding layer
-    @prefix: str
-        prefix of layers' names, used to share parameters between more than one `fc` parts.
-    '''
    _input_layer = paddle.layer.pooling(
        input=emb, pooling_type=paddle.pooling.Max())
    fc = paddle.layer.fc(
@@ -160,13 +161,10 @@ In the construction of FC, we use `paddle.layer.pooling` for the maximum pooling

 ```python
 def create_dnn(self, sent_vec, prefix):
-    # if more than three layers exists, a fc layer will be added.
    if len(self.dnn_dims) > 1:
        _input_layer = sent_vec
        for id, dim in enumerate(self.dnn_dims[1:]):
            name = "%s_fc_%d_%d" % (prefix, id, dim)
-            logger.info("create fc layer [%s] which dimention is %d" %
-                        (name, dim))
            fc = paddle.layer.fc(
                input=_input_layer,
                size=dim,
@@ -180,117 +178,12 @@ def create_dnn(self, sent_vec, prefix):

 ### Classification / Regression
 The structure of classification and regression is similar. Below function can be used for both tasks.
-
-```python
-def _build_classification_or_regression_model(self, is_classification):
-    '''
-    Build a classification/regression model, and the cost is returned.
-
-    A Classification has 3 inputs:
-      - source sentence
-      - target sentence
-      - classification label
-
-    '''
-    # prepare inputs.
-    assert self.class_num
-
-    source = paddle.layer.data(
-        name='source_input',
-        type=paddle.data_type.integer_value_sequence(self.vocab_sizes[0]))
-    target = paddle.layer.data(
-        name='target_input',
-        type=paddle.data_type.integer_value_sequence(self.vocab_sizes[1]))
-    label = paddle.layer.data(
-        name='label_input',
-        type=paddle.data_type.integer_value(self.class_num)
-        if is_classification else paddle.data_type.dense_input)
-
-    prefixs = '_ _'.split(
-    ) if self.share_semantic_generator else 'source target'.split()
-    embed_prefixs = '_ _'.split(
-    ) if self.share_embed else 'source target'.split()
-
-    word_vecs = []
-    for id, input in enumerate([source, target]):
-        x = self.create_embedding(input, prefix=embed_prefixs[id])
-        word_vecs.append(x)
-
-    semantics = []
-    for id, input in enumerate(word_vecs):
-        x = self.model_arch_creater(input, prefix=prefixs[id])
-        semantics.append(x)
-
-    if is_classification:
-        concated_vector = paddle.layer.concat(semantics)
-        prediction = paddle.layer.fc(
-            input=concated_vector,
-            size=self.class_num,
-            act=paddle.activation.Softmax())
-        cost = paddle.layer.classification_cost(
-            input=prediction, label=label)
-    else:
-        prediction = paddle.layer.cos_sim(*semantics)
-        cost = paddle.layer.square_error_cost(prediction, label)
-
-    if not self.is_infer:
-        return cost, prediction, label
-    return prediction
-```
+Please check the function `_build_classification_or_regression_model` in [network_conf.py](https://github.com/PaddlePaddle/models/blob/develop/dssm/network_conf.py) for the detailed implementation.

 ### Pairwise Rank

+Please check the function `_build_rank_model` in [network_conf.py](https://github.com/PaddlePaddle/models/blob/develop/dssm/network_conf.py) for the implementation.

-```python
-def _build_rank_model(self):
-    '''
-    Build a pairwise rank model, and the cost is returned.
-
-    A pairwise rank model has 3 inputs:
-      - source sentence
-      - left_target sentence
-      - right_target sentence
-      - label, 1 if left_target should be sorted in front of right_target, otherwise 0.
-    '''
-    source = paddle.layer.data(
-        name='source_input',
-        type=paddle.data_type.integer_value_sequence(self.vocab_sizes[0]))
-    left_target = paddle.layer.data(
-        name='left_target_input',
-        type=paddle.data_type.integer_value_sequence(self.vocab_sizes[1]))
-    right_target = paddle.layer.data(
-        name='right_target_input',
-        type=paddle.data_type.integer_value_sequence(self.vocab_sizes[1]))
-    label = paddle.layer.data(
-        name='label_input', type=paddle.data_type.integer_value(1))
-
-    prefixs = '_ _ _'.split(
-    ) if self.share_semantic_generator else 'source target target'.split()
-    embed_prefixs = '_ _'.split(
-    ) if self.share_embed else 'source target target'.split()
-
-    word_vecs = []
-    for id, input in enumerate([source, left_target, right_target]):
-        x = self.create_embedding(input, prefix=embed_prefixs[id])
-        word_vecs.append(x)
-
-    semantics = []
-    for id, input in enumerate(word_vecs):
-        x = self.model_arch_creater(input, prefix=prefixs[id])
-        semantics.append(x)
-
-    # cossim score of source and left_target
-    left_score = paddle.layer.cos_sim(semantics[0], semantics[1])
-    # cossim score of source and right target
-    right_score = paddle.layer.cos_sim(semantics[0], semantics[2])
-
-    # rank cost
-    cost = paddle.layer.rank_cost(left_score, right_score, label=label)
-    # prediction = left_score - right_score
-    # but this operator is not supported currently.
-    # so AUC will not used.
-    return cost, None, None
-```
 ## Data Format
 Below is a simple example for the data in `./data`

@@ -347,67 +240,7 @@ The example of this format is as follows.

 ## Training

-We use `python train.py -y 0 --model_arch 0` with the data in  `./data/classification` to train a DSSM model for classification.
-
-
-```
-usage: train.py [-h] [-i TRAIN_DATA_PATH] [-t TEST_DATA_PATH]
-                [-s SOURCE_DIC_PATH] [--target_dic_path TARGET_DIC_PATH]
-                [-b BATCH_SIZE] [-p NUM_PASSES] -y MODEL_TYPE -a MODEL_ARCH
-                [--share_network_between_source_target SHARE_NETWORK_BETWEEN_SOURCE_TARGET]
-                [--share_embed SHARE_EMBED] [--dnn_dims DNN_DIMS]
-                [--num_workers NUM_WORKERS] [--use_gpu USE_GPU] [-c CLASS_NUM]
-                [--model_output_prefix MODEL_OUTPUT_PREFIX]
-                [-g NUM_BATCHES_TO_LOG] [-e NUM_BATCHES_TO_TEST]
-                [-z NUM_BATCHES_TO_SAVE_MODEL]
-
-PaddlePaddle DSSM example
-
-optional arguments:
-  -h, --help            show this help message and exit
-  -i TRAIN_DATA_PATH, --train_data_path TRAIN_DATA_PATH
-                        path of training dataset
-  -t TEST_DATA_PATH, --test_data_path TEST_DATA_PATH
-                        path of testing dataset
-  -s SOURCE_DIC_PATH, --source_dic_path SOURCE_DIC_PATH
-                        path of the source's word dic
-  --target_dic_path TARGET_DIC_PATH
-                        path of the target's word dic, if not set, the
-                        `source_dic_path` will be used
-  -b BATCH_SIZE, --batch_size BATCH_SIZE
-                        size of mini-batch (default:32)
-  -p NUM_PASSES, --num_passes NUM_PASSES
-                        number of passes to run(default:10)
-  -y MODEL_TYPE, --model_type MODEL_TYPE
-                        model type, 0 for classification, 1 for pairwise rank,
-                        2 for regression (default: classification)
-  -a MODEL_ARCH, --model_arch MODEL_ARCH
-                        model architecture, 1 for CNN, 0 for FC, 2 for RNN
-  --share_network_between_source_target SHARE_NETWORK_BETWEEN_SOURCE_TARGET
-                        whether to share network parameters between source and
-                        target
-  --share_embed SHARE_EMBED
-                        whether to share word embedding between source and
-                        target
-  --dnn_dims DNN_DIMS   dimentions of dnn layers, default is '256,128,64,32',
-                        which means create a 4-layer dnn, demention of each
-                        layer is 256, 128, 64 and 32
-  --num_workers NUM_WORKERS
-                        num worker threads, default 1
-  --use_gpu USE_GPU     whether to use GPU devices (default: False)
-  -c CLASS_NUM, --class_num CLASS_NUM
-                        number of categories for classification task.
-  --model_output_prefix MODEL_OUTPUT_PREFIX
-                        prefix of the path for model to store, (default: ./)
-  -g NUM_BATCHES_TO_LOG, --num_batches_to_log NUM_BATCHES_TO_LOG
-                        number of batches to output train log, (default: 100)
-  -e NUM_BATCHES_TO_TEST, --num_batches_to_test NUM_BATCHES_TO_TEST
-                        number of batches to test, (default: 200)
-  -z NUM_BATCHES_TO_SAVE_MODEL, --num_batches_to_save_model NUM_BATCHES_TO_SAVE_MODEL
-                        number of batches to output model, (default: 400)
-```
-
-Parameter description:
+We use `python train.py -y 0 --model_arch 0` with the data in  `./data/classification` to train a DSSM model for classification. The parameters of the script `train.py` can be listed by executing `python train.py --help`. Some important parameters are:

 - `train_data_path` Training data path
 - `test_data_path`  Test data path, optional
@@ -418,48 +251,8 @@ Parameter description:
 - `dnn_dims` The dimension of each layer of the model is set, the default is `256,128,64,32`，with 4 layers.

 ## To predict using the trained model
-```
-usage: infer.py [-h] --model_path MODEL_PATH -i DATA_PATH -o
-                PREDICTION_OUTPUT_PATH -y MODEL_TYPE [-s SOURCE_DIC_PATH]
-                [--target_dic_path TARGET_DIC_PATH] -a MODEL_ARCH
-                [--share_network_between_source_target SHARE_NETWORK_BETWEEN_SOURCE_TARGET]
-                [--share_embed SHARE_EMBED] [--dnn_dims DNN_DIMS]
-                [-c CLASS_NUM]
-
-PaddlePaddle DSSM infer
-
-optional arguments:
-  -h, --help            show this help message and exit
-  --model_path MODEL_PATH
-                        path of model parameters file
-  -i DATA_PATH, --data_path DATA_PATH
-                        path of the dataset to infer
-  -o PREDICTION_OUTPUT_PATH, --prediction_output_path PREDICTION_OUTPUT_PATH
-                        path to output the prediction
-  -y MODEL_TYPE, --model_type MODEL_TYPE
-                        model type, 0 for classification, 1 for pairwise rank,
-                        2 for regression (default: classification)
-  -s SOURCE_DIC_PATH, --source_dic_path SOURCE_DIC_PATH
-                        path of the source's word dic
-  --target_dic_path TARGET_DIC_PATH
-                        path of the target's word dic, if not set, the
-                        `source_dic_path` will be used
-  -a MODEL_ARCH, --model_arch MODEL_ARCH
-                        model architecture, 1 for CNN, 0 for FC, 2 for RNN
-  --share_network_between_source_target SHARE_NETWORK_BETWEEN_SOURCE_TARGET
-                        whether to share network parameters between source and
-                        target
-  --share_embed SHARE_EMBED
-                        whether to share word embedding between source and
-                        target
-  --dnn_dims DNN_DIMS   dimentions of dnn layers, default is '256,128,64,32',
-                        which means create a 4-layer dnn, demention of each
-                        layer is 256, 128, 64 and 32
-  -c CLASS_NUM, --class_num CLASS_NUM
-                        number of categories for classification task.
-```

-Important parameters are
+The parameters of the script `infer.py` can be listed by executing `python infer.py --help`. Some important parameters are:

 - `data_path` Path for the data to predict
 - `prediction_output_path` Prediction output path

--- a/dssm/index.html
+++ b/dssm/index.html
@@ -107,10 +107,11 @@ In below, we describe how to train DSSM model in PaddlePaddle. All the codes are
 ### Create a word vector table for the text
 ```python
 def create_embedding(self, input, prefix=''):
-    '''
-    Create an embedding table whose name has a `prefix`.
-    '''
-    logger.info("create embedding table [%s] which dimention is %d" %
+    """
+    Create word embedding. The `prefix` is added in front of the name of
+    embedding's learnable parameter.
+    """
+    logger.info("Create embedding table [%s] whose dimention is %d" %
                (prefix, self.dnn_dims[0]))
    emb = paddle.layer.embedding(
        input=input,
@@ -124,14 +125,15 @@ Since the input (embedding table) is a list of the IDs of the words correspondin
 ### CNN implementation
 ```python
 def create_cnn(self, emb, prefix=''):
-    '''
+
+    """
    A multi-layer CNN.
+    :param emb: The word embedding.
+    :type emb: paddle.layer
+    :param prefix: The prefix will be added to the layers' names.
+    :type prefix: str
+    """

-    @emb: paddle.layer
-        output of the embedding layer
-    @prefix: str
-        prefix of layers' names, used to share parameters between more than one `cnn` parts.
-    '''
    def create_conv(context_len, hidden_size, prefix):
        key = "%s_%d_%d" % (prefix, context_len, hidden_size)
        conv = paddle.networks.sequence_conv_pool(
@@ -139,15 +141,13 @@ def create_cnn(self, emb, prefix=''):
            context_len=context_len,
            hidden_size=hidden_size,
            # set parameter attr for parameter sharing
-            context_proj_param_attr=ParamAttr(name=key + 'contex_proj.w'),
-            fc_param_attr=ParamAttr(name=key + '_fc.w'),
-            fc_bias_attr=ParamAttr(name=key + '_fc.b'),
-            pool_bias_attr=ParamAttr(name=key + '_pool.b'))
+            context_proj_param_attr=ParamAttr(name=key + "contex_proj.w"),
+            fc_param_attr=ParamAttr(name=key + "_fc.w"),
+            fc_bias_attr=ParamAttr(name=key + "_fc.b"),
+            pool_bias_attr=ParamAttr(name=key + "_pool.b"))
        return conv

-    logger.info('create a sequence_conv_pool which context width is 3')
    conv_3 = create_conv(3, self.dnn_dims[1], "cnn")
-    logger.info('create a sequence_conv_pool which context width is 4')
    conv_4 = create_conv(4, self.dnn_dims[1], "cnn")
    return conv_3, conv_4
 ```
@@ -160,9 +160,9 @@ RNN is suitable for learning variable length of the information

 ```python
 def create_rnn(self, emb, prefix=''):
-    '''
+    """
    A GRU sentence vector learner.
-    '''
+    """
    gru = paddle.networks.simple_gru(
        input=emb,
        size=self.dnn_dims[1],
@@ -178,14 +178,15 @@ def create_rnn(self, emb, prefix=''):

 ```python
 def create_fc(self, emb, prefix=''):
-    '''
+
+    """
    A multi-layer fully connected neural networks.
+    :param emb: The output of the embedding layer
+    :type emb: paddle.layer
+    :param prefix: A prefix will be added to the layers' names.
+    :type prefix: str
+    """

-    @emb: paddle.layer
-        output of the embedding layer
-    @prefix: str
-        prefix of layers' names, used to share parameters between more than one `fc` parts.
-    '''
    _input_layer = paddle.layer.pooling(
        input=emb, pooling_type=paddle.pooling.Max())
    fc = paddle.layer.fc(
@@ -202,13 +203,10 @@ In the construction of FC, we use `paddle.layer.pooling` for the maximum pooling

 ```python
 def create_dnn(self, sent_vec, prefix):
-    # if more than three layers exists, a fc layer will be added.
    if len(self.dnn_dims) > 1:
        _input_layer = sent_vec
        for id, dim in enumerate(self.dnn_dims[1:]):
            name = "%s_fc_%d_%d" % (prefix, id, dim)
-            logger.info("create fc layer [%s] which dimention is %d" %
-                        (name, dim))
            fc = paddle.layer.fc(
                input=_input_layer,
                size=dim,
@@ -222,117 +220,12 @@ def create_dnn(self, sent_vec, prefix):

 ### Classification / Regression
 The structure of classification and regression is similar. Below function can be used for both tasks.
-
-```python
-def _build_classification_or_regression_model(self, is_classification):
-    '''
-    Build a classification/regression model, and the cost is returned.
-
-    A Classification has 3 inputs:
-      - source sentence
-      - target sentence
-      - classification label
-
-    '''
-    # prepare inputs.
-    assert self.class_num
-
-    source = paddle.layer.data(
-        name='source_input',
-        type=paddle.data_type.integer_value_sequence(self.vocab_sizes[0]))
-    target = paddle.layer.data(
-        name='target_input',
-        type=paddle.data_type.integer_value_sequence(self.vocab_sizes[1]))
-    label = paddle.layer.data(
-        name='label_input',
-        type=paddle.data_type.integer_value(self.class_num)
-        if is_classification else paddle.data_type.dense_input)
-
-    prefixs = '_ _'.split(
-    ) if self.share_semantic_generator else 'source target'.split()
-    embed_prefixs = '_ _'.split(
-    ) if self.share_embed else 'source target'.split()
-
-    word_vecs = []
-    for id, input in enumerate([source, target]):
-        x = self.create_embedding(input, prefix=embed_prefixs[id])
-        word_vecs.append(x)
-
-    semantics = []
-    for id, input in enumerate(word_vecs):
-        x = self.model_arch_creater(input, prefix=prefixs[id])
-        semantics.append(x)
-
-    if is_classification:
-        concated_vector = paddle.layer.concat(semantics)
-        prediction = paddle.layer.fc(
-            input=concated_vector,
-            size=self.class_num,
-            act=paddle.activation.Softmax())
-        cost = paddle.layer.classification_cost(
-            input=prediction, label=label)
-    else:
-        prediction = paddle.layer.cos_sim(*semantics)
-        cost = paddle.layer.square_error_cost(prediction, label)
-
-    if not self.is_infer:
-        return cost, prediction, label
-    return prediction
-```
+Please check the function `_build_classification_or_regression_model` in [network_conf.py](https://github.com/PaddlePaddle/models/blob/develop/dssm/network_conf.py) for the detailed implementation.

 ### Pairwise Rank

+Please check the function `_build_rank_model` in [network_conf.py](https://github.com/PaddlePaddle/models/blob/develop/dssm/network_conf.py) for the implementation.

-```python
-def _build_rank_model(self):
-    '''
-    Build a pairwise rank model, and the cost is returned.
-
-    A pairwise rank model has 3 inputs:
-      - source sentence
-      - left_target sentence
-      - right_target sentence
-      - label, 1 if left_target should be sorted in front of right_target, otherwise 0.
-    '''
-    source = paddle.layer.data(
-        name='source_input',
-        type=paddle.data_type.integer_value_sequence(self.vocab_sizes[0]))
-    left_target = paddle.layer.data(
-        name='left_target_input',
-        type=paddle.data_type.integer_value_sequence(self.vocab_sizes[1]))
-    right_target = paddle.layer.data(
-        name='right_target_input',
-        type=paddle.data_type.integer_value_sequence(self.vocab_sizes[1]))
-    label = paddle.layer.data(
-        name='label_input', type=paddle.data_type.integer_value(1))
-
-    prefixs = '_ _ _'.split(
-    ) if self.share_semantic_generator else 'source target target'.split()
-    embed_prefixs = '_ _'.split(
-    ) if self.share_embed else 'source target target'.split()
-
-    word_vecs = []
-    for id, input in enumerate([source, left_target, right_target]):
-        x = self.create_embedding(input, prefix=embed_prefixs[id])
-        word_vecs.append(x)
-
-    semantics = []
-    for id, input in enumerate(word_vecs):
-        x = self.model_arch_creater(input, prefix=prefixs[id])
-        semantics.append(x)
-
-    # cossim score of source and left_target
-    left_score = paddle.layer.cos_sim(semantics[0], semantics[1])
-    # cossim score of source and right target
-    right_score = paddle.layer.cos_sim(semantics[0], semantics[2])
-
-    # rank cost
-    cost = paddle.layer.rank_cost(left_score, right_score, label=label)
-    # prediction = left_score - right_score
-    # but this operator is not supported currently.
-    # so AUC will not used.
-    return cost, None, None
-```
 ## Data Format
 Below is a simple example for the data in `./data`

@@ -389,67 +282,7 @@ The example of this format is as follows.

 ## Training

-We use `python train.py -y 0 --model_arch 0` with the data in  `./data/classification` to train a DSSM model for classification.
-
-
-```
-usage: train.py [-h] [-i TRAIN_DATA_PATH] [-t TEST_DATA_PATH]
-                [-s SOURCE_DIC_PATH] [--target_dic_path TARGET_DIC_PATH]
-                [-b BATCH_SIZE] [-p NUM_PASSES] -y MODEL_TYPE -a MODEL_ARCH
-                [--share_network_between_source_target SHARE_NETWORK_BETWEEN_SOURCE_TARGET]
-                [--share_embed SHARE_EMBED] [--dnn_dims DNN_DIMS]
-                [--num_workers NUM_WORKERS] [--use_gpu USE_GPU] [-c CLASS_NUM]
-                [--model_output_prefix MODEL_OUTPUT_PREFIX]
-                [-g NUM_BATCHES_TO_LOG] [-e NUM_BATCHES_TO_TEST]
-                [-z NUM_BATCHES_TO_SAVE_MODEL]
-
-PaddlePaddle DSSM example
-
-optional arguments:
-  -h, --help            show this help message and exit
-  -i TRAIN_DATA_PATH, --train_data_path TRAIN_DATA_PATH
-                        path of training dataset
-  -t TEST_DATA_PATH, --test_data_path TEST_DATA_PATH
-                        path of testing dataset
-  -s SOURCE_DIC_PATH, --source_dic_path SOURCE_DIC_PATH
-                        path of the source's word dic
-  --target_dic_path TARGET_DIC_PATH
-                        path of the target's word dic, if not set, the
-                        `source_dic_path` will be used
-  -b BATCH_SIZE, --batch_size BATCH_SIZE
-                        size of mini-batch (default:32)
-  -p NUM_PASSES, --num_passes NUM_PASSES
-                        number of passes to run(default:10)
-  -y MODEL_TYPE, --model_type MODEL_TYPE
-                        model type, 0 for classification, 1 for pairwise rank,
-                        2 for regression (default: classification)
-  -a MODEL_ARCH, --model_arch MODEL_ARCH
-                        model architecture, 1 for CNN, 0 for FC, 2 for RNN
-  --share_network_between_source_target SHARE_NETWORK_BETWEEN_SOURCE_TARGET
-                        whether to share network parameters between source and
-                        target
-  --share_embed SHARE_EMBED
-                        whether to share word embedding between source and
-                        target
-  --dnn_dims DNN_DIMS   dimentions of dnn layers, default is '256,128,64,32',
-                        which means create a 4-layer dnn, demention of each
-                        layer is 256, 128, 64 and 32
-  --num_workers NUM_WORKERS
-                        num worker threads, default 1
-  --use_gpu USE_GPU     whether to use GPU devices (default: False)
-  -c CLASS_NUM, --class_num CLASS_NUM
-                        number of categories for classification task.
-  --model_output_prefix MODEL_OUTPUT_PREFIX
-                        prefix of the path for model to store, (default: ./)
-  -g NUM_BATCHES_TO_LOG, --num_batches_to_log NUM_BATCHES_TO_LOG
-                        number of batches to output train log, (default: 100)
-  -e NUM_BATCHES_TO_TEST, --num_batches_to_test NUM_BATCHES_TO_TEST
-                        number of batches to test, (default: 200)
-  -z NUM_BATCHES_TO_SAVE_MODEL, --num_batches_to_save_model NUM_BATCHES_TO_SAVE_MODEL
-                        number of batches to output model, (default: 400)
-```
-
-Parameter description:
+We use `python train.py -y 0 --model_arch 0` with the data in  `./data/classification` to train a DSSM model for classification. The parameters of the script `train.py` can be listed by executing `python train.py --help`. Some important parameters are:

 - `train_data_path` Training data path
 - `test_data_path`  Test data path, optional
@@ -460,48 +293,8 @@ Parameter description:
 - `dnn_dims` The dimension of each layer of the model is set, the default is `256,128,64,32`，with 4 layers.

 ## To predict using the trained model
-```
-usage: infer.py [-h] --model_path MODEL_PATH -i DATA_PATH -o
-                PREDICTION_OUTPUT_PATH -y MODEL_TYPE [-s SOURCE_DIC_PATH]
-                [--target_dic_path TARGET_DIC_PATH] -a MODEL_ARCH
-                [--share_network_between_source_target SHARE_NETWORK_BETWEEN_SOURCE_TARGET]
-                [--share_embed SHARE_EMBED] [--dnn_dims DNN_DIMS]
-                [-c CLASS_NUM]
-
-PaddlePaddle DSSM infer
-
-optional arguments:
-  -h, --help            show this help message and exit
-  --model_path MODEL_PATH
-                        path of model parameters file
-  -i DATA_PATH, --data_path DATA_PATH
-                        path of the dataset to infer
-  -o PREDICTION_OUTPUT_PATH, --prediction_output_path PREDICTION_OUTPUT_PATH
-                        path to output the prediction
-  -y MODEL_TYPE, --model_type MODEL_TYPE
-                        model type, 0 for classification, 1 for pairwise rank,
-                        2 for regression (default: classification)
-  -s SOURCE_DIC_PATH, --source_dic_path SOURCE_DIC_PATH
-                        path of the source's word dic
-  --target_dic_path TARGET_DIC_PATH
-                        path of the target's word dic, if not set, the
-                        `source_dic_path` will be used
-  -a MODEL_ARCH, --model_arch MODEL_ARCH
-                        model architecture, 1 for CNN, 0 for FC, 2 for RNN
-  --share_network_between_source_target SHARE_NETWORK_BETWEEN_SOURCE_TARGET
-                        whether to share network parameters between source and
-                        target
-  --share_embed SHARE_EMBED
-                        whether to share word embedding between source and
-                        target
-  --dnn_dims DNN_DIMS   dimentions of dnn layers, default is '256,128,64,32',
-                        which means create a 4-layer dnn, demention of each
-                        layer is 256, 128, 64 and 32
-  -c CLASS_NUM, --class_num CLASS_NUM
-                        number of categories for classification task.
-```

-Important parameters are
+The parameters of the script `infer.py` can be listed by executing `python infer.py --help`. Some important parameters are:

 - `data_path` Path for the data to predict
 - `prediction_output_path` Prediction output path

--- a/dssm/infer.py
+++ b/dssm/infer.py
@@ -9,30 +9,27 @@ from utils import logger, ModelType, ModelArch, load_dic

 parser = argparse.ArgumentParser(description="PaddlePaddle DSSM infer")
 parser.add_argument(
-    "--model_path",
-    type=str,
-    required=True,
-    help="path of model parameters file")
+    "--model_path", type=str, required=True, help="The path of trained model.")
 parser.add_argument(
    "-i",
    "--data_path",
    type=str,
    required=True,
-    help="path of the dataset to infer")
+    help="The path of the data for inferring.")
 parser.add_argument(
    "-o",
    "--prediction_output_path",
    type=str,
    required=True,
-    help="path to output the prediction")
+    help="The path to save the predictions.")
 parser.add_argument(
    "-y",
    "--model_type",
    type=int,
    required=True,
    default=ModelType.CLASSIFICATION_MODE,
-    help=("model type, %d for classification, %d for pairwise rank, "
-          "%d for regression (default: classification)") %
+    help=("The model type: %d for classification, %d for pairwise rank, "
+          "%d for regression (default: classification).") %
    (ModelType.CLASSIFICATION_MODE, ModelType.RANK_MODE,
     ModelType.REGRESSION_MODE))
 parser.add_argument(
@@ -40,13 +37,13 @@ parser.add_argument(
    "--source_dic_path",
    type=str,
    required=False,
-    help="path of the source's word dic")
+    help="The path of the source's word dictionary.")
 parser.add_argument(
    "--target_dic_path",
    type=str,
    required=False,
-    help=("path of the target's word dictionary, "
-          "if not set, the `source_dic_path` will be used"))
+    help=("The path of the target's word dictionary, "
+          "if this parameter is not set, the `source_dic_path` will be used."))
 parser.add_argument(
    "-a",
    "--model_arch",
@@ -69,15 +66,15 @@ parser.add_argument(
    "--dnn_dims",
    type=str,
    default="256,128,64,32",
-    help=("dimentions of dnn layers, default is `256,128,64,32`, "
-          "which means create a 4-layer dnn, "
-          "demention of each layer is 256, 128, 64 and 32"))
+    help=("The dimensions of dnn layers, default is `256,128,64,32`, "
+          "which means a dnn with 4 layers with "
+          "dimensions 256, 128, 64 and 32 will be created."))
 parser.add_argument(
    "-c",
    "--class_num",
    type=int,
    default=0,
-    help="number of categories for classification task.")
+    help="The number of categories for classification task.")

 args = parser.parse_args()
 args.model_type = ModelType(args.model_type)

--- a/dssm/train.py
+++ b/dssm/train.py
@@ -9,120 +9,129 @@ from utils import TaskType, load_dic, logger, ModelType, ModelArch, display_args
 parser = argparse.ArgumentParser(description="PaddlePaddle DSSM example")

 parser.add_argument(
-    '-i',
-    '--train_data_path',
+    "-i",
+    "--train_data_path",
    type=str,
    required=False,
-    help="path of training dataset")
+    help="The path of training data.")
 parser.add_argument(
-    '-t',
-    '--test_data_path',
+    "-t",
+    "--test_data_path",
    type=str,
    required=False,
-    help="path of testing dataset")
+    help="The path of testing data.")
 parser.add_argument(
-    '-s',
-    '--source_dic_path',
+    "-s",
+    "--source_dic_path",
    type=str,
    required=False,
-    help="path of the source's word dic")
+    help="The path of the source's word dictionary.")
 parser.add_argument(
-    '--target_dic_path',
+    "--target_dic_path",
    type=str,
    required=False,
-    help=("path of the target's word dictionary, "
-          "if not set, the `source_dic_path` will be used"))
+    help=("The path of the target's word dictionary, "
+          "if this parameter is not set, the `source_dic_path` will be used."))
 parser.add_argument(
-    '-b',
-    '--batch_size',
+    "-b",
+    "--batch_size",
    type=int,
    default=32,
-    help="size of mini-batch (default:32)")
+    help="The size of mini-batch (default:32).")
 parser.add_argument(
-    '-p',
-    '--num_passes',
+    "-p",
+    "--num_passes",
    type=int,
    default=10,
-    help="number of passes to run(default:10)")
+    help="The number of passes to run(default:10).")
 parser.add_argument(
-    '-y',
-    '--model_type',
+    "-y",
+    "--model_type",
    type=int,
    required=True,
    default=ModelType.CLASSIFICATION_MODE,
-    help="model type, %d for classification, %d for pairwise rank, %d for regression (default: classification)"
-    % (ModelType.CLASSIFICATION_MODE, ModelType.RANK_MODE,
-       ModelType.REGRESSION_MODE))
+    help=("The model type: %d for classification, %d for pairwise rank, "
+          "%d for regression (default: classification).") %
+    (ModelType.CLASSIFICATION_MODE, ModelType.RANK_MODE,
+     ModelType.REGRESSION_MODE))
 parser.add_argument(
-    '-a',
-    '--model_arch',
+    "-a",
+    "--model_arch",
    type=int,
    required=True,
    default=ModelArch.CNN_MODE,
-    help="model architecture, %d for CNN, %d for FC, %d for RNN" %
+    help="The model architecture, %d for CNN, %d for FC, %d for RNN." %
    (ModelArch.CNN_MODE, ModelArch.FC_MODE, ModelArch.RNN_MODE))
 parser.add_argument(
-    '--share_network_between_source_target',
+    "--share_network_between_source_target",
    type=distutils.util.strtobool,
    default=False,
-    help="whether to share network parameters between source and target")
+    help="Whether to share network parameters between source and target.")
 parser.add_argument(
-    '--share_embed',
+    "--share_embed",
    type=distutils.util.strtobool,
    default=False,
-    help="whether to share word embedding between source and target")
+    help="Whether to share word embedding between source and target.")
 parser.add_argument(
-    '--dnn_dims',
+    "--dnn_dims",
    type=str,
-    default='256,128,64,32',
-    help="dimentions of dnn layers, default is '256,128,64,32', which means create a 4-layer dnn, demention of each layer is 256, 128, 64 and 32"
-)
+    default="256,128,64,32",
+    help=("The dimensions of dnn layers, default is '256,128,64,32', "
+          "which creates a 4-layer dnn. The dimension of each layer is "
+          "'256, 128, 64 and 32'."))
 parser.add_argument(
-    '--num_workers', type=int, default=1, help="num worker threads, default 1")
+    "--num_workers",
+    type=int,
+    default=1,
+    help="The number of worker threads, default 1.")
 parser.add_argument(
-    '--use_gpu',
+    "--use_gpu",
    type=distutils.util.strtobool,
    default=False,
-    help="whether to use GPU devices (default: False)")
+    help="Whether to use GPU devices (default: False).")
 parser.add_argument(
-    '-c',
-    '--class_num',
+    "-c",
+    "--class_num",
    type=int,
    default=0,
-    help="number of categories for classification task.")
+    help="The number of categories for classification task.")
 parser.add_argument(
-    '--model_output_prefix',
+    "--model_output_prefix",
    type=str,
    default="./",
-    help="prefix of the path for model to store, (default: ./)")
+    help="The prefix of the path to store the trained models (default: ./).")
 parser.add_argument(
-    '-g',
-    '--num_batches_to_log',
+    "-g",
+    "--num_batches_to_log",
    type=int,
    default=100,
-    help="number of batches to output train log, (default: 100)")
+    help=("The log period. Every num_batches_to_log batches, "
+          "a training log will be printed (default: 100)."))
 parser.add_argument(
-    '-e',
-    '--num_batches_to_test',
+    "-e",
+    "--num_batches_to_test",
    type=int,
    default=200,
-    help="number of batches to test, (default: 200)")
+    help=("The test period. Every num_batches_to_test batches, "
+          "the specified test samples will be tested (default: 200)."))
 parser.add_argument(
-    '-z',
-    '--num_batches_to_save_model',
+    "-z",
+    "--num_batches_to_save_model",
    type=int,
    default=400,
-    help="number of batches to output model, (default: 400)")
+    help=("Every num_batches_to_save_model batches, "
+          "a trained model will be saved (default: 400)."))

-# arguments check.
 args = parser.parse_args()
 args.model_type = ModelType(args.model_type)
 args.model_arch = ModelArch(args.model_arch)
 if args.model_type.is_classification():
-    assert args.class_num > 1, "--class_num should be set in classification task."
+    assert args.class_num > 1, ("The parameter class_num should be set in "
+                                "classification task.")

-layer_dims = [int(i) for i in args.dnn_dims.split(',')]
-args.target_dic_path = args.source_dic_path if not args.target_dic_path else args.target_dic_path
+layer_dims = [int(i) for i in args.dnn_dims.split(",")]
+args.target_dic_path = args.source_dic_path if not \
+        args.target_dic_path else args.target_dic_path


 def train(train_data_path=None,
@@ -138,15 +147,15 @@ def train(train_data_path=None,
          class_num=None,
          num_workers=1,
          use_gpu=False):
-    '''
+    """
    Train the DSSM.
-    '''
-    default_train_path = './data/rank/train.txt'
-    default_test_path = './data/rank/test.txt'
-    default_dic_path = './data/vocab.txt'
+    """
+    default_train_path = "./data/rank/train.txt"
+    default_test_path = "./data/rank/test.txt"
+    default_dic_path = "./data/vocab.txt"
    if not model_type.is_rank():
-        default_train_path = './data/classification/train.txt'
-        default_test_path = './data/classification/test.txt'
+        default_train_path = "./data/classification/train.txt"
+        default_test_path = "./data/classification/test.txt"

    use_default_data = not train_data_path

@@ -200,19 +209,19 @@ def train(train_data_path=None,

    feeding = {}
    if model_type.is_classification() or model_type.is_regression():
-        feeding = {'source_input': 0, 'target_input': 1, 'label_input': 2}
+        feeding = {"source_input": 0, "target_input": 1, "label_input": 2}
    else:
        feeding = {
-            'source_input': 0,
-            'left_target_input': 1,
-            'right_target_input': 2,
-            'label_input': 3
+            "source_input": 0,
+            "left_target_input": 1,
+            "right_target_input": 2,
+            "label_input": 3
        }

    def _event_handler(event):
-        '''
+        """
        Define batch handler
-        '''
+        """
        if isinstance(event, paddle.event.EndIteration):
            # output train log
            if event.batch_id % args.num_batches_to_log == 0:
@@ -249,7 +258,7 @@ def train(train_data_path=None,
    logger.info("Training has finished.")


-if __name__ == '__main__':
+if __name__ == "__main__":
    display_args(args)
    train(
        train_data_path=args.train_data_path,

--- a/dssm/utils.py
+++ b/dssm/utils.py
@@ -8,7 +8,7 @@ logger.setLevel(logging.INFO)


 def mode_attr_name(mode):
-    return mode.upper() + '_MODE'
+    return mode.upper() + "_MODE"


 def create_attrs(cls):
@@ -17,9 +17,9 @@ def create_attrs(cls):


 def make_check_method(cls):
-    '''
+    """
    create methods for classes.
-    '''
+    """

    def method(mode):
        def _method(self):
@@ -28,7 +28,7 @@ def make_check_method(cls):
        return _method

    for id, mode in enumerate(cls.modes):
-        setattr(cls, 'is_' + mode, method(mode))
+        setattr(cls, "is_" + mode, method(mode))


 def make_create_method(cls):
@@ -41,10 +41,10 @@ def make_create_method(cls):
        return _method

    for id, mode in enumerate(cls.modes):
-        setattr(cls, 'create_' + mode, method(mode))
+        setattr(cls, "create_" + mode, method(mode))


-def make_str_method(cls, type_name='unk'):
+def make_str_method(cls, type_name="unk"):
    def _str_(self):
        for mode in cls.modes:
            if self.mode == getattr(cls, mode_attr_name(mode)):
@@ -53,9 +53,9 @@ def make_str_method(cls, type_name='unk'):
    def _hash_(self):
        return self.mode

-    setattr(cls, '__str__', _str_)
-    setattr(cls, '__repr__', _str_)
-    setattr(cls, '__hash__', _hash_)
+    setattr(cls, "__str__", _str_)
+    setattr(cls, "__repr__", _str_)
+    setattr(cls, "__hash__", _hash_)
    cls.__name__ = type_name


@@ -65,7 +65,7 @@ def _init_(self, mode, cls):
    elif isinstance(mode, cls):
        self.mode = mode.mode
    else:
-        raise Exception("wrong mode type, get type: %s, value: %s" %
+        raise Exception("A wrong mode type, get type: %s, value: %s." %
                        (type(mode), mode))


@@ -77,21 +77,21 @@ def build_mode_class(cls):


 class TaskType(object):
-    modes = 'train test infer'.split()
+    modes = "train test infer".split()

    def __init__(self, mode):
        _init_(self, mode, TaskType)


 class ModelType:
-    modes = 'classification rank regression'.split()
+    modes = "classification rank regression".split()

    def __init__(self, mode):
        _init_(self, mode, ModelType)


 class ModelArch:
-    modes = 'fc cnn rnn'.split()
+    modes = "fc cnn rnn".split()

    def __init__(self, mode):
        _init_(self, mode, ModelArch)
@@ -103,22 +103,16 @@ build_mode_class(ModelArch)


 def sent2ids(sent, vocab):
-    '''
+    """
    transform a sentence to a list of ids.
-
-    @sent: str
-        a sentence.
-    @vocab: dict
-        a word dic
-    '''
+    """
    return [vocab.get(w, UNK) for w in sent.split()]


 def load_dic(path):
-    '''
-    word dic format:
-      each line is a word
-    '''
+    """
+    The format of the word dictionary: each line is a word.
+    """
    dic = {}
    with open(path) as f:
        for id, line in enumerate(f):
@@ -128,13 +122,6 @@ def load_dic(path):


 def display_args(args):
-    logger.info("arguments passed by command line:")
+    logger.info("The arguments passed by command line are:")
    for k, v in sorted(v for v in vars(args).items()):
        logger.info("{}:\t{}".format(k, v))
-
-
-if __name__ == '__main__':
-    t = TaskType(1)
-    t = TaskType.create_train()
-    print t
-    print 'is', t.is_train()