From 4fb0c3bbf243c4016d56897099ef8c2fc269ac55 Mon Sep 17 00:00:00 2001
From: caoying03 <caoying03@baidu.com>
Date: Wed, 27 Sep 2017 21:10:14 +0800
Subject: [PATCH] Small code cleanups.

---
 ctr/avazu_data_processer.py                   |  2 -
 ctr/infer.py                                  |  2 -
 ctr/network_conf.py                           |  2 -
 ctr/train.py                                  |  2 -
 dssm/infer.py                                 | 27 ++++----
 dssm/network_conf.py                          | 15 +++--
 dssm/reader.py                                |  6 +-
 dssm/train.py                                 | 12 ++--
 generate_chinese_poetry/README.md             |  1 +
 generate_chinese_poetry/index.html            | 65 +++++++++++++++++++
 hsigmoid/infer.py                             |  2 -
 hsigmoid/network_conf.py                      |  4 +-
 hsigmoid/train.py                             |  2 -
 image_classification/train.py                 |  0
 ltr/lambda_rank.py                            | 24 ++++---
 ltr/metrics.py                                |  1 -
 ltr/ranknet.py                                | 19 +++---
 mt_with_external_memory/external_memory.py    |  2 +-
 mt_with_external_memory/model.py              |  2 +-
 nce_cost/infer.py                             |  2 -
 nce_cost/network_conf.py                      |  2 -
 nce_cost/train.py                             |  2 -
 nmt_without_attention/generate.py             |  1 -
 nmt_without_attention/network_conf.py         |  1 -
 nmt_without_attention/train.py                |  1 -
 .../random_schedule_generator.py              |  3 +-
 text_classification/infer.py                  |  2 -
 text_classification/reader.py                 |  2 -
 text_classification/train.py                  |  2 -
 text_classification/utils.py                  |  2 -
 30 files changed, 128 insertions(+), 82 deletions(-)
 create mode 100644 generate_chinese_poetry/README.md
 create mode 100644 generate_chinese_poetry/index.html
 mode change 100755 => 100644 image_classification/train.py
 mode change 100755 => 100644 mt_with_external_memory/external_memory.py

diff --git a/ctr/avazu_data_processer.py b/ctr/avazu_data_processer.py
index ca150d8f..dd148adc 100644
--- a/ctr/avazu_data_processer.py
+++ b/ctr/avazu_data_processer.py
@@ -1,5 +1,3 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-import os
 import sys
 import csv
 import cPickle
diff --git a/ctr/infer.py b/ctr/infer.py
index 721c6b01..6541c746 100644
--- a/ctr/infer.py
+++ b/ctr/infer.py
@@ -1,5 +1,3 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
 import gzip
 import argparse
 import itertools
diff --git a/ctr/network_conf.py b/ctr/network_conf.py
index a90d1dc6..b01e4872 100644
--- a/ctr/network_conf.py
+++ b/ctr/network_conf.py
@@ -1,5 +1,3 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
 import paddle.v2 as paddle
 from paddle.v2 import layer
 from paddle.v2 import data_type as dtype
diff --git a/ctr/train.py b/ctr/train.py
index 64831089..235e6fa5 100644
--- a/ctr/train.py
+++ b/ctr/train.py
@@ -1,5 +1,3 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-import os
 import argparse
 import gzip
 
diff --git a/dssm/infer.py b/dssm/infer.py
index bf5abb0a..dc5595ab 100644
--- a/dssm/infer.py
+++ b/dssm/infer.py
@@ -1,5 +1,3 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
 import argparse
 import itertools
 
@@ -32,9 +30,10 @@ parser.add_argument(
     type=int,
     required=True,
     default=ModelType.CLASSIFICATION_MODE,
-    help="model type, %d for classification, %d for pairwise rank, %d for regression (default: classification)"
-    % (ModelType.CLASSIFICATION_MODE, ModelType.RANK_MODE,
-       ModelType.REGRESSION_MODE))
+    help=("model type, %d for classification, %d for pairwise rank, "
+          "%d for regression (default: classification)") %
+    (ModelType.CLASSIFICATION_MODE, ModelType.RANK_MODE,
+     ModelType.REGRESSION_MODE))
 parser.add_argument(
     '-s',
     '--source_dic_path',
@@ -45,8 +44,8 @@ parser.add_argument(
     '--target_dic_path',
     type=str,
     required=False,
-    help="path of the target's word dic, if not set, the `source_dic_path` will be used"
-)
+    help=("path of the target's word dictionary, "
+          "if not set, the `source_dic_path` will be used"))
 parser.add_argument(
     '-a',
     '--model_arch',
@@ -69,8 +68,9 @@ parser.add_argument(
     '--dnn_dims',
     type=str,
     default='256,128,64,32',
-    help="dimentions of dnn layers, default is '256,128,64,32', which means create a 4-layer dnn, demention of each layer is 256, 128, 64 and 32"
-)
+    help=("dimentions of dnn layers, default is '256,128,64,32', "
+          "which means create a 4-layer dnn, "
+          "demention of each layer is 256, 128, 64 and 32"))
 parser.add_argument(
     '-c',
     '--class_num',
@@ -85,7 +85,8 @@ if args.model_type.is_classification():
     assert args.class_num > 1, "--class_num should be set in classification task."
 
 layer_dims = map(int, args.dnn_dims.split(','))
-args.target_dic_path = args.source_dic_path if not args.target_dic_path else args.target_dic_path
+args.target_dic_path = args.source_dic_path if not args.target_dic_path \
+        else args.target_dic_path
 
 paddle.init(use_gpu=False, trainer_count=1)
 
@@ -130,9 +131,9 @@ class Inferer(object):
         for id, batch in enumerate(infer_reader()):
             res = self.inferer.infer(input=batch)
             predictions = [' '.join(map(str, x)) for x in res]
-            assert len(batch) == len(
-                predictions), "predict error, %d inputs, but %d predictions" % (
-                    len(batch), len(predictions))
+            assert len(batch) == len(predictions), (
+                "predict error, %d inputs, "
+                "but %d predictions") % (len(batch), len(predictions))
             output_f.write('\n'.join(map(str, predictions)) + '\n')
 
 
diff --git a/dssm/network_conf.py b/dssm/network_conf.py
index 04c2b7e2..10c8974f 100644
--- a/dssm/network_conf.py
+++ b/dssm/network_conf.py
@@ -29,9 +29,9 @@ class DSSM(object):
         @class_num: int
             number of categories.
         '''
-        assert len(
-            vocab_sizes
-        ) == 2, "vocab_sizes specify the sizes left and right inputs, and dim should be 2."
+        assert len(vocab_sizes) == 2, (
+            "vocab_sizes specify the sizes left and right inputs, "
+            "and dim should be 2.")
         assert len(dnn_dims) > 1, "more than two layers is needed."
 
         self.dnn_dims = dnn_dims
@@ -91,7 +91,8 @@ class DSSM(object):
         @emb: paddle.layer
             output of the embedding layer
         @prefix: str
-            prefix of layers' names, used to share parameters between more than one `fc` parts.
+            prefix of layers' names, used to share parameters between
+            more than one `fc` parts.
         '''
         _input_layer = paddle.layer.pooling(
             input=emb, pooling_type=paddle.pooling.Max())
@@ -113,7 +114,8 @@ class DSSM(object):
         @emb: paddle.layer
             output of the embedding layer
         @prefix: str
-            prefix of layers' names, used to share parameters between more than one `cnn` parts.
+            prefix of layers' names, used to share parameters between
+            more than one `cnn` parts.
         '''
 
         def create_conv(context_len, hidden_size, prefix):
@@ -174,7 +176,8 @@ class DSSM(object):
           - source sentence
           - left_target sentence
           - right_target sentence
-          - label, 1 if left_target should be sorted in front of right_target, otherwise 0.
+          - label, 1 if left_target should be sorted in front of
+                   right_target, otherwise 0.
         '''
         logger.info("build rank model")
         assert self.model_type.is_rank()
diff --git a/dssm/reader.py b/dssm/reader.py
index 677072da..f39cd7f5 100644
--- a/dssm/reader.py
+++ b/dssm/reader.py
@@ -1,6 +1,5 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-from utils import UNK, ModelType, TaskType, load_dic, sent2ids, logger, ModelType
+from utils import UNK, ModelType, TaskType, load_dic, \
+        sent2ids, logger, ModelType
 
 
 class Dataset(object):
@@ -38,7 +37,6 @@ class Dataset(object):
         '''
         Load testset.
         '''
-        # logger.info("[reader] load testset from %s" % self.test_path)
         with open(self.test_path) as f:
             for line_id, line in enumerate(f):
                 yield self.record_reader(line)
diff --git a/dssm/train.py b/dssm/train.py
index d1dd9328..98e7f321 100644
--- a/dssm/train.py
+++ b/dssm/train.py
@@ -1,5 +1,3 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
 import argparse
 
 import paddle.v2 as paddle
@@ -31,8 +29,8 @@ parser.add_argument(
     '--target_dic_path',
     type=str,
     required=False,
-    help="path of the target's word dic, if not set, the `source_dic_path` will be used"
-)
+    help=("path of the target's word dictionary, "
+          "if not set, the `source_dic_path` will be used"))
 parser.add_argument(
     '-b',
     '--batch_size',
@@ -221,7 +219,8 @@ def train(train_data_path=None,
                     event.pass_id, event.batch_id, event.cost, event.metrics))
 
             # test model
-            if event.batch_id > 0 and event.batch_id % args.num_batches_to_test == 0:
+            if event.batch_id > 0 and \
+                    event.batch_id % args.num_batches_to_test == 0:
                 if test_reader is not None:
                     if model_type.is_classification():
                         result = trainer.test(
@@ -231,7 +230,8 @@ def train(train_data_path=None,
                     else:
                         result = None
             # save model
-            if event.batch_id > 0 and event.batch_id % args.num_batches_to_save_model == 0:
+            if event.batch_id > 0 and \
+                    event.batch_id % args.num_batches_to_save_model == 0:
                 model_desc = "{type}_{arch}".format(
                     type=str(args.model_type), arch=str(args.model_arch))
                 with open("%sdssm_%s_pass_%05d.tar" %
diff --git a/generate_chinese_poetry/README.md b/generate_chinese_poetry/README.md
new file mode 100644
index 00000000..f6a09ed2
--- /dev/null
+++ b/generate_chinese_poetry/README.md
@@ -0,0 +1 @@
+[TBD]
diff --git a/generate_chinese_poetry/index.html b/generate_chinese_poetry/index.html
new file mode 100644
index 00000000..a5dba006
--- /dev/null
+++ b/generate_chinese_poetry/index.html
@@ -0,0 +1,65 @@
+
+<html>
+<head>
+  <script type="text/x-mathjax-config">
+  MathJax.Hub.Config({
+    extensions: ["tex2jax.js", "TeX/AMSsymbols.js", "TeX/AMSmath.js"],
+    jax: ["input/TeX", "output/HTML-CSS"],
+    tex2jax: {
+      inlineMath: [ ['$','$'] ],
+      displayMath: [ ['$$','$$'] ],
+      processEscapes: true
+    },
+    "HTML-CSS": { availableFonts: ["TeX"] }
+  });
+  </script>
+  <script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/MathJax.js" async></script>
+  <script type="text/javascript" src="../.tools/theme/marked.js">
+  </script>
+  <link href="http://cdn.bootcss.com/highlight.js/9.9.0/styles/darcula.min.css" rel="stylesheet">
+  <script src="http://cdn.bootcss.com/highlight.js/9.9.0/highlight.min.js"></script>
+  <link href="http://cdn.bootcss.com/bootstrap/4.0.0-alpha.6/css/bootstrap.min.css" rel="stylesheet">
+  <link href="https://cdn.jsdelivr.net/perfect-scrollbar/0.6.14/css/perfect-scrollbar.min.css" rel="stylesheet">
+  <link href="../.tools/theme/github-markdown.css" rel='stylesheet'>
+</head>
+<style type="text/css" >
+.markdown-body {
+    box-sizing: border-box;
+    min-width: 200px;
+    max-width: 980px;
+    margin: 0 auto;
+    padding: 45px;
+}
+</style>
+
+
+<body>
+
+<div id="context" class="container-fluid markdown-body">
+</div>
+
+<!-- This block will be replaced by each markdown file content. Please do not change lines below.-->
+<div id="markdown" style='display:none'>
+[TBD]
+
+</div>
+<!-- You can change the lines below now. -->
+
+<script type="text/javascript">
+marked.setOptions({
+  renderer: new marked.Renderer(),
+  gfm: true,
+  breaks: false,
+  smartypants: true,
+  highlight: function(code, lang) {
+    code = code.replace(/&amp;/g, "&")
+    code = code.replace(/&gt;/g, ">")
+    code = code.replace(/&lt;/g, "<")
+    code = code.replace(/&nbsp;/g, " ")
+    return hljs.highlightAuto(code, [lang]).value;
+  }
+});
+document.getElementById("context").innerHTML = marked(
+        document.getElementById("markdown").innerHTML)
+</script>
+</body>
diff --git a/hsigmoid/infer.py b/hsigmoid/infer.py
index 8645d00d..df6fd1f7 100644
--- a/hsigmoid/infer.py
+++ b/hsigmoid/infer.py
@@ -1,5 +1,3 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
 import os
 import logging
 import gzip
diff --git a/hsigmoid/network_conf.py b/hsigmoid/network_conf.py
index 49449478..072c28c9 100644
--- a/hsigmoid/network_conf.py
+++ b/hsigmoid/network_conf.py
@@ -1,7 +1,5 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
 import math
+
 import paddle.v2 as paddle
 
 
diff --git a/hsigmoid/train.py b/hsigmoid/train.py
index 809c842a..1763772e 100644
--- a/hsigmoid/train.py
+++ b/hsigmoid/train.py
@@ -1,5 +1,3 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
 import os
 import logging
 import gzip
diff --git a/image_classification/train.py b/image_classification/train.py
old mode 100755
new mode 100644
diff --git a/ltr/lambda_rank.py b/ltr/lambda_rank.py
index 5318b7ce..fb527ed3 100644
--- a/ltr/lambda_rank.py
+++ b/ltr/lambda_rank.py
@@ -1,14 +1,18 @@
-import os, sys
+import os
+import sys
 import gzip
-import paddle.v2 as paddle
-import numpy as np
 import functools
 import argparse
+import numpy as np
+
+import paddle.v2 as paddle
 
 
 def lambda_rank(input_dim):
     """
-    lambda_rank is a Listwise rank model, the input data and label must be sequences.
+    lambda_rank is a Listwise rank model, the input data and label
+    must be sequences.
+
     https://papers.nips.cc/paper/2971-learning-to-rank-with-nonsmooth-cost-functions.pdf
     parameters :
       input_dim, one document's dense feature vector dimension
@@ -16,6 +20,7 @@ def lambda_rank(input_dim):
     format of the dense_vector_sequence:
     [[f, ...], [f, ...], ...], f is a float or an int number
     """
+
     label = paddle.layer.data("label",
                               paddle.data_type.dense_vector_sequence(1))
     data = paddle.layer.data("data",
@@ -88,11 +93,11 @@ def train_lambda_rank(num_passes):
 
 
 def lambda_rank_infer(pass_id):
+    """lambda_rank model inference interface
+
+    parameters:
+        pass_id : inference model in pass_id
     """
-  lambda_rank model inference interface
-  parameters:
-    pass_id : inference model in pass_id
-  """
     print "Begin to Infer..."
     input_dim = 46
     output = lambda_rank(input_dim)
@@ -109,7 +114,8 @@ def lambda_rank_infer(pass_id):
         if len(infer_data) == infer_data_num:
             break
 
-    # predict score of infer_data document. Re-sort the document base on predict score
+    # Predict the score of each infer_data document.
+    # Re-sort the documents based on the predicted score
     # in descending order. then we build the ranking documents
     predicitons = paddle.infer(
         output_layer=output, parameters=parameters, input=infer_data)
diff --git a/ltr/metrics.py b/ltr/metrics.py
index 12a77434..a2bbf3fe 100644
--- a/ltr/metrics.py
+++ b/ltr/metrics.py
@@ -12,7 +12,6 @@ def ndcg(score_list):
     e.g. predict rank score list :
     >>> scores =  [3, 2, 3, 0, 1, 2] 
     >>> ndcg_score = ndcg(scores)
-    
     """
 
     def dcg(score_list):
diff --git a/ltr/ranknet.py b/ltr/ranknet.py
index f6327f4a..7b45ca65 100644
--- a/ltr/ranknet.py
+++ b/ltr/ranknet.py
@@ -13,11 +13,11 @@ import argparse
 
 def half_ranknet(name_prefix, input_dim):
     """
-  parameter in same name will be shared in paddle framework,
-  these parameters in ranknet can be used in shared state, e.g. left network and right network
-  shared parameters in detail
-  https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/api.md
-  """
+    Parameters with the same name are shared in the paddle framework,
+    so the left and right networks of ranknet can share their parameters.
+    For details about shared parameters, see:
+    https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/api.md
+    """
     # data layer
     data = paddle.layer.data(name_prefix + "/data",
                              paddle.data_type.dense_vector(input_dim))
@@ -102,12 +102,14 @@ def ranknet_infer(pass_id):
     print "Begin to Infer..."
     feature_dim = 46
 
-    # we just need half_ranknet to predict a rank score, which can be used in sort documents
+    # we just need half_ranknet to predict a rank score,
+    # which can be used to sort documents
     output = half_ranknet("infer", feature_dim)
     parameters = paddle.parameters.Parameters.from_tar(
         gzip.open("ranknet_params_%d.tar.gz" % (pass_id)))
 
-    # load data of same query and relevance documents, need ranknet to rank these candidates
+    # load data of the same query and its relevant documents,
+    # ranknet is needed to rank these candidates
     infer_query_id = []
     infer_data = []
     infer_doc_index = []
@@ -121,7 +123,8 @@ def ranknet_infer(pass_id):
         infer_query_id.append(query_id)
         infer_data.append([feature_vector])
 
-    # predict score of infer_data document. Re-sort the document base on predict score
+    # Predict the score of each infer_data document.
+    # Re-sort the documents based on the predicted score
     # in descending order. then we build the ranking documents
     scores = paddle.infer(
         output_layer=output, parameters=parameters, input=infer_data)
diff --git a/mt_with_external_memory/external_memory.py b/mt_with_external_memory/external_memory.py
old mode 100755
new mode 100644
index f0b61cb4..d5df173d
--- a/mt_with_external_memory/external_memory.py
+++ b/mt_with_external_memory/external_memory.py
@@ -23,7 +23,7 @@ class ExternalMemory(object):
     Besides, the ExternalMemory class must be used together with
     paddle.layer.recurrent_group (within its step function). It can never be
     used in a standalone manner.
-    
+
     For more details, please refer to
     `Neural Turing Machines <https://arxiv.org/abs/1410.5401>`_.
 
diff --git a/mt_with_external_memory/model.py b/mt_with_external_memory/model.py
index 64123f8c..527c9ff6 100644
--- a/mt_with_external_memory/model.py
+++ b/mt_with_external_memory/model.py
@@ -1,4 +1,4 @@
-""" 
+"""
     Contains model configuration for external-memory-enhanced seq2seq.
 
     The "external memory" refers to two types of memories.
diff --git a/nce_cost/infer.py b/nce_cost/infer.py
index 89d80792..db1c8c61 100644
--- a/nce_cost/infer.py
+++ b/nce_cost/infer.py
@@ -1,5 +1,3 @@
-#!/usr/bin/env python
-# -*- encoding:utf-8 -*-
 import os
 import gzip
 import numpy as np
diff --git a/nce_cost/network_conf.py b/nce_cost/network_conf.py
index a9e33e1b..a37b031d 100644
--- a/nce_cost/network_conf.py
+++ b/nce_cost/network_conf.py
@@ -1,5 +1,3 @@
-#!/usr/bin/env python
-# -*- encoding:utf-8 -*-
 import math
 import paddle.v2 as paddle
 
diff --git a/nce_cost/train.py b/nce_cost/train.py
index 3babf7fe..9ba84214 100644
--- a/nce_cost/train.py
+++ b/nce_cost/train.py
@@ -1,5 +1,3 @@
-#!/usr/bin/env python
-# -*- encoding:utf-8 -*-
 import os
 import logging
 import gzip
diff --git a/nmt_without_attention/generate.py b/nmt_without_attention/generate.py
index 1de4f462..eeb02b6a 100644
--- a/nmt_without_attention/generate.py
+++ b/nmt_without_attention/generate.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python
 import os
 import logging
 import numpy as np
diff --git a/nmt_without_attention/network_conf.py b/nmt_without_attention/network_conf.py
index 77a1dc77..3f19ed12 100644
--- a/nmt_without_attention/network_conf.py
+++ b/nmt_without_attention/network_conf.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python
 import paddle.v2 as paddle
 import sys
 import gzip
diff --git a/nmt_without_attention/train.py b/nmt_without_attention/train.py
index 5604d70d..15585e18 100644
--- a/nmt_without_attention/train.py
+++ b/nmt_without_attention/train.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python
 import os
 import logging
 import paddle.v2 as paddle
diff --git a/scheduled_sampling/random_schedule_generator.py b/scheduled_sampling/random_schedule_generator.py
index 7569eaff..7af99685 100644
--- a/scheduled_sampling/random_schedule_generator.py
+++ b/scheduled_sampling/random_schedule_generator.py
@@ -30,7 +30,8 @@ class RandomScheduleGenerator:
 
     def getScheduleRate(self):
         """
-        Get the schedule sampling rate. Usually not needed to be called by the users
+        Get the schedule sampling rate. Usually does not need to be
+        called by users.
         """
         return self.schedule_computer(self.a, self.b, self.data_processed_)
 
diff --git a/text_classification/infer.py b/text_classification/infer.py
index de033697..c507d749 100644
--- a/text_classification/infer.py
+++ b/text_classification/infer.py
@@ -1,5 +1,3 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
 import sys
 import os
 import gzip
diff --git a/text_classification/reader.py b/text_classification/reader.py
index 7b670031..cd576c9e 100644
--- a/text_classification/reader.py
+++ b/text_classification/reader.py
@@ -1,5 +1,3 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
 import os
 
 
diff --git a/text_classification/train.py b/text_classification/train.py
index 4f31b093..3d1a5819 100644
--- a/text_classification/train.py
+++ b/text_classification/train.py
@@ -1,5 +1,3 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
 import os
 import sys
 import gzip
diff --git a/text_classification/utils.py b/text_classification/utils.py
index 831d2b3b..d14054d3 100644
--- a/text_classification/utils.py
+++ b/text_classification/utils.py
@@ -1,5 +1,3 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
 import logging
 import os
 import argparse
-- 
GitLab