diff --git a/ctr/avazu_data_processer.py b/ctr/avazu_data_processer.py
index ca150d8f35a866ae4d5bb07e4391cc7f32076e0f..dd148adc244efc64021446b17488ec7f2b1c9bd9 100644
--- a/ctr/avazu_data_processer.py
+++ b/ctr/avazu_data_processer.py
@@ -1,5 +1,3 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-import os
import sys
import csv
import cPickle
diff --git a/ctr/infer.py b/ctr/infer.py
index 721c6b01b5a82b863e7db69865cd62c496b382d9..6541c74638df63a9304989c2ccaff0ff4c00463a 100644
--- a/ctr/infer.py
+++ b/ctr/infer.py
@@ -1,5 +1,3 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
import gzip
import argparse
import itertools
diff --git a/ctr/network_conf.py b/ctr/network_conf.py
index a90d1dc6e8da0b1379926dd7b37ac4cc0d408a2a..b01e4872f108896f60f5d662fe6e1d57295de3f2 100644
--- a/ctr/network_conf.py
+++ b/ctr/network_conf.py
@@ -1,5 +1,3 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
import paddle.v2 as paddle
from paddle.v2 import layer
from paddle.v2 import data_type as dtype
diff --git a/ctr/train.py b/ctr/train.py
index 64831089ae1b1df4cb73326824af71acb345f80d..235e6fa59be6416cfac6b1b5f2039c09889b9f76 100644
--- a/ctr/train.py
+++ b/ctr/train.py
@@ -1,5 +1,3 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-import os
import argparse
import gzip
diff --git a/dssm/infer.py b/dssm/infer.py
index bf5abb0a8d75bd5b4610c22ece89d53b60cc09a6..dc5595abceae44f985ab616025c8488d46456b8c 100644
--- a/dssm/infer.py
+++ b/dssm/infer.py
@@ -1,5 +1,3 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
import argparse
import itertools
@@ -32,9 +30,10 @@ parser.add_argument(
type=int,
required=True,
default=ModelType.CLASSIFICATION_MODE,
- help="model type, %d for classification, %d for pairwise rank, %d for regression (default: classification)"
- % (ModelType.CLASSIFICATION_MODE, ModelType.RANK_MODE,
- ModelType.REGRESSION_MODE))
+ help=("model type, %d for classification, %d for pairwise rank, "
+ "%d for regression (default: classification)") %
+ (ModelType.CLASSIFICATION_MODE, ModelType.RANK_MODE,
+ ModelType.REGRESSION_MODE))
parser.add_argument(
'-s',
'--source_dic_path',
@@ -45,8 +44,8 @@ parser.add_argument(
'--target_dic_path',
type=str,
required=False,
- help="path of the target's word dic, if not set, the `source_dic_path` will be used"
-)
+ help=("path of the target's word dictionary, "
+ "if not set, the `source_dic_path` will be used"))
parser.add_argument(
'-a',
'--model_arch',
@@ -69,8 +68,9 @@ parser.add_argument(
'--dnn_dims',
type=str,
default='256,128,64,32',
- help="dimentions of dnn layers, default is '256,128,64,32', which means create a 4-layer dnn, demention of each layer is 256, 128, 64 and 32"
-)
+ help=("dimentions of dnn layers, default is '256,128,64,32', "
+ "which means create a 4-layer dnn, "
+ "demention of each layer is 256, 128, 64 and 32"))
parser.add_argument(
'-c',
'--class_num',
@@ -85,7 +85,8 @@ if args.model_type.is_classification():
assert args.class_num > 1, "--class_num should be set in classification task."
layer_dims = map(int, args.dnn_dims.split(','))
-args.target_dic_path = args.source_dic_path if not args.target_dic_path else args.target_dic_path
+args.target_dic_path = args.source_dic_path if not args.target_dic_path \
+ else args.target_dic_path
paddle.init(use_gpu=False, trainer_count=1)
@@ -130,9 +131,9 @@ class Inferer(object):
for id, batch in enumerate(infer_reader()):
res = self.inferer.infer(input=batch)
predictions = [' '.join(map(str, x)) for x in res]
- assert len(batch) == len(
- predictions), "predict error, %d inputs, but %d predictions" % (
- len(batch), len(predictions))
+ assert len(batch) == len(predictions), (
+ "predict error, %d inputs, "
+ "but %d predictions") % (len(batch), len(predictions))
output_f.write('\n'.join(map(str, predictions)) + '\n')
diff --git a/dssm/network_conf.py b/dssm/network_conf.py
index 04c2b7e2f389d3839b72d005004119a49238444f..10c8974f24f98ce05eb93c85559152f5eb274d60 100644
--- a/dssm/network_conf.py
+++ b/dssm/network_conf.py
@@ -29,9 +29,9 @@ class DSSM(object):
@class_num: int
number of categories.
'''
- assert len(
- vocab_sizes
- ) == 2, "vocab_sizes specify the sizes left and right inputs, and dim should be 2."
+ assert len(vocab_sizes) == 2, (
+ "vocab_sizes specify the sizes left and right inputs, "
+ "and dim should be 2.")
assert len(dnn_dims) > 1, "more than two layers is needed."
self.dnn_dims = dnn_dims
@@ -91,7 +91,8 @@ class DSSM(object):
@emb: paddle.layer
output of the embedding layer
@prefix: str
- prefix of layers' names, used to share parameters between more than one `fc` parts.
+ prefix of layers' names, used to share parameters between
+ more than one `fc` part.
'''
_input_layer = paddle.layer.pooling(
input=emb, pooling_type=paddle.pooling.Max())
@@ -113,7 +114,8 @@ class DSSM(object):
@emb: paddle.layer
output of the embedding layer
@prefix: str
- prefix of layers' names, used to share parameters between more than one `cnn` parts.
+ prefix of layers' names, used to share parameters between
+ more than one `cnn` part.
'''
def create_conv(context_len, hidden_size, prefix):
@@ -174,7 +176,8 @@ class DSSM(object):
- source sentence
- left_target sentence
- right_target sentence
- - label, 1 if left_target should be sorted in front of right_target, otherwise 0.
+ - label, 1 if left_target should be sorted in front of
+ right_target, otherwise 0.
'''
logger.info("build rank model")
assert self.model_type.is_rank()
diff --git a/dssm/reader.py b/dssm/reader.py
index 677072dae985980fab3da4dd09893721f84866fd..f39cd7f53f5cd46c89e14cb398c2a69eef0d6edb 100644
--- a/dssm/reader.py
+++ b/dssm/reader.py
@@ -1,6 +1,5 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-from utils import UNK, ModelType, TaskType, load_dic, sent2ids, logger, ModelType
+from utils import UNK, ModelType, TaskType, load_dic, \
+ sent2ids, logger, ModelType
class Dataset(object):
@@ -38,7 +37,6 @@ class Dataset(object):
'''
Load testset.
'''
- # logger.info("[reader] load testset from %s" % self.test_path)
with open(self.test_path) as f:
for line_id, line in enumerate(f):
yield self.record_reader(line)
diff --git a/dssm/train.py b/dssm/train.py
index d1dd932860b0b70b461acdcf1b1b48258c3d1b30..98e7f321015c057dc41275677f159a7f806cc9eb 100644
--- a/dssm/train.py
+++ b/dssm/train.py
@@ -1,5 +1,3 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
import argparse
import paddle.v2 as paddle
@@ -31,8 +29,8 @@ parser.add_argument(
'--target_dic_path',
type=str,
required=False,
- help="path of the target's word dic, if not set, the `source_dic_path` will be used"
-)
+ help=("path of the target's word dictionary, "
+ "if not set, the `source_dic_path` will be used"))
parser.add_argument(
'-b',
'--batch_size',
@@ -221,7 +219,8 @@ def train(train_data_path=None,
event.pass_id, event.batch_id, event.cost, event.metrics))
# test model
- if event.batch_id > 0 and event.batch_id % args.num_batches_to_test == 0:
+ if event.batch_id > 0 and \
+ event.batch_id % args.num_batches_to_test == 0:
if test_reader is not None:
if model_type.is_classification():
result = trainer.test(
@@ -231,7 +230,8 @@ def train(train_data_path=None,
else:
result = None
# save model
- if event.batch_id > 0 and event.batch_id % args.num_batches_to_save_model == 0:
+ if event.batch_id > 0 and \
+ event.batch_id % args.num_batches_to_save_model == 0:
model_desc = "{type}_{arch}".format(
type=str(args.model_type), arch=str(args.model_arch))
with open("%sdssm_%s_pass_%05d.tar" %
diff --git a/generate_chinese_poetry/README.md b/generate_chinese_poetry/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..f6a09ed22d42d6de3b0d6fd14d826cb87de822f5
--- /dev/null
+++ b/generate_chinese_poetry/README.md
@@ -0,0 +1 @@
+[TBD]
diff --git a/generate_chinese_poetry/index.html b/generate_chinese_poetry/index.html
new file mode 100644
index 0000000000000000000000000000000000000000..a5dba006b9c5c272061d9d83589e0f4c2fd6eb13
--- /dev/null
+++ b/generate_chinese_poetry/index.html
@@ -0,0 +1,65 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+[TBD]
+
+
+
+
+
+
diff --git a/hsigmoid/infer.py b/hsigmoid/infer.py
index 8645d00d20047f620d1beef17c60bb5b69996ff9..df6fd1f7c167da1e5ac10020869801d761c9d06e 100644
--- a/hsigmoid/infer.py
+++ b/hsigmoid/infer.py
@@ -1,5 +1,3 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
import os
import logging
import gzip
diff --git a/hsigmoid/network_conf.py b/hsigmoid/network_conf.py
index 494494788c015fd76ab5914ba6c2a8161bde5785..072c28c9219af801e6b154647c9fc5c48e67d457 100644
--- a/hsigmoid/network_conf.py
+++ b/hsigmoid/network_conf.py
@@ -1,7 +1,5 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
import math
+
import paddle.v2 as paddle
diff --git a/hsigmoid/train.py b/hsigmoid/train.py
index 809c842af55b22daff3428db9b674065a16f1700..1763772e795cef79b6f094c2827521832c7c216c 100644
--- a/hsigmoid/train.py
+++ b/hsigmoid/train.py
@@ -1,5 +1,3 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
import os
import logging
import gzip
diff --git a/image_classification/train.py b/image_classification/train.py
old mode 100755
new mode 100644
diff --git a/ltr/lambda_rank.py b/ltr/lambda_rank.py
index 5318b7ce6067375e2e497923a2422e18b08a8261..fb527ed3d099fca5b7a2da04f51d64a97c6a9c2f 100644
--- a/ltr/lambda_rank.py
+++ b/ltr/lambda_rank.py
@@ -1,14 +1,18 @@
-import os, sys
+import os
+import sys
import gzip
-import paddle.v2 as paddle
-import numpy as np
import functools
import argparse
+import numpy as np
+
+import paddle.v2 as paddle
def lambda_rank(input_dim):
"""
- lambda_rank is a Listwise rank model, the input data and label must be sequences.
+ lambda_rank is a Listwise rank model, the input data and label
+ must be sequences.
+
https://papers.nips.cc/paper/2971-learning-to-rank-with-nonsmooth-cost-functions.pdf
parameters :
input_dim, one document's dense feature vector dimension
@@ -16,6 +20,7 @@ def lambda_rank(input_dim):
format of the dense_vector_sequence:
[[f, ...], [f, ...], ...], f is a float or an int number
"""
+
label = paddle.layer.data("label",
paddle.data_type.dense_vector_sequence(1))
data = paddle.layer.data("data",
@@ -88,11 +93,11 @@ def train_lambda_rank(num_passes):
def lambda_rank_infer(pass_id):
+ """lambda_rank model inference interface
+
+ parameters:
+ pass_id : inference model in pass_id
"""
- lambda_rank model inference interface
- parameters:
- pass_id : inference model in pass_id
- """
print "Begin to Infer..."
input_dim = 46
output = lambda_rank(input_dim)
@@ -109,7 +114,8 @@ def lambda_rank_infer(pass_id):
if len(infer_data) == infer_data_num:
break
- # predict score of infer_data document. Re-sort the document base on predict score
+ # predict the score of each infer_data document.
+ # Re-sort the documents based on the predicted score
# in descending order. then we build the ranking documents
predicitons = paddle.infer(
output_layer=output, parameters=parameters, input=infer_data)
diff --git a/ltr/metrics.py b/ltr/metrics.py
index 12a77434bf0f90f87f5754d1dbef4dc4435cba21..a2bbf3feb3fb9a277b7409f73c722e717a0f675e 100644
--- a/ltr/metrics.py
+++ b/ltr/metrics.py
@@ -12,7 +12,6 @@ def ndcg(score_list):
e.g. predict rank score list :
>>> scores = [3, 2, 3, 0, 1, 2]
>>> ndcg_score = ndcg(scores)
-
"""
def dcg(score_list):
diff --git a/ltr/ranknet.py b/ltr/ranknet.py
index f6327f4a8c472c933d9a2cd433e1f315f817f960..7b45ca65608cefe89a58545bd47f6d118b422c83 100644
--- a/ltr/ranknet.py
+++ b/ltr/ranknet.py
@@ -13,11 +13,11 @@ import argparse
def half_ranknet(name_prefix, input_dim):
"""
- parameter in same name will be shared in paddle framework,
- these parameters in ranknet can be used in shared state, e.g. left network and right network
- shared parameters in detail
- https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/api.md
- """
+ Parameters with the same name are shared in the paddle framework,
+ so these parameters in ranknet can be used in a shared state,
+ e.g. by the left and right networks. Shared parameters in detail:
+ https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/api.md
+ """
# data layer
data = paddle.layer.data(name_prefix + "/data",
paddle.data_type.dense_vector(input_dim))
@@ -102,12 +102,14 @@ def ranknet_infer(pass_id):
print "Begin to Infer..."
feature_dim = 46
- # we just need half_ranknet to predict a rank score, which can be used in sort documents
+ # we just need half_ranknet to predict a rank score,
+ # which can be used to sort documents
output = half_ranknet("infer", feature_dim)
parameters = paddle.parameters.Parameters.from_tar(
gzip.open("ranknet_params_%d.tar.gz" % (pass_id)))
- # load data of same query and relevance documents, need ranknet to rank these candidates
+ # load data of the same query and its relevant documents;
+ # ranknet is needed to rank these candidates
infer_query_id = []
infer_data = []
infer_doc_index = []
@@ -121,7 +123,8 @@ def ranknet_infer(pass_id):
infer_query_id.append(query_id)
infer_data.append([feature_vector])
- # predict score of infer_data document. Re-sort the document base on predict score
+ # predict the score of each infer_data document.
+ # Re-sort the documents based on the predicted score
# in descending order. then we build the ranking documents
scores = paddle.infer(
output_layer=output, parameters=parameters, input=infer_data)
diff --git a/mt_with_external_memory/external_memory.py b/mt_with_external_memory/external_memory.py
old mode 100755
new mode 100644
index f0b61cb413ad1824d27bdae9e05d65f1ce330224..d5df173dc7f03c49178e968c9bce66ef1294e030
--- a/mt_with_external_memory/external_memory.py
+++ b/mt_with_external_memory/external_memory.py
@@ -23,7 +23,7 @@ class ExternalMemory(object):
Besides, the ExternalMemory class must be used together with
paddle.layer.recurrent_group (within its step function). It can never be
used in a standalone manner.
-
+
For more details, please refer to
`Neural Turing Machines `_.
diff --git a/mt_with_external_memory/model.py b/mt_with_external_memory/model.py
index 64123f8c64e2a7584daf28a336223aa271f6d313..527c9ff6811a52b5c661f221d489fce8717a6128 100644
--- a/mt_with_external_memory/model.py
+++ b/mt_with_external_memory/model.py
@@ -1,4 +1,4 @@
-"""
+"""
Contains model configuration for external-memory-enhanced seq2seq.
The "external memory" refers to two types of memories.
diff --git a/nce_cost/infer.py b/nce_cost/infer.py
index 89d80792c85d68ee76234d5558b8f363b8768f92..db1c8c619c8ce6fcf66a59eb31142a736d722b15 100644
--- a/nce_cost/infer.py
+++ b/nce_cost/infer.py
@@ -1,5 +1,3 @@
-#!/usr/bin/env python
-# -*- encoding:utf-8 -*-
import os
import gzip
import numpy as np
diff --git a/nce_cost/network_conf.py b/nce_cost/network_conf.py
index a9e33e1b2d143c9662a34ea6c7fd3690b5d49e4e..a37b031da462199ce5eca6497a0cbd62b4f7daac 100644
--- a/nce_cost/network_conf.py
+++ b/nce_cost/network_conf.py
@@ -1,5 +1,3 @@
-#!/usr/bin/env python
-# -*- encoding:utf-8 -*-
import math
import paddle.v2 as paddle
diff --git a/nce_cost/train.py b/nce_cost/train.py
index 3babf7fe0963fcff54430cd174b0af523e68846b..9ba842141d70cbc71e848e67dce6a537fb72f2f8 100644
--- a/nce_cost/train.py
+++ b/nce_cost/train.py
@@ -1,5 +1,3 @@
-#!/usr/bin/env python
-# -*- encoding:utf-8 -*-
import os
import logging
import gzip
diff --git a/nmt_without_attention/generate.py b/nmt_without_attention/generate.py
index 1de4f462649a55e4ea235f61d9fa522461752f00..eeb02b6a9312b683311c5a9146b7443ceb2d2427 100644
--- a/nmt_without_attention/generate.py
+++ b/nmt_without_attention/generate.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python
import os
import logging
import numpy as np
diff --git a/nmt_without_attention/network_conf.py b/nmt_without_attention/network_conf.py
index 77a1dc77c3c85c633cd7fbdf085d02780ded0075..3f19ed125d05115004aa4143ea0764ef8a5f4509 100644
--- a/nmt_without_attention/network_conf.py
+++ b/nmt_without_attention/network_conf.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python
import paddle.v2 as paddle
import sys
import gzip
diff --git a/nmt_without_attention/train.py b/nmt_without_attention/train.py
index 5604d70d9f796d2561977e166931d092902e25b2..15585e18978080cac0d3536a6aea8825eb8f04cd 100644
--- a/nmt_without_attention/train.py
+++ b/nmt_without_attention/train.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python
import os
import logging
import paddle.v2 as paddle
diff --git a/scheduled_sampling/random_schedule_generator.py b/scheduled_sampling/random_schedule_generator.py
index 7569eaffc29ab0abda3e44764f2ccf647b8b306e..7af99685140993f0c40779808cc0b3200e1b45b8 100644
--- a/scheduled_sampling/random_schedule_generator.py
+++ b/scheduled_sampling/random_schedule_generator.py
@@ -30,7 +30,8 @@ class RandomScheduleGenerator:
def getScheduleRate(self):
"""
- Get the schedule sampling rate. Usually not needed to be called by the users
+ Get the schedule sampling rate. Users usually do not need to
+ call this method directly.
"""
return self.schedule_computer(self.a, self.b, self.data_processed_)
diff --git a/text_classification/infer.py b/text_classification/infer.py
index de03369732eb92f398d718828ead318fba55680f..c507d749caf122ad4ccc68fded277d6adb51872a 100644
--- a/text_classification/infer.py
+++ b/text_classification/infer.py
@@ -1,5 +1,3 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
import sys
import os
import gzip
diff --git a/text_classification/reader.py b/text_classification/reader.py
index 7b6700313a6bf94784de73785e968193d97106f2..cd576c9eed8e54b5285f328f69767fb744a2803a 100644
--- a/text_classification/reader.py
+++ b/text_classification/reader.py
@@ -1,5 +1,3 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
import os
diff --git a/text_classification/train.py b/text_classification/train.py
index 4f31b09373ce19b5e8806735fd1d449154b27ed2..3d1a5819e3a9fafc0aec5971af8046f8c37b93c8 100644
--- a/text_classification/train.py
+++ b/text_classification/train.py
@@ -1,5 +1,3 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
import os
import sys
import gzip
diff --git a/text_classification/utils.py b/text_classification/utils.py
index 831d2b3b071742aa233638784cbd7bb16195b29f..d14054d331987b99d2775b2a146b8c13b0d0fab4 100644
--- a/text_classification/utils.py
+++ b/text_classification/utils.py
@@ -1,5 +1,3 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
import logging
import os
import argparse