From 4fb0c3bbf243c4016d56897099ef8c2fc269ac55 Mon Sep 17 00:00:00 2001 From: caoying03 Date: Wed, 27 Sep 2017 21:10:14 +0800 Subject: [PATCH] small code cleans. --- ctr/avazu_data_processer.py | 2 - ctr/infer.py | 2 - ctr/network_conf.py | 2 - ctr/train.py | 2 - dssm/infer.py | 27 ++++---- dssm/network_conf.py | 15 +++-- dssm/reader.py | 6 +- dssm/train.py | 12 ++-- generate_chinese_poetry/README.md | 1 + generate_chinese_poetry/index.html | 65 +++++++++++++++++++ hsigmoid/infer.py | 2 - hsigmoid/network_conf.py | 4 +- hsigmoid/train.py | 2 - image_classification/train.py | 0 ltr/lambda_rank.py | 24 ++++--- ltr/metrics.py | 1 - ltr/ranknet.py | 19 +++--- mt_with_external_memory/external_memory.py | 2 +- mt_with_external_memory/model.py | 2 +- nce_cost/infer.py | 2 - nce_cost/network_conf.py | 2 - nce_cost/train.py | 2 - nmt_without_attention/generate.py | 1 - nmt_without_attention/network_conf.py | 1 - nmt_without_attention/train.py | 1 - .../random_schedule_generator.py | 3 +- text_classification/infer.py | 2 - text_classification/reader.py | 2 - text_classification/train.py | 2 - text_classification/utils.py | 2 - 30 files changed, 128 insertions(+), 82 deletions(-) create mode 100644 generate_chinese_poetry/README.md create mode 100644 generate_chinese_poetry/index.html mode change 100755 => 100644 image_classification/train.py mode change 100755 => 100644 mt_with_external_memory/external_memory.py diff --git a/ctr/avazu_data_processer.py b/ctr/avazu_data_processer.py index ca150d8f..dd148adc 100644 --- a/ctr/avazu_data_processer.py +++ b/ctr/avazu_data_processer.py @@ -1,5 +1,3 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*-import os import sys import csv import cPickle diff --git a/ctr/infer.py b/ctr/infer.py index 721c6b01..6541c746 100644 --- a/ctr/infer.py +++ b/ctr/infer.py @@ -1,5 +1,3 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- import gzip import argparse import itertools diff --git a/ctr/network_conf.py b/ctr/network_conf.py index a90d1dc6..b01e4872 100644 --- a/ctr/network_conf.py +++ b/ctr/network_conf.py @@ -1,5 +1,3 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- import paddle.v2 as paddle from paddle.v2 import layer from paddle.v2 import data_type as dtype diff --git a/ctr/train.py b/ctr/train.py index 64831089..235e6fa5 100644 --- a/ctr/train.py +++ b/ctr/train.py @@ -1,5 +1,3 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*-import os import argparse import gzip diff --git a/dssm/infer.py b/dssm/infer.py index bf5abb0a..dc5595ab 100644 --- a/dssm/infer.py +++ b/dssm/infer.py @@ -1,5 +1,3 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- import argparse import itertools @@ -32,9 +30,10 @@ parser.add_argument( type=int, required=True, default=ModelType.CLASSIFICATION_MODE, - help="model type, %d for classification, %d for pairwise rank, %d for regression (default: classification)" - % (ModelType.CLASSIFICATION_MODE, ModelType.RANK_MODE, - ModelType.REGRESSION_MODE)) + help=("model type, %d for classification, %d for pairwise rank, " + "%d for regression (default: classification)") % + (ModelType.CLASSIFICATION_MODE, ModelType.RANK_MODE, + ModelType.REGRESSION_MODE)) parser.add_argument( '-s', '--source_dic_path', @@ -45,8 +44,8 @@ parser.add_argument( '--target_dic_path', type=str, required=False, - help="path of the target's word dic, if not set, the `source_dic_path` will be used" -) + help=("path of the target's word dictionary, " + "if not set, the `source_dic_path` will be used")) parser.add_argument( '-a', '--model_arch', @@ -69,8 +68,9 @@ parser.add_argument( '--dnn_dims', type=str, default='256,128,64,32', - help="dimentions of dnn layers, default is '256,128,64,32', which means create a 4-layer dnn, demention of each layer is 256, 128, 64 and 32" -) + help=("dimentions of dnn layers, default is '256,128,64,32', " + "which means create a 4-layer dnn, " + "demention of each layer is 256, 128, 64 and 32")) parser.add_argument( '-c', '--class_num', @@ -85,7 +85,8 @@ if args.model_type.is_classification(): assert args.class_num > 1, "--class_num should be set in classification task." layer_dims = map(int, args.dnn_dims.split(',')) -args.target_dic_path = args.source_dic_path if not args.target_dic_path else args.target_dic_path +args.target_dic_path = args.source_dic_path if not args.target_dic_path \ + else args.target_dic_path paddle.init(use_gpu=False, trainer_count=1) @@ -130,9 +131,9 @@ class Inferer(object): for id, batch in enumerate(infer_reader()): res = self.inferer.infer(input=batch) predictions = [' '.join(map(str, x)) for x in res] - assert len(batch) == len( - predictions), "predict error, %d inputs, but %d predictions" % ( - len(batch), len(predictions)) + assert len(batch) == len(predictions), ( + "predict error, %d inputs, " + "but %d predictions") % (len(batch), len(predictions)) output_f.write('\n'.join(map(str, predictions)) + '\n') diff --git a/dssm/network_conf.py b/dssm/network_conf.py index 04c2b7e2..10c8974f 100644 --- a/dssm/network_conf.py +++ b/dssm/network_conf.py @@ -29,9 +29,9 @@ class DSSM(object): @class_num: int number of categories. ''' - assert len( - vocab_sizes - ) == 2, "vocab_sizes specify the sizes left and right inputs, and dim should be 2." + assert len(vocab_sizes) == 2, ( + "vocab_sizes specify the sizes left and right inputs, " + "and dim should be 2.") assert len(dnn_dims) > 1, "more than two layers is needed." self.dnn_dims = dnn_dims @@ -91,7 +91,8 @@ class DSSM(object): @emb: paddle.layer output of the embedding layer @prefix: str - prefix of layers' names, used to share parameters between more than one `fc` parts. + prefix of layers' names, used to share parameters between + more than one `fc` parts. ''' _input_layer = paddle.layer.pooling( input=emb, pooling_type=paddle.pooling.Max()) @@ -113,7 +114,8 @@ class DSSM(object): @emb: paddle.layer output of the embedding layer @prefix: str - prefix of layers' names, used to share parameters between more than one `cnn` parts. + prefix of layers' names, used to share parameters between + more than one `cnn` parts. ''' def create_conv(context_len, hidden_size, prefix): @@ -174,7 +176,8 @@ class DSSM(object): - source sentence - left_target sentence - right_target sentence - - label, 1 if left_target should be sorted in front of right_target, otherwise 0. + - label, 1 if left_target should be sorted in front of + right_target, otherwise 0. ''' logger.info("build rank model") assert self.model_type.is_rank() diff --git a/dssm/reader.py b/dssm/reader.py index 677072da..f39cd7f5 100644 --- a/dssm/reader.py +++ b/dssm/reader.py @@ -1,6 +1,5 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -from utils import UNK, ModelType, TaskType, load_dic, sent2ids, logger, ModelType +from utils import UNK, ModelType, TaskType, load_dic, \ + sent2ids, logger, ModelType class Dataset(object): @@ -38,7 +37,6 @@ class Dataset(object): ''' Load testset. ''' - # logger.info("[reader] load testset from %s" % self.test_path) with open(self.test_path) as f: for line_id, line in enumerate(f): yield self.record_reader(line) diff --git a/dssm/train.py b/dssm/train.py index d1dd9328..98e7f321 100644 --- a/dssm/train.py +++ b/dssm/train.py @@ -1,5 +1,3 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- import argparse import paddle.v2 as paddle @@ -31,8 +29,8 @@ parser.add_argument( '--target_dic_path', type=str, required=False, - help="path of the target's word dic, if not set, the `source_dic_path` will be used" -) + help=("path of the target's word dictionary, " + "if not set, the `source_dic_path` will be used")) parser.add_argument( '-b', '--batch_size', @@ -221,7 +219,8 @@ def train(train_data_path=None, event.pass_id, event.batch_id, event.cost, event.metrics)) # test model - if event.batch_id > 0 and event.batch_id % args.num_batches_to_test == 0: + if event.batch_id > 0 and \ + event.batch_id % args.num_batches_to_test == 0: if test_reader is not None: if model_type.is_classification(): result = trainer.test( @@ -231,7 +230,8 @@ def train(train_data_path=None, else: result = None # save model - if event.batch_id > 0 and event.batch_id % args.num_batches_to_save_model == 0: + if event.batch_id > 0 and \ + event.batch_id % args.num_batches_to_save_model == 0: model_desc = "{type}_{arch}".format( type=str(args.model_type), arch=str(args.model_arch)) with open("%sdssm_%s_pass_%05d.tar" % diff --git a/generate_chinese_poetry/README.md b/generate_chinese_poetry/README.md new file mode 100644 index 00000000..f6a09ed2 --- /dev/null +++ b/generate_chinese_poetry/README.md @@ -0,0 +1 @@ +[TBD] diff --git a/generate_chinese_poetry/index.html b/generate_chinese_poetry/index.html new file mode 100644 index 00000000..a5dba006 --- /dev/null +++ b/generate_chinese_poetry/index.html @@ -0,0 +1,65 @@ + + + + + + + + + + + + + + + + + +
+
+ + + + + + + diff --git a/hsigmoid/infer.py b/hsigmoid/infer.py index 8645d00d..df6fd1f7 100644 --- a/hsigmoid/infer.py +++ b/hsigmoid/infer.py @@ -1,5 +1,3 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- import os import logging import gzip diff --git a/hsigmoid/network_conf.py b/hsigmoid/network_conf.py index 49449478..072c28c9 100644 --- a/hsigmoid/network_conf.py +++ b/hsigmoid/network_conf.py @@ -1,7 +1,5 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - import math + import paddle.v2 as paddle diff --git a/hsigmoid/train.py b/hsigmoid/train.py index 809c842a..1763772e 100644 --- a/hsigmoid/train.py +++ b/hsigmoid/train.py @@ -1,5 +1,3 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- import os import logging import gzip diff --git a/image_classification/train.py b/image_classification/train.py old mode 100755 new mode 100644 diff --git a/ltr/lambda_rank.py b/ltr/lambda_rank.py index 5318b7ce..fb527ed3 100644 --- a/ltr/lambda_rank.py +++ b/ltr/lambda_rank.py @@ -1,14 +1,18 @@ -import os, sys +import os +import sys import gzip -import paddle.v2 as paddle -import numpy as np import functools import argparse +import numpy as np + +import paddle.v2 as paddle def lambda_rank(input_dim): """ - lambda_rank is a Listwise rank model, the input data and label must be sequences. + lambda_rank is a Listwise rank model, the input data and label + must be sequences. + https://papers.nips.cc/paper/2971-learning-to-rank-with-nonsmooth-cost-functions.pdf parameters : input_dim, one document's dense feature vector dimension @@ -16,6 +20,7 @@ def lambda_rank(input_dim): format of the dense_vector_sequence: [[f, ...], [f, ...], ...], f is a float or an int number """ + label = paddle.layer.data("label", paddle.data_type.dense_vector_sequence(1)) data = paddle.layer.data("data", @@ -88,11 +93,11 @@ def train_lambda_rank(num_passes): def lambda_rank_infer(pass_id): + """lambda_rank model inference interface + + parameters: + pass_id : inference model in pass_id """ - lambda_rank model inference interface - parameters: - pass_id : inference model in pass_id - """ print "Begin to Infer..." input_dim = 46 output = lambda_rank(input_dim) @@ -109,7 +114,8 @@ def lambda_rank_infer(pass_id): if len(infer_data) == infer_data_num: break - # predict score of infer_data document. Re-sort the document base on predict score + # predict score of infer_data document. + # Re-sort the document base on predict score # in descending order. then we build the ranking documents predicitons = paddle.infer( output_layer=output, parameters=parameters, input=infer_data) diff --git a/ltr/metrics.py b/ltr/metrics.py index 12a77434..a2bbf3fe 100644 --- a/ltr/metrics.py +++ b/ltr/metrics.py @@ -12,7 +12,6 @@ def ndcg(score_list): e.g. predict rank score list : >>> scores = [3, 2, 3, 0, 1, 2] >>> ndcg_score = ndcg(scores) - """ def dcg(score_list): diff --git a/ltr/ranknet.py b/ltr/ranknet.py index f6327f4a..7b45ca65 100644 --- a/ltr/ranknet.py +++ b/ltr/ranknet.py @@ -13,11 +13,11 @@ import argparse def half_ranknet(name_prefix, input_dim): """ - parameter in same name will be shared in paddle framework, - these parameters in ranknet can be used in shared state, e.g. left network and right network - shared parameters in detail - https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/api.md - """ + parameter in same name will be shared in paddle framework, + these parameters in ranknet can be used in shared state, + e.g. left network and right network shared parameters in detail + https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/api.md + """ # data layer data = paddle.layer.data(name_prefix + "/data", paddle.data_type.dense_vector(input_dim)) @@ -102,12 +102,14 @@ def ranknet_infer(pass_id): print "Begin to Infer..." feature_dim = 46 - # we just need half_ranknet to predict a rank score, which can be used in sort documents + # we just need half_ranknet to predict a rank score, + # which can be used in sort documents output = half_ranknet("infer", feature_dim) parameters = paddle.parameters.Parameters.from_tar( gzip.open("ranknet_params_%d.tar.gz" % (pass_id))) - # load data of same query and relevance documents, need ranknet to rank these candidates + # load data of same query and relevance documents, + # need ranknet to rank these candidates infer_query_id = [] infer_data = [] infer_doc_index = [] @@ -121,7 +123,8 @@ def ranknet_infer(pass_id): infer_query_id.append(query_id) infer_data.append([feature_vector]) - # predict score of infer_data document. Re-sort the document base on predict score + # predict score of infer_data document. + # Re-sort the document base on predict score # in descending order. then we build the ranking documents scores = paddle.infer( output_layer=output, parameters=parameters, input=infer_data) diff --git a/mt_with_external_memory/external_memory.py b/mt_with_external_memory/external_memory.py old mode 100755 new mode 100644 index f0b61cb4..d5df173d --- a/mt_with_external_memory/external_memory.py +++ b/mt_with_external_memory/external_memory.py @@ -23,7 +23,7 @@ class ExternalMemory(object): Besides, the ExternalMemory class must be used together with paddle.layer.recurrent_group (within its step function). It can never be used in a standalone manner. - + For more details, please refer to `Neural Turing Machines `_. diff --git a/mt_with_external_memory/model.py b/mt_with_external_memory/model.py index 64123f8c..527c9ff6 100644 --- a/mt_with_external_memory/model.py +++ b/mt_with_external_memory/model.py @@ -1,4 +1,4 @@ -""" +""" Contains model configuration for external-memory-enhanced seq2seq. The "external memory" refers to two types of memories. diff --git a/nce_cost/infer.py b/nce_cost/infer.py index 89d80792..db1c8c61 100644 --- a/nce_cost/infer.py +++ b/nce_cost/infer.py @@ -1,5 +1,3 @@ -#!/usr/bin/env python -# -*- encoding:utf-8 -*- import os import gzip import numpy as np diff --git a/nce_cost/network_conf.py b/nce_cost/network_conf.py index a9e33e1b..a37b031d 100644 --- a/nce_cost/network_conf.py +++ b/nce_cost/network_conf.py @@ -1,5 +1,3 @@ -#!/usr/bin/env python -# -*- encoding:utf-8 -*- import math import paddle.v2 as paddle diff --git a/nce_cost/train.py b/nce_cost/train.py index 3babf7fe..9ba84214 100644 --- a/nce_cost/train.py +++ b/nce_cost/train.py @@ -1,5 +1,3 @@ -#!/usr/bin/env python -# -*- encoding:utf-8 -*- import os import logging import gzip diff --git a/nmt_without_attention/generate.py b/nmt_without_attention/generate.py index 1de4f462..eeb02b6a 100644 --- a/nmt_without_attention/generate.py +++ b/nmt_without_attention/generate.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python import os import logging import numpy as np diff --git a/nmt_without_attention/network_conf.py b/nmt_without_attention/network_conf.py index 77a1dc77..3f19ed12 100644 --- a/nmt_without_attention/network_conf.py +++ b/nmt_without_attention/network_conf.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python import paddle.v2 as paddle import sys import gzip diff --git a/nmt_without_attention/train.py b/nmt_without_attention/train.py index 5604d70d..15585e18 100644 --- a/nmt_without_attention/train.py +++ b/nmt_without_attention/train.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python import os import logging import paddle.v2 as paddle diff --git a/scheduled_sampling/random_schedule_generator.py b/scheduled_sampling/random_schedule_generator.py index 7569eaff..7af99685 100644 --- a/scheduled_sampling/random_schedule_generator.py +++ b/scheduled_sampling/random_schedule_generator.py @@ -30,7 +30,8 @@ class RandomScheduleGenerator: def getScheduleRate(self): """ - Get the schedule sampling rate. Usually not needed to be called by the users + Get the schedule sampling rate. Usually not needed to be + called by the users. """ return self.schedule_computer(self.a, self.b, self.data_processed_) diff --git a/text_classification/infer.py b/text_classification/infer.py index de033697..c507d749 100644 --- a/text_classification/infer.py +++ b/text_classification/infer.py @@ -1,5 +1,3 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- import sys import os import gzip diff --git a/text_classification/reader.py b/text_classification/reader.py index 7b670031..cd576c9e 100644 --- a/text_classification/reader.py +++ b/text_classification/reader.py @@ -1,5 +1,3 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- import os diff --git a/text_classification/train.py b/text_classification/train.py index 4f31b093..3d1a5819 100644 --- a/text_classification/train.py +++ b/text_classification/train.py @@ -1,5 +1,3 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- import os import sys import gzip diff --git a/text_classification/utils.py b/text_classification/utils.py index 831d2b3b..d14054d3 100644 --- a/text_classification/utils.py +++ b/text_classification/utils.py @@ -1,5 +1,3 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- import logging import os import argparse -- GitLab