Commit 4637c5a7 authored by Cao Ying, committed by GitHub

Merge pull request #337 from lcy-seso/codes_clean

small code cleans.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import sys
import csv
import cPickle
......
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import gzip
import argparse
import itertools
......
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import paddle.v2 as paddle
from paddle.v2 import layer
from paddle.v2 import data_type as dtype
......
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import argparse
import gzip
......
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import argparse
import itertools
...@@ -32,9 +30,10 @@ parser.add_argument(
    type=int,
    required=True,
    default=ModelType.CLASSIFICATION_MODE,
    help=("model type, %d for classification, %d for pairwise rank, "
          "%d for regression (default: classification)") %
         (ModelType.CLASSIFICATION_MODE, ModelType.RANK_MODE,
          ModelType.REGRESSION_MODE))
parser.add_argument(
    '-s',
    '--source_dic_path',
...@@ -45,8 +44,8 @@ parser.add_argument(
    '--target_dic_path',
    type=str,
    required=False,
    help=("path of the target's word dictionary, "
          "if not set, the `source_dic_path` will be used"))
parser.add_argument(
    '-a',
    '--model_arch',
...@@ -69,8 +68,9 @@ parser.add_argument(
    '--dnn_dims',
    type=str,
    default='256,128,64,32',
    help=("dimensions of the dnn layers, default is '256,128,64,32', "
          "which creates a 4-layer dnn whose layer dimensions "
          "are 256, 128, 64 and 32"))
parser.add_argument(
    '-c',
    '--class_num',
...@@ -85,7 +85,8 @@ if args.model_type.is_classification():
    assert args.class_num > 1, "--class_num should be set in classification task."
layer_dims = map(int, args.dnn_dims.split(','))
args.target_dic_path = args.source_dic_path if not args.target_dic_path \
    else args.target_dic_path
paddle.init(use_gpu=False, trainer_count=1)
...@@ -130,9 +131,9 @@ class Inferer(object):
for id, batch in enumerate(infer_reader()):
    res = self.inferer.infer(input=batch)
    predictions = [' '.join(map(str, x)) for x in res]
    assert len(batch) == len(predictions), (
        "predict error, %d inputs, "
        "but %d predictions") % (len(batch), len(predictions))
    output_f.write('\n'.join(map(str, predictions)) + '\n')
......
...@@ -29,9 +29,9 @@ class DSSM(object):
    @class_num: int
        number of categories.
    '''
    assert len(vocab_sizes) == 2, (
        "vocab_sizes specify the sizes of the left and right inputs, "
        "and its length should be 2.")
    assert len(dnn_dims) > 1, "more than two layers is needed."
    self.dnn_dims = dnn_dims
...@@ -91,7 +91,8 @@ class DSSM(object):
    @emb: paddle.layer
        output of the embedding layer
    @prefix: str
        prefix of layers' names, used to share parameters between
        more than one `fc` parts.
    '''
    _input_layer = paddle.layer.pooling(
        input=emb, pooling_type=paddle.pooling.Max())
...@@ -113,7 +114,8 @@ class DSSM(object):
    @emb: paddle.layer
        output of the embedding layer
    @prefix: str
        prefix of layers' names, used to share parameters between
        more than one `cnn` parts.
    '''
    def create_conv(context_len, hidden_size, prefix):
...@@ -174,7 +176,8 @@ class DSSM(object):
    - source sentence
    - left_target sentence
    - right_target sentence
    - label, 1 if left_target should be sorted in front of
      right_target, otherwise 0.
    '''
    logger.info("build rank model")
    assert self.model_type.is_rank()
......
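The rank-model hunk above defines the pairwise input: a source sentence, two candidate targets, and a label that is 1 when left_target should be ranked before right_target. As a hedged illustration of what such a label typically drives, here is a minimal NumPy sketch of a RankNet-style pairwise cost; the function name and the use of raw scalar scores are assumptions for illustration, not the repo's API.

import numpy as np

def pairwise_rank_cost(score_left, score_right, label):
    # Illustrative RankNet-style pairwise loss. label is 1 if the left
    # candidate should be ranked before the right one, otherwise 0,
    # matching the docstring above.
    # P(left ranked before right) is modeled from the score difference.
    p_left_first = 1.0 / (1.0 + np.exp(-(score_left - score_right)))
    # Cross-entropy between the label and the modeled probability.
    return -(label * np.log(p_left_first) +
             (1 - label) * np.log(1.0 - p_left_first))

# A correctly ordered pair gives a small loss:
# pairwise_rank_cost(2.0, 0.5, 1) is about 0.20.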
from utils import UNK, ModelType, TaskType, load_dic, \
    sent2ids, logger, ModelType
class Dataset(object):
...@@ -38,7 +37,6 @@ class Dataset(object):
    '''
    Load testset.
    '''
    with open(self.test_path) as f:
        for line_id, line in enumerate(f):
            yield self.record_reader(line)
......
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import argparse
import paddle.v2 as paddle
...@@ -31,8 +29,8 @@ parser.add_argument(
    '--target_dic_path',
    type=str,
    required=False,
    help=("path of the target's word dictionary, "
          "if not set, the `source_dic_path` will be used"))
parser.add_argument(
    '-b',
    '--batch_size',
...@@ -221,7 +219,8 @@ def train(train_data_path=None,
    event.pass_id, event.batch_id, event.cost, event.metrics))
# test model
if event.batch_id > 0 and \
        event.batch_id % args.num_batches_to_test == 0:
    if test_reader is not None:
        if model_type.is_classification():
            result = trainer.test(
...@@ -231,7 +230,8 @@ def train(train_data_path=None,
else:
    result = None
# save model
if event.batch_id > 0 and \
        event.batch_id % args.num_batches_to_save_model == 0:
    model_desc = "{type}_{arch}".format(
        type=str(args.model_type), arch=str(args.model_arch))
    with open("%sdssm_%s_pass_%05d.tar" %
......
<html>
<head>
<script type="text/x-mathjax-config">
MathJax.Hub.Config({
extensions: ["tex2jax.js", "TeX/AMSsymbols.js", "TeX/AMSmath.js"],
jax: ["input/TeX", "output/HTML-CSS"],
tex2jax: {
inlineMath: [ ['$','$'] ],
displayMath: [ ['$$','$$'] ],
processEscapes: true
},
"HTML-CSS": { availableFonts: ["TeX"] }
});
</script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/MathJax.js" async></script>
<script type="text/javascript" src="../.tools/theme/marked.js">
</script>
<link href="http://cdn.bootcss.com/highlight.js/9.9.0/styles/darcula.min.css" rel="stylesheet">
<script src="http://cdn.bootcss.com/highlight.js/9.9.0/highlight.min.js"></script>
<link href="http://cdn.bootcss.com/bootstrap/4.0.0-alpha.6/css/bootstrap.min.css" rel="stylesheet">
<link href="https://cdn.jsdelivr.net/perfect-scrollbar/0.6.14/css/perfect-scrollbar.min.css" rel="stylesheet">
<link href="../.tools/theme/github-markdown.css" rel='stylesheet'>
</head>
<style type="text/css" >
.markdown-body {
box-sizing: border-box;
min-width: 200px;
max-width: 980px;
margin: 0 auto;
padding: 45px;
}
</style>
<body>
<div id="context" class="container-fluid markdown-body">
</div>
<!-- This block will be replaced by each markdown file content. Please do not change lines below.-->
<div id="markdown" style='display:none'>
[TBD]
</div>
<!-- You can change the lines below now. -->
<script type="text/javascript">
marked.setOptions({
renderer: new marked.Renderer(),
gfm: true,
breaks: false,
smartypants: true,
highlight: function(code, lang) {
code = code.replace(/&amp;/g, "&")
code = code.replace(/&gt;/g, ">")
code = code.replace(/&lt;/g, "<")
code = code.replace(/&nbsp;/g, " ")
return hljs.highlightAuto(code, [lang]).value;
}
});
document.getElementById("context").innerHTML = marked(
document.getElementById("markdown").innerHTML)
</script>
</body>
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import logging
import gzip
......
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import math
import paddle.v2 as paddle
......
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import logging
import gzip
......
File mode changed from 100755 to 100644
import os
import sys
import gzip
import functools
import argparse
import numpy as np
import paddle.v2 as paddle
def lambda_rank(input_dim):
    """
    lambda_rank is a Listwise rank model, the input data and label
    must be sequences.
    https://papers.nips.cc/paper/2971-learning-to-rank-with-nonsmooth-cost-functions.pdf
    parameters :
        input_dim, one document's dense feature vector dimension
...@@ -16,6 +20,7 @@ def lambda_rank(input_dim):
    format of the dense_vector_sequence:
    [[f, ...], [f, ...], ...], f is a float or an int number
    """
    label = paddle.layer.data("label",
                              paddle.data_type.dense_vector_sequence(1))
    data = paddle.layer.data("data",
...@@ -88,11 +93,11 @@ def train_lambda_rank(num_passes):
def lambda_rank_infer(pass_id):
    """lambda_rank model inference interface
    parameters:
        pass_id : inference model in pass_id
    """
    print "Begin to Infer..."
    input_dim = 46
    output = lambda_rank(input_dim)
...@@ -109,7 +114,8 @@ def lambda_rank_infer(pass_id):
    if len(infer_data) == infer_data_num:
        break
# predict the score of each infer_data document, then re-sort the documents
# by the predicted score in descending order to build the ranking
predicitons = paddle.infer(
    output_layer=output, parameters=parameters, input=infer_data)
......
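The lambda_rank docstring above says both the data and the label are sequences, one element per document of the same query, each element a dense feature vector. A hedged sketch of what one training sample in that [[f, ...], [f, ...], ...] format could look like; the feature values, the small feature dimension, and the helper name are made up for illustration (the real input_dim is 46).

def make_sample():
    # "data": a sequence of dense feature vectors, one per candidate document.
    features = [
        [0.1, 0.0, 0.7],   # document 1
        [0.4, 0.2, 0.1],   # document 2
        [0.9, 0.5, 0.3],   # document 3
    ]
    # "label": a sequence of single-element relevance scores, aligned with
    # the documents above (dense_vector_sequence(1) expects 1-dim vectors).
    labels = [[2.0], [0.0], [1.0]]
    return features, labels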
...@@ -12,7 +12,6 @@ def ndcg(score_list):
    e.g. predict rank score list :
    >>> scores = [3, 2, 3, 0, 1, 2]
    >>> ndcg_score = ndcg(scores)
    """
    def dcg(score_list):
......
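The ndcg docstring above only shows the call. For reference, a minimal sketch of the usual DCG/NDCG computation such a helper is commonly based on; this is not necessarily the exact formula used in the repo.

import math

def dcg(score_list):
    # Discounted cumulative gain: (2^rel - 1) / log2(rank + 1), rank starting at 1.
    return sum((2 ** rel - 1) / math.log(i + 2, 2)
               for i, rel in enumerate(score_list))

def ndcg(score_list):
    # Normalize by the DCG of the ideal (descending) ordering.
    ideal = dcg(sorted(score_list, reverse=True))
    return dcg(score_list) / ideal if ideal > 0 else 0.0

# With the docstring's example, ndcg([3, 2, 3, 0, 1, 2]) is about 0.95.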
...@@ -13,11 +13,11 @@ import argparse
def half_ranknet(name_prefix, input_dim):
    """
    parameters with the same name will be shared in the paddle framework;
    ranknet uses this so that the left network and the right network
    can share parameters, see
    https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/api.md
    """
    # data layer
    data = paddle.layer.data(name_prefix + "/data",
                             paddle.data_type.dense_vector(input_dim))
...@@ -102,12 +102,14 @@ def ranknet_infer(pass_id):
    print "Begin to Infer..."
    feature_dim = 46
    # we just need half_ranknet to predict a rank score,
    # which can be used to sort documents
    output = half_ranknet("infer", feature_dim)
    parameters = paddle.parameters.Parameters.from_tar(
        gzip.open("ranknet_params_%d.tar.gz" % (pass_id)))
    # load the data of the same query and its relevant documents;
    # ranknet is needed to rank these candidates
    infer_query_id = []
    infer_data = []
    infer_doc_index = []
...@@ -121,7 +123,8 @@ def ranknet_infer(pass_id):
        infer_query_id.append(query_id)
        infer_data.append([feature_vector])
    # predict the score of each infer_data document, then re-sort the documents
    # by the predicted score in descending order to build the ranking
    scores = paddle.infer(
        output_layer=output, parameters=parameters, input=infer_data)
......
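Both inference hunks above end with the same step: predict a score per candidate document, then re-sort the documents by that score in descending order. A small hedged sketch of that re-ranking step with NumPy; the helper name mirrors the snippet's variables, but the exact shapes returned by paddle.infer are an assumption here.

import numpy as np

def rerank(infer_doc_index, scores):
    # scores: one predicted relevance score per candidate document.
    scores = np.array(scores).flatten()
    # argsort gives ascending order; negate for descending relevance.
    order = np.argsort(-scores)
    # return the document indices in ranked order.
    return [infer_doc_index[i] for i in order]

# e.g. rerank(['d1', 'd2', 'd3'], [0.1, 0.9, 0.4]) returns ['d2', 'd3', 'd1'].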
...@@ -23,7 +23,7 @@ class ExternalMemory(object):
    Besides, the ExternalMemory class must be used together with
    paddle.layer.recurrent_group (within its step function). It can never be
    used in a standalone manner.
    For more details, please refer to
    `Neural Turing Machines <https://arxiv.org/abs/1410.5401>`_.
......
""" """
Contains model configuration for external-memory-enhanced seq2seq. Contains model configuration for external-memory-enhanced seq2seq.
The "external memory" refers to two types of memories. The "external memory" refers to two types of memories.
......
#!/usr/bin/env python
# -*- encoding:utf-8 -*-
import os
import gzip
import numpy as np
......
#!/usr/bin/env python
# -*- encoding:utf-8 -*-
import math
import paddle.v2 as paddle
......
#!/usr/bin/env python
# -*- encoding:utf-8 -*-
import os
import logging
import gzip
......
#!/usr/bin/env python
import os
import logging
import numpy as np
......
#!/usr/bin/env python
import paddle.v2 as paddle
import sys
import gzip
......
#!/usr/bin/env python
import os
import logging
import paddle.v2 as paddle
......
...@@ -30,7 +30,8 @@ class RandomScheduleGenerator:
    def getScheduleRate(self):
        """
        Get the schedule sampling rate. Usually not needed to be
        called by the users.
        """
        return self.schedule_computer(self.a, self.b, self.data_processed_)
......
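getScheduleRate above delegates to self.schedule_computer(a, b, data_processed_). For context, a hedged sketch of the decay schedules commonly used for scheduled sampling (linear, exponential, inverse sigmoid, following Bengio et al. 2015); the hunk does not show which schedule this class actually uses, so these are illustrative only.

import math

# Each schedule maps the number of processed samples to a sampling rate in [0, 1].

def linear_decay(a, b, processed):
    # keep at least rate a; decrease by slope b per processed sample
    return max(a, 1.0 - b * processed)

def exponential_decay(a, b, processed):
    # a in (0, 1) is the decay base, b rescales the progress
    return a ** (float(processed) / b)

def inverse_sigmoid_decay(a, b, processed):
    # a controls how slowly the rate falls, b rescales the progress
    return a / (a + math.exp(float(processed) * b / a))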
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys
import os
import gzip
......
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
......
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import sys
import gzip
......
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import logging
import os
import argparse
......