提交 f3b1bb5a 编写于 作者: C caoying03

refine DSSM comments.

上级 77d2bf77
...@@ -9,25 +9,25 @@ from utils import logger, ModelType, ModelArch, load_dic ...@@ -9,25 +9,25 @@ from utils import logger, ModelType, ModelArch, load_dic
parser = argparse.ArgumentParser(description="PaddlePaddle DSSM infer") parser = argparse.ArgumentParser(description="PaddlePaddle DSSM infer")
parser.add_argument( parser.add_argument(
'--model_path', "--model_path",
type=str, type=str,
required=True, required=True,
help="path of model parameters file") help="path of model parameters file")
parser.add_argument( parser.add_argument(
'-i', "-i",
'--data_path', "--data_path",
type=str, type=str,
required=True, required=True,
help="path of the dataset to infer") help="path of the dataset to infer")
parser.add_argument( parser.add_argument(
'-o', "-o",
'--prediction_output_path', "--prediction_output_path",
type=str, type=str,
required=True, required=True,
help="path to output the prediction") help="path to output the prediction")
parser.add_argument( parser.add_argument(
'-y', "-y",
'--model_type', "--model_type",
type=int, type=int,
required=True, required=True,
default=ModelType.CLASSIFICATION_MODE, default=ModelType.CLASSIFICATION_MODE,
...@@ -36,45 +36,45 @@ parser.add_argument( ...@@ -36,45 +36,45 @@ parser.add_argument(
(ModelType.CLASSIFICATION_MODE, ModelType.RANK_MODE, (ModelType.CLASSIFICATION_MODE, ModelType.RANK_MODE,
ModelType.REGRESSION_MODE)) ModelType.REGRESSION_MODE))
parser.add_argument( parser.add_argument(
'-s', "-s",
'--source_dic_path', "--source_dic_path",
type=str, type=str,
required=False, required=False,
help="path of the source's word dic") help="path of the source's word dic")
parser.add_argument( parser.add_argument(
'--target_dic_path', "--target_dic_path",
type=str, type=str,
required=False, required=False,
help=("path of the target's word dictionary, " help=("path of the target's word dictionary, "
"if not set, the `source_dic_path` will be used")) "if not set, the `source_dic_path` will be used"))
parser.add_argument( parser.add_argument(
'-a', "-a",
'--model_arch', "--model_arch",
type=int, type=int,
required=True, required=True,
default=ModelArch.CNN_MODE, default=ModelArch.CNN_MODE,
help="model architecture, %d for CNN, %d for FC, %d for RNN" % help="model architecture, %d for CNN, %d for FC, %d for RNN" %
(ModelArch.CNN_MODE, ModelArch.FC_MODE, ModelArch.RNN_MODE)) (ModelArch.CNN_MODE, ModelArch.FC_MODE, ModelArch.RNN_MODE))
parser.add_argument( parser.add_argument(
'--share_network_between_source_target', "--share_network_between_source_target",
type=distutils.util.strtobool, type=distutils.util.strtobool,
default=False, default=False,
help="whether to share network parameters between source and target") help="whether to share network parameters between source and target")
parser.add_argument( parser.add_argument(
'--share_embed', "--share_embed",
type=distutils.util.strtobool, type=distutils.util.strtobool,
default=False, default=False,
help="whether to share word embedding between source and target") help="whether to share word embedding between source and target")
parser.add_argument( parser.add_argument(
'--dnn_dims', "--dnn_dims",
type=str, type=str,
default='256,128,64,32', default="256,128,64,32",
help=("dimentions of dnn layers, default is '256,128,64,32', " help=("dimentions of dnn layers, default is `256,128,64,32`, "
"which means create a 4-layer dnn, " "which means create a 4-layer dnn, "
"demention of each layer is 256, 128, 64 and 32")) "demention of each layer is 256, 128, 64 and 32"))
parser.add_argument( parser.add_argument(
'-c', "-c",
'--class_num', "--class_num",
type=int, type=int,
default=0, default=0,
help="number of categories for classification task.") help="number of categories for classification task.")
...@@ -83,9 +83,10 @@ args = parser.parse_args() ...@@ -83,9 +83,10 @@ args = parser.parse_args()
args.model_type = ModelType(args.model_type) args.model_type = ModelType(args.model_type)
args.model_arch = ModelArch(args.model_arch) args.model_arch = ModelArch(args.model_arch)
if args.model_type.is_classification(): if args.model_type.is_classification():
assert args.class_num > 1, "--class_num should be set in classification task." assert args.class_num > 1, ("The parameter class_num should be set "
"in classification task.")
layer_dims = map(int, args.dnn_dims.split(',')) layer_dims = map(int, args.dnn_dims.split(","))
args.target_dic_path = args.source_dic_path if not args.target_dic_path \ args.target_dic_path = args.source_dic_path if not args.target_dic_path \
else args.target_dic_path else args.target_dic_path
...@@ -94,8 +95,6 @@ paddle.init(use_gpu=False, trainer_count=1) ...@@ -94,8 +95,6 @@ paddle.init(use_gpu=False, trainer_count=1)
class Inferer(object): class Inferer(object):
def __init__(self, param_path): def __init__(self, param_path):
logger.info("create DSSM model")
prediction = DSSM( prediction = DSSM(
dnn_dims=layer_dims, dnn_dims=layer_dims,
vocab_sizes=[ vocab_sizes=[
...@@ -110,14 +109,13 @@ class Inferer(object): ...@@ -110,14 +109,13 @@ class Inferer(object):
is_infer=True)() is_infer=True)()
# load parameter # load parameter
logger.info("load model parameters from %s" % param_path) logger.info("Load the trained model from %s." % param_path)
self.parameters = paddle.parameters.Parameters.from_tar( self.parameters = paddle.parameters.Parameters.from_tar(
open(param_path, 'r')) open(param_path, "r"))
self.inferer = paddle.inference.Inference( self.inferer = paddle.inference.Inference(
output_layer=prediction, parameters=self.parameters) output_layer=prediction, parameters=self.parameters)
def infer(self, data_path): def infer(self, data_path):
logger.info("infer data...")
dataset = reader.Dataset( dataset = reader.Dataset(
train_path=data_path, train_path=data_path,
test_path=None, test_path=None,
...@@ -125,19 +123,20 @@ class Inferer(object): ...@@ -125,19 +123,20 @@ class Inferer(object):
target_dic_path=args.target_dic_path, target_dic_path=args.target_dic_path,
model_type=args.model_type, ) model_type=args.model_type, )
infer_reader = paddle.batch(dataset.infer, batch_size=1000) infer_reader = paddle.batch(dataset.infer, batch_size=1000)
logger.warning('write predictions to %s' % args.prediction_output_path) logger.warning("Write predictions to %s." % args.prediction_output_path)
output_f = open(args.prediction_output_path, 'w') output_f = open(args.prediction_output_path, "w")
for id, batch in enumerate(infer_reader()): for id, batch in enumerate(infer_reader()):
res = self.inferer.infer(input=batch) res = self.inferer.infer(input=batch)
predictions = [' '.join(map(str, x)) for x in res] predictions = [" ".join(map(str, x)) for x in res]
assert len(batch) == len(predictions), ( assert len(batch) == len(predictions), (
"predict error, %d inputs, " "Error! %d inputs are given, "
"but %d predictions") % (len(batch), len(predictions)) "but only %d predictions are returned.") % (len(batch),
output_f.write('\n'.join(map(str, predictions)) + '\n') len(predictions))
output_f.write("\n".join(map(str, predictions)) + "\n")
if __name__ == '__main__': if __name__ == "__main__":
inferer = Inferer(args.model_path) inferer = Inferer(args.model_path)
inferer.infer(args.data_path) inferer.infer(args.data_path)
...@@ -13,26 +13,33 @@ class DSSM(object): ...@@ -13,26 +13,33 @@ class DSSM(object):
class_num=None, class_num=None,
share_embed=False, share_embed=False,
is_infer=False): is_infer=False):
''' """
@dnn_dims: list of int :param dnn_dims: The dimension of each layer in the semantic vector
dimentions of each layer in semantic vector generator. generator.
@vocab_sizes: 2-d tuple :type dnn_dims: list of int
size of both left and right items. :param vocab_sizes: The size of left and right items.
@model_type: int :type vocab_sizes: A list having 2 elements.
type of task, should be 'rank: 0', 'regression: 1' or 'classification: 2' :param model_type: The type of task to train the DSSM model. The value
@model_arch: int should be "rank: 0", "regression: 1" or
model architecture "classification: 2".
@share_semantic_generator: bool :type model_type: int
whether to share the semantic vector generator for both left and right. :param model_arch: A value indicating the model architecture to use.
@share_embed: bool :type model_arch: int
whether to share the embeddings between left and right. :param share_semantic_generator: A flag indicating whether to share the
@class_num: int semantic vector between the left and
number of categories. the right item.
''' :type share_semantic_generator: bool
:param share_embed: A flag indicating whether to share the embeddings
between the left and the right item.
:type share_embed: bool
:param class_num: The number of categories.
:type class_num: int
"""
assert len(vocab_sizes) == 2, ( assert len(vocab_sizes) == 2, (
"vocab_sizes specify the sizes left and right inputs, " "The vocab_sizes specifying the sizes left and right inputs. "
"and dim should be 2.") "Its dimension should be 2.")
assert len(dnn_dims) > 1, "more than two layers is needed." assert len(dnn_dims) > 1, ("In the DNN model, more than two layers "
"are needed.")
self.dnn_dims = dnn_dims self.dnn_dims = dnn_dims
self.vocab_sizes = vocab_sizes self.vocab_sizes = vocab_sizes
...@@ -42,91 +49,89 @@ class DSSM(object): ...@@ -42,91 +49,89 @@ class DSSM(object):
self.model_arch = ModelArch(model_arch) self.model_arch = ModelArch(model_arch)
self.class_num = class_num self.class_num = class_num
self.is_infer = is_infer self.is_infer = is_infer
logger.warning("build DSSM model with config of %s, %s" % logger.warning("Build DSSM model with config of %s, %s" %
(self.model_type, self.model_arch)) (self.model_type, self.model_arch))
logger.info("vocabulary sizes: %s" % str(self.vocab_sizes)) logger.info("The vocabulary size is : %s" % str(self.vocab_sizes))
# bind model architecture # bind model architecture
_model_arch = { _model_arch = {
'cnn': self.create_cnn, "cnn": self.create_cnn,
'fc': self.create_fc, "fc": self.create_fc,
'rnn': self.create_rnn, "rnn": self.create_rnn,
} }
def _model_arch_creater(emb, prefix=''): def _model_arch_creater(emb, prefix=""):
sent_vec = _model_arch.get(str(model_arch))(emb, prefix) sent_vec = _model_arch.get(str(model_arch))(emb, prefix)
dnn = self.create_dnn(sent_vec, prefix) dnn = self.create_dnn(sent_vec, prefix)
return dnn return dnn
self.model_arch_creater = _model_arch_creater self.model_arch_creater = _model_arch_creater
# build model type
_model_type = { _model_type = {
'classification': self._build_classification_model, "classification": self._build_classification_model,
'rank': self._build_rank_model, "rank": self._build_rank_model,
'regression': self._build_regression_model, "regression": self._build_regression_model,
} }
print 'model type: ', str(self.model_type) print("model type: ", str(self.model_type))
self.model_type_creater = _model_type[str(self.model_type)] self.model_type_creater = _model_type[str(self.model_type)]
def __call__(self): def __call__(self):
return self.model_type_creater() return self.model_type_creater()
def create_embedding(self, input, prefix=''): def create_embedding(self, input, prefix=""):
''' """
Create an embedding table whose name has a `prefix`. Create word embedding. The `prefix` is added in front of the name of
''' embedding's learnable parameter.
logger.info("create embedding table [%s] which dimention is %d" % """
logger.info("Create embedding table [%s] whose dimention is %d. " %
(prefix, self.dnn_dims[0])) (prefix, self.dnn_dims[0]))
emb = paddle.layer.embedding( emb = paddle.layer.embedding(
input=input, input=input,
size=self.dnn_dims[0], size=self.dnn_dims[0],
param_attr=ParamAttr(name='%s_emb.w' % prefix)) param_attr=ParamAttr(name="%s_emb.w" % prefix))
return emb return emb
def create_fc(self, emb, prefix=''): def create_fc(self, emb, prefix=""):
''' """
A multi-layer fully connected neural networks. A multi-layer fully connected neural networks.
@emb: paddle.layer :param emb: The output of the embedding layer
output of the embedding layer :type emb: paddle.layer
@prefix: str :param prefix: A prefix will be added to the layers' names.
prefix of layers' names, used to share parameters between :type prefix: str
more than one `fc` parts. """
'''
_input_layer = paddle.layer.pooling( _input_layer = paddle.layer.pooling(
input=emb, pooling_type=paddle.pooling.Max()) input=emb, pooling_type=paddle.pooling.Max())
fc = paddle.layer.fc( fc = paddle.layer.fc(
input=_input_layer, input=_input_layer,
size=self.dnn_dims[1], size=self.dnn_dims[1],
param_attr=ParamAttr(name='%s_fc.w' % prefix), param_attr=ParamAttr(name="%s_fc.w" % prefix),
bias_attr=ParamAttr(name="%s_fc.b" % prefix, initial_std=0.)) bias_attr=ParamAttr(name="%s_fc.b" % prefix, initial_std=0.))
return fc return fc
def create_rnn(self, emb, prefix=''): def create_rnn(self, emb, prefix=""):
''' """
A GRU sentence vector learner. A GRU sentence vector learner.
''' """
gru = paddle.networks.simple_gru( gru = paddle.networks.simple_gru(
input=emb, input=emb,
size=self.dnn_dims[1], size=self.dnn_dims[1],
mixed_param_attr=ParamAttr(name='%s_gru_mixed.w' % prefix), mixed_param_attr=ParamAttr(name="%s_gru_mixed.w" % prefix),
mixed_bias_param_attr=ParamAttr(name="%s_gru_mixed.b" % prefix), mixed_bias_param_attr=ParamAttr(name="%s_gru_mixed.b" % prefix),
gru_param_attr=ParamAttr(name='%s_gru.w' % prefix), gru_param_attr=ParamAttr(name="%s_gru.w" % prefix),
gru_bias_attr=ParamAttr(name="%s_gru.b" % prefix)) gru_bias_attr=ParamAttr(name="%s_gru.b" % prefix))
sent_vec = paddle.layer.last_seq(gru) sent_vec = paddle.layer.last_seq(gru)
return sent_vec return sent_vec
def create_cnn(self, emb, prefix=''): def create_cnn(self, emb, prefix=""):
''' """
A multi-layer CNN. A multi-layer CNN.
@emb: paddle.layer :param emb: The word embedding.
output of the embedding layer :type emb: paddle.layer
@prefix: str :param prefix: The prefix will be added to the layers' names.
prefix of layers' names, used to share parameters between :type prefix: str
more than one `cnn` parts. """
'''
def create_conv(context_len, hidden_size, prefix): def create_conv(context_len, hidden_size, prefix):
key = "%s_%d_%d" % (prefix, context_len, hidden_size) key = "%s_%d_%d" % (prefix, context_len, hidden_size)
...@@ -135,15 +140,15 @@ class DSSM(object): ...@@ -135,15 +140,15 @@ class DSSM(object):
context_len=context_len, context_len=context_len,
hidden_size=hidden_size, hidden_size=hidden_size,
# set parameter attr for parameter sharing # set parameter attr for parameter sharing
context_proj_param_attr=ParamAttr(name=key + 'contex_proj.w'), context_proj_param_attr=ParamAttr(name=key + "contex_proj.w"),
fc_param_attr=ParamAttr(name=key + '_fc.w'), fc_param_attr=ParamAttr(name=key + "_fc.w"),
fc_bias_attr=ParamAttr(name=key + '_fc.b'), fc_bias_attr=ParamAttr(name=key + "_fc.b"),
pool_bias_attr=ParamAttr(name=key + '_pool.b')) pool_bias_attr=ParamAttr(name=key + "_pool.b"))
return conv return conv
logger.info('create a sequence_conv_pool which context width is 3') logger.info("create a sequence_conv_pool which context width is 3")
conv_3 = create_conv(3, self.dnn_dims[1], "cnn") conv_3 = create_conv(3, self.dnn_dims[1], "cnn")
logger.info('create a sequence_conv_pool which context width is 4') logger.info("create a sequence_conv_pool which context width is 4")
conv_4 = create_conv(4, self.dnn_dims[1], "cnn") conv_4 = create_conv(4, self.dnn_dims[1], "cnn")
return conv_3, conv_4 return conv_3, conv_4
...@@ -160,8 +165,8 @@ class DSSM(object): ...@@ -160,8 +165,8 @@ class DSSM(object):
input=_input_layer, input=_input_layer,
size=dim, size=dim,
act=paddle.activation.Tanh(), act=paddle.activation.Tanh(),
param_attr=ParamAttr(name='%s.w' % name), param_attr=ParamAttr(name="%s.w" % name),
bias_attr=ParamAttr(name='%s.b' % name, initial_std=0.)) bias_attr=ParamAttr(name="%s.b" % name, initial_std=0.))
_input_layer = fc _input_layer = fc
return _input_layer return _input_layer
...@@ -178,7 +183,7 @@ class DSSM(object): ...@@ -178,7 +183,7 @@ class DSSM(object):
is_classification=False) is_classification=False)
def _build_rank_model(self): def _build_rank_model(self):
''' """
Build a pairwise rank model, and the cost is returned. Build a pairwise rank model, and the cost is returned.
A pairwise rank model has 3 inputs: A pairwise rank model has 3 inputs:
...@@ -187,26 +192,26 @@ class DSSM(object): ...@@ -187,26 +192,26 @@ class DSSM(object):
- right_target sentence - right_target sentence
- label, 1 if left_target should be sorted in front of - label, 1 if left_target should be sorted in front of
right_target, otherwise 0. right_target, otherwise 0.
''' """
logger.info("build rank model") logger.info("build rank model")
assert self.model_type.is_rank() assert self.model_type.is_rank()
source = paddle.layer.data( source = paddle.layer.data(
name='source_input', name="source_input",
type=paddle.data_type.integer_value_sequence(self.vocab_sizes[0])) type=paddle.data_type.integer_value_sequence(self.vocab_sizes[0]))
left_target = paddle.layer.data( left_target = paddle.layer.data(
name='left_target_input', name="left_target_input",
type=paddle.data_type.integer_value_sequence(self.vocab_sizes[1])) type=paddle.data_type.integer_value_sequence(self.vocab_sizes[1]))
right_target = paddle.layer.data( right_target = paddle.layer.data(
name='right_target_input', name="right_target_input",
type=paddle.data_type.integer_value_sequence(self.vocab_sizes[1])) type=paddle.data_type.integer_value_sequence(self.vocab_sizes[1]))
if not self.is_infer: if not self.is_infer:
label = paddle.layer.data( label = paddle.layer.data(
name='label_input', type=paddle.data_type.integer_value(1)) name="label_input", type=paddle.data_type.integer_value(1))
prefixs = '_ _ _'.split( prefixs = "_ _ _".split(
) if self.share_semantic_generator else 'source target target'.split() ) if self.share_semantic_generator else "source target target".split()
embed_prefixs = '_ _'.split( embed_prefixs = "_ _".split(
) if self.share_embed else 'source target target'.split() ) if self.share_embed else "source target target".split()
word_vecs = [] word_vecs = []
for id, input in enumerate([source, left_target, right_target]): for id, input in enumerate([source, left_target, right_target]):
...@@ -218,9 +223,9 @@ class DSSM(object): ...@@ -218,9 +223,9 @@ class DSSM(object):
x = self.model_arch_creater(input, prefix=prefixs[id]) x = self.model_arch_creater(input, prefix=prefixs[id])
semantics.append(x) semantics.append(x)
# cossim score of source and left_target # The cosine similarity score of source and left_target.
left_score = paddle.layer.cos_sim(semantics[0], semantics[1]) left_score = paddle.layer.cos_sim(semantics[0], semantics[1])
# cossim score of source and right target # The cosine similarity score of source and right target.
right_score = paddle.layer.cos_sim(semantics[0], semantics[2]) right_score = paddle.layer.cos_sim(semantics[0], semantics[2])
if not self.is_infer: if not self.is_infer:
...@@ -233,34 +238,33 @@ class DSSM(object): ...@@ -233,34 +238,33 @@ class DSSM(object):
return right_score return right_score
def _build_classification_or_regression_model(self, is_classification): def _build_classification_or_regression_model(self, is_classification):
''' """
Build a classification/regression model, and the cost is returned. Build a classification/regression model, and the cost is returned.
A Classification has 3 inputs: The classification/regression task expects 3 inputs:
- source sentence - source sentence
- target sentence - target sentence
- classification label - classification label
''' """
if is_classification: if is_classification:
# prepare inputs.
assert self.class_num assert self.class_num
source = paddle.layer.data( source = paddle.layer.data(
name='source_input', name="source_input",
type=paddle.data_type.integer_value_sequence(self.vocab_sizes[0])) type=paddle.data_type.integer_value_sequence(self.vocab_sizes[0]))
target = paddle.layer.data( target = paddle.layer.data(
name='target_input', name="target_input",
type=paddle.data_type.integer_value_sequence(self.vocab_sizes[1])) type=paddle.data_type.integer_value_sequence(self.vocab_sizes[1]))
label = paddle.layer.data( label = paddle.layer.data(
name='label_input', name="label_input",
type=paddle.data_type.integer_value(self.class_num) type=paddle.data_type.integer_value(self.class_num)
if is_classification else paddle.data_type.dense_vector(1)) if is_classification else paddle.data_type.dense_vector(1))
prefixs = '_ _'.split( prefixs = "_ _".split(
) if self.share_semantic_generator else 'source target'.split() ) if self.share_semantic_generator else "source target".split()
embed_prefixs = '_ _'.split( embed_prefixs = "_ _".split(
) if self.share_embed else 'source target'.split() ) if self.share_embed else "source target".split()
word_vecs = [] word_vecs = []
for id, input in enumerate([source, target]): for id, input in enumerate([source, target]):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册