提交 c7008f5c 编写于 作者: R ranqiu

Refine code and doc of DSSM

上级 d5802782
...@@ -121,7 +121,13 @@ def create_rnn(self, emb, prefix=''): ...@@ -121,7 +121,13 @@ def create_rnn(self, emb, prefix=''):
''' '''
A GRU sentence vector learner. A GRU sentence vector learner.
''' '''
gru = paddle.layer.gru_memory(input=emb,) gru = paddle.networks.simple_gru(
input=emb,
size=self.dnn_dims[1],
mixed_param_attr=ParamAttr(name='%s_gru_mixed.w' % prefix),
mixed_bias_param_attr=ParamAttr(name="%s_gru_mixed.b" % prefix),
gru_param_attr=ParamAttr(name='%s_gru.w' % prefix),
gru_bias_attr=ParamAttr(name="%s_gru.b" % prefix))
sent_vec = paddle.layer.last_seq(gru) sent_vec = paddle.layer.last_seq(gru)
return sent_vec return sent_vec
``` ```
...@@ -140,7 +146,11 @@ def create_fc(self, emb, prefix=''): ...@@ -140,7 +146,11 @@ def create_fc(self, emb, prefix=''):
''' '''
_input_layer = paddle.layer.pooling( _input_layer = paddle.layer.pooling(
input=emb, pooling_type=paddle.pooling.Max()) input=emb, pooling_type=paddle.pooling.Max())
fc = paddle.layer.fc(input=_input_layer, size=self.dnn_dims[1]) fc = paddle.layer.fc(
input=_input_layer,
size=self.dnn_dims[1],
param_attr=ParamAttr(name='%s_fc.w' % prefix),
bias_attr=ParamAttr(name="%s_fc.b" % prefix))
return fc return fc
``` ```
...@@ -160,7 +170,6 @@ def create_dnn(self, sent_vec, prefix): ...@@ -160,7 +170,6 @@ def create_dnn(self, sent_vec, prefix):
fc = paddle.layer.fc( fc = paddle.layer.fc(
input=_input_layer, input=_input_layer,
size=dim, size=dim,
name=name,
act=paddle.activation.Tanh(), act=paddle.activation.Tanh(),
param_attr=ParamAttr(name='%s.w' % name), param_attr=ParamAttr(name='%s.w' % name),
bias_attr=ParamAttr(name='%s.b' % name), bias_attr=ParamAttr(name='%s.b' % name),
...@@ -198,9 +207,9 @@ def _build_classification_or_regression_model(self, is_classification): ...@@ -198,9 +207,9 @@ def _build_classification_or_regression_model(self, is_classification):
if is_classification else paddle.data_type.dense_input) if is_classification else paddle.data_type.dense_input)
prefixs = '_ _'.split( prefixs = '_ _'.split(
) if self.share_semantic_generator else 'left right'.split() ) if self.share_semantic_generator else 'source target'.split()
embed_prefixs = '_ _'.split( embed_prefixs = '_ _'.split(
) if self.share_embed else 'left right'.split() ) if self.share_embed else 'source target'.split()
word_vecs = [] word_vecs = []
for id, input in enumerate([source, target]): for id, input in enumerate([source, target]):
...@@ -212,16 +221,21 @@ def _build_classification_or_regression_model(self, is_classification): ...@@ -212,16 +221,21 @@ def _build_classification_or_regression_model(self, is_classification):
x = self.model_arch_creater(input, prefix=prefixs[id]) x = self.model_arch_creater(input, prefix=prefixs[id])
semantics.append(x) semantics.append(x)
concated_vector = paddle.layer.concat(semantics) if is_classification:
prediction = paddle.layer.fc( concated_vector = paddle.layer.concat(semantics)
input=concated_vector, prediction = paddle.layer.fc(
size=self.class_num, input=concated_vector,
act=paddle.activation.Softmax()) size=self.class_num,
cost = paddle.layer.classification_cost( act=paddle.activation.Softmax())
input=prediction, cost = paddle.layer.classification_cost(
label=label) if is_classification else paddle.layer.mse_cost( input=prediction, label=label)
prediction, label) else:
return cost, prediction, label prediction = paddle.layer.cos_sim(*semantics)
cost = paddle.layer.square_error_cost(prediction, label)
if not self.is_infer:
return cost, prediction, label
return prediction
``` ```
### Pairwise Rank ### Pairwise Rank
...@@ -251,7 +265,7 @@ def _build_rank_model(self): ...@@ -251,7 +265,7 @@ def _build_rank_model(self):
name='label_input', type=paddle.data_type.integer_value(1)) name='label_input', type=paddle.data_type.integer_value(1))
prefixs = '_ _ _'.split( prefixs = '_ _ _'.split(
) if self.share_semantic_generator else 'source left right'.split() ) if self.share_semantic_generator else 'source target target'.split()
embed_prefixs = '_ _'.split( embed_prefixs = '_ _'.split(
) if self.share_embed else 'source target target'.split() ) if self.share_embed else 'source target target'.split()
...@@ -361,7 +375,7 @@ optional arguments: ...@@ -361,7 +375,7 @@ optional arguments:
path of the target's word dic, if not set, the path of the target's word dic, if not set, the
`source_dic_path` will be used `source_dic_path` will be used
-b BATCH_SIZE, --batch_size BATCH_SIZE -b BATCH_SIZE, --batch_size BATCH_SIZE
size of mini-batch (default:10) size of mini-batch (default:32)
-p NUM_PASSES, --num_passes NUM_PASSES -p NUM_PASSES, --num_passes NUM_PASSES
number of passes to run(default:10) number of passes to run(default:10)
-y MODEL_TYPE, --model_type MODEL_TYPE -y MODEL_TYPE, --model_type MODEL_TYPE
......
...@@ -163,7 +163,13 @@ def create_rnn(self, emb, prefix=''): ...@@ -163,7 +163,13 @@ def create_rnn(self, emb, prefix=''):
''' '''
A GRU sentence vector learner. A GRU sentence vector learner.
''' '''
gru = paddle.layer.gru_memory(input=emb,) gru = paddle.networks.simple_gru(
input=emb,
size=self.dnn_dims[1],
mixed_param_attr=ParamAttr(name='%s_gru_mixed.w' % prefix),
mixed_bias_param_attr=ParamAttr(name="%s_gru_mixed.b" % prefix),
gru_param_attr=ParamAttr(name='%s_gru.w' % prefix),
gru_bias_attr=ParamAttr(name="%s_gru.b" % prefix))
sent_vec = paddle.layer.last_seq(gru) sent_vec = paddle.layer.last_seq(gru)
return sent_vec return sent_vec
``` ```
...@@ -182,7 +188,11 @@ def create_fc(self, emb, prefix=''): ...@@ -182,7 +188,11 @@ def create_fc(self, emb, prefix=''):
''' '''
_input_layer = paddle.layer.pooling( _input_layer = paddle.layer.pooling(
input=emb, pooling_type=paddle.pooling.Max()) input=emb, pooling_type=paddle.pooling.Max())
fc = paddle.layer.fc(input=_input_layer, size=self.dnn_dims[1]) fc = paddle.layer.fc(
input=_input_layer,
size=self.dnn_dims[1],
param_attr=ParamAttr(name='%s_fc.w' % prefix),
bias_attr=ParamAttr(name="%s_fc.b" % prefix))
return fc return fc
``` ```
...@@ -202,7 +212,6 @@ def create_dnn(self, sent_vec, prefix): ...@@ -202,7 +212,6 @@ def create_dnn(self, sent_vec, prefix):
fc = paddle.layer.fc( fc = paddle.layer.fc(
input=_input_layer, input=_input_layer,
size=dim, size=dim,
name=name,
act=paddle.activation.Tanh(), act=paddle.activation.Tanh(),
param_attr=ParamAttr(name='%s.w' % name), param_attr=ParamAttr(name='%s.w' % name),
bias_attr=ParamAttr(name='%s.b' % name), bias_attr=ParamAttr(name='%s.b' % name),
...@@ -240,9 +249,9 @@ def _build_classification_or_regression_model(self, is_classification): ...@@ -240,9 +249,9 @@ def _build_classification_or_regression_model(self, is_classification):
if is_classification else paddle.data_type.dense_input) if is_classification else paddle.data_type.dense_input)
prefixs = '_ _'.split( prefixs = '_ _'.split(
) if self.share_semantic_generator else 'left right'.split() ) if self.share_semantic_generator else 'source target'.split()
embed_prefixs = '_ _'.split( embed_prefixs = '_ _'.split(
) if self.share_embed else 'left right'.split() ) if self.share_embed else 'source target'.split()
word_vecs = [] word_vecs = []
for id, input in enumerate([source, target]): for id, input in enumerate([source, target]):
...@@ -254,16 +263,21 @@ def _build_classification_or_regression_model(self, is_classification): ...@@ -254,16 +263,21 @@ def _build_classification_or_regression_model(self, is_classification):
x = self.model_arch_creater(input, prefix=prefixs[id]) x = self.model_arch_creater(input, prefix=prefixs[id])
semantics.append(x) semantics.append(x)
concated_vector = paddle.layer.concat(semantics) if is_classification:
prediction = paddle.layer.fc( concated_vector = paddle.layer.concat(semantics)
input=concated_vector, prediction = paddle.layer.fc(
size=self.class_num, input=concated_vector,
act=paddle.activation.Softmax()) size=self.class_num,
cost = paddle.layer.classification_cost( act=paddle.activation.Softmax())
input=prediction, cost = paddle.layer.classification_cost(
label=label) if is_classification else paddle.layer.mse_cost( input=prediction, label=label)
prediction, label) else:
return cost, prediction, label prediction = paddle.layer.cos_sim(*semantics)
cost = paddle.layer.square_error_cost(prediction, label)
if not self.is_infer:
return cost, prediction, label
return prediction
``` ```
### Pairwise Rank ### Pairwise Rank
...@@ -293,7 +307,7 @@ def _build_rank_model(self): ...@@ -293,7 +307,7 @@ def _build_rank_model(self):
name='label_input', type=paddle.data_type.integer_value(1)) name='label_input', type=paddle.data_type.integer_value(1))
prefixs = '_ _ _'.split( prefixs = '_ _ _'.split(
) if self.share_semantic_generator else 'source left right'.split() ) if self.share_semantic_generator else 'source target target'.split()
embed_prefixs = '_ _'.split( embed_prefixs = '_ _'.split(
) if self.share_embed else 'source target target'.split() ) if self.share_embed else 'source target target'.split()
...@@ -403,7 +417,7 @@ optional arguments: ...@@ -403,7 +417,7 @@ optional arguments:
path of the target's word dic, if not set, the path of the target's word dic, if not set, the
`source_dic_path` will be used `source_dic_path` will be used
-b BATCH_SIZE, --batch_size BATCH_SIZE -b BATCH_SIZE, --batch_size BATCH_SIZE
size of mini-batch (default:10) size of mini-batch (default:32)
-p NUM_PASSES, --num_passes NUM_PASSES -p NUM_PASSES, --num_passes NUM_PASSES
number of passes to run(default:10) number of passes to run(default:10)
-y MODEL_TYPE, --model_type MODEL_TYPE -y MODEL_TYPE, --model_type MODEL_TYPE
......
...@@ -100,7 +100,7 @@ class DSSM(object): ...@@ -100,7 +100,7 @@ class DSSM(object):
input=_input_layer, input=_input_layer,
size=self.dnn_dims[1], size=self.dnn_dims[1],
param_attr=ParamAttr(name='%s_fc.w' % prefix), param_attr=ParamAttr(name='%s_fc.w' % prefix),
bias_attr=ParamAttr(name="%s_fc.b" % prefix)) bias_attr=ParamAttr(name="%s_fc.b" % prefix, initial_std=0.))
return fc return fc
def create_rnn(self, emb, prefix=''): def create_rnn(self, emb, prefix=''):
...@@ -161,7 +161,7 @@ class DSSM(object): ...@@ -161,7 +161,7 @@ class DSSM(object):
size=dim, size=dim,
act=paddle.activation.Tanh(), act=paddle.activation.Tanh(),
param_attr=ParamAttr(name='%s.w' % name), param_attr=ParamAttr(name='%s.w' % name),
bias_attr=ParamAttr(name='%s.b' % name)) bias_attr=ParamAttr(name='%s.b' % name, initial_std=0.))
_input_layer = fc _input_layer = fc
return _input_layer return _input_layer
......
...@@ -131,7 +131,7 @@ def train(train_data_path=None, ...@@ -131,7 +131,7 @@ def train(train_data_path=None,
target_dic_path=None, target_dic_path=None,
model_type=ModelType.create_classification(), model_type=ModelType.create_classification(),
model_arch=ModelArch.create_cnn(), model_arch=ModelArch.create_cnn(),
batch_size=10, batch_size=32,
num_passes=10, num_passes=10,
share_semantic_generator=False, share_semantic_generator=False,
share_embed=False, share_embed=False,
...@@ -187,7 +187,7 @@ def train(train_data_path=None, ...@@ -187,7 +187,7 @@ def train(train_data_path=None,
parameters = paddle.parameters.create(cost) parameters = paddle.parameters.create(cost)
adam_optimizer = paddle.optimizer.Adam( adam_optimizer = paddle.optimizer.Adam(
learning_rate=1e-3, learning_rate=2e-4,
regularization=paddle.optimizer.L2Regularization(rate=1e-3), regularization=paddle.optimizer.L2Regularization(rate=1e-3),
model_average=paddle.optimizer.ModelAverage(average_window=0.5)) model_average=paddle.optimizer.ModelAverage(average_window=0.5))
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册