diff --git a/dssm/README.cn.md b/dssm/README.cn.md index b65c11df7d00f34b8378c92371858ca383827a1d..06f0f51aa3cd778394046845fa5dcfc8c39086e9 100644 --- a/dssm/README.cn.md +++ b/dssm/README.cn.md @@ -13,7 +13,7 @@ DSSM \[[1](##参考文献)\]是微软研究院13年提出来的经典的语义 DSSM 已经发展成了一个框架,可以很自然地建模两个记录之间的距离关系, 例如对于文本相关性问题,可以用余弦相似度 (cosin similarity) 来刻画语义距离; -而对于搜索引擎的结果排序,可以在DSSM上接上Rank损失训练处一个排序模型。 +而对于搜索引擎的结果排序,可以在DSSM上接上Rank损失训练出一个排序模型。 ## 模型简介 在原论文\[[1](#参考文献)\]中,DSSM模型用来衡量用户搜索词 Query 和文档集合 Documents 之间隐含的语义关系,模型结构如下 @@ -165,7 +165,13 @@ def create_rnn(self, emb, prefix=''): ''' A GRU sentence vector learner. ''' - gru = paddle.layer.gru_memory(input=emb,) + gru = paddle.networks.simple_gru( + input=emb, + size=self.dnn_dims[1], + mixed_param_attr=ParamAttr(name='%s_gru_mixed.w' % prefix), + mixed_bias_param_attr=ParamAttr(name="%s_gru_mixed.b" % prefix), + gru_param_attr=ParamAttr(name='%s_gru.w' % prefix), + gru_bias_attr=ParamAttr(name="%s_gru.b" % prefix)) sent_vec = paddle.layer.last_seq(gru) return sent_vec ``` @@ -184,7 +190,11 @@ def create_fc(self, emb, prefix=''): ''' _input_layer = paddle.layer.pooling( input=emb, pooling_type=paddle.pooling.Max()) - fc = paddle.layer.fc(input=_input_layer, size=self.dnn_dims[1]) + fc = paddle.layer.fc( + input=_input_layer, + size=self.dnn_dims[1], + param_attr=ParamAttr(name='%s_fc.w' % prefix), + bias_attr=ParamAttr(name="%s_fc.b" % prefix)) return fc ``` @@ -206,7 +216,6 @@ def create_dnn(self, sent_vec, prefix): fc = paddle.layer.fc( input=_input_layer, size=dim, - name=name, act=paddle.activation.Tanh(), param_attr=ParamAttr(name='%s.w' % name), bias_attr=ParamAttr(name='%s.b' % name), @@ -244,9 +253,9 @@ def _build_classification_or_regression_model(self, is_classification): if is_classification else paddle.data_type.dense_input) prefixs = '_ _'.split( - ) if self.share_semantic_generator else 'left right'.split() + ) if self.share_semantic_generator else 'source target'.split() embed_prefixs = '_ _'.split( - ) if self.share_embed else 'left right'.split() + ) if self.share_embed else 'source target'.split() word_vecs = [] for id, input in enumerate([source, target]): @@ -258,16 +267,21 @@ def _build_classification_or_regression_model(self, is_classification): x = self.model_arch_creater(input, prefix=prefixs[id]) semantics.append(x) - concated_vector = paddle.layer.concat(semantics) - prediction = paddle.layer.fc( - input=concated_vector, - size=self.class_num, - act=paddle.activation.Softmax()) - cost = paddle.layer.classification_cost( - input=prediction, - label=label) if is_classification else paddle.layer.mse_cost( - prediction, label) - return cost, prediction, label + if is_classification: + concated_vector = paddle.layer.concat(semantics) + prediction = paddle.layer.fc( + input=concated_vector, + size=self.class_num, + act=paddle.activation.Softmax()) + cost = paddle.layer.classification_cost( + input=prediction, label=label) + else: + prediction = paddle.layer.cos_sim(*semantics) + cost = paddle.layer.square_error_cost(prediction, label) + + if not self.is_infer: + return cost, prediction, label + return prediction ``` ### Pairwise Rank实现 Pairwise Rank复用上面的DNN结构,同一个source对两个target求相似度打分, @@ -297,7 +311,7 @@ def _build_rank_model(self): name='label_input', type=paddle.data_type.integer_value(1)) prefixs = '_ _ _'.split( - ) if self.share_semantic_generator else 'source left right'.split() + ) if self.share_semantic_generator else 'source target target'.split() embed_prefixs = '_ _'.split( ) if self.share_embed else 'source target target'.split() diff --git a/dssm/network_conf.py b/dssm/network_conf.py index 10c8974f24f98ce05eb93c85559152f5eb274d60..8e45ef81d9408a597aa44af8521429c26f1e2697 100644 --- a/dssm/network_conf.py +++ b/dssm/network_conf.py @@ -96,14 +96,24 @@ class DSSM(object): ''' _input_layer = paddle.layer.pooling( input=emb, pooling_type=paddle.pooling.Max()) - fc = paddle.layer.fc(input=_input_layer, size=self.dnn_dims[1]) + fc = paddle.layer.fc( + input=_input_layer, + size=self.dnn_dims[1], + param_attr=ParamAttr(name='%s_fc.w' % prefix), + bias_attr=ParamAttr(name="%s_fc.b" % prefix)) return fc def create_rnn(self, emb, prefix=''): ''' A GRU sentence vector learner. ''' - gru = paddle.networks.simple_gru(input=emb, size=256) + gru = paddle.networks.simple_gru( + input=emb, + size=self.dnn_dims[1], + mixed_param_attr=ParamAttr(name='%s_gru_mixed.w' % prefix), + mixed_bias_param_attr=ParamAttr(name="%s_gru_mixed.b" % prefix), + gru_param_attr=ParamAttr(name='%s_gru.w' % prefix), + gru_bias_attr=ParamAttr(name="%s_gru.b" % prefix)) sent_vec = paddle.layer.last_seq(gru) return sent_vec @@ -147,7 +157,6 @@ class DSSM(object): logger.info("create fc layer [%s] which dimention is %d" % (name, dim)) fc = paddle.layer.fc( - name=name, input=_input_layer, size=dim, act=paddle.activation.Tanh(), @@ -195,7 +204,7 @@ class DSSM(object): name='label_input', type=paddle.data_type.integer_value(1)) prefixs = '_ _ _'.split( - ) if self.share_semantic_generator else 'source left right'.split() + ) if self.share_semantic_generator else 'source target target'.split() embed_prefixs = '_ _'.split( ) if self.share_embed else 'source target target'.split() @@ -249,9 +258,9 @@ class DSSM(object): if is_classification else paddle.data_type.dense_vector(1)) prefixs = '_ _'.split( - ) if self.share_semantic_generator else 'left right'.split() + ) if self.share_semantic_generator else 'source target'.split() embed_prefixs = '_ _'.split( - ) if self.share_embed else 'left right'.split() + ) if self.share_embed else 'source target'.split() word_vecs = [] for id, input in enumerate([source, target]):