提交 27730332 编写于 作者: 一米半's avatar 一米半 提交者: Yibing Liu

Fix double softmax, fix test function, and change default config of pairwise (#2303)

上级 1229fb14
...@@ -3,6 +3,7 @@ softmax loss ...@@ -3,6 +3,7 @@ softmax loss
""" """
import sys import sys
import paddle.fluid as fluid
sys.path.append("../../../") sys.path.append("../../../")
import models.matching.paddle_layers as layers import models.matching.paddle_layers as layers
...@@ -23,8 +24,7 @@ class SoftmaxCrossEntropyLoss(object): ...@@ -23,8 +24,7 @@ class SoftmaxCrossEntropyLoss(object):
""" """
compute loss compute loss
""" """
softmax_with_cross_entropy = layers.SoftmaxWithCrossEntropyLayer()
reduce_mean = layers.ReduceMeanLayer() reduce_mean = layers.ReduceMeanLayer()
cost = softmax_with_cross_entropy.ops(input, label) cost = fluid.layers.cross_entropy(input=input, label=label)
avg_cost = reduce_mean.ops(cost) avg_cost = reduce_mean.ops(cost)
return avg_cost return avg_cost
...@@ -49,10 +49,10 @@ class MMDNN(object): ...@@ -49,10 +49,10 @@ class MMDNN(object):
input=input, input=input,
size=[self.vocab_size, self.emb_size], size=[self.vocab_size, self.emb_size],
padding_idx=(0 if zero_pad else None), padding_idx=(0 if zero_pad else None),
param_attr=fluid.ParamAttr(name="word_embedding", param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Xavier())) name="word_embedding", initializer=fluid.initializer.Xavier()))
if scale: if scale:
emb = emb * (self.emb_size ** 0.5) emb = emb * (self.emb_size**0.5)
return emb return emb
def bi_dynamic_lstm(self, input, hidden_size): def bi_dynamic_lstm(self, input, hidden_size):
...@@ -64,7 +64,9 @@ class MMDNN(object): ...@@ -64,7 +64,9 @@ class MMDNN(object):
param_attr=fluid.ParamAttr(name="fw_fc.w"), param_attr=fluid.ParamAttr(name="fw_fc.w"),
bias_attr=False) bias_attr=False)
forward, _ = fluid.layers.dynamic_lstm( forward, _ = fluid.layers.dynamic_lstm(
input=fw_in_proj, size=4 * hidden_size, is_reverse=False, input=fw_in_proj,
size=4 * hidden_size,
is_reverse=False,
param_attr=fluid.ParamAttr(name="forward_lstm.w"), param_attr=fluid.ParamAttr(name="forward_lstm.w"),
bias_attr=fluid.ParamAttr(name="forward_lstm.b")) bias_attr=fluid.ParamAttr(name="forward_lstm.b"))
...@@ -73,7 +75,9 @@ class MMDNN(object): ...@@ -73,7 +75,9 @@ class MMDNN(object):
param_attr=fluid.ParamAttr(name="rv_fc.w"), param_attr=fluid.ParamAttr(name="rv_fc.w"),
bias_attr=False) bias_attr=False)
reverse, _ = fluid.layers.dynamic_lstm( reverse, _ = fluid.layers.dynamic_lstm(
input=rv_in_proj, size=4 * hidden_size, is_reverse=True, input=rv_in_proj,
size=4 * hidden_size,
is_reverse=True,
param_attr=fluid.ParamAttr(name="reverse_lstm.w"), param_attr=fluid.ParamAttr(name="reverse_lstm.w"),
bias_attr=fluid.ParamAttr(name="reverse_lstm.b")) bias_attr=fluid.ParamAttr(name="reverse_lstm.b"))
return [forward, reverse] return [forward, reverse]
...@@ -96,7 +100,7 @@ class MMDNN(object): ...@@ -96,7 +100,7 @@ class MMDNN(object):
if mask is not None: if mask is not None:
cross_mask = fluid.layers.stack(x=[mask] * self.kernel_size, axis=1) cross_mask = fluid.layers.stack(x=[mask] * self.kernel_size, axis=1)
conv = cross_mask * conv + (1 - cross_mask) * (-2 ** 32 + 1) conv = cross_mask * conv + (1 - cross_mask) * (-2**32 + 1)
# valid padding # valid padding
pool = fluid.layers.pool2d( pool = fluid.layers.pool2d(
input=conv, input=conv,
...@@ -157,6 +161,8 @@ class MMDNN(object): ...@@ -157,6 +161,8 @@ class MMDNN(object):
act="tanh", act="tanh",
size=self.hidden_size) size=self.hidden_size)
pred = fluid.layers.fc(input=relu_hid1, size=self.out_size) pred = fluid.layers.fc(input=relu_hid1,
size=self.out_size,
act="softmax")
return left_seq_encoder, pred return left_seq_encoder, pred
...@@ -3,13 +3,13 @@ ...@@ -3,13 +3,13 @@
### 任务说明 ### 任务说明
短文本语义匹配(SimilarityNet, SimNet)是一个计算短文本相似度的框架,可以根据用户输入的两个文本,计算出相似度得分。SimNet框架在百度各产品上广泛应用,主要包括BOW、CNN、RNN、MMDNN等核心网络结构形式,提供语义相似度计算训练和预测框架,适用于信息检索、新闻推荐、智能客服等多个应用场景,帮助企业解决语义匹配问题。可通过[AI开放平台-短文本相似度](https://ai.baidu.com/tech/nlp_basic/simnet)线上体验。 短文本语义匹配(SimilarityNet, SimNet)是一个计算短文本相似度的框架,可以根据用户输入的两个文本,计算出相似度得分。SimNet框架在百度各产品上广泛应用,主要包括BOW、CNN、RNN、MMDNN等核心网络结构形式,提供语义相似度计算训练和预测框架,适用于信息检索、新闻推荐、智能客服等多个应用场景,帮助企业解决语义匹配问题。可通过[AI开放平台-短文本相似度](https://ai.baidu.com/tech/nlp_basic/simnet)线上体验。
### 效果说明 ### 效果说明
基于百度海量搜索数据,我们训练了一个SimNet-BOW-Pairwise语义匹配模型,在一些真实的FAQ问答场景中,该模型效果比基于字面的相似度方法AUC提升5%以上。我们基于百度自建测试集(包含聊天、客服等数据集)和语义匹配数据集(LCQMC)进行评测,效果如下表所示。LCQMC数据集以Accuracy为评测指标,而pairwise模型的输出为相似度,因此我们采用0.91作为分类阈值,相比于基线模型中网络结构同等复杂的CBOW模型(准确率为0.737),我们模型的准确率为0.7517。 基于百度海量搜索数据,我们训练了一个SimNet-BOW-Pairwise语义匹配模型,在一些真实的FAQ问答场景中,该模型效果比基于字面的相似度方法AUC提升5%以上。我们基于百度自建测试集(包含聊天、客服等数据集)和语义匹配数据集(LCQMC)进行评测,效果如下表所示。LCQMC数据集以Accuracy为评测指标,而pairwise模型的输出为相似度,因此我们采用0.958作为分类阈值,相比于基线模型中网络结构同等复杂的CBOW模型(准确率为0.737),我们模型的准确率为0.7532。
| 模型 | 百度知道 | ECOM |QQSIM | UNICOM | LCQMC | | 模型 | 百度知道 | ECOM |QQSIM | UNICOM | LCQMC |
|:-----------:|:-------------:|:-------------:|:-------------:|:-------------:|:-------------:| |:-----------:|:-------------:|:-------------:|:-------------:|:-------------:|:-------------:|
| | AUC | AUC | AUC|正逆序比|Accuracy| | | AUC | AUC | AUC|正逆序比|Accuracy|
|BOW_Pairwise|0.6766|0.7308|0.7643|1.5630|0.7517| |BOW_Pairwise|0.6767|0.7329|0.7650|1.5630|0.7532|
## 快速开始 ## 快速开始
#### 版本依赖 #### 版本依赖
本项目依赖于 PaddlePaddle Fluid 1.3.1,请参考[安装指南](http://www.paddlepaddle.org/#quick-start)进行安装。 本项目依赖于 PaddlePaddle Fluid 1.3.1,请参考[安装指南](http://www.paddlepaddle.org/#quick-start)进行安装。
......
...@@ -10,8 +10,11 @@ ...@@ -10,8 +10,11 @@
"class_name": "SoftmaxCrossEntropyLoss" "class_name": "SoftmaxCrossEntropyLoss"
}, },
"optimizer": { "optimizer": {
"class_name": "SGDOptimizer", "class_name": "AdamOptimizer",
"learning_rate" : 0.001 "learning_rate": 0.001,
"beta1": 0.9,
"beta2": 0.999,
"epsilon": 1e-08
}, },
"task_mode": "pointwise", "task_mode": "pointwise",
"model_path": "bow_pointwise" "model_path": "bow_pointwise"
......
...@@ -12,8 +12,11 @@ ...@@ -12,8 +12,11 @@
"class_name": "SoftmaxCrossEntropyLoss" "class_name": "SoftmaxCrossEntropyLoss"
}, },
"optimizer": { "optimizer": {
"class_name": "SGDOptimizer", "class_name": "AdamOptimizer",
"learning_rate" : 0.001 "learning_rate": 0.001,
"beta1": 0.9,
"beta2": 0.999,
"epsilon": 1e-08
}, },
"task_mode": "pointwise", "task_mode": "pointwise",
"model_path": "cnn_pointwise" "model_path": "cnn_pointwise"
......
...@@ -11,8 +11,11 @@ ...@@ -11,8 +11,11 @@
"class_name": "SoftmaxCrossEntropyLoss" "class_name": "SoftmaxCrossEntropyLoss"
}, },
"optimizer": { "optimizer": {
"class_name": "SGDOptimizer", "class_name": "AdamOptimizer",
"learning_rate" : 0.001 "learning_rate" : 0.001,
"beta1": 0.9,
"beta2": 0.999,
"epsilon": 1e-08
}, },
"task_mode": "pointwise", "task_mode": "pointwise",
"model_path": "gru_pointwise" "model_path": "gru_pointwise"
......
...@@ -11,8 +11,11 @@ ...@@ -11,8 +11,11 @@
"class_name": "SoftmaxCrossEntropyLoss" "class_name": "SoftmaxCrossEntropyLoss"
}, },
"optimizer": { "optimizer": {
"class_name": "SGDOptimizer", "class_name": "AdamOptimizer",
"learning_rate" : 0.001 "learning_rate": 0.001,
"beta1": 0.9,
"beta2": 0.999,
"epsilon": 1e-08
}, },
"task_mode": "pointwise", "task_mode": "pointwise",
"model_path": "lstm_pointwise" "model_path": "lstm_pointwise"
......
...@@ -38,7 +38,7 @@ train() { ...@@ -38,7 +38,7 @@ train() {
--save_steps 1000 \ --save_steps 1000 \
--validation_steps 100 \ --validation_steps 100 \
--compute_accuracy False \ --compute_accuracy False \
--lamda 0.91 \ --lamda 0.958 \
--task_mode ${TASK_MODE} --task_mode ${TASK_MODE}
} }
#run_evaluate #run_evaluate
...@@ -55,7 +55,7 @@ evaluate() { ...@@ -55,7 +55,7 @@ evaluate() {
--vocab_path ${VOCAB_PATH} \ --vocab_path ${VOCAB_PATH} \
--task_mode ${TASK_MODE} \ --task_mode ${TASK_MODE} \
--compute_accuracy False \ --compute_accuracy False \
--lamda 0.91 \ --lamda 0.958 \
--init_checkpoint ${INIT_CHECKPOINT} --init_checkpoint ${INIT_CHECKPOINT}
} }
# run_infer # run_infer
......
...@@ -11,7 +11,6 @@ import six ...@@ -11,7 +11,6 @@ import six
import numpy as np import numpy as np
import logging import logging
import logging.handlers import logging.handlers
""" """
******functions for file processing****** ******functions for file processing******
""" """
...@@ -165,7 +164,11 @@ def print_arguments(args): ...@@ -165,7 +164,11 @@ def print_arguments(args):
print('------------------------------------------------') print('------------------------------------------------')
def init_log(log_path, level=logging.INFO, when="D", backup=7, def init_log(
log_path,
level=logging.INFO,
when="D",
backup=7,
format="%(levelname)s: %(asctime)s - %(filename)s:%(lineno)d * %(thread)d %(message)s", format="%(levelname)s: %(asctime)s - %(filename)s:%(lineno)d * %(thread)d %(message)s",
datefmt=None): datefmt=None):
""" """
...@@ -209,16 +212,14 @@ def init_log(log_path, level=logging.INFO, when="D", backup=7, ...@@ -209,16 +212,14 @@ def init_log(log_path, level=logging.INFO, when="D", backup=7,
if not os.path.isdir(dir): if not os.path.isdir(dir):
os.makedirs(dir) os.makedirs(dir)
handler = logging.handlers.TimedRotatingFileHandler(log_path + ".log", handler = logging.handlers.TimedRotatingFileHandler(
when=when, log_path + ".log", when=when, backupCount=backup)
backupCount=backup)
handler.setLevel(level) handler.setLevel(level)
handler.setFormatter(formatter) handler.setFormatter(formatter)
logger.addHandler(handler) logger.addHandler(handler)
handler = logging.handlers.TimedRotatingFileHandler(log_path + ".log.wf", handler = logging.handlers.TimedRotatingFileHandler(
when=when, log_path + ".log.wf", when=when, backupCount=backup)
backupCount=backup)
handler.setLevel(logging.WARNING) handler.setLevel(logging.WARNING)
handler.setFormatter(formatter) handler.setFormatter(formatter)
logger.addHandler(handler) logger.addHandler(handler)
...@@ -241,7 +242,7 @@ def get_level(): ...@@ -241,7 +242,7 @@ def get_level():
return logger.level return logger.level
def get_accuracy(preds, labels, mode, lamda=0.91): def get_accuracy(preds, labels, mode, lamda=0.958):
""" """
compute accuracy compute accuracy
""" """
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册