Commit c138701d authored by guosheng

add hapi/simnet_model

Parent 4f3aebce
@@ -3,16 +3,6 @@
### Task Description
Short text semantic matching (SimilarityNet, SimNet) is a framework for computing the similarity of short texts: given two texts provided by the user, it outputs a similarity score. SimNet is widely used across Baidu products. It covers core network structures such as BOW, CNN, RNN, and MMDNN, and provides a training and prediction framework for semantic similarity computation. It is applicable to scenarios such as information retrieval, news recommendation, and intelligent customer service, helping enterprises solve semantic matching problems.
-### Results
-Using Baidu's massive search data, we trained a SimNet-BOW-Pairwise semantic matching model. In real FAQ question-answering scenarios, this model improves AUC by more than 5% over literal-match similarity methods. We evaluated it on Baidu's in-house test sets (covering chat, customer service, and other data); the results are shown in the table below.
-| Model | Baidu Zhidao | ECOM | QQSIM | UNICOM |
-|:-----------:|:-------------:|:-------------:|:-------------:|:-------------:|
-| | AUC | AUC | AUC | positive-negative order ratio |
-| BOW_Pairwise | 0.6815 | 0.7331 | 0.7638 | 1.5565 |
#### Test Sets
| Dataset | Source | Vertical |
|:-----------:|:-------------:|:-------------:|
@@ -29,9 +19,9 @@
#### Installation
Clone the toolkit repository to your local machine
```shell
-git clone https://github.com/PaddlePaddle/models.git
-cd models/dygraph/similarity_net
+git clone https://github.com/PaddlePaddle/hapi.git
+cd hapi/examples/similarity_net
```
#### Data Preparation
Download the preprocessed data. After running the command, the data directory will contain sample training data, sample test data, and the corresponding term-to-index dictionary (term2id.dict).
......
@@ -14,6 +14,7 @@
"""
bow class
"""
+import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import Linear, Layer, Embedding
from paddle.incubate.hapi.model import Model
@@ -25,11 +26,10 @@ class BOWEncoder(Layer):
    simple BOWEncoder for simnet
    """
-    def __init__(self, dict_size, bow_dim, seq_len, emb_dim, padding_idx):
+    def __init__(self, dict_size, bow_dim, emb_dim, padding_idx):
        super(BOWEncoder, self).__init__()
        self.dict_size = dict_size
        self.bow_dim = bow_dim
-        self.seq_len = seq_len
        self.emb_dim = emb_dim
        self.padding_idx = padding_idx
        self.emb_layer = Embedding(
@@ -41,28 +41,20 @@ class BOWEncoder(Layer):
    def forward(self, input):
        emb = self.emb_layer(input)
-        emb_reshape = fluid.layers.reshape(
-            emb, shape=[-1, self.seq_len, self.bow_dim])
-        bow_emb = fluid.layers.reduce_sum(emb_reshape, dim=1)
+        bow_emb = fluid.layers.reduce_sum(emb, dim=1)
        return bow_emb
class Pair_BOWModel(Model):
-    """
-    classify model
-    """
    def __init__(self, conf_dict):
        super(Pair_BOWModel, self).__init__()
        self.dict_size = conf_dict["dict_size"]
-        self.task_mode = conf_dict["task_mode"]
        self.emb_dim = conf_dict["net"]["emb_dim"]
        self.bow_dim = conf_dict["net"]["bow_dim"]
-        self.seq_len = conf_dict["seq_len"]
        self.padding_idx = None
-        self.emb_layer = BOWEncoder(self.dict_size, self.bow_dim, self.seq_len,
-                                    self.emb_dim, self.padding_idx)
+        self.emb_layer = BOWEncoder(self.dict_size, self.bow_dim, self.emb_dim,
+                                    self.padding_idx)
        self.bow_layer = Linear(
            input_dim=self.bow_dim, output_dim=self.bow_dim)
@@ -83,21 +75,15 @@ class Pair_BOWModel(Model):
class Point_BOWModel(Model):
-    """
-    classify model
-    """
    def __init__(self, conf_dict):
        super(Point_BOWModel, self).__init__()
        self.dict_size = conf_dict["dict_size"]
-        self.task_mode = conf_dict["task_mode"]
        self.emb_dim = conf_dict["net"]["emb_dim"]
        self.bow_dim = conf_dict["net"]["bow_dim"]
-        self.seq_len = conf_dict["seq_len"]
        self.padding_idx = None
-        self.emb_layer = BOWEncoder(self.dict_size, self.bow_dim, self.seq_len,
-                                    self.emb_dim, self.padding_idx)
+        self.emb_layer = BOWEncoder(self.dict_size, self.bow_dim, self.emb_dim,
+                                    self.padding_idx)
        self.bow_layer_po = Linear(
            input_dim=self.bow_dim * 2, output_dim=self.bow_dim)
        self.softmax_layer = Linear(
......
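The BOW encoder above now pools token embeddings directly with `reduce_sum` over the sequence dimension instead of reshaping by a fixed `seq_len`. A minimal NumPy sketch of that pooling and of the pairwise cosine scoring used by `Pair_BOWModel` follows; shapes and values are invented for illustration and are not taken from the repository.

```python
import numpy as np

# Toy batch: 2 sequences, 4 tokens each, 3-dim embeddings (illustrative sizes only).
emb = np.random.rand(2, 4, 3).astype("float32")

# Bag-of-words pooling: sum over the sequence dimension,
# mirroring fluid.layers.reduce_sum(emb, dim=1) in BOWEncoder.forward.
bow_emb = emb.sum(axis=1)  # (batch, emb_dim)

def cos_sim(a, b):
    """Row-wise cosine similarity, analogous to fluid.layers.cos_sim."""
    num = (a * b).sum(axis=1)
    den = np.linalg.norm(a, axis=1) * np.linalg.norm(b, axis=1)
    return num / den

# Pairwise scoring: the query is compared against a positive and a negative title.
left = bow_emb
pos_right = np.random.rand(2, 3).astype("float32")
neg_right = np.random.rand(2, 3).astype("float32")
pos_pred = cos_sim(left, pos_right)
neg_pred = cos_sim(left, neg_right)
print(pos_pred, neg_pred)
```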
@@ -15,27 +15,27 @@
cnn class
"""
import paddle.fluid as fluid
-from paddle.fluid.dygraph import Linear, Layer, Conv2D, Pool2D
+from paddle.fluid.dygraph import Linear, Layer, Conv2D, Pool2D, Embedding
from paddle.incubate.hapi.model import Model
from paddle.incubate.hapi.text.text import CNNEncoder
class Pair_CNNModel(Model):
-    """
-    classify model
-    """
    def __init__(self, conf_dict):
        super(Pair_CNNModel, self).__init__()
        self.dict_size = conf_dict["dict_size"]
-        self.task_mode = conf_dict["task_mode"]
        self.emb_dim = conf_dict["net"]["emb_dim"]
        self.filter_size = conf_dict["net"]["filter_size"]
        self.num_filters = conf_dict["net"]["num_filters"]
        self.hidden_dim = conf_dict["net"]["hidden_dim"]
-        self.seq_len = conf_dict["seq_len"]
        self.padding_idx = None
+        #layers
+        self.emb_layer = Embedding(
+            size=[self.dict_size, self.emb_dim],
+            is_sparse=True,
+            padding_idx=self.padding_idx,
+            param_attr=fluid.ParamAttr(
+                name='emb', initializer=fluid.initializer.Xavier()))
        self.encoder_layer = CNNEncoder(
            num_channels=1,
            num_filters=self.num_filters,
@@ -44,24 +44,18 @@ class Pair_CNNModel(Model):
            layer_num=1,
            act='relu')
        self.fc_layer = Linear(
-            input_dim=self.num_filters * self.seq_len,
-            output_dim=self.hidden_dim)
-        self.fc_layer_po = Linear(
-            input_dim=self.num_filters * self.seq_len * 2,
-            output_dim=self.hidden_dim)
-        self.softmax_layer = Linear(
-            input_dim=self.hidden_dim, output_dim=2, act='softmax')
+            input_dim=self.num_filters, output_dim=self.hidden_dim)
    def forward(self, left, pos_right, neg_right):
-        left = fluid.layers.reshape(
-            left, shape=[-1, self.seq_len, self.hidden_dim])
-        pos_right = fluid.layers.reshape(
-            pos_right, shape=[-1, self.seq_len, self.hidden_dim])
-        neg_right = fluid.layers.reshape(
-            neg_right, shape=[-1, self.seq_len, self.hidden_dim])
+        left = self.emb_layer(left)
+        pos_right = self.emb_layer(pos_right)
+        neg_right = self.emb_layer(neg_right)
        left_cnn = self.encoder_layer(left)
+        left_cnn = fluid.layers.transpose(left_cnn, perm=[0, 2, 1])
        pos_right_cnn = self.encoder_layer(pos_right)
+        pos_right_cnn = fluid.layers.transpose(pos_right_cnn, perm=[0, 2, 1])
        neg_right_cnn = self.encoder_layer(neg_right)
+        neg_right_cnn = fluid.layers.transpose(neg_right_cnn, perm=[0, 2, 1])
        left_fc = self.fc_layer(left_cnn)
        pos_right_fc = self.fc_layer(pos_right_cnn)
        neg_right_fc = self.fc_layer(neg_right_cnn)
@@ -71,10 +65,6 @@ class Pair_CNNModel(Model):
class Point_CNNModel(Model):
-    """
-    classify model
-    """
    def __init__(self, conf_dict):
        super(Point_CNNModel, self).__init__()
        self.dict_size = conf_dict["dict_size"]
@@ -85,7 +75,13 @@ class Point_CNNModel(Model):
        self.hidden_dim = conf_dict["net"]["hidden_dim"]
        self.seq_len = conf_dict["seq_len"]
        self.padding_idx = None
+        #layers
+        self.emb_layer = Embedding(
+            size=[self.dict_size, self.emb_dim],
+            is_sparse=True,
+            padding_idx=self.padding_idx,
+            param_attr=fluid.ParamAttr(
+                name='emb', initializer=fluid.initializer.Xavier()))
        self.encoder_layer = CNNEncoder(
            num_channels=1,
            num_filters=self.num_filters,
@@ -93,22 +89,19 @@ class Point_CNNModel(Model):
            pool_size=1,
            layer_num=1,
            act='relu')
-        self.fc_layer = Linear(
-            input_dim=self.num_filters * self.seq_len,
-            output_dim=self.hidden_dim)
        self.fc_layer_po = Linear(
-            input_dim=self.num_filters * self.seq_len * 2,
-            output_dim=self.hidden_dim)
+            input_dim=self.num_filters * 2, output_dim=self.hidden_dim)
        self.softmax_layer = Linear(
            input_dim=self.hidden_dim, output_dim=2, act='softmax')
    def forward(self, left, right):
-        left = fluid.layers.reshape(
-            left, shape=[-1, self.seq_len, self.hidden_dim])
-        right = fluid.layers.reshape(
-            right, shape=[-1, self.seq_len, self.hidden_dim])
+        left = self.emb_layer(left)
+        right = self.emb_layer(right)
        left_cnn = self.encoder_layer(left)
+        left_cnn = fluid.layers.transpose(left_cnn, perm=[0, 2, 1])
        right_cnn = self.encoder_layer(right)
+        right_cnn = fluid.layers.transpose(right_cnn, perm=[0, 2, 1])
        concat = fluid.layers.concat([left_cnn, right_cnn], axis=1)
        concat_fc = self.fc_layer_po(concat)
        pred = self.softmax_layer(concat_fc)
......
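A note on the `Linear` dimension change above: with the added `transpose(..., perm=[0, 2, 1])`, the encoder output appears to have its filter dimension moved to the last axis, so a `Linear(input_dim=num_filters, ...)` is applied position by position instead of to a flattened `num_filters * seq_len` vector. The NumPy sketch below only illustrates this shape bookkeeping; the layout and sizes are assumptions, not values from the repository.

```python
import numpy as np

batch, num_filters, seq_len, hidden_dim = 2, 8, 5, 4  # invented sizes

# Assume the encoder output is laid out as (batch, num_filters, seq_len).
feat = np.random.rand(batch, num_filters, seq_len).astype("float32")

# transpose(..., perm=[0, 2, 1]) -> (batch, seq_len, num_filters)
feat_t = feat.transpose(0, 2, 1)

# A Linear layer with input_dim=num_filters then projects the last axis,
# i.e. the same weights are applied at every sequence position.
w = np.random.rand(num_filters, hidden_dim).astype("float32")
b = np.zeros(hidden_dim, dtype="float32")
out = feat_t @ w + b  # (batch, seq_len, hidden_dim)
print(out.shape)
```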
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
gru class
"""
import numpy as np
from paddle.fluid.dygraph import Layer, to_variable, Embedding, Linear, GRUUnit
import paddle.fluid as fluid
from paddle.incubate.hapi.model import Model
from paddle.incubate.hapi.text.text import RNN, BasicGRUCell
class GRUEncoder(Layer):
    def __init__(self, dict_size, emb_dim, gru_dim, hidden_dim, padding_idx):
        super(GRUEncoder, self).__init__()
        self.dict_size = dict_size
        self.emb_dim = emb_dim
        self.gru_dim = gru_dim
        self.hidden_dim = hidden_dim
        self.padding_idx = padding_idx
        self.emb_layer = Embedding(
            size=[self.dict_size, self.emb_dim],
            is_sparse=True,
            padding_idx=self.padding_idx,
            param_attr=fluid.ParamAttr(
                name='emb', initializer=fluid.initializer.Xavier()))
        cell = BasicGRUCell(
            input_size=self.gru_dim * 3, hidden_size=self.hidden_dim)
        self.gru_layer = RNN(cell=cell)
        self.proj_layer = Linear(
            input_dim=self.hidden_dim, output_dim=self.gru_dim * 3)

    def forward(self, input):
        emb = self.emb_layer(input)
        emb_proj = self.proj_layer(emb)
        gru, _ = self.gru_layer(emb_proj)
        gru = fluid.layers.reduce_max(gru, dim=1)
        gru = fluid.layers.tanh(gru)
        return gru


class Pair_GRUModel(Model):
    def __init__(self, conf_dict):
        super(Pair_GRUModel, self).__init__()
        self.dict_size = conf_dict["dict_size"]
        self.task_mode = conf_dict["task_mode"]
        self.emb_dim = conf_dict["net"]["emb_dim"]
        self.gru_dim = conf_dict["net"]["gru_dim"]
        self.hidden_dim = conf_dict["net"]["hidden_dim"]
        self.padding_idx = None
        self.emb_layer = GRUEncoder(self.dict_size, self.emb_dim, self.gru_dim,
                                    self.hidden_dim, self.padding_idx)
        self.fc_layer = Linear(
            input_dim=self.hidden_dim, output_dim=self.hidden_dim)

    def forward(self, left, pos_right, neg_right):
        left_emb = self.emb_layer(left)
        pos_right_emb = self.emb_layer(pos_right)
        neg_right_emb = self.emb_layer(neg_right)
        left_fc = self.fc_layer(left_emb)
        pos_right_fc = self.fc_layer(pos_right_emb)
        neg_right_fc = self.fc_layer(neg_right_emb)
        pos_pred = fluid.layers.cos_sim(left_fc, pos_right_fc)
        neg_pred = fluid.layers.cos_sim(left_fc, neg_right_fc)
        return pos_pred, neg_pred


class Point_GRUModel(Model):
    def __init__(self, conf_dict):
        super(Point_GRUModel, self).__init__()
        self.dict_size = conf_dict["dict_size"]
        self.task_mode = conf_dict["task_mode"]
        self.emb_dim = conf_dict["net"]["emb_dim"]
        self.gru_dim = conf_dict["net"]["gru_dim"]
        self.hidden_dim = conf_dict["net"]["hidden_dim"]
        self.padding_idx = None
        self.emb_layer = GRUEncoder(self.dict_size, self.emb_dim, self.gru_dim,
                                    self.hidden_dim, self.padding_idx)
        self.fc_layer_fo = Linear(
            input_dim=self.hidden_dim * 2, output_dim=self.hidden_dim)
        self.softmax_layer = Linear(
            input_dim=self.hidden_dim, output_dim=2, act='softmax')

    def forward(self, left, right):
        left_emb = self.emb_layer(left)
        right_emb = self.emb_layer(right)
        concat = fluid.layers.concat([left_emb, right_emb], axis=1)
        concat_fc = self.fc_layer_fo(concat)
        pred = self.softmax_layer(concat_fc)
        return pred
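The GRU encoder above reduces the recurrent outputs with a max over the time dimension followed by `tanh`. A small NumPy sketch of that pooling step, with invented shapes:

```python
import numpy as np

batch, seq_len, hidden_dim = 2, 6, 4  # invented sizes
gru_out = np.random.randn(batch, seq_len, hidden_dim).astype("float32")

# reduce_max over dim=1 keeps, for each hidden unit, its strongest activation
# across the sequence (max-over-time pooling); tanh then squashes it into (-1, 1).
pooled = np.tanh(gru_out.max(axis=1))  # (batch, hidden_dim)
print(pooled.shape)
```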
@@ -18,7 +18,7 @@ hinge loss
import sys
sys.path.append("../")
import paddle.fluid as fluid
-from paddle.incubate.hapi.model import Loss
+from paddle.incubate.hapi.loss import Loss
class HingeLoss(Loss):
@@ -34,6 +34,6 @@ class HingeLoss(Loss):
            neg, neg.shape, "float32", self.margin)
        sub = fluid.layers.elementwise_sub(neg, pos)
        add = fluid.layers.elementwise_add(sub, loss_margin)
-        loss_max = fluid.layers.elementwise_max(loss, add)
-        loss_last = fluid.layers.reduce_mean(loss_max)
+        max = fluid.layers.elementwise_max(loss, add)
+        loss_last = fluid.layers.reduce_mean(max)
        return loss_last
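The forward pass above computes a pairwise hinge (margin ranking) loss: a pair is penalized whenever the negative score is not at least `margin` below the positive score. A NumPy sketch with made-up scores, assuming the `loss` tensor it is compared against holds zeros:

```python
import numpy as np

margin = 0.1                         # illustrative margin
pos = np.array([0.80, 0.40, 0.65])   # similarity of (query, positive title)
neg = np.array([0.30, 0.55, 0.60])   # similarity of (query, negative title)

# Hinge / margin ranking loss: mean(max(0, neg - pos + margin)),
# matching elementwise_sub -> elementwise_add -> elementwise_max -> reduce_mean.
per_pair = np.maximum(0.0, neg - pos + margin)
loss = per_pair.mean()
print(per_pair, loss)
```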
@@ -18,7 +18,7 @@ log loss
import sys
sys.path.append("../")
import paddle.fluid as fluid
-from paddle.incubate.hapi.model import Loss
+from paddle.incubate.hapi.loss import Loss
class LogLoss(Loss):
......
@@ -18,14 +18,24 @@ softmax loss
import sys
sys.path.append("../")
import paddle.fluid as fluid
-from paddle.incubate.hapi.model import Loss
+from hapi.model import Loss
+'''
+class SoftmaxCrossEntropyLoss(Loss):
+    def __init__(self,conf_dict):
+        super(SoftmaxCrossEntropyLoss,self).__init__()
+    def forward(self,input,label):
+        cost=fluid.layers.cross_entropy(input=input,label=label)
+        avg_cost=fluid.layers.reduce_mean(cost)
+        return avg_cost
+'''
-class SoftmxCrossEntropyLoss(Loss):
-    def __init__(self, conf_dict):
-        super(SoftmxCrossEntropyLoss, self).__init__()
-    def forward(self, input, label):
-        cost = fluid.layers.cross_entropy(input=input, label=label)
-        avg_cost = fluid.layers.reduce_mean(cost)
-        return avg_cost
+class SoftmaxCrossEntropyLoss(Loss):
+    def __init__(self, conf_dict, average=True):
+        super(SoftmaxCrossEntropyLoss, self).__init__()
+    def forward(self, outputs, labels):
+        return [
+            fluid.layers.cross_entropy(o, l) for o, l in zip(outputs, labels)
+        ]
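For comparison, the commented-out variant reduced the cross entropy to a scalar mean, while the active `forward` returns the per-sample cross entropy for each (output, label) pair and leaves any reduction to the caller. A NumPy sketch of per-sample cross entropy on softmax probabilities (toy values only):

```python
import numpy as np

# Softmax probabilities for 3 samples over 2 classes, plus integer labels (toy data).
probs = np.array([[0.9, 0.1],
                  [0.3, 0.7],
                  [0.5, 0.5]])
labels = np.array([0, 1, 0])

# Per-sample cross entropy, the unreduced form the new forward() returns.
per_sample = -np.log(probs[np.arange(len(labels)), labels])

# The older (commented-out) version reduced this to its mean.
avg = per_sample.mean()
print(per_sample, avg)
```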
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
lstm class
"""
import numpy as np
from paddle.fluid.dygraph import Layer, Embedding, Linear
import paddle.fluid as fluid
from paddle.incubate.hapi.model import Model
from paddle.incubate.hapi.text.text import BasicLSTMCell, RNN
class LSTMEncoder(Layer):
    def __init__(self, dict_size, emb_dim, lstm_dim, hidden_dim, padding_idx):
        super(LSTMEncoder, self).__init__()
        self.dict_size = dict_size
        self.emb_dim = emb_dim
        self.lstm_dim = lstm_dim
        self.hidden_dim = hidden_dim
        self.is_reverse = False
        self.padding_idx = padding_idx
        self.emb_layer = Embedding(
            size=[self.dict_size, self.emb_dim],
            is_sparse=True,
            padding_idx=self.padding_idx,
            param_attr=fluid.ParamAttr(
                name='emb', initializer=fluid.initializer.Xavier()))
        self.lstm_cell = BasicLSTMCell(
            input_size=self.lstm_dim * 4, hidden_size=self.lstm_dim)
        self.lstm_layer = RNN(cell=self.lstm_cell,
                              time_major=True,
                              is_reverse=self.is_reverse)
        self.proj_layer = Linear(
            input_dim=self.emb_dim, output_dim=self.lstm_dim * 4)

    def forward(self, input):
        emb = self.emb_layer(input)
        emb_proj = self.proj_layer(emb)
        emb_lstm, _ = self.lstm_layer(emb_proj)
        emb_reduce = fluid.layers.reduce_max(emb_lstm, dim=1)
        emb_out = fluid.layers.tanh(emb_reduce)
        return emb_out


class Pair_LSTMModel(Model):
    def __init__(self, conf_dict):
        super(Pair_LSTMModel, self).__init__()
        self.dict_size = conf_dict["dict_size"]
        self.emb_dim = conf_dict["net"]["emb_dim"]
        self.lstm_dim = conf_dict["net"]["lstm_dim"]
        self.hidden_dim = conf_dict["net"]["hidden_dim"]
        self.padding_idx = None
        self.emb_layer = LSTMEncoder(self.dict_size, self.emb_dim,
                                     self.lstm_dim, self.hidden_dim,
                                     self.padding_idx)
        self.fc_layer = Linear(
            input_dim=self.hidden_dim, output_dim=self.hidden_dim)
        self.fc_layer_po = Linear(
            input_dim=self.hidden_dim * 2, output_dim=self.hidden_dim)
        self.softmax_layer = Linear(
            input_dim=self.hidden_dim, output_dim=2, act='softmax')

    def forward(self, left, pos_right, neg_right):
        left_emb = self.emb_layer(left)
        pos_right_emb = self.emb_layer(pos_right)
        neg_right_emb = self.emb_layer(neg_right)
        left_fc = self.fc_layer(left_emb)
        pos_right_fc = self.fc_layer(pos_right_emb)
        neg_right_fc = self.fc_layer(neg_right_emb)
        pos_pred = fluid.layers.cos_sim(left_fc, pos_right_fc)
        neg_pred = fluid.layers.cos_sim(left_fc, neg_right_fc)
        return pos_pred, neg_pred


class Point_LSTMModel(Model):
    def __init__(self, conf_dict):
        super(Point_LSTMModel, self).__init__()
        self.dict_size = conf_dict["dict_size"]
        self.task_mode = conf_dict["task_mode"]
        self.emb_dim = conf_dict["net"]["emb_dim"]
        self.lstm_dim = conf_dict["net"]["lstm_dim"]
        self.hidden_dim = conf_dict["net"]["hidden_dim"]
        self.padding_idx = None
        self.emb_layer = LSTMEncoder(self.dict_size, self.emb_dim,
                                     self.lstm_dim, self.hidden_dim,
                                     self.padding_idx)
        self.fc_layer = Linear(
            input_dim=self.hidden_dim, output_dim=self.hidden_dim)
        self.fc_layer_po = Linear(
            input_dim=self.hidden_dim * 2, output_dim=self.hidden_dim)
        self.softmax_layer = Linear(
            input_dim=self.hidden_dim, output_dim=2, act='softmax')

    def forward(self, left, right):
        left_emb = self.emb_layer(left)
        right_emb = self.emb_layer(right)
        concat = fluid.layers.concat([left_emb, right_emb], axis=1)
        concat_fc = self.fc_layer_po(concat)
        pred = self.softmax_layer(concat_fc)
        return pred
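Both `Point_*` models above share the same classification head: the two sentence encodings are concatenated, projected by a `Linear` layer, and mapped to a two-way softmax. A NumPy sketch of that head with invented dimensions and random weights:

```python
import numpy as np

def softmax(x, axis=-1):
    e = np.exp(x - x.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)

batch, hidden_dim = 2, 4  # invented sizes
left_emb = np.random.randn(batch, hidden_dim).astype("float32")
right_emb = np.random.randn(batch, hidden_dim).astype("float32")

# concat -> Linear(hidden_dim * 2, hidden_dim) -> Linear(hidden_dim, 2) + softmax
concat = np.concatenate([left_emb, right_emb], axis=1)  # (batch, 2 * hidden_dim)
w1 = np.random.randn(2 * hidden_dim, hidden_dim).astype("float32")
w2 = np.random.randn(hidden_dim, 2).astype("float32")
pred = softmax(concat @ w1 @ w2)                        # (batch, 2) class probabilities
print(pred)
```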
@@ -184,7 +184,7 @@ class SimNetProcessor(object):
                query = self.padding_text(query)
                title = self.padding_text(title)
-                lebel = int(label)
+                label = int(label)
                yield [query, title, label]
        else:
@@ -246,7 +246,7 @@ class SimNetProcessor(object):
            yield [query, title]
-    def get_infer_pairdata(self):
+    def get_infer_data(self):
        """
        get infer data
        """
......
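The reader hunk above fixes a `lebel`/`label` typo and renames `get_infer_pairdata` to `get_infer_data`. For orientation only, here is a minimal sketch of the kind of generator such a processor exposes; the padding scheme (fixed `seq_len`, pad id 0) and the field layout are assumptions for illustration, not the repository's implementation:

```python
def toy_pointwise_reader(samples, seq_len=32, pad_id=0):
    """Yield [query_ids, title_ids, label] with both id lists padded/truncated to seq_len."""
    def pad(ids):
        ids = list(ids)[:seq_len]
        return ids + [pad_id] * (seq_len - len(ids))

    for query, title, label in samples:
        yield [pad(query), pad(title), int(label)]

# Example with made-up token ids.
for record in toy_pointwise_reader([([3, 7, 9], [4, 8], "1")], seq_len=5):
    print(record)  # [[3, 7, 9, 0, 0], [4, 8, 0, 0, 0], 1]
```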
@@ -15,7 +15,7 @@ INFER_RESULT_PATH=./infer_result
TASK_MODE='pairwise'
CONFIG_PATH=./config/bow_pairwise.json
-INIT_CHECKPOINT=./model_files/bow_pairwise/200
+INIT_CHECKPOINT=./model_files/bow_pairwise/20
@@ -36,9 +36,9 @@ train() {
        --config_path ${CONFIG_PATH} \
        --vocab_path ${VOCAB_PATH} \
        --epoch 40 \
-        --save_steps 2000 \
-        --validation_steps 200 \
-        --compute_accuracy False \
+        --save_steps 10 \
+        --validation_steps 2 \
+        --compute_accuracy True \
        --lamda 0.958 \
        --task_mode ${TASK_MODE}\
        --init_checkpoint ""
@@ -49,14 +49,13 @@ evaluate() {
        --task_name ${TASK_NAME} \
        --use_cuda false \
        --do_test True \
-        --verbose_result True \
        --batch_size 128 \
        --test_data_dir ${TEST_DATA_PATH} \
        --test_result_path ${TEST_RESULT_PATH} \
        --config_path ${CONFIG_PATH} \
        --vocab_path ${VOCAB_PATH} \
        --task_mode ${TASK_MODE} \
-        --compute_accuracy False \
+        --compute_accuracy True \
        --lamda 0.958 \
        --init_checkpoint ${INIT_CHECKPOINT}
}
......
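The script now turns on `--compute_accuracy True` together with `--lamda 0.958`. In this codebase `lamda` is handed to `get_accuracy`; the sketch below assumes it acts as a decision threshold on the normalized pairwise score (after the `(score + 1) / 2` shift used in `valid_and_test`), which is an interpretation for illustration rather than a copy of `get_accuracy`:

```python
import numpy as np

lamda = 0.958                                    # threshold taken from run.sh
labels = np.array([1, 0, 1, 1])                  # toy gold labels
cos_scores = np.array([0.98, 0.40, 0.95, 0.99])  # toy cosine similarities in [-1, 1]

# Map scores from [-1, 1] to [0, 1], then threshold at lamda to get hard predictions.
probs = (cos_scores + 1) / 2
preds = (probs > lamda).astype(int)
accuracy = (preds == labels).mean()
print(preds, accuracy)
```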
@@ -34,20 +34,16 @@ import config
from utils import load_vocab, import_class, get_accuracy, ArgConfig, print_arguments
from paddle.incubate.hapi.metrics import Accuracy
-from paddle.incubate.hapi.model import set_device, Model, Input, Loss, CrossEntropy
+from paddle.incubate.hapi.model import set_device, Model, Input
+from paddle.incubate.hapi.loss import Loss
-def train(conf_dict, args):
-    device = set_device("cpu")
-    fluid.enable_dygraph(device)
-    # load auc method
-    metric = fluid.metrics.Auc(name="auc")
-    def valid_and_test(pred_list, process, mode):
+# define auc method
+def valid_and_test(pred_list, process, mode):
    """
    return auc and acc
    """
+    metric = fluid.metrics.Auc(name="auc")
    pred_list = np.vstack(pred_list)
    if mode == "test":
        label_list = process.get_test_label()
@@ -55,18 +51,21 @@ def train(conf_dict, args):
        label_list = process.get_valid_label()
    if args.task_mode == "pairwise":
        pred_list = (pred_list + 1) / 2
-        pred_list = np.hstack(
-            (np.ones_like(pred_list) - pred_list, pred_list))
+        pred_list = np.hstack((np.ones_like(pred_list) - pred_list, pred_list))
    metric.reset()
    metric.update(pred_list, label_list)
    auc = metric.eval()
    if args.compute_accuracy:
-        acc = get_accuracy(pred_list, label_list, args.task_mode,
-                           args.lamda)
+        acc = get_accuracy(pred_list, label_list, args.task_mode, args.lamda)
        return auc, acc
    else:
        return auc
+def train(conf_dict, args):
+    device = set_device("cpu")
+    fluid.enable_dygraph(device)
    # loading vocabulary
    vocab = load_vocab(args.vocab_path)
    # get vocab size
@@ -120,9 +119,9 @@ def train(conf_dict, args):
    if args.task_mode == "pairwise":
        inputs = [
            Input(
-                [None, 1], 'int64', name='input_left'), Input(
-                [None, 1], 'int64', name='pos_right'), Input(
-                [None, 1], 'int64', name='neg_right')
+                [None, args.seq_len], 'int64', name='input_left'), Input(
+                [None, args.seq_len], 'int64', name='pos_right'), Input(
+                [None, args.seq_len], 'int64', name='neg_right')
        ]
        model.prepare(
@@ -132,9 +131,11 @@ def train(conf_dict, args):
            device=device)
        for left, pos_right, neg_right in train_pyreader():
-            input_left = fluid.layers.reshape(left, shape=[-1, 1])
-            pos_right = fluid.layers.reshape(pos_right, shape=[-1, 1])
-            neg_right = fluid.layers.reshape(neg_right, shape=[-1, 1])
+            input_left = fluid.layers.reshape(left, shape=[-1, args.seq_len])
+            pos_right = fluid.layers.reshape(
+                pos_right, shape=[-1, args.seq_len])
+            neg_right = fluid.layers.reshape(
+                neg_right, shape=[-1, args.seq_len])
            final_loss = model.train_batch([input_left, pos_right, neg_right])
            print("train_steps: %d, train_loss: %f" %
@@ -144,26 +145,29 @@ def train(conf_dict, args):
            if args.do_valid and global_step % args.validation_steps == 0:
                for left, pos_right, neg_right in valid_pyreader():
-                    input_left = fluid.layers.reshape(left, shape=[-1, 1])
-                    pos_right = fluid.layers.reshape(pos_right, shape=[-1, 1])
-                    neg_right = fluid.layers.reshape(neg_right, shape=[-1, 1])
+                    input_left = fluid.layers.reshape(
+                        left, shape=[-1, args.seq_len])
+                    pos_right = fluid.layers.reshape(
+                        pos_right, shape=[-1, args.seq_len])
+                    neg_right = fluid.layers.reshape(
+                        neg_right, shape=[-1, args.seq_len])
                    result, _ = model.test_batch(
                        [input_left, pos_right, neg_right])
-                    pred_list += list(result)
+                    pred_list = list(result)
                    valid_step += 1
                valid_result = valid_and_test(pred_list, simnet_process,
                                              "valid")
                if args.compute_accuracy:
                    valid_auc, valid_acc = valid_result
                    print(
                        "valid_steps: %d, valid_auc: %f, valid_acc: %f, valid_loss: %f"
-                        % (global_step, valid_auc, valid_acc, np.mean(losses)))
+                        % (valid_step, valid_auc, valid_acc, np.mean(losses)))
                else:
                    valid_auc = valid_result
                    print("valid_steps: %d, valid_auc: %f, valid_loss: %f" %
-                          (global_step, valid_auc, np.mean(losses)))
+                          (valid_step, valid_auc, np.mean(losses)))
            if global_step % args.save_steps == 0:
                model_save_dir = os.path.join(args.output_dir,
@@ -177,20 +181,21 @@ def train(conf_dict, args):
    else:
        inputs = [
            Input(
-                [None, 1], 'int64', name='left'), Input(
-                [None, 1], 'int64', name='right')
+                [None, args.seq_len], 'int64', name='left'), Input(
+                [None, args.seq_len], 'int64', name='right')
        ]
        label = [Input([None, 1], 'int64', name='neg_right')]
        model.prepare(
            inputs=inputs,
+            labels=label,
            optimizer=optimizer,
            loss_function=loss,
            device=device)
        for left, right, label in train_pyreader():
-            left = fluid.layers.reshape(left, shape=[-1, 1])
-            right = fluid.layers.reshape(right, shape=[-1, 1])
+            left = fluid.layers.reshape(left, shape=[-1, args.seq_len])
+            right = fluid.layers.reshape(right, shape=[-1, args.seq_len])
            label = fluid.layers.reshape(label, shape=[-1, 1])
            final_loss = model.train_batch([left, right], [label])
@@ -201,26 +206,27 @@ def train(conf_dict, args):
            if args.do_valid and global_step % args.validation_steps == 0:
                for left, right, label in valid_pyreader():
-                    valid_left = fluid.layers.reshape(left, shape=[-1, 1])
-                    valid_right = fluid.layers.reshape(right, shape=[-1, 1])
+                    valid_left = fluid.layers.reshape(
+                        left, shape=[-1, args.seq_len])
+                    valid_right = fluid.layers.reshape(
+                        right, shape=[-1, args.seq_len])
                    valid_label = fluid.layers.reshape(label, shape=[-1, 1])
-                    result, _ = model.test_batch(
-                        [valid_left, valid_right, valid_right])
+                    result = model.test_batch([valid_left, valid_right])
                    pred_list += list(result)
                    valid_step += 1
                valid_result = valid_and_test(pred_list, simnet_process,
                                              "valid")
                if args.compute_accuracy:
                    valid_auc, valid_acc = valid_result
                    print(
                        "valid_steps: %d, valid_auc: %f, valid_acc: %f, valid_loss: %f"
-                        % (global_step, valid_auc, valid_acc, np.mean(losses)))
+                        % (valid_step, valid_auc, valid_acc, np.mean(losses)))
                else:
                    valid_auc = valid_result
                    print("valid_steps: %d, valid_auc: %f, valid_loss: %f" %
-                          (global_step, valid_auc, np.mean(losses)))
+                          (valid_step, valid_auc, np.mean(losses)))
            if global_step % args.save_steps == 0:
                model_save_dir = os.path.join(args.output_dir,
@@ -236,31 +242,6 @@ def test(conf_dict, args):
    device = set_device("cpu")
    fluid.enable_dygraph(device)
-    metric = fluid.metrics.Auc(name="auc")
-    def valid_and_test(pred_list, process, mode):
-        """
-        return auc and acc
-        """
-        pred_list = np.vstack(pred_list)
-        if mode == "test":
-            label_list = process.get_test_label()
-        elif mode == "valid":
-            label_list = process.get_valid_label()
-        if args.task_mode == "pairwise":
-            pred_list = (pred_list + 1) / 2
-            pred_list = np.hstack(
-                (np.ones_like(pred_list) - pred_list, pred_list))
-        metric.reset()
-        metric.update(pred_list, label_list)
-        auc = metric.eval()
-        if args.compute_accuracy:
-            acc = get_accuracy(pred_list, label_list, args.task_mode,
-                               args.lamda)
-            return auc, acc
-        else:
-            return auc
    # loading vocabulary
    vocab = load_vocab(args.vocab_path)
    # get vocab size
@@ -286,17 +267,19 @@ def test(conf_dict, args):
    if args.task_mode == "pairwise":
        inputs = [
            Input(
-                [None, 1], 'int64', name='input_left'), Input(
-                [None, 1], 'int64', name='pos_right'), Input(
-                [None, 1], 'int64', name='pos_right')
+                [None, args.seq_len], 'int64', name='input_left'), Input(
+                [None, args.seq_len], 'int64', name='pos_right'), Input(
+                [None, args.seq_len], 'int64', name='pos_right')
        ]
        model.prepare(inputs=inputs, device=device)
        for left, pos_right, neg_right in test_pyreader():
-            input_left = fluid.layers.reshape(left, shape=[-1, 1])
-            pos_right = fluid.layers.reshape(pos_right, shape=[-1, 1])
-            neg_right = fluid.layers.reshape(pos_right, shape=[-1, 1])
+            input_left = fluid.layers.reshape(left, shape=[-1, args.seq_len])
+            pos_right = fluid.layers.reshape(
+                pos_right, shape=[-1, args.seq_len])
+            neg_right = fluid.layers.reshape(
+                pos_right, shape=[-1, args.seq_len])
            final_pred, _ = model.test_batch(
                [input_left, pos_right, neg_right])
@@ -315,15 +298,15 @@ def test(conf_dict, args):
    else:
        inputs = [
            Input(
-                [None, 1], 'int64', name='left'), Input(
-                [None, 1], 'int64', name='right')
+                [None, args.seq_len], 'int64', name='left'), Input(
+                [None, args.seq_len], 'int64', name='right')
        ]
        model.prepare(inputs=inputs, device=device)
        for left, right, label in test_pyreader():
-            left = fluid.layers.reshape(left, shape=[-1, 1])
-            right = fluid.layers.reshape(right, shape=[-1, 1])
+            left = fluid.layers.reshape(left, shape=[-1, args.seq_len])
+            right = fluid.layers.reshape(right, shape=[-1, args.seq_len])
            label = fluid.layers.reshape(label, shape=[-1, 1])
            final_pred = model.test_batch([left, right])
@@ -368,16 +351,19 @@ def infer(conf_dict, args):
    if args.task_mode == "pairwise":
        inputs = [
            Input(
-                [None, 1], 'int64', name='input_left'), Input(
-                [None, 1], 'int64', name='pos_right')
+                [None, args.seq_len], 'int64', name='input_left'), Input(
+                [None, args.seq_len], 'int64', name='pos_right'), Input(
+                [None, args.seq_len], 'int64', name='neg_right')
        ]
        model.prepare(inputs=inputs, device=device)
        for left, pos_right in infer_pyreader():
-            input_left = fluid.layers.reshape(left, shape=[-1, 1])
-            pos_right = fluid.layers.reshape(pos_right, shape=[-1, 1])
-            neg_right = fluid.layers.reshape(pos_right, shape=[-1, 1])
+            input_left = fluid.layers.reshape(left, shape=[-1, args.seq_len])
+            pos_right = fluid.layers.reshape(
+                pos_right, shape=[-1, args.seq_len])
+            neg_right = fluid.layers.reshape(
+                pos_right, shape=[-1, args.seq_len])
            final_pred, _ = model.test_batch(
                [input_left, pos_right, neg_right])
@@ -388,16 +374,15 @@ def infer(conf_dict, args):
    else:
        inputs = [
            Input(
-                [None, 1], 'int64', name='left'), Input(
-                [None, 1], 'int64', name='right')
+                [None, args.seq_len], 'int64', name='left'), Input(
+                [None, args.seq_len], 'int64', name='right')
        ]
        model.prepare(inputs=inputs, device=device)
        for left, right in infer_pyreader():
-            left = fluid.layers.reshape(left, shape=[-1, 1])
-            right = fluid.layers.reshape(right, shape=[-1, 1])
-            # label = fluid.layers.reshape(label, shape=[-1, 1])
+            left = fluid.layers.reshape(left, shape=[-1, args.seq_len])
+            right = fluid.layers.reshape(right, shape=[-1, args.seq_len])
            final_pred = model.test_batch([left, right])
            print(final_pred)
@@ -405,8 +390,7 @@ def infer(conf_dict, args):
        map(lambda item: str((item[0] + 1) / 2), final_pred))
    with io.open(args.infer_result_path, "w", encoding="utf8") as infer_file:
-        for _data, _pred in zip(simnet_process.get_infer_data(),
-                                 int(pred_list)):
+        for _data, _pred in zip(simnet_process.get_infer_data(), pred_list):
            infer_file.write(_data + "\t" + _pred + "\n")
@@ -423,4 +407,5 @@ if __name__ == '__main__':
    elif args.do_infer:
        infer(conf_dict, args)
    else:
-        raise ValueError("one of do_train and do_infer must be True")
+        raise ValueError(
+            "one of do_train and do_test and do_infer must be True")
@@ -27,7 +27,7 @@ import io
import pickle
import warnings
from functools import partial
-from hapi.configure import ArgumentGroup, str2bool
+from paddle.incubate.hapi.configure import ArgumentGroup, str2bool
"""
******functions for file processing******
"""
@@ -183,7 +183,8 @@ class ArgConfig(object):
        run_type_g.add_arg("do_train", bool, False,
                           "Whether to perform training.")
        run_type_g.add_arg("do_valid", bool, False, "Whether to perform dev.")
-        #run_type_g.add_arg("do_test", bool, False, "Whether to perform testing.")
+        run_type_g.add_arg("do_test", bool, False,
+                           "Whether to perform testing.")
        run_type_g.add_arg("do_infer", bool, False,
                           "Whether to perform inference.")
        run_type_g.add_arg("compute_accuracy", bool, False,
......