Commit 0fe84bc3, authored by mapingshuo

add infersent and sse model

Parent commit: ec23790d
(This diff is collapsed.)
from cdssm import cdssm_base
from dec_att import decatt_glove
from sse import sse_base
from infer_sent import infer_sent_v1
from infer_sent import infer_sent_v2
@@ -7,7 +7,7 @@ def cdssm_base():
     """
     config = basic_config.config()
     config.learning_rate = 0.001
-    config.save_dirname = "cdssm_model"
+    config.save_dirname = "model_dir"
     config.use_pretrained_word_embedding = True
     config.dict_dim = 40000  # approx_vocab_size
...
@@ -7,7 +7,7 @@ def decatt_glove():
     """
     config = basic_config.config()
     config.learning_rate = 0.05
-    config.save_dirname = "decatt_model"
+    config.save_dirname = "model_dir"
     config.use_pretrained_word_embedding = True
     config.dict_dim = 40000  # approx_vocab_size
     config.metric_type = ['accuracy', 'accuracy_with_threshold']
@@ -29,4 +29,30 @@ def decatt_glove():
     return config

def decatt_word():
    """
    Use config 'decAtt_word' from the paper 'Neural Paraphrase Identification of Questions with Noisy Pretraining'.
    """
    config = basic_config.config()
    config.learning_rate = 0.05
    config.save_dirname = "model_dir"
    config.use_pretrained_word_embedding = False
    config.dict_dim = 40000  # approx_vocab_size
    config.metric_type = ['accuracy', 'accuracy_with_threshold']
    config.optimizer_type = 'sgd'
    config.lr_decay = 1
    config.use_lod_tensor = False
    config.embedding_norm = False
    config.OOV_fill = 'uniform'
    config.duplicate_data = False
    # net config
    config.emb_dim = 300
    config.proj_emb_dim = 200  # TODO: is a projection layer used?
    config.num_units = [400, 200]
    config.word_embedding_trainable = True
    config.droprate = 0.1
    config.share_wight_btw_seq = True
    config.class_dim = 2
    return config
import basic_config


def infer_sent_v1():
    """
    Config following the InferSent paper's training setup (SGD, lr 0.1, lr decay 0.99).
    """
    config = basic_config.config()
    config.learning_rate = 0.1
    config.lr_decay = 0.99
    config.optimizer_type = 'sgd'
    config.save_dirname = "model_dir"
    config.use_pretrained_word_embedding = False
    config.dict_dim = 40000  # approx_vocab_size
    config.class_dim = 2
    # net config
    config.emb_dim = 300
    config.droprate_lstm = 0.0
    config.droprate_fc = 0.0
    config.word_embedding_trainable = False
    config.rnn_hid_dim = 2048
    config.mlp_non_linear = False
    return config


def infer_sent_v2():
    """
    Use our own config: Adam optimizer, pretrained word embeddings, and a non-linear MLP with dropout.
    """
    config = basic_config.config()
    config.learning_rate = 0.0002
    config.lr_decay = 0.99
    config.optimizer_type = 'adam'
    config.save_dirname = "model_dir"
    config.use_pretrained_word_embedding = True
    config.dict_dim = 40000  # approx_vocab_size
    config.class_dim = 2
    # net config
    config.emb_dim = 300
    config.droprate_lstm = 0.0
    config.droprate_fc = 0.2
    config.word_embedding_trainable = False
    config.rnn_hid_dim = 2048
    config.mlp_non_linear = True
    return config
import basic_config


def sse_base():
    """
    Use the config from the paper 'Shortcut-Stacked Sentence Encoders for Multi-Domain Inference'.
    """
    config = basic_config.config()
    config.learning_rate = 0.0002
    config.lr_decay = 0.7
    config.save_dirname = "model_dir"
    config.use_pretrained_word_embedding = True
    config.dict_dim = 40000  # approx_vocab_size
    config.metric_type = ['accuracy']
    config.optimizer_type = 'adam'
    config.use_lod_tensor = True
    config.embedding_norm = False
    config.OOV_fill = 'uniform'
    config.duplicate_data = False
    # net config
    config.emb_dim = 300
    config.rnn_hid_dim = [512, 1024, 2048]
    config.fc_dim = [1600, 1600]
    config.droprate_lstm = 0.0
    config.droprate_fc = 0.1
    config.class_dim = 2
    return config
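All of the factory functions above return the mutable object from basic_config.config(), so a training script can pick one and override individual fields before building the network. A minimal, hypothetical usage sketch follows; the relative import style mirrors the configs __init__.py in this commit, but the training entry point itself is not part of the diff:

# Hypothetical usage sketch: choose a config factory and adjust fields before training.
from sse import sse_base  # or: from infer_sent import infer_sent_v1 / infer_sent_v2

config = sse_base()
config.save_dirname = "model_dir"  # directory where the trained model is saved
config.learning_rate = 0.0001      # any field set in basic_config.config() can be overridden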
from cdssm import cdssmNet
from dec_att import DecAttNet
from sse import SSENet
from infer_sent import InferSentNet
import paddle.fluid as fluid

from my_layers import bi_lstm_layer
from match_layers import ElementwiseMatching


class InferSentNet():
    """
    Based on the paper 'Supervised Learning of Universal Sentence Representations
    from Natural Language Inference Data': https://arxiv.org/abs/1705.02364
    """

    def __init__(self, config):
        self._config = config

    def __call__(self, seq1, seq2, label):
        return self.body(seq1, seq2, label, self._config)

    def body(self, seq1, seq2, label, config):
        """Body function: encode both sequences, match them, and classify."""
        seq1_rnn = self.encoder(seq1)
        seq2_rnn = self.encoder(seq2)
        seq_match = ElementwiseMatching(seq1_rnn, seq2_rnn)
        mlp_res = self.MLP(seq_match)
        prediction = fluid.layers.fc(mlp_res, size=self._config.class_dim, act='softmax')
        loss = fluid.layers.cross_entropy(input=prediction, label=label)
        avg_cost = fluid.layers.mean(x=loss)
        acc = fluid.layers.accuracy(input=prediction, label=label)
        return avg_cost, acc, prediction

    def encoder(self, seq):
        """Sentence encoder: embedding -> BiLSTM -> max pooling over time."""
        embed = fluid.layers.embedding(
            input=seq,
            size=[self._config.dict_dim, self._config.emb_dim],
            param_attr=fluid.ParamAttr(name='emb.w', trainable=self._config.word_embedding_trainable))
        bi_lstm_h = bi_lstm_layer(
            embed,
            rnn_hid_dim=self._config.rnn_hid_dim,
            name='encoder')
        bi_lstm_h = fluid.layers.dropout(bi_lstm_h, dropout_prob=self._config.droprate_lstm)
        pool = fluid.layers.sequence_pool(input=bi_lstm_h, pool_type='max')
        return pool

    def MLP(self, vec):
        if self._config.mlp_non_linear:
            drop1 = fluid.layers.dropout(vec, dropout_prob=self._config.droprate_fc)
            fc1 = fluid.layers.fc(drop1, size=512, act='tanh')
            drop2 = fluid.layers.dropout(fc1, dropout_prob=self._config.droprate_fc)
            fc2 = fluid.layers.fc(drop2, size=512, act='tanh')
            res = fluid.layers.dropout(fc2, dropout_prob=self._config.droprate_fc)
        else:
            fc1 = fluid.layers.fc(vec, size=512, act=None)
            res = fluid.layers.fc(fc1, size=512, act=None)
        return res
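A minimal wiring sketch for InferSentNet, assuming LoD-level-1 int64 word-id inputs (the standard fluid sequence setup); the reader, optimizer, and executor code are not part of this commit, and the import paths are illustrative only:

import paddle.fluid as fluid

from infer_sent import InferSentNet            # model defined above
from configs.infer_sent import infer_sent_v1   # hypothetical import path

config = infer_sent_v1()
net = InferSentNet(config)
# two variable-length word-id sequences plus a label
seq1 = fluid.layers.data(name='seq1', shape=[1], dtype='int64', lod_level=1)
seq2 = fluid.layers.data(name='seq2', shape=[1], dtype='int64', lod_level=1)
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
avg_cost, acc, prediction = net(seq1, seq2, label)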
import paddle.fluid as fluid

from my_layers import bi_lstm_layer
from match_layers import ElementwiseMatching


class SSENet():
    """
    SSE net: Shortcut-Stacked Sentence Encoders for Multi-Domain Inference
    https://arxiv.org/abs/1708.02312
    """

    def __init__(self, config):
        self._config = config

    def __call__(self, seq1, seq2, label):
        return self.body(seq1, seq2, label, self._config)

    def body(self, seq1, seq2, label, config):
        """Body function"""

        def stacked_bi_rnn_model(seq):
            embed = fluid.layers.embedding(
                input=seq,
                size=[self._config.dict_dim, self._config.emb_dim],
                param_attr='emb.w')
            stacked_lstm_out = [embed]
            for i in range(len(self._config.rnn_hid_dim)):
                if i == 0:
                    feature = embed
                else:
                    # shortcut connections: feed the word embeddings plus all
                    # previous BiLSTM outputs into the next BiLSTM layer
                    feature = fluid.layers.concat(input=stacked_lstm_out, axis=1)
                bi_lstm_h = bi_lstm_layer(feature,
                                          rnn_hid_dim=self._config.rnn_hid_dim[i],
                                          name="lstm_" + str(i))
                # add dropout except for the last stacked lstm layer
                if i != len(self._config.rnn_hid_dim) - 1:
                    bi_lstm_h = fluid.layers.dropout(bi_lstm_h, dropout_prob=self._config.droprate_lstm)
                stacked_lstm_out.append(bi_lstm_h)
            pool = fluid.layers.sequence_pool(input=bi_lstm_h, pool_type='max')
            return pool

        def MLP(vec):
            for i in range(len(self._config.fc_dim)):
                vec = fluid.layers.fc(vec, size=self._config.fc_dim[i], act='relu')
                # add dropout after every layer of MLP
                vec = fluid.layers.dropout(vec, dropout_prob=self._config.droprate_fc)
            return vec

        seq1_rnn = stacked_bi_rnn_model(seq1)
        seq2_rnn = stacked_bi_rnn_model(seq2)
        seq_match = ElementwiseMatching(seq1_rnn, seq2_rnn)
        mlp_res = MLP(seq_match)
        prediction = fluid.layers.fc(mlp_res, size=self._config.class_dim, act='softmax')
        loss = fluid.layers.cross_entropy(input=prediction, label=label)
        avg_cost = fluid.layers.mean(x=loss)
        acc = fluid.layers.accuracy(input=prediction, label=label)
        return avg_cost, acc, prediction
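As a sanity check on the shortcut connections in stacked_bi_rnn_model, here is a back-of-the-envelope calculation of the input width seen by each BiLSTM layer under sse_base (emb_dim=300, rnn_hid_dim=[512, 1024, 2048]), assuming bi_lstm_layer concatenates the forward and backward states so each layer emits 2 * rnn_hid_dim features:

# Illustration only: width of the concatenated input at each stacked BiLSTM layer.
emb_dim = 300
rnn_hid_dim = [512, 1024, 2048]
for i in range(len(rnn_hid_dim)):
    in_width = emb_dim + sum(2 * h for h in rnn_hid_dim[:i])
    print("layer %d input width: %d" % (i, in_width))
# layer 0 input width: 300
# layer 1 input width: 1324
# layer 2 input width: 3372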