Commit a89b5216 authored by wanghaoshuang

Add demo of a convolutional network for the MNLI task

Parent 70c6b708
../bert/data
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"dygraph transformer layers"
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle.fluid as fluid
from paddle.fluid.dygraph import Embedding, Linear, Layer, to_variable
from .transformer_encoder import ConvBNEncoderLayer
from .mobilenet_v1 import MobileNetV1
class BertModelLayer(Layer):
def __init__(self,
emb_size=128,
hidden_size=768,
voc_size=30522,
max_position_seq_len=512,
sent_types=2,
return_pooled_out=True,
initializer_range=1.0,
conv_type="mobilenet",
use_fp16=False):
super(BertModelLayer, self).__init__()
self._emb_size = emb_size
self._hidden_size = hidden_size
self._voc_size = voc_size
self._max_position_seq_len = max_position_seq_len
self._sent_types = sent_types
self.return_pooled_out = return_pooled_out
self._word_emb_name = "s_word_embedding"
self._pos_emb_name = "s_pos_embedding"
self._sent_emb_name = "s_sent_embedding"
self._dtype = "float16" if use_fp16 else "float32"
self._conv_type = conv_type
self._param_initializer = fluid.initializer.TruncatedNormal(
scale=initializer_range)
self._src_emb = Embedding(
size=[self._voc_size, self._emb_size],
param_attr=fluid.ParamAttr(
name=self._word_emb_name, initializer=self._param_initializer),
dtype=self._dtype)
self._pos_emb = Embedding(
size=[self._max_position_seq_len, self._emb_size],
param_attr=fluid.ParamAttr(
name=self._pos_emb_name, initializer=self._param_initializer),
dtype=self._dtype)
self._sent_emb = Embedding(
size=[self._sent_types, self._emb_size],
param_attr=fluid.ParamAttr(
name=self._sent_emb_name, initializer=self._param_initializer),
dtype=self._dtype)
self._emb_fac = Linear(
input_dim=self._emb_size,
output_dim=self._hidden_size,
param_attr=fluid.ParamAttr(name="s_emb_factorization"))
# self.pooled_fc = Linear(
# input_dim=self._hidden_size,
# output_dim=self._hidden_size,
# param_attr=fluid.ParamAttr(
# name="s_pooled_fc.w_0", initializer=self._param_initializer),
# bias_attr="s_pooled_fc.b_0",
# act="tanh")
        if self._conv_type == "conv_bn":
            self._encoder = ConvBNEncoderLayer(
                hidden_size=self._hidden_size, name="encoder")
        elif self._conv_type == "mobilenet":
            # The MobileNetV1 head is hardcoded to 3 classes for the MNLI demo.
            self._encoder = MobileNetV1(1, 3)
        else:
            raise ValueError("Unknown conv_type: %s" % self._conv_type)
    def forward(self, src_ids, position_ids, sentence_ids):
        src_emb = self._src_emb(src_ids)
        pos_emb = self._pos_emb(position_ids)
        sent_emb = self._sent_emb(sentence_ids)
        emb_out = src_emb + pos_emb
        emb_out = emb_out + sent_emb
        emb_out = self._emb_fac(emb_out)
        if self._conv_type == "mobilenet":
            # MobileNetV1 expects a single-channel NCHW map; the conv_bn
            # encoder performs its own reshape internally, so skip it there.
            emb_out = fluid.layers.reshape(
                emb_out, shape=[-1, 1, emb_out.shape[1], emb_out.shape[2]])
        enc_output = self._encoder(emb_out)
# if not self.return_pooled_out:
# return enc_output
# next_sent_feat = fluid.layers.slice(
# input=enc_output, axes=[2], starts=[0], ends=[1])
# next_sent_feat = fluid.layers.reshape(
# next_sent_feat, shape=[-1, self._hidden_size])
# next_sent_feat = self.pooled_fc(next_sent_feat)
return enc_output
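if __name__ == "__main__":
    # Illustrative smoke test (a sketch, not part of the model code): walk a
    # fake batch through BertModelLayer with the default MobileNetV1 encoder.
    # Embeddings are summed, factorized 128 -> 768, reshaped to a
    # single-channel NCHW map, then classified into the hardcoded 3 classes.
    import numpy as np

    with fluid.dygraph.guard():
        model = BertModelLayer(conv_type="mobilenet")
        bs, seq_len = 4, 128
        src = to_variable(
            np.random.randint(0, 30522, (bs, seq_len)).astype("int64"))
        pos = to_variable(
            np.tile(np.arange(seq_len), (bs, 1)).astype("int64"))
        sent = to_variable(np.zeros((bs, seq_len), dtype="int64"))
        print(model(src, pos, sent).shape)  # expect [4, 3]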
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"dygraph transformer layers"
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle.fluid as fluid
from paddle.fluid.dygraph import Linear, Layer, to_variable
from .bert import BertModelLayer
class ClsModelLayer(Layer):
"""
classify model
"""
    def __init__(self, num_labels, return_pooled_out=True):
        super(ClsModelLayer, self).__init__()
        self._hidden_size = 768
        # NOTE: the MobileNetV1 head inside BertModelLayer is hardcoded to
        # 3 classes, so num_labels must be 3 unless the commented-out
        # cls_fc below is restored.
        self.bert_layer = BertModelLayer(
            emb_size=128,
            hidden_size=self._hidden_size,
            voc_size=30522,
            max_position_seq_len=512,
            sent_types=2,
            return_pooled_out=return_pooled_out,
            initializer_range=1.0,
            conv_type="mobilenet",
            use_fp16=False)
# self.cls_fc = Linear(
        #     input_dim=self._hidden_size,
# output_dim=num_labels,
# param_attr=fluid.ParamAttr(
# name="cls_out_w",
# initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
# bias_attr=fluid.ParamAttr(
# name="cls_out_b",
# initializer=fluid.initializer.Constant(0.)))
def forward(self, data_ids):
"""
forward
"""
src_ids = to_variable(data_ids[0])
position_ids = to_variable(data_ids[1])
sentence_ids = to_variable(data_ids[2])
        input_mask = data_ids[3]  # kept for API parity; unused by the conv encoder
labels = to_variable(data_ids[4])
logits = self.bert_layer(src_ids, position_ids, sentence_ids)
# cls_feat = fluid.layers.dropout(
# x=next_sent_feat,
# dropout_prob=0.1,
# dropout_implementation="upscale_in_train")
# logit = self.cls_fc(cls_feat)
ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
logits=logits, label=labels, return_softmax=True)
loss = fluid.layers.mean(x=ce_loss)
num_seqs = fluid.layers.create_tensor(dtype='int64')
accuracy = fluid.layers.accuracy(
input=probs, label=labels, total=num_seqs)
return loss, accuracy, num_seqs
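if __name__ == "__main__":
    # Illustrative sketch of the batch layout `forward` expects: five numpy
    # arrays in the order unpacked above. Labels must be int64 of shape
    # (batch, 1) for softmax_with_cross_entropy; input_mask is carried along
    # but unused by the convolutional encoder.
    import numpy as np

    bs, seq_len = 2, 128
    fake_batch = [
        np.random.randint(0, 30522, (bs, seq_len)).astype("int64"),  # src_ids
        np.tile(np.arange(seq_len), (bs, 1)).astype("int64"),  # position_ids
        np.zeros((bs, seq_len), dtype="int64"),  # sentence_ids
        np.ones((bs, seq_len, 1), dtype="float32"),  # input_mask
        np.random.randint(0, 3, (bs, 1)).astype("int64"),  # labels
    ]
    with fluid.dygraph.guard():
        model = ClsModelLayer(num_labels=3)
        loss, acc, num_seqs = model(fake_batch)
        print(loss.numpy(), acc.numpy())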
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# order: standard library, third party, local library
import paddle.fluid as fluid
from paddle.fluid.initializer import MSRA
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear
from paddle.fluid.dygraph.base import to_variable
class ConvBNLayer(fluid.dygraph.Layer):
def __init__(self,
num_channels,
filter_size,
num_filters,
stride,
padding,
channels=None,
num_groups=1,
act='relu',
use_cudnn=True,
name=None):
super(ConvBNLayer, self).__init__()
self._conv = Conv2D(
num_channels=num_channels,
num_filters=num_filters,
filter_size=filter_size,
stride=stride,
padding=padding,
groups=num_groups,
act=None,
use_cudnn=use_cudnn,
param_attr=ParamAttr(
initializer=MSRA(), name=self.full_name() + "_weights"),
bias_attr=False)
self._batch_norm = BatchNorm(
num_filters,
act=act,
param_attr=ParamAttr(name=self.full_name() + "_bn" + "_scale"),
bias_attr=ParamAttr(name=self.full_name() + "_bn" + "_offset"),
moving_mean_name=self.full_name() + "_bn" + '_mean',
moving_variance_name=self.full_name() + "_bn" + '_variance')
def forward(self, inputs):
y = self._conv(inputs)
y = self._batch_norm(y)
return y
class DepthwiseSeparable(fluid.dygraph.Layer):
def __init__(self,
num_channels,
num_filters1,
num_filters2,
num_groups,
stride,
scale,
name=None):
super(DepthwiseSeparable, self).__init__()
self._depthwise_conv = ConvBNLayer(
num_channels=num_channels,
num_filters=int(num_filters1 * scale),
filter_size=3,
stride=stride,
padding=1,
num_groups=int(num_groups * scale),
use_cudnn=False)
self._pointwise_conv = ConvBNLayer(
num_channels=int(num_filters1 * scale),
filter_size=1,
num_filters=int(num_filters2 * scale),
stride=1,
padding=0)
def forward(self, inputs):
y = self._depthwise_conv(inputs)
y = self._pointwise_conv(y)
return y
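# Aside (illustrative): a depthwise-separable block trades one dense KxK
# convolution for a per-channel KxK depthwise conv plus a 1x1 pointwise conv.
# Rough weight counts for C_in=64, C_out=128, K=3, scale=1.0 (BN ignored):
#   dense conv:  C_in * C_out * K * K = 64 * 128 * 9 = 73,728
#   depthwise:   C_in * K * K         = 64 * 9       =    576
#   pointwise:   C_in * C_out         = 64 * 128     =  8,192
# i.e. roughly an 8x parameter reduction at this layer.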
class MobileNetV1(fluid.dygraph.Layer):
def __init__(self, scale=1.0, class_dim=1000):
super(MobileNetV1, self).__init__()
self.scale = scale
self.dwsl = []
self.conv1 = ConvBNLayer(
num_channels=1,
filter_size=3,
channels=3,
num_filters=int(32 * scale),
stride=2,
padding=1)
dws21 = self.add_sublayer(
sublayer=DepthwiseSeparable(
num_channels=int(32 * scale),
num_filters1=32,
num_filters2=64,
num_groups=32,
stride=1,
scale=scale),
name="conv2_1")
self.dwsl.append(dws21)
dws22 = self.add_sublayer(
sublayer=DepthwiseSeparable(
num_channels=int(64 * scale),
num_filters1=64,
num_filters2=128,
num_groups=64,
stride=2,
scale=scale),
name="conv2_2")
self.dwsl.append(dws22)
dws31 = self.add_sublayer(
sublayer=DepthwiseSeparable(
num_channels=int(128 * scale),
num_filters1=128,
num_filters2=128,
num_groups=128,
stride=1,
scale=scale),
name="conv3_1")
self.dwsl.append(dws31)
dws32 = self.add_sublayer(
sublayer=DepthwiseSeparable(
num_channels=int(128 * scale),
num_filters1=128,
num_filters2=256,
num_groups=128,
stride=2,
scale=scale),
name="conv3_2")
self.dwsl.append(dws32)
dws41 = self.add_sublayer(
sublayer=DepthwiseSeparable(
num_channels=int(256 * scale),
num_filters1=256,
num_filters2=256,
num_groups=256,
stride=1,
scale=scale),
name="conv4_1")
self.dwsl.append(dws41)
dws42 = self.add_sublayer(
sublayer=DepthwiseSeparable(
num_channels=int(256 * scale),
num_filters1=256,
num_filters2=512,
num_groups=256,
stride=2,
scale=scale),
name="conv4_2")
self.dwsl.append(dws42)
for i in range(5):
tmp = self.add_sublayer(
sublayer=DepthwiseSeparable(
num_channels=int(512 * scale),
num_filters1=512,
num_filters2=512,
num_groups=512,
stride=1,
scale=scale),
name="conv5_" + str(i + 1))
self.dwsl.append(tmp)
dws56 = self.add_sublayer(
sublayer=DepthwiseSeparable(
num_channels=int(512 * scale),
num_filters1=512,
num_filters2=1024,
num_groups=512,
stride=2,
scale=scale),
name="conv5_6")
self.dwsl.append(dws56)
dws6 = self.add_sublayer(
sublayer=DepthwiseSeparable(
num_channels=int(1024 * scale),
num_filters1=1024,
num_filters2=1024,
num_groups=1024,
stride=1,
scale=scale),
name="conv6")
self.dwsl.append(dws6)
self.pool2d_avg = Pool2D(pool_type='avg', global_pooling=True)
self.out = Linear(
int(1024 * scale),
class_dim,
param_attr=ParamAttr(
initializer=MSRA(), name=self.full_name() + "fc7_weights"),
bias_attr=ParamAttr(name="fc7_offset"))
def forward(self, inputs):
y = self.conv1(inputs)
for dws in self.dwsl:
y = dws(y)
y = self.pool2d_avg(y)
        # The pooled channel count scales with `scale`, so the reshape must too.
        y = fluid.layers.reshape(y, shape=[-1, int(1024 * self.scale)])
y = self.out(y)
return y
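if __name__ == "__main__":
    # Illustrative smoke test (sketch): in this demo the network consumes a
    # single-channel "image" built from BERT embeddings, i.e. a tensor of
    # shape (batch, 1, seq_len, hidden_size); conv1 is hardcoded to one input
    # channel accordingly.
    import numpy as np

    with fluid.dygraph.guard():
        net = MobileNetV1(scale=1.0, class_dim=3)
        x = to_variable(np.random.rand(4, 1, 128, 768).astype("float32"))
        print(net(x).shape)  # expect [4, 3]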
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"dygraph transformer layers"
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle.fluid as fluid
from paddle.fluid.dygraph import Layer, Conv2D, BatchNorm, to_variable
class ConvBNRelu(fluid.dygraph.Layer):
def __init__(self,
in_c=768,
out_c=768,
filter_size=[3, 1],
dilation=1,
is_test=False,
use_cudnn=True,
name=None):
super(ConvBNRelu, self).__init__()
self._name = name
conv_std = (2.0 /
(filter_size[0] * filter_size[1] * out_c * in_c))**0.5
conv_param = fluid.ParamAttr(
name=name if name is None else (name + "_conv.weights"),
initializer=fluid.initializer.Normal(0.0, conv_std))
self.conv = Conv2D(
in_c,
out_c,
filter_size,
dilation=[dilation, 1],
padding=[(filter_size[0] - 1) * dilation // 2, 0],
param_attr=conv_param,
act=None,
bias_attr=False,
use_cudnn=use_cudnn)
        # Respect the caller's is_test flag rather than hardcoding train mode.
        self.bn = BatchNorm(out_c, act="relu", is_test=is_test)

    def forward(self, inputs):
        # conv -> batch norm with fused relu
        conv = self.conv(inputs)
        return self.bn(conv)
class GateConv(fluid.dygraph.Layer):
def __init__(self,
in_c=768,
out_c=768,
filter_size=[3, 1],
dilation=1,
is_test=False,
use_cudnn=True,
name=None):
super(GateConv, self).__init__()
conv_std = (2.0 /
(filter_size[0] * filter_size[1] * out_c * in_c))**0.5
conv_param = fluid.ParamAttr(
name=name if name is None else (name + "_conv.weights"),
initializer=fluid.initializer.Normal(0.0, conv_std))
gate_param = fluid.ParamAttr(
name=name if name is None else (name + "_conv_gate.weights"),
initializer=fluid.initializer.Normal(0.0, conv_std))
self.conv = Conv2D(
in_c,
out_c,
filter_size,
dilation=[dilation, 1],
padding=[(filter_size[0] - 1) * dilation // 2, 0],
param_attr=conv_param,
act=None,
use_cudnn=use_cudnn)
self.gate = Conv2D(
in_c,
out_c,
filter_size,
dilation=[dilation, 1],
padding=[(filter_size[0] - 1) * dilation // 2, 0],
param_attr=gate_param,
act="sigmoid",
use_cudnn=use_cudnn)
def forward(self, inputs):
conv = self.conv(inputs)
gate = self.gate(inputs)
return conv * gate
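# Aside (illustrative): GateConv is a GLU-style gated convolution. Both
# branches see the same input; the sigmoid branch produces element-wise gates
# in (0, 1) that scale the linear branch:
#
#     y = conv(x) * sigmoid(gate(x))
#
# Note that GateConv is defined but not wired into ConvBNBlock below, which
# stacks plain ConvBNRelu layers.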
class ConvBNBlock(Layer):
    def __init__(self,
                 in_c,
                 out_c,
                 filter_size,
                 dilation=1,
                 block_size=4,
                 name=None):
        super(ConvBNBlock, self).__init__()
        self._in_c = in_c
        self._out_c = out_c
        self._filter_size = filter_size
        self._dilation = dilation
        self._block_size = block_size
        self.convs = []
        for n in range(block_size):
            # Derive a per-layer name without overwriting `name`, otherwise
            # the suffixes would accumulate across iterations.
            conv_name = None if name is None else name + "_" + str(n)
            # Register via add_sublayer so the parameters are visible to
            # model.parameters(); a plain Python list would hide them.
            conv = self.add_sublayer(
                name="conv" + str(n) if conv_name is None else conv_name,
                sublayer=ConvBNRelu(
                    in_c=self._in_c,
                    out_c=self._out_c,
                    filter_size=self._filter_size,
                    dilation=self._dilation,
                    is_test=False,
                    use_cudnn=True,
                    name=conv_name))
            self.convs.append(conv)

    def forward(self, inputs):
        # Chain the layers: each conv consumes the previous output.
        tmp = inputs
        for conv in self.convs:
            tmp = conv(tmp)
        return tmp
class ConvBNEncoderLayer(Layer):
    def __init__(self,
                 filters=[3, 5, 7],
                 dilations=[1, 1, 1],
                 hidden_size=768,
                 name="encoder"):
        super(ConvBNEncoderLayer, self).__init__()
        self._hidden_size = hidden_size
        # Stem: a [3, hidden_size] filter folds the embedding dimension into
        # channels, leaving later blocks to convolve along the sequence axis.
        self.conv0 = ConvBNRelu(
            in_c=1,
            out_c=self._hidden_size,
            filter_size=[3, self._hidden_size],
            dilation=1,
            is_test=False,
            use_cudnn=True,
            name="stem")
        self.blocks = []
        for n, (filter_n, dilation_n) in enumerate(zip(filters, dilations)):
            # Per-block name; do not overwrite `name` inside the loop.
            block_name = None if name is None else name + "_block" + str(n)
            block = self.add_sublayer(
                name="block" + str(n) if block_name is None else block_name,
                sublayer=ConvBNBlock(
                    self._hidden_size,
                    self._hidden_size,
                    filter_size=[filter_n, 1],
                    dilation=dilation_n,
                    block_size=4,
                    name=block_name))
            self.blocks.append(block)
def forward(self, enc_input):
tmp = fluid.layers.reshape(
enc_input, [-1, 1, enc_input.shape[1],
self._hidden_size]) #(bs, 1, seq_len, hidden_size)
tmp = self.conv0(tmp) # (bs, hidden_size, seq_len, 1)
for block in self.blocks:
tmp = block(tmp)
return tmp
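if __name__ == "__main__":
    # Illustrative smoke test (sketch): the stem conv folds the hidden
    # dimension into channels, so the stacked blocks convolve along the
    # sequence axis and the output is (batch, hidden_size, seq_len, 1).
    import numpy as np

    with fluid.dygraph.guard():
        encoder = ConvBNEncoderLayer(hidden_size=768)
        x = to_variable(np.random.rand(2, 128, 768).astype("float32"))
        print(encoder(x).shape)  # expect [2, 768, 128, 1]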
export CUDA_VISIBLE_DEVICES=0
python train.py
import time

import numpy as np
import paddle.fluid as fluid
from paddleslim.teachers.bert.reader.cls import *
from model.cls import ClsModelLayer
def main():
place = fluid.CUDAPlace(0)
BERT_BASE_PATH = "./data/pretrained_models/uncased_L-12_H-768_A-12/"
bert_config_path = BERT_BASE_PATH + "/bert_config.json"
vocab_path = BERT_BASE_PATH + "/vocab.txt"
data_dir = "./data/glue_data/MNLI/"
max_seq_len = 128
do_lower_case = True
batch_size = 128
epoch = 600
processor = MnliProcessor(
data_dir=data_dir,
vocab_path=vocab_path,
max_seq_len=max_seq_len,
do_lower_case=do_lower_case,
in_tokens=False)
train_reader = processor.data_generator(
batch_size=batch_size,
phase='train',
epoch=1,
dev_count=1,
shuffle=True)
val_reader = processor.data_generator(
batch_size=batch_size,
phase='dev',
epoch=1,
dev_count=1,
shuffle=False)
with fluid.dygraph.guard(place):
model = ClsModelLayer(3)
optimizer = fluid.optimizer.MomentumOptimizer(
0.001,
0.9,
regularization=fluid.regularizer.L2DecayRegularizer(3e-4),
parameter_list=model.parameters())
for i in range(epoch):
model.train()
losses = []
accs = []
start = time.time()
for step_id, data in enumerate(train_reader()):
loss, acc, num = model(data)
loss.backward()
optimizer.minimize(loss)
model.clear_gradients()
losses.append(loss.numpy())
accs.append(acc.numpy())
if step_id % 50 == 0:
time_cost = time.time() - start
start = time.time()
speed = time_cost / 50.0
print(
"Train iter-[{}]-[{}] - loss: {:.4f}; acc: {:.4f}; speed: {:.3f}s/step; time: {}".
format(i, step_id,
np.mean(losses),
np.mean(accs), speed,
time.asctime(time.localtime())))
losses = []
accs = []
model.eval()
losses = []
accs = []
for step_id, data in enumerate(val_reader()):
loss, acc, num = model(data)
losses.append(loss.numpy())
accs.append(acc.numpy())
print("Eval epoch [{}]- loss: {:.4f}; acc: {:.4f}".format(
i, np.mean(losses), np.mean(accs)))
if __name__ == '__main__':
main()
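# Possible extension (a sketch, not in this script): persist weights between
# epochs with the fluid dygraph checkpoint helpers, called inside the guard:
#
#     fluid.save_dygraph(model.state_dict(), "cls_mnli")  # -> cls_mnli.pdparams
#     param_dict, _ = fluid.load_dygraph("cls_mnli")
#     model.set_dict(param_dict)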
@@ -151,7 +151,7 @@ class DataProcessor(object):
         elif phase == 'search_valid':
             examples = self.get_train_examples(self.data_dir)
             self.num_examples['search_valid'] = len(examples) / 2
-            examples = examples[self.num_examples['search_train']:]
+            examples = examples[self.num_examples['search_valid']:]
         else:
             raise ValueError(
                 "Unknown phase, which should be in ['train', 'dev', 'test'].")