# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Dygraph classification layers built on top of the BERT encoder."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import six
import json
import numpy as np

import paddle
import paddle.fluid as fluid
from paddle.fluid.dygraph import Linear, Layer

from .bert import BertModelLayer


class ClsModelLayer(Layer):
    """
    Classification model: a BERT encoder followed by one classification
    head (dropout + Linear) per transformer layer.
    """

    def __init__(self,
                 config,
                 num_labels,
                 is_training=True,
                 return_pooled_out=True,
                 loss_scaling=1.0,
                 use_fp16=False):
        super(ClsModelLayer, self).__init__()
        self.config = config
        self.is_training = is_training
        self.use_fp16 = use_fp16
        self.loss_scaling = loss_scaling
        self.n_layers = config['num_hidden_layers']
        self.return_pooled_out = return_pooled_out

        self.bert_layer = BertModelLayer(
            config=self.config, return_pooled_out=True, use_fp16=self.use_fp16)

        # One classification head per transformer layer.
        self.cls_fc = list()
        for i in range(self.n_layers):
            fc = Linear(
                input_dim=self.config["hidden_size"],
                output_dim=num_labels,
                param_attr=fluid.ParamAttr(
                    name="cls_out_%d_w" % i,
                    initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
                bias_attr=fluid.ParamAttr(
                    name="cls_out_%d_b" % i,
                    initializer=fluid.initializer.Constant(0.)))
            fc = self.add_sublayer("cls_fc_%d" % i, fc)
            self.cls_fc.append(fc)

    def emb_names(self):
        return self.bert_layer.emb_names()

    def forward(self, data_ids):
        """
        forward

        data_ids is expected to be a list/tuple of
        [src_ids, position_ids, sentence_ids, input_mask, labels].
        """
        src_ids = data_ids[0]
        position_ids = data_ids[1]
        sentence_ids = data_ids[2]
        input_mask = data_ids[3]
        labels = data_ids[4]

        enc_outputs, next_sent_feats = self.bert_layer(
            src_ids, position_ids, sentence_ids, input_mask)

        if not self.return_pooled_out:
            # Use only the head on top of the last layer's pooled feature.
            cls_feat = fluid.layers.dropout(
                x=next_sent_feats[-1],
                dropout_prob=0.1,
                dropout_implementation="upscale_in_train")
            logits = self.cls_fc[-1](cls_feat)
            probs = fluid.layers.softmax(logits)
            num_seqs = fluid.layers.create_tensor(dtype='int64')
            accuracy = fluid.layers.accuracy(
                input=probs, label=labels, total=num_seqs)
            return enc_outputs, logits, accuracy, num_seqs

        # One loss/accuracy per transformer layer, summed into the total loss.
        logits = []
        losses = []
        accuracys = []
        for next_sent_feat, fc in zip(next_sent_feats, self.cls_fc):
            cls_feat = fluid.layers.dropout(
                x=next_sent_feat,
                dropout_prob=0.1,
                dropout_implementation="upscale_in_train")
            logit = fc(cls_feat)
            logits.append(logit)

            ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
                logits=logit, label=labels, return_softmax=True)
            loss = fluid.layers.mean(x=ce_loss)
            # Apply fp16 loss scaling before collecting the loss so that the
            # scaling is actually reflected in the returned total_loss.
            if self.use_fp16 and self.loss_scaling > 1.0:
                loss *= self.loss_scaling
            losses.append(loss)

            num_seqs = fluid.layers.create_tensor(dtype='int64')
            accuracy = fluid.layers.accuracy(
                input=probs, label=labels, total=num_seqs)
            accuracys.append(accuracy)

        total_loss = fluid.layers.sum(losses)

        return total_loss, logits, losses, accuracys, num_seqs
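
# ---------------------------------------------------------------------------
# Minimal construction sketch (illustrative only, not part of the original
# module). "bert_config.json" is a hypothetical path to a standard BERT config
# file; this class itself only reads "num_hidden_layers" and "hidden_size",
# while BertModelLayer consumes the remaining keys. Because of the relative
# import above, run this as a module (python -m <package>.cls), not as a
# standalone script.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    with open("bert_config.json") as f:
        bert_config = json.load(f)

    with fluid.dygraph.guard():
        model = ClsModelLayer(bert_config, num_labels=2, is_training=False)
        # forward() expects data_ids = [src_ids, position_ids, sentence_ids,
        # input_mask, labels] as produced by the BERT data reader.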