# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import paddle.fluid as fluid

from backbone.utils.transformer import pre_process_layer
from utils.configure import JsonConfig


def compute_loss(output_tensors, args=None):
    """Compute the masked-language-model (MLM) loss."""
    fc_out = output_tensors['mlm_out']
    mask_label = output_tensors['mask_label']
    # cross entropy over the vocabulary at every masked position,
    # averaged to a scalar loss
    mask_lm_loss = fluid.layers.softmax_with_cross_entropy(
        logits=fc_out, label=mask_label)
    mean_mask_lm_loss = fluid.layers.mean(mask_lm_loss)
    return mean_mask_lm_loss


def create_model(reader_input, base_model=None, is_training=True, args=None):
    """
    Given the base model and the reader input, build the MLM head and
    return its output tensors.
    """
    src_ids, pos_ids, sent_ids, input_mask, mask_label, mask_pos = reader_input

    config = JsonConfig(args.pretrain_config_path)

    _emb_size = config['hidden_size']
    _voc_size = config['vocab_size']
    _hidden_act = config['hidden_act']

    _word_emb_name = "word_embedding"
    _dtype = "float32"

    _param_initializer = fluid.initializer.TruncatedNormal(
        scale=config['initializer_range'])

    mask_pos = fluid.layers.cast(x=mask_pos, dtype='int32')

    enc_out = base_model.final_word_representation

    # flatten the encoder output to [batch_size * seq_len, emb_size]
    reshaped_emb_out = fluid.layers.reshape(
        x=enc_out, shape=[-1, _emb_size])

    # extract the features of the masked tokens
    mask_feat = fluid.layers.gather(input=reshaped_emb_out, index=mask_pos)

    num_seqs = fluid.layers.fill_constant(shape=[1], value=512, dtype='int64')

    # transform: fc
    mask_trans_feat = fluid.layers.fc(
        input=mask_feat,
        size=_emb_size,
        act=_hidden_act,
        param_attr=fluid.ParamAttr(
            name='mask_lm_trans_fc.w_0',
            initializer=_param_initializer),
        bias_attr=fluid.ParamAttr(name='mask_lm_trans_fc.b_0'))

    # transform: layer norm
    mask_trans_feat = pre_process_layer(
        mask_trans_feat, 'n', name='mask_lm_trans')

    mask_lm_out_bias_attr = fluid.ParamAttr(
        name="mask_lm_out_fc.b_0",
        initializer=fluid.initializer.Constant(value=0.0))

    # project onto the vocabulary by reusing (tying) the word embedding matrix
    fc_out = fluid.layers.matmul(
        x=mask_trans_feat,
        y=fluid.default_main_program().global_block().var(_word_emb_name),
        transpose_y=True)
    fc_out += fluid.layers.create_parameter(
        shape=[_voc_size],
        dtype=_dtype,
        attr=mask_lm_out_bias_attr,
        is_bias=True)

    output_tensors = {}
    output_tensors['num_seqs'] = num_seqs
    output_tensors['mlm_out'] = fc_out
    output_tensors['mask_label'] = mask_label

    return output_tensors
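

# ---------------------------------------------------------------------------
# Usage sketch (not part of the original pipeline): assuming a `base_model`
# that exposes `final_word_representation` and an `args` object whose
# `pretrain_config_path` points to a JSON config providing `hidden_size`,
# `vocab_size`, `hidden_act` and `initializer_range`, the two functions above
# would typically be wired together roughly like this:
#
#     output_tensors = create_model(reader_input, base_model=base_model,
#                                   is_training=True, args=args)
#     loss = compute_loss(output_tensors, args=args)
#     fluid.optimizer.Adam(learning_rate=1e-4).minimize(loss)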