# -*- coding: UTF-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import paddle.fluid as fluid
from paddle.fluid import layers

from paddlepalm.interface import task_paradigm
from paddlepalm.backbone.utils.transformer import pre_process_layer


class TaskParadigm(task_paradigm):
    '''
    masked language model (MLM)
    '''

    def __init__(self, config, phase, backbone_config=None):
        self._is_training = phase == 'train'
        self._emb_size = backbone_config['hidden_size']
        self._hidden_size = backbone_config['hidden_size']
        self._vocab_size = backbone_config['vocab_size']
        self._hidden_act = backbone_config['hidden_act']
        self._initializer_range = backbone_config['initializer_range']

    @property
    def inputs_attrs(self):
        reader = {
            "mask_label": [[-1, 1], 'int64'],
            # NOTE: shape/dtype of this entry are an assumption; it is a
            # single int64 value (batch_size * seq_len) used in build() to
            # clamp mask_pos during multi-task training.
            "batchsize_x_seqlen": [[1], 'int64'],
            "mask_pos": [[-1, 1], 'int64']}
        if not self._is_training:
            # labels and the clamping bound are only needed at training time
            del reader['mask_label']
            del reader['batchsize_x_seqlen']
        bb = {
            "encoder_outputs": [[-1, -1, self._hidden_size], 'float32'],
            "embedding_table": [[-1, self._vocab_size, self._emb_size], 'float32']}
        return {'reader': reader, 'backbone': bb}

    @property
    def outputs_attrs(self):
        if self._is_training:
            return {"loss": [[1], 'float32']}
        else:
            return {"logits": [[-1], 'float32']}

    def build(self, inputs):
        if self._is_training:
            mask_label = inputs["reader"]["mask_label"]
            # Only needed for multi-task learning: when another task runs
            # with a shorter seq_len, mask_pos could exceed the flattened
            # encoder output, so we clamp it to keep gather in range.
            batchsize_x_seqlen = inputs["reader"]["batchsize_x_seqlen"]
        mask_pos = inputs["reader"]["mask_pos"]
        word_emb = inputs["backbone"]["embedding_table"]
        enc_out = inputs["backbone"]["encoder_outputs"]

        emb_size = word_emb.shape[-1]

        _param_initializer = fluid.initializer.TruncatedNormal(
            scale=self._initializer_range)

        if self._is_training:
            # clamp mask_pos so that gather stays within the flattened
            # [batch_size * seq_len, emb_size] encoder output (see note above)
            mask_pos = fluid.layers.elementwise_min(mask_pos,
                                                    batchsize_x_seqlen)

        # flatten encoder outputs to [batch_size * seq_len, emb_size]
        reshaped_emb_out = fluid.layers.reshape(
            x=enc_out, shape=[-1, emb_size])

        # extract the masked tokens' features
        mask_feat = fluid.layers.gather(input=reshaped_emb_out, index=mask_pos)

        # transform: fc
        mask_trans_feat = fluid.layers.fc(
            input=mask_feat,
            size=emb_size,
            act=self._hidden_act,
            param_attr=fluid.ParamAttr(
                name='mask_lm_trans_fc.w_0',
                initializer=_param_initializer),
            bias_attr=fluid.ParamAttr(name='mask_lm_trans_fc.b_0'))

        # transform: layer norm
        mask_trans_feat = pre_process_layer(
            mask_trans_feat, 'n', name='mask_lm_trans')

        mask_lm_out_bias_attr = fluid.ParamAttr(
            name="mask_lm_out_fc.b_0",
            initializer=fluid.initializer.Constant(value=0.0))

        # project to vocab size with weights tied to the word embedding table
        fc_out = fluid.layers.matmul(
            x=mask_trans_feat,
            y=word_emb,
            transpose_y=True)
        fc_out += fluid.layers.create_parameter(
            shape=[self._vocab_size],
            dtype='float32',
            attr=mask_lm_out_bias_attr,
            is_bias=True)

        if self._is_training:
            mask_lm_loss = fluid.layers.softmax_with_cross_entropy(
                logits=fc_out, label=mask_label)
            loss = fluid.layers.mean(mask_lm_loss)
            return {'loss': loss}
        else:
            return {'logits': fc_out}