# -*- coding: UTF-8 -*-
#   Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import paddle.fluid as fluid
from paddlepalm.interface import task_paradigm
from paddle.fluid import layers
from paddlepalm.backbone.utils.transformer import pre_process_layer

class TaskParadigm(task_paradigm):
    """Masked language model (MLM) task head.

    Gathers the encoder features at the masked positions, passes them through
    a fully connected layer followed by layer normalization, and scores the
    result against the backbone's word embedding table plus a learned output
    bias.  In the 'train' phase the head yields the mean cross-entropy loss;
    in every other phase it yields the vocabulary logits.
    """

    def __init__(self, config, phase, backbone_config=None):
        """Read the model dimensions this head needs from the backbone config.

        Args:
            config: task-level configuration; not read by this head.
            phase: phase name; the head is in training mode only when this
                equals 'train'.
            backbone_config: mapping that must provide 'hidden_size',
                'vocab_size', 'hidden_act' and 'initializer_range'.  Despite
                the ``None`` default it is required: it is indexed
                unconditionally below.
        """
        self._is_training = phase == 'train'
        # The embedding width is taken from the backbone's hidden size.
        self._emb_size = backbone_config['hidden_size']
        self._hidden_size = backbone_config['hidden_size']
        self._vocab_size = backbone_config['vocab_size']
        self._hidden_act = backbone_config['hidden_act']
        self._initializer_range = backbone_config['initializer_range']

    @property
    def inputs_attrs(self):
        """Describe the inputs this head consumes as ``[shape, dtype]`` pairs.

        Returns:
            dict with two entries: 'reader' (fields expected from the data
            reader) and 'backbone' (fields expected from the backbone).
            'mask_label' and 'batchsize_x_seqlen' are requested only in the
            training phase, because build() only reads them there.
        """
        # mask_pos is needed in every phase to locate the masked tokens.
        reader = {"mask_pos": [[-1], 'int64']}
        if self._is_training:
            reader["mask_label"] = [[-1], 'int64']
            # build() reads this key to clamp mask_pos while training, so it
            # has to be declared alongside the other reader fields.
            # NOTE(review): declared as a one-element int64 tensor -- confirm
            # the shape against what the reader actually emits.
            reader["batchsize_x_seqlen"] = [[1], 'int64']

        bb = {
            "encoder_outputs": [[-1, -1, self._hidden_size], 'float32'],
            "embedding_table": [[-1, self._vocab_size, self._emb_size], 'float32']}
        return {'reader': reader, 'backbone': bb}

    @property
    def outputs_attrs(self):
        """Describe the outputs of build() as ``[shape, dtype]`` pairs.

        Returns:
            ``{"loss": ...}`` in the training phase, ``{"logits": ...}``
            otherwise.
        """
        if self._is_training:
            return {"loss": [[1], 'float32']}
        return {"logits": [[-1], 'float32']}

    def build(self, inputs, scope_name=""):
        """Build the masked-LM head on top of the backbone outputs.

        Args:
            inputs: nested dict holding the 'reader' and 'backbone' fields
                declared by ``inputs_attrs``.
            scope_name: prefix prepended to every parameter name created here.

        Returns:
            ``{'loss': mean cross-entropy}`` in the training phase,
            ``{'logits': vocabulary scores of the masked tokens}`` otherwise.
        """
        mask_pos = inputs["reader"]["mask_pos"]
        if self._is_training:
            mask_label = inputs["reader"]["mask_label"]
            # Cap every position at batchsize_x_seqlen - 1 before it is used
            # as a gather index (presumably to keep padded positions inside
            # the flattened encoder output -- verify against the reader).
            max_position = inputs["reader"]["batchsize_x_seqlen"] - 1
            mask_pos = fluid.layers.elementwise_min(mask_pos, max_position)
            mask_pos.stop_gradient = True

        word_emb = inputs["backbone"]["embedding_table"]
        enc_out = inputs["backbone"]["encoder_outputs"]

        emb_size = word_emb.shape[-1]

        _param_initializer = fluid.initializer.TruncatedNormal(
            scale=self._initializer_range)

        # Flatten the encoder output to 2-D so that mask_pos can index rows.
        reshaped_emb_out = fluid.layers.reshape(
            x=enc_out, shape=[-1, emb_size])

        # extract masked tokens' feature
        mask_feat = fluid.layers.gather(input=reshaped_emb_out, index=mask_pos)

        # transform: fc
        mask_trans_feat = fluid.layers.fc(
            input=mask_feat,
            size=emb_size,
            act=self._hidden_act,
            param_attr=fluid.ParamAttr(
                name=scope_name+'mask_lm_trans_fc.w_0',
                initializer=_param_initializer),
            bias_attr=fluid.ParamAttr(name=scope_name+'mask_lm_trans_fc.b_0'))
        # transform: layer norm
        mask_trans_feat = pre_process_layer(
            mask_trans_feat, 'n', name=scope_name+'mask_lm_trans')

        mask_lm_out_bias_attr = fluid.ParamAttr(
            name=scope_name+"mask_lm_out_fc.b_0",
            initializer=fluid.initializer.Constant(value=0.0))

        # Output projection reuses the word embedding table (transposed) as
        # its weight; only the bias is a new parameter.
        fc_out = fluid.layers.matmul(
            x=mask_trans_feat,
            y=word_emb,
            transpose_y=True)
        fc_out += fluid.layers.create_parameter(
            shape=[self._vocab_size],
            dtype='float32',
            attr=mask_lm_out_bias_attr,
            is_bias=True)

        if not self._is_training:
            return {'logits': fc_out}

        # Kept as a separate local so the `inputs` argument is not shadowed.
        probs = fluid.layers.softmax(fc_out)
        mask_lm_loss = fluid.layers.cross_entropy(
            input=probs, label=mask_label)
        loss = fluid.layers.mean(mask_lm_loss)
        return {'loss': loss}