Unverified commit 0ad3a7f0, authored by Meiyim, committed by GitHub

Update setup.py (#484)

* Update setup.py

* reorganize seq2seq

* update lac dependency
Parent a7d51e7f
@@ -30,11 +30,10 @@ import paddle.fluid as F
import paddle.fluid.layers as L
import paddle.fluid.dygraph as D
from ernie.modeling_ernie import ErnieModel, ErnieModelForPretraining
from ernie.modeling_ernie import ErnieModel, ErnieModelForPretraining, ErnieModelForGeneration
from ernie.modeling_ernie import _build_linear, _build_ln, append_name
from ernie.tokenizing_ernie import ErnieTokenizer
from experimental.seq2seq.modeling_ernie_gen import ErnieModelForGeneration
from propeller import log
import propeller.paddle as propeller
@@ -32,13 +32,12 @@ import paddle.fluid.dygraph as D
from tqdm import tqdm
from ernie.modeling_ernie import ErnieModel, ErnieModelForPretraining
from ernie.modeling_ernie import ErnieModel, ErnieModelForPretraining, ErnieModelForGeneration
from ernie.modeling_ernie import _build_linear, _build_ln, append_name
from ernie.tokenizing_ernie import ErnieTokenizer
from ernie.optimization import AdamW, LinearDecay
from experimental.seq2seq.decode import beam_search_infilling, post_process
from experimental.seq2seq.modeling_ernie_gen import ErnieModelForGeneration
from propeller import log
import propeller.paddle as propeller
@@ -298,6 +298,8 @@ class ErnieModel(D.Layer, PretrainedModel):
output logits of pooler classifier
encoded(`Variable` of shape `[batch_size, seq_len, hidden_size]`):
output logits of transformer stack
info (Dictionary):
additional middle-level info; includes all hidden states and k/v caches.
"""
#d_batch, d_seqlen = src_ids.shape
assert len(src_ids.shape) == 2, 'expect src_ids.shape = [batch, sequence], got %s' % (repr(src_ids.shape))
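
An illustrative aside, not part of the diff: the forward contract documented above, in README-style dygraph usage. Assuming the stock 'ernie-1.0' config leaves `return_additional_info` off, only `pooled` and `encoded` come back; the extra `info` dictionary is returned when that flag is enabled, as `ErnieModelForGeneration` below does.

import numpy as np
import paddle.fluid.dygraph as D
from ernie.modeling_ernie import ErnieModel
from ernie.tokenizing_ernie import ErnieTokenizer

with D.guard():
    ernie = ErnieModel.from_pretrained('ernie-1.0')
    tokenizer = ErnieTokenizer.from_pretrained('ernie-1.0')
    ids, sids = tokenizer.encode('an example sentence')
    src_ids = D.to_variable(np.expand_dims(ids, 0))    # [batch_size, seq_len]
    sent_ids = D.to_variable(np.expand_dims(sids, 0))
    # default config: pooled CLS logits and the per-token encoder output;
    # the `info` dict is appended only when return_additional_info is enabled
    pooled, encoded = ernie(src_ids, sent_ids)
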
@@ -551,7 +553,7 @@ class ErnieModelForPretraining(ErnieModel):
nsp_labels (optional, `Variable` of shape [batch_size]):
labels for `next sentence prediction` tasks
mlm_pos (optional, `Variable` of shape [n_mask, 2]):
index of mask_id in `src_ids`, can obtain from `fluid.layers.where(src_ids==mask_id)`
index of mask_id in `src_ids`, can be obtained from `fluid.layers.where(src_ids==mask_id)`
labels (optional, `Variable` of shape [n_mask]):
labels for `mask language model` tasks, the original token indices in masked position in `src_ids`
Returns:
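
An illustrative aside, not part of the diff: a tiny sketch of preparing the `mlm_pos` / `labels` pair described above, following the `fluid.layers.where(src_ids==mask_id)` hint in the docstring. The toy ids and the `mask_id` value are made up for the example.

import numpy as np
import paddle.fluid.dygraph as D

with D.guard():
    mask_id = 3                                          # hypothetical [MASK] id; use the tokenizer's real id in practice
    ids = np.array([[5, 3, 7, 3, 2]], dtype='int64')    # [batch, seq_len] with two masked slots
    gold = np.array([[5, 8, 7, 9, 2]], dtype='int64')   # unmasked version of the same batch
    src_ids = D.to_variable(ids)                         # fed to the model together with mlm_pos / labels
    # coordinates of every masked position, shape [n_mask, 2]; the in-graph
    # equivalent is fluid.layers.where(src_ids == mask_id) as quoted above
    pos = np.argwhere(ids == mask_id).astype('int64')
    mlm_pos = D.to_variable(pos)
    # gold token ids at those positions, shape [n_mask]
    labels = D.to_variable(gold[pos[:, 0], pos[:, 1]])
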
@@ -582,3 +584,77 @@ class ErnieModelForPretraining(ErnieModel):
total_loss = mlm_loss + nsp_loss
return total_loss, mlm_loss, nsp_loss
class ErnieModelForGeneration(ErnieModel):
"""
Ernie Model for sequence to sequence generation.
"""
resource_map = {
'ernie-gen-base-en': ErnieModel.bce + 'model-ernie-gen-base-en.1.tar.gz',
'ernie-gen-large-en': ErnieModel.bce + 'model-ernie-gen-large-en.1.tar.gz',
'ernie-gen-large-160g-en': ErnieModel.bce + 'model-ernie-gen-large-160g-en.1.tar.gz',
'ernie-1.0': ErnieModel.bce + 'model-ernie1.0.1.tar.gz',
}
def __init__(self, cfg, name=None):
cfg['return_additional_info'] = True
cfg['has_pooler'] = False
super(ErnieModelForGeneration, self).__init__(cfg, name=name)
initializer = F.initializer.TruncatedNormal(scale=cfg['initializer_range'])
d_model = cfg['hidden_size']
d_vocab = cfg['vocab_size']
self.mlm = _build_linear(d_model, d_model, append_name(name, 'mask_lm_trans_fc'), initializer, act=cfg['hidden_act'])
self.mlm_ln = _build_ln(d_model, name = append_name(name, 'mask_lm_trans'))
self.mlm_bias = L.create_parameter(
dtype='float32',
shape=[d_vocab],
attr=F.ParamAttr(
name=append_name(name, 'mask_lm_out_fc.b_0'),
initializer=F.initializer.Constant(value=0.0)
),
is_bias=True,
)
@add_docstring(ErnieModel.forward.__doc__)
def forward(self, *args, **kwargs):
"""
Args
tgt_labels(`Variable` of shape [batch_size, seqlen] or [batch, seqlen, vocab_size]):
ground truth target sequence ids (hard label) or distribution (soft label)
tgt_pos(`Variable` of shape [n_targets, 2]):
index of tgt_labels in `src_ids`, can be obtained from `fluid.layers.where(src_ids==mask_id)`
encode_only(Bool):
if set, the loss and logits are not computed; both are returned as None
Returns:
loss(`Variable` of shape []):
cross entropy loss averaged over every target label; None if `encode_only` is set.
logits(`Variable` of shape [n_targets, vocab_size]):
logits for every target; None if `encode_only` is set.
info(Dictionary): see `ErnieModel`
"""
tgt_labels = kwargs.pop('tgt_labels', None)
tgt_pos = kwargs.pop('tgt_pos', None)
encode_only = kwargs.pop('encode_only', False)
_, encoded, info = ErnieModel.forward(self, *args, **kwargs)
if encode_only:
return None, None, info
elif tgt_labels is None or tgt_pos is None:
encoded = self.mlm(encoded)
encoded = self.mlm_ln(encoded)
logits = L.matmul(encoded, self.word_emb.weight, transpose_y=True) + self.mlm_bias
output_ids = L.argmax(logits, -1)
return output_ids, logits, info
else:
encoded_2d = L.gather_nd(encoded, tgt_pos)
encoded_2d = self.mlm(encoded_2d)
encoded_2d = self.mlm_ln(encoded_2d)
logits_2d = L.matmul(encoded_2d, self.word_emb.weight, transpose_y=True) + self.mlm_bias
if len(tgt_labels.shape) == 1:
tgt_labels = L.reshape(tgt_labels, [-1, 1])
loss = L.reduce_mean(
L.softmax_with_cross_entropy(logits_2d, tgt_labels, soft_label=(tgt_labels.shape[-1] != 1))
)
return loss, logits_2d, info
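
An illustrative aside, not part of the diff: a minimal dygraph sketch exercising the three return modes of the new `ErnieModelForGeneration.forward` above (encode only, free decoding, supervised loss). It relies on the repo's existing `from_pretrained` / `ErnieTokenizer` helpers; the sample sentence and the hand-picked target positions and labels are assumptions for illustration only.

import numpy as np
import paddle.fluid.dygraph as D
from ernie.modeling_ernie import ErnieModelForGeneration
from ernie.tokenizing_ernie import ErnieTokenizer

with D.guard():
    model = ErnieModelForGeneration.from_pretrained('ernie-gen-base-en')
    tokenizer = ErnieTokenizer.from_pretrained('ernie-gen-base-en')

    ids, sids = tokenizer.encode('an example source sentence')
    src_ids = D.to_variable(np.expand_dims(ids, 0))     # [batch_size, seq_len]
    sent_ids = D.to_variable(np.expand_dims(sids, 0))

    # 1) encoder pass only: skip the LM head, keep hidden states / caches in `info`
    _, _, info = model(src_ids, sent_ids, encode_only=True)
    layer_hiddens = info['hiddens']

    # 2) no targets given: per-position argmax ids plus full-vocab logits
    output_ids, logits, _ = model(src_ids, sent_ids)

    # 3) with targets: supervise a couple of positions of src_ids with hard labels;
    #    the positions and labels here are hand-picked purely for illustration
    tgt_pos = D.to_variable(np.array([[0, 1], [0, 2]], dtype='int64'))      # [n_targets, 2]
    tgt_labels = D.to_variable(np.array([ids[1], ids[2]], dtype='int64'))   # [n_targets]
    loss, logits_2d, _ = model(src_ids, sent_ids,
                               tgt_pos=tgt_pos, tgt_labels=tgt_labels)
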
../demo/seq2seq
\ No newline at end of file
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import division
from __future__ import absolute_import
from __future__ import print_function
from __future__ import unicode_literals
import sys
import logging
import numpy as np
import paddle.fluid as F
import paddle.fluid.layers as L
import paddle.fluid.dygraph as D
from ernie.modeling_ernie import ErnieModel
from ernie.modeling_ernie import _build_linear, _build_ln, append_name
class ErnieModelForGeneration(ErnieModel):
resource_map = {
'ernie-gen-base-en': ErnieModel.bce + 'model-ernie-gen-base-en.1.tar.gz',
'ernie-gen-large-en': ErnieModel.bce + 'model-ernie-gen-large-en.1.tar.gz',
'ernie-gen-large-160g-en': ErnieModel.bce + 'model-ernie-gen-large-160g-en.1.tar.gz',
'ernie-1.0': ErnieModel.bce + 'model-ernie1.0.1.tar.gz',
}
def __init__(self, cfg, name=None):
cfg['return_additional_info'] = True
cfg['has_pooler'] = False
super(ErnieModelForGeneration, self).__init__(cfg, name=name)
initializer = F.initializer.TruncatedNormal(scale=cfg['initializer_range'])
d_model = cfg['hidden_size']
d_vocab = cfg['vocab_size']
self.mlm = _build_linear(d_model, d_model, append_name(name, 'mask_lm_trans_fc'), initializer, act=cfg['hidden_act'])
self.mlm_ln = _build_ln(d_model, name = append_name(name, 'mask_lm_trans'))
self.mlm_bias = L.create_parameter(
dtype='float32',
shape=[d_vocab],
attr=F.ParamAttr(
name=append_name(name, 'mask_lm_out_fc.b_0'),
initializer=F.initializer.Constant(value=0.0)
),
is_bias=True,
)
def forward(self, src_ids, *args, **kwargs):
tgt_labels = kwargs.pop('tgt_labels', None)
tgt_pos = kwargs.pop('tgt_pos', None)
encode_only = kwargs.pop('encode_only', False)
_, encoded, info = ErnieModel.forward(self, src_ids, *args, **kwargs)
#log.debug('hidden_-1 %r'% L.reduce_mean(info['hiddens'][0]).numpy())
#log.debug('hidden_0 %r'% L.reduce_mean(info['hiddens'][1]).numpy())
if encode_only:
return None, None, info
elif tgt_labels is None:
encoded = self.mlm(encoded)
encoded = self.mlm_ln(encoded)
logits = L.matmul(encoded, self.word_emb.weight, transpose_y=True) + self.mlm_bias
output_ids = L.argmax(logits, -1)
return output_ids, logits, info
else:
encoded_2d = L.gather_nd(encoded, tgt_pos)
#log.debug('input shape %s' % repr(src_ids.shape))
#log.debug(L.gather_nd(src_ids, tgt_pos).numpy())
encoded_2d = self.mlm(encoded_2d)
encoded_2d = self.mlm_ln(encoded_2d)
logits_2d = L.matmul(encoded_2d, self.word_emb.weight, transpose_y=True) + self.mlm_bias
if len(tgt_labels.shape) == 1:
tgt_labels = L.reshape(tgt_labels, [-1, 1])
loss = L.reduce_mean(
L.softmax_with_cross_entropy(logits_2d, tgt_labels, soft_label=(tgt_labels.shape[-1] != 1))
)
return loss, logits_2d, info
@@ -3,3 +3,4 @@ pyzmq==18.0.2
six==1.11.0
sklearn==0.0
sentencepiece==0.1.8
LAC
@@ -22,7 +22,7 @@ with open("README.md", "r", encoding='utf-8') as fh:
setuptools.setup(
name="paddle-ernie", # Replace with your own username
version="0.0.3dev1",
version="0.0.4dev1",
author="Baidu Ernie Team",
author_email="ernieernie.team@gmail.com",
description="A pretrained NLP model for every NLP task",