提交 386f2d13 编写于 作者: G guosheng

Make Transformer, seq2seq, sequence_tagging adapt to paddle.incubate.hapi

上级 b337063c
...@@ -19,7 +19,8 @@ from paddle.fluid.initializer import UniformInitializer ...@@ -19,7 +19,8 @@ from paddle.fluid.initializer import UniformInitializer
from paddle.fluid.dygraph import Embedding, Linear, Layer from paddle.fluid.dygraph import Embedding, Linear, Layer
from paddle.fluid.layers import BeamSearchDecoder from paddle.fluid.layers import BeamSearchDecoder
from paddle.incubate.hapi.model import Model, Loss from paddle.incubate.hapi.model import Model
from paddle.incubate.hapi.loss import Loss
from paddle.incubate.hapi.text import DynamicDecode, RNN, BasicLSTMCell, RNNCell from paddle.incubate.hapi.text import DynamicDecode, RNN, BasicLSTMCell, RNNCell
from seq2seq_base import Encoder from seq2seq_base import Encoder
......
...@@ -19,7 +19,8 @@ from paddle.fluid.initializer import UniformInitializer ...@@ -19,7 +19,8 @@ from paddle.fluid.initializer import UniformInitializer
from paddle.fluid.dygraph import Embedding, Linear, Layer from paddle.fluid.dygraph import Embedding, Linear, Layer
from paddle.fluid.layers import BeamSearchDecoder from paddle.fluid.layers import BeamSearchDecoder
from paddle.incubate.hapi.model import Model, Loss from paddle.incubate.hapi.model import Model
from paddle.incubate.hapi.loss import Loss
from paddle.incubate.hapi.text import DynamicDecode, RNN, BasicLSTMCell, RNNCell from paddle.incubate.hapi.text import DynamicDecode, RNN, BasicLSTMCell, RNNCell
......
...@@ -26,7 +26,7 @@ from args import parse_args ...@@ -26,7 +26,7 @@ from args import parse_args
from seq2seq_base import BaseModel, CrossEntropyCriterion from seq2seq_base import BaseModel, CrossEntropyCriterion
from seq2seq_attn import AttentionModel from seq2seq_attn import AttentionModel
from reader import create_data_loader from reader import create_data_loader
from utility import PPL, TrainCallback from utility import PPL, TrainCallback, get_model_cls
def do_train(args): def do_train(args):
...@@ -56,7 +56,8 @@ def do_train(args): ...@@ -56,7 +56,8 @@ def do_train(args):
# def dataloader # def dataloader
train_loader, eval_loader = create_data_loader(args, device) train_loader, eval_loader = create_data_loader(args, device)
model_maker = AttentionModel if args.attention else BaseModel model_maker = get_model_cls(
AttentionModel) if args.attention else get_model_cls(BaseModel)
model = model_maker(args.src_vocab_size, args.tar_vocab_size, model = model_maker(args.src_vocab_size, args.tar_vocab_size,
args.hidden_size, args.hidden_size, args.num_layers, args.hidden_size, args.hidden_size, args.num_layers,
args.dropout) args.dropout)
......
...@@ -18,6 +18,7 @@ import paddle.fluid as fluid ...@@ -18,6 +18,7 @@ import paddle.fluid as fluid
from paddle.incubate.hapi.metrics import Metric from paddle.incubate.hapi.metrics import Metric
from paddle.incubate.hapi.callbacks import ProgBarLogger from paddle.incubate.hapi.callbacks import ProgBarLogger
from paddle.incubate.hapi.text import BasicLSTMCell
class TrainCallback(ProgBarLogger): class TrainCallback(ProgBarLogger):
...@@ -78,3 +79,21 @@ class PPL(Metric): ...@@ -78,3 +79,21 @@ class PPL(Metric):
self.total_loss += batch_loss * batch_size self.total_loss += batch_loss * batch_size
ppl = math.exp(self.total_loss / self.word_count) ppl = math.exp(self.total_loss / self.word_count)
return ppl return ppl
def get_model_cls(model_cls):
    """
    Patch `model_cls.__init__` so that, after normal construction, every
    BasicLSTMCell sublayer has `_forget_bias.stop_gradient` re-assigned.

    This is a workaround for BasicLSTMCell / recurrent_op; remove it once
    they are fixed.

    NOTE(review): the original comment said the patch makes
    `_forget_bias.stop_gradient=True`, while the code assigns False. The
    assignment is kept unchanged here; confirm which one is intended.

    Args:
        model_cls: class to patch. Its instances must provide
            `sublayers(include_sublayers=True)`.

    Returns:
        The same class object, patched in place. The unpatched
        constructor stays reachable as `model_cls._raw_init`.
    """
    # Already patched: wrapping a second time would make `_raw_init` point
    # at the wrapper itself and recurse forever on instantiation.
    if model_cls.__dict__.get("_lstm_patched", False):
        return model_cls

    raw_init = model_cls.__init__

    def __lstm_patch__(self, *args, **kwargs):
        # Call the captured constructor directly rather than going through
        # `self._raw_init`: when a subclass is patched too, that attribute
        # resolves to the subclass wrapper and never reaches this class.
        raw_init(self, *args, **kwargs)
        for layer in self.sublayers(include_sublayers=True):
            if isinstance(layer, BasicLSTMCell):
                layer._forget_bias.stop_gradient = False

    model_cls._raw_init = raw_init
    model_cls.__init__ = __lstm_patch__
    model_cls._lstm_patched = True
    return model_cls
...@@ -18,24 +18,14 @@ SequenceTagging eval structure ...@@ -18,24 +18,14 @@ SequenceTagging eval structure
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import io
import os
import sys
import math
import argparse
import numpy as np
work_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(os.path.join(work_dir, "../"))
from paddle.incubate.hapi.model import set_device, Input
from paddle.incubate.hapi.text.sequence_tagging import SeqTagging, ChunkEval, LacLoss
from paddle.incubate.hapi.text.sequence_tagging import LacDataset, LacDataLoader
from paddle.incubate.hapi.text.sequence_tagging import check_gpu, check_version
from paddle.incubate.hapi.text.sequence_tagging import PDConfig
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.layers.utils import flatten from paddle.fluid.layers.utils import flatten
from paddle.incubate.hapi.model import Input, set_device
from sequence_tagging import SeqTagging, LacLoss, ChunkEval
from reader import LacDataset, LacDataLoader
from utils.check import check_gpu, check_version
from utils.configure import PDConfig
def main(args): def main(args):
...@@ -79,5 +69,6 @@ if __name__ == '__main__': ...@@ -79,5 +69,6 @@ if __name__ == '__main__':
use_gpu = True if args.device == "gpu" else False use_gpu = True if args.device == "gpu" else False
check_gpu(use_gpu) check_gpu(use_gpu)
check_version() # TODO: add check for 2.0.0-alpha0 if fluid.require_version support
# check_version()
main(args) main(args)
...@@ -18,25 +18,16 @@ SequenceTagging predict structure ...@@ -18,25 +18,16 @@ SequenceTagging predict structure
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import io
import os
import sys
import six import six
import math
import argparse
import numpy as np
work_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(os.path.join(work_dir, "../"))
from paddle.incubate.hapi.text.sequence_tagging import SeqTagging
from paddle.incubate.hapi.model import Input, set_device
from paddle.incubate.hapi.text.sequence_tagging import LacDataset, LacDataLoader
from paddle.incubate.hapi.text.sequence_tagging import check_gpu, check_version
from paddle.incubate.hapi.text.sequence_tagging import PDConfig
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.layers.utils import flatten from paddle.fluid.layers.utils import flatten
from paddle.incubate.hapi.model import Input, set_device
from sequence_tagging import SeqTagging, LacLoss, ChunkEval
from reader import LacDataset, LacDataLoader
from utils.check import check_gpu, check_version
from utils.configure import PDConfig
def main(args): def main(args):
...@@ -45,8 +36,9 @@ def main(args): ...@@ -45,8 +36,9 @@ def main(args):
inputs = [ inputs = [
Input( Input(
[None, None], 'int64', name='words'), Input( [None, None], 'int64', name='words'),
[None], 'int64', name='length') Input(
[None], 'int64', name='length'),
] ]
dataset = LacDataset(args) dataset = LacDataset(args)
...@@ -87,5 +79,6 @@ if __name__ == '__main__': ...@@ -87,5 +79,6 @@ if __name__ == '__main__':
use_gpu = True if args.device == "gpu" else False use_gpu = True if args.device == "gpu" else False
check_gpu(use_gpu) check_gpu(use_gpu)
check_version() # TODO: add check for 2.0.0-alpha0 if fluid.require_version support
# check_version()
main(args) main(args)
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
SequenceTagging dataset
"""
from __future__ import division
from __future__ import print_function
import io
import os
import numpy as np
import shutil
from functools import partial
import paddle
from paddle.io import BatchSampler, DataLoader, Dataset
from paddle.fluid.dygraph.parallel import ParallelEnv
from paddle.incubate.hapi.distributed import DistributedBatchSampler
class LacDataset(Dataset):
    """
    Load lexical analysis dataset.

    The constructor loads the word / label / word-replacement dictionaries
    named by `args`; `file_reader` then fills `self.examples` with raw text
    lines that `__getitem__` converts to id lists on demand.
    """

    def __init__(self, args):
        """
        Args:
            args: object providing `word_dict_path`, `label_dict_path` and
                `word_rep_dict_path` (the last one may be None).
        """
        self.word_dict_path = args.word_dict_path
        self.label_dict_path = args.label_dict_path
        self.word_rep_dict_path = args.word_rep_dict_path
        self._load_dict()
        self.examples = []

    def _load_dict(self):
        """Load the forward and reverse word/label maps and the replace map."""
        self.word2id_dict = self.load_kv_dict(
            self.word_dict_path, reverse=True, value_func=np.int64)
        self.id2word_dict = self.load_kv_dict(self.word_dict_path)
        self.label2id_dict = self.load_kv_dict(
            self.label_dict_path, reverse=True, value_func=np.int64)
        self.id2label_dict = self.load_kv_dict(self.label_dict_path)
        if self.word_rep_dict_path is None:
            self.word_replace_dict = dict()
        else:
            self.word_replace_dict = self.load_kv_dict(self.word_rep_dict_path)

    def load_kv_dict(self,
                     dict_path,
                     reverse=False,
                     delimiter="\t",
                     key_func=None,
                     value_func=None):
        """
        Load key-value dict from file.

        Each line is split on `delimiter`; lines that do not yield exactly
        two fields are ignored.

        Args:
            dict_path: path of the utf8 text file to read.
            reverse: when True, the first field is the value and the second
                the key.
            delimiter: field separator.
            key_func: optional callable applied to each key before storing.
            value_func: optional callable applied to each value.

        Returns:
            dict built from the file.

        Raises:
            KeyError: when a raw key is already present in the result.
        """
        result_dict = {}
        # Context manager so the handle is closed even if a line raises.
        with io.open(dict_path, "r", encoding='utf8') as fin:
            for line in fin:
                terms = line.strip("\n").split(delimiter)
                if len(terms) != 2:
                    continue
                if reverse:
                    value, key = terms
                else:
                    key, value = terms
                if key in result_dict:
                    raise KeyError("key duplicated with [%s]" % (key))
                if key_func:
                    key = key_func(key)
                if value_func:
                    value = value_func(value)
                result_dict[key] = value
        return result_dict

    @property
    def vocab_size(self):
        """Largest word id plus one."""
        return max(self.word2id_dict.values()) + 1

    @property
    def num_labels(self):
        """Largest label id plus one."""
        return max(self.label2id_dict.values()) + 1

    def get_num_examples(self, filename):
        """num of line of file"""
        with io.open(filename, "r", encoding='utf8') as fin:
            return sum(1 for _ in fin)

    def word_to_ids(self, words):
        """convert word to word index"""
        word_ids = []
        for word in words:
            # Apply the replacement table first, then fall back to "OOV".
            word = self.word_replace_dict.get(word, word)
            if word not in self.word2id_dict:
                word = "OOV"
            word_ids.append(self.word2id_dict[word])
        return word_ids

    def label_to_ids(self, labels):
        """convert label to label index"""
        label_ids = []
        for label in labels:
            # Unknown labels are mapped to the "O" label.
            if label not in self.label2id_dict:
                label = "O"
            label_ids.append(self.label2id_dict[label])
        return label_ids

    def file_reader(self, filename, phase="train"):
        """
        Read `filename` and append its raw examples to `self.examples`.

        For the `train` / `test` phases the first line must be the header
        `text_a<TAB>label`, and every following line is stored as-is. For
        any other phase only the first tab-separated field of each line is
        stored.

        Args:
            filename: path of the utf8 data file.
            phase: recorded on `self.phase`; it also selects how
                `__getitem__` decodes an example.
        """
        self.phase = phase
        with io.open(filename, "r", encoding="utf8") as fr:
            if phase in ["train", "test"]:
                headline = next(fr)
                headline = headline.strip().split('\t')
                assert len(headline) == 2 and headline[
                    0] == "text_a" and headline[1] == "label"

                for line in fr:
                    line_str = line.strip("\n")
                    # Skip empty lines and lines without a label column.
                    # The original joined these tests with `and`, which let
                    # tab-less lines through to fail later in __getitem__.
                    if len(line_str) < 1 or len(line_str.split('\t')) < 2:
                        continue
                    self.examples.append(line_str)
            else:
                for line in fr:
                    words = line.strip("\n").split("\t")[0]
                    self.examples.append(words)

    def __getitem__(self, idx):
        """
        Return `(word_ids, label_ids)` for train/test examples, otherwise
        the word ids of the example taken character by character.
        """
        line_str = self.examples[idx]
        if self.phase in ["train", "test"]:
            words, labels = line_str.split('\t')
            # Tokens and labels inside a column are separated by "\002".
            word_ids = self.word_to_ids(words.split("\002"))
            label_ids = self.label_to_ids(labels.split("\002"))
            assert len(word_ids) == len(label_ids)
            return word_ids, label_ids
        else:
            return self.word_to_ids(list(line_str))

    def __len__(self):
        """Number of examples loaded so far."""
        return len(self.examples)
def create_lexnet_data_generator(args, insts, phase="train"):
    """
    Collate a list of samples into padded numpy batches.

    Args:
        args: object providing `max_seq_len` (read in the `train` phase only).
        insts: list of samples. For `train` / `test` each sample is a
            `(word_ids, label_ids)` pair of lists; for any other phase each
            sample is a list of word ids.
        phase: `train` pads/truncates to `args.max_seq_len`; every other
            phase pads to the longest word sequence in the batch.

    Returns:
        `[words, lengths, labels, labels]` for `train` / `test`, otherwise
        `[words, lengths]`.
    """

    def padding_data(max_len, batch_data, if_len=False):
        # Truncate every row to max_len, then right-pad it with zeros.
        padded_rows = []
        row_lens = []
        for row in batch_data:
            clipped = row[:max_len]
            if if_len:
                # Length is recorded after truncation, before padding.
                row_lens.append(np.int64(len(clipped)))
            clipped += [0 for _ in range(max_len - len(clipped))]
            padded_rows.append(clipped)
        if not if_len:
            return np.array(padded_rows)
        return np.array(padded_rows), np.array(row_lens)

    if phase not in ("train", "test"):
        # Inference: samples carry word ids only.
        longest = max([len(inst) for inst in insts])
        words, lens = padding_data(longest, insts, if_len=True)
        return [words, lens]

    word_rows = [inst[0] for inst in insts]
    if phase == "train":
        limit = args.max_seq_len
    else:
        limit = max([len(inst[0]) for inst in insts])
    label_rows = [inst[1] for inst in insts]

    words, lens = padding_data(limit, word_rows, if_len=True)
    labels = padding_data(limit, label_rows)
    # The label batch is returned twice, as in the original collate output.
    return [words, lens, labels, labels]
class LacDataLoader(object):
    """
    Bundle a `LacDataset`, a batch sampler and a `DataLoader` for one phase.

    After construction the pieces are available as `self.dataset`,
    `self.sampler` and `self.dataloader`.
    """

    def __init__(self,
                 args,
                 place,
                 phase="train",
                 shuffle=False,
                 num_workers=0,
                 drop_last=False):
        """
        Args:
            args: configuration object; `batch_size` is read, plus one of
                `train_file` / `test_file` / `predict_file` depending on
                `phase`. It is also forwarded to `LacDataset` and to the
                collate function.
            place: forwarded to `DataLoader` as `places`.
            phase: one of "train", "test", "predict".
            shuffle: forwarded to the batch sampler.
            num_workers: forwarded to `DataLoader`.
            drop_last: forwarded to the batch sampler.
        """
        assert phase in [
            "train", "test", "predict"
        ], "phase should be in [train, test, predict], but get %s" % phase

        if phase == "train":
            file_name = args.train_file
        elif phase == "test":
            file_name = args.test_file
        elif phase == "predict":
            file_name = args.predict_file

        self.dataset = LacDataset(args)
        self.dataset.file_reader(file_name, phase=phase)

        # Only the training phase uses the distributed sampler.
        sampler_cls = (DistributedBatchSampler
                       if phase == "train" else BatchSampler)
        self.sampler = sampler_cls(
            dataset=self.dataset,
            batch_size=args.batch_size,
            shuffle=shuffle,
            drop_last=drop_last)

        collate = partial(create_lexnet_data_generator, args, phase=phase)
        self.dataloader = DataLoader(
            dataset=self.dataset,
            batch_sampler=self.sampler,
            places=place,
            collate_fn=collate,
            num_workers=num_workers,
            return_list=True)
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
SequenceTagging network structure
"""
from __future__ import division
from __future__ import print_function
import io
import os
import sys
import math
import argparse
import numpy as np
import paddle.fluid as fluid
from paddle.incubate.hapi.metrics import Metric
from paddle.incubate.hapi.model import Model
from paddle.incubate.hapi.loss import Loss
from paddle.incubate.hapi.text import SequenceTagging
from utils.check import check_gpu, check_version
from utils.configure import PDConfig
class SeqTagging(Model):
    """
    Lexical analysis network: a hapi `Model` wrapping the `SequenceTagging`
    layer.
    """

    def __init__(self, args, vocab_size, num_labels, length=None,
                 mode="train"):
        """
        Args:
            args: configuration object providing `word_emb_dim`,
                `grnn_hidden_dim`, `bigru_num`, `batch_size` and, optionally,
                `emb_learning_rate` / `crf_learning_rate`.
            vocab_size: forwarded to `SequenceTagging`.
            num_labels: forwarded to `SequenceTagging`.
            length: stored on `self.length`; not used elsewhere in this class.
            mode: "train" or "test" makes `forward` pass the target to the
                layer; any other value runs without it.
        """
        super(SeqTagging, self).__init__()
        self.mode_type = mode
        self.word_emb_dim = args.word_emb_dim
        self.vocab_size = vocab_size
        self.num_labels = num_labels
        self.grnn_hidden_dim = args.grnn_hidden_dim
        # Both learning rates fall back to 1.0 when `dir(args)` does not
        # list the corresponding attribute.
        self.emb_lr = args.emb_learning_rate if 'emb_learning_rate' in dir(
            args) else 1.0
        # The original read `args.emb_learning_rate` here (copy-paste slip),
        # so a configured crf_learning_rate was never applied.
        self.crf_lr = args.crf_learning_rate if 'crf_learning_rate' in dir(
            args) else 1.0
        self.bigru_num = args.bigru_num
        self.batch_size = args.batch_size
        self.init_bound = 0.1
        self.length = length

        self.sequence_tagging = SequenceTagging(
            vocab_size=self.vocab_size,
            num_labels=self.num_labels,
            word_emb_dim=self.word_emb_dim,
            grnn_hidden_dim=self.grnn_hidden_dim,
            emb_learning_rate=self.emb_lr,
            crf_learning_rate=self.crf_lr,
            bigru_num=self.bigru_num,
            init_bound=self.init_bound)

    def forward(self, *inputs):
        """
        Configure the network.

        Args:
            *inputs: `(word, lengths)` plus, in "train" / "test" mode, the
                target as third element.

        Returns:
            Whatever the wrapped `SequenceTagging` layer returns.
        """
        word = inputs[0]
        lengths = inputs[1]
        if self.mode_type == "train" or self.mode_type == "test":
            target = inputs[2]
            outputs = self.sequence_tagging(word, lengths, target)
        else:
            outputs = self.sequence_tagging(word, lengths)
        return outputs
class Chunk_eval(fluid.dygraph.Layer):
    """
    Layer that appends a `chunk_eval` op and returns its three chunk
    counters.
    """

    def __init__(self,
                 num_chunk_types,
                 chunk_scheme,
                 excluded_chunk_types=None):
        """Store the op attributes; they are passed to the op in `forward`."""
        super(Chunk_eval, self).__init__()
        self.num_chunk_types = num_chunk_types
        self.chunk_scheme = chunk_scheme
        self.excluded_chunk_types = excluded_chunk_types

    def forward(self, input, label, seq_length=None):
        """
        Args:
            input: fed to the op as "Inference".
            label: fed to the op as "Label".
            seq_length: optional; fed as "SeqLength" when not None.

        Returns:
            Tuple `(num_infer_chunks, num_label_chunks, num_correct_chunks)`.
        """
        new_var = self._helper.create_variable_for_type_inference

        # Output variables, created in the same order as the op lists them.
        precision = new_var(dtype="float32")
        recall = new_var(dtype="float32")
        f1_score = new_var(dtype="float32")
        num_infer_chunks = new_var(dtype="int64")
        num_label_chunks = new_var(dtype="int64")
        num_correct_chunks = new_var(dtype="int64")

        op_inputs = {"Inference": input, "Label": label}
        if seq_length is not None:
            op_inputs["SeqLength"] = seq_length

        op_outputs = {
            "Precision": [precision],
            "Recall": [recall],
            "F1-Score": [f1_score],
            "NumInferChunks": [num_infer_chunks],
            "NumLabelChunks": [num_label_chunks],
            "NumCorrectChunks": [num_correct_chunks]
        }
        op_attrs = {
            "num_chunk_types": self.num_chunk_types,
            "chunk_scheme": self.chunk_scheme,
            "excluded_chunk_types": self.excluded_chunk_types or []
        }
        self._helper.append_op(
            type='chunk_eval',
            inputs=op_inputs,
            outputs=op_outputs,
            attrs=op_attrs)
        # Only the raw counters are returned; precision/recall/F1 are
        # recomputed from them by the caller.
        return (num_infer_chunks, num_label_chunks, num_correct_chunks)
class LacLoss(Loss):
    """Loss that selects the second network output as the cost."""

    def __init__(self):
        super(LacLoss, self).__init__()

    def forward(self, outputs, labels):
        """Return `outputs[1]` unchanged; `labels` is not used."""
        return outputs[1]
class ChunkEval(Metric):
    """
    Chunk-level precision / recall / F1 metric built on the `Chunk_eval`
    layer.
    """

    def __init__(self, num_labels, name=None, *args, **kwargs):
        """
        Args:
            num_labels: the number of chunk types passed to `Chunk_eval` is
                ceil((num_labels - 1) / 2), with the "IOB" scheme.
            name: accepted for interface compatibility; see `_init_name`.
        """
        super(ChunkEval, self).__init__(*args, **kwargs)
        self._init_name(name)
        num_chunk_types = int(math.ceil((num_labels - 1) / 2.0))
        self.chunk_eval = Chunk_eval(num_chunk_types, "IOB")
        self.reset()

    @staticmethod
    def _scores(correct, inferred, labeled):
        # Precision, recall and F1 from chunk counters; each one is 0 when
        # the counter it is guarded by is falsy.
        precision = float(correct) / inferred if inferred else 0
        recall = float(correct) / labeled if labeled else 0
        f1_score = float(2 * precision * recall) / (
            precision + recall) if correct else 0
        return [precision, recall, f1_score]

    def add_metric_op(self, *args):
        """
        Build the chunk counters from positional args 0 (decoded tags),
        2 (lengths) and 3 (label); arg 1 is not read.
        """
        decoded, seq_lens, gold = args[0], args[2], args[3]
        counters = self.chunk_eval(
            input=decoded, label=gold, seq_length=seq_lens)
        (num_infer_chunks, num_label_chunks, num_correct_chunks) = counters
        return [num_infer_chunks, num_label_chunks, num_correct_chunks]

    def update(self, num_infer_chunks, num_label_chunks, num_correct_chunks,
               *args, **kwargs):
        """
        Add the batch counters to the running totals and return this
        batch's `[precision, recall, f1_score]`.
        """
        self.infer_chunks_total += num_infer_chunks
        self.label_chunks_total += num_label_chunks
        self.correct_chunks_total += num_correct_chunks
        return self._scores(num_correct_chunks, num_infer_chunks,
                            num_label_chunks)

    def reset(self):
        """Zero the running totals."""
        self.infer_chunks_total = 0
        self.label_chunks_total = 0
        self.correct_chunks_total = 0

    def accumulate(self):
        """Return `[precision, recall, f1_score]` over the running totals."""
        return self._scores(self.correct_chunks_total,
                            self.infer_chunks_total, self.label_chunks_total)

    def _init_name(self, name):
        # The `name` argument does not influence the stored names; they are
        # always the three fixed labels below.
        self._name = ['precision', 'recall', 'F1']

    def name(self):
        """Return the list of metric names."""
        return self._name
...@@ -18,24 +18,14 @@ SequenceTagging network structure ...@@ -18,24 +18,14 @@ SequenceTagging network structure
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import io
import os
import sys
import math
import argparse
import numpy as np
work_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(os.path.join(work_dir, "../"))
from paddle.incubate.hapi.model import Input, set_device
from paddle.incubate.hapi.text.sequence_tagging import SeqTagging, LacLoss, ChunkEval
from paddle.incubate.hapi.text.sequence_tagging import LacDataset, LacDataLoader
from paddle.incubate.hapi.text.sequence_tagging import check_gpu, check_version
from paddle.incubate.hapi.text.sequence_tagging import PDConfig
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.optimizer import AdamOptimizer from paddle.fluid.optimizer import AdamOptimizer
from paddle.incubate.hapi.model import Input, set_device
from sequence_tagging import SeqTagging, LacLoss, ChunkEval
from reader import LacDataset, LacDataLoader
from utils.check import check_gpu, check_version
from utils.configure import PDConfig
def main(args): def main(args):
...@@ -44,17 +34,15 @@ def main(args): ...@@ -44,17 +34,15 @@ def main(args):
inputs = [ inputs = [
Input( Input(
[None, None], 'int64', name='words'), Input( [None, None], 'int64', name='words'),
[None], 'int64', name='length'), Input( Input(
[None, None], 'int64', name='target') [None], 'int64', name='length'),
Input(
[None, None], 'int64', name='target'),
] ]
labels = [Input([None, None], 'int64', name='labels')] labels = [Input([None, None], 'int64', name='labels')]
feed_list = None if args.dynamic else [
x.forward() for x in inputs + labels
]
dataset = LacDataset(args) dataset = LacDataset(args)
train_dataset = LacDataLoader(args, place, phase="train") train_dataset = LacDataLoader(args, place, phase="train")
...@@ -95,6 +83,7 @@ if __name__ == '__main__': ...@@ -95,6 +83,7 @@ if __name__ == '__main__':
use_gpu = True if args.device == "gpu" else False use_gpu = True if args.device == "gpu" else False
check_gpu(use_gpu) check_gpu(use_gpu)
check_version() # TODO: add check for 2.0.0-alpha0 if fluid.require_version support
# check_version()
main(args) main(args)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
import paddle.fluid as fluid
__all__ = ['check_gpu', 'check_version']
def check_gpu(use_gpu):
    """
    Print an error and exit with status 1 when `use_gpu` is set but
    `fluid.is_compiled_with_cuda()` reports no CUDA support.

    Any `Exception` raised while checking is ignored, so the check is
    best-effort.

    Args:
        use_gpu: truthy when the caller wants to run on GPU.
    """
    err = ("Config use_gpu cannot be set as true while you are "
           "using paddlepaddle cpu version ! \nPlease try: \n"
           "\t1. Install paddlepaddle-gpu to run model on GPU \n"
           "\t2. Set use_gpu as false in config file to run "
           "model on CPU")

    try:
        if not use_gpu:
            return
        if fluid.is_compiled_with_cuda():
            return
        print(err)
        # SystemExit is not an Exception subclass, so it is not swallowed
        # by the handler below.
        sys.exit(1)
    except Exception:
        pass
def check_version():
    """
    Log error and exit when the installed version of paddlepaddle is
    not satisfied.

    Calls `fluid.require_version('2.0.0')`; if that raises any `Exception`
    the error message is printed and the process exits with status 1.
    """
    # Parenthesised concatenation instead of backslash continuations: the
    # original ended the message with a dangling backslash, which glues the
    # string to whatever line follows it.
    err = ("PaddlePaddle version 2.0 or higher is required, "
           "or a suitable develop version is satisfied as well. \n"
           "Please make sure the version is good with your code.")

    try:
        fluid.require_version('2.0.0')
    except Exception:
        print(err)
        sys.exit(1)
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import sys
import argparse
import json
import yaml
import six
import logging
# Log format that emits only the message text.
# NOTE(review): neither format string is referenced in the visible code;
# presumably they are meant for `logging` formatters — confirm against callers.
logging_only_message = "%(message)s"
# Log format with timestamp (millisecond suffix), level, module and function name.
logging_details = "%(asctime)s.%(msecs)03d %(levelname)s %(module)s - %(funcName)s: %(message)s"
class JsonConfig(object):
    """
    A high-level api for handling json configure file.

    The file is parsed once at construction; values are then read with
    `config[key]`.
    """

    def __init__(self, config_path):
        """
        Args:
            config_path: path of the json file to load.

        Raises:
            IOError: when the file cannot be opened or parsed.
        """
        self._config_dict = self._parse(config_path)

    def _parse(self, config_path):
        """Return the parsed json content, mapping any failure to IOError."""
        try:
            with open(config_path) as json_file:
                config_dict = json.load(json_file)
        except Exception:
            # `except Exception` rather than a bare `except`, so that
            # KeyboardInterrupt / SystemExit are not converted to IOError.
            raise IOError("Error in parsing bert model config file '%s'" %
                          config_path)
        else:
            return config_dict

    def __getitem__(self, key):
        """Return the value stored under `key` (KeyError if absent)."""
        return self._config_dict[key]

    def print_config(self):
        """Print every `key: value` pair sorted by key, then a separator."""
        for arg, value in sorted(six.iteritems(self._config_dict)):
            print('%s: %s' % (arg, value))
        print('------------------------------------------------')
class ArgumentGroup(object):
    """Thin wrapper around an argparse argument group."""

    def __init__(self, parser, title, des):
        """Create the group on `parser` with the given title and description."""
        self._group = parser.add_argument_group(title=title, description=des)

    def add_arg(self, name, type, default, help, **kwargs):
        """
        Register the option `--<name>` on the group.

        Args:
            name: option name without the leading dashes.
            type: converter for the value; `bool` is replaced by `str2bool`.
            default: default value.
            help: help text; ' Default: %(default)s.' is appended to it.
            **kwargs: forwarded to `add_argument`.
        """
        if type == bool:
            type = str2bool
        help_text = help + ' Default: %(default)s.'
        self._group.add_argument(
            "--" + name, default=default, type=type, help=help_text, **kwargs)
class ArgConfig(object):
    """
    A high-level api for handling argument configs.

    Builds an argparse parser with the "training", "logging" and "run_type"
    option groups plus an initially empty "customize" group that
    `add_arg` extends.
    """

    def __init__(self):
        parser = argparse.ArgumentParser()

        # Each entry: (name, type, default, help, extra add_argument kwargs).
        training_options = [
            ("epoch", int, 3, "Number of epoches for fine-tuning.", {}),
            ("learning_rate", float, 5e-5,
             "Learning rate used to train with warmup.", {}),
            ("lr_scheduler", str, "linear_warmup_decay",
             "scheduler of learning rate.",
             {"choices": ['linear_warmup_decay', 'noam_decay']}),
            ("weight_decay", float, 0.01,
             "Weight decay rate for L2 regularizer.", {}),
            ("warmup_proportion", float, 0.1,
             "Proportion of training steps to perform linear learning rate warmup for.",
             {}),
            ("save_steps", int, 1000,
             "The steps interval to save checkpoints.", {}),
            ("use_fp16", bool, False,
             "Whether to use fp16 mixed precision training.", {}),
            ("loss_scaling", float, 1.0,
             "Loss scaling factor for mixed precision training, only valid when use_fp16 is enabled.",
             {}),
            ("pred_dir", str, None, "Path to save the prediction results",
             {}),
        ]
        logging_options = [
            ("skip_steps", int, 10, "The steps interval to print loss.", {}),
            ("verbose", bool, False, "Whether to output verbose log.", {}),
        ]
        run_type_options = [
            ("use_cuda", bool, True, "If set, use GPU for training.", {}),
            ("use_fast_executor", bool, False,
             "If set, use fast parallel executor (in experiment).", {}),
            ("num_iteration_per_drop_scope", int, 1,
             "Ihe iteration intervals to clean up temporary variables.", {}),
            ("do_train", bool, True, "Whether to perform training.", {}),
            ("do_predict", bool, True, "Whether to perform prediction.", {}),
        ]

        # Groups are created and filled one after another so the parser's
        # option order stays the same.
        sections = [
            ("training", "training options.", training_options),
            ("logging", "logging related.", logging_options),
            ("run_type", "running type options.", run_type_options),
        ]
        for title, description, options in sections:
            group = ArgumentGroup(parser, title, description)
            for name, dtype, default, text, extra in options:
                group.add_arg(name, dtype, default, text, **extra)

        self.custom_g = ArgumentGroup(parser, "customize",
                                      "customized options.")
        self.parser = parser

    def add_arg(self, name, dtype, default, descrip):
        """Register one more option in the "customize" group."""
        self.custom_g.add_arg(name, dtype, default, descrip)

    def build_conf(self):
        """Parse the command line and return the argparse namespace."""
        return self.parser.parse_args()
def str2bool(v):
    """
    Return True when `v`, lower-cased, is "true", "t" or "1"; False for any
    other string.
    """
    # argparse cannot turn strings such as "true" / "False" into Python
    # booleans by itself, hence this converter.
    truthy = ("true", "t", "1")
    return v.lower() in truthy
def print_arguments(args, log=None):
    """
    Dump every attribute of `args` as `name: value`, sorted by name and
    framed by header / footer lines.

    Args:
        args: object accepted by `vars()`, e.g. an argparse namespace.
        log: when truthy, lines go to `log.info`; otherwise they are printed.
    """
    emit = log.info if log else print
    emit('----------- Configuration Arguments -----------')
    for arg, value in sorted(six.iteritems(vars(args))):
        emit('%s: %s' % (arg, value))
    emit('------------------------------------------------')
class PDConfig(object):
"""
A high-level API for managing configuration files in PaddlePaddle.
Can jointly work with command-line-arugment, json files and yaml files.
"""
def __init__(self, json_file="", yaml_file="", fuse_args=True):
"""
Init funciton for PDConfig.
json_file: the path to the json configure file.
yaml_file: the path to the yaml configure file.
fuse_args: if fuse the json/yaml configs with argparse.
"""
assert isinstance(json_file, str)
assert isinstance(yaml_file, str)
if json_file != "" and yaml_file != "":
raise Warning(
"json_file and yaml_file can not co-exist for now. please only use one configure file type."
)
return
self.args = None
self.arg_config = {}
self.json_config = {}
self.yaml_config = {}
parser = argparse.ArgumentParser()
self.default_g = ArgumentGroup(parser, "default", "default options.")
self.yaml_g = ArgumentGroup(parser, "yaml", "options from yaml.")
self.json_g = ArgumentGroup(parser, "json", "options from json.")
self.com_g = ArgumentGroup(parser, "custom", "customized options.")
self.default_g.add_arg("do_train", bool, False,
"Whether to perform training.")
self.default_g.add_arg("do_predict", bool, False,
"Whether to perform predicting.")
self.default_g.add_arg("do_eval", bool, False,
"Whether to perform evaluating.")
self.default_g.add_arg(
"do_save_inference_model", bool, False,
"Whether to perform model saving for inference.")
# NOTE: args for profiler
self.default_g.add_arg(
"is_profiler", int, 0,
"the switch of profiler tools. (used for benchmark)")
self.default_g.add_arg(
"profiler_path", str, './',
"the profiler output file path. (used for benchmark)")
self.default_g.add_arg("max_iter", int, 0,
"the max train batch num.(used for benchmark)")
self.parser = parser
if json_file != "":
self.load_json(json_file, fuse_args=fuse_args)
if yaml_file:
self.load_yaml(yaml_file, fuse_args=fuse_args)
def load_json(self, file_path, fuse_args=True):
if not os.path.exists(file_path):
raise Warning("the json file %s does not exist." % file_path)
return
with open(file_path, "r") as fin:
self.json_config = json.loads(fin.read())
fin.close()
if fuse_args:
for name in self.json_config:
if isinstance(self.json_config[name], list):
self.json_g.add_arg(
name,
type(self.json_config[name][0]),
self.json_config[name],
"This is from %s" % file_path,
nargs=len(self.json_config[name]))
continue
if not isinstance(self.json_config[name], int) \
and not isinstance(self.json_config[name], float) \
and not isinstance(self.json_config[name], str) \
and not isinstance(self.json_config[name], bool):
continue
self.json_g.add_arg(name,
type(self.json_config[name]),
self.json_config[name],
"This is from %s" % file_path)
def load_yaml(self, file_path, fuse_args=True):
if not os.path.exists(file_path):
raise Warning("the yaml file %s does not exist." % file_path)
return
with open(file_path, "r") as fin:
self.yaml_config = yaml.load(fin, Loader=yaml.SafeLoader)
fin.close()
if fuse_args:
for name in self.yaml_config:
if isinstance(self.yaml_config[name], list):
self.yaml_g.add_arg(
name,
type(self.yaml_config[name][0]),
self.yaml_config[name],
"This is from %s" % file_path,
nargs=len(self.yaml_config[name]))
continue
if not isinstance(self.yaml_config[name], int) \
and not isinstance(self.yaml_config[name], float) \
and not isinstance(self.yaml_config[name], str) \
and not isinstance(self.yaml_config[name], bool):
continue
self.yaml_g.add_arg(name,
type(self.yaml_config[name]),
self.yaml_config[name],
"This is from %s" % file_path)
def build(self):
self.args = self.parser.parse_args()
self.arg_config = vars(self.args)
def __add__(self, new_arg):
assert isinstance(new_arg, list) or isinstance(new_arg, tuple)
assert len(new_arg) >= 3
assert self.args is None
name = new_arg[0]
dtype = new_arg[1]
dvalue = new_arg[2]
desc = new_arg[3] if len(
new_arg) == 4 else "Description is not provided."
self.com_g.add_arg(name, dtype, dvalue, desc)
return self
def __getattr__(self, name):
    """Resolve an unknown attribute from the loaded configuration.

    Python calls this only after normal attribute lookup has failed. The
    name is searched in ``arg_config``, then ``json_config``, then
    ``yaml_config``; the first store that contains it wins.

    Args:
        name: Attribute name being looked up.

    Returns:
        The configured value for ``name``.

    Raises:
        Warning: If none of the available stores defines ``name``.
    """
    for store_name in ("arg_config", "json_config", "yaml_config"):
        # Fetch the store with object.__getattribute__ rather than
        # ``self.<store>``: if the store has not been set on the instance
        # yet, plain attribute access would re-enter __getattr__ and
        # recurse without bound. A missing store is treated as empty.
        try:
            store = object.__getattribute__(self, store_name)
        except AttributeError:
            continue
        if name in store:
            return store[name]
    raise Warning("The argument %s is not defined." % name)
def Print(self):
    """Print every known setting between two 70-character rules.

    All entries of ``self.arg_config`` are listed first, followed by the
    entries of ``self.json_config`` and then ``self.yaml_config`` whose
    names do not appear in ``self.arg_config``.
    """
    rule = "-" * 70
    row = "%s:\t\t\t\t%s"

    print(rule)
    for key in self.arg_config:
        print(row % (str(key), str(self.arg_config[key])))
    # File-based settings are shown only when not already listed above.
    for file_config in (self.json_config, self.yaml_config):
        for key in file_config:
            if key in self.arg_config:
                continue
            print(row % (str(key), str(file_config[key])))
    print(rule)
if __name__ == "__main__":
    # The string below is an inert expression holding alternative usage
    # examples (JSON-only and YAML-only configuration); it is never run.
    """
    pd_config = PDConfig(json_file = "./test/bert_config.json")
    pd_config.build()
    print(pd_config.do_train)
    print(pd_config.hidden_size)
    pd_config = PDConfig(yaml_file = "./test/bert_config.yaml")
    pd_config.build()
    print(pd_config.do_train)
    print(pd_config.hidden_size)
    """
    # Demo: load a YAML config, add one custom argument before parsing,
    # then read values back through attribute access.
    demo_config = PDConfig(yaml_file="./test/bert_config.yaml")
    demo_config += ("my_age", int, 18, "I am forever 18.")
    demo_config.build()
    for field in ("do_train", "hidden_size", "my_age"):
        print(getattr(demo_config, field))
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
import paddle.fluid as fluid
__all__ = ['chunk_count', "build_chunk"]
def build_chunk(data_list, id2label_dict):
    """Group a sequence of label ids into typed chunks.

    Each id is converted with ``str`` and looked up in ``id2label_dict``.
    Tags are expected to be ``"O"`` or to end in ``"B"`` / ``"I"`` with the
    chunk type before the first ``'-'``.

    A new chunk starts at every ``"O"`` tag, at every ``...B`` tag, and at
    an ``...I`` tag whose type differs from the running chunk. Whenever a
    new chunk starts at position ``i > 0``, the running chunk is stored
    under the key ``"<start>_<i - 1>"``. The chunk still open when the
    sequence ends is not stored.

    Returns:
        dict mapping ``"start_end"`` position keys to the chunk type
        (``"O"`` for chunks started by an ``"O"`` tag).
    """
    labels = [id2label_dict.get(str(item)) for item in data_list]
    chunks = {}
    current_type = ""
    span_begin = 0
    for pos, label in enumerate(labels):
        if label == u"O":
            next_type = label
        elif label.endswith(u"B"):
            next_type = label.split('-')[0]
        elif label.endswith(u"I"):
            next_type = label.split('-')[0]
            if next_type == current_type:
                # Continuation of the running chunk: nothing to close.
                continue
        else:
            # Tags of any other form leave the running chunk untouched.
            continue
        # Close the running chunk (there is none before position 0) and
        # open a new one at the current position.
        if pos != 0:
            chunks["%d_%d" % (span_begin, pos - 1)] = current_type
        span_begin = pos
        current_type = next_type
    return chunks
def chunk_count(infer_numpy, label_numpy, seq_len, id2label_dict):
    """Count predicted, reference and matching chunks over a batch.

    For every row ``i``, the first ``seq_len[i]`` entries of
    ``infer_numpy[i]`` and ``label_numpy[i]`` are converted to chunk
    dictionaries with ``build_chunk``. A predicted chunk is counted as
    correct when the reference has the same position key with the same
    chunk type.

    Returns:
        Tuple ``(num_infer_chunks, num_label_chunks, num_correct_chunks)``.
    """
    batch_size = infer_numpy.shape[0]
    assert batch_size == label_numpy.shape[0]

    infer_total, label_total, correct_total = 0, 0, 0
    for row in range(batch_size):
        infer_tags = infer_numpy[row][:seq_len[row]]
        label_tags = label_numpy[row][:seq_len[row]]

        predicted = build_chunk(infer_tags, id2label_dict)
        infer_total += len(predicted)
        expected = build_chunk(label_tags, id2label_dict)
        label_total += len(expected)

        correct_total += sum(
            1 for span in predicted
            if span in expected and expected[span] == predicted[span])
    return infer_total, label_total, correct_total
...@@ -18,9 +18,9 @@ import numpy as np ...@@ -18,9 +18,9 @@ import numpy as np
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.fluid.layers as layers import paddle.fluid.layers as layers
from paddle.fluid.dygraph import Embedding, LayerNorm, Linear, Layer, to_variable from paddle.fluid.dygraph import Embedding, LayerNorm, Linear, Layer
from paddle.fluid.dygraph.learning_rate_scheduler import LearningRateDecay from paddle.incubate.hapi.model import Model
from paddle.incubate.hapi.model import Model, CrossEntropy, Loss from paddle.incubate.hapi.loss import Loss
from paddle.incubate.hapi.text import TransformerBeamSearchDecoder, DynamicDecode from paddle.incubate.hapi.text import TransformerBeamSearchDecoder, DynamicDecode
...@@ -43,31 +43,6 @@ def position_encoding_init(n_position, d_pos_vec): ...@@ -43,31 +43,6 @@ def position_encoding_init(n_position, d_pos_vec):
return position_enc.astype("float32") return position_enc.astype("float32")
class NoamDecay(LearningRateDecay):
    """
    Learning rate scheduler with warmup.

    The value produced by ``step`` is
    ``d_model**-0.5 * min(step_num**-0.5, warmup_steps**-1.5 * step_num)
    * static_lr``.
    """

    def __init__(self,
                 d_model,
                 warmup_steps,
                 static_lr=2.0,
                 begin=1,
                 step=1,
                 dtype='float32'):
        # begin, step and dtype are handed to the base class unchanged.
        super(NoamDecay, self).__init__(begin, step, dtype)
        self.d_model = d_model
        self.warmup_steps = warmup_steps
        self.static_lr = static_lr

    def step(self):
        """Return the learning rate for the current ``self.step_num``."""
        decay_term = self.create_lr_var(self.step_num**-0.5)
        warmup_term = self.create_lr_var(
            (self.warmup_steps**-1.5) * self.step_num)
        model_scale = self.d_model**-0.5
        return model_scale * layers.elementwise_min(
            decay_term, warmup_term) * self.static_lr
class PrePostProcessLayer(Layer): class PrePostProcessLayer(Layer):
""" """
PrePostProcessLayer PrePostProcessLayer
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册