Commit 51c28c20 authored by W wuzewu

Update dygraph code

Parent 8793a586
#coding:utf-8
import argparse
import os
import numpy as np
import paddlehub as hub
import paddle.fluid as fluid
from paddle.fluid.dygraph import Linear
from paddle.fluid.dygraph.base import to_variable
from paddle.fluid.optimizer import AdamOptimizer
from paddlehub.finetune.evaluate import chunk_eval, calculate_f1
# yapf: disable
parser = argparse.ArgumentParser(__doc__)
parser.add_argument("--num_epoch", type=int, default=1, help="Number of epoches for fine-tuning.")
parser.add_argument("--batch_size", type=int, default=16, help="Total examples' number in batch for training.")
parser.add_argument("--log_interval", type=int, default=10, help="log interval.")
parser.add_argument("--save_interval", type=int, default=10, help="save interval.")
parser.add_argument("--checkpoint_dir", type=str, default="paddlehub_finetune_ckpt_dygraph", help="Path to save log data.")
parser.add_argument("--max_seq_len", type=int, default=512, help="Number of words of the longest seqence.")
# yapf: enable.
class TransformerSequenceLabelLayer(fluid.dygraph.Layer):
    def __init__(self, num_classes, transformer):
        super(TransformerSequenceLabelLayer, self).__init__()
        self.num_classes = num_classes
        self.transformer = transformer
        self.fc = Linear(input_dim=768, output_dim=num_classes)

    def forward(self, input_ids, position_ids, segment_ids, input_mask):
        result = self.transformer(input_ids, position_ids, segment_ids,
                                  input_mask)
        pred = self.fc(result['sequence_output'])
        ret_infers = fluid.layers.reshape(
            x=fluid.layers.argmax(pred, axis=2), shape=[-1, 1])
        pred = fluid.layers.reshape(pred, shape=[-1, self.num_classes])
        return fluid.layers.softmax(pred), ret_infers

def finetune(args):
    ernie = hub.Module(name="ernie", max_seq_len=args.max_seq_len)
    with fluid.dygraph.guard():
        dataset = hub.dataset.MSRA_NER()
        ts = TransformerSequenceLabelLayer(
            num_classes=dataset.num_labels, transformer=ernie)
        adam = AdamOptimizer(learning_rate=1e-5, parameter_list=ts.parameters())
        state_dict_path = os.path.join(args.checkpoint_dir,
                                       'dygraph_state_dict')
        if os.path.exists(state_dict_path + '.pdparams'):
            state_dict, _ = fluid.load_dygraph(state_dict_path)
            ts.load_dict(state_dict)

        reader = hub.reader.SequenceLabelReader(
            dataset=dataset,
            vocab_path=ernie.get_vocab_path(),
            max_seq_len=args.max_seq_len,
            sp_model_path=ernie.get_spm_path(),
            word_dict_path=ernie.get_word_dict_path())
        train_reader = reader.data_generator(
            batch_size=args.batch_size, phase='train')

        loss_sum = total_infer = total_label = total_correct = cnt = 0
        # Train for num_epoch epochs
        for epoch in range(args.num_epoch):
            # Read the training data and train on it
            for batch_id, data in enumerate(train_reader()):
                input_ids = np.array(data[0][0]).astype(np.int64)
                position_ids = np.array(data[0][1]).astype(np.int64)
                segment_ids = np.array(data[0][2]).astype(np.int64)
                input_mask = np.array(data[0][3]).astype(np.float32)
                labels = np.array(data[0][4]).astype(np.int64).reshape(-1, 1)
                seq_len = np.squeeze(
                    np.array(data[0][5]).astype(np.int64), axis=1)

                pred, ret_infers = ts(input_ids, position_ids, segment_ids,
                                      input_mask)
                loss = fluid.layers.cross_entropy(pred, to_variable(labels))
                avg_loss = fluid.layers.mean(loss)
                avg_loss.backward()
                # Update the parameters
                adam.minimize(avg_loss)

                loss_sum += avg_loss.numpy() * labels.shape[0]
                label_num, infer_num, correct_num = chunk_eval(
                    labels, ret_infers.numpy(), seq_len, dataset.num_labels, 1)
                cnt += labels.shape[0]
                total_infer += infer_num
                total_label += label_num
                total_correct += correct_num

                if batch_id % args.log_interval == 0:
                    precision, recall, f1 = calculate_f1(
                        total_label, total_infer, total_correct)
                    print('epoch {}: loss {}, f1 {} recall {} precision {}'.
                          format(epoch, loss_sum / cnt, f1, recall, precision))
                    loss_sum = total_infer = total_label = total_correct = cnt = 0

                if batch_id % args.save_interval == 0:
                    state_dict = ts.state_dict()
                    fluid.save_dygraph(state_dict, state_dict_path)


if __name__ == "__main__":
    args = parser.parse_args()
    finetune(args)
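
For reference, a minimal sketch (not part of this commit) of how the chunk counts accumulated above turn into precision, recall and F1. The helper name f1_from_counts is made up for illustration; it only mirrors what calculate_f1 computes from total_label, total_infer and total_correct:

# Illustrative only: precision/recall/F1 from the accumulated chunk counts.
# total_label: gold chunks, total_infer: predicted chunks, total_correct: chunks in both.
def f1_from_counts(total_label, total_infer, total_correct):
    precision = total_correct / total_infer if total_infer else 0.0
    recall = total_correct / total_label if total_label else 0.0
    if precision + recall == 0.0:
        return precision, recall, 0.0
    return precision, recall, 2 * precision * recall / (precision + recall)

# Example: 90 correct chunks out of 100 predicted and 120 gold chunks
# gives precision 0.9, recall 0.75, F1 = 2 * 0.9 * 0.75 / 1.65 ≈ 0.818.
print(f1_from_counts(120, 100, 90))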
......@@ -28,10 +28,10 @@ class TransformerClassifier(fluid.dygraph.Layer):
         self.fc = Linear(input_dim=768, output_dim=num_classes)

     def forward(self, input_ids, position_ids, segment_ids, input_mask):
-        pooled_output, sequence_output = self.transformer(
-            input_ids, position_ids, segment_ids, input_mask)
+        result = self.transformer(input_ids, position_ids, segment_ids,
+                                  input_mask)
         cls_feats = fluid.layers.dropout(
-            pooled_output,
+            result['pooled_output'],
             dropout_prob=0.1,
             dropout_implementation="upscale_in_train")
         cls_feats = fluid.layers.reshape(cls_feats, shape=[-1, 768])
......@@ -40,14 +40,12 @@ class TransformerClassifier(fluid.dygraph.Layer):
 def finetune(args):
+    ernie = hub.Module(name="ernie", max_seq_len=args.max_seq_len)
     with fluid.dygraph.guard():
-        ernie = hub.Module(name="ernie")
         dataset = hub.dataset.ChnSentiCorp()
         tc = TransformerClassifier(
             num_classes=dataset.num_labels, transformer=ernie)
-        adam = AdamOptimizer(
-            learning_rate=0.001, parameter_list=tc.parameters())
-        print(len(tc.parameters()))
+        adam = AdamOptimizer(learning_rate=1e-5, parameter_list=tc.parameters())
         state_dict_path = os.path.join(args.checkpoint_dir,
                                        'dygraph_state_dict')
         if os.path.exists(state_dict_path + '.pdparams'):
......
......@@ -19,10 +19,11 @@ from __future__ import print_function
 import copy

 import paddle
 import paddle.fluid as fluid

 from paddlehub.module import module_desc_pb2
-from paddlehub.common.utils import from_pyobj_to_module_attr, from_module_attr_to_pyobj
+from paddlehub.common.utils import from_pyobj_to_module_attr, from_module_attr_to_pyobj, version_compare
 from paddlehub.common.logger import logger

 dtype_map = {
......@@ -62,7 +63,8 @@ def get_variable_info(var):
         var_info['trainable'] = var.trainable
         var_info['optimize_attr'] = var.optimize_attr
         var_info['regularizer'] = var.regularizer
-        var_info['gradient_clip_attr'] = var.gradient_clip_attr
+        if not version_compare(paddle.__version__, '1.8'):
+            var_info['gradient_clip_attr'] = var.gradient_clip_attr
         var_info['do_model_average'] = var.do_model_average
     else:
         var_info['persistable'] = var.persistable
......
......@@ -146,12 +146,6 @@ class Module(fluid.dygraph.Layer):
             self._initialize(**kwargs)
             self._is_initialize = True
         self._code_version = "v2"
-        self.model_runner = fluid.dygraph.StaticModelRunner(
-            self.pretrained_model_path)
-
-    @property
-    def pretrained_model_path(self):
-        return self.default_pretrained_model_path

     def _get_func_name(self, current_cls, module_func_dict):
         mod = current_cls.__module__ + "." + current_cls.__name__
......@@ -256,7 +250,8 @@ class Module(fluid.dygraph.Layer):
         pass

     def forward(self, *args, **kwargs):
-        return self.model_runner(*args, **kwargs)
+        raise RuntimeError('{} does not support dynamic graph mode yet.'.format(
+            self.name))


 class ModuleHelper(object):
......
......@@ -24,13 +24,15 @@ import os
 import re
 import six

+import paddle
 import numpy as np
 import paddle.fluid as fluid
-from paddlehub.common import paddle_helper
-from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor
 import paddlehub as hub
+from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor
+from paddlehub.common import paddle_helper, tmp_dir
 from paddlehub.common.logger import logger
-from paddlehub.common.utils import sys_stdin_encoding
+from paddlehub.common.utils import sys_stdin_encoding, version_compare
 from paddlehub.io.parser import txt_parser
 from paddlehub.module.module import runnable
......@@ -246,9 +248,44 @@ class TransformerModule(NLPBaseModule):
     Tranformer Module base class can be used by BERT, ERNIE, RoBERTa and so on.
     """

-    @property
-    def pretrained_model_path(self):
-        return self.params_path
+    def __init__(self,
+                 name=None,
+                 directory=None,
+                 module_dir=None,
+                 version=None,
+                 max_seq_len=128,
+                 **kwargs):
+        if not directory:
+            return
+        super(TransformerModule, self).__init__(
+            name=name,
+            directory=directory,
+            module_dir=module_dir,
+            version=version,
+            **kwargs)
+
+        self.max_seq_len = max_seq_len
+        if version_compare(paddle.__version__, '1.8.0'):
+            with tmp_dir() as _dir:
+                input_dict, output_dict, program = self.context(
+                    max_seq_len=max_seq_len)
+                fluid.io.save_inference_model(
+                    dirname=_dir,
+                    main_program=program,
+                    feeded_var_names=[
+                        input_dict['input_ids'].name,
+                        input_dict['position_ids'].name,
+                        input_dict['segment_ids'].name,
+                        input_dict['input_mask'].name
+                    ],
+                    target_vars=[
+                        output_dict["pooled_output"],
+                        output_dict["sequence_output"]
+                    ],
+                    executor=fluid.Executor(fluid.CPUPlace()))
+
+                with fluid.dygraph.guard():
+                    self.model_runner = fluid.dygraph.StaticModelRunner(_dir)

     def init_pretraining_params(self, exe, pretraining_params_path,
                                 main_program):
......@@ -275,7 +312,7 @@ class TransformerModule(NLPBaseModule):
     def context(
             self,
-            max_seq_len=128,
+            max_seq_len=None,
             trainable=True,
     ):
         """
......@@ -291,6 +328,9 @@ class TransformerModule(NLPBaseModule):
"""
if not max_seq_len:
max_seq_len = self.max_seq_len
assert max_seq_len <= self.MAX_SEQ_LEN and max_seq_len >= 1, "max_seq_len({}) should be in the range of [1, {}]".format(
max_seq_len, self.MAX_SEQ_LEN)
......@@ -357,14 +397,6 @@ class TransformerModule(NLPBaseModule):
         return inputs, outputs, module_program

-    # @property
-    # def model_runner(self):
-    #     if not self._model_runner:
-    #         self._model_runner = fluid.dygraph.StaticModelRunner(
-    #             self.params_path)
-    #     return self._model_runner
-
     def get_embedding(self, texts, use_gpu=False, batch_size=1):
         """
         get pooled_output and sequence_output for input texts.
......@@ -443,3 +475,16 @@ class TransformerModule(NLPBaseModule):
"The module context has not been initialized. "
"Please call context() before using get_params_layer")
return self.params_layer
def forward(self, input_ids, position_ids, segment_ids, input_mask):
if version_compare(paddle.__version__, '1.8.0'):
pooled_output, sequence_output = self.model_runner(
input_ids, position_ids, segment_ids, input_mask)
return {
'pooled_output': pooled_output,
'sequence_output': sequence_output
}
else:
raise RuntimeError(
'{} only support dynamic graph mode in paddle >= 1.8.0'.format(
self.name))
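
For context, a minimal usage sketch (not part of the diff) of the dict-returning forward added above. It assumes paddle >= 1.8.0 and a locally installed "ernie" module; the dummy [batch, seq_len, 1] input layout simply mirrors what the demo reader above produces:

# Illustrative sketch: calling a TransformerModule in dygraph mode and reading
# the dict returned by the new forward(). Shapes and the "ernie" name are
# assumptions for this example, not requirements introduced by the commit.
import numpy as np
import paddle.fluid as fluid
import paddlehub as hub

with fluid.dygraph.guard():
    ernie = hub.Module(name="ernie", max_seq_len=128)
    batch, seq_len = 1, 128
    input_ids = np.zeros([batch, seq_len, 1], dtype=np.int64)
    position_ids = np.zeros([batch, seq_len, 1], dtype=np.int64)
    segment_ids = np.zeros([batch, seq_len, 1], dtype=np.int64)
    input_mask = np.ones([batch, seq_len, 1], dtype=np.float32)
    result = ernie(input_ids, position_ids, segment_ids, input_mask)
    pooled = result['pooled_output']        # sentence-level feature
    sequence = result['sequence_output']    # per-token features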