Commit 51c28c20 authored by wuzewu

Update dygraph code

Parent 8793a586
#coding:utf-8
import argparse
import os

import numpy as np
import paddlehub as hub
import paddle.fluid as fluid
from paddle.fluid.dygraph import Linear
from paddle.fluid.dygraph.base import to_variable
from paddle.fluid.optimizer import AdamOptimizer
from paddlehub.finetune.evaluate import chunk_eval, calculate_f1

# yapf: disable
parser = argparse.ArgumentParser(__doc__)
parser.add_argument("--num_epoch", type=int, default=1, help="Number of epochs for fine-tuning.")
parser.add_argument("--batch_size", type=int, default=16, help="Number of examples in one training batch.")
parser.add_argument("--log_interval", type=int, default=10, help="Log interval, in steps.")
parser.add_argument("--save_interval", type=int, default=10, help="Save interval, in steps.")
parser.add_argument("--checkpoint_dir", type=str, default="paddlehub_finetune_ckpt_dygraph", help="Directory to save checkpoints.")
parser.add_argument("--max_seq_len", type=int, default=512, help="Number of words of the longest sequence.")
# yapf: enable


class TransformerSequenceLabelLayer(fluid.dygraph.Layer):
    def __init__(self, num_classes, transformer):
        super(TransformerSequenceLabelLayer, self).__init__()
        self.num_classes = num_classes
        self.transformer = transformer
        self.fc = Linear(input_dim=768, output_dim=num_classes)

    def forward(self, input_ids, position_ids, segment_ids, input_mask):
        result = self.transformer(input_ids, position_ids, segment_ids,
                                  input_mask)
        pred = self.fc(result['sequence_output'])
        ret_infers = fluid.layers.reshape(
            x=fluid.layers.argmax(pred, axis=2), shape=[-1, 1])
        pred = fluid.layers.reshape(pred, shape=[-1, self.num_classes])
        return fluid.layers.softmax(pred), ret_infers


def finetune(args):
    ernie = hub.Module(name="ernie", max_seq_len=args.max_seq_len)
    with fluid.dygraph.guard():
        dataset = hub.dataset.MSRA_NER()
        ts = TransformerSequenceLabelLayer(
            num_classes=dataset.num_labels, transformer=ernie)
        adam = AdamOptimizer(
            learning_rate=1e-5, parameter_list=ts.parameters())
        state_dict_path = os.path.join(args.checkpoint_dir,
                                       'dygraph_state_dict')
        if os.path.exists(state_dict_path + '.pdparams'):
            state_dict, _ = fluid.load_dygraph(state_dict_path)
            ts.load_dict(state_dict)

        reader = hub.reader.SequenceLabelReader(
            dataset=dataset,
            vocab_path=ernie.get_vocab_path(),
            max_seq_len=args.max_seq_len,
            sp_model_path=ernie.get_spm_path(),
            word_dict_path=ernie.get_word_dict_path())
        train_reader = reader.data_generator(
            batch_size=args.batch_size, phase='train')

        loss_sum = total_infer = total_label = total_correct = cnt = 0
        # Train for num_epoch epochs
        for epoch in range(args.num_epoch):
            # Iterate over the training data
            for batch_id, data in enumerate(train_reader()):
                input_ids = np.array(data[0][0]).astype(np.int64)
                position_ids = np.array(data[0][1]).astype(np.int64)
                segment_ids = np.array(data[0][2]).astype(np.int64)
                input_mask = np.array(data[0][3]).astype(np.float32)
                labels = np.array(data[0][4]).astype(np.int64).reshape(-1, 1)
                seq_len = np.squeeze(
                    np.array(data[0][5]).astype(np.int64), axis=1)
                pred, ret_infers = ts(input_ids, position_ids, segment_ids,
                                      input_mask)
                loss = fluid.layers.cross_entropy(pred, to_variable(labels))
                avg_loss = fluid.layers.mean(loss)
                avg_loss.backward()
                # Update parameters
                adam.minimize(avg_loss)

                loss_sum += avg_loss.numpy() * labels.shape[0]
                label_num, infer_num, correct_num = chunk_eval(
                    labels, ret_infers.numpy(), seq_len, dataset.num_labels, 1)
                cnt += labels.shape[0]
                total_infer += infer_num
                total_label += label_num
                total_correct += correct_num

                if batch_id % args.log_interval == 0:
                    precision, recall, f1 = calculate_f1(
                        total_label, total_infer, total_correct)
                    print('epoch {}: loss {}, f1 {} recall {} precision {}'.
                          format(epoch, loss_sum / cnt, f1, recall, precision))
                    loss_sum = total_infer = total_label = total_correct = cnt = 0

                if batch_id % args.save_interval == 0:
                    state_dict = ts.state_dict()
                    fluid.save_dygraph(state_dict, state_dict_path)


if __name__ == "__main__":
    args = parser.parse_args()
    finetune(args)
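For reference, a minimal sketch (not part of this commit) of restoring the checkpoint that this script saves and running prediction; it reuses only names defined above, and the eval-mode switch plus the feeding note are assumptions for illustration:

ernie = hub.Module(name="ernie", max_seq_len=512)
with fluid.dygraph.guard():
    dataset = hub.dataset.MSRA_NER()
    ts = TransformerSequenceLabelLayer(
        num_classes=dataset.num_labels, transformer=ernie)
    # Restore the parameters written by fluid.save_dygraph during training
    state_dict, _ = fluid.load_dygraph(
        "paddlehub_finetune_ckpt_dygraph/dygraph_state_dict")
    ts.load_dict(state_dict)
    ts.eval()  # disable train-only behavior such as dropout
    # Feed batches exactly as in finetune(); the second return value,
    # ret_infers, holds the predicted label id for every token.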
@@ -28,10 +28,10 @@ class TransformerClassifier(fluid.dygraph.Layer):
         self.fc = Linear(input_dim=768, output_dim=num_classes)
 
     def forward(self, input_ids, position_ids, segment_ids, input_mask):
-        pooled_output, sequence_output = self.transformer(
-            input_ids, position_ids, segment_ids, input_mask)
+        result = self.transformer(input_ids, position_ids, segment_ids,
+                                  input_mask)
         cls_feats = fluid.layers.dropout(
-            pooled_output,
+            result['pooled_output'],
             dropout_prob=0.1,
             dropout_implementation="upscale_in_train")
         cls_feats = fluid.layers.reshape(cls_feats, shape=[-1, 768])
@@ -40,14 +40,12 @@ class TransformerClassifier(fluid.dygraph.Layer):
 
 def finetune(args):
+    ernie = hub.Module(name="ernie", max_seq_len=args.max_seq_len)
     with fluid.dygraph.guard():
-        ernie = hub.Module(name="ernie")
         dataset = hub.dataset.ChnSentiCorp()
         tc = TransformerClassifier(
             num_classes=dataset.num_labels, transformer=ernie)
-        adam = AdamOptimizer(
-            learning_rate=0.001, parameter_list=tc.parameters())
-        print(len(tc.parameters()))
+        adam = AdamOptimizer(learning_rate=1e-5, parameter_list=tc.parameters())
         state_dict_path = os.path.join(args.checkpoint_dir,
                                        'dygraph_state_dict')
         if os.path.exists(state_dict_path + '.pdparams'):
......
@@ -19,10 +19,11 @@ from __future__ import print_function
 import copy
 
+import paddle
 import paddle.fluid as fluid
 
 from paddlehub.module import module_desc_pb2
-from paddlehub.common.utils import from_pyobj_to_module_attr, from_module_attr_to_pyobj
+from paddlehub.common.utils import from_pyobj_to_module_attr, from_module_attr_to_pyobj, version_compare
 from paddlehub.common.logger import logger
 
 dtype_map = {
@@ -62,7 +63,8 @@ def get_variable_info(var):
         var_info['trainable'] = var.trainable
         var_info['optimize_attr'] = var.optimize_attr
         var_info['regularizer'] = var.regularizer
-        var_info['gradient_clip_attr'] = var.gradient_clip_attr
+        if not version_compare(paddle.__version__, '1.8'):
+            var_info['gradient_clip_attr'] = var.gradient_clip_attr
         var_info['do_model_average'] = var.do_model_average
     else:
         var_info['persistable'] = var.persistable
......
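For context, Paddle 1.8 dropped the per-variable gradient_clip_attr (gradient clipping is configured on the optimizer instead), so serializing it must now be version-gated. A minimal sketch of the same guard in isolation; the helper name safe_variable_extras is hypothetical, and the semantics of version_compare (true when the installed version is at least the given release) are inferred from its call sites in this commit rather than from its implementation:

import paddle
from paddlehub.common.utils import version_compare

def safe_variable_extras(var):  # hypothetical helper, for illustration only
    extras = {}
    # Only read attributes that exist on this Paddle release
    # (version_compare semantics assumed, see note above).
    if not version_compare(paddle.__version__, '1.8'):
        extras['gradient_clip_attr'] = var.gradient_clip_attr
    return extras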
@@ -146,12 +146,6 @@ class Module(fluid.dygraph.Layer):
             self._initialize(**kwargs)
             self._is_initialize = True
             self._code_version = "v2"
-            self.model_runner = fluid.dygraph.StaticModelRunner(
-                self.pretrained_model_path)
-
-    @property
-    def pretrained_model_path(self):
-        return self.default_pretrained_model_path
 
     def _get_func_name(self, current_cls, module_func_dict):
         mod = current_cls.__module__ + "." + current_cls.__name__
@@ -256,7 +250,8 @@ class Module(fluid.dygraph.Layer):
         pass
 
     def forward(self, *args, **kwargs):
-        return self.model_runner(*args, **kwargs)
+        raise RuntimeError('{} does not support dynamic graph mode yet.'.format(
+            self.name))
 
 
 class ModuleHelper(object):
......
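Design note: the base Module no longer builds a StaticModelRunner eagerly; modules without a dynamic-graph path now fail fast, and the dygraph machinery moves into TransformerModule below. Calling an unsupported module inside a dygraph guard would surface roughly as follows (a hypothetical illustration, the module name is invented):

with fluid.dygraph.guard():
    outputs = some_module(inputs)  # some_module: any non-transformer Module
# RuntimeError: <module name> does not support dynamic graph mode yet.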
@@ -24,13 +24,15 @@ import os
 import re
 import six
 
+import paddle
 import numpy as np
 import paddle.fluid as fluid
-from paddlehub.common import paddle_helper
-from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor
 
 import paddlehub as hub
+from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor
+from paddlehub.common import paddle_helper, tmp_dir
 from paddlehub.common.logger import logger
-from paddlehub.common.utils import sys_stdin_encoding
+from paddlehub.common.utils import sys_stdin_encoding, version_compare
 from paddlehub.io.parser import txt_parser
 from paddlehub.module.module import runnable
@@ -246,9 +248,44 @@ class TransformerModule(NLPBaseModule):
     Transformer Module base class can be used by BERT, ERNIE, RoBERTa and so on.
     """
 
-    @property
-    def pretrained_model_path(self):
-        return self.params_path
+    def __init__(self,
+                 name=None,
+                 directory=None,
+                 module_dir=None,
+                 version=None,
+                 max_seq_len=128,
+                 **kwargs):
+        if not directory:
+            return
+        super(TransformerModule, self).__init__(
+            name=name,
+            directory=directory,
+            module_dir=module_dir,
+            version=version,
+            **kwargs)
+        self.max_seq_len = max_seq_len
+        if version_compare(paddle.__version__, '1.8.0'):
+            with tmp_dir() as _dir:
+                input_dict, output_dict, program = self.context(
+                    max_seq_len=max_seq_len)
+                fluid.io.save_inference_model(
+                    dirname=_dir,
+                    main_program=program,
+                    feeded_var_names=[
+                        input_dict['input_ids'].name,
+                        input_dict['position_ids'].name,
+                        input_dict['segment_ids'].name,
+                        input_dict['input_mask'].name
+                    ],
+                    target_vars=[
+                        output_dict["pooled_output"],
+                        output_dict["sequence_output"]
+                    ],
+                    executor=fluid.Executor(fluid.CPUPlace()))
+
+                with fluid.dygraph.guard():
+                    self.model_runner = fluid.dygraph.StaticModelRunner(_dir)
 
     def init_pretraining_params(self, exe, pretraining_params_path,
                                 main_program):
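The bridge above relies on fluid.dygraph.StaticModelRunner, which loads a model saved by fluid.io.save_inference_model and exposes it as a callable dygraph Layer. A minimal sketch of that bridge in isolation; the ./infer_model path is an assumption for illustration:

import paddle.fluid as fluid

with fluid.dygraph.guard():
    # Load an inference model previously written by fluid.io.save_inference_model
    runner = fluid.dygraph.StaticModelRunner("./infer_model")  # hypothetical path
    # runner behaves like any fluid.dygraph.Layer: calling it replays the saved
    # program, taking its positional arguments as the saved feed variables and
    # returning the saved fetch targets.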
@@ -275,7 +312,7 @@ class TransformerModule(NLPBaseModule):
 
     def context(
             self,
-            max_seq_len=128,
+            max_seq_len=None,
             trainable=True,
     ):
         """
@@ -291,6 +328,9 @@ class TransformerModule(NLPBaseModule):
         """
+        if not max_seq_len:
+            max_seq_len = self.max_seq_len
+
         assert max_seq_len <= self.MAX_SEQ_LEN and max_seq_len >= 1, "max_seq_len({}) should be in the range of [1, {}]".format(
             max_seq_len, self.MAX_SEQ_LEN)
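With max_seq_len=None as the new default, context() falls back to the value captured at construction time, so the inference model exported in __init__ and any program requested later agree on sequence length unless the caller overrides it. A sketch of the resulting behavior, assuming the defaulting described above:

ernie = hub.Module(name="ernie", max_seq_len=128)
inputs, outputs, program = ernie.context()                 # uses the constructor's 128
inputs, outputs, program = ernie.context(max_seq_len=256)  # explicit override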
@@ -357,14 +397,6 @@ class TransformerModule(NLPBaseModule):
         return inputs, outputs, module_program
 
-    # @property
-    # def model_runner(self):
-    #     if not self._model_runner:
-    #         self._model_runner = fluid.dygraph.StaticModelRunner(
-    #             self.params_path)
-    #     return self._model_runner
-
     def get_embedding(self, texts, use_gpu=False, batch_size=1):
         """
         get pooled_output and sequence_output for input texts.
@@ -443,3 +475,16 @@ class TransformerModule(NLPBaseModule):
             "The module context has not been initialized. "
             "Please call context() before using get_params_layer")
         return self.params_layer
+
+    def forward(self, input_ids, position_ids, segment_ids, input_mask):
+        if version_compare(paddle.__version__, '1.8.0'):
+            pooled_output, sequence_output = self.model_runner(
+                input_ids, position_ids, segment_ids, input_mask)
+            return {
+                'pooled_output': pooled_output,
+                'sequence_output': sequence_output
+            }
+        else:
+            raise RuntimeError(
+                '{} only supports dynamic graph mode in paddle >= 1.8.0'.format(
+                    self.name))
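Taken together, on Paddle >= 1.8.0 a transformer module can now be called like any other dygraph layer, which is exactly how the updated demos consume it. A minimal sketch, with the input arrays prepared as in the sequence-labeling demo above:

import paddle.fluid as fluid
import paddlehub as hub

ernie = hub.Module(name="ernie", max_seq_len=128)
with fluid.dygraph.guard():
    # input_ids, position_ids, segment_ids, input_mask prepared as in the demo
    result = ernie(input_ids, position_ids, segment_ids, input_mask)
    pooled = result['pooled_output']      # sentence-level feature
    sequence = result['sequence_output']  # per-token features for labeling heads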