Commit b7e8230f authored by: W wuzewu

Merge branch 'dygraph' into develop
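This merge brings dynamic graph (dygraph) support into develop. It adds three dygraph fine-tuning demos (image classification with resnet50_vd_10w, sequence labeling and text classification with ERNIE) and reworks Module and TransformerModule so that PaddleHub modules can run as fluid.dygraph layers; the diffs for those changes follow the demo scripts.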

#coding:utf-8
import argparse
import os

import numpy as np
import paddlehub as hub
import paddle.fluid as fluid
from paddle.fluid.dygraph import Linear
from paddle.fluid.dygraph.base import to_variable
from paddle.fluid.optimizer import AdamOptimizer

# yapf: disable
parser = argparse.ArgumentParser(__doc__)
parser.add_argument("--num_epoch", type=int, default=1, help="Number of epochs for fine-tuning.")
parser.add_argument("--checkpoint_dir", type=str, default="paddlehub_finetune_ckpt_dygraph", help="Directory to save checkpoints.")
parser.add_argument("--batch_size", type=int, default=16, help="Number of examples in one training batch.")
parser.add_argument("--log_interval", type=int, default=10, help="Log interval (in steps).")
parser.add_argument("--save_interval", type=int, default=10, help="Save interval (in steps).")
# yapf: enable


class ResNet50(fluid.dygraph.Layer):
    def __init__(self, num_classes, backbone):
        super(ResNet50, self).__init__()
        self.fc = Linear(input_dim=2048, output_dim=num_classes)
        self.backbone = backbone

    def forward(self, imgs):
        feature_map = self.backbone(imgs)
        feature_map = fluid.layers.reshape(feature_map, shape=[-1, 2048])
        pred = self.fc(feature_map)
        return fluid.layers.softmax(pred)


def finetune(args):
    with fluid.dygraph.guard():
        resnet50_vd_10w = hub.Module(name="resnet50_vd_10w")
        dataset = hub.dataset.Flowers()
        resnet = ResNet50(
            num_classes=dataset.num_labels, backbone=resnet50_vd_10w)
        adam = AdamOptimizer(
            learning_rate=0.001, parameter_list=resnet.parameters())
        state_dict_path = os.path.join(args.checkpoint_dir,
                                       'dygraph_state_dict')
        if os.path.exists(state_dict_path + '.pdparams'):
            state_dict, _ = fluid.load_dygraph(state_dict_path)
            resnet.load_dict(state_dict)

        reader = hub.reader.ImageClassificationReader(
            image_width=resnet50_vd_10w.get_expected_image_width(),
            image_height=resnet50_vd_10w.get_expected_image_height(),
            images_mean=resnet50_vd_10w.get_pretrained_images_mean(),
            images_std=resnet50_vd_10w.get_pretrained_images_std(),
            dataset=dataset)
        train_reader = reader.data_generator(
            batch_size=args.batch_size, phase='train')

        loss_sum = acc_sum = cnt = 0
        # Run num_epoch passes over the training data.
        for epoch in range(args.num_epoch):
            # Iterate over the training batches.
            for batch_id, data in enumerate(train_reader()):
                imgs = np.array(data[0][0])
                labels = np.array(data[0][1])

                pred = resnet(imgs)
                acc = fluid.layers.accuracy(pred, to_variable(labels))
                loss = fluid.layers.cross_entropy(pred, to_variable(labels))
                avg_loss = fluid.layers.mean(loss)
                avg_loss.backward()
                # Update the parameters.
                adam.minimize(avg_loss)
                # Gradients are not cleared automatically in dygraph mode.
                resnet.clear_gradients()

                loss_sum += avg_loss.numpy() * imgs.shape[0]
                acc_sum += acc.numpy() * imgs.shape[0]
                cnt += imgs.shape[0]
                if batch_id % args.log_interval == 0:
                    print('epoch {}: loss {}, acc {}'.format(
                        epoch, loss_sum / cnt, acc_sum / cnt))
                    loss_sum = acc_sum = cnt = 0

                if batch_id % args.save_interval == 0:
                    state_dict = resnet.state_dict()
                    fluid.save_dygraph(state_dict, state_dict_path)


if __name__ == "__main__":
    args = parser.parse_args()
    finetune(args)
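For completeness, a minimal inference sketch for the fine-tuned classifier. It assumes the ResNet50 class from the script above is in scope and that the checkpoint written during training exists; the random channels-first input batch is purely illustrative, real inputs would come from the reader.

# Inference sketch (assumptions: ResNet50 from above is in scope,
# the default checkpoint exists, inputs are NCHW float32).
import numpy as np
import paddle.fluid as fluid
import paddlehub as hub

with fluid.dygraph.guard():
    backbone = hub.Module(name="resnet50_vd_10w")
    dataset = hub.dataset.Flowers()
    model = ResNet50(num_classes=dataset.num_labels, backbone=backbone)
    state_dict, _ = fluid.load_dygraph(
        "paddlehub_finetune_ckpt_dygraph/dygraph_state_dict")
    model.load_dict(state_dict)
    model.eval()  # disable training-only behavior

    # An illustrative random batch sized from the module's own getters.
    h = backbone.get_expected_image_height()
    w = backbone.get_expected_image_width()
    imgs = np.random.rand(1, 3, h, w).astype(np.float32)
    probs = model(imgs)
    print("predicted class:", probs.numpy().argmax(axis=-1))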
#coding:utf-8
import argparse
import os

import numpy as np
import paddlehub as hub
import paddle.fluid as fluid
from paddle.fluid.dygraph import Linear
from paddle.fluid.dygraph.base import to_variable
from paddle.fluid.optimizer import AdamOptimizer
from paddlehub.finetune.evaluate import chunk_eval, calculate_f1

# yapf: disable
parser = argparse.ArgumentParser(__doc__)
parser.add_argument("--num_epoch", type=int, default=1, help="Number of epochs for fine-tuning.")
parser.add_argument("--batch_size", type=int, default=16, help="Number of examples in one training batch.")
parser.add_argument("--log_interval", type=int, default=10, help="Log interval (in steps).")
parser.add_argument("--save_interval", type=int, default=10, help="Save interval (in steps).")
parser.add_argument("--checkpoint_dir", type=str, default="paddlehub_finetune_ckpt_dygraph", help="Directory to save checkpoints.")
parser.add_argument("--max_seq_len", type=int, default=512, help="Maximum sequence length.")
# yapf: enable


class TransformerSequenceLabelLayer(fluid.dygraph.Layer):
    def __init__(self, num_classes, transformer):
        super(TransformerSequenceLabelLayer, self).__init__()
        self.num_classes = num_classes
        self.transformer = transformer
        self.fc = Linear(input_dim=768, output_dim=num_classes)

    def forward(self, input_ids, position_ids, segment_ids, input_mask):
        result = self.transformer(input_ids, position_ids, segment_ids,
                                  input_mask)
        pred = self.fc(result['sequence_output'])
        ret_infers = fluid.layers.reshape(
            x=fluid.layers.argmax(pred, axis=2), shape=[-1, 1])
        pred = fluid.layers.reshape(pred, shape=[-1, self.num_classes])
        return fluid.layers.softmax(pred), ret_infers


def finetune(args):
    ernie = hub.Module(name="ernie", max_seq_len=args.max_seq_len)
    with fluid.dygraph.guard():
        dataset = hub.dataset.MSRA_NER()
        ts = TransformerSequenceLabelLayer(
            num_classes=dataset.num_labels, transformer=ernie)
        adam = AdamOptimizer(learning_rate=1e-5, parameter_list=ts.parameters())
        state_dict_path = os.path.join(args.checkpoint_dir,
                                       'dygraph_state_dict')
        if os.path.exists(state_dict_path + '.pdparams'):
            state_dict, _ = fluid.load_dygraph(state_dict_path)
            ts.load_dict(state_dict)

        reader = hub.reader.SequenceLabelReader(
            dataset=dataset,
            vocab_path=ernie.get_vocab_path(),
            max_seq_len=args.max_seq_len,
            sp_model_path=ernie.get_spm_path(),
            word_dict_path=ernie.get_word_dict_path())
        train_reader = reader.data_generator(
            batch_size=args.batch_size, phase='train')

        loss_sum = total_infer = total_label = total_correct = cnt = 0
        # Run num_epoch passes over the training data.
        for epoch in range(args.num_epoch):
            # Iterate over the training batches.
            for batch_id, data in enumerate(train_reader()):
                input_ids = np.array(data[0][0]).astype(np.int64)
                position_ids = np.array(data[0][1]).astype(np.int64)
                segment_ids = np.array(data[0][2]).astype(np.int64)
                input_mask = np.array(data[0][3]).astype(np.float32)
                labels = np.array(data[0][4]).astype(np.int64).reshape(-1, 1)
                seq_len = np.squeeze(
                    np.array(data[0][5]).astype(np.int64), axis=1)

                pred, ret_infers = ts(input_ids, position_ids, segment_ids,
                                      input_mask)
                loss = fluid.layers.cross_entropy(pred, to_variable(labels))
                avg_loss = fluid.layers.mean(loss)
                avg_loss.backward()
                # Update the parameters.
                adam.minimize(avg_loss)
                # Gradients are not cleared automatically in dygraph mode.
                ts.clear_gradients()

                loss_sum += avg_loss.numpy() * labels.shape[0]
                label_num, infer_num, correct_num = chunk_eval(
                    labels, ret_infers.numpy(), seq_len, dataset.num_labels, 1)
                cnt += labels.shape[0]
                total_infer += infer_num
                total_label += label_num
                total_correct += correct_num

                if batch_id % args.log_interval == 0:
                    precision, recall, f1 = calculate_f1(
                        total_label, total_infer, total_correct)
                    print('epoch {}: loss {}, f1 {}, recall {}, precision {}'.
                          format(epoch, loss_sum / cnt, f1, recall, precision))
                    loss_sum = total_infer = total_label = total_correct = cnt = 0

                if batch_id % args.save_interval == 0:
                    state_dict = ts.state_dict()
                    fluid.save_dygraph(state_dict, state_dict_path)


if __name__ == "__main__":
    args = parser.parse_args()
    finetune(args)
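The running chunk counts above feed calculate_f1. Its arithmetic reduces to ordinary chunk-level precision and recall; a sketch of what it presumably computes (the exact zero-division handling in paddlehub.finetune.evaluate is an assumption):

def f1_from_counts(num_label, num_infer, num_correct):
    # precision: fraction of predicted chunks that are correct
    precision = num_correct / num_infer if num_infer else 0.0
    # recall: fraction of gold chunks that were recovered
    recall = num_correct / num_label if num_label else 0.0
    # F1 is the harmonic mean of precision and recall
    f1 = 2 * precision * recall / (precision + recall) if precision + recall else 0.0
    return precision, recall, f1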
#coding:utf-8
import argparse
import os

import numpy as np
import paddlehub as hub
import paddle.fluid as fluid
from paddle.fluid.dygraph import Linear
from paddle.fluid.dygraph.base import to_variable
from paddle.fluid.optimizer import AdamOptimizer

# yapf: disable
parser = argparse.ArgumentParser(__doc__)
parser.add_argument("--num_epoch", type=int, default=1, help="Number of epochs for fine-tuning.")
parser.add_argument("--batch_size", type=int, default=16, help="Number of examples in one training batch.")
parser.add_argument("--log_interval", type=int, default=10, help="Log interval (in steps).")
parser.add_argument("--save_interval", type=int, default=10, help="Save interval (in steps).")
parser.add_argument("--checkpoint_dir", type=str, default="paddlehub_finetune_ckpt_dygraph", help="Directory to save checkpoints.")
parser.add_argument("--max_seq_len", type=int, default=512, help="Maximum sequence length.")
# yapf: enable


class TransformerClassifier(fluid.dygraph.Layer):
    def __init__(self, num_classes, transformer):
        super(TransformerClassifier, self).__init__()
        self.num_classes = num_classes
        self.transformer = transformer
        self.fc = Linear(input_dim=768, output_dim=num_classes)

    def forward(self, input_ids, position_ids, segment_ids, input_mask):
        result = self.transformer(input_ids, position_ids, segment_ids,
                                  input_mask)
        cls_feats = fluid.layers.dropout(
            result['pooled_output'],
            dropout_prob=0.1,
            dropout_implementation="upscale_in_train")
        cls_feats = fluid.layers.reshape(cls_feats, shape=[-1, 768])
        pred = self.fc(cls_feats)
        return fluid.layers.softmax(pred)


def finetune(args):
    ernie = hub.Module(name="ernie", max_seq_len=args.max_seq_len)
    with fluid.dygraph.guard():
        dataset = hub.dataset.ChnSentiCorp()
        tc = TransformerClassifier(
            num_classes=dataset.num_labels, transformer=ernie)
        adam = AdamOptimizer(learning_rate=1e-5, parameter_list=tc.parameters())
        state_dict_path = os.path.join(args.checkpoint_dir,
                                       'dygraph_state_dict')
        if os.path.exists(state_dict_path + '.pdparams'):
            state_dict, _ = fluid.load_dygraph(state_dict_path)
            tc.load_dict(state_dict)

        reader = hub.reader.ClassifyReader(
            dataset=dataset,
            vocab_path=ernie.get_vocab_path(),
            max_seq_len=args.max_seq_len,
            sp_model_path=ernie.get_spm_path(),
            word_dict_path=ernie.get_word_dict_path())
        train_reader = reader.data_generator(
            batch_size=args.batch_size, phase='train')

        loss_sum = acc_sum = cnt = 0
        # Run num_epoch passes over the training data.
        for epoch in range(args.num_epoch):
            # Iterate over the training batches.
            for batch_id, data in enumerate(train_reader()):
                input_ids = np.array(data[0][0]).astype(np.int64)
                position_ids = np.array(data[0][1]).astype(np.int64)
                segment_ids = np.array(data[0][2]).astype(np.int64)
                input_mask = np.array(data[0][3]).astype(np.float32)
                labels = np.array(data[0][4]).astype(np.int64)

                pred = tc(input_ids, position_ids, segment_ids, input_mask)
                acc = fluid.layers.accuracy(pred, to_variable(labels))
                loss = fluid.layers.cross_entropy(pred, to_variable(labels))
                avg_loss = fluid.layers.mean(loss)
                avg_loss.backward()
                # Update the parameters.
                adam.minimize(avg_loss)
                # Gradients are not cleared automatically in dygraph mode.
                tc.clear_gradients()

                loss_sum += avg_loss.numpy() * labels.shape[0]
                acc_sum += acc.numpy() * labels.shape[0]
                cnt += labels.shape[0]
                if batch_id % args.log_interval == 0:
                    print('epoch {}: loss {}, acc {}'.format(
                        epoch, loss_sum / cnt, acc_sum / cnt))
                    loss_sum = acc_sum = cnt = 0

                if batch_id % args.save_interval == 0:
                    state_dict = tc.state_dict()
                    fluid.save_dygraph(state_dict, state_dict_path)


if __name__ == "__main__":
    args = parser.parse_args()
    finetune(args)
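All three demos share one skeleton: load a pretrained hub.Module as the backbone, wrap it in a small fluid.dygraph.Layer head, feed it from a PaddleHub reader, and train with AdamOptimizer inside a dygraph guard. Assuming this last script is saved as text_cls_dygraph.py (a hypothetical name), it can be launched with `python text_cls_dygraph.py --num_epoch 1 --batch_size 16 --max_seq_len 128`; all flags fall back to the defaults declared above.

The diffs below show the library changes that make these demos possible.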
@@ -19,10 +19,11 @@ from __future__ import print_function

 import copy

+import paddle
 import paddle.fluid as fluid

 from paddlehub.module import module_desc_pb2
-from paddlehub.common.utils import from_pyobj_to_module_attr, from_module_attr_to_pyobj
+from paddlehub.common.utils import from_pyobj_to_module_attr, from_module_attr_to_pyobj, version_compare
 from paddlehub.common.logger import logger

 dtype_map = {
@@ -62,7 +63,8 @@ def get_variable_info(var):
         var_info['trainable'] = var.trainable
         var_info['optimize_attr'] = var.optimize_attr
         var_info['regularizer'] = var.regularizer
-        var_info['gradient_clip_attr'] = var.gradient_clip_attr
+        if not version_compare(paddle.__version__, '1.8'):
+            var_info['gradient_clip_attr'] = var.gradient_clip_attr
         var_info['do_model_average'] = var.do_model_average
     else:
         var_info['persistable'] = var.persistable
...
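Context for the hunk above: Paddle 1.8 removed the per-variable gradient_clip_attr field, so get_variable_info now records it only on older Paddle versions. version_compare itself is not shown in this commit; judging from its use here and in the TransformerModule forward below, it returns True when the installed version is at least the given one. A plausible sketch, not the actual paddlehub.common.utils implementation:

def version_compare(version1, version2):
    # Compare dotted version strings numerically; '1.8' pads to '1.8.0'.
    v1 = [int(p) for p in version1.split('.') if p.isdigit()]
    v2 = [int(p) for p in version2.split('.') if p.isdigit()]
    length = max(len(v1), len(v2))
    v1 += [0] * (length - len(v1))
    v2 += [0] * (length - len(v2))
    return v1 >= v2  # assumption: True when version1 is at least version2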
@@ -89,7 +89,7 @@ def moduleinfo(name, version, author, author_email, summary, type):
     return _wrapper


-class Module(object):
+class Module(fluid.dygraph.Layer):
     def __new__(cls,
                 name=None,
                 directory=None,
@@ -121,7 +121,7 @@ class Module(object):
                 module = Module.init_with_directory(
                     directory=directory, **kwargs)
         else:
-            module = object.__new__(cls)
+            module = fluid.dygraph.Layer.__new__(cls)

         return module
@@ -135,6 +135,7 @@ class Module(object):
         if "_is_initialize" in self.__dict__ and self._is_initialize:
             return

+        super(Module, self).__init__()
         _run_func_name = self._get_func_name(self.__class__,
                                              _module_runnable_func)
         self._run_func = getattr(self,
@@ -248,6 +249,10 @@ class Module(object):
     def _initialize(self):
         pass

+    def forward(self, *args, **kwargs):
+        raise RuntimeError('{} does not support dynamic graph mode yet.'.format(
+            self.name))
+

 class ModuleHelper(object):
     def __init__(self, directory):
...
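The upshot of the Module changes: since Module now derives from fluid.dygraph.Layer, a loaded module can be registered as a sublayer of a user-defined dygraph model, exactly as the ResNet50 and TransformerClassifier demos above do, and its weights are picked up by parameters() for the optimizer. The base forward raises RuntimeError so that modules without dygraph support fail with a clear message rather than an obscure attribute error.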
@@ -24,13 +24,15 @@ import os
 import re

 import six
+import paddle
 import numpy as np
 import paddle.fluid as fluid
-from paddlehub.common import paddle_helper
-from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor

 import paddlehub as hub
+from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor
+from paddlehub.common import paddle_helper, tmp_dir
 from paddlehub.common.logger import logger
-from paddlehub.common.utils import sys_stdin_encoding
+from paddlehub.common.utils import sys_stdin_encoding, version_compare
 from paddlehub.io.parser import txt_parser
 from paddlehub.module.module import runnable
...
@@ -246,6 +248,45 @@ class TransformerModule(NLPBaseModule):
     Transformer Module base class, can be used by BERT, ERNIE, RoBERTa and so on.
     """

+    def __init__(self,
+                 name=None,
+                 directory=None,
+                 module_dir=None,
+                 version=None,
+                 max_seq_len=128,
+                 **kwargs):
+        if not directory:
+            return
+        super(TransformerModule, self).__init__(
+            name=name,
+            directory=directory,
+            module_dir=module_dir,
+            version=version,
+            **kwargs)
+
+        self.max_seq_len = max_seq_len
+        if version_compare(paddle.__version__, '1.8.0'):
+            with tmp_dir() as _dir:
+                input_dict, output_dict, program = self.context(
+                    max_seq_len=max_seq_len)
+                fluid.io.save_inference_model(
+                    dirname=_dir,
+                    main_program=program,
+                    feeded_var_names=[
+                        input_dict['input_ids'].name,
+                        input_dict['position_ids'].name,
+                        input_dict['segment_ids'].name,
+                        input_dict['input_mask'].name
+                    ],
+                    target_vars=[
+                        output_dict["pooled_output"],
+                        output_dict["sequence_output"]
+                    ],
+                    executor=fluid.Executor(fluid.CPUPlace()))
+
+                with fluid.dygraph.guard():
+                    self.model_runner = fluid.dygraph.StaticModelRunner(_dir)
+
     def init_pretraining_params(self, exe, pretraining_params_path,
                                 main_program):
         assert os.path.exists(
@@ -271,7 +312,7 @@ class TransformerModule(NLPBaseModule):

     def context(
             self,
-            max_seq_len=128,
+            max_seq_len=None,
             trainable=True,
     ):
         """
@@ -287,6 +328,9 @@ class TransformerModule(NLPBaseModule):
         """

+        if not max_seq_len:
+            max_seq_len = self.max_seq_len
+
         assert max_seq_len <= self.MAX_SEQ_LEN and max_seq_len >= 1, "max_seq_len({}) should be in the range of [1, {}]".format(
             max_seq_len, self.MAX_SEQ_LEN)
@@ -431,3 +475,16 @@ class TransformerModule(NLPBaseModule):
                 "The module context has not been initialized. "
                 "Please call context() before using get_params_layer")
         return self.params_layer
+
+    def forward(self, input_ids, position_ids, segment_ids, input_mask):
+        if version_compare(paddle.__version__, '1.8.0'):
+            pooled_output, sequence_output = self.model_runner(
+                input_ids, position_ids, segment_ids, input_mask)
+            return {
+                'pooled_output': pooled_output,
+                'sequence_output': sequence_output
+            }
+        else:
+            raise RuntimeError(
+                '{} only supports dynamic graph mode in paddle >= 1.8.0'.format(
+                    self.name))
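Net effect of the TransformerModule changes: on Paddle >= 1.8.0 the constructor exports the module's static program via save_inference_model into a temporary directory and wraps it with fluid.dygraph.StaticModelRunner, so the module can be called like any dygraph layer. A minimal usage sketch; the [batch, seq_len, 1] input shapes are an assumption based on typical ERNIE feeds (the demos above take real inputs from the PaddleHub reader):

import numpy as np
import paddle.fluid as fluid
import paddlehub as hub

# Construct outside the guard, as in the demos; __init__ manages its own guard.
ernie = hub.Module(name="ernie", max_seq_len=128)

with fluid.dygraph.guard():
    batch, seq_len = 1, 128
    # Token/position/segment ids and mask; shapes are an assumption.
    input_ids = np.zeros((batch, seq_len, 1), dtype=np.int64)
    position_ids = np.zeros((batch, seq_len, 1), dtype=np.int64)
    segment_ids = np.zeros((batch, seq_len, 1), dtype=np.int64)
    input_mask = np.ones((batch, seq_len, 1), dtype=np.float32)

    out = ernie(input_ids, position_ids, segment_ids, input_mask)
    print(out['pooled_output'].shape)    # sentence-level feature, width 768
    print(out['sequence_output'].shape)  # per-token features, width 768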