提交 b7e8230f 编写于 作者: W wuzewu

Merge branch 'dygraph' into develop

#coding:utf-8
import argparse
import os
import numpy as np
import paddlehub as hub
import paddle.fluid as fluid
from paddle.fluid.dygraph import Linear
from paddle.fluid.dygraph.base import to_variable
from paddle.fluid.optimizer import AdamOptimizer
# yapf: disable
# Command-line options for the image-classification fine-tuning demo.
# The module docstring is passed as ``description``: the first positional
# parameter of ArgumentParser is ``prog`` (the program name shown in the
# usage line), not the description text.
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("--num_epoch", type=int, default=1, help="Number of epoches for fine-tuning.")
parser.add_argument("--checkpoint_dir", type=str, default="paddlehub_finetune_ckpt_dygraph", help="Path to save log data.")
parser.add_argument("--batch_size", type=int, default=16, help="Total examples' number in batch for training.")
parser.add_argument("--log_interval", type=int, default=10, help="log interval.")
parser.add_argument("--save_interval", type=int, default=10, help="save interval.")
# yapf: enable.
class ResNet50(fluid.dygraph.Layer):
    """Image classifier made of a feature backbone plus one linear head.

    Args:
        num_classes: Output dimension of the linear head.
        backbone: Callable layer applied to the input images; its output is
            reshaped to ``[-1, 2048]`` before the head is applied.
    """

    def __init__(self, num_classes, backbone):
        super(ResNet50, self).__init__()
        # Assignment order is kept (head first, backbone second) so the
        # registration order of the sub-layers is unchanged.
        self.fc = Linear(input_dim=2048, output_dim=num_classes)
        self.backbone = backbone

    def forward(self, imgs):
        """Return the softmax of the linear head applied to backbone features."""
        features = fluid.layers.reshape(self.backbone(imgs), shape=[-1, 2048])
        scores = self.fc(features)
        return fluid.layers.softmax(scores)
def finetune(args):
    """Fine-tune a ResNet50 classifier on the Flowers dataset in dygraph mode.

    Builds the model on top of the ``resnet50_vd_10w`` hub module, restores a
    previously saved state dict when one exists under ``args.checkpoint_dir``,
    then trains with Adam, printing running loss/accuracy and saving the
    state dict at the configured batch intervals.

    Args:
        args: Parsed command-line namespace. Reads ``checkpoint_dir``,
            ``batch_size``, ``num_epoch``, ``log_interval`` and
            ``save_interval``.
    """
    with fluid.dygraph.guard():
        resnet50_vd_10w = hub.Module(name="resnet50_vd_10w")
        dataset = hub.dataset.Flowers()
        resnet = ResNet50(
            num_classes=dataset.num_labels, backbone=resnet50_vd_10w)
        adam = AdamOptimizer(
            learning_rate=0.001, parameter_list=resnet.parameters())

        # Resume from an earlier run if its parameter file is present.
        state_dict_path = os.path.join(args.checkpoint_dir,
                                       'dygraph_state_dict')
        if os.path.exists(state_dict_path + '.pdparams'):
            state_dict, _ = fluid.load_dygraph(state_dict_path)
            resnet.load_dict(state_dict)

        reader = hub.reader.ImageClassificationReader(
            image_width=resnet50_vd_10w.get_expected_image_width(),
            image_height=resnet50_vd_10w.get_expected_image_height(),
            images_mean=resnet50_vd_10w.get_pretrained_images_mean(),
            images_std=resnet50_vd_10w.get_pretrained_images_std(),
            dataset=dataset)
        train_reader = reader.data_generator(
            batch_size=args.batch_size, phase='train')

        loss_sum = acc_sum = cnt = 0
        # Train for num_epoch epochs.
        for epoch in range(args.num_epoch):
            # Read the training data batch by batch.
            for batch_id, data in enumerate(train_reader()):
                # presumably data[0] is (images, labels) for the batch --
                # TODO confirm against ImageClassificationReader.
                imgs = np.array(data[0][0])
                labels = np.array(data[0][1])
                label_var = to_variable(labels)

                pred = resnet(imgs)
                acc = fluid.layers.accuracy(pred, label_var)
                loss = fluid.layers.cross_entropy(pred, label_var)
                avg_loss = fluid.layers.mean(loss)
                avg_loss.backward()
                # Update the parameters.
                adam.minimize(avg_loss)
                # Gradients accumulate across backward() calls in dygraph
                # mode; reset them so each step uses only its own batch.
                resnet.clear_gradients()

                batch_size = imgs.shape[0]
                loss_sum += avg_loss.numpy() * batch_size
                acc_sum += acc.numpy() * batch_size
                cnt += batch_size
                if batch_id % args.log_interval == 0:
                    print('epoch {}: loss {}, acc {}'.format(
                        epoch, loss_sum / cnt, acc_sum / cnt))
                    # Restart the running averages after each report.
                    loss_sum = acc_sum = cnt = 0

                if batch_id % args.save_interval == 0:
                    state_dict = resnet.state_dict()
                    fluid.save_dygraph(state_dict, state_dict_path)
if __name__ == "__main__":
    # Script entry point: parse the command-line options, then fine-tune.
    args = parser.parse_args()
    finetune(args)
#coding:utf-8
import argparse
import os
import numpy as np
import paddlehub as hub
import paddle.fluid as fluid
from paddle.fluid.dygraph import Linear
from paddle.fluid.dygraph.base import to_variable
from paddle.fluid.optimizer import AdamOptimizer
from paddlehub.finetune.evaluate import chunk_eval, calculate_f1
# yapf: disable
# Command-line options for the sequence-labeling fine-tuning demo.
# The module docstring is passed as ``description``: the first positional
# parameter of ArgumentParser is ``prog`` (the program name shown in the
# usage line), not the description text.
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("--num_epoch", type=int, default=1, help="Number of epoches for fine-tuning.")
parser.add_argument("--batch_size", type=int, default=16, help="Total examples' number in batch for training.")
parser.add_argument("--log_interval", type=int, default=10, help="log interval.")
parser.add_argument("--save_interval", type=int, default=10, help="save interval.")
parser.add_argument("--checkpoint_dir", type=str, default="paddlehub_finetune_ckpt_dygraph", help="Path to save log data.")
parser.add_argument("--max_seq_len", type=int, default=512, help="Number of words of the longest seqence.")
# yapf: enable.
class TransformerSequenceLabelLayer(fluid.dygraph.Layer):
    """Token-level classifier: a transformer followed by one linear head.

    Args:
        num_classes: Output dimension of the linear head.
        transformer: Callable layer whose result is indexed with
            ``'sequence_output'``.
    """

    def __init__(self, num_classes, transformer):
        super(TransformerSequenceLabelLayer, self).__init__()
        self.num_classes = num_classes
        self.transformer = transformer
        self.fc = Linear(input_dim=768, output_dim=num_classes)

    def forward(self, input_ids, position_ids, segment_ids, input_mask):
        """Return ``(probabilities, predicted_ids)``.

        ``probabilities`` is the softmax of the head output reshaped to
        ``[-1, num_classes]``; ``predicted_ids`` is the argmax over axis 2 of
        the head output reshaped to ``[-1, 1]``.
        """
        outputs = self.transformer(input_ids, position_ids, segment_ids,
                                   input_mask)
        scores = self.fc(outputs['sequence_output'])

        # Pick the best label per position before the scores are flattened.
        best_ids = fluid.layers.argmax(scores, axis=2)
        ret_infers = fluid.layers.reshape(x=best_ids, shape=[-1, 1])

        flat_scores = fluid.layers.reshape(
            scores, shape=[-1, self.num_classes])
        return fluid.layers.softmax(flat_scores), ret_infers
def finetune(args):
    """Fine-tune an ERNIE sequence-labeling model on MSRA_NER in dygraph mode.

    Wraps the ``ernie`` hub module in a ``TransformerSequenceLabelLayer``,
    restores a previously saved state dict when one exists under
    ``args.checkpoint_dir``, then trains with Adam, printing running loss and
    chunk-level precision/recall/F1 and saving the state dict at the
    configured batch intervals.

    Args:
        args: Parsed command-line namespace. Reads ``max_seq_len``,
            ``checkpoint_dir``, ``batch_size``, ``num_epoch``,
            ``log_interval`` and ``save_interval``.
    """
    ernie = hub.Module(name="ernie", max_seq_len=args.max_seq_len)
    with fluid.dygraph.guard():
        dataset = hub.dataset.MSRA_NER()
        ts = TransformerSequenceLabelLayer(
            num_classes=dataset.num_labels, transformer=ernie)
        adam = AdamOptimizer(learning_rate=1e-5, parameter_list=ts.parameters())

        # Resume from an earlier run if its parameter file is present.
        state_dict_path = os.path.join(args.checkpoint_dir,
                                       'dygraph_state_dict')
        if os.path.exists(state_dict_path + '.pdparams'):
            state_dict, _ = fluid.load_dygraph(state_dict_path)
            ts.load_dict(state_dict)

        reader = hub.reader.SequenceLabelReader(
            dataset=dataset,
            vocab_path=ernie.get_vocab_path(),
            max_seq_len=args.max_seq_len,
            sp_model_path=ernie.get_spm_path(),
            word_dict_path=ernie.get_word_dict_path())
        train_reader = reader.data_generator(
            batch_size=args.batch_size, phase='train')

        loss_sum = total_infer = total_label = total_correct = cnt = 0
        # Train for num_epoch epochs.
        for epoch in range(args.num_epoch):
            # Read the training data batch by batch.
            for batch_id, data in enumerate(train_reader()):
                # presumably data[0] holds the six per-batch fields in this
                # order -- TODO confirm against SequenceLabelReader.
                input_ids = np.array(data[0][0]).astype(np.int64)
                position_ids = np.array(data[0][1]).astype(np.int64)
                segment_ids = np.array(data[0][2]).astype(np.int64)
                input_mask = np.array(data[0][3]).astype(np.float32)
                # Labels are flattened to one column to line up with the
                # [-1, num_classes] predictions returned by the layer.
                labels = np.array(data[0][4]).astype(np.int64).reshape(-1, 1)
                seq_len = np.squeeze(
                    np.array(data[0][5]).astype(np.int64), axis=1)

                pred, ret_infers = ts(input_ids, position_ids, segment_ids,
                                      input_mask)
                loss = fluid.layers.cross_entropy(pred, to_variable(labels))
                avg_loss = fluid.layers.mean(loss)
                avg_loss.backward()
                # Update the parameters.
                adam.minimize(avg_loss)
                # Gradients accumulate across backward() calls in dygraph
                # mode; reset them so each step uses only its own batch.
                ts.clear_gradients()

                loss_sum += avg_loss.numpy() * labels.shape[0]
                label_num, infer_num, correct_num = chunk_eval(
                    labels, ret_infers.numpy(), seq_len, dataset.num_labels, 1)
                cnt += labels.shape[0]
                total_infer += infer_num
                total_label += label_num
                total_correct += correct_num

                if batch_id % args.log_interval == 0:
                    precision, recall, f1 = calculate_f1(
                        total_label, total_infer, total_correct)
                    print('epoch {}: loss {}, f1 {} recall {} precision {}'.
                          format(epoch, loss_sum / cnt, f1, recall, precision))
                    # Restart the running statistics after each report.
                    loss_sum = total_infer = total_label = total_correct = cnt = 0

                if batch_id % args.save_interval == 0:
                    state_dict = ts.state_dict()
                    fluid.save_dygraph(state_dict, state_dict_path)
if __name__ == "__main__":
    # Script entry point: parse the command-line options, then fine-tune.
    args = parser.parse_args()
    finetune(args)
#coding:utf-8
import argparse
import os
import numpy as np
import paddlehub as hub
import paddle.fluid as fluid
from paddle.fluid.dygraph import Linear
from paddle.fluid.dygraph.base import to_variable
from paddle.fluid.optimizer import AdamOptimizer
# yapf: disable
# Command-line options for the text-classification fine-tuning demo.
# The module docstring is passed as ``description``: the first positional
# parameter of ArgumentParser is ``prog`` (the program name shown in the
# usage line), not the description text.
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("--num_epoch", type=int, default=1, help="Number of epoches for fine-tuning.")
parser.add_argument("--batch_size", type=int, default=16, help="Total examples' number in batch for training.")
parser.add_argument("--log_interval", type=int, default=10, help="log interval.")
parser.add_argument("--save_interval", type=int, default=10, help="save interval.")
parser.add_argument("--checkpoint_dir", type=str, default="paddlehub_finetune_ckpt_dygraph", help="Path to save log data.")
parser.add_argument("--max_seq_len", type=int, default=512, help="Number of words of the longest seqence.")
# yapf: enable.
class TransformerClassifier(fluid.dygraph.Layer):
    """Sentence-level classifier: a transformer followed by one linear head.

    Args:
        num_classes: Output dimension of the linear head.
        transformer: Callable layer whose result is indexed with
            ``'pooled_output'``.
    """

    def __init__(self, num_classes, transformer):
        super(TransformerClassifier, self).__init__()
        self.num_classes = num_classes
        self.transformer = transformer
        self.fc = Linear(input_dim=768, output_dim=num_classes)

    def forward(self, input_ids, position_ids, segment_ids, input_mask):
        """Return the softmax of the head applied to the pooled output.

        The pooled output goes through dropout (probability 0.1) and is
        reshaped to ``[-1, 768]`` before the linear head.
        """
        outputs = self.transformer(input_ids, position_ids, segment_ids,
                                   input_mask)
        pooled = outputs['pooled_output']
        dropped = fluid.layers.dropout(
            pooled,
            dropout_prob=0.1,
            dropout_implementation="upscale_in_train")
        features = fluid.layers.reshape(dropped, shape=[-1, 768])
        scores = self.fc(features)
        return fluid.layers.softmax(scores)
def finetune(args):
    """Fine-tune an ERNIE text classifier on ChnSentiCorp in dygraph mode.

    Wraps the ``ernie`` hub module in a ``TransformerClassifier``, restores a
    previously saved state dict when one exists under ``args.checkpoint_dir``,
    then trains with Adam, printing running loss/accuracy and saving the
    state dict at the configured batch intervals.

    Args:
        args: Parsed command-line namespace. Reads ``max_seq_len``,
            ``checkpoint_dir``, ``batch_size``, ``num_epoch``,
            ``log_interval`` and ``save_interval``.
    """
    ernie = hub.Module(name="ernie", max_seq_len=args.max_seq_len)
    with fluid.dygraph.guard():
        dataset = hub.dataset.ChnSentiCorp()
        tc = TransformerClassifier(
            num_classes=dataset.num_labels, transformer=ernie)
        adam = AdamOptimizer(learning_rate=1e-5, parameter_list=tc.parameters())

        # Resume from an earlier run if its parameter file is present.
        state_dict_path = os.path.join(args.checkpoint_dir,
                                       'dygraph_state_dict')
        if os.path.exists(state_dict_path + '.pdparams'):
            state_dict, _ = fluid.load_dygraph(state_dict_path)
            tc.load_dict(state_dict)

        reader = hub.reader.ClassifyReader(
            dataset=dataset,
            vocab_path=ernie.get_vocab_path(),
            max_seq_len=args.max_seq_len,
            sp_model_path=ernie.get_spm_path(),
            word_dict_path=ernie.get_word_dict_path())
        train_reader = reader.data_generator(
            batch_size=args.batch_size, phase='train')

        loss_sum = acc_sum = cnt = 0
        # Train for num_epoch epochs.
        for epoch in range(args.num_epoch):
            # Read the training data batch by batch.
            for batch_id, data in enumerate(train_reader()):
                # presumably data[0] holds the five per-batch fields in this
                # order -- TODO confirm against ClassifyReader.
                input_ids = np.array(data[0][0]).astype(np.int64)
                position_ids = np.array(data[0][1]).astype(np.int64)
                segment_ids = np.array(data[0][2]).astype(np.int64)
                input_mask = np.array(data[0][3]).astype(np.float32)
                labels = np.array(data[0][4]).astype(np.int64)
                label_var = to_variable(labels)

                pred = tc(input_ids, position_ids, segment_ids, input_mask)
                acc = fluid.layers.accuracy(pred, label_var)
                loss = fluid.layers.cross_entropy(pred, label_var)
                avg_loss = fluid.layers.mean(loss)
                avg_loss.backward()
                # Update the parameters.
                adam.minimize(avg_loss)
                # Gradients accumulate across backward() calls in dygraph
                # mode; reset them so each step uses only its own batch.
                tc.clear_gradients()

                batch_size = labels.shape[0]
                loss_sum += avg_loss.numpy() * batch_size
                acc_sum += acc.numpy() * batch_size
                cnt += batch_size
                if batch_id % args.log_interval == 0:
                    print('epoch {}: loss {}, acc {}'.format(
                        epoch, loss_sum / cnt, acc_sum / cnt))
                    # Restart the running averages after each report.
                    loss_sum = acc_sum = cnt = 0

                if batch_id % args.save_interval == 0:
                    state_dict = tc.state_dict()
                    fluid.save_dygraph(state_dict, state_dict_path)
if __name__ == "__main__":
    # Script entry point: parse the command-line options, then fine-tune.
    args = parser.parse_args()
    finetune(args)
......@@ -19,10 +19,11 @@ from __future__ import print_function
import copy
import paddle
import paddle.fluid as fluid
from paddlehub.module import module_desc_pb2
from paddlehub.common.utils import from_pyobj_to_module_attr, from_module_attr_to_pyobj
from paddlehub.common.utils import from_pyobj_to_module_attr, from_module_attr_to_pyobj, version_compare
from paddlehub.common.logger import logger
dtype_map = {
......@@ -62,7 +63,8 @@ def get_variable_info(var):
var_info['trainable'] = var.trainable
var_info['optimize_attr'] = var.optimize_attr
var_info['regularizer'] = var.regularizer
var_info['gradient_clip_attr'] = var.gradient_clip_attr
if not version_compare(paddle.__version__, '1.8'):
var_info['gradient_clip_attr'] = var.gradient_clip_attr
var_info['do_model_average'] = var.do_model_average
else:
var_info['persistable'] = var.persistable
......
......@@ -89,7 +89,7 @@ def moduleinfo(name, version, author, author_email, summary, type):
return _wrapper
class Module(object):
class Module(fluid.dygraph.Layer):
def __new__(cls,
name=None,
directory=None,
......@@ -121,7 +121,7 @@ class Module(object):
module = Module.init_with_directory(
directory=directory, **kwargs)
else:
module = object.__new__(cls)
module = fluid.dygraph.Layer.__new__(cls)
return module
......@@ -135,6 +135,7 @@ class Module(object):
if "_is_initialize" in self.__dict__ and self._is_initialize:
return
super(Module, self).__init__()
_run_func_name = self._get_func_name(self.__class__,
_module_runnable_func)
self._run_func = getattr(self,
......@@ -248,6 +249,10 @@ class Module(object):
def _initialize(self):
pass
def forward(self, *args, **kwargs):
    """Default dynamic-graph entry point: always raises.

    Raises:
        RuntimeError: Unconditionally, with a message naming this module via
            ``self.name``.
    """
    message = '{} does not support dynamic graph mode yet.'.format(self.name)
    raise RuntimeError(message)
class ModuleHelper(object):
def __init__(self, directory):
......
......@@ -24,13 +24,15 @@ import os
import re
import six
import paddle
import numpy as np
import paddle.fluid as fluid
from paddlehub.common import paddle_helper
from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor
import paddlehub as hub
from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor
from paddlehub.common import paddle_helper, tmp_dir
from paddlehub.common.logger import logger
from paddlehub.common.utils import sys_stdin_encoding
from paddlehub.common.utils import sys_stdin_encoding, version_compare
from paddlehub.io.parser import txt_parser
from paddlehub.module.module import runnable
......@@ -246,6 +248,45 @@ class TransformerModule(NLPBaseModule):
Transformer Module base class can be used by BERT, ERNIE, RoBERTa and so on.
"""
def __init__(self,
             name=None,
             directory=None,
             module_dir=None,
             version=None,
             max_seq_len=128,
             **kwargs):
    """Initialize the module and, when supported, its dygraph runner.

    Does nothing when ``directory`` is falsy. Otherwise the parent
    initializer is called, ``max_seq_len`` is stored, and -- if
    ``version_compare(paddle.__version__, '1.8.0')`` is truthy -- the
    program returned by ``context()`` is saved as an inference model into a
    temporary directory and loaded into ``self.model_runner``.
    """
    if not directory:
        return

    super(TransformerModule, self).__init__(
        name=name,
        directory=directory,
        module_dir=module_dir,
        version=version,
        **kwargs)
    self.max_seq_len = max_seq_len

    if not version_compare(paddle.__version__, '1.8.0'):
        return

    with tmp_dir() as _dir:
        input_dict, output_dict, program = self.context(
            max_seq_len=max_seq_len)

        # Feed/fetch order is significant: forward() passes its inputs and
        # unpacks the runner's outputs positionally in this same order.
        feed_names = [
            input_dict[key].name
            for key in ('input_ids', 'position_ids', 'segment_ids',
                        'input_mask')
        ]
        fetch_vars = [
            output_dict["pooled_output"], output_dict["sequence_output"]
        ]
        fluid.io.save_inference_model(
            dirname=_dir,
            main_program=program,
            feeded_var_names=feed_names,
            target_vars=fetch_vars,
            executor=fluid.Executor(fluid.CPUPlace()))

        # The runner is built while the temporary directory still exists.
        with fluid.dygraph.guard():
            self.model_runner = fluid.dygraph.StaticModelRunner(_dir)
def init_pretraining_params(self, exe, pretraining_params_path,
main_program):
assert os.path.exists(
......@@ -271,7 +312,7 @@ class TransformerModule(NLPBaseModule):
def context(
self,
max_seq_len=128,
max_seq_len=None,
trainable=True,
):
"""
......@@ -287,6 +328,9 @@ class TransformerModule(NLPBaseModule):
"""
if not max_seq_len:
max_seq_len = self.max_seq_len
assert max_seq_len <= self.MAX_SEQ_LEN and max_seq_len >= 1, "max_seq_len({}) should be in the range of [1, {}]".format(
max_seq_len, self.MAX_SEQ_LEN)
......@@ -431,3 +475,16 @@ class TransformerModule(NLPBaseModule):
"The module context has not been initialized. "
"Please call context() before using get_params_layer")
return self.params_layer
def forward(self, input_ids, position_ids, segment_ids, input_mask):
    """Run the module in dynamic-graph mode.

    Returns:
        dict: ``'pooled_output'`` and ``'sequence_output'``, the two values
        returned by ``self.model_runner``.

    Raises:
        RuntimeError: If ``version_compare(paddle.__version__, '1.8.0')`` is
            falsy.
    """
    if not version_compare(paddle.__version__, '1.8.0'):
        raise RuntimeError(
            '{} only support dynamic graph mode in paddle >= 1.8.0'.format(
                self.name))

    pooled, sequence = self.model_runner(input_ids, position_ids,
                                         segment_ids, input_mask)
    return {'pooled_output': pooled, 'sequence_output': sequence}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册