diff --git a/demo/sequence_labeling/sequence_label_dygraph.py b/demo/sequence_labeling/sequence_label_dygraph.py
new file mode 100644
index 0000000000000000000000000000000000000000..f95de0a278970c9a0ed45e38f7554f698b464a05
--- /dev/null
+++ b/demo/sequence_labeling/sequence_label_dygraph.py
@@ -0,0 +1,107 @@
+#coding:utf-8
+import argparse
+import os
+
+import numpy as np
+import paddlehub as hub
+import paddle.fluid as fluid
+from paddle.fluid.dygraph import Linear
+from paddle.fluid.dygraph.base import to_variable
+from paddle.fluid.optimizer import AdamOptimizer
+from paddlehub.finetune.evaluate import chunk_eval, calculate_f1
+
+# yapf: disable
+parser = argparse.ArgumentParser(__doc__)
+parser.add_argument("--num_epoch", type=int, default=1, help="Number of epochs for fine-tuning.")
+parser.add_argument("--batch_size", type=int, default=16, help="Total examples' number in batch for training.")
+parser.add_argument("--log_interval", type=int, default=10, help="log interval.")
+parser.add_argument("--save_interval", type=int, default=10, help="save interval.")
+parser.add_argument("--checkpoint_dir", type=str, default="paddlehub_finetune_ckpt_dygraph", help="Path to save log data.")
+parser.add_argument("--max_seq_len", type=int, default=512, help="Number of words of the longest sequence.")
+# yapf: enable.
+
+
+class TransformerSequenceLabelLayer(fluid.dygraph.Layer):
+    def __init__(self, num_classes, transformer):
+        super(TransformerSequenceLabelLayer, self).__init__()
+        self.num_classes = num_classes
+        self.transformer = transformer
+        self.fc = Linear(input_dim=768, output_dim=num_classes)
+
+    def forward(self, input_ids, position_ids, segment_ids, input_mask):
+        result = self.transformer(input_ids, position_ids, segment_ids,
+                                  input_mask)
+        pred = self.fc(result['sequence_output'])
+        ret_infers = fluid.layers.reshape(
+            x=fluid.layers.argmax(pred, axis=2), shape=[-1, 1])
+        pred = fluid.layers.reshape(pred, shape=[-1, self.num_classes])
+        return fluid.layers.softmax(pred), ret_infers
+
+
+def finetune(args):
+    ernie = hub.Module(name="ernie", max_seq_len=args.max_seq_len)
+    with fluid.dygraph.guard():
+        dataset = hub.dataset.MSRA_NER()
+        ts = TransformerSequenceLabelLayer(
+            num_classes=dataset.num_labels, transformer=ernie)
+        adam = AdamOptimizer(learning_rate=1e-5, parameter_list=ts.parameters())
+        state_dict_path = os.path.join(args.checkpoint_dir,
+                                       'dygraph_state_dict')
+        if os.path.exists(state_dict_path + '.pdparams'):
+            state_dict, _ = fluid.load_dygraph(state_dict_path)
+            ts.load_dict(state_dict)
+
+        reader = hub.reader.SequenceLabelReader(
+            dataset=dataset,
+            vocab_path=ernie.get_vocab_path(),
+            max_seq_len=args.max_seq_len,
+            sp_model_path=ernie.get_spm_path(),
+            word_dict_path=ernie.get_word_dict_path())
+        train_reader = reader.data_generator(
+            batch_size=args.batch_size, phase='train')
+
+        loss_sum = total_infer = total_label = total_correct = cnt = 0
+        # Run num_epoch rounds of training
+        for epoch in range(args.num_epoch):
+            # Read the training data and train batch by batch
+            for batch_id, data in enumerate(train_reader()):
+                input_ids = np.array(data[0][0]).astype(np.int64)
+                position_ids = np.array(data[0][1]).astype(np.int64)
+                segment_ids = np.array(data[0][2]).astype(np.int64)
+                input_mask = np.array(data[0][3]).astype(np.float32)
+                labels = np.array(data[0][4]).astype(np.int64).reshape(-1, 1)
+                seq_len = np.squeeze(
+                    np.array(data[0][5]).astype(np.int64), axis=1)
+                pred, ret_infers = ts(input_ids, position_ids, segment_ids,
+                                      input_mask)
+
+                loss = fluid.layers.cross_entropy(pred, to_variable(labels))
+                avg_loss = fluid.layers.mean(loss)
+                avg_loss.backward()
+                # Update the parameters
+                adam.minimize(avg_loss)
+
+                loss_sum += avg_loss.numpy() * labels.shape[0]
+                label_num, infer_num, correct_num = chunk_eval(
+                    labels, ret_infers.numpy(), seq_len, dataset.num_labels, 1)
+                cnt += labels.shape[0]
+
+                total_infer += infer_num
+                total_label += label_num
+                total_correct += correct_num
+
+                if batch_id % args.log_interval == 0:
+                    precision, recall, f1 = calculate_f1(
+                        total_label, total_infer, total_correct)
+                    print('epoch {}: loss {}, f1 {} recall {} precision {}'.
+                          format(epoch, loss_sum / cnt, f1, recall, precision))
+                    loss_sum = total_infer = total_label = total_correct = cnt = 0
+
+                if batch_id % args.save_interval == 0:
+                    state_dict = ts.state_dict()
+                    fluid.save_dygraph(state_dict, state_dict_path)
+
+
+if __name__ == "__main__":
+    args = parser.parse_args()
+    finetune(args)
diff --git a/demo/text_classification/text_classifier_dygraph.py b/demo/text_classification/text_classifier_dygraph.py
index f60e7cbf6766f640e525999a2fb6191d525ddda0..b648740e1b3a43cc668d9ecfb2ab4c05641bb18d 100644
--- a/demo/text_classification/text_classifier_dygraph.py
+++ b/demo/text_classification/text_classifier_dygraph.py
@@ -28,10 +28,10 @@ class TransformerClassifier(fluid.dygraph.Layer):
         self.fc = Linear(input_dim=768, output_dim=num_classes)
 
     def forward(self, input_ids, position_ids, segment_ids, input_mask):
-        pooled_output, sequence_output = self.transformer(
-            input_ids, position_ids, segment_ids, input_mask)
+        result = self.transformer(input_ids, position_ids, segment_ids,
+                                  input_mask)
         cls_feats = fluid.layers.dropout(
-            pooled_output,
+            result['pooled_output'],
             dropout_prob=0.1,
             dropout_implementation="upscale_in_train")
         cls_feats = fluid.layers.reshape(cls_feats, shape=[-1, 768])
@@ -40,14 +40,12 @@ class TransformerClassifier(fluid.dygraph.Layer):
 
 
 def finetune(args):
+    ernie = hub.Module(name="ernie", max_seq_len=args.max_seq_len)
     with fluid.dygraph.guard():
-        ernie = hub.Module(name="ernie")
         dataset = hub.dataset.ChnSentiCorp()
         tc = TransformerClassifier(
             num_classes=dataset.num_labels, transformer=ernie)
-        adam = AdamOptimizer(
-            learning_rate=0.001, parameter_list=tc.parameters())
-        print(len(tc.parameters()))
+        adam = AdamOptimizer(learning_rate=1e-5, parameter_list=tc.parameters())
         state_dict_path = os.path.join(args.checkpoint_dir,
                                        'dygraph_state_dict')
         if os.path.exists(state_dict_path + '.pdparams'):
diff --git a/paddlehub/common/paddle_helper.py b/paddlehub/common/paddle_helper.py
index 34dc1482c2f1fbcaac7dcaa7b7245710774d250a..5515ea6329ed417b9c6152cc63532dbd888b9c54 100644
--- a/paddlehub/common/paddle_helper.py
+++ b/paddlehub/common/paddle_helper.py
@@ -19,10 +19,11 @@ from __future__ import print_function
 
 import copy
 
+import paddle
 import paddle.fluid as fluid
 
 from paddlehub.module import module_desc_pb2
-from paddlehub.common.utils import from_pyobj_to_module_attr, from_module_attr_to_pyobj
+from paddlehub.common.utils import from_pyobj_to_module_attr, from_module_attr_to_pyobj, version_compare
 from paddlehub.common.logger import logger
 
 dtype_map = {
@@ -62,7 +63,8 @@ def get_variable_info(var):
         var_info['trainable'] = var.trainable
         var_info['optimize_attr'] = var.optimize_attr
         var_info['regularizer'] = var.regularizer
-        var_info['gradient_clip_attr'] = var.gradient_clip_attr
+        if not version_compare(paddle.__version__, '1.8'):
+            var_info['gradient_clip_attr'] = var.gradient_clip_attr
         var_info['do_model_average'] = var.do_model_average
     else:
         var_info['persistable'] = var.persistable
diff --git a/paddlehub/module/module.py b/paddlehub/module/module.py
index fe6d41deb555ef875a90dfac04e87bfcf0723086..9e44f1c52003df2c4a8e13b066cd03ad9a87d6d5 100644
--- a/paddlehub/module/module.py
+++ b/paddlehub/module/module.py
@@ -146,12 +146,6 @@ class Module(fluid.dygraph.Layer):
         self._initialize(**kwargs)
         self._is_initialize = True
         self._code_version = "v2"
-        self.model_runner = fluid.dygraph.StaticModelRunner(
-            self.pretrained_model_path)
-
-    @property
-    def pretrained_model_path(self):
-        return self.default_pretrained_model_path
 
     def _get_func_name(self, current_cls, module_func_dict):
         mod = current_cls.__module__ + "." + current_cls.__name__
@@ -256,7 +250,8 @@ class Module(fluid.dygraph.Layer):
         pass
 
     def forward(self, *args, **kwargs):
-        return self.model_runner(*args, **kwargs)
+        raise RuntimeError('{} does not support dynamic graph mode yet.'.format(
+            self.name))
 
 
 class ModuleHelper(object):
diff --git a/paddlehub/module/nlp_module.py b/paddlehub/module/nlp_module.py
index 9742ec92c4ac9b7e0635c85adfe93a89f6ff009c..0ebcf23636de3e548560710806d90df580118abb 100644
--- a/paddlehub/module/nlp_module.py
+++ b/paddlehub/module/nlp_module.py
@@ -24,13 +24,15 @@ import os
 import re
 import six
 
+import paddle
 import numpy as np
 import paddle.fluid as fluid
-from paddlehub.common import paddle_helper
-from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor
+
 import paddlehub as hub
+from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor
+from paddlehub.common import paddle_helper, tmp_dir
 from paddlehub.common.logger import logger
-from paddlehub.common.utils import sys_stdin_encoding
+from paddlehub.common.utils import sys_stdin_encoding, version_compare
 from paddlehub.io.parser import txt_parser
 from paddlehub.module.module import runnable
 
@@ -246,9 +248,44 @@ class TransformerModule(NLPBaseModule):
     Tranformer Module base class can be used by BERT, ERNIE, RoBERTa and so on.
""" - @property - def pretrained_model_path(self): - return self.params_path + def __init__(self, + name=None, + directory=None, + module_dir=None, + version=None, + max_seq_len=128, + **kwargs): + if not directory: + return + super(TransformerModule, self).__init__( + name=name, + directory=directory, + module_dir=module_dir, + version=version, + **kwargs) + + self.max_seq_len = max_seq_len + if version_compare(paddle.__version__, '1.8.0'): + with tmp_dir() as _dir: + input_dict, output_dict, program = self.context( + max_seq_len=max_seq_len) + fluid.io.save_inference_model( + dirname=_dir, + main_program=program, + feeded_var_names=[ + input_dict['input_ids'].name, + input_dict['position_ids'].name, + input_dict['segment_ids'].name, + input_dict['input_mask'].name + ], + target_vars=[ + output_dict["pooled_output"], + output_dict["sequence_output"] + ], + executor=fluid.Executor(fluid.CPUPlace())) + + with fluid.dygraph.guard(): + self.model_runner = fluid.dygraph.StaticModelRunner(_dir) def init_pretraining_params(self, exe, pretraining_params_path, main_program): @@ -275,7 +312,7 @@ class TransformerModule(NLPBaseModule): def context( self, - max_seq_len=128, + max_seq_len=None, trainable=True, ): """ @@ -291,6 +328,9 @@ class TransformerModule(NLPBaseModule): """ + if not max_seq_len: + max_seq_len = self.max_seq_len + assert max_seq_len <= self.MAX_SEQ_LEN and max_seq_len >= 1, "max_seq_len({}) should be in the range of [1, {}]".format( max_seq_len, self.MAX_SEQ_LEN) @@ -357,14 +397,6 @@ class TransformerModule(NLPBaseModule): return inputs, outputs, module_program - -# @property -# def model_runner(self): -# if not self._model_runner: -# self._model_runner = fluid.dygraph.StaticModelRunner( -# self.params_path) -# return self._model_runner - def get_embedding(self, texts, use_gpu=False, batch_size=1): """ get pooled_output and sequence_output for input texts. @@ -443,3 +475,16 @@ class TransformerModule(NLPBaseModule): "The module context has not been initialized. " "Please call context() before using get_params_layer") return self.params_layer + + def forward(self, input_ids, position_ids, segment_ids, input_mask): + if version_compare(paddle.__version__, '1.8.0'): + pooled_output, sequence_output = self.model_runner( + input_ids, position_ids, segment_ids, input_mask) + return { + 'pooled_output': pooled_output, + 'sequence_output': sequence_output + } + else: + raise RuntimeError( + '{} only support dynamic graph mode in paddle >= 1.8.0'.format( + self.name))