提交 a409d43c 编写于 作者: Z Zeyu Chen

remove useless import

上级 5737d422
# ERNIE Classification
本示例展示如何使用PaddleHub Finetune API，基于Transformer类模型ERNIE或BERT快速完成文本分类任务。
# Quick-start demo: finetune the pretrained ERNIE module for text
# classification with the PaddleHub Finetune API.
import paddle.fluid as fluid
import paddlehub as hub

# Load the pretrained ERNIE module; trainable=True lets finetuning update
# its weights.  context() returns the input/output variables and the program.
module = hub.Module(name="ernie")
inputs, outputs, program = module.context(trainable=True, max_seq_len=128)

# Reader that downloads ChnSentiCorp and tokenizes it with the module's
# own vocabulary, truncating/padding every example to max_seq_len tokens.
reader = hub.reader.ClassifyReader(
    dataset=hub.dataset.ChnSentiCorp(),
    vocab_path=module.get_vocab_path(),
    max_seq_len=128)

with fluid.program_guard(program):
    label = fluid.layers.data(name="label", shape=[1], dtype='int64')

    # "pooled_output" is the sentence-level representation used for
    # whole-sentence classification.
    pooled_output = outputs["pooled_output"]

    # Variable names fed at every step, in feeding order.
    feed_list = [
        inputs["input_ids"].name, inputs["position_ids"].name,
        inputs["segment_ids"].name, inputs["input_mask"].name, label.name
    ]

    # Attach a classification head on top of the pooled output; the number
    # of classes comes from the dataset via the reader.
    cls_task = hub.create_text_classification_task(
        pooled_output, label, num_classes=reader.get_num_labels())

# BERT-style finetuning strategy: linear warmup then decay, with weight decay.
strategy = hub.BERTFinetuneStrategy(
    weight_decay=0.01,
    learning_rate=5e-5,
    warmup_strategy="linear_warmup_decay",
)

config = hub.RunConfig(
    use_cuda=True, num_epoch=3, batch_size=32, strategy=strategy)

# Run training, evaluation and testing end to end; checkpoints are saved
# automatically by PaddleHub.
hub.finetune_and_eval(
    task=cls_task, data_reader=reader, feed_list=feed_list, config=config)
...@@ -13,16 +13,8 @@ ...@@ -13,16 +13,8 @@
# limitations under the License. # limitations under the License.
"""Finetuning on classification task """ """Finetuning on classification task """
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import time
import argparse import argparse
import numpy as np
import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
import paddlehub as hub import paddlehub as hub
...@@ -46,9 +38,8 @@ if __name__ == '__main__': ...@@ -46,9 +38,8 @@ if __name__ == '__main__':
trainable=True, max_seq_len=args.max_seq_len) trainable=True, max_seq_len=args.max_seq_len)
# Step2: Download dataset and use ClassifyReader to read dataset # Step2: Download dataset and use ClassifyReader to read dataset
dataset = hub.dataset.NLPCC_DBQA()
reader = hub.reader.ClassifyReader( reader = hub.reader.ClassifyReader(
dataset=dataset, dataset=hub.dataset.NLPCC_DBQA(),
vocab_path=module.get_vocab_path(), vocab_path=module.get_vocab_path(),
max_seq_len=args.max_seq_len) max_seq_len=args.max_seq_len)
num_labels = len(reader.get_labels()) num_labels = len(reader.get_labels())
......
...@@ -13,16 +13,8 @@ ...@@ -13,16 +13,8 @@
# limitations under the License. # limitations under the License.
"""Finetuning on classification task """ """Finetuning on classification task """
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import time
import argparse import argparse
import numpy as np
import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
import paddlehub as hub import paddlehub as hub
...@@ -46,9 +38,8 @@ if __name__ == '__main__': ...@@ -46,9 +38,8 @@ if __name__ == '__main__':
trainable=True, max_seq_len=args.max_seq_len) trainable=True, max_seq_len=args.max_seq_len)
# Step2: Download dataset and use ClassifyReader to read dataset # Step2: Download dataset and use ClassifyReader to read dataset
dataset = hub.dataset.LCQMC()
reader = hub.reader.ClassifyReader( reader = hub.reader.ClassifyReader(
dataset=dataset, dataset=hub.dataset.LCQMC(),
vocab_path=module.get_vocab_path(), vocab_path=module.get_vocab_path(),
max_seq_len=args.max_seq_len) max_seq_len=args.max_seq_len)
num_labels = len(reader.get_labels()) num_labels = len(reader.get_labels())
......
...@@ -13,16 +13,8 @@ ...@@ -13,16 +13,8 @@
# limitations under the License. # limitations under the License.
"""Finetuning on classification task """ """Finetuning on classification task """
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import time
import argparse import argparse
import numpy as np
import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
import paddlehub as hub import paddlehub as hub
...@@ -46,12 +38,10 @@ if __name__ == '__main__': ...@@ -46,12 +38,10 @@ if __name__ == '__main__':
trainable=True, max_seq_len=args.max_seq_len) trainable=True, max_seq_len=args.max_seq_len)
# Step2: Download dataset and use ClassifyReader to read dataset # Step2: Download dataset and use ClassifyReader to read dataset
dataset = hub.dataset.ChnSentiCorp()
reader = hub.reader.ClassifyReader( reader = hub.reader.ClassifyReader(
dataset=dataset, dataset=hub.dataset.ChnSentiCorp(),
vocab_path=module.get_vocab_path(), vocab_path=module.get_vocab_path(),
max_seq_len=args.max_seq_len) max_seq_len=args.max_seq_len)
num_labels = len(reader.get_labels())
# Step3: construct transfer learning network # Step3: construct transfer learning network
with fluid.program_guard(program): with fluid.program_guard(program):
...@@ -69,7 +59,7 @@ if __name__ == '__main__': ...@@ -69,7 +59,7 @@ if __name__ == '__main__':
] ]
# Define a classfication finetune task by PaddleHub's API # Define a classfication finetune task by PaddleHub's API
cls_task = hub.create_text_classification_task( cls_task = hub.create_text_classification_task(
pooled_output, label, num_classes=num_labels) pooled_output, label, num_classes=reader.get_num_labels())
# Step4: Select finetune strategy, setup config and finetune # Step4: Select finetune strategy, setup config and finetune
strategy = hub.BERTFinetuneStrategy( strategy = hub.BERTFinetuneStrategy(
......
...@@ -13,15 +13,6 @@ ...@@ -13,15 +13,6 @@
# limitations under the License. # limitations under the License.
"""Finetuning on sequence labeling task.""" """Finetuning on sequence labeling task."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import time
import argparse
import numpy as np
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
import paddlehub as hub import paddlehub as hub
...@@ -46,9 +37,8 @@ if __name__ == '__main__': ...@@ -46,9 +37,8 @@ if __name__ == '__main__':
trainable=True, max_seq_len=args.max_seq_len) trainable=True, max_seq_len=args.max_seq_len)
# Step2: Download dataset and use SequenceLabelReader to read dataset # Step2: Download dataset and use SequenceLabelReader to read dataset
dataset = hub.dataset.MSRA_NER()
reader = hub.reader.SequenceLabelReader( reader = hub.reader.SequenceLabelReader(
dataset=dataset, dataset=hub.dataset.MSRA_NER(),
vocab_path=module.get_vocab_path(), vocab_path=module.get_vocab_path(),
max_seq_len=args.max_seq_len) max_seq_len=args.max_seq_len)
...@@ -60,7 +50,6 @@ if __name__ == '__main__': ...@@ -60,7 +50,6 @@ if __name__ == '__main__':
name="label", shape=[args.max_seq_len, 1], dtype='int64') name="label", shape=[args.max_seq_len, 1], dtype='int64')
seq_len = fluid.layers.data(name="seq_len", shape=[1], dtype='int64') seq_len = fluid.layers.data(name="seq_len", shape=[1], dtype='int64')
# Use "pooled_output" for classification tasks on an entire sentence.
# Use "sequence_output" for token-level output. # Use "sequence_output" for token-level output.
sequence_output = outputs["sequence_output"] sequence_output = outputs["sequence_output"]
...@@ -93,6 +82,7 @@ if __name__ == '__main__': ...@@ -93,6 +82,7 @@ if __name__ == '__main__':
batch_size=args.batch_size, batch_size=args.batch_size,
checkpoint_dir=args.checkpoint_dir, checkpoint_dir=args.checkpoint_dir,
strategy=strategy) strategy=strategy)
# Finetune and evaluate model by PaddleHub's API # Finetune and evaluate model by PaddleHub's API
# will finish training, evaluation, testing, save model automatically # will finish training, evaluation, testing, save model automatically
hub.finetune_and_eval( hub.finetune_and_eval(
......
...@@ -80,6 +80,9 @@ class BaseReader(object): ...@@ -80,6 +80,9 @@ class BaseReader(object):
"""Gets the list of labels for this data set.""" """Gets the list of labels for this data set."""
return self.dataset.get_labels() return self.dataset.get_labels()
def get_num_labels(self):
    """Return how many distinct labels the underlying dataset defines."""
    labels = self.dataset.get_labels()
    return len(labels)
def get_train_progress(self): def get_train_progress(self):
"""Gets progress for training phase.""" """Gets progress for training phase."""
return self.current_example, self.current_epoch return self.current_example, self.current_epoch
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册