Commit a409d43c authored by Zeyu Chen

remove useless import

Parent 5737d422
# ERNIE Classification
This example shows how to use the PaddleHub Finetune API to quickly fine-tune a Transformer model such as ERNIE or BERT for a text classification task.
```python
import paddle.fluid as fluid
import paddlehub as hub

# Step1: load the pre-trained ERNIE module and get its inputs/outputs/program
module = hub.Module(name="ernie")
inputs, outputs, program = module.context(trainable=True, max_seq_len=128)

# Step2: download the dataset and use ClassifyReader to read it
reader = hub.reader.ClassifyReader(
    dataset=hub.dataset.ChnSentiCorp(),
    vocab_path=module.get_vocab_path(),
    max_seq_len=128)

# Step3: construct the transfer learning network
with fluid.program_guard(program):
    label = fluid.layers.data(name="label", shape=[1], dtype='int64')

    # Use "pooled_output" for classification tasks on an entire sentence
    pooled_output = outputs["pooled_output"]

    feed_list = [
        inputs["input_ids"].name, inputs["position_ids"].name,
        inputs["segment_ids"].name, inputs["input_mask"].name, label.name
    ]

    # Define a classification finetune task by PaddleHub's API
    cls_task = hub.create_text_classification_task(
        pooled_output, label, num_classes=reader.get_num_labels())

    # Step4: select the finetune strategy, set up the run config and finetune
    strategy = hub.BERTFinetuneStrategy(
        weight_decay=0.01,
        learning_rate=5e-5,
        warmup_strategy="linear_warmup_decay",
    )

    config = hub.RunConfig(
        use_cuda=True, num_epoch=3, batch_size=32, strategy=strategy)

    # Finetune and evaluate the model; training, evaluation and model saving
    # are handled automatically
    hub.finetune_and_eval(
        task=cls_task, data_reader=reader, feed_list=feed_list, config=config)
```
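In the snippet above, `reader.get_num_labels()` is the convenience helper this commit adds to `BaseReader` (see the last hunk below); it simply counts the dataset's labels, which the older demo scripts did by hand. A minimal check of that equivalence, assuming the same ERNIE module and ChnSentiCorp dataset as the example:

```python
import paddlehub as hub

module = hub.Module(name="ernie")
reader = hub.reader.ClassifyReader(
    dataset=hub.dataset.ChnSentiCorp(),
    vocab_path=module.get_vocab_path(),
    max_seq_len=128)

# get_num_labels() is a thin wrapper over the label list, so these agree.
assert reader.get_num_labels() == len(reader.get_labels())
```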
@@ -13,16 +13,8 @@
# limitations under the License.
"""Finetuning on classification task """
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import time
import argparse
import numpy as np
import paddle
import paddle.fluid as fluid
import paddlehub as hub
@@ -46,9 +38,8 @@ if __name__ == '__main__':
trainable=True, max_seq_len=args.max_seq_len)
# Step2: Download dataset and use ClassifyReader to read dataset
dataset = hub.dataset.NLPCC_DBQA()
reader = hub.reader.ClassifyReader(
dataset=dataset,
dataset=hub.dataset.NLPCC_DBQA(),
vocab_path=module.get_vocab_path(),
max_seq_len=args.max_seq_len)
num_labels = len(reader.get_labels())
......
@@ -13,16 +13,8 @@
# limitations under the License.
"""Finetuning on classification task """
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import time
import argparse
import numpy as np
import paddle
import paddle.fluid as fluid
import paddlehub as hub
@@ -46,9 +38,8 @@ if __name__ == '__main__':
trainable=True, max_seq_len=args.max_seq_len)
# Step2: Download dataset and use ClassifyReader to read dataset
dataset = hub.dataset.LCQMC()
reader = hub.reader.ClassifyReader(
dataset=dataset,
dataset=hub.dataset.LCQMC(),
vocab_path=module.get_vocab_path(),
max_seq_len=args.max_seq_len)
num_labels = len(reader.get_labels())
......
@@ -13,16 +13,8 @@
# limitations under the License.
"""Finetuning on classification task """
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import time
import argparse
import numpy as np
import paddle
import paddle.fluid as fluid
import paddlehub as hub
@@ -46,12 +38,10 @@ if __name__ == '__main__':
trainable=True, max_seq_len=args.max_seq_len)
# Step2: Download dataset and use ClassifyReader to read dataset
dataset = hub.dataset.ChnSentiCorp()
reader = hub.reader.ClassifyReader(
dataset=dataset,
dataset=hub.dataset.ChnSentiCorp(),
vocab_path=module.get_vocab_path(),
max_seq_len=args.max_seq_len)
num_labels = len(reader.get_labels())
# Step3: construct transfer learning network
with fluid.program_guard(program):
@@ -69,7 +59,7 @@ if __name__ == '__main__':
]
# Define a classfication finetune task by PaddleHub's API
cls_task = hub.create_text_classification_task(
pooled_output, label, num_classes=num_labels)
pooled_output, label, num_classes=reader.get_num_labels())
# Step4: Select finetune strategy, setup config and finetune
strategy = hub.BERTFinetuneStrategy(
......
@@ -13,15 +13,6 @@
# limitations under the License.
"""Finetuning on sequence labeling task."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import time
import argparse
import numpy as np
import paddle
import paddle.fluid as fluid
import paddlehub as hub
@@ -46,9 +37,8 @@ if __name__ == '__main__':
trainable=True, max_seq_len=args.max_seq_len)
# Step2: Download dataset and use SequenceLabelReader to read dataset
dataset = hub.dataset.MSRA_NER()
reader = hub.reader.SequenceLabelReader(
dataset=dataset,
dataset=hub.dataset.MSRA_NER(),
vocab_path=module.get_vocab_path(),
max_seq_len=args.max_seq_len)
@@ -60,7 +50,6 @@ if __name__ == '__main__':
name="label", shape=[args.max_seq_len, 1], dtype='int64')
seq_len = fluid.layers.data(name="seq_len", shape=[1], dtype='int64')
# Use "pooled_output" for classification tasks on an entire sentence.
# Use "sequence_output" for token-level output.
sequence_output = outputs["sequence_output"]
@@ -93,6 +82,7 @@ if __name__ == '__main__':
batch_size=args.batch_size,
checkpoint_dir=args.checkpoint_dir,
strategy=strategy)
# Finetune and evaluate model by PaddleHub's API
# will finish training, evaluation, testing, save model automatically
hub.finetune_and_eval(
......
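The hunk above keeps the comments that distinguish the module's two outputs: `pooled_output` for whole-sentence classification and `sequence_output` for token-level tasks such as sequence labeling. A minimal sketch of pulling out both, mirroring the README example above (illustrative only):

```python
import paddle.fluid as fluid
import paddlehub as hub

module = hub.Module(name="ernie")
inputs, outputs, program = module.context(trainable=True, max_seq_len=128)

with fluid.program_guard(program):
    # One vector per example: used by the text classification demos.
    pooled_output = outputs["pooled_output"]
    # One vector per token position: used by the sequence labeling demo.
    sequence_output = outputs["sequence_output"]
```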
@@ -80,6 +80,9 @@ class BaseReader(object):
"""Gets the list of labels for this data set."""
return self.dataset.get_labels()
def get_num_labels(self):
return len(self.dataset.get_labels())
def get_train_progress(self):
"""Gets progress for training phase."""
return self.current_example, self.current_epoch
......
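For context, a self-contained sketch of what the new `get_num_labels()` method does; `ExampleDataset` is hypothetical and stands in for the `hub.dataset.*` classes only for this illustration:

```python
class ExampleDataset(object):
    """Hypothetical dataset used only for this sketch."""

    def get_labels(self):
        return ["0", "1"]


class BaseReader(object):
    """Minimal sketch of the reader pattern shown in the hunk above."""

    def __init__(self, dataset):
        self.dataset = dataset

    def get_labels(self):
        """Gets the list of labels for this data set."""
        return self.dataset.get_labels()

    def get_num_labels(self):
        """New helper: lets demo scripts pass num_classes=reader.get_num_labels()
        instead of computing len(reader.get_labels()) themselves."""
        return len(self.dataset.get_labels())


reader = BaseReader(ExampleDataset())
assert reader.get_num_labels() == 2
```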