paddlehub使用语义预训练模型ERNIE fine-tune后无法预测
Created by: zhuyingjun-zyj
paddlepaddle version: 1.8.1, paddlehub version: 1.7.1
import paddlehub as hub
# Directory that holds the dataset files; ThuTastData reads it as its base path.
path = "D://project//test//textPaddle//testdata"
# ERNIE module loaded by name through PaddleHub.
module = hub.Module(name="ernie")
from paddlehub.dataset.base_nlp_dataset import BaseNLPDataset
class ThuTastData(BaseNLPDataset):
    """Text-classification dataset backed by files under the module-level ``path``.

    The dataset is declared with ``train.txt``, ``valid.txt`` and ``test.txt``
    as its train/dev/test files, each flagged as having a header line, and
    with five string labels ("1" through "5").
    """

    def __init__(self):
        # Directory where the dataset files are stored.
        self.dataset_dir = path
        super().__init__(
            base_path=self.dataset_dir,
            train_file="train.txt",
            dev_file="valid.txt",
            test_file="test.txt",
            train_file_with_header=True,
            dev_file_with_header=True,
            test_file_with_header=True,
            # Set of class labels used by the dataset.
            label_list=["5", "2", "4", "3", "1"])
# Sequence length shared by the reader and the ERNIE program.
MAX_SEQ_LEN = 128

dataset = ThuTastData()
# Uncomment to inspect the first few training examples:
# for e in dataset.get_train_examples()[:3]:
#     print("{}\t{}\t{}".format(e.guid, e.text_a, e.label))

# Reader that turns dataset examples into model inputs using the module's
# vocabulary / sentence-piece / word-dict resources.
reader = hub.reader.ClassifyReader(
    dataset=dataset,
    vocab_path=module.get_vocab_path(),
    sp_model_path=module.get_spm_path(),
    word_dict_path=module.get_word_dict_path(),
    max_seq_len=MAX_SEQ_LEN,
)

# Optimisation strategy passed to the run configuration below.
strategy = hub.AdamWeightDecayStrategy(
    weight_decay=0.01,
    warmup_proportion=0.1,
    learning_rate=5e-5,
)

# Run configuration: CPU only, checkpoints written under ./modle.ckpt.
config = hub.RunConfig(
    log_interval=3,
    save_ckpt_interval=10,
    checkpoint_dir='./modle.ckpt',
    num_epoch=20,
    use_cuda=False,
    batch_size=32,
    eval_interval=50,
    strategy=strategy,
)

inputs, outputs, program = module.context(
    trainable=True,
    max_seq_len=MAX_SEQ_LEN,
)

# Use "pooled_output" for classification tasks on an entire sentence.
pooled_output = outputs["pooled_output"]

# Names of the input variables, in the order the task is given them.
feed_list = [
    inputs[key].name
    for key in ("input_ids", "position_ids", "segment_ids", "input_mask")
]

cls_task = hub.TextClassifierTask(
    data_reader=reader,
    feature=pooled_output,
    feed_list=feed_list,
    num_classes=dataset.num_labels,
    config=config,
    metrics_choices=["acc"],
)
# predict() first looks for "<checkpoint_dir>/best_model". When that directory
# is missing it falls back to resuming the training checkpoint, which fails
# with "AttributeError: ... has no attribute 'max_train_steps'" outside of a
# training run. Fine-tune first whenever no best model has been saved yet.
# NOTE(review): assumes finetune_and_eval() runs at least one dev evaluation
# so that a best model gets written -- confirm with the actual dataset size.
import os

best_model_dir = os.path.join(config.checkpoint_dir, "best_model")
if not os.path.isdir(best_model_dir):
    run_states = cls_task.finetune_and_eval()

# Each sample is a one-element list holding the text to classify.
data = [
    ["环境不好??太乱了??东西又贵又不好吃"],
    ["交通方便;环境很好;服务态度很好 房间较小"],
    ["19天硬盘就罢工了~~~算上运来的一周都没用上15天~~~可就是不能换了~~~唉~~~~你说这算什么事呀~~~"],
]
print(cls_task.predict(data=data, return_result=True))
以下是报错日志:
!!! The default number of CPU_NUM=1.
[2020-06-10 15:10:17,401] [ INFO] - PaddleHub predict start
[2020-06-10 15:10:17,401] [ INFO] - Load the best model from ./modle.ckpt\best_model
C:\Users\admin\AppData\Local\Programs\Python\Python37\lib\site-packages\paddle\fluid\executor.py:1093: UserWarning: There are no operators in the program to be executed. If you pass Program manually, please use fluid.program_guard to ensure the current Program is being used.
warnings.warn(error_info)
[2020-06-10 15:10:18,218] [ INFO] - Try loading checkpoint from ./modle.ckpt\ckpt.meta
[2020-06-10 15:10:19,297] [ INFO] - PaddleHub model checkpoint loaded. current_epoch=1, global_step=10, best_score=-999.00000
Traceback (most recent call last):
File "D:/project/test/textPaddle/testPaddleFinetune.py", line 77, in <module>
print(cls_task.predict(data=data, return_result=True))
File "C:\Users\admin\AppData\Local\Programs\Python\Python37\lib\site-packages\paddlehub\finetune\task\base_task.py", line 1011, in predict
self.init_if_load_best_model()
File "C:\Users\admin\AppData\Local\Programs\Python\Python37\lib\site-packages\paddlehub\finetune\task\base_task.py", line 382, in init_if_load_best_model
self.init_if_necessary()
File "C:\Users\admin\AppData\Local\Programs\Python\Python37\lib\site-packages\paddlehub\finetune\task\base_task.py", line 367, in init_if_necessary
if not self.load_checkpoint():
File "C:\Users\admin\AppData\Local\Programs\Python\Python37\lib\site-packages\paddlehub\finetune\task\base_task.py", line 837, in load_checkpoint
self.max_train_steps = self.env.current_step + self.max_train_steps / self.config.num_epoch * (
AttributeError: 'TextClassifierTask' object has no attribute 'max_train_steps'