PaddleHub使用自定义英文序列标注数据集进行finetune_and_eval时报读取错误
Created by: yaweisun
- 版本、环境信息
PaddleHub 1.5.0, paddlepaddle-gpu 1.7.1.post97 是在AiStudio上运行的。
- 数据集格式:
第一行是:"text_a\tlabel\n" 第二行开始:每个单词之间以空格隔开,然后是“\t”,后面接每个单词对应的标签,以空格隔开,序列标签采用“BIESO” 在生成数据集的时候已经检查过每句的单词数与对应的标签数目相同。
- 代码
完全模仿MSRA数据集的代码,且已经测试过基于MSRA数据集的序列标注,没有问题。 代码如下:
import os
import codecs
import csv
import paddlehub as hub
from paddlehub.dataset import InputExample
from paddlehub.dataset.base_nlp_dataset import BaseNLPDataset
class MyDS(BaseNLPDataset):
def __init__(self):
super(SciIE, self).__init__(
base_path="/home/aistudio/work/data/material",
train_file="train.txt",
dev_file="dev.txt",
test_file="test.txt",
label_file=None,
label_list=[
"B", "I", "E", "S", "O"
],
)
def _read_file(self, input_file, phase=None):
"""Reads a tab separated value file."""
with codecs.open(input_file, "r", encoding="UTF-8") as f:
reader = csv.reader(f, delimiter="\t", quotechar=None)
examples = []
seq_id = 0
header = next(reader) # skip header
for line in reader:
example = InputExample(
guid=seq_id, label=line[1], text_a=line[0])
seq_id += 1
examples.append(example)
return examples
module = hub.Module(name="ernie_v2_eng_base")
inputs, outputs, program = module.context(trainable=True, max_seq_len=128)
dataset = MyDS()
reader = hub.reader.SequenceLabelReader(
dataset=dataset,
vocab_path=module.get_vocab_path(),
max_seq_len=128
)
strategy = hub.AdamWeightDecayStrategy(
learning_rate=5e-5,
weight_decay=0.01,
warmup_proportion=0.0,
lr_scheduler="linear_decay",
)
config = hub.RunConfig(use_cuda=True, num_epoch=3, batch_size=32, strategy=strategy)
sequence_output = outputs["sequence_output"]
# feed_list的Tensor顺序不可以调整
feed_list = [
inputs["input_ids"].name, inputs["position_ids"].name,
inputs["segment_ids"].name, inputs["input_mask"].name
]
seq_label_task = hub.SequenceLabelTask(
data_reader=reader,
feature=sequence_output,
feed_list=feed_list,
max_seq_len=128,
num_classes=dataset.num_labels,
config=config,
add_crf=False
)
seq_label_task.finetune_and_eval()
- 报错
2020-04-28 01:16:59,329-WARNING: paddle.fluid.layers.py_reader() may be deprecated in the near future. Please use paddle.fluid.io.DataLoader.from_generator() instead. [2020-04-28 01:16:59,615] [ INFO] - Strategy with scheduler: {'warmup': 0.0, 'linear_decay': {'start_point': 0.0, 'end_learning_rate': 0}, 'noam_decay': False, 'discriminative': {'blocks': 0, 'factor': 2.6}, 'gradual_unfreeze': 0, 'slanted_triangle': {'cut_fraction': 0.0, 'ratio': 32}}, regularization: {'L2': 0.0, 'L2SP': 0.0, 'weight_decay': 0.01} and clip: {'GlobalNorm': 1.0, 'Norm': 0.0} [2020-04-28 01:18:05,671] [ INFO] - Try loading checkpoint from ckpt_20200428011654/ckpt.meta [2020-04-28 01:18:05,672] [ INFO] - PaddleHub model checkpoint not found, start from scratch... [2020-04-28 01:18:05,779] [ INFO] - PaddleHub finetune start 2020-04-28 01:18:05,784-WARNING: Your decorated reader has raised an exception! Exception in thread Thread-5: Traceback (most recent call last): File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/threading.py", line 926, in _bootstrap_inner self.run() File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/threading.py", line 870, in run self._target(*self._args, **self._kwargs) File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddle/fluid/layers/io.py", line 491, in provider_thread six.reraise(*sys.exc_info()) File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/six.py", line 693, in reraise raise value File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddle/fluid/layers/io.py", line 472, in provider_thread for tensors in func(): File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddle/fluid/layers/io.py", line 523, in tensor_provider for slots in paddle_reader(): File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddle/fluid/data_feeder.py", line 488, in reader_creator for item in reader(): File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddlehub/reader/nlp_reader.py", line 257, in wrapper examples, batch_size, phase=phase): File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddlehub/reader/nlp_reader.py", line 187, in _prepare_batch_data self.tokenizer, phase) File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddlehub/reader/nlp_reader.py", line 463, in _convert_example_to_record for label in labels] + [no_entity_id] File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddlehub/reader/nlp_reader.py", line 463, in for label in labels] + [no_entity_id] KeyError: 'O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O'
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddle/fluid/executor.py:782: UserWarning: The following exception is not an EOF exception. "The following exception is not an EOF exception.") ---------------------------------------------------------------------------EnforceNotMet Traceback (most recent call last) in 17 ) 18 ---> 19 seq_label_task.finetune_and_eval() /opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddlehub/finetune/task/base_task.py in finetune_and_eval(self) 761 762 def finetune_and_eval(self): --> 763 return self.finetune(do_eval=True) 764 765 def finetune(self, do_eval=False): /opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddlehub/finetune/task/base_task.py in finetune(self, do_eval) 773 while self.current_epoch <= self.config.num_epoch: 774 self.config.strategy.step() --> 775 run_states = self._run(do_eval=do_eval) 776 self.env.current_epoch += 1 777 /opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddlehub/finetune/task/base_task.py in _run(self, do_eval) 826 with fluid.program_guard(self.main_program, self.startup_program): 827 if self.config.use_pyreader: --> 828 return self._run_with_py_reader(do_eval=do_eval) 829 return self._run_with_data_feeder(do_eval=do_eval) 830 /opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddlehub/finetune/task/base_task.py in _run_with_py_reader(self, do_eval) 904 fetch_result = self.exe.run( 905 self.main_program_to_be_run, --> 906 fetch_list=self.fetch_list) 907 else: 908 fetch_result = self.exe.run( /opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddle/fluid/executor.py in run(self, program, feed, fetch_list, feed_var_name, fetch_var_name, scope, return_numpy, use_program_cache) 781 warnings.warn( 782 "The following exception is not an EOF exception.") --> 783 six.reraise(*sys.exc_info()) 784 785 def _run_impl(self, program, feed, fetch_list, feed_var_name, /opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/six.py in reraise(tp, value, tb) 691 if value.traceback is not tb: 692 raise value.with_traceback(tb) --> 693 raise value 694 finally: 695 value = None /opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddle/fluid/executor.py in run(self, program, feed, fetch_list, feed_var_name, fetch_var_name, scope, return_numpy, use_program_cache) 776 scope=scope, 777 return_numpy=return_numpy, --> 778 use_program_cache=use_program_cache) 779 except Exception as e: 780 if not isinstance(e, core.EOFException): /opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddle/fluid/executor.py in _run_impl(self, program, feed, fetch_list, feed_var_name, fetch_var_name, scope, return_numpy, use_program_cache) 841 fetch_list=fetch_list, 842 fetch_var_name=fetch_var_name, --> 843 return_numpy=return_numpy) 844 845 def _run_program(self, program, feed, fetch_list, feed_var_name, /opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddle/fluid/executor.py in _run_parallel(self, program, scope, feed, fetch_list, fetch_var_name, return_numpy) 675 676 fetch_var_names = list(map(_to_name_str, fetch_list)) --> 677 tensors = exe.run(fetch_var_names)._move_to_list() 678 return as_numpy(tensors) if return_numpy else tensors 679 EnforceNotMet:
C++ Call Stacks (More useful to developers):
0 std::string paddle::platform::GetTraceBackString<std::string const&>(std::string const&, char const*, int) 1 paddle::platform::EnforceNotMet::EnforceNotMet(std::string const&, char const*, int) 2 paddle::operators::reader::BlockingQueue<std::vector<paddle::framework::LoDTensor, std::allocatorpaddle::framework::LoDTensor > >::Receive(std::vector<paddle::framework::LoDTensor, std::allocatorpaddle::framework::LoDTensor >) 3 paddle::operators::reader::PyReader::ReadNext(std::vector<paddle::framework::LoDTensor, std::allocatorpaddle::framework::LoDTensor >) 4 paddle::framework::ReaderHolder::ReadNext(std::vector<paddle::framework::LoDTensor, std::allocatorpaddle::framework::LoDTensor >) 5 paddle::operators::ReadOp::RunImpl(paddle::framework::Scope const&, paddle::platform::Place const&) const 6 paddle::framework::OperatorBase::Run(paddle::framework::Scope const&, paddle::platform::Place const&) 7 paddle::framework::details::ComputationOpHandle::RunImpl() 8 paddle::framework::details::FastThreadedSSAGraphExecutor::RunOpSync(paddle::framework::details::OpHandleBase) 9 paddle::framework::details::FastThreadedSSAGraphExecutor::RunOp(paddle::framework::details::OpHandleBase*, std::shared_ptr<paddle::framework::BlockingQueue > const&, unsigned long*) 10 std::_Function_handler<std::unique_ptr<std::__future_base::_Result_base, std::__future_base::_Result_base::_Deleter> (), std::__future_base::_Task_setter<std::unique_ptr<std::__future_base::_Result, std::__future_base::_Result_base::_Deleter>, void> >::_M_invoke(std::_Any_data const&) 11 std::__future_base::_State_base::_M_do_set(std::function<std::unique_ptr<std::__future_base::_Result_base, std::__future_base::_Result_base::_Deleter> ()>&, bool&) 12 ThreadPool::ThreadPool(unsigned long)::{lambda()#1 (closed)}::operator()() const
Python Call Stacks (More useful to users):
File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddle/fluid/framework.py", line 2525, in append_op attrs=kwargs.get("attrs", None)) File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddle/fluid/layer_helper.py", line 43, in append_op return self.main_program.current_block().append_op(*args, **kwargs) File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddle/fluid/layers/io.py", line 889, in read_file type='read', inputs={'Reader': [reader]}, outputs={'Out': out}) File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddlehub/finetune/task/base_task.py", line 352, in _build_env py_vars = fluid.layers.read_file(self.env.py_reader) File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddlehub/finetune/task/base_task.py", line 474, in main_program self._build_env() File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddlehub/finetune/task/base_task.py", line 732, in load_checkpoint main_program=self.main_program) File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddlehub/finetune/task/base_task.py", line 297, in init_if_necessary if not self.load_checkpoint(): File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddlehub/finetune/task/base_task.py", line 769, in finetune self.init_if_necessary() File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddlehub/finetune/task/base_task.py", line 763, in finetune_and_eval return self.finetune(do_eval=True) File "", line 19, in seq_label_task.finetune_and_eval() File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3265, in run_code exec(code_obj, self.user_global_ns, self.user_ns) File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3189, in run_ast_nodes if (yield from self.run_code(code, result)): File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3018, in run_cell_async interactivity=interactivity, compiler=compiler, result=result) File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/IPython/core/async_helpers.py", line 67, in _pseudo_sync_runner coro.send(None) File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 2843, in _run_cell return runner(coro) File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 2817, in run_cell raw_cell, store_history, silent, shell_futures) File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/ipykernel/zmqshell.py", line 536, in run_cell return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs) File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/ipykernel/ipkernel.py", line 294, in do_execute res = shell.run_cell(code, store_history=store_history, silent=silent) File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/tornado/gen.py", line 326, in wrapper yielded = next(result) File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/ipykernel/kernelbase.py", line 534, in execute_request user_expressions, allow_stdin, File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/tornado/gen.py", line 326, in wrapper yielded = next(result) File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/ipykernel/kernelbase.py", line 267, in dispatch_shell yield gen.maybe_future(handler(stream, idents, msg)) File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/tornado/gen.py", line 326, in wrapper yielded = next(result) File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/ipykernel/kernelbase.py", line 357, in process_one yield gen.maybe_future(dispatch(*args)) File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/tornado/gen.py", line 1147, in run yielded = self.gen.send(value) File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/tornado/gen.py", line 1233, in inner self.run() File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/tornado/stack_context.py", line 300, in null_wrapper return fn(*args, **kwargs) File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/tornado/ioloop.py", line 758, in _run_callback ret = callback() File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/asyncio/events.py", line 88, in _run self._context.run(self._callback, *self._args) File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/asyncio/base_events.py", line 1771, in _run_once handle._run() File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/asyncio/base_events.py", line 534, in run_forever self._run_once() File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/tornado/platform/asyncio.py", line 132, in start self.asyncio_loop.run_forever() File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/ipykernel/kernelapp.py", line 505, in start self.io_loop.start() File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/traitlets/config/application.py", line 664, in launch_instance app.start() File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/ipykernel_launcher.py", line 16, in app.launch_new_instance() File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/runpy.py", line 85, in _run_code exec(code, run_globals) File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/runpy.py", line 193, in _run_module_as_main "main", mod_spec)
Error Message Summary:
Error: Blocking queue is killed because the data reader raises an exception [Hint: Expected killed_ != true, but received killed_:1 == true:1.] at (/paddle/paddle/fluid/operators/reader/blocking_queue.h:141) [operator < read > error]