运行run_ChnSentiCorp.sh报错
Created by: MrRace
在下载完模型和任务数据之后,运行 bash script/run_ChnSentiCorp.sh。
之后报错如下:
Theoretical memory usage in training: 10522.327 - 11023.390 MB
W0318 14:37:05.017385 13682 device_context.cc:263] Please NOTE: device: 0, CUDA Capability: 75, Driver API Version: 10.0, Runtime API Version: 9.0
W0318 14:37:05.017417 13682 device_context.cc:271] device: 0, cuDNN Version: 7.0.
Load pretraining parameters from /home/lab/liujiepeng/Framework/LARK/ERNIE/model/model_data/params.
Traceback (most recent call last):
File "run_classifier.py", line 283, in <module>
main(args)
File "run_classifier.py", line 204, in main
train_exe.run(fetch_list=[])
File "/home/lab/liujiepeng/Framework/LARK/venv_paddle/lib/python3.5/site-packages/paddle/fluid/parallel_executor.py", line 303, in run
self.executor.run(fetch_list, fetch_var_name)
paddle.fluid.core.EnforceNotMet: Invoke operator softmax error.
Python Callstacks:
File "/home/lab/liujiepeng/Framework/LARK/venv_paddle/lib/python3.5/site-packages/paddle/fluid/framework.py", line 1317, in append_op
attrs=kwargs.get("attrs", None))
File "/home/lab/liujiepeng/Framework/LARK/venv_paddle/lib/python3.5/site-packages/paddle/fluid/layer_helper.py", line 56, in append_op
return self.main_program.current_block().append_op(*args, **kwargs)
File "/home/lab/liujiepeng/Framework/LARK/venv_paddle/lib/python3.5/site-packages/paddle/fluid/layers/nn.py", line 1788, in softmax
attrs={"use_cudnn": use_cudnn})
File "/home/lab/liujiepeng/Framework/LARK/ERNIE/model/transformer_encoder.py", line 120, in scaled_dot_product_attention
weights = layers.softmax(product)
File "/home/lab/liujiepeng/Framework/LARK/ERNIE/model/transformer_encoder.py", line 148, in multi_head_attention
dropout_rate)
File "/home/lab/liujiepeng/Framework/LARK/ERNIE/model/transformer_encoder.py", line 274, in encoder_layer
name=name + '_multi_head_att')
File "/home/lab/liujiepeng/Framework/LARK/ERNIE/model/transformer_encoder.py", line 337, in encoder
name=name + '_layer_' + str(i))
File "/home/lab/liujiepeng/Framework/LARK/ERNIE/model/ernie.py", line 136, in _build_model
name='encoder')
File "/home/lab/liujiepeng/Framework/LARK/ERNIE/model/ernie.py", line 81, in __init__
self._build_model(src_ids, position_ids, sentence_ids, self_attn_mask)
File "/home/lab/liujiepeng/Framework/LARK/ERNIE/finetune/classifier.py", line 49, in create_model
use_fp16=args.use_fp16)
File "run_classifier.py", line 97, in main
ernie_config=ernie_config)
File "run_classifier.py", line 283, in <module>
main(args)
C++ Callstacks:
CUDNN_STATUS_EXECUTION_FAILED at [/paddle/paddle/fluid/operators/math/softmax.cu:56]
PaddlePaddle Call Stacks:
0 0x7fc944af6dbdp void paddle::platform::EnforceNotMet::Init<std::string>(std::string, char const*, int) + 365
1 0x7fc944af7107p paddle::platform::EnforceNotMet::EnforceNotMet(std::string const&, char const*, int) + 87
2 0x7fc94628bd8ep paddle::operators::math::SoftmaxCUDNNFunctor<float>::operator()(paddle::platform::CUDADeviceContext const&, paddle::framework::Tensor const*, paddle::framework::Tensor*) + 830
3 0x7fc9450ec423p paddle::operators::SoftmaxCUDNNKernel<float>::Compute(paddle::framework::ExecutionContext const&) const + 595
4 0x7fc9450ec4d3p std::_Function_handler<void (paddle::framework::ExecutionContext const&), paddle::framework::OpKernelRegistrarFunctor<paddle::platform::CUDAPlace, false, 0ul, paddle::operators::SoftmaxCUDNNKernel<float>, paddle::operators::SoftmaxCUDNNKernel<double>, paddle::operators::SoftmaxCUDNNKernel<paddle::platform::float16> >::operator()(char const*, char const*, int) const::{lambda(paddle::framework::ExecutionContext const&)#1}>::_M_invoke(std::_Any_data const&, paddle::framework::ExecutionContext const&) + 35
5 0x7fc946504c33p paddle::framework::OperatorWithKernel::RunImpl(paddle::framework::Scope const&, boost::variant<paddle::platform::CUDAPlace, paddle::platform::CPUPlace, paddle::platform::CUDAPinnedPlace, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_> const&) const + 659
6 0x7fc9465024a5p paddle::framework::OperatorBase::Run(paddle::framework::Scope const&, boost::variant<paddle::platform::CUDAPlace, paddle::platform::CPUPlace, paddle::platform::CUDAPinnedPlace, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_> const&) + 341
7 0x7fc94637881ap paddle::framework::details::ComputationOpHandle::RunImpl() + 250
8 0x7fc946371ef6p paddle::framework::details::OpHandleBase::Run(bool) + 118
9 0x7fc94630a10dp
10 0x7fc94630ab0dp paddle::framework::details::ThreadedSSAGraphExecutor::RunOp(std::shared_ptr<paddle::framework::BlockingQueue<paddle::framework::details::VarHandleBase*> > const&, paddle::framework::details::OpHandleBase*) + 1309
11 0x7fc94630c784p paddle::framework::details::ThreadedSSAGraphExecutor::Run(std::vector<std::string, std::allocator<std::string> > const&) + 2324
12 0x7fc94630380ap paddle::framework::details::ScopeBufferedSSAGraphExecutor::Run(std::vector<std::string, std::allocator<std::string> > const&) + 394
13 0x7fc944c31ca2p paddle::framework::ParallelExecutor::Run(std::vector<std::string, std::allocator<std::string> > const&, std::string const&) + 562
14 0x7fc944ae712ep
15 0x7fc944b21c4ep
16 0x4ea137p PyCFunction_Call + 119
17 0x53c176p PyEval_EvalFrameEx + 23030
18 0x5401efp
19 0x53b83fp PyEval_EvalFrameEx + 20671
20 0x53b294p PyEval_EvalFrameEx + 19220
21 0x53fc97p
22 0x5409bfp PyEval_EvalCode + 31
23 0x60cb42p
24 0x60efeap PyRun_FileExFlags + 154
25 0x60f7dcp PyRun_SimpleFileExFlags + 444
26 0x640256p Py_Main + 1110
27 0x4d0001p main + 225
28 0x7fc976ec5830p __libc_start_main + 240
29 0x5d6999p _start + 41
请问如何解决?谢谢!