layers.where causes a core dump
Created by: zhangyimi
I raised a related issue earlier, #25166; the conclusion there was that the crash was caused by multi-GPU training, but I am now hitting the same problem on a single GPU as well.
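For context, the failing code path is roughly the following. This is only a minimal sketch of the call pattern, not a confirmed reproduction: the body of `masked_select` is my assumption about what `parser/nets/nn.py` does, and the tensors are made-up toy data. The relevant call is `layers.where(mask)`, which dispatches to `core.ops.where_index` in dygraph mode.

```python
import numpy as np
import paddle.fluid as fluid
from paddle.fluid import layers
from paddle.fluid.dygraph import to_variable

def masked_select(input, mask):
    # layers.where returns the (rank-2, int64) indices of the True entries in
    # `mask`; in dygraph mode this is the core.ops.where_index call that fails.
    select = layers.where(mask)
    # Gather the elements of `input` at those indices.
    return layers.gather_nd(input, select)

with fluid.dygraph.guard(fluid.CUDAPlace(0)):
    arc_preds = to_variable(np.array([1, 2, 3, 4], dtype="int64"))
    arc_golds = to_variable(np.array([1, 0, 3, 4], dtype="int64"))
    # Which predicted arcs match gold, restricted by a token mask.
    correct = layers.cast(layers.equal(arc_preds, arc_golds), "int64")
    mask = layers.cast(to_variable(np.array([1, 1, 0, 1], dtype="int32")), "bool")
    arc_mask = masked_select(correct, mask)
    print(arc_mask.numpy())
```

The crash happens at the `layers.where(mask)` call during evaluation: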
Traceback (most recent call last):
  File "run.py", line 249, in <module>
    train(env)
  File "run.py", line 101, in train
    puncts)
  File "", line 2, in epoch_evaluate
  File "/home/work/zhangao/ZoooSP/python/miniconda3/envs/zwp/lib/python3.7/site-packages/paddle/fluid/dygraph/base.py", line 277, in __impl__
    return func(*args, **kwargs)
  File "/mnt/zwp/0623/baidu/nlp/dependency-parser/dependency_parser/parser/model.py", line 213, in epoch_evaluate
    metric(arc_preds, rel_preds, arcs, rels, mask)
  File "/mnt/zwp/0623/baidu/nlp/dependency-parser/dependency_parser/parser/utils/metric.py", line 43, in __call__
    arc_mask = nn.masked_select(arc_preds == arc_golds, mask)
  File "/mnt/zwp/0623/baidu/nlp/dependency-parser/dependency_parser/parser/nets/nn.py", line 159, in masked_select
    select = layers.where(mask)
  File "/home/work/zhangao/ZoooSP/python/miniconda3/envs/zwp/lib/python3.7/site-packages/paddle/fluid/layers/nn.py", line 14882, in where
    return core.ops.where_index(condition)
paddle.fluid.core_avx.EnforceNotMet:
C++ Call Stacks (More useful to developers):
0   std::string paddle::platform::GetTraceBackString<char const*>(char const*&&, char const*, int)
1   paddle::platform::EnforceNotMet::EnforceNotMet(std::__exception_ptr::exception_ptr, char const*, int)
2   paddle::platform::GpuMemcpyAsync(void*, void const*, unsigned long, cudaMemcpyKind, CUstream_st*)
3   void paddle::memory::Copy<paddle::platform::CPUPlace, paddle::platform::CUDAPlace>(paddle::platform::CPUPlace, void*, paddle::platform::CUDAPlace, void const*, unsigned long, CUstream_st*)
4   paddle::framework::TensorCopy(paddle::framework::Tensor const&, paddle::platform::Place const&, paddle::platform::DeviceContext const&, paddle::framework::Tensor*)
5   paddle::operators::CUDAWhereIndexKernel::Compute(paddle::framework::ExecutionContext const&) const
6   std::_Function_handler<void (paddle::framework::ExecutionContext const&), paddle::framework::OpKernelRegistrarFunctor<paddle::platform::CUDAPlace, false, 2ul, paddle::operators::CUDAWhereIndexKernel, ...>::operator()(char const*, char const*, int) const::{lambda(paddle::framework::ExecutionContext const&)#1}>::_M_invoke(std::_Any_data const&, paddle::framework::ExecutionContext const&)
7   paddle::imperative::PreparedOp::Run(std::map<std::string, std::vector<std::shared_ptr<paddle::imperative::VarBase>>> const&, std::map<std::string, std::vector<std::shared_ptr<paddle::imperative::VarBase>>> const&, std::unordered_map<std::string, boost::variant<...>> const&)
8   paddle::imperative::OpBase::Run(paddle::framework::OperatorBase const&, std::map<std::string, std::vector<std::shared_ptr<paddle::imperative::VarBase>>> const&, std::map<std::string, std::vector<std::shared_ptr<paddle::imperative::VarBase>>> const&, std::unordered_map<std::string, boost::variant<...>> const&, paddle::platform::Place const&)
9   paddle::imperative::Tracer::TraceOp(std::string const&, std::map<std::string, std::vector<std::shared_ptr<paddle::imperative::VarBase>>> const&, std::map<std::string, std::vector<std::shared_ptr<paddle::imperative::VarBase>>> const&, std::unordered_map<std::string, boost::variant<...>>, paddle::platform::Place const&, bool)
10  paddle::imperative::Tracer::TraceOp(std::string const&, std::map<std::string, std::vector<std::shared_ptr<paddle::imperative::VarBase>>> const&, std::map<std::string, std::vector<std::shared_ptr<paddle::imperative::VarBase>>> const&, std::unordered_map<std::string, boost::variant<...>>)
Error Message Summary:
ExternalError: Cuda error(77), an illegal memory access was encountered. [Advise: The device encountered a load or store instruction on an invalid memory address. This leaves the process in an inconsistent state and any further CUDA work will return the same error. To continue using CUDA, the process must be terminated and relaunched. ] at (/paddle/paddle/fluid/platform/gpu_info.cc:276)
W0628 00:46:37.419998 20601 init.cc:216] Warning: PaddlePaddle catches a failure signal, it may not work properly
W0628 00:46:37.420078 20601 init.cc:218] You could check whether you killed PaddlePaddle thread/process accidentally or report the case to PaddlePaddle
W0628 00:46:37.420094 20601 init.cc:221] The detail failure signal is:
W0628 00:46:37.420101 20601 init.cc:224] *** Aborted at 1593276397 (unix time) try "date -d @1593276397" if you are using GNU date ***
W0628 00:46:37.421769 20601 init.cc:224] PC: @ 0x0 (unknown)
W0628 00:46:37.421870 20601 init.cc:224] *** SIGSEGV (@0x8) received by PID 20601 (TID 0x7f485a397740) from PID 8; stack trace: ***
W0628 00:46:37.423175 20601 init.cc:224] @ 0x7f4859f7b5d0 (unknown)
W0628 00:46:37.427018 20601 init.cc:224] @ 0x7f46d6d4e82f paddle::platform::proto::cudaerrorDesc::ByteSizeLong()
W0628 00:46:37.429356 20601 init.cc:224] @ 0x7f46d36df722 paddle::platform::build_nvidia_error_msg()
W0628 00:46:37.430402 20601 init.cc:224] @ 0x7f46d6cd4fbf _ZNSt17_Function_handlerIFvP11CUstream_stEZN6paddle8platform22CudaStreamResourcePoolC1EvEUlS1_E0_E9_M_invokeERKSt9_Any_dataS1_
W0628 00:46:37.431753 20601 init.cc:224] @ 0x7f46d6cd56e3 std::_Sp_counted_ptr<>::_M_dispose()
W0628 00:46:37.433069 20601 init.cc:224] @ 0x7f46d6cd5bcc paddle::platform::CudaStreamResourcePool::~CudaStreamResourcePool()
W0628 00:46:37.434511 20601 init.cc:224] @ 0x7f4859bd8c29 __run_exit_handlers
W0628 00:46:37.435712 20601 init.cc:224] @ 0x7f4859bd8c77 __GI_exit
W0628 00:46:37.436900 20601 init.cc:224] @ 0x7f4859bc149c __libc_start_main
W0628 00:46:37.437292 20601 init.cc:224] @ 0x55dd973e23c0 (unknown)
run_train.sh: line 21: 20601 Segmentation fault (core dumped) python -u run.py --mode=train --use_cuda --feat=char --preprocess --output_dir=exp/baidu --train_data_path=data/baidu/train.txt --valid_data_path=data/baidu/test.txt --test_data_path=data/baidu/test.txt --pretrained_embedding_dir=data/cc.zh.300.vec --unk=UNK --buckets=15
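Since CUDA kernel launches are asynchronous, the illegal access only surfaces at the GpuMemcpyAsync inside where_index, so the kernel reported above may not be the one that actually performed the bad access. A debugging sketch to localize the real failing kernel (this uses the generic CUDA_LAUNCH_BLOCKING environment variable, not a Paddle-specific flag) is to force synchronous launches before Paddle creates its CUDA context:

```python
# Debugging sketch: force synchronous CUDA launches so the failing kernel is
# reported at its real call site rather than at a later asynchronous memcpy.
# CUDA_LAUNCH_BLOCKING must be set before the CUDA context is created,
# i.e. before importing paddle in run.py.
import os
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

import paddle.fluid as fluid  # imported only after the env var is set
```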