用V100训练mobilenet-ssd, 开启fp16报错
Created by: FibonacciSun
不开启 fp16 时训练正常;开启 fp16 之后无法训练,报错信息见下文。环境信息:Paddle 版本 1.6.1,CUDA 版本 9.2,cuDNN 版本 7.6.5.32,PaddleDetection 分支 release/0.1(最新提交时间为 1 月 30 日)。
/usr/local/lib/python3.5/dist-packages/paddle/fluid/executor.py:774: UserWarning: The following exception is not an EOF exception. "The following exception is not an EOF exception.") Traceback (most recent call last): File "tools/train.py", line 340, in <module> main() File "tools/train.py", line 246, in main outs = exe.run(compiled_train_prog, fetch_list=train_values) File "/usr/local/lib/python3.5/dist-packages/paddle/fluid/executor.py", line 775, in run six.reraise(*sys.exc_info()) File "/usr/lib/python3/dist-packages/six.py", line 686, in reraise raise value File "/usr/local/lib/python3.5/dist-packages/paddle/fluid/executor.py", line 770, in run use_program_cache=use_program_cache) File "/usr/local/lib/python3.5/dist-packages/paddle/fluid/executor.py", line 829, in _run_impl return_numpy=return_numpy) File "/usr/local/lib/python3.5/dist-packages/paddle/fluid/executor.py", line 669, in _run_parallel tensors = exe.run(fetch_var_names)._move_to_list() paddle.fluid.core_avx.EnforceNotMet:
C++ Call Stacks (More useful to developers):
0 std::string paddle::platform::GetTraceBackString<std::string const&>(std::string const&, char const*, int) 1 paddle::platform::EnforceNotMet::EnforceNotMet(std::string const&, char const*, int) 2 paddle::operators::ConvOp::GetExpectedKernelType(paddle::framework::ExecutionContext const&) const 3 paddle::framework::OperatorWithKernel::ChooseKernel(paddle::framework::RuntimeContext const&, paddle::framework::Scope const&, boost::variant<paddle::platform::CUDAPlace, paddle::platform::CPUPlace, paddle::platform::CUDAPinnedPlace, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_> const&) const 4 paddle::framework::OperatorWithKernel::RunImpl(paddle::framework::Scope const&, boost::variant<paddle::platform::CUDAPlace, paddle::platform::CPUPlace, paddle::platform::CUDAPinnedPlace, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_> const&, paddle::framework::RuntimeContext*) const 5 paddle::framework::OperatorWithKernel::RunImpl(paddle::framework::Scope const&, boost::variant<paddle::platform::CUDAPlace, paddle::platform::CPUPlace, 
paddle::platform::CUDAPinnedPlace, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_> const&) const 6 paddle::framework::OperatorBase::Run(paddle::framework::Scope const&, boost::variant<paddle::platform::CUDAPlace, paddle::platform::CPUPlace, paddle::platform::CUDAPinnedPlace, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_> const&) 7 paddle::framework::details::ComputationOpHandle::RunImpl() 8 paddle::framework::details::FastThreadedSSAGraphExecutor::RunOpSync(paddle::framework::details::OpHandleBase*) 9 paddle::framework::details::FastThreadedSSAGraphExecutor::RunOp(paddle::framework::details::OpHandleBase*, std::shared_ptr<paddle::framework::BlockingQueue > const&, unsigned long*) 10 std::_Function_handler<std::unique_ptr<std::__future_base::_Result_base, std::__future_base::_Result_base::_Deleter> (), std::__future_base::_Task_setter<std::unique_ptr<std::__future_base::_Result, std::__future_base::_Result_base::_Deleter>, void> >::_M_invoke(std::_Any_data const&) 11 
std::__future_base::_State_base::_M_do_set(std::function<std::unique_ptr<std::__future_base::_Result_base, std::__future_base::_Result_base::_Deleter> ()>&, bool&) 12 ThreadPool::ThreadPool(unsigned long)::{lambda()#1}::operator()() const
Python Call Stacks (More useful to users):
File "/usr/local/lib/python3.5/dist-packages/paddle/fluid/framework.py", line 2459, in append_op attrs=kwargs.get("attrs", None)) File "/usr/local/lib/python3.5/dist-packages/paddle/fluid/layer_helper.py", line 43, in append_op return self.main_program.current_block().append_op(*args, **kwargs) File "/usr/local/lib/python3.5/dist-packages/paddle/fluid/layers/nn.py", line 2803, in conv2d "data_format": data_format, File "/home/sunqiuwei/paddlepaddle/work/PaddleDetection/ppdet/modeling/backbones/mobilenet.py", line 83, in _conv_norm bias_attr=False) File "/home/sunqiuwei/paddlepaddle/work/PaddleDetection/ppdet/modeling/backbones/mobilenet.py", line 115, in depthwise_separable name=name + "_dw") File "/home/sunqiuwei/paddlepaddle/work/PaddleDetection/ppdet/modeling/backbones/mobilenet.py", line 160, in __call__ out, 32, 64, 32, 1, scale, name=self.prefix_name + "conv2_1") File "/home/sunqiuwei/paddlepaddle/work/PaddleDetection/ppdet/modeling/architectures/ssd.py", line 71, in build body_feats = self.backbone(im) File "/home/sunqiuwei/paddlepaddle/work/PaddleDetection/ppdet/modeling/architectures/ssd.py", line 94, in train return self.build(feed_vars, 'train') File "tools/train.py", line 128, in main train_fetches = model.train(feed_vars) File "tools/train.py", line 340, in <module> main()
Error Message Summary:
PaddleCheckError: Expected library == framework::LibraryType::kCUDNN, but received library:PLAIN != framework::LibraryType::kCUDNN:CUDNN. float16 can only be used when CUDNN is used at [/paddle/paddle/fluid/operators/conv_op.cc:169] [operator < depthwise_conv2d > error]