运行 eval.py 和 vis.py 报错 CUDNN_STATUS_EXECUTION_FAILED
Created by: KooSung
运行 eval.py 和 vis.py 时会报错：
PaddleCheckError: CUDNN_STATUS_EXECUTION_FAILED at [/paddle/paddle/fluid/operators/conv_cudnn_op.cu:288]
$ python pdseg/vis.py --use_gpu --cfg ./configs/deeplabv3p_xception65_pet.yaml
{'AUG': {'AUG_METHOD': 'unpadding',
'FIX_RESIZE_SIZE': (512, 512),
'FLIP': False,
'FLIP_RATIO': 0.5,
'INF_RESIZE_VALUE': 500,
'MAX_RESIZE_VALUE': 600,
'MAX_SCALE_FACTOR': 1.25,
'MIN_RESIZE_VALUE': 400,
'MIN_SCALE_FACTOR': 0.75,
'MIRROR': True,
'RICH_CROP': {'ASPECT_RATIO': 0.33,
'BLUR': False,
'BLUR_RATIO': 0.1,
'BRIGHTNESS_JITTER_RATIO': 0.5,
'CONTRAST_JITTER_RATIO': 0.5,
'ENABLE': False,
'MAX_ROTATION': 15,
'MIN_AREA_RATIO': 0.5,
'SATURATION_JITTER_RATIO': 0.5},
'SCALE_STEP_SIZE': 0.25},
'BATCH_SIZE': 4,
'DATALOADER': {'BUF_SIZE': 256, 'NUM_WORKERS': 2},
'DATASET': {'DATA_DIM': 3,
'DATA_DIR': '/mnt/hdd/tangwei/autodrive/awesome-semantic-segmentation-pytorch/datasets/citys/',
'IGNORE_INDEX': 255,
'IMAGE_TYPE': 'rgb',
'NUM_CLASSES': 10,
'PADDING_VALUE': [127.5, 127.5, 127.5],
'SEPARATOR': ' ',
'TEST_FILE_LIST': './dataset/mini_pet/file_list/test_list.txt',
'TEST_TOTAL_IMAGES': 3000,
'TRAIN_FILE_LIST': './dataset/mini_pet/file_list/train_list.txt',
'TRAIN_TOTAL_IMAGES': 4990,
'VAL_FILE_LIST': './dataset/mini_pet/file_list/val_list.txt',
'VAL_TOTAL_IMAGES': 1997,
'VIS_FILE_LIST': './dataset/mini_pet/file_list/test_list.txt'},
'EVAL_CROP_SIZE': (512, 512),
'FREEZE': {'MODEL_FILENAME': '__model__',
'PARAMS_FILENAME': '__params__',
'SAVE_DIR': 'freeze_model'},
'MEAN': [0.5, 0.5, 0.5],
'MODEL': {'BN_MOMENTUM': 0.99,
'DEEPLAB': {'ASPP_WITH_SEP_CONV': True,
'BACKBONE': 'xception_65',
'DECODER_USE_SEP_CONV': True,
'DEPTH_MULTIPLIER': 1.0,
'ENABLE_DECODER': True,
'ENCODER_WITH_ASPP': True,
'OUTPUT_STRIDE': 16},
'DEFAULT_EPSILON': 1e-05,
'DEFAULT_GROUP_NUMBER': 32,
'DEFAULT_NORM_TYPE': 'bn',
'FP16': False,
'ICNET': {'DEPTH_MULTIPLIER': 0.5, 'LAYERS': 50},
'MODEL_NAME': 'deeplabv3p',
'MULTI_LOSS_WEIGHT': [1.0],
'PSPNET': {'DEPTH_MULTIPLIER': 1, 'LAYERS': 50},
'SCALE_LOSS': 'DYNAMIC',
'UNET': {'UPSAMPLE_MODE': 'bilinear'}},
'NUM_TRAINERS': 1,
'SOLVER': {'BEGIN_EPOCH': 1,
'DECAY_EPOCH': [10, 20],
'GAMMA': 0.1,
'LOSS': ['softmax_loss'],
'LR': 0.005,
'LR_POLICY': 'poly',
'MOMENTUM': 0.9,
'MOMENTUM2': 0.999,
'NUM_EPOCHS': 100,
'OPTIMIZER': 'sgd',
'POWER': 0.9,
'WEIGHT_DECAY': 4e-05},
'STD': [0.5, 0.5, 0.5],
'TEST': {'TEST_MODEL': './saved_model/deeplabv3p_xception65_bn_pet/final'},
'TRAIN': {'MODEL_SAVE_DIR': './saved_model/deeplabv3p_xception65_bn_pet/',
'PRETRAINED_MODEL_DIR': './pretrained_model/deeplabv3p_xception65_bn_coco/',
'RESUME_MODEL_DIR': '',
'SNAPSHOT_EPOCH': 10,
'SYNC_BATCH_NORM': False},
'TRAINER_ID': 0,
'TRAIN_CROP_SIZE': (512, 512)}
W1104 01:40:07.852170 2170 device_context.cc:235] Please NOTE: device: 0, CUDA Capability: 75, Driver API Version: 10.1, Runtime API Version: 10.0
W1104 01:40:07.855646 2170 device_context.cc:243] device: 0, cuDNN Version: 7.6.
/home/tw/anaconda3/envs/paddle/lib/python3.7/site-packages/paddle/fluid/executor.py:774: UserWarning: The following exception is not an EOF exception.
"The following exception is not an EOF exception.")
Traceback (most recent call last):
File "pdseg/vis.py", line 293, in <module>
visualize(cfg, **args.__dict__)
File "pdseg/vis.py", line 196, in visualize
return_numpy=True)
File "/home/tw/anaconda3/envs/paddle/lib/python3.7/site-packages/paddle/fluid/executor.py", line 775, in run
six.reraise(*sys.exc_info())
File "/home/tw/.local/lib/python3.7/site-packages/six.py", line 693, in reraise
raise value
File "/home/tw/anaconda3/envs/paddle/lib/python3.7/site-packages/paddle/fluid/executor.py", line 770, in run
use_program_cache=use_program_cache)
File "/home/tw/anaconda3/envs/paddle/lib/python3.7/site-packages/paddle/fluid/executor.py", line 817, in _run_impl
use_program_cache=use_program_cache)
File "/home/tw/anaconda3/envs/paddle/lib/python3.7/site-packages/paddle/fluid/executor.py", line 894, in _run_program
fetch_var_name)
paddle.fluid.core_avx.EnforceNotMet:
--------------------------------------------
C++ Call Stacks (More useful to developers):
--------------------------------------------
0 std::string paddle::platform::GetTraceBackString<std::string const&>(std::string const&, char const*, int)
1 paddle::platform::EnforceNotMet::EnforceNotMet(std::string const&, char const*, int)
2 paddle::operators::CUDNNConvOpKernel<float>::Compute(paddle::framework::ExecutionContext const&) const
3 std::_Function_handler<void (paddle::framework::ExecutionContext const&), paddle::framework::OpKernelRegistrarFunctor<paddle::platform::CUDAPlace, false, 0ul, paddle::operators::CUDNNConvOpKernel<float>, paddle::operators::CUDNNConvOpKernel<double>, paddle::operators::CUDNNConvOpKernel<paddle::platform::float16> >::operator()(char const*, char const*, int) const::{lambda(paddle::framework::ExecutionContext const&)#1}>::_M_invoke(std::_Any_data const&, paddle::framework::ExecutionContext const&)
4 paddle::framework::OperatorWithKernel::RunImpl(paddle::framework::Scope const&, boost::variant<paddle::platform::CUDAPlace, paddle::platform::CPUPlace, paddle::platform::CUDAPinnedPlace, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_> const&, paddle::framework::RuntimeContext*) const
5 paddle::framework::OperatorWithKernel::RunImpl(paddle::framework::Scope const&, boost::variant<paddle::platform::CUDAPlace, paddle::platform::CPUPlace, paddle::platform::CUDAPinnedPlace, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_> const&) const
6 paddle::framework::OperatorBase::Run(paddle::framework::Scope const&, boost::variant<paddle::platform::CUDAPlace, paddle::platform::CPUPlace, paddle::platform::CUDAPinnedPlace, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_> const&)
7 paddle::framework::Executor::RunPreparedContext(paddle::framework::ExecutorPrepareContext*, paddle::framework::Scope*, bool, bool, bool)
8 paddle::framework::Executor::Run(paddle::framework::ProgramDesc const&, paddle::framework::Scope*, int, bool, bool, std::vector<std::string, std::allocator<std::string> > const&, bool)
------------------------------------------
Python Call Stacks (More useful to users):
------------------------------------------
File "/home/tw/anaconda3/envs/paddle/lib/python3.7/site-packages/paddle/fluid/framework.py", line 2426, in append_op
attrs=kwargs.get("attrs", None))
File "/home/tw/anaconda3/envs/paddle/lib/python3.7/site-packages/paddle/fluid/layer_helper.py", line 43, in append_op
return self.main_program.current_block().append_op(*args, **kwargs)
File "/home/tw/anaconda3/envs/paddle/lib/python3.7/site-packages/paddle/fluid/layers/nn.py", line 2801, in conv2d
"data_format": data_format,
File "/mnt/hdd/tangwei/autodrive/PaddleSeg-release-v0.2.0/pdseg/models/libs/model_libs.py", line 125, in conv
return fluid.layers.conv2d(*args, **kargs)
File "/mnt/hdd/tangwei/autodrive/PaddleSeg-release-v0.2.0/pdseg/models/backbone/xception.py", line 140, in entry_flow
param_attr=param_attr))
File "/mnt/hdd/tangwei/autodrive/PaddleSeg-release-v0.2.0/pdseg/models/backbone/xception.py", line 101, in net
data = self.entry_flow(input)
File "/mnt/hdd/tangwei/autodrive/PaddleSeg-release-v0.2.0/pdseg/models/modeling/deeplab.py", line 225, in xception
decode_points=decode_point)
File "/mnt/hdd/tangwei/autodrive/PaddleSeg-release-v0.2.0/pdseg/models/modeling/deeplab.py", line 233, in deeplabv3p
data, decode_shortcut = xception(img)
File "/mnt/hdd/tangwei/autodrive/PaddleSeg-release-v0.2.0/pdseg/models/model_builder.py", line 172, in build_model
logits = model_func(image, class_num)
File "pdseg/vis.py", line 164, in visualize
pred, logit = build_model(test_prog, startup_prog, phase=ModelPhase.VISUAL)
File "pdseg/vis.py", line 293, in <module>
visualize(cfg, **args.__dict__)
----------------------
Error Message Summary:
----------------------
PaddleCheckError: CUDNN_STATUS_EXECUTION_FAILED at [/paddle/paddle/fluid/operators/conv_cudnn_op.cu:288]
[operator < conv2d > error]