报错信息“EnforceNotMet: Invoke operator fetch error.”
Created by: tmylla
我参照 5 种花卉分类的 ResNet 示例改做 102 类花卉分类，训练时却报错，而且是在运行一个 batch 之后才报错，这是为什么？
错误信息如下:
2019-09-16 23:10:09,740 - [line:549] - INFO: create prog success 2019-09-16 23:10:09,742 - [line:550] - INFO: train config: {'image_count': 6552, 'sgd_strategy': {'lr_decay': [1, 0.5, 0.25, 0.1, 0.01, 0.002], 'lr_epochs': [20, 40, 60, 80, 100], 'learning_rate': 0.002}, 'save_persistable_dir': './persistable-params', 'continue_train': False, 'label_dict': {}, 'image_enhance_strategy': {'need_crop': True, 'need_rotate': True, 'hue_delta': 18, 'need_distort': True, 'brightness_prob': 0.5, 'saturation_delta': 0.5, 'contrast_prob': 0.5, 'hue_prob': 0.5, 'brightness_delta': 0.125, 'contrast_delta': 0.5, 'need_flip': True, 'saturation_prob': 0.5}, 'momentum_strategy': {'lr_decay': [1, 0.5, 0.25, 0.1, 0.01, 0.002], 'lr_epochs': [20, 40, 60, 80, 100], 'learning_rate': 0.002}, 'adam_strategy': {'learning_rate': 0.002}, 'early_stop': {'successive_limit': 3, 'sample_frequency': 30, 'good_acc1': 0.85}, 'train_batch_size': 15, 'save_freeze_dir': './freeze-model', 'num_epochs': 40, 'mode': 'train', 'use_gpu': True, 'train_file_list': 'train.txt', 'mean_rgb': [127.5, 127.5, 127.5], 'input_size': [3, 224, 224], 'data_dir': 'data/data12479/hackathon-blossom-flower-classification/flower_data', 'rsm_strategy': {'lr_decay': [1, 0.5, 0.25, 0.1, 0.01, 0.002], 'lr_epochs': [20, 40, 60, 80, 100], 'learning_rate': 0.002}, 'class_dim': 102} 2019-09-16 23:10:09,743 - [line:551] - INFO: build input custom reader and data feeder 2019-09-16 23:10:09,747 - [line:564] - INFO: build newwork 2019-09-16 23:10:11,887 - [line:594] - INFO: current pass: 0, start read image 2019-09-16 23:10:18,417 - [line:609] - INFO: Pass 0, trainbatch 10, loss 7.132730960845947, acc1 0.06666667014360428, time 0.14 sec ---------------------------------------------------------------------------EnforceNotMet Traceback (most recent call last) in 646 init_log_config() 647 init_train_parameters() --> 648 train() in train() 598 loss, acc1, pred_ot = exe.run(main_program, 599 feed=feeder.feed(data), --> 600 
fetch_list=train_fetch_list) 601 t2 = time.time() 602 batch_id += 1 /opt/conda/envs/python35-paddle120-env/lib/python3.5/site-packages/paddle/fluid/executor.py in run(self, program, feed, fetch_list, feed_var_name, fetch_var_name, scope, return_numpy, use_program_cache) 648 scope=scope, 649 return_numpy=return_numpy, --> 650 use_program_cache=use_program_cache) 651 else: 652 if fetch_list and program.is_data_parallel and program.program and ( /opt/conda/envs/python35-paddle120-env/lib/python3.5/site-packages/paddle/fluid/executor.py in run(self, program, exe, feed, fetch_list, feed_var_name, fetch_var_name, scope, return_numpy, use_program_cache) 746 self.feed_data(program, feed, feed_var_name, scope) 747 if not use_program_cache: --> 748 exe.run(program.desc, scope, 0, True, True, fetch_var_name) 749 else: 750 exe.run_cached_prepared_ctx(ctx, scope, False, False, False) EnforceNotMet: Invoke operator fetch error. Python Callstacks: File "/opt/conda/envs/python35-paddle120-env/lib/python3.5/site-packages/paddle/fluid/framework.py", line 1748, in append_op attrs=kwargs.get("attrs", None)) File "/opt/conda/envs/python35-paddle120-env/lib/python3.5/site-packages/paddle/fluid/executor.py", line 437, in add_feed_fetch_ops attrs={'col': i}) File "/opt/conda/envs/python35-paddle120-env/lib/python3.5/site-packages/paddle/fluid/executor.py", line 744, in run fetch_var_name=fetch_var_name) File "/opt/conda/envs/python35-paddle120-env/lib/python3.5/site-packages/paddle/fluid/executor.py", line 650, in run use_program_cache=use_program_cache) File "", line 600, in train fetch_list=train_fetch_list) File "", line 648, in train() File "/opt/conda/envs/python35-paddle120-env/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 3265, in run_code exec(code_obj, self.user_global_ns, self.user_ns) File "/opt/conda/envs/python35-paddle120-env/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 3183, in run_ast_nodes if (yield from self.run_code(code, 
result)): File "/opt/conda/envs/python35-paddle120-env/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 3018, in run_cell_async interactivity=interactivity, compiler=compiler, result=result) File "/opt/conda/envs/python35-paddle120-env/lib/python3.5/site-packages/IPython/core/async_helpers.py", line 67, in pseudo_sync_runner coro.send(None) File "/opt/conda/envs/python35-paddle120-env/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2843, in run_cell return runner(coro) File "/opt/conda/envs/python35-paddle120-env/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2817, in run_cell raw_cell, store_history, silent, shell_futures) File "/opt/conda/envs/python35-paddle120-env/lib/python3.5/site-packages/ipykernel/zmqshell.py", line 536, in run_cell return super(ZMQInteractiveShell, self).run_cell(args, kwargs) File "/opt/conda/envs/python35-paddle120-env/lib/python3.5/site-packages/ipykernel/ipkernel.py", line 294, in do_execute res = shell.run_cell(code, store_history=store_history, silent=silent) File "/opt/conda/envs/python35-paddle120-env/lib/python3.5/site-packages/tornado/gen.py", line 326, in wrapper yielded = next(result) File "/opt/conda/envs/python35-paddle120-env/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 534, in execute_request user_expressions, allow_stdin, File "/opt/conda/envs/python35-paddle120-env/lib/python3.5/site-packages/tornado/gen.py", line 326, in wrapper yielded = next(result) File "/opt/conda/envs/python35-paddle120-env/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 267, in dispatch_shell yield gen.maybe_future(handler(stream, idents, msg)) File "/opt/conda/envs/python35-paddle120-env/lib/python3.5/site-packages/tornado/gen.py", line 326, in wrapper yielded = next(result) File "/opt/conda/envs/python35-paddle120-env/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 357, in process_one yield gen.maybe_future(dispatch(args)) File 
"/opt/conda/envs/python35-paddle120-env/lib/python3.5/site-packages/tornado/gen.py", line 1147, in run yielded = self.gen.send(value) File "/opt/conda/envs/python35-paddle120-env/lib/python3.5/site-packages/tornado/gen.py", line 1233, in inner self.run() File "/opt/conda/envs/python35-paddle120-env/lib/python3.5/site-packages/tornado/stack_context.py", line 300, in null_wrapper return fn(args, kwargs) File "/opt/conda/envs/python35-paddle120-env/lib/python3.5/site-packages/tornado/ioloop.py", line 758, in _run_callback ret = callback() File "/opt/conda/envs/python35-paddle120-env/lib/python3.5/asyncio/events.py", line 127, in _run self._callback(self._args) File "/opt/conda/envs/python35-paddle120-env/lib/python3.5/asyncio/base_events.py", line 1425, in _run_once handle._run() File "/opt/conda/envs/python35-paddle120-env/lib/python3.5/asyncio/base_events.py", line 421, in run_forever self._run_once() File "/opt/conda/envs/python35-paddle120-env/lib/python3.5/site-packages/tornado/platform/asyncio.py", line 132, in start self.asyncio_loop.run_forever() File "/opt/conda/envs/python35-paddle120-env/lib/python3.5/site-packages/ipykernel/kernelapp.py", line 505, in start self.io_loop.start() File "/opt/conda/envs/python35-paddle120-env/lib/python3.5/site-packages/traitlets/config/application.py", line 658, in launch_instance app.start() File "/opt/conda/envs/python35-paddle120-env/lib/python3.5/site-packages/ipykernel_launcher.py", line 16, in app.launch_new_instance() File "/opt/conda/envs/python35-paddle120-env/lib/python3.5/runpy.py", line 85, in _run_code exec(code, run_globals) File "/opt/conda/envs/python35-paddle120-env/lib/python3.5/runpy.py", line 193, in _run_module_as_main "main", mod_spec) C++ Callstacks: cudaMemcpy failed in paddle::platform::GpuMemcpySync (0x7f0b7e9eba40 -> 0x7f0a9abff040, length: 4): unspecified launch failure at [/paddle/paddle/fluid/platform/gpu_info.cc:280] PaddlePaddle Call Stacks: 0 0x7f0ea056e2e0p void 
paddle::platform::EnforceNotMet::Init<char const>(char const, char const, int) + 352 1 0x7f0ea056e659p paddle::platform::EnforceNotMet::EnforceNotMet(std::__exception_ptr::exception_ptr, char const, int) + 137 2 0x7f0ea25849ccp paddle::platform::GpuMemcpySync(void, void const, unsigned long, cudaMemcpyKind) + 188 3 0x7f0ea06f7079p void paddle::memory::Copy<paddle::platform::CPUPlace, paddle::platform::CUDAPlace>(paddle::platform::CPUPlace, void, paddle::platform::CUDAPlace, void const, unsigned long, CUstream_st*) + 249 4 0x7f0ea2524454p paddle::framework::TensorCopySync(paddle::framework::Tensor const&, boost::variant<paddle::platform::CUDAPlace, paddle::platform::CPUPlace, paddle::platform::CUDAPinnedPlace, boost::detail::variant::void, boost::detail::variant::void, boost::detail::variant::void, boost::detail::variant::void, boost::detail::variant::void, boost::detail::variant::void, boost::detail::variant::void, boost::detail::variant::void, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_> const&, paddle::framework::Tensor*) + 900 5 0x7f0ea1f65490p paddle::operators::FetchOp::RunImpl(paddle::framework::Scope const&, boost::variant<paddle::platform::CUDAPlace, paddle::platform::CPUPlace, paddle::platform::CUDAPinnedPlace, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, 
boost::detail::variant::void_> const&) const + 656 6 0x7f0ea24c702cp paddle::framework::OperatorBase::Run(paddle::framework::Scope const&, boost::variant<paddle::platform::CUDAPlace, paddle::platform::CPUPlace, paddle::platform::CUDAPinnedPlace, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_> const&) + 332 7 0x7f0ea06f847ep paddle::framework::Executor::RunPreparedContext(paddle::framework::ExecutorPrepareContext*, paddle::framework::Scope*, bool, bool, bool) + 382 8 0x7f0ea06fb51fp paddle::framework::Executor::Run(paddle::framework::ProgramDesc const&, paddle::framework::Scope*, int, bool, bool, std::vector<std::string, std::allocatorstd::string > const&, bool) + 143 9 0x7f0ea055f96dp 10 0x7f0ea05a0ca6p 11 0x7f0f245e2199p PyCFunction_Call + 233 12 0x7f0f2467d3f9p PyEval_EvalFrameEx + 33545 13 0x7f0f2467f4b6p 14 0x7f0f2467c5b5p PyEval_EvalFrameEx + 29893 15 0x7f0f2467f4b6p 16 0x7f0f2467c5b5p PyEval_EvalFrameEx + 29893 17 0x7f0f2467d1d0p PyEval_EvalFrameEx + 32992 18 0x7f0f2467f4b6p 19 0x7f0f2467f5a8p PyEval_EvalCodeEx + 72 20 0x7f0f2467f5ebp PyEval_EvalCode + 59 21 0x7f0f24672c5dp 22 0x7f0f245e2179p PyCFunction_Call + 201 23 0x7f0f2467cdbep PyEval_EvalFrameEx + 31950 24 0x7f0f245b6410p _PyGen_Send + 128 25 0x7f0f2467b953p PyEval_EvalFrameEx + 26723 26 0x7f0f245b6410p _PyGen_Send + 128 27 0x7f0f2467b953p PyEval_EvalFrameEx + 26723 28 0x7f0f245b6410p _PyGen_Send + 128 29 0x7f0f2467cd60p PyEval_EvalFrameEx + 31856 30 0x7f0f2467d1d0p PyEval_EvalFrameEx + 32992 31 0x7f0f2467d1d0p PyEval_EvalFrameEx + 
32992 32 0x7f0f2467f4b6p 33 0x7f0f2467f5a8p PyEval_EvalCodeEx + 72 34 0x7f0f245bec33p 35 0x7f0f2458d33ap PyObject_Call + 106 36 0x7f0f246776eep PyEval_EvalFrameEx + 9726 37 0x7f0f2467f4b6p 38 0x7f0f2467c5b5p PyEval_EvalFrameEx + 29893 39 0x7f0f245b56bap 40 0x7f0f24670af6p 41 0x7f0f245e2179p PyCFunction_Call + 201 42 0x7f0f2467cdbep PyEval_EvalFrameEx + 31950 43 0x7f0f2467f4b6p 44 0x7f0f2467c5b5p PyEval_EvalFrameEx + 29893 45 0x7f0f245b56bap 46 0x7f0f24670af6p 47 0x7f0f245e2179p PyCFunction_Call + 201 48 0x7f0f2467cdbep PyEval_EvalFrameEx + 31950 49 0x7f0f2467f4b6p 50 0x7f0f2467c5b5p PyEval_EvalFrameEx + 29893 51 0x7f0f245b56bap 52 0x7f0f24670af6p 53 0x7f0f245e2179p PyCFunction_Call + 201 54 0x7f0f2467cdbep PyEval_EvalFrameEx + 31950 55 0x7f0f2467f4b6p 56 0x7f0f2467f5a8p PyEval_EvalCodeEx + 72 57 0x7f0f245beb56p 58 0x7f0f2458d33ap PyObject_Call + 106 59 0x7f0f246776eep PyEval_EvalFrameEx + 9726 60 0x7f0f245b6410p _PyGen_Send + 128 61 0x7f0f2467cd60p PyEval_EvalFrameEx + 31856 62 0x7f0f2467d1d0p PyEval_EvalFrameEx + 32992 63 0x7f0f2467f4b6p 64 0x7f0f2467f5a8p PyEval_EvalCodeEx + 72 65 0x7f0f245bec33p 66 0x7f0f2458d33ap PyObject_Call + 106 67 0x7f0f246776eep PyEval_EvalFrameEx + 9726 68 0x7f0f2467f4b6p 69 0x7f0f2467f5a8p PyEval_EvalCodeEx + 72 70 0x7f0f245beb56p 71 0x7f0f2458d33ap PyObject_Call + 106 72 0x7f0f246f2ccap 73 0x7f0f2458d33ap PyObject_Call + 106 74 0x7f0f246794c5p PyEval_EvalFrameEx + 17365 75 0x7f0f2467f4b6p 76 0x7f0f2467f5a8p PyEval_EvalCodeEx + 72 77 0x7f0f245beb56p 78 0x7f0f2458d33ap PyObject_Call + 106 79 0x7f0f246776eep PyEval_EvalFrameEx + 9726 80 0x7f0f2467d1d0p PyEval_EvalFrameEx + 32992 81 0x7f0f2467d1d0p PyEval_EvalFrameEx + 32992 82 0x7f0f2467d1d0p PyEval_EvalFrameEx + 32992 83 0x7f0f2467d1d0p PyEval_EvalFrameEx + 32992 84 0x7f0f2467d1d0p PyEval_EvalFrameEx + 32992 85 0x7f0f2467f4b6p 86 0x7f0f2467c5b5p PyEval_EvalFrameEx + 29893 87 0x7f0f2467f4b6p 88 0x7f0f2467f5a8p PyEval_EvalCodeEx + 72 89 0x7f0f2467f5ebp PyEval_EvalCode + 59 90 
0x7f0f24672c5dp 91 0x7f0f245e2179p PyCFunction_Call + 201 92 0x7f0f2467cdbep PyEval_EvalFrameEx + 31950 93 0x7f0f2467f4b6p 94 0x7f0f2467c5b5p PyEval_EvalFrameEx + 29893 95 0x7f0f2467f4b6p 96 0x7f0f2467f5a8p PyEval_EvalCodeEx + 72 97 0x7f0f245beb56p 98 0x7f0f2458d33ap PyObject_Call + 106 99 0x7f0f246cbba1p