Created by: chengduoZH
This PR's work includes:
- Add call stack info to errors raised at compile time.
- Extract the code shared by compile time and run time into operator_call_stack (see the sketch after this list).
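
The idea can be sketched in plain Python (a minimal illustration, not Paddle's actual implementation; `OpDesc`, `append_op`, and `format_operator_call_stack` are hypothetical names): record the Python call stack when an operator is created, and keep the formatting logic in one shared helper so compile-time and run-time errors print the same "Python Callstacks" section.

```python
import traceback


class OpDesc(object):
    """Illustrative stand-in for an operator description."""

    def __init__(self, op_type):
        self.type = op_type
        # Capture the creation call stack at compile time
        # (drop the frame for this __init__ itself).
        self.call_stack = traceback.format_stack()[:-1]


def format_operator_call_stack(op, error_msg):
    """Shared formatter, usable by both compile-time and run-time error paths."""
    return ("Invoke operator {} error.\n"
            "Python Callstacks:\n{}"
            "C++ Callstacks:\n{}\n").format(op.type,
                                            "".join(op.call_stack),
                                            error_msg)


def append_op(op_type):
    # Called from user-facing layer code, so the recorded stack points back
    # at the user's model script (e.g. model.py / train_gpu_paddle.py below).
    return OpDesc(op_type)


if __name__ == "__main__":
    op = append_op("elementwise_add_grad")
    print(format_operator_call_stack(
        op, "Input(Out@GRAD) should not be null"))
```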
Before:

```
Traceback (most recent call last):
File "./train_gpu_paddle.py", line 471, in <module>
train(n_token, cutoffs)
File "./train_gpu_paddle.py", line 254, in train
decr_ratio=FLAGS.decr_ratio)
File "/ssd3/transformer-xl-paddlepaddle_tmp/optimization.py", line 151, in optimization
_, param_grads = optimizer.minimize(loss)
File "</usr/local/lib/python2.7/dist-packages/decorator.pyc:decorator-gen-20>", line 2, in minimize
File "/ssd3/zcd_Paddle/build_fast/python/paddle/fluid/wrapped_decorator.py", line 25, in __impl__
return wrapped_func(*args, **kwargs)
File "/ssd3/zcd_Paddle/build_fast/python/paddle/fluid/dygraph/base.py", line 86, in __impl__
return func(*args, **kwargs)
File "/ssd3/zcd_Paddle/build_fast/python/paddle/fluid/optimizer.py", line 594, in minimize
no_grad_set=no_grad_set)
File "/ssd3/zcd_Paddle/build_fast/python/paddle/fluid/optimizer.py", line 493, in backward
no_grad_set, callbacks)
File "/ssd3/zcd_Paddle/build_fast/python/paddle/fluid/backward.py", line 706, in append_backward
_append_backward_vars_(root_block, fwd_op_num, grad_to_var, grad_info_map)
File "/ssd3/zcd_Paddle/build_fast/python/paddle/fluid/backward.py", line 518, in _append_backward_vars_
op_desc.infer_shape(block.desc)
paddle.fluid.core_avx.EnforceNotMet: Invoke operator elementwise_add_grad error.
Input(Out@GRAD) should not be null at [../paddle/fluid/operators/elementwise/elementwise_op.h:289]
PaddlePaddle Call Stacks:
0 0x7fb76db1b35ap void paddle::platform::EnforceNotMet::Init<char const*>(char const*, char const*, int) + 506
1 0x7fb76db1c065p paddle::platform::EnforceNotMet::EnforceNotMet(std::__exception_ptr::exception_ptr, char const*, int) + 165
2 0x7fb76f427ebap paddle::operators::ElementwiseOpExplicitGrad::InferShape(paddle::framework::InferShapeContext*) const + 1130
3 0x7fb76dcdf605p
4 0x7fb76dce3ed9p paddle::framework::OpDesc::InferShape(paddle::framework::BlockDesc const&) const + 985
5 0x7fb76dc00334p
6 0x7fb76db4eb14p
7 0x4c5326p PyEval_EvalFrameEx + 37958
8 0x4b9b66p PyEval_EvalCodeEx + 774
9 0x4c1f56p PyEval_EvalFrameEx + 24694
10 0x4b9b66p PyEval_EvalCodeEx + 774
11 0x4c17c6p PyEval_EvalFrameEx + 22758
12 0x4b9b66p PyEval_EvalCodeEx + 774
13 0x4c17c6p PyEval_EvalFrameEx + 22758
14 0x4b9b66p PyEval_EvalCodeEx + 774
15 0x4d57a3p
16 0x4a587ep PyObject_Call + 62
17 0x4be51ep PyEval_EvalFrameEx + 9790
18 0x4b9b66p PyEval_EvalCodeEx + 774
19 0x4d57a3p
```

Now:

```
Traceback (most recent call last):
File "./train_gpu_paddle.py", line 471, in <module>
train(n_token, cutoffs)
File "./train_gpu_paddle.py", line 254, in train
decr_ratio=FLAGS.decr_ratio)
File "/ssd3/transformer-xl-paddlepaddle_tmp/optimization.py", line 151, in optimization
_, param_grads = optimizer.minimize(loss)
File "</usr/local/lib/python2.7/dist-packages/decorator.pyc:decorator-gen-20>", line 2, in minimize
File "/ssd3/zcd_Paddle/build_fast/python/paddle/fluid/wrapped_decorator.py", line 25, in __impl__
return wrapped_func(*args, **kwargs)
File "/ssd3/zcd_Paddle/build_fast/python/paddle/fluid/dygraph/base.py", line 86, in __impl__
return func(*args, **kwargs)
File "/ssd3/zcd_Paddle/build_fast/python/paddle/fluid/optimizer.py", line 594, in minimize
no_grad_set=no_grad_set)
File "/ssd3/zcd_Paddle/build_fast/python/paddle/fluid/optimizer.py", line 493, in backward
no_grad_set, callbacks)
File "/ssd3/zcd_Paddle/build_fast/python/paddle/fluid/backward.py", line 706, in append_backward
_append_backward_vars_(root_block, fwd_op_num, grad_to_var, grad_info_map)
File "/ssd3/zcd_Paddle/build_fast/python/paddle/fluid/backward.py", line 518, in _append_backward_vars_
op_desc.infer_shape(block.desc)
paddle.fluid.core_avx.EnforceNotMet: Invoke operator elementwise_add_grad error.
Python Callstacks:
File "/ssd3/zcd_Paddle/build_fast/python/paddle/fluid/framework.py", line 1776, in append_op
attrs=kwargs.get("attrs", None))
File "/ssd3/zcd_Paddle/build_fast/python/paddle/fluid/layers/math_op_patch.py", line 149, in __impl__
attrs={'axis': axis})
File "/ssd3/transformer-xl-paddlepaddle_tmp/model.py", line 371, in gather_nd
index_join = raw_index * tensor.shape[1] + col_index
File "/ssd3/transformer-xl-paddlepaddle_tmp/model.py", line 403, in _gather_logprob
return gather_nd(logprob, idx)
File "/ssd3/transformer-xl-paddlepaddle_tmp/model.py", line 494, in mask_adaptive_logsoftmax
_gather_logprob(tail_logprob, cur_target))
File "/ssd3/transformer-xl-paddlepaddle_tmp/model.py", line 558, in transformer
proj_same_dim=proj_same_dim)
File "/ssd3/transformer-xl-paddlepaddle_tmp/transformer_xl.py", line 200, in _build_model
name='encoder'
File "./train_gpu_paddle.py", line 84, in model_fn
loss, new_mems = transofrmer_xl._build_model()
File "./train_gpu_paddle.py", line 99, in single_core_graph
is_training=is_training)
File "./train_gpu_paddle.py", line 128, in create_model
mems=mems)
File "./train_gpu_paddle.py", line 238, in train
batch_size=bsz_per_trainner, n_token=n_token, cutoffs=cutoffs)
File "./train_gpu_paddle.py", line 471, in <module>
train(n_token, cutoffs)
C++ Callstacks:
Input(Out@GRAD) should not be null at [../paddle/fluid/operators/elementwise/elementwise_op.h:289]
PaddlePaddle Call Stacks:
0 0x7fb76db1b35ap void paddle::platform::EnforceNotMet::Init<char const*>(char const*, char const*, int) + 506
1 0x7fb76db1c065p paddle::platform::EnforceNotMet::EnforceNotMet(std::__exception_ptr::exception_ptr, char const*, int) + 165
2 0x7fb76f427ebap paddle::operators::ElementwiseOpExplicitGrad::InferShape(paddle::framework::InferShapeContext*) const + 1130
3 0x7fb76dcdf605p
4 0x7fb76dce3ed9p paddle::framework::OpDesc::InferShape(paddle::framework::BlockDesc const&) const + 985
5 0x7fb76dc00334p
6 0x7fb76db4eb14p
7 0x4c5326p PyEval_EvalFrameEx + 37958
8 0x4b9b66p PyEval_EvalCodeEx + 774
9 0x4c1f56p PyEval_EvalFrameEx + 24694
10 0x4b9b66p PyEval_EvalCodeEx + 774
11 0x4c17c6p PyEval_EvalFrameEx + 22758
12 0x4b9b66p PyEval_EvalCodeEx + 774
```