diff --git a/paddle/fluid/operators/lstm_op.h b/paddle/fluid/operators/lstm_op.h
index 7d62d2d020ec2e3a29ad8720a8f04fead3a90a63..289f50f52e74807a5ff1b8fda1433a10c3b07309 100644
--- a/paddle/fluid/operators/lstm_op.h
+++ b/paddle/fluid/operators/lstm_op.h
@@ -311,6 +311,10 @@ class LSTMGradKernel : public framework::OpKernel {
         lstm_grad.prev_state_grad = c0_g ? ordered_c0_g.data() : nullptr;
       }
 
+      // lstm_value.output_value not used in bp, set to null
+      // lstm_grad.state_active_grad not used in bp, set to null
+      lstm_value.output_value = nullptr;
+      lstm_grad.state_active_grad = nullptr;
       int cur_batch_size = bend - bstart;
       math::LstmUnitGradFunctor::compute(
           device_ctx, lstm_value, lstm_grad, frame_size, cur_batch_size,
diff --git a/paddle/fluid/operators/lstmp_op.h b/paddle/fluid/operators/lstmp_op.h
index 370dd04d1449a8e211febf9a4f9e90e6f5008e20..05ecd3c1aec189776159b4c208a5e91aa0a77454 100644
--- a/paddle/fluid/operators/lstmp_op.h
+++ b/paddle/fluid/operators/lstmp_op.h
@@ -405,6 +405,11 @@ class LSTMPGradKernel : public framework::OpKernel {
       }
 
       int cur_batch_size = bend - bstart;
+      // lstmp_value.output_value not used in bp, set to null
+      // lstmp_grad.state_active_grad not used in bp, set to null
+      lstmp_value.output_value = nullptr;
+      lstmp_grad.state_active_grad = nullptr;
+
       math::LstmUnitGradFunctor::compute(
           device_ctx, lstmp_value, lstmp_grad, frame_size, cur_batch_size,
           gate_act, cell_act, cand_act);