diff --git a/paddle/fluid/operators/lstm_op.h b/paddle/fluid/operators/lstm_op.h index 7d62d2d020ec2e3a29ad8720a8f04fead3a90a63..3f110024b285d41ccfe305e35c8efca5ed5ee0fe 100644 --- a/paddle/fluid/operators/lstm_op.h +++ b/paddle/fluid/operators/lstm_op.h @@ -311,6 +311,10 @@ class LSTMGradKernel : public framework::OpKernel { lstm_grad.prev_state_grad = c0_g ? ordered_c0_g.data() : nullptr; } + // lstm_value.output_value not used in bp, set to nullptr + // lstm_grad.state_active_grad not used in bp, set to nullptr + lstm_value.output_value = nullptr; + lstm_grad.state_active_grad = nullptr; int cur_batch_size = bend - bstart; math::LstmUnitGradFunctor::compute( device_ctx, lstm_value, lstm_grad, frame_size, cur_batch_size, diff --git a/paddle/fluid/operators/lstmp_op.h b/paddle/fluid/operators/lstmp_op.h index 370dd04d1449a8e211febf9a4f9e90e6f5008e20..1f11e57dcb721012c7b8e50d7e138355685053da 100644 --- a/paddle/fluid/operators/lstmp_op.h +++ b/paddle/fluid/operators/lstmp_op.h @@ -405,6 +405,11 @@ class LSTMPGradKernel : public framework::OpKernel { } int cur_batch_size = bend - bstart; + // lstmp_value.output_value not used in bp, set to null + // lstmp_grad.state_active_grad not used in bp, set to null + lstmp_value.output_value = nullptr; + lstmp_grad.state_active_grad = nullptr; + math::LstmUnitGradFunctor::compute( device_ctx, lstmp_value, lstmp_grad, frame_size, cur_batch_size, gate_act, cell_act, cand_act);