diff --git a/paddle/function/ContextProjectionOp.cpp b/paddle/function/ContextProjectionOp.cpp index 8803ea78963a00c4724bcfd5cb02381fd2d1a89e..f1e42cad7259e3fb9c862bb53a6da254dabc5c1e 100644 --- a/paddle/function/ContextProjectionOp.cpp +++ b/paddle/function/ContextProjectionOp.cpp @@ -232,7 +232,7 @@ public: /// input grad and output grad have the same batch_size CHECK_EQ(inouts[0].dims_[0], inputs[1].dims_[0]); /// dim of output = dim of input * context_length - CHECK_EQ(inputs[1].dims_[1], inputs[0].dims_[1] * context_length_); + CHECK_EQ(inputs[1].dims_[1], inouts[0].dims_[1] * context_length_); typename SequenceT::type seq_vec( inputs[0].dims_[0], reinterpret_cast(inputs[0].getData())); diff --git a/paddle/function/ContextProjectionOpGpu.cu b/paddle/function/ContextProjectionOpGpu.cu index 6194ad8e74a37edb287d11a631a968d7eef6e7d9..c5a636dce8ea31721bfec7daef7b815054d680ac 100644 --- a/paddle/function/ContextProjectionOpGpu.cu +++ b/paddle/function/ContextProjectionOpGpu.cu @@ -256,7 +256,7 @@ __global__ void KeContextProjectionBackwardWeight(const real* out_grad, for (int seqId = idy; seqId < num_sequences; seqId += THREADS_Y) { int seq_start = sequence[seqId]; int seq_end = sequence[seqId+1]; - output_r = const_cast(out_grad) + output_r = const_cast(out_grad) + seq_start * w_dim * context_length; if (context_start < 0) {