diff --git a/paddle/fluid/operators/log_softmax_op.cu b/paddle/fluid/operators/log_softmax_op.cu
index e4fe92c625640dba38daa6690705eed2cf0032be..12c607adb44f4e9590bd5a50305c9d6fd5b3d1d7 100644
--- a/paddle/fluid/operators/log_softmax_op.cu
+++ b/paddle/fluid/operators/log_softmax_op.cu
@@ -104,7 +104,7 @@ __global__ void ComputeLogSoftmaxForwardInWarp(T *dst, const T *src,
 #pragma unroll
   for (int it = 0; it < warp_iter; ++it) {
     int element_index = thread_in_warp_idx + it * kernel_warp_size;
-    if (element_index < element_count) {
+    if (element_index < effective_element_count) {
       dst[batch_id * element_count + element_index] =
           static_cast<T>(elements[it] - max_value - sum);
     } else {
@@ -226,7 +226,7 @@ __global__ void ComputeLogSoftmaxBackwardInWarp(const T *output,
 #pragma unroll
   for (int iter = 0; iter < warp_iter; ++iter) {
     int element_index = thread_in_warp_idx + iter * kernel_warp_size;
-    if (element_index < element_count) {
+    if (element_index < effective_element_count) {
       grad_input[batch_id * element_count + element_index] = static_cast<T>(
           (grad_output_register[iter] - std::exp(output_register[iter]) * sum));
     }