diff --git a/paddle/fluid/operators/reduce_ops/reduce_op.cu.h b/paddle/fluid/operators/reduce_ops/reduce_op.cu.h index fe77d3158ed27c93c5226e4c55941616b84407e1..30b1cf5ac711d248c3c554265580ad1c4ec8d24a 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_op.cu.h +++ b/paddle/fluid/operators/reduce_ops/reduce_op.cu.h @@ -770,7 +770,7 @@ void TensorReduceFunctorImpl(const framework::Tensor& x, framework::Tensor* y, auto x_dim = framework::vectorize(x.dims()); auto config = ReduceConfig(origin_reduce_dims, x_dim); config.Run(); // get the parameters of LaunchReduceKernel - + int numel = x.numel(); // after config.run() // SetOutputData for ReduceHigherDim when should_reduce_again is true, // temp_output should be stored temp_data in output_data space or stored in @@ -787,7 +787,7 @@ void TensorReduceFunctorImpl(const framework::Tensor& x, framework::Tensor* y, } config.SetOutputData(y_data, x.place(), &tmp); - bool use_cub_reduce = (config.left_num == 1) && + bool use_cub_reduce = (config.reduce_num == numel) && (!std::is_same::value); if (use_cub_reduce) { // launch CUB::Reduce