diff --git a/paddle/fluid/platform/for_range.h b/paddle/fluid/platform/for_range.h index 9fbaa36723bcfa6a4273a24ba550c225a4daf582..a767bf92993b901015d3dc447746777d7d52d70d 100644 --- a/paddle/fluid/platform/for_range.h +++ b/paddle/fluid/platform/for_range.h @@ -117,17 +117,18 @@ __global__ static void ForRangeInElemwiseOp(Function func, T* vector, template <> struct ForRangeIn { - ForRange(const CUDADeviceContext& dev_ctx, std::vector range) + ForRangeIn(const CUDADeviceContext& dev_ctx, std::vector range) : dev_ctx_(dev_ctx), range_(range) {} template inline void operator()(Function func) const { constexpr int num_threads = 1024; - int block_size = range_.size() <= num_threads ? limit_ : num_threads; + int range_size = range_.size(); + int block_size = range_size <= num_threads ? range_size : num_threads; int grid_size = (range_.size() + num_threads - 1) / num_threads; ForRangeInElemwiseOp<<>>( - func, range_.data(), range_.size()); + func, range_.data(), range_size); } const CUDADeviceContext& dev_ctx_;