未验证 提交 37f43ebc 编写于 作者: L Leo Chen 提交者: GitHub

fix int32 overflow in cuda kernel loop (#38007)

上级 dabf8152
@@ -21,8 +21,7 @@ template <typename T>
 __global__ void LabelSmoothRunOriginKernel(const int N, const float epsilon,
                                            const int label_dim, const T* src,
                                            T* dst) {
-  int idx = blockDim.x * blockIdx.x + threadIdx.x;
-  for (; idx < N; idx += blockDim.x * gridDim.x) {
+  CUDA_KERNEL_LOOP(idx, N) {
     dst[idx] = static_cast<T>(1 - epsilon) * src[idx] +
                static_cast<T>(epsilon / label_dim);
   }
@@ -32,8 +31,7 @@ template <typename T>
 __global__ void LabelSmoothRunDistKernel(const int N, const float epsilon,
                                          const int dist_numel, const T* src,
                                          const T* dist_data, T* dst) {
-  int idx = blockDim.x * blockIdx.x + threadIdx.x;
-  for (; idx < N; idx += blockDim.x * gridDim.x) {
+  CUDA_KERNEL_LOOP(idx, N) {
     int dist_idx = idx % dist_numel;
     dst[idx] = static_cast<T>(1 - epsilon) * src[idx] +
                static_cast<T>(epsilon) * dist_data[dist_idx];
@@ -43,8 +41,7 @@ __global__ void LabelSmoothRunDistKernel(const int N, const float epsilon,
 template <typename T>
 __global__ void LabelSmoothGradRunKernel(const int N, const float epsilon,
                                          const T* src, T* dst) {
-  int idx = blockDim.x * blockIdx.x + threadIdx.x;
-  for (; idx < N; idx += blockDim.x * gridDim.x) {
+  CUDA_KERNEL_LOOP(idx, N) {
     dst[idx] = static_cast<T>(1 - epsilon) * src[idx];
   }
 }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册