未验证 提交 26fe2dcb 编写于 作者: Y Yiqun Liu 提交者: GitHub

Fix the index calculation in cross_entropy_kernel. (#53659)

上级 3be7a6c8
...@@ -14,6 +14,8 @@ limitations under the License. */ ...@@ -14,6 +14,8 @@ limitations under the License. */
#include "paddle/phi/kernels/cross_entropy_kernel.h" #include "paddle/phi/kernels/cross_entropy_kernel.h"
#include "glog/logging.h"
#ifdef __NVCC__ #ifdef __NVCC__
#include "cub/cub.cuh" #include "cub/cub.cuh"
#endif #endif
...@@ -468,8 +470,8 @@ __global__ void VectorizedSoftmaxForward(T* loss, ...@@ -468,8 +470,8 @@ __global__ void VectorizedSoftmaxForward(T* loss,
using VecT = kps::details::VectorType<T, VecSize>; using VecT = kps::details::VectorType<T, VecSize>;
// each block deal with one batch // each block deal with one batch
logits += blockIdx.x * mid_dim; logits += static_cast<int64_t>(blockIdx.x) * static_cast<int64_t>(mid_dim);
softmax += blockIdx.x * mid_dim; softmax += static_cast<int64_t>(blockIdx.x) * static_cast<int64_t>(mid_dim);
const int input_offset = ((uint64_t)logits) % ALIGN_BYTES / sizeof(T); const int input_offset = ((uint64_t)logits) % ALIGN_BYTES / sizeof(T);
const int output_offset = ((uint64_t)softmax) % ALIGN_BYTES / sizeof(T); const int output_offset = ((uint64_t)softmax) % ALIGN_BYTES / sizeof(T);
...@@ -1165,6 +1167,8 @@ static void SoftmaxWithCrossEntropyHardLabel(const GPUContext& dev_ctx, ...@@ -1165,6 +1167,8 @@ static void SoftmaxWithCrossEntropyHardLabel(const GPUContext& dev_ctx,
int dim, int dim,
int D, int D,
const int ignore_index) { const int ignore_index) {
VLOG(7) << "rank=" << rank << ", axis = " << axis << ", N = " << N
<< ", dim = " << dim << ", D = " << D;
auto stream = dev_ctx.stream(); auto stream = dev_ctx.stream();
constexpr int max_dim = 320; constexpr int max_dim = 320;
if (D == 1) { if (D == 1) {
...@@ -1247,11 +1251,11 @@ void CrossEntropyWithSoftmaxCUDAKernel(const GPUContext& dev_ctx, ...@@ -1247,11 +1251,11 @@ void CrossEntropyWithSoftmaxCUDAKernel(const GPUContext& dev_ctx,
int axis, int axis,
DenseTensor* softmax, DenseTensor* softmax,
DenseTensor* loss) { DenseTensor* loss) {
PADDLE_ENFORCE_EQ( VLOG(7) << "logits.shape={" << logits.dims() << "}, label.shape={"
dev_ctx.GetPlace().GetType(), << label.dims() << "}, soft_label=" << soft_label
AllocationType::GPU, << ", use_softmax=" << use_softmax
phi::errors::Unavailable("softmax_with_cross_entropy operator's " << ", numeric_stable_mode=" << numeric_stable_mode
"CUDA kernel only runs on GPU device.")); << ", ignore_index=" << ignore_index << ", axis=" << axis;
// do not with softmax op, and input is softmax // do not with softmax op, and input is softmax
if (!use_softmax) { if (!use_softmax) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册