From 795d7121b19da034a5cc3358b23097338fd2d6d1 Mon Sep 17 00:00:00 2001 From: sneaxiy <32832641+sneaxiy@users.noreply.github.com> Date: Mon, 11 Apr 2022 10:23:37 +0800 Subject: [PATCH] fix some ops (#41577) --- paddle/phi/kernels/cpu/size_kernel.cc | 1 + paddle/phi/kernels/gpu/cumsum_kernel.cu | 23 +++++++++++++---------- paddle/phi/kernels/gpu/size_kernel.cu | 1 + python/paddle/nn/functional/loss.py | 2 +- 4 files changed, 16 insertions(+), 11 deletions(-) diff --git a/paddle/phi/kernels/cpu/size_kernel.cc b/paddle/phi/kernels/cpu/size_kernel.cc index ff34ef26f6b..71ebf9cdc09 100644 --- a/paddle/phi/kernels/cpu/size_kernel.cc +++ b/paddle/phi/kernels/cpu/size_kernel.cc @@ -22,6 +22,7 @@ PD_REGISTER_KERNEL(size, CPU, ALL_LAYOUT, phi::SizeKernel, + int16_t, int, int64_t, phi::dtype::float16, diff --git a/paddle/phi/kernels/gpu/cumsum_kernel.cu b/paddle/phi/kernels/gpu/cumsum_kernel.cu index a253e6f4ad2..e04f2b5f876 100644 --- a/paddle/phi/kernels/gpu/cumsum_kernel.cu +++ b/paddle/phi/kernels/gpu/cumsum_kernel.cu @@ -222,25 +222,28 @@ void CumsumKernel(const Context& dev_ctx, // Use thrust for parallel acceleration when the input size is equal to the // length of the ‘axis’ dimension. 
if (size == out_dims[axis]) { +#ifdef __HIPCC__ + const auto& policy = thrust::hip::par.on(dev_ctx.stream()); +#else + const auto& policy = thrust::cuda::par.on(dev_ctx.stream()); +#endif if (reverse) { - thrust::device_ptr<const T> dev_ptr = - thrust::device_pointer_cast(in_data); - thrust::device_vector<T> vec(dev_ptr, dev_ptr + size); + thrust::reverse_iterator<thrust::device_ptr<const T>> reversed_in( + thrust::device_pointer_cast(in_data) + size); + thrust::reverse_iterator<thrust::device_ptr<T>> reversed_out( + thrust::device_pointer_cast(out_data) + size); if (exclusive) { thrust::exclusive_scan( - thrust::device, vec.rbegin(), vec.rend(), out_data); + policy, reversed_in, reversed_in + size, reversed_out); } else { thrust::inclusive_scan( - thrust::device, vec.rbegin(), vec.rend(), out_data); + policy, reversed_in, reversed_in + size, reversed_out); } - thrust::reverse(thrust::device, out_data, out_data + size); } else { if (exclusive) { - thrust::exclusive_scan( - thrust::device, in_data, in_data + size, out_data); + thrust::exclusive_scan(policy, in_data, in_data + size, out_data); } else { - thrust::inclusive_scan( - thrust::device, in_data, in_data + size, out_data); + thrust::inclusive_scan(policy, in_data, in_data + size, out_data); } } return; diff --git a/paddle/phi/kernels/gpu/size_kernel.cu b/paddle/phi/kernels/gpu/size_kernel.cu index 17a39944eb0..7051fb78c75 100644 --- a/paddle/phi/kernels/gpu/size_kernel.cu +++ b/paddle/phi/kernels/gpu/size_kernel.cu @@ -22,6 +22,7 @@ PD_REGISTER_KERNEL(size, GPU, ALL_LAYOUT, phi::SizeKernel, + int16_t, int, int64_t, phi::dtype::float16, diff --git a/python/paddle/nn/functional/loss.py b/python/paddle/nn/functional/loss.py index fb9c22edc65..62f034c7b41 100755 --- a/python/paddle/nn/functional/loss.py +++ b/python/paddle/nn/functional/loss.py @@ -1795,7 +1795,7 @@ def cross_entropy(input, # 2. 
else # numerator: loss's weighted sum # denominator: cal the sum of weight where the sample's class_index!=ignore_index - if ignore_index != -100: + if ignore_index >= 0: out_sum = _C_ops.reduce_sum(out, 'reduce_all', True) # for each label[i],set 1 or 0, according to ignore_index # mask[i]=0, if label[i]==ignore_index -- GitLab