// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)

#ifndef _MSC_VER
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/random.h>
#include <thrust/shuffle.h>
#endif

#include "paddle/phi/common/memory_utils.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/errors.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/tensor_utils.h"
#include "paddle/phi/kernels/funcs/for_range.h"
#include "paddle/phi/kernels/gpu/shuffle_batch_utils.h"
#include "paddle/phi/kernels/shuffle_batch_grad_kernel.h"

namespace phi {

template <typename T, typename Context>
void ShuffleBatchGradKernel(const Context& dev_ctx,
                            const DenseTensor& shuffleidx,
                            const DenseTensor& out_grad,
                            int startup_seed,
                            DenseTensor* x_grad) {
#ifdef _MSC_VER
  PADDLE_THROW(phi::errors::Unimplemented(
      "GPU shuffle_batch_grad is not supported on Windows yet"));
#else
  const auto* out_grad_data = out_grad.data<T>();
  const auto* shuffleidx_data = shuffleidx.data<int64_t>();
  auto* x_grad_data = dev_ctx.template Alloc<T>(x_grad);
  // The last dimension is the embedding width; shuffleidx permutes rows.
  auto x_embed_size = x_grad->dims()[x_grad->dims().size() - 1];
  // Backward pass: scatter out_grad back through shuffleidx to undo the
  // forward shuffle (ReorderFunctor<T, false> applies the inverse reorder).
  ReorderFunctor<T, false> functor(
      out_grad_data, shuffleidx_data, x_grad_data, x_embed_size);
  // TODO(zengjinle): for small data, direct cudaMemcpy may be better
  phi::funcs::ForRange<phi::GPUContext> for_range(dev_ctx, x_grad->numel());
  for_range(functor);
#endif
}

}  // namespace phi

PD_REGISTER_KERNEL(shuffle_batch_grad,
                   GPU,
                   ALL_LAYOUT,
                   phi::ShuffleBatchGradKernel,
                   float,
                   double,
                   int32_t,
                   int64_t) {}

#endif