// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/phi/kernels/prelu_kernel.h"

#include "glog/logging.h"

#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/elementwise_base.h"
#include "paddle/phi/kernels/funcs/index_impl.cu.h"
#include "paddle/phi/kernels/gpu/prelu_funcs.h"

namespace phi {

// PReLU: out = x when x > 0, alpha * x otherwise. `mode` controls how alpha
// is broadcast over x: one alpha per channel ("channel"), one alpha per
// element of a sample ("element"), or a single scalar alpha (default).
template <typename T, typename Context>
void PReluKernel(const Context& dev_ctx,
                 const DenseTensor& x,
                 const DenseTensor& alpha,
                 const std::string& data_format,
                 const std::string& mode,
                 DenseTensor* out) {
  dev_ctx.template Alloc<T>(out);
  const T* x_ptr = x.data<T>();
  const T* alpha_ptr = alpha.data<T>();

  int numel = x.numel();
  auto dim = x.dims();
  auto x_rank = dim.size();

  VLOG(4) << "dim[0]:" << dim[0] << ", dim[1]:" << dim[1] << ", dim["
          << x_rank - 1 << "]:" << dim[x_rank - 1] << ", numel:" << numel
          << ", mode:" << mode << ", format:" << data_format;

  if (mode == "channel") {
    // The data layout decides which dimension holds the channels:
    // the last one for NHWC, dim[1] for NCHW.
    bool channel_last = data_format == "NHWC";
    size_t channel = channel_last ? dim[x_rank - 1] : dim[1];
    if (channel_last) {
      auto func = PReluChannelLastWiseCUDAFunctor<T>(x_ptr, alpha_ptr, channel);
      phi::IndexKernel<T, PReluChannelLastWiseCUDAFunctor<T>>(
          dev_ctx, out, func);
    } else {
      size_t plane_size = numel / dim[0] / channel;
      auto func = PReluChannelFirstWiseCUDAFunctor<T>(
          x_ptr, alpha_ptr, numel, channel, plane_size);
      phi::IndexKernel<T, PReluChannelFirstWiseCUDAFunctor<T>>(
          dev_ctx, out, func);
    }
  } else if (mode == "element") {
    // One alpha per element within a sample; alpha repeats across dim[0].
    size_t spatial_size = numel / dim[0];
    auto func =
        PreluElementWiseDirectCUDAFunctor<T>(x_ptr, alpha_ptr, spatial_size);
    phi::IndexKernel<T, PreluElementWiseDirectCUDAFunctor<T>>(
        dev_ctx, out, func);
  } else {
    // Scalar mode: a single alpha shared by every element, computed with the
    // generic elementwise kernel.
    std::vector<const DenseTensor*> ins = {&x};
    std::vector<DenseTensor*> outs = {out};
    auto func = PreluScalarDirectCUDAFunctor<T>(alpha_ptr);
    phi::funcs::ElementwiseKernel<T>(dev_ctx, ins, &outs, func);
  }
}

}  // namespace phi

PD_REGISTER_KERNEL(prelu,
                   GPU,
                   ALL_LAYOUT,
                   phi::PReluKernel,
                   float,
                   phi::dtype::float16,
                   double) {}