diff --git a/paddle/fluid/operators/dropout_op_xpu.cc b/paddle/fluid/operators/dropout_op_xpu.cc deleted file mode 100644 index b3de03e7e8d323c737850227ce530240a1ed0293..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/dropout_op_xpu.cc +++ /dev/null @@ -1,175 +0,0 @@ -/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/platform/device/device_wrapper.h" -namespace paddle { -namespace operators { - -#ifdef PADDLE_WITH_XPU - -using Tensor = framework::Tensor; -template -class DropoutXPUKernel : public framework::OpKernel { - using XPUTyp = typename XPUTypeTrait::Type; - - public: - void Compute(const framework::ExecutionContext& context) const override { - auto* x = context.Input("X"); - auto* y = context.Output("Out"); - const auto* x_data = x->data(); - auto* y_data = y->mutable_data(context.GetPlace()); - float dropout_prob = context.Attr("dropout_prob"); - auto dropout_implementation = - context.Attr("dropout_implementation"); - auto& dev_ctx = context.template device_context(); - - auto* seed = - context.HasInput("Seed") ? context.Input("Seed") : nullptr; - - int is_upscale = (dropout_implementation == "upscale_in_train"); - - if (!context.Attr("is_test")) { - int seed_data = 0; - if (seed) { - if (platform::is_xpu_place(seed->place())) { - memory::Copy(platform::CPUPlace(), - &seed_data, - seed->place(), - seed->data(), - sizeof(int)); - } else { - seed_data = *(seed->data()); - } - - } else { - seed_data = - context.Attr("fix_seed") ? context.Attr("seed") : 0; - } - - auto* mask = context.Output("Mask"); - auto* mask_data = mask->mutable_data(context.GetPlace()); - // Special case when dropout_prob is 1.0 - if (dropout_prob == 1.0f) { - int r = xpu::constant(dev_ctx.x_context(), - reinterpret_cast(y_data), - y->numel(), - XPUTyp(0)); - PADDLE_ENFORCE_XDNN_SUCCESS(r, "constant"); - r = xpu::constant(dev_ctx.x_context(), - reinterpret_cast(mask_data), - mask->numel(), - XPUTyp(0)); - PADDLE_ENFORCE_XDNN_SUCCESS(r, "constant"); - return; - } - int r = xpu::dropout(dev_ctx.x_context(), - reinterpret_cast(x->data()), - reinterpret_cast(y->data()), - reinterpret_cast(mask_data), - seed_data, - mask->numel(), - is_upscale, - dropout_prob); - PADDLE_ENFORCE_XDNN_SUCCESS(r, "dropout"); - } else { - float scale = - (is_upscale) ? (1.0) : (static_cast(1.0f - dropout_prob)); - int r = xpu::scale(dev_ctx.x_context(), - reinterpret_cast(x_data), - reinterpret_cast(y_data), - x->numel(), - false, - scale, - 0.0f); - PADDLE_ENFORCE_XDNN_SUCCESS(r, "scale"); - } - } -}; -template -class DropoutGradXPUKernel : public framework::OpKernel { - using XPUType = typename XPUTypeTrait::Type; - - public: - void Compute(const framework::ExecutionContext& context) const override { - PADDLE_ENFORCE_EQ(!context.Attr("is_test"), - true, - platform::errors::InvalidArgument( - "GradOp is only callable when is_test is false")); - auto* grad_x = context.Output(framework::GradVarName("X")); - auto* grad_y = context.Input(framework::GradVarName("Out")); - auto* mask = context.Input("Mask"); - grad_x->mutable_data(context.GetPlace()); - auto& dev_ctx = context.template device_context(); - auto& dropout_implementation = - context.Attr("dropout_implementation"); - float dropout_prob = context.Attr("dropout_prob"); - const T* mask_data = mask->data(); - - if (dropout_implementation != "upscale_in_train") { - int r = xpu::mul(dev_ctx.x_context(), - reinterpret_cast(grad_y->data()), - reinterpret_cast(mask_data), - reinterpret_cast(grad_x->data()), - grad_y->numel()); - PADDLE_ENFORCE_XDNN_SUCCESS(r, "mul"); - return; - } - - auto version = platform::get_xpu_version(context.GetPlace().GetDeviceId()); - if (version == phi::backends::xpu::XPUVersion::XPU1) { - xpu::ctx_guard RAII_GUARD(dev_ctx.x_context()); - XPUType* mask_new = RAII_GUARD.alloc_l3_or_gm(mask->numel()); - float scale = - (dropout_prob == 1.0f) ? (1.0f) : (1.0f / (1.0f - dropout_prob)); - int r = xpu::scale(dev_ctx.x_context(), - reinterpret_cast(mask->data()), - reinterpret_cast(mask_new), - mask->numel(), - false, - scale, - 0.0f); - PADDLE_ENFORCE_XDNN_SUCCESS(r, "scale"); - r = xpu::mul(dev_ctx.x_context(), - reinterpret_cast(grad_y->data()), - reinterpret_cast(mask_new), - reinterpret_cast(grad_x->data()), - grad_y->numel()); - PADDLE_ENFORCE_XDNN_SUCCESS(r, "mul"); - } else { - int r = - xpu::dropout_grad(dev_ctx.x_context(), - reinterpret_cast(mask->data()), - reinterpret_cast(grad_y->data()), - reinterpret_cast(grad_x->data()), - dropout_prob, - grad_y->numel()); - PADDLE_ENFORCE_XDNN_SUCCESS(r, "dropout_grad"); - } - } -}; -} // namespace operators -} // namespace paddle -namespace ops = paddle::operators; -namespace plat = paddle::platform; -REGISTER_OP_XPU_KERNEL( - dropout, - ops::DropoutXPUKernel, - ops::DropoutXPUKernel); -REGISTER_OP_XPU_KERNEL( - dropout_grad, - ops::DropoutGradXPUKernel, - ops::DropoutGradXPUKernel); -#endif diff --git a/paddle/phi/kernels/xpu/dropout_grad_kernel.cc b/paddle/phi/kernels/xpu/dropout_grad_kernel.cc new file mode 100644 index 0000000000000000000000000000000000000000..c6803ca5cfcbd86f7d96b948ac02ec055e6730fd --- /dev/null +++ b/paddle/phi/kernels/xpu/dropout_grad_kernel.cc @@ -0,0 +1,94 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/phi/kernels/dropout_grad_kernel.h" + +#include +#include + +#include "paddle/phi/backends/xpu/enforce_xpu.h" +#include "paddle/phi/core/kernel_registry.h" + +namespace phi { + +template +void DropoutGradRawKernel(const Context& dev_ctx, + const DenseTensor& mask, + const DenseTensor& out_grad, + const Scalar& p, + bool is_test, + const std::string& mode, + DenseTensor* x_grad) { + using XPUType = typename XPUTypeTrait::Type; + PADDLE_ENFORCE_EQ(!is_test, + true, + phi::errors::InvalidArgument( + "GradOp is only callable when is_test is false")); + auto* grad_x = x_grad; + auto* grad_y = &out_grad; + dev_ctx.template Alloc(grad_x); + float dropout_prob = p.to(); + const T* mask_data = mask.data(); + + if (mode != "upscale_in_train") { + int r = xpu::mul(dev_ctx.x_context(), + reinterpret_cast(grad_y->data()), + reinterpret_cast(mask_data), + reinterpret_cast(grad_x->data()), + grad_y->numel()); + PADDLE_ENFORCE_XDNN_SUCCESS(r, "mul"); + return; + } + + auto version = + phi::backends::xpu::get_xpu_version(dev_ctx.GetPlace().GetDeviceId()); + if (version == phi::backends::xpu::XPUVersion::XPU1) { + xpu::ctx_guard RAII_GUARD(dev_ctx.x_context()); + XPUType* mask_new = RAII_GUARD.alloc_l3_or_gm(mask.numel()); + float scale = + (dropout_prob == 1.0f) ? (1.0f) : (1.0f / (1.0f - dropout_prob)); + int r = xpu::scale(dev_ctx.x_context(), + reinterpret_cast(mask.data()), + reinterpret_cast(mask_new), + mask.numel(), + false, + scale, + 0.0f); + PADDLE_ENFORCE_XDNN_SUCCESS(r, "scale"); + r = xpu::mul(dev_ctx.x_context(), + reinterpret_cast(grad_y->data()), + reinterpret_cast(mask_new), + reinterpret_cast(grad_x->data()), + grad_y->numel()); + PADDLE_ENFORCE_XDNN_SUCCESS(r, "mul"); + } else { + int r = + xpu::dropout_grad(dev_ctx.x_context(), + reinterpret_cast(mask.data()), + reinterpret_cast(grad_y->data()), + reinterpret_cast(grad_x->data()), + dropout_prob, + grad_y->numel()); + PADDLE_ENFORCE_XDNN_SUCCESS(r, "dropout_grad"); + } +} + +} // namespace phi + +PD_REGISTER_KERNEL(dropout_grad, + XPU, + ALL_LAYOUT, + phi::DropoutGradRawKernel, + float, + phi::dtype::float16) {} diff --git a/paddle/phi/kernels/xpu/dropout_kernel.cc b/paddle/phi/kernels/xpu/dropout_kernel.cc new file mode 100644 index 0000000000000000000000000000000000000000..c9645f06a1331955a89f4295b1b17bf1815370ae --- /dev/null +++ b/paddle/phi/kernels/xpu/dropout_kernel.cc @@ -0,0 +1,107 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/phi/kernels/dropout_kernel.h" + +#include +#include + +#include "paddle/fluid/memory/memcpy.h" +#include "paddle/phi/backends/xpu/enforce_xpu.h" +#include "paddle/phi/core/kernel_registry.h" + +namespace phi { + +template +void DropoutRawKernel(const Context& dev_ctx, + const DenseTensor& x, + const paddle::optional& seed_tensor, + const Scalar& p, + bool is_test, + const std::string& mode, + int seed, + bool fix_seed, + DenseTensor* out, + DenseTensor* mask) { + using XPUType = typename XPUTypeTrait::Type; + auto* y = out; + const auto* x_data = x.data(); + auto* y_data = dev_ctx.template Alloc(y); + float dropout_prob = p.to(); + + int is_upscale = (mode == "upscale_in_train"); + + if (!is_test) { + int seed_data = 0; + if (seed_tensor.get_ptr() != nullptr) { + if ((seed_tensor->place()).GetType() == phi::AllocationType::XPU) { + paddle::memory::Copy(phi::CPUPlace(), + &seed_data, + seed_tensor->place(), + seed_tensor->data(), + sizeof(int)); + } else { + seed_data = *(seed_tensor->data()); + } + + } else { + seed_data = fix_seed ? seed : 0; + } + + auto* mask_data = dev_ctx.template Alloc(mask); + // Special case when dropout_prob is 1.0 + if (dropout_prob == 1.0f) { + int r = xpu::constant(dev_ctx.x_context(), + reinterpret_cast(y_data), + y->numel(), + XPUType(0)); + PADDLE_ENFORCE_XDNN_SUCCESS(r, "constant"); + r = xpu::constant(dev_ctx.x_context(), + reinterpret_cast(mask_data), + mask->numel(), + XPUType(0)); + PADDLE_ENFORCE_XDNN_SUCCESS(r, "constant"); + return; + } + int r = xpu::dropout(dev_ctx.x_context(), + reinterpret_cast(x.data()), + reinterpret_cast(y->data()), + reinterpret_cast(mask_data), + seed_data, + mask->numel(), + is_upscale, + dropout_prob); + PADDLE_ENFORCE_XDNN_SUCCESS(r, "dropout"); + } else { + float scale = + (is_upscale) ? (1.0) : (static_cast(1.0f - dropout_prob)); + int r = xpu::scale(dev_ctx.x_context(), + reinterpret_cast(x_data), + reinterpret_cast(y_data), + x.numel(), + false, + scale, + 0.0f); + PADDLE_ENFORCE_XDNN_SUCCESS(r, "scale"); + } +} + +} // namespace phi + +PD_REGISTER_KERNEL(dropout, + XPU, + ALL_LAYOUT, + phi::DropoutRawKernel, + float, + phi::dtype::float16) {}