// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/device/npu/npu_op_runner.h" namespace paddle { namespace operators { using Tensor = framework::Tensor; template class ClipNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { auto* x = ctx.Input("X"); auto* out = ctx.Output("Out"); out->mutable_data(ctx.GetPlace()); auto min_tensor = ctx.HasInput("Min") ? ctx.Input("Min") : nullptr; auto max_tensor = ctx.HasInput("Max") ? ctx.Input("Max") : nullptr; Tensor min_tensor_temp(x->type()); Tensor max_tensor_temp(x->type()); if (min_tensor == nullptr) { auto min_value = static_cast(ctx.Attr("min")); min_tensor_temp.mutable_data({1}, ctx.GetPlace()); FillNpuTensorWithConstant(&min_tensor_temp, min_value); min_tensor = &min_tensor_temp; } if (max_tensor == nullptr) { auto max_value = static_cast(ctx.Attr("max")); max_tensor_temp.mutable_data({1}, ctx.GetPlace()); FillNpuTensorWithConstant(&max_tensor_temp, max_value); max_tensor = &max_tensor_temp; } auto stream = ctx.template device_context() .stream(); const auto& runner = NpuOpRunner("ClipByValue", {*x, *min_tensor, *max_tensor}, {*out}, {}); runner.Run(stream); } }; template class ClipGradNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { auto* x = ctx.Input("X"); auto* dout = ctx.Input(framework::GradVarName("Out")); auto* dx = ctx.Output(framework::GradVarName("X")); dx->mutable_data(ctx.GetPlace()); auto* min_tensor = ctx.HasInput("Min") ? ctx.Input("Min") : nullptr; auto* max_tensor = ctx.HasInput("Max") ? ctx.Input("Max") : nullptr; auto min_val = ctx.Attr("min"); if (min_tensor) { Tensor min_data; framework::TensorCopy( *min_tensor, platform::CPUPlace(), ctx.template device_context(), &min_data); ctx.template device_context().Wait(); min_val = static_cast(min_data.data()[0]); } auto max_val = ctx.Attr("max"); if (max_tensor) { Tensor max_data; framework::TensorCopy( *max_tensor, platform::CPUPlace(), ctx.template device_context(), &max_data); ctx.template device_context().Wait(); max_val = static_cast(max_data.data()[0]); } auto stream = ctx.template device_context() .stream(); const auto& runner = NpuOpRunner("HardtanhGrad", {*x, *dout}, {*dx}, {{"min_val", min_val}, {"max_val", max_val}}); runner.Run(stream); } }; } // namespace operators } // namespace paddle namespace ops = paddle::operators; namespace plat = paddle::platform; REGISTER_OP_NPU_KERNEL( clip, ops::ClipNPUKernel, ops::ClipNPUKernel); REGISTER_OP_NPU_KERNEL( clip_grad, ops::ClipGradNPUKernel, ops::ClipGradNPUKernel);