// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/operators/flatten_op.h"
#include "paddle/fluid/operators/npu_op_runner.h"

namespace paddle {
namespace operators {

// NPU kernel for flatten2: collapses the input into a 2-D tensor at `axis`
// by delegating to the Ascend "FlattenV2" operator.
template <typename DeviceContext, typename T>
class Flatten2NPUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext &context) const override {
    auto *in = context.Input<framework::Tensor>("X");
    auto *out = context.Output<framework::Tensor>("Out");
    auto &axis = context.Attr<int>("axis");
    out->mutable_data(context.GetPlace(), in->type());
    framework::NPUAttributeMap attr_input = {{"axis", axis}};

    auto stream =
        context.template device_context<paddle::platform::NPUDeviceContext>()
            .stream();
    const auto &runner = NpuOpRunner("FlattenV2", {*in}, {*out}, attr_input);
    runner.Run(stream);
  }
};

// Gradient of flatten2: the gradient is d_out copied and reshaped back to
// the original input shape, which is recovered from the "XShape" tensor
// (its leading dimension is a placeholder and is sliced away).
template <typename DeviceContext, typename T>
class Flatten2GradNPUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext &ctx) const override {
    auto *d_x = ctx.Output<framework::Tensor>(framework::GradVarName("X"));
    auto *d_out = ctx.Input<framework::Tensor>(framework::GradVarName("Out"));

    auto xshape_dims = ctx.Input<framework::Tensor>("XShape")->dims();
    auto x_dims = framework::slice_ddim(xshape_dims, 1, xshape_dims.size());

    d_x->mutable_data(ctx.GetPlace(), d_out->type());
    framework::TensorCopy(
        *d_out, ctx.GetPlace(),
        ctx.template device_context<platform::DeviceContext>(), d_x);
    d_x->Resize(x_dims);
  }
};

using Tensor = framework::Tensor;

// NPU kernel for flatten_contiguous_range: flattens the contiguous range of
// dimensions [start_axis, stop_axis] into one dimension via "FlattenV2".
template <typename DeviceContext, typename T>
class FlattenContiguousRangeNPUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext &ctx) const override {
    auto *X = ctx.Input<Tensor>("X");
    auto *Out = ctx.Output<Tensor>("Out");
    int start_axis = ctx.Attr<int>("start_axis");
    int stop_axis = ctx.Attr<int>("stop_axis");

    Out->mutable_data<T>(ctx.GetPlace());

    const auto &runner =
        NpuOpRunner("FlattenV2", {*X}, {*Out},
                    {{"axis", static_cast<int32_t>(start_axis)},
                     {"end_axis", static_cast<int32_t>(stop_axis)}});
    auto stream =
        ctx.template device_context<paddle::platform::NPUDeviceContext>()
            .stream();
    runner.Run(stream);
  }
};

// Gradient of flatten_contiguous_range: identical in shape-recovery logic to
// Flatten2GradNPUKernel, i.e. a copy of d_out resized to the input shape
// stored in "XShape".
template <typename DeviceContext, typename T>
class FlattenContiguousRangeGradNPUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext &ctx) const override {
    auto *d_x = ctx.Output<framework::Tensor>(framework::GradVarName("X"));
    auto *d_out = ctx.Input<framework::Tensor>(framework::GradVarName("Out"));

    auto xshape_dims = ctx.Input<framework::Tensor>("XShape")->dims();
    auto x_dims = framework::slice_ddim(xshape_dims, 1, xshape_dims.size());

    d_x->mutable_data(ctx.GetPlace(), d_out->type());
    framework::TensorCopy(
        *d_out, ctx.GetPlace(),
        ctx.template device_context<platform::DeviceContext>(), d_x);
    d_x->Resize(x_dims);
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;

REGISTER_OP_NPU_KERNEL(
    flatten2,
    ops::Flatten2NPUKernel<paddle::platform::NPUDeviceContext, float>,
    ops::Flatten2NPUKernel<paddle::platform::NPUDeviceContext, double>,
    ops::Flatten2NPUKernel<paddle::platform::NPUDeviceContext, uint8_t>,
    ops::Flatten2NPUKernel<paddle::platform::NPUDeviceContext, int>,
    ops::Flatten2NPUKernel<paddle::platform::NPUDeviceContext, int8_t>,
    ops::Flatten2NPUKernel<paddle::platform::NPUDeviceContext, int64_t>);

REGISTER_OP_NPU_KERNEL(
    flatten2_grad,
    ops::Flatten2GradNPUKernel<paddle::platform::NPUDeviceContext, float>,
    ops::Flatten2GradNPUKernel<paddle::platform::NPUDeviceContext, double>,
    ops::Flatten2GradNPUKernel<paddle::platform::NPUDeviceContext, uint8_t>,
    ops::Flatten2GradNPUKernel<paddle::platform::NPUDeviceContext, int>,
    ops::Flatten2GradNPUKernel<paddle::platform::NPUDeviceContext, int8_t>,
    ops::Flatten2GradNPUKernel<paddle::platform::NPUDeviceContext, int64_t>);

REGISTER_OP_NPU_KERNEL(
    flatten_contiguous_range,
    ops::FlattenContiguousRangeNPUKernel<paddle::platform::NPUDeviceContext,
                                         float>,
    ops::FlattenContiguousRangeNPUKernel<paddle::platform::NPUDeviceContext,
                                         double>,
    ops::FlattenContiguousRangeNPUKernel<paddle::platform::NPUDeviceContext,
                                         uint8_t>,
    ops::FlattenContiguousRangeNPUKernel<paddle::platform::NPUDeviceContext,
                                         int>,
    ops::FlattenContiguousRangeNPUKernel<paddle::platform::NPUDeviceContext,
                                         int8_t>,
    ops::FlattenContiguousRangeNPUKernel<paddle::platform::NPUDeviceContext,
                                         int64_t>);

REGISTER_OP_NPU_KERNEL(
    flatten_contiguous_range_grad,
    ops::FlattenContiguousRangeGradNPUKernel<
        paddle::platform::NPUDeviceContext, float>,
    ops::FlattenContiguousRangeGradNPUKernel<
        paddle::platform::NPUDeviceContext, double>,
    ops::FlattenContiguousRangeGradNPUKernel<
        paddle::platform::NPUDeviceContext, uint8_t>,
    ops::FlattenContiguousRangeGradNPUKernel<
        paddle::platform::NPUDeviceContext, int>,
    ops::FlattenContiguousRangeGradNPUKernel<
        paddle::platform::NPUDeviceContext, int8_t>,
    ops::FlattenContiguousRangeGradNPUKernel<
        paddle::platform::NPUDeviceContext, int64_t>);