norm_op_npu.cc 3.8 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

H
hong 已提交
12
#include "paddle/fluid/framework/op_registry.h"
13
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
14 15 16 17

namespace paddle {
namespace operators {

F
furnace 已提交
18 19 20 21 22 23 24 25 26 27 28 29
// Short local aliases for framework types.
using DDim = framework::DDim;
using Tensor = framework::Tensor;

// Validates that `axis` is a legal dimension index for a tensor with `rank`
// dimensions, i.e. that it lies in the closed interval [-rank, rank - 1].
//
// Args:
//   axis: dimension index to validate; negative values down to -rank pass.
//   rank: number of dimensions of the tensor being indexed.
//
// Returns normally when the axis is in range; otherwise reports a
// platform::errors::InvalidArgument through PADDLE_THROW.
void CheckAxis(int axis, int rank) {
  const bool in_range = axis >= -rank && axis <= rank - 1;
  if (in_range) return;
  // The two literals are concatenated by the compiler, so the separator
  // before "but got" has to be part of the first literal.
  PADDLE_THROW(platform::errors::InvalidArgument(
      "axis in norm operator must be between (%d) and (%d), "
      "but got (%d).",
      -rank, rank - 1, axis));
}

30 31 32
// NPU forward kernel of the `norm` operator.
//
// Inputs/attributes read from the execution context:
//   "X"       - input tensor.
//   "epsilon" - float attribute forwarded to the NPU "LpNorm" op.
//   "axis"    - int attribute selecting the dimension; must be within
//               [-rank, rank - 1] (enforced by CheckAxis).
// Outputs:
//   "Norm" - result of the NPU "LpNorm" op (p = 2, keepdim = true) over the
//            selected axis.
//   "Out"  - result of the NPU "Div" op applied to X and Norm.
template <typename DeviceContext, typename T>
class NormNPUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext &ctx) const override {
    VLOG(4) << "Launch Norm Op Kernel on NPU.";

    auto *in_x = ctx.Input<framework::Tensor>("X");
    auto *out_y = ctx.Output<framework::Tensor>("Out");
    auto *out_norm = ctx.Output<framework::Tensor>("Norm");
    out_y->mutable_data<T>(ctx.GetPlace());
    out_norm->mutable_data<T>(ctx.GetPlace());

    float eps = ctx.Attr<float>("epsilon");
    int axis = ctx.Attr<int>("axis");
    const int rank = in_x->dims().size();
    CheckAxis(axis, rank);
    // Map a negative axis onto its non-negative equivalent before handing it
    // to the NPU op.
    if (axis < 0) axis += rank;

    framework::NPUAttributeMap attr_input_norm;
    attr_input_norm["axes"] = std::vector<int>({axis});
    attr_input_norm["p"] = 2;
    attr_input_norm["keepdim"] = true;
    attr_input_norm["epsilon"] = eps;

    auto stream =
        ctx.template device_context<paddle::platform::NPUDeviceContext>()
            .stream();

    // Step 1: Norm = LpNorm(X) along `axis`.
    const auto &runner =
        NpuOpRunner("LpNorm", {*in_x}, {*out_norm}, attr_input_norm);
    runner.Run(stream);

    // Step 2: Out = X / Norm.
    NpuOpRunner("Div", {*in_x, *out_norm}, {*out_y}, {}).Run(stream);
  }
};

F
furnace 已提交
61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91
// NPU backward kernel of the `norm` operator.
//
// Inputs/attributes read from the execution context:
//   "X"        - forward input tensor.
//   "Out"      - forward output tensor.
//   "Out@GRAD" - gradient w.r.t. "Out" (looked up via GradVarName("Out")).
//   "epsilon"  - float attribute forwarded to the NPU op as "eps".
//   "axis"     - int attribute selecting the dimension; must be within
//                [-rank, rank - 1] (enforced by CheckAxis).
// Output:
//   "X@GRAD" - gradient w.r.t. "X", produced by the NPU "L2NormalizeGrad" op.
template <typename DeviceContext, typename T>
class NormGradNPUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext &ctx) const override {
    float epsilon = ctx.Attr<float>("epsilon");
    int axis = ctx.Attr<int>("axis");

    auto *x = ctx.Input<Tensor>("X");
    auto *y = ctx.Input<framework::Tensor>("Out");
    auto *dy = ctx.Input<framework::Tensor>(framework::GradVarName("Out"));
    auto *dx = ctx.Output<framework::Tensor>(framework::GradVarName("X"));

    const int rank = x->dims().size();
    CheckAxis(axis, rank);
    // CheckAxis accepts negative indices; convert them to the equivalent
    // non-negative index, exactly as the forward kernel does, so the NPU op
    // never receives a negative dimension.
    if (axis < 0) axis += rank;

    auto place = ctx.GetPlace();
    dx->mutable_data<T>(place);

    framework::NPUAttributeMap attr_input_norm;
    attr_input_norm["dim"] = std::vector<int>({axis});
    attr_input_norm["eps"] = epsilon;

    auto stream =
        ctx.template device_context<paddle::platform::NPUDeviceContext>()
            .stream();
    const auto &runner =
        NpuOpRunner("L2NormalizeGrad", {*x, *y, *dy}, {*dx}, attr_input_norm);
    runner.Run(stream);
  }
};

92 93 94 95 96
}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
namespace plat = paddle::platform;
F
furnace 已提交
97

98 99 100 101
REGISTER_OP_NPU_KERNEL(
    norm, ops::NormNPUKernel<paddle::platform::NPUDeviceContext, float>,
    ops::NormNPUKernel<paddle::platform::NPUDeviceContext,
                       paddle::platform::float16>)
F
furnace 已提交
102 103 104 105

REGISTER_OP_NPU_KERNEL(
    norm_grad, ops::NormGradNPUKernel<plat::NPUDeviceContext, float>,
    ops::NormGradNPUKernel<plat::NPUDeviceContext, plat::float16>);