diff --git a/paddle/fluid/operators/roi_align_op_npu.cc b/paddle/fluid/operators/roi_align_op_npu.cc new file mode 100644 index 0000000000000000000000000000000000000000..c1ba046ca6af1a95165a0bf78458a1be56e29c0e --- /dev/null +++ b/paddle/fluid/operators/roi_align_op_npu.cc @@ -0,0 +1,101 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/operators/roi_align_op.h" +#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/fluid/operators/npu_op_runner.h" + +namespace paddle { +namespace operators { +using Tensor = framework::Tensor; + +template +class ROIAlignNPUKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto* X = ctx.Input("X"); // (B,C,H,W) + auto* ROIs = ctx.Input("ROIs"); // (N,4) + auto* ROIsNum = ctx.Input("RoisNum"); // [0 1 1 2 2 2] + auto* Out = ctx.Output("Out"); + Out->mutable_data(ctx.GetPlace()); + + auto spatial_scale = ctx.Attr("spatial_scale"); + auto pooled_height = ctx.Attr("pooled_height"); + auto pooled_width = ctx.Attr("pooled_width"); + auto sample_num = ctx.Attr("sampling_ratio"); + auto aligned = ctx.Attr("aligned"); + auto roi_end_mode = 0; + PADDLE_ENFORCE_EQ( + aligned, false, + platform::errors::InvalidArgument( + "ROIAlignNPU only support Aligned attribute equaled to False")); + + framework::NPUAttributeMap attr_roi = {{"spatial_scale", spatial_scale}, + {"pooled_height", pooled_height}, + {"pooled_width", pooled_width}, + {"sample_num", sample_num}, + {"roi_end_mode", roi_end_mode}}; + + auto stream = + ctx.template device_context() + .stream(); + + // Combine *ROIsNum with ROIs to get new ROIs + // change roisnum's datatype & resize + int dtype = + static_cast(ConvertToNpuDtype(framework::proto::VarType::FP32)); + framework::NPUAttributeMap attr_cast = {{"dst_type", dtype}}; + Tensor ROIsNum_fp(ROIs->type()); + ROIsNum_fp.Resize(framework::make_ddim({ROIs->dims()[0], 1})); + ROIsNum_fp.mutable_data(ctx.GetPlace()); + + const auto& runner_c = + NpuOpRunner("Cast", {*ROIsNum}, {ROIsNum_fp}, attr_cast); + runner_c.Run(stream); + + // concate to make (N, 5) + std::vector x_list; + x_list.push_back(ROIsNum_fp); + x_list.push_back(*ROIs); + auto axis = 1; + // output of concate + Tensor ROIs_N5(ROIs->type()); + ROIs_N5.Resize(framework::make_ddim({ROIs->dims()[0], 5})); + ROIs_N5.mutable_data(ctx.GetPlace()); + + // attribute of concate + auto EleNum = 2; + framework::NPUAttributeMap attr_concat = {{"N", EleNum}, + {"concat_dim", axis}}; + + NpuOpRunner runner0; + runner0.SetType("ConcatD") + .AddInputs(x_list) + .AddOutput(ROIs_N5) + .AddInputNames({"x0", "x1"}) + .AddAttrs(attr_concat); + runner0.Run(stream); + + const auto& runner = + NpuOpRunner("ROIAlign", {*X, ROIs_N5}, {*Out}, attr_roi); + runner.Run(stream); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP_NPU_KERNEL( + roi_align, + ops::ROIAlignNPUKernel, + ops::ROIAlignNPUKernel, + ops::ROIAlignNPUKernel); diff --git a/python/paddle/fluid/tests/unittests/npu/test_roi_align_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_roi_align_op_npu.py new file mode 100644 index 0000000000000000000000000000000000000000..9ca2856886e08cc1ae0ca2da2dea31f073c24c2b --- /dev/null +++ b/python/paddle/fluid/tests/unittests/npu/test_roi_align_op_npu.py @@ -0,0 +1,217 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import numpy as np +import math +import sys +sys.path.append("..") +from op_test import OpTest +import paddle + +paddle.enable_static() +np.random.seed(1243) + + +class TestROIAlignNPUOp(OpTest): + def set_data(self): + self.init_test_case() + self.make_rois() + self.calc_roi_align() + + seq_len = self.rois_lod[0] + + self.inputs = { + 'X': self.x, + 'ROIs': self.rois[:, 1:5], + 'RoisNum': np.asarray(seq_len).astype('int32') + } + + self.attrs = { + 'spatial_scale': self.spatial_scale, + 'pooled_height': self.pooled_height, + 'pooled_width': self.pooled_width, + 'sampling_ratio': self.sampling_ratio, + 'aligned': self.aligned + } + + self.outputs = {'Out': self.out_data} + + def init_test_case(self): + self.batch_size = 3 + self.channels = 3 + self.height = 8 + self.width = 6 + + # n, c, h, w + self.x_dim = (self.batch_size, self.channels, self.height, self.width) + + self.spatial_scale = 1.0 / 2.0 + self.pooled_height = 2 + self.pooled_width = 2 + self.sampling_ratio = 2 + self.aligned = False + + self.x = np.random.random(self.x_dim).astype('float32') + + def pre_calc(self, x_i, roi_xmin, roi_ymin, roi_bin_grid_h, roi_bin_grid_w, + bin_size_h, bin_size_w): + count = roi_bin_grid_h * roi_bin_grid_w + bilinear_pos = np.zeros( + [self.channels, self.pooled_height, self.pooled_width, count, 4], + np.float32) + bilinear_w = np.zeros( + [self.pooled_height, self.pooled_width, count, 4], np.float32) + for ph in range(self.pooled_width): + for pw in range(self.pooled_height): + c = 0 + for iy in range(roi_bin_grid_h): + y = roi_ymin + ph * bin_size_h + (iy + 0.5) * \ + bin_size_h / roi_bin_grid_h + for ix in range(roi_bin_grid_w): + x = roi_xmin + pw * bin_size_w + (ix + 0.5) * \ + bin_size_w / roi_bin_grid_w + if y < -1.0 or y > self.height or \ + x < -1.0 or x > self.width: + continue + if y <= 0: + y = 0 + if x <= 0: + x = 0 + y_low = int(y) + x_low = int(x) + if y_low >= self.height - 1: + y = y_high = y_low = self.height - 1 + else: + y_high = y_low + 1 + if x_low >= self.width - 1: + x = x_high = x_low = self.width - 1 + else: + x_high = x_low + 1 + ly = y - y_low + lx = x - x_low + hy = 1 - ly + hx = 1 - lx + for ch in range(self.channels): + bilinear_pos[ch, ph, pw, c, 0] = x_i[ch, y_low, + x_low] + bilinear_pos[ch, ph, pw, c, 1] = x_i[ch, y_low, + x_high] + bilinear_pos[ch, ph, pw, c, 2] = x_i[ch, y_high, + x_low] + bilinear_pos[ch, ph, pw, c, 3] = x_i[ch, y_high, + x_high] + bilinear_w[ph, pw, c, 0] = hy * hx + bilinear_w[ph, pw, c, 1] = hy * lx + bilinear_w[ph, pw, c, 2] = ly * hx + bilinear_w[ph, pw, c, 3] = ly * lx + c = c + 1 + return bilinear_pos, bilinear_w + + def calc_roi_align(self): + self.out_data = np.zeros( + (self.rois_num, self.channels, self.pooled_height, + self.pooled_width)).astype('float32') + + offset = 0.5 if self.aligned else 0. + for i in range(self.rois_num): + roi = self.rois[i] + roi_batch_id = int(roi[0]) + x_i = self.x[roi_batch_id] + roi_xmin = roi[1] * self.spatial_scale - offset + roi_ymin = roi[2] * self.spatial_scale - offset + roi_xmax = roi[3] * self.spatial_scale - offset + roi_ymax = roi[4] * self.spatial_scale - offset + + roi_width = roi_xmax - roi_xmin + roi_height = roi_ymax - roi_ymin + if not self.aligned: + roi_width = max(roi_width, 1) + roi_height = max(roi_height, 1) + + bin_size_h = float(roi_height) / float(self.pooled_height) + bin_size_w = float(roi_width) / float(self.pooled_width) + roi_bin_grid_h = self.sampling_ratio if self.sampling_ratio > 0 else \ + math.ceil(roi_height / self.pooled_height) + roi_bin_grid_w = self.sampling_ratio if self.sampling_ratio > 0 else \ + math.ceil(roi_width / self.pooled_width) + count = max(int(roi_bin_grid_h * roi_bin_grid_w), 1) + pre_size = count * self.pooled_width * self.pooled_height + bilinear_pos, bilinear_w = self.pre_calc(x_i, roi_xmin, roi_ymin, + int(roi_bin_grid_h), + int(roi_bin_grid_w), + bin_size_h, bin_size_w) + for ch in range(self.channels): + align_per_bin = (bilinear_pos[ch] * bilinear_w).sum(axis=-1) + output_val = align_per_bin.mean(axis=-1) + self.out_data[i, ch, :, :] = output_val + + def make_rois(self): + rois = [] + self.rois_lod = [[]] + for bno in range(self.batch_size): + # for i in range(bno + 1): + self.rois_lod[0].append(bno) + x1 = np.random.randint( + 0, self.width // self.spatial_scale - self.pooled_width) + y1 = np.random.randint( + 0, self.height // self.spatial_scale - self.pooled_height) + + x2 = np.random.randint(x1 + self.pooled_width, + self.width // self.spatial_scale) + y2 = np.random.randint(y1 + self.pooled_height, + self.height // self.spatial_scale) + + roi = [bno, x1, y1, x2, y2] + rois.append(roi) + + self.rois_num = len(rois) + self.rois = np.array(rois).astype("float32") + + def setUp(self): + self.op_type = "roi_align" + self.__class__.use_npu = True + self.place = paddle.NPUPlace(0) + self.set_data() + + def test_check_output(self): + self.check_output_with_place(self.place) + + def test_check_grad(self): + self.check_grad_with_place(self.place, ['X'], 'Out') + + +class TestROIAlignOpWithMinusSample(TestROIAlignNPUOp): + def init_test_case(self): + self.batch_size = 3 + self.channels = 3 + self.height = 8 + self.width = 6 + + # n, c, h, w + self.x_dim = (self.batch_size, self.channels, self.height, self.width) + + self.spatial_scale = 1.0 / 2.0 + self.pooled_height = 2 + self.pooled_width = 2 + self.sampling_ratio = -1 + self.aligned = False + + self.x = np.random.random(self.x_dim).astype('float32') + + +if __name__ == '__main__': + unittest.main()