未验证 提交 d82b0300 编写于 作者: Q QingshuChen 提交者: GitHub

support roi_align & affine_channel for kunlun (#29561) (#29657)

* support roi_align & affine_channel for kunlun

* minor
上级 03ddf690
...@@ -4,7 +4,7 @@ endif() ...@@ -4,7 +4,7 @@ endif()
INCLUDE(ExternalProject) INCLUDE(ExternalProject)
SET(XPU_PROJECT "extern_xpu") SET(XPU_PROJECT "extern_xpu")
SET(XPU_URL "https://baidu-kunlun-public.su.bcebos.com/paddle_depence/xpu_2020_12_07_cdfbf0c.tar.gz" CACHE STRING "" FORCE) SET(XPU_URL "https://baidu-kunlun-public.su.bcebos.com/paddle_depence/xpu_2020_12_11.tar.gz" CACHE STRING "" FORCE)
SET(XPU_SOURCE_DIR "${THIRD_PARTY_PATH}/xpu") SET(XPU_SOURCE_DIR "${THIRD_PARTY_PATH}/xpu")
SET(XPU_DOWNLOAD_DIR "${XPU_SOURCE_DIR}/src/${XPU_PROJECT}") SET(XPU_DOWNLOAD_DIR "${XPU_SOURCE_DIR}/src/${XPU_PROJECT}")
SET(XPU_INSTALL_DIR "${THIRD_PARTY_PATH}/install/xpu") SET(XPU_INSTALL_DIR "${THIRD_PARTY_PATH}/install/xpu")
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_XPU
#include <string>
#include <unordered_map>
#include <vector>
#include "paddle/fluid/framework/data_layout.h"
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
namespace paddle {
namespace operators {
// XPU forward kernel for affine_channel.
//
// Computes Out = X * Scale + Bias in two steps (broadcast_mul followed by an
// in-place broadcast_add). X is viewed as [N, C, H*W] when data_layout is
// NCHW and as [N*H*W, C] otherwise; Scale and Bias are broadcast with shape
// [1, C, 1] or [1, C] respectively.
template <typename DeviceContext, typename T>
class AffineChannelXPUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* input = ctx.Input<framework::Tensor>("X");
    auto* scale_t = ctx.Input<framework::Tensor>("Scale");
    auto* bias_t = ctx.Input<framework::Tensor>("Bias");
    auto* output = ctx.Output<framework::Tensor>("Out");

    // Allocate the output on the kernel's place before touching any data.
    T* out_ptr = output->mutable_data<T>(ctx.GetPlace());

    const framework::DataLayout layout =
        framework::StringToDataLayout(ctx.Attr<std::string>("data_layout"));
    const bool channel_first = (layout == framework::DataLayout::kNCHW);

    auto in_dims = input->dims();
    int N = in_dims[0];
    int C = channel_first ? in_dims[1] : in_dims[in_dims.size() - 1];
    // Everything that is neither batch nor channel is folded into one axis.
    int HxW = input->numel() / N / C;

    auto* scale_ptr = scale_t->data<T>();
    auto* bias_ptr = bias_t->data<T>();
    auto* in_ptr = input->data<T>();

    auto& dev_ctx = ctx.template device_context<DeviceContext>();

    // Shapes handed to the XPU broadcast primitives.
    std::vector<int> data_shape;
    std::vector<int> param_shape;
    if (channel_first) {
      data_shape = {N, C, HxW};
      param_shape = {1, C, 1};
    } else {
      data_shape = {N * HxW, C};
      param_shape = {1, C};
    }

    // Out = X * Scale
    int r = xpu::broadcast_mul(dev_ctx.x_context(), in_ptr, scale_ptr, out_ptr,
                               data_shape, param_shape);
    PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
                      platform::errors::External(
                          "The broadcast_mul XPU OP return wrong value[%d %s]",
                          r, XPUAPIErrorMsg[r]));

    // Out = Out + Bias (in place)
    r = xpu::broadcast_add(dev_ctx.x_context(), out_ptr, bias_ptr, out_ptr,
                           data_shape, param_shape);
    PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
                      platform::errors::External(
                          "The broadcast_add XPU OP return wrong value[%d %s]",
                          r, XPUAPIErrorMsg[r]));
  }
};
// XPU backward kernel for affine_channel.
//
// Given dOut it produces, when the corresponding outputs are requested:
//   dBias  = reduce_sum(dOut)      over every axis except the channel axis
//   dScale = reduce_sum(dOut * X)  over every axis except the channel axis
//   dX     = dOut * Scale          (Scale broadcast along the channel axis)
// dScale and dBias are only computed when both outputs are present.
//
// X is viewed as [N, C, H*W] for NCHW and as [N*H*W, C] otherwise.
template <typename DeviceContext, typename T>
class AffineChannelGradXPUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* x = ctx.Input<framework::Tensor>("X");
    auto* scale = ctx.Input<framework::Tensor>("Scale");
    auto* dy = ctx.Input<framework::Tensor>(framework::GradVarName("Out"));
    auto* dx = ctx.Output<framework::Tensor>(framework::GradVarName("X"));
    auto* dscale =
        ctx.Output<framework::Tensor>(framework::GradVarName("Scale"));
    auto* dbias = ctx.Output<framework::Tensor>(framework::GradVarName("Bias"));

    const framework::DataLayout layout =
        framework::StringToDataLayout(ctx.Attr<std::string>("data_layout"));
    auto dims = x->dims();
    int N = dims[0];
    int C = layout == framework::DataLayout::kNCHW ? dims[1]
                                                   : dims[dims.size() - 1];
    int HxW = x->numel() / N / C;

    auto* dy_d = dy->data<T>();
    auto* scale_d = scale->data<T>();
    // Any of the gradient outputs may be absent; a null pointer marks that.
    T* dx_d = dx ? dx->mutable_data<T>(ctx.GetPlace()) : nullptr;
    T* dscale_d = dscale ? dscale->mutable_data<T>(ctx.GetPlace()) : nullptr;
    T* dbias_d = dbias ? dbias->mutable_data<T>(ctx.GetPlace()) : nullptr;
    auto& dev_ctx = ctx.template device_context<DeviceContext>();

    std::vector<int> x_shape;
    std::vector<int> b_shape;
    std::vector<int> rdims;  // axes reduced away to obtain per-channel sums
    if (layout == framework::DataLayout::kNCHW) {
      x_shape = {N, C, HxW};
      b_shape = {1, C, 1};
      rdims = {0, 2};
    } else {
      x_shape = {N * HxW, C};
      b_shape = {1, C};
      rdims = {0};
    }

    // Owns the device scratch buffer. PADDLE_ENFORCE_EQ throws on failure, so
    // without a guard every error after xpu_malloc would leak the buffer.
    // Unless the stream has already been synchronized, the destructor first
    // drains it (best effort) so no in-flight kernel still touches the buffer
    // when it is released.
    struct ScratchGuard {
      const DeviceContext& dev;
      T* buf;
      bool synced;
      ~ScratchGuard() {
        if (!synced) {
          try {
            if (dev.x_context()->xpu_stream) {
              dev.Wait();
            }
          } catch (...) {
            // Never let an exception escape a destructor; the original error
            // (if any) is already propagating.
          }
        }
        xpu_free(buf);
      }
    };

    int r = 0;
    if (dscale_d && dbias_d) {
      // dBias = sum(dOut)
      r = xpu::reduce_sum<T>(dev_ctx.x_context(), dy_d, dbias_d, x_shape,
                             rdims);
      PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
                        platform::errors::External(
                            "The reduce_sum XPU OP return wrong value[%d %s]",
                            r, XPUAPIErrorMsg[r]));

      // Scratch buffer holding the element-wise product dOut * X.
      T* tmp = nullptr;
      r = xpu_malloc(reinterpret_cast<void**>(&tmp), dy->numel() * sizeof(T));
      PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
                        platform::errors::External("no enough memory in xpu"));
      ScratchGuard scratch{dev_ctx, tmp, false};

      r = xpu::mul<T>(dev_ctx.x_context(), dy_d, x->data<T>(), tmp,
                      dy->numel());
      PADDLE_ENFORCE_EQ(
          r, xpu::Error_t::SUCCESS,
          platform::errors::External("The mul XPU OP return wrong value[%d %s]",
                                     r, XPUAPIErrorMsg[r]));

      // dScale = sum(dOut * X)
      r = xpu::reduce_sum<T>(dev_ctx.x_context(), tmp, dscale_d, x_shape,
                             rdims);
      PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
                        platform::errors::External(
                            "The reduce_sum XPU OP return wrong value[%d %s]",
                            r, XPUAPIErrorMsg[r]));

      // Wait for the stream before the scratch buffer is released at the end
      // of this scope.
      if (dev_ctx.x_context()->xpu_stream) {
        dev_ctx.Wait();
      }
      scratch.synced = true;
    }

    if (dx_d) {
      // dX = dOut * Scale
      r = xpu::broadcast_mul(dev_ctx.x_context(), dy_d, scale_d, dx_d, x_shape,
                             b_shape);
      PADDLE_ENFORCE_EQ(
          r, xpu::Error_t::SUCCESS,
          platform::errors::External(
              "The broadcast_mul XPU OP return wrong value[%d %s]", r,
              XPUAPIErrorMsg[r]));
    }
  }
};
} // namespace operators
} // namespace paddle
// Register the float instantiations of the forward and backward kernels
// under the operator names affine_channel and affine_channel_grad.
namespace ops = paddle::operators;
using XPU = paddle::platform::XPUDeviceContext;
REGISTER_OP_XPU_KERNEL(affine_channel, ops::AffineChannelXPUKernel<XPU, float>);
REGISTER_OP_XPU_KERNEL(affine_channel_grad,
ops::AffineChannelGradXPUKernel<XPU, float>);
#endif
...@@ -24,89 +24,202 @@ template <typename DeviceContext, typename T> ...@@ -24,89 +24,202 @@ template <typename DeviceContext, typename T>
class XPUROIAlignOpKernel : public framework::OpKernel<T> { class XPUROIAlignOpKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& ctx) const override { void Compute(const framework::ExecutionContext& ctx) const override {
auto* in = ctx.Input<framework::Tensor>("X"); auto* in = ctx.Input<Tensor>("X");
auto* rois = ctx.Input<framework::LoDTensor>("ROIs"); auto* rois = ctx.Input<LoDTensor>("ROIs");
auto* out = ctx.Output<framework::Tensor>("Out"); auto* out = ctx.Output<Tensor>("Out");
auto pooled_height = ctx.Attr<int>("pooled_height"); auto pooled_height = ctx.Attr<int>("pooled_height");
auto pooled_width = ctx.Attr<int>("pooled_width"); auto pooled_width = ctx.Attr<int>("pooled_width");
auto spatial_scale = ctx.Attr<float>("spatial_scale"); auto spatial_scale = ctx.Attr<float>("spatial_scale");
auto sampling_ratio = ctx.Attr<int>("sampling_ratio"); auto sampling_ratio = ctx.Attr<int>("sampling_ratio");
auto& dev_ctx = ctx.template device_context<DeviceContext>();
auto in_dims = in->dims(); auto in_dims = in->dims();
int batch_size = in_dims[0]; int batch_size = in_dims[0];
int channels = in_dims[1]; int channels = in_dims[1];
int height = in_dims[2]; int height = in_dims[2];
int width = in_dims[3]; int width = in_dims[3];
int rois_num = rois->dims()[0]; int rois_num = rois->dims()[0];
const T* input_data = in->data<T>();
framework::Tensor _roi_batch_list; if (rois_num == 0) return;
_roi_batch_list.Resize({rois_num});
int* rois_lod = _roi_batch_list.mutable_data<int>(ctx.GetPlace()); Tensor roi_batch_id_list;
int rois_batch_size = 1; roi_batch_id_list.Resize({rois_num});
auto cplace = platform::CPUPlace();
int* roi_batch_id_data = roi_batch_id_list.mutable_data<int>(cplace);
auto& dev_ctx = ctx.template device_context<DeviceContext>();
auto xplace = BOOST_GET_CONST(platform::XPUPlace, ctx.GetPlace());
int rois_batch_size = 0;
int* cpu_lod = nullptr;
if (ctx.HasInput("RoisNum")) { if (ctx.HasInput("RoisNum")) {
auto* rois_num_t = ctx.Input<framework::Tensor>("RoisNum"); auto* rois_num_t = ctx.Input<Tensor>("RoisNum");
rois_batch_size = rois_num_t->numel(); rois_batch_size = rois_num_t->numel();
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
rois_batch_size, batch_size, rois_batch_size, batch_size,
platform::errors::InvalidArgument( platform::errors::InvalidArgument(
"The batch size of rois and the batch size of images " "The rois_batch_size and imgs "
" must be the same. But received the batch size of rois is %d, " "batch_size must be the same. But received rois_batch_size = %d, "
"and the batch size of images is %d", "batch_size = %d",
rois_batch_size, batch_size)); rois_batch_size, batch_size));
auto* rois_num_data = rois_num_t->data<int>();
rois_lod[0] = 0; std::vector<int> rois_num_list(rois_batch_size);
for (int n = 0; n < rois_batch_size; ++n) { memory::Copy(cplace, rois_num_list.data(), xplace,
rois_lod[n + 1] = rois_lod[n] + rois_num_data[n]; rois_num_t->data<int>(), sizeof(int) * rois_batch_size);
cpu_lod = new int[rois_batch_size + 1];
cpu_lod[0] = 0;
for (int i = 0; i < rois_batch_size; i++) {
cpu_lod[i + 1] = cpu_lod[i] + rois_num_list[i];
} }
} else { } else {
auto _rois_lod = rois->lod().back(); auto lod = rois->lod();
rois_batch_size = _rois_lod.size() - 1; PADDLE_ENFORCE_EQ(
for (int n = 0; n < static_cast<int>(_rois_lod.size()); ++n) { lod.empty(), false,
rois_lod[n] = _rois_lod[n]; platform::errors::InvalidArgument("Input(ROIs) in ROIAlignOp does "
} "not contain LoD information."));
auto rois_lod = lod.back();
rois_batch_size = rois_lod.size() - 1;
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
rois_batch_size, batch_size, rois_batch_size, batch_size,
platform::errors::InvalidArgument( platform::errors::InvalidArgument(
"The rois_batch_size and imgs batch_size of roi_align_xpu OP " "The batch size of rois and batch size "
"must " "of images must be the same. But received rois batch size = %d, "
"be the same. But received rois_batch_size %d , batch_size %d", "and images batch size = %d",
rois_batch_size, batch_size)); rois_batch_size, batch_size));
}
int rois_num_with_lod = rois_lod[rois_batch_size]; int rois_num_with_lod = rois_lod[rois_batch_size];
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
rois_num, rois_num_with_lod, rois_num, rois_num_with_lod,
platform::errors::InvalidArgument( platform::errors::InvalidArgument(
"The rois_num from input and lod of roi_align_xpu OP must be the " "The actual number of rois and the number of rois "
"same. But received input rois_num %d , input lod %d", "provided from Input(RoIsLoD) in RoIAlign must be the same."
" But received actual number of rois is %d, and the number "
"of rois from RoIsLoD is %d",
rois_num, rois_num_with_lod)); rois_num, rois_num_with_lod));
T* output_data = out->mutable_data<T>(ctx.GetPlace()); for (int n = 0; n < rois_batch_size; ++n) {
const T* rois_data = rois->data<T>(); for (size_t i = rois_lod[n]; i < rois_lod[n + 1]; ++i) {
for (int n = 0; n < rois_batch_size; n++) { roi_batch_id_data[i] = n;
int cur_batch_rois_num = rois_lod[n + 1] - rois_lod[n]; }
if (cur_batch_rois_num != 0) { }
int r = xpu::roi_align( cpu_lod = new int[rois_batch_size + 1];
dev_ctx.x_context(), input_data + n * channels * height * width, for (int i = 0; i < rois_batch_size + 1; i++) {
rois_data + rois_lod[n] * 4, cur_batch_rois_num, channels, height, cpu_lod[i] = rois_lod[i];
width, pooled_height, pooled_width, sampling_ratio, spatial_scale, }
output_data + }
rois_lod[n] * channels * pooled_height * pooled_width);
int* roi_id_data = nullptr;
int r = xpu_malloc(reinterpret_cast<void**>(&roi_id_data),
(rois_batch_size + 1) * sizeof(int));
PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
platform::errors::External("no enough memory in xpu"));
memory::Copy(xplace, roi_id_data, cplace, cpu_lod,
(rois_batch_size + 1) * sizeof(int));
delete[] cpu_lod;
r = xpu::roi_align<T, int>(
dev_ctx.x_context(), in->data<T>(),
out->mutable_data<T>(ctx.GetPlace()), rois->data<T>(), roi_id_data,
batch_size, channels, height, width, out->dims()[0], pooled_height,
pooled_width, spatial_scale, sampling_ratio, true);
PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
platform::errors::External(
"The roi_align XPU OP return wrong value[%d %s]", r,
XPUAPIErrorMsg[r]));
if (dev_ctx.x_context()->xpu_stream) {
dev_ctx.Wait();
}
xpu_free(roi_id_data);
}
};
template <typename DeviceContext, typename T>
class XPUROIAlignGradOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto* in = ctx.Input<Tensor>("X");
auto* rois = ctx.Input<LoDTensor>("ROIs");
auto* out_grad = ctx.Input<Tensor>(framework::GradVarName("Out"));
auto* in_grad = ctx.Output<Tensor>(framework::GradVarName("X"));
auto pooled_height = ctx.Attr<int>("pooled_height");
auto pooled_width = ctx.Attr<int>("pooled_width");
auto spatial_scale = ctx.Attr<float>("spatial_scale");
auto sampling_ratio = ctx.Attr<int>("sampling_ratio");
int rois_num = rois->dims()[0];
int channels = in->dims()[1];
int height = in->dims()[2];
int width = in->dims()[3];
if (!in_grad) {
return;
}
Tensor roi_batch_id_list;
roi_batch_id_list.Resize({rois_num});
auto cplace = platform::CPUPlace();
auto& dev_ctx = ctx.template device_context<DeviceContext>();
auto xplace = BOOST_GET_CONST(platform::XPUPlace, ctx.GetPlace());
int rois_batch_size = 0;
int* cpu_lod = nullptr;
if (ctx.HasInput("RoisNum")) {
auto* rois_num_t = ctx.Input<Tensor>("RoisNum");
rois_batch_size = rois_num_t->numel();
std::vector<int> rois_num_list(rois_batch_size);
memory::Copy(cplace, rois_num_list.data(), xplace,
rois_num_t->data<int>(), sizeof(int) * rois_batch_size);
cpu_lod = new int[rois_batch_size + 1];
cpu_lod[0] = 0;
for (int i = 0; i < rois_batch_size; i++) {
cpu_lod[i + 1] = cpu_lod[i] + rois_num_list[i];
}
} else {
auto rois_lod = rois->lod().back();
rois_batch_size = rois_lod.size() - 1;
cpu_lod = new int[rois_batch_size + 1];
for (int i = 0; i < rois_batch_size + 1; i++) {
cpu_lod[i] = rois_lod[i];
}
}
int* roi_id_data = nullptr;
int r = xpu_malloc(reinterpret_cast<void**>(&roi_id_data),
(rois_batch_size + 1) * sizeof(int));
PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
platform::errors::External("no enough memory in xpu"));
memory::Copy(xplace, roi_id_data, cplace, cpu_lod,
(rois_batch_size + 1) * sizeof(int));
in_grad->mutable_data<T>(ctx.GetPlace());
int output_grad_size = out_grad->numel();
delete[] cpu_lod;
if (output_grad_size > 0) {
r = xpu::roi_align_grad<T, int>(
dev_ctx.x_context(), out_grad->data<T>(), in_grad->data<T>(),
rois->data<T>(), roi_id_data, in->dims()[0], channels, height, width,
out_grad->dims()[0], pooled_height, pooled_width, spatial_scale,
sampling_ratio, true);
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
r, xpu::Error_t::SUCCESS, r, xpu::Error_t::SUCCESS,
platform::errors::External( platform::errors::External(
"The roi_align XPU OP return wrong value[%d], please check " "The roi_align_grad XPU OP return wrong value[%d %s]", r,
"where Baidu Kunlun Card is properly installed.", XPUAPIErrorMsg[r]));
r));
} }
if (dev_ctx.x_context()->xpu_stream) {
dev_ctx.Wait();
} }
xpu_free(roi_id_data);
} }
}; };
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OP_XPU_KERNEL( REGISTER_OP_XPU_KERNEL(
roi_align, roi_align,
ops::XPUROIAlignOpKernel<paddle::platform::XPUDeviceContext, float>); ops::XPUROIAlignOpKernel<paddle::platform::XPUDeviceContext, float>);
REGISTER_OP_XPU_KERNEL(
roi_align_grad,
ops::XPUROIAlignGradOpKernel<paddle::platform::XPUDeviceContext, float>);
#endif #endif
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Unit testing for affine_channel_op
"""
from __future__ import print_function
import sys
sys.path.append("..")
import unittest
import numpy as np
from op_test_xpu import XPUOpTest
import paddle
import paddle.fluid.core as core
import paddle.fluid as fluid
def affine_channel(x, scale, bias, layout):
    """NumPy reference implementation of affine_channel.

    Computes ``x * scale + bias`` with ``scale`` and ``bias`` applied per
    channel. The channel axis is axis 1 when ``layout`` is ``'NCHW'`` and the
    last axis otherwise. Inputs of any rank >= 2 are supported.

    Args:
        x: input array.
        scale: 1-D array with one entry per channel.
        bias: 1-D array with one entry per channel.
        layout: ``'NCHW'`` for channel-first; any other value is treated as
            channel-last.

    Returns:
        Array with the same shape as ``x``.
    """
    ndim = len(x.shape)
    if layout == 'NCHW':
        C = x.shape[1]
        # Channel on axis 1, every trailing axis broadcast.
        new_shape = (1, C) + (1, ) * (ndim - 2)
    else:
        C = x.shape[-1]
        # Channel on the last axis; keep at least one leading axis so the
        # result for low-rank input matches the former (1, C) reshape.
        new_shape = (1, ) * max(ndim - 1, 1) + (C, )
    scale = scale.reshape(new_shape)
    bias = bias.reshape(new_shape)
    return x * scale + bias
class TestAffineChannelOp(XPUOpTest):
    """Checks the affine_channel XPU kernel against the NumPy reference.

    Subclasses override ``init_test_case`` to change shape and layout.
    Every check is a no-op when Paddle was not compiled with XPU support.
    """

    def init_test_case(self):
        self.shape = [2, 100, 3, 3]
        self.C = 100
        self.layout = 'NCHW'

    def setUp(self):
        self.op_type = "affine_channel"
        self.init_test_case()

        # Draw order (x, scale, bias) is kept so seeded runs stay comparable.
        x = np.random.random(self.shape).astype("float32")
        scale = np.random.random(self.C).astype("float32")
        bias = np.random.random(self.C).astype("float32")
        expected = affine_channel(x, scale, bias, self.layout)

        self.inputs = {'X': x, 'Scale': scale, 'Bias': bias}
        self.attrs = {'data_layout': self.layout}
        self.outputs = {'Out': expected}

    def _xpu_place(self):
        """Return XPUPlace(0) in static mode, or None without XPU support."""
        if not core.is_compiled_with_xpu():
            return None
        paddle.enable_static()
        return paddle.XPUPlace(0)

    def test_check_output(self):
        place = self._xpu_place()
        if place is not None:
            self.check_output_with_place(place)

    def test_check_grad(self):
        place = self._xpu_place()
        if place is not None:
            self.check_grad_with_place(place, ['X', 'Scale', 'Bias'], 'Out')

    def test_check_grad_stopgrad_dx(self):
        # Gradients w.r.t. Scale and Bias only; X is excluded.
        place = self._xpu_place()
        if place is not None:
            self.check_grad_with_place(
                place, ['Scale', 'Bias'], 'Out', no_grad_set={'X'})

    def test_check_grad_stopgrad_dscale_dbias(self):
        # Gradient w.r.t. X only; Scale and Bias are excluded.
        place = self._xpu_place()
        if place is not None:
            self.check_grad_with_place(
                place, ['X'], 'Out', no_grad_set={'Scale', 'Bias'})
class TestAffineChannelOpError(unittest.TestCase):
def test_errors(self):
with fluid.program_guard(fluid.Program()):
def test_x_type():
input_data = np.random.random(2, 1, 2, 2).astype("float32")
fluid.layers.affine_channel(input_data)
self.assertRaises(TypeError, test_x_type)
def test_x_dtype():
x2 = fluid.layers.data(
name='x2', shape=[None, 1, 2, 2], dtype='int32')
fluid.layers.affine_channel(x2)
self.assertRaises(TypeError, test_x_dtype)
def test_scale_type():
x3 = fluid.layers.data(
name='x3', shape=[None, 1, 2, 2], dtype='float32')
fluid.layers.affine_channel(x3, scale=1)
self.assertRaises(TypeError, test_scale_type)
def test_bias_type():
x4 = fluid.layers.data(
name='x4', shape=[None, 1, 2, 2], dtype='float32')
fluid.layers.affine_channel(x4, bias=1)
self.assertRaises(TypeError, test_bias_type)
class TestAffineChannelNHWC(TestAffineChannelOp):
    """Channel-last variant; the two partial-gradient checks are disabled."""

    def init_test_case(self):
        self.layout = 'NHWC'
        self.C = 100
        self.shape = [2, 3, 3, 100]

    def test_check_grad_stopgrad_dx(self):
        # Intentionally disabled for this configuration.
        pass

    def test_check_grad_stopgrad_dscale_dbias(self):
        # Intentionally disabled for this configuration.
        pass
class TestAffineChannel2D(TestAffineChannelOp):
    """Rank-2 input variant; the two partial-gradient checks are disabled."""

    def init_test_case(self):
        self.layout = 'NCHW'
        self.C = 100
        self.shape = [2, 100]

    def test_check_grad_stopgrad_dx(self):
        # Intentionally disabled for this configuration.
        pass

    def test_check_grad_stopgrad_dscale_dbias(self):
        # Intentionally disabled for this configuration.
        pass
if __name__ == '__main__':
    # Run every test case in this module when executed as a script.
    unittest.main()
...@@ -20,13 +20,13 @@ import math ...@@ -20,13 +20,13 @@ import math
import numpy as np import numpy as np
import paddle.fluid.core as core import paddle.fluid.core as core
from op_test import OpTest, skip_check_grad_ci from op_test import OpTest, skip_check_grad_ci
from op_test_xpu import XPUOpTest
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid import Program, program_guard from paddle.fluid import Program, program_guard
@skip_check_grad_ci(reason="There is no grad kernel for roi_align_xpu kernel.") class TestROIAlignOp(XPUOpTest):
class TestROIAlignOp(OpTest):
def set_data(self): def set_data(self):
self.init_test_case() self.init_test_case()
self.make_rois() self.make_rois()
...@@ -59,16 +59,16 @@ class TestROIAlignOp(OpTest): ...@@ -59,16 +59,16 @@ class TestROIAlignOp(OpTest):
self.pooled_width = 2 self.pooled_width = 2
self.sampling_ratio = -1 self.sampling_ratio = -1
self.x = np.random.random(self.x_dim).astype('float64') self.x = np.random.random(self.x_dim).astype('float32')
def pre_calc(self, x_i, roi_xmin, roi_ymin, roi_bin_grid_h, roi_bin_grid_w, def pre_calc(self, x_i, roi_xmin, roi_ymin, roi_bin_grid_h, roi_bin_grid_w,
bin_size_h, bin_size_w): bin_size_h, bin_size_w):
count = roi_bin_grid_h * roi_bin_grid_w count = roi_bin_grid_h * roi_bin_grid_w
bilinear_pos = np.zeros( bilinear_pos = np.zeros(
[self.channels, self.pooled_height, self.pooled_width, count, 4], [self.channels, self.pooled_height, self.pooled_width, count, 4],
np.float64) np.float32)
bilinear_w = np.zeros( bilinear_w = np.zeros(
[self.pooled_height, self.pooled_width, count, 4], np.float64) [self.pooled_height, self.pooled_width, count, 4], np.float32)
for ph in range(self.pooled_width): for ph in range(self.pooled_width):
for pw in range(self.pooled_height): for pw in range(self.pooled_height):
c = 0 c = 0
...@@ -118,7 +118,7 @@ class TestROIAlignOp(OpTest): ...@@ -118,7 +118,7 @@ class TestROIAlignOp(OpTest):
def calc_roi_align(self): def calc_roi_align(self):
self.out_data = np.zeros( self.out_data = np.zeros(
(self.rois_num, self.channels, self.pooled_height, (self.rois_num, self.channels, self.pooled_height,
self.pooled_width)).astype('float64') self.pooled_width)).astype('float32')
for i in range(self.rois_num): for i in range(self.rois_num):
roi = self.rois[i] roi = self.rois[i]
...@@ -166,7 +166,7 @@ class TestROIAlignOp(OpTest): ...@@ -166,7 +166,7 @@ class TestROIAlignOp(OpTest):
roi = [bno, x1, y1, x2, y2] roi = [bno, x1, y1, x2, y2]
rois.append(roi) rois.append(roi)
self.rois_num = len(rois) self.rois_num = len(rois)
self.rois = np.array(rois).astype("float64") self.rois = np.array(rois).astype("float32")
def setUp(self): def setUp(self):
self.op_type = "roi_align" self.op_type = "roi_align"
...@@ -178,6 +178,12 @@ class TestROIAlignOp(OpTest): ...@@ -178,6 +178,12 @@ class TestROIAlignOp(OpTest):
place = paddle.XPUPlace(0) place = paddle.XPUPlace(0)
self.check_output_with_place(place) self.check_output_with_place(place)
def test_check_grad(self):
if core.is_compiled_with_xpu():
paddle.enable_static()
place = paddle.XPUPlace(0)
self.check_grad_with_place(place, {'X'}, 'Out')
class TestROIAlignInLodOp(TestROIAlignOp): class TestROIAlignInLodOp(TestROIAlignOp):
def set_data(self): def set_data(self):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册