提交 cf5b5986 编写于 作者: W wanghaox

fix some issues

上级 ef905598
...@@ -17,6 +17,10 @@ limitations under the License. */ ...@@ -17,6 +17,10 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Tensor = framework::Tensor;
static constexpr int kROISize = 5;
class ROIPoolOp : public framework::OperatorWithKernel { class ROIPoolOp : public framework::OperatorWithKernel {
public: public:
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
...@@ -38,6 +42,9 @@ class ROIPoolOp : public framework::OperatorWithKernel { ...@@ -38,6 +42,9 @@ class ROIPoolOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE(rois_dims.size() == 2, PADDLE_ENFORCE(rois_dims.size() == 2,
"ROIs should be a 2-D tensor of shape (num_rois, 5)" "ROIs should be a 2-D tensor of shape (num_rois, 5)"
"given as [[batch_id, x1, y1, x2, y2], …]."); "given as [[batch_id, x1, y1, x2, y2], …].");
PADDLE_ENFORCE(rois_dims[1] == kROISize,
"ROIs should be a 2-D tensor of shape (num_rois, 5)"
"given as [[batch_id, x1, y1, x2, y2], …].");
int pooled_height = ctx->Attrs().Get<int>("pooled_height"); int pooled_height = ctx->Attrs().Get<int>("pooled_height");
int pooled_width = ctx->Attrs().Get<int>("pooled_width"); int pooled_width = ctx->Attrs().Get<int>("pooled_width");
...@@ -150,7 +157,9 @@ REGISTER_OP(roi_pool, ops::ROIPoolOp, ops::ROIPoolOpMaker, ...@@ -150,7 +157,9 @@ REGISTER_OP(roi_pool, ops::ROIPoolOp, ops::ROIPoolOpMaker,
roi_pool_grad, ops::ROIPoolGradOp); roi_pool_grad, ops::ROIPoolGradOp);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
roi_pool, roi_pool,
ops::CPUROIPoolOpKernel<paddle::platform::CPUPlace, float>); ops::CPUROIPoolOpKernel<paddle::platform::CPUPlace, float>,
ops::CPUROIPoolOpKernel<paddle::platform::CPUPlace, double>);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
roi_pool_grad, roi_pool_grad,
ops::CPUROIPoolGradOpKernel<paddle::platform::CPUPlace, float>); ops::CPUROIPoolGradOpKernel<paddle::platform::CPUPlace, float>,
ops::CPUROIPoolOpKernel<paddle::platform::CPUPlace, double>);
...@@ -18,6 +18,8 @@ limitations under the License. */ ...@@ -18,6 +18,8 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Tensor = framework::Tensor;
static constexpr int kNumCUDAThreads = 512; static constexpr int kNumCUDAThreads = 512;
static constexpr int kNumMaxinumNumBlocks = 4096; static constexpr int kNumMaxinumNumBlocks = 4096;
static constexpr int kROISize = 5; static constexpr int kROISize = 5;
...@@ -25,7 +27,7 @@ static constexpr int kROISize = 5; ...@@ -25,7 +27,7 @@ static constexpr int kROISize = 5;
static inline int NumBlocks(const int N) { static inline int NumBlocks(const int N) {
return std::min((N + kNumCUDAThreads - 1) / kNumCUDAThreads, return std::min((N + kNumCUDAThreads - 1) / kNumCUDAThreads,
kNumMaxinumNumBlocks); kNumMaxinumNumBlocks);
} }
template <typename T> template <typename T>
__global__ void GPUROIPoolForward( __global__ void GPUROIPoolForward(
...@@ -64,7 +66,7 @@ static inline int NumBlocks(const int N) { ...@@ -64,7 +66,7 @@ static inline int NumBlocks(const int N) {
wend = min(max(wend + roi_start_w, 0), width); wend = min(max(wend + roi_start_w, 0), width);
bool is_empty = (hend <= hstart) || (wend <= wstart); bool is_empty = (hend <= hstart) || (wend <= wstart);
T maxval = is_empty ? 0 : -std::numeric_limits<float>::max(); T maxval = is_empty ? 0 : -std::numeric_limits<T>::max();
int maxidx = -1; int maxidx = -1;
const T* offset_input_data = const T* offset_input_data =
input_data + (roi_batch_ind * channels + c) * height * width; input_data + (roi_batch_ind * channels + c) * height * width;
...@@ -143,14 +145,6 @@ class GPUROIPoolOpKernel : public framework::OpKernel<T> { ...@@ -143,14 +145,6 @@ class GPUROIPoolOpKernel : public framework::OpKernel<T> {
int width = in_dims[3]; int width = in_dims[3];
size_t rois_num = rois->dims()[0]; size_t rois_num = rois->dims()[0];
out->mutable_data<T>(ctx.GetPlace());
math::SetConstant<Place, T> set_zero;
set_zero(ctx.device_context(), out, static_cast<T>(0));
argmax->mutable_data<int64_t>(ctx.GetPlace());
math::SetConstant<Place, int64_t> set_init;
set_init(ctx.device_context(), argmax, static_cast<int64_t>(-1));
if (rois_num== 0) return; if (rois_num== 0) return;
int output_size = out->numel(); int output_size = out->numel();
...@@ -230,7 +224,9 @@ class GPUROIPoolGradOpKernel : public framework::OpKernel<T> { ...@@ -230,7 +224,9 @@ class GPUROIPoolGradOpKernel : public framework::OpKernel<T> {
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL( REGISTER_OP_GPU_KERNEL(
roi_pool, roi_pool,
ops::GPUROIPoolOpKernel<paddle::platform::GPUPlace, float>); ops::GPUROIPoolOpKernel<paddle::platform::GPUPlace, float>,
ops::GPUROIPoolOpKernel<paddle::platform::GPUPlace, double>);
REGISTER_OP_GPU_KERNEL( REGISTER_OP_GPU_KERNEL(
roi_pool_grad, roi_pool_grad,
ops::GPUROIPoolGradOpKernel<paddle::platform::GPUPlace, float>); ops::GPUROIPoolGradOpKernel<paddle::platform::GPUPlace, float>,
ops::GPUROIPoolOpKernel<paddle::platform::GPUPlace, double>);
...@@ -15,23 +15,18 @@ limitations under the License. */ ...@@ -15,23 +15,18 @@ limitations under the License. */
#pragma once #pragma once
#include "paddle/framework/op_registry.h" #include "paddle/framework/op_registry.h"
#include "paddle/operators/math/math_function.h" #include "paddle/operators/math/math_function.h"
#include "paddle/operators/strided_memcpy.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Tensor = framework::Tensor;
using LoDTensor = framework::LoDTensor;
using LoD = framework::LoD;
template <typename Place, typename T> template <typename Place, typename T>
class CPUROIPoolOpKernel : public framework::OpKernel<T> { class CPUROIPoolOpKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& ctx) const override { void Compute(const framework::ExecutionContext& ctx) const override {
auto* in = ctx.Input<Tensor>("X"); auto* in = ctx.Input<framework::Tensor>("X");
auto* rois = ctx.Input<Tensor>("ROIs"); auto* rois = ctx.Input<framework::Tensor>("ROIs");
auto* out = ctx.Output<Tensor>("Out"); auto* out = ctx.Output<framework::Tensor>("Out");
auto* argmax = ctx.Output<Tensor>("Argmax"); auto* argmax = ctx.Output<framework::Tensor>("Argmax");
auto pooled_height = ctx.Attr<int>("pooled_height"); auto pooled_height = ctx.Attr<int>("pooled_height");
auto pooled_width = ctx.Attr<int>("pooled_width"); auto pooled_width = ctx.Attr<int>("pooled_width");
...@@ -54,11 +49,6 @@ class CPUROIPoolOpKernel : public framework::OpKernel<T> { ...@@ -54,11 +49,6 @@ class CPUROIPoolOpKernel : public framework::OpKernel<T> {
T* output_data = out->mutable_data<T>(ctx.GetPlace()); T* output_data = out->mutable_data<T>(ctx.GetPlace());
int64_t* argmax_data = argmax->mutable_data<int64_t>(ctx.GetPlace()); int64_t* argmax_data = argmax->mutable_data<int64_t>(ctx.GetPlace());
math::SetConstant<Place, T> set_zero;
set_zero(ctx.device_context(), out, static_cast<T>(0));
math::SetConstant<Place, int64_t> set_init;
set_init(ctx.device_context(), argmax, static_cast<int64_t>(-1));
for (int n = 0; n < rois_num; ++n) { for (int n = 0; n < rois_num; ++n) {
int roi_batch_id = rois_data[0]; int roi_batch_id = rois_data[0];
PADDLE_ENFORCE_GE(roi_batch_id, 0); PADDLE_ENFORCE_GE(roi_batch_id, 0);
...@@ -83,7 +73,7 @@ class CPUROIPoolOpKernel : public framework::OpKernel<T> { ...@@ -83,7 +73,7 @@ class CPUROIPoolOpKernel : public framework::OpKernel<T> {
const float bin_size_w = const float bin_size_w =
static_cast<float>(roi_width) / static_cast<float>(pooled_width); static_cast<float>(roi_width) / static_cast<float>(pooled_width);
const float* batch_data = input_data + roi_batch_id * in_stride[0]; const T* batch_data = input_data + roi_batch_id * in_stride[0];
for (int c = 0; c < channels; ++c) { for (int c = 0; c < channels; ++c) {
for (int ph = 0; ph < pooled_height; ++ph) { for (int ph = 0; ph < pooled_height; ++ph) {
...@@ -110,7 +100,8 @@ class CPUROIPoolOpKernel : public framework::OpKernel<T> { ...@@ -110,7 +100,8 @@ class CPUROIPoolOpKernel : public framework::OpKernel<T> {
// Define an empty pooling region to be zero // Define an empty pooling region to be zero
bool is_empty = (hend <= hstart) || (wend <= wstart); bool is_empty = (hend <= hstart) || (wend <= wstart);
output_data[pool_index] = output_data[pool_index] =
is_empty ? 0 : -std::numeric_limits<float>::max(); is_empty ? 0 : -std::numeric_limits<T>::max();
argmax_data[pool_index] = -1;
for (int h = hstart; h < hend; ++h) { for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) { for (int w = wstart; w < wend; ++w) {
...@@ -139,14 +130,14 @@ template <typename Place, typename T> ...@@ -139,14 +130,14 @@ template <typename Place, typename T>
class CPUROIPoolGradOpKernel : public framework::OpKernel<T> { class CPUROIPoolGradOpKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& ctx) const override { void Compute(const framework::ExecutionContext& ctx) const override {
auto* in = ctx.Input<Tensor>("X"); auto* in = ctx.Input<framework::Tensor>("X");
auto* rois = ctx.Input<Tensor>("ROIs"); auto* rois = ctx.Input<framework::Tensor>("ROIs");
auto* argmax = ctx.Input<Tensor>("Argmax"); auto* argmax = ctx.Input<framework::Tensor>("Argmax");
auto* out_grad = auto* out_grad =
ctx.Input<Tensor>(framework::GradVarName("Out")); ctx.Input<framework::Tensor>(framework::GradVarName("Out"));
auto* x_grad = auto* x_grad =
ctx.Output<Tensor>(framework::GradVarName("X")); ctx.Output<framework::Tensor>(framework::GradVarName("X"));
auto pooled_height = ctx.Attr<int>("pooled_height"); auto pooled_height = ctx.Attr<int>("pooled_height");
auto pooled_width = ctx.Attr<int>("pooled_width"); auto pooled_width = ctx.Attr<int>("pooled_width");
......
...@@ -77,7 +77,12 @@ class TestROIPoolOp(OpTest): ...@@ -77,7 +77,12 @@ class TestROIPoolOp(OpTest):
wstart = min(max(wstart + roi_start_w, 0), self.width) wstart = min(max(wstart + roi_start_w, 0), self.width)
wend = min(max(wend + roi_start_w, 0), self.width) wend = min(max(wend + roi_start_w, 0), self.width)
is_empty = (hend <= hstart) or (wend <= wstart)
if is_empty:
out_data[i, c, ph, pw] = 0 out_data[i, c, ph, pw] = 0
else:
out_data[i, c, ph, pw] = -sys.float_info.max
argmax_data[i, c, ph, pw] = -1 argmax_data[i, c, ph, pw] = -1
for h in range(hstart, hend): for h in range(hstart, hend):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册