提交 cf5b5986 编写于 作者: W wanghaox

fix some issues

上级 ef905598
......@@ -17,6 +17,10 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
static constexpr int kROISize = 5;
class ROIPoolOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
......@@ -38,6 +42,9 @@ class ROIPoolOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE(rois_dims.size() == 2,
"ROIs should be a 2-D tensor of shape (num_rois, 5)"
"given as [[batch_id, x1, y1, x2, y2], …].");
PADDLE_ENFORCE(rois_dims[1] == kROISize,
"ROIs should be a 2-D tensor of shape (num_rois, 5)"
"given as [[batch_id, x1, y1, x2, y2], …].");
int pooled_height = ctx->Attrs().Get<int>("pooled_height");
int pooled_width = ctx->Attrs().Get<int>("pooled_width");
......@@ -150,7 +157,9 @@ REGISTER_OP(roi_pool, ops::ROIPoolOp, ops::ROIPoolOpMaker,
roi_pool_grad, ops::ROIPoolGradOp);
REGISTER_OP_CPU_KERNEL(
roi_pool,
ops::CPUROIPoolOpKernel<paddle::platform::CPUPlace, float>);
ops::CPUROIPoolOpKernel<paddle::platform::CPUPlace, float>,
ops::CPUROIPoolOpKernel<paddle::platform::CPUPlace, double>);
REGISTER_OP_CPU_KERNEL(
roi_pool_grad,
ops::CPUROIPoolGradOpKernel<paddle::platform::CPUPlace, float>);
ops::CPUROIPoolGradOpKernel<paddle::platform::CPUPlace, float>,
ops::CPUROIPoolOpKernel<paddle::platform::CPUPlace, double>);
......@@ -18,6 +18,8 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
static constexpr int kNumCUDAThreads = 512;
static constexpr int kNumMaxinumNumBlocks = 4096;
static constexpr int kROISize = 5;
......@@ -25,7 +27,7 @@ static constexpr int kROISize = 5;
static inline int NumBlocks(const int N) {
return std::min((N + kNumCUDAThreads - 1) / kNumCUDAThreads,
kNumMaxinumNumBlocks);
}
}
template <typename T>
__global__ void GPUROIPoolForward(
......@@ -64,7 +66,7 @@ static inline int NumBlocks(const int N) {
wend = min(max(wend + roi_start_w, 0), width);
bool is_empty = (hend <= hstart) || (wend <= wstart);
T maxval = is_empty ? 0 : -std::numeric_limits<float>::max();
T maxval = is_empty ? 0 : -std::numeric_limits<T>::max();
int maxidx = -1;
const T* offset_input_data =
input_data + (roi_batch_ind * channels + c) * height * width;
......@@ -143,14 +145,6 @@ class GPUROIPoolOpKernel : public framework::OpKernel<T> {
int width = in_dims[3];
size_t rois_num = rois->dims()[0];
out->mutable_data<T>(ctx.GetPlace());
math::SetConstant<Place, T> set_zero;
set_zero(ctx.device_context(), out, static_cast<T>(0));
argmax->mutable_data<int64_t>(ctx.GetPlace());
math::SetConstant<Place, int64_t> set_init;
set_init(ctx.device_context(), argmax, static_cast<int64_t>(-1));
if (rois_num== 0) return;
int output_size = out->numel();
......@@ -230,7 +224,9 @@ class GPUROIPoolGradOpKernel : public framework::OpKernel<T> {
namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL(
roi_pool,
ops::GPUROIPoolOpKernel<paddle::platform::GPUPlace, float>);
ops::GPUROIPoolOpKernel<paddle::platform::GPUPlace, float>,
ops::GPUROIPoolOpKernel<paddle::platform::GPUPlace, double>);
REGISTER_OP_GPU_KERNEL(
roi_pool_grad,
ops::GPUROIPoolGradOpKernel<paddle::platform::GPUPlace, float>);
ops::GPUROIPoolGradOpKernel<paddle::platform::GPUPlace, float>,
ops::GPUROIPoolOpKernel<paddle::platform::GPUPlace, double>);
......@@ -15,23 +15,18 @@ limitations under the License. */
#pragma once
#include "paddle/framework/op_registry.h"
#include "paddle/operators/math/math_function.h"
#include "paddle/operators/strided_memcpy.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
using LoDTensor = framework::LoDTensor;
using LoD = framework::LoD;
template <typename Place, typename T>
class CPUROIPoolOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto* in = ctx.Input<Tensor>("X");
auto* rois = ctx.Input<Tensor>("ROIs");
auto* out = ctx.Output<Tensor>("Out");
auto* argmax = ctx.Output<Tensor>("Argmax");
auto* in = ctx.Input<framework::Tensor>("X");
auto* rois = ctx.Input<framework::Tensor>("ROIs");
auto* out = ctx.Output<framework::Tensor>("Out");
auto* argmax = ctx.Output<framework::Tensor>("Argmax");
auto pooled_height = ctx.Attr<int>("pooled_height");
auto pooled_width = ctx.Attr<int>("pooled_width");
......@@ -54,11 +49,6 @@ class CPUROIPoolOpKernel : public framework::OpKernel<T> {
T* output_data = out->mutable_data<T>(ctx.GetPlace());
int64_t* argmax_data = argmax->mutable_data<int64_t>(ctx.GetPlace());
math::SetConstant<Place, T> set_zero;
set_zero(ctx.device_context(), out, static_cast<T>(0));
math::SetConstant<Place, int64_t> set_init;
set_init(ctx.device_context(), argmax, static_cast<int64_t>(-1));
for (int n = 0; n < rois_num; ++n) {
int roi_batch_id = rois_data[0];
PADDLE_ENFORCE_GE(roi_batch_id, 0);
......@@ -83,7 +73,7 @@ class CPUROIPoolOpKernel : public framework::OpKernel<T> {
const float bin_size_w =
static_cast<float>(roi_width) / static_cast<float>(pooled_width);
const float* batch_data = input_data + roi_batch_id * in_stride[0];
const T* batch_data = input_data + roi_batch_id * in_stride[0];
for (int c = 0; c < channels; ++c) {
for (int ph = 0; ph < pooled_height; ++ph) {
......@@ -110,7 +100,8 @@ class CPUROIPoolOpKernel : public framework::OpKernel<T> {
// Define an empty pooling region to be zero
bool is_empty = (hend <= hstart) || (wend <= wstart);
output_data[pool_index] =
is_empty ? 0 : -std::numeric_limits<float>::max();
is_empty ? 0 : -std::numeric_limits<T>::max();
argmax_data[pool_index] = -1;
for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) {
......@@ -139,14 +130,14 @@ template <typename Place, typename T>
class CPUROIPoolGradOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto* in = ctx.Input<Tensor>("X");
auto* rois = ctx.Input<Tensor>("ROIs");
auto* argmax = ctx.Input<Tensor>("Argmax");
auto* in = ctx.Input<framework::Tensor>("X");
auto* rois = ctx.Input<framework::Tensor>("ROIs");
auto* argmax = ctx.Input<framework::Tensor>("Argmax");
auto* out_grad =
ctx.Input<Tensor>(framework::GradVarName("Out"));
ctx.Input<framework::Tensor>(framework::GradVarName("Out"));
auto* x_grad =
ctx.Output<Tensor>(framework::GradVarName("X"));
ctx.Output<framework::Tensor>(framework::GradVarName("X"));
auto pooled_height = ctx.Attr<int>("pooled_height");
auto pooled_width = ctx.Attr<int>("pooled_width");
......
......@@ -77,7 +77,12 @@ class TestROIPoolOp(OpTest):
wstart = min(max(wstart + roi_start_w, 0), self.width)
wend = min(max(wend + roi_start_w, 0), self.width)
out_data[i, c, ph, pw] = 0
is_empty = (hend <= hstart) or (wend <= wstart)
if is_empty:
out_data[i, c, ph, pw] = 0
else:
out_data[i, c, ph, pw] = -sys.float_info.max
argmax_data[i, c, ph, pw] = -1
for h in range(hstart, hend):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册