提交 ef905598 编写于 作者: W wanghaox 提交者: wanghaox

fix some code issues

#include <paddle/capi.h>
#include <time.h>
#include "../common/common.h"
#define CONFIG_BIN "./trainer_config.bin"
......@@ -27,20 +28,19 @@ int main() {
CHECK(paddle_arguments_resize(in_args, 1));
// Create input matrix.
paddle_matrix mat = paddle_matrix_create(/* sample_num */ 10,
paddle_matrix mat = paddle_matrix_create(/* sample_num */ 1,
/* size */ 784,
/* useGPU */ false);
srand(time(0));
std::vector<paddle_real> input;
input.resize(784 * 10);
paddle_real* array;
for (int i = 0; i < input.size(); ++i) {
input[i] = rand() / ((float)RAND_MAX);
}
// Get First row.
CHECK(paddle_matrix_get_row(mat, 0, &array));
// Set value for the input matrix
CHECK(paddle_matrix_set_value(mat, input.data()));
for (int i = 0; i < 784; ++i) {
array[i] = rand() / ((float)RAND_MAX);
}
CHECK(paddle_arguments_set_value(in_args, 0, mat));
......@@ -53,17 +53,18 @@ int main() {
CHECK(paddle_arguments_get_value(out_args, 0, prob));
std::std::vector<paddle_real> result;
int height;
int width;
uint64_t height;
uint64_t width;
CHECK(paddle_matrix_get_shape(prob, &height, &width);
result.resize(height * width);
CHECK(paddle_matrix_get_value(prob, result.data()));
CHECK(paddle_matrix_get_shape(prob, &height, &width));
CHECK(paddle_matrix_get_row(prob, 0, &array));
printf("Prob: ");
printf("Prob: \n");
for (int i = 0; i < height * width; ++i) {
printf("%.2f ", result[i]);
printf("%.4f ", array[i]);
if ((i + 1) % width == 0) {
printf("\n");
}
}
printf("\n");
......
......@@ -17,24 +17,47 @@ limitations under the License. */
namespace paddle {
namespace operators {
class RoiPoolOp : public framework::OperatorWithKernel {
class ROIPoolOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"),
"Input(X) of RoiPoolOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Rois"),
"Input(Rois) of RoiPoolOp should not be null.");
"Input(X) of ROIPoolOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("ROIs"),
"Input(ROIs) of ROIPoolOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Out"),
"Output(Out) of RoiPoolOp should not be null.");
"Output(Out) of ROIPoolOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Argmax"),
"Output(Argmax) of RoiPoolOp should not be null.");
"Output(Argmax) of ROIPoolOp should not be null.");
auto input_dims = ctx->GetInputDim("X");
auto rois_dims = ctx->GetInputDim("ROIs");
// Initialize the output's dims to maximum,
// and re-set to real dims by the value of Rois at kernel
ctx->SetOutputDim("Out", input_dims);
PADDLE_ENFORCE(input_dims.size() == 4,
"The format of input tensor is NCHW.");
PADDLE_ENFORCE(rois_dims.size() == 2,
"ROIs should be a 2-D tensor of shape (num_rois, 5)"
"given as [[batch_id, x1, y1, x2, y2], …].");
int pooled_height = ctx->Attrs().Get<int>("pooled_height");
int pooled_width = ctx->Attrs().Get<int>("pooled_width");
float spatial_scale = ctx->Attrs().Get<float>("spatial_scale");
PADDLE_ENFORCE_GT(pooled_height, 0,
"The pooled output height must greater than 0");
PADDLE_ENFORCE_GT(pooled_width, 0,
"The pooled output width must greater than 0");
PADDLE_ENFORCE_GT(spatial_scale, 0.0f,
"The spatial scale must greater than 0");
auto out_dims = input_dims;
out_dims[0] = rois_dims[0];
out_dims[1] = input_dims[1];
out_dims[2] = pooled_height;
out_dims[3] = pooled_width;
ctx->SetOutputDim("Out", out_dims);
ctx->SetOutputDim("Argmax", out_dims);
}
protected:
......@@ -46,7 +69,7 @@ class RoiPoolOp : public framework::OperatorWithKernel {
}
};
class RoiPoolGradOp : public framework::OperatorWithKernel {
class ROIPoolGradOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
......@@ -67,22 +90,29 @@ class RoiPoolGradOp : public framework::OperatorWithKernel {
}
};
class RoiPoolOpMaker : public framework::OpProtoAndCheckerMaker {
class ROIPoolOpMaker : public framework::OpProtoAndCheckerMaker {
public:
RoiPoolOpMaker(framework::OpProto* proto,
ROIPoolOpMaker(framework::OpProto* proto,
framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X",
"(Tensor), "
"the input of RoiPoolOp.");
AddInput("Rois",
"the input of ROIPoolOp. "
"The format of input tensor is NCHW. Where N is batch size, "
"C is the number of input channels, "
"H is the height of the feature, and "
"W is the width of the feature.");
AddInput("ROIs",
"(Tensor), "
"RoIs (Regions of Interest) to pool over. "
"Should be a 2-D tensor of shape (num_rois, 5)"
"given as [[batch_id, x1, y1, x2, y2], …].");
"ROIs (Regions of Interest) to pool over. "
"should be a 2-D tensor of shape (num_rois, 5)"
"given as [[batch_id, x1, y1, x2, y2], …]. "
"Where batch_id is the id of the data, "
"(x1, y1) is the top left coordinates, and "
"(x2, y2) is the bottom right coordinates.");
AddOutput("Out",
"(Tensor), "
"RoI pooled output 4-D tensor of shape "
"The output of ROIPoolOp is a 4-D tensor with shape "
"(num_rois, channels, pooled_h, pooled_w).");
AddOutput("Argmax",
"(Tensor), "
......@@ -104,7 +134,7 @@ class RoiPoolOpMaker : public framework::OpProtoAndCheckerMaker {
"The pooled output width.")
.SetDefault(1);
AddComment(R"DOC(
RoiPool operator
ROIPool operator
ROI Pooling for Faster-RCNN. The link below is a further introduction:
https://stackoverflow.com/questions/43430056/what-is-roi-layer-in-fast-rcnn
......@@ -116,11 +146,11 @@ https://stackoverflow.com/questions/43430056/what-is-roi-layer-in-fast-rcnn
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP(roi_pool, ops::RoiPoolOp, ops::RoiPoolOpMaker,
roi_pool_grad, ops::RoiPoolGradOp);
REGISTER_OP(roi_pool, ops::ROIPoolOp, ops::ROIPoolOpMaker,
roi_pool_grad, ops::ROIPoolGradOp);
REGISTER_OP_CPU_KERNEL(
roi_pool,
ops::CPURoiPoolOpKernel<paddle::platform::CPUPlace, float>);
ops::CPUROIPoolOpKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL(
roi_pool_grad,
ops::CPURoiPoolGradOpKernel<paddle::platform::CPUPlace, float>);
ops::CPUROIPoolGradOpKernel<paddle::platform::CPUPlace, float>);
......@@ -12,36 +12,27 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/platform/cuda_helper.h"
#include "paddle/operators/roi_pool_op.h"
#include "paddle/platform/cuda_helper.h"
namespace paddle {
namespace operators {
#define FLT_MAX __FLT_MAX__
// Launch-configuration constants for the ROI pooling CUDA kernels.
// kNumCUDAThreads is the block size passed to every kernel launch in this
// file; kNumMaxinumNumBlocks is the upper bound applied to the grid size.
static constexpr int kNumCUDAThreads = 512;
static constexpr int kNumMaxinumNumBlocks = 4096;
// Number of values stored per ROI row; used as the row stride when indexing
// the ROI buffer inside the kernels.
static constexpr int kROISize = 5;
// Legacy spellings of the two launch constants above.
constexpr int PADDLE_OPERATORS_ROIPOOL_CUDA_NUM_THREADS = 512;
constexpr int PADDLE_OPERATORS_ROIPOOL_MAXIMUM_NUM_BLOCKS = 4096;

// Legacy spelling of NumBlocks(): returns
// ceil(N / PADDLE_OPERATORS_ROIPOOL_CUDA_NUM_THREADS), clamped to
// PADDLE_OPERATORS_ROIPOOL_MAXIMUM_NUM_BLOCKS.
//
// The ceiling division is carried out in 64-bit arithmetic: adding
// (threads - 1) to an `int` N close to INT_MAX would otherwise be signed
// overflow (undefined behaviour) and could yield a negative grid size.
inline int PADDLE_OPERATORS_ROIPOOL_GET_BLOCKS(const int N) {
  const int64_t blocks_needed =
      (static_cast<int64_t>(N) + PADDLE_OPERATORS_ROIPOOL_CUDA_NUM_THREADS - 1) /
      PADDLE_OPERATORS_ROIPOOL_CUDA_NUM_THREADS;
  return static_cast<int>(std::min<int64_t>(
      blocks_needed, PADDLE_OPERATORS_ROIPOOL_MAXIMUM_NUM_BLOCKS));
}

// Returns the grid size for a launch covering N work items:
// ceil(N / kNumCUDAThreads), clamped to kNumMaxinumNumBlocks.
//
// N is the total number of work items (callers in this file pass a tensor's
// element count). For N <= 0 the result is <= 0, exactly as before, so
// callers must keep guarding against empty launches themselves.
//
// The ceiling division is carried out in 64-bit arithmetic so that N values
// close to INT_MAX clamp to the block limit instead of overflowing `int`.
static inline int NumBlocks(const int N) {
  const int64_t blocks_needed =
      (static_cast<int64_t>(N) + kNumCUDAThreads - 1) / kNumCUDAThreads;
  // The clamped value is at most kNumMaxinumNumBlocks, so narrowing is safe.
  return static_cast<int>(
      std::min<int64_t>(blocks_needed, kNumMaxinumNumBlocks));
}
template <typename T>
__global__ void GPURoiPoolForward(
const int nthreads,
const T* input_data,
const int64_t* input_rois,
const float spatial_scale,
const int channels,
const int height,
const int width,
const int pooled_height,
const int pooled_width,
T* output_data,
int64_t* argmax_data) {
template <typename T>
__global__ void GPUROIPoolForward(
const int nthreads, const T* input_data, const int64_t* input_rois,
const float spatial_scale, const int channels, const int height,
const int width, const int pooled_height, const int pooled_width,
T* output_data, int64_t* argmax_data) {
int index = blockIdx.x * blockDim.x + threadIdx.x;
int offset = blockDim.x * gridDim.x;
for (size_t i = index; i < nthreads; i += offset) {
......@@ -50,7 +41,7 @@ __global__ void GPURoiPoolForward(
int c = (index / pooled_width / pooled_height) % channels;
int n = index / pooled_width / pooled_height / channels;
const int64_t* offset_input_rois = input_rois + n * 5;
const int64_t* offset_input_rois = input_rois + n * kROISize;
int roi_batch_ind = offset_input_rois[0];
int roi_start_w = round(offset_input_rois[1] * spatial_scale);
int roi_start_h = round(offset_input_rois[2] * spatial_scale);
......@@ -59,10 +50,8 @@ __global__ void GPURoiPoolForward(
int roi_width = max(roi_end_w - roi_start_w + 1, 1);
int roi_height = max(roi_end_h - roi_start_h + 1, 1);
T bin_size_h = static_cast<T>(roi_height)
/ static_cast<T>(pooled_height);
T bin_size_w = static_cast<T>(roi_width)
/ static_cast<T>(pooled_width);
T bin_size_h = static_cast<T>(roi_height) / static_cast<T>(pooled_height);
T bin_size_w = static_cast<T>(roi_width) / static_cast<T>(pooled_width);
int hstart = static_cast<int>(floor(static_cast<T>(ph) * bin_size_h));
int wstart = static_cast<int>(floor(static_cast<T>(pw) * bin_size_w));
......@@ -75,7 +64,7 @@ __global__ void GPURoiPoolForward(
wend = min(max(wend + roi_start_w, 0), width);
bool is_empty = (hend <= hstart) || (wend <= wstart);
T maxval = is_empty ? 0 : -FLT_MAX;
T maxval = is_empty ? 0 : -std::numeric_limits<float>::max();
int maxidx = -1;
const T* offset_input_data =
input_data + (roi_batch_ind * channels + c) * height * width;
......@@ -96,7 +85,7 @@ __global__ void GPURoiPoolForward(
}
template <typename T>
__global__ void GPURoiPoolBackward(
__global__ void GPUROIPoolBackward(
const int nthreads,
const int64_t* input_rois,
const T* output_grad,
......@@ -117,7 +106,7 @@ __global__ void GPURoiPoolBackward(
int c = (index / pooled_width / pooled_height) % channels;
int n = index / pooled_width / pooled_height / channels;
const int64_t* offset_input_rois = input_rois + n * 5;
const int64_t* offset_input_rois = input_rois + n * kROISize;
int roi_batch_ind = offset_input_rois[0];
int input_offset = (roi_batch_ind * channels + c) * height * width;
int output_offset = (n * channels + c) * pooled_height * pooled_width;
......@@ -135,11 +124,11 @@ __global__ void GPURoiPoolBackward(
template <typename Place, typename T>
class GPURoiPoolOpKernel : public framework::OpKernel<T> {
class GPUROIPoolOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto* in = ctx.Input<Tensor>("X");
auto* rois = ctx.Input<Tensor>("Rois");
auto* rois = ctx.Input<Tensor>("ROIs");
auto* out = ctx.Output<Tensor>("Out");
auto* argmax = ctx.Output<Tensor>("Argmax");
......@@ -147,31 +136,17 @@ class GPURoiPoolOpKernel : public framework::OpKernel<T> {
auto pooled_width = ctx.Attr<int>("pooled_width");
auto spatial_scale = ctx.Attr<float>("spatial_scale");
PADDLE_ENFORCE_GT(pooled_height, 0,
"The pooled output height must greater than 0");
PADDLE_ENFORCE_GT(pooled_width, 0,
"The pooled output width must greater than 0");
PADDLE_ENFORCE_GT(spatial_scale, 0,
"The spatial scale must greater than 0");
auto in_dims = in->dims();
auto in_stride = framework::stride(in_dims);
int channels = in_dims[1];
int height = in_dims[2];
int width = in_dims[3];
int rois_num = rois->dims()[0];
auto out_dims = in_dims;
out_dims[0] = rois_num;
out_dims[1] = in_dims[1];
out_dims[2] = pooled_height;
out_dims[3] = pooled_width;
size_t rois_num = rois->dims()[0];
out->Resize(out_dims);
out->mutable_data<T>(ctx.GetPlace());
math::SetConstant<Place, T> set_zero;
set_zero(ctx.device_context(), out, static_cast<T>(0));
argmax->Resize(out->dims());
argmax->mutable_data<int64_t>(ctx.GetPlace());
math::SetConstant<Place, int64_t> set_init;
set_init(ctx.device_context(), argmax, static_cast<int64_t>(-1));
......@@ -179,10 +154,10 @@ class GPURoiPoolOpKernel : public framework::OpKernel<T> {
if (rois_num== 0) return;
int output_size = out->numel();
int blocks = PADDLE_OPERATORS_ROIPOOL_GET_BLOCKS(output_size);
int threads = PADDLE_OPERATORS_ROIPOOL_CUDA_NUM_THREADS;
int blocks = NumBlocks(output_size);
int threads = kNumCUDAThreads;
GPURoiPoolForward<T>
GPUROIPoolForward<T>
<<<blocks, threads, 0, ctx.cuda_device_context().stream()>>>(
output_size,
in->data<T>(),
......@@ -195,17 +170,15 @@ class GPURoiPoolOpKernel : public framework::OpKernel<T> {
pooled_width,
out->mutable_data<T>(ctx.GetPlace()),
argmax->mutable_data<int64_t>(ctx.GetPlace()));
return;
}
};
template <typename Place, typename T>
class GPURoiPoolGradOpKernel : public framework::OpKernel<T> {
class GPUROIPoolGradOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto* in = ctx.Input<Tensor>("X");
auto* rois = ctx.Input<Tensor>("Rois");
auto* rois = ctx.Input<Tensor>("ROIs");
auto* argmax = ctx.Input<Tensor>("Argmax");
auto* out_grad =
......@@ -217,23 +190,22 @@ class GPURoiPoolGradOpKernel : public framework::OpKernel<T> {
auto pooled_width = ctx.Attr<int>("pooled_width");
auto spatial_scale = ctx.Attr<float>("spatial_scale");
int rois_num = rois->dims()[0];
size_t rois_num = rois->dims()[0];
int channels = in->dims()[1];
int height = in->dims()[2];
int width = in->dims()[3];
if (x_grad) {
x_grad->Resize(in->dims());
x_grad->mutable_data<T>(ctx.GetPlace());
math::SetConstant<Place, T> set_zero;
set_zero(ctx.device_context(), x_grad, static_cast<T>(0));
int output_grad_size = out_grad->numel();
int blocks = PADDLE_OPERATORS_ROIPOOL_GET_BLOCKS(output_grad_size);
int threads = PADDLE_OPERATORS_ROIPOOL_CUDA_NUM_THREADS;
int blocks = NumBlocks(output_grad_size);
int threads = kNumCUDAThreads;
if (output_grad_size > 0) {
GPURoiPoolBackward<T>
GPUROIPoolBackward<T>
<<<blocks, threads, 0, ctx.cuda_device_context().stream()>>>(
output_grad_size,
rois->data<int64_t>(),
......@@ -248,7 +220,6 @@ class GPURoiPoolGradOpKernel : public framework::OpKernel<T> {
pooled_width,
x_grad->mutable_data<T>(ctx.GetPlace()));
}
return;
}
}
};
......@@ -259,7 +230,7 @@ class GPURoiPoolGradOpKernel : public framework::OpKernel<T> {
namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL(
roi_pool,
ops::GPURoiPoolOpKernel<paddle::platform::GPUPlace, float>);
ops::GPUROIPoolOpKernel<paddle::platform::GPUPlace, float>);
REGISTER_OP_GPU_KERNEL(
roi_pool_grad,
ops::GPURoiPoolGradOpKernel<paddle::platform::GPUPlace, float>);
ops::GPUROIPoolGradOpKernel<paddle::platform::GPUPlace, float>);
......@@ -25,11 +25,11 @@ using LoDTensor = framework::LoDTensor;
using LoD = framework::LoD;
template <typename Place, typename T>
class CPURoiPoolOpKernel : public framework::OpKernel<T> {
class CPUROIPoolOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto* in = ctx.Input<Tensor>("X");
auto* rois = ctx.Input<Tensor>("Rois");
auto* rois = ctx.Input<Tensor>("ROIs");
auto* out = ctx.Output<Tensor>("Out");
auto* argmax = ctx.Output<Tensor>("Argmax");
......@@ -37,13 +37,6 @@ class CPURoiPoolOpKernel : public framework::OpKernel<T> {
auto pooled_width = ctx.Attr<int>("pooled_width");
auto spatial_scale = ctx.Attr<float>("spatial_scale");
PADDLE_ENFORCE_GT(pooled_height, 0,
"The pooled output height must greater than 0");
PADDLE_ENFORCE_GT(pooled_width, 0,
"The pooled output width must greater than 0");
PADDLE_ENFORCE_GT(spatial_scale, 0,
"The spatial scale must greater than 0");
auto in_dims = in->dims();
int batch_size = in_dims[0];
int channels = in_dims[1];
......@@ -51,18 +44,10 @@ class CPURoiPoolOpKernel : public framework::OpKernel<T> {
int width = in_dims[3];
int rois_num = rois->dims()[0];
auto out_dims = in_dims;
out_dims[0] = rois_num;
out_dims[1] = channels;
out_dims[2] = pooled_height;
out_dims[3] = pooled_width;
out->Resize(out_dims);
argmax->Resize(out->dims());
auto in_stride = framework::stride(in_dims);
auto argmax_stride = framework::stride(argmax->dims());
auto roi_stride = framework::stride(rois->dims());
auto out_stride = framework::stride(out_dims);
auto out_stride = framework::stride(out->dims());
const T* input_data = in->data<T>();
const int64_t* rois_data = rois->data<int64_t>();
......@@ -124,7 +109,8 @@ class CPURoiPoolOpKernel : public framework::OpKernel<T> {
// Define an empty pooling region to be zero
bool is_empty = (hend <= hstart) || (wend <= wstart);
output_data[pool_index] = is_empty ? 0 : -__FLT_MAX__;
output_data[pool_index] =
is_empty ? 0 : -std::numeric_limits<float>::max();
for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) {
......@@ -150,11 +136,11 @@ class CPURoiPoolOpKernel : public framework::OpKernel<T> {
};
template <typename Place, typename T>
class CPURoiPoolGradOpKernel : public framework::OpKernel<T> {
class CPUROIPoolGradOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto* in = ctx.Input<Tensor>("X");
auto* rois = ctx.Input<Tensor>("Rois");
auto* rois = ctx.Input<Tensor>("ROIs");
auto* argmax = ctx.Input<Tensor>("Argmax");
auto* out_grad =
......@@ -188,9 +174,9 @@ class CPURoiPoolGradOpKernel : public framework::OpKernel<T> {
for (size_t n = 0; n < rois_num; ++n) {
size_t roi_batch_idx = rois_data[0];
T* batch_grad_data = x_grad_data + batch_offset * roi_batch_idx;
for (size_t c = 0; c < channels; ++c) {
for (size_t ph = 0; ph < pooled_height; ++ph) {
for (size_t pw = 0; pw < pooled_width; ++pw) {
for (int c = 0; c < channels; ++c) {
for (int ph = 0; ph < pooled_height; ++ph) {
for (int pw = 0; pw < pooled_width; ++pw) {
size_t pool_index = ph * pooled_width + pw;
if (argmax_data[pool_index] >= 0) {
......
......@@ -4,8 +4,7 @@ import math
import sys
from op_test import OpTest
class TestSequenceSliceOp(OpTest):
class TestROIPoolOp(OpTest):
def set_data(self):
self.init_test_case()
self.make_rois()
......@@ -13,7 +12,7 @@ class TestSequenceSliceOp(OpTest):
self.inputs = {
'X': self.x,
'Rois': self.rois}
'ROIs': self.rois}
self.attrs = {
'spatial_scale': self.spatial_scale,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册