Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleDetection
提交
ef905598
P
PaddleDetection
项目概览
PaddlePaddle
/
PaddleDetection
大约 1 年 前同步成功
通知
695
Star
11112
Fork
2696
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
184
列表
看板
标记
里程碑
合并请求
40
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
184
Issue
184
列表
看板
标记
里程碑
合并请求
40
合并请求
40
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
ef905598
编写于
11月 22, 2017
作者:
W
wanghaox
提交者:
wanghaox
11月 24, 2017
浏览文件
操作
浏览文件
下载
差异文件
fix some code issues
上级
36dd770a
6ab78aee
变更
5
隐藏空白更改
内联
并排
Showing
5 changed file
with
170 addition
and
183 deletion
+170
-183
paddle/capi/examples/model_inference/dense/main.c
paddle/capi/examples/model_inference/dense/main.c
+17
-16
paddle/operators/roi_pool_op.cc
paddle/operators/roi_pool_op.cc
+66
-36
paddle/operators/roi_pool_op.cu
paddle/operators/roi_pool_op.cu
+75
-104
paddle/operators/roi_pool_op.h
paddle/operators/roi_pool_op.h
+10
-24
python/paddle/v2/fluid/tests/test_roi_pool_op.py
python/paddle/v2/fluid/tests/test_roi_pool_op.py
+2
-3
未找到文件。
paddle/capi/examples/model_inference/dense/main.c
浏览文件 @
ef905598
#include <paddle/capi.h>
#include <time.h>
#include "../common/common.h"
#define CONFIG_BIN "./trainer_config.bin"
...
...
@@ -27,20 +28,19 @@ int main() {
CHECK
(
paddle_arguments_resize
(
in_args
,
1
));
// Create input matrix.
paddle_matrix
mat
=
paddle_matrix_create
(
/* sample_num */
1
0
,
paddle_matrix
mat
=
paddle_matrix_create
(
/* sample_num */
1
,
/* size */
784
,
/* useGPU */
false
);
srand
(
time
(
0
));
std
::
vector
<
paddle_real
>
input
;
input
.
resize
(
784
*
10
);
paddle_real
*
array
;
// Get First row.
CHECK
(
paddle_matrix_get_row
(
mat
,
0
,
&
array
));
for
(
int
i
=
0
;
i
<
input
.
size
()
;
++
i
)
{
input
[
i
]
=
rand
()
/
((
float
)
RAND_MAX
);
for
(
int
i
=
0
;
i
<
784
;
++
i
)
{
array
[
i
]
=
rand
()
/
((
float
)
RAND_MAX
);
}
// Set value for the input matrix
CHECK
(
paddle_matrix_set_value
(
mat
,
input
.
data
()));
CHECK
(
paddle_arguments_set_value
(
in_args
,
0
,
mat
));
...
...
@@ -53,17 +53,18 @@ int main() {
CHECK
(
paddle_arguments_get_value
(
out_args
,
0
,
prob
));
std
::
std
::
vector
<
paddle_real
>
result
;
int
height
;
int
width
;
uint64_t
height
;
uint64_t
width
;
CHECK
(
paddle_matrix_get_shape
(
prob
,
&
height
,
&
width
);
result
.
resize
(
height
*
width
);
CHECK
(
paddle_matrix_get_value
(
prob
,
result
.
data
()));
CHECK
(
paddle_matrix_get_shape
(
prob
,
&
height
,
&
width
));
CHECK
(
paddle_matrix_get_row
(
prob
,
0
,
&
array
));
printf
(
"Prob: "
);
printf
(
"Prob:
\n
"
);
for
(
int
i
=
0
;
i
<
height
*
width
;
++
i
)
{
printf
(
"%.2f "
,
result
[
i
]);
printf
(
"%.4f "
,
array
[
i
]);
if
((
i
+
1
)
%
width
==
0
)
{
printf
(
"
\n
"
);
}
}
printf
(
"
\n
"
);
...
...
paddle/operators/roi_pool_op.cc
浏览文件 @
ef905598
...
...
@@ -17,24 +17,47 @@ limitations under the License. */
namespace
paddle
{
namespace
operators
{
class
R
oi
PoolOp
:
public
framework
::
OperatorWithKernel
{
class
R
OI
PoolOp
:
public
framework
::
OperatorWithKernel
{
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"X"
),
"Input(X) of R
oi
PoolOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"R
oi
s"
),
"Input(R
ois) of Roi
PoolOp should not be null."
);
"Input(X) of R
OI
PoolOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"R
OI
s"
),
"Input(R
OIs) of ROI
PoolOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Out"
),
"Output(Out) of R
oi
PoolOp should not be null."
);
"Output(Out) of R
OI
PoolOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Argmax"
),
"Output(Argmax) of R
oi
PoolOp should not be null."
);
"Output(Argmax) of R
OI
PoolOp should not be null."
);
auto
input_dims
=
ctx
->
GetInputDim
(
"X"
);
// Initialize the output's dims to maximum,
// and re-set to real dims by the value of Rois at kernel
ctx
->
SetOutputDim
(
"Out"
,
input_dims
);
auto
rois_dims
=
ctx
->
GetInputDim
(
"ROIs"
);
PADDLE_ENFORCE
(
input_dims
.
size
()
==
4
,
"The format of input tensor is NCHW."
);
PADDLE_ENFORCE
(
rois_dims
.
size
()
==
2
,
"ROIs should be a 2-D tensor of shape (num_rois, 5)"
"given as [[batch_id, x1, y1, x2, y2], …]."
);
int
pooled_height
=
ctx
->
Attrs
().
Get
<
int
>
(
"pooled_height"
);
int
pooled_width
=
ctx
->
Attrs
().
Get
<
int
>
(
"pooled_width"
);
float
spatial_scale
=
ctx
->
Attrs
().
Get
<
float
>
(
"spatial_scale"
);
PADDLE_ENFORCE_GT
(
pooled_height
,
0
,
"The pooled output height must greater than 0"
);
PADDLE_ENFORCE_GT
(
pooled_width
,
0
,
"The pooled output width must greater than 0"
);
PADDLE_ENFORCE_GT
(
spatial_scale
,
0.0
f
,
"The spatial scale must greater than 0"
);
auto
out_dims
=
input_dims
;
out_dims
[
0
]
=
rois_dims
[
0
];
out_dims
[
1
]
=
input_dims
[
1
];
out_dims
[
2
]
=
pooled_height
;
out_dims
[
3
]
=
pooled_width
;
ctx
->
SetOutputDim
(
"Out"
,
out_dims
);
ctx
->
SetOutputDim
(
"Argmax"
,
out_dims
);
}
protected:
...
...
@@ -46,7 +69,7 @@ class RoiPoolOp : public framework::OperatorWithKernel {
}
};
class
R
oi
PoolGradOp
:
public
framework
::
OperatorWithKernel
{
class
R
OI
PoolGradOp
:
public
framework
::
OperatorWithKernel
{
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
...
...
@@ -67,44 +90,51 @@ class RoiPoolGradOp : public framework::OperatorWithKernel {
}
};
class
R
oi
PoolOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
class
R
OI
PoolOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
public:
R
oi
PoolOpMaker
(
framework
::
OpProto
*
proto
,
R
OI
PoolOpMaker
(
framework
::
OpProto
*
proto
,
framework
::
OpAttrChecker
*
op_checker
)
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"X"
,
"(Tensor), "
"the input of RoiPoolOp."
);
AddInput
(
"Rois"
,
"the input of ROIPoolOp. "
"The format of input tensor is NCHW. Where N is batch size, "
"C is the number of input channels, "
"H is the height of the feature, and "
"W is the width of the feature."
);
AddInput
(
"ROIs"
,
"(Tensor), "
"RoIs (Regions of Interest) to pool over. "
"Should be a 2-D tensor of shape (num_rois, 5)"
"given as [[batch_id, x1, y1, x2, y2], …]."
);
"ROIs (Regions of Interest) to pool over. "
"should be a 2-D tensor of shape (num_rois, 5)"
"given as [[batch_id, x1, y1, x2, y2], …]. "
"Where batch_id is the id of the data, "
"(x1, y1) is the top left coordinates, and "
"(x2, y2) is the bottom right coordinates."
);
AddOutput
(
"Out"
,
"(Tensor), "
"RoI pooled output 4-D tensor of
shape "
"(num_rois, channels, pooled_h, pooled_w)."
);
"The output of ROIPoolOp is a 4-D tensor with
shape "
"(num_rois, channels, pooled_h, pooled_w)."
);
AddOutput
(
"Argmax"
,
"(Tensor), "
"Argmaxes corresponding to indices in X used "
"for gradient computation. Only output "
"if arg “is_test” is false."
).
AsIntermediate
();
AddAttr
<
float
>
(
"spatial_scale"
,
"(float, default 1.0), "
"Multiplicative spatial scale factor "
"to translate ROI coords from their input scale "
"to the scale used when pooling."
)
.
SetDefault
(
1.0
);
"(float, default 1.0), "
"Multiplicative spatial scale factor "
"to translate ROI coords from their input scale "
"to the scale used when pooling."
)
.
SetDefault
(
1.0
);
AddAttr
<
int
>
(
"pooled_height"
,
"(int, default 1), "
"The pooled output height."
)
.
SetDefault
(
1
);
"(int, default 1), "
"The pooled output height."
)
.
SetDefault
(
1
);
AddAttr
<
int
>
(
"pooled_width"
,
"(int, default 1), "
"The pooled output width."
)
.
SetDefault
(
1
);
"(int, default 1), "
"The pooled output width."
)
.
SetDefault
(
1
);
AddComment
(
R"DOC(
R
oi
Pool operator
R
OI
Pool operator
ROI Pooling for Faster-RCNN. The link below is a further introduction:
https://stackoverflow.com/questions/43430056/what-is-roi-layer-in-fast-rcnn
...
...
@@ -116,11 +146,11 @@ https://stackoverflow.com/questions/43430056/what-is-roi-layer-in-fast-rcnn
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OP
(
roi_pool
,
ops
::
R
oiPoolOp
,
ops
::
Roi
PoolOpMaker
,
roi_pool_grad
,
ops
::
R
oi
PoolGradOp
);
REGISTER_OP
(
roi_pool
,
ops
::
R
OIPoolOp
,
ops
::
ROI
PoolOpMaker
,
roi_pool_grad
,
ops
::
R
OI
PoolGradOp
);
REGISTER_OP_CPU_KERNEL
(
roi_pool
,
ops
::
CPUR
oi
PoolOpKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
ops
::
CPUR
OI
PoolOpKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
REGISTER_OP_CPU_KERNEL
(
roi_pool_grad
,
ops
::
CPUR
oi
PoolGradOpKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
ops
::
CPUR
OI
PoolGradOpKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
paddle/operators/roi_pool_op.cu
浏览文件 @
ef905598
...
...
@@ -12,91 +12,80 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/platform/cuda_helper.h"
#include "paddle/operators/roi_pool_op.h"
#include "paddle/platform/cuda_helper.h"
namespace
paddle
{
namespace
operators
{
#define FLT_MAX __FLT_MAX__
static
constexpr
int
kNumCUDAThreads
=
512
;
static
constexpr
int
kNumMaxinumNumBlocks
=
4096
;
static
constexpr
int
kROISize
=
5
;
constexpr
int
PADDLE_OPERATORS_ROIPOOL_CUDA_NUM_THREADS
=
512
;
constexpr
int
PADDLE_OPERATORS_ROIPOOL_MAXIMUM_NUM_BLOCKS
=
4096
;
static
inline
int
NumBlocks
(
const
int
N
)
{
return
std
::
min
((
N
+
kNumCUDAThreads
-
1
)
/
kNumCUDAThreads
,
kNumMaxinumNumBlocks
);
}
inline
int
PADDLE_OPERATORS_ROIPOOL_GET_BLOCKS
(
const
int
N
)
{
return
std
::
min
((
N
+
PADDLE_OPERATORS_ROIPOOL_CUDA_NUM_THREADS
-
1
)
/
PADDLE_OPERATORS_ROIPOOL_CUDA_NUM_THREADS
,
PADDLE_OPERATORS_ROIPOOL_MAXIMUM_NUM_BLOCKS
);
}
template
<
typename
T
>
__global__
void
GPUROIPoolForward
(
const
int
nthreads
,
const
T
*
input_data
,
const
int64_t
*
input_rois
,
const
float
spatial_scale
,
const
int
channels
,
const
int
height
,
const
int
width
,
const
int
pooled_height
,
const
int
pooled_width
,
T
*
output_data
,
int64_t
*
argmax_data
)
{
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
offset
=
blockDim
.
x
*
gridDim
.
x
;
for
(
size_t
i
=
index
;
i
<
nthreads
;
i
+=
offset
)
{
int
pw
=
index
%
pooled_width
;
int
ph
=
(
index
/
pooled_width
)
%
pooled_height
;
int
c
=
(
index
/
pooled_width
/
pooled_height
)
%
channels
;
int
n
=
index
/
pooled_width
/
pooled_height
/
channels
;
template
<
typename
T
>
__global__
void
GPURoiPoolForward
(
const
int
nthreads
,
const
T
*
input_data
,
const
int64_t
*
input_rois
,
const
float
spatial_scale
,
const
int
channels
,
const
int
height
,
const
int
width
,
const
int
pooled_height
,
const
int
pooled_width
,
T
*
output_data
,
int64_t
*
argmax_data
)
{
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
offset
=
blockDim
.
x
*
gridDim
.
x
;
for
(
size_t
i
=
index
;
i
<
nthreads
;
i
+=
offset
)
{
int
pw
=
index
%
pooled_width
;
int
ph
=
(
index
/
pooled_width
)
%
pooled_height
;
int
c
=
(
index
/
pooled_width
/
pooled_height
)
%
channels
;
int
n
=
index
/
pooled_width
/
pooled_height
/
channels
;
const
int64_t
*
offset_input_rois
=
input_rois
+
n
*
5
;
int
roi_batch_ind
=
offset_input_rois
[
0
];
int
roi_start_w
=
round
(
offset_input_rois
[
1
]
*
spatial_scale
);
int
roi_start_h
=
round
(
offset_input_rois
[
2
]
*
spatial_scale
);
int
roi_end_w
=
round
(
offset_input_rois
[
3
]
*
spatial_scale
);
int
roi_end_h
=
round
(
offset_input_rois
[
4
]
*
spatial_scale
);
int
roi_width
=
max
(
roi_end_w
-
roi_start_w
+
1
,
1
);
int
roi_height
=
max
(
roi_end_h
-
roi_start_h
+
1
,
1
);
T
bin_size_h
=
static_cast
<
T
>
(
roi_height
)
/
static_cast
<
T
>
(
pooled_height
);
T
bin_size_w
=
static_cast
<
T
>
(
roi_width
)
/
static_cast
<
T
>
(
pooled_width
);
int
hstart
=
static_cast
<
int
>
(
floor
(
static_cast
<
T
>
(
ph
)
*
bin_size_h
));
int
wstart
=
static_cast
<
int
>
(
floor
(
static_cast
<
T
>
(
pw
)
*
bin_size_w
));
int
hend
=
static_cast
<
int
>
(
ceil
(
static_cast
<
T
>
(
ph
+
1
)
*
bin_size_h
));
int
wend
=
static_cast
<
int
>
(
ceil
(
static_cast
<
T
>
(
pw
+
1
)
*
bin_size_w
));
hstart
=
min
(
max
(
hstart
+
roi_start_h
,
0
),
height
);
hend
=
min
(
max
(
hend
+
roi_start_h
,
0
),
height
);
wstart
=
min
(
max
(
wstart
+
roi_start_w
,
0
),
width
);
wend
=
min
(
max
(
wend
+
roi_start_w
,
0
),
width
);
bool
is_empty
=
(
hend
<=
hstart
)
||
(
wend
<=
wstart
);
T
maxval
=
is_empty
?
0
:
-
FLT_MAX
;
int
maxidx
=
-
1
;
const
T
*
offset_input_data
=
input_data
+
(
roi_batch_ind
*
channels
+
c
)
*
height
*
width
;
for
(
int
h
=
hstart
;
h
<
hend
;
++
h
)
{
for
(
int
w
=
wstart
;
w
<
wend
;
++
w
)
{
int
input_data_index
=
h
*
width
+
w
;
if
(
offset_input_data
[
input_data_index
]
>
maxval
)
{
maxval
=
offset_input_data
[
input_data_index
];
maxidx
=
input_data_index
;
}
const
int64_t
*
offset_input_rois
=
input_rois
+
n
*
kROISize
;
int
roi_batch_ind
=
offset_input_rois
[
0
];
int
roi_start_w
=
round
(
offset_input_rois
[
1
]
*
spatial_scale
);
int
roi_start_h
=
round
(
offset_input_rois
[
2
]
*
spatial_scale
);
int
roi_end_w
=
round
(
offset_input_rois
[
3
]
*
spatial_scale
);
int
roi_end_h
=
round
(
offset_input_rois
[
4
]
*
spatial_scale
);
int
roi_width
=
max
(
roi_end_w
-
roi_start_w
+
1
,
1
);
int
roi_height
=
max
(
roi_end_h
-
roi_start_h
+
1
,
1
);
T
bin_size_h
=
static_cast
<
T
>
(
roi_height
)
/
static_cast
<
T
>
(
pooled_height
);
T
bin_size_w
=
static_cast
<
T
>
(
roi_width
)
/
static_cast
<
T
>
(
pooled_width
);
int
hstart
=
static_cast
<
int
>
(
floor
(
static_cast
<
T
>
(
ph
)
*
bin_size_h
));
int
wstart
=
static_cast
<
int
>
(
floor
(
static_cast
<
T
>
(
pw
)
*
bin_size_w
));
int
hend
=
static_cast
<
int
>
(
ceil
(
static_cast
<
T
>
(
ph
+
1
)
*
bin_size_h
));
int
wend
=
static_cast
<
int
>
(
ceil
(
static_cast
<
T
>
(
pw
+
1
)
*
bin_size_w
));
hstart
=
min
(
max
(
hstart
+
roi_start_h
,
0
),
height
);
hend
=
min
(
max
(
hend
+
roi_start_h
,
0
),
height
);
wstart
=
min
(
max
(
wstart
+
roi_start_w
,
0
),
width
);
wend
=
min
(
max
(
wend
+
roi_start_w
,
0
),
width
);
bool
is_empty
=
(
hend
<=
hstart
)
||
(
wend
<=
wstart
);
T
maxval
=
is_empty
?
0
:
-
std
::
numeric_limits
<
float
>::
max
();
int
maxidx
=
-
1
;
const
T
*
offset_input_data
=
input_data
+
(
roi_batch_ind
*
channels
+
c
)
*
height
*
width
;
for
(
int
h
=
hstart
;
h
<
hend
;
++
h
)
{
for
(
int
w
=
wstart
;
w
<
wend
;
++
w
)
{
int
input_data_index
=
h
*
width
+
w
;
if
(
offset_input_data
[
input_data_index
]
>
maxval
)
{
maxval
=
offset_input_data
[
input_data_index
];
maxidx
=
input_data_index
;
}
}
output_data
[
index
]
=
maxval
;
if
(
argmax_data
)
{
argmax_data
[
index
]
=
maxidx
;
}
}
output_data
[
index
]
=
maxval
;
if
(
argmax_data
)
{
argmax_data
[
index
]
=
maxidx
;
}
}
}
template
<
typename
T
>
__global__
void
GPUR
oi
PoolBackward
(
__global__
void
GPUR
OI
PoolBackward
(
const
int
nthreads
,
const
int64_t
*
input_rois
,
const
T
*
output_grad
,
...
...
@@ -117,7 +106,7 @@ __global__ void GPURoiPoolBackward(
int
c
=
(
index
/
pooled_width
/
pooled_height
)
%
channels
;
int
n
=
index
/
pooled_width
/
pooled_height
/
channels
;
const
int64_t
*
offset_input_rois
=
input_rois
+
n
*
5
;
const
int64_t
*
offset_input_rois
=
input_rois
+
n
*
kROISize
;
int
roi_batch_ind
=
offset_input_rois
[
0
];
int
input_offset
=
(
roi_batch_ind
*
channels
+
c
)
*
height
*
width
;
int
output_offset
=
(
n
*
channels
+
c
)
*
pooled_height
*
pooled_width
;
...
...
@@ -135,11 +124,11 @@ __global__ void GPURoiPoolBackward(
template
<
typename
Place
,
typename
T
>
class
GPUR
oi
PoolOpKernel
:
public
framework
::
OpKernel
<
T
>
{
class
GPUR
OI
PoolOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
in
=
ctx
.
Input
<
Tensor
>
(
"X"
);
auto
*
rois
=
ctx
.
Input
<
Tensor
>
(
"R
oi
s"
);
auto
*
rois
=
ctx
.
Input
<
Tensor
>
(
"R
OI
s"
);
auto
*
out
=
ctx
.
Output
<
Tensor
>
(
"Out"
);
auto
*
argmax
=
ctx
.
Output
<
Tensor
>
(
"Argmax"
);
...
...
@@ -147,31 +136,17 @@ class GPURoiPoolOpKernel : public framework::OpKernel<T> {
auto
pooled_width
=
ctx
.
Attr
<
int
>
(
"pooled_width"
);
auto
spatial_scale
=
ctx
.
Attr
<
float
>
(
"spatial_scale"
);
PADDLE_ENFORCE_GT
(
pooled_height
,
0
,
"The pooled output height must greater than 0"
);
PADDLE_ENFORCE_GT
(
pooled_width
,
0
,
"The pooled output width must greater than 0"
);
PADDLE_ENFORCE_GT
(
spatial_scale
,
0
,
"The spatial scale must greater than 0"
);
auto
in_dims
=
in
->
dims
();
auto
in_stride
=
framework
::
stride
(
in_dims
);
int
channels
=
in_dims
[
1
];
int
height
=
in_dims
[
2
];
int
width
=
in_dims
[
3
];
int
rois_num
=
rois
->
dims
()[
0
];
auto
out_dims
=
in_dims
;
out_dims
[
0
]
=
rois_num
;
out_dims
[
1
]
=
in_dims
[
1
];
out_dims
[
2
]
=
pooled_height
;
out_dims
[
3
]
=
pooled_width
;
size_t
rois_num
=
rois
->
dims
()[
0
];
out
->
Resize
(
out_dims
);
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
math
::
SetConstant
<
Place
,
T
>
set_zero
;
set_zero
(
ctx
.
device_context
(),
out
,
static_cast
<
T
>
(
0
));
argmax
->
Resize
(
out
->
dims
());
argmax
->
mutable_data
<
int64_t
>
(
ctx
.
GetPlace
());
math
::
SetConstant
<
Place
,
int64_t
>
set_init
;
set_init
(
ctx
.
device_context
(),
argmax
,
static_cast
<
int64_t
>
(
-
1
));
...
...
@@ -179,10 +154,10 @@ class GPURoiPoolOpKernel : public framework::OpKernel<T> {
if
(
rois_num
==
0
)
return
;
int
output_size
=
out
->
numel
();
int
blocks
=
PADDLE_OPERATORS_ROIPOOL_GET_BLOCKS
(
output_size
);
int
threads
=
PADDLE_OPERATORS_ROIPOOL_CUDA_NUM_THREADS
;
int
blocks
=
NumBlocks
(
output_size
);
int
threads
=
kNumCUDAThreads
;
GPUR
oi
PoolForward
<
T
>
GPUR
OI
PoolForward
<
T
>
<<<
blocks
,
threads
,
0
,
ctx
.
cuda_device_context
().
stream
()
>>>
(
output_size
,
in
->
data
<
T
>
(),
...
...
@@ -195,17 +170,15 @@ class GPURoiPoolOpKernel : public framework::OpKernel<T> {
pooled_width
,
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
()),
argmax
->
mutable_data
<
int64_t
>
(
ctx
.
GetPlace
()));
return
;
}
};
template
<
typename
Place
,
typename
T
>
class
GPUR
oi
PoolGradOpKernel
:
public
framework
::
OpKernel
<
T
>
{
class
GPUR
OI
PoolGradOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
in
=
ctx
.
Input
<
Tensor
>
(
"X"
);
auto
*
rois
=
ctx
.
Input
<
Tensor
>
(
"R
oi
s"
);
auto
*
rois
=
ctx
.
Input
<
Tensor
>
(
"R
OI
s"
);
auto
*
argmax
=
ctx
.
Input
<
Tensor
>
(
"Argmax"
);
auto
*
out_grad
=
...
...
@@ -217,23 +190,22 @@ class GPURoiPoolGradOpKernel : public framework::OpKernel<T> {
auto
pooled_width
=
ctx
.
Attr
<
int
>
(
"pooled_width"
);
auto
spatial_scale
=
ctx
.
Attr
<
float
>
(
"spatial_scale"
);
in
t
rois_num
=
rois
->
dims
()[
0
];
size_
t
rois_num
=
rois
->
dims
()[
0
];
int
channels
=
in
->
dims
()[
1
];
int
height
=
in
->
dims
()[
2
];
int
width
=
in
->
dims
()[
3
];
if
(
x_grad
)
{
x_grad
->
Resize
(
in
->
dims
());
x_grad
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
math
::
SetConstant
<
Place
,
T
>
set_zero
;
set_zero
(
ctx
.
device_context
(),
x_grad
,
static_cast
<
T
>
(
0
));
int
output_grad_size
=
out_grad
->
numel
();
int
blocks
=
PADDLE_OPERATORS_ROIPOOL_GET_BLOCKS
(
output_grad_size
);
int
threads
=
PADDLE_OPERATORS_ROIPOOL_CUDA_NUM_THREADS
;
int
blocks
=
NumBlocks
(
output_grad_size
);
int
threads
=
kNumCUDAThreads
;
if
(
output_grad_size
>
0
)
{
GPUR
oi
PoolBackward
<
T
>
GPUR
OI
PoolBackward
<
T
>
<<<
blocks
,
threads
,
0
,
ctx
.
cuda_device_context
().
stream
()
>>>
(
output_grad_size
,
rois
->
data
<
int64_t
>
(),
...
...
@@ -248,7 +220,6 @@ class GPURoiPoolGradOpKernel : public framework::OpKernel<T> {
pooled_width
,
x_grad
->
mutable_data
<
T
>
(
ctx
.
GetPlace
()));
}
return
;
}
}
};
...
...
@@ -259,7 +230,7 @@ class GPURoiPoolGradOpKernel : public framework::OpKernel<T> {
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_GPU_KERNEL
(
roi_pool
,
ops
::
GPUR
oi
PoolOpKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
);
ops
::
GPUR
OI
PoolOpKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
);
REGISTER_OP_GPU_KERNEL
(
roi_pool_grad
,
ops
::
GPUR
oi
PoolGradOpKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
);
ops
::
GPUR
OI
PoolGradOpKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
);
paddle/operators/roi_pool_op.h
浏览文件 @
ef905598
...
...
@@ -25,11 +25,11 @@ using LoDTensor = framework::LoDTensor;
using
LoD
=
framework
::
LoD
;
template
<
typename
Place
,
typename
T
>
class
CPUR
oi
PoolOpKernel
:
public
framework
::
OpKernel
<
T
>
{
class
CPUR
OI
PoolOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
in
=
ctx
.
Input
<
Tensor
>
(
"X"
);
auto
*
rois
=
ctx
.
Input
<
Tensor
>
(
"R
oi
s"
);
auto
*
rois
=
ctx
.
Input
<
Tensor
>
(
"R
OI
s"
);
auto
*
out
=
ctx
.
Output
<
Tensor
>
(
"Out"
);
auto
*
argmax
=
ctx
.
Output
<
Tensor
>
(
"Argmax"
);
...
...
@@ -37,13 +37,6 @@ class CPURoiPoolOpKernel : public framework::OpKernel<T> {
auto
pooled_width
=
ctx
.
Attr
<
int
>
(
"pooled_width"
);
auto
spatial_scale
=
ctx
.
Attr
<
float
>
(
"spatial_scale"
);
PADDLE_ENFORCE_GT
(
pooled_height
,
0
,
"The pooled output height must greater than 0"
);
PADDLE_ENFORCE_GT
(
pooled_width
,
0
,
"The pooled output width must greater than 0"
);
PADDLE_ENFORCE_GT
(
spatial_scale
,
0
,
"The spatial scale must greater than 0"
);
auto
in_dims
=
in
->
dims
();
int
batch_size
=
in_dims
[
0
];
int
channels
=
in_dims
[
1
];
...
...
@@ -51,18 +44,10 @@ class CPURoiPoolOpKernel : public framework::OpKernel<T> {
int
width
=
in_dims
[
3
];
int
rois_num
=
rois
->
dims
()[
0
];
auto
out_dims
=
in_dims
;
out_dims
[
0
]
=
rois_num
;
out_dims
[
1
]
=
channels
;
out_dims
[
2
]
=
pooled_height
;
out_dims
[
3
]
=
pooled_width
;
out
->
Resize
(
out_dims
);
argmax
->
Resize
(
out
->
dims
());
auto
in_stride
=
framework
::
stride
(
in_dims
);
auto
argmax_stride
=
framework
::
stride
(
argmax
->
dims
());
auto
roi_stride
=
framework
::
stride
(
rois
->
dims
());
auto
out_stride
=
framework
::
stride
(
out
_dims
);
auto
out_stride
=
framework
::
stride
(
out
->
dims
()
);
const
T
*
input_data
=
in
->
data
<
T
>
();
const
int64_t
*
rois_data
=
rois
->
data
<
int64_t
>
();
...
...
@@ -124,7 +109,8 @@ class CPURoiPoolOpKernel : public framework::OpKernel<T> {
// Define an empty pooling region to be zero
bool
is_empty
=
(
hend
<=
hstart
)
||
(
wend
<=
wstart
);
output_data
[
pool_index
]
=
is_empty
?
0
:
-
__FLT_MAX__
;
output_data
[
pool_index
]
=
is_empty
?
0
:
-
std
::
numeric_limits
<
float
>::
max
();
for
(
int
h
=
hstart
;
h
<
hend
;
++
h
)
{
for
(
int
w
=
wstart
;
w
<
wend
;
++
w
)
{
...
...
@@ -150,11 +136,11 @@ class CPURoiPoolOpKernel : public framework::OpKernel<T> {
};
template
<
typename
Place
,
typename
T
>
class
CPUR
oi
PoolGradOpKernel
:
public
framework
::
OpKernel
<
T
>
{
class
CPUR
OI
PoolGradOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
in
=
ctx
.
Input
<
Tensor
>
(
"X"
);
auto
*
rois
=
ctx
.
Input
<
Tensor
>
(
"R
oi
s"
);
auto
*
rois
=
ctx
.
Input
<
Tensor
>
(
"R
OI
s"
);
auto
*
argmax
=
ctx
.
Input
<
Tensor
>
(
"Argmax"
);
auto
*
out_grad
=
...
...
@@ -188,9 +174,9 @@ class CPURoiPoolGradOpKernel : public framework::OpKernel<T> {
for
(
size_t
n
=
0
;
n
<
rois_num
;
++
n
)
{
size_t
roi_batch_idx
=
rois_data
[
0
];
T
*
batch_grad_data
=
x_grad_data
+
batch_offset
*
roi_batch_idx
;
for
(
size_
t
c
=
0
;
c
<
channels
;
++
c
)
{
for
(
size_
t
ph
=
0
;
ph
<
pooled_height
;
++
ph
)
{
for
(
size_
t
pw
=
0
;
pw
<
pooled_width
;
++
pw
)
{
for
(
in
t
c
=
0
;
c
<
channels
;
++
c
)
{
for
(
in
t
ph
=
0
;
ph
<
pooled_height
;
++
ph
)
{
for
(
in
t
pw
=
0
;
pw
<
pooled_width
;
++
pw
)
{
size_t
pool_index
=
ph
*
pooled_width
+
pw
;
if
(
argmax_data
[
pool_index
]
>=
0
)
{
...
...
python/paddle/v2/fluid/tests/test_roi_pool_op.py
浏览文件 @
ef905598
...
...
@@ -4,8 +4,7 @@ import math
import
sys
from
op_test
import
OpTest
class
TestSequenceSliceOp
(
OpTest
):
class
TestROIPoolOp
(
OpTest
):
def
set_data
(
self
):
self
.
init_test_case
()
self
.
make_rois
()
...
...
@@ -13,7 +12,7 @@ class TestSequenceSliceOp(OpTest):
self
.
inputs
=
{
'X'
:
self
.
x
,
'R
oi
s'
:
self
.
rois
}
'R
OI
s'
:
self
.
rois
}
self
.
attrs
=
{
'spatial_scale'
:
self
.
spatial_scale
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录