Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
ef905598
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
ef905598
编写于
11月 22, 2017
作者:
W
wanghaox
提交者:
wanghaox
11月 24, 2017
浏览文件
操作
浏览文件
下载
差异文件
fix some code issues
上级
36dd770a
6ab78aee
变更
5
隐藏空白更改
内联
并排
Showing
5 changed file
with
170 addition
and
183 deletion
+170
-183
paddle/capi/examples/model_inference/dense/main.c
paddle/capi/examples/model_inference/dense/main.c
+17
-16
paddle/operators/roi_pool_op.cc
paddle/operators/roi_pool_op.cc
+66
-36
paddle/operators/roi_pool_op.cu
paddle/operators/roi_pool_op.cu
+75
-104
paddle/operators/roi_pool_op.h
paddle/operators/roi_pool_op.h
+10
-24
python/paddle/v2/fluid/tests/test_roi_pool_op.py
python/paddle/v2/fluid/tests/test_roi_pool_op.py
+2
-3
未找到文件。
paddle/capi/examples/model_inference/dense/main.c
浏览文件 @
ef905598
#include <paddle/capi.h>
#include <time.h>
#include "../common/common.h"
#define CONFIG_BIN "./trainer_config.bin"
...
...
@@ -27,20 +28,19 @@ int main() {
CHECK
(
paddle_arguments_resize
(
in_args
,
1
));
// Create input matrix.
paddle_matrix
mat
=
paddle_matrix_create
(
/* sample_num */
1
0
,
paddle_matrix
mat
=
paddle_matrix_create
(
/* sample_num */
1
,
/* size */
784
,
/* useGPU */
false
);
srand
(
time
(
0
));
std
::
vector
<
paddle_real
>
input
;
input
.
resize
(
784
*
10
);
paddle_real
*
array
;
// Get First row.
CHECK
(
paddle_matrix_get_row
(
mat
,
0
,
&
array
));
for
(
int
i
=
0
;
i
<
input
.
size
()
;
++
i
)
{
input
[
i
]
=
rand
()
/
((
float
)
RAND_MAX
);
for
(
int
i
=
0
;
i
<
784
;
++
i
)
{
array
[
i
]
=
rand
()
/
((
float
)
RAND_MAX
);
}
// Set value for the input matrix
CHECK
(
paddle_matrix_set_value
(
mat
,
input
.
data
()));
CHECK
(
paddle_arguments_set_value
(
in_args
,
0
,
mat
));
...
...
@@ -53,17 +53,18 @@ int main() {
CHECK
(
paddle_arguments_get_value
(
out_args
,
0
,
prob
));
std
::
std
::
vector
<
paddle_real
>
result
;
int
height
;
int
width
;
uint64_t
height
;
uint64_t
width
;
CHECK
(
paddle_matrix_get_shape
(
prob
,
&
height
,
&
width
);
result
.
resize
(
height
*
width
);
CHECK
(
paddle_matrix_get_value
(
prob
,
result
.
data
()));
CHECK
(
paddle_matrix_get_shape
(
prob
,
&
height
,
&
width
));
CHECK
(
paddle_matrix_get_row
(
prob
,
0
,
&
array
));
printf
(
"Prob: "
);
printf
(
"Prob:
\n
"
);
for
(
int
i
=
0
;
i
<
height
*
width
;
++
i
)
{
printf
(
"%.2f "
,
result
[
i
]);
printf
(
"%.4f "
,
array
[
i
]);
if
((
i
+
1
)
%
width
==
0
)
{
printf
(
"
\n
"
);
}
}
printf
(
"
\n
"
);
...
...
paddle/operators/roi_pool_op.cc
浏览文件 @
ef905598
...
...
@@ -17,24 +17,47 @@ limitations under the License. */
namespace
paddle
{
namespace
operators
{
class
R
oi
PoolOp
:
public
framework
::
OperatorWithKernel
{
class
R
OI
PoolOp
:
public
framework
::
OperatorWithKernel
{
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"X"
),
"Input(X) of R
oi
PoolOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"R
oi
s"
),
"Input(R
ois) of Roi
PoolOp should not be null."
);
"Input(X) of R
OI
PoolOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"R
OI
s"
),
"Input(R
OIs) of ROI
PoolOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Out"
),
"Output(Out) of R
oi
PoolOp should not be null."
);
"Output(Out) of R
OI
PoolOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Argmax"
),
"Output(Argmax) of R
oi
PoolOp should not be null."
);
"Output(Argmax) of R
OI
PoolOp should not be null."
);
auto
input_dims
=
ctx
->
GetInputDim
(
"X"
);
// Initialize the output's dims to maximum,
// and re-set to real dims by the value of Rois at kernel
ctx
->
SetOutputDim
(
"Out"
,
input_dims
);
auto
rois_dims
=
ctx
->
GetInputDim
(
"ROIs"
);
PADDLE_ENFORCE
(
input_dims
.
size
()
==
4
,
"The format of input tensor is NCHW."
);
PADDLE_ENFORCE
(
rois_dims
.
size
()
==
2
,
"ROIs should be a 2-D tensor of shape (num_rois, 5)"
"given as [[batch_id, x1, y1, x2, y2], …]."
);
int
pooled_height
=
ctx
->
Attrs
().
Get
<
int
>
(
"pooled_height"
);
int
pooled_width
=
ctx
->
Attrs
().
Get
<
int
>
(
"pooled_width"
);
float
spatial_scale
=
ctx
->
Attrs
().
Get
<
float
>
(
"spatial_scale"
);
PADDLE_ENFORCE_GT
(
pooled_height
,
0
,
"The pooled output height must greater than 0"
);
PADDLE_ENFORCE_GT
(
pooled_width
,
0
,
"The pooled output width must greater than 0"
);
PADDLE_ENFORCE_GT
(
spatial_scale
,
0.0
f
,
"The spatial scale must greater than 0"
);
auto
out_dims
=
input_dims
;
out_dims
[
0
]
=
rois_dims
[
0
];
out_dims
[
1
]
=
input_dims
[
1
];
out_dims
[
2
]
=
pooled_height
;
out_dims
[
3
]
=
pooled_width
;
ctx
->
SetOutputDim
(
"Out"
,
out_dims
);
ctx
->
SetOutputDim
(
"Argmax"
,
out_dims
);
}
protected:
...
...
@@ -46,7 +69,7 @@ class RoiPoolOp : public framework::OperatorWithKernel {
}
};
class
R
oi
PoolGradOp
:
public
framework
::
OperatorWithKernel
{
class
R
OI
PoolGradOp
:
public
framework
::
OperatorWithKernel
{
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
...
...
@@ -67,44 +90,51 @@ class RoiPoolGradOp : public framework::OperatorWithKernel {
}
};
class
R
oi
PoolOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
class
R
OI
PoolOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
public:
R
oi
PoolOpMaker
(
framework
::
OpProto
*
proto
,
R
OI
PoolOpMaker
(
framework
::
OpProto
*
proto
,
framework
::
OpAttrChecker
*
op_checker
)
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"X"
,
"(Tensor), "
"the input of RoiPoolOp."
);
AddInput
(
"Rois"
,
"the input of ROIPoolOp. "
"The format of input tensor is NCHW. Where N is batch size, "
"C is the number of input channels, "
"H is the height of the feature, and "
"W is the width of the feature."
);
AddInput
(
"ROIs"
,
"(Tensor), "
"RoIs (Regions of Interest) to pool over. "
"Should be a 2-D tensor of shape (num_rois, 5)"
"given as [[batch_id, x1, y1, x2, y2], …]."
);
"ROIs (Regions of Interest) to pool over. "
"should be a 2-D tensor of shape (num_rois, 5)"
"given as [[batch_id, x1, y1, x2, y2], …]. "
"Where batch_id is the id of the data, "
"(x1, y1) is the top left coordinates, and "
"(x2, y2) is the bottom right coordinates."
);
AddOutput
(
"Out"
,
"(Tensor), "
"RoI pooled output 4-D tensor of
shape "
"(num_rois, channels, pooled_h, pooled_w)."
);
"The output of ROIPoolOp is a 4-D tensor with
shape "
"(num_rois, channels, pooled_h, pooled_w)."
);
AddOutput
(
"Argmax"
,
"(Tensor), "
"Argmaxes corresponding to indices in X used "
"for gradient computation. Only output "
"if arg “is_test” is false."
).
AsIntermediate
();
AddAttr
<
float
>
(
"spatial_scale"
,
"(float, default 1.0), "
"Multiplicative spatial scale factor "
"to translate ROI coords from their input scale "
"to the scale used when pooling."
)
.
SetDefault
(
1.0
);
"(float, default 1.0), "
"Multiplicative spatial scale factor "
"to translate ROI coords from their input scale "
"to the scale used when pooling."
)
.
SetDefault
(
1.0
);
AddAttr
<
int
>
(
"pooled_height"
,
"(int, default 1), "
"The pooled output height."
)
.
SetDefault
(
1
);
"(int, default 1), "
"The pooled output height."
)
.
SetDefault
(
1
);
AddAttr
<
int
>
(
"pooled_width"
,
"(int, default 1), "
"The pooled output width."
)
.
SetDefault
(
1
);
"(int, default 1), "
"The pooled output width."
)
.
SetDefault
(
1
);
AddComment
(
R"DOC(
R
oi
Pool operator
R
OI
Pool operator
ROI Pooling for Faster-RCNN. The link below is a further introduction:
https://stackoverflow.com/questions/43430056/what-is-roi-layer-in-fast-rcnn
...
...
@@ -116,11 +146,11 @@ https://stackoverflow.com/questions/43430056/what-is-roi-layer-in-fast-rcnn
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OP
(
roi_pool
,
ops
::
R
oiPoolOp
,
ops
::
Roi
PoolOpMaker
,
roi_pool_grad
,
ops
::
R
oi
PoolGradOp
);
REGISTER_OP
(
roi_pool
,
ops
::
R
OIPoolOp
,
ops
::
ROI
PoolOpMaker
,
roi_pool_grad
,
ops
::
R
OI
PoolGradOp
);
REGISTER_OP_CPU_KERNEL
(
roi_pool
,
ops
::
CPUR
oi
PoolOpKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
ops
::
CPUR
OI
PoolOpKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
REGISTER_OP_CPU_KERNEL
(
roi_pool_grad
,
ops
::
CPUR
oi
PoolGradOpKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
ops
::
CPUR
OI
PoolGradOpKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
paddle/operators/roi_pool_op.cu
浏览文件 @
ef905598
...
...
@@ -12,91 +12,80 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/platform/cuda_helper.h"
#include "paddle/operators/roi_pool_op.h"
#include "paddle/platform/cuda_helper.h"
namespace
paddle
{
namespace
operators
{
#define FLT_MAX __FLT_MAX__
static
constexpr
int
kNumCUDAThreads
=
512
;
static
constexpr
int
kNumMaxinumNumBlocks
=
4096
;
static
constexpr
int
kROISize
=
5
;
constexpr
int
PADDLE_OPERATORS_ROIPOOL_CUDA_NUM_THREADS
=
512
;
constexpr
int
PADDLE_OPERATORS_ROIPOOL_MAXIMUM_NUM_BLOCKS
=
4096
;
static
inline
int
NumBlocks
(
const
int
N
)
{
return
std
::
min
((
N
+
kNumCUDAThreads
-
1
)
/
kNumCUDAThreads
,
kNumMaxinumNumBlocks
);
}
inline
int
PADDLE_OPERATORS_ROIPOOL_GET_BLOCKS
(
const
int
N
)
{
return
std
::
min
((
N
+
PADDLE_OPERATORS_ROIPOOL_CUDA_NUM_THREADS
-
1
)
/
PADDLE_OPERATORS_ROIPOOL_CUDA_NUM_THREADS
,
PADDLE_OPERATORS_ROIPOOL_MAXIMUM_NUM_BLOCKS
);
}
template
<
typename
T
>
__global__
void
GPUROIPoolForward
(
const
int
nthreads
,
const
T
*
input_data
,
const
int64_t
*
input_rois
,
const
float
spatial_scale
,
const
int
channels
,
const
int
height
,
const
int
width
,
const
int
pooled_height
,
const
int
pooled_width
,
T
*
output_data
,
int64_t
*
argmax_data
)
{
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
offset
=
blockDim
.
x
*
gridDim
.
x
;
for
(
size_t
i
=
index
;
i
<
nthreads
;
i
+=
offset
)
{
int
pw
=
index
%
pooled_width
;
int
ph
=
(
index
/
pooled_width
)
%
pooled_height
;
int
c
=
(
index
/
pooled_width
/
pooled_height
)
%
channels
;
int
n
=
index
/
pooled_width
/
pooled_height
/
channels
;
template
<
typename
T
>
__global__
void
GPURoiPoolForward
(
const
int
nthreads
,
const
T
*
input_data
,
const
int64_t
*
input_rois
,
const
float
spatial_scale
,
const
int
channels
,
const
int
height
,
const
int
width
,
const
int
pooled_height
,
const
int
pooled_width
,
T
*
output_data
,
int64_t
*
argmax_data
)
{
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
offset
=
blockDim
.
x
*
gridDim
.
x
;
for
(
size_t
i
=
index
;
i
<
nthreads
;
i
+=
offset
)
{
int
pw
=
index
%
pooled_width
;
int
ph
=
(
index
/
pooled_width
)
%
pooled_height
;
int
c
=
(
index
/
pooled_width
/
pooled_height
)
%
channels
;
int
n
=
index
/
pooled_width
/
pooled_height
/
channels
;
const
int64_t
*
offset_input_rois
=
input_rois
+
n
*
5
;
int
roi_batch_ind
=
offset_input_rois
[
0
];
int
roi_start_w
=
round
(
offset_input_rois
[
1
]
*
spatial_scale
);
int
roi_start_h
=
round
(
offset_input_rois
[
2
]
*
spatial_scale
);
int
roi_end_w
=
round
(
offset_input_rois
[
3
]
*
spatial_scale
);
int
roi_end_h
=
round
(
offset_input_rois
[
4
]
*
spatial_scale
);
int
roi_width
=
max
(
roi_end_w
-
roi_start_w
+
1
,
1
);
int
roi_height
=
max
(
roi_end_h
-
roi_start_h
+
1
,
1
);
T
bin_size_h
=
static_cast
<
T
>
(
roi_height
)
/
static_cast
<
T
>
(
pooled_height
);
T
bin_size_w
=
static_cast
<
T
>
(
roi_width
)
/
static_cast
<
T
>
(
pooled_width
);
int
hstart
=
static_cast
<
int
>
(
floor
(
static_cast
<
T
>
(
ph
)
*
bin_size_h
));
int
wstart
=
static_cast
<
int
>
(
floor
(
static_cast
<
T
>
(
pw
)
*
bin_size_w
));
int
hend
=
static_cast
<
int
>
(
ceil
(
static_cast
<
T
>
(
ph
+
1
)
*
bin_size_h
));
int
wend
=
static_cast
<
int
>
(
ceil
(
static_cast
<
T
>
(
pw
+
1
)
*
bin_size_w
));
hstart
=
min
(
max
(
hstart
+
roi_start_h
,
0
),
height
);
hend
=
min
(
max
(
hend
+
roi_start_h
,
0
),
height
);
wstart
=
min
(
max
(
wstart
+
roi_start_w
,
0
),
width
);
wend
=
min
(
max
(
wend
+
roi_start_w
,
0
),
width
);
bool
is_empty
=
(
hend
<=
hstart
)
||
(
wend
<=
wstart
);
T
maxval
=
is_empty
?
0
:
-
FLT_MAX
;
int
maxidx
=
-
1
;
const
T
*
offset_input_data
=
input_data
+
(
roi_batch_ind
*
channels
+
c
)
*
height
*
width
;
for
(
int
h
=
hstart
;
h
<
hend
;
++
h
)
{
for
(
int
w
=
wstart
;
w
<
wend
;
++
w
)
{
int
input_data_index
=
h
*
width
+
w
;
if
(
offset_input_data
[
input_data_index
]
>
maxval
)
{
maxval
=
offset_input_data
[
input_data_index
];
maxidx
=
input_data_index
;
}
const
int64_t
*
offset_input_rois
=
input_rois
+
n
*
kROISize
;
int
roi_batch_ind
=
offset_input_rois
[
0
];
int
roi_start_w
=
round
(
offset_input_rois
[
1
]
*
spatial_scale
);
int
roi_start_h
=
round
(
offset_input_rois
[
2
]
*
spatial_scale
);
int
roi_end_w
=
round
(
offset_input_rois
[
3
]
*
spatial_scale
);
int
roi_end_h
=
round
(
offset_input_rois
[
4
]
*
spatial_scale
);
int
roi_width
=
max
(
roi_end_w
-
roi_start_w
+
1
,
1
);
int
roi_height
=
max
(
roi_end_h
-
roi_start_h
+
1
,
1
);
T
bin_size_h
=
static_cast
<
T
>
(
roi_height
)
/
static_cast
<
T
>
(
pooled_height
);
T
bin_size_w
=
static_cast
<
T
>
(
roi_width
)
/
static_cast
<
T
>
(
pooled_width
);
int
hstart
=
static_cast
<
int
>
(
floor
(
static_cast
<
T
>
(
ph
)
*
bin_size_h
));
int
wstart
=
static_cast
<
int
>
(
floor
(
static_cast
<
T
>
(
pw
)
*
bin_size_w
));
int
hend
=
static_cast
<
int
>
(
ceil
(
static_cast
<
T
>
(
ph
+
1
)
*
bin_size_h
));
int
wend
=
static_cast
<
int
>
(
ceil
(
static_cast
<
T
>
(
pw
+
1
)
*
bin_size_w
));
hstart
=
min
(
max
(
hstart
+
roi_start_h
,
0
),
height
);
hend
=
min
(
max
(
hend
+
roi_start_h
,
0
),
height
);
wstart
=
min
(
max
(
wstart
+
roi_start_w
,
0
),
width
);
wend
=
min
(
max
(
wend
+
roi_start_w
,
0
),
width
);
bool
is_empty
=
(
hend
<=
hstart
)
||
(
wend
<=
wstart
);
T
maxval
=
is_empty
?
0
:
-
std
::
numeric_limits
<
float
>::
max
();
int
maxidx
=
-
1
;
const
T
*
offset_input_data
=
input_data
+
(
roi_batch_ind
*
channels
+
c
)
*
height
*
width
;
for
(
int
h
=
hstart
;
h
<
hend
;
++
h
)
{
for
(
int
w
=
wstart
;
w
<
wend
;
++
w
)
{
int
input_data_index
=
h
*
width
+
w
;
if
(
offset_input_data
[
input_data_index
]
>
maxval
)
{
maxval
=
offset_input_data
[
input_data_index
];
maxidx
=
input_data_index
;
}
}
output_data
[
index
]
=
maxval
;
if
(
argmax_data
)
{
argmax_data
[
index
]
=
maxidx
;
}
}
output_data
[
index
]
=
maxval
;
if
(
argmax_data
)
{
argmax_data
[
index
]
=
maxidx
;
}
}
}
template
<
typename
T
>
__global__
void
GPUR
oi
PoolBackward
(
__global__
void
GPUR
OI
PoolBackward
(
const
int
nthreads
,
const
int64_t
*
input_rois
,
const
T
*
output_grad
,
...
...
@@ -117,7 +106,7 @@ __global__ void GPURoiPoolBackward(
int
c
=
(
index
/
pooled_width
/
pooled_height
)
%
channels
;
int
n
=
index
/
pooled_width
/
pooled_height
/
channels
;
const
int64_t
*
offset_input_rois
=
input_rois
+
n
*
5
;
const
int64_t
*
offset_input_rois
=
input_rois
+
n
*
kROISize
;
int
roi_batch_ind
=
offset_input_rois
[
0
];
int
input_offset
=
(
roi_batch_ind
*
channels
+
c
)
*
height
*
width
;
int
output_offset
=
(
n
*
channels
+
c
)
*
pooled_height
*
pooled_width
;
...
...
@@ -135,11 +124,11 @@ __global__ void GPURoiPoolBackward(
template
<
typename
Place
,
typename
T
>
class
GPUR
oi
PoolOpKernel
:
public
framework
::
OpKernel
<
T
>
{
class
GPUR
OI
PoolOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
in
=
ctx
.
Input
<
Tensor
>
(
"X"
);
auto
*
rois
=
ctx
.
Input
<
Tensor
>
(
"R
oi
s"
);
auto
*
rois
=
ctx
.
Input
<
Tensor
>
(
"R
OI
s"
);
auto
*
out
=
ctx
.
Output
<
Tensor
>
(
"Out"
);
auto
*
argmax
=
ctx
.
Output
<
Tensor
>
(
"Argmax"
);
...
...
@@ -147,31 +136,17 @@ class GPURoiPoolOpKernel : public framework::OpKernel<T> {
auto
pooled_width
=
ctx
.
Attr
<
int
>
(
"pooled_width"
);
auto
spatial_scale
=
ctx
.
Attr
<
float
>
(
"spatial_scale"
);
PADDLE_ENFORCE_GT
(
pooled_height
,
0
,
"The pooled output height must greater than 0"
);
PADDLE_ENFORCE_GT
(
pooled_width
,
0
,
"The pooled output width must greater than 0"
);
PADDLE_ENFORCE_GT
(
spatial_scale
,
0
,
"The spatial scale must greater than 0"
);
auto
in_dims
=
in
->
dims
();
auto
in_stride
=
framework
::
stride
(
in_dims
);
int
channels
=
in_dims
[
1
];
int
height
=
in_dims
[
2
];
int
width
=
in_dims
[
3
];
int
rois_num
=
rois
->
dims
()[
0
];
auto
out_dims
=
in_dims
;
out_dims
[
0
]
=
rois_num
;
out_dims
[
1
]
=
in_dims
[
1
];
out_dims
[
2
]
=
pooled_height
;
out_dims
[
3
]
=
pooled_width
;
size_t
rois_num
=
rois
->
dims
()[
0
];
out
->
Resize
(
out_dims
);
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
math
::
SetConstant
<
Place
,
T
>
set_zero
;
set_zero
(
ctx
.
device_context
(),
out
,
static_cast
<
T
>
(
0
));
argmax
->
Resize
(
out
->
dims
());
argmax
->
mutable_data
<
int64_t
>
(
ctx
.
GetPlace
());
math
::
SetConstant
<
Place
,
int64_t
>
set_init
;
set_init
(
ctx
.
device_context
(),
argmax
,
static_cast
<
int64_t
>
(
-
1
));
...
...
@@ -179,10 +154,10 @@ class GPURoiPoolOpKernel : public framework::OpKernel<T> {
if
(
rois_num
==
0
)
return
;
int
output_size
=
out
->
numel
();
int
blocks
=
PADDLE_OPERATORS_ROIPOOL_GET_BLOCKS
(
output_size
);
int
threads
=
PADDLE_OPERATORS_ROIPOOL_CUDA_NUM_THREADS
;
int
blocks
=
NumBlocks
(
output_size
);
int
threads
=
kNumCUDAThreads
;
GPUR
oi
PoolForward
<
T
>
GPUR
OI
PoolForward
<
T
>
<<<
blocks
,
threads
,
0
,
ctx
.
cuda_device_context
().
stream
()
>>>
(
output_size
,
in
->
data
<
T
>
(),
...
...
@@ -195,17 +170,15 @@ class GPURoiPoolOpKernel : public framework::OpKernel<T> {
pooled_width
,
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
()),
argmax
->
mutable_data
<
int64_t
>
(
ctx
.
GetPlace
()));
return
;
}
};
template
<
typename
Place
,
typename
T
>
class
GPUR
oi
PoolGradOpKernel
:
public
framework
::
OpKernel
<
T
>
{
class
GPUR
OI
PoolGradOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
in
=
ctx
.
Input
<
Tensor
>
(
"X"
);
auto
*
rois
=
ctx
.
Input
<
Tensor
>
(
"R
oi
s"
);
auto
*
rois
=
ctx
.
Input
<
Tensor
>
(
"R
OI
s"
);
auto
*
argmax
=
ctx
.
Input
<
Tensor
>
(
"Argmax"
);
auto
*
out_grad
=
...
...
@@ -217,23 +190,22 @@ class GPURoiPoolGradOpKernel : public framework::OpKernel<T> {
auto
pooled_width
=
ctx
.
Attr
<
int
>
(
"pooled_width"
);
auto
spatial_scale
=
ctx
.
Attr
<
float
>
(
"spatial_scale"
);
in
t
rois_num
=
rois
->
dims
()[
0
];
size_
t
rois_num
=
rois
->
dims
()[
0
];
int
channels
=
in
->
dims
()[
1
];
int
height
=
in
->
dims
()[
2
];
int
width
=
in
->
dims
()[
3
];
if
(
x_grad
)
{
x_grad
->
Resize
(
in
->
dims
());
x_grad
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
math
::
SetConstant
<
Place
,
T
>
set_zero
;
set_zero
(
ctx
.
device_context
(),
x_grad
,
static_cast
<
T
>
(
0
));
int
output_grad_size
=
out_grad
->
numel
();
int
blocks
=
PADDLE_OPERATORS_ROIPOOL_GET_BLOCKS
(
output_grad_size
);
int
threads
=
PADDLE_OPERATORS_ROIPOOL_CUDA_NUM_THREADS
;
int
blocks
=
NumBlocks
(
output_grad_size
);
int
threads
=
kNumCUDAThreads
;
if
(
output_grad_size
>
0
)
{
GPUR
oi
PoolBackward
<
T
>
GPUR
OI
PoolBackward
<
T
>
<<<
blocks
,
threads
,
0
,
ctx
.
cuda_device_context
().
stream
()
>>>
(
output_grad_size
,
rois
->
data
<
int64_t
>
(),
...
...
@@ -248,7 +220,6 @@ class GPURoiPoolGradOpKernel : public framework::OpKernel<T> {
pooled_width
,
x_grad
->
mutable_data
<
T
>
(
ctx
.
GetPlace
()));
}
return
;
}
}
};
...
...
@@ -259,7 +230,7 @@ class GPURoiPoolGradOpKernel : public framework::OpKernel<T> {
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_GPU_KERNEL
(
roi_pool
,
ops
::
GPUR
oi
PoolOpKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
);
ops
::
GPUR
OI
PoolOpKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
);
REGISTER_OP_GPU_KERNEL
(
roi_pool_grad
,
ops
::
GPUR
oi
PoolGradOpKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
);
ops
::
GPUR
OI
PoolGradOpKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
);
paddle/operators/roi_pool_op.h
浏览文件 @
ef905598
...
...
@@ -25,11 +25,11 @@ using LoDTensor = framework::LoDTensor;
using
LoD
=
framework
::
LoD
;
template
<
typename
Place
,
typename
T
>
class
CPUR
oi
PoolOpKernel
:
public
framework
::
OpKernel
<
T
>
{
class
CPUR
OI
PoolOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
in
=
ctx
.
Input
<
Tensor
>
(
"X"
);
auto
*
rois
=
ctx
.
Input
<
Tensor
>
(
"R
oi
s"
);
auto
*
rois
=
ctx
.
Input
<
Tensor
>
(
"R
OI
s"
);
auto
*
out
=
ctx
.
Output
<
Tensor
>
(
"Out"
);
auto
*
argmax
=
ctx
.
Output
<
Tensor
>
(
"Argmax"
);
...
...
@@ -37,13 +37,6 @@ class CPURoiPoolOpKernel : public framework::OpKernel<T> {
auto
pooled_width
=
ctx
.
Attr
<
int
>
(
"pooled_width"
);
auto
spatial_scale
=
ctx
.
Attr
<
float
>
(
"spatial_scale"
);
PADDLE_ENFORCE_GT
(
pooled_height
,
0
,
"The pooled output height must greater than 0"
);
PADDLE_ENFORCE_GT
(
pooled_width
,
0
,
"The pooled output width must greater than 0"
);
PADDLE_ENFORCE_GT
(
spatial_scale
,
0
,
"The spatial scale must greater than 0"
);
auto
in_dims
=
in
->
dims
();
int
batch_size
=
in_dims
[
0
];
int
channels
=
in_dims
[
1
];
...
...
@@ -51,18 +44,10 @@ class CPURoiPoolOpKernel : public framework::OpKernel<T> {
int
width
=
in_dims
[
3
];
int
rois_num
=
rois
->
dims
()[
0
];
auto
out_dims
=
in_dims
;
out_dims
[
0
]
=
rois_num
;
out_dims
[
1
]
=
channels
;
out_dims
[
2
]
=
pooled_height
;
out_dims
[
3
]
=
pooled_width
;
out
->
Resize
(
out_dims
);
argmax
->
Resize
(
out
->
dims
());
auto
in_stride
=
framework
::
stride
(
in_dims
);
auto
argmax_stride
=
framework
::
stride
(
argmax
->
dims
());
auto
roi_stride
=
framework
::
stride
(
rois
->
dims
());
auto
out_stride
=
framework
::
stride
(
out
_dims
);
auto
out_stride
=
framework
::
stride
(
out
->
dims
()
);
const
T
*
input_data
=
in
->
data
<
T
>
();
const
int64_t
*
rois_data
=
rois
->
data
<
int64_t
>
();
...
...
@@ -124,7 +109,8 @@ class CPURoiPoolOpKernel : public framework::OpKernel<T> {
// Define an empty pooling region to be zero
bool
is_empty
=
(
hend
<=
hstart
)
||
(
wend
<=
wstart
);
output_data
[
pool_index
]
=
is_empty
?
0
:
-
__FLT_MAX__
;
output_data
[
pool_index
]
=
is_empty
?
0
:
-
std
::
numeric_limits
<
float
>::
max
();
for
(
int
h
=
hstart
;
h
<
hend
;
++
h
)
{
for
(
int
w
=
wstart
;
w
<
wend
;
++
w
)
{
...
...
@@ -150,11 +136,11 @@ class CPURoiPoolOpKernel : public framework::OpKernel<T> {
};
template
<
typename
Place
,
typename
T
>
class
CPUR
oi
PoolGradOpKernel
:
public
framework
::
OpKernel
<
T
>
{
class
CPUR
OI
PoolGradOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
in
=
ctx
.
Input
<
Tensor
>
(
"X"
);
auto
*
rois
=
ctx
.
Input
<
Tensor
>
(
"R
oi
s"
);
auto
*
rois
=
ctx
.
Input
<
Tensor
>
(
"R
OI
s"
);
auto
*
argmax
=
ctx
.
Input
<
Tensor
>
(
"Argmax"
);
auto
*
out_grad
=
...
...
@@ -188,9 +174,9 @@ class CPURoiPoolGradOpKernel : public framework::OpKernel<T> {
for
(
size_t
n
=
0
;
n
<
rois_num
;
++
n
)
{
size_t
roi_batch_idx
=
rois_data
[
0
];
T
*
batch_grad_data
=
x_grad_data
+
batch_offset
*
roi_batch_idx
;
for
(
size_
t
c
=
0
;
c
<
channels
;
++
c
)
{
for
(
size_
t
ph
=
0
;
ph
<
pooled_height
;
++
ph
)
{
for
(
size_
t
pw
=
0
;
pw
<
pooled_width
;
++
pw
)
{
for
(
in
t
c
=
0
;
c
<
channels
;
++
c
)
{
for
(
in
t
ph
=
0
;
ph
<
pooled_height
;
++
ph
)
{
for
(
in
t
pw
=
0
;
pw
<
pooled_width
;
++
pw
)
{
size_t
pool_index
=
ph
*
pooled_width
+
pw
;
if
(
argmax_data
[
pool_index
]
>=
0
)
{
...
...
python/paddle/v2/fluid/tests/test_roi_pool_op.py
浏览文件 @
ef905598
...
...
@@ -4,8 +4,7 @@ import math
import
sys
from
op_test
import
OpTest
class
TestSequenceSliceOp
(
OpTest
):
class
TestROIPoolOp
(
OpTest
):
def
set_data
(
self
):
self
.
init_test_case
()
self
.
make_rois
()
...
...
@@ -13,7 +12,7 @@ class TestSequenceSliceOp(OpTest):
self
.
inputs
=
{
'X'
:
self
.
x
,
'R
oi
s'
:
self
.
rois
}
'R
OI
s'
:
self
.
rois
}
self
.
attrs
=
{
'spatial_scale'
:
self
.
spatial_scale
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录