Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
ef905598
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
ef905598
编写于
11月 22, 2017
作者:
W
wanghaox
提交者:
wanghaox
11月 24, 2017
浏览文件
操作
浏览文件
下载
差异文件
fix some code issues
上级
36dd770a
6ab78aee
变更
5
隐藏空白更改
内联
并排
Showing
5 changed file
with
170 addition
and
183 deletion
+170
-183
paddle/capi/examples/model_inference/dense/main.c
paddle/capi/examples/model_inference/dense/main.c
+17
-16
paddle/operators/roi_pool_op.cc
paddle/operators/roi_pool_op.cc
+66
-36
paddle/operators/roi_pool_op.cu
paddle/operators/roi_pool_op.cu
+75
-104
paddle/operators/roi_pool_op.h
paddle/operators/roi_pool_op.h
+10
-24
python/paddle/v2/fluid/tests/test_roi_pool_op.py
python/paddle/v2/fluid/tests/test_roi_pool_op.py
+2
-3
未找到文件。
paddle/capi/examples/model_inference/dense/main.c
浏览文件 @
ef905598
#include <paddle/capi.h>
#include <paddle/capi.h>
#include <time.h>
#include <time.h>
#include "../common/common.h"
#include "../common/common.h"
#define CONFIG_BIN "./trainer_config.bin"
#define CONFIG_BIN "./trainer_config.bin"
...
@@ -27,20 +28,19 @@ int main() {
...
@@ -27,20 +28,19 @@ int main() {
CHECK
(
paddle_arguments_resize
(
in_args
,
1
));
CHECK
(
paddle_arguments_resize
(
in_args
,
1
));
// Create input matrix.
// Create input matrix.
paddle_matrix
mat
=
paddle_matrix_create
(
/* sample_num */
1
0
,
paddle_matrix
mat
=
paddle_matrix_create
(
/* sample_num */
1
,
/* size */
784
,
/* size */
784
,
/* useGPU */
false
);
/* useGPU */
false
);
srand
(
time
(
0
));
srand
(
time
(
0
));
std
::
vector
<
paddle_real
>
input
;
paddle_real
*
array
;
input
.
resize
(
784
*
10
);
// Get First row.
CHECK
(
paddle_matrix_get_row
(
mat
,
0
,
&
array
));
for
(
int
i
=
0
;
i
<
input
.
size
()
;
++
i
)
{
for
(
int
i
=
0
;
i
<
784
;
++
i
)
{
input
[
i
]
=
rand
()
/
((
float
)
RAND_MAX
);
array
[
i
]
=
rand
()
/
((
float
)
RAND_MAX
);
}
}
// Set value for the input matrix
CHECK
(
paddle_matrix_set_value
(
mat
,
input
.
data
()));
CHECK
(
paddle_arguments_set_value
(
in_args
,
0
,
mat
));
CHECK
(
paddle_arguments_set_value
(
in_args
,
0
,
mat
));
...
@@ -53,17 +53,18 @@ int main() {
...
@@ -53,17 +53,18 @@ int main() {
CHECK
(
paddle_arguments_get_value
(
out_args
,
0
,
prob
));
CHECK
(
paddle_arguments_get_value
(
out_args
,
0
,
prob
));
std
::
std
::
vector
<
paddle_real
>
result
;
uint64_t
height
;
int
height
;
uint64_t
width
;
int
width
;
CHECK
(
paddle_matrix_get_shape
(
prob
,
&
height
,
&
width
);
CHECK
(
paddle_matrix_get_shape
(
prob
,
&
height
,
&
width
));
result
.
resize
(
height
*
width
);
CHECK
(
paddle_matrix_get_row
(
prob
,
0
,
&
array
));
CHECK
(
paddle_matrix_get_value
(
prob
,
result
.
data
()));
printf
(
"Prob: "
);
printf
(
"Prob:
\n
"
);
for
(
int
i
=
0
;
i
<
height
*
width
;
++
i
)
{
for
(
int
i
=
0
;
i
<
height
*
width
;
++
i
)
{
printf
(
"%.2f "
,
result
[
i
]);
printf
(
"%.4f "
,
array
[
i
]);
if
((
i
+
1
)
%
width
==
0
)
{
printf
(
"
\n
"
);
}
}
}
printf
(
"
\n
"
);
printf
(
"
\n
"
);
...
...
paddle/operators/roi_pool_op.cc
浏览文件 @
ef905598
...
@@ -17,24 +17,47 @@ limitations under the License. */
...
@@ -17,24 +17,47 @@ limitations under the License. */
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
class
R
oi
PoolOp
:
public
framework
::
OperatorWithKernel
{
class
R
OI
PoolOp
:
public
framework
::
OperatorWithKernel
{
public:
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"X"
),
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"X"
),
"Input(X) of R
oi
PoolOp should not be null."
);
"Input(X) of R
OI
PoolOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"R
oi
s"
),
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"R
OI
s"
),
"Input(R
ois) of Roi
PoolOp should not be null."
);
"Input(R
OIs) of ROI
PoolOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Out"
),
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Out"
),
"Output(Out) of R
oi
PoolOp should not be null."
);
"Output(Out) of R
OI
PoolOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Argmax"
),
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Argmax"
),
"Output(Argmax) of R
oi
PoolOp should not be null."
);
"Output(Argmax) of R
OI
PoolOp should not be null."
);
auto
input_dims
=
ctx
->
GetInputDim
(
"X"
);
auto
input_dims
=
ctx
->
GetInputDim
(
"X"
);
auto
rois_dims
=
ctx
->
GetInputDim
(
"ROIs"
);
// Initialize the output's dims to maximum,
// and re-set to real dims by the value of Rois at kernel
PADDLE_ENFORCE
(
input_dims
.
size
()
==
4
,
ctx
->
SetOutputDim
(
"Out"
,
input_dims
);
"The format of input tensor is NCHW."
);
PADDLE_ENFORCE
(
rois_dims
.
size
()
==
2
,
"ROIs should be a 2-D tensor of shape (num_rois, 5)"
"given as [[batch_id, x1, y1, x2, y2], …]."
);
int
pooled_height
=
ctx
->
Attrs
().
Get
<
int
>
(
"pooled_height"
);
int
pooled_width
=
ctx
->
Attrs
().
Get
<
int
>
(
"pooled_width"
);
float
spatial_scale
=
ctx
->
Attrs
().
Get
<
float
>
(
"spatial_scale"
);
PADDLE_ENFORCE_GT
(
pooled_height
,
0
,
"The pooled output height must greater than 0"
);
PADDLE_ENFORCE_GT
(
pooled_width
,
0
,
"The pooled output width must greater than 0"
);
PADDLE_ENFORCE_GT
(
spatial_scale
,
0.0
f
,
"The spatial scale must greater than 0"
);
auto
out_dims
=
input_dims
;
out_dims
[
0
]
=
rois_dims
[
0
];
out_dims
[
1
]
=
input_dims
[
1
];
out_dims
[
2
]
=
pooled_height
;
out_dims
[
3
]
=
pooled_width
;
ctx
->
SetOutputDim
(
"Out"
,
out_dims
);
ctx
->
SetOutputDim
(
"Argmax"
,
out_dims
);
}
}
protected:
protected:
...
@@ -46,7 +69,7 @@ class RoiPoolOp : public framework::OperatorWithKernel {
...
@@ -46,7 +69,7 @@ class RoiPoolOp : public framework::OperatorWithKernel {
}
}
};
};
class
R
oi
PoolGradOp
:
public
framework
::
OperatorWithKernel
{
class
R
OI
PoolGradOp
:
public
framework
::
OperatorWithKernel
{
public:
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
...
@@ -67,44 +90,51 @@ class RoiPoolGradOp : public framework::OperatorWithKernel {
...
@@ -67,44 +90,51 @@ class RoiPoolGradOp : public framework::OperatorWithKernel {
}
}
};
};
class
R
oi
PoolOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
class
R
OI
PoolOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
public:
public:
R
oi
PoolOpMaker
(
framework
::
OpProto
*
proto
,
R
OI
PoolOpMaker
(
framework
::
OpProto
*
proto
,
framework
::
OpAttrChecker
*
op_checker
)
framework
::
OpAttrChecker
*
op_checker
)
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"X"
,
AddInput
(
"X"
,
"(Tensor), "
"(Tensor), "
"the input of RoiPoolOp."
);
"the input of ROIPoolOp. "
AddInput
(
"Rois"
,
"The format of input tensor is NCHW. Where N is batch size, "
"C is the number of input channels, "
"H is the height of the feature, and "
"W is the width of the feature."
);
AddInput
(
"ROIs"
,
"(Tensor), "
"(Tensor), "
"RoIs (Regions of Interest) to pool over. "
"ROIs (Regions of Interest) to pool over. "
"Should be a 2-D tensor of shape (num_rois, 5)"
"should be a 2-D tensor of shape (num_rois, 5)"
"given as [[batch_id, x1, y1, x2, y2], …]."
);
"given as [[batch_id, x1, y1, x2, y2], …]. "
"Where batch_id is the id of the data, "
"(x1, y1) is the top left coordinates, and "
"(x2, y2) is the bottom right coordinates."
);
AddOutput
(
"Out"
,
AddOutput
(
"Out"
,
"(Tensor), "
"(Tensor), "
"RoI pooled output 4-D tensor of
shape "
"The output of ROIPoolOp is a 4-D tensor with
shape "
"(num_rois, channels, pooled_h, pooled_w)."
);
"(num_rois, channels, pooled_h, pooled_w)."
);
AddOutput
(
"Argmax"
,
AddOutput
(
"Argmax"
,
"(Tensor), "
"(Tensor), "
"Argmaxes corresponding to indices in X used "
"Argmaxes corresponding to indices in X used "
"for gradient computation. Only output "
"for gradient computation. Only output "
"if arg “is_test” is false."
).
AsIntermediate
();
"if arg “is_test” is false."
).
AsIntermediate
();
AddAttr
<
float
>
(
"spatial_scale"
,
AddAttr
<
float
>
(
"spatial_scale"
,
"(float, default 1.0), "
"(float, default 1.0), "
"Multiplicative spatial scale factor "
"Multiplicative spatial scale factor "
"to translate ROI coords from their input scale "
"to translate ROI coords from their input scale "
"to the scale used when pooling."
)
"to the scale used when pooling."
)
.
SetDefault
(
1.0
);
.
SetDefault
(
1.0
);
AddAttr
<
int
>
(
"pooled_height"
,
AddAttr
<
int
>
(
"pooled_height"
,
"(int, default 1), "
"(int, default 1), "
"The pooled output height."
)
"The pooled output height."
)
.
SetDefault
(
1
);
.
SetDefault
(
1
);
AddAttr
<
int
>
(
"pooled_width"
,
AddAttr
<
int
>
(
"pooled_width"
,
"(int, default 1), "
"(int, default 1), "
"The pooled output width."
)
"The pooled output width."
)
.
SetDefault
(
1
);
.
SetDefault
(
1
);
AddComment
(
R"DOC(
AddComment
(
R"DOC(
R
oi
Pool operator
R
OI
Pool operator
ROI Pooling for Faster-RCNN. The link below is a further introduction:
ROI Pooling for Faster-RCNN. The link below is a further introduction:
https://stackoverflow.com/questions/43430056/what-is-roi-layer-in-fast-rcnn
https://stackoverflow.com/questions/43430056/what-is-roi-layer-in-fast-rcnn
...
@@ -116,11 +146,11 @@ https://stackoverflow.com/questions/43430056/what-is-roi-layer-in-fast-rcnn
...
@@ -116,11 +146,11 @@ https://stackoverflow.com/questions/43430056/what-is-roi-layer-in-fast-rcnn
}
// namespace paddle
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
namespace
ops
=
paddle
::
operators
;
REGISTER_OP
(
roi_pool
,
ops
::
R
oiPoolOp
,
ops
::
Roi
PoolOpMaker
,
REGISTER_OP
(
roi_pool
,
ops
::
R
OIPoolOp
,
ops
::
ROI
PoolOpMaker
,
roi_pool_grad
,
ops
::
R
oi
PoolGradOp
);
roi_pool_grad
,
ops
::
R
OI
PoolGradOp
);
REGISTER_OP_CPU_KERNEL
(
REGISTER_OP_CPU_KERNEL
(
roi_pool
,
roi_pool
,
ops
::
CPUR
oi
PoolOpKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
ops
::
CPUR
OI
PoolOpKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
REGISTER_OP_CPU_KERNEL
(
REGISTER_OP_CPU_KERNEL
(
roi_pool_grad
,
roi_pool_grad
,
ops
::
CPUR
oi
PoolGradOpKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
ops
::
CPUR
OI
PoolGradOpKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
paddle/operators/roi_pool_op.cu
浏览文件 @
ef905598
...
@@ -12,91 +12,80 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,91 +12,80 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include "paddle/platform/cuda_helper.h"
#include "paddle/operators/roi_pool_op.h"
#include "paddle/operators/roi_pool_op.h"
#include "paddle/platform/cuda_helper.h"
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
#define FLT_MAX __FLT_MAX__
static
constexpr
int
kNumCUDAThreads
=
512
;
static
constexpr
int
kNumMaxinumNumBlocks
=
4096
;
static
constexpr
int
kROISize
=
5
;
constexpr
int
PADDLE_OPERATORS_ROIPOOL_CUDA_NUM_THREADS
=
512
;
static
inline
int
NumBlocks
(
const
int
N
)
{
constexpr
int
PADDLE_OPERATORS_ROIPOOL_MAXIMUM_NUM_BLOCKS
=
4096
;
return
std
::
min
((
N
+
kNumCUDAThreads
-
1
)
/
kNumCUDAThreads
,
kNumMaxinumNumBlocks
);
}
inline
int
PADDLE_OPERATORS_ROIPOOL_GET_BLOCKS
(
const
int
N
)
{
template
<
typename
T
>
return
std
::
min
((
N
+
PADDLE_OPERATORS_ROIPOOL_CUDA_NUM_THREADS
-
1
)
__global__
void
GPUROIPoolForward
(
/
PADDLE_OPERATORS_ROIPOOL_CUDA_NUM_THREADS
,
const
int
nthreads
,
const
T
*
input_data
,
const
int64_t
*
input_rois
,
PADDLE_OPERATORS_ROIPOOL_MAXIMUM_NUM_BLOCKS
);
const
float
spatial_scale
,
const
int
channels
,
const
int
height
,
}
const
int
width
,
const
int
pooled_height
,
const
int
pooled_width
,
T
*
output_data
,
int64_t
*
argmax_data
)
{
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
offset
=
blockDim
.
x
*
gridDim
.
x
;
for
(
size_t
i
=
index
;
i
<
nthreads
;
i
+=
offset
)
{
int
pw
=
index
%
pooled_width
;
int
ph
=
(
index
/
pooled_width
)
%
pooled_height
;
int
c
=
(
index
/
pooled_width
/
pooled_height
)
%
channels
;
int
n
=
index
/
pooled_width
/
pooled_height
/
channels
;
template
<
typename
T
>
const
int64_t
*
offset_input_rois
=
input_rois
+
n
*
kROISize
;
__global__
void
GPURoiPoolForward
(
int
roi_batch_ind
=
offset_input_rois
[
0
];
const
int
nthreads
,
int
roi_start_w
=
round
(
offset_input_rois
[
1
]
*
spatial_scale
);
const
T
*
input_data
,
int
roi_start_h
=
round
(
offset_input_rois
[
2
]
*
spatial_scale
);
const
int64_t
*
input_rois
,
int
roi_end_w
=
round
(
offset_input_rois
[
3
]
*
spatial_scale
);
const
float
spatial_scale
,
int
roi_end_h
=
round
(
offset_input_rois
[
4
]
*
spatial_scale
);
const
int
channels
,
const
int
height
,
int
roi_width
=
max
(
roi_end_w
-
roi_start_w
+
1
,
1
);
const
int
width
,
int
roi_height
=
max
(
roi_end_h
-
roi_start_h
+
1
,
1
);
const
int
pooled_height
,
T
bin_size_h
=
static_cast
<
T
>
(
roi_height
)
/
static_cast
<
T
>
(
pooled_height
);
const
int
pooled_width
,
T
bin_size_w
=
static_cast
<
T
>
(
roi_width
)
/
static_cast
<
T
>
(
pooled_width
);
T
*
output_data
,
int64_t
*
argmax_data
)
{
int
hstart
=
static_cast
<
int
>
(
floor
(
static_cast
<
T
>
(
ph
)
*
bin_size_h
));
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
wstart
=
static_cast
<
int
>
(
floor
(
static_cast
<
T
>
(
pw
)
*
bin_size_w
));
int
offset
=
blockDim
.
x
*
gridDim
.
x
;
int
hend
=
static_cast
<
int
>
(
ceil
(
static_cast
<
T
>
(
ph
+
1
)
*
bin_size_h
));
for
(
size_t
i
=
index
;
i
<
nthreads
;
i
+=
offset
)
{
int
wend
=
static_cast
<
int
>
(
ceil
(
static_cast
<
T
>
(
pw
+
1
)
*
bin_size_w
));
int
pw
=
index
%
pooled_width
;
int
ph
=
(
index
/
pooled_width
)
%
pooled_height
;
hstart
=
min
(
max
(
hstart
+
roi_start_h
,
0
),
height
);
int
c
=
(
index
/
pooled_width
/
pooled_height
)
%
channels
;
hend
=
min
(
max
(
hend
+
roi_start_h
,
0
),
height
);
int
n
=
index
/
pooled_width
/
pooled_height
/
channels
;
wstart
=
min
(
max
(
wstart
+
roi_start_w
,
0
),
width
);
wend
=
min
(
max
(
wend
+
roi_start_w
,
0
),
width
);
const
int64_t
*
offset_input_rois
=
input_rois
+
n
*
5
;
bool
is_empty
=
(
hend
<=
hstart
)
||
(
wend
<=
wstart
);
int
roi_batch_ind
=
offset_input_rois
[
0
];
int
roi_start_w
=
round
(
offset_input_rois
[
1
]
*
spatial_scale
);
T
maxval
=
is_empty
?
0
:
-
std
::
numeric_limits
<
float
>::
max
();
int
roi_start_h
=
round
(
offset_input_rois
[
2
]
*
spatial_scale
);
int
maxidx
=
-
1
;
int
roi_end_w
=
round
(
offset_input_rois
[
3
]
*
spatial_scale
);
const
T
*
offset_input_data
=
int
roi_end_h
=
round
(
offset_input_rois
[
4
]
*
spatial_scale
);
input_data
+
(
roi_batch_ind
*
channels
+
c
)
*
height
*
width
;
for
(
int
h
=
hstart
;
h
<
hend
;
++
h
)
{
int
roi_width
=
max
(
roi_end_w
-
roi_start_w
+
1
,
1
);
for
(
int
w
=
wstart
;
w
<
wend
;
++
w
)
{
int
roi_height
=
max
(
roi_end_h
-
roi_start_h
+
1
,
1
);
int
input_data_index
=
h
*
width
+
w
;
T
bin_size_h
=
static_cast
<
T
>
(
roi_height
)
if
(
offset_input_data
[
input_data_index
]
>
maxval
)
{
/
static_cast
<
T
>
(
pooled_height
);
maxval
=
offset_input_data
[
input_data_index
];
T
bin_size_w
=
static_cast
<
T
>
(
roi_width
)
maxidx
=
input_data_index
;
/
static_cast
<
T
>
(
pooled_width
);
int
hstart
=
static_cast
<
int
>
(
floor
(
static_cast
<
T
>
(
ph
)
*
bin_size_h
));
int
wstart
=
static_cast
<
int
>
(
floor
(
static_cast
<
T
>
(
pw
)
*
bin_size_w
));
int
hend
=
static_cast
<
int
>
(
ceil
(
static_cast
<
T
>
(
ph
+
1
)
*
bin_size_h
));
int
wend
=
static_cast
<
int
>
(
ceil
(
static_cast
<
T
>
(
pw
+
1
)
*
bin_size_w
));
hstart
=
min
(
max
(
hstart
+
roi_start_h
,
0
),
height
);
hend
=
min
(
max
(
hend
+
roi_start_h
,
0
),
height
);
wstart
=
min
(
max
(
wstart
+
roi_start_w
,
0
),
width
);
wend
=
min
(
max
(
wend
+
roi_start_w
,
0
),
width
);
bool
is_empty
=
(
hend
<=
hstart
)
||
(
wend
<=
wstart
);
T
maxval
=
is_empty
?
0
:
-
FLT_MAX
;
int
maxidx
=
-
1
;
const
T
*
offset_input_data
=
input_data
+
(
roi_batch_ind
*
channels
+
c
)
*
height
*
width
;
for
(
int
h
=
hstart
;
h
<
hend
;
++
h
)
{
for
(
int
w
=
wstart
;
w
<
wend
;
++
w
)
{
int
input_data_index
=
h
*
width
+
w
;
if
(
offset_input_data
[
input_data_index
]
>
maxval
)
{
maxval
=
offset_input_data
[
input_data_index
];
maxidx
=
input_data_index
;
}
}
}
}
}
output_data
[
index
]
=
maxval
;
}
if
(
argmax_data
)
{
output_data
[
index
]
=
maxval
;
argmax_data
[
index
]
=
maxidx
;
if
(
argmax_data
)
{
}
argmax_data
[
index
]
=
maxidx
;
}
}
}
}
}
template
<
typename
T
>
template
<
typename
T
>
__global__
void
GPUR
oi
PoolBackward
(
__global__
void
GPUR
OI
PoolBackward
(
const
int
nthreads
,
const
int
nthreads
,
const
int64_t
*
input_rois
,
const
int64_t
*
input_rois
,
const
T
*
output_grad
,
const
T
*
output_grad
,
...
@@ -117,7 +106,7 @@ __global__ void GPURoiPoolBackward(
...
@@ -117,7 +106,7 @@ __global__ void GPURoiPoolBackward(
int
c
=
(
index
/
pooled_width
/
pooled_height
)
%
channels
;
int
c
=
(
index
/
pooled_width
/
pooled_height
)
%
channels
;
int
n
=
index
/
pooled_width
/
pooled_height
/
channels
;
int
n
=
index
/
pooled_width
/
pooled_height
/
channels
;
const
int64_t
*
offset_input_rois
=
input_rois
+
n
*
5
;
const
int64_t
*
offset_input_rois
=
input_rois
+
n
*
kROISize
;
int
roi_batch_ind
=
offset_input_rois
[
0
];
int
roi_batch_ind
=
offset_input_rois
[
0
];
int
input_offset
=
(
roi_batch_ind
*
channels
+
c
)
*
height
*
width
;
int
input_offset
=
(
roi_batch_ind
*
channels
+
c
)
*
height
*
width
;
int
output_offset
=
(
n
*
channels
+
c
)
*
pooled_height
*
pooled_width
;
int
output_offset
=
(
n
*
channels
+
c
)
*
pooled_height
*
pooled_width
;
...
@@ -135,11 +124,11 @@ __global__ void GPURoiPoolBackward(
...
@@ -135,11 +124,11 @@ __global__ void GPURoiPoolBackward(
template
<
typename
Place
,
typename
T
>
template
<
typename
Place
,
typename
T
>
class
GPUR
oi
PoolOpKernel
:
public
framework
::
OpKernel
<
T
>
{
class
GPUR
OI
PoolOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
in
=
ctx
.
Input
<
Tensor
>
(
"X"
);
auto
*
in
=
ctx
.
Input
<
Tensor
>
(
"X"
);
auto
*
rois
=
ctx
.
Input
<
Tensor
>
(
"R
oi
s"
);
auto
*
rois
=
ctx
.
Input
<
Tensor
>
(
"R
OI
s"
);
auto
*
out
=
ctx
.
Output
<
Tensor
>
(
"Out"
);
auto
*
out
=
ctx
.
Output
<
Tensor
>
(
"Out"
);
auto
*
argmax
=
ctx
.
Output
<
Tensor
>
(
"Argmax"
);
auto
*
argmax
=
ctx
.
Output
<
Tensor
>
(
"Argmax"
);
...
@@ -147,31 +136,17 @@ class GPURoiPoolOpKernel : public framework::OpKernel<T> {
...
@@ -147,31 +136,17 @@ class GPURoiPoolOpKernel : public framework::OpKernel<T> {
auto
pooled_width
=
ctx
.
Attr
<
int
>
(
"pooled_width"
);
auto
pooled_width
=
ctx
.
Attr
<
int
>
(
"pooled_width"
);
auto
spatial_scale
=
ctx
.
Attr
<
float
>
(
"spatial_scale"
);
auto
spatial_scale
=
ctx
.
Attr
<
float
>
(
"spatial_scale"
);
PADDLE_ENFORCE_GT
(
pooled_height
,
0
,
"The pooled output height must greater than 0"
);
PADDLE_ENFORCE_GT
(
pooled_width
,
0
,
"The pooled output width must greater than 0"
);
PADDLE_ENFORCE_GT
(
spatial_scale
,
0
,
"The spatial scale must greater than 0"
);
auto
in_dims
=
in
->
dims
();
auto
in_dims
=
in
->
dims
();
auto
in_stride
=
framework
::
stride
(
in_dims
);
auto
in_stride
=
framework
::
stride
(
in_dims
);
int
channels
=
in_dims
[
1
];
int
channels
=
in_dims
[
1
];
int
height
=
in_dims
[
2
];
int
height
=
in_dims
[
2
];
int
width
=
in_dims
[
3
];
int
width
=
in_dims
[
3
];
int
rois_num
=
rois
->
dims
()[
0
];
size_t
rois_num
=
rois
->
dims
()[
0
];
auto
out_dims
=
in_dims
;
out_dims
[
0
]
=
rois_num
;
out_dims
[
1
]
=
in_dims
[
1
];
out_dims
[
2
]
=
pooled_height
;
out_dims
[
3
]
=
pooled_width
;
out
->
Resize
(
out_dims
);
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
math
::
SetConstant
<
Place
,
T
>
set_zero
;
math
::
SetConstant
<
Place
,
T
>
set_zero
;
set_zero
(
ctx
.
device_context
(),
out
,
static_cast
<
T
>
(
0
));
set_zero
(
ctx
.
device_context
(),
out
,
static_cast
<
T
>
(
0
));
argmax
->
Resize
(
out
->
dims
());
argmax
->
mutable_data
<
int64_t
>
(
ctx
.
GetPlace
());
argmax
->
mutable_data
<
int64_t
>
(
ctx
.
GetPlace
());
math
::
SetConstant
<
Place
,
int64_t
>
set_init
;
math
::
SetConstant
<
Place
,
int64_t
>
set_init
;
set_init
(
ctx
.
device_context
(),
argmax
,
static_cast
<
int64_t
>
(
-
1
));
set_init
(
ctx
.
device_context
(),
argmax
,
static_cast
<
int64_t
>
(
-
1
));
...
@@ -179,10 +154,10 @@ class GPURoiPoolOpKernel : public framework::OpKernel<T> {
...
@@ -179,10 +154,10 @@ class GPURoiPoolOpKernel : public framework::OpKernel<T> {
if
(
rois_num
==
0
)
return
;
if
(
rois_num
==
0
)
return
;
int
output_size
=
out
->
numel
();
int
output_size
=
out
->
numel
();
int
blocks
=
PADDLE_OPERATORS_ROIPOOL_GET_BLOCKS
(
output_size
);
int
blocks
=
NumBlocks
(
output_size
);
int
threads
=
PADDLE_OPERATORS_ROIPOOL_CUDA_NUM_THREADS
;
int
threads
=
kNumCUDAThreads
;
GPUR
oi
PoolForward
<
T
>
GPUR
OI
PoolForward
<
T
>
<<<
blocks
,
threads
,
0
,
ctx
.
cuda_device_context
().
stream
()
>>>
(
<<<
blocks
,
threads
,
0
,
ctx
.
cuda_device_context
().
stream
()
>>>
(
output_size
,
output_size
,
in
->
data
<
T
>
(),
in
->
data
<
T
>
(),
...
@@ -195,17 +170,15 @@ class GPURoiPoolOpKernel : public framework::OpKernel<T> {
...
@@ -195,17 +170,15 @@ class GPURoiPoolOpKernel : public framework::OpKernel<T> {
pooled_width
,
pooled_width
,
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
()),
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
()),
argmax
->
mutable_data
<
int64_t
>
(
ctx
.
GetPlace
()));
argmax
->
mutable_data
<
int64_t
>
(
ctx
.
GetPlace
()));
return
;
}
}
};
};
template
<
typename
Place
,
typename
T
>
template
<
typename
Place
,
typename
T
>
class
GPUR
oi
PoolGradOpKernel
:
public
framework
::
OpKernel
<
T
>
{
class
GPUR
OI
PoolGradOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
in
=
ctx
.
Input
<
Tensor
>
(
"X"
);
auto
*
in
=
ctx
.
Input
<
Tensor
>
(
"X"
);
auto
*
rois
=
ctx
.
Input
<
Tensor
>
(
"R
oi
s"
);
auto
*
rois
=
ctx
.
Input
<
Tensor
>
(
"R
OI
s"
);
auto
*
argmax
=
ctx
.
Input
<
Tensor
>
(
"Argmax"
);
auto
*
argmax
=
ctx
.
Input
<
Tensor
>
(
"Argmax"
);
auto
*
out_grad
=
auto
*
out_grad
=
...
@@ -217,23 +190,22 @@ class GPURoiPoolGradOpKernel : public framework::OpKernel<T> {
...
@@ -217,23 +190,22 @@ class GPURoiPoolGradOpKernel : public framework::OpKernel<T> {
auto
pooled_width
=
ctx
.
Attr
<
int
>
(
"pooled_width"
);
auto
pooled_width
=
ctx
.
Attr
<
int
>
(
"pooled_width"
);
auto
spatial_scale
=
ctx
.
Attr
<
float
>
(
"spatial_scale"
);
auto
spatial_scale
=
ctx
.
Attr
<
float
>
(
"spatial_scale"
);
in
t
rois_num
=
rois
->
dims
()[
0
];
size_
t
rois_num
=
rois
->
dims
()[
0
];
int
channels
=
in
->
dims
()[
1
];
int
channels
=
in
->
dims
()[
1
];
int
height
=
in
->
dims
()[
2
];
int
height
=
in
->
dims
()[
2
];
int
width
=
in
->
dims
()[
3
];
int
width
=
in
->
dims
()[
3
];
if
(
x_grad
)
{
if
(
x_grad
)
{
x_grad
->
Resize
(
in
->
dims
());
x_grad
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
x_grad
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
math
::
SetConstant
<
Place
,
T
>
set_zero
;
math
::
SetConstant
<
Place
,
T
>
set_zero
;
set_zero
(
ctx
.
device_context
(),
x_grad
,
static_cast
<
T
>
(
0
));
set_zero
(
ctx
.
device_context
(),
x_grad
,
static_cast
<
T
>
(
0
));
int
output_grad_size
=
out_grad
->
numel
();
int
output_grad_size
=
out_grad
->
numel
();
int
blocks
=
PADDLE_OPERATORS_ROIPOOL_GET_BLOCKS
(
output_grad_size
);
int
blocks
=
NumBlocks
(
output_grad_size
);
int
threads
=
PADDLE_OPERATORS_ROIPOOL_CUDA_NUM_THREADS
;
int
threads
=
kNumCUDAThreads
;
if
(
output_grad_size
>
0
)
{
if
(
output_grad_size
>
0
)
{
GPUR
oi
PoolBackward
<
T
>
GPUR
OI
PoolBackward
<
T
>
<<<
blocks
,
threads
,
0
,
ctx
.
cuda_device_context
().
stream
()
>>>
(
<<<
blocks
,
threads
,
0
,
ctx
.
cuda_device_context
().
stream
()
>>>
(
output_grad_size
,
output_grad_size
,
rois
->
data
<
int64_t
>
(),
rois
->
data
<
int64_t
>
(),
...
@@ -248,7 +220,6 @@ class GPURoiPoolGradOpKernel : public framework::OpKernel<T> {
...
@@ -248,7 +220,6 @@ class GPURoiPoolGradOpKernel : public framework::OpKernel<T> {
pooled_width
,
pooled_width
,
x_grad
->
mutable_data
<
T
>
(
ctx
.
GetPlace
()));
x_grad
->
mutable_data
<
T
>
(
ctx
.
GetPlace
()));
}
}
return
;
}
}
}
}
};
};
...
@@ -259,7 +230,7 @@ class GPURoiPoolGradOpKernel : public framework::OpKernel<T> {
...
@@ -259,7 +230,7 @@ class GPURoiPoolGradOpKernel : public framework::OpKernel<T> {
namespace
ops
=
paddle
::
operators
;
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_GPU_KERNEL
(
REGISTER_OP_GPU_KERNEL
(
roi_pool
,
roi_pool
,
ops
::
GPUR
oi
PoolOpKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
);
ops
::
GPUR
OI
PoolOpKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
);
REGISTER_OP_GPU_KERNEL
(
REGISTER_OP_GPU_KERNEL
(
roi_pool_grad
,
roi_pool_grad
,
ops
::
GPUR
oi
PoolGradOpKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
);
ops
::
GPUR
OI
PoolGradOpKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
);
paddle/operators/roi_pool_op.h
浏览文件 @
ef905598
...
@@ -25,11 +25,11 @@ using LoDTensor = framework::LoDTensor;
...
@@ -25,11 +25,11 @@ using LoDTensor = framework::LoDTensor;
using
LoD
=
framework
::
LoD
;
using
LoD
=
framework
::
LoD
;
template
<
typename
Place
,
typename
T
>
template
<
typename
Place
,
typename
T
>
class
CPUR
oi
PoolOpKernel
:
public
framework
::
OpKernel
<
T
>
{
class
CPUR
OI
PoolOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
in
=
ctx
.
Input
<
Tensor
>
(
"X"
);
auto
*
in
=
ctx
.
Input
<
Tensor
>
(
"X"
);
auto
*
rois
=
ctx
.
Input
<
Tensor
>
(
"R
oi
s"
);
auto
*
rois
=
ctx
.
Input
<
Tensor
>
(
"R
OI
s"
);
auto
*
out
=
ctx
.
Output
<
Tensor
>
(
"Out"
);
auto
*
out
=
ctx
.
Output
<
Tensor
>
(
"Out"
);
auto
*
argmax
=
ctx
.
Output
<
Tensor
>
(
"Argmax"
);
auto
*
argmax
=
ctx
.
Output
<
Tensor
>
(
"Argmax"
);
...
@@ -37,13 +37,6 @@ class CPURoiPoolOpKernel : public framework::OpKernel<T> {
...
@@ -37,13 +37,6 @@ class CPURoiPoolOpKernel : public framework::OpKernel<T> {
auto
pooled_width
=
ctx
.
Attr
<
int
>
(
"pooled_width"
);
auto
pooled_width
=
ctx
.
Attr
<
int
>
(
"pooled_width"
);
auto
spatial_scale
=
ctx
.
Attr
<
float
>
(
"spatial_scale"
);
auto
spatial_scale
=
ctx
.
Attr
<
float
>
(
"spatial_scale"
);
PADDLE_ENFORCE_GT
(
pooled_height
,
0
,
"The pooled output height must greater than 0"
);
PADDLE_ENFORCE_GT
(
pooled_width
,
0
,
"The pooled output width must greater than 0"
);
PADDLE_ENFORCE_GT
(
spatial_scale
,
0
,
"The spatial scale must greater than 0"
);
auto
in_dims
=
in
->
dims
();
auto
in_dims
=
in
->
dims
();
int
batch_size
=
in_dims
[
0
];
int
batch_size
=
in_dims
[
0
];
int
channels
=
in_dims
[
1
];
int
channels
=
in_dims
[
1
];
...
@@ -51,18 +44,10 @@ class CPURoiPoolOpKernel : public framework::OpKernel<T> {
...
@@ -51,18 +44,10 @@ class CPURoiPoolOpKernel : public framework::OpKernel<T> {
int
width
=
in_dims
[
3
];
int
width
=
in_dims
[
3
];
int
rois_num
=
rois
->
dims
()[
0
];
int
rois_num
=
rois
->
dims
()[
0
];
auto
out_dims
=
in_dims
;
out_dims
[
0
]
=
rois_num
;
out_dims
[
1
]
=
channels
;
out_dims
[
2
]
=
pooled_height
;
out_dims
[
3
]
=
pooled_width
;
out
->
Resize
(
out_dims
);
argmax
->
Resize
(
out
->
dims
());
auto
in_stride
=
framework
::
stride
(
in_dims
);
auto
in_stride
=
framework
::
stride
(
in_dims
);
auto
argmax_stride
=
framework
::
stride
(
argmax
->
dims
());
auto
argmax_stride
=
framework
::
stride
(
argmax
->
dims
());
auto
roi_stride
=
framework
::
stride
(
rois
->
dims
());
auto
roi_stride
=
framework
::
stride
(
rois
->
dims
());
auto
out_stride
=
framework
::
stride
(
out
_dims
);
auto
out_stride
=
framework
::
stride
(
out
->
dims
()
);
const
T
*
input_data
=
in
->
data
<
T
>
();
const
T
*
input_data
=
in
->
data
<
T
>
();
const
int64_t
*
rois_data
=
rois
->
data
<
int64_t
>
();
const
int64_t
*
rois_data
=
rois
->
data
<
int64_t
>
();
...
@@ -124,7 +109,8 @@ class CPURoiPoolOpKernel : public framework::OpKernel<T> {
...
@@ -124,7 +109,8 @@ class CPURoiPoolOpKernel : public framework::OpKernel<T> {
// Define an empty pooling region to be zero
// Define an empty pooling region to be zero
bool
is_empty
=
(
hend
<=
hstart
)
||
(
wend
<=
wstart
);
bool
is_empty
=
(
hend
<=
hstart
)
||
(
wend
<=
wstart
);
output_data
[
pool_index
]
=
is_empty
?
0
:
-
__FLT_MAX__
;
output_data
[
pool_index
]
=
is_empty
?
0
:
-
std
::
numeric_limits
<
float
>::
max
();
for
(
int
h
=
hstart
;
h
<
hend
;
++
h
)
{
for
(
int
h
=
hstart
;
h
<
hend
;
++
h
)
{
for
(
int
w
=
wstart
;
w
<
wend
;
++
w
)
{
for
(
int
w
=
wstart
;
w
<
wend
;
++
w
)
{
...
@@ -150,11 +136,11 @@ class CPURoiPoolOpKernel : public framework::OpKernel<T> {
...
@@ -150,11 +136,11 @@ class CPURoiPoolOpKernel : public framework::OpKernel<T> {
};
};
template
<
typename
Place
,
typename
T
>
template
<
typename
Place
,
typename
T
>
class
CPUR
oi
PoolGradOpKernel
:
public
framework
::
OpKernel
<
T
>
{
class
CPUR
OI
PoolGradOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
in
=
ctx
.
Input
<
Tensor
>
(
"X"
);
auto
*
in
=
ctx
.
Input
<
Tensor
>
(
"X"
);
auto
*
rois
=
ctx
.
Input
<
Tensor
>
(
"R
oi
s"
);
auto
*
rois
=
ctx
.
Input
<
Tensor
>
(
"R
OI
s"
);
auto
*
argmax
=
ctx
.
Input
<
Tensor
>
(
"Argmax"
);
auto
*
argmax
=
ctx
.
Input
<
Tensor
>
(
"Argmax"
);
auto
*
out_grad
=
auto
*
out_grad
=
...
@@ -188,9 +174,9 @@ class CPURoiPoolGradOpKernel : public framework::OpKernel<T> {
...
@@ -188,9 +174,9 @@ class CPURoiPoolGradOpKernel : public framework::OpKernel<T> {
for
(
size_t
n
=
0
;
n
<
rois_num
;
++
n
)
{
for
(
size_t
n
=
0
;
n
<
rois_num
;
++
n
)
{
size_t
roi_batch_idx
=
rois_data
[
0
];
size_t
roi_batch_idx
=
rois_data
[
0
];
T
*
batch_grad_data
=
x_grad_data
+
batch_offset
*
roi_batch_idx
;
T
*
batch_grad_data
=
x_grad_data
+
batch_offset
*
roi_batch_idx
;
for
(
size_
t
c
=
0
;
c
<
channels
;
++
c
)
{
for
(
in
t
c
=
0
;
c
<
channels
;
++
c
)
{
for
(
size_
t
ph
=
0
;
ph
<
pooled_height
;
++
ph
)
{
for
(
in
t
ph
=
0
;
ph
<
pooled_height
;
++
ph
)
{
for
(
size_
t
pw
=
0
;
pw
<
pooled_width
;
++
pw
)
{
for
(
in
t
pw
=
0
;
pw
<
pooled_width
;
++
pw
)
{
size_t
pool_index
=
ph
*
pooled_width
+
pw
;
size_t
pool_index
=
ph
*
pooled_width
+
pw
;
if
(
argmax_data
[
pool_index
]
>=
0
)
{
if
(
argmax_data
[
pool_index
]
>=
0
)
{
...
...
python/paddle/v2/fluid/tests/test_roi_pool_op.py
浏览文件 @
ef905598
...
@@ -4,8 +4,7 @@ import math
...
@@ -4,8 +4,7 @@ import math
import
sys
import
sys
from
op_test
import
OpTest
from
op_test
import
OpTest
class
TestROIPoolOp
(
OpTest
):
class
TestSequenceSliceOp
(
OpTest
):
def
set_data
(
self
):
def
set_data
(
self
):
self
.
init_test_case
()
self
.
init_test_case
()
self
.
make_rois
()
self
.
make_rois
()
...
@@ -13,7 +12,7 @@ class TestSequenceSliceOp(OpTest):
...
@@ -13,7 +12,7 @@ class TestSequenceSliceOp(OpTest):
self
.
inputs
=
{
self
.
inputs
=
{
'X'
:
self
.
x
,
'X'
:
self
.
x
,
'R
oi
s'
:
self
.
rois
}
'R
OI
s'
:
self
.
rois
}
self
.
attrs
=
{
self
.
attrs
=
{
'spatial_scale'
:
self
.
spatial_scale
,
'spatial_scale'
:
self
.
spatial_scale
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录