Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleDetection
提交
5802880b
P
PaddleDetection
项目概览
PaddlePaddle
/
PaddleDetection
大约 1 年 前同步成功
通知
695
Star
11112
Fork
2696
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
184
列表
看板
标记
里程碑
合并请求
40
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
184
Issue
184
列表
看板
标记
里程碑
合并请求
40
合并请求
40
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
5802880b
编写于
11月 19, 2017
作者:
W
wanghaox
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
update maxoutop for code review 3
上级
3ef776ef
变更
5
隐藏空白更改
内联
并排
Showing
5 changed file
with
54 addition
and
134 deletion
+54
-134
paddle/operators/math/maxouting.cc
paddle/operators/math/maxouting.cc
+15
-21
paddle/operators/math/maxouting.cu
paddle/operators/math/maxouting.cu
+30
-32
paddle/operators/math/maxouting.h
paddle/operators/math/maxouting.h
+2
-34
paddle/operators/maxout_op.cc
paddle/operators/maxout_op.cc
+5
-38
paddle/operators/maxout_op.h
paddle/operators/maxout_op.h
+2
-9
未找到文件。
paddle/operators/math/maxouting.cc
浏览文件 @
5802880b
...
...
@@ -22,23 +22,20 @@ namespace math {
* All tensors are in NCHW format.
* groups mustbe > 1
*/
template
<
typename
MaxOutProcess
,
typename
T
>
class
MaxOutFunctor
<
platform
::
CPUPlace
,
MaxOutProcess
,
T
>
{
template
<
typename
T
>
class
MaxOutFunctor
<
platform
::
CPUPlace
,
T
>
{
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
input
,
framework
::
Tensor
*
output
,
int
groups
,
MaxOutProcess
maxout_process
)
{
int
groups
)
{
const
int
batch_size
=
input
.
dims
()[
0
];
const
int
input_height
=
input
.
dims
()[
2
];
const
int
input_width
=
input
.
dims
()[
3
];
const
int
output_channels
=
output
->
dims
()[
1
];
int
fea_size
=
input_height
*
input_width
;
// c_size mean
output one batch siz
e
// c_size mean
s the output size of each sampl
e
int
c_size
=
fea_size
*
output_channels
;
const
T
*
input_data
=
input
.
data
<
T
>
();
T
*
output_data
=
output
->
mutable_data
<
T
>
(
context
.
GetPlace
());
...
...
@@ -47,10 +44,11 @@ class MaxOutFunctor<platform::CPUPlace, MaxOutProcess, T> {
for
(
int
c
=
0
;
c
<
output_channels
;
++
c
)
{
int
new_cindex
=
fea_size
*
c
;
for
(
int
f
=
0
;
f
<
fea_size
;
++
f
)
{
T
ele
=
maxout_process
.
initial
();
// T ele = maxout_process.initial();
T
ele
=
static_cast
<
T
>
(
-
FLT_MAX
);
for
(
int
ph
=
0
;
ph
<
groups
;
++
ph
)
{
maxout_process
.
compute
(
ele
,
input_data
[(
new_bindex
+
new_cindex
)
*
groups
+
ph
*
fea_size
+
f
])
;
T
x
=
input_data
[(
new_bindex
+
new_cindex
)
*
groups
+
ph
*
fea_size
+
f
];
ele
=
ele
>
x
?
ele
:
x
;
}
output_data
[(
new_bindex
+
new_cindex
+
f
)]
=
ele
;
}
...
...
@@ -74,9 +72,7 @@ public:
const
int
input_height
=
input
.
dims
()[
2
];
const
int
input_width
=
input
.
dims
()[
3
];
const
int
output_channels
=
output
.
dims
()[
1
];
int
fea_size
=
input_height
*
input_width
;
const
T
*
input_data
=
input
.
data
<
T
>
();
const
T
*
output_data
=
output
.
data
<
T
>
();
const
T
*
output_grad_data
=
output_grad
.
data
<
T
>
();
...
...
@@ -87,15 +83,15 @@ public:
for
(
int
c
=
0
;
c
<
output_channels
;
++
c
)
{
int
clen
=
fea_size
*
c
;
for
(
int
f
=
0
;
f
<
fea_size
;
++
f
)
{
int
input_idx
=
0
;
bool
stop
=
fals
e
;
int
input_idx
0
=
(
blen
+
clen
)
*
groups
+
f
;
bool
continue_match
=
tru
e
;
int
output_idx
=
blen
+
clen
+
f
;
for
(
int
g
=
0
;
g
<
groups
&&
!
stop
;
++
g
)
{
in
put_idx
=
(
blen
+
clen
)
*
groups
+
fea_size
*
g
+
f
;
for
(
int
g
=
0
;
g
<
groups
&&
continue_match
;
++
g
)
{
in
t
input_idx
=
input_idx0
+
fea_size
*
g
;
input_grad_data
[
input_idx
]
=
0
;
if
(
input_data
[
input_idx
]
==
output_data
[
output_idx
])
{
input_grad_data
[
input_idx
]
+=
output_grad_data
[
output_idx
];
stop
=
tru
e
;
continue_match
=
fals
e
;
}
}
}
...
...
@@ -106,10 +102,8 @@ public:
template
class
MaxOutGradFunctor
<
platform
::
CPUPlace
,
float
>;
template
class
MaxOutGradFunctor
<
platform
::
CPUPlace
,
double
>;
template
class
MaxOutFunctor
<
platform
::
CPUPlace
,
math
::
MaxOut
<
float
>,
float
>
;
template
class
MaxOutFunctor
<
platform
::
CPUPlace
,
math
::
MaxOut
<
double
>,
double
>
;
template
class
MaxOutFunctor
<
platform
::
CPUPlace
,
float
>;
template
class
MaxOutFunctor
<
platform
::
CPUPlace
,
double
>;
}
// namespace math
}
// namespace operators
...
...
paddle/operators/math/maxouting.cu
浏览文件 @
5802880b
...
...
@@ -19,27 +19,28 @@ namespace paddle {
namespace
operators
{
namespace
math
{
template
<
typename
MaxOutProcess
,
typename
T
>
template
<
typename
T
>
__global__
void
KernelMaxOut
(
const
int
nthreads
,
const
T
*
input_data
,
const
int
channels
,
const
int
input_height
,
const
int
input_width
,
int
groups
,
T
*
output_data
,
MaxOutProcess
maxout_process
)
{
int
groups
,
T
*
output_data
)
{
const
int
size
=
input_height
*
input_width
*
channels
/
groups
;
const
int
feat_len
=
input_height
*
input_width
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
nthreads
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
batch_idx
=
index
/
size
;
int
batch_offset
=
index
%
size
;
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
offset
=
blockDim
.
x
*
gridDim
.
x
;
for
(
int
i
=
index
;
i
<
nthreads
;
i
+=
offset
)
{
int
batch_idx
=
i
/
size
;
int
batch_offset
=
i
%
size
;
int
channel_idx
=
batch_offset
/
feat_len
;
int
feat_idx
=
batch_offset
%
feat_len
;
int
data_idx
=
(
batch_idx
*
size
+
channel_idx
*
feat_len
)
*
groups
+
feat_idx
;
T
ele
=
maxout_process
.
initial
(
);
T
ele
=
static_cast
<
T
>
(
-
FLT_MAX
);
for
(
int
g
=
0
;
g
<
groups
;
++
g
)
{
maxout_process
.
compute
(
ele
,
input_data
[
data_idx
+
g
*
feat_len
]);
T
x
=
input_data
[
data_idx
+
g
*
feat_len
];
ele
=
ele
>
x
?
ele
:
x
;
}
output_data
[
i
ndex
]
=
ele
;
output_data
[
i
]
=
ele
;
}
}
template
<
typename
T
>
...
...
@@ -49,38 +50,38 @@ __global__ void KernelMaxoutGrad(
const
int
input_height
,
const
int
input_width
,
int
groups
)
{
const
int
size
=
input_height
*
input_width
*
channels
/
groups
;
const
int
feat_len
=
input_height
*
input_width
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
nthreads
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
batch_idx
=
index
/
size
;
int
batch_offset
=
index
%
size
;
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
offset
=
blockDim
.
x
*
gridDim
.
x
;
for
(
int
i
=
index
;
i
<
nthreads
;
i
+=
offset
)
{
int
batch_idx
=
i
/
size
;
int
batch_offset
=
i
%
size
;
int
channel_idx
=
batch_offset
/
feat_len
;
int
feat_idx
=
batch_offset
%
feat_len
;
int
data_idx
=
(
batch_idx
*
size
+
channel_idx
*
feat_len
)
*
groups
+
feat_idx
;
int
max
I
ndex
=
-
1
;
bool
stop
=
fals
e
;
for
(
int
g
=
0
;
g
<
groups
&&
!
stop
;
++
g
)
{
if
(
input_data
[
data_idx
+
g
*
feat_len
]
==
output_data
[
i
ndex
])
{
max
I
ndex
=
data_idx
+
g
*
feat_len
;
stop
=
tru
e
;
int
max
_i
ndex
=
-
1
;
bool
continue_match
=
tru
e
;
for
(
int
g
=
0
;
g
<
groups
&&
continue_match
;
++
g
)
{
if
(
input_data
[
data_idx
+
g
*
feat_len
]
==
output_data
[
i
])
{
max
_i
ndex
=
data_idx
+
g
*
feat_len
;
continue_match
=
fals
e
;
}
}
if
(
max
I
ndex
!=
-
1
)
{
if
(
max
_i
ndex
!=
-
1
)
{
// atomic add
platform
::
CudaAtomicAdd
(
input_grad
+
max
I
ndex
,
output_grad
[
index
]);
platform
::
CudaAtomicAdd
(
input_grad
+
max
_i
ndex
,
output_grad
[
index
]);
}
}
}
/*
* All tensors are in NCHW format.
*/
template
<
typename
MaxOutProcess
,
typename
T
>
class
MaxOutFunctor
<
platform
::
GPUPlace
,
MaxOutProcess
,
T
>
{
template
<
typename
T
>
class
MaxOutFunctor
<
platform
::
GPUPlace
,
T
>
{
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
input
,
framework
::
Tensor
*
output
,
int
groups
,
MaxOutProcess
maxout_process
)
{
int
groups
)
{
const
int
batch_size
=
input
.
dims
()[
0
];
const
int
input_channels
=
input
.
dims
()[
1
];
const
int
input_height
=
input
.
dims
()[
2
];
...
...
@@ -97,12 +98,11 @@ class MaxOutFunctor<platform::GPUPlace, MaxOutProcess, T> {
dim3
grid
(
blocks
,
1
);
KernelMaxOut
<
MaxOutProcess
,
T
><<<
grid
,
threads
,
0
,
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
context
)
.
stream
()
>>>
(
nthreads
,
input_data
,
input_channels
,
input_height
,
input_width
,
groups
,
output_data
,
maxout_process
);
output_data
);
}
};
/*
...
...
@@ -145,10 +145,8 @@ class MaxOutGradFunctor<platform::GPUPlace, T> {
template
class
MaxOutGradFunctor
<
platform
::
GPUPlace
,
float
>;
template
class
MaxOutGradFunctor
<
platform
::
GPUPlace
,
double
>;
template
class
MaxOutFunctor
<
platform
::
GPUPlace
,
math
::
MaxOut
<
float
>,
float
>
;
template
class
MaxOutFunctor
<
platform
::
GPUPlace
,
math
::
MaxOut
<
double
>,
double
>
;
template
class
MaxOutFunctor
<
platform
::
GPUPlace
,
float
>;
template
class
MaxOutFunctor
<
platform
::
GPUPlace
,
double
>;
}
// namespace math
}
// namespace operators
...
...
paddle/operators/math/maxouting.h
浏览文件 @
5802880b
...
...
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/eigen.h"
#include "paddle/framework/tensor.h"
#include "paddle/platform/device_context.h"
#include "paddle/platform/hostdevice.h"
...
...
@@ -22,42 +21,18 @@ namespace paddle {
namespace
operators
{
namespace
math
{
#define FLT_MAX \
__FLT_MAX__
/*
* \brief Extracting simple operations from maxout.
* need "initial", "compute"
* operation.
*/
template
<
class
T
>
class
MaxOut
{
public:
DEVICE
inline
T
initial
()
{
return
static_cast
<
T
>
(
-
FLT_MAX
);
}
DEVICE
inline
void
compute
(
T
&
y
,
const
T
&
x
)
{
y
=
y
>
x
?
y
:
x
;
}
};
template
<
class
T
>
class
MaxOutGrad
{
public:
DEVICE
inline
void
compute
(
const
T
&
x
,
const
T
&
y
,
const
T
&
dy
,
T
&
dx
,
T
scale
)
{
dx
+=
dy
*
(
x
==
y
);
}
};
template
<
typename
Place
,
typename
MaxOutProcess
,
typename
T
>
template
<
typename
Place
,
typename
T
>
class
MaxOutFunctor
{
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
input
,
framework
::
Tensor
*
output
,
int
groups
,
MaxOutProcess
maxout_compute
);
int
groups
);
};
template
<
typename
Place
,
class
T
>
class
MaxOutGradFunctor
{
public:
...
...
@@ -67,13 +42,6 @@ class MaxOutGradFunctor {
const
framework
::
Tensor
&
output
,
const
framework
::
Tensor
&
output_grad
,
int
groups
);
};
}
// namespace math
}
// namespace operators
}
// namespace paddle
paddle/operators/maxout_op.cc
浏览文件 @
5802880b
...
...
@@ -12,7 +12,6 @@
* See the License for the specific language governing permissions and
* limitations under the License. */
#include "paddle/operators/maxout_op.h"
namespace
paddle
{
namespace
operators
{
...
...
@@ -33,18 +32,18 @@ class MaxOutOpMaker : public framework::OpProtoAndCheckerMaker {
"Where N is batch size, C is "
"the number of channels, H and W is the height and "
"width of feature."
);
AddAttr
<
int
>
(
"groups"
,
R"DOC(The group number of input layer.
)DOC"
);
AddComment
(
R"DOC(
- Input: NCHW.
- Output: feature map size same as input. Channel is (input channel) / groups.
- Output: The feature map size of output is the same as the input.
The output_channel is (input channel) / groups
So groups should be larger than 1, and the num of channels should be able
to devided by groups.
to
be
devided by groups.
.. math:
:
math
:
y_{si+j} = \max_k x_{gsi + sk + j}
g = groups
s = input.size / num_channels
...
...
@@ -57,29 +56,6 @@ class MaxOutOpMaker : public framework::OpProtoAndCheckerMaker {
- Multi-digit Number Recognition from Street View \
Imagery using Deep Convolutional Neural Networks: \
https://arxiv.org/pdf/1312.6082v4.pdf
The simple usage is:
.. code-block:: python
maxout = maxout_layer(input,
num_channels=128,
groups=4)
:param input: The input of this layer.
:type input: LayerOutput
:param num_channels: The channel number of input layer. If None will be set
automatically from previous output.
:type num_channels: int | None
:param groups: The group number of input layer.
:type groups: int
:param name: The name of this layer. It is optional.
:type name: None | basestring.
:param layer_attr: Extra Layer attribute.
:type layer_attr: ExtraLayerAttribute
:return: LayerOutput object.
:rtype: LayerOutput
)DOC"
);
}
};
...
...
@@ -88,7 +64,6 @@ class MaxOutOpMaker : public framework::OpProtoAndCheckerMaker {
class
MaxOutOp
:
public
framework
::
OperatorWithKernel
{
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"X"
),
"Input(X) of maxoutOp"
"should not be null."
);
...
...
@@ -96,26 +71,20 @@ class MaxOutOp : public framework::OperatorWithKernel {
"Output(Out) of maxoutOp should not be null."
);
auto
in_x_dims
=
ctx
->
GetInputDim
(
"X"
);
int
groups
=
ctx
->
Attrs
().
Get
<
int
>
(
"groups"
);
// check groups > 1
PADDLE_ENFORCE_GT
(
groups
,
1
,
"in maxoutop groups should be larger than 1"
);
"groups should be larger than 1 in maxoutop"
);
std
::
vector
<
int64_t
>
output_shape
({
in_x_dims
[
0
],
in_x_dims
[
1
]
/
groups
});
output_shape
.
push_back
(
in_x_dims
[
2
]);
output_shape
.
push_back
(
in_x_dims
[
3
]);
ctx
->
SetOutputDim
(
"Out"
,
framework
::
make_ddim
(
output_shape
));
}
};
class
MaxOutOpGrad
:
public
framework
::
OperatorWithKernel
{
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"X"
),
"Input(X) must not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
framework
::
GradVarName
(
"X"
)),
...
...
@@ -129,8 +98,6 @@ class MaxOutOpGrad : public framework::OperatorWithKernel {
namespace
ops
=
paddle
::
operators
;
REGISTER_OP
(
maxout
,
ops
::
MaxOutOp
,
ops
::
MaxOutOpMaker
,
maxout_grad
,
ops
::
MaxOutOpGrad
);
REGISTER_OP_CPU_KERNEL
(
maxout
,
ops
::
MaxOutKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
REGISTER_OP_CPU_KERNEL
(
maxout_grad
,
...
...
paddle/operators/maxout_op.h
浏览文件 @
5802880b
...
...
@@ -29,16 +29,12 @@ class MaxOutKernel : public framework::OpKernel<T> {
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
const
Tensor
*
in_x
=
context
.
Input
<
Tensor
>
(
"X"
);
Tensor
*
out
=
context
.
Output
<
Tensor
>
(
"Out"
);
int
groups
=
context
.
template
Attr
<
int
>(
"groups"
);
paddle
::
operators
::
math
::
MaxOutFunctor
<
Place
,
paddle
::
operators
::
math
::
MaxOut
<
T
>
,
T
>
Place
,
T
>
maxout_forward
;
paddle
::
operators
::
math
::
MaxOut
<
T
>
maxout_process
;
maxout_forward
(
context
.
device_context
(),
*
in_x
,
out
,
groups
,
maxout_process
);
maxout_forward
(
context
.
device_context
(),
*
in_x
,
out
,
groups
);
}
};
...
...
@@ -51,15 +47,12 @@ class MaxOutGradKernel : public framework::OpKernel<T> {
const
Tensor
*
out_grad
=
context
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"Out"
));
Tensor
*
in_x_grad
=
context
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"X"
));
int
groups
=
context
.
template
Attr
<
int
>(
"groups"
);
auto
&
device_ctx
=
context
.
device_context
();
math
::
SetConstant
<
Place
,
T
>
zero
;
if
(
in_x_grad
)
{
in_x_grad
->
mutable_data
<
T
>
(
context
.
GetPlace
());
zero
(
device_ctx
,
in_x_grad
,
static_cast
<
T
>
(
0.0
));
paddle
::
operators
::
math
::
MaxOutGradFunctor
<
Place
,
T
>
maxout_backward
;
maxout_backward
(
context
.
device_context
(),
*
in_x
,
*
in_x_grad
,
*
out
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录