Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
5802880b
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
5802880b
编写于
11月 19, 2017
作者:
W
wanghaox
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
update maxoutop for code review 3
上级
3ef776ef
变更
5
隐藏空白更改
内联
并排
Showing
5 changed files
with
54 additions
and
134 deletions
+54
-134
paddle/operators/math/maxouting.cc
paddle/operators/math/maxouting.cc
+15
-21
paddle/operators/math/maxouting.cu
paddle/operators/math/maxouting.cu
+30
-32
paddle/operators/math/maxouting.h
paddle/operators/math/maxouting.h
+2
-34
paddle/operators/maxout_op.cc
paddle/operators/maxout_op.cc
+5
-38
paddle/operators/maxout_op.h
paddle/operators/maxout_op.h
+2
-9
未找到文件。
paddle/operators/math/maxouting.cc
浏览文件 @
5802880b
...
...
@@ -22,23 +22,20 @@ namespace math {
* All tensors are in NCHW format.
* groups must be > 1
*/
template
<
typename
MaxOutProcess
,
typename
T
>
class
MaxOutFunctor
<
platform
::
CPUPlace
,
MaxOutProcess
,
T
>
{
template
<
typename
T
>
class
MaxOutFunctor
<
platform
::
CPUPlace
,
T
>
{
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
input
,
framework
::
Tensor
*
output
,
int
groups
,
MaxOutProcess
maxout_process
)
{
int
groups
)
{
const
int
batch_size
=
input
.
dims
()[
0
];
const
int
input_height
=
input
.
dims
()[
2
];
const
int
input_width
=
input
.
dims
()[
3
];
const
int
output_channels
=
output
->
dims
()[
1
];
int
fea_size
=
input_height
*
input_width
;
// c_size mean output one batch size
// c_size means the output size of each sample
int
c_size
=
fea_size
*
output_channels
;
const
T
*
input_data
=
input
.
data
<
T
>
();
T
*
output_data
=
output
->
mutable_data
<
T
>
(
context
.
GetPlace
());
...
...
@@ -47,10 +44,11 @@ class MaxOutFunctor<platform::CPUPlace, MaxOutProcess, T> {
for
(
int
c
=
0
;
c
<
output_channels
;
++
c
)
{
int
new_cindex
=
fea_size
*
c
;
for
(
int
f
=
0
;
f
<
fea_size
;
++
f
)
{
T
ele
=
maxout_process
.
initial
();
// T ele = maxout_process.initial();
T
ele
=
static_cast
<
T
>
(
-
FLT_MAX
);
for
(
int
ph
=
0
;
ph
<
groups
;
++
ph
)
{
maxout_process
.
compute
(
ele
,
input_data
[(
new_bindex
+
new_cindex
)
*
groups
+
ph
*
fea_size
+
f
])
;
T
x
=
input_data
[(
new_bindex
+
new_cindex
)
*
groups
+
ph
*
fea_size
+
f
];
ele
=
ele
>
x
?
ele
:
x
;
}
output_data
[(
new_bindex
+
new_cindex
+
f
)]
=
ele
;
}
...
...
@@ -74,9 +72,7 @@ public:
const
int
input_height
=
input
.
dims
()[
2
];
const
int
input_width
=
input
.
dims
()[
3
];
const
int
output_channels
=
output
.
dims
()[
1
];
int
fea_size
=
input_height
*
input_width
;
const
T
*
input_data
=
input
.
data
<
T
>
();
const
T
*
output_data
=
output
.
data
<
T
>
();
const
T
*
output_grad_data
=
output_grad
.
data
<
T
>
();
...
...
@@ -87,15 +83,15 @@ public:
for
(
int
c
=
0
;
c
<
output_channels
;
++
c
)
{
int
clen
=
fea_size
*
c
;
for
(
int
f
=
0
;
f
<
fea_size
;
++
f
)
{
int
input_idx
=
0
;
bool
stop
=
fals
e
;
int
input_idx
0
=
(
blen
+
clen
)
*
groups
+
f
;
bool
continue_match
=
tru
e
;
int
output_idx
=
blen
+
clen
+
f
;
for
(
int
g
=
0
;
g
<
groups
&&
!
stop
;
++
g
)
{
in
put_idx
=
(
blen
+
clen
)
*
groups
+
fea_size
*
g
+
f
;
for
(
int
g
=
0
;
g
<
groups
&&
continue_match
;
++
g
)
{
in
t
input_idx
=
input_idx0
+
fea_size
*
g
;
input_grad_data
[
input_idx
]
=
0
;
if
(
input_data
[
input_idx
]
==
output_data
[
output_idx
])
{
input_grad_data
[
input_idx
]
+=
output_grad_data
[
output_idx
];
stop
=
tru
e
;
continue_match
=
fals
e
;
}
}
}
...
...
@@ -106,10 +102,8 @@ public:
template
class
MaxOutGradFunctor
<
platform
::
CPUPlace
,
float
>;
template
class
MaxOutGradFunctor
<
platform
::
CPUPlace
,
double
>;
template
class
MaxOutFunctor
<
platform
::
CPUPlace
,
math
::
MaxOut
<
float
>,
float
>
;
template
class
MaxOutFunctor
<
platform
::
CPUPlace
,
math
::
MaxOut
<
double
>,
double
>
;
template
class
MaxOutFunctor
<
platform
::
CPUPlace
,
float
>;
template
class
MaxOutFunctor
<
platform
::
CPUPlace
,
double
>;
}
// namespace math
}
// namespace operators
...
...
paddle/operators/math/maxouting.cu
浏览文件 @
5802880b
...
...
@@ -19,27 +19,28 @@ namespace paddle {
namespace
operators
{
namespace
math
{
template
<
typename
MaxOutProcess
,
typename
T
>
template
<
typename
T
>
__global__
void
KernelMaxOut
(
const
int
nthreads
,
const
T
*
input_data
,
const
int
channels
,
const
int
input_height
,
const
int
input_width
,
int
groups
,
T
*
output_data
,
MaxOutProcess
maxout_process
)
{
int
groups
,
T
*
output_data
)
{
const
int
size
=
input_height
*
input_width
*
channels
/
groups
;
const
int
feat_len
=
input_height
*
input_width
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
nthreads
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
batch_idx
=
index
/
size
;
int
batch_offset
=
index
%
size
;
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
offset
=
blockDim
.
x
*
gridDim
.
x
;
for
(
int
i
=
index
;
i
<
nthreads
;
i
+=
offset
)
{
int
batch_idx
=
i
/
size
;
int
batch_offset
=
i
%
size
;
int
channel_idx
=
batch_offset
/
feat_len
;
int
feat_idx
=
batch_offset
%
feat_len
;
int
data_idx
=
(
batch_idx
*
size
+
channel_idx
*
feat_len
)
*
groups
+
feat_idx
;
T
ele
=
maxout_process
.
initial
(
);
T
ele
=
static_cast
<
T
>
(
-
FLT_MAX
);
for
(
int
g
=
0
;
g
<
groups
;
++
g
)
{
maxout_process
.
compute
(
ele
,
input_data
[
data_idx
+
g
*
feat_len
]);
T
x
=
input_data
[
data_idx
+
g
*
feat_len
];
ele
=
ele
>
x
?
ele
:
x
;
}
output_data
[
i
ndex
]
=
ele
;
output_data
[
i
]
=
ele
;
}
}
template
<
typename
T
>
...
...
@@ -49,38 +50,38 @@ __global__ void KernelMaxoutGrad(
const
int
input_height
,
const
int
input_width
,
int
groups
)
{
const
int
size
=
input_height
*
input_width
*
channels
/
groups
;
const
int
feat_len
=
input_height
*
input_width
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
nthreads
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
batch_idx
=
index
/
size
;
int
batch_offset
=
index
%
size
;
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
offset
=
blockDim
.
x
*
gridDim
.
x
;
for
(
int
i
=
index
;
i
<
nthreads
;
i
+=
offset
)
{
int
batch_idx
=
i
/
size
;
int
batch_offset
=
i
%
size
;
int
channel_idx
=
batch_offset
/
feat_len
;
int
feat_idx
=
batch_offset
%
feat_len
;
int
data_idx
=
(
batch_idx
*
size
+
channel_idx
*
feat_len
)
*
groups
+
feat_idx
;
int
max
I
ndex
=
-
1
;
bool
stop
=
fals
e
;
for
(
int
g
=
0
;
g
<
groups
&&
!
stop
;
++
g
)
{
if
(
input_data
[
data_idx
+
g
*
feat_len
]
==
output_data
[
i
ndex
])
{
max
I
ndex
=
data_idx
+
g
*
feat_len
;
stop
=
tru
e
;
int
max
_i
ndex
=
-
1
;
bool
continue_match
=
tru
e
;
for
(
int
g
=
0
;
g
<
groups
&&
continue_match
;
++
g
)
{
if
(
input_data
[
data_idx
+
g
*
feat_len
]
==
output_data
[
i
])
{
max
_i
ndex
=
data_idx
+
g
*
feat_len
;
continue_match
=
fals
e
;
}
}
if
(
max
I
ndex
!=
-
1
)
{
if
(
max
_i
ndex
!=
-
1
)
{
// atomic add
platform
::
CudaAtomicAdd
(
input_grad
+
max
I
ndex
,
output_grad
[
index
]);
platform
::
CudaAtomicAdd
(
input_grad
+
max
_i
ndex
,
output_grad
[
index
]);
}
}
}
/*
* All tensors are in NCHW format.
*/
template
<
typename
MaxOutProcess
,
typename
T
>
class
MaxOutFunctor
<
platform
::
GPUPlace
,
MaxOutProcess
,
T
>
{
template
<
typename
T
>
class
MaxOutFunctor
<
platform
::
GPUPlace
,
T
>
{
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
input
,
framework
::
Tensor
*
output
,
int
groups
,
MaxOutProcess
maxout_process
)
{
int
groups
)
{
const
int
batch_size
=
input
.
dims
()[
0
];
const
int
input_channels
=
input
.
dims
()[
1
];
const
int
input_height
=
input
.
dims
()[
2
];
...
...
@@ -97,12 +98,11 @@ class MaxOutFunctor<platform::GPUPlace, MaxOutProcess, T> {
dim3
grid
(
blocks
,
1
);
KernelMaxOut
<
MaxOutProcess
,
T
><<<
grid
,
threads
,
0
,
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
context
)
.
stream
()
>>>
(
nthreads
,
input_data
,
input_channels
,
input_height
,
input_width
,
groups
,
output_data
,
maxout_process
);
output_data
);
}
};
/*
...
...
@@ -145,10 +145,8 @@ class MaxOutGradFunctor<platform::GPUPlace, T> {
template
class
MaxOutGradFunctor
<
platform
::
GPUPlace
,
float
>;
template
class
MaxOutGradFunctor
<
platform
::
GPUPlace
,
double
>;
template
class
MaxOutFunctor
<
platform
::
GPUPlace
,
math
::
MaxOut
<
float
>,
float
>
;
template
class
MaxOutFunctor
<
platform
::
GPUPlace
,
math
::
MaxOut
<
double
>,
double
>
;
template
class
MaxOutFunctor
<
platform
::
GPUPlace
,
float
>;
template
class
MaxOutFunctor
<
platform
::
GPUPlace
,
double
>;
}
// namespace math
}
// namespace operators
...
...
paddle/operators/math/maxouting.h
浏览文件 @
5802880b
...
...
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/eigen.h"
#include "paddle/framework/tensor.h"
#include "paddle/platform/device_context.h"
#include "paddle/platform/hostdevice.h"
...
...
@@ -22,42 +21,18 @@ namespace paddle {
namespace
operators
{
namespace
math
{
#define FLT_MAX \
__FLT_MAX__
/*
* \brief Extracting simple operations from maxout.
* need "initial", "compute"
* operation.
*/
template
<
class
T
>
class
MaxOut
{
public:
DEVICE
inline
T
initial
()
{
return
static_cast
<
T
>
(
-
FLT_MAX
);
}
DEVICE
inline
void
compute
(
T
&
y
,
const
T
&
x
)
{
y
=
y
>
x
?
y
:
x
;
}
};
template
<
class
T
>
class
MaxOutGrad
{
public:
DEVICE
inline
void
compute
(
const
T
&
x
,
const
T
&
y
,
const
T
&
dy
,
T
&
dx
,
T
scale
)
{
dx
+=
dy
*
(
x
==
y
);
}
};
template
<
typename
Place
,
typename
MaxOutProcess
,
typename
T
>
template
<
typename
Place
,
typename
T
>
class
MaxOutFunctor
{
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
input
,
framework
::
Tensor
*
output
,
int
groups
,
MaxOutProcess
maxout_compute
);
int
groups
);
};
template
<
typename
Place
,
class
T
>
class
MaxOutGradFunctor
{
public:
...
...
@@ -67,13 +42,6 @@ class MaxOutGradFunctor {
const
framework
::
Tensor
&
output
,
const
framework
::
Tensor
&
output_grad
,
int
groups
);
};
}
// namespace math
}
// namespace operators
}
// namespace paddle
paddle/operators/maxout_op.cc
浏览文件 @
5802880b
...
...
@@ -12,7 +12,6 @@
* See the License for the specific language governing permissions and
* limitations under the License. */
#include "paddle/operators/maxout_op.h"
namespace
paddle
{
namespace
operators
{
...
...
@@ -33,18 +32,18 @@ class MaxOutOpMaker : public framework::OpProtoAndCheckerMaker {
"Where N is batch size, C is "
"the number of channels, H and W is the height and "
"width of feature."
);
AddAttr
<
int
>
(
"groups"
,
R"DOC(The group number of input layer.
)DOC"
);
AddComment
(
R"DOC(
- Input: NCHW.
- Output: feature map size same as input. Channel is (input channel) / groups.
- Output: The feature map size of output is the same as the input.
The output_channel is (input channel) / groups
So groups should be larger than 1, and the num of channels should be able
to devided by groups.
to
be
devided by groups.
.. math:
:
math
:
y_{si+j} = \max_k x_{gsi + sk + j}
g = groups
s = input.size / num_channels
...
...
@@ -57,29 +56,6 @@ class MaxOutOpMaker : public framework::OpProtoAndCheckerMaker {
- Multi-digit Number Recognition from Street View \
Imagery using Deep Convolutional Neural Networks: \
https://arxiv.org/pdf/1312.6082v4.pdf
The simple usage is:
.. code-block:: python
maxout = maxout_layer(input,
num_channels=128,
groups=4)
:param input: The input of this layer.
:type input: LayerOutput
:param num_channels: The channel number of input layer. If None will be set
automatically from previous output.
:type num_channels: int | None
:param groups: The group number of input layer.
:type groups: int
:param name: The name of this layer. It is optional.
:type name: None | basestring.
:param layer_attr: Extra Layer attribute.
:type layer_attr: ExtraLayerAttribute
:return: LayerOutput object.
:rtype: LayerOutput
)DOC"
);
}
};
...
...
@@ -88,7 +64,6 @@ class MaxOutOpMaker : public framework::OpProtoAndCheckerMaker {
class
MaxOutOp
:
public
framework
::
OperatorWithKernel
{
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"X"
),
"Input(X) of maxoutOp"
"should not be null."
);
...
...
@@ -96,26 +71,20 @@ class MaxOutOp : public framework::OperatorWithKernel {
"Output(Out) of maxoutOp should not be null."
);
auto
in_x_dims
=
ctx
->
GetInputDim
(
"X"
);
int
groups
=
ctx
->
Attrs
().
Get
<
int
>
(
"groups"
);
// check groups > 1
PADDLE_ENFORCE_GT
(
groups
,
1
,
"in maxoutop groups should be larger than 1"
);
"groups should be larger than 1 in maxoutop"
);
std
::
vector
<
int64_t
>
output_shape
({
in_x_dims
[
0
],
in_x_dims
[
1
]
/
groups
});
output_shape
.
push_back
(
in_x_dims
[
2
]);
output_shape
.
push_back
(
in_x_dims
[
3
]);
ctx
->
SetOutputDim
(
"Out"
,
framework
::
make_ddim
(
output_shape
));
}
};
class
MaxOutOpGrad
:
public
framework
::
OperatorWithKernel
{
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"X"
),
"Input(X) must not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
framework
::
GradVarName
(
"X"
)),
...
...
@@ -129,8 +98,6 @@ class MaxOutOpGrad : public framework::OperatorWithKernel {
namespace
ops
=
paddle
::
operators
;
REGISTER_OP
(
maxout
,
ops
::
MaxOutOp
,
ops
::
MaxOutOpMaker
,
maxout_grad
,
ops
::
MaxOutOpGrad
);
REGISTER_OP_CPU_KERNEL
(
maxout
,
ops
::
MaxOutKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
REGISTER_OP_CPU_KERNEL
(
maxout_grad
,
...
...
paddle/operators/maxout_op.h
浏览文件 @
5802880b
...
...
@@ -29,16 +29,12 @@ class MaxOutKernel : public framework::OpKernel<T> {
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
const
Tensor
*
in_x
=
context
.
Input
<
Tensor
>
(
"X"
);
Tensor
*
out
=
context
.
Output
<
Tensor
>
(
"Out"
);
int
groups
=
context
.
template
Attr
<
int
>(
"groups"
);
paddle
::
operators
::
math
::
MaxOutFunctor
<
Place
,
paddle
::
operators
::
math
::
MaxOut
<
T
>
,
T
>
Place
,
T
>
maxout_forward
;
paddle
::
operators
::
math
::
MaxOut
<
T
>
maxout_process
;
maxout_forward
(
context
.
device_context
(),
*
in_x
,
out
,
groups
,
maxout_process
);
maxout_forward
(
context
.
device_context
(),
*
in_x
,
out
,
groups
);
}
};
...
...
@@ -51,15 +47,12 @@ class MaxOutGradKernel : public framework::OpKernel<T> {
const
Tensor
*
out_grad
=
context
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"Out"
));
Tensor
*
in_x_grad
=
context
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"X"
));
int
groups
=
context
.
template
Attr
<
int
>(
"groups"
);
auto
&
device_ctx
=
context
.
device_context
();
math
::
SetConstant
<
Place
,
T
>
zero
;
if
(
in_x_grad
)
{
in_x_grad
->
mutable_data
<
T
>
(
context
.
GetPlace
());
zero
(
device_ctx
,
in_x_grad
,
static_cast
<
T
>
(
0.0
));
paddle
::
operators
::
math
::
MaxOutGradFunctor
<
Place
,
T
>
maxout_backward
;
maxout_backward
(
context
.
device_context
(),
*
in_x
,
*
in_x_grad
,
*
out
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录