Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
273f7375
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
273f7375
编写于
7月 22, 2018
作者:
Q
qiaolongfei
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
optimize code
上级
5d718a58
变更
4
隐藏空白更改
内联
并排
Showing
4 changed files
with
112 additions
and
40 deletions
+112
-40
paddle/fluid/operators/reduce_op.h
paddle/fluid/operators/reduce_op.h
+0
-29
paddle/fluid/operators/reduce_sum_op.cc
paddle/fluid/operators/reduce_sum_op.cc
+10
-9
paddle/fluid/operators/reduce_sum_op.h
paddle/fluid/operators/reduce_sum_op.h
+58
-0
python/paddle/fluid/tests/unittests/test_reduce_op.py
python/paddle/fluid/tests/unittests/test_reduce_op.py
+44
-2
未找到文件。
paddle/fluid/operators/reduce_op.h
浏览文件 @
273f7375
...
@@ -88,35 +88,6 @@ class ReduceGradKernel : public framework::OpKernel<T> {
...
@@ -88,35 +88,6 @@ class ReduceGradKernel : public framework::OpKernel<T> {
auto
*
output
=
context
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"X"
));
auto
*
output
=
context
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"X"
));
output
->
mutable_data
<
T
>
(
context
.
GetPlace
());
output
->
mutable_data
<
T
>
(
context
.
GetPlace
());
if
(
context
.
GetPlace
().
type
()
==
typeid
(
platform
::
CPUPlace
))
{
const
auto
*
input2_d
=
input2
->
data
<
T
>
();
auto
*
output_d
=
output
->
data
<
T
>
();
// CPU reduce_all_grad
if
(
reduce_all
)
{
PADDLE_ENFORCE
(
input2
->
dims
().
size
()
==
1
&&
input2
->
dims
()[
0
]
==
1
,
"output should be a scalar"
);
for
(
int64_t
i
=
0
;
i
<
framework
::
product
(
input0
->
dims
());
++
i
)
{
output_d
[
i
]
=
input2_d
[
0
];
}
return
;
}
if
(
input0
->
dims
().
size
()
==
2
&&
dims
.
size
()
==
1
)
{
auto
&
input_dim
=
input0
->
dims
();
for
(
int64_t
i
=
0
;
i
<
input_dim
[
0
];
++
i
)
{
for
(
int64_t
j
=
0
;
j
<
input_dim
[
1
];
++
j
)
{
if
(
dims
[
0
]
==
0
)
{
output_d
[
i
*
input_dim
[
1
]
+
j
]
=
input2_d
[
j
];
}
else
{
output_d
[
i
*
input_dim
[
1
]
+
j
]
=
input2_d
[
i
];
}
}
}
return
;
}
}
if
(
reduce_all
)
{
if
(
reduce_all
)
{
auto
x
=
EigenVector
<
T
>::
Flatten
(
*
input0
);
auto
x
=
EigenVector
<
T
>::
Flatten
(
*
input0
);
auto
x_reduce
=
EigenVector
<
T
>::
From
(
*
input1
);
auto
x_reduce
=
EigenVector
<
T
>::
From
(
*
input1
);
...
...
paddle/fluid/operators/reduce_sum_op.cc
浏览文件 @
273f7375
...
@@ -23,12 +23,13 @@ REGISTER_OP_CPU_KERNEL(
...
@@ -23,12 +23,13 @@ REGISTER_OP_CPU_KERNEL(
ops
::
ReduceKernel
<
paddle
::
platform
::
CPUDeviceContext
,
int
,
ops
::
SumFunctor
>
,
ops
::
ReduceKernel
<
paddle
::
platform
::
CPUDeviceContext
,
int
,
ops
::
SumFunctor
>
,
ops
::
ReduceKernel
<
paddle
::
platform
::
CPUDeviceContext
,
int64_t
,
ops
::
ReduceKernel
<
paddle
::
platform
::
CPUDeviceContext
,
int64_t
,
ops
::
SumFunctor
>
);
ops
::
SumFunctor
>
);
REGISTER_OP_CPU_KERNEL
(
reduce_sum_grad
,
REGISTER_OP_CPU_KERNEL
(
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
reduce_sum_grad
,
float
,
ops
::
SumGradFunctor
>
,
ops
::
ReduceSumGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
ops
::
SumGradFunctor
>
,
double
,
ops
::
SumGradFunctor
>
,
ops
::
ReduceSumGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
ops
::
SumGradFunctor
>
,
int
,
ops
::
SumGradFunctor
>
,
ops
::
ReduceSumGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
int
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
ops
::
SumGradFunctor
>
,
int64_t
,
ops
::
SumGradFunctor
>
);
ops
::
ReduceSumGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
int64_t
,
ops
::
SumGradFunctor
>
);
paddle/fluid/operators/reduce_sum_op.h
浏览文件 @
273f7375
...
@@ -14,11 +14,69 @@
...
@@ -14,11 +14,69 @@
#pragma once
#pragma once
#include <vector>
#include "paddle/fluid/operators/reduce_op.h"
#include "paddle/fluid/operators/reduce_op.h"
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
// Use a for loop instead of Eigen broadcast; it is about 4 times faster than broadcast.
// Gradient kernel for reduce_sum with a hand-written CPU fast path.
//
// When the kernel runs on a CPUPlace and the "dim" attribute names exactly one
// axis, the gradient of Out is copied into the gradient of X with plain loops
// instead of an Eigen broadcast. Every other case is forwarded unchanged to
// ReduceGradKernel<DeviceContext, T, Functor>.
template <typename DeviceContext, typename T, typename Functor>
class ReduceSumGradKernel : public framework::OpKernel<T> {
 public:
  // Reads the "dim" attribute, the input "X" and the gradient of "Out" from
  // `context`, and writes the gradient of "X".
  void Compute(const framework::ExecutionContext& context) const override {
    auto dims = context.Attr<std::vector<int>>("dim");
    if (context.GetPlace().type() == typeid(platform::CPUPlace) &&
        dims.size() == 1) {
      auto* input0 = context.Input<Tensor>("X");
      auto* input2 = context.Input<Tensor>(framework::GradVarName("Out"));
      auto* output = context.Output<Tensor>(framework::GradVarName("X"));
      output->mutable_data<T>(context.GetPlace());
      const auto* input2_d = input2->data<T>();
      auto* output_d = output->data<T>();

      auto& input_dim = input0->dims();

      // Handle a one-element gradient of Out (the reduce-all case): every
      // element of the gradient of X receives that single value. The check is
      // on the element count rather than on the exact shape [1], so that a
      // one-element gradient of higher rank (presumably [1, 1, ...] when all
      // axes are reduced with keep_dim -- confirm against the op's InferShape)
      // is not routed into the single-axis loops below, which would index
      // past the end of its buffer.
      if (framework::product(input2->dims()) == 1) {
        // Computed once; the element count does not change inside the loop.
        const int64_t input_numel = framework::product(input_dim);
        for (int64_t i = 0; i < input_numel; ++i) {
          output_d[i] = input2_d[0];
        }
        return;
      }

      // Handle reduction along a single axis. A negative axis counts from the
      // last dimension of X.
      int reduce_dim_index = dims[0];
      if (reduce_dim_index < 0) {
        reduce_dim_index += input_dim.size();
      }

      // View X as [before_dim, reduce_dim, after_dim] around the reduced axis.
      int64_t before_dim = 1;
      for (int i = 0; i < reduce_dim_index; ++i) {
        before_dim *= input_dim[i];
      }
      int64_t reduce_dim = input_dim[reduce_dim_index];
      int64_t after_dim = 1;
      for (int i = reduce_dim_index + 1; i < input_dim.size(); ++i) {
        after_dim *= input_dim[i];
      }

      // The gradient of Out is indexed as [before_dim, after_dim]; each of its
      // values is repeated reduce_dim times along the reduced axis.
      for (int64_t i = 0; i < before_dim; ++i) {
        for (int64_t j = 0; j < reduce_dim; ++j) {
          for (int64_t k = 0; k < after_dim; ++k) {
            output_d[i * reduce_dim * after_dim + j * after_dim + k] =
                input2_d[i * after_dim + k];
          }
        }
      }
      return;
    }

    // Default: use the Eigen broadcast implementation.
    ReduceGradKernel<DeviceContext, T, Functor> kernel;
    kernel.Compute(context);
  }
};
struct
SumFunctor
{
struct
SumFunctor
{
template
<
typename
DeviceContext
,
typename
X
,
typename
Y
,
typename
Dim
>
template
<
typename
DeviceContext
,
typename
X
,
typename
Y
,
typename
Dim
>
void
operator
()(
const
DeviceContext
&
place
,
X
*
x
,
Y
*
y
,
const
Dim
&
dim
)
{
void
operator
()(
const
DeviceContext
&
place
,
X
*
x
,
Y
*
y
,
const
Dim
&
dim
)
{
...
...
python/paddle/fluid/tests/unittests/test_reduce_op.py
浏览文件 @
273f7375
...
@@ -115,14 +115,56 @@ class Test2DReduce1(Test1DReduce):
...
@@ -115,14 +115,56 @@ class Test2DReduce1(Test1DReduce):
self
.
op_type
=
"reduce_sum"
self
.
op_type
=
"reduce_sum"
self
.
attrs
=
{
'dim'
:
[
1
]}
self
.
attrs
=
{
'dim'
:
[
1
]}
self
.
inputs
=
{
'X'
:
np
.
random
.
random
((
20
,
10
)).
astype
(
"float64"
)}
self
.
inputs
=
{
'X'
:
np
.
random
.
random
((
20
,
10
)).
astype
(
"float64"
)}
self
.
outputs
=
{
'Out'
:
self
.
inputs
[
'X'
].
sum
(
axis
=
1
)}
self
.
outputs
=
{
'Out'
:
self
.
inputs
[
'X'
].
sum
(
axis
=
tuple
(
self
.
attrs
[
'dim'
]))
}
class Test3DReduce0(Test1DReduce):
    """reduce_sum over axis 1 of a random (5, 6, 7) float64 array."""

    def setUp(self):
        reduce_axes = [1]
        x = np.random.random((5, 6, 7)).astype("float64")
        self.op_type = "reduce_sum"
        self.attrs = {'dim': reduce_axes}
        self.inputs = {'X': x}
        # Expected output is NumPy's sum over the same axes.
        self.outputs = {'Out': x.sum(axis=tuple(reduce_axes))}
class Test3DReduce1(Test1DReduce):
    """reduce_sum over axis 2 of a random (5, 6, 7) float64 array."""

    def setUp(self):
        reduce_axes = [2]
        x = np.random.random((5, 6, 7)).astype("float64")
        self.op_type = "reduce_sum"
        self.attrs = {'dim': reduce_axes}
        self.inputs = {'X': x}
        # Expected output is NumPy's sum over the same axes.
        self.outputs = {'Out': x.sum(axis=tuple(reduce_axes))}
class Test3DReduce2(Test1DReduce):
    """reduce_sum over axis -2 (negative index) of a random (5, 6, 7) float64 array."""

    def setUp(self):
        reduce_axes = [-2]
        x = np.random.random((5, 6, 7)).astype("float64")
        self.op_type = "reduce_sum"
        self.attrs = {'dim': reduce_axes}
        self.inputs = {'X': x}
        # Expected output is NumPy's sum over the same axes.
        self.outputs = {'Out': x.sum(axis=tuple(reduce_axes))}
class Test3DReduce3(Test1DReduce):
    """reduce_sum over axes 1 and 2 of a random (5, 6, 7) float64 array."""

    def setUp(self):
        reduce_axes = [1, 2]
        x = np.random.random((5, 6, 7)).astype("float64")
        self.op_type = "reduce_sum"
        self.attrs = {'dim': reduce_axes}
        self.inputs = {'X': x}
        # Expected output is NumPy's sum over the same axes.
        self.outputs = {'Out': x.sum(axis=tuple(reduce_axes))}
class
TestKeepDimReduce
(
Test1DReduce
):
class
TestKeepDimReduce
(
Test1DReduce
):
def
setUp
(
self
):
def
setUp
(
self
):
self
.
op_type
=
"reduce_sum"
self
.
op_type
=
"reduce_sum"
self
.
inputs
=
{
'X'
:
np
.
random
.
random
((
5
,
6
,
10
)).
astype
(
"float64"
)}
self
.
inputs
=
{
'X'
:
np
.
random
.
random
((
5
,
6
,
10
)).
astype
(
"float64"
)}
self
.
attrs
=
{
'dim'
:
[
-
2
],
'keep_dim'
:
True
}
self
.
attrs
=
{
'dim'
:
[
1
],
'keep_dim'
:
True
}
self
.
outputs
=
{
self
.
outputs
=
{
'Out'
:
self
.
inputs
[
'X'
].
sum
(
axis
=
tuple
(
self
.
attrs
[
'dim'
]),
'Out'
:
self
.
inputs
[
'X'
].
sum
(
axis
=
tuple
(
self
.
attrs
[
'dim'
]),
keepdims
=
self
.
attrs
[
'keep_dim'
])
keepdims
=
self
.
attrs
[
'keep_dim'
])
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录