Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
e43c8f33
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
e43c8f33
编写于
5月 17, 2018
作者:
T
Tomasz Patejko
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
MKL elementwise add: elementwise_add uses vAdd VML function when MKL is used
上级
174d884d
变更
4
显示空白变更内容
内联
并排
Showing
4 changed files
with
80 additions
and
9 deletions
+80
-9
paddle/fluid/operators/elementwise_add_op.cc
paddle/fluid/operators/elementwise_add_op.cc
+4
-4
paddle/fluid/operators/elementwise_add_op.h
paddle/fluid/operators/elementwise_add_op.h
+18
-5
paddle/fluid/operators/math/blas.h
paddle/fluid/operators/math/blas.h
+16
-0
paddle/fluid/operators/math/blas_impl.h
paddle/fluid/operators/math/blas_impl.h
+42
-0
未找到文件。
paddle/fluid/operators/elementwise_add_op.cc
浏览文件 @
e43c8f33
...
@@ -18,10 +18,10 @@ namespace ops = paddle::operators;
...
@@ -18,10 +18,10 @@ namespace ops = paddle::operators;
REGISTER_ELEMWISE_OP
(
elementwise_add
,
"Add"
,
"Out = X + Y"
);
REGISTER_ELEMWISE_OP
(
elementwise_add
,
"Add"
,
"Out = X + Y"
);
REGISTER_OP_CPU_KERNEL
(
REGISTER_OP_CPU_KERNEL
(
elementwise_add
,
elementwise_add
,
ops
::
ElementwiseAddKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
ops
::
ElementwiseAddKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
);
ops
::
ElementwiseAddKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
>
,
// ops::ElementwiseAddKernel<paddle::platform::CPUDeviceContext, double>);
ops
::
ElementwiseAddKernel
<
paddle
::
platform
::
CPUDeviceContext
,
int
>
,
//
ops::ElementwiseAddKernel<paddle::platform::CPUDeviceContext, int>,
ops
::
ElementwiseAddKernel
<
paddle
::
platform
::
CPUDeviceContext
,
int64_t
>
);
//
ops::ElementwiseAddKernel<paddle::platform::CPUDeviceContext, int64_t>);
REGISTER_OP_CPU_KERNEL
(
REGISTER_OP_CPU_KERNEL
(
elementwise_add_grad
,
elementwise_add_grad
,
ops
::
ElementwiseAddGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
ops
::
ElementwiseAddGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
...
...
paddle/fluid/operators/elementwise_add_op.h
浏览文件 @
e43c8f33
...
@@ -14,7 +14,9 @@ limitations under the License. */
...
@@ -14,7 +14,9 @@ limitations under the License. */
#pragma once
#pragma once
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/operators/elementwise_op_function.h"
#include "paddle/fluid/operators/elementwise_op_function.h"
#include "paddle/fluid/operators/math/blas.h"
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
...
@@ -30,14 +32,25 @@ class ElementwiseAddKernel : public framework::OpKernel<T> {
...
@@ -30,14 +32,25 @@ class ElementwiseAddKernel : public framework::OpKernel<T> {
void Compute(const framework::ExecutionContext& ctx) const override {
  // Computes Out = X + Y. On CPU, when the operand shapes match exactly,
  // dispatch to the Blas VADD primitive (backed by MKL vsAdd/vdAdd when
  // PADDLE_WITH_MKLML is enabled); otherwise fall back to the generic
  // broadcasting elementwise path.
  using Tensor = framework::Tensor;

  // NOTE(review): keep the `*` sigil on auto pointer declarations —
  // `const auto x` hides that these are pointers (readability-qualified-auto).
  const auto* x = ctx.Input<Tensor>("X");
  const auto* y = ctx.Input<Tensor>("Y");
  auto* z = ctx.Output<Tensor>("Out");
  z->mutable_data<T>(ctx.GetPlace());
  int axis = ctx.Attr<int>("axis");

  auto dims_equal = x->dims() == y->dims();
  if (platform::is_cpu_place(ctx.GetPlace()) && dims_equal) {
    // Flatten to 1-D views so VADD can treat the tensors as contiguous
    // buffers of x->numel() elements.
    auto eigen_x = framework::EigenVector<T>::Flatten(*x);
    auto eigen_y = framework::EigenVector<T>::Flatten(*y);
    auto eigen_z = framework::EigenVector<T>::Flatten(*z);

    auto blas = math::GetBlas<DeviceContext, T>(ctx);
    blas.VADD(x->numel(), eigen_x.data(), eigen_y.data(), eigen_z.data());
  } else {
    // Shapes differ (or non-CPU place): broadcast Y against X along `axis`.
    ElementwiseComputeEx<AddFunctor<T>, DeviceContext, T>(ctx, x, y, axis,
                                                          AddFunctor<T>(), z);
  }
}
};
};
template
<
typename
T
>
template
<
typename
T
>
...
...
paddle/fluid/operators/math/blas.h
浏览文件 @
e43c8f33
...
@@ -125,6 +125,12 @@ class Blas {
...
@@ -125,6 +125,12 @@ class Blas {
template
<
typename
T
>
template
<
typename
T
>
void
AXPY
(
int
n
,
T
alpha
,
const
T
*
x
,
T
*
y
)
const
;
void
AXPY
(
int
n
,
T
alpha
,
const
T
*
x
,
T
*
y
)
const
;
template
<
typename
T
>
void
VADD
(
int
n
,
const
T
*
x
,
const
T
*
y
,
T
*
z
)
const
;
template
<
typename
T
>
void
VCOPY
(
int
n
,
const
T
*
x
,
T
*
y
)
const
;
template
<
typename
T
>
template
<
typename
T
>
void
GEMV
(
bool
trans_a
,
int
M
,
int
N
,
T
alpha
,
const
T
*
A
,
const
T
*
B
,
T
beta
,
void
GEMV
(
bool
trans_a
,
int
M
,
int
N
,
T
alpha
,
const
T
*
A
,
const
T
*
B
,
T
beta
,
T
*
C
)
const
;
T
*
C
)
const
;
...
@@ -163,6 +169,16 @@ class BlasT : private Blas<DeviceContext> {
...
@@ -163,6 +169,16 @@ class BlasT : private Blas<DeviceContext> {
Base
()
->
template
AXPY
<
T
>(
args
...);
Base
()
->
template
AXPY
<
T
>(
args
...);
}
}
template
<
typename
...
ARGS
>
void
VADD
(
ARGS
...
args
)
const
{
Base
()
->
template
VADD
<
T
>(
args
...);
}
template
<
typename
...
ARGS
>
void
VCOPY
(
ARGS
...
args
)
const
{
Base
()
->
template
VCOPY
<
T
>(
args
...);
}
template
<
typename
...
ARGS
>
template
<
typename
...
ARGS
>
void
GEMV
(
ARGS
...
args
)
const
{
void
GEMV
(
ARGS
...
args
)
const
{
Base
()
->
template
GEMV
<
T
>(
args
...);
Base
()
->
template
GEMV
<
T
>(
args
...);
...
...
paddle/fluid/operators/math/blas_impl.h
浏览文件 @
e43c8f33
...
@@ -34,6 +34,18 @@ struct CBlas<float> {
...
@@ -34,6 +34,18 @@ struct CBlas<float> {
cblas_saxpy
(
args
...);
cblas_saxpy
(
args
...);
}
}
#ifdef PADDLE_WITH_MKLML
template
<
typename
...
ARGS
>
static
void
VADD
(
ARGS
...
args
)
{
vsAdd
(
args
...);
}
#endif
template
<
typename
...
ARGS
>
static
void
VCOPY
(
ARGS
...
args
)
{
cblas_scopy
(
args
...);
}
template
<
typename
...
ARGS
>
template
<
typename
...
ARGS
>
static
void
GEMV
(
ARGS
...
args
)
{
static
void
GEMV
(
ARGS
...
args
)
{
cblas_sgemv
(
args
...);
cblas_sgemv
(
args
...);
...
@@ -59,6 +71,18 @@ struct CBlas<double> {
...
@@ -59,6 +71,18 @@ struct CBlas<double> {
cblas_daxpy
(
args
...);
cblas_daxpy
(
args
...);
}
}
#ifdef PADDLE_WITH_MKLML
template
<
typename
...
ARGS
>
static
void
VADD
(
ARGS
...
args
)
{
vdAdd
(
args
...);
}
#endif
template
<
typename
...
ARGS
>
static
void
VCOPY
(
ARGS
...
args
)
{
cblas_dcopy
(
args
...);
}
template
<
typename
...
ARGS
>
template
<
typename
...
ARGS
>
static
void
GEMV
(
ARGS
...
args
)
{
static
void
GEMV
(
ARGS
...
args
)
{
cblas_dgemv
(
args
...);
cblas_dgemv
(
args
...);
...
@@ -139,6 +163,24 @@ void Blas<platform::CPUDeviceContext>::AXPY(int n, T alpha, const T *x,
...
@@ -139,6 +163,24 @@ void Blas<platform::CPUDeviceContext>::AXPY(int n, T alpha, const T *x,
CBlas
<
T
>::
AXPY
(
n
,
alpha
,
x
,
1
,
y
,
1
);
CBlas
<
T
>::
AXPY
(
n
,
alpha
,
x
,
1
,
y
,
1
);
}
}
template <>
template <typename T>
void Blas<platform::CPUDeviceContext>::VCOPY(int n, const T *x, T *y) const {
  // y[i] = x[i] for i in [0, n); unit stride on both vectors.
  CBlas<T>::VCOPY(n, x, 1, y, 1);
}

template <>
template <typename T>
void Blas<platform::CPUDeviceContext>::VADD(int n, const T *x, const T *y,
                                            T *z) const {
#ifdef PADDLE_WITH_MKLML
  // MKL's VML provides a fused vector add (vsAdd/vdAdd): z = x + y.
  CBlas<T>::VADD(n, x, y, z);
#else
  // Plain CBLAS has no vector-add primitive; emulate it as
  // z = y, then z += 1 * x (copy followed by axpy).
  this->template VCOPY<T>(n, y, z);
  // Cast the unit scalar explicitly: a bare `1.` is a double literal and
  // narrows implicitly when T = float.
  this->template AXPY<T>(n, static_cast<T>(1.0), x, z);
#endif
}
template
<
>
template
<
>
template
<
typename
T
>
template
<
typename
T
>
void
Blas
<
platform
::
CPUDeviceContext
>::
GEMV
(
bool
trans_a
,
int
M
,
int
N
,
T
alpha
,
void
Blas
<
platform
::
CPUDeviceContext
>::
GEMV
(
bool
trans_a
,
int
M
,
int
N
,
T
alpha
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录