Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
9eb0ab1d
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
9eb0ab1d
编写于
11月 14, 2018
作者:
T
Tao Luo
提交者:
GitHub
11月 14, 2018
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #14384 from tensor-tang/refine/lrn
Refine lrn cpu forward
上级
e65cbd3b
b4dfba17
变更
5
隐藏空白更改
内联
并排
Showing
5 changed files
with
107 additions
and
27 deletions
+107
-27
paddle/fluid/operators/lrn_op.cc
paddle/fluid/operators/lrn_op.cc
+39
-26
paddle/fluid/operators/lrn_op.h
paddle/fluid/operators/lrn_op.h
+0
-1
paddle/fluid/operators/math/blas.h
paddle/fluid/operators/math/blas.h
+16
-0
paddle/fluid/operators/math/blas_impl.h
paddle/fluid/operators/math/blas_impl.h
+48
-0
paddle/fluid/platform/dynload/mklml.h
paddle/fluid/platform/dynload/mklml.h
+4
-0
未找到文件。
paddle/fluid/operators/lrn_op.cc
浏览文件 @
9eb0ab1d
...
@@ -14,6 +14,7 @@ limitations under the License. */
...
@@ -14,6 +14,7 @@ limitations under the License. */
#include "paddle/fluid/operators/lrn_op.h"
#include "paddle/fluid/operators/lrn_op.h"
#include <string>
#include <string>
#include "paddle/fluid/operators/math/blas.h"
#ifdef PADDLE_WITH_MKLDNN
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
#endif
#endif
...
@@ -29,34 +30,43 @@ struct LRNFunctor<platform::CPUDeviceContext, T> {
...
@@ -29,34 +30,43 @@ struct LRNFunctor<platform::CPUDeviceContext, T> {
const
framework
::
Tensor
&
input
,
framework
::
Tensor
*
out
,
const
framework
::
Tensor
&
input
,
framework
::
Tensor
*
out
,
framework
::
Tensor
*
mid
,
int
N
,
int
C
,
int
H
,
int
W
,
int
n
,
framework
::
Tensor
*
mid
,
int
N
,
int
C
,
int
H
,
int
W
,
int
n
,
T
k
,
T
alpha
,
T
beta
)
{
T
k
,
T
alpha
,
T
beta
)
{
auto
x_v
=
framework
::
EigenVector
<
T
>::
Flatten
(
input
);
const
T
*
idata
=
input
.
data
<
T
>
();
auto
place
=
ctx
.
GetPlace
();
const
int
start
=
-
(
n
-
1
)
/
2
;
auto
blas
=
math
::
GetBlas
<
platform
::
CPUDeviceContext
,
T
>
(
ctx
);
const
int
end
=
start
+
n
;
T
*
odata
=
out
->
mutable_data
<
T
>
(
place
);
T
*
mdata
=
mid
->
mutable_data
<
T
>
(
place
);
auto
e_mid
=
framework
::
EigenTensor
<
T
,
4
>::
From
(
*
mid
);
Tensor
squared
;
e_mid
=
e_mid
.
constant
(
k
);
T
*
sdata
=
squared
.
mutable_data
<
T
>
({
1
,
C
+
n
-
1
,
H
,
W
},
place
);
std
::
memset
(
sdata
,
0
,
sizeof
(
T
)
*
squared
.
numel
());
auto
e_x
=
framework
::
EigenTensor
<
T
,
4
>::
From
(
input
);
for
(
int
i
=
0
;
i
<
mid
->
numel
();
++
i
)
{
for
(
int
m
=
0
;
m
<
N
;
m
++
)
{
mdata
[
i
]
=
k
;
for
(
int
i
=
0
;
i
<
C
;
i
++
)
{
}
for
(
int
c
=
start
;
c
<
end
;
c
++
)
{
int
img_size
=
H
*
W
;
int
ch
=
i
+
c
;
int
fea_size
=
C
*
img_size
;
if
(
ch
>=
0
&&
ch
<
C
)
{
int
pre_pad
=
(
n
-
1
)
/
2
;
auto
s
=
e_mid
.
slice
(
Eigen
::
array
<
int
,
4
>
({{
m
,
i
,
0
,
0
}}),
// compute batches one by one
Eigen
::
array
<
int
,
4
>
({{
1
,
1
,
H
,
W
}}));
for
(
int
i
=
0
;
i
<
N
;
++
i
)
{
blas
.
VSQR
(
fea_size
,
idata
+
i
*
fea_size
,
sdata
+
pre_pad
*
img_size
);
auto
r
=
e_x
.
slice
(
Eigen
::
array
<
int
,
4
>
({{
m
,
ch
,
0
,
0
}}),
// init the first channel of mid
Eigen
::
array
<
int
,
4
>
({{
1
,
1
,
H
,
W
}}));
for
(
int
c
=
0
;
c
<
n
;
++
c
)
{
blas
.
AXPY
(
img_size
,
alpha
,
sdata
+
c
*
img_size
,
mdata
+
i
*
fea_size
);
s
+=
alpha
*
r
.
square
();
}
}
for
(
int
c
=
1
;
c
<
C
;
++
c
)
{
}
// copy previous scale
int
mid_offset
=
i
*
fea_size
+
c
*
img_size
;
std
::
memcpy
(
mdata
+
mid_offset
,
mdata
+
mid_offset
-
img_size
,
img_size
*
sizeof
(
T
));
// add last
blas
.
AXPY
(
img_size
,
alpha
,
sdata
+
(
c
+
n
-
1
)
*
img_size
,
mdata
+
mid_offset
);
// sub rest
blas
.
AXPY
(
img_size
,
-
alpha
,
sdata
+
(
c
-
1
)
*
img_size
,
mdata
+
mid_offset
);
}
}
}
}
// compute the final output
auto
out_e
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
out
);
blas
.
VPOW
(
mid
->
numel
(),
mdata
,
-
beta
,
odata
);
out_e
=
x_v
*
e_mid
.
reshape
(
Eigen
::
DSizes
<
int
,
1
>
(
e_mid
.
size
())).
pow
(
-
be
ta
);
blas
.
VMUL
(
mid
->
numel
(),
odata
,
idata
,
oda
ta
);
}
}
};
};
template
struct
LRNFunctor
<
platform
::
CPUDeviceContext
,
float
>;
template
struct
LRNFunctor
<
platform
::
CPUDeviceContext
,
float
>;
...
@@ -156,6 +166,9 @@ class LRNOp : public framework::OperatorWithKernel {
...
@@ -156,6 +166,9 @@ class LRNOp : public framework::OperatorWithKernel {
auto
x_dim
=
ctx
->
GetInputDim
(
"X"
);
auto
x_dim
=
ctx
->
GetInputDim
(
"X"
);
PADDLE_ENFORCE_EQ
(
x_dim
.
size
(),
4
,
"Input(X)'rank of LRNOp should be 4."
);
PADDLE_ENFORCE_EQ
(
x_dim
.
size
(),
4
,
"Input(X)'rank of LRNOp should be 4."
);
int
n
=
ctx
->
Attrs
().
Get
<
int
>
(
"n"
);
PADDLE_ENFORCE
(
n
>
0
&&
n
%
2
==
1
,
"n should be positive odd value"
);
ctx
->
SetOutputDim
(
"Out"
,
x_dim
);
ctx
->
SetOutputDim
(
"Out"
,
x_dim
);
ctx
->
ShareLoD
(
"X"
,
/*->*/
"Out"
);
ctx
->
ShareLoD
(
"X"
,
/*->*/
"Out"
);
ctx
->
SetOutputDim
(
"MidOut"
,
x_dim
);
ctx
->
SetOutputDim
(
"MidOut"
,
x_dim
);
...
...
paddle/fluid/operators/lrn_op.h
浏览文件 @
9eb0ab1d
...
@@ -60,7 +60,6 @@ class LRNKernel : public framework::OpKernel<T> {
...
@@ -60,7 +60,6 @@ class LRNKernel : public framework::OpKernel<T> {
T
beta
=
ctx
.
Attr
<
float
>
(
"beta"
);
T
beta
=
ctx
.
Attr
<
float
>
(
"beta"
);
T
k
=
ctx
.
Attr
<
float
>
(
"k"
);
T
k
=
ctx
.
Attr
<
float
>
(
"k"
);
PADDLE_ENFORCE
(
n
>
0
,
"n should >= 0"
);
PADDLE_ENFORCE
(
alpha
>=
0.0
,
"alpha should >= 0.0"
);
PADDLE_ENFORCE
(
alpha
>=
0.0
,
"alpha should >= 0.0"
);
PADDLE_ENFORCE
(
beta
>=
0.0
,
"beta should >= 0.0"
);
PADDLE_ENFORCE
(
beta
>=
0.0
,
"beta should >= 0.0"
);
PADDLE_ENFORCE
(
k
>=
0.0
,
"k should >= 0.0"
);
PADDLE_ENFORCE
(
k
>=
0.0
,
"k should >= 0.0"
);
...
...
paddle/fluid/operators/math/blas.h
浏览文件 @
9eb0ab1d
...
@@ -152,6 +152,12 @@ class Blas {
...
@@ -152,6 +152,12 @@ class Blas {
template
<
typename
T
>
template
<
typename
T
>
void
VEXP
(
int
n
,
const
T
*
x
,
T
*
y
)
const
;
void
VEXP
(
int
n
,
const
T
*
x
,
T
*
y
)
const
;
// Vector "SQR" primitive: takes a length n, an input pointer x and an output
// pointer y.
// NOTE(review): intended as an element-wise square, y[i] = x[i] * x[i] (the
// MKLML backends are vsSqr/vdSqr) -- not a square root; confirm that every
// implementation agrees.
template
<
typename
T
>
void
VSQR
(
int
n
,
const
T
*
x
,
T
*
y
)
const
;
// Vector power primitive: takes a length n, an input pointer x, a scalar
// exponent alpha and an output pointer y.
// NOTE(review): presumably y[i] = pow(x[i], alpha) for i in [0, n); note that
// `alpha` here is the exponent, not a scale factor as in AXPY.
template
<
typename
T
>
void
VPOW
(
int
n
,
const
T
*
x
,
T
alpha
,
T
*
y
)
const
;
template
<
typename
T
>
template
<
typename
T
>
void
GEMV
(
bool
trans_a
,
int
M
,
int
N
,
T
alpha
,
const
T
*
A
,
const
T
*
B
,
T
beta
,
void
GEMV
(
bool
trans_a
,
int
M
,
int
N
,
T
alpha
,
const
T
*
A
,
const
T
*
B
,
T
beta
,
T
*
C
)
const
;
T
*
C
)
const
;
...
@@ -238,6 +244,16 @@ class BlasT : private Blas<DeviceContext> {
...
@@ -238,6 +244,16 @@ class BlasT : private Blas<DeviceContext> {
Base
()
->
template
VEXP
<
T
>(
args
...);
Base
()
->
template
VEXP
<
T
>(
args
...);
}
}
// Typed convenience wrapper: forwards every argument unchanged to the base
// class's VSQR, instantiated with this wrapper's element type T.
template
<
typename
...
ARGS
>
void
VSQR
(
ARGS
...
args
)
const
{
Base
()
->
template
VSQR
<
T
>(
args
...);
}
// Typed convenience wrapper: forwards every argument unchanged to the base
// class's VPOW, instantiated with this wrapper's element type T.
template
<
typename
...
ARGS
>
void
VPOW
(
ARGS
...
args
)
const
{
Base
()
->
template
VPOW
<
T
>(
args
...);
}
template
<
typename
...
ARGS
>
template
<
typename
...
ARGS
>
void
GEMV
(
ARGS
...
args
)
const
{
void
GEMV
(
ARGS
...
args
)
const
{
Base
()
->
template
GEMV
<
T
>(
args
...);
Base
()
->
template
GEMV
<
T
>(
args
...);
...
...
paddle/fluid/operators/math/blas_impl.h
浏览文件 @
9eb0ab1d
...
@@ -12,6 +12,7 @@
...
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#pragma once
#pragma once
#include <cmath>
#include <limits>
#include <limits>
#include <vector>
#include <vector>
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/math/math_function.h"
...
@@ -102,6 +103,16 @@ struct CBlas<float> {
...
@@ -102,6 +103,16 @@ struct CBlas<float> {
static
void
VEXP
(
ARGS
...
args
)
{
static
void
VEXP
(
ARGS
...
args
)
{
platform
::
dynload
::
vsExp
(
args
...);
platform
::
dynload
::
vsExp
(
args
...);
}
}
// Forwards all arguments to the dynamically loaded vsSqr routine.
// NOTE(review): presumably MKL VML's single-precision element-wise square
// (y[i] = x[i]^2) -- confirm against the MKL reference.
template
<
typename
...
ARGS
>
static
void
VSQR
(
ARGS
...
args
)
{
platform
::
dynload
::
vsSqr
(
args
...);
}
// Forwards all arguments to the dynamically loaded vsPowx routine.
// NOTE(review): presumably MKL VML's single-precision "vector to a constant
// scalar power" -- confirm argument order (n, x, exponent, y) against MKL.
template
<
typename
...
ARGS
>
static
void
VPOW
(
ARGS
...
args
)
{
platform
::
dynload
::
vsPowx
(
args
...);
}
};
};
template
<
>
template
<
>
...
@@ -182,6 +193,16 @@ struct CBlas<double> {
...
@@ -182,6 +193,16 @@ struct CBlas<double> {
static
void
VEXP
(
ARGS
...
args
)
{
static
void
VEXP
(
ARGS
...
args
)
{
platform
::
dynload
::
vdExp
(
args
...);
platform
::
dynload
::
vdExp
(
args
...);
}
}
// Forwards all arguments to the dynamically loaded vdSqr routine.
// NOTE(review): presumably MKL VML's double-precision element-wise square
// (y[i] = x[i]^2) -- confirm against the MKL reference.
template
<
typename
...
ARGS
>
static
void
VSQR
(
ARGS
...
args
)
{
platform
::
dynload
::
vdSqr
(
args
...);
}
// Forwards all arguments to the dynamically loaded vdPowx routine.
// NOTE(review): presumably MKL VML's double-precision "vector to a constant
// scalar power" -- confirm argument order (n, x, exponent, y) against MKL.
template
<
typename
...
ARGS
>
static
void
VPOW
(
ARGS
...
args
)
{
platform
::
dynload
::
vdPowx
(
args
...);
}
};
};
#else
#else
...
@@ -241,6 +262,8 @@ struct CBlas<platform::float16> {
...
@@ -241,6 +262,8 @@ struct CBlas<platform::float16> {
}
}
static
void
VMUL
(...)
{
PADDLE_THROW
(
"float16 VMUL not supported on CPU"
);
}
static
void
VMUL
(...)
{
PADDLE_THROW
(
"float16 VMUL not supported on CPU"
);
}
static
void
VEXP
(...)
{
PADDLE_THROW
(
"float16 VEXP not supported on CPU"
);
}
static
void
VEXP
(...)
{
PADDLE_THROW
(
"float16 VEXP not supported on CPU"
);
}
// Placeholder: accepts any arguments and unconditionally raises via
// PADDLE_THROW, because float16 VSQR has no CPU implementation.
static
void
VSQR
(...)
{
PADDLE_THROW
(
"float16 VSQR not supported on CPU"
);
}
// Placeholder: accepts any arguments and unconditionally raises via
// PADDLE_THROW, because float16 VPOW has no CPU implementation.
static
void
VPOW
(...)
{
PADDLE_THROW
(
"float16 VPOW not supported on CPU"
);
}
static
void
DOT
(...)
{
PADDLE_THROW
(
"float16 DOT not supported on CPU"
);
};
static
void
DOT
(...)
{
PADDLE_THROW
(
"float16 DOT not supported on CPU"
);
};
static
void
SCAL
(...)
{
PADDLE_THROW
(
"float16 SCAL not supported on CPU"
);
};
static
void
SCAL
(...)
{
PADDLE_THROW
(
"float16 SCAL not supported on CPU"
);
};
#ifdef PADDLE_WITH_MKLML
#ifdef PADDLE_WITH_MKLML
...
@@ -398,6 +421,31 @@ void Blas<platform::CPUDeviceContext>::VEXP(int n, const T *x, T *y) const {
...
@@ -398,6 +421,31 @@ void Blas<platform::CPUDeviceContext>::VEXP(int n, const T *x, T *y) const {
#endif
#endif
}
}
/**
 * CPU element-wise square: y[i] = x[i] * x[i] for every i in [0, n).
 *
 * When built with PADDLE_WITH_MKLML the call is delegated to CBlas<T>::VSQR;
 * otherwise a plain scalar loop is used.
 *
 * @param n  number of elements to process.
 * @param x  input array, read at indices [0, n).
 * @param y  output array, written at indices [0, n).
 */
template <>
template <typename T>
void Blas<platform::CPUDeviceContext>::VSQR(int n, const T *x, T *y) const {
#ifdef PADDLE_WITH_MKLML
  CBlas<T>::VSQR(n, x, y);
#else
  // "SQR" means square, not square root: the fallback must produce the same
  // values as the MKLML path (vsSqr/vdSqr), otherwise non-MKLML builds of
  // callers such as the LRN forward pass silently compute wrong results.
  for (int i = 0; i < n; ++i) {
    y[i] = x[i] * x[i];
  }
#endif
}
/**
 * CPU element-wise power with a scalar exponent: y[i] = pow(x[i], a) for
 * every i in [0, n).
 *
 * When built with PADDLE_WITH_MKLML the call is delegated to CBlas<T>::VPOW;
 * otherwise each element goes through std::pow.
 *
 * @param n  number of elements to process.
 * @param x  input array, read at indices [0, n).
 * @param a  exponent applied to every element.
 * @param y  output array, written at indices [0, n).
 */
template <>
template <typename T>
void Blas<platform::CPUDeviceContext>::VPOW(int n, const T *x, T a,
                                            T *y) const {
#ifdef PADDLE_WITH_MKLML
  CBlas<T>::VPOW(n, x, a, y);
#else
  // Scalar fallback: walk the arrays front to back, one element at a time.
  int idx = 0;
  while (idx < n) {
    y[idx] = std::pow(x[idx], a);
    ++idx;
  }
#endif
}
template
<
>
template
<
>
template
<
typename
T
>
template
<
typename
T
>
T
Blas
<
platform
::
CPUDeviceContext
>::
DOT
(
int
n
,
const
T
*
x
,
const
T
*
y
)
const
{
T
Blas
<
platform
::
CPUDeviceContext
>::
DOT
(
int
n
,
const
T
*
x
,
const
T
*
y
)
const
{
...
...
paddle/fluid/platform/dynload/mklml.h
浏览文件 @
9eb0ab1d
...
@@ -76,6 +76,10 @@ extern void* mklml_dso_handle;
...
@@ -76,6 +76,10 @@ extern void* mklml_dso_handle;
__macro(vdMul); \
__macro(vdMul); \
__macro(vsExp); \
__macro(vsExp); \
__macro(vdExp); \
__macro(vdExp); \
__macro(vsSqr); \
__macro(vdSqr); \
__macro(vsPowx); \
__macro(vdPowx); \
__macro(MKL_Set_Num_Threads)
__macro(MKL_Set_Num_Threads)
MKLML_ROUTINE_EACH
(
DECLARE_DYNAMIC_LOAD_MKLML_WRAP
);
MKLML_ROUTINE_EACH
(
DECLARE_DYNAMIC_LOAD_MKLML_WRAP
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录