Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
1aaec571
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2299
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
1aaec571
编写于
12月 20, 2018
作者:
T
tensor-tang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix enum style
test=develop
上级
facfecbd
变更
27
隐藏空白更改
内联
并排
Showing
27 changed file
with
301 addition
and
302 deletion
+301
-302
paddle/fluid/operators/crf_decoding_op.h
paddle/fluid/operators/crf_decoding_op.h
+1
-1
paddle/fluid/operators/elementwise/elementwise_mul_mkldnn_op.cc
.../fluid/operators/elementwise/elementwise_mul_mkldnn_op.cc
+1
-1
paddle/fluid/operators/fused/fusion_gru_op.cc
paddle/fluid/operators/fused/fusion_gru_op.cc
+23
-23
paddle/fluid/operators/fused/fusion_lstm_op.cc
paddle/fluid/operators/fused/fusion_lstm_op.cc
+26
-26
paddle/fluid/operators/jit/benchmark.cc
paddle/fluid/operators/jit/benchmark.cc
+18
-18
paddle/fluid/operators/jit/gen/CMakeLists.txt
paddle/fluid/operators/jit/gen/CMakeLists.txt
+17
-17
paddle/fluid/operators/jit/gen/act.cc
paddle/fluid/operators/jit/gen/act.cc
+5
-5
paddle/fluid/operators/jit/gen/blas.cc
paddle/fluid/operators/jit/gen/blas.cc
+7
-7
paddle/fluid/operators/jit/gen/gru.cc
paddle/fluid/operators/jit/gen/gru.cc
+3
-3
paddle/fluid/operators/jit/gen/gru.h
paddle/fluid/operators/jit/gen/gru.h
+4
-4
paddle/fluid/operators/jit/gen/lstm.cc
paddle/fluid/operators/jit/gen/lstm.cc
+2
-2
paddle/fluid/operators/jit/gen/lstm.h
paddle/fluid/operators/jit/gen/lstm.h
+4
-4
paddle/fluid/operators/jit/helper.cc
paddle/fluid/operators/jit/helper.cc
+25
-26
paddle/fluid/operators/jit/kernel_base.h
paddle/fluid/operators/jit/kernel_base.h
+20
-20
paddle/fluid/operators/jit/more/intrinsic/CMakeLists.txt
paddle/fluid/operators/jit/more/intrinsic/CMakeLists.txt
+2
-2
paddle/fluid/operators/jit/more/intrinsic/crf_decoding.cc
paddle/fluid/operators/jit/more/intrinsic/crf_decoding.cc
+1
-1
paddle/fluid/operators/jit/more/intrinsic/layer_norm.cc
paddle/fluid/operators/jit/more/intrinsic/layer_norm.cc
+1
-1
paddle/fluid/operators/jit/more/mix/CMakeLists.txt
paddle/fluid/operators/jit/more/mix/CMakeLists.txt
+7
-7
paddle/fluid/operators/jit/more/mix/mix.cc
paddle/fluid/operators/jit/more/mix/mix.cc
+26
-26
paddle/fluid/operators/jit/more/mkl/CMakeLists.txt
paddle/fluid/operators/jit/more/mkl/CMakeLists.txt
+6
-6
paddle/fluid/operators/jit/more/mkl/mkl.cc
paddle/fluid/operators/jit/more/mkl/mkl.cc
+6
-6
paddle/fluid/operators/jit/refer/CMakeLists.txt
paddle/fluid/operators/jit/refer/CMakeLists.txt
+19
-19
paddle/fluid/operators/jit/refer/refer.cc
paddle/fluid/operators/jit/refer/refer.cc
+19
-19
paddle/fluid/operators/jit/refer/refer.h
paddle/fluid/operators/jit/refer/refer.h
+4
-4
paddle/fluid/operators/jit/test.cc
paddle/fluid/operators/jit/test.cc
+51
-51
paddle/fluid/operators/layer_norm_op.h
paddle/fluid/operators/layer_norm_op.h
+1
-1
paddle/fluid/operators/math/fc_compute.h
paddle/fluid/operators/math/fc_compute.h
+2
-2
未找到文件。
paddle/fluid/operators/crf_decoding_op.h
浏览文件 @
1aaec571
...
@@ -82,7 +82,7 @@ class CRFDecodingOpKernel : public framework::OpKernel<T> {
...
@@ -82,7 +82,7 @@ class CRFDecodingOpKernel : public framework::OpKernel<T> {
Tensor
track
;
Tensor
track
;
int
*
track_value
=
int
*
track_value
=
track
.
mutable_data
<
int
>
(
emission_dims
,
platform
::
CPUPlace
());
track
.
mutable_data
<
int
>
(
emission_dims
,
platform
::
CPUPlace
());
auto
ker
=
jit
::
Get
<
jit
::
crfd
ecoding
,
jit
::
CRFDecodingTuples
<
T
>
,
auto
ker
=
jit
::
Get
<
jit
::
kCRFD
ecoding
,
jit
::
CRFDecodingTuples
<
T
>
,
platform
::
CPUPlace
>
(
tag_num
);
platform
::
CPUPlace
>
(
tag_num
);
ker
(
static_cast
<
int
>
(
seq_len
),
x
,
w
,
alpha_value
,
track_value
,
tag_num
);
ker
(
static_cast
<
int
>
(
seq_len
),
x
,
w
,
alpha_value
,
track_value
,
tag_num
);
T
max_score
=
-
std
::
numeric_limits
<
T
>::
max
();
T
max_score
=
-
std
::
numeric_limits
<
T
>::
max
();
...
...
paddle/fluid/operators/elementwise/elementwise_mul_mkldnn_op.cc
浏览文件 @
1aaec571
...
@@ -108,7 +108,7 @@ class ElementwiseMulMKLDNNKernel : public framework::OpKernel<T> {
...
@@ -108,7 +108,7 @@ class ElementwiseMulMKLDNNKernel : public framework::OpKernel<T> {
constexpr
int
simd_width
=
16
;
constexpr
int
simd_width
=
16
;
int
C
=
c
/
simd_width
;
int
C
=
c
/
simd_width
;
auto
multiply
=
jit
::
Get
<
jit
::
nchw16cmulnc
,
jit
::
NCHW16CMulNCTuples
<
T
>
,
auto
multiply
=
jit
::
Get
<
jit
::
kNCHW16CMulNC
,
jit
::
NCHW16CMulNCTuples
<
T
>
,
platform
::
CPUPlace
>
(
0
);
platform
::
CPUPlace
>
(
0
);
#pragma omp parallel for collapse(2)
#pragma omp parallel for collapse(2)
for
(
int
ni
=
0
;
ni
<
n
;
ni
++
)
{
for
(
int
ni
=
0
;
ni
<
n
;
ni
++
)
{
...
...
paddle/fluid/operators/fused/fusion_gru_op.cc
浏览文件 @
1aaec571
...
@@ -182,29 +182,29 @@ class FusionGRUKernel : public framework::OpKernel<T> {
...
@@ -182,29 +182,29 @@ class FusionGRUKernel : public framework::OpKernel<T> {
const int total_T = x_dims[0]; \
const int total_T = x_dims[0]; \
const int D3 = wh_dims[1]
const int D3 = wh_dims[1]
#define INIT_OTHER_DEFINES \
#define INIT_OTHER_DEFINES
\
auto* h0 = ctx.Input<Tensor>("H0"); \
auto* h0 = ctx.Input<Tensor>("H0");
\
auto* wx = ctx.Input<Tensor>("WeightX"); \
auto* wx = ctx.Input<Tensor>("WeightX");
\
auto* bias = ctx.Input<Tensor>("Bias"); \
auto* bias = ctx.Input<Tensor>("Bias");
\
auto* hidden_out = ctx.Output<LoDTensor>("Hidden"); \
auto* hidden_out = ctx.Output<LoDTensor>("Hidden");
\
bool is_reverse = ctx.Attr<bool>("is_reverse"); \
bool is_reverse = ctx.Attr<bool>("is_reverse");
\
const int M = x_dims[1]; \
const int M = x_dims[1];
\
const int D = wh_dims[0]; \
const int D = wh_dims[0];
\
const int D2 = D * 2; \
const int D2 = D * 2;
\
const jit::gru_attr_t attr( \
const jit::gru_attr_t attr(
\
D, jit::to_kerneltype(ctx.Attr<std::string>("gate_activation")), \
D, jit::to_kerneltype(ctx.Attr<std::string>("gate_activation")),
\
jit::to_kerneltype(ctx.Attr<std::string>("activation"))); \
jit::to_kerneltype(ctx.Attr<std::string>("activation")));
\
jit::gru_t one_step; \
jit::gru_t one_step;
\
auto ComputeH1 = \
auto ComputeH1 =
\
jit::Get<jit::
gruh
1, jit::GRUTuples<T>, platform::CPUPlace>(attr); \
jit::Get<jit::
kGRUH
1, jit::GRUTuples<T>, platform::CPUPlace>(attr); \
auto ComputeHtPart1 = \
auto ComputeHtPart1 =
\
jit::Get<jit::
gruhtp
art1, jit::GRUTuples<T>, platform::CPUPlace>(attr); \
jit::Get<jit::
kGRUHtP
art1, jit::GRUTuples<T>, platform::CPUPlace>(attr); \
auto ComputeHtPart2 = \
auto ComputeHtPart2 =
\
jit::Get<jit::
gruhtp
art2, jit::GRUTuples<T>, platform::CPUPlace>(attr); \
jit::Get<jit::
kGRUHtP
art2, jit::GRUTuples<T>, platform::CPUPlace>(attr); \
const T* x_data = x->data<T>(); \
const T* x_data = x->data<T>();
\
const T* wx_data = wx->data<T>(); \
const T* wx_data = wx->data<T>();
\
const T* wh_data = wh->data<T>(); \
const T* wh_data = wh->data<T>();
\
auto place = ctx.GetPlace(); \
auto place = ctx.GetPlace();
\
T* xx_data = xx->mutable_data<T>(place)
T* xx_data = xx->mutable_data<T>(place)
void
SeqCompute
(
const
framework
::
ExecutionContext
&
ctx
)
const
{
void
SeqCompute
(
const
framework
::
ExecutionContext
&
ctx
)
const
{
...
...
paddle/fluid/operators/fused/fusion_lstm_op.cc
浏览文件 @
1aaec571
...
@@ -235,32 +235,32 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
...
@@ -235,32 +235,32 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
const int D = wh_dims[0]; \
const int D = wh_dims[0]; \
const int D4 = wh_dims[1]
const int D4 = wh_dims[1]
#define INIT_OTHER_DEFINES \
#define INIT_OTHER_DEFINES
\
const T* x_data = x->data<T>(); \
const T* x_data = x->data<T>();
\
const T* wx_data = wx->data<T>(); \
const T* wx_data = wx->data<T>();
\
const T* wh_data = wh->data<T>(); \
const T* wh_data = wh->data<T>();
\
/* diagonal weight*/
\
/* diagonal weight*/
\
const T* wp_data = bias->data<T>() + D4; \
const T* wp_data = bias->data<T>() + D4;
\
/* for peephole only*/
\
/* for peephole only*/
\
T* checked_cell_data = nullptr; \
T* checked_cell_data = nullptr;
\
auto place = ctx.GetPlace(); \
auto place = ctx.GetPlace();
\
if (use_peepholes) { \
if (use_peepholes) {
\
/* w_ic * Ct-1, w_fc * Ct-1 ; w_oc * Ct => ih*/
\
/* w_ic * Ct-1, w_fc * Ct-1 ; w_oc * Ct => ih*/
\
auto* checked_cell = ctx.Output<Tensor>("CheckedCell"); \
auto* checked_cell = ctx.Output<Tensor>("CheckedCell");
\
checked_cell_data = checked_cell->mutable_data<T>(place); \
checked_cell_data = checked_cell->mutable_data<T>(place);
\
} \
}
\
const jit::lstm_attr_t attr( \
const jit::lstm_attr_t attr(
\
D, jit::to_kerneltype(ctx.Attr<std::string>("gate_activation")), \
D, jit::to_kerneltype(ctx.Attr<std::string>("gate_activation")),
\
jit::to_kerneltype(ctx.Attr<std::string>("candidate_activation")), \
jit::to_kerneltype(ctx.Attr<std::string>("candidate_activation")),
\
jit::to_kerneltype(ctx.Attr<std::string>("cell_activation")), \
jit::to_kerneltype(ctx.Attr<std::string>("cell_activation")),
\
use_peepholes); \
use_peepholes);
\
jit::lstm_t one_step; \
jit::lstm_t one_step;
\
one_step.wp = wp_data; \
one_step.wp = wp_data;
\
one_step.checked = checked_cell_data; \
one_step.checked = checked_cell_data;
\
auto ComputeC1H1 = \
auto ComputeC1H1 =
\
jit::Get<jit::
lstmc1h
1, jit::LSTMTuples<T>, platform::CPUPlace>(attr); \
jit::Get<jit::
kLSTMC1H
1, jit::LSTMTuples<T>, platform::CPUPlace>(attr); \
auto ComputeCtHt = \
auto ComputeCtHt =
\
jit::Get<jit::
lstmcth
t, jit::LSTMTuples<T>, platform::CPUPlace>(attr)
jit::Get<jit::
kLSTMCtH
t, jit::LSTMTuples<T>, platform::CPUPlace>(attr)
// Wh GEMM
// Wh GEMM
#define GEMM_WH_ADDON(bs, prev, out) \
#define GEMM_WH_ADDON(bs, prev, out) \
...
...
paddle/fluid/operators/jit/benchmark.cc
浏览文件 @
1aaec571
...
@@ -146,7 +146,7 @@ template <paddle::operators::jit::KernelType KT, typename T, typename PlaceType>
...
@@ -146,7 +146,7 @@ template <paddle::operators::jit::KernelType KT, typename T, typename PlaceType>
void
BenchLSTMKernel
()
{
void
BenchLSTMKernel
()
{
for
(
bool
use_peephole
:
{
true
,
false
})
{
for
(
bool
use_peephole
:
{
true
,
false
})
{
for
(
int
d
:
TestSizes
())
{
for
(
int
d
:
TestSizes
())
{
const
jit
::
lstm_attr_t
attr
(
d
,
jit
::
vsigmoid
,
jit
::
vtanh
,
jit
::
vt
anh
,
const
jit
::
lstm_attr_t
attr
(
d
,
jit
::
kVSigmoid
,
jit
::
kVTanh
,
jit
::
kVT
anh
,
use_peephole
);
use_peephole
);
std
::
vector
<
T
>
x
(
4
*
d
),
ct_1
(
d
),
ct
(
d
),
ht
(
d
),
wp
(
3
*
d
),
checked
(
2
*
d
);
std
::
vector
<
T
>
x
(
4
*
d
),
ct_1
(
d
),
ct
(
d
),
ht
(
d
),
wp
(
3
*
d
),
checked
(
2
*
d
);
RandomVec
<
T
>
(
4
*
d
,
x
.
data
(),
-
2.
f
,
2.
f
);
RandomVec
<
T
>
(
4
*
d
,
x
.
data
(),
-
2.
f
,
2.
f
);
...
@@ -175,7 +175,7 @@ void BenchLSTMKernel() {
...
@@ -175,7 +175,7 @@ void BenchLSTMKernel() {
template
<
paddle
::
operators
::
jit
::
KernelType
KT
,
typename
T
,
typename
PlaceType
>
template
<
paddle
::
operators
::
jit
::
KernelType
KT
,
typename
T
,
typename
PlaceType
>
void
BenchGRUKernel
()
{
void
BenchGRUKernel
()
{
for
(
int
d
:
TestSizes
())
{
for
(
int
d
:
TestSizes
())
{
const
jit
::
gru_attr_t
attr
(
d
,
jit
::
vsigmoid
,
jit
::
vt
anh
);
const
jit
::
gru_attr_t
attr
(
d
,
jit
::
kVSigmoid
,
jit
::
kVT
anh
);
std
::
vector
<
T
>
x
(
3
*
d
),
ht_1
(
d
),
ht
(
d
);
std
::
vector
<
T
>
x
(
3
*
d
),
ht_1
(
d
),
ht
(
d
);
RandomVec
<
T
>
(
3
*
d
,
x
.
data
(),
-
2.
f
,
2.
f
);
RandomVec
<
T
>
(
3
*
d
,
x
.
data
(),
-
2.
f
,
2.
f
);
RandomVec
<
T
>
(
d
,
ht_1
.
data
(),
-
2.
f
,
2.
f
);
RandomVec
<
T
>
(
d
,
ht_1
.
data
(),
-
2.
f
,
2.
f
);
...
@@ -204,28 +204,28 @@ int main(int argc, char* argv[]) {
...
@@ -204,28 +204,28 @@ int main(int argc, char* argv[]) {
using
T
=
float
;
using
T
=
float
;
using
PlaceType
=
paddle
::
platform
::
CPUPlace
;
using
PlaceType
=
paddle
::
platform
::
CPUPlace
;
// xyzn
// xyzn
BenchXYZNKernel
<
jit
::
vm
ul
,
T
,
PlaceType
>
();
BenchXYZNKernel
<
jit
::
kVM
ul
,
T
,
PlaceType
>
();
BenchXYZNKernel
<
jit
::
va
dd
,
T
,
PlaceType
>
();
BenchXYZNKernel
<
jit
::
kVA
dd
,
T
,
PlaceType
>
();
BenchXYZNKernel
<
jit
::
vaddr
elu
,
T
,
PlaceType
>
();
BenchXYZNKernel
<
jit
::
kVAddR
elu
,
T
,
PlaceType
>
();
BenchXYZNKernel
<
jit
::
vs
ub
,
T
,
PlaceType
>
();
BenchXYZNKernel
<
jit
::
kVS
ub
,
T
,
PlaceType
>
();
// axyn
// axyn
BenchAXYNKernel
<
jit
::
vs
cal
,
T
,
PlaceType
>
();
BenchAXYNKernel
<
jit
::
kVS
cal
,
T
,
PlaceType
>
();
BenchAXYNKernel
<
jit
::
vaddb
ias
,
T
,
PlaceType
>
();
BenchAXYNKernel
<
jit
::
kVAddB
ias
,
T
,
PlaceType
>
();
// xyn
// xyn
BenchXYNKernel
<
jit
::
vr
elu
,
T
,
PlaceType
>
();
BenchXYNKernel
<
jit
::
kVR
elu
,
T
,
PlaceType
>
();
BenchXYNKernel
<
jit
::
vi
dentity
,
T
,
PlaceType
>
();
BenchXYNKernel
<
jit
::
kVI
dentity
,
T
,
PlaceType
>
();
BenchXYNKernel
<
jit
::
ve
xp
,
T
,
PlaceType
>
();
BenchXYNKernel
<
jit
::
kVE
xp
,
T
,
PlaceType
>
();
BenchXYNKernel
<
jit
::
vs
igmoid
,
T
,
PlaceType
>
();
BenchXYNKernel
<
jit
::
kVS
igmoid
,
T
,
PlaceType
>
();
BenchXYNKernel
<
jit
::
vt
anh
,
T
,
PlaceType
>
();
BenchXYNKernel
<
jit
::
kVT
anh
,
T
,
PlaceType
>
();
// lstm and peephole
// lstm and peephole
BenchLSTMKernel
<
jit
::
lstmcth
t
,
T
,
PlaceType
>
();
BenchLSTMKernel
<
jit
::
kLSTMCtH
t
,
T
,
PlaceType
>
();
BenchLSTMKernel
<
jit
::
lstmc1h
1
,
T
,
PlaceType
>
();
BenchLSTMKernel
<
jit
::
kLSTMC1H
1
,
T
,
PlaceType
>
();
// gru functions
// gru functions
BenchGRUKernel
<
jit
::
gruh
1
,
T
,
PlaceType
>
();
BenchGRUKernel
<
jit
::
kGRUH
1
,
T
,
PlaceType
>
();
BenchGRUKernel
<
jit
::
gruhtp
art1
,
T
,
PlaceType
>
();
BenchGRUKernel
<
jit
::
kGRUHtP
art1
,
T
,
PlaceType
>
();
BenchGRUKernel
<
jit
::
gruhtp
art2
,
T
,
PlaceType
>
();
BenchGRUKernel
<
jit
::
kGRUHtP
art2
,
T
,
PlaceType
>
();
}
}
paddle/fluid/operators/jit/gen/CMakeLists.txt
浏览文件 @
1aaec571
...
@@ -9,20 +9,20 @@ function(USE_JITKERNEL_GEN TARGET)
...
@@ -9,20 +9,20 @@ function(USE_JITKERNEL_GEN TARGET)
endfunction
()
endfunction
()
# use gen jitcode kernel by name
# use gen jitcode kernel by name
USE_JITKERNEL_GEN
(
vm
ul
)
USE_JITKERNEL_GEN
(
kVM
ul
)
USE_JITKERNEL_GEN
(
va
dd
)
USE_JITKERNEL_GEN
(
kVA
dd
)
#USE_JITKERNEL_GEN(
vs
ub) # TODO(TJ): enable me
#USE_JITKERNEL_GEN(
kVS
ub) # TODO(TJ): enable me
USE_JITKERNEL_GEN
(
vaddr
elu
)
USE_JITKERNEL_GEN
(
kVAddR
elu
)
USE_JITKERNEL_GEN
(
vs
cal
)
USE_JITKERNEL_GEN
(
kVS
cal
)
USE_JITKERNEL_GEN
(
vaddb
ias
)
USE_JITKERNEL_GEN
(
kVAddB
ias
)
USE_JITKERNEL_GEN
(
vr
elu
)
USE_JITKERNEL_GEN
(
kVR
elu
)
USE_JITKERNEL_GEN
(
vi
dentity
)
USE_JITKERNEL_GEN
(
kVI
dentity
)
USE_JITKERNEL_GEN
(
ve
xp
)
USE_JITKERNEL_GEN
(
kVE
xp
)
USE_JITKERNEL_GEN
(
vs
igmoid
)
USE_JITKERNEL_GEN
(
kVS
igmoid
)
USE_JITKERNEL_GEN
(
vt
anh
)
USE_JITKERNEL_GEN
(
kVT
anh
)
USE_JITKERNEL_GEN
(
lstmcth
t
)
USE_JITKERNEL_GEN
(
kLSTMCtH
t
)
USE_JITKERNEL_GEN
(
lstmc1h
1
)
USE_JITKERNEL_GEN
(
kLSTMC1H
1
)
USE_JITKERNEL_GEN
(
gruh
1
)
USE_JITKERNEL_GEN
(
kGRUH
1
)
USE_JITKERNEL_GEN
(
gruhtp
art1
)
USE_JITKERNEL_GEN
(
kGRUHtP
art1
)
USE_JITKERNEL_GEN
(
gruhtp
art2
)
USE_JITKERNEL_GEN
(
kGRUHtP
art2
)
USE_JITKERNEL_GEN
(
nchw16cmulnc
)
USE_JITKERNEL_GEN
(
kNCHW16CMulNC
)
paddle/fluid/operators/jit/gen/act.cc
浏览文件 @
1aaec571
...
@@ -128,8 +128,8 @@ size_t VTanhCreator::CodeSize(const int& d) const {
...
@@ -128,8 +128,8 @@ size_t VTanhCreator::CodeSize(const int& d) const {
namespace
gen
=
paddle
::
operators
::
jit
::
gen
;
namespace
gen
=
paddle
::
operators
::
jit
::
gen
;
REGISTER_JITKERNEL_GEN
(
vr
elu
,
gen
::
VReluCreator
);
REGISTER_JITKERNEL_GEN
(
kVR
elu
,
gen
::
VReluCreator
);
REGISTER_JITKERNEL_GEN
(
vi
dentity
,
gen
::
VIdentityCreator
);
REGISTER_JITKERNEL_GEN
(
kVI
dentity
,
gen
::
VIdentityCreator
);
REGISTER_JITKERNEL_GEN
(
ve
xp
,
gen
::
VExpCreator
);
REGISTER_JITKERNEL_GEN
(
kVE
xp
,
gen
::
VExpCreator
);
REGISTER_JITKERNEL_GEN
(
vs
igmoid
,
gen
::
VSigmoidCreator
);
REGISTER_JITKERNEL_GEN
(
kVS
igmoid
,
gen
::
VSigmoidCreator
);
REGISTER_JITKERNEL_GEN
(
vt
anh
,
gen
::
VTanhCreator
);
REGISTER_JITKERNEL_GEN
(
kVT
anh
,
gen
::
VTanhCreator
);
paddle/fluid/operators/jit/gen/blas.cc
浏览文件 @
1aaec571
...
@@ -176,11 +176,11 @@ DECLARE_BLAS_CREATOR(VAddBias);
...
@@ -176,11 +176,11 @@ DECLARE_BLAS_CREATOR(VAddBias);
namespace
gen
=
paddle
::
operators
::
jit
::
gen
;
namespace
gen
=
paddle
::
operators
::
jit
::
gen
;
REGISTER_JITKERNEL_GEN
(
vm
ul
,
gen
::
VMulCreator
);
REGISTER_JITKERNEL_GEN
(
kVM
ul
,
gen
::
VMulCreator
);
REGISTER_JITKERNEL_GEN
(
va
dd
,
gen
::
VAddCreator
);
REGISTER_JITKERNEL_GEN
(
kVA
dd
,
gen
::
VAddCreator
);
// TODO(TJ): enable sub
// TODO(TJ): enable sub
// REGISTER_JITKERNEL_GEN(
vs
ub, gen::VSubCreator);
// REGISTER_JITKERNEL_GEN(
kVS
ub, gen::VSubCreator);
REGISTER_JITKERNEL_GEN
(
vaddr
elu
,
gen
::
VAddReluCreator
);
REGISTER_JITKERNEL_GEN
(
kVAddR
elu
,
gen
::
VAddReluCreator
);
REGISTER_JITKERNEL_GEN
(
vs
cal
,
gen
::
VScalCreator
);
REGISTER_JITKERNEL_GEN
(
kVS
cal
,
gen
::
VScalCreator
);
REGISTER_JITKERNEL_GEN
(
vaddb
ias
,
gen
::
VAddBiasCreator
);
REGISTER_JITKERNEL_GEN
(
kVAddB
ias
,
gen
::
VAddBiasCreator
);
REGISTER_JITKERNEL_GEN
(
nchw16cmulnc
,
gen
::
NCHW16CMulNCCreator
);
REGISTER_JITKERNEL_GEN
(
kNCHW16CMulNC
,
gen
::
NCHW16CMulNCCreator
);
paddle/fluid/operators/jit/gen/gru.cc
浏览文件 @
1aaec571
...
@@ -111,6 +111,6 @@ DECLARE_GRU_CREATOR(GRUHtPart2);
...
@@ -111,6 +111,6 @@ DECLARE_GRU_CREATOR(GRUHtPart2);
namespace
gen
=
paddle
::
operators
::
jit
::
gen
;
namespace
gen
=
paddle
::
operators
::
jit
::
gen
;
REGISTER_JITKERNEL_GEN
(
gruh
1
,
gen
::
GRUH1Creator
);
REGISTER_JITKERNEL_GEN
(
kGRUH
1
,
gen
::
GRUH1Creator
);
REGISTER_JITKERNEL_GEN
(
gruhtp
art1
,
gen
::
GRUHtPart1Creator
);
REGISTER_JITKERNEL_GEN
(
kGRUHtP
art1
,
gen
::
GRUHtPart1Creator
);
REGISTER_JITKERNEL_GEN
(
gruhtp
art2
,
gen
::
GRUHtPart2Creator
);
REGISTER_JITKERNEL_GEN
(
kGRUHtP
art2
,
gen
::
GRUHtPart2Creator
);
paddle/fluid/operators/jit/gen/gru.h
浏览文件 @
1aaec571
...
@@ -30,13 +30,13 @@ class GRUJitCode : public VActFunc {
...
@@ -30,13 +30,13 @@ class GRUJitCode : public VActFunc {
void
*
code_ptr
=
nullptr
)
void
*
code_ptr
=
nullptr
)
:
VActFunc
(
code_size
,
code_ptr
),
id_
(
id
),
num_
(
attr
.
d
)
{
:
VActFunc
(
code_size
,
code_ptr
),
id_
(
id
),
num_
(
attr
.
d
)
{
auto
typeExchange
=
[](
KernelType
type
)
->
gen
::
operand_type
{
auto
typeExchange
=
[](
KernelType
type
)
->
gen
::
operand_type
{
if
(
type
==
KernelType
::
vs
igmoid
)
{
if
(
type
==
KernelType
::
kVS
igmoid
)
{
return
operand_type
::
SIGMOID
;
return
operand_type
::
SIGMOID
;
}
else
if
(
type
==
KernelType
::
vr
elu
)
{
}
else
if
(
type
==
KernelType
::
kVR
elu
)
{
return
operand_type
::
RELU
;
return
operand_type
::
RELU
;
}
else
if
(
type
==
KernelType
::
vt
anh
)
{
}
else
if
(
type
==
KernelType
::
kVT
anh
)
{
return
operand_type
::
TANH
;
return
operand_type
::
TANH
;
}
else
if
(
type
==
KernelType
::
vi
dentity
)
{
}
else
if
(
type
==
KernelType
::
kVI
dentity
)
{
return
operand_type
::
IDENTITY
;
return
operand_type
::
IDENTITY
;
}
else
{
}
else
{
LOG
(
FATAL
)
<<
"Do not support this jit::KernelType: "
<<
type
;
LOG
(
FATAL
)
<<
"Do not support this jit::KernelType: "
<<
type
;
...
...
paddle/fluid/operators/jit/gen/lstm.cc
浏览文件 @
1aaec571
...
@@ -138,5 +138,5 @@ DECLARE_LSTM_CREATOR(LSTMC1H1);
...
@@ -138,5 +138,5 @@ DECLARE_LSTM_CREATOR(LSTMC1H1);
namespace
gen
=
paddle
::
operators
::
jit
::
gen
;
namespace
gen
=
paddle
::
operators
::
jit
::
gen
;
REGISTER_JITKERNEL_GEN
(
lstmcth
t
,
gen
::
LSTMCtHtCreator
);
REGISTER_JITKERNEL_GEN
(
kLSTMCtH
t
,
gen
::
LSTMCtHtCreator
);
REGISTER_JITKERNEL_GEN
(
lstmc1h
1
,
gen
::
LSTMC1H1Creator
);
REGISTER_JITKERNEL_GEN
(
kLSTMC1H
1
,
gen
::
LSTMC1H1Creator
);
paddle/fluid/operators/jit/gen/lstm.h
浏览文件 @
1aaec571
...
@@ -33,13 +33,13 @@ class LSTMJitCode : public VActFunc {
...
@@ -33,13 +33,13 @@ class LSTMJitCode : public VActFunc {
compute_c1h1_
(
compute_c1h1
),
compute_c1h1_
(
compute_c1h1
),
use_peephole_
(
attr
.
use_peephole
)
{
use_peephole_
(
attr
.
use_peephole
)
{
auto
typeExchange
=
[](
KernelType
type
)
->
gen
::
operand_type
{
auto
typeExchange
=
[](
KernelType
type
)
->
gen
::
operand_type
{
if
(
type
==
KernelType
::
vs
igmoid
)
{
if
(
type
==
KernelType
::
kVS
igmoid
)
{
return
operand_type
::
SIGMOID
;
return
operand_type
::
SIGMOID
;
}
else
if
(
type
==
KernelType
::
vr
elu
)
{
}
else
if
(
type
==
KernelType
::
kVR
elu
)
{
return
operand_type
::
RELU
;
return
operand_type
::
RELU
;
}
else
if
(
type
==
KernelType
::
vt
anh
)
{
}
else
if
(
type
==
KernelType
::
kVT
anh
)
{
return
operand_type
::
TANH
;
return
operand_type
::
TANH
;
}
else
if
(
type
==
KernelType
::
vi
dentity
)
{
}
else
if
(
type
==
KernelType
::
kVI
dentity
)
{
return
operand_type
::
IDENTITY
;
return
operand_type
::
IDENTITY
;
}
else
{
}
else
{
LOG
(
FATAL
)
<<
"Do not support this jit::KernelType: "
<<
type
;
LOG
(
FATAL
)
<<
"Do not support this jit::KernelType: "
<<
type
;
...
...
paddle/fluid/operators/jit/helper.cc
浏览文件 @
1aaec571
...
@@ -26,25 +26,25 @@ namespace jit {
...
@@ -26,25 +26,25 @@ namespace jit {
const
char
*
to_string
(
KernelType
kt
)
{
const
char
*
to_string
(
KernelType
kt
)
{
switch
(
kt
)
{
switch
(
kt
)
{
ONE_CASE
(
vm
ul
);
ONE_CASE
(
kVM
ul
);
ONE_CASE
(
va
dd
);
ONE_CASE
(
kVA
dd
);
ONE_CASE
(
vaddr
elu
);
ONE_CASE
(
kVAddR
elu
);
ONE_CASE
(
vs
ub
);
ONE_CASE
(
kVS
ub
);
ONE_CASE
(
vs
cal
);
ONE_CASE
(
kVS
cal
);
ONE_CASE
(
vaddb
ias
);
ONE_CASE
(
kVAddB
ias
);
ONE_CASE
(
vr
elu
);
ONE_CASE
(
kVR
elu
);
ONE_CASE
(
vi
dentity
);
ONE_CASE
(
kVI
dentity
);
ONE_CASE
(
ve
xp
);
ONE_CASE
(
kVE
xp
);
ONE_CASE
(
vs
igmoid
);
ONE_CASE
(
kVS
igmoid
);
ONE_CASE
(
vt
anh
);
ONE_CASE
(
kVT
anh
);
ONE_CASE
(
lstmcth
t
);
ONE_CASE
(
kLSTMCtH
t
);
ONE_CASE
(
lstmc1h
1
);
ONE_CASE
(
kLSTMC1H
1
);
ONE_CASE
(
gruh
1
);
ONE_CASE
(
kGRUH
1
);
ONE_CASE
(
gruhtp
art1
);
ONE_CASE
(
kGRUHtP
art1
);
ONE_CASE
(
gruhtp
art2
);
ONE_CASE
(
kGRUHtP
art2
);
ONE_CASE
(
crfd
ecoding
);
ONE_CASE
(
kCRFD
ecoding
);
ONE_CASE
(
layern
orm
);
ONE_CASE
(
kLayerN
orm
);
ONE_CASE
(
nchw16cmulnc
);
ONE_CASE
(
kNCHW16CMulNC
);
default:
default:
PADDLE_THROW
(
"Not support type: %d, or forget to add it."
,
kt
);
PADDLE_THROW
(
"Not support type: %d, or forget to add it."
,
kt
);
return
"NOT JITKernel"
;
return
"NOT JITKernel"
;
...
@@ -57,19 +57,18 @@ KernelType to_kerneltype(const std::string& act) {
...
@@ -57,19 +57,18 @@ KernelType to_kerneltype(const std::string& act) {
std
::
string
lower
=
act
;
std
::
string
lower
=
act
;
std
::
transform
(
lower
.
begin
(),
lower
.
end
(),
lower
.
begin
(),
::
tolower
);
std
::
transform
(
lower
.
begin
(),
lower
.
end
(),
lower
.
begin
(),
::
tolower
);
if
(
lower
==
"relu"
||
lower
==
"vrelu"
)
{
if
(
lower
==
"relu"
||
lower
==
"vrelu"
)
{
return
vr
elu
;
return
kVR
elu
;
}
else
if
(
lower
==
"identity"
||
lower
==
"videntity"
||
lower
==
""
)
{
}
else
if
(
lower
==
"identity"
||
lower
==
"videntity"
||
lower
==
""
)
{
return
vi
dentity
;
return
kVI
dentity
;
}
else
if
(
lower
==
"exp"
||
lower
==
"vexp"
)
{
}
else
if
(
lower
==
"exp"
||
lower
==
"vexp"
)
{
return
ve
xp
;
return
kVE
xp
;
}
else
if
(
lower
==
"sigmoid"
||
lower
==
"vsigmoid"
)
{
}
else
if
(
lower
==
"sigmoid"
||
lower
==
"vsigmoid"
)
{
return
vs
igmoid
;
return
kVS
igmoid
;
}
else
if
(
lower
==
"tanh"
||
lower
==
"vtanh"
)
{
}
else
if
(
lower
==
"tanh"
||
lower
==
"vtanh"
)
{
return
vt
anh
;
return
kVT
anh
;
}
}
PADDLE_THROW
(
"Not support type: %s, or forget to add this case"
,
act
);
PADDLE_THROW
(
"Not support type: %s, or forget to add this case"
,
act
);
return
kNone
;
return
non_kernel
;
}
}
}
// namespace jit
}
// namespace jit
...
...
paddle/fluid/operators/jit/kernel_base.h
浏览文件 @
1aaec571
...
@@ -21,26 +21,26 @@ namespace operators {
...
@@ -21,26 +21,26 @@ namespace operators {
namespace
jit
{
namespace
jit
{
typedef
enum
{
typedef
enum
{
non_kernel
=
0
,
kNone
=
0
,
vm
ul
=
1
,
kVM
ul
=
1
,
va
dd
=
2
,
kVA
dd
=
2
,
vaddr
elu
,
kVAddR
elu
,
vs
ub
,
kVS
ub
,
vs
cal
,
kVS
cal
,
vaddb
ias
,
kVAddB
ias
,
vr
elu
,
kVR
elu
,
vi
dentity
,
kVI
dentity
,
ve
xp
,
kVE
xp
,
vs
igmoid
,
kVS
igmoid
,
vt
anh
,
kVT
anh
,
lstmcth
t
,
kLSTMCtH
t
,
lstmc1h
1
,
kLSTMC1H
1
,
gruh
1
,
kGRUH
1
,
gruhtp
art1
,
kGRUHtP
art1
,
gruhtp
art2
,
kGRUHtP
art2
,
crfd
ecoding
,
kCRFD
ecoding
,
layern
orm
,
kLayerN
orm
,
nchw16cmulnc
,
kNCHW16CMulNC
,
}
KernelType
;
}
KernelType
;
template
<
typename
T
>
template
<
typename
T
>
...
...
paddle/fluid/operators/jit/more/intrinsic/CMakeLists.txt
浏览文件 @
1aaec571
...
@@ -5,5 +5,5 @@ cc_library(jit_kernel_intrinsic SRCS ${jit_kernel_cc_intrinsic} DEPS jit_kernel_
...
@@ -5,5 +5,5 @@ cc_library(jit_kernel_intrinsic SRCS ${jit_kernel_cc_intrinsic} DEPS jit_kernel_
set
(
JIT_KERNEL_DEPS
${
JIT_KERNEL_DEPS
}
jit_kernel_intrinsic PARENT_SCOPE
)
set
(
JIT_KERNEL_DEPS
${
JIT_KERNEL_DEPS
}
jit_kernel_intrinsic PARENT_SCOPE
)
# use mkl kernels by name and type
# use mkl kernels by name and type
USE_JITKERNEL_MORE
(
crfd
ecoding, intrinsic
)
USE_JITKERNEL_MORE
(
kCRFD
ecoding, intrinsic
)
USE_JITKERNEL_MORE
(
layern
orm, intrinsic
)
USE_JITKERNEL_MORE
(
kLayerN
orm, intrinsic
)
paddle/fluid/operators/jit/more/intrinsic/crf_decoding.cc
浏览文件 @
1aaec571
...
@@ -178,4 +178,4 @@ bool CRFDecodingKernel::UseMe(const int& d) const {
...
@@ -178,4 +178,4 @@ bool CRFDecodingKernel::UseMe(const int& d) const {
namespace
intrinsic
=
paddle
::
operators
::
jit
::
more
::
intrinsic
;
namespace
intrinsic
=
paddle
::
operators
::
jit
::
more
::
intrinsic
;
REGISTER_JITKERNEL_MORE
(
crfd
ecoding
,
intrinsic
,
intrinsic
::
CRFDecodingKernel
);
REGISTER_JITKERNEL_MORE
(
kCRFD
ecoding
,
intrinsic
,
intrinsic
::
CRFDecodingKernel
);
paddle/fluid/operators/jit/more/intrinsic/layer_norm.cc
浏览文件 @
1aaec571
...
@@ -165,4 +165,4 @@ bool LayerNormKernel::UseMe(const int& d) const {
...
@@ -165,4 +165,4 @@ bool LayerNormKernel::UseMe(const int& d) const {
namespace
intrinsic
=
paddle
::
operators
::
jit
::
more
::
intrinsic
;
namespace
intrinsic
=
paddle
::
operators
::
jit
::
more
::
intrinsic
;
REGISTER_JITKERNEL_MORE
(
layern
orm
,
intrinsic
,
intrinsic
::
LayerNormKernel
);
REGISTER_JITKERNEL_MORE
(
kLayerN
orm
,
intrinsic
,
intrinsic
::
LayerNormKernel
);
paddle/fluid/operators/jit/more/mix/CMakeLists.txt
浏览文件 @
1aaec571
...
@@ -5,10 +5,10 @@ cc_library(jit_kernel_mix SRCS ${jit_kernel_mix_cc} DEPS jit_kernel_base)
...
@@ -5,10 +5,10 @@ cc_library(jit_kernel_mix SRCS ${jit_kernel_mix_cc} DEPS jit_kernel_base)
set
(
JIT_KERNEL_DEPS
${
JIT_KERNEL_DEPS
}
jit_kernel_mix PARENT_SCOPE
)
set
(
JIT_KERNEL_DEPS
${
JIT_KERNEL_DEPS
}
jit_kernel_mix PARENT_SCOPE
)
USE_JITKERNEL_MORE
(
vs
igmoid, mix
)
USE_JITKERNEL_MORE
(
kVS
igmoid, mix
)
USE_JITKERNEL_MORE
(
vt
anh, mix
)
USE_JITKERNEL_MORE
(
kVT
anh, mix
)
USE_JITKERNEL_MORE
(
lstmcth
t, mix
)
USE_JITKERNEL_MORE
(
kLSTMCtH
t, mix
)
USE_JITKERNEL_MORE
(
lstmc1h
1, mix
)
USE_JITKERNEL_MORE
(
kLSTMC1H
1, mix
)
USE_JITKERNEL_MORE
(
gruh
1, mix
)
USE_JITKERNEL_MORE
(
kGRUH
1, mix
)
USE_JITKERNEL_MORE
(
gruhtp
art1, mix
)
USE_JITKERNEL_MORE
(
kGRUHtP
art1, mix
)
USE_JITKERNEL_MORE
(
gruhtp
art2, mix
)
USE_JITKERNEL_MORE
(
kGRUHtP
art2, mix
)
paddle/fluid/operators/jit/more/mix/mix.cc
浏览文件 @
1aaec571
...
@@ -30,7 +30,7 @@ void VSigmoid(const T* x, T* y, int n) {
...
@@ -30,7 +30,7 @@ void VSigmoid(const T* x, T* y, int n) {
y
[
i
]
=
(
x
[
i
]
<
min
)
?
min
:
((
x
[
i
]
>
max
)
?
max
:
x
[
i
]);
y
[
i
]
=
(
x
[
i
]
<
min
)
?
min
:
((
x
[
i
]
>
max
)
?
max
:
x
[
i
]);
y
[
i
]
=
static_cast
<
T
>
(
0
)
-
y
[
i
];
y
[
i
]
=
static_cast
<
T
>
(
0
)
-
y
[
i
];
}
}
auto
compute
=
Get
<
KernelType
::
ve
xp
,
XYNTuples
<
T
>
,
platform
::
CPUPlace
>
(
n
);
auto
compute
=
Get
<
KernelType
::
kVE
xp
,
XYNTuples
<
T
>
,
platform
::
CPUPlace
>
(
n
);
compute
(
y
,
y
,
n
);
compute
(
y
,
y
,
n
);
for
(
int
i
=
0
;
i
<
n
;
++
i
)
{
for
(
int
i
=
0
;
i
<
n
;
++
i
)
{
y
[
i
]
=
static_cast
<
T
>
(
1
)
/
(
static_cast
<
T
>
(
1
)
+
y
[
i
]);
y
[
i
]
=
static_cast
<
T
>
(
1
)
/
(
static_cast
<
T
>
(
1
)
+
y
[
i
]);
...
@@ -39,9 +39,9 @@ void VSigmoid(const T* x, T* y, int n) {
...
@@ -39,9 +39,9 @@ void VSigmoid(const T* x, T* y, int n) {
void
VTanh
(
const
T
*
x
,
T
*
y
,
int
n
)
{
void
VTanh
(
const
T
*
x
,
T
*
y
,
int
n
)
{
const
T
a
=
2
,
b
=
-
1
;
const
T
a
=
2
,
b
=
-
1
;
auto
compute_scal
=
Get
<
vs
cal
,
AXYNTuples
<
T
>
,
platform
::
CPUPlace
>
(
n
);
auto
compute_scal
=
Get
<
kVS
cal
,
AXYNTuples
<
T
>
,
platform
::
CPUPlace
>
(
n
);
auto
compute_addbias
=
Get
<
vaddb
ias
,
AXYNTuples
<
T
>
,
platform
::
CPUPlace
>
(
n
);
auto
compute_addbias
=
Get
<
kVAddB
ias
,
AXYNTuples
<
T
>
,
platform
::
CPUPlace
>
(
n
);
auto
compute_sigmoid
=
Get
<
vs
igmoid
,
XYNTuples
<
T
>
,
platform
::
CPUPlace
>
(
n
);
auto
compute_sigmoid
=
Get
<
kVS
igmoid
,
XYNTuples
<
T
>
,
platform
::
CPUPlace
>
(
n
);
compute_scal
(
&
a
,
x
,
y
,
n
);
compute_scal
(
&
a
,
x
,
y
,
n
);
compute_sigmoid
(
y
,
y
,
n
);
compute_sigmoid
(
y
,
y
,
n
);
compute_scal
(
&
a
,
y
,
y
,
n
);
compute_scal
(
&
a
,
y
,
y
,
n
);
...
@@ -49,14 +49,14 @@ void VTanh(const T* x, T* y, int n) {
...
@@ -49,14 +49,14 @@ void VTanh(const T* x, T* y, int n) {
}
}
void
(
*
getActFunc
(
KernelType
type
,
int
d
))(
const
T
*
,
T
*
,
int
)
{
// NOLINT
void
(
*
getActFunc
(
KernelType
type
,
int
d
))(
const
T
*
,
T
*
,
int
)
{
// NOLINT
if
(
type
==
vs
igmoid
)
{
if
(
type
==
kVS
igmoid
)
{
return
Get
<
vs
igmoid
,
XYNTuples
<
T
>
,
platform
::
CPUPlace
>
(
d
);
return
Get
<
kVS
igmoid
,
XYNTuples
<
T
>
,
platform
::
CPUPlace
>
(
d
);
}
else
if
(
type
==
vr
elu
)
{
}
else
if
(
type
==
kVR
elu
)
{
return
Get
<
vr
elu
,
XYNTuples
<
T
>
,
platform
::
CPUPlace
>
(
d
);
return
Get
<
kVR
elu
,
XYNTuples
<
T
>
,
platform
::
CPUPlace
>
(
d
);
}
else
if
(
type
==
vt
anh
)
{
}
else
if
(
type
==
kVT
anh
)
{
return
Get
<
vt
anh
,
XYNTuples
<
T
>
,
platform
::
CPUPlace
>
(
d
);
return
Get
<
kVT
anh
,
XYNTuples
<
T
>
,
platform
::
CPUPlace
>
(
d
);
}
else
if
(
type
==
vi
dentity
)
{
}
else
if
(
type
==
kVI
dentity
)
{
return
Get
<
vi
dentity
,
XYNTuples
<
T
>
,
platform
::
CPUPlace
>
(
d
);
return
Get
<
kVI
dentity
,
XYNTuples
<
T
>
,
platform
::
CPUPlace
>
(
d
);
}
}
PADDLE_THROW
(
"Not support type: %s"
,
type
);
PADDLE_THROW
(
"Not support type: %s"
,
type
);
return
nullptr
;
return
nullptr
;
...
@@ -72,9 +72,9 @@ void LSTMCtHt(lstm_t* step, const lstm_attr_t* attr) {
...
@@ -72,9 +72,9 @@ void LSTMCtHt(lstm_t* step, const lstm_attr_t* attr) {
const
int
d
=
attr
->
d
;
const
int
d
=
attr
->
d
;
const
int
d2
=
d
*
2
;
const
int
d2
=
d
*
2
;
const
int
d3
=
d
*
3
;
const
int
d3
=
d
*
3
;
auto
vmul_d
=
Get
<
vm
ul
,
XYZNTuples
<
T
>
,
platform
::
CPUPlace
>
(
d
);
auto
vmul_d
=
Get
<
kVM
ul
,
XYZNTuples
<
T
>
,
platform
::
CPUPlace
>
(
d
);
auto
vadd_d
=
Get
<
va
dd
,
XYZNTuples
<
T
>
,
platform
::
CPUPlace
>
(
d
);
auto
vadd_d
=
Get
<
kVA
dd
,
XYZNTuples
<
T
>
,
platform
::
CPUPlace
>
(
d
);
auto
vadd_d2
=
Get
<
va
dd
,
XYZNTuples
<
T
>
,
platform
::
CPUPlace
>
(
d2
);
auto
vadd_d2
=
Get
<
kVA
dd
,
XYZNTuples
<
T
>
,
platform
::
CPUPlace
>
(
d2
);
auto
act_gate_d
=
getActFunc
(
attr
->
act_gate
,
d
);
auto
act_gate_d
=
getActFunc
(
attr
->
act_gate
,
d
);
auto
act_gate_d2
=
getActFunc
(
attr
->
act_gate
,
d2
);
auto
act_gate_d2
=
getActFunc
(
attr
->
act_gate
,
d2
);
auto
act_gate_d3
=
getActFunc
(
attr
->
act_gate
,
d3
);
auto
act_gate_d3
=
getActFunc
(
attr
->
act_gate
,
d3
);
...
@@ -114,8 +114,8 @@ void LSTMC1H1(lstm_t* step, const lstm_attr_t* attr) {
...
@@ -114,8 +114,8 @@ void LSTMC1H1(lstm_t* step, const lstm_attr_t* attr) {
int
d
=
attr
->
d
;
int
d
=
attr
->
d
;
int
d2
=
d
*
2
;
int
d2
=
d
*
2
;
int
d3
=
d
*
3
;
int
d3
=
d
*
3
;
auto
vmul_d
=
Get
<
vm
ul
,
XYZNTuples
<
T
>
,
platform
::
CPUPlace
>
(
d
);
auto
vmul_d
=
Get
<
kVM
ul
,
XYZNTuples
<
T
>
,
platform
::
CPUPlace
>
(
d
);
auto
vadd_d
=
Get
<
va
dd
,
XYZNTuples
<
T
>
,
platform
::
CPUPlace
>
(
d
);
auto
vadd_d
=
Get
<
kVA
dd
,
XYZNTuples
<
T
>
,
platform
::
CPUPlace
>
(
d
);
auto
act_gate_d
=
getActFunc
(
attr
->
act_gate
,
d
);
auto
act_gate_d
=
getActFunc
(
attr
->
act_gate
,
d
);
auto
act_cand_d
=
getActFunc
(
attr
->
act_cand
,
d
);
auto
act_cand_d
=
getActFunc
(
attr
->
act_cand
,
d
);
auto
act_cell_d
=
getActFunc
(
attr
->
act_cell
,
d
);
auto
act_cell_d
=
getActFunc
(
attr
->
act_cell
,
d
);
...
@@ -143,7 +143,7 @@ void GRUH1(gru_t* step, const gru_attr_t* attr) {
...
@@ -143,7 +143,7 @@ void GRUH1(gru_t* step, const gru_attr_t* attr) {
int
d2
=
d
*
2
;
int
d2
=
d
*
2
;
auto
act_gate
=
getActFunc
(
attr
->
act_gate
,
d
);
auto
act_gate
=
getActFunc
(
attr
->
act_gate
,
d
);
auto
act_cand
=
getActFunc
(
attr
->
act_cand
,
d
);
auto
act_cand
=
getActFunc
(
attr
->
act_cand
,
d
);
auto
vmul_d
=
Get
<
vm
ul
,
XYZNTuples
<
T
>
,
platform
::
CPUPlace
>
(
d
);
auto
vmul_d
=
Get
<
kVM
ul
,
XYZNTuples
<
T
>
,
platform
::
CPUPlace
>
(
d
);
act_gate
(
gates
,
gates
,
d
);
act_gate
(
gates
,
gates
,
d
);
act_cand
(
gates
+
d2
,
gates
+
d2
,
d
);
act_cand
(
gates
+
d2
,
gates
+
d2
,
d
);
vmul_d
(
gates
,
gates
+
d2
,
ht
,
d
);
vmul_d
(
gates
,
gates
+
d2
,
ht
,
d
);
...
@@ -156,7 +156,7 @@ void GRUHtPart1(gru_t* step, const gru_attr_t* attr) {
...
@@ -156,7 +156,7 @@ void GRUHtPart1(gru_t* step, const gru_attr_t* attr) {
T
*
ht
=
reinterpret_cast
<
T
*>
(
step
->
ht
);
T
*
ht
=
reinterpret_cast
<
T
*>
(
step
->
ht
);
const
T
*
ht_1
=
reinterpret_cast
<
const
T
*>
(
step
->
ht_1
);
const
T
*
ht_1
=
reinterpret_cast
<
const
T
*>
(
step
->
ht_1
);
auto
act_gate
=
getActFunc
(
attr
->
act_gate
,
attr
->
d
);
auto
act_gate
=
getActFunc
(
attr
->
act_gate
,
attr
->
d
);
auto
vmul_d
=
Get
<
vm
ul
,
XYZNTuples
<
T
>
,
platform
::
CPUPlace
>
(
attr
->
d
);
auto
vmul_d
=
Get
<
kVM
ul
,
XYZNTuples
<
T
>
,
platform
::
CPUPlace
>
(
attr
->
d
);
act_gate
(
gates
+
attr
->
d
,
gates
+
attr
->
d
,
attr
->
d
);
act_gate
(
gates
+
attr
->
d
,
gates
+
attr
->
d
,
attr
->
d
);
vmul_d
(
ht_1
,
gates
+
attr
->
d
,
ht
,
attr
->
d
);
vmul_d
(
ht_1
,
gates
+
attr
->
d
,
ht
,
attr
->
d
);
}
}
...
@@ -205,12 +205,12 @@ namespace mix = paddle::operators::jit::more::mix;
...
@@ -205,12 +205,12 @@ namespace mix = paddle::operators::jit::more::mix;
#define REGISTER_MORE_KERNEL(key, func) \
#define REGISTER_MORE_KERNEL(key, func) \
REGISTER_JITKERNEL_MORE(key, mix, mix::func##Kernel)
REGISTER_JITKERNEL_MORE(key, mix, mix::func##Kernel)
REGISTER_MORE_KERNEL
(
vs
igmoid
,
VSigmoid
);
REGISTER_MORE_KERNEL
(
kVS
igmoid
,
VSigmoid
);
REGISTER_MORE_KERNEL
(
vt
anh
,
VTanh
);
REGISTER_MORE_KERNEL
(
kVT
anh
,
VTanh
);
REGISTER_MORE_KERNEL
(
lstmcth
t
,
LSTMCtHt
);
REGISTER_MORE_KERNEL
(
kLSTMCtH
t
,
LSTMCtHt
);
REGISTER_MORE_KERNEL
(
lstmc1h
1
,
LSTMC1H1
);
REGISTER_MORE_KERNEL
(
kLSTMC1H
1
,
LSTMC1H1
);
REGISTER_MORE_KERNEL
(
gruh
1
,
GRUH1
);
REGISTER_MORE_KERNEL
(
kGRUH
1
,
GRUH1
);
REGISTER_MORE_KERNEL
(
gruhtp
art1
,
GRUHtPart1
);
REGISTER_MORE_KERNEL
(
kGRUHtP
art1
,
GRUHtPart1
);
REGISTER_MORE_KERNEL
(
gruhtp
art2
,
GRUHtPart2
);
REGISTER_MORE_KERNEL
(
kGRUHtP
art2
,
GRUHtPart2
);
#undef REGISTER_MORE_KERNEL
#undef REGISTER_MORE_KERNEL
paddle/fluid/operators/jit/more/mkl/CMakeLists.txt
浏览文件 @
1aaec571
...
@@ -3,9 +3,9 @@ cc_library(jit_kernel_mkl SRCS mkl.cc DEPS jit_kernel_base dynload_mklml)
...
@@ -3,9 +3,9 @@ cc_library(jit_kernel_mkl SRCS mkl.cc DEPS jit_kernel_base dynload_mklml)
set
(
JIT_KERNEL_DEPS
${
JIT_KERNEL_DEPS
}
dynload_mklml jit_kernel_mkl PARENT_SCOPE
)
set
(
JIT_KERNEL_DEPS
${
JIT_KERNEL_DEPS
}
dynload_mklml jit_kernel_mkl PARENT_SCOPE
)
# use mkl kernels by name and type
# use mkl kernels by name and type
USE_JITKERNEL_MORE
(
vm
ul, mkl
)
USE_JITKERNEL_MORE
(
kVM
ul, mkl
)
USE_JITKERNEL_MORE
(
va
dd, mkl
)
USE_JITKERNEL_MORE
(
kVA
dd, mkl
)
USE_JITKERNEL_MORE
(
vs
cal, mkl
)
USE_JITKERNEL_MORE
(
kVS
cal, mkl
)
USE_JITKERNEL_MORE
(
ve
xp, mkl
)
USE_JITKERNEL_MORE
(
kVE
xp, mkl
)
USE_JITKERNEL_MORE
(
vs
igmoid, mkl
)
USE_JITKERNEL_MORE
(
kVS
igmoid, mkl
)
USE_JITKERNEL_MORE
(
vt
anh, mkl
)
USE_JITKERNEL_MORE
(
kVT
anh, mkl
)
paddle/fluid/operators/jit/more/mkl/mkl.cc
浏览文件 @
1aaec571
...
@@ -129,11 +129,11 @@ namespace mkl = paddle::operators::jit::more::mkl;
...
@@ -129,11 +129,11 @@ namespace mkl = paddle::operators::jit::more::mkl;
REGISTER_JITKERNEL_MORE(key, mkl, mkl::func##Kernel<float>, \
REGISTER_JITKERNEL_MORE(key, mkl, mkl::func##Kernel<float>, \
mkl::func##Kernel<double>)
mkl::func##Kernel<double>)
REGISTER_MKL_KERNEL
(
vm
ul
,
VMul
);
REGISTER_MKL_KERNEL
(
kVM
ul
,
VMul
);
REGISTER_MKL_KERNEL
(
va
dd
,
VAdd
);
REGISTER_MKL_KERNEL
(
kVA
dd
,
VAdd
);
REGISTER_MKL_KERNEL
(
vs
cal
,
VScal
);
REGISTER_MKL_KERNEL
(
kVS
cal
,
VScal
);
REGISTER_MKL_KERNEL
(
ve
xp
,
VExp
);
REGISTER_MKL_KERNEL
(
kVE
xp
,
VExp
);
REGISTER_MKL_KERNEL
(
vs
igmoid
,
VSigmoid
);
REGISTER_MKL_KERNEL
(
kVS
igmoid
,
VSigmoid
);
REGISTER_MKL_KERNEL
(
vt
anh
,
VTanh
);
REGISTER_MKL_KERNEL
(
kVT
anh
,
VTanh
);
#undef REGISTER_MKL_KERNEL
#undef REGISTER_MKL_KERNEL
paddle/fluid/operators/jit/refer/CMakeLists.txt
浏览文件 @
1aaec571
...
@@ -7,22 +7,22 @@ function(USE_JITKERNEL_REFER TARGET)
...
@@ -7,22 +7,22 @@ function(USE_JITKERNEL_REFER TARGET)
endfunction
()
endfunction
()
# use refer kernel by name
# use refer kernel by name
USE_JITKERNEL_REFER
(
vm
ul
)
USE_JITKERNEL_REFER
(
kVM
ul
)
USE_JITKERNEL_REFER
(
va
dd
)
USE_JITKERNEL_REFER
(
kVA
dd
)
USE_JITKERNEL_REFER
(
vaddr
elu
)
USE_JITKERNEL_REFER
(
kVAddR
elu
)
USE_JITKERNEL_REFER
(
vs
ub
)
USE_JITKERNEL_REFER
(
kVS
ub
)
USE_JITKERNEL_REFER
(
vs
cal
)
USE_JITKERNEL_REFER
(
kVS
cal
)
USE_JITKERNEL_REFER
(
vaddb
ias
)
USE_JITKERNEL_REFER
(
kVAddB
ias
)
USE_JITKERNEL_REFER
(
vr
elu
)
USE_JITKERNEL_REFER
(
kVR
elu
)
USE_JITKERNEL_REFER
(
vi
dentity
)
USE_JITKERNEL_REFER
(
kVI
dentity
)
USE_JITKERNEL_REFER
(
ve
xp
)
USE_JITKERNEL_REFER
(
kVE
xp
)
USE_JITKERNEL_REFER
(
vs
igmoid
)
USE_JITKERNEL_REFER
(
kVS
igmoid
)
USE_JITKERNEL_REFER
(
vt
anh
)
USE_JITKERNEL_REFER
(
kVT
anh
)
USE_JITKERNEL_REFER
(
lstmcth
t
)
USE_JITKERNEL_REFER
(
kLSTMCtH
t
)
USE_JITKERNEL_REFER
(
lstmc1h
1
)
USE_JITKERNEL_REFER
(
kLSTMC1H
1
)
USE_JITKERNEL_REFER
(
gruh
1
)
USE_JITKERNEL_REFER
(
kGRUH
1
)
USE_JITKERNEL_REFER
(
gruhtp
art1
)
USE_JITKERNEL_REFER
(
kGRUHtP
art1
)
USE_JITKERNEL_REFER
(
gruhtp
art2
)
USE_JITKERNEL_REFER
(
kGRUHtP
art2
)
USE_JITKERNEL_REFER
(
crfd
ecoding
)
USE_JITKERNEL_REFER
(
kCRFD
ecoding
)
USE_JITKERNEL_REFER
(
layern
orm
)
USE_JITKERNEL_REFER
(
kLayerN
orm
)
USE_JITKERNEL_REFER
(
nchw16cmulnc
)
USE_JITKERNEL_REFER
(
kNCHW16CMulNC
)
paddle/fluid/operators/jit/refer/refer.cc
浏览文件 @
1aaec571
...
@@ -21,30 +21,30 @@ namespace refer = paddle::operators::jit::refer;
...
@@ -21,30 +21,30 @@ namespace refer = paddle::operators::jit::refer;
REGISTER_JITKERNEL_REFER(key, refer::func##Kernel<float>, \
REGISTER_JITKERNEL_REFER(key, refer::func##Kernel<float>, \
refer::func##Kernel<double>)
refer::func##Kernel<double>)
REGISTER_REFER_KERNEL
(
vm
ul
,
VMul
);
REGISTER_REFER_KERNEL
(
kVM
ul
,
VMul
);
REGISTER_REFER_KERNEL
(
va
dd
,
VAdd
);
REGISTER_REFER_KERNEL
(
kVA
dd
,
VAdd
);
REGISTER_REFER_KERNEL
(
vaddr
elu
,
VAddRelu
);
REGISTER_REFER_KERNEL
(
kVAddR
elu
,
VAddRelu
);
REGISTER_REFER_KERNEL
(
vs
ub
,
VSub
);
REGISTER_REFER_KERNEL
(
kVS
ub
,
VSub
);
REGISTER_REFER_KERNEL
(
vs
cal
,
VScal
);
REGISTER_REFER_KERNEL
(
kVS
cal
,
VScal
);
REGISTER_REFER_KERNEL
(
vaddb
ias
,
VAddBias
);
REGISTER_REFER_KERNEL
(
kVAddB
ias
,
VAddBias
);
REGISTER_REFER_KERNEL
(
vr
elu
,
VRelu
);
REGISTER_REFER_KERNEL
(
kVR
elu
,
VRelu
);
REGISTER_REFER_KERNEL
(
vi
dentity
,
VIdentity
);
REGISTER_REFER_KERNEL
(
kVI
dentity
,
VIdentity
);
REGISTER_REFER_KERNEL
(
ve
xp
,
VExp
);
REGISTER_REFER_KERNEL
(
kVE
xp
,
VExp
);
REGISTER_REFER_KERNEL
(
vs
igmoid
,
VSigmoid
);
REGISTER_REFER_KERNEL
(
kVS
igmoid
,
VSigmoid
);
REGISTER_REFER_KERNEL
(
vt
anh
,
VTanh
);
REGISTER_REFER_KERNEL
(
kVT
anh
,
VTanh
);
REGISTER_REFER_KERNEL
(
lstmcth
t
,
LSTMCtHt
);
REGISTER_REFER_KERNEL
(
kLSTMCtH
t
,
LSTMCtHt
);
REGISTER_REFER_KERNEL
(
lstmc1h
1
,
LSTMC1H1
);
REGISTER_REFER_KERNEL
(
kLSTMC1H
1
,
LSTMC1H1
);
REGISTER_REFER_KERNEL
(
gruh
1
,
GRUH1
);
REGISTER_REFER_KERNEL
(
kGRUH
1
,
GRUH1
);
REGISTER_REFER_KERNEL
(
gruhtp
art1
,
GRUHtPart1
);
REGISTER_REFER_KERNEL
(
kGRUHtP
art1
,
GRUHtPart1
);
REGISTER_REFER_KERNEL
(
gruhtp
art2
,
GRUHtPart2
);
REGISTER_REFER_KERNEL
(
kGRUHtP
art2
,
GRUHtPart2
);
REGISTER_REFER_KERNEL
(
crfd
ecoding
,
CRFDecoding
);
REGISTER_REFER_KERNEL
(
kCRFD
ecoding
,
CRFDecoding
);
REGISTER_REFER_KERNEL
(
layern
orm
,
LayerNorm
);
REGISTER_REFER_KERNEL
(
kLayerN
orm
,
LayerNorm
);
REGISTER_REFER_KERNEL
(
nchw16cmulnc
,
NCHW16CMulNC
);
REGISTER_REFER_KERNEL
(
kNCHW16CMulNC
,
NCHW16CMulNC
);
#undef REGISTER_REFER_KERNEL
#undef REGISTER_REFER_KERNEL
paddle/fluid/operators/jit/refer/refer.h
浏览文件 @
1aaec571
...
@@ -115,13 +115,13 @@ void VTanh(const T* x, T* y, int n) {
...
@@ -115,13 +115,13 @@ void VTanh(const T* x, T* y, int n) {
template
<
typename
T
>
template
<
typename
T
>
void
(
*
getActFunc
(
KernelType
type
))(
const
T
*
,
T
*
,
int
)
{
// NOLINT
void
(
*
getActFunc
(
KernelType
type
))(
const
T
*
,
T
*
,
int
)
{
// NOLINT
if
(
type
==
vs
igmoid
)
{
if
(
type
==
kVS
igmoid
)
{
return
VSigmoid
<
T
>
;
return
VSigmoid
<
T
>
;
}
else
if
(
type
==
vr
elu
)
{
}
else
if
(
type
==
kVR
elu
)
{
return
VRelu
<
T
>
;
return
VRelu
<
T
>
;
}
else
if
(
type
==
vt
anh
)
{
}
else
if
(
type
==
kVT
anh
)
{
return
VTanh
<
T
>
;
return
VTanh
<
T
>
;
}
else
if
(
type
==
vi
dentity
)
{
}
else
if
(
type
==
kVI
dentity
)
{
return
VIdentity
<
T
>
;
return
VIdentity
<
T
>
;
}
}
PADDLE_THROW
(
"Not support type: %s"
,
type
);
PADDLE_THROW
(
"Not support type: %s"
,
type
);
...
...
paddle/fluid/operators/jit/test.cc
浏览文件 @
1aaec571
...
@@ -469,111 +469,111 @@ void TestNCHW16CMulNCKernel() {
...
@@ -469,111 +469,111 @@ void TestNCHW16CMulNCKernel() {
}
}
// XYZNTuple
// XYZNTuple
TEST
(
JITKernel
,
vm
ul
)
{
TEST
(
JITKernel
,
kVM
ul
)
{
namespace
jit
=
paddle
::
operators
::
jit
;
namespace
jit
=
paddle
::
operators
::
jit
;
TestXYZNKernel
<
jit
::
vm
ul
,
float
,
paddle
::
platform
::
CPUPlace
>
();
TestXYZNKernel
<
jit
::
kVM
ul
,
float
,
paddle
::
platform
::
CPUPlace
>
();
TestXYZNKernel
<
jit
::
vm
ul
,
double
,
paddle
::
platform
::
CPUPlace
>
();
TestXYZNKernel
<
jit
::
kVM
ul
,
double
,
paddle
::
platform
::
CPUPlace
>
();
}
}
TEST
(
JITKernel
,
va
dd
)
{
TEST
(
JITKernel
,
kVA
dd
)
{
namespace
jit
=
paddle
::
operators
::
jit
;
namespace
jit
=
paddle
::
operators
::
jit
;
TestXYZNKernel
<
jit
::
va
dd
,
float
,
paddle
::
platform
::
CPUPlace
>
();
TestXYZNKernel
<
jit
::
kVA
dd
,
float
,
paddle
::
platform
::
CPUPlace
>
();
TestXYZNKernel
<
jit
::
va
dd
,
double
,
paddle
::
platform
::
CPUPlace
>
();
TestXYZNKernel
<
jit
::
kVA
dd
,
double
,
paddle
::
platform
::
CPUPlace
>
();
}
}
TEST
(
JITKernel
,
vaddr
elu
)
{
TEST
(
JITKernel
,
kVAddR
elu
)
{
namespace
jit
=
paddle
::
operators
::
jit
;
namespace
jit
=
paddle
::
operators
::
jit
;
TestXYZNKernel
<
jit
::
vaddr
elu
,
float
,
paddle
::
platform
::
CPUPlace
>
();
TestXYZNKernel
<
jit
::
kVAddR
elu
,
float
,
paddle
::
platform
::
CPUPlace
>
();
TestXYZNKernel
<
jit
::
vaddr
elu
,
double
,
paddle
::
platform
::
CPUPlace
>
();
TestXYZNKernel
<
jit
::
kVAddR
elu
,
double
,
paddle
::
platform
::
CPUPlace
>
();
}
}
TEST
(
JITKernel
,
vs
ub
)
{
TEST
(
JITKernel
,
kVS
ub
)
{
namespace
jit
=
paddle
::
operators
::
jit
;
namespace
jit
=
paddle
::
operators
::
jit
;
TestXYZNKernel
<
jit
::
vs
ub
,
float
,
paddle
::
platform
::
CPUPlace
>
();
TestXYZNKernel
<
jit
::
kVS
ub
,
float
,
paddle
::
platform
::
CPUPlace
>
();
TestXYZNKernel
<
jit
::
vs
ub
,
double
,
paddle
::
platform
::
CPUPlace
>
();
TestXYZNKernel
<
jit
::
kVS
ub
,
double
,
paddle
::
platform
::
CPUPlace
>
();
}
}
// AXYNTuples
// AXYNTuples
TEST
(
JITKernel
,
vs
cal
)
{
TEST
(
JITKernel
,
kVS
cal
)
{
namespace
jit
=
paddle
::
operators
::
jit
;
namespace
jit
=
paddle
::
operators
::
jit
;
TestAXYNKernel
<
jit
::
vs
cal
,
float
,
paddle
::
platform
::
CPUPlace
>
();
TestAXYNKernel
<
jit
::
kVS
cal
,
float
,
paddle
::
platform
::
CPUPlace
>
();
TestAXYNKernel
<
jit
::
vs
cal
,
double
,
paddle
::
platform
::
CPUPlace
>
();
TestAXYNKernel
<
jit
::
kVS
cal
,
double
,
paddle
::
platform
::
CPUPlace
>
();
}
}
TEST
(
JITKernel
,
vaddb
ias
)
{
TEST
(
JITKernel
,
kVAddB
ias
)
{
namespace
jit
=
paddle
::
operators
::
jit
;
namespace
jit
=
paddle
::
operators
::
jit
;
TestAXYNKernel
<
jit
::
vaddb
ias
,
float
,
paddle
::
platform
::
CPUPlace
>
();
TestAXYNKernel
<
jit
::
kVAddB
ias
,
float
,
paddle
::
platform
::
CPUPlace
>
();
TestAXYNKernel
<
jit
::
vaddb
ias
,
double
,
paddle
::
platform
::
CPUPlace
>
();
TestAXYNKernel
<
jit
::
kVAddB
ias
,
double
,
paddle
::
platform
::
CPUPlace
>
();
}
}
// XYNTuples
// XYNTuples
TEST
(
JITKernel
,
vr
elu
)
{
TEST
(
JITKernel
,
kVR
elu
)
{
namespace
jit
=
paddle
::
operators
::
jit
;
namespace
jit
=
paddle
::
operators
::
jit
;
TestXYNKernel
<
jit
::
vr
elu
,
float
,
paddle
::
platform
::
CPUPlace
>
();
TestXYNKernel
<
jit
::
kVR
elu
,
float
,
paddle
::
platform
::
CPUPlace
>
();
TestXYNKernel
<
jit
::
vr
elu
,
double
,
paddle
::
platform
::
CPUPlace
>
();
TestXYNKernel
<
jit
::
kVR
elu
,
double
,
paddle
::
platform
::
CPUPlace
>
();
}
}
TEST
(
JITKernel
,
vi
dentity
)
{
TEST
(
JITKernel
,
kVI
dentity
)
{
namespace
jit
=
paddle
::
operators
::
jit
;
namespace
jit
=
paddle
::
operators
::
jit
;
TestXYNKernel
<
jit
::
vi
dentity
,
float
,
paddle
::
platform
::
CPUPlace
>
();
TestXYNKernel
<
jit
::
kVI
dentity
,
float
,
paddle
::
platform
::
CPUPlace
>
();
TestXYNKernel
<
jit
::
vi
dentity
,
double
,
paddle
::
platform
::
CPUPlace
>
();
TestXYNKernel
<
jit
::
kVI
dentity
,
double
,
paddle
::
platform
::
CPUPlace
>
();
}
}
TEST
(
JITKernel
,
ve
xp
)
{
TEST
(
JITKernel
,
kVE
xp
)
{
namespace
jit
=
paddle
::
operators
::
jit
;
namespace
jit
=
paddle
::
operators
::
jit
;
TestXYNKernel
<
jit
::
ve
xp
,
float
,
paddle
::
platform
::
CPUPlace
>
();
TestXYNKernel
<
jit
::
kVE
xp
,
float
,
paddle
::
platform
::
CPUPlace
>
();
TestXYNKernel
<
jit
::
ve
xp
,
double
,
paddle
::
platform
::
CPUPlace
>
();
TestXYNKernel
<
jit
::
kVE
xp
,
double
,
paddle
::
platform
::
CPUPlace
>
();
}
}
TEST
(
JITKernel
,
vs
igmoid
)
{
TEST
(
JITKernel
,
kVS
igmoid
)
{
namespace
jit
=
paddle
::
operators
::
jit
;
namespace
jit
=
paddle
::
operators
::
jit
;
TestXYNKernel
<
jit
::
vs
igmoid
,
float
,
paddle
::
platform
::
CPUPlace
>
();
TestXYNKernel
<
jit
::
kVS
igmoid
,
float
,
paddle
::
platform
::
CPUPlace
>
();
TestXYNKernel
<
jit
::
vs
igmoid
,
double
,
paddle
::
platform
::
CPUPlace
>
();
TestXYNKernel
<
jit
::
kVS
igmoid
,
double
,
paddle
::
platform
::
CPUPlace
>
();
}
}
TEST
(
JITKernel
,
vt
anh
)
{
TEST
(
JITKernel
,
kVT
anh
)
{
namespace
jit
=
paddle
::
operators
::
jit
;
namespace
jit
=
paddle
::
operators
::
jit
;
TestXYNKernel
<
jit
::
vt
anh
,
float
,
paddle
::
platform
::
CPUPlace
>
();
TestXYNKernel
<
jit
::
kVT
anh
,
float
,
paddle
::
platform
::
CPUPlace
>
();
TestXYNKernel
<
jit
::
vt
anh
,
double
,
paddle
::
platform
::
CPUPlace
>
();
TestXYNKernel
<
jit
::
kVT
anh
,
double
,
paddle
::
platform
::
CPUPlace
>
();
}
}
// LSTM
// LSTM
TEST
(
JITKernel
,
lstmcth
t
)
{
TEST
(
JITKernel
,
kLSTMCtH
t
)
{
namespace
jit
=
paddle
::
operators
::
jit
;
namespace
jit
=
paddle
::
operators
::
jit
;
TestLSTMKernel
<
jit
::
lstmcth
t
,
float
,
paddle
::
platform
::
CPUPlace
>
();
TestLSTMKernel
<
jit
::
kLSTMCtH
t
,
float
,
paddle
::
platform
::
CPUPlace
>
();
TestLSTMKernel
<
jit
::
lstmcth
t
,
double
,
paddle
::
platform
::
CPUPlace
>
();
TestLSTMKernel
<
jit
::
kLSTMCtH
t
,
double
,
paddle
::
platform
::
CPUPlace
>
();
}
}
TEST
(
JITKernel
,
lstmc1h
1
)
{
TEST
(
JITKernel
,
kLSTMC1H
1
)
{
namespace
jit
=
paddle
::
operators
::
jit
;
namespace
jit
=
paddle
::
operators
::
jit
;
TestLSTMKernel
<
jit
::
lstmc1h
1
,
float
,
paddle
::
platform
::
CPUPlace
>
();
TestLSTMKernel
<
jit
::
kLSTMC1H
1
,
float
,
paddle
::
platform
::
CPUPlace
>
();
TestLSTMKernel
<
jit
::
lstmc1h
1
,
double
,
paddle
::
platform
::
CPUPlace
>
();
TestLSTMKernel
<
jit
::
kLSTMC1H
1
,
double
,
paddle
::
platform
::
CPUPlace
>
();
}
}
// GRU
// GRU
TEST
(
JITKernel
,
gruh
1
)
{
TEST
(
JITKernel
,
kGRUH
1
)
{
namespace
jit
=
paddle
::
operators
::
jit
;
namespace
jit
=
paddle
::
operators
::
jit
;
TestGRUKernel
<
jit
::
gruh
1
,
float
,
paddle
::
platform
::
CPUPlace
>
();
TestGRUKernel
<
jit
::
kGRUH
1
,
float
,
paddle
::
platform
::
CPUPlace
>
();
TestGRUKernel
<
jit
::
gruh
1
,
double
,
paddle
::
platform
::
CPUPlace
>
();
TestGRUKernel
<
jit
::
kGRUH
1
,
double
,
paddle
::
platform
::
CPUPlace
>
();
}
}
TEST
(
JITKernel
,
gruhtp
art1
)
{
TEST
(
JITKernel
,
kGRUHtP
art1
)
{
namespace
jit
=
paddle
::
operators
::
jit
;
namespace
jit
=
paddle
::
operators
::
jit
;
TestGRUKernel
<
jit
::
gruhtp
art1
,
float
,
paddle
::
platform
::
CPUPlace
>
();
TestGRUKernel
<
jit
::
kGRUHtP
art1
,
float
,
paddle
::
platform
::
CPUPlace
>
();
TestGRUKernel
<
jit
::
gruhtp
art1
,
double
,
paddle
::
platform
::
CPUPlace
>
();
TestGRUKernel
<
jit
::
kGRUHtP
art1
,
double
,
paddle
::
platform
::
CPUPlace
>
();
}
}
TEST
(
JITKernel
,
gruhtp
art2
)
{
TEST
(
JITKernel
,
kGRUHtP
art2
)
{
namespace
jit
=
paddle
::
operators
::
jit
;
namespace
jit
=
paddle
::
operators
::
jit
;
TestGRUKernel
<
jit
::
gruhtp
art2
,
float
,
paddle
::
platform
::
CPUPlace
>
();
TestGRUKernel
<
jit
::
kGRUHtP
art2
,
float
,
paddle
::
platform
::
CPUPlace
>
();
TestGRUKernel
<
jit
::
gruhtp
art2
,
double
,
paddle
::
platform
::
CPUPlace
>
();
TestGRUKernel
<
jit
::
kGRUHtP
art2
,
double
,
paddle
::
platform
::
CPUPlace
>
();
}
}
TEST
(
JITKernel
,
nchw16cmulnc
)
{
TEST
(
JITKernel
,
kNCHW16CMulNC
)
{
namespace
jit
=
paddle
::
operators
::
jit
;
namespace
jit
=
paddle
::
operators
::
jit
;
TestNCHW16CMulNCKernel
<
jit
::
nchw16cmulnc
,
float
,
TestNCHW16CMulNCKernel
<
jit
::
kNCHW16CMulNC
,
float
,
paddle
::
platform
::
CPUPlace
>
();
paddle
::
platform
::
CPUPlace
>
();
TestNCHW16CMulNCKernel
<
jit
::
nchw16cmulnc
,
double
,
TestNCHW16CMulNCKernel
<
jit
::
kNCHW16CMulNC
,
double
,
paddle
::
platform
::
CPUPlace
>
();
paddle
::
platform
::
CPUPlace
>
();
}
}
...
...
paddle/fluid/operators/layer_norm_op.h
浏览文件 @
1aaec571
...
@@ -230,7 +230,7 @@ class LayerNormKernel : public framework::OpKernel<T> {
...
@@ -230,7 +230,7 @@ class LayerNormKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE_EQ
(
bias
->
numel
(),
right
);
PADDLE_ENFORCE_EQ
(
bias
->
numel
(),
right
);
auto
ker
=
auto
ker
=
jit
::
Get
<
jit
::
layern
orm
,
jit
::
LayerNormTuples
<
T
>
,
platform
::
CPUPlace
>
(
jit
::
Get
<
jit
::
kLayerN
orm
,
jit
::
LayerNormTuples
<
T
>
,
platform
::
CPUPlace
>
(
right
);
right
);
ker
(
x
.
data
<
T
>
(),
out
.
data
<
T
>
(),
mean
->
data
<
T
>
(),
var
->
data
<
T
>
(),
ker
(
x
.
data
<
T
>
(),
out
.
data
<
T
>
(),
mean
->
data
<
T
>
(),
var
->
data
<
T
>
(),
scale
->
data
<
T
>
(),
bias
->
data
<
T
>
(),
static_cast
<
int
>
(
left
),
scale
->
data
<
T
>
(),
bias
->
data
<
T
>
(),
static_cast
<
int
>
(
left
),
...
...
paddle/fluid/operators/math/fc_compute.h
浏览文件 @
1aaec571
...
@@ -31,14 +31,14 @@ inline void FCCompute(const BlasT<DeviceContext, T>& blas, const int M,
...
@@ -31,14 +31,14 @@ inline void FCCompute(const BlasT<DeviceContext, T>& blas, const int M,
}
}
if
(
relu
)
{
if
(
relu
)
{
auto
compute
=
auto
compute
=
jit
::
Get
<
jit
::
vaddr
elu
,
jit
::
XYZNTuples
<
T
>
,
platform
::
CPUPlace
>
(
N
);
jit
::
Get
<
jit
::
kVAddR
elu
,
jit
::
XYZNTuples
<
T
>
,
platform
::
CPUPlace
>
(
N
);
for
(
int
i
=
0
;
i
<
M
;
i
++
)
{
for
(
int
i
=
0
;
i
<
M
;
i
++
)
{
T
*
dst
=
Y
+
i
*
N
;
T
*
dst
=
Y
+
i
*
N
;
compute
(
B
,
dst
,
dst
,
N
);
compute
(
B
,
dst
,
dst
,
N
);
}
}
}
else
{
}
else
{
auto
compute
=
auto
compute
=
jit
::
Get
<
jit
::
va
dd
,
jit
::
XYZNTuples
<
T
>
,
platform
::
CPUPlace
>
(
N
);
jit
::
Get
<
jit
::
kVA
dd
,
jit
::
XYZNTuples
<
T
>
,
platform
::
CPUPlace
>
(
N
);
#ifdef PADDLE_WITH_MKLML
#ifdef PADDLE_WITH_MKLML
#pragma omp parallel for
#pragma omp parallel for
#endif
#endif
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录