Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
e2d6eddd
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2302
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
e2d6eddd
编写于
11月 16, 2018
作者:
T
tensor-tang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
remove ComputeDeprecated
test=develop
上级
f65ddff8
变更
6
显示空白变更内容
内联
并排
Showing
6 changed file
with
53 addition
and
78 deletion
+53
-78
paddle/fluid/operators/math/jit_code.cc
paddle/fluid/operators/math/jit_code.cc
+1
-0
paddle/fluid/operators/math/jit_kernel.h
paddle/fluid/operators/math/jit_kernel.h
+6
-25
paddle/fluid/operators/math/jit_kernel_blas.cc
paddle/fluid/operators/math/jit_kernel_blas.cc
+14
-14
paddle/fluid/operators/math/jit_kernel_exp.cc
paddle/fluid/operators/math/jit_kernel_exp.cc
+5
-12
paddle/fluid/operators/math/jit_kernel_rnn.cc
paddle/fluid/operators/math/jit_kernel_rnn.cc
+19
-19
paddle/fluid/operators/math/jit_kernel_test.cc
paddle/fluid/operators/math/jit_kernel_test.cc
+8
-8
未找到文件。
paddle/fluid/operators/math/jit_code.cc
浏览文件 @
e2d6eddd
...
@@ -178,6 +178,7 @@ bool VActJitCode::init(int d, operand_type type) {
...
@@ -178,6 +178,7 @@ bool VActJitCode::init(int d, operand_type type) {
if
(
type
==
operand_type
::
relu
)
{
if
(
type
==
operand_type
::
relu
)
{
return
ok
;
return
ok
;
}
else
{
}
else
{
// TODO(TJ): support more
return
ok
&&
d
==
8
;
// only 8 yet
return
ok
&&
d
==
8
;
// only 8 yet
}
}
}
}
...
...
paddle/fluid/operators/math/jit_kernel.h
浏览文件 @
e2d6eddd
...
@@ -98,42 +98,23 @@ class VAddBiasKernel : public Kernel {
...
@@ -98,42 +98,23 @@ class VAddBiasKernel : public Kernel {
template
<
typename
T
>
template
<
typename
T
>
class
VActKernel
:
public
Kernel
{
class
VActKernel
:
public
Kernel
{
public:
public:
v
irtual
void
ComputeDeprecated
(
const
T
*
x
,
T
*
y
)
const
=
0
;
v
oid
(
*
Compute
)(
const
T
*
,
T
*
,
int
)
;
};
};
template
<
typename
T
>
template
<
typename
T
>
class
VReluKernel
:
public
VActKernel
<
T
>
{
class
VReluKernel
:
public
VActKernel
<
T
>
{};
public:
virtual
void
ComputeDeprecated
(
const
T
*
x
,
T
*
y
)
const
=
0
;
void
(
*
Compute
)(
const
T
*
,
T
*
,
int
);
};
template
<
typename
T
>
template
<
typename
T
>
class
VIdentityKernel
:
public
VActKernel
<
T
>
{
class
VIdentityKernel
:
public
VActKernel
<
T
>
{};
public:
virtual
void
ComputeDeprecated
(
const
T
*
x
,
T
*
y
)
const
=
0
;
};
template
<
typename
T
>
template
<
typename
T
>
class
VExpKernel
:
public
VActKernel
<
T
>
{
class
VExpKernel
:
public
VActKernel
<
T
>
{};
public:
virtual
void
ComputeDeprecated
(
const
T
*
x
,
T
*
y
)
const
=
0
;
void
(
*
Compute
)(
const
T
*
,
T
*
,
int
);
};
template
<
typename
T
>
template
<
typename
T
>
class
VSigmoidKernel
:
public
VActKernel
<
T
>
{
class
VSigmoidKernel
:
public
VActKernel
<
T
>
{};
public:
virtual
void
ComputeDeprecated
(
const
T
*
x
,
T
*
y
)
const
=
0
;
void
(
*
Compute
)(
const
T
*
,
T
*
,
int
);
};
template
<
typename
T
>
template
<
typename
T
>
class
VTanhKernel
:
public
VActKernel
<
T
>
{
class
VTanhKernel
:
public
VActKernel
<
T
>
{};
public:
virtual
void
ComputeDeprecated
(
const
T
*
x
,
T
*
y
)
const
=
0
;
void
(
*
Compute
)(
const
T
*
,
T
*
,
int
);
};
template
<
typename
T
>
template
<
typename
T
>
class
LSTMKernel
:
public
Kernel
{
class
LSTMKernel
:
public
Kernel
{
...
...
paddle/fluid/operators/math/jit_kernel_blas.cc
浏览文件 @
e2d6eddd
...
@@ -346,7 +346,6 @@ class VReluKernelImpl : public VReluKernel<T> {
...
@@ -346,7 +346,6 @@ class VReluKernelImpl : public VReluKernel<T> {
public:
public:
JITKERNEL_DECLARE_STATIC_FUNC
;
JITKERNEL_DECLARE_STATIC_FUNC
;
explicit
VReluKernelImpl
(
int
d
)
:
VReluKernel
<
T
>
()
{
explicit
VReluKernelImpl
(
int
d
)
:
VReluKernel
<
T
>
()
{
this
->
num_
=
d
;
// TODO(TJ): remove me when ComputeDeprecated done
#ifdef PADDLE_WITH_XBYAK
#ifdef PADDLE_WITH_XBYAK
if
(
useJIT
(
d
))
{
if
(
useJIT
(
d
))
{
size_t
sz
=
96
/* init size */
+
size_t
sz
=
96
/* init size */
+
...
@@ -361,9 +360,6 @@ class VReluKernelImpl : public VReluKernel<T> {
...
@@ -361,9 +360,6 @@ class VReluKernelImpl : public VReluKernel<T> {
this
->
Compute
=
VReluRefer
<
T
>
;
this
->
Compute
=
VReluRefer
<
T
>
;
}
}
void
ComputeDeprecated
(
const
T
*
x
,
T
*
y
)
const
override
{
VReluRefer
(
x
,
y
,
this
->
num_
);
}
#ifdef PADDLE_WITH_XBYAK
#ifdef PADDLE_WITH_XBYAK
private:
private:
...
@@ -378,22 +374,26 @@ bool VReluKernelImpl<float>::useJIT(int d) {
...
@@ -378,22 +374,26 @@ bool VReluKernelImpl<float>::useJIT(int d) {
}
}
#endif
#endif
REGISTER_JITKERNEL
(
vmul
,
VMulKernel
);
template
<
typename
T
>
REGISTER_JITKERNEL
(
vadd
,
VAddKernel
);
inline
void
VIdentityRefer
(
const
T
*
x
,
T
*
y
,
int
n
)
{}
REGISTER_JITKERNEL
(
vaddrelu
,
VAddReluKernel
);
REGISTER_JITKERNEL
(
vscal
,
VScalKernel
);
REGISTER_JITKERNEL
(
vaddbias
,
VAddBiasKernel
);
REGISTER_JITKERNEL
(
vrelu
,
VReluKernel
);
/* An empty JitKernel */
/* An empty JitKernel */
template
<
typename
T
,
platform
::
jit
::
cpu_isa_t
isa
,
jit_block
>
template
<
typename
T
>
class
VIdentityKernelImpl
:
public
VIdentityKernel
<
T
>
{
class
VIdentityKernelImpl
:
public
VIdentityKernel
<
T
>
{
public:
public:
explicit
VIdentityKernelImpl
(
int
d
)
:
VIdentityKernel
<
T
>
()
{
this
->
num_
=
d
;
}
JITKERNEL_DECLARE_STATIC_FUNC
;
void
ComputeDeprecated
(
const
T
*
x
,
T
*
y
)
const
override
{}
explicit
VIdentityKernelImpl
(
int
d
)
:
VIdentityKernel
<
T
>
()
{
this
->
Compute
=
VIdentityRefer
<
T
>
;
}
};
};
REGISTER_JITKERNEL_DEPRECATED
(
videntity
,
VIdentityKernel
);
REGISTER_JITKERNEL
(
vmul
,
VMulKernel
);
REGISTER_JITKERNEL
(
vadd
,
VAddKernel
);
REGISTER_JITKERNEL
(
vaddrelu
,
VAddReluKernel
);
REGISTER_JITKERNEL
(
vscal
,
VScalKernel
);
REGISTER_JITKERNEL
(
vaddbias
,
VAddBiasKernel
);
REGISTER_JITKERNEL
(
vrelu
,
VReluKernel
);
REGISTER_JITKERNEL
(
videntity
,
VIdentityKernel
);
}
// namespace jitkernel
}
// namespace jitkernel
}
// namespace math
}
// namespace math
...
...
paddle/fluid/operators/math/jit_kernel_exp.cc
浏览文件 @
e2d6eddd
...
@@ -36,6 +36,7 @@ namespace jitkernel {
...
@@ -36,6 +36,7 @@ namespace jitkernel {
namespace
jit
=
platform
::
jit
;
namespace
jit
=
platform
::
jit
;
// TODO(TJ): move refer codes to one file
// TODO(TJ): move refer codes to one file
// Refer code only focus on correctness
template
<
typename
T
>
template
<
typename
T
>
void
VExpRefer
(
const
T
*
x
,
T
*
y
,
int
n
)
{
void
VExpRefer
(
const
T
*
x
,
T
*
y
,
int
n
)
{
for
(
int
i
=
0
;
i
<
n
;
++
i
)
{
for
(
int
i
=
0
;
i
<
n
;
++
i
)
{
...
@@ -67,6 +68,7 @@ void VTanhRefer(const T* x, T* y, int n) {
...
@@ -67,6 +68,7 @@ void VTanhRefer(const T* x, T* y, int n) {
}
}
#ifdef PADDLE_WITH_MKLML
#ifdef PADDLE_WITH_MKLML
// try to use MKL to speedup
template
<
typename
T
>
template
<
typename
T
>
void
VExpMKL
(
const
T
*
x
,
T
*
y
,
int
n
);
void
VExpMKL
(
const
T
*
x
,
T
*
y
,
int
n
);
...
@@ -112,7 +114,6 @@ class VExpKernelImpl : public VExpKernel<T> {
...
@@ -112,7 +114,6 @@ class VExpKernelImpl : public VExpKernel<T> {
public:
public:
JITKERNEL_DECLARE_STATIC_FUNC
;
JITKERNEL_DECLARE_STATIC_FUNC
;
explicit
VExpKernelImpl
(
int
d
)
:
VExpKernel
<
T
>
()
{
explicit
VExpKernelImpl
(
int
d
)
:
VExpKernel
<
T
>
()
{
this
->
num_
=
d
;
// TODO(TJ): remove me when ComputeDeprecated done
#ifdef PADDLE_WITH_XBYAK
#ifdef PADDLE_WITH_XBYAK
if
(
useJIT
(
d
))
{
if
(
useJIT
(
d
))
{
size_t
sz
=
96
+
d
/
AVX_FLOAT_BLOCK
*
4
*
8
;
// should change
size_t
sz
=
96
+
d
/
AVX_FLOAT_BLOCK
*
4
*
8
;
// should change
...
@@ -130,9 +131,7 @@ class VExpKernelImpl : public VExpKernel<T> {
...
@@ -130,9 +131,7 @@ class VExpKernelImpl : public VExpKernel<T> {
#endif
#endif
this
->
Compute
=
VExpRefer
<
T
>
;
this
->
Compute
=
VExpRefer
<
T
>
;
}
}
void
ComputeDeprecated
(
const
T
*
x
,
T
*
y
)
const
override
{
VExpRefer
(
x
,
y
,
this
->
num_
);
}
#ifdef PADDLE_WITH_XBYAK
#ifdef PADDLE_WITH_XBYAK
private:
private:
...
@@ -166,7 +165,6 @@ class VSigmoidKernelImpl : public VSigmoidKernel<T> {
...
@@ -166,7 +165,6 @@ class VSigmoidKernelImpl : public VSigmoidKernel<T> {
public:
public:
JITKERNEL_DECLARE_STATIC_FUNC
;
JITKERNEL_DECLARE_STATIC_FUNC
;
explicit
VSigmoidKernelImpl
(
int
d
)
:
VSigmoidKernel
<
T
>
()
{
explicit
VSigmoidKernelImpl
(
int
d
)
:
VSigmoidKernel
<
T
>
()
{
this
->
num_
=
d
;
// TODO(TJ): remove me when ComputeDeprecated done
#ifdef PADDLE_WITH_XBYAK
#ifdef PADDLE_WITH_XBYAK
if
(
useJIT
(
d
))
{
if
(
useJIT
(
d
))
{
size_t
sz
=
96
+
d
/
AVX_FLOAT_BLOCK
*
4
*
8
;
// should change
size_t
sz
=
96
+
d
/
AVX_FLOAT_BLOCK
*
4
*
8
;
// should change
...
@@ -186,9 +184,7 @@ class VSigmoidKernelImpl : public VSigmoidKernel<T> {
...
@@ -186,9 +184,7 @@ class VSigmoidKernelImpl : public VSigmoidKernel<T> {
#endif
#endif
this
->
Compute
=
VSigmoidRefer
<
T
>
;
this
->
Compute
=
VSigmoidRefer
<
T
>
;
}
}
void
ComputeDeprecated
(
const
T
*
x
,
T
*
y
)
const
override
{
VSigmoidRefer
(
x
,
y
,
this
->
num_
);
}
#ifdef PADDLE_WITH_XBYAK
#ifdef PADDLE_WITH_XBYAK
private:
private:
...
@@ -221,7 +217,6 @@ class VTanhKernelImpl : public VTanhKernel<T> {
...
@@ -221,7 +217,6 @@ class VTanhKernelImpl : public VTanhKernel<T> {
public:
public:
JITKERNEL_DECLARE_STATIC_FUNC
;
JITKERNEL_DECLARE_STATIC_FUNC
;
explicit
VTanhKernelImpl
(
int
d
)
:
VTanhKernel
<
T
>
()
{
explicit
VTanhKernelImpl
(
int
d
)
:
VTanhKernel
<
T
>
()
{
this
->
num_
=
d
;
// TODO(TJ): remove me when ComputeDeprecated done
#ifdef PADDLE_WITH_XBYAK
#ifdef PADDLE_WITH_XBYAK
if
(
useJIT
(
d
))
{
if
(
useJIT
(
d
))
{
size_t
sz
=
96
+
d
/
AVX_FLOAT_BLOCK
*
4
*
8
;
// should change
size_t
sz
=
96
+
d
/
AVX_FLOAT_BLOCK
*
4
*
8
;
// should change
...
@@ -241,9 +236,7 @@ class VTanhKernelImpl : public VTanhKernel<T> {
...
@@ -241,9 +236,7 @@ class VTanhKernelImpl : public VTanhKernel<T> {
#endif
#endif
this
->
Compute
=
VTanhRefer
<
T
>
;
this
->
Compute
=
VTanhRefer
<
T
>
;
}
}
void
ComputeDeprecated
(
const
T
*
x
,
T
*
y
)
const
override
{
VTanhRefer
(
x
,
y
,
this
->
num_
);
}
#ifdef PADDLE_WITH_XBYAK
#ifdef PADDLE_WITH_XBYAK
private:
private:
...
...
paddle/fluid/operators/math/jit_kernel_rnn.cc
浏览文件 @
e2d6eddd
...
@@ -175,26 +175,26 @@ class LSTMKernelImpl : public LSTMKernel<T> {
...
@@ -175,26 +175,26 @@ class LSTMKernelImpl : public LSTMKernel<T> {
void
ComputeCtHt
(
T
*
gates
,
const
T
*
ct_1
,
T
*
ct
,
T
*
ht
,
const
T
*
wp_data
,
void
ComputeCtHt
(
T
*
gates
,
const
T
*
ct_1
,
T
*
ct
,
T
*
ht
,
const
T
*
wp_data
,
T
*
checked
)
const
override
{
T
*
checked
)
const
override
{
// gates: W_ch, W_ih, W_fh, W_oh
// gates: W_ch, W_ih, W_fh, W_oh
act_gate_d3_
->
Compute
Deprecated
(
gates
+
d_
,
gates
+
d
_
);
act_gate_d3_
->
Compute
(
gates
+
d_
,
gates
+
d_
,
d3
_
);
/* C_t = C_t-1 * fgated + cand_gated * igated */
/* C_t = C_t-1 * fgated + cand_gated * igated */
act_cand_d_
->
Compute
Deprecated
(
gates
,
gates
);
act_cand_d_
->
Compute
(
gates
,
gates
,
d_
);
vmul_d_
->
Compute
(
gates
,
gates
+
d_
,
gates
+
d_
,
d_
);
vmul_d_
->
Compute
(
gates
,
gates
+
d_
,
gates
+
d_
,
d_
);
vmul_d_
->
Compute
(
ct_1
,
gates
+
d2_
,
gates
+
d2_
,
d_
);
vmul_d_
->
Compute
(
ct_1
,
gates
+
d2_
,
gates
+
d2_
,
d_
);
vadd_d_
->
Compute
(
gates
+
d_
,
gates
+
d2_
,
ct
,
d_
);
vadd_d_
->
Compute
(
gates
+
d_
,
gates
+
d2_
,
ct
,
d_
);
/* H_t = act_cell(C_t) * ogated */
/* H_t = act_cell(C_t) * ogated */
act_cell_d_
->
Compute
Deprecated
(
ct
,
gates
+
d2
_
);
act_cell_d_
->
Compute
(
ct
,
gates
+
d2_
,
d
_
);
vmul_d_
->
Compute
(
gates
+
d2_
,
gates
+
d3_
,
ht
,
d_
);
vmul_d_
->
Compute
(
gates
+
d2_
,
gates
+
d3_
,
ht
,
d_
);
}
}
void
ComputeC1H1
(
T
*
gates
,
T
*
ct
,
T
*
ht
,
const
T
*
wp_data
)
const
override
{
void
ComputeC1H1
(
T
*
gates
,
T
*
ct
,
T
*
ht
,
const
T
*
wp_data
)
const
override
{
/* C_t = igated * cgated*/
/* C_t = igated * cgated*/
act_gate_d_
->
Compute
Deprecated
(
gates
+
d_
,
gates
+
d_
);
act_gate_d_
->
Compute
(
gates
+
d_
,
gates
+
d_
,
d_
);
act_cand_d_
->
Compute
Deprecated
(
gates
,
gates
);
act_cand_d_
->
Compute
(
gates
,
gates
,
d_
);
vmul_d_
->
Compute
(
gates
,
gates
+
d_
,
ct
,
d_
);
vmul_d_
->
Compute
(
gates
,
gates
+
d_
,
ct
,
d_
);
/* H_t = act_cell(C_t) * ogated */
/* H_t = act_cell(C_t) * ogated */
act_gate_d_
->
Compute
Deprecated
(
gates
+
d3_
,
gates
+
d3
_
);
act_gate_d_
->
Compute
(
gates
+
d3_
,
gates
+
d3_
,
d
_
);
act_cell_d_
->
Compute
Deprecated
(
ct
,
gates
+
d2
_
);
act_cell_d_
->
Compute
(
ct
,
gates
+
d2_
,
d
_
);
vmul_d_
->
Compute
(
gates
+
d2_
,
gates
+
d3_
,
ht
,
d_
);
vmul_d_
->
Compute
(
gates
+
d2_
,
gates
+
d3_
,
ht
,
d_
);
}
}
...
@@ -292,32 +292,32 @@ class PeepholeKernelImpl : public LSTMKernel<T> {
...
@@ -292,32 +292,32 @@ class PeepholeKernelImpl : public LSTMKernel<T> {
vmul_d_
->
Compute
(
wp_data
,
ct_1
,
checked
,
d_
);
vmul_d_
->
Compute
(
wp_data
,
ct_1
,
checked
,
d_
);
vmul_d_
->
Compute
(
wp_data
+
d_
,
ct_1
,
checked
+
d_
,
d_
);
vmul_d_
->
Compute
(
wp_data
+
d_
,
ct_1
,
checked
+
d_
,
d_
);
vadd_d2_
->
Compute
(
checked
,
gates
+
d_
,
gates
+
d_
,
d2_
);
vadd_d2_
->
Compute
(
checked
,
gates
+
d_
,
gates
+
d_
,
d2_
);
act_gate_d2_
->
Compute
Deprecated
(
gates
+
d_
,
gates
+
d
_
);
act_gate_d2_
->
Compute
(
gates
+
d_
,
gates
+
d_
,
d2
_
);
/* C_t = C_t-1 * fgated + cand_gated * igated*/
/* C_t = C_t-1 * fgated + cand_gated * igated*/
act_cand_d_
->
Compute
Deprecated
(
gates
,
gates
);
act_cand_d_
->
Compute
(
gates
,
gates
,
d_
);
vmul_d_
->
Compute
(
gates
,
gates
+
d_
,
gates
+
d_
,
d_
);
vmul_d_
->
Compute
(
gates
,
gates
+
d_
,
gates
+
d_
,
d_
);
vmul_d_
->
Compute
(
ct_1
,
gates
+
d2_
,
gates
+
d2_
,
d_
);
vmul_d_
->
Compute
(
ct_1
,
gates
+
d2_
,
gates
+
d2_
,
d_
);
vadd_d_
->
Compute
(
gates
+
d_
,
gates
+
d2_
,
ct
,
d_
);
vadd_d_
->
Compute
(
gates
+
d_
,
gates
+
d2_
,
ct
,
d_
);
/* get ogated*/
/* get ogated*/
vmul_d_
->
Compute
(
wp_data
+
d2_
,
ct
,
gates
+
d_
,
d_
);
vmul_d_
->
Compute
(
wp_data
+
d2_
,
ct
,
gates
+
d_
,
d_
);
vadd_d_
->
Compute
(
gates
+
d_
,
gates
+
d3_
,
gates
+
d3_
,
d_
);
vadd_d_
->
Compute
(
gates
+
d_
,
gates
+
d3_
,
gates
+
d3_
,
d_
);
act_gate_d_
->
Compute
Deprecated
(
gates
+
d3_
,
gates
+
d3
_
);
act_gate_d_
->
Compute
(
gates
+
d3_
,
gates
+
d3_
,
d
_
);
/* H_t = act_cell(C_t) * ogated */
/* H_t = act_cell(C_t) * ogated */
act_cell_d_
->
Compute
Deprecated
(
ct
,
gates
+
d2
_
);
act_cell_d_
->
Compute
(
ct
,
gates
+
d2_
,
d
_
);
vmul_d_
->
Compute
(
gates
+
d2_
,
gates
+
d3_
,
ht
,
d_
);
vmul_d_
->
Compute
(
gates
+
d2_
,
gates
+
d3_
,
ht
,
d_
);
}
}
void
ComputeC1H1
(
T
*
gates
,
T
*
ct
,
T
*
ht
,
const
T
*
wp_data
)
const
override
{
void
ComputeC1H1
(
T
*
gates
,
T
*
ct
,
T
*
ht
,
const
T
*
wp_data
)
const
override
{
/* C_t = igated * cgated*/
/* C_t = igated * cgated*/
act_gate_d_
->
Compute
Deprecated
(
gates
+
d_
,
gates
+
d_
);
act_gate_d_
->
Compute
(
gates
+
d_
,
gates
+
d_
,
d_
);
act_cand_d_
->
Compute
Deprecated
(
gates
,
gates
);
act_cand_d_
->
Compute
(
gates
,
gates
,
d_
);
vmul_d_
->
Compute
(
gates
,
gates
+
d_
,
ct
,
d_
);
vmul_d_
->
Compute
(
gates
,
gates
+
d_
,
ct
,
d_
);
/* get outgated, put W_oc * C_t on igated */
/* get outgated, put W_oc * C_t on igated */
vmul_d_
->
Compute
(
wp_data
+
d2_
,
ct
,
gates
+
d_
,
d_
);
vmul_d_
->
Compute
(
wp_data
+
d2_
,
ct
,
gates
+
d_
,
d_
);
vadd_d_
->
Compute
(
gates
+
d_
,
gates
+
d3_
,
gates
+
d3_
,
d_
);
vadd_d_
->
Compute
(
gates
+
d_
,
gates
+
d3_
,
gates
+
d3_
,
d_
);
/* H_t = act_cell(C_t) * ogated */
/* H_t = act_cell(C_t) * ogated */
act_gate_d_
->
Compute
Deprecated
(
gates
+
d3_
,
gates
+
d3
_
);
act_gate_d_
->
Compute
(
gates
+
d3_
,
gates
+
d3_
,
d
_
);
act_cell_d_
->
Compute
Deprecated
(
ct
,
gates
+
d2
_
);
act_cell_d_
->
Compute
(
ct
,
gates
+
d2_
,
d
_
);
vmul_d_
->
Compute
(
gates
+
d2_
,
gates
+
d3_
,
ht
,
d_
);
vmul_d_
->
Compute
(
gates
+
d2_
,
gates
+
d3_
,
ht
,
d_
);
}
}
...
@@ -376,20 +376,20 @@ class GRUKernelImpl : public GRUKernel<T> {
...
@@ -376,20 +376,20 @@ class GRUKernelImpl : public GRUKernel<T> {
}
}
void
ComputeH1
(
T
*
gates
,
T
*
ht
)
const
override
{
void
ComputeH1
(
T
*
gates
,
T
*
ht
)
const
override
{
act_gate_d_
->
Compute
Deprecated
(
gates
,
gates
);
act_gate_d_
->
Compute
(
gates
,
gates
,
d_
);
act_state_d_
->
Compute
Deprecated
(
gates
+
d2_
,
gates
+
d2
_
);
act_state_d_
->
Compute
(
gates
+
d2_
,
gates
+
d2_
,
d
_
);
vmul_d_
->
Compute
(
gates
,
gates
+
d2_
,
ht
,
d_
);
vmul_d_
->
Compute
(
gates
,
gates
+
d2_
,
ht
,
d_
);
}
}
void
ComputeHtPart1
(
T
*
gates
,
const
T
*
ht_1
,
T
*
ht
)
const
override
{
void
ComputeHtPart1
(
T
*
gates
,
const
T
*
ht_1
,
T
*
ht
)
const
override
{
// W: {W_update, W_reset; W_state}
// W: {W_update, W_reset; W_state}
act_gate_d2_
->
Compute
Deprecated
(
gates
,
gates
);
act_gate_d2_
->
Compute
(
gates
,
gates
,
d2_
);
vmul_d_
->
Compute
(
ht_1
,
gates
+
d_
,
ht
,
d_
);
vmul_d_
->
Compute
(
ht_1
,
gates
+
d_
,
ht
,
d_
);
}
}
void
ComputeHtPart2
(
T
*
gates
,
const
T
*
ht_1
,
T
*
ht
)
const
override
{
void
ComputeHtPart2
(
T
*
gates
,
const
T
*
ht_1
,
T
*
ht
)
const
override
{
T
*
y
=
gates
+
d2_
;
T
*
y
=
gates
+
d2_
;
act_state_d_
->
Compute
Deprecated
(
y
,
y
);
act_state_d_
->
Compute
(
y
,
y
,
d_
);
// out = zt*ht~ + (1-zt)*ht_1
// out = zt*ht~ + (1-zt)*ht_1
for
(
int
i
=
0
;
i
<
d_
;
++
i
)
{
for
(
int
i
=
0
;
i
<
d_
;
++
i
)
{
ht
[
i
]
=
gates
[
i
]
*
y
[
i
]
+
(
static_cast
<
T
>
(
1
)
-
gates
[
i
])
*
ht_1
[
i
];
ht
[
i
]
=
gates
[
i
]
*
y
[
i
]
+
(
static_cast
<
T
>
(
1
)
-
gates
[
i
])
*
ht_1
[
i
];
...
...
paddle/fluid/operators/math/jit_kernel_test.cc
浏览文件 @
e2d6eddd
...
@@ -181,7 +181,7 @@ TEST(JitKernel, vexp) {
...
@@ -181,7 +181,7 @@ TEST(JitKernel, vexp) {
auto
ttgts
=
GetCurrentUS
();
auto
ttgts
=
GetCurrentUS
();
for
(
int
i
=
0
;
i
<
repeat
;
++
i
)
{
for
(
int
i
=
0
;
i
<
repeat
;
++
i
)
{
// ker->Compute
Deprecated
(x_data, ztgt_data);
// ker->Compute(x_data, ztgt_data);
ker
->
Compute
(
x_data
,
ztgt_data
,
d
);
ker
->
Compute
(
x_data
,
ztgt_data
,
d
);
}
}
auto
ttgte
=
GetCurrentUS
();
auto
ttgte
=
GetCurrentUS
();
...
@@ -345,8 +345,8 @@ void lstm_ctht_ref(
...
@@ -345,8 +345,8 @@ void lstm_ctht_ref(
const
std
::
shared_ptr
<
const
std
::
shared_ptr
<
const
paddle
::
operators
::
math
::
jitkernel
::
VExpKernel
<
float
>>&
vexp_1
,
const
paddle
::
operators
::
math
::
jitkernel
::
VExpKernel
<
float
>>&
vexp_1
,
const
int
d
,
float
*
gates
,
const
float
*
ct_1
,
float
*
ct
,
float
*
ht
)
{
const
int
d
,
float
*
gates
,
const
float
*
ct_1
,
float
*
ct
,
float
*
ht
)
{
vsigmoid_3d
->
Compute
Deprecated
(
gates
+
d
,
gates
+
d
);
vsigmoid_3d
->
Compute
(
gates
+
d
,
gates
+
d
,
3
*
d
);
vtanh_d
->
Compute
Deprecated
(
gates
,
gates
);
vtanh_d
->
Compute
(
gates
,
gates
,
d
);
const
float
*
i
=
gates
+
d
,
*
f
=
gates
+
d
*
2
,
*
o
=
gates
+
d
*
3
;
const
float
*
i
=
gates
+
d
,
*
f
=
gates
+
d
*
2
,
*
o
=
gates
+
d
*
3
;
const
float
min
=
SIGMOID_THRESHOLD_MIN
;
const
float
min
=
SIGMOID_THRESHOLD_MIN
;
const
float
max
=
SIGMOID_THRESHOLD_MAX
;
const
float
max
=
SIGMOID_THRESHOLD_MAX
;
...
@@ -356,7 +356,7 @@ void lstm_ctht_ref(
...
@@ -356,7 +356,7 @@ void lstm_ctht_ref(
// H_t = act_cell(C_t) * ogated
// H_t = act_cell(C_t) * ogated
float
tmp
=
ct
[
k
]
*
2
;
float
tmp
=
ct
[
k
]
*
2
;
tmp
=
0.
f
-
((
tmp
<
min
)
?
min
:
((
tmp
>
max
)
?
max
:
tmp
));
tmp
=
0.
f
-
((
tmp
<
min
)
?
min
:
((
tmp
>
max
)
?
max
:
tmp
));
vexp_1
->
Compute
Deprecated
(
&
tmp
,
&
tmp
);
vexp_1
->
Compute
(
&
tmp
,
&
tmp
,
1
);
tmp
=
2.
f
/
(
1.
f
+
tmp
)
-
1.
f
;
tmp
=
2.
f
/
(
1.
f
+
tmp
)
-
1.
f
;
ht
[
k
]
=
tmp
*
o
[
k
];
ht
[
k
]
=
tmp
*
o
[
k
];
}
}
...
@@ -374,13 +374,13 @@ void lstm_ctht_better(
...
@@ -374,13 +374,13 @@ void lstm_ctht_better(
const
paddle
::
operators
::
math
::
jitkernel
::
VAddKernel
<
float
>>&
vadd_d
,
const
paddle
::
operators
::
math
::
jitkernel
::
VAddKernel
<
float
>>&
vadd_d
,
const
int
d
,
float
*
gates
,
const
float
*
ct_1
,
float
*
ct
,
float
*
ht
)
{
const
int
d
,
float
*
gates
,
const
float
*
ct_1
,
float
*
ct
,
float
*
ht
)
{
int
d2
=
d
*
2
;
int
d2
=
d
*
2
;
vsigmoid_3d
->
Compute
Deprecated
(
gates
+
d
,
gates
+
d
);
vsigmoid_3d
->
Compute
(
gates
+
d
,
gates
+
d
,
3
*
d
);
vtanh_d
->
Compute
Deprecated
(
gates
,
gates
);
vtanh_d
->
Compute
(
gates
,
gates
,
d
);
vmul_d
->
Compute
(
gates
,
gates
+
d
,
gates
+
d
,
d
);
vmul_d
->
Compute
(
gates
,
gates
+
d
,
gates
+
d
,
d
);
vmul_d
->
Compute
(
ct_1
,
gates
+
d2
,
gates
+
d2
,
d
);
vmul_d
->
Compute
(
ct_1
,
gates
+
d2
,
gates
+
d2
,
d
);
vadd_d
->
Compute
(
gates
+
d
,
gates
+
d2
,
ct
,
d
);
vadd_d
->
Compute
(
gates
+
d
,
gates
+
d2
,
ct
,
d
);
/* H_t = act_cell(C_t) * ogated */
/* H_t = act_cell(C_t) * ogated */
vtanh_d
->
Compute
Deprecated
(
ct
,
gates
+
d2
);
vtanh_d
->
Compute
(
ct
,
gates
+
d2
,
d
);
vmul_d
->
Compute
(
gates
+
d2
,
gates
+
d
*
3
,
ht
,
d
);
vmul_d
->
Compute
(
gates
+
d2
,
gates
+
d
*
3
,
ht
,
d
);
}
}
...
@@ -737,7 +737,7 @@ void vaddrelu_better(
...
@@ -737,7 +737,7 @@ void vaddrelu_better(
const
paddle
::
operators
::
math
::
jitkernel
::
VReluKernel
<
float
>>&
vrelu
,
const
paddle
::
operators
::
math
::
jitkernel
::
VReluKernel
<
float
>>&
vrelu
,
const
float
*
x
,
const
float
*
y
,
float
*
z
,
int
d
)
{
const
float
*
x
,
const
float
*
y
,
float
*
z
,
int
d
)
{
vadd
->
Compute
(
x
,
y
,
z
,
d
);
vadd
->
Compute
(
x
,
y
,
z
,
d
);
vrelu
->
Compute
Deprecated
(
z
,
z
);
vrelu
->
Compute
(
z
,
z
,
d
);
}
}
TEST
(
JitKernel
,
vaddrelu
)
{
TEST
(
JitKernel
,
vaddrelu
)
{
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录