Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
9131a356
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2302
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
9131a356
编写于
10月 09, 2018
作者:
T
tensor-tang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
replace the lstm compute with jitkernel
test=develop
上级
b55c2476
变更
5
显示空白变更内容
内联
并排
Showing
5 changed file
with
22 addition
and
145 deletion
+22
-145
paddle/fluid/operators/CMakeLists.txt
paddle/fluid/operators/CMakeLists.txt
+1
-1
paddle/fluid/operators/fusion_lstm_op.cc
paddle/fluid/operators/fusion_lstm_op.cc
+18
-32
paddle/fluid/operators/math/CMakeLists.txt
paddle/fluid/operators/math/CMakeLists.txt
+3
-5
paddle/fluid/operators/math/cpu_lstm_compute.cc
paddle/fluid/operators/math/cpu_lstm_compute.cc
+0
-43
paddle/fluid/operators/math/cpu_lstm_compute.h
paddle/fluid/operators/math/cpu_lstm_compute.h
+0
-64
未找到文件。
paddle/fluid/operators/CMakeLists.txt
浏览文件 @
9131a356
...
@@ -299,7 +299,7 @@ op_library(flatten_op DEPS reshape_op)
...
@@ -299,7 +299,7 @@ op_library(flatten_op DEPS reshape_op)
op_library
(
sequence_pad_op DEPS sequence_padding
)
op_library
(
sequence_pad_op DEPS sequence_padding
)
op_library
(
unstack_op DEPS stack_op
)
op_library
(
unstack_op DEPS stack_op
)
op_library
(
fake_quantize_op DEPS memory
)
op_library
(
fake_quantize_op DEPS memory
)
op_library
(
fusion_lstm_op DEPS
cpu_lstm_compute
)
op_library
(
fusion_lstm_op DEPS
jit_kernel
)
if
(
WITH_GPU
)
if
(
WITH_GPU
)
op_library
(
conv_op DEPS vol2col depthwise_conv im2col
)
op_library
(
conv_op DEPS vol2col depthwise_conv im2col
)
op_library
(
layer_norm_op DEPS cub
)
op_library
(
layer_norm_op DEPS cub
)
...
...
paddle/fluid/operators/fusion_lstm_op.cc
浏览文件 @
9131a356
...
@@ -15,9 +15,9 @@ limitations under the License. */
...
@@ -15,9 +15,9 @@ limitations under the License. */
#include "paddle/fluid/operators/fusion_lstm_op.h"
#include "paddle/fluid/operators/fusion_lstm_op.h"
#include <string>
#include <string>
#include "paddle/fluid/operators/math/blas.h"
#include "paddle/fluid/operators/math/blas.h"
#include "paddle/fluid/operators/math/cpu_lstm_compute.h"
#include "paddle/fluid/operators/math/cpu_vec.h"
#include "paddle/fluid/operators/math/cpu_vec.h"
#include "paddle/fluid/operators/math/fc_compute.h"
#include "paddle/fluid/operators/math/fc_compute.h"
#include "paddle/fluid/operators/math/jit_kernel.h"
#include "paddle/fluid/operators/math/sequence2batch.h"
#include "paddle/fluid/operators/math/sequence2batch.h"
#include "paddle/fluid/platform/cpu_info.h"
#include "paddle/fluid/platform/cpu_info.h"
...
@@ -309,11 +309,6 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
...
@@ -309,11 +309,6 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
act_gate(D, gates + D3, gates + D3); \
act_gate(D, gates + D3, gates + D3); \
GET_Ht(ct, gates, ht)
GET_Ht(ct, gates, ht)
#define COMPUTE_CtHt(gates, ct_1, ct, ht) \
act_gate(D3, gates + D, gates + D); \
GET_Ct(ct_1, gates, ct); \
GET_Ht(ct, gates, ht)
#define COMPUTE_CtHt_PEEPHOLE(gates, ct_1, ct, ht) \
#define COMPUTE_CtHt_PEEPHOLE(gates, ct_1, ct, ht) \
/* get fgated and igated*/
\
/* get fgated and igated*/
\
blas.VMUL(D, wc_data, ct_1, checked_cell_data); \
blas.VMUL(D, wc_data, ct_1, checked_cell_data); \
...
@@ -403,22 +398,18 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
...
@@ -403,22 +398,18 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
}
}
}
}
}
else
{
}
else
{
// TODO(TJ): unly workaround, clean me
const
auto
&
ker
=
std
::
function
<
void
(
T
*
,
const
T
*
,
T
*
,
T
*
)
>
compute_ctht
;
math
::
jitkernel
::
KernelPool
::
Instance
()
if
(
platform
::
jit
::
MayIUse
(
platform
::
jit
::
avx
)
&&
.
template
Get
<
math
::
jitkernel
::
LSTMKernel
<
T
>,
int
,
act_gate_str
==
"sigmoid"
&&
act_cand_str
==
"tanh"
&&
const
std
::
string
&
,
const
std
::
string
&
,
act_cell_str
==
"tanh"
&&
D
==
8
)
{
const
std
::
string
&>
(
D
,
act_gate_str
,
act_cand_str
,
compute_ctht
=
math
::
lstm_compute_ctht
<
T
>
;
act_cell_str
);
}
else
{
compute_ctht
=
[
&
](
T
*
gates
,
const
T
*
ct_1
,
T
*
ct
,
T
*
ht
)
{
COMPUTE_CtHt
(
gates
,
ct_1
,
ct
,
ht
);
};
}
for
(
int
i
=
0
;
i
<
N
;
++
i
)
{
for
(
int
i
=
0
;
i
<
N
;
++
i
)
{
PROCESS_H0C0
PROCESS_H0C0
for
(
int
step
=
tstart
;
step
<
seq_len
;
++
step
)
{
for
(
int
step
=
tstart
;
step
<
seq_len
;
++
step
)
{
GEMM_WH_ADDON
(
1
,
prev_h_data
,
xx_data
);
GEMM_WH_ADDON
(
1
,
prev_h_data
,
xx_data
);
compute_cth
t
(
xx_data
,
prev_c_data
,
c_out_data
,
h_out_data
);
ker
->
ComputeCtH
t
(
xx_data
,
prev_c_data
,
c_out_data
,
h_out_data
);
MOVE_ONE_STEP
;
MOVE_ONE_STEP
;
}
}
}
}
...
@@ -552,23 +543,19 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
...
@@ -552,23 +543,19 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
MOVE_ONE_STEP
;
MOVE_ONE_STEP
;
}
}
}
else
{
}
else
{
// TODO(TJ): unly workaround, clean me
const
auto
&
ker
=
std
::
function
<
void
(
T
*
,
const
T
*
,
T
*
,
T
*
)
>
compute_ctht
;
math
::
jitkernel
::
KernelPool
::
Instance
()
if
(
platform
::
jit
::
MayIUse
(
platform
::
jit
::
avx
)
&&
.
template
Get
<
math
::
jitkernel
::
LSTMKernel
<
T
>,
int
,
act_gate_str
==
"sigmoid"
&&
act_cand_str
==
"tanh"
&&
const
std
::
string
&
,
const
std
::
string
&
,
act_cell_str
==
"tanh"
&&
D
==
8
)
{
const
std
::
string
&>
(
D
,
act_gate_str
,
act_cand_str
,
compute_ctht
=
math
::
lstm_compute_ctht
<
T
>
;
act_cell_str
);
}
else
{
compute_ctht
=
[
&
](
T
*
gates
,
const
T
*
ct_1
,
T
*
ct
,
T
*
ht
)
{
COMPUTE_CtHt
(
gates
,
ct_1
,
ct
,
ht
);
};
}
for
(
int
step
=
tstart
;
step
<
max_seq_len
;
++
step
)
{
for
(
int
step
=
tstart
;
step
<
max_seq_len
;
++
step
)
{
const
int
cur_bs
=
batch_starts
[
step
+
1
]
-
batch_starts
[
step
];
const
int
cur_bs
=
batch_starts
[
step
+
1
]
-
batch_starts
[
step
];
GEMM_WH_ADDON
(
cur_bs
,
prev_h_data
,
batched_input_data
);
GEMM_WH_ADDON
(
cur_bs
,
prev_h_data
,
batched_input_data
);
DEFINE_CUR
;
DEFINE_CUR
;
for
(
int
i
=
0
;
i
<
cur_bs
;
++
i
)
{
for
(
int
i
=
0
;
i
<
cur_bs
;
++
i
)
{
compute_cth
t
(
cur_in_data
,
cur_prev_c_data
,
cur_c_out_data
,
ker
->
ComputeCtH
t
(
cur_in_data
,
cur_prev_c_data
,
cur_c_out_data
,
cur_h_out_data
);
cur_h_out_data
);
MOVE_ONE_BATCH
;
MOVE_ONE_BATCH
;
}
}
...
@@ -595,7 +582,6 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
...
@@ -595,7 +582,6 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
}
}
#undef COMPUTE_CtHt_PEEPHOLE
#undef COMPUTE_CtHt_PEEPHOLE
#undef COMPUTE_CtHt
#undef GET_Ct_NOH0C0
#undef GET_Ct_NOH0C0
#undef COMPUTE_CtHt_NOH0C0
#undef COMPUTE_CtHt_NOH0C0
#undef COMPUTE_CtHt_PEEPHOLE_NOH0C0
#undef COMPUTE_CtHt_PEEPHOLE_NOH0C0
...
...
paddle/fluid/operators/math/CMakeLists.txt
浏览文件 @
9131a356
...
@@ -45,8 +45,6 @@ math_library(im2col)
...
@@ -45,8 +45,6 @@ math_library(im2col)
if
(
NOT WIN32
)
# windows do not support avx functions yet.
if
(
NOT WIN32
)
# windows do not support avx functions yet.
math_library
(
gru_compute DEPS activation_functions math_function
)
math_library
(
gru_compute DEPS activation_functions math_function
)
math_library
(
lstm_compute DEPS activation_functions
)
math_library
(
lstm_compute DEPS activation_functions
)
# TODO(TJ): ugly workaround, clean me
cc_library
(
cpu_lstm_compute SRCS cpu_lstm_compute.cc DEPS activation_functions cblas cpu_info
)
endif
(
NOT WIN32
)
endif
(
NOT WIN32
)
cc_library
(
blas SRCS blas.cc DEPS cblas framework_proto device_context
)
cc_library
(
blas SRCS blas.cc DEPS cblas framework_proto device_context
)
...
@@ -76,7 +74,7 @@ if(WITH_GPU)
...
@@ -76,7 +74,7 @@ if(WITH_GPU)
endif
()
endif
()
cc_test
(
concat_test SRCS concat_test.cc DEPS concat
)
cc_test
(
concat_test SRCS concat_test.cc DEPS concat
)
cc_test
(
cpu_vec_test SRCS cpu_vec_test.cc DEPS blas cpu_info
)
cc_test
(
cpu_vec_test SRCS cpu_vec_test.cc DEPS blas cpu_info
)
cc_library
(
jit_kernel
_exp SRCS jit_kernel_exp.cc DEPS cpu_info cblas activation_functions
)
cc_library
(
jit_kernel
cc_library
(
jit_kernel_lstm SRCS jit_kernel_lstm.cc DEPS cpu_info cblas activation_functions
)
SRCS jit_kernel.cc jit_kernel_blas.cc jit_kernel_exp.cc jit_kernel_lstm.cc
cc_library
(
jit_kernel SRCS jit_kernel.cc jit_kernel_blas.cc DEPS cpu_info cblas jit_kernel_exp jit_kernel_lstm
)
DEPS cpu_info cblas activation_functions
)
cc_test
(
jit_kernel_test SRCS jit_kernel_test.cc DEPS jit_kernel
)
cc_test
(
jit_kernel_test SRCS jit_kernel_test.cc DEPS jit_kernel
)
paddle/fluid/operators/math/cpu_lstm_compute.cc
已删除
100644 → 0
浏览文件 @
b55c2476
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/math/cpu_lstm_compute.h"
namespace
paddle
{
namespace
operators
{
namespace
math
{
#ifdef __AVX__
template
<
>
void
lstm_compute_ctht
<
float
>
(
float
*
gates
,
const
float
*
ct_1
,
float
*
ct
,
float
*
ht
)
{
namespace
act
=
detail
::
forward
::
avx
;
// gates: W_ch, W_ih, W_fh, W_oh
__m256
c
,
i
,
f
,
o
;
c
=
_mm256_loadu_ps
(
gates
);
i
=
_mm256_loadu_ps
(
gates
+
8
);
f
=
_mm256_loadu_ps
(
gates
+
16
);
o
=
_mm256_loadu_ps
(
gates
+
24
);
/* C_t = C_t-1 * fgated + cand_gated * igated*/
c
=
_mm256_mul_ps
(
act
::
Tanh
(
c
),
act
::
Sigmoid
(
i
));
i
=
_mm256_loadu_ps
(
ct_1
);
f
=
_mm256_mul_ps
(
i
,
act
::
Sigmoid
(
f
));
f
=
_mm256_add_ps
(
c
,
f
);
_mm256_storeu_ps
(
ct
,
f
);
/* H_t = act_cell(C_t) * ogated */
o
=
_mm256_mul_ps
(
act
::
Tanh
(
f
),
act
::
Sigmoid
(
o
));
_mm256_storeu_ps
(
ht
,
o
);
}
#endif
}
// namespace math
}
// namespace operators
}
// namespace paddle
paddle/fluid/operators/math/cpu_lstm_compute.h
已删除
100644 → 0
浏览文件 @
b55c2476
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <string>
#include "paddle/fluid/operators/math/cpu_vec.h"
#include "paddle/fluid/platform/cpu_info.h"
#ifdef __AVX__
#include <immintrin.h>
#endif
namespace
paddle
{
namespace
operators
{
namespace
math
{
// TODO(TJ): ugly workaround, clean me
template
<
typename
T
>
void
lstm_compute_ctht
(
T
*
gates
,
const
T
*
ct_1
,
T
*
ct
,
T
*
ht
)
{
// gates: W_ch, W_ih, W_fh, W_oh
vec_sigmoid
<
T
,
platform
::
jit
::
avx
>
(
24
,
gates
+
8
,
gates
+
8
);
vec_tanh
<
T
,
platform
::
jit
::
avx
>
(
8
,
gates
,
gates
);
const
T
*
i
=
gates
+
8
,
*
f
=
gates
+
16
,
*
o
=
gates
+
24
;
const
T
min
=
SIGMOID_THRESHOLD_MIN
;
const
T
max
=
SIGMOID_THRESHOLD_MAX
;
for
(
int
d
=
0
;
d
<
8
;
++
d
)
{
// C_t = C_t-1 * fgated + cand_gated * igated
ct
[
d
]
=
ct_1
[
d
]
*
f
[
d
]
+
gates
[
d
]
*
i
[
d
];
// H_t = act_cell(C_t) * ogated
T
tmp
=
ct
[
d
]
*
2
;
tmp
=
static_cast
<
T
>
(
0
)
-
((
tmp
<
min
)
?
min
:
((
tmp
>
max
)
?
max
:
tmp
));
vec_exp
<
T
>
(
1
,
&
tmp
,
&
tmp
);
tmp
=
static_cast
<
T
>
(
2
)
/
(
static_cast
<
T
>
(
1
)
+
tmp
)
-
static_cast
<
T
>
(
1
);
ht
[
d
]
=
tmp
*
o
[
d
];
}
}
#ifdef __AVX__
namespace
detail
{
namespace
forward
{
namespace
avx
{
__m256
Sigmoid
(
const
__m256
a
);
__m256
Tanh
(
const
__m256
a
);
}
// namespace avx
}
// namespace forward
}
// namespace detail
template
<
>
void
lstm_compute_ctht
<
float
>
(
float
*
gates
,
const
float
*
ct_1
,
float
*
ct
,
float
*
ht
);
#endif
}
// namespace math
}
// namespace operators
}
// namespace paddle
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录