Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
92031968
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
92031968
编写于
9月 20, 2018
作者:
T
tensor-tang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
init vmul kernel
上级
b9acbcc8
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
169 addition
and
9 deletion
+169
-9
paddle/fluid/operators/math/jit_kernel.cc
paddle/fluid/operators/math/jit_kernel.cc
+126
-1
paddle/fluid/operators/math/jit_kernel.h
paddle/fluid/operators/math/jit_kernel.h
+26
-6
paddle/fluid/operators/math/jit_kernel_test.cc
paddle/fluid/operators/math/jit_kernel_test.cc
+17
-2
未找到文件。
paddle/fluid/operators/math/jit_kernel.cc
浏览文件 @
92031968
...
...
@@ -16,23 +16,132 @@ limitations under the License. */
#include <functional>
#include <string>
#include "paddle/fluid/operators/math/cpu_vec.h"
#include "paddle/fluid/platform/cpu_info.h"
#ifdef PADDLE_WITH_MKLML
#include "paddle/fluid/platform/dynload/mklml.h"
#endif
#ifdef __AVX__
#include <immintrin.h>
#endif
namespace
paddle
{
namespace
operators
{
namespace
math
{
namespace
jitkernel
{
namespace
jit
=
platform
::
jit
;
// Returns the single KernelPool shared by all callers. The pool is a
// function-local static, so it is constructed the first time this is called.
KernelPool& KernelPool::Instance() {
  static KernelPool pool;
  return pool;
}
// Assigns to `Compute` the instantiation src<t, isa, block> whose block tag
// matches the size `d`. Both `d` and `Compute` are taken from the scope in
// which the macro is expanded.
//   d <  AVX_FLOAT_BLOCK     -> kLT8
//   d == AVX_FLOAT_BLOCK     -> kEQ8
//   d == AVX512_FLOAT_BLOCK  -> kEQ16
//   anything else            -> kGT16
// Sizes strictly between AVX_FLOAT_BLOCK and AVX512_FLOAT_BLOCK therefore also
// select the kGT16 instantiation.
// NOTE(review): the float block sizes are used as thresholds for every `t`,
// including double -- confirm this is intended.
#define SEARCH_BLOCK(src, t, isa) \
if (d < AVX_FLOAT_BLOCK) { \
Compute = src<t, isa, kLT8>; \
} else if (d == AVX_FLOAT_BLOCK) { \
Compute = src<t, isa, kEQ8>; \
} else if (d == AVX512_FLOAT_BLOCK) { \
Compute = src<t, isa, kEQ16>; \
} else { \
Compute = src<t, isa, kGT16>; \
}
// Runs SEARCH_BLOCK(src, t, isa) for the first instruction set for which
// jit::MayIUse returns true, testing avx512_common, then avx2, then avx.
// When none of them is reported, jit::isa_any is used.
#define SEARCH_ISA_BLOCK(src, t) \
if (jit::MayIUse(jit::avx512_common)) { \
SEARCH_BLOCK(src, t, jit::avx512_common); \
} else if (jit::MayIUse(jit::avx2)) { \
SEARCH_BLOCK(src, t, jit::avx2); \
} else if (jit::MayIUse(jit::avx)) { \
SEARCH_BLOCK(src, t, jit::avx); \
} else { \
SEARCH_BLOCK(src, t, jit::isa_any); \
}
// Expands macro_(isa, block) once for each jit_block value.
#define FOR_EACH_BLOCK(macro_, isa) \
  macro_(isa, kLT8) macro_(isa, kEQ8) macro_(isa, kEQ16) macro_(isa, kGT16)

// Expands macro_(isa, block) for every (instruction set, block) pair.
// The fallback instruction set is spelled jit::isa_any, the same enumerator
// SEARCH_ISA_BLOCK selects, so every instantiation that SEARCH_ISA_BLOCK can
// pick has a matching expansion here.
#define FOR_EACH_ISA_BLOCK(macro_)           \
  FOR_EACH_BLOCK(macro_, jit::avx512_common) \
  FOR_EACH_BLOCK(macro_, jit::avx2)          \
  FOR_EACH_BLOCK(macro_, jit::avx)           \
  FOR_EACH_BLOCK(macro_, jit::isa_any)

// Plain element-wise product loop. Expects n, x, y and z to be defined in the
// scope where the macro is expanded.
#define VMUL_ANY                \
  for (int i = 0; i < n; ++i) { \
    z[i] = x[i] * y[i];         \
  }

// Generic multiply kernel: writes z[i] = x[i] * y[i] for i in [0, n).
// The instruction-set and block template arguments only select which
// specialization is used; this primary template ignores them.
template <typename T, platform::jit::cpu_isa_t isa, jit_block>
static void VMulCompute(const int n, const T* x, const T* y, T* z) {
  VMUL_ANY
}

// Guarded by the same macro as the mklml.h include at the top of this file, so
// the platform::dynload symbols are declared whenever this section compiles.
// NOTE(review): confirm vsMul/vdMul are exported by platform::dynload.
#ifdef PADDLE_WITH_MKLML
// Explicit specializations must not repeat a storage-class specifier, and
// their parameter types must match the primary template with T substituted
// (hence `double* z` in the double variant).
#define DEFINE_VMUL_COMPUTE_FLOAT(isa, block)                      \
  template <>                                                      \
  void VMulCompute<float, isa, block>(const int n, const float* x, \
                                      const float* y, float* z) {  \
    platform::dynload::vsMul(n, x, y, z);                          \
  }

#define DEFINE_VMUL_COMPUTE_DOUBLE(isa, block)                       \
  template <>                                                        \
  void VMulCompute<double, isa, block>(const int n, const double* x, \
                                       const double* y, double* z) { \
    platform::dynload::vdMul(n, x, y, z);                            \
  }

FOR_EACH_ISA_BLOCK(DEFINE_VMUL_COMPUTE_FLOAT)
FOR_EACH_ISA_BLOCK(DEFINE_VMUL_COMPUTE_DOUBLE)
// TODO(TJ): add EQ8
#endif

#undef DEFINE_VMUL_COMPUTE_FLOAT
#undef DEFINE_VMUL_COMPUTE_DOUBLE
#undef VMUL_ANY
// Picks the float VMulCompute instantiation for size d (by supported
// instruction set and block tag) and stores it in Compute.
template <>
VMulKernel<float>::VMulKernel(int d) {
  SEARCH_ISA_BLOCK(VMulCompute, float);
}
// Picks the double VMulCompute instantiation for size d (by supported
// instruction set and block tag) and stores it in Compute.
template <>
VMulKernel<double>::VMulKernel(int d) {
  SEARCH_ISA_BLOCK(VMulCompute, double);
}
// Returns the pooled VMulKernel<float> for size d. Kernels are cached in kers_
// under the key "f" + d; a missing entry is created, stored and returned.
template <>
const std::shared_ptr<VMulKernel<float>>
KernelPool::Get<VMulKernel<float>>(int d) {
  const std::string cache_key = "f" + std::to_string(d);

  // Cache hit: hand back the stored kernel, downcast to its concrete type.
  const auto found = kers_.find(cache_key);
  if (found != kers_.end()) {
    return std::dynamic_pointer_cast<VMulKernel<float>>(found->second);
  }

  // Cache miss: build the kernel and remember it as a base-class pointer.
  auto created = std::make_shared<VMulKernel<float>>(d);
  kers_.insert({cache_key, std::dynamic_pointer_cast<Kernel>(created)});
  return created;
}
// Returns the pooled VMulKernel<double> for size d. Kernels are cached in
// kers_ under the key "d" + d; a missing entry is created, stored and returned.
template <>
const std::shared_ptr<VMulKernel<double>>
KernelPool::Get<VMulKernel<double>>(int d) {
  const std::string cache_key = "d" + std::to_string(d);

  // Cache hit: hand back the stored kernel, downcast to its concrete type.
  const auto found = kers_.find(cache_key);
  if (found != kers_.end()) {
    return std::dynamic_pointer_cast<VMulKernel<double>>(found->second);
  }

  // Cache miss: build the kernel and remember it as a base-class pointer.
  auto created = std::make_shared<VMulKernel<double>>(d);
  kers_.insert({cache_key, std::dynamic_pointer_cast<Kernel>(created)});
  return created;
}
template
<
>
LSTMKernel
<
float
>::
LSTMKernel
(
int
d
,
const
std
::
string
&
act_gate_str
,
const
std
::
string
&
act_cand_str
,
const
std
::
string
&
act_cell_str
)
:
Kernel
(),
d_
(
d
)
{
d2_
=
d
*
2
;
d3_
=
d
*
3
;
if
(
platform
::
jit
::
MayIUse
(
platform
::
jit
::
avx512_common
))
{
math
::
VecActivations
<
float
,
platform
::
jit
::
avx512_common
>
act_functor
;
act_gate_
=
act_functor
(
act_gate_str
);
...
...
@@ -48,6 +157,22 @@ LSTMKernel<float>::LSTMKernel(int d, const std::string& act_gate_str,
act_gate_
=
act_functor
(
act_gate_str
);
act_cell_
=
act_functor
(
act_cell_str
);
act_cand_
=
act_functor
(
act_cand_str
);
// ComputeCtHt = [&](float*gates,const float*ct_1,float*ct, float*ht) {
// // gates: W_ch, W_ih, W_fh, W_oh
// act_gate(d3_, gates + d_, gates + d_);
// /* C_t = C_t-1 * fgated + cand_gated * igated */
// act_cand(d_, gates, gates);
// blas.VMUL(d_, gates, gates + d_, gates + d_);
// blas.VMUL(d_, ct_1, gates + d2_, gates + d2_);
// blas.VADD(d_, gates + d_, gates + d2_, ct);
// /* H_t = act_cell(C_t) * ogated */
// act_cell(d_, ct, gates + d2_);
// blas.VMUL(d_, gates + d2_, gates + d3_, ht)
// GET_Ct(ct_1, gates, ct);
// GET_Ht(ct, gates, ht);
// };
}
else
{
math
::
VecActivations
<
float
,
platform
::
jit
::
isa_any
>
act_functor
;
act_gate_
=
act_functor
(
act_gate_str
);
...
...
paddle/fluid/operators/math/jit_kernel.h
浏览文件 @
92031968
...
...
@@ -17,6 +17,7 @@ limitations under the License. */
#include <memory> // for shared_ptr
#include <string>
#include <unordered_map>
#include "paddle/fluid/platform/cpu_info.h"
#include "paddle/fluid/platform/macros.h"
// Note: Only support on CPU yet.
...
...
@@ -25,6 +26,18 @@ namespace operators {
namespace
math
{
namespace
jitkernel
{
// Lower and upper threshold constants for sigmoid.
// NOTE(review): not referenced in the code visible here; presumably the input
// is clamped to this range before exponentiation -- confirm at the use sites.
#define SIGMOID_THRESHOLD_MIN -40.0
#define SIGMOID_THRESHOLD_MAX 13.0
// Block sizes per instruction set and element type. AVX_FLOAT_BLOCK and
// AVX512_FLOAT_BLOCK are the thresholds SEARCH_BLOCK compares a size against.
// NOTE(review): presumably the element count of one SIMD register for the
// given instruction set and type -- confirm.
#define AVX_FLOAT_BLOCK 8
#define AVX_DOUBLE_BLOCK 4
#define AVX2_FLOAT_BLOCK 8
#define AVX2_DOUBLE_BLOCK 4
#define AVX512_FLOAT_BLOCK 16
#define AVX512_DOUBLE_BLOCK 8
// Size-class tag used as a template argument when selecting a kernel
// implementation. Enumerators keep their implicit values 0..3.
enum jit_block {
  kLT8,   // 0
  kEQ8,   // 1
  kEQ16,  // 2
  kGT16   // 3
};
class
Kernel
{
public:
Kernel
()
{}
...
...
@@ -36,7 +49,7 @@ class Kernel {
class
KernelPool
{
public:
static
KernelPool
&
Instance
();
static
KernelPool
&
Instance
();
template
<
typename
Ker
,
typename
...
ARGS
>
const
std
::
shared_ptr
<
Ker
>
Get
(
ARGS
...
args
);
...
...
@@ -48,17 +61,24 @@ class KernelPool {
DISABLE_COPY_AND_ASSIGN
(
KernelPool
);
};
// Kernel that exposes an element-wise multiply routine via a function pointer.
template <typename T>
class VMulKernel : public Kernel {
 public:
  // Only declared here; the definitions are per-type specializations.
  explicit VMulKernel(int n);

  // Multiply routine with signature (n, const T*, const T*, T*).
  void (*Compute)(const int n, const T*, const T*, T*);
};
template
<
typename
T
>
class
LSTMKernel
:
public
Kernel
{
public:
explicit
LSTMKernel
(
int
d
,
const
std
::
string
&
act_gate
,
const
std
::
string
&
act_cand
,
const
std
::
string
&
act_cell
);
explicit
LSTMKernel
(
int
d
,
const
std
::
string
&
act_gate
,
const
std
::
string
&
act_cand
,
const
std
::
string
&
act_cell
);
void
ComputeCtHt
(
T
*
gates
,
const
T
*
ct_1
,
T
*
ct
);
void
ComputeCtHt_NoC0H0
(
T
*
gates
,
const
T
*
ct_1
,
T
*
ct
)
;
void
(
*
jit_ker
)(
T
*
,
const
T
*
,
T
*
,
T
*
);
std
::
function
<
void
(
T
*
,
const
T
*
,
T
*
,
T
*
)
>
ComputeCtHt
,
ComputeCtHt_NoC0H0
;
private:
int
d_
;
int
d_
,
d2_
,
d3_
;
std
::
function
<
void
(
const
int
,
const
T
*
,
T
*
)
>
act_gate_
,
act_cell_
,
act_cand_
;
};
...
...
paddle/fluid/operators/math/jit_kernel_test.cc
浏览文件 @
92031968
...
...
@@ -23,10 +23,25 @@ TEST(JitKernel, pool) {
namespace
jit
=
paddle
::
operators
::
math
::
jitkernel
;
const
int
frame_size
=
4
;
std
::
string
act_gate
=
"sigmoid"
,
act_cand
=
"tanh"
,
act_cell
=
"tanh"
;
const
auto
&
t
=
const
auto
&
p1
=
jit
::
KernelPool
::
Instance
()
.
template
Get
<
jit
::
LSTMKernel
<
float
>,
int
,
const
std
::
string
&
,
const
std
::
string
&
,
const
std
::
string
&>
(
frame_size
,
act_gate
,
act_cand
,
act_cell
);
LOG
(
INFO
)
<<
t
;
const
auto
&
p2
=
jit
::
KernelPool
::
Instance
()
.
template
Get
<
jit
::
LSTMKernel
<
float
>,
int
,
const
std
::
string
&
,
const
std
::
string
&
,
const
std
::
string
&>
(
frame_size
,
act_gate
,
act_cand
,
act_cell
);
EXPECT_EQ
(
p1
,
p2
);
const
auto
&
p3
=
jit
::
KernelPool
::
Instance
().
template
Get
<
jit
::
VMulKernel
<
float
>
>
(
4
);
EXPECT_TRUE
(
std
::
dynamic_pointer_cast
<
jit
::
Kernel
>
(
p2
)
!=
std
::
dynamic_pointer_cast
<
jit
::
Kernel
>
(
p3
));
const
auto
&
p4
=
jit
::
KernelPool
::
Instance
().
template
Get
<
jit
::
VMulKernel
<
double
>
>
(
4
);
EXPECT_TRUE
(
std
::
dynamic_pointer_cast
<
jit
::
Kernel
>
(
p3
)
!=
std
::
dynamic_pointer_cast
<
jit
::
Kernel
>
(
p4
));
}
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录