Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
d59f7335
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
d59f7335
编写于
1月 28, 2019
作者:
T
tensor-tang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
refine softmax and use with cache
test=develop
上级
7383eefd
变更
7
隐藏空白更改
内联
并排
Showing
7 changed file
with
102 addition
and
34 deletion
+102
-34
paddle/fluid/operators/jit/benchmark.cc
paddle/fluid/operators/jit/benchmark.cc
+3
-0
paddle/fluid/operators/jit/gen/act.cc
paddle/fluid/operators/jit/gen/act.cc
+25
-3
paddle/fluid/operators/jit/helper.h
paddle/fluid/operators/jit/helper.h
+22
-0
paddle/fluid/operators/jit/more/mix/mix.cc
paddle/fluid/operators/jit/more/mix/mix.cc
+44
-6
paddle/fluid/operators/jit/more/mkl/mkl.cc
paddle/fluid/operators/jit/more/mkl/mkl.cc
+2
-1
paddle/fluid/operators/math/CMakeLists.txt
paddle/fluid/operators/math/CMakeLists.txt
+1
-1
paddle/fluid/operators/math/softmax_impl.h
paddle/fluid/operators/math/softmax_impl.h
+5
-23
未找到文件。
paddle/fluid/operators/jit/benchmark.cc
浏览文件 @
d59f7335
...
...
@@ -187,6 +187,9 @@ void BenchAXYNKernel() {
RandomVec
<
T
>
(
d
,
x_data
);
BenchAllImpls
<
KT
,
jit
::
AXYNTuples
<
T
>
,
PlaceType
>
(
d
,
&
a
,
x
.
data
<
T
>
(),
y_data
,
d
);
// test inplace
BenchAllImpls
<
KT
,
jit
::
AXYNTuples
<
T
>
,
PlaceType
>
(
d
,
&
a
,
x
.
data
<
T
>
(),
x_data
,
d
);
}
}
...
...
paddle/fluid/operators/jit/gen/act.cc
浏览文件 @
d59f7335
...
...
@@ -81,9 +81,7 @@ void VActJitCode::genCode() {
#define DECLARE_ACT_CREATOR(name) \
class name##Creator : public JitCodeCreator<int> { \
public: \
bool UseMe(const int& attr) const override { \
return platform::MayIUse(platform::avx); \
} \
bool UseMe(const int& attr) const override; \
size_t CodeSize(const int& d) const override; \
std::unique_ptr<GenBase> CreateJitCode(const int& attr) const override { \
return make_unique<name##JitCode>(attr, CodeSize(attr)); \
...
...
@@ -98,6 +96,30 @@ DECLARE_ACT_CREATOR(VSigmoid);
DECLARE_ACT_CREATOR
(
VTanh
);
// TODO(TJ): tuning use me
bool
VReluCreator
::
UseMe
(
const
int
&
d
)
const
{
return
platform
::
MayIUse
(
platform
::
avx
);
}
bool
VSquareCreator
::
UseMe
(
const
int
&
d
)
const
{
return
platform
::
MayIUse
(
platform
::
avx
);
}
bool
VIdentityCreator
::
UseMe
(
const
int
&
d
)
const
{
return
platform
::
MayIUse
(
platform
::
avx
);
}
bool
VExpCreator
::
UseMe
(
const
int
&
d
)
const
{
return
platform
::
MayIUse
(
platform
::
avx
)
&&
d
<
32
;
}
bool
VSigmoidCreator
::
UseMe
(
const
int
&
d
)
const
{
return
platform
::
MayIUse
(
platform
::
avx
);
}
bool
VTanhCreator
::
UseMe
(
const
int
&
d
)
const
{
return
platform
::
MayIUse
(
platform
::
avx
);
}
size_t
VReluCreator
::
CodeSize
(
const
int
&
d
)
const
{
return
96
/* init size */
+
(
d
/
YMM_FLOAT_BLOCK
+
3
)
*
4
/* instructions */
*
...
...
paddle/fluid/operators/jit/helper.h
浏览文件 @
d59f7335
...
...
@@ -118,6 +118,28 @@ typename KernelTuples::func_type Get(
return
GetRefer
<
KT
,
KernelTuples
>
();
}
template
<
KernelType
KT
,
typename
KernelTuples
>
class
KernelFuncsCache
{
public:
KernelFuncsCache
()
=
default
;
static
KernelFuncsCache
&
Instance
()
{
static
thread_local
KernelFuncsCache
<
KT
,
KernelTuples
>
g_func_cache
;
return
g_func_cache
;
}
bool
Has
(
int
key
)
const
{
return
funcs_
.
find
(
key
)
!=
funcs_
.
end
();
}
typename
KernelTuples
::
func_type
At
(
int
key
)
{
return
funcs_
.
at
(
key
);
}
void
Insert
(
int
key
,
typename
KernelTuples
::
func_type
func
)
{
funcs_
.
emplace
(
key
,
func
);
}
private:
std
::
unordered_map
<
int
,
typename
KernelTuples
::
func_type
>
funcs_
;
DISABLE_COPY_AND_ASSIGN
(
KernelFuncsCache
);
};
const
char
*
to_string
(
KernelType
kt
);
const
char
*
to_string
(
SeqPoolType
kt
);
...
...
paddle/fluid/operators/jit/more/mix/mix.cc
浏览文件 @
d59f7335
...
...
@@ -49,12 +49,50 @@ void VTanh(const T* x, T* y, int n) {
}
void
Softmax
(
const
T
*
x
,
T
*
y
,
int
n
,
int
bs
)
{
auto
compute_hmax
=
Get
<
kHMax
,
XRNTuples
<
T
>
,
platform
::
CPUPlace
>
(
n
);
auto
compute_hsum
=
Get
<
kHSum
,
XRNTuples
<
T
>
,
platform
::
CPUPlace
>
(
n
);
auto
compute_vscal
=
Get
<
kVScal
,
AXYNTuples
<
T
>
,
platform
::
CPUPlace
>
(
n
);
auto
compute_vaddbias
=
Get
<
kVAddBias
,
AXYNTuples
<
T
>
,
platform
::
CPUPlace
>
(
n
);
auto
compute_vexp
=
Get
<
KernelType
::
kVExp
,
XYNTuples
<
T
>
,
platform
::
CPUPlace
>
(
n
);
typename
XRNTuples
<
T
>::
func_type
compute_hmax
{
nullptr
};
typename
XRNTuples
<
T
>::
func_type
compute_hsum
{
nullptr
};
typename
AXYNTuples
<
T
>::
func_type
compute_vscal
{
nullptr
};
typename
AXYNTuples
<
T
>::
func_type
compute_vaddbias
{
nullptr
};
typename
XYNTuples
<
T
>::
func_type
compute_vexp
{
nullptr
};
if
(
!
KernelFuncsCache
<
kHMax
,
XRNTuples
<
T
>>::
Instance
().
Has
(
n
))
{
compute_hmax
=
Get
<
kHMax
,
XRNTuples
<
T
>
,
platform
::
CPUPlace
>
(
n
);
KernelFuncsCache
<
kHMax
,
XRNTuples
<
T
>>::
Instance
().
Insert
(
n
,
compute_hmax
);
}
else
{
compute_hmax
=
KernelFuncsCache
<
kHMax
,
XRNTuples
<
T
>>::
Instance
().
At
(
n
);
}
if
(
!
KernelFuncsCache
<
kHSum
,
XRNTuples
<
T
>>::
Instance
().
Has
(
n
))
{
compute_hsum
=
Get
<
kHSum
,
XRNTuples
<
T
>
,
platform
::
CPUPlace
>
(
n
);
KernelFuncsCache
<
kHSum
,
XRNTuples
<
T
>>::
Instance
().
Insert
(
n
,
compute_hsum
);
}
else
{
compute_hsum
=
KernelFuncsCache
<
kHSum
,
XRNTuples
<
T
>>::
Instance
().
At
(
n
);
}
if
(
!
KernelFuncsCache
<
kVScal
,
AXYNTuples
<
T
>>::
Instance
().
Has
(
n
))
{
compute_vscal
=
Get
<
kVScal
,
AXYNTuples
<
T
>
,
platform
::
CPUPlace
>
(
n
);
KernelFuncsCache
<
kVScal
,
AXYNTuples
<
T
>>::
Instance
().
Insert
(
n
,
compute_vscal
);
}
else
{
compute_vscal
=
KernelFuncsCache
<
kVScal
,
AXYNTuples
<
T
>>::
Instance
().
At
(
n
);
}
if
(
!
KernelFuncsCache
<
kVAddBias
,
AXYNTuples
<
T
>>::
Instance
().
Has
(
n
))
{
compute_vaddbias
=
Get
<
kVAddBias
,
AXYNTuples
<
T
>
,
platform
::
CPUPlace
>
(
n
);
KernelFuncsCache
<
kVAddBias
,
AXYNTuples
<
T
>>::
Instance
().
Insert
(
n
,
compute_vaddbias
);
}
else
{
compute_vaddbias
=
KernelFuncsCache
<
kVAddBias
,
AXYNTuples
<
T
>>::
Instance
().
At
(
n
);
}
if
(
!
KernelFuncsCache
<
kVExp
,
XYNTuples
<
T
>>::
Instance
().
Has
(
n
))
{
compute_vexp
=
Get
<
KernelType
::
kVExp
,
XYNTuples
<
T
>
,
platform
::
CPUPlace
>
(
n
);
KernelFuncsCache
<
kVExp
,
XYNTuples
<
T
>>::
Instance
().
Insert
(
n
,
compute_vexp
);
}
else
{
compute_vexp
=
KernelFuncsCache
<
kVExp
,
XYNTuples
<
T
>>::
Instance
().
At
(
n
);
}
for
(
int
i
=
0
;
i
<
bs
;
++
i
)
{
T
scalar
;
compute_hmax
(
x
,
&
scalar
,
n
);
...
...
paddle/fluid/operators/jit/more/mkl/mkl.cc
浏览文件 @
d59f7335
...
...
@@ -179,7 +179,8 @@ bool SeqPoolKernel<double>::UseMe(const seq_pool_attr_t& attr) const {
template
<
>
bool
SoftmaxKernel
<
float
>::
UseMe
(
const
int
&
d
)
const
{
return
true
;
// tuned on avx2
return
platform
::
MayIUse
(
platform
::
avx
)
&&
d
<
60
;
}
#define AWALYS_USE_ME_WITH_DOUBLE(func) \
...
...
paddle/fluid/operators/math/CMakeLists.txt
浏览文件 @
d59f7335
...
...
@@ -53,7 +53,7 @@ math_library(sequence2batch)
math_library
(
sequence_padding
)
math_library
(
sequence_pooling DEPS math_function jit_kernel_helper
)
math_library
(
sequence_scale
)
math_library
(
softmax DEPS math_function
)
math_library
(
softmax DEPS math_function
jit_kernel_helper
)
math_library
(
beam_search DEPS math_function
)
math_library
(
matrix_bit_code
)
...
...
paddle/fluid/operators/math/softmax_impl.h
浏览文件 @
d59f7335
...
...
@@ -16,8 +16,8 @@ limitations under the License. */
#include <vector>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/operators/jit/kernels.h"
#include "paddle/fluid/operators/math/blas.h"
namespace
paddle
{
namespace
operators
{
namespace
math
{
...
...
@@ -81,28 +81,10 @@ class SoftmaxFunctor<DeviceContext, float, true, enable_if_CPU<DeviceContext>> {
const
int
kBatchDim
=
0
;
const
int
kClassDim
=
1
;
// 2D data. Batch x C
const
int
batch_size
=
in_dims
[
kBatchDim
];
const
int
num_classes
=
in_dims
[
kClassDim
];
std
::
vector
<
float
>
entities
(
batch_size
);
auto
blas
=
math
::
GetBlas
<
DeviceContext
,
float
>
(
context
);
for
(
int
n
=
0
;
n
<
batch_size
;
++
n
)
{
entities
[
n
]
=
in_data
[
n
*
num_classes
];
for
(
int
c
=
1
;
c
<
num_classes
;
++
c
)
{
entities
[
n
]
=
in_data
[
n
*
num_classes
+
c
]
>
entities
[
n
]
?
in_data
[
n
*
num_classes
+
c
]
:
entities
[
n
];
}
for
(
int
c
=
0
;
c
<
num_classes
;
++
c
)
{
out_data
[
n
*
num_classes
+
c
]
=
in_data
[
n
*
num_classes
+
c
]
-
entities
[
n
];
}
}
blas
.
VEXP
(
num_classes
*
batch_size
,
out_data
,
out_data
);
for
(
int
n
=
0
;
n
<
batch_size
;
++
n
)
{
auto
sum
=
blas
.
ASUM
(
num_classes
,
&
out_data
[
n
*
num_classes
],
1
);
blas
.
SCAL
(
num_classes
,
1.0
f
/
sum
,
&
out_data
[
n
*
num_classes
]);
}
auto
compute_softmax
=
jit
::
Get
<
jit
::
kSoftmax
,
jit
::
SoftmaxTuples
<
float
>
,
platform
::
CPUPlace
>
(
in_dims
[
kClassDim
]);
compute_softmax
(
in_data
,
out_data
,
in_dims
[
kClassDim
],
in_dims
[
kBatchDim
]);
}
};
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录