PaddlePaddle / Paddle, commit 318dfa0d (unverified)
Authored on Apr 17, 2020 by Zhang Ting; committed via GitHub on Apr 17, 2020.
Parent: 66dc8e30

remove eval in eigen function when dtype is fp16 (#23845)

Showing 1 changed file with 181 additions and 82 deletions:

paddle/fluid/operators/math/softmax_impl.h (+181, -82)
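Before the diff itself, a note on what the commit title refers to. In Eigen's tensor expressions, .eval() forces the sub-expression it follows to be materialized into a temporary before the enclosing expression is evaluated; without it the whole chain stays lazy and is fused into a single pass over the output. The patch keeps .eval() on the generic-dtype path but leaves it out of the new float16 specializations, presumably so that no half-precision intermediate has to be forced. A minimal stand-alone sketch of the two forms (illustrative only, not Paddle code; the names, shapes and sizes are made up):

// Illustrative sketch only (not Paddle code): shapes, names and sizes are
// made up to show the difference between the two expression forms.
#include <unsupported/Eigen/CXX11/Tensor>

int main() {
  Eigen::Tensor<float, 2> softmax(4, 8);   // (batch, class), already exponentiated
  softmax.setConstant(0.5f);

  Eigen::DSizes<int, 1> along_class(1);    // reduce over the class dimension
  Eigen::DSizes<int, 2> batch_by_one(4, 1);
  Eigen::DSizes<int, 2> one_by_class(1, 8);

  // Form kept for the generic dtype: .eval() materializes the reciprocal of
  // the row sums into a temporary before it is broadcast.
  Eigen::Tensor<float, 2> with_eval(4, 8);
  with_eval = softmax * softmax.sum(along_class)
                            .inverse()
                            .eval()
                            .reshape(batch_by_one)
                            .broadcast(one_by_class);

  // Form used by the new float16 specializations: same arithmetic, but the
  // whole chain stays a lazy expression and no temporary is forced.
  Eigen::Tensor<float, 2> without_eval(4, 8);
  without_eval = softmax * softmax.sum(along_class)
                               .inverse()
                               .reshape(batch_by_one)
                               .broadcast(one_by_class);
  return 0;
}

The trade-off is that without .eval() the reduction may be re-evaluated lazily as part of the fused expression, while with it the result is cached once in a temporary of the expression's scalar type.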
@@ -37,7 +37,9 @@ struct ValueClip {
 };
 
 template <typename DeviceContext, typename T, bool is_test>
-void SoftmaxEigen(const DeviceContext& context, const int axis_dim,
-                  const framework::Tensor* X, framework::Tensor* Y) {
+class SoftmaxEigen {
+ public:
+  void operator()(const DeviceContext& context, const int axis_dim,
+                  const framework::Tensor* X, framework::Tensor* Y) {
   constexpr int kBatchDim = 0;
   constexpr int kClassDim = 1;
@@ -85,19 +87,80 @@ void SoftmaxEigen(const DeviceContext& context, const int axis_dim,
   }
   softmax.device(*context.eigen_device()) = softmax.exp();
   softmax.device(*context.eigen_device()) = (softmax *
                                              softmax.reshape(batch_axis_remain)
                                                  .sum(along_axis)
                                                  .inverse()
                                                  .eval()
                                                  .broadcast(one_axis));
-}
+  }
+};
+
+template <typename DeviceContext, bool is_test>
+class SoftmaxEigen<DeviceContext, platform::float16, is_test> {
+ public:
+  void operator()(const DeviceContext& context, const int axis_dim,
+                  const framework::Tensor* X, framework::Tensor* Y) {
+    constexpr int kBatchDim = 0;
+    constexpr int kClassDim = 1;
+    constexpr int kAxisDim = 1;
+
+    auto logits = EigenMatrix<platform::float16>::From(*X);
+    auto softmax = EigenMatrix<platform::float16>::From(*Y);
+
+    const int batch_size = logits.dimension(kBatchDim);
+    const int num_classes = logits.dimension(kClassDim);
+    const int num_remain = num_classes / axis_dim;
+
+    Eigen::DSizes<int, 1> along_axis(kAxisDim);
+    Eigen::DSizes<int, 2> batch_classes(batch_size, num_classes);
+    Eigen::DSizes<int, 2> batch_by_one(batch_size, 1);
+    Eigen::DSizes<int, 2> one_by_class(1, num_classes);
+    Eigen::DSizes<int, 3> batch_one_remain(batch_size, 1, num_remain);
+    Eigen::DSizes<int, 3> one_axis_one(1, axis_dim, 1);
+    Eigen::DSizes<int, 2> one_axis(1, axis_dim);
+    Eigen::DSizes<int, 3> batch_axis_remain(batch_size, axis_dim, num_remain);
+
+    // For numerical stability, logits should be shifted by maximum number along
+    // axis, calculate shifted_logits into softmax tensor for memory reuse.
+    if (num_remain == 1) {
+      // axis == -1, axis and class in same dimension, calculate along
+      // class dimension directly for higher performance
+      softmax.device(*context.eigen_device()) =
+          (logits -
+           logits.maximum(along_axis)
+               .reshape(batch_by_one)
+               .broadcast(one_by_class))
+              .unaryExpr(ValueClip<platform::float16>());
+    } else {
+      // axis != -1, class dimension split into (axis, remain), max and sum
+      // should be calculated along axis dimension
+      softmax.device(*context.eigen_device()) =
+          (logits.reshape(batch_axis_remain) -
+           logits.reshape(batch_axis_remain)
+               .maximum(along_axis)
+               .reshape(batch_one_remain)
+               .broadcast(one_axis_one)
+               .reshape(batch_classes))
+              .unaryExpr(ValueClip<platform::float16>());
+    }
+
+    softmax.device(*context.eigen_device()) = softmax.exp();
+    softmax.device(*context.eigen_device()) =
+        (softmax * softmax.reshape(batch_axis_remain)
+                       .sum(along_axis)
+                       .inverse()
+                       .broadcast(one_axis));
+  }
+};
 
 template <typename DeviceContext, typename T, bool is_test, typename Enable>
 void SoftmaxFunctor<DeviceContext, T, is_test, Enable>::operator()(
     const DeviceContext& context, const int axis_dim,
     const framework::Tensor* X, framework::Tensor* Y) {
-  SoftmaxEigen<DeviceContext, T, is_test>(context, axis_dim, X, Y);
+  SoftmaxEigen<DeviceContext, T, is_test>()(context, axis_dim, X, Y);
 }
 
 template <class DeviceContext>
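The comment in the new float16 body ("For numerical stability, logits should be shifted by maximum ...") is the standard max-shift form of softmax. For one row of logits x it computes, with m = \max_j x_j,

\mathrm{softmax}(x)_i = \frac{\exp(x_i - m)}{\sum_j \exp(x_j - m)}

which equals the unshifted definition exactly but keeps every exponent argument at or below zero, so exp() cannot overflow; that matters all the more in float16, whose largest finite value is only 65504. The shifted logits are written into the output tensor Y to reuse its memory, and ValueClip (defined earlier in this file) appears to bound how negative the shifted logits can get before exp() is applied.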
@@ -137,7 +200,7 @@ class SoftmaxFunctor<DeviceContext, T, is_test, enable_if_CPU<DeviceContext>> {
         out_data += num_classes;
       }
     } else {
-      SoftmaxEigen<DeviceContext, T, is_test>(context, axis_dim, X, Y);
+      SoftmaxEigen<DeviceContext, T, is_test>()(context, axis_dim, X, Y);
     }
   }
 };
@@ -162,9 +225,10 @@ class SoftmaxFunctor<DeviceContext, float, true, enable_if_CPU<DeviceContext>> {
 };
 
 template <typename DeviceContext, typename T>
-void SoftmaxGradEigen(const DeviceContext& context, const int axis_dim,
-                      const framework::Tensor* y,
-                      const framework::Tensor* y_grad,
-                      framework::Tensor* x_grad) {
+class SoftmaxGradEigen {
+ public:
+  void operator()(const DeviceContext& context, const int axis_dim,
+                  const framework::Tensor* y, const framework::Tensor* y_grad,
+                  framework::Tensor* x_grad) {
   auto softmax = EigenMatrix<T>::From(*y);
   auto softmax_grad = EigenMatrix<T>::From(*y_grad);
@@ -188,15 +252,49 @@ void SoftmaxGradEigen(const DeviceContext& context, const int axis_dim,
                  .sum(along_class)
                  .eval()
                  .broadcast(one_axis);
-  logits_grad.device(*context.eigen_device()) = (softmax_grad - dot) * softmax;
-}
+    logits_grad.device(*context.eigen_device()) =
+        (softmax_grad - dot) * softmax;
+  }
+};
+
+template <typename DeviceContext>
+class SoftmaxGradEigen<DeviceContext, platform::float16> {
+ public:
+  void operator()(const DeviceContext& context, const int axis_dim,
+                  const framework::Tensor* y, const framework::Tensor* y_grad,
+                  framework::Tensor* x_grad) {
+    auto softmax = EigenMatrix<platform::float16>::From(*y);
+    auto softmax_grad = EigenMatrix<platform::float16>::From(*y_grad);
+    auto logits_grad = EigenMatrix<platform::float16>::From(*x_grad);
+
+    constexpr int kBatchDim = 0;
+    constexpr int kClassDim = 1;
+
+    const int batch_size = softmax.dimension(kBatchDim);
+    const int num_classes = softmax.dimension(kClassDim);
+    const int num_remain = num_classes / axis_dim;
+
+    Eigen::DSizes<int, 1> along_class(kClassDim);
+    Eigen::DSizes<int, 2> batch_by_one(batch_size, 1);
+    Eigen::DSizes<int, 2> one_by_class(1, num_classes);
+    Eigen::DSizes<int, 3> batch_axis_remain(batch_size, axis_dim, num_remain);
+    Eigen::DSizes<int, 2> one_axis(1, axis_dim);
+
+    auto dot = (softmax * softmax_grad)
+                   .reshape(batch_axis_remain)
+                   .sum(along_class)
+                   .broadcast(one_axis);
+    logits_grad.device(*context.eigen_device()) =
+        (softmax_grad - dot) * softmax;
+  }
+};
 
 template <typename DeviceContext, typename T, typename Enable>
 void SoftmaxGradFunctor<DeviceContext, T, Enable>::operator()(
     const DeviceContext& context, const int axis_dim,
     const framework::Tensor* y, const framework::Tensor* y_grad,
     framework::Tensor* x_grad) {
-  SoftmaxGradEigen<DeviceContext, T>(context, axis_dim, y, y_grad, x_grad);
+  SoftmaxGradEigen<DeviceContext, T>()(context, axis_dim, y, y_grad, x_grad);
 }
 
 template <typename DeviceContext, typename T>
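The backward hunks implement the usual softmax Jacobian-vector product. Writing y for the softmax output and g for the upstream gradient dL/dy, the dot term the code builds is the per-group inner product of g and y, broadcast back along the axis, and the result is

\frac{\partial L}{\partial x_i} = y_i \left( g_i - \sum_j g_j \, y_j \right)

which is exactly the (softmax_grad - dot) * softmax expression in both the generic code and the new float16 specialization; the only difference in the float16 version is that the reduction feeding dot is no longer forced through .eval().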
@@ -228,7 +326,8 @@ class SoftmaxGradFunctor<DeviceContext, T, enable_if_CPU<DeviceContext>> {
         in_grad += num_classes;
       }
     } else {
-      SoftmaxGradEigen<DeviceContext, T>(context, axis_dim, y, y_grad, x_grad);
+      SoftmaxGradEigen<DeviceContext, T>()(context, axis_dim, y, y_grad,
+                                           x_grad);
     }
   }
 };