Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
318dfa0d
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
318dfa0d
编写于
4月 17, 2020
作者:
Z
Zhang Ting
提交者:
GitHub
4月 17, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
remove eval in eigen function when dtype is fp16 (#23845)
上级
66dc8e30
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
181 addition
and
82 deletion
+181
-82
paddle/fluid/operators/math/softmax_impl.h
paddle/fluid/operators/math/softmax_impl.h
+181
-82
未找到文件。
paddle/fluid/operators/math/softmax_impl.h
浏览文件 @
318dfa0d
...
...
@@ -37,67 +37,130 @@ struct ValueClip {
};
template
<
typename
DeviceContext
,
typename
T
,
bool
is_test
>
void
SoftmaxEigen
(
const
DeviceContext
&
context
,
const
int
axis_dim
,
class
SoftmaxEigen
{
public:
void
operator
()(
const
DeviceContext
&
context
,
const
int
axis_dim
,
const
framework
::
Tensor
*
X
,
framework
::
Tensor
*
Y
)
{
constexpr
int
kBatchDim
=
0
;
constexpr
int
kClassDim
=
1
;
constexpr
int
kAxisDim
=
1
;
auto
logits
=
EigenMatrix
<
T
>::
From
(
*
X
);
auto
softmax
=
EigenMatrix
<
T
>::
From
(
*
Y
);
const
int
batch_size
=
logits
.
dimension
(
kBatchDim
);
const
int
num_classes
=
logits
.
dimension
(
kClassDim
);
const
int
num_remain
=
num_classes
/
axis_dim
;
Eigen
::
DSizes
<
int
,
1
>
along_axis
(
kAxisDim
);
Eigen
::
DSizes
<
int
,
2
>
batch_classes
(
batch_size
,
num_classes
);
Eigen
::
DSizes
<
int
,
2
>
batch_by_one
(
batch_size
,
1
);
Eigen
::
DSizes
<
int
,
2
>
one_by_class
(
1
,
num_classes
);
Eigen
::
DSizes
<
int
,
3
>
batch_one_remain
(
batch_size
,
1
,
num_remain
);
Eigen
::
DSizes
<
int
,
3
>
one_axis_one
(
1
,
axis_dim
,
1
);
Eigen
::
DSizes
<
int
,
2
>
one_axis
(
1
,
axis_dim
);
Eigen
::
DSizes
<
int
,
3
>
batch_axis_remain
(
batch_size
,
axis_dim
,
num_remain
);
// For numerical stability, logits should be shifted by maximum number along
// axis, calculate shifted_logits into softmax tensor for memory reuse.
if
(
num_remain
==
1
)
{
// axis == -1, axis and class in same dimension, calculate along
// class dimension directly for higher performance
softmax
.
device
(
*
context
.
eigen_device
())
=
(
logits
-
logits
.
maximum
(
along_axis
)
.
eval
()
.
reshape
(
batch_by_one
)
.
broadcast
(
one_by_class
))
.
unaryExpr
(
ValueClip
<
T
>
());
}
else
{
// axis != -1, class dimension split into (axis, remain), max and sum
// should be calculated along axis dimension
constexpr
int
kBatchDim
=
0
;
constexpr
int
kClassDim
=
1
;
constexpr
int
kAxisDim
=
1
;
auto
logits
=
EigenMatrix
<
T
>::
From
(
*
X
);
auto
softmax
=
EigenMatrix
<
T
>::
From
(
*
Y
);
const
int
batch_size
=
logits
.
dimension
(
kBatchDim
);
const
int
num_classes
=
logits
.
dimension
(
kClassDim
);
const
int
num_remain
=
num_classes
/
axis_dim
;
Eigen
::
DSizes
<
int
,
1
>
along_axis
(
kAxisDim
);
Eigen
::
DSizes
<
int
,
2
>
batch_classes
(
batch_size
,
num_classes
);
Eigen
::
DSizes
<
int
,
2
>
batch_by_one
(
batch_size
,
1
);
Eigen
::
DSizes
<
int
,
2
>
one_by_class
(
1
,
num_classes
);
Eigen
::
DSizes
<
int
,
3
>
batch_one_remain
(
batch_size
,
1
,
num_remain
);
Eigen
::
DSizes
<
int
,
3
>
one_axis_one
(
1
,
axis_dim
,
1
);
Eigen
::
DSizes
<
int
,
2
>
one_axis
(
1
,
axis_dim
);
Eigen
::
DSizes
<
int
,
3
>
batch_axis_remain
(
batch_size
,
axis_dim
,
num_remain
);
// For numerical stability, logits should be shifted by maximum number along
// axis, calculate shifted_logits into softmax tensor for memory reuse.
if
(
num_remain
==
1
)
{
// axis == -1, axis and class in same dimension, calculate along
// class dimension directly for higher performance
softmax
.
device
(
*
context
.
eigen_device
())
=
(
logits
-
logits
.
maximum
(
along_axis
)
.
eval
()
.
reshape
(
batch_by_one
)
.
broadcast
(
one_by_class
))
.
unaryExpr
(
ValueClip
<
T
>
());
}
else
{
// axis != -1, class dimension split into (axis, remain), max and sum
// should be calculated along axis dimension
softmax
.
device
(
*
context
.
eigen_device
())
=
(
logits
.
reshape
(
batch_axis_remain
)
-
logits
.
reshape
(
batch_axis_remain
)
.
maximum
(
along_axis
)
.
eval
()
.
reshape
(
batch_one_remain
)
.
broadcast
(
one_axis_one
)
.
reshape
(
batch_classes
))
.
unaryExpr
(
ValueClip
<
T
>
());
}
softmax
.
device
(
*
context
.
eigen_device
())
=
softmax
.
exp
();
softmax
.
device
(
*
context
.
eigen_device
())
=
(
logits
.
reshape
(
batch_axis_remain
)
-
logits
.
reshape
(
batch_axis_remain
)
.
maximum
(
along_axis
)
(
softmax
*
softmax
.
reshape
(
batch_axis_remain
)
.
sum
(
along_axis
)
.
inverse
()
.
eval
()
.
reshape
(
batch_one_remain
)
.
broadcast
(
one_axis_one
)
.
reshape
(
batch_classes
))
.
unaryExpr
(
ValueClip
<
T
>
());
.
broadcast
(
one_axis
));
}
};
softmax
.
device
(
*
context
.
eigen_device
())
=
softmax
.
exp
();
softmax
.
device
(
*
context
.
eigen_device
())
=
(
softmax
*
softmax
.
reshape
(
batch_axis_remain
)
.
sum
(
along_axis
)
.
inverse
()
.
eval
()
.
broadcast
(
one_axis
));
}
template
<
typename
DeviceContext
,
bool
is_test
>
class
SoftmaxEigen
<
DeviceContext
,
platform
::
float16
,
is_test
>
{
public:
void
operator
()(
const
DeviceContext
&
context
,
const
int
axis_dim
,
const
framework
::
Tensor
*
X
,
framework
::
Tensor
*
Y
)
{
constexpr
int
kBatchDim
=
0
;
constexpr
int
kClassDim
=
1
;
constexpr
int
kAxisDim
=
1
;
auto
logits
=
EigenMatrix
<
platform
::
float16
>::
From
(
*
X
);
auto
softmax
=
EigenMatrix
<
platform
::
float16
>::
From
(
*
Y
);
const
int
batch_size
=
logits
.
dimension
(
kBatchDim
);
const
int
num_classes
=
logits
.
dimension
(
kClassDim
);
const
int
num_remain
=
num_classes
/
axis_dim
;
Eigen
::
DSizes
<
int
,
1
>
along_axis
(
kAxisDim
);
Eigen
::
DSizes
<
int
,
2
>
batch_classes
(
batch_size
,
num_classes
);
Eigen
::
DSizes
<
int
,
2
>
batch_by_one
(
batch_size
,
1
);
Eigen
::
DSizes
<
int
,
2
>
one_by_class
(
1
,
num_classes
);
Eigen
::
DSizes
<
int
,
3
>
batch_one_remain
(
batch_size
,
1
,
num_remain
);
Eigen
::
DSizes
<
int
,
3
>
one_axis_one
(
1
,
axis_dim
,
1
);
Eigen
::
DSizes
<
int
,
2
>
one_axis
(
1
,
axis_dim
);
Eigen
::
DSizes
<
int
,
3
>
batch_axis_remain
(
batch_size
,
axis_dim
,
num_remain
);
// For numerical stability, logits should be shifted by maximum number along
// axis, calculate shifted_logits into softmax tensor for memory reuse.
if
(
num_remain
==
1
)
{
// axis == -1, axis and class in same dimension, calculate along
// class dimension directly for higher performance
softmax
.
device
(
*
context
.
eigen_device
())
=
(
logits
-
logits
.
maximum
(
along_axis
)
.
reshape
(
batch_by_one
)
.
broadcast
(
one_by_class
))
.
unaryExpr
(
ValueClip
<
platform
::
float16
>
());
}
else
{
// axis != -1, class dimension split into (axis, remain), max and sum
// should be calculated along axis dimension
softmax
.
device
(
*
context
.
eigen_device
())
=
(
logits
.
reshape
(
batch_axis_remain
)
-
logits
.
reshape
(
batch_axis_remain
)
.
maximum
(
along_axis
)
.
reshape
(
batch_one_remain
)
.
broadcast
(
one_axis_one
)
.
reshape
(
batch_classes
))
.
unaryExpr
(
ValueClip
<
platform
::
float16
>
());
}
softmax
.
device
(
*
context
.
eigen_device
())
=
softmax
.
exp
();
softmax
.
device
(
*
context
.
eigen_device
())
=
(
softmax
*
softmax
.
reshape
(
batch_axis_remain
)
.
sum
(
along_axis
)
.
inverse
()
.
broadcast
(
one_axis
));
}
};
template
<
typename
DeviceContext
,
typename
T
,
bool
is_test
,
typename
Enable
>
void
SoftmaxFunctor
<
DeviceContext
,
T
,
is_test
,
Enable
>::
operator
()(
const
DeviceContext
&
context
,
const
int
axis_dim
,
const
framework
::
Tensor
*
X
,
framework
::
Tensor
*
Y
)
{
SoftmaxEigen
<
DeviceContext
,
T
,
is_test
>
(
context
,
axis_dim
,
X
,
Y
);
SoftmaxEigen
<
DeviceContext
,
T
,
is_test
>
(
)(
context
,
axis_dim
,
X
,
Y
);
}
template
<
class
DeviceContext
>
...
...
@@ -137,7 +200,7 @@ class SoftmaxFunctor<DeviceContext, T, is_test, enable_if_CPU<DeviceContext>> {
out_data
+=
num_classes
;
}
}
else
{
SoftmaxEigen
<
DeviceContext
,
T
,
is_test
>
(
context
,
axis_dim
,
X
,
Y
);
SoftmaxEigen
<
DeviceContext
,
T
,
is_test
>
(
)(
context
,
axis_dim
,
X
,
Y
);
}
}
};
...
...
@@ -162,41 +225,76 @@ class SoftmaxFunctor<DeviceContext, float, true, enable_if_CPU<DeviceContext>> {
};
template
<
typename
DeviceContext
,
typename
T
>
void
SoftmaxGradEigen
(
const
DeviceContext
&
context
,
const
int
axis_dim
,
const
framework
::
Tensor
*
y
,
const
framework
::
Tensor
*
y_grad
,
framework
::
Tensor
*
x_grad
)
{
auto
softmax
=
EigenMatrix
<
T
>::
From
(
*
y
);
auto
softmax_grad
=
EigenMatrix
<
T
>::
From
(
*
y_grad
);
auto
logits_grad
=
EigenMatrix
<
T
>::
From
(
*
x_grad
);
constexpr
int
kBatchDim
=
0
;
constexpr
int
kClassDim
=
1
;
const
int
batch_size
=
softmax
.
dimension
(
kBatchDim
);
const
int
num_classes
=
softmax
.
dimension
(
kClassDim
);
const
int
num_remain
=
num_classes
/
axis_dim
;
Eigen
::
DSizes
<
int
,
1
>
along_class
(
kClassDim
);
Eigen
::
DSizes
<
int
,
2
>
batch_by_one
(
batch_size
,
1
);
Eigen
::
DSizes
<
int
,
2
>
one_by_class
(
1
,
num_classes
);
Eigen
::
DSizes
<
int
,
3
>
batch_axis_remain
(
batch_size
,
axis_dim
,
num_remain
);
Eigen
::
DSizes
<
int
,
2
>
one_axis
(
1
,
axis_dim
);
auto
dot
=
(
softmax
*
softmax_grad
)
.
reshape
(
batch_axis_remain
)
.
sum
(
along_class
)
.
eval
()
.
broadcast
(
one_axis
);
logits_grad
.
device
(
*
context
.
eigen_device
())
=
(
softmax_grad
-
dot
)
*
softmax
;
}
class
SoftmaxGradEigen
{
public:
void
operator
()(
const
DeviceContext
&
context
,
const
int
axis_dim
,
const
framework
::
Tensor
*
y
,
const
framework
::
Tensor
*
y_grad
,
framework
::
Tensor
*
x_grad
)
{
auto
softmax
=
EigenMatrix
<
T
>::
From
(
*
y
);
auto
softmax_grad
=
EigenMatrix
<
T
>::
From
(
*
y_grad
);
auto
logits_grad
=
EigenMatrix
<
T
>::
From
(
*
x_grad
);
constexpr
int
kBatchDim
=
0
;
constexpr
int
kClassDim
=
1
;
const
int
batch_size
=
softmax
.
dimension
(
kBatchDim
);
const
int
num_classes
=
softmax
.
dimension
(
kClassDim
);
const
int
num_remain
=
num_classes
/
axis_dim
;
Eigen
::
DSizes
<
int
,
1
>
along_class
(
kClassDim
);
Eigen
::
DSizes
<
int
,
2
>
batch_by_one
(
batch_size
,
1
);
Eigen
::
DSizes
<
int
,
2
>
one_by_class
(
1
,
num_classes
);
Eigen
::
DSizes
<
int
,
3
>
batch_axis_remain
(
batch_size
,
axis_dim
,
num_remain
);
Eigen
::
DSizes
<
int
,
2
>
one_axis
(
1
,
axis_dim
);
auto
dot
=
(
softmax
*
softmax_grad
)
.
reshape
(
batch_axis_remain
)
.
sum
(
along_class
)
.
eval
()
.
broadcast
(
one_axis
);
logits_grad
.
device
(
*
context
.
eigen_device
())
=
(
softmax_grad
-
dot
)
*
softmax
;
}
};
template
<
typename
DeviceContext
>
class
SoftmaxGradEigen
<
DeviceContext
,
platform
::
float16
>
{
public:
void
operator
()(
const
DeviceContext
&
context
,
const
int
axis_dim
,
const
framework
::
Tensor
*
y
,
const
framework
::
Tensor
*
y_grad
,
framework
::
Tensor
*
x_grad
)
{
auto
softmax
=
EigenMatrix
<
platform
::
float16
>::
From
(
*
y
);
auto
softmax_grad
=
EigenMatrix
<
platform
::
float16
>::
From
(
*
y_grad
);
auto
logits_grad
=
EigenMatrix
<
platform
::
float16
>::
From
(
*
x_grad
);
constexpr
int
kBatchDim
=
0
;
constexpr
int
kClassDim
=
1
;
const
int
batch_size
=
softmax
.
dimension
(
kBatchDim
);
const
int
num_classes
=
softmax
.
dimension
(
kClassDim
);
const
int
num_remain
=
num_classes
/
axis_dim
;
Eigen
::
DSizes
<
int
,
1
>
along_class
(
kClassDim
);
Eigen
::
DSizes
<
int
,
2
>
batch_by_one
(
batch_size
,
1
);
Eigen
::
DSizes
<
int
,
2
>
one_by_class
(
1
,
num_classes
);
Eigen
::
DSizes
<
int
,
3
>
batch_axis_remain
(
batch_size
,
axis_dim
,
num_remain
);
Eigen
::
DSizes
<
int
,
2
>
one_axis
(
1
,
axis_dim
);
auto
dot
=
(
softmax
*
softmax_grad
)
.
reshape
(
batch_axis_remain
)
.
sum
(
along_class
)
.
broadcast
(
one_axis
);
logits_grad
.
device
(
*
context
.
eigen_device
())
=
(
softmax_grad
-
dot
)
*
softmax
;
}
};
template
<
typename
DeviceContext
,
typename
T
,
typename
Enable
>
void
SoftmaxGradFunctor
<
DeviceContext
,
T
,
Enable
>::
operator
()(
const
DeviceContext
&
context
,
const
int
axis_dim
,
const
framework
::
Tensor
*
y
,
const
framework
::
Tensor
*
y_grad
,
framework
::
Tensor
*
x_grad
)
{
SoftmaxGradEigen
<
DeviceContext
,
T
>
(
context
,
axis_dim
,
y
,
y_grad
,
x_grad
);
SoftmaxGradEigen
<
DeviceContext
,
T
>
(
)(
context
,
axis_dim
,
y
,
y_grad
,
x_grad
);
}
template
<
typename
DeviceContext
,
typename
T
>
...
...
@@ -228,7 +326,8 @@ class SoftmaxGradFunctor<DeviceContext, T, enable_if_CPU<DeviceContext>> {
in_grad
+=
num_classes
;
}
}
else
{
SoftmaxGradEigen
<
DeviceContext
,
T
>
(
context
,
axis_dim
,
y
,
y_grad
,
x_grad
);
SoftmaxGradEigen
<
DeviceContext
,
T
>
()(
context
,
axis_dim
,
y
,
y_grad
,
x_grad
);
}
}
};
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录