Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
6c641827
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
6c641827
编写于
3月 18, 2019
作者:
D
dengkaipeng
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
refine softmax kernel. test=develop
上级
412b7cbd
变更
7
隐藏空白更改
内联
并排
Showing
7 changed file
with
119 addition
and
249 deletion
+119
-249
paddle/fluid/operators/math/softmax.h
paddle/fluid/operators/math/softmax.h
+5
-4
paddle/fluid/operators/math/softmax_impl.h
paddle/fluid/operators/math/softmax_impl.h
+14
-8
paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc
paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc
+39
-95
paddle/fluid/operators/softmax_cudnn_op.cu.cc
paddle/fluid/operators/softmax_cudnn_op.cu.cc
+18
-67
paddle/fluid/operators/softmax_op.h
paddle/fluid/operators/softmax_op.h
+41
-73
paddle/fluid/operators/softmax_with_cross_entropy_op.h
paddle/fluid/operators/softmax_with_cross_entropy_op.h
+1
-1
paddle/fluid/operators/warpctc_cudnn_op.cu.cc
paddle/fluid/operators/warpctc_cudnn_op.cu.cc
+1
-1
未找到文件。
paddle/fluid/operators/math/softmax.h
浏览文件 @
6c641827
...
...
@@ -23,15 +23,16 @@ template <typename DeviceContext, typename T, bool is_test,
typename
Enable
=
void
>
class
SoftmaxFunctor
{
public:
void
operator
()(
const
DeviceContext
&
context
,
const
framework
::
Tensor
*
X
,
framework
::
Tensor
*
Y
);
void
operator
()(
const
DeviceContext
&
context
,
const
int
axis_dim
,
const
framework
::
Tensor
*
X
,
framework
::
Tensor
*
Y
);
};
template
<
typename
DeviceContext
,
typename
T
>
class
SoftmaxGradFunctor
{
public:
void
operator
()(
const
DeviceContext
&
context
,
const
framework
::
Tensor
*
y
,
const
framework
::
Tensor
*
y_grad
,
framework
::
Tensor
*
x_grad
);
void
operator
()(
const
DeviceContext
&
context
,
const
int
axis_dim
,
const
framework
::
Tensor
*
y
,
const
framework
::
Tensor
*
y_grad
,
framework
::
Tensor
*
x_grad
);
};
#ifdef PADDLE_WITH_CUDA
...
...
paddle/fluid/operators/math/softmax_impl.h
浏览文件 @
6c641827
...
...
@@ -36,8 +36,8 @@ struct ValueClip {
template
<
typename
DeviceContext
,
typename
T
,
bool
is_test
,
typename
Enable
>
void
SoftmaxFunctor
<
DeviceContext
,
T
,
is_test
,
Enable
>::
operator
()(
const
DeviceContext
&
context
,
const
framework
::
Tensor
*
X
,
framework
::
Tensor
*
Y
)
{
const
DeviceContext
&
context
,
const
int
axis_dim
,
const
framework
::
Tensor
*
X
,
framework
::
Tensor
*
Y
)
{
auto
logits
=
EigenMatrix
<
T
>::
From
(
*
X
);
auto
softmax
=
EigenMatrix
<
T
>::
From
(
*
Y
);
...
...
@@ -46,10 +46,13 @@ void SoftmaxFunctor<DeviceContext, T, is_test, Enable>::operator()(
const
int
batch_size
=
logits
.
dimension
(
kBatchDim
);
const
int
num_classes
=
logits
.
dimension
(
kClassDim
);
const
int
num_remain
=
num_classes
/
axis_dim
;
Eigen
::
DSizes
<
int
,
1
>
along_class
(
kClassDim
);
Eigen
::
DSizes
<
int
,
2
>
batch_by_one
(
batch_size
,
1
);
Eigen
::
DSizes
<
int
,
2
>
one_by_class
(
1
,
num_classes
);
Eigen
::
DSizes
<
int
,
3
>
batch_axis_remain
(
batch_size
,
axis_dim
,
num_remain
);
Eigen
::
DSizes
<
int
,
2
>
one_axis
(
1
,
axis_dim
);
auto
shifted_logits
=
(
logits
-
logits
.
maximum
(
along_class
)
...
...
@@ -60,11 +63,11 @@ void SoftmaxFunctor<DeviceContext, T, is_test, Enable>::operator()(
softmax
.
device
(
*
context
.
eigen_device
())
=
shifted_logits
.
exp
();
softmax
.
device
(
*
context
.
eigen_device
())
=
(
softmax
*
softmax
.
sum
(
along_class
)
softmax
.
reshape
(
batch_axis_remain
)
.
sum
(
along_class
)
.
inverse
()
.
eval
()
.
reshape
(
batch_by_one
)
.
broadcast
(
one_by_class
));
.
broadcast
(
one_axis
));
}
template
<
class
DeviceContext
>
...
...
@@ -90,7 +93,7 @@ class SoftmaxFunctor<DeviceContext, float, true, enable_if_CPU<DeviceContext>> {
template
<
typename
DeviceContext
,
typename
T
>
void
SoftmaxGradFunctor
<
DeviceContext
,
T
>::
operator
()(
const
DeviceContext
&
context
,
const
framework
::
Tensor
*
y
,
const
DeviceContext
&
context
,
const
int
axis_dim
,
const
framework
::
Tensor
*
y
,
const
framework
::
Tensor
*
y_grad
,
framework
::
Tensor
*
x_grad
)
{
auto
softmax
=
EigenMatrix
<
T
>::
From
(
*
y
);
auto
softmax_grad
=
EigenMatrix
<
T
>::
From
(
*
y_grad
);
...
...
@@ -101,16 +104,19 @@ void SoftmaxGradFunctor<DeviceContext, T>::operator()(
const
int
batch_size
=
softmax
.
dimension
(
kBatchDim
);
const
int
num_classes
=
softmax
.
dimension
(
kClassDim
);
const
int
num_remain
=
num_classes
/
axis_dim
;
Eigen
::
DSizes
<
int
,
1
>
along_class
(
kClassDim
);
Eigen
::
DSizes
<
int
,
2
>
batch_by_one
(
batch_size
,
1
);
Eigen
::
DSizes
<
int
,
2
>
one_by_class
(
1
,
num_classes
);
Eigen
::
DSizes
<
int
,
3
>
batch_axis_remain
(
batch_size
,
axis_dim
,
num_remain
);
Eigen
::
DSizes
<
int
,
2
>
one_axis
(
1
,
axis_dim
);
auto
dot
=
(
softmax
*
softmax_grad
)
.
reshape
(
batch_axis_remain
)
.
sum
(
along_class
)
.
eval
()
.
reshape
(
batch_by_one
)
.
broadcast
(
one_by_class
);
.
broadcast
(
one_axis
);
logits_grad
.
device
(
*
context
.
eigen_device
())
=
(
softmax_grad
-
dot
)
*
softmax
;
}
...
...
paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc
浏览文件 @
6c641827
...
...
@@ -110,46 +110,28 @@ class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel<T> {
"It must use CPUPlace."
);
auto
&
dev_ctx
=
ctx
.
template
device_context
<
MKLDNNDeviceContext
>();
auto
mkldnn_engine
=
dev_ctx
.
GetEngine
();
const
Tensor
*
X
=
ctx
.
Input
<
Tensor
>
(
"X"
);
Tensor
*
O
ut
=
ctx
.
Output
<
Tensor
>
(
"Out"
);
const
Tensor
*
input
=
ctx
.
Input
<
Tensor
>
(
"X"
);
Tensor
*
outp
ut
=
ctx
.
Output
<
Tensor
>
(
"Out"
);
PADDLE_ENFORCE_EQ
(
X
->
dims
(),
O
ut
->
dims
(),
input
->
dims
(),
outp
ut
->
dims
(),
"The shape of softmax's input and output must be identical."
);
const
int
axis
=
ctx
.
Attr
<
int
>
(
"axis"
);
int
rank
=
X
->
dims
().
size
();
// make sure 'output' holds memory, which will be shared by
// 'flattened_output' later.
Out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
std
::
vector
<
int
>
perm
,
shape
;
CalcTransPermAndShapeByAxis
(
*
X
,
axis
,
&
perm
,
&
shape
);
Tensor
X_2d
,
Out_2d
;
Tensor
X_trans
,
Out_trans
;
if
(
axis
!=
-
1
&&
axis
!=
rank
-
1
)
{
X_trans
.
mutable_data
<
T
>
(
framework
::
make_ddim
(
shape
),
ctx
.
GetPlace
());
Out_trans
.
mutable_data
<
T
>
(
framework
::
make_ddim
(
shape
),
ctx
.
GetPlace
());
TransCompute
<
platform
::
CPUDeviceContext
,
T
>
(
rank
,
dev_ctx
,
*
X
,
&
X_trans
,
perm
);
TransCompute
<
platform
::
CPUDeviceContext
,
T
>
(
rank
,
dev_ctx
,
*
Out
,
&
Out_trans
,
perm
);
auto
dims
=
X_trans
.
dims
();
auto
flattened_dims
=
framework
::
flatten_to_2d
(
dims
,
dims
.
size
()
-
1
);
X_2d
.
ShareDataWith
(
X_trans
).
Resize
(
flattened_dims
);
Out_2d
.
ShareDataWith
(
Out_trans
).
Resize
(
flattened_dims
);
}
else
{
auto
dims
=
X
->
dims
();
auto
flattened_dims
=
framework
::
flatten_to_2d
(
dims
,
dims
.
size
()
-
1
);
X_2d
.
ShareDataWith
(
*
X
).
Resize
(
flattened_dims
);
Out_2d
.
ShareDataWith
(
*
Out
).
Resize
(
flattened_dims
);
}
output
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
// flatten input and output to 2-D matrixs
auto
dims
=
input
->
dims
();
// input and output share the same shape
auto
flattened_dims
=
framework
::
flatten_to_2d
(
dims
,
dims
.
size
()
-
1
);
framework
::
Tensor
flattened_input
;
framework
::
Tensor
flattened_output
;
flattened_input
.
ShareDataWith
(
*
input
).
Resize
(
flattened_dims
);
flattened_output
.
ShareDataWith
(
*
output
).
Resize
(
flattened_dims
);
const
T
*
input_data
=
X_2d
.
data
<
T
>
();
T
*
output_data
=
Out_2d
.
mutable_data
<
T
>
(
ctx
.
GetPlace
());
const
T
*
input_data
=
flattened_input
.
data
<
T
>
();
T
*
output_data
=
flattened_output
.
mutable_data
<
T
>
(
ctx
.
GetPlace
());
std
::
vector
<
int
>
src_tz
=
paddle
::
framework
::
vectorize2int
(
X_2d
.
dims
()
);
std
::
vector
<
int
>
src_tz
=
paddle
::
framework
::
vectorize2int
(
flattened_dims
);
std
::
vector
<
int
>
dst_tz
=
src_tz
;
// Same memory descriptor to be used for input and output
memory
::
dims
softmax_tz
=
{
src_tz
[
0
],
src_tz
[
1
]};
...
...
@@ -179,16 +161,10 @@ class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel<T> {
// We cannot use softmax_dst_memory_p to get prim desc as
// it contains flattened dims (2D) while output tensor can
// have 2,3,4+ dims
if
(
axis
!=
-
1
&&
axis
!=
rank
-
1
)
{
auto
output_mem_pd
=
paddle
::
platform
::
create_prim_desc_from_dims
(
shape
,
mkldnn
::
memory
::
format
::
blocked
);
Out_trans
.
set_mkldnn_prim_desc
(
output_mem_pd
);
}
else
{
auto
output_mem_pd
=
paddle
::
platform
::
create_prim_desc_from_dims
(
paddle
::
framework
::
vectorize2int
(
Out
->
dims
()),
mkldnn
::
memory
::
format
::
blocked
);
Out
->
set_mkldnn_prim_desc
(
output_mem_pd
);
}
auto
output_mem_pd
=
paddle
::
platform
::
create_prim_desc_from_dims
(
paddle
::
framework
::
vectorize2int
(
output
->
dims
()),
mkldnn
::
memory
::
format
::
blocked
);
output
->
set_mkldnn_prim_desc
(
output_mem_pd
);
std
::
vector
<
primitive
>
pipeline
{
*
(
static_cast
<
softmax_forward
::
primitive
*>
(
softmax_p
.
get
()))};
...
...
@@ -202,11 +178,6 @@ class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel<T> {
output_data
[
i
]
<
threshold
?
threshold
:
output_data
[
i
];
}
}
if
(
axis
!=
-
1
&&
axis
!=
rank
-
1
)
{
TransCompute
<
platform
::
CPUDeviceContext
,
T
>
(
rank
,
dev_ctx
,
Out_trans
,
Out
,
perm
);
}
}
};
...
...
@@ -219,55 +190,33 @@ class SoftmaxMKLDNNGradKernel : public paddle::framework::OpKernel<T> {
auto
&
dev_ctx
=
ctx
.
template
device_context
<
MKLDNNDeviceContext
>();
auto
mkldnn_engine
=
dev_ctx
.
GetEngine
();
const
Tensor
*
O
ut
=
ctx
.
Input
<
Tensor
>
(
"Out"
);
auto
*
d
O
ut
=
ctx
.
template
Input
<
Tensor
>(
framework
::
GradVarName
(
"Out"
));
auto
*
d
X
=
const
Tensor
*
outp
ut
=
ctx
.
Input
<
Tensor
>
(
"Out"
);
auto
*
d
o
ut
=
ctx
.
template
Input
<
Tensor
>(
framework
::
GradVarName
(
"Out"
));
auto
*
d
x
=
ctx
.
template
Output
<
framework
::
Tensor
>(
framework
::
GradVarName
(
"X"
));
PADDLE_ENFORCE_EQ
(
d
Out
->
dims
(),
dX
->
dims
(),
d
out
->
dims
(),
dx
->
dims
(),
"The shape of softmax_grad's input and output must be identical."
);
const
int
axis
=
ctx
.
Attr
<
int
>
(
"axis"
);
int
rank
=
Out
->
dims
().
size
();
// make sure 'dx' holds memory, which will be shared by 'flattened_dx'
// later.
dX
->
template
mutable_data
<
T
>(
ctx
.
GetPlace
());
std
::
vector
<
int
>
perm
,
shape
;
CalcTransPermAndShapeByAxis
(
*
dX
,
axis
,
&
perm
,
&
shape
);
Tensor
dX_2d
,
Out_2d
,
dOut_2d
;
Tensor
dX_trans
,
Out_trans
,
dOut_trans
;
if
(
axis
!=
-
1
&&
axis
!=
rank
-
1
)
{
dX_trans
.
mutable_data
<
T
>
(
framework
::
make_ddim
(
shape
),
ctx
.
GetPlace
());
Out_trans
.
mutable_data
<
T
>
(
framework
::
make_ddim
(
shape
),
ctx
.
GetPlace
());
dOut_trans
.
mutable_data
<
T
>
(
framework
::
make_ddim
(
shape
),
ctx
.
GetPlace
());
TransCompute
<
platform
::
CPUDeviceContext
,
T
>
(
rank
,
dev_ctx
,
*
dX
,
&
dX_trans
,
perm
);
TransCompute
<
platform
::
CPUDeviceContext
,
T
>
(
rank
,
dev_ctx
,
*
Out
,
&
Out_trans
,
perm
);
TransCompute
<
platform
::
CPUDeviceContext
,
T
>
(
rank
,
dev_ctx
,
*
dOut
,
&
dOut_trans
,
perm
);
auto
dims
=
dX_trans
.
dims
();
auto
flattened_dims
=
framework
::
flatten_to_2d
(
dims
,
dims
.
size
()
-
1
);
dX_2d
.
ShareDataWith
(
dX_trans
).
Resize
(
flattened_dims
);
Out_2d
.
ShareDataWith
(
Out_trans
).
Resize
(
flattened_dims
);
dOut_2d
.
ShareDataWith
(
dOut_trans
).
Resize
(
flattened_dims
);
}
else
{
auto
dims
=
dX
->
dims
();
auto
flattened_dims
=
framework
::
flatten_to_2d
(
dims
,
dims
.
size
()
-
1
);
dX_2d
.
ShareDataWith
(
*
dX
).
Resize
(
flattened_dims
);
Out_2d
.
ShareDataWith
(
*
Out
).
Resize
(
flattened_dims
);
dOut_2d
.
ShareDataWith
(
*
dOut
).
Resize
(
flattened_dims
);
}
const
T
*
dst_data
=
Out_2d
.
data
<
T
>
();
const
T
*
diff_dst_ptr
=
dOut_2d
.
template
data
<
T
>();
T
*
diff_src_ptr
=
dX_2d
.
template
mutable_data
<
T
>(
ctx
.
GetPlace
());
std
::
vector
<
int
>
dst_tz
=
paddle
::
framework
::
vectorize2int
(
Out_2d
.
dims
());
dx
->
template
mutable_data
<
T
>(
ctx
.
GetPlace
());
auto
dims
=
dout
->
dims
();
// input and output share the same shape
auto
flattened_dims
=
framework
::
flatten_to_2d
(
dims
,
dims
.
size
()
-
1
);
framework
::
Tensor
flattened_output
;
framework
::
Tensor
flattened_dout
;
framework
::
Tensor
flattened_dx
;
flattened_output
.
ShareDataWith
(
*
output
).
Resize
(
flattened_dims
);
flattened_dout
.
ShareDataWith
(
*
dout
).
Resize
(
flattened_dims
);
flattened_dx
.
ShareDataWith
(
*
dx
).
Resize
(
flattened_dims
);
const
T
*
dst_data
=
flattened_output
.
data
<
T
>
();
const
T
*
diff_dst_ptr
=
flattened_dout
.
template
data
<
T
>();
T
*
diff_src_ptr
=
flattened_dx
.
template
mutable_data
<
T
>(
ctx
.
GetPlace
());
std
::
vector
<
int
>
dst_tz
=
paddle
::
framework
::
vectorize2int
(
flattened_dims
);
std
::
vector
<
int
>
src_tz
(
dst_tz
);
// Same memory descriptor to be used for input and output
...
...
@@ -312,11 +261,6 @@ class SoftmaxMKLDNNGradKernel : public paddle::framework::OpKernel<T> {
std
::
vector
<
primitive
>
pipeline
{
*
softmax_bwd_p
};
stream
(
stream
::
kind
::
eager
).
submit
(
pipeline
).
wait
();
if
(
axis
!=
-
1
&&
axis
!=
rank
-
1
)
{
TransCompute
<
platform
::
CPUDeviceContext
,
T
>
(
rank
,
dev_ctx
,
dX_trans
,
dX
,
perm
);
}
}
};
}
// namespace operators
...
...
paddle/fluid/operators/softmax_cudnn_op.cu.cc
浏览文件 @
6c641827
...
...
@@ -14,7 +14,6 @@ limitations under the License. */
#include "paddle/fluid/operators/math/softmax.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/softmax_op.h"
namespace
paddle
{
namespace
operators
{
...
...
@@ -25,44 +24,22 @@ template <typename T>
class
SoftmaxCUDNNKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
auto
&
dev_ctx
=
context
.
template
device_context
<
platform
::
CUDADeviceContext
>();
auto
*
X
=
context
.
Input
<
Tensor
>
(
"X"
);
auto
*
Out
=
context
.
Output
<
Tensor
>
(
"Out"
);
const
int
axis
=
context
.
Attr
<
int
>
(
"axis"
);
int
rank
=
X
->
dims
().
size
();
// allocate memory on device.
Out
->
mutable_data
<
T
>
(
context
.
GetPlace
());
std
::
vector
<
int
>
perm
,
shape
;
CalcTransPermAndShapeByAxis
(
*
X
,
axis
,
&
perm
,
&
shape
);
Tensor
X_2d
,
Out_2d
;
Tensor
X_trans
,
Out_trans
;
if
(
axis
!=
-
1
&&
axis
!=
rank
-
1
)
{
X_trans
.
mutable_data
<
T
>
(
framework
::
make_ddim
(
shape
),
context
.
GetPlace
());
Out_trans
.
mutable_data
<
T
>
(
framework
::
make_ddim
(
shape
),
context
.
GetPlace
());
TransCompute
<
platform
::
CUDADeviceContext
,
T
>
(
rank
,
dev_ctx
,
*
X
,
&
X_trans
,
perm
);
TransCompute
<
platform
::
CUDADeviceContext
,
T
>
(
rank
,
dev_ctx
,
*
Out
,
&
Out_trans
,
perm
);
X_2d
=
framework
::
ReshapeToMatrix
(
X_trans
,
rank
-
1
);
Out_2d
=
framework
::
ReshapeToMatrix
(
Out_trans
,
rank
-
1
);
}
else
{
X_2d
=
framework
::
ReshapeToMatrix
(
*
X
,
rank
-
1
);
Out_2d
=
framework
::
ReshapeToMatrix
(
*
Out
,
rank
-
1
);
}
auto
dims
=
X
->
dims
();
auto
flattened_dims
=
framework
::
flatten_to_2d
(
dims
,
dims
.
size
()
-
1
);
framework
::
LoDTensor
flattened_x
;
framework
::
LoDTensor
flattened_out
;
flattened_x
.
ShareDataWith
(
*
X
).
Resize
(
flattened_dims
);
flattened_out
.
ShareDataWith
(
*
Out
).
Resize
(
flattened_dims
);
math
::
SoftmaxCUDNNFunctor
<
T
>
()(
context
.
template
device_context
<
platform
::
CUDADeviceContext
>(),
&
X_2d
,
&
Out_2d
);
if
(
axis
!=
-
1
&&
axis
!=
rank
-
1
)
{
TransCompute
<
platform
::
CUDADeviceContext
,
T
>
(
rank
,
dev_ctx
,
Out_trans
,
Out
,
perm
);
}
context
.
template
device_context
<
platform
::
CUDADeviceContext
>(),
&
flattened_x
,
&
flattened_out
);
}
};
...
...
@@ -70,51 +47,25 @@ template <typename T>
class
SoftmaxGradCUDNNKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
auto
&
dev_ctx
=
context
.
template
device_context
<
platform
::
CUDADeviceContext
>();
auto
*
Out
=
context
.
Input
<
Tensor
>
(
"Out"
);
auto
*
dOut
=
context
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
*
dX
=
context
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"X"
));
const
int
axis
=
context
.
Attr
<
int
>
(
"axis"
);
int
rank
=
Out
->
dims
().
size
();
// allocate memory on device.
dX
->
mutable_data
<
T
>
(
context
.
GetPlace
());
std
::
vector
<
int
>
perm
,
shape
;
CalcTransPermAndShapeByAxis
(
*
dX
,
axis
,
&
perm
,
&
shape
);
Tensor
dX_2d
,
Out_2d
,
dOut_2d
;
Tensor
dX_trans
,
Out_trans
,
dOut_trans
;
if
(
axis
!=
-
1
&&
axis
!=
rank
-
1
)
{
dX_trans
.
mutable_data
<
T
>
(
framework
::
make_ddim
(
shape
),
context
.
GetPlace
());
Out_trans
.
mutable_data
<
T
>
(
framework
::
make_ddim
(
shape
),
context
.
GetPlace
());
dOut_trans
.
mutable_data
<
T
>
(
framework
::
make_ddim
(
shape
),
context
.
GetPlace
());
TransCompute
<
platform
::
CUDADeviceContext
,
T
>
(
rank
,
dev_ctx
,
*
dX
,
&
dX_trans
,
perm
);
TransCompute
<
platform
::
CUDADeviceContext
,
T
>
(
rank
,
dev_ctx
,
*
Out
,
&
Out_trans
,
perm
);
TransCompute
<
platform
::
CUDADeviceContext
,
T
>
(
rank
,
dev_ctx
,
*
dOut
,
&
dOut_trans
,
perm
);
dX_2d
=
framework
::
ReshapeToMatrix
(
dX_trans
,
rank
-
1
);
Out_2d
=
framework
::
ReshapeToMatrix
(
Out_trans
,
rank
-
1
);
dOut_2d
=
framework
::
ReshapeToMatrix
(
dOut_trans
,
rank
-
1
);
}
else
{
dX_2d
=
framework
::
ReshapeToMatrix
(
*
dX
,
rank
-
1
);
Out_2d
=
framework
::
ReshapeToMatrix
(
*
Out
,
rank
-
1
);
dOut_2d
=
framework
::
ReshapeToMatrix
(
*
dOut
,
rank
-
1
);
}
auto
dims
=
Out
->
dims
();
auto
flattened_dims
=
framework
::
flatten_to_2d
(
dims
,
dims
.
size
()
-
1
);
framework
::
LoDTensor
flattened_out
;
framework
::
LoDTensor
flattened_d_out
;
framework
::
LoDTensor
flattened_d_x
;
flattened_out
.
ShareDataWith
(
*
Out
).
Resize
(
flattened_dims
);
flattened_d_out
.
ShareDataWith
(
*
dOut
).
Resize
(
flattened_dims
);
flattened_d_x
.
ShareDataWith
(
*
dX
).
Resize
(
flattened_dims
);
math
::
SoftmaxGradCUDNNFunctor
<
T
>
()(
context
.
template
device_context
<
platform
::
CUDADeviceContext
>(),
&
Out_2d
,
&
dOut_2d
,
&
dX_2d
);
if
(
axis
!=
-
1
&&
axis
!=
rank
-
1
)
{
TransCompute
<
platform
::
CUDADeviceContext
,
T
>
(
rank
,
dev_ctx
,
dX_trans
,
dX
,
perm
);
}
context
.
template
device_context
<
platform
::
CUDADeviceContext
>(),
&
flattened_out
,
&
flattened_d_out
,
&
flattened_d_x
);
}
};
...
...
paddle/fluid/operators/softmax_op.h
浏览文件 @
6c641827
...
...
@@ -13,81 +13,66 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/softmax.h"
#include "paddle/fluid/operators/transpose_op.h"
namespace
paddle
{
namespace
operators
{
using
Tensor
=
framework
::
Tensor
;
using
DDim
=
framework
::
DDim
;
static
inline
void
CalcTransPermAndShapeByAxis
(
const
Tensor
&
x
,
const
int
axis
,
std
::
vector
<
int
>*
perm
,
std
::
vector
<
int
>*
shape
)
{
auto
dim_x
=
x
.
dims
();
int
rank
=
dim_x
.
size
();
static
inline
int
CanonicalAxis
(
const
int
axis
,
const
int
rank
)
{
if
(
axis
<
0
)
{
return
axis
+
rank
;
}
return
axis
;
}
if
(
axis
==
-
1
||
axis
==
rank
-
1
)
{
return
;
static
inline
int
SizeToAxis
(
const
int
axis
,
DDim
dims
)
{
int
size
=
1
;
for
(
int
i
=
0
;
i
<
axis
;
i
++
)
{
size
*=
dims
[
i
];
}
return
size
;
}
for
(
int
i
=
0
;
i
<
rank
-
1
;
i
++
)
{
if
(
i
==
axis
)
{
perm
->
push_back
(
rank
-
1
);
shape
->
push_back
(
dim_x
[
rank
-
1
]);
}
else
{
perm
->
push_back
(
i
);
shape
->
push_back
(
dim_x
[
i
]);
}
static
inline
int
SizeFromAxis
(
const
int
axis
,
DDim
dims
)
{
int
size
=
1
;
for
(
int
i
=
axis
;
i
<
dims
.
size
();
i
++
)
{
size
*=
dims
[
i
];
}
perm
->
push_back
(
axis
);
shape
->
push_back
(
dim_x
[
axis
]);
return
size
;
}
template
<
typename
DeviceContext
,
typename
T
>
class
SoftmaxKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
auto
&
dev_ctx
=
context
.
template
device_context
<
DeviceContext
>();
auto
*
X
=
context
.
Input
<
Tensor
>
(
"X"
);
auto
*
Out
=
context
.
Output
<
Tensor
>
(
"Out"
);
const
int
axis
=
context
.
Attr
<
int
>
(
"axis"
);
int
rank
=
X
->
dims
().
size
();
const
int
rank
=
X
->
dims
().
size
();
const
int
axis
=
CanonicalAxis
(
context
.
Attr
<
int
>
(
"axis"
),
rank
);
int
axis_dim
=
X
->
dims
()[
axis
];
// allocate memory on device.
Out
->
mutable_data
<
T
>
(
context
.
GetPlace
());
std
::
vector
<
int
>
perm
,
shape
;
CalcTransPermAndShapeByAxis
(
*
X
,
axis
,
&
perm
,
&
shape
);
const
int
n
=
SizeToAxis
(
axis
,
X
->
dims
());
const
int
d
=
SizeFromAxis
(
axis
,
X
->
dims
());
Tensor
X_2d
,
Out_2d
;
Tensor
X_trans
,
Out_trans
;
if
(
axis
!=
-
1
&&
axis
!=
rank
-
1
)
{
X_trans
.
mutable_data
<
T
>
(
framework
::
make_ddim
(
shape
),
context
.
GetPlace
());
Out_trans
.
mutable_data
<
T
>
(
framework
::
make_ddim
(
shape
),
context
.
GetPlace
());
TransCompute
<
DeviceContext
,
T
>
(
rank
,
dev_ctx
,
*
X
,
&
X_trans
,
perm
);
TransCompute
<
DeviceContext
,
T
>
(
rank
,
dev_ctx
,
*
Out
,
&
Out_trans
,
perm
);
X_2d
=
framework
::
ReshapeToMatrix
(
X_trans
,
rank
-
1
);
Out_2d
=
framework
::
ReshapeToMatrix
(
Out_trans
,
rank
-
1
);
}
else
{
X_2d
=
framework
::
ReshapeToMatrix
(
*
X
,
rank
-
1
);
Out_2d
=
framework
::
ReshapeToMatrix
(
*
Out
,
rank
-
1
);
}
X_2d
.
ShareDataWith
(
*
X
).
Resize
({
n
,
d
});
Out_2d
.
ShareDataWith
(
*
Out
).
Resize
({
n
,
d
});
// Tensor X_2d = framework::ReshapeToMatrix(*X, axis - 1);
// Tensor Out_2d = framework::ReshapeToMatrix(*Out, axis - 1);
#ifdef PADDLE_ON_INFERENCE
math
::
SoftmaxFunctor
<
DeviceContext
,
T
,
true
>
()(
context
.
template
device_context
<
DeviceContext
>(),
&
X_2d
,
&
Out_2d
);
context
.
template
device_context
<
DeviceContext
>(),
axis_dim
,
&
X_2d
,
&
Out_2d
);
#else
math
::
SoftmaxFunctor
<
DeviceContext
,
T
,
false
>
()(
context
.
template
device_context
<
DeviceContext
>(),
&
X_2d
,
&
Out_2d
);
context
.
template
device_context
<
DeviceContext
>(),
axis_dim
,
&
X_2d
,
&
Out_2d
);
#endif
if
(
axis
!=
-
1
&&
axis
!=
rank
-
1
)
{
TransCompute
<
DeviceContext
,
T
>
(
rank
,
dev_ctx
,
Out_trans
,
Out
,
perm
);
}
}
};
...
...
@@ -95,46 +80,29 @@ template <typename DeviceContext, typename T>
class
SoftmaxGradKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
auto
&
dev_ctx
=
context
.
template
device_context
<
DeviceContext
>();
auto
*
Out
=
context
.
Input
<
Tensor
>
(
"Out"
);
auto
*
dOut
=
context
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
*
dX
=
context
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"X"
));
const
int
axis
=
context
.
Attr
<
int
>
(
"axis"
);
int
rank
=
Out
->
dims
().
size
();
const
int
rank
=
dX
->
dims
().
size
();
const
int
axis
=
CanonicalAxis
(
context
.
Attr
<
int
>
(
"axis"
),
rank
);
int
axis_dim
=
dX
->
dims
()[
axis
];
// allocate memory on device.
dX
->
mutable_data
<
T
>
(
context
.
GetPlace
());
std
::
vector
<
int
>
perm
,
shape
;
CalcTransPermAndShapeByAxis
(
*
dX
,
axis
,
&
perm
,
&
shape
);
const
int
n
=
SizeToAxis
(
axis
,
dX
->
dims
());
const
int
d
=
SizeFromAxis
(
axis
,
dX
->
dims
());
Tensor
dX_2d
,
Out_2d
,
dOut_2d
;
Tensor
dX_trans
,
Out_trans
,
dOut_trans
;
if
(
axis
!=
-
1
&&
axis
!=
rank
-
1
)
{
dX_trans
.
mutable_data
<
T
>
(
framework
::
make_ddim
(
shape
),
context
.
GetPlace
());
Out_trans
.
mutable_data
<
T
>
(
framework
::
make_ddim
(
shape
),
context
.
GetPlace
());
dOut_trans
.
mutable_data
<
T
>
(
framework
::
make_ddim
(
shape
),
context
.
GetPlace
());
TransCompute
<
DeviceContext
,
T
>
(
rank
,
dev_ctx
,
*
dX
,
&
dX_trans
,
perm
);
TransCompute
<
DeviceContext
,
T
>
(
rank
,
dev_ctx
,
*
Out
,
&
Out_trans
,
perm
);
TransCompute
<
DeviceContext
,
T
>
(
rank
,
dev_ctx
,
*
dOut
,
&
dOut_trans
,
perm
);
dX_2d
=
framework
::
ReshapeToMatrix
(
dX_trans
,
rank
-
1
);
Out_2d
=
framework
::
ReshapeToMatrix
(
Out_trans
,
rank
-
1
);
dOut_2d
=
framework
::
ReshapeToMatrix
(
dOut_trans
,
rank
-
1
);
}
else
{
dX_2d
=
framework
::
ReshapeToMatrix
(
*
dX
,
rank
-
1
);
Out_2d
=
framework
::
ReshapeToMatrix
(
*
Out
,
rank
-
1
);
dOut_2d
=
framework
::
ReshapeToMatrix
(
*
dOut
,
rank
-
1
);
}
dX_2d
.
ShareDataWith
(
*
dX
).
Resize
({
n
,
d
});
Out_2d
.
ShareDataWith
(
*
Out
).
Resize
({
n
,
d
});
dOut_2d
.
ShareDataWith
(
*
dOut
).
Resize
({
n
,
d
});
// Tensor Out_2d = framework::ReshapeToMatrix(*Out, axis - 1);
// Tensor dOut_2d = framework::ReshapeToMatrix(*dOut, axis - 1);
// Tensor dX_2d = framework::ReshapeToMatrix(*dX, axis - 1);
math
::
SoftmaxGradFunctor
<
DeviceContext
,
T
>
()(
context
.
template
device_context
<
DeviceContext
>(),
&
Out_2d
,
&
dOut_2d
,
context
.
template
device_context
<
DeviceContext
>(),
axis_dim
,
&
Out_2d
,
&
dOut_2d
,
&
dX_2d
);
if
(
axis
!=
-
1
&&
axis
!=
rank
-
1
)
{
TransCompute
<
DeviceContext
,
T
>
(
rank
,
dev_ctx
,
dX_trans
,
dX
,
perm
);
}
}
};
...
...
paddle/fluid/operators/softmax_with_cross_entropy_op.h
浏览文件 @
6c641827
...
...
@@ -43,7 +43,7 @@ class SoftmaxWithCrossEntropyKernel : public framework::OpKernel<T> {
auto
&
dev_ctx
=
context
.
template
device_context
<
platform
::
CPUDeviceContext
>();
math
::
SoftmaxFunctor
<
platform
::
CPUDeviceContext
,
T
,
false
>
()(
dev_ctx
,
logits
,
softmax
);
dev_ctx
,
-
1
,
logits
,
softmax
);
math
::
CrossEntropyFunctor
<
platform
::
CPUDeviceContext
,
T
>
()(
dev_ctx
,
loss
,
softmax
,
labels
,
context
.
Attr
<
bool
>
(
"soft_label"
),
context
.
Attr
<
int
>
(
"ignore_index"
));
...
...
paddle/fluid/operators/warpctc_cudnn_op.cu.cc
浏览文件 @
6c641827
...
...
@@ -69,7 +69,7 @@ class CudnnCTCKernel : public framework::OpKernel<T> {
int
rank
=
logits
->
dims
().
size
();
Tensor
in_2d
=
framework
::
ReshapeToMatrix
(
*
logits
,
rank
-
1
);
Tensor
out_2d
=
framework
::
ReshapeToMatrix
(
softmax_logits
,
rank
-
1
);
math
::
SoftmaxFunctor
<
DeviceContext
,
T
,
false
>
()(
dev_ctx
,
&
in_2d
,
&
out_2d
);
math
::
SoftmaxFunctor
<
DeviceContext
,
T
,
false
>
()(
dev_ctx
,
-
1
,
&
in_2d
,
&
out_2d
);
// ctc needs sequences data stored in transposed padding format
// logits and grad using padding data of layout 'TNC'
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录