Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
11fb8a1c
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
11fb8a1c
编写于
8月 31, 2020
作者:
G
GaoWei8
提交者:
GitHub
8月 31, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Refine cudnn softmax (#25757)
* refine cudnn softmax
上级
885c61f0
变更
3
显示空白变更内容
内联
并排
Showing
3 changed file
with
155 addition
and
45 deletion
+155
-45
paddle/fluid/operators/softmax_cudnn_op.cu.cc
paddle/fluid/operators/softmax_cudnn_op.cu.cc
+68
-38
paddle/fluid/operators/softmax_op.cc
paddle/fluid/operators/softmax_op.cc
+0
-7
python/paddle/fluid/tests/unittests/test_softmax_op.py
python/paddle/fluid/tests/unittests/test_softmax_op.py
+87
-0
未找到文件。
paddle/fluid/operators/softmax_cudnn_op.cu.cc
浏览文件 @
11fb8a1c
...
...
@@ -12,60 +12,90 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/math/softmax.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/softmax_op.h"
#include "paddle/fluid/platform/cudnn_desc.h"
#include "paddle/fluid/platform/cudnn_helper.h"
namespace
paddle
{
namespace
operators
{
using
ScopedTensorDescriptor
=
platform
::
ScopedTensorDescriptor
;
using
DataLayout
=
platform
::
DataLayout
;
using
Tensor
=
framework
::
Tensor
;
// Returns the product of every extent of `dims` that lies strictly after
// position `axis` (i.e. indices axis + 1 .. dims.size() - 1). When `axis`
// is the last index the loop body never runs and the result is 1.
static inline int SizeOutAxis(const int axis, DDim dims) {
  int trailing_extent = 1;
  int idx = axis + 1;
  while (idx < dims.size()) {
    trailing_extent *= dims[idx];
    ++idx;
  }
  return trailing_extent;
}
// Forward softmax kernel that dispatches to cuDNN's cudnnSoftmaxForward.
//
// The input "X" is described to cuDNN as a 4-D NCHW tensor of shape
// {N, dim, D, 1}, where `dim` is the extent of the softmax axis, `N` is
// SizeToAxis(axis, dims) and `D` is SizeOutAxis(axis, dims) (the product of
// the extents after the axis). The result is written to output "Out".
template <typename T>
class SoftmaxCUDNNKernel : public framework::OpKernel<T> {
 public:
  // Reads input "X" and attribute "axis", allocates "Out" on the context's
  // place, and runs the accurate cuDNN softmax along the requested axis.
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* x = ctx.Input<Tensor>("X");
    auto* out = ctx.Output<Tensor>("Out");
    // Allocate the output buffer before taking its data pointer.
    out->mutable_data<T>(ctx.GetPlace());
    auto* out_data = out->data<T>();

    auto dims = x->dims();
    const int rank = dims.size();
    // NOTE(review): CanonicalAxis presumably maps a negative axis into
    // [0, rank) -- it is defined outside this file; confirm.
    const int axis = CanonicalAxis(ctx.Attr<int>("axis"), rank);
    const int dim = dims[axis];
    const int N = SizeToAxis(axis, dims);
    const int D = SizeOutAxis(axis, dims);

    // Collapse the tensor to {N, dim, D, 1} so the softmax axis sits in the
    // "C" slot of an NCHW descriptor.
    ScopedTensorDescriptor desc;
    std::vector<int> tensor_dims = {N, dim, D, 1};
    DataLayout layout = DataLayout::kNCHW;
    cudnnTensorDescriptor_t tensor_desc =
        desc.descriptor<T>(layout, tensor_dims);

    auto& dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
    auto handle = dev_ctx.cudnn_handle();
    // Last axis => D == 1, so INSTANCE mode (normalising over C*H*W per the
    // cuDNN reference) covers exactly `dim` elements; for any other axis,
    // CHANNEL mode normalises over C for each (N, H, W) position.
    auto mode = axis == rank - 1 ? CUDNN_SOFTMAX_MODE_INSTANCE
                                 : CUDNN_SOFTMAX_MODE_CHANNEL;

    // The same descriptor is used for input and output; kOne/kZero are the
    // scaling factors passed to cuDNN for the result and the prior output.
    PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnSoftmaxForward(
        handle, CUDNN_SOFTMAX_ACCURATE, mode,
        platform::CudnnDataType<T>::kOne(), tensor_desc, x->data<T>(),
        platform::CudnnDataType<T>::kZero(), tensor_desc, out_data));
  }
};
// Backward softmax kernel that dispatches to cuDNN's cudnnSoftmaxBackward.
//
// Consumes the forward output "Out" and its gradient GradVarName("Out"),
// and writes the input gradient GradVarName("X"). All three tensors are
// described to cuDNN with one shared 4-D NCHW descriptor of shape
// {N, dim, D, 1}, where `dim` is the extent of the softmax axis, `N` is
// SizeToAxis(axis, dims) and `D` is SizeOutAxis(axis, dims).
template <typename T>
class SoftmaxGradCUDNNKernel : public framework::OpKernel<T> {
 public:
  // Reads "Out", d"Out" and attribute "axis", allocates d"X" on the
  // context's place, and runs the accurate cuDNN softmax backward pass.
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* out = ctx.Input<Tensor>("Out");
    auto* dout = ctx.Input<Tensor>(framework::GradVarName("Out"));
    auto* dx = ctx.Output<Tensor>(framework::GradVarName("X"));
    // Allocate the gradient buffer before taking its data pointer.
    dx->mutable_data<T>(ctx.GetPlace());
    auto* dx_data = dx->data<T>();

    auto dims = out->dims();
    const int rank = dims.size();
    // NOTE(review): CanonicalAxis presumably maps a negative axis into
    // [0, rank) -- it is defined outside this file; confirm.
    const int axis = CanonicalAxis(ctx.Attr<int>("axis"), rank);
    const int dim = dims[axis];
    const int N = SizeToAxis(axis, dims);
    const int D = SizeOutAxis(axis, dims);

    // Collapse the tensor to {N, dim, D, 1} so the softmax axis sits in the
    // "C" slot of an NCHW descriptor.
    ScopedTensorDescriptor desc;
    std::vector<int> tensor_dims = {N, dim, D, 1};
    DataLayout layout = DataLayout::kNCHW;
    cudnnTensorDescriptor_t tensor_desc =
        desc.descriptor<T>(layout, tensor_dims);

    auto& dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
    auto handle = dev_ctx.cudnn_handle();
    // Last axis => D == 1, so INSTANCE mode (normalising over C*H*W per the
    // cuDNN reference) covers exactly `dim` elements; for any other axis,
    // CHANNEL mode normalises over C for each (N, H, W) position.
    auto mode = axis == rank - 1 ? CUDNN_SOFTMAX_MODE_INSTANCE
                                 : CUDNN_SOFTMAX_MODE_CHANNEL;

    // Argument order follows cudnnSoftmaxBackward: y, dy, then dx, all with
    // the same descriptor; kOne/kZero are the scaling factors.
    PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnSoftmaxBackward(
        handle, CUDNN_SOFTMAX_ACCURATE, mode,
        platform::CudnnDataType<T>::kOne(), tensor_desc, out->data<T>(),
        tensor_desc, dout->data<T>(), platform::CudnnDataType<T>::kZero(),
        tensor_desc, dx_data));
  }
};
...
...
paddle/fluid/operators/softmax_op.cc
浏览文件 @
11fb8a1c
...
...
@@ -53,13 +53,6 @@ class SoftmaxOp : public framework::OperatorWithKernel {
"Attr(axis) value should be in range [-R, R-1], "
"R is the rank of Input(X)."
));
auto
use_cudnn
=
ctx
->
Attrs
().
Get
<
bool
>
(
"use_cudnn"
);
if
(
axis
!=
rank_x
-
1
&&
axis
!=
-
1
)
{
PADDLE_ENFORCE_EQ
(
use_cudnn
,
false
,
platform
::
errors
::
InvalidArgument
(
"CUDNN kernel only support axis as -1."
));
}
ctx
->
SetOutputDim
(
"Out"
,
ctx
->
GetInputDim
(
"X"
));
ctx
->
ShareLoD
(
"X"
,
/*->*/
"Out"
);
}
...
...
python/paddle/fluid/tests/unittests/test_softmax_op.py
浏览文件 @
11fb8a1c
...
...
@@ -153,16 +153,103 @@ class TestSoftmaxCUDNNOp2(TestSoftmaxCUDNNOp):
return
[
2
,
3
,
4
,
5
]
# TestSoftmaxCUDNNOp variant reporting x shape [2, 3, 4, 5] and axis 0.
# The whole class is skipped when core is not compiled with CUDA.
@unittest.skipIf(not core.is_compiled_with_cuda(),
                 "core is not compiled with CUDA")
class TestSoftmaxCUDNNOp3(TestSoftmaxCUDNNOp):
    def get_x_shape(self):
        # 4-D input shape.
        x_shape = [2, 3, 4, 5]
        return x_shape

    def get_axis(self):
        # First axis of the 4-D shape.
        softmax_axis = 0
        return softmax_axis
# TestSoftmaxCUDNNOp variant reporting x shape [2, 3, 4, 5] and axis 1.
# The whole class is skipped when core is not compiled with CUDA.
@unittest.skipIf(not core.is_compiled_with_cuda(),
                 "core is not compiled with CUDA")
class TestSoftmaxCUDNNOp4(TestSoftmaxCUDNNOp):
    def get_x_shape(self):
        # 4-D input shape.
        x_shape = [2, 3, 4, 5]
        return x_shape

    def get_axis(self):
        # Second axis of the 4-D shape.
        softmax_axis = 1
        return softmax_axis
# TestSoftmaxCUDNNOp variant reporting x shape [2, 3, 4, 5] and axis 2.
# The whole class is skipped when core is not compiled with CUDA.
@unittest.skipIf(not core.is_compiled_with_cuda(),
                 "core is not compiled with CUDA")
class TestSoftmaxCUDNNOp5(TestSoftmaxCUDNNOp):
    def get_x_shape(self):
        # 4-D input shape.
        x_shape = [2, 3, 4, 5]
        return x_shape

    def get_axis(self):
        # Third axis of the 4-D shape.
        softmax_axis = 2
        return softmax_axis
# TestSoftmaxCUDNNOp variant reporting x shape [2, 3, 4, 5] and axis 3.
# The whole class is skipped when core is not compiled with CUDA.
@unittest.skipIf(not core.is_compiled_with_cuda(),
                 "core is not compiled with CUDA")
class TestSoftmaxCUDNNOp6(TestSoftmaxCUDNNOp):
    def get_x_shape(self):
        # 4-D input shape.
        x_shape = [2, 3, 4, 5]
        return x_shape

    def get_axis(self):
        # Last axis of the 4-D shape.
        softmax_axis = 3
        return softmax_axis
# TestSoftmaxCUDNNOp variant reporting x shape [2, 3, 4, 5, 6]; get_axis is
# not overridden here, so whatever the base class provides is used.
# The whole class is skipped when core is not compiled with CUDA.
@unittest.skipIf(not core.is_compiled_with_cuda(),
                 "core is not compiled with CUDA")
class TestSoftmaxCUDNNOp7(TestSoftmaxCUDNNOp):
    def get_x_shape(self):
        # 5-D input shape.
        x_shape = [2, 3, 4, 5, 6]
        return x_shape
# TestSoftmaxCUDNNOp variant reporting x shape [2, 3, 4, 5, 6] and axis 0.
# The whole class is skipped when core is not compiled with CUDA.
@unittest.skipIf(not core.is_compiled_with_cuda(),
                 "core is not compiled with CUDA")
class TestSoftmaxCUDNNOp8(TestSoftmaxCUDNNOp):
    def get_x_shape(self):
        # 5-D input shape.
        x_shape = [2, 3, 4, 5, 6]
        return x_shape

    def get_axis(self):
        # First axis of the 5-D shape.
        softmax_axis = 0
        return softmax_axis
# TestSoftmaxCUDNNOp variant reporting x shape [2, 3, 4, 5, 6] and axis 1.
# The whole class is skipped when core is not compiled with CUDA.
@unittest.skipIf(not core.is_compiled_with_cuda(),
                 "core is not compiled with CUDA")
class TestSoftmaxCUDNNOp9(TestSoftmaxCUDNNOp):
    def get_x_shape(self):
        # 5-D input shape.
        x_shape = [2, 3, 4, 5, 6]
        return x_shape

    def get_axis(self):
        # Second axis of the 5-D shape.
        softmax_axis = 1
        return softmax_axis
# TestSoftmaxCUDNNOp variant reporting x shape [2, 3, 4, 5, 6] and axis 2.
# The whole class is skipped when core is not compiled with CUDA.
@unittest.skipIf(not core.is_compiled_with_cuda(),
                 "core is not compiled with CUDA")
class TestSoftmaxCUDNNOp10(TestSoftmaxCUDNNOp):
    def get_x_shape(self):
        # 5-D input shape.
        x_shape = [2, 3, 4, 5, 6]
        return x_shape

    def get_axis(self):
        # Third axis of the 5-D shape.
        softmax_axis = 2
        return softmax_axis
# TestSoftmaxCUDNNOp variant reporting x shape [2, 3, 4, 5, 6] and axis 3.
# The whole class is skipped when core is not compiled with CUDA.
@unittest.skipIf(not core.is_compiled_with_cuda(),
                 "core is not compiled with CUDA")
class TestSoftmaxCUDNNOp11(TestSoftmaxCUDNNOp):
    def get_x_shape(self):
        # 5-D input shape.
        x_shape = [2, 3, 4, 5, 6]
        return x_shape

    def get_axis(self):
        # Fourth axis of the 5-D shape.
        softmax_axis = 3
        return softmax_axis
# TestSoftmaxCUDNNOp variant reporting x shape [2, 3, 4, 5, 6] and axis 4.
# The whole class is skipped when core is not compiled with CUDA.
@unittest.skipIf(not core.is_compiled_with_cuda(),
                 "core is not compiled with CUDA")
class TestSoftmaxCUDNNOp12(TestSoftmaxCUDNNOp):
    def get_x_shape(self):
        # 5-D input shape.
        x_shape = [2, 3, 4, 5, 6]
        return x_shape

    def get_axis(self):
        # Last axis of the 5-D shape.
        softmax_axis = 4
        return softmax_axis
@
unittest
.
skipIf
(
not
core
.
is_compiled_with_cuda
(),
"core is not compiled with CUDA"
)
class
TestSoftmaxFP16Op
(
TestSoftmaxOp
):
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录