Crayon鑫 / Paddle — forked from PaddlePaddle / Paddle (in sync with the upstream project)
Commit 420527f0 (unverified)
Authored Mar 19, 2021 by ronnywang; committed via GitHub on Mar 19, 2021
[ROCM] fix layer_norm, norm, p_norm, test_sequence_softmax_op, test_math_op_patch_var_base (#31709)
Parent commit: 87852616
Showing 5 changed files with 34 additions and 7 deletions (+34 −7)
paddle/fluid/operators/layer_norm_op.cu  (+8 −1)
paddle/fluid/operators/norm_op.cu  (+8 −1)
paddle/fluid/operators/p_norm_op.cu  (+10 −0)
python/paddle/fluid/tests/unittests/sequence/test_sequence_softmax_op.py  (+3 −3)
python/paddle/fluid/tests/unittests/test_math_op_patch_var_base.py  (+5 −2)
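
In summary, as read from the diffs below: the three CUDA kernel files cap the launch block size at 256 threads when built with hipcc for ROCm, where they previously used 512 unconditionally; test_sequence_softmax_op.py switches its test dtype to float32 on ROCm builds; and test_math_op_patch_var_base.py skips the cholesky check on ROCm, which did not support that op at the time. Two hedged sketches of the shared kernel-side pattern follow the layer_norm_op.cu and norm_op.cu diffs.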
paddle/fluid/operators/layer_norm_op.cu
...
@@ -43,7 +43,11 @@ template <typename T>
 using LayerNormParamType = typename CudnnDataType<T>::BatchNormParamType;

 inline static int GetDesiredBlockDim(int block_dim) {
+#ifdef __HIPCC__
+  const int kMaxBlockDim = 256;
+#else
   const int kMaxBlockDim = 512;
+#endif
   return block_dim >= kMaxBlockDim
              ? kMaxBlockDim
              : (1 << (static_cast<int>(std::log2f(block_dim))));
...
@@ -698,8 +702,11 @@ static void LayerNormBackward(const T *x, const T *d_y, const U *scale,
                               const framework::ExecutionContext &ctx) {
   auto &dev_ctx = ctx.cuda_device_context();
   auto stream = dev_ctx.stream();
+#ifdef __HIPCC__
+  const int kMaxBlockDim = 256;
+#else
   const int kMaxBlockDim = 512;
+#endif
   const int kMaxBlockNum = 128;
   int gradient_flag = ((d_x != nullptr ? 1 : 0) << 2) |
                       ((d_scale != nullptr ? 1 : 0) << 1) |
...
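
The GetDesiredBlockDim change above is small but easy to misread in diff form: the function rounds a requested block size down to a power of two, and the commit only lowers the cap it rounds against when the file is compiled by hipcc (which defines __HIPCC__); an nvcc build keeps the original 512-thread cap. Below is a minimal, self-contained C++ sketch of the same arithmetic; only the constant and the function mirror the committed code, and the main driver is added purely for illustration.

#include <cmath>
#include <cstdio>
#include <initializer_list>

#ifdef __HIPCC__
const int kMaxBlockDim = 256;  // ROCm build: cap threads per block at 256
#else
const int kMaxBlockDim = 512;  // CUDA build: keep the original 512 cap
#endif

// Round block_dim down to a power of two, never exceeding the cap
// (the same expression as GetDesiredBlockDim in layer_norm_op.cu).
static int GetDesiredBlockDim(int block_dim) {
  return block_dim >= kMaxBlockDim
             ? kMaxBlockDim
             : (1 << (static_cast<int>(std::log2f(block_dim))));
}

int main() {
  // Expected: 100 -> 64, 256 -> 256, 300 -> 256, 1000 -> 512 (CUDA build)
  //           100 -> 64, 256 -> 256, 300 -> 256, 1000 -> 256 (ROCm build)
  for (int n : {100, 256, 300, 1000}) {
    std::printf("%4d -> %d\n", n, GetDesiredBlockDim(n));
  }
  return 0;
}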
paddle/fluid/operators/norm_op.cu
...
@@ -79,8 +79,11 @@ class NormCUDAKernel : public framework::OpKernel<T> {
     GetDims(xdim, axis, &pre, &n, &post);

     auto &dev_ctx = ctx.cuda_device_context();
+#ifdef __HIPCC__
+    const int block = 256;
+#else
     const int block = 512;
+#endif
     int max_threads = dev_ctx.GetMaxPhysicalThreadCount();
     const int max_blocks = std::max(max_threads / block, 1);
     int grid = std::min(max_blocks, pre * post);
...
@@ -146,7 +149,11 @@ class NormGradCUDAKernel : public framework::OpKernel<T> {
     auto &dev_ctx = ctx.cuda_device_context();
+#ifdef __HIPCC__
+    const int block = 256;
+#else
     const int block = 512;
+#endif
     int max_threads = dev_ctx.GetMaxPhysicalThreadCount();
     const int max_blocks = std::max(max_threads / block, 1);
     int grid = std::min(max_blocks, pre * post);
...
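
norm_op.cu here and p_norm_op.cu below apply the same launch-shape recipe in all four kernels: choose the per-platform block size, bound the block count by the device's physical thread capacity, then clamp the grid to the number of independent reductions (pre * post). A minimal host-side sketch of that recipe follows; ChooseLaunchShape, max_threads, and rows are illustrative names, not Paddle API.

#include <algorithm>

struct LaunchShape {
  int block;  // threads per block
  int grid;   // number of blocks
};

// max_threads: the device's maximum number of concurrently resident threads
// (the kernels obtain it via dev_ctx.GetMaxPhysicalThreadCount()).
// rows: the number of independent reductions, i.e. pre * post.
LaunchShape ChooseLaunchShape(int max_threads, int rows) {
#ifdef __HIPCC__
  const int block = 256;  // ROCm cap introduced by this commit
#else
  const int block = 512;
#endif
  const int max_blocks = std::max(max_threads / block, 1);  // at least one block
  const int grid = std::min(max_blocks, rows);  // no more blocks than rows
  return {block, grid};
}

For example, with max_threads = 61440 and rows = 100, both builds launch 100 blocks; only the block width differs (512 vs 256).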
paddle/fluid/operators/p_norm_op.cu
...
@@ -142,7 +142,12 @@ class PnormCUDAKernel : public framework::OpKernel<T> {
     auto &dev_ctx = ctx.cuda_device_context();
+#ifdef __HIPCC__
+    const int block = 256;
+#else
     const int block = 512;
+#endif
     int max_threads = dev_ctx.GetMaxPhysicalThreadCount();
     const int max_blocks = std::max(max_threads / block, 1);
     int grid = std::min(max_blocks, pre * post);
...
@@ -244,7 +249,12 @@ class PnormGradCUDAKernel : public framework::OpKernel<T> {
     auto &dev_ctx = ctx.cuda_device_context();
+#ifdef __HIPCC__
+    const int block = 256;
+#else
     const int block = 512;
+#endif
     int max_threads = dev_ctx.GetMaxPhysicalThreadCount();
     const int max_blocks = std::max(max_threads / block, 1);
     int grid = std::min(max_blocks, pre * post);
...
python/paddle/fluid/tests/unittests/sequence/test_sequence_softmax_op.py
...
@@ -28,10 +28,10 @@ class TestSequenceSoftmaxOp(OpTest):
         self.op_type = "sequence_softmax"
         self.use_cudnn = False
         self.init_op_type()
-        x = np.random.uniform(0.1, 1, (110, 1)).astype("float64")
+        self.dtype = "float32" if core.is_compiled_with_rocm() else "float64"
+        x = np.random.uniform(0.1, 1, (110, 1)).astype(self.dtype)
         self.init_lod()
-        out = np.zeros((110, 1)).astype("float64")
+        out = np.zeros((110, 1)).astype(self.dtype)
         offset = 0
         for i in range(len(self.lod[0])):
             if (self.lod[0][i] == 0):
...
python/paddle/fluid/tests/unittests/test_math_op_patch_var_base.py
...
@@ -354,8 +354,11 @@ class TestMathOpPatchesVarBase(unittest.TestCase):
                        [1.30058, 1.0688717, 1.4928783],
                        [1.0958099, 1.3724753, 1.8926544]])
         d = d.matmul(d.t())
-        self.assertTrue(
-            np.array_equal(d.cholesky().numpy(), paddle.cholesky(d).numpy()))
+        # ROCM not support cholesky
+        if not fluid.core.is_compiled_with_rocm():
+            self.assertTrue(
+                np.array_equal(d.cholesky().numpy(),
+                               paddle.cholesky(d).numpy()))
         self.assertTrue(
             np.array_equal(x.is_empty().numpy(), paddle.is_empty(x).numpy()))
...