PaddlePaddle / Paddle, commit ddf94ae4 (unverified)
Authored on Apr 30, 2023 by zhouweiwei2014; committed via GitHub on Apr 30, 2023.
[Zero-Dim] Support paddle.sum/mean/loss api output 0D,test=allcase (#52739)
Parent commit: 0271d9e7
Showing 57 changed files with 380 additions and 258 deletions (+380, -258).
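As a quick orientation before the diff: this commit makes full reductions and reduced losses return 0-D tensors (shape []) instead of shape [1]. The snippet below is an illustrative sketch of that behavioral change, not part of the diff; it assumes a Paddle build that includes this commit.

# Sketch only (not from the diff): 0-D outputs after this change.
import paddle

x = paddle.rand([3, 5])
out = paddle.sum(x)          # previously shape [1]; now shape []
print(out.shape)             # []

loss = paddle.nn.functional.mse_loss(x, paddle.rand([3, 5]))
print(loss.shape)            # [] with the default 'mean' reduction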
Changed files:

paddle/fluid/prim/api/composite_backward/composite_backward_api.h  +1 -1
paddle/phi/infermeta/unary.cc  +2 -5
python/paddle/distributed/auto_parallel/completion.py  +13 -7
python/paddle/distributed/auto_parallel/engine.py  +1 -1
python/paddle/distributed/auto_parallel/operators/common.py  +1 -1
python/paddle/distributed/auto_parallel/operators/dist_default.py  +3 -1
python/paddle/distributed/auto_parallel/operators/dist_eltwise.py  +3 -1
python/paddle/distributed/auto_parallel/operators/dist_embedding.py  +1 -1
python/paddle/distributed/auto_parallel/operators/dist_matmul.py  +9 -9
python/paddle/distributed/auto_parallel/operators/dist_reshape.py  +9 -3
python/paddle/distributed/auto_parallel/operators/dist_scale.py  +3 -1
python/paddle/distributed/auto_parallel/operators/dist_softmax.py  +3 -1
python/paddle/distributed/auto_parallel/operators/dist_transpose.py  +3 -1
python/paddle/distributed/auto_parallel/tuner/rule_based_tuner.py  +8 -4
python/paddle/distributed/auto_parallel/utils.py  +9 -4
python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_optimizer.py  +23 -26
python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_utils.py  +30 -25
python/paddle/distributed/fleet/metrics/metric.py  +3 -3
python/paddle/distributed/passes/auto_parallel_amp.py  +37 -9
python/paddle/distributed/passes/auto_parallel_fp16.py  +7 -2
python/paddle/distributed/passes/auto_parallel_grad_clip.py  +2 -2
python/paddle/distributed/passes/auto_parallel_quantization.py  +2 -2
python/paddle/fluid/tests/unittests/check_nan_inf_base.py  +1 -5
python/paddle/fluid/tests/unittests/seresnext_test_base.py  +8 -6
python/paddle/fluid/tests/unittests/test_argsort_op.py  +3 -2
python/paddle/fluid/tests/unittests/test_cond.py  +1 -1
python/paddle/fluid/tests/unittests/test_cosine_embedding_loss.py  +4 -4
python/paddle/fluid/tests/unittests/test_eager_deletion_recurrent_op.py  +2 -2
python/paddle/fluid/tests/unittests/test_fetch_lod_tensor_array.py  +5 -3
python/paddle/fluid/tests/unittests/test_fuse_all_reduce_pass.py  +6 -4
python/paddle/fluid/tests/unittests/test_fuse_bn_act_pass.py  +2 -2
python/paddle/fluid/tests/unittests/test_fuse_bn_add_act_pass.py  +2 -2
python/paddle/fluid/tests/unittests/test_fuse_elewise_add_act_pass.py  +6 -4
python/paddle/fluid/tests/unittests/test_fuse_optimizer_pass.py  +6 -4
python/paddle/fluid/tests/unittests/test_fuse_relu_depthwise_conv_pass.py  +6 -4
python/paddle/fluid/tests/unittests/test_gradient_clip.py  +1 -1
python/paddle/fluid/tests/unittests/test_hinge_embedding_loss.py  +6 -6
python/paddle/fluid/tests/unittests/test_ir_memory_optimize_pass.py  +3 -4
python/paddle/fluid/tests/unittests/test_l1_loss.py  +2 -2
python/paddle/fluid/tests/unittests/test_mse_loss.py  +6 -6
python/paddle/fluid/tests/unittests/test_nan_inf.py  +0 -4
python/paddle/fluid/tests/unittests/test_parallel_executor_run_cinn.py  +1 -1
python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_with_reduce_cpu.py  +32 -16
python/paddle/fluid/tests/unittests/test_recurrent_op.py  +2 -2
python/paddle/fluid/tests/unittests/test_resnet50_with_cinn.py  +1 -1
python/paddle/fluid/tests/unittests/test_run_program_op.py  +1 -1
python/paddle/fluid/tests/unittests/test_zero_dim_tensor.py  +85 -43
python/paddle/incubate/autograd/composite_rules.py  +1 -1
python/paddle/incubate/distributed/models/moe/grad_clip.py  +4 -8
test/auto_parallel/test_while_op_partition.py  +1 -1
test/autograd/test_autograd_functional_static.py  +2 -2
test/contrib/test_multi_precision_fp16_train.py  +1 -1
test/distribution/test_distribution_transform.py  +1 -1
test/legacy_test/test_async_read_write.py  +1 -1
test/prim/prim/vjp/eager/test_comp_eager_sum_grad.py  +1 -1
test/standalone_executor/test_standalone_cuda_graph_multi_stream.py  +1 -1
test/xpu/test_zero_dim_tensor_xpu.py  +2 -1
paddle/fluid/prim/api/composite_backward/composite_backward_api.h

@@ -365,7 +365,7 @@ void sum_grad(const Tensor& x,
   if (!keepdim) {
     auto axis_ = std::vector<int64_t>();
     if (reduce_all) {
-      for (int64_t i = 1; i < x_dim_size; i++) {
+      for (int64_t i = 0; i < x_dim_size; i++) {
         axis_.push_back(i);
       }
     } else {
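The one-character change above fixes the axis list used to re-expand the reduced gradient when reduce_all is set: with keepdim false, the gradient has to be unsqueezed along every input axis, starting from 0. A hedged numpy sketch of the same bookkeeping (names are illustrative, not from the diff):

# Sketch only: re-expanding a fully reduced gradient needs axes 0..rank-1.
import numpy as np

x = np.random.rand(2, 3, 4)
grad_out = np.array(1.0)            # gradient of sum(x), 0-D
axes = list(range(x.ndim))          # [0, 1, 2]; starting the loop at 1 would drop axis 0
grad_x = np.broadcast_to(np.expand_dims(grad_out, axes), x.shape)
print(grad_x.shape)                 # (2, 3, 4)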
paddle/phi/infermeta/unary.cc

@@ -4004,9 +4004,6 @@ DDim OriginReduceInferDim(const MetaTensor& x,
       out_dim_vector.push_back(x.dims().at(i));
     }
   }
-  if (x_rank > 0 && out_dim_vector.size() == 0) {
-    out_dim_vector.push_back(1);
-  }
   DDim out_dim = phi::make_ddim(out_dim_vector);
   return out_dim;

@@ -4023,14 +4020,14 @@ DDim OriginReduceInferDimForIntArrayAxis(const MetaTensor& x,
     if (keep_dim) {
       vec_dim = std::vector<int64_t>(x.dims().size(), 1);
     } else {
-      vec_dim = {1};
+      vec_dim = {};
     }
   } else {
     if (keep_dim) {
       vec_dim = std::vector<int64_t>(x.dims().size(), -1);
     } else {
       auto x_rank = static_cast<size_t>(x.dims().size());
-      if (vec_axis.size() >= x_rank) {
+      if (vec_axis.size() > x_rank) {
         vec_dim = {-1};
       } else {
         vec_dim = std::vector<int64_t>(x.dims().size() - vec_axis.size(), -1);
python/paddle/distributed/auto_parallel/completion.py

@@ -1688,7 +1688,7 @@ class Completer:
                     world_ranks
                 )
                 out_dist_attr.dims_mapping = [
-                    -1 for _ in range(len(out_var.shape))
+                    -1 for _ in out_var.shape
                 ]
                 self._dist_context.set_tensor_dist_attr_for_program(
                     out_var, out_dist_attr

@@ -1732,7 +1732,9 @@ class Completer:
                     len(out_var.shape) == 1 and out_var.shape[0] == 1
                 )
-                out_dist_attr.dims_mapping = [-1]
+                out_dist_attr.dims_mapping = [
+                    -1 for _ in out_var.shape
+                ]
                 self._dist_context.set_tensor_dist_attr_for_program(
                     out_var, out_dist_attr
                 )

@@ -1802,16 +1804,20 @@ class Completer:
                         param.name, ref_dims_mapping
                    )
                    learning_var = vars[op.input("LearningRate")[0]]
-                    op_dist_attr.set_input_dims_mapping(learning_var.name, [-1])
+                    op_dist_attr.set_input_dims_mapping(
+                        learning_var.name, [-1 for _ in learning_var.shape]
+                    )
                    op_dist_attr.set_output_dims_mapping(
-                        learning_var.name, [-1]
+                        learning_var.name, [-1 for _ in learning_var.shape]
                    )

                    if not learning_rate_completed:
                        learning_rate_completed = True
                        var_dist_attr = TensorDistAttr()
                        var_dist_attr.process_mesh = ProcessMesh(world_ranks)
-                        var_dist_attr.dims_mapping = [-1]
+                        var_dist_attr.dims_mapping = [
+                            -1 for _ in learning_var.shape
+                        ]
                        self._dist_context.set_tensor_dist_attr_for_program(
                            learning_var, var_dist_attr
                        )

@@ -1841,10 +1847,10 @@ class Completer:
                    ):
                        input_var_attr.dims_mapping = [-1]
                        op_dist_attr.set_input_dims_mapping(
-                            input_var.name, [-1]
+                            input_var.name, [-1 for _ in input_var.shape]
                        )
                        op_dist_attr.set_output_dims_mapping(
-                            input_var.name, [-1]
+                            input_var.name, [-1 for _ in input_var.shape]
                        )
                    else:
                        input_var_attr.dims_mapping = ref_dims_mapping
python/paddle/distributed/auto_parallel/engine.py

@@ -511,7 +511,7 @@ class Engine:
             loss_indices = fetch_indices[group_idx]
             assert len(loss_indices) <= 1
             for idx in loss_indices:
-                logs["loss"] = outs[idx][0]
+                logs["loss"] = outs[idx]
             group_idx += 1
         # logging metrics
         dist_context = self._dist_contexts[mode]
python/paddle/distributed/auto_parallel/operators/common.py

@@ -393,7 +393,7 @@ def get_data_parallel_group(dist_ctx, op, act_grad_names, rank):
     for var_name in act_grad_names:
         var_dim_mapping = op_dist_attr.get_input_dims_mapping(var_name)

-        # consider that the variable's shape is None
+        # consider that the variable's shape is [], which is 0D
         # TODO utilize the batch_dim attr instead of "0" in future
         batch_size_axis = var_dim_mapping[0] if len(var_dim_mapping) > 0 else -1
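The guard above is the recurring pattern of this commit across the auto-parallel operator implementations that follow: a 0-D tensor has an empty dims_mapping, so indexing [0] unconditionally would fail. A minimal self-contained sketch of the idea (the helper name here is illustrative, not from the diff):

# Sketch only: why the `if len(...) > 0 else -1` guard is needed once
# reductions can produce 0-D tensors with an empty dims_mapping.
def batch_axis(dims_mapping):
    # dims_mapping lists the mesh axis each tensor dim is sharded on; -1 means
    # "not sharded". A 0-D tensor has no dims, hence an empty list.
    return dims_mapping[0] if len(dims_mapping) > 0 else -1

print(batch_axis([0, -1]))  # 0  -> batch dim sharded on mesh axis 0
print(batch_axis([]))       # -1 -> 0-D tensor, nothing to shard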
python/paddle/distributed/auto_parallel/operators/dist_default.py

@@ -159,7 +159,9 @@ class DistributedDefaultImpl0(DistributedOperatorImpl):
            ):
                var_dim_mapping = dist_attr.get_input_dims_mapping(varname)
                mesh_shape = process_mesh.shape
-                batch_size_axis = var_dim_mapping[0]
+                batch_size_axis = (
+                    var_dim_mapping[0] if len(var_dim_mapping) > 0 else -1
+                )
                if batch_size_axis > -1 and mesh_shape[batch_size_axis] > 1:
                    need_gradient_allreduce = True
                    break
python/paddle/distributed/auto_parallel/operators/dist_eltwise.py

@@ -101,7 +101,9 @@ class DistributedElementwiseImpl0(DistributedOperatorImpl):
            ):
                var_dim_mapping = dist_attr.get_input_dims_mapping(varname)
                mesh_shape = process_mesh.shape
-                batch_size_axis = var_dim_mapping[0]
+                batch_size_axis = (
+                    var_dim_mapping[0] if len(var_dim_mapping) > 0 else -1
+                )
                if batch_size_axis > -1 and mesh_shape[batch_size_axis] > 1:
                    need_gradient_allreduce = True
                    break
python/paddle/distributed/auto_parallel/operators/dist_embedding.py

@@ -252,7 +252,7 @@ class DistributedEmbeddingImpl(DistributedOperatorImpl):
            backward_op.input("Ids")[0]
        )
        mesh_shape = process_mesh.shape
-        batch_size_axis = var_dim_mapping[0]
+        batch_size_axis = var_dim_mapping[0] if len(var_dim_mapping) > 0 else -1
        if batch_size_axis > -1 and mesh_shape[batch_size_axis] > 1:
            parallel_axis = batch_size_axis
            attrs = {"use_calc_stream": True}
python/paddle/distributed/auto_parallel/operators/dist_matmul.py

@@ -651,7 +651,7 @@ class DistributedMatmulImpl0(DistributedOperatorImpl):
            backward_op.input("X")[0]
        )
        mesh_shape = process_mesh.shape
-        batch_size_axis = var_dim_mapping[0]
+        batch_size_axis = var_dim_mapping[0] if len(var_dim_mapping) > 0 else -1
        if (
            batch_size_axis > -1
            and mesh_shape[batch_size_axis] > 1

@@ -1028,7 +1028,7 @@ class DistributedMatmulImpl1(DistributedOperatorImpl):
            backward_op.input("X")[0]
        )
        mesh_shape = process_mesh.shape
-        batch_size_axis = var_dim_mapping[0]
+        batch_size_axis = var_dim_mapping[0] if len(var_dim_mapping) > 0 else -1
        if (
            batch_size_axis > -1
            and mesh_shape[batch_size_axis] > 1

@@ -1365,7 +1365,7 @@ class DistributedMatmulImpl2(DistributedOperatorImpl):
            backward_op.input("X")[0]
        )
        mesh_shape = process_mesh.shape
-        batch_size_axis = var_dim_mapping[0]
+        batch_size_axis = var_dim_mapping[0] if len(var_dim_mapping) > 0 else -1
        if (
            batch_size_axis > -1
            and mesh_shape[batch_size_axis] > 1

@@ -1552,7 +1552,7 @@ class DistributedMatmulV2Impl0(DistributedOperatorImpl):
            backward_op.input("X")[0]
        )
        mesh_shape = process_mesh.shape
-        batch_size_axis = var_dim_mapping[0]
+        batch_size_axis = var_dim_mapping[0] if len(var_dim_mapping) > 0 else -1
        if (
            batch_size_axis > -1
            and mesh_shape[batch_size_axis] > 1

@@ -1929,7 +1929,7 @@ class DistributedMatmulV2Impl1(DistributedOperatorImpl):
            backward_op.input("X")[0]
        )
        mesh_shape = process_mesh.shape
-        batch_size_axis = var_dim_mapping[0]
+        batch_size_axis = var_dim_mapping[0] if len(var_dim_mapping) > 0 else -1
        if (
            batch_size_axis > -1
            and mesh_shape[batch_size_axis] > 1

@@ -2264,7 +2264,7 @@ class DistributedMatmulV2Impl2(DistributedOperatorImpl):
            backward_op.input("X")[0]
        )
        mesh_shape = process_mesh.shape
-        batch_size_axis = var_dim_mapping[0]
+        batch_size_axis = var_dim_mapping[0] if len(var_dim_mapping) > 0 else -1
        if (
            batch_size_axis > -1
            and mesh_shape[batch_size_axis] > 1

@@ -2449,7 +2449,7 @@ class DistributedMulImpl0(DistributedOperatorImpl):
            backward_op.input("X")[0]
        )
        mesh_shape = process_mesh.shape
-        batch_size_axis = var_dim_mapping[0]
+        batch_size_axis = var_dim_mapping[0] if len(var_dim_mapping) > 0 else -1
        if (
            batch_size_axis > -1
            and mesh_shape[batch_size_axis] > 1

@@ -2832,7 +2832,7 @@ class DistributedMulImpl1(DistributedOperatorImpl):
            backward_op.input("X")[0]
        )
        mesh_shape = process_mesh.shape
-        batch_size_axis = var_dim_mapping[0]
+        batch_size_axis = var_dim_mapping[0] if len(var_dim_mapping) > 0 else -1
        if (
            batch_size_axis > -1
            and mesh_shape[batch_size_axis] > 1

@@ -3178,7 +3178,7 @@ class DistributedMulImpl2(DistributedOperatorImpl):
            backward_op.input("X")[0]
        )
        mesh_shape = process_mesh.shape
-        batch_size_axis = var_dim_mapping[0]
+        batch_size_axis = var_dim_mapping[0] if len(var_dim_mapping) > 0 else -1
        if (
            batch_size_axis > -1
            and mesh_shape[batch_size_axis] > 1
python/paddle/distributed/auto_parallel/operators/dist_reshape.py

@@ -120,7 +120,9 @@ class DistributedReshapeImpl0(DistributedOperatorImpl):
            var_dim_mapping = dist_attr.get_input_dims_mapping(varname)
            mesh_shape = process_mesh.shape
-            batch_size_axis = var_dim_mapping[0]
+            batch_size_axis = (
+                var_dim_mapping[0] if len(var_dim_mapping) > 0 else -1
+            )
            if batch_size_axis > -1 and mesh_shape[batch_size_axis] > 1:
                parallel_axis = batch_size_axis
                attrs = {"use_calc_stream": True}

@@ -377,7 +379,9 @@ class DistributedReshapeImpl1(DistributedOperatorImpl):
            var_dim_mapping = dist_attr.get_input_dims_mapping(varname)
            mesh_shape = process_mesh.shape
-            batch_size_axis = var_dim_mapping[0]
+            batch_size_axis = (
+                var_dim_mapping[0] if len(var_dim_mapping) > 0 else -1
+            )
            if batch_size_axis > -1 and mesh_shape[batch_size_axis] > 1:
                parallel_axis = batch_size_axis
                attrs = {"use_calc_stream": True}

@@ -637,7 +641,9 @@ class DistributedReshapeImpl2(DistributedOperatorImpl):
            var_dim_mapping = dist_attr.get_input_dims_mapping(varname)
            mesh_shape = process_mesh.shape
-            batch_size_axis = var_dim_mapping[0]
+            batch_size_axis = (
+                var_dim_mapping[0] if len(var_dim_mapping) > 0 else -1
+            )
            if batch_size_axis > -1 and mesh_shape[batch_size_axis] > 1:
                parallel_axis = batch_size_axis
                attrs = {"use_calc_stream": True}
python/paddle/distributed/auto_parallel/operators/dist_scale.py

@@ -100,7 +100,9 @@ class DistributedScaleImpl(DistributedOperatorImpl):
            ):
                var_dim_mapping = dist_attr.get_input_dims_mapping(varname)
                mesh_shape = process_mesh.shape
-                batch_size_axis = var_dim_mapping[0]
+                batch_size_axis = (
+                    var_dim_mapping[0] if len(var_dim_mapping) > 0 else -1
+                )
                if batch_size_axis > -1 and mesh_shape[batch_size_axis] > 1:
                    need_gradient_allreduce = True
                    break
python/paddle/distributed/auto_parallel/operators/dist_softmax.py

@@ -94,7 +94,9 @@ class DistributedSoftmaxImpl(DistributedOperatorImpl):
            var_dim_mapping = dist_attr.get_input_dims_mapping(varname)
            mesh_shape = process_mesh.shape
-            batch_size_axis = var_dim_mapping[0]
+            batch_size_axis = (
+                var_dim_mapping[0] if len(var_dim_mapping) > 0 else -1
+            )
            if batch_size_axis > -1 and mesh_shape[batch_size_axis] > 1:
                parallel_axis = batch_size_axis
                attrs = {"use_calc_stream": True}
python/paddle/distributed/auto_parallel/operators/dist_transpose.py

@@ -183,7 +183,9 @@ class DistributedTranspose2Impl(DistributedOperatorImpl):
            var_dim_mapping = dist_attr.get_input_dims_mapping(varname)
            mesh_shape = process_mesh.shape
-            batch_size_axis = var_dim_mapping[0]
+            batch_size_axis = (
+                var_dim_mapping[0] if len(var_dim_mapping) > 0 else -1
+            )
            if batch_size_axis > -1 and mesh_shape[batch_size_axis] > 1:
                parallel_axis = batch_size_axis
                attrs = {"use_calc_stream": True}
python/paddle/distributed/auto_parallel/tuner/rule_based_tuner.py

@@ -1727,7 +1727,9 @@ class RuleBasedTuner:
                    len(out_var.shape) == 1 and out_var.shape[0] == 1
                )
-                out_dist_attr.dims_mapping = [-1]
+                out_dist_attr.dims_mapping = [
+                    -1 for _ in out_var.shape
+                ]
                sub_program_dist_context.set_tensor_dist_attr_for_program(
                    out_var, out_dist_attr
                )

@@ -1798,17 +1800,19 @@ class RuleBasedTuner:
                    )
                    learning_var = vars[op.input("LearningRate")[0]]
                    op_dist_attr.set_input_dims_mapping(
-                        learning_var.name, [-1]
+                        learning_var.name, [-1 for i in learning_var.shape]
                    )
                    op_dist_attr.set_output_dims_mapping(
-                        learning_var.name, [-1]
+                        learning_var.name, [-1 for i in learning_var.shape]
                    )

                    if not learning_rate_completed:
                        learning_rate_completed = True
                        var_dist_attr = TensorDistAttr()
                        var_dist_attr.process_mesh = world_ranks
-                        var_dist_attr.dims_mapping = [-1]
+                        var_dist_attr.dims_mapping = [
+                            -1 for i in learning_var.shape
+                        ]
                        sub_program_dist_context.set_tensor_dist_attr_for_program(
                            learning_var, var_dist_attr
                        )
python/paddle/distributed/auto_parallel/utils.py

@@ -1466,7 +1466,8 @@ def update_op_dims_mapping_by_default_dist_impl(dist_op):
            ), "{} only the batch dimension (0-dim) can be sharded, but the dimension {} is sharded by {} part.".format(
                op_desc.type(), idx, mapping
            )
-        batch_dim_mappings.append(dims_mapping[0])
+        if len(dims_mapping) >= 1:
+            batch_dim_mappings.append(dims_mapping[0])

    for arg_name in op_desc.output_arg_names():
        serial_tensor = dist_op.get_serial_output(arg_name)
        if serial_tensor.is_parameter:

@@ -1480,7 +1481,8 @@ def update_op_dims_mapping_by_default_dist_impl(dist_op):
                ), "{} only the batch dimension (0-dim) can be sharded, but the dimension {} is sharded by {} part.".format(
                    op_desc.type(), idx, mapping
                )
-            batch_dim_mappings.append(dims_mapping[0])
+            if len(dims_mapping) >= 1:
+                batch_dim_mappings.append(dims_mapping[0])
        else:
            assert (
                dims_mapping[0] == -1

@@ -1505,7 +1507,7 @@ def update_op_dims_mapping_by_default_dist_impl(dist_op):
        if serial_tensor.is_parameter:
            continue
        dims_mapping = op_dist_attr.get_input_dims_mapping(arg_name)
-        if compatible_dim_mapping != dims_mapping[0]:
+        if len(dims_mapping) >= 1 and compatible_dim_mapping != dims_mapping[0]:
            dims_mapping[0] = compatible_dim_mapping
            changed = True
    for arg_name in op_desc.output_arg_names():

@@ -1514,7 +1516,10 @@ def update_op_dims_mapping_by_default_dist_impl(dist_op):
            continue
        dims_mapping = op_dist_attr.get_output_dims_mapping(arg_name)
        if arg_name not in xshape_arg_names:
-            if compatible_dim_mapping != dims_mapping[0]:
+            if (
+                len(dims_mapping) >= 1
+                and compatible_dim_mapping != dims_mapping[0]
+            ):
                dims_mapping[0] = compatible_dim_mapping
                changed = True
        else:
python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_optimizer.py

@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+import numpy as np
 import paddle
 from paddle import framework

@@ -95,11 +96,10 @@ class HybridParallelClipGrad:
        # global norm of distributed FP16 params_and_grads
        if len(sum_square_dist_fp16) == 0:
            global_norm_dist_fp16 = paddle.to_tensor(
-                [0.0], dtype=paddle.float32
+                np.array(0.0), dtype=paddle.float32
            )
        else:
-            global_norm_dist_fp16 = paddle.concat(sum_square_dist_fp16)
-            global_norm_dist_fp16 = paddle.sum(global_norm_dist_fp16)
+            global_norm_dist_fp16 = paddle.add_n(sum_square_dist_fp16)
            global_norm_dist_fp16 = paddle.cast(
                global_norm_dist_fp16, dtype=paddle.float32
            )

@@ -107,11 +107,10 @@ class HybridParallelClipGrad:
        # global norm of non-distributed FP16 params_and_grads
        if len(sum_square_not_dist_fp16) == 0:
            global_norm_not_dist_fp16 = paddle.to_tensor(
-                [0.0], dtype=paddle.float32
+                np.array(0.0), dtype=paddle.float32
            )
        else:
-            global_norm_not_dist_fp16 = paddle.concat(sum_square_not_dist_fp16)
-            global_norm_not_dist_fp16 = paddle.sum(global_norm_not_dist_fp16)
+            global_norm_not_dist_fp16 = paddle.add_n(sum_square_not_dist_fp16)
            global_norm_not_dist_fp16 = paddle.cast(
                global_norm_not_dist_fp16, dtype=paddle.float32
            )

@@ -119,11 +118,10 @@ class HybridParallelClipGrad:
        # global norm of distributed BF16 params_and_grads
        if len(sum_square_dist_bf16) == 0:
            global_norm_dist_bf16 = paddle.to_tensor(
-                [0.0], dtype=paddle.float32
+                np.array(0.0), dtype=paddle.float32
            )
        else:
-            global_norm_dist_bf16 = paddle.concat(sum_square_dist_bf16)
-            global_norm_dist_bf16 = paddle.sum(global_norm_dist_bf16)
+            global_norm_dist_bf16 = paddle.add_n(sum_square_dist_bf16)
            global_norm_dist_bf16 = paddle.cast(
                global_norm_dist_bf16, dtype=paddle.float32
            )

@@ -131,30 +129,29 @@ class HybridParallelClipGrad:
        # global norm of non-distributed FP16 params_and_grads
        if len(sum_square_not_dist_bf16) == 0:
            global_norm_not_dist_bf16 = paddle.to_tensor(
-                [0.0], dtype=paddle.float32
+                np.array(0.0), dtype=paddle.float32
            )
        else:
-            global_norm_not_dist_bf16 = paddle.concat(sum_square_not_dist_bf16)
-            global_norm_not_dist_bf16 = paddle.sum(global_norm_not_dist_bf16)
+            global_norm_not_dist_bf16 = paddle.add_n(sum_square_not_dist_bf16)
            global_norm_not_dist_bf16 = paddle.cast(
                global_norm_not_dist_bf16, dtype=paddle.float32
            )

        # global norm of distributed FP32 params_and_grads
-        global_norm_dist_fp32 = (
-            paddle.concat(sum_square_dist_fp32)
-            if len(sum_square_dist_fp32) != 0
-            else paddle.to_tensor([0.0], dtype=paddle.float32)
-        )
-        global_norm_dist_fp32 = paddle.sum(global_norm_dist_fp32)
+        if len(sum_square_dist_fp32) == 0:
+            global_norm_dist_fp32 = paddle.to_tensor(
+                np.array(0.0), dtype=paddle.float32
+            )
+        else:
+            global_norm_dist_fp32 = paddle.add_n(sum_square_dist_fp32)

        # global norm of non-distributed FP32 params_and_grads
-        global_norm_not_dist_fp32 = (
-            paddle.concat(sum_square_not_dist_fp32)
-            if len(sum_square_not_dist_fp32) != 0
-            else paddle.to_tensor([0.0], dtype=paddle.float32)
-        )
-        global_norm_not_dist_fp32 = paddle.sum(global_norm_not_dist_fp32)
+        if len(sum_square_not_dist_fp32) == 0:
+            global_norm_not_dist_fp32 = paddle.to_tensor(
+                np.array(0.0), dtype=paddle.float32
+            )
+        else:
+            global_norm_not_dist_fp32 = paddle.add_n(sum_square_not_dist_fp32)

        global_norm_var_dist = (
            global_norm_dist_fp16

@@ -193,14 +190,14 @@ class HybridParallelClipGrad:
        )
        max_global_norm = paddle.full(
-            shape=[1],
+            shape=[],
            dtype=global_norm_var_fp32.dtype,
            fill_value=self.clip_norm,
        )
        clip_var = paddle.divide(
            x=max_global_norm,
            y=paddle.maximum(x=global_norm_var_fp32, y=max_global_norm)
-            + paddle.to_tensor([1.0e-6], dtype=paddle.float32),
+            + paddle.to_tensor(np.array(1.0e-6), dtype=paddle.float32),
        )
        clip_var_fp16 = paddle.cast(clip_var, paddle.float16)
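Two idioms recur in the clipping code above and in group_sharded_utils.py below: paddle.to_tensor(np.array(0.0)) and paddle.full(shape=[]) create 0-D scalars instead of shape-[1] tensors, and the paddle.concat plus paddle.sum combination over per-parameter partial norms is replaced by paddle.add_n, since concat cannot stack 0-D inputs. A hedged, self-contained illustration (values are made up):

# Sketch only: 0-D scalar creation and add_n over 0-D partial sums, mirroring
# the pattern used by the gradient-clipping changes above.
import numpy as np
import paddle

zero = paddle.to_tensor(np.array(0.0), dtype=paddle.float32)
print(zero.shape)                   # [] -> 0-D; paddle.to_tensor([0.0]) would be [1]

partial_sums = [paddle.to_tensor(np.array(v)) for v in (1.0, 2.5, 0.5)]
total = paddle.add_n(partial_sums)  # elementwise sum of the list, still 0-D
print(float(total))                 # 4.0

limit = paddle.full(shape=[], dtype='float32', fill_value=1.0)
print(limit.shape)                  # []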
python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_utils.py

@@ -94,59 +94,64 @@ class GroupShardedClipGrad:
        # global norm of non-distributed FP16 params_and_grads
        if len(sum_square_fp16) == 0:
-            global_norm_fp16 = paddle.to_tensor([0.0], dtype=paddle.float32)
+            global_norm_fp16 = paddle.to_tensor(
+                np.array(0.0), dtype=paddle.float32
+            )
        else:
-            global_norm_fp16 = paddle.concat(sum_square_fp16)
-            global_norm_fp16 = paddle.sum(global_norm_fp16)
+            global_norm_fp16 = paddle.add_n(sum_square_fp16)
            global_norm_fp16 = paddle.cast(
                global_norm_fp16, dtype=paddle.float32
            )

        # global norm of non-distributed BFP16 params_and_grads
        if len(sum_square_bfp16) == 0:
-            global_norm_bfp16 = paddle.to_tensor([0.0], dtype=paddle.float32)
+            global_norm_bfp16 = paddle.to_tensor(
+                np.array(0.0), dtype=paddle.float32
+            )
        else:
-            global_norm_bfp16 = paddle.concat(sum_square_bfp16)
-            global_norm_bfp16 = paddle.sum(global_norm_bfp16)
+            global_norm_bfp16 = paddle.add_n(sum_square_bfp16)
            global_norm_bfp16 = paddle.cast(
                global_norm_bfp16, dtype=paddle.float32
            )

        # global norm of non-distributed FP16 params_and_grads for unslice parameters
        if len(unslice_params_fp16) == 0:
-            global_unslice_fp16 = paddle.to_tensor([0.0], dtype=paddle.float32)
+            global_unslice_fp16 = paddle.to_tensor(
+                np.array(0.0), dtype=paddle.float32
+            )
        else:
-            global_unslice_fp16 = paddle.concat(unslice_params_fp16)
-            global_unslice_fp16 = paddle.sum(global_unslice_fp16)
+            global_unslice_fp16 = paddle.add_n(unslice_params_fp16)
            global_unslice_fp16 = paddle.cast(
                global_unslice_fp16, dtype=paddle.float32
            )

        # global norm of non-distributed BFP16 params_and_grads for unslice parameters
        if len(unslice_params_bfp16) == 0:
-            global_unslice_bfp16 = paddle.to_tensor([0.0], dtype=paddle.float32)
+            global_unslice_bfp16 = paddle.to_tensor(
+                np.array(0.0), dtype=paddle.float32
+            )
        else:
-            global_unslice_bfp16 = paddle.concat(unslice_params_bfp16)
-            global_unslice_bfp16 = paddle.sum(global_unslice_bfp16)
+            global_unslice_bfp16 = paddle.add_n(unslice_params_bfp16)
            global_unslice_bfp16 = paddle.cast(
                global_unslice_bfp16, dtype=paddle.float32
            )

        # global norm of non-distributed FP32 params_and_grads
-        global_norm_fp32 = (
-            paddle.concat(sum_square_fp32)
-            if len(sum_square_fp32) != 0
-            else paddle.to_tensor([0.0], dtype=paddle.float32)
-        )
-        global_norm_fp32 = paddle.sum(global_norm_fp32)
+        if len(sum_square_fp32) == 0:
+            global_norm_fp32 = paddle.to_tensor(
+                np.array(0.0), dtype=paddle.float32
+            )
+        else:
+            global_norm_fp32 = paddle.add_n(sum_square_fp32)

        # global norm of non-distributed FP32 params_and_grads for unslice parameters
-        global_unslice_fp32 = (
-            paddle.concat(unslice_params_fp32)
-            if len(unslice_params_fp32) != 0
-            else paddle.to_tensor([0.0], dtype=paddle.float32)
-        )
-        global_unslice_fp32 = paddle.sum(global_unslice_fp32)
+        if len(unslice_params_fp32) == 0:
+            global_unslice_fp32 = paddle.to_tensor(
+                np.array(0.0), dtype=paddle.float32
+            )
+        else:
+            global_unslice_fp32 = paddle.add_n(unslice_params_fp32)
        global_unslice_var = (
            global_unslice_fp16 + global_unslice_fp32 + global_unslice_bfp16
        )

@@ -165,7 +170,7 @@ class GroupShardedClipGrad:
        global_norm_var = paddle.sqrt(global_norm_var + global_unslice_var)
        max_global_norm = paddle.full(
-            shape=[1], dtype=global_norm_var.dtype, fill_value=self.clip_norm
+            shape=[], dtype=global_norm_var.dtype, fill_value=self.clip_norm
        )
        clip_var = paddle.divide(
python/paddle/distributed/fleet/metrics/metric.py

@@ -40,7 +40,7 @@ def sum(input, scope=None, util=None):
          # in model.py
          input = paddle.cast(some_input, dtype='float32')
          cnt = paddle.sum(input)
-          global_cnt = paddle.static.create_global_var(persistable=True, dtype='float32', shape=[1], value=0)
+          global_cnt = paddle.static.create_global_var(persistable=True, dtype='float32', shape=[], value=0)
          tmp = paddle.add(cnt, global_cnt)
          paddle.assign(tmp, global_cnt)

@@ -80,7 +80,7 @@ def max(input, scope=None, util=None):
          # in model.py
          input = paddle.cast(some_input, dtype='float32')
          cnt = paddle.sum(input)
-          global_cnt = paddle.static.create_global_var(persistable=True, dtype='float32', shape=[1], value=0)
+          global_cnt = paddle.static.create_global_var(persistable=True, dtype='float32', shape=[], value=0)
          tmp = paddle.maximum(cnt, global_cnt)
          paddle.assign(tmp, global_cnt)

@@ -120,7 +120,7 @@ def min(input, scope=None, util=None):
          # in model.py
          input = paddle.cast(some_input, dtype='float32')
          cnt = paddle.sum(input)
-          global_cnt = paddle.static.create_global_var(persistable=True, dtype='float32', shape=[1], value=0)
+          global_cnt = paddle.static.create_global_var(persistable=True, dtype='float32', shape=[], value=0)
          tmp = paddle.minimum(cnt, global_cnt)
          paddle.assign(tmp, global_cnt)
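The lines changed above are docstring examples: the accumulator variable now uses shape=[] so that it matches the 0-D output of paddle.sum. The snippet below is only a hedged dygraph sketch of that shape compatibility, not the documented static-graph API shown in the docstrings:

# Sketch only: a 0-D accumulator stays 0-D when combined with the 0-D result
# of paddle.sum, which is why the docstrings switch shape=[1] -> shape=[].
import paddle

global_cnt = paddle.full(shape=[], fill_value=0.0)   # 0-D accumulator
cnt = paddle.sum(paddle.ones([4, 2]))                # 0-D after this commit
global_cnt = paddle.add(cnt, global_cnt)
print(global_cnt.shape, float(global_cnt))           # [] 8.0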
python/paddle/distributed/passes/auto_parallel_amp.py

@@ -955,7 +955,7 @@ class AMPPass(PassBase):
            loss_op._set_attr(OP_ROLE_KEY, OpRole.Forward)
            naive_set_dist_op_attr_for_program_by_mesh_and_mapping(
-                cast_op, ref_mesh, [-1], self.dist_context
+                cast_op, ref_mesh, [-1 for i in loss.shape], self.dist_context
            )

            # backward

@@ -970,12 +970,20 @@ class AMPPass(PassBase):
                    dtype=core.VarDesc.VarType.FP32,
                    persistable=loss.persistable,
                )
-                set_var_dist_attr(self.dist_context, cast_loss_grad, [-1], ref_mesh)
+                set_var_dist_attr(
+                    self.dist_context,
+                    cast_loss_grad,
+                    [-1 for i in loss.shape],
+                    ref_mesh,
+                )

                pre_grad_name = first_backward_op.output_arg_names[0]
                first_backward_op._rename_output(pre_grad_name, cast_loss_grad.name)
                naive_set_dist_op_attr_for_program_by_mesh_and_mapping(
-                    first_backward_op, ref_mesh, [-1], self.dist_context
+                    first_backward_op,
+                    ref_mesh,
+                    [-1 for i in loss.shape],
+                    self.dist_context,
                )
                cast_grad_op = main_block._insert_op(
                    loss_op_idx + 3,

@@ -989,7 +997,10 @@ class AMPPass(PassBase):
                    },
                )
                naive_set_dist_op_attr_for_program_by_mesh_and_mapping(
-                    cast_grad_op, ref_mesh, [-1], self.dist_context
+                    cast_grad_op,
+                    ref_mesh,
+                    [-1 for i in loss.shape],
+                    self.dist_context,
                )
                loss_op = cast_op
                loss = cast_loss

@@ -1021,7 +1032,12 @@ class AMPPass(PassBase):
                dtype=loss.dtype,
                persistable=loss.persistable,
            )
-            set_var_dist_attr(self.dist_context, scaled_loss, [-1], ref_mesh)
+            set_var_dist_attr(
+                self.dist_context,
+                scaled_loss,
+                [-1 for i in loss.shape],
+                ref_mesh,
+            )

            elementwise_mul_op = main_block._insert_op(
                loss_op_idx + 1,

@@ -1034,7 +1050,10 @@ class AMPPass(PassBase):
            )
            loss_op._set_attr(OP_ROLE_KEY, OpRole.Forward)
            naive_set_dist_op_attr_for_program_by_mesh_and_mapping(
-                elementwise_mul_op, ref_mesh, [-1], self.dist_context
+                elementwise_mul_op,
+                ref_mesh,
+                [-1 for i in loss.shape],
+                self.dist_context,
            )

            # backward

@@ -1050,14 +1069,20 @@ class AMPPass(PassBase):
                    persistable=loss.persistable,
                )
                set_var_dist_attr(
-                    self.dist_context, scaled_loss_grad, [-1], ref_mesh
+                    self.dist_context,
+                    scaled_loss_grad,
+                    [-1 for i in loss.shape],
+                    ref_mesh,
                )
                pre_grad_name = first_backward_op.output_arg_names[0]
                first_backward_op._rename_output(pre_grad_name, scaled_loss_grad.name)
                naive_set_dist_op_attr_for_program_by_mesh_and_mapping(
-                    first_backward_op, ref_mesh, [-1], self.dist_context
+                    first_backward_op,
+                    ref_mesh,
+                    [-1 for i in loss.shape],
+                    self.dist_context,
                )
                scaled_loss_grad.op = first_backward_op
                # FIXME(JZ-LIANG) a trick to insert backward op

@@ -1085,7 +1110,10 @@ class AMPPass(PassBase):
                elementwise_mul_grad_op = main_block.ops[loss_op_idx + 3]
                assert elementwise_mul_grad_op.type == "elementwise_mul_grad"
                naive_set_dist_op_attr_for_program_by_mesh_and_mapping(
-                    elementwise_mul_grad_op, ref_mesh, [-1], self.dist_context
+                    elementwise_mul_grad_op,
+                    ref_mesh,
+                    [-1 for i in loss.shape],
+                    self.dist_context,
                )
        else:
            scaled_loss = loss
python/paddle/distributed/passes/auto_parallel_fp16.py

@@ -678,7 +678,12 @@ def _insert_memcopy(block, idx, src_var, dist_context, direction="D2H"):
        stop_gradient=src_var.stop_gradient,
    )
-    set_var_dist_attr(dist_context, output_var, [-1], world_process_group.ranks)
+    set_var_dist_attr(
+        dist_context,
+        output_var,
+        [-1 for i in src_var.shape],
+        world_process_group.ranks,
+    )

    # TODO to support CUDAPinned/NPU/XPU Places
    if direction == "D2H":

@@ -894,7 +899,7 @@ class FP16Pass(AMPPass):
                set_var_dist_attr(
                    self.dist_context,
                    found_inf,
-                    [-1],
+                    [-1 for i in found_inf.shape],
                    world_process_group.ranks,
                )
                _set_op_dist_attr_with_ranks(
python/paddle/distributed/passes/auto_parallel_grad_clip.py

@@ -221,7 +221,7 @@ class ClipHelper:
                in_var = self.block.vars[in_name]
                in_dist_attr = TensorDistAttr()
                in_dist_attr.process_mesh = ProcessMesh(self.world_ranks)
-                in_dist_attr.dims_mapping = [-1]
+                in_dist_attr.dims_mapping = [-1 for i in in_var.shape]
                self.dist_context.set_tensor_dist_attr_for_program(
                    in_var, in_dist_attr
                )

@@ -230,7 +230,7 @@ class ClipHelper:
                out_var = self.block.vars[out_name]
                out_dist_attr = TensorDistAttr()
                out_dist_attr.process_mesh = ProcessMesh(self.world_ranks)
-                out_dist_attr.dims_mapping = [-1]
+                out_dist_attr.dims_mapping = [-1 for i in out_var.shape]
                self.dist_context.set_tensor_dist_attr_for_program(
                    out_var, out_dist_attr
                )
python/paddle/distributed/passes/auto_parallel_quantization.py

@@ -300,7 +300,7 @@ class QuantizationPass(PassBase):
            for slot_name in quant_op.desc.input_names():
                in_name = quant_op.desc.input(slot_name)[0]
                input_var = block._var_recursive(in_name)
-                ref_dims_mapping = [-1]
+                ref_dims_mapping = [-1 for i in input_var.shape]
                if slot_name == "X":
                    continue
                elif slot_name in ['Scale', 'ZeroPoint']:

@@ -333,7 +333,7 @@ class QuantizationPass(PassBase):
            for slot_name in quant_op.desc.output_names():
                output_name = quant_op.desc.output(slot_name)[0]
                output_var = block._var_recursive(output_name)
-                ref_dims_mapping = [-1]
+                ref_dims_mapping = [-1 for i in output_var.shape]
                if slot_name == "Y":
                    dist_context.set_tensor_dist_attr_for_program(
                        output_var, consume_input_dist_attr
python/paddle/fluid/tests/unittests/check_nan_inf_base.py

@@ -95,11 +95,7 @@ def check(use_cuda):
            fetch_list=[y_predict.name, avg_cost.name, acc_top1.name],
        )
        step += 1
-        print(
-            'iter={:.0f},cost={},acc1={}'.format(
-                step, outs[1][0], outs[2]
-            )
-        )
+        print(f'iter={step:.0f},cost={outs[1]},acc1={outs[2]}')


if __name__ == '__main__':
python/paddle/fluid/tests/unittests/seresnext_test_base.py

@@ -49,17 +49,19 @@ class TestResnetBase(TestParallelExecutorBase):
        )
        if compare_separately:
-            for loss in zip(func_1_first_loss, func_2_first_loss):
-                self.assertAlmostEqual(loss[0], loss[1], delta=1e-5)
-            for loss in zip(func_1_last_loss, func_2_last_loss):
-                self.assertAlmostEqual(loss[0], loss[1], delta=delta2)
+            self.assertAlmostEqual(
+                func_1_first_loss, func_2_first_loss, delta=1e-5
+            )
+            self.assertAlmostEqual(
+                func_1_last_loss, func_2_last_loss, delta=delta2
+            )
        else:
            np.testing.assert_allclose(
                func_1_loss_area, func_2_loss_area, rtol=delta2
            )
            self.assertAlmostEqual(
-                np.mean(func_1_first_loss), func_2_first_loss[0], delta=1e-5
+                func_1_first_loss, func_2_first_loss, delta=1e-5
            )
            self.assertAlmostEqual(
-                np.mean(func_1_last_loss), func_2_last_loss[0], delta=delta2
+                func_1_last_loss, func_2_last_loss, delta=delta2
            )
python/paddle/fluid/tests/unittests/test_argsort_op.py

@@ -24,6 +24,7 @@ from paddle.fluid.executor import Executor
 from paddle.fluid.framework import Program, grad_var_name

+np.random.seed(123)
 paddle.enable_static()


 class PyArgsort:

@@ -52,7 +53,7 @@ class PyArgsort:
        out = (
            np.array(self.indices, dtype=self.indices.dtype),
            np.array(self.sorted_x, dtype=self.sorted_x.dtype),
-            np.array([self.loss], dtype=self.loss.dtype),
+            np.array(self.loss, dtype=self.loss.dtype),
        )
        return out

@@ -178,7 +179,7 @@ class TestArgsortOpCPU(unittest.TestCase):
            f[...] = o
            dout_dfeed = (y_pos - y_neg) / (delta * 2)
-            g[...] = dout_dfeed[0]
+            g[...] = dout_dfeed

        return grad_list
python/paddle/fluid/tests/unittests/test_cond.py

@@ -674,7 +674,7 @@ class TestCondBackward(unittest.TestCase):
                    },
                    fetch_list=[loss.name],
                )
-                numerical_grad[0][j] = (loss_delta[0] - loss_value[0]) / delta
+                numerical_grad[0][j] = (loss_delta - loss_value) / delta
                feed_img_delta[0][j] = feed_img[0][j]
            np.testing.assert_allclose(
                img_grad, numerical_grad, rtol=0.05, atol=0.05
python/paddle/fluid/tests/unittests/test_cosine_embedding_loss.py

@@ -64,7 +64,7 @@ class TestFunctionCosineEmbeddingLoss(unittest.TestCase):
            reduction='mean',
        )
        np.testing.assert_allclose(dy_result.numpy(), expected1, rtol=1e-05)
-        self.assertTrue(dy_result.shape, [1])
+        self.assertEqual(dy_result.shape, [])

        dy_result = paddle.nn.functional.cosine_embedding_loss(
            input1, input2, label, margin=0.5, reduction='sum'

@@ -78,7 +78,7 @@ class TestFunctionCosineEmbeddingLoss(unittest.TestCase):
        )
        np.testing.assert_allclose(dy_result.numpy(), expected2, rtol=1e-05)
-        self.assertTrue(dy_result.shape, [1])
+        self.assertEqual(dy_result.shape, [])

        dy_result = paddle.nn.functional.cosine_embedding_loss(
            input1, input2, label, margin=0.5, reduction='none'

@@ -92,7 +92,7 @@ class TestFunctionCosineEmbeddingLoss(unittest.TestCase):
        )
        np.testing.assert_allclose(dy_result.numpy(), expected3, rtol=1e-05)
-        self.assertTrue(dy_result.shape, [5])
+        self.assertEqual(dy_result.shape, [5])

    def run_static(self, use_gpu=False):
        input1 = static.data(name='input1', shape=[5, 3], dtype='float64')

@@ -257,7 +257,7 @@ class TestClassCosineEmbeddingLoss(unittest.TestCase):
            reduction='mean',
        )
        np.testing.assert_allclose(dy_result.numpy(), expected1, rtol=1e-05)
-        self.assertTrue(dy_result.shape, [1])
+        self.assertEqual(dy_result.shape, [])

        input1_1D = paddle.to_tensor(self.input1_np_1D)
        input2_1D = paddle.to_tensor(self.input2_np_1D)
python/paddle/fluid/tests/unittests/test_eager_deletion_recurrent_op.py

@@ -42,7 +42,7 @@ class PyRNNBase:
    def forward(self):
        for step_id in range(self.x.shape[0]):
            self.step(step_id, self.x[step_id])
-        return np.array([np.mean(self.y)])
+        return np.mean(self.y)

    def segment_inputs(self):
        return [self.x[i] for i in range(self.x.shape[0])]

@@ -251,7 +251,7 @@ class EagerDeletionRecurrentOpTest1(unittest.TestCase):
            f[...] = o
            dout_dfeed = (y_pos - y_neg) / (delta * 2)
-            g[...] = dout_dfeed[0]
+            g[...] = dout_dfeed

        return grad_list
python/paddle/fluid/tests/unittests/test_fetch_lod_tensor_array.py

@@ -69,9 +69,10 @@ class TestFetchLoDTensorArray(unittest.TestCase):
            loss_v, array_v = exe.run(
                binary, feed=feed_dict, fetch_list=[loss, array]
            )
-            self.assertEqual(np.array(loss_v).shape, (1,))
-            self.assertEqual(np.array(array_v[0]).shape, (batch_size, 784))
-            self.assertEqual(np.array(array_v[1]).shape, (batch_size, 1))
+            self.assertEqual(loss_v.shape, ())
+            self.assertEqual(array_v[0].shape, (batch_size, 784))
+            self.assertEqual(array_v[1].shape, (batch_size, 1))
+            self.assertEqual(array_v[2].shape, ())
            np.testing.assert_allclose(loss_v, array_v[2], rtol=1e-05)

    def test_fetch_lod_tensor_array(self):

@@ -81,4 +82,5 @@ class TestFetchLoDTensorArray(unittest.TestCase):

if __name__ == '__main__':
+    paddle.enable_static()
    unittest.main()
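Most of the test changes that follow share the reasoning visible in this file: once a fetched loss is 0-D, executor results arrive as 0-d numpy arrays (shape ()), so the trailing [0] indexing disappears and assertions compare the scalars directly. A hedged sketch of the numpy side of that contract:

# Sketch only: what a 0-D fetched loss looks like on the numpy side, and why
# tests drop the trailing [0] indexing.
import numpy as np

loss_v = np.array(0.25)   # 0-d array, analogous to a fetched 0-D tensor
print(loss_v.shape)       # ()
print(float(loss_v))      # 0.25 -- compare directly, no loss_v[0]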
python/paddle/fluid/tests/unittests/test_fuse_all_reduce_pass.py

@@ -78,10 +78,12 @@ class TestFuseAllReduceOpsBase(TestParallelExecutorBase):
            optimizer=optimizer,
        )
-        for loss in zip(not_fuse_op_first_loss, fuse_op_first_loss):
-            self.assertAlmostEqual(loss[0], loss[1], delta=1e-6)
-        for loss in zip(not_fuse_op_last_loss, fuse_op_last_loss):
-            self.assertAlmostEqual(loss[0], loss[1], delta=1e-6)
+        self.assertAlmostEqual(
+            not_fuse_op_first_loss, fuse_op_first_loss, delta=1e-6
+        )
+        self.assertAlmostEqual(
+            not_fuse_op_last_loss, fuse_op_last_loss, delta=1e-6
+        )

    def optimizer(self, learning_rate=1e-3):
        optimizer = fluid.optimizer.SGD(
python/paddle/fluid/tests/unittests/test_fuse_bn_act_pass.py

@@ -98,7 +98,7 @@ class TestFuseBatchNormActPass(unittest.TestCase):
                loss_v = exe.run(
                    binary, feed=feeder.feed(data), fetch_list=[loss]
                )
-                loss_vals.append(loss_v[0][0])
+                loss_vals.append(loss_v[0])

        # open fused_bn_act_ops
        build_strategy_fused = fluid.BuildStrategy()

@@ -118,7 +118,7 @@ class TestFuseBatchNormActPass(unittest.TestCase):
                loss_v = exe.run(
                    binary_fused, feed=feeder.feed(data), fetch_list=[loss]
                )
-                loss_vals_fused.append(loss_v[0][0])
+                loss_vals_fused.append(loss_v[0])

        # check loss
        for i in range(iters):
python/paddle/fluid/tests/unittests/test_fuse_bn_add_act_pass.py

@@ -216,7 +216,7 @@ class TestFusedBnAddActAPI(unittest.TestCase):
                loss_v = exe.run(
                    binary_fused, feed={"x": x, "y": y}, fetch_list=[loss]
                )
-                loss_vals_fused.append(loss_v[0][0])
+                loss_vals_fused.append(loss_v[0])

        # build_origin_program: turn off fused_bn_act_ops
        build_strategy = fluid.BuildStrategy()

@@ -234,7 +234,7 @@ class TestFusedBnAddActAPI(unittest.TestCase):
                    feed={"x": x_data[i], "y": y_data[i]},
                    fetch_list=[loss],
                )
-                loss_vals.append(loss_v[0][0])
+                loss_vals.append(loss_v[0])

        # check loss
        for i in range(iters):
python/paddle/fluid/tests/unittests/test_fuse_elewise_add_act_pass.py

@@ -74,10 +74,12 @@ class TestMNIST(TestParallelExecutorBase):
            optimizer=_optimizer,
        )
-        for loss in zip(not_fuse_op_first_loss, fuse_op_first_loss):
-            self.assertAlmostEqual(loss[0], loss[1], delta=1e-6)
-        for loss in zip(not_fuse_op_last_loss, fuse_op_last_loss):
-            self.assertAlmostEqual(loss[0], loss[1], delta=1e-6)
+        self.assertAlmostEqual(
+            not_fuse_op_first_loss, fuse_op_first_loss, delta=1e-6
+        )
+        self.assertAlmostEqual(
+            not_fuse_op_last_loss, fuse_op_last_loss, delta=1e-6
+        )

    def test_simple_fc_with_fuse_op(self):
        self._compare_fuse_elewise_add_act_ops(simple_fc_net, DeviceType.CUDA)
python/paddle/fluid/tests/unittests/test_fuse_optimizer_pass.py

@@ -70,10 +70,12 @@ class TestFuseOptimizationOps(TestParallelExecutorBase):
            optimizer=optimizer,
        )
-        for loss in zip(not_fuse_op_first_loss, fuse_op_first_loss):
-            self.assertAlmostEqual(loss[0], loss[1], delta=1e-6)
-        for loss in zip(not_fuse_op_last_loss, fuse_op_last_loss):
-            self.assertAlmostEqual(loss[0], loss[1], delta=1e-6)
+        self.assertAlmostEqual(
+            not_fuse_op_first_loss, fuse_op_first_loss, delta=1e-6
+        )
+        self.assertAlmostEqual(
+            not_fuse_op_last_loss, fuse_op_last_loss, delta=1e-6
+        )

    def _decorate_compare_fused_optimizer_ops(
        self, model, use_device, optimizer
python/paddle/fluid/tests/unittests/test_fuse_relu_depthwise_conv_pass.py

@@ -118,10 +118,12 @@ class TestMNIST(TestParallelExecutorBase):
            optimizer=_optimizer,
        )
-        for loss in zip(not_fuse_op_first_loss, fuse_op_first_loss):
-            self.assertAlmostEqual(loss[0], loss[1], delta=1e-6)
-        for loss in zip(not_fuse_op_last_loss, fuse_op_last_loss):
-            self.assertAlmostEqual(loss[0], loss[1], delta=1e-6)
+        self.assertAlmostEqual(
+            not_fuse_op_first_loss, fuse_op_first_loss, delta=1e-6
+        )
+        self.assertAlmostEqual(
+            not_fuse_op_last_loss, fuse_op_last_loss, delta=1e-6
+        )

    def test_simple_depthwise_with_fuse_op(self):
        self._compare(simple_depthwise_net, DeviceType.CUDA)
python/paddle/fluid/tests/unittests/test_gradient_clip.py

@@ -152,7 +152,7 @@ class TestGradientClip(unittest.TestCase):
            data = next(self.train_data())
            val = exe.run(prog, feed=feeder.feed(data), fetch_list=[cost])[0]
-            self.assertEqual((1,), val.shape)
+            self.assertEqual(val.shape, ())
            self.assertFalse(np.isnan(val))

    def backward_and_optimize(self, cost):
python/paddle/fluid/tests/unittests/test_hinge_embedding_loss.py

@@ -50,7 +50,7 @@ class TestFunctionalHingeEmbeddingLoss(unittest.TestCase):
        dy_result = paddle.nn.functional.hinge_embedding_loss(input, label)
        expected = calc_hinge_embedding_loss(self.input_np, self.label_np)
        np.testing.assert_allclose(dy_result.numpy(), expected, rtol=1e-05)
-        self.assertTrue(dy_result.shape, [1])
+        self.assertEqual(dy_result.shape, [])

        dy_result = paddle.nn.functional.hinge_embedding_loss(
            input, label, reduction='sum'

@@ -59,7 +59,7 @@ class TestFunctionalHingeEmbeddingLoss(unittest.TestCase):
            self.input_np, self.label_np, reduction='sum'
        )
        np.testing.assert_allclose(dy_result.numpy(), expected, rtol=1e-05)
-        self.assertTrue(dy_result.shape, [1])
+        self.assertEqual(dy_result.shape, [])

        dy_result = paddle.nn.functional.hinge_embedding_loss(
            input, label, reduction='none'

@@ -68,7 +68,7 @@ class TestFunctionalHingeEmbeddingLoss(unittest.TestCase):
            self.input_np, self.label_np, reduction='none'
        )
        np.testing.assert_allclose(dy_result.numpy(), expected, rtol=1e-05)
-        self.assertTrue(dy_result.shape, self.shape)
+        self.assertEqual(dy_result.shape, list(self.shape))

    def run_static_check(self, place=paddle.CPUPlace):
        paddle.enable_static()

@@ -129,7 +129,7 @@ class TestClassHingeEmbeddingLoss(unittest.TestCase):
        dy_result = hinge_embedding_loss(input, label)
        expected = calc_hinge_embedding_loss(self.input_np, self.label_np)
        np.testing.assert_allclose(dy_result.numpy(), expected, rtol=1e-05)
-        self.assertTrue(dy_result.shape, [1])
+        self.assertEqual(dy_result.shape, [])

        hinge_embedding_loss = paddle.nn.loss.HingeEmbeddingLoss(
            reduction='sum'

@@ -139,7 +139,7 @@ class TestClassHingeEmbeddingLoss(unittest.TestCase):
            self.input_np, self.label_np, reduction='sum'
        )
        np.testing.assert_allclose(dy_result.numpy(), expected, rtol=1e-05)
-        self.assertTrue(dy_result.shape, [1])
+        self.assertEqual(dy_result.shape, [])

        hinge_embedding_loss = paddle.nn.loss.HingeEmbeddingLoss(
            reduction='none'

@@ -149,7 +149,7 @@ class TestClassHingeEmbeddingLoss(unittest.TestCase):
            self.input_np, self.label_np, reduction='none'
        )
        np.testing.assert_allclose(dy_result.numpy(), expected, rtol=1e-05)
-        self.assertTrue(dy_result.shape, self.shape)
+        self.assertTrue(dy_result.shape, list(self.shape))

    def run_static_check(self, place=paddle.CPUPlace):
        paddle.enable_static()
python/paddle/fluid/tests/unittests/test_ir_memory_optimize_pass.py

@@ -80,10 +80,9 @@ class TestMNIST(TestParallelExecutorBase):
            use_device=use_device,
            use_ir_memory_optimize=True,
        )
-        for loss in zip(first_loss0, first_loss1):
-            self.assertAlmostEqual(loss[0], loss[1], delta=1e-6)
-        for loss in zip(last_loss0, last_loss1):
-            self.assertAlmostEqual(loss[0], loss[1], delta=1e-6)
+        self.assertAlmostEqual(first_loss0, first_loss1, delta=1e-6)
+        self.assertAlmostEqual(last_loss0, last_loss1, delta=1e-6)

    def test_simple_fc_net(self):
        self._compare_ir_memory_optimize(simple_fc_net, DeviceType.CPU)
python/paddle/fluid/tests/unittests/test_l1_loss.py

@@ -36,7 +36,7 @@ class TestFunctionalL1Loss(unittest.TestCase):
        dy_result = paddle.nn.functional.l1_loss(input, label, reduction='sum')
        expected = np.sum(np.abs(self.input_np - self.label_np))
        np.testing.assert_allclose(dy_result.numpy(), expected, rtol=1e-05)
-        self.assertEqual(dy_result.shape, [1])
+        self.assertEqual(dy_result.shape, [])

        dy_result = paddle.nn.functional.l1_loss(input, label, reduction='none')
        expected = np.abs(self.input_np - self.label_np)

@@ -125,7 +125,7 @@ class TestClassL1Loss(unittest.TestCase):
        dy_result = l1_loss(input, label)
        expected = np.sum(np.abs(self.input_np - self.label_np))
        np.testing.assert_allclose(dy_result.numpy(), expected, rtol=1e-05)
-        self.assertEqual(dy_result.shape, [1])
+        self.assertEqual(dy_result.shape, [])

        l1_loss = paddle.nn.loss.L1Loss(reduction='none')
        dy_result = l1_loss(input, label)
python/paddle/fluid/tests/unittests/test_mse_loss.py

@@ -118,7 +118,7 @@ class TestNNMseLoss(unittest.TestCase):
            np.testing.assert_allclose(static_result, expected, rtol=1e-05)
            np.testing.assert_allclose(static_result, dy_result, rtol=1e-05)
            np.testing.assert_allclose(dy_result, expected, rtol=1e-05)
-            self.assertTrue(dy_result.shape, [1])
+            self.assertEqual(dy_result.shape, ())

    def test_NNMseLoss_sum(self):
        for dim in [[10, 10], [2, 10, 10], [3, 3, 10, 10]]:

@@ -164,7 +164,7 @@ class TestNNMseLoss(unittest.TestCase):
            np.testing.assert_allclose(static_result, expected, rtol=1e-05)
            np.testing.assert_allclose(static_result, dy_result, rtol=1e-05)
            np.testing.assert_allclose(dy_result, expected, rtol=1e-05)
-            self.assertTrue(dy_result.shape, [1])
+            self.assertEqual(dy_result.shape, ())

    def test_NNMseLoss_none(self):
        for dim in [[10, 10], [2, 10, 10], [3, 3, 10, 10]]:

@@ -210,7 +210,7 @@ class TestNNMseLoss(unittest.TestCase):
            np.testing.assert_allclose(static_result, expected, rtol=1e-05)
            np.testing.assert_allclose(static_result, dy_result, rtol=1e-05)
            np.testing.assert_allclose(dy_result, expected, rtol=1e-05)
-            self.assertTrue(dy_result.shape, [1])
+            self.assertEqual(dy_result.shape, tuple(dim))


class TestNNFunctionalMseLoss(unittest.TestCase):

@@ -254,7 +254,7 @@ class TestNNFunctionalMseLoss(unittest.TestCase):
            np.testing.assert_allclose(static_result, expected, rtol=1e-05)
            np.testing.assert_allclose(static_result, dy_result, rtol=1e-05)
            np.testing.assert_allclose(dy_result, expected, rtol=1e-05)
-            self.assertTrue(dy_result.shape, [1])
+            self.assertEqual(dy_result.shape, ())

    def test_NNFunctionalMseLoss_sum(self):
        for dim in [[10, 10], [2, 10, 10], [3, 3, 10, 10]]:

@@ -296,7 +296,7 @@ class TestNNFunctionalMseLoss(unittest.TestCase):
            np.testing.assert_allclose(static_result, expected, rtol=1e-05)
            np.testing.assert_allclose(static_result, dy_result, rtol=1e-05)
            np.testing.assert_allclose(dy_result, expected, rtol=1e-05)
-            self.assertTrue(dy_result.shape, [1])
+            self.assertEqual(dy_result.shape, ())

    def test_NNFunctionalMseLoss_none(self):
        for dim in [[10, 10], [2, 10, 10], [3, 3, 10, 10]]:

@@ -338,7 +338,7 @@ class TestNNFunctionalMseLoss(unittest.TestCase):
            np.testing.assert_allclose(static_result, expected, rtol=1e-05)
            np.testing.assert_allclose(static_result, dy_result, rtol=1e-05)
            np.testing.assert_allclose(dy_result, expected, rtol=1e-05)
-            self.assertTrue(dy_result.shape, [1])
+            self.assertEqual(dy_result.shape, tuple(dim))


if __name__ == "__main__":
python/paddle/fluid/tests/unittests/test_nan_inf.py

@@ -42,10 +42,6 @@ class TestNanInf(unittest.TestCase):
        out, err = proc.communicate()
        returncode = proc.returncode

-        print(out)
-        print(err)
-
        # in python3, type(out+err) is 'bytes', need use encode
        assert (out + err).find(b'There are NAN or INF') != -1
python/paddle/fluid/tests/unittests/test_parallel_executor_run_cinn.py

@@ -110,7 +110,7 @@ def train(dot_save_dir, prefix, seed=1234):
    loss_values = []
    for step in range(iters):
        loss_v = exe.run(compiled_program, feed=feed[step], fetch_list=[loss])
-        loss_values.append(loss_v[0][0])
+        loss_values.append(loss_v[0])
    return loss_values
python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_with_reduce_cpu.py

@@ -48,10 +48,14 @@ class TestResnetWithReduceBase(TestParallelExecutorBase):
            optimizer=seresnext_net.optimizer,
        )
-        for loss in zip(all_reduce_first_loss, reduce_first_loss):
-            self.assertAlmostEqual(loss[0], loss[1], delta=1e-5)
-        for loss in zip(all_reduce_last_loss, reduce_last_loss):
-            self.assertAlmostEqual(loss[0], loss[1], delta=loss[0] * delta2)
+        self.assertAlmostEqual(
+            all_reduce_first_loss, reduce_first_loss, delta=1e-5
+        )
+        self.assertAlmostEqual(
+            all_reduce_last_loss,
+            reduce_last_loss,
+            delta=all_reduce_last_loss * delta2,
+        )

        if not use_device:
            return

@@ -86,20 +90,32 @@ class TestResnetWithReduceBase(TestParallelExecutorBase):
            enable_sequential_execution=True,
        )
-        for loss in zip(all_reduce_first_loss, all_reduce_first_loss_seq):
-            self.assertAlmostEqual(loss[0], loss[1], delta=1e-5)
-        for loss in zip(all_reduce_last_loss, all_reduce_last_loss_seq):
-            self.assertAlmostEqual(loss[0], loss[1], delta=loss[0] * delta2)
+        self.assertAlmostEqual(
+            all_reduce_first_loss, all_reduce_first_loss_seq, delta=1e-5
+        )
+        self.assertAlmostEqual(
+            all_reduce_last_loss,
+            all_reduce_last_loss_seq,
+            delta=all_reduce_last_loss * delta2,
+        )

-        for loss in zip(reduce_first_loss, reduce_first_loss_seq):
-            self.assertAlmostEqual(loss[0], loss[1], delta=1e-5)
-        for loss in zip(reduce_last_loss, reduce_last_loss_seq):
-            self.assertAlmostEqual(loss[0], loss[1], delta=loss[0] * delta2)
+        self.assertAlmostEqual(
+            reduce_first_loss, reduce_first_loss_seq, delta=1e-5
+        )
+        self.assertAlmostEqual(
+            reduce_last_loss,
+            reduce_last_loss_seq,
+            delta=reduce_last_loss * delta2,
+        )

-        for loss in zip(all_reduce_first_loss_seq, reduce_first_loss_seq):
-            self.assertAlmostEqual(loss[0], loss[1], delta=1e-5)
-        for loss in zip(all_reduce_last_loss_seq, reduce_last_loss_seq):
-            self.assertAlmostEqual(loss[0], loss[1], delta=loss[0] * delta2)
+        self.assertAlmostEqual(
+            all_reduce_first_loss_seq, reduce_first_loss_seq, delta=1e-5
+        )
+        self.assertAlmostEqual(
+            all_reduce_last_loss_seq,
+            reduce_last_loss_seq,
+            delta=all_reduce_last_loss_seq * delta2,
+        )


class TestResnetWithReduceCPU(TestResnetWithReduceBase):
python/paddle/fluid/tests/unittests/test_recurrent_op.py View file @ ddf94ae4
...
...
@@ -37,7 +37,7 @@ class PyRNNBase:
    def forward(self):
        for step_id in range(self.x.shape[0]):
            self.step(step_id, self.x[step_id])
-        return np.array([np.mean(self.y)])
+        return np.mean(self.y)

    def segment_inputs(self):
        return [self.x[i] for i in range(self.x.shape[0])]
...
...
@@ -239,7 +239,7 @@ class RecurrentOpTest1(unittest.TestCase):
            f[...] = o
            dout_dfeed = (y_pos - y_neg) / (delta * 2)
-            g[...] = dout_dfeed[0]
+            g[...] = dout_dfeed

        return grad_list
...
...
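The reference RNN no longer wraps its mean in a length-1 array, so it matches the 0-D output of the operator, and the numeric-gradient helper indexes accordingly. A quick NumPy illustration of the shape difference:

import numpy as np

y = np.random.rand(4, 8)
old_style = np.array([np.mean(y)])   # shape (1,): needed an extra [0] to reach the scalar
new_style = np.mean(y)               # 0-d result, shape ()
print(old_style.shape, new_style.shape, float(new_style))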
python/paddle/fluid/tests/unittests/test_resnet50_with_cinn.py View file @ ddf94ae4
...
...
@@ -103,7 +103,7 @@ class TestResnet50Accuracy(unittest.TestCase):
                fetch_list=[loss],
                return_numpy=True,
            )
-            loss_vals.append(loss_v[0][0])
+            loss_vals.append(loss_v[0])
        return loss_vals

    def test_check_resnet50_accuracy(self):
...
...
python/paddle/fluid/tests/unittests/test_run_program_op.py View file @ ddf94ae4
...
...
@@ -514,7 +514,7 @@ class TestParametersWithStopGradient(unittest.TestCase):
        dy_loss = self.train(to_static=False)
        st_loss = self.train(to_static=True)
-        self.assertEqual(dy_loss[0], st_loss[0])
+        self.assertEqual(dy_loss, st_loss)

        paddle.enable_static()
...
...
python/paddle/fluid/tests/unittests/test_zero_dim_tensor.py View file @ ddf94ae4
...
...
@@ -220,14 +220,6 @@ class TestReduceAPI(unittest.TestCase):
                self.assertEqual(x.grad.shape, [])
                np.testing.assert_allclose(x.grad.numpy(), np.array(3.0))

-            if api in [
-                paddle.sum,
-                paddle.mean,
-                paddle.nanmean,
-                paddle.nansum,
-            ]:
-                return
-
            # 2) x is ND, reduce to 0D
            if api in [paddle.all, paddle.any]:
                x = paddle.randint(0, 2, [3, 5]).astype('bool')
...
...
@@ -302,20 +294,11 @@ class TestReduceAPI(unittest.TestCase):
            np.testing.assert_allclose(res[2], np.array(1.0))
            np.testing.assert_allclose(res[3], np.array(1.0))

-            if api in [
-                paddle.sum,
-                paddle.mean,
-                paddle.nanmean,
-                paddle.nansum,
-            ]:
-                return
-
            # 2) x is ND, reduce to 0D
            if api in [paddle.all, paddle.any]:
                x = paddle.randint(0, 2, [3, 5]).astype('bool')
            else:
                x = paddle.rand([3, 5])
-            x = paddle.rand([3, 5])
            x.stop_gradient = False
            out = api(x, None)
            paddle.static.append_backward(out)
...
...
@@ -1365,6 +1348,7 @@ class TestSundryAPI(unittest.TestCase):
        self.assertEqual(out.shape, [])

    def test_std(self):
+        # 1) x is 0D
        x = paddle.rand([])
        x.stop_gradient = False
        out1 = paddle.std(x)
...
...
@@ -1372,18 +1356,24 @@ class TestSundryAPI(unittest.TestCase):
        out1.backward()
        out2.backward()

        # checkout shape of out
        self.assertEqual(out1.shape, [])
        self.assertEqual(out2.shape, [])

        # checkout value of out
        self.assertEqual(out1, 0)
        self.assertEqual(out2, 0)

        # checkout backward
        self.assertEqual(x.grad.shape, [])

+        # 2) x is ND
+        x = paddle.rand([3, 5])
+        x.stop_gradient = False
+        out = paddle.std(x)
+        out.backward()
+        self.assertEqual(out.shape, [])
+        self.assertEqual(x.grad.shape, [3, 5])
+
    def test_var(self):
+        # 1) x is 0D
        x = paddle.rand([])
        x.stop_gradient = False
        out1 = paddle.var(x)
...
...
@@ -1391,18 +1381,23 @@ class TestSundryAPI(unittest.TestCase):
        out1.backward()
        out2.backward()

        # checkout shape of out
        self.assertEqual(out1.shape, [])
        self.assertEqual(out2.shape, [])

        # checkout value of out
        self.assertEqual(out1, 0)
        self.assertEqual(out2, 0)

        # checkout backward
        self.assertEqual(x.grad.shape, [])
        np.testing.assert_allclose(x.grad, 0)

+        # 2) x is ND
+        x = paddle.rand([3, 5])
+        x.stop_gradient = False
+        out = paddle.std(x)
+        out.backward()
+        self.assertEqual(out.shape, [])
+        self.assertEqual(x.grad.shape, [3, 5])
+
    def test_quantile(self):
+        # 1) x is 0D
        x = paddle.rand([])
...
...
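A hedged dygraph sketch of what the new std/var cases check: reducing a 0-D input keeps everything 0-D (and the deviation of a single element is 0), while reducing an ND input gives a 0-D output with an ND gradient. Assumes a build with this change:

import paddle

x = paddle.rand([])
x.stop_gradient = False
out = paddle.std(x)
out.backward()
print(out.shape, x.grad.shape, float(out))   # [] [] 0.0

x2 = paddle.rand([3, 5])
x2.stop_gradient = False
paddle.var(x2).backward()
print(x2.grad.shape)                         # [3, 5]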
@@ -1598,7 +1593,6 @@ class TestSundryAPI(unittest.TestCase):
        out = paddle.clip(x, -5, 5)
        out.retain_grads()
        out.backward()
        self.assertEqual(out.shape, [])
        self.assertEqual(out.grad.shape, [])
        self.assertEqual(x.grad.shape, [])
...
...
@@ -1608,7 +1602,6 @@ class TestSundryAPI(unittest.TestCase):
        out1 = paddle.clip(x1, paddle.full([], 5.0), paddle.full([], 5.0))
        out1.retain_grads()
        out1.backward()
        self.assertEqual(out1.shape, [])
        self.assertEqual(out1.grad.shape, [])
        self.assertEqual(x1.grad.shape, [])
...
...
@@ -5643,8 +5636,7 @@ class TestDistribution(unittest.TestCase):
        self.assertEqual(
            d.log_prob(paddle.full([], 2, dtype='int64')).shape, []
        )
-        # because use paddle.sum
-        # self.assertEqual(d.entropy().shape, [])
+        self.assertEqual(d.entropy().shape, [])

    def test_Normal(self):
        normal = paddle.distribution.Normal(0.0, 3.0)
...
...
@@ -5687,10 +5679,9 @@ class TestDistribution(unittest.TestCase):
        self.assertEqual(beta.sample([]).shape, [])
        self.assertEqual(beta.mean.shape, [])
        self.assertEqual(beta.variance.shape, [])
-        # because use paddle.sum
-        # self.assertEqual(beta.prob(self.x).shape, [])
-        # self.assertEqual(beta.log_prob(self.x).shape, [])
-        # self.assertEqual(beta.entropy().shape, [])
+        self.assertEqual(beta.prob(self.x).shape, [])
+        self.assertEqual(beta.log_prob(self.x).shape, [])
+        self.assertEqual(beta.entropy().shape, [])

    def test_kl_divergence(self):
        p = paddle.distribution.Beta(alpha=0.5, beta=0.5)
...
...
@@ -5749,10 +5740,9 @@ class TestDistribution(unittest.TestCase):
        d = paddle.distribution.Multinomial(
            10, paddle.to_tensor([0.2, 0.3, 0.5])
        )
-        # because use paddle.sum
-        # self.assertEqual(d.prob(self.x).shape, [])
-        # self.assertEqual(d.log_prob(self.x).shape, [])
-        # self.assertEqual(d.entropy().shape, [])
+        self.assertEqual(d.prob(self.x).shape, [])
+        self.assertEqual(d.log_prob(self.x).shape, [])
+        self.assertEqual(d.entropy().shape, [])


class TestLossAPI(unittest.TestCase):
...
...
@@ -5770,10 +5760,10 @@ class TestLossAPI(unittest.TestCase):
        fg_num_1 = paddle.full([1], 2.0)

        out0 = F.sigmoid_focal_loss(
-            logit, label, normalizer=fg_num_0, reduction='mean'
+            logit, label, normalizer=fg_num_0, reduction='sum'
        )
        out1 = F.sigmoid_focal_loss(
-            logit, label, normalizer=fg_num_1, reduction='mean'
+            logit, label, normalizer=fg_num_1, reduction='sum'
        )

        out0.retain_grads()
...
...
@@ -5788,6 +5778,28 @@ class TestLossAPI(unittest.TestCase):
        self.assertEqual(out0.grad.shape, [])
        self.assertEqual(logit.grad.shape, [2, 3])

+    def test_cross_entropy(self):
+        input = paddle.rand([3, 5])
+        input.stop_gradient = False
+        label = paddle.randint(0, 5, shape=[3])
+
+        loss = paddle.nn.functional.cross_entropy(
+            input, label, reduction='sum'
+        )
+        loss.backward()
+
+        self.assertEqual(loss.shape, [])
+        self.assertEqual(input.grad.shape, [3, 5])
+
+    def test_l1_loss(self):
+        input = paddle.rand([3, 5])
+        input.stop_gradient = False
+        label = paddle.rand([3, 5])
+
+        loss = paddle.nn.functional.l1_loss(input, label, reduction='mean')
+        loss.backward()
+
+        self.assertEqual(loss.shape, [])
+        self.assertEqual(input.grad.shape, [3, 5])
+

class TestLossAPIStatic(unittest.TestCase):
    def setUp(self):
...
...
@@ -5818,12 +5830,42 @@ class TestLossAPIStatic(unittest.TestCase):
            prog, fetch_list=[out0, out1, out0.grad_name, logit.grad_name]
        )
        np.testing.assert_allclose(res[0], res[1])
-        # because static use paddle.mean
-        # self.assertEqual(res[0].shape, ())
-        # self.assertEqual(res[1].shape, ())
-        # self.assertEqual(res[2].shape, ())
+        self.assertEqual(res[0].shape, ())
+        self.assertEqual(res[1].shape, ())
+        self.assertEqual(res[2].shape, ())
        self.assertEqual(res[3].shape, (2, 3))

+    @prog_scope()
+    def test_cross_entropy(self):
+        input = paddle.rand([3, 5])
+        input.stop_gradient = False
+        label = paddle.randint(0, 5, shape=[3])
+        label.stop_gradient = False
+
+        loss = paddle.nn.functional.cross_entropy(
+            input, label, reduction='mean'
+        )
+        paddle.static.append_backward(loss)
+
+        prog = paddle.static.default_main_program()
+        res = self.exe.run(prog, fetch_list=[loss, input.grad_name])
+        self.assertEqual(res[0].shape, ())
+        self.assertEqual(res[1].shape, (3, 5))
+
+    @prog_scope()
+    def test_l1_loss(self):
+        input = paddle.rand([3, 5])
+        input.stop_gradient = False
+        label = paddle.rand([3, 5])
+
+        loss = paddle.nn.functional.l1_loss(input, label, reduction='sum')
+        paddle.static.append_backward(loss)
+
+        prog = paddle.static.default_main_program()
+        res = self.exe.run(prog, fetch_list=[loss, input.grad_name])
+        self.assertEqual(res[0].shape, ())
+        self.assertEqual(res[1].shape, (3, 5))
+

if __name__ == "__main__":
    unittest.main()
python/paddle/incubate/autograd/composite_rules.py View file @ ddf94ae4
...
...
@@ -252,7 +252,7 @@ def mean_composite(x, axis, keepdim):
        operator.mul, [x.shape[axis] for axis in axes]
    )
    norm = fill_constant(
-        shape=x.shape if len(x.shape) == 0 else [1],
+        shape=[],
        value=value_to_fill,
        dtype=sum_x.dtype,
    )
...
...
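mean_composite now materializes the normalizing constant as a 0-D tensor (shape=[]) instead of a shape-[1] tensor. A small sketch of the shape rule it relies on, with paddle.full standing in for the fill_constant primitive:

import paddle

sum_x = paddle.rand([3, 5]).sum()               # 0-D numerator
norm = paddle.full([], 15.0, dtype='float32')   # shape=[] -> 0-D denominator
print((sum_x / norm).shape)                     # [] -- 0-D divided by 0-D stays 0-D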
python/paddle/incubate/distributed/models/moe/grad_clip.py View file @ ddf94ae4
...
...
@@ -142,22 +142,18 @@ class ClipGradForMOEByGlobalNorm(ClipGradBase):
        global_norm_var = []
        if len(sum_square_list_fp16) > 0:
-            global_norm_var_fp16 = paddle.concat(sum_square_list_fp16)
-            global_norm_var_fp16 = paddle.sum(global_norm_var_fp16)
+            global_norm_var_fp16 = paddle.add_n(sum_square_list_fp16)
            global_norm_var.append(global_norm_var_fp16.astype(sum_dtype))
        if len(sum_square_list_fp32) > 0:
-            global_norm_var_fp32 = paddle.concat(sum_square_list_fp32)
-            global_norm_var_fp32 = paddle.sum(global_norm_var_fp32)
+            global_norm_var_fp32 = paddle.add_n(sum_square_list_fp32)
            if sum_dtype == 'float32':
                global_norm_var.append(global_norm_var_fp32)
            else:
                global_norm_var.append(global_norm_var_fp32.astype(sum_dtype))
        if len(sum_square_list) > 0:
-            global_norm_var_fp64 = paddle.concat(sum_square_list)
-            global_norm_var_fp64 = paddle.sum(global_norm_var_fp64)
+            global_norm_var_fp64 = paddle.add_n(sum_square_list)
            global_norm_var.append(global_norm_var_fp64)
-        global_norm_var = paddle.concat(global_norm_var)
-        global_norm_var = paddle.sum(global_norm_var)
+        global_norm_var = paddle.add_n(global_norm_var)
        return global_norm_var, sum_dtype

    @no_grad()
...
...
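Each squared-norm term is now 0-D, so paddle.concat (which stacks along an existing axis) no longer fits these entries; the clip helper switches to paddle.add_n, which sums a list of same-shape tensors directly. A minimal sketch of the replacement:

import paddle

sum_square_list = [paddle.full([], v) for v in (1.0, 2.5, 0.5)]  # 0-D per-parameter terms
global_norm_var = paddle.add_n(sum_square_list)                  # elementwise sum, still 0-D
print(global_norm_var.shape, float(global_norm_var))             # [] 4.0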
test/auto_parallel/test_while_op_partition.py View file @ ddf94ae4
...
...
@@ -206,7 +206,7 @@ def get_program():
            auto.shard_tensor(error_cost, _g_process_mesh, [None, None, None])
            loss = paddle.mean(error_cost)
-            auto.shard_tensor(loss, _g_process_mesh, [None])
+            auto.shard_tensor(loss, _g_process_mesh, [])

    return train_program, start_program, dataloader, i, loss
...
...
test/autograd/test_autograd_functional_static.py View file @ ddf94ae4
...
...
@@ -41,14 +41,14 @@ paddle.enable_static()
        'v_not_none',
        utils.reduce,
        np.random.rand(2, 3),
-        np.random.rand(1),
+        np.array(np.random.rand()),
        False,
    ),
    (
        'xs_stop_gradient',
        utils.reduce,
        np.random.rand(2, 3),
-        np.random.rand(1),
+        np.array(np.random.rand()),
        True,
    ),
    (
...
...
test/contrib/test_multi_precision_fp16_train.py View file @ ddf94ae4
...
...
@@ -178,7 +178,7 @@ def train(use_pure_fp16=True, use_nesterov=False, optimizer=""):
            (loss,) = exe.run(
                train_program, feed=feeder.feed(data), fetch_list=[sum_cost]
            )
-            loss_v = loss[0] if isinstance(loss, np.ndarray) else loss
+            loss_v = float(loss) if isinstance(loss, np.ndarray) else loss
            print(
                'PassID {:1}, Train Batch ID {:04}, train loss {:2.4}'.format(
                    pass_id, batch_id + 1, float(loss_v)
...
...
test/distribution/test_distribution_transform.py View file @ ddf94ae4
...
...
@@ -1205,7 +1205,7 @@ class TestStickBreakingTransform(unittest.TestCase):
    @param.param_func(((np.random.random(10),),))
    def test_forward_log_det_jacobian(self, x):
        self.assertEqual(
-            self._t.forward_log_det_jacobian(paddle.to_tensor(x)).shape, [1]
+            self._t.forward_log_det_jacobian(paddle.to_tensor(x)).shape, []
        )
...
...
test/legacy_test/test_async_read_write.py View file @ ddf94ae4
...
...
@@ -65,7 +65,7 @@ class TestAsyncRead(unittest.TestCase):
        )
        # index data
        index_array1 = paddle.gather(self.src, self.index)
-        count_numel = paddle.sum(count).numpy()[0]
+        count_numel = paddle.sum(count).item()
        index_array2 = self.dst[count_numel : count_numel + len(self.index)]
        np.testing.assert_allclose(
            index_array1.numpy(), index_array2.numpy(), rtol=1e-05
...
...
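Since paddle.sum over all elements now returns a 0-D tensor, .numpy()[0] would try to index a shape-() array; .item() extracts the Python scalar instead. A short sketch:

import paddle

count = paddle.to_tensor([2, 3, 4], dtype='int64')
total = paddle.sum(count)     # 0-D under the new convention
print(total.shape)            # []
print(total.item())           # 9 -- use item() (or int()/float()) rather than numpy()[0]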
test/prim/prim/vjp/eager/test_comp_eager_sum_grad.py View file @ ddf94ae4
...
...
@@ -41,7 +41,7 @@ def desired(primal, cotangent, axis, keep_dim):
class TestSumGradComp(unittest.TestCase):
    def test_sum_grad_comp_1(self):
        self.primal = np.random.rand(10, 10)
-        self.cotangent = np.random.rand(1)
+        self.cotangent = np.array(np.random.rand())
        paddle.disable_static()

        np.testing.assert_allclose(
...
...
test/standalone_executor/test_standalone_cuda_graph_multi_stream.py View file @ ddf94ae4
...
...
@@ -126,7 +126,7 @@ class TestCustomStream(unittest.TestCase):
        for out in outs:
            for baseline, result in zip(outs[0], out):
-                self.assertEqual(baseline[0], result[0])
+                self.assertEqual(baseline, result)


if __name__ == "__main__":
...
...
test/xpu/test_zero_dim_tensor_xpu.py View file @ ddf94ae4
...
...
@@ -1269,10 +1269,11 @@ class TestSundryAPI(unittest.TestCase):
            out0.numpy(),
            out1.numpy(),
        )
+        self.assertEqual(out0.shape, [])
        out0.retain_grads()
        out0.backward()
-        self.assertEqual(out0.grad.shape, [1])
+        self.assertEqual(out0.grad.shape, [])
        self.assertEqual(logit.grad.shape, [2, 3])

    def test_allclose(self):
...
...