Commit 3631f064
Authored on May 24, 2023 by liangjianzhong

Merge remote-tracking branch 'zyc/develop' into semi-auto/rule-base

Parents: 4cd1a2cb, c3ea2a6b

62 changed files with 972 additions and 210 deletions (+972 −210)
Changed files:

.gitmodules (+4 −4)
cmake/external/protobuf.cmake (+2 −0)
cmake/external/xpu.cmake (+1 −1)
paddle/fluid/distributed/auto_parallel/CMakeLists.txt (+0 −3)
paddle/fluid/distributed/auto_parallel/spmd_rules/dist_tensor_spec.cc (+43 −15)
paddle/fluid/distributed/auto_parallel/spmd_rules/dist_tensor_spec.h (+20 −4)
paddle/fluid/operators/cinn/cinn_launch_op.cc (+6 −0)
paddle/fluid/operators/cinn/cinn_launch_op.h (+4 −0)
paddle/fluid/operators/collective/alltoall_op.cu.cc (+1 −1)
paddle/fluid/operators/collective/c_allgather_op.cu.cc (+1 −1)
paddle/fluid/operators/collective/c_allreduce_max_op.cu.cc (+1 −1)
paddle/fluid/operators/collective/c_allreduce_sum_op.cu.cc (+1 −1)
paddle/fluid/operators/collective/c_broadcast_op.cu.cc (+1 −1)
paddle/fluid/operators/collective/c_concat_op.cu.cc (+1 −1)
paddle/fluid/operators/collective/c_embedding_op.cu (+2 −2)
paddle/fluid/operators/collective/c_identity_op.cu.cc (+1 −1)
paddle/fluid/operators/collective/c_reducescatter_op.cu.cc (+1 −1)
paddle/fluid/operators/collective/c_split_op.cu (+1 −1)
paddle/fluid/operators/collective/mp_allreduce_sum_op.cu.cc (+1 −1)
paddle/fluid/operators/collective/partial_allgather_op.cu.cc (+1 −1)
paddle/fluid/operators/collective/partial_recv_op.cu.cc (+1 −1)
paddle/fluid/operators/collective/partial_send_op.cu.cc (+1 −1)
paddle/fluid/operators/collective/recv_v2_op.cu.cc (+1 −1)
paddle/fluid/operators/collective/send_v2_op.cu.cc (+1 −1)
paddle/fluid/platform/device/gpu/nccl_helper.h (+2 −2)
paddle/fluid/pybind/auto_parallel_py.cc (+21 −0)
paddle/phi/api/yaml/generator/api_base.py (+3 −3)
paddle/phi/backends/xpu/xpu2_op_list.cc (+2 −0)
paddle/phi/core/distributed/auto_parallel/CMakeLists.txt (+2 −1)
paddle/phi/core/utils/data_type.h (+1 −1)
paddle/phi/kernels/gpu/activation_kernel.cu (+1 −0)
paddle/phi/kernels/gpu/selu_grad_kernel.cu (+1 −0)
paddle/phi/kernels/xpu/nll_loss_grad_kernel.cc (+95 −0)
paddle/phi/kernels/xpu/nll_loss_kernel.cc (+93 −0)
paddle/scripts/paddle_build.sh (+58 −2)
python/paddle/distributed/auto_parallel/dist_attribute.py (+1 −0)
python/paddle/distributed/auto_parallel/operators/dist_matmul.py (+12 −0)
python/paddle/distributed/auto_parallel/utils.py (+62 −48)
python/paddle/fluid/tests/unittests/CMakeLists.txt (+9 −1)
python/paddle/fluid/tests/unittests/test_assign_value_op.py (+1 −1)
python/paddle/fluid/tests/unittests/test_bitwise_op.py (+4 −4)
python/paddle/fluid/tests/unittests/test_compare_op.py (+2 −2)
python/paddle/fluid/tests/unittests/test_expand_v2_op.py (+10 −10)
python/paddle/fluid/tests/unittests/test_lookup_table_op.py (+14 −14)
python/paddle/fluid/tests/unittests/test_lookup_table_v2_op.py (+6 −6)
python/paddle/fluid/tests/unittests/test_matmul_op.py (+14 −4)
python/paddle/fluid/tests/unittests/test_matmul_v2_op.py (+50 −7)
python/paddle/fluid/tests/unittests/test_norm_op.py (+20 −8)
python/paddle/fluid/tests/unittests/test_one_hot_v2_op.py (+6 −4)
python/paddle/fluid/tests/unittests/test_selu_op.py (+33 −6)
python/paddle/fluid/tests/unittests/test_shape_op.py (+2 −2)
python/paddle/fluid/tests/unittests/test_sum_op.py (+4 −4)
test/ir/inference/inference_pass_test.py (+6 −1)
test/ir/inference/test_trt_activation_pass.py (+3 −2)
test/ir/inference/test_trt_elementwise_op.py (+3 −2)
test/ir/inference/test_trt_instance_norm_op.py (+3 −2)
test/ir/inference/test_trt_pool3d_op.py (+9 −6)
test/ir/inference/test_trt_pool_op.py (+3 −2)
test/ir/inference/test_trt_skip_layernorm_fuse_pass.py (+12 −8)
test/ir/inference/test_trt_subgraph_pass.py (+18 −12)
test/xpu/test_nll_loss_op_xpu.py (+288 −0)
tools/check_file_diff_approvals.sh (+1 −1)
.gitmodules

@@ -30,10 +30,6 @@
 	path = third_party/xxhash
 	url = https://github.com/Cyan4973/xxHash.git
 	ignore = dirty
-[submodule "third_party/eigen3"]
-	path = third_party/eigen3
-	url = https://gitlab.com/libeigen/eigen.git
-	ignore = dirty
 [submodule "third_party/leveldb"]
 	path = third_party/leveldb
 	url = https://github.com/google/leveldb
@@ -50,3 +46,7 @@
 	path = third_party/glog
 	url = https://github.com/google/glog.git
 	ignore = dirty
+[submodule "third_party/eigen3"]
+	path = third_party/eigen3
+	url = https://gitlab.com/libeigen/eigen.git
+	ignore = dirty
cmake/external/protobuf.cmake

@@ -296,6 +296,8 @@ function(build_protobuf TARGET_NAME BUILD_FOR_HOST)
     PREFIX ${PROTOBUF_PREFIX_DIR}
     SOURCE_DIR ${SOURCE_DIR}
     UPDATE_COMMAND ""
+    PATCH_COMMAND
+    COMMAND cd ${SOURCE_DIR} && git checkout ${PROTOBUF_TAG}
     DEPENDS zlib
     CONFIGURE_COMMAND ${CMAKE_COMMAND} ${SOURCE_DIR}/cmake ${OPTIONAL_ARGS}
cmake/external/xpu.cmake

@@ -8,7 +8,7 @@ set(XPU_API_LIB_NAME "libxpuapi.so")
 set(XPU_RT_LIB_NAME "libxpurt.so")
 set(XPU_XFT_LIB_NAME "libxft.so")
 
-set(XPU_BASE_DATE "20230519")
+set(XPU_BASE_DATE "20230523")
 set(XPU_XCCL_BASE_VERSION "1.0.49.2")
 set(XPU_XFT_BASE_VERSION "latest")
paddle/fluid/distributed/auto_parallel/CMakeLists.txt

@@ -6,6 +6,3 @@ cc_library(
 add_subdirectory(test)
 
 add_subdirectory(spmd_rules)
-
-cc_library(auto_parallel DEPS device_mesh process_mesh dist_attr dist_mapper
-           dist_tensor_spec)
paddle/fluid/distributed/auto_parallel/spmd_rules/dist_tensor_spec.cc

@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/fluid/distributed/auto_parallel/spmd_rules/dist_tensor_spec.h"
 
-#include "paddle/fluid/distributed/auto_parallel/process_mesh.h"
+#include "paddle/phi/core/distributed/auto_parallel/utils.h"
 
 namespace paddle {
 namespace distributed {
@@ -27,28 +27,41 @@ DistTensorSpec::DistTensorSpec(const std::vector<int64_t>& shape,
   dist_attr_.copy_from(dist_attr);
 }
 
 DistTensorSpec::DistTensorSpec(const DistTensorSpec& spec) {
   std::vector<int64_t> spec_shape = spec.get_shape();
   shape_.assign(spec_shape.begin(), spec_shape.end());
   dist_attr_.copy_from(spec.get_dist_attr());
 }
 
 DistTensorSpec::~DistTensorSpec() {}
 
 DistTensorSpec::DistTensorSpec(const Tensor& tensor) {
   shape_ = tensor.shape();
-  std::vector<int64_t> pm_shape, pm_ids;
-  pm_shape = {4};
-  pm_ids = {0, 1, 2, 3};
-  std::vector<std::string> dim_name = {"mp"};
-  ProcessMesh pm(pm_shape, pm_ids, dim_name);
-  std::vector<int64_t> dims_mapping = {-1, 0};
-  TensorDistAttr dist_attr;
-  dist_attr.set_process_mesh(pm);
-  dist_attr.set_dims_mapping(dims_mapping);
-  dist_attr_.copy_from(dist_attr);
-  std::cout << dist_attr_;
+  // std::vector<int64_t> pm_shape, pm_ids;
+  // pm_shape = {4};
+  // pm_ids = {0, 1, 2, 3};
+  // std::vector<std::string> dim_name = {"mp"};
+  // ProcessMesh pm(pm_shape, pm_ids, dim_name);
+  // std::vector<int64_t> dims_mapping = {-1, 0};
+  // TensorDistAttr dist_attr;
+  // dist_attr.set_process_mesh(pm);
+  // dist_attr.set_dims_mapping(dims_mapping);
+  // dist_attr_.copy_from(dist_attr);
+  // std::cout << dist_attr_;
 }
 
+DistTensorSpec& DistTensorSpec::operator=(const DistTensorSpec& spec) {
+  std::vector<int64_t> spec_shape = spec.get_shape();
+  shape_ = spec_shape;
+  dist_attr_.copy_from(spec.get_dist_attr());
+  return *this;
+}
+
-const std::vector<int64_t>& DistTensorSpec::get_dims_mapping() {
+const std::vector<int64_t>& DistTensorSpec::get_dims_mapping() const {
   return dist_attr_.dims_mapping();
 }
@@ -57,7 +70,7 @@ void DistTensorSpec::set_dims_mapping(
   dist_attr_.set_dims_mapping(dims_mapping);
 }
 
-const ProcessMesh& DistTensorSpec::get_process_mesh() {
+const ProcessMesh& DistTensorSpec::get_process_mesh() const {
   return dist_attr_.process_mesh();
 }
@@ -65,7 +78,22 @@ void DistTensorSpec::set_process_mesh(const ProcessMesh& process_mesh) {
   dist_attr_.set_process_mesh(process_mesh);
 }
 
-const std::vector<int64_t>& DistTensorSpec::get_shape() { return shape_; }
+const std::vector<int64_t>& DistTensorSpec::get_shape() const { return shape_; }
+
+const TensorDistAttr& DistTensorSpec::get_dist_attr() const { return dist_attr_; }
+
+void DistTensorSpec::set_dist_attr(const TensorDistAttr& dist_attr) {
+  dist_attr_ = dist_attr;
+}
+
+std::string DistTensorSpec::to_string() const {
+  using phi::distributed::auto_parallel::str_join;
+  std::string spec_str = "{tensor_shape:[" + str_join(shape_) + "], ";
+  spec_str += "dist_attr:" + dist_attr_.to_string() + "}";
+  return spec_str;
+}
 
 }  // namespace auto_parallel
 }  // namespace distributed
paddle/fluid/distributed/auto_parallel/spmd_rules/dist_tensor_spec.h

@@ -14,39 +14,55 @@ limitations under the License. */
 #pragma once
 
 #include "paddle/fluid/distributed/auto_parallel/dist_attr.h"
 #include "paddle/phi/api/include/tensor.h"
 #include "paddle/phi/core/distributed/auto_parallel/dist_attr.h"
 
 namespace paddle {
 namespace distributed {
 namespace auto_parallel {
 
 using phi::distributed::auto_parallel::ProcessMesh;
 using phi::distributed::auto_parallel::TensorDistAttr;
 
 /**
  * A unified data class for inferring distributed attributes
  * in both dygraph mode and static mode
  */
 class DistTensorSpec {
  public:
   DistTensorSpec() = default;
 
   DistTensorSpec(const std::vector<int64_t>& shape,
                  const TensorDistAttr& dist_attr);
 
   DistTensorSpec(const DistTensorSpec& spec);
 
   // temp function, only for test in dygraph mode
   explicit DistTensorSpec(const Tensor& tensor);
 
   ~DistTensorSpec();
 
+  DistTensorSpec& operator=(const DistTensorSpec& spec);
+
   // get dims_mapping from dist_attr_
-  const std::vector<int64_t>& get_dims_mapping();
+  const std::vector<int64_t>& get_dims_mapping() const;
 
   // set dims_mapping in dist_attr_
   void set_dims_mapping(const std::vector<int64_t>& dims_mapping);
 
   // get process_mesh from dist_attr_
-  const ProcessMesh& get_process_mesh();
+  const ProcessMesh& get_process_mesh() const;
 
   // set process_mesh in dist_attr_
   void set_process_mesh(const ProcessMesh& process_mesh);
 
-  const std::vector<int64_t>& get_shape();
+  const TensorDistAttr& get_dist_attr() const;
+
+  void set_dist_attr(const TensorDistAttr& dist_attr);
+
+  const std::vector<int64_t>& get_shape() const;
+
+  std::string to_string() const;
 
  private:
  std::vector<int64_t> shape_;
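Taken together, the two files above give SPMD rules a single value type that carries a tensor's shape together with its TensorDistAttr. As a hedged illustration only (this snippet is not part of the commit; the mesh and shape values are invented, mirroring the disabled debug code in the .cc):

// Hypothetical usage sketch of DistTensorSpec; mesh/shape values are invented.
using paddle::distributed::auto_parallel::DistTensorSpec;
using phi::distributed::auto_parallel::ProcessMesh;
using phi::distributed::auto_parallel::TensorDistAttr;

void BuildSpecExample() {
  // A 1-D mesh of 4 devices named "mp", as in the commented-out debug code.
  ProcessMesh pm({4}, {0, 1, 2, 3}, {"mp"});
  TensorDistAttr dist_attr;
  dist_attr.set_process_mesh(pm);
  dist_attr.set_dims_mapping({-1, 0});  // dim 0 replicated, dim 1 sharded on "mp"
  DistTensorSpec spec({1024, 512}, dist_attr);
  // spec.to_string() -> "{tensor_shape:[1024, 512], dist_attr:...}"
}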
paddle/fluid/operators/cinn/cinn_launch_op.cc

@@ -17,6 +17,7 @@
 #include <functional>
 #include <vector>
 
+#include "cinn/common/target.h"
 #include "cinn/hlir/framework/graph_compiler.h"
 #include "cinn/runtime/cinn_runtime.h"
 #include "cinn/runtime/flags.h"
@@ -94,6 +95,11 @@ void SetCinnRandomSeed<phi::CPUContext>() {
   ::cinn::runtime::RandomSeed::GetOrSet(seed);
 }
 
+void SetCinnTarget(const ::cinn::common::Target& target) {
+  VLOG(4) << "Set CINN compile target to " << target;
+  ::cinn::runtime::CurrentTarget::SetCurrentTarget(target);
+}
+
 }  // namespace details
 
 class CinnLaunchOp : public framework::OperatorWithKernel {
paddle/fluid/operators/cinn/cinn_launch_op.h

@@ -58,6 +58,9 @@ void SetCinnRuntimeFlags();
 template <typename DeviceContext>
 void SetCinnRandomSeed();
 
+// set CINN compile target
+void SetCinnTarget(const ::cinn::common::Target& target);
+
 }  // namespace details
 
 template <typename T, typename DeviceContext>
@@ -115,6 +118,7 @@ class CinnLaunchOpKernel : public framework::OpKernel<T> {
                               "Step 2. Get compilation result of the graph");
     // Step 2. Get compilation result of the graph
     auto target = details::PlaceToCinnTarget(place);
+    details::SetCinnTarget(target);
     using ClockType = std::chrono::steady_clock;
     std::chrono::time_point<ClockType> start_t, end_t;
     if (VLOG_IS_ON(1)) {
paddle/fluid/operators/collective/alltoall_op.cu.cc

@@ -98,7 +98,7 @@ PD_REGISTER_STRUCT_KERNEL(alltoall,
                           ops::AllToAllOpCUDAKernel,
                           float,
                           double,
-#if NCCL_VERSION_CODE >= 21000
+#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
                           plat::bfloat16,
 #endif
                           int,
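The same one-line change recurs in every collective kernel below: ncclBfloat16 only exists from NCCL 2.10 onward (NCCL_VERSION_CODE >= 21000), and device-side bfloat16 additionally needs CUDA 11.0, so the registration now requires both. A sketch of the recurring shape, with a hypothetical op name standing in:

// Sketch of the recurring registration pattern; `example_op` and its kernel
// are hypothetical stand-ins, but the strengthened guard is the one used here.
PD_REGISTER_STRUCT_KERNEL(example_op,
                          GPU,
                          ALL_LAYOUT,
                          ops::ExampleOpCUDAKernel,
                          float,
                          double,
#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
                          plat::bfloat16,  // compiled in only when supported
#endif
                          int,
                          plat::float16) {}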
paddle/fluid/operators/collective/c_allgather_op.cu.cc

@@ -95,7 +95,7 @@ PD_REGISTER_STRUCT_KERNEL(c_allgather,
                           ops::CAllGatherOpCUDAKernel,
                           float,
                           double,
-#if NCCL_VERSION_CODE >= 21000
+#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
                           plat::bfloat16,
 #endif
                           int,
paddle/fluid/operators/collective/c_allreduce_max_op.cu.cc

@@ -28,7 +28,7 @@ PD_REGISTER_STRUCT_KERNEL(c_allreduce_max,
                           ALL_LAYOUT,
                           ops::CAllReduceMaxCUDAKernel,
                           float,
-#if NCCL_VERSION_CODE >= 21000
+#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
                           plat::bfloat16,
 #endif
                           double,
paddle/fluid/operators/collective/c_allreduce_sum_op.cu.cc

@@ -28,7 +28,7 @@ PD_REGISTER_STRUCT_KERNEL(c_allreduce_sum,
                           ALL_LAYOUT,
                           ops::CAllReduceSumCUDAKernel,
                           float,
-#if NCCL_VERSION_CODE >= 21000
+#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
                           plat::bfloat16,
 #endif
                           double,
paddle/fluid/operators/collective/c_broadcast_op.cu.cc

@@ -100,7 +100,7 @@ PD_REGISTER_STRUCT_KERNEL(c_broadcast,
                           int64_t,
                           float,
                           double,
-#if NCCL_VERSION_CODE >= 21000
+#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
                           plat::bfloat16,
 #endif
                           plat::float16) {
paddle/fluid/operators/collective/c_concat_op.cu.cc

@@ -137,7 +137,7 @@ PD_REGISTER_STRUCT_KERNEL(c_concat,
                           double,
                           int,
                           int64_t,
-#if NCCL_VERSION_CODE >= 21000
+#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
                           plat::bfloat16,
 #endif
                           plat::float16) {
paddle/fluid/operators/collective/c_embedding_op.cu

@@ -239,7 +239,7 @@ PD_REGISTER_STRUCT_KERNEL(c_embedding,
                           ops::CEmbeddingCUDAKernel,
                           float,
                           double,
-#if NCCL_VERSION_CODE >= 21000
+#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
                           plat::bfloat16,
 #endif
                           plat::float16) {
@@ -251,7 +251,7 @@ PD_REGISTER_STRUCT_KERNEL(c_embedding_grad,
                           ops::CEmbeddingGradCUDAKernel,
                           float,
                           double,
-#if NCCL_VERSION_CODE >= 21000
+#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
                           plat::bfloat16,
 #endif
                           plat::float16) {
paddle/fluid/operators/collective/c_identity_op.cu.cc

@@ -25,7 +25,7 @@ PD_REGISTER_STRUCT_KERNEL(c_identity,
                           double,
                           int,
                           int64_t,
-#if NCCL_VERSION_CODE >= 21000
+#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
                           plat::bfloat16,
 #endif
                           plat::float16) {
paddle/fluid/operators/collective/c_reducescatter_op.cu.cc

@@ -87,7 +87,7 @@ PD_REGISTER_STRUCT_KERNEL(c_reducescatter,
                           ops::CReduceScatterOpCUDAKernel,
                           float,
                           double,
-#if NCCL_VERSION_CODE >= 21000
+#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
                           plat::bfloat16,
 #endif
                           int,
paddle/fluid/operators/collective/c_split_op.cu

@@ -123,7 +123,7 @@ PD_REGISTER_STRUCT_KERNEL(c_split,
                           double,
                           int,
                           int64_t,
-#if NCCL_VERSION_CODE >= 21000
+#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
                           plat::bfloat16,
 #endif
                           plat::float16) {
paddle/fluid/operators/collective/mp_allreduce_sum_op.cu.cc

@@ -31,7 +31,7 @@ PD_REGISTER_STRUCT_KERNEL(mp_allreduce_sum,
                           double,
                           int,
                           int64_t,
-#if NCCL_VERSION_CODE >= 21000
+#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
                           plat::bfloat16,
 #endif
                           plat::float16) {
paddle/fluid/operators/collective/partial_allgather_op.cu.cc

@@ -108,7 +108,7 @@ PD_REGISTER_STRUCT_KERNEL(partial_allgather,
                           ops::PartialAllGatherOpCUDAKernel,
                           float,
                           double,
-#if NCCL_VERSION_CODE >= 21000
+#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
                           plat::bfloat16,
 #endif
                           int,
paddle/fluid/operators/collective/partial_recv_op.cu.cc

@@ -124,7 +124,7 @@ PD_REGISTER_STRUCT_KERNEL(partial_recv,
                           ops::PartialRecvOpCUDAKernel,
                           float,
                           double,
-#if NCCL_VERSION_CODE >= 21000
+#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
                           plat::bfloat16,
 #endif
                           int,
paddle/fluid/operators/collective/partial_send_op.cu.cc

@@ -123,7 +123,7 @@ PD_REGISTER_STRUCT_KERNEL(partial_send,
                           ops::PartialSendCUDAKernel,
                           float,
                           double,
-#if NCCL_VERSION_CODE >= 21000
+#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
                           plat::bfloat16,
 #endif
                           int,
paddle/fluid/operators/collective/recv_v2_op.cu.cc

@@ -238,7 +238,7 @@ PD_REGISTER_STRUCT_KERNEL(recv_v2,
                           ops::RecvOpV2CUDAKernel,
                           float,
                           double,
-#if NCCL_VERSION_CODE >= 21000
+#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
                           plat::bfloat16,
 #endif
                           int,
paddle/fluid/operators/collective/send_v2_op.cu.cc

@@ -223,7 +223,7 @@ PD_REGISTER_STRUCT_KERNEL(send_v2,
                           ops::SendOpV2CUDAKernel,
                           float,
                           double,
-#if NCCL_VERSION_CODE >= 21000
+#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
                           plat::bfloat16,
 #endif
                           int,
paddle/fluid/platform/device/gpu/nccl_helper.h

@@ -59,7 +59,7 @@ inline ncclDataType_t ToNCCLDataType(framework::proto::VarType::Type type) {
     return ncclUint8;
   } else if (type == framework::proto::VarType::BOOL) {
     return ncclUint8;
-#if NCCL_VERSION_CODE >= 21000
+#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
   } else if (type == framework::proto::VarType::BF16) {
     return ncclBfloat16;
 #endif
@@ -86,7 +86,7 @@ inline ncclDataType_t ToNCCLDataType(phi::DataType type) {
     return ncclInt8;
   } else if (type == phi::DataType::BOOL) {
     return ncclUint8;
-#if NCCL_VERSION_CODE >= 21000
+#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
   } else if (type == phi::DataType::BFLOAT16) {
     return ncclBfloat16;
 #endif
paddle/fluid/pybind/auto_parallel_py.cc

@@ -15,6 +15,7 @@
 #include <pybind11/operators.h>
 #include <pybind11/stl.h>
 
+#include "paddle/fluid/distributed/auto_parallel/spmd_rules/dist_tensor_spec.h"
 #include "paddle/fluid/framework/op_desc.h"
 #include "paddle/fluid/framework/var_desc.h"
 #include "paddle/fluid/pybind/auto_parallel_py.h"
@@ -29,6 +30,7 @@ namespace py = pybind11;
 namespace paddle {
 namespace pybind {
 
+using paddle::distributed::auto_parallel::DistTensorSpec;
 using paddle::distributed::auto_parallel::OperatorDistAttr;
 using paddle::framework::OpDesc;
 using paddle::framework::VarDesc;
@@ -276,6 +278,25 @@ void BindAutoParallel(py::module *m) {
           py::arg("memo"))
       .def("__str__", &TensorDistAttr::to_string);
 
+  py::class_<DistTensorSpec>(*m, "DistTensorSpec")
+      .def(py::init<>())
+      .def(py::init<const DistTensorSpec &>())
+      .def(py::init<const std::vector<int64_t> &, const TensorDistAttr &>())
+      .def("get_dims_mapping", &DistTensorSpec::get_dims_mapping)
+      .def("set_dims_mapping", &DistTensorSpec::set_dims_mapping)
+      .def("get_process_mesh", &DistTensorSpec::get_process_mesh)
+      .def("set_process_mesh", &DistTensorSpec::set_process_mesh)
+      .def_property_readonly("shape", &DistTensorSpec::get_shape)
+      .def("__str__", &DistTensorSpec::to_string)
+      .def("__copy__",
+           [](const DistTensorSpec &self) { return DistTensorSpec(self); })
+      .def(
+          "__deepcopy__",
+          [](const DistTensorSpec &self, py::dict) {
+            return DistTensorSpec(self);
+          },
+          py::arg("memo"));
+
   py::class_<OperatorDistAttr>(*m, "OperatorDistAttr")
       .def(py::init<>())
       .def(py::init<const OpDesc &>())
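Once these bindings are compiled in, DistTensorSpec is reachable from Python through paddle.fluid.core (re-exported by dist_attribute.py further down). A hedged sketch of the binding surface; the values are invented, and a default-constructed TensorDistAttr is assumed to be acceptable here:

# Hypothetical driver for the new binding; values are for illustration only.
from paddle.fluid.core import DistTensorSpec, TensorDistAttr

spec = DistTensorSpec([8, 16], TensorDistAttr())
spec.set_dims_mapping([-1, 0])    # replicate dim 0, shard dim 1
print(spec.shape)                 # read-only property -> [8, 16]
print(spec.get_dims_mapping())    # -> [-1, 0]
print(spec)                       # __str__ delegates to DistTensorSpec::to_string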
paddle/phi/api/yaml/generator/api_base.py

@@ -1280,7 +1280,7 @@ PADDLE_API {self.get_return_type(inplace_flag=True)} {api_func_name}({self.get_d
    def gen_dist_tensor_code(self):
        # define the DistTensorSpec vector for input and output tensors
        api_code = "\n  std::vector<paddle::distributed::auto_parallel::DistTensorSpec> input_specs;\n"

        # get DistTensorSpec for each input tensor
        for tensor_name in self.inputs['names']:
@@ -1297,8 +1297,8 @@ PADDLE_API {self.get_return_type(inplace_flag=True)} {api_func_name}({self.get_d
 PADDLE_API {self.get_return_type(inplace_flag)} {api_func_name}({self.get_define_args(inplace_flag)}) {{
 {self.gene_kernel_select()}
 """
 
-        if api_func_name == 'matmul':
-            api_code += self.gen_dist_tensor_code()
+        # if api_func_name == 'matmul':
+        #     api_code += self.gen_dist_tensor_code()
 
         if len(self.kernel['func']) > 1:
             kernel_dispatch_code = ''
paddle/phi/backends/xpu/xpu2_op_list.cc

@@ -525,6 +525,8 @@ XPUOpMap& get_kl2_ops() {
                     phi::DataType::FLOAT16,
                     phi::DataType::INT64})},
      {"nearest_interp_v2_grad", XPUKernelSet({phi::DataType::FLOAT32})},
+     {"nll_loss", XPUKernelSet({phi::DataType::FLOAT32})},
+     {"nll_loss_grad", XPUKernelSet({phi::DataType::FLOAT32})},
      {"not_equal",
       XPUKernelSet({phi::DataType::INT64,
                     phi::DataType::INT32,
paddle/phi/core/distributed/auto_parallel/CMakeLists.txt

@@ -20,4 +20,5 @@ cc_library(
   SRCS dist_mapper.cc
   DEPS device_mesh auto_parallel_proto phi_enforce)
 
-cc_library(auto_parallel DEPS device_mesh process_mesh dist_attr dist_mapper)
+cc_library(auto_parallel DEPS device_mesh process_mesh dist_attr dist_mapper
+           dist_tensor_spec)
paddle/phi/core/utils/data_type.h

@@ -229,7 +229,7 @@ inline ncclDataType_t ToNCCLDataType(DataType type) {
     return ncclInt8;
   } else if (type == DataType::BOOL) {
     return ncclUint8;
-#if NCCL_VERSION_CODE >= 21000
+#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
   } else if (type == DataType::BFLOAT16) {
     return ncclBfloat16;
 #endif
paddle/phi/kernels/gpu/activation_kernel.cu

@@ -274,4 +274,5 @@ PD_REGISTER_KERNEL(selu,
                    phi::SeluKernel,
                    float,
                    double,
                    phi::dtype::float16,
+                   phi::dtype::bfloat16) {}
paddle/phi/kernels/gpu/selu_grad_kernel.cu

@@ -24,4 +24,5 @@ PD_REGISTER_KERNEL(selu_grad,
                    phi::SeluGradKernel,
                    float,
                    double,
                    phi::dtype::float16,
+                   phi::dtype::bfloat16) {}
paddle/phi/kernels/xpu/nll_loss_grad_kernel.cc (new file, mode 100644)

// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/phi/kernels/nll_loss_grad_kernel.h"

#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/core/kernel_registry.h"

namespace phi {

template <typename T, typename Context>
void NllLossGradKernel(const Context& dev_ctx,
                       const DenseTensor& x,
                       const DenseTensor& label,
                       const paddle::optional<DenseTensor>& weight,
                       const DenseTensor& total_weight,
                       const DenseTensor& d_out,
                       int64_t ignore_index,
                       const std::string& reduction,
                       DenseTensor* d_x) {
  using XPUType = typename XPUTypeTrait<T>::Type;
  const auto& label_type = label.dtype();
  bool label_type_match =
      label_type == phi::DataType::INT32 || label_type == phi::DataType::INT64;
  PADDLE_ENFORCE_EQ(label_type_match,
                    true,
                    phi::errors::InvalidArgument(
                        "Input(Label) holds the wrong type, it holds %s, but "
                        "desires to be %s or %s",
                        label_type,
                        phi::DataType::INT32,
                        phi::DataType::INT64));

  auto d_out_data = d_out.data<XPUType>();
  auto d_x_data = dev_ctx.template Alloc<XPUType>(d_x);
  auto d_x_dims = d_x->dims();
  std::vector<int64_t> d_x_shape = phi::vectorize<int64_t>(d_x_dims);
  auto weight_data =
      weight.get_ptr() ? weight.get_ptr()->data<float>() : nullptr;
  int64_t reduction_id = 0;
  if (reduction == "none") {
    reduction_id = 0;
  } else if (reduction == "mean") {
    reduction_id = 1;
  } else if (reduction == "sum") {
    reduction_id = 2;
  }
  auto total_weight_data = total_weight.data<XPUType>();
  int r;
  if (label_type == phi::DataType::INT32) {
    const int* label_data = label.data<int>();
    r = xpu::nll_loss_grad(dev_ctx.x_context(),
                           d_out_data,
                           d_x_data,
                           d_x_shape,
                           label_data,
                           weight_data,
                           reduction_id,
                           ignore_index,
                           total_weight_data);
  } else if (label_type == phi::DataType::INT64) {
    const int64_t* label_data = label.data<int64_t>();
    r = xpu::nll_loss_grad(dev_ctx.x_context(),
                           d_out_data,
                           d_x_data,
                           d_x_shape,
                           label_data,
                           weight_data,
                           reduction_id,
                           ignore_index,
                           total_weight_data);
  }
  PADDLE_ENFORCE_XDNN_SUCCESS(r, "nll_loss_grad");
}

}  // namespace phi

// TODO(xiongkun): add the non-raw kernel register here.
PD_REGISTER_KERNEL(
    nll_loss_grad, XPU, ALL_LAYOUT, phi::NllLossGradKernel, float) {}
paddle/phi/kernels/xpu/nll_loss_kernel.cc (new file, mode 100644)

// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/phi/kernels/nll_loss_kernel.h"

#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/core/kernel_registry.h"

namespace phi {

template <typename T, typename Context>
void NllLossRawKernel(const Context& dev_ctx,
                      const DenseTensor& x,
                      const DenseTensor& label,
                      const paddle::optional<DenseTensor>& weight,
                      int64_t ignore_index,
                      const std::string& reduction,
                      DenseTensor* out,
                      DenseTensor* total_weight) {
  using XPUType = typename XPUTypeTrait<T>::Type;
  const auto& label_type = label.dtype();
  bool label_type_match =
      label_type == phi::DataType::INT32 || label_type == phi::DataType::INT64;
  PADDLE_ENFORCE_EQ(label_type_match,
                    true,
                    phi::errors::InvalidArgument(
                        "Input(Label) holds the wrong type, it holds %s, but "
                        "desires to be %s or %s",
                        label_type,
                        phi::DataType::INT32,
                        phi::DataType::INT64));

  auto x_data = x.data<XPUType>();
  auto out_data = dev_ctx.template Alloc<XPUType>(out);
  auto weight_data =
      weight.get_ptr() ? weight.get_ptr()->data<XPUType>() : nullptr;
  auto total_weight_data = dev_ctx.template Alloc<XPUType>(total_weight);
  auto x_dims = x.dims();
  std::vector<int64_t> x_shape = phi::vectorize<int64_t>(x_dims);
  int64_t reduction_id = 0;
  if (reduction == "none") {
    reduction_id = 0;
  } else if (reduction == "mean") {
    reduction_id = 1;
  } else if (reduction == "sum") {
    reduction_id = 2;
  }
  int r;
  if (label_type == phi::DataType::INT32) {
    const int* label_data = label.data<int>();
    r = xpu::nll_loss(dev_ctx.x_context(),
                      x_data,
                      out_data,
                      total_weight_data,
                      x_shape,
                      label_data,
                      weight_data,
                      reduction_id,
                      ignore_index);
  } else if (label_type == phi::DataType::INT64) {
    const int64_t* label_data = label.data<int64_t>();
    r = xpu::nll_loss(dev_ctx.x_context(),
                      x_data,
                      out_data,
                      total_weight_data,
                      x_shape,
                      label_data,
                      weight_data,
                      reduction_id,
                      ignore_index);
  }
  PADDLE_ENFORCE_XDNN_SUCCESS(r, "nll_loss");
}

}  // namespace phi

// TODO(xiongkun): add the non-raw kernel register here.
PD_REGISTER_KERNEL(nll_loss, XPU, ALL_LAYOUT, phi::NllLossRawKernel, float) {}
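Both new kernels translate the reduction string into an integer id (none=0, mean=1, sum=2) and dispatch on the label dtype before calling into xpu::nll_loss / xpu::nll_loss_grad. A hedged sketch of exercising them through the public API, assuming a PaddlePaddle build with XPU support and an available device:

# Assumes an XPU build of PaddlePaddle and an attached XPU device.
import paddle
import paddle.nn.functional as F

paddle.set_device('xpu')
logits = paddle.randn([4, 10])
logits.stop_gradient = False
log_probs = F.log_softmax(logits, axis=-1)
labels = paddle.randint(0, 10, [4], dtype='int64')  # int32 labels also accepted
loss = F.nll_loss(log_probs, labels, reduction='mean')  # reduction_id == 1
loss.backward()  # exercises the new nll_loss_grad kernel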
paddle/scripts/paddle_build.sh

@@ -2235,14 +2235,70 @@ set +x
            fi
        done <<< "$test_cases";
        card_test "$single_card_tests" 1
+       failed_test_lists=''
        collect_failed_tests
+       xputest_error=0
+       retry_unittests_record=''
+       retry_time=3
+       exec_times=0
+       exec_time_array=('first' 'second' 'third')
+       exec_retry_threshold=10
+       is_retry_execuate=0
+       if [ -n "$failed_test_lists" ]; then
+           xputest_error=1
+           need_retry_ut_str=$(echo "$failed_test_lists" | grep -oEi "\- .+\(" | sed 's/(//' | sed 's/- //')
+           need_retry_ut_arr=(${need_retry_ut_str})
+           need_retry_ut_count=${#need_retry_ut_arr[@]}
+           retry_unittests=$(echo "$failed_test_lists" | grep -oEi "\- .+\(" | sed 's/(//' | sed 's/- //')
+           if [ $need_retry_ut_count -lt $exec_retry_threshold ]; then
+               while ( [ $exec_times -lt $retry_time ] )
+                   do
+                       set +e
+                       retry_unittests_record="$retry_unittests_record$failed_test_lists"
+                       failed_test_lists_ult=`echo "${failed_test_lists}"`
+                       set -e
+                       if [[ "${exec_times}" == "1" ]]; then
+                           if [[ "${failed_test_lists}" == "" ]]; then
+                               break
+                           else
+                               retry_unittests=$(echo "$failed_test_lists" | grep -oEi "\- .+\(" | sed 's/(//' | sed 's/- //')
+                           fi
+                       fi
+                       echo "========================================="
+                       echo "This is the ${exec_time_array[$exec_times]} time to re-run"
+                       echo "========================================="
+                       echo "The following unittest will be re-run:"
+                       echo "${retry_unittests}"
+                       echo "========================================="
+                       retry_unittests_regular=''
+                       for line in ${retry_unittests[@]} ;
+                           do
+                               if [[ "$retry_unittests_regular" == "" ]]; then
+                                   retry_unittests_regular="^$line$"
+                               else
+                                   retry_unittests_regular="$retry_unittests_regular|^$line$"
+                               fi
+                           done
+                       rm -f $tmp_dir/*
+                       failed_test_lists=''
+                       ctest -R "($retry_unittests_regular)" --output-on-failure -j $2 | tee $tmpfile
+                       collect_failed_tests
+                       exec_times=$[$exec_times+1]
+                   done
+           else
+               # There are more than 10 failed unit tests, so no unit test retry
+               is_retry_execuate=1
+           fi
+       fi
        set -x
        ut_endTime_s=`date +%s`
        echo "XPU testCase Time: $[ $ut_endTime_s - $ut_startTime_s ]s"
        python ${PADDLE_ROOT}/build/test/xpu/get_test_cover_info.py
        unset XPU_OP_LIST_DIR
-       if [[ "$EXIT_CODE" != "0" ]]; then
-           exit 8;
+       if [ "$xputest_error" != 0 ]; then
+           show_ut_retry_result
        fi
    }
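The core of the retry block is the regular expression it feeds to ctest -R: failed test names are joined into an anchored alternation so that only those tests re-run. A minimal standalone sketch of that construction, with made-up test names:

# Standalone sketch of the retry-regex construction above; names are made up.
retry_unittests="test_foo_op
test_bar_op"
retry_unittests_regular=''
for line in ${retry_unittests[@]}; do
    if [[ "$retry_unittests_regular" == "" ]]; then
        retry_unittests_regular="^$line$"
    else
        retry_unittests_regular="$retry_unittests_regular|^$line$"
    fi
done
echo "$retry_unittests_regular"   # prints: ^test_foo_op$|^test_bar_op$
# ctest -R "($retry_unittests_regular)" --output-on-failure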
python/paddle/distributed/auto_parallel/dist_attribute.py

@@ -12,5 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License
 
+from paddle.fluid.core import DistTensorSpec  # noqa: F401
 from paddle.fluid.core import OperatorDistAttr  # noqa: F401
 from paddle.fluid.core import TensorDistAttr  # noqa: F401
python/paddle/distributed/auto_parallel/operators/dist_matmul.py

@@ -105,6 +105,18 @@ def _update_dims_mapping_for_matmul(dist_op):
     changed = False
     op_desc = dist_op.serial_op.desc
     op_dist_attr = dist_op.dist_attr
+
+    # test DistTensorSpec
+    # input_name_list = []
+    # output_name_list = []
+    # input_name_list.append(op_desc.input('X')[0])
+    # input_name_list.append(op_desc.input('Y')[0])
+    # output_name_list.append(op_desc.output('Out')[0])
+    # attr_name_list = ['trans_x', 'trans_y']
+    # input_specs, output_specs, attrs = wrap_data_for_completion(
+    #     dist_op, input_name_list, output_name_list, attr_name_list
+    # )
+
     x_name = op_desc.input('X')[0]
     y_name = op_desc.input('Y')[0]
     out_name = op_desc.output('Out')[0]
python/paddle/distributed/auto_parallel/utils.py

@@ -26,7 +26,7 @@ from paddle.framework import core
 from paddle.framework.io_utils import is_belong_to_optimizer, is_parameter
 from paddle.static import Variable
 
-from .dist_attribute import OperatorDistAttr, TensorDistAttr
+from .dist_attribute import DistTensorSpec, OperatorDistAttr, TensorDistAttr
 from .process_group import get_all_process_groups
 from .process_mesh import ProcessMesh
@@ -2357,50 +2357,64 @@ def is_dep_skip_op(op):
     return False
 
-# def wrap_data_for_completion(
-#     dist_op: DistributedOperator,
-#     input_names: list,
-#     output_names: list,
-#     attr_names: list
-# ):
-#     """
-#     Get data used in inferring distributed attributes, including:
-#       1. DistTensorSpec for each input and output tensor of this dist_op.
-#       2. Operator attributes of this dist_op, e.g. transpose_x in matmul op.
-#
-#     Args:
-#       dist_op: the DistributedOperator
-#       input_names: list, name of the dist_op's input tensors
-#       output_names: list, name of the dist_op's output tensors
-#       attr_names: list, attribute name of the dist_op's corresponding serial op
-#
-#     Returns:
-#       input_specs: list, DistTensorSpec for each input tensor of the dist_op
-#       output_specs: list, DistTensorSpec for each output tensor of the dist_op
-#       attrs: dict, attribute map of the dist op
-#     """
-#
-#     input_specs = []
-#     output_specs = []
-#     attrs = {}
-#
-#     serial_op = dist_op.serial_op
-#
-#     # Construct each input tensor's DistTensorSpec with shape and dist_attr
-#     for name in input_names:
-#         tensor_dist_attr = dist_op.dist_attr.get_input_dist_attr(name)
-#         var = serial_op.block._var_recursive(name)
-#         tensor_shape = var.shape
-#         dist_spec = DistTensorSpec(tensor_shape, tensor_dist_attr)
-#         input_specs.append(dist_spec)
-#
-#     # Construct each output tensor's DistTensorSpec with shape and dist_attr
-#     for name in output_names:
-#         tensor_dist_attr = dist_op.dist_attr.get_output_dist_attr(name)
-#         var = serial_op.block._var_recursive(name)
-#         tensor_shape = var.shape
-#         dist_spec = DistTensorSpec(tensor_shape, tensor_dist_attr)
-#         output_specs.append(dist_spec)
-#
-#     for attr_name in attr_names:
-#         attrs[attr_name] = serial_op.desc.attr(attr_name)
+def wrap_data_for_completion(
+    dist_op, input_names: list, output_names: list, attr_names: list
+):
+    """
+    Get data used in inferring distributed attributes, including:
+      1. DistTensorSpec for each input and output tensor of this dist_op.
+      2. Operator attributes of this dist_op, e.g. transpose_x in matmul op.
+
+    Args:
+      dist_op: the DistributedOperator
+      input_names: list, name of the dist_op's input tensors
+      output_names: list, name of the dist_op's output tensors
+      attr_names: list, attribute name of the dist_op's corresponding serial op
+
+    Returns:
+      input_specs: list, DistTensorSpec for each input tensor of the dist_op
+      output_specs: list, DistTensorSpec for each output tensor of the dist_op
+      attrs: dict, attribute map of the dist op
+
+    Usage:
+      op_desc = dist_op.serial_op.desc
+      input_name_list = []
+      output_name_list = []
+      input_name_list.append(op_desc.input('X')[0])  # 'X' is the arg name for op
+      input_name_list.append(op_desc.input('Y')[0])
+      output_name_list.append(op_desc.output('Out')[0])
+      attr_name_list = ['trans_x', 'trans_y']
+      input_specs, output_specs, attrs = wrap_data_for_completion(
+          dist_op,
+          input_name_list,
+          output_name_list,
+          attr_name_list)
+    """
+
+    input_specs = []
+    output_specs = []
+    attrs = {}
+
+    serial_op = dist_op.serial_op
+
+    # Construct each input tensor's DistTensorSpec with shape and dist_attr
+    for name in input_names:
+        tensor_dist_attr = dist_op.dist_attr.get_input_dist_attr(name)
+        var = serial_op.block._var_recursive(name)
+        tensor_shape = var.shape
+        dist_spec = DistTensorSpec(tensor_shape, tensor_dist_attr)
+        input_specs.append(dist_spec)
+
+    # Construct each output tensor's DistTensorSpec with shape and dist_attr
+    for name in output_names:
+        tensor_dist_attr = dist_op.dist_attr.get_output_dist_attr(name)
+        var = serial_op.block._var_recursive(name)
+        tensor_shape = var.shape
+        dist_spec = DistTensorSpec(tensor_shape, tensor_dist_attr)
+        output_specs.append(dist_spec)
+
+    for attr_name in attr_names:
+        attrs[attr_name] = serial_op.desc.attr(attr_name)
+
+    return input_specs, output_specs, attrs
python/paddle/fluid/tests/unittests/CMakeLists.txt

@@ -1116,7 +1116,15 @@ set(TEST_CINN_OPS
     test_tile_op
     test_roll_op
     test_sum_op
-    test_elementwise_min_op)
+    test_elementwise_min_op
+    test_bitwise_op
+    test_compare_op
+    test_shape_op
+    test_assign_value_op
+    test_lookup_table_op
+    test_lookup_table_v2_op
+    test_norm_op
+    test_one_hot_v2_op)
 
 foreach(TEST_CINN_OPS ${TEST_CINN_OPS})
   if(WITH_CINN)
python/paddle/fluid/tests/unittests/test_assign_value_op.py

@@ -49,7 +49,7 @@ class TestAssignValueOp(eager_op_test.OpTest):
         self.attrs["fp32_values"] = [float(v) for v in self.value.flat]
 
     def test_forward(self):
-        self.check_output()
+        self.check_output(check_cinn=True)
 
 
 class TestAssignValueOp2(TestAssignValueOp):
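The test files that follow all make the same mechanical change: passing check_cinn=True asks OpTest to additionally build the operator with the CINN compiler and compare its outputs (and, where the flag is threaded through check_grad, its gradients) against the reference results. Sketched on a hypothetical case:

# Sketch of the recurring change; MyOpTest is a hypothetical OpTest subclass.
class MyOpTest(OpTest):
    def test_check_output(self):
        # also runs the CINN-compiled kernel and compares results
        self.check_output(check_cinn=True)

    def test_check_grad(self):
        self.check_grad(['X'], 'Out', check_cinn=True)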
python/paddle/fluid/tests/unittests/test_bitwise_op.py

@@ -43,7 +43,7 @@ class TestBitwiseAnd(OpTest):
         self.outputs = {'Out': out}
 
     def test_check_output(self):
-        self.check_output()
+        self.check_output(check_cinn=True)
 
     def test_check_grad(self):
         pass
@@ -150,7 +150,7 @@ class TestBitwiseOr(OpTest):
         self.outputs = {'Out': out}
 
     def test_check_output(self):
-        self.check_output()
+        self.check_output(check_cinn=True)
 
     def test_check_grad(self):
         pass
@@ -258,7 +258,7 @@ class TestBitwiseXor(OpTest):
         self.outputs = {'Out': out}
 
     def test_check_output(self):
-        self.check_output()
+        self.check_output(check_cinn=True)
 
     def test_check_grad(self):
         pass
@@ -363,7 +363,7 @@ class TestBitwiseNot(OpTest):
         self.outputs = {'Out': out}
 
     def test_check_output(self):
-        self.check_output()
+        self.check_output(check_cinn=True)
 
     def test_check_grad(self):
         pass
python/paddle/fluid/tests/unittests/test_compare_op.py

@@ -35,7 +35,7 @@ def create_test_class(op_type, typename, callback):
             self.op_type = op_type
 
         def test_output(self):
-            self.check_output()
+            self.check_output(check_cinn=True)
 
         def test_errors(self):
             paddle.enable_static()
@@ -460,7 +460,7 @@ def create_bf16_case(op_type, callback):
             self.outputs = {'Out': real_result}
 
         def test_check_output(self):
-            self.check_output()
+            self.check_output(check_cinn=True)
 
     cls_name = f"BF16TestCase_{op_type}"
     TestCompareOpBF16Op.__name__ = cls_name
python/paddle/fluid/tests/unittests/test_expand_v2_op.py

@@ -44,7 +44,7 @@ class TestExpandV2OpRank1(OpTest):
         self.expand_times = [1]
 
     def test_check_output(self):
-        self.check_output()
+        self.check_output(check_cinn=self.enable_cinn)
 
     def test_check_grad(self):
         self.check_grad(['X'], 'Out', check_prim=True)
@@ -107,10 +107,10 @@ class TestExpandV2OpRank1_tensor_attr(OpTest):
         self.infer_expand_shape = [-1]
 
     def test_check_output(self):
-        self.check_output()
+        self.check_output(check_cinn=True)
 
     def test_check_grad(self):
-        self.check_grad(['X'], 'Out')
+        self.check_grad(['X'], 'Out', check_cinn=True)
 
 
 class TestExpandV2OpRank2_Corner_tensor_attr(TestExpandV2OpRank1_tensor_attr):
@@ -144,10 +144,10 @@ class TestExpandV2OpRank1_tensor(OpTest):
         self.expand_shape = [2, 100]
 
     def test_check_output(self):
-        self.check_output()
+        self.check_output(check_cinn=True)
 
     def test_check_grad(self):
-        self.check_grad(['X'], 'Out')
+        self.check_grad(['X'], 'Out', check_cinn=True)
 
 
 # Situation 4: input x is Integer
@@ -165,7 +165,7 @@ class TestExpandV2OpInteger(OpTest):
         self.outputs = {'Out': output}
 
     def test_check_output(self):
-        self.check_output()
+        self.check_output(check_cinn=True)
 
 
 # Situation 5: input x is Bool
@@ -181,7 +181,7 @@ class TestExpandV2OpBoolean(OpTest):
         self.outputs = {'Out': output}
 
     def test_check_output(self):
-        self.check_output()
+        self.check_output(check_cinn=True)
 
 
 # Situation 6: input x is Integer
@@ -199,7 +199,7 @@ class TestExpandV2OpInt64_t(OpTest):
         self.outputs = {'Out': output}
 
     def test_check_output(self):
-        self.check_output()
+        self.check_output(check_cinn=True)
 
 
 # Situation 7: input x is Float16
@@ -218,7 +218,7 @@ class TestExpandV2FP16Op(OpTest):
         self.outputs = {'Out': output}
 
     def test_check_output(self):
-        self.check_output()
+        self.check_output(check_cinn=True)
 
     def test_check_grad(self):
         self.check_grad(['X'], 'Out', check_prim=True)
@@ -245,7 +245,7 @@ class TestExpandV2BF16Op(OpTest):
     def test_check_output(self):
         place = core.CUDAPlace(0)
-        self.check_output_with_place(place)
+        self.check_output_with_place(place, check_cinn=True)
 
     def test_check_grad(self):
         place = core.CUDAPlace(0)
python/paddle/fluid/tests/unittests/test_lookup_table_op.py

@@ -39,10 +39,10 @@ class TestLookupTableOp(OpTest):
         self.outputs = {'Out': table[ids]}
 
     def test_check_output(self):
-        self.check_output()
+        self.check_output(check_cinn=True)
 
     def test_check_grad(self):
-        self.check_grad(['W'], 'Out', no_grad_set=set('Ids'))
+        self.check_grad(['W'], 'Out', no_grad_set=set('Ids'), check_cinn=True)
 
 
 class TestLookupTableOpWithTensorIds(OpTest):
@@ -56,10 +56,10 @@ class TestLookupTableOpWithTensorIds(OpTest):
         self.outputs = {'Out': table[ids.flatten()].reshape((2, 4, 5, 31))}
 
     def test_check_output(self):
-        self.check_output()
+        self.check_output(check_cinn=True)
 
     def test_check_grad(self):
-        self.check_grad(['W'], 'Out', no_grad_set=set('Ids'))
+        self.check_grad(['W'], 'Out', no_grad_set=set('Ids'), check_cinn=True)
 
 
 @skip_check_grad_ci(
@@ -73,7 +73,7 @@ class TestLookupTableOpWithPadding(TestLookupTableOp):
         padding_idx = np.random.choice(ids, 1)[0]
         self.outputs['Out'][ids == padding_idx] = np.zeros(31)
         self.attrs = {'padding_idx': int(padding_idx)}
-        self.check_output()
+        self.check_output(check_cinn=True)
 
 
 @skip_check_grad_ci(
@@ -88,7 +88,7 @@ class TestLookupTableOpWithTensorIdsAndPadding(TestLookupTableOpWithTensorIds):
         padding_idx = np.random.choice(flatten_idx, 1)[0]
         self.outputs['Out'][np.squeeze(ids == padding_idx)] = np.zeros(31)
         self.attrs = {'padding_idx': padding_idx}
-        self.check_output()
+        self.check_output(check_cinn=True)
 
 
 class TestLookupTableWIsSelectedRows(unittest.TestCase):
@@ -212,7 +212,7 @@ class TestLookupTableOpInt8(OpTest):
         self.outputs = {'Out': table[ids]}
 
     def test_check_output(self):
-        self.check_output()
+        self.check_output(check_cinn=True)
 
     def test_check_grad(self):
         # since int8 type only be used in test and inference, there is
@@ -233,7 +233,7 @@ class TestLookupTableOpWithTensorIdsInt8(OpTest):
         self.outputs = {'Out': table[ids.flatten()].reshape((2, 4, 5, 31))}
 
     def test_check_output(self):
-        self.check_output()
+        self.check_output(check_cinn=True)
 
     def test_check_grad(self):
         # since int8 type only be used in test and inference, there is
@@ -247,7 +247,7 @@ class TestLookupTableOpWithPaddingInt8(TestLookupTableOpInt8):
         padding_idx = np.random.choice(ids, 1)[0]
         self.outputs['Out'][ids == padding_idx] = np.zeros(31)
         self.attrs = {'padding_idx': int(padding_idx)}
-        self.check_output()
+        self.check_output(check_cinn=True)
 
     def test_check_grad(self):
         # Since paddings are not trainable and fixed in forward, the gradient of
@@ -264,7 +264,7 @@ class TestLookupTableOpWithTensorIdsAndPaddingInt8(
         padding_idx = np.random.choice(flatten_idx, 1)[0]
         self.outputs['Out'][np.squeeze(ids == padding_idx)] = np.zeros(31)
         self.attrs = {'padding_idx': padding_idx}
-        self.check_output()
+        self.check_output(check_cinn=True)
 
     def test_check_grad(self):
         # Since paddings are not trainable and fixed in forward, the gradient of
@@ -354,7 +354,7 @@ class TestLookupTableOpInt16(OpTest):
         self.outputs = {'Out': table[ids]}
 
     def test_check_output(self):
-        self.check_output()
+        self.check_output(check_cinn=True)
 
 
 @skip_check_grad_ci(reason="Int16 type only be used in test and inference.")
@@ -371,7 +371,7 @@ class TestLookupTableOpWithTensorIdsInt16(OpTest):
         self.outputs = {'Out': table[ids.flatten()].reshape((2, 4, 5, 31))}
 
     def test_check_output(self):
-        self.check_output()
+        self.check_output(check_cinn=True)
 
 
 @skip_check_grad_ci(reason="Int16 type only be used in test and inference.")
@@ -381,7 +381,7 @@ class TestLookupTableOpWithPaddingInt16(TestLookupTableOpInt16):
         padding_idx = np.random.choice(ids, 1)[0]
         self.outputs['Out'][ids == padding_idx] = np.zeros(31)
         self.attrs = {'padding_idx': int(padding_idx)}
-        self.check_output()
+        self.check_output(check_cinn=True)
 
 
 @skip_check_grad_ci(reason="Int16 type only be used in test and inference.")
@@ -394,7 +394,7 @@ class TestLookupTableOpWithTensorIdsAndPaddingInt16(
         padding_idx = np.random.choice(flatten_idx, 1)[0]
         self.outputs['Out'][np.squeeze(ids == padding_idx)] = np.zeros(31)
         self.attrs = {'padding_idx': padding_idx}
-        self.check_output()
+        self.check_output(check_cinn=True)
 
 
 class TestLookupTableWIsSelectedRowsInt16(unittest.TestCase):
python/paddle/fluid/tests/unittests/test_lookup_table_v2_op.py

@@ -56,10 +56,10 @@ class TestLookupTableOp(OpTest):
         return "int64"
 
     def test_check_output(self):
-        self.check_output()
+        self.check_output(check_cinn=True)
 
     def test_check_grad(self):
-        self.check_grad(['W'], 'Out', no_grad_set=set('Ids'))
+        self.check_grad(['W'], 'Out', no_grad_set=set('Ids'), check_cinn=True)
 
 
 class TestLookupTableOpInt16(OpTest):
@@ -87,10 +87,10 @@ class TestLookupTableOpWithTensorIds(OpTest):
         self.outputs = {'Out': table[ids.flatten()].reshape((2, 4, 5, 31))}
 
     def test_check_output(self):
-        self.check_output()
+        self.check_output(check_cinn=True)
 
     def test_check_grad(self):
-        self.check_grad(['W'], 'Out', no_grad_set=set('Ids'))
+        self.check_grad(['W'], 'Out', no_grad_set=set('Ids'), check_cinn=True)
 
 
 @skip_check_grad_ci(
@@ -104,7 +104,7 @@ class TestLookupTableOpWithPadding(TestLookupTableOp):
         padding_idx = np.random.choice(ids, 1)[0]
         self.outputs['Out'][ids == padding_idx] = np.zeros(31)
         self.attrs = {'padding_idx': int(padding_idx)}
-        self.check_output()
+        self.check_output(check_cinn=True)
 
 
 @skip_check_grad_ci(
@@ -119,7 +119,7 @@ class TestLookupTableOpWithTensorIdsAndPadding(TestLookupTableOpWithTensorIds):
         padding_idx = np.random.choice(flatten_idx, 1)[0]
         self.outputs['Out'][np.squeeze(ids == padding_idx)] = np.zeros(31)
         self.attrs = {'padding_idx': padding_idx}
-        self.check_output()
+        self.check_output(check_cinn=True)
 
 
 class TestLookupTableWIsSelectedRows(unittest.TestCase):
python/paddle/fluid/tests/unittests/test_matmul_op.py

@@ -100,19 +100,29 @@ class Generator:
         self.outputs = {'Out': Out}
 
     def test_check_output(self):
-        self.check_output()
+        self.check_output(check_cinn=True)
 
     def test_check_grad_normal(self):
-        self.check_grad(['X', 'Y'], 'Out', max_relative_error=1e-3)
+        self.check_grad(
+            ['X', 'Y'], 'Out', max_relative_error=1e-3, check_cinn=True
+        )
 
     def test_check_grad_ignore_x(self):
         self.check_grad(
-            ['Y'], 'Out', max_relative_error=1e-3, no_grad_set=set("X")
+            ['Y'],
+            'Out',
+            max_relative_error=1e-3,
+            no_grad_set=set("X"),
+            check_cinn=True,
         )
 
     def test_check_grad_ignore_y(self):
         self.check_grad(
-            ['X'], 'Out', max_relative_error=1e-3, no_grad_set=set('Y')
+            ['X'],
+            'Out',
+            max_relative_error=1e-3,
+            no_grad_set=set('Y'),
+            check_cinn=True,
        )
python/paddle/fluid/tests/unittests/test_matmul_v2_op.py

@@ -103,13 +103,28 @@ class TestMatMulV2Op(OpTest):
         self.outputs = {'Out': result}
 
     def test_check_output(self):
-        self.check_output()
+        self.check_output(
+            check_cinn=self.check_cinn if hasattr(self, 'check_cinn') else True
+        )
 
     def test_check_grad(self):
         if core.is_compiled_with_rocm():
-            self.check_grad(['X', 'Y'], 'Out', max_relative_error=1e-2)
+            self.check_grad(
+                ['X', 'Y'],
+                'Out',
+                max_relative_error=1e-2,
+                check_cinn=self.check_cinn
+                if hasattr(self, 'check_cinn')
+                else True,
+            )
         else:
-            self.check_grad(['X', 'Y'], 'Out')
+            self.check_grad(
+                ['X', 'Y'],
+                'Out',
+                check_cinn=self.check_cinn
+                if hasattr(self, 'check_cinn')
+                else True,
+            )
 
 
 class TestMatMulOp2(TestMatMulV2Op):
@@ -290,6 +305,7 @@ class TestMatMulOp16(TestMatMulV2Op):
         self.y_shape = (1, 2, 2, 100, 2)
         self.trans_x = False
         self.trans_y = False
+        self.check_cinn = False
 
 
 class TestMatMulOp17(TestMatMulV2Op):
@@ -343,7 +359,13 @@ def create_test_fp16_class(parent, atol=0.001, max_relative_error=1.0):
            if core.is_compiled_with_cuda():
                place = core.CUDAPlace(0)
                if core.is_float16_supported(place):
-                    self.check_output_with_place(place, atol=atol)
+                    self.check_output_with_place(
+                        place,
+                        atol=atol,
+                        check_cinn=self.check_cinn
+                        if hasattr(self, 'check_cinn')
+                        else True,
+                    )
 
        def test_check_grad(self):
            place = core.CUDAPlace(0)
@@ -353,6 +375,9 @@ def create_test_fp16_class(parent, atol=0.001, max_relative_error=1.0):
                    ['X', 'Y'],
                    'Out',
                    max_relative_error=max_relative_error,
+                    check_cinn=self.check_cinn
+                    if hasattr(self, 'check_cinn')
+                    else True,
                )
 
    cls_name = "{}_{}".format(parent.__name__, "Fp16")
@@ -405,7 +430,13 @@ def create_test_bf16_class(parent, atol=0.01):
        def test_check_output(self):
            place = core.CUDAPlace(0)
-            self.check_output_with_place(place, atol=atol)
+            self.check_output_with_place(
+                place,
+                atol=atol,
+                check_cinn=self.check_cinn
+                if hasattr(self, 'check_cinn')
+                else True,
+            )
 
        def test_check_grad_x(self):
            place = core.CUDAPlace(0)
@@ -416,6 +447,9 @@ def create_test_bf16_class(parent, atol=0.01):
                'Out',
                no_grad_set={'Y'},
                user_defined_grads=[numeric_grads],
+                check_cinn=self.check_cinn
+                if hasattr(self, 'check_cinn')
+                else True,
            )
 
        def test_check_grad_y(self):
@@ -427,6 +461,9 @@ def create_test_bf16_class(parent, atol=0.01):
                'Out',
                no_grad_set={'X'},
                user_defined_grads=[numeric_grads],
+                check_cinn=self.check_cinn
+                if hasattr(self, 'check_cinn')
+                else True,
            )
 
        def test_check_grad(self):
@@ -596,7 +633,7 @@ class TestComplexMatMulOp(OpTest):
         self.grad_y = np.matmul(np.conj(self.x).T, self.grad_out)
 
     def test_check_output(self):
-        self.check_output()
+        self.check_output(check_cinn=False)
 
     def test_check_grad_normal(self):
         self.check_grad(
@@ -604,6 +641,7 @@ class TestComplexMatMulOp(OpTest):
             'Out',
             user_defined_grads=[self.grad_x, self.grad_y],
             user_defined_grad_outputs=[self.grad_out],
+            check_cinn=False,
         )
 
     def test_check_grad_ingore_x(self):
@@ -613,6 +651,7 @@ class TestComplexMatMulOp(OpTest):
             no_grad_set=set("X"),
             user_defined_grads=[self.grad_y],
             user_defined_grad_outputs=[self.grad_out],
+            check_cinn=False,
         )
 
     def test_check_grad_ingore_y(self):
@@ -622,6 +661,7 @@ class TestComplexMatMulOp(OpTest):
             no_grad_set=set('Y'),
             user_defined_grads=[self.grad_x],
             user_defined_grad_outputs=[self.grad_out],
+            check_cinn=False,
         )
@@ -662,7 +702,7 @@ class TestComplexMatMulOpBroadcast(OpTest):
         )
 
     def test_check_output(self):
-        self.check_output()
+        self.check_output(check_cinn=False)
 
     def test_check_grad_normal(self):
         self.check_grad(
@@ -670,6 +710,7 @@ class TestComplexMatMulOpBroadcast(OpTest):
             'Out',
             user_defined_grads=[self.grad_x, self.grad_y],
             user_defined_grad_outputs=[self.grad_out],
+            check_cinn=False,
         )
 
     def test_check_grad_ingore_x(self):
@@ -679,6 +720,7 @@ class TestComplexMatMulOpBroadcast(OpTest):
             no_grad_set=set("X"),
             user_defined_grads=[self.grad_y],
             user_defined_grad_outputs=[self.grad_out],
+            check_cinn=False,
         )
 
     def test_check_grad_ingore_y(self):
@@ -688,6 +730,7 @@ class TestComplexMatMulOpBroadcast(OpTest):
             no_grad_set=set('Y'),
             user_defined_grads=[self.grad_x],
             user_defined_grad_outputs=[self.grad_out],
+            check_cinn=False,
         )
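Here the flag is threaded through an optional attribute so individual cases can opt out: TestMatMulOp16 sets self.check_cinn = False, and the complex-dtype tests pass check_cinn=False outright. The repeated conditional is equivalent to this more compact form:

# Equivalent, more compact form of the expression repeated above.
check_cinn = getattr(self, 'check_cinn', True)
self.check_output(check_cinn=check_cinn)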
python/paddle/fluid/tests/unittests/test_norm_op.py
浏览文件 @
3631f064
...
...
@@ -48,10 +48,10 @@ class TestNormOp(OpTest):
self
.
python_out_sig
=
[
'Out'
]
def
test_check_output
(
self
):
self
.
check_output
()
self
.
check_output
(
check_cinn
=
True
)
def
test_check_grad
(
self
):
self
.
check_grad
([
'X'
],
'Out'
)
self
.
check_grad
([
'X'
],
'Out'
,
check_cinn
=
True
)
def
init_test_case
(
self
):
self
.
shape
=
[
2
,
3
,
4
,
5
]
...
...
@@ -109,7 +109,7 @@ class TestNormOp6(TestNormOp):
self
.
dtype
=
"float32"
def
test_check_grad
(
self
):
self
.
check_grad
([
'X'
],
'Out'
,
max_relative_error
=
0.008
)
self
.
check_grad
([
'X'
],
'Out'
,
max_relative_error
=
0.008
,
check_cinn
=
True
)
@
unittest
.
skipIf
(
...
...
@@ -120,11 +120,17 @@ class TestNormOp7(TestNormOp):
         self.dtype = "float16"

     def test_check_output(self):
-        self.check_output_with_place(fluid.core.CUDAPlace(0), atol=5e-2)
+        self.check_output_with_place(
+            fluid.core.CUDAPlace(0), atol=5e-2, check_cinn=True
+        )

     def test_check_grad(self):
         self.check_grad_with_place(
-            fluid.core.CUDAPlace(0), ['X'], 'Out', max_relative_error=0.05
+            fluid.core.CUDAPlace(0),
+            ['X'],
+            'Out',
+            max_relative_error=0.05,
+            check_cinn=True,
         )
...
...
@@ -147,7 +153,7 @@ class TestNormTestOp(OpTest):
     def test_check_output(self):
         # dynamic graph just supports float tensor
-        self.check_output(check_dygraph=True)
+        self.check_output(check_dygraph=True, check_cinn=True)

     def test_check_grad(self):
         pass
...
...
@@ -176,11 +182,17 @@ class TestNormBF16Op(OpTest):
         self.python_out_sig = ['Out']

     def test_check_output(self):
-        self.check_output_with_place(core.CUDAPlace(0), atol=1e-1)
+        self.check_output_with_place(
+            core.CUDAPlace(0), atol=1e-1, check_cinn=True
+        )

     def test_check_grad(self):
         self.check_grad_with_place(
-            core.CUDAPlace(0), ['X'], 'Out', max_relative_error=1e-2
+            core.CUDAPlace(0),
+            ['X'],
+            'Out',
+            max_relative_error=1e-2,
+            check_cinn=True,
         )

     def init_test_case(self):
...
...
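The norm tests above enable check_cinn=True with loose tolerances for the low-precision variants (atol=5e-2 for fp16, 1e-1 for bf16). For reference, a hedged numpy sketch of the l2-normalize semantics these checks are assumed to compare against (the epsilon handling in particular is an assumption, not taken from this diff):

    import numpy as np

    def l2_normalize(x, axis, epsilon=1e-10):
        # normalize each slice along `axis` to unit l2 norm
        norm = np.sqrt(np.sum(np.square(x), axis=axis, keepdims=True) + epsilon)
        return x / norm

    x = np.random.rand(2, 3, 4, 5).astype('float32')
    out = l2_normalize(x, axis=1)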
python/paddle/fluid/tests/unittests/test_one_hot_v2_op.py
View file @ 3631f064
...
...
@@ -49,7 +49,7 @@ class TestOneHotOp(OpTest):
         self.outputs = {'Out': (out, x_lod)}

     def test_check_output(self):
-        self.check_output()
+        self.check_output(check_cinn=True)


 class TestOneHotOp_attr(OpTest):
...
...
@@ -57,6 +57,7 @@ class TestOneHotOp_attr(OpTest):
         self.op_type = 'one_hot_v2'
         self.python_api = one_hot_wrapper
         depth = 10
+        depth_np = np.array(10).astype('int32')
         dimension = 12
         x_lod = [[4, 1, 3, 3]]
         x = [np.random.randint(0, depth - 1) for i in range(sum(x_lod[0]))]
...
...
@@ -69,12 +70,12 @@ class TestOneHotOp_attr(OpTest):
         for i in range(np.product(x.shape)):
             out[i, 0, x[i]] = 1.0

-        self.inputs = {'X': (x, x_lod)}
+        self.inputs = {'X': (x, x_lod), 'depth_tensor': depth_np}
         self.attrs = {'dtype': int(core.VarDesc.VarType.FP32), 'depth': depth}
         self.outputs = {'Out': (out, x_lod)}

     def test_check_output(self):
-        self.check_output()
+        self.check_output(check_cinn=True)


 class TestOneHotOp_default_dtype(OpTest):
...
...
@@ -98,7 +99,7 @@ class TestOneHotOp_default_dtype(OpTest):
         self.outputs = {'Out': (out, x_lod)}

     def test_check_output(self):
-        self.check_output()
+        self.check_output(check_cinn=True)


 class TestOneHotOp_default_dtype_attr(OpTest):
...
...
@@ -106,6 +107,7 @@ class TestOneHotOp_default_dtype_attr(OpTest):
         self.op_type = 'one_hot_v2'
         self.python_api = one_hot_wrapper
         depth = 10
+        depth_np = np.array(depth).astype('int32')
         dimension = 12
         x_lod = [[4, 1, 3, 3]]
         x = [np.random.randint(0, depth - 1) for i in range(sum(x_lod[0]))]
...
...
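Both one_hot hunks add a depth_tensor input next to the existing depth attribute, so the kernel's runtime-tensor depth path is exercised as well as the compile-time one. A minimal sketch of the reference output the tests build (shapes simplified to 2-D here; the real tests carry an extra LoD dimension):

    import numpy as np

    depth = 10
    depth_np = np.array(depth).astype('int32')  # fed as the 'depth_tensor' input
    x = np.array([1, 3, 0, 9])
    out = np.zeros((x.shape[0], depth)).astype('float32')
    out[np.arange(x.shape[0]), x] = 1.0  # one hot row per label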
python/paddle/fluid/tests/unittests/test_selu_op.py
View file @ 3631f064
...
...
@@ -15,7 +15,7 @@
 import unittest

 import numpy as np
-from eager_op_test import OpTest
+from eager_op_test import OpTest, convert_float_to_uint16

 import paddle
 import paddle.nn.functional as F
...
...
@@ -43,13 +43,15 @@ class SeluTest(OpTest):
         self.op_type = "selu"
         self.python_api = paddle.nn.functional.selu
         self.x_shape = [3, 5, 5, 10]
-        self.dtype = np.float64
         self.init_x_shape()
         self.init_dtype()

         alpha = 1.6732632423543772848170429916717
         scale = 1.0507009873554804934193349852946

-        x = np.random.normal(size=self.x_shape).astype(self.dtype)
+        if self.dtype == np.uint16:
+            x = np.random.normal(size=self.x_shape).astype(np.float32)
+        else:
+            x = np.random.normal(size=self.x_shape).astype(self.dtype)

         # Since zero point in selu is not differentiable, avoid randomize
...
...
@@ -58,6 +60,10 @@ class SeluTest(OpTest):
         out = ref_selu(x, scale, alpha)

-        self.inputs = {'X': x}
-        self.outputs = {'Out': out}
+        if self.dtype == np.uint16:
+            self.inputs = {'X': convert_float_to_uint16(x)}
+            self.outputs = {'Out': convert_float_to_uint16(out)}
+        else:
+            self.inputs = {'X': x}
+            self.outputs = {'Out': out}
...
...
@@ -70,7 +76,7 @@ class SeluTest(OpTest):
         pass

     def init_dtype(self):
-        pass
+        self.dtype = np.float64

     def test_check_output(self):
         self.check_output()
...
...
@@ -79,6 +85,27 @@ class SeluTest(OpTest):
         self.check_grad(['X'], 'Out')


+class SeluTestFP16OP(SeluTest):
+    def init_dtype(self):
+        self.dtype = np.float16
+
+
+@unittest.skipIf(
+    not core.is_compiled_with_cuda()
+    or not core.is_bfloat16_supported(core.CUDAPlace(0)),
+    "core is not compiled with CUDA and do not support bfloat16",
+)
+class SeluTestBF16OP(SeluTest):
+    def init_dtype(self):
+        self.dtype = np.uint16
+
+    def test_check_output(self):
+        self.check_output_with_place(core.CUDAPlace(0))
+
+    def test_check_grad(self):
+        self.check_grad_with_place(core.CUDAPlace(0), ['X'], 'Out')
+
+
 class TestSeluAPI(unittest.TestCase):
     # test paddle.nn.SELU, paddle.nn.functional.selu
     def setUp(self):
...
...
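The new SeluTestBF16OP follows OpTest's bfloat16 convention: data is generated in float32 and stored as uint16 bit patterns via convert_float_to_uint16. A sketch of that representation, assuming simple truncation to the high 16 bits (the real helper may round to nearest even):

    import numpy as np

    def to_bf16_bits(x_fp32):
        # reinterpret fp32 as uint32 and keep the upper half
        return (x_fp32.view(np.uint32) >> 16).astype(np.uint16)

    x = np.random.normal(size=(3, 5)).astype(np.float32)
    x_bf16 = to_bf16_bits(x)
    assert x_bf16.dtype == np.uint16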
python/paddle/fluid/tests/unittests/test_shape_op.py
View file @ 3631f064
...
...
@@ -36,7 +36,7 @@ class TestShapeOp(OpTest):
         self.dtype = np.float32

     def test_check_output(self):
-        self.check_output()
+        self.check_output(check_cinn=True)


 class case1(TestShapeOp):
...
...
@@ -125,7 +125,7 @@ class TestShapeOpBf16(OpTest):
     def test_check_output(self):
         place = core.CUDAPlace(0)
-        self.check_output_with_place(place)
+        self.check_output_with_place(place, check_cinn=True)


 class case1Bf16(TestShapeOpBf16):
...
...
python/paddle/fluid/tests/unittests/test_sum_op.py
View file @ 3631f064
...
...
@@ -62,10 +62,10 @@ class TestSumOp(OpTest):
         self.dtype = np.float64

     def test_check_output(self):
-        self.check_output(check_prim=True)
+        self.check_output(check_prim=True, check_cinn=True)

     def test_check_grad(self):
-        self.check_grad(['x0'], 'Out', check_prim=True)
+        self.check_grad(['x0'], 'Out', check_prim=True, check_cinn=True)


 class TestSelectedRowsSumOp(unittest.TestCase):
...
...
@@ -299,14 +299,14 @@ class TestFP16SumOp(TestSumOp):
     def test_check_output(self):
         place = core.CUDAPlace(0)
         if core.is_float16_supported(place):
-            self.check_output_with_place(place)
+            self.check_output_with_place(place, check_cinn=True)

     # FIXME: Because of the precision fp16, max_relative_error
     # should be 0.15 here.
     def test_check_grad(self):
         place = core.CUDAPlace(0)
         if core.is_float16_supported(place):
-            self.check_grad(['x0'], 'Out')
+            self.check_grad(['x0'], 'Out', check_cinn=True)


 def create_test_sum_fp16_class(parent):
...
...
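create_test_sum_fp16_class(parent) continues the factory pattern used throughout these files: derive a low-precision variant from a parent OpTest class and register it under a new name. A hedged sketch of that pattern (the names and the init_kernel_type hook are illustrative, not this file's actual code):

    import numpy as np

    def create_fp16_variant(parent):
        class Fp16Case(parent):
            def init_kernel_type(self):
                self.dtype = np.float16

        Fp16Case.__name__ = parent.__name__ + 'Fp16'
        return Fp16Case

    class Base:
        def init_kernel_type(self):
            self.dtype = np.float64

    Fp16Base = create_fp16_variant(Base)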
test/ir/inference/inference_pass_test.py
View file @ 3631f064
...
...
@@ -12,7 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import os
+import random
+import tempfile
 import unittest

 import numpy as np
...
...
@@ -41,7 +43,10 @@ class InferencePassTest(unittest.TestCase):
         self.dynamic_shape_params = None
         self.enable_lite = False
         self.lite_parameters = None
-        self.path = "./inference_pass/" + self.__class__.__name__ + "/"
+        self.temp_dir = tempfile.TemporaryDirectory()
+        self.path = os.path.join(
+            self.temp_dir.name, 'inference_pass', self.__class__.__name__
+        )
         np.random.seed(1)
+        random.seed(1)
...
...
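The setUp change above swaps a hard-coded ./inference_pass/<ClassName>/ directory for a per-test tempfile.TemporaryDirectory, which isolates concurrent test runs and can be removed wholesale. A minimal sketch of the pattern (the cleanup() call in tearDown is an assumption about how the suite releases it):

    import os
    import tempfile

    temp_dir = tempfile.TemporaryDirectory()
    path = os.path.join(temp_dir.name, 'inference_pass', 'MyTestCase')
    os.makedirs(path, exist_ok=True)
    # ... write the inference model and params under `path` ...
    temp_dir.cleanup()  # deletes everything under temp_dir.name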
test/ir/inference/test_trt_activation_pass.py
View file @ 3631f064
...
...
@@ -53,8 +53,9 @@ class TensorRTSubgraphPassActivationTest(InferencePassTest):
     def test_check_output(self):
         if core.is_compiled_with_cuda():
             use_gpu = True
-            if os.path.exists(self.path + "_opt_cache"):
-                shutil.rmtree(self.path + "_opt_cache")
+            opt_path = os.path.join(self.path, '_opt_cache')
+            if os.path.exists(opt_path):
+                shutil.rmtree(opt_path)
             if (
                 self.trt_parameters.precision == AnalysisConfig.Precision.Float32
...
...
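This hunk and the ones that follow all make the same fix: the old self.path ended with '/', so self.path + "_opt_cache" happened to name a subdirectory, but the new os.path.join-built path has no trailing separator, and plain concatenation would now name a sibling file instead. The difference at a glance:

    import os

    path = '/tmp/inference_pass/MyTest'      # no trailing '/'
    print(path + '_opt_cache')               # .../MyTest_opt_cache  (sibling)
    print(os.path.join(path, '_opt_cache'))  # .../MyTest/_opt_cache (child)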
test/ir/inference/test_trt_elementwise_op.py
View file @ 3631f064
...
...
@@ -53,8 +53,9 @@ class TensorRTSubgraphPassElementwiseBroadcastTest(InferencePassTest):
         return paddle.tensor.math.add(x=data1, y=data2)

     def test_check_output(self):
-        if os.path.exists(self.path + "_opt_cache"):
-            shutil.rmtree(self.path + "_opt_cache")
+        opt_path = os.path.join(self.path, '_opt_cache')
+        if os.path.exists(opt_path):
+            shutil.rmtree(opt_path)
         if core.is_compiled_with_cuda():
             use_gpu = True
             self.check_output_with_option(use_gpu)
...
...
test/ir/inference/test_trt_instance_norm_op.py
View file @ 3631f064
...
...
@@ -55,8 +55,9 @@ class TRTInstanceNormTest(InferencePassTest):
         self.fetch_list = [out]

     def check_output(self, remove_cache=False):
-        if remove_cache and os.path.exists(self.path + "_opt_cache"):
-            shutil.rmtree(self.path + "_opt_cache")
+        opt_path = os.path.join(self.path, '_opt_cache')
+        if remove_cache and os.path.exists(opt_path):
+            shutil.rmtree(opt_path)
         if core.is_compiled_with_cuda():
             use_gpu = True
             atol = 1e-5
...
...
test/ir/inference/test_trt_pool3d_op.py
View file @ 3631f064
...
...
@@ -84,8 +84,9 @@ class TensorRTPool3dTest(InferencePassTest):
         self.fetch_list = [pool_out]

     def check_output(self):
-        if os.path.exists(self.path + "_opt_cache"):
-            shutil.rmtree(self.path + "_opt_cache")
+        opt_path = os.path.join(self.path, '_opt_cache')
+        if os.path.exists(opt_path):
+            shutil.rmtree(opt_path)
         if core.is_compiled_with_cuda():
             use_gpu = True
             if self.precision == AnalysisConfig.Precision.Float32:
...
...
@@ -200,8 +201,9 @@ class TensorRTAdaptiveAvgPool3DTest(InferencePassTest):
         self.fetch_list = [pool_out]

     def check_output(self):
-        if os.path.exists(self.path + "_opt_cache"):
-            shutil.rmtree(self.path + "_opt_cache")
+        opt_path = os.path.join(self.path, '_opt_cache')
+        if os.path.exists(opt_path):
+            shutil.rmtree(opt_path)
         if core.is_compiled_with_cuda():
             use_gpu = True
             self.check_output_with_option(use_gpu)
...
...
@@ -300,8 +302,9 @@ class TensorRTAdaptiveMaxPool3DTest(InferencePassTest):
         self.fetch_list = [pool_out]

     def check_output(self):
-        if os.path.exists(self.path + "_opt_cache"):
-            shutil.rmtree(self.path + "_opt_cache")
+        opt_path = os.path.join(self.path, '_opt_cache')
+        if os.path.exists(opt_path):
+            shutil.rmtree(opt_path)
         if core.is_compiled_with_cuda():
             use_gpu = True
             self.check_output_with_option(use_gpu)
...
...
test/ir/inference/test_trt_pool_op.py
View file @ 3631f064
...
...
@@ -86,8 +86,9 @@ class TensorRTPoolTest(InferencePassTest):
         self.fetch_list = [out]

     def check_output(self):
-        if os.path.exists(self.path + "_opt_cache"):
-            shutil.rmtree(self.path + "_opt_cache")
+        opt_path = os.path.join(self.path, '_opt_cache')
+        if os.path.exists(opt_path):
+            shutil.rmtree(opt_path)
         if core.is_compiled_with_cuda():
             use_gpu = True
             if self.precision == AnalysisConfig.Precision.Float32:
...
...
test/ir/inference/test_trt_skip_layernorm_fuse_pass.py
View file @ 3631f064
...
...
@@ -60,8 +60,9 @@ class SkipLayernormFusePassTest0(InferencePassTest):
         return paddle.add(data1, data2)

     def test_check_output(self):
-        if os.path.exists(self.path + "_opt_cache"):
-            shutil.rmtree(self.path + "_opt_cache")
+        opt_path = os.path.join(self.path, '_opt_cache')
+        if os.path.exists(opt_path):
+            shutil.rmtree(opt_path)
         if core.is_compiled_with_cuda():
             use_gpu = True
             self.check_output_with_option(use_gpu, atol=0.01, rtol=0.00001)
...
...
@@ -107,8 +108,9 @@ class SkipLayernormFusePassTest1(InferencePassTest):
         return paddle.add(data1, data2)

     def test_check_output(self):
-        if os.path.exists(self.path + "_opt_cache"):
-            shutil.rmtree(self.path + "_opt_cache")
+        opt_path = os.path.join(self.path, '_opt_cache')
+        if os.path.exists(opt_path):
+            shutil.rmtree(opt_path)
         if core.is_compiled_with_cuda():
             use_gpu = True
             self.check_output_with_option(use_gpu, atol=0.01, rtol=0.00001)
...
...
@@ -154,8 +156,9 @@ class SkipLayernormFusePassTest2(InferencePassTest):
         return paddle.add(data1, data2)

     def test_check_output(self):
-        if os.path.exists(self.path + "_opt_cache"):
-            shutil.rmtree(self.path + "_opt_cache")
+        opt_path = os.path.join(self.path, '_opt_cache')
+        if os.path.exists(opt_path):
+            shutil.rmtree(opt_path)
         if core.is_compiled_with_cuda():
             use_gpu = True
             self.check_output_with_option(use_gpu, atol=0.1, rtol=0.00001)
...
...
@@ -201,8 +204,9 @@ class SkipLayernormFusePassTest3(InferencePassTest):
         return paddle.add(data1, data2)

     def test_check_output(self):
-        if os.path.exists(self.path + "_opt_cache"):
-            shutil.rmtree(self.path + "_opt_cache")
+        opt_path = os.path.join(self.path, '_opt_cache')
+        if os.path.exists(opt_path):
+            shutil.rmtree(opt_path)
         if core.is_compiled_with_cuda():
             use_gpu = True
             self.check_output_with_option(use_gpu, atol=0.1, rtol=0.00001)
...
...
test/ir/inference/test_trt_subgraph_pass.py
View file @ 3631f064
...
...
@@ -128,8 +128,9 @@ class TensorRTSubgraphPassSplitSerializeTest(InferencePassTest):
     def test_check_output(self):
         if paddle.is_compiled_with_cuda():
             use_gpu = True
-            if os.path.exists(self.path + "_opt_cache"):
-                shutil.rmtree(self.path + "_opt_cache")
+            opt_path = os.path.join(self.path, '_opt_cache')
+            if os.path.exists(opt_path):
+                shutil.rmtree(opt_path)
             self.check_output_with_option(use_gpu)
             self.assertTrue(
                 PassVersionChecker.IsCompatible('tensorrt_subgraph_pass')
...
...
@@ -164,8 +165,9 @@ class TensorRTSubgraphPassDynamicSplitFp16SerializeTest(InferencePassTest):
     def test_check_output(self):
         if paddle.is_compiled_with_cuda():
             use_gpu = True
-            if os.path.exists(self.path + "_opt_cache"):
-                shutil.rmtree(self.path + "_opt_cache")
+            opt_path = os.path.join(self.path, '_opt_cache')
+            if os.path.exists(opt_path):
+                shutil.rmtree(opt_path)
             self.check_output_with_option(use_gpu, 1e-3)
             self.assertTrue(
                 PassVersionChecker.IsCompatible('tensorrt_subgraph_pass')
...
...
@@ -313,8 +315,9 @@ class TensorRTSubgraphPassLayerNormDynamicTest(InferencePassTest):
         self.serialize = True

     def test_check_output(self):
-        if os.path.exists(self.path + "_opt_cache"):
-            shutil.rmtree(self.path + "_opt_cache")
+        opt_path = os.path.join(self.path, '_opt_cache')
+        if os.path.exists(opt_path):
+            shutil.rmtree(opt_path)
         if paddle.is_compiled_with_cuda():
             use_gpu = True
             self.check_output_with_option(use_gpu)
...
...
@@ -332,8 +335,9 @@ class TensorRTSubgraphPassLayerNormDynamicFP16Test(
         self.serialize = True

     def test_check_output(self):
-        if os.path.exists(self.path + "_opt_cache"):
-            shutil.rmtree(self.path + "_opt_cache")
+        opt_path = os.path.join(self.path, '_opt_cache')
+        if os.path.exists(opt_path):
+            shutil.rmtree(opt_path)
         if paddle.is_compiled_with_cuda():
             use_gpu = True
             self.check_output_with_option(use_gpu, atol=0.01, rtol=0.01)
...
...
@@ -406,8 +410,9 @@ class TensorRTSubgraphPassElementwiseSerializeTest(
         )

     def test_check_output(self):
-        if os.path.exists(self.path + "_opt_cache"):
-            shutil.rmtree(self.path + "_opt_cache")
+        opt_path = os.path.join(self.path, '_opt_cache')
+        if os.path.exists(opt_path):
+            shutil.rmtree(opt_path)
         super().test_check_output()
...
...
@@ -444,8 +449,9 @@ class TensorRTSubgraphPassElementwiseBroadcastDynamicTest(InferencePassTest):
         return paddle.add(x=data1, y=data2)

     def test_check_output(self):
-        if os.path.exists(self.path + "_opt_cache"):
-            shutil.rmtree(self.path + "_opt_cache")
+        opt_path = os.path.join(self.path, '_opt_cache')
+        if os.path.exists(opt_path):
+            shutil.rmtree(opt_path)
         if paddle.is_compiled_with_cuda():
             use_gpu = True
             self.check_output_with_option(use_gpu)
...
...
test/xpu/test_nll_loss_op_xpu.py
0 → 100644
View file @ 3631f064
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest

import numpy as np
from get_test_cover_info import (
    XPUOpTestWrapper,
    create_test_class,
    get_xpu_op_support_types,
)
from op_test_xpu import XPUOpTest

import paddle

paddle.enable_static()
def nll_loss_1d(
    logs, dtype, targets, weight=None, reduction='mean', ignore_index=-100
):
    input_shape = logs.shape
    N = input_shape[0]
    C = input_shape[1]
    out = np.zeros_like(targets).astype(dtype)
    total_weight = 0
    for i in range(N):
        cur_target = targets[i]
        if cur_target == ignore_index:
            out[i] = 0
            continue
        cur_weight = weight[cur_target] if weight is not None else 1
        total_weight += cur_weight
        out[i] = -logs[i][cur_target] * cur_weight
    if reduction == 'sum':
        out = np.sum(out)
        total_weight = np.array([total_weight]).astype(dtype)
        return {'Out': out, 'Total_weight': total_weight}
    elif reduction == 'mean':
        out = np.sum(out)
        if total_weight != 0:
            out /= total_weight
        total_weight = np.array([total_weight]).astype(dtype)
        return {'Out': out, 'Total_weight': total_weight}
    elif reduction == 'none':
        total_weight = np.array([0]).astype(dtype)
        return {'Out': out, 'Total_weight': total_weight}
def nll_loss_2d(
    logs, dtype, targets, weight=None, reduction='mean', ignore_index=-100
):
    input_shape = logs.shape
    N = input_shape[0]
    H = input_shape[2]
    W = input_shape[3]
    out = np.zeros_like(targets).astype(dtype)
    total_weight = 0
    for i in range(N):
        for h in range(H):
            for w in range(W):
                cur_target = targets[i][h][w]
                if cur_target == ignore_index:
                    out[i][h][w] = 0
                    continue
                cur_weight = weight[cur_target] if weight is not None else 1
                total_weight += cur_weight
                out[i][h][w] = -logs[i][cur_target][h][w] * cur_weight
    if reduction == 'sum':
        out = np.sum(out)
        total_weight = np.array([total_weight]).astype(dtype)
        return {'Out': out, 'Total_weight': total_weight}
    elif reduction == 'mean':
        out = np.sum(out)
        if total_weight != 0:
            out /= total_weight
        total_weight = np.array([total_weight]).astype(dtype)
        return {'Out': out, 'Total_weight': total_weight}
    elif reduction == 'none':
        total_weight = np.array([0]).astype(dtype)
        return {'Out': out, 'Total_weight': total_weight}
class XPUTestNLLLossOP(XPUOpTestWrapper):
    def __init__(self):
        self.op_name = 'nll_loss'
        self.use_dynamic_create_class = False

    class TestNLLLossOpBase1D(XPUOpTest):
        op_type = 'nll_loss'

        def setUp(self):
            self.dtype = self.in_type
            self.place = paddle.XPUPlace(0)
            self.set_attrs()
            self.set_inputs()
            self.inputs = {
                'X': self.x,
                'Label': self.label,
            }
            if self.weight is not None:
                self.inputs['Weight'] = self.weight
            self.outputs = nll_loss_1d(
                self.x,
                self.dtype,
                self.label,
                self.weight,
                self.attrs['reduction'],
            )

        def set_attrs(self):
            self.attrs = {'reduction': 'none'}

        def set_inputs(self):
            self.class_num = 3
            x_shape = [5, self.class_num]
            label_shape = [5]
            self.x = np.random.random(x_shape).astype(self.dtype)
            self.label = np.random.randint(
                low=0, high=self.class_num, size=label_shape
            ).astype(np.int64)
            self.weight = np.random.random(self.class_num).astype(self.dtype)

        def test_check_output(self):
            self.check_output_with_place(self.place)

        def test_check_grad(self):
            self.check_grad_with_place(self.place, ['X'], 'Out')
    class TestNLLLossOpWithWeightMean1D(TestNLLLossOpBase1D):
        def set_attrs(self):
            self.attrs = {'reduction': 'mean'}

    class TestNLLLossOpWithWeightSum1D(TestNLLLossOpBase1D):
        def set_attrs(self):
            self.attrs = {'reduction': 'sum'}

    class TestNLLLossOpWithoutWeightNone1D(TestNLLLossOpBase1D):
        def set_inputs(self):
            self.class_num = 3
            x_shape = [5, self.class_num]
            label_shape = [5]
            self.x = np.random.random(x_shape).astype(self.dtype)
            self.label = np.random.randint(
                low=0, high=self.class_num, size=label_shape
            ).astype(np.int64)
            self.weight = None

        def set_attrs(self):
            self.attrs = {'reduction': 'none'}

    class TestNLLLossOpWithoutWeightMean1D(TestNLLLossOpBase1D):
        def set_inputs(self):
            self.class_num = 3
            x_shape = [5, self.class_num]
            label_shape = [5]
            self.x = np.random.random(x_shape).astype(self.dtype)
            self.label = np.random.randint(
                low=0, high=self.class_num, size=label_shape
            ).astype(np.int64)
            self.weight = None

        def set_attrs(self):
            self.attrs = {'reduction': 'mean'}

    class TestNLLLossOpWithoutWeightSum1D(TestNLLLossOpBase1D):
        def set_inputs(self):
            self.class_num = 3
            x_shape = [5, self.class_num]
            label_shape = [5]
            self.x = np.random.random(x_shape).astype(self.dtype)
            self.label = np.random.randint(
                low=0, high=self.class_num, size=label_shape
            ).astype(np.int64)
            self.weight = None

        def set_attrs(self):
            self.attrs = {'reduction': 'sum'}
    class TestNLLLossOpBase2D(XPUOpTest):
        op_type = 'nll_loss'

        def setUp(self):
            self.dtype = self.in_type
            self.place = paddle.XPUPlace(0)
            self.set_attrs()
            self.set_inputs()
            self.inputs = {'X': self.x, 'Label': self.label}
            if self.weight is not None:
                self.inputs['Weight'] = self.weight
            self.outputs = nll_loss_2d(
                self.x,
                self.dtype,
                self.label,
                self.weight,
                self.attrs['reduction'],
            )

        def set_attrs(self):
            self.attrs = {'reduction': 'none'}

        def set_inputs(self):
            self.class_num = 3
            x_shape = [5, self.class_num, 7, 11]
            label_shape = [5, 7, 11]
            self.x = np.random.random(x_shape).astype(self.dtype)
            self.label = np.random.randint(
                low=0, high=self.class_num, size=label_shape
            ).astype(np.int64)
            self.weight = np.random.random(self.class_num).astype(self.dtype)

        def test_check_output(self):
            self.check_output_with_place(self.place)

        def test_check_grad(self):
            self.check_grad_with_place(self.place, ['X'], 'Out')
    class TestNLLLossOpWithWeightMean2D(TestNLLLossOpBase2D):
        def set_attrs(self):
            self.attrs = {'reduction': 'mean'}

    class TestNLLLossOpWithWeightSum2D(TestNLLLossOpBase2D):
        def set_attrs(self):
            self.attrs = {'reduction': 'sum'}

    class TestNLLLossOpWithoutWeightNone2D(TestNLLLossOpBase2D):
        def set_inputs(self):
            self.dtype = self.in_type
            self.class_num = 3
            x_shape = [5, self.class_num, 7, 11]
            label_shape = [5, 7, 11]
            self.x = np.random.random(x_shape).astype(self.dtype)
            self.label = np.random.randint(
                low=0, high=self.class_num, size=label_shape
            ).astype(np.int64)
            self.weight = None

        def set_attrs(self):
            self.attrs = {'reduction': 'none'}

    class TestNLLLossOpWithoutWeightMean2D(TestNLLLossOpBase2D):
        def set_inputs(self):
            self.dtype = self.in_type
            self.class_num = 3
            x_shape = [5, self.class_num, 7, 11]
            label_shape = [5, 7, 11]
            self.x = np.random.random(x_shape).astype(self.dtype)
            self.label = np.random.randint(
                low=0, high=self.class_num, size=label_shape
            ).astype(np.int64)
            self.weight = None

        def set_attrs(self):
            self.attrs = {'reduction': 'mean'}

    class TestNLLLossOpWithoutWeightSum2D(TestNLLLossOpBase2D):
        def set_inputs(self):
            self.dtype = self.in_type
            self.class_num = 3
            x_shape = [5, self.class_num, 7, 11]
            label_shape = [5, 7, 11]
            self.x = np.random.random(x_shape).astype(self.dtype)
            self.label = np.random.randint(
                low=0, high=self.class_num, size=label_shape
            ).astype(np.int64)
            self.weight = None

        def set_attrs(self):
            self.attrs = {'reduction': 'sum'}
support_types = get_xpu_op_support_types('nll_loss')
for stype in support_types:
    create_test_class(globals(), XPUTestNLLLossOP, stype)

if __name__ == '__main__':
    unittest.main()
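The reference helpers above return both outputs the op produces, 'Out' and 'Total_weight', and create_test_class then stamps out one concrete test class per dtype reported by get_xpu_op_support_types. A hedged usage sketch of the 1-D helper, assuming nll_loss_1d is in scope as defined in this file (values illustrative):

    import numpy as np

    logs = np.random.random((5, 3)).astype('float32')  # [N, C] log-probabilities
    targets = np.random.randint(0, 3, size=(5,)).astype(np.int64)
    weight = np.random.random(3).astype('float32')

    expected = nll_loss_1d(logs, 'float32', targets, weight, reduction='mean')
    print(expected['Out'], expected['Total_weight'])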
tools/check_file_diff_approvals.sh
View file @ 3631f064
...
...
@@ -344,7 +344,7 @@ fi
 OUTPUT_LOG=`echo "$ALL_ADDED_LINES" | grep -Ew "print|printf|fprintf|std::cout" || true`
 if [ "$OUTPUT_LOG" != "" ];then
     echo_line="print or std::cout is not recommended for direct use, please use loggin or glog. If it is necessary to use, please contact tianshuo78520a (Recommend) or zhangbo9674 review and approve.\n"
-    check_approval 1 tianshuo7852a zhangbo9674
+    check_approval 1 tianshuo78520a zhangbo9674
 fi

 HAS_MODIFIED_PHI_FILES=`git diff --name-only upstream/$BRANCH | grep "paddle/phi/" || true`
...
...
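For reference, grep -Ew treats the pattern as an extended regex and requires whole-word matches, so an identifier like printing does not trip the check while a bare printf( does. A Python sketch of the equivalent word-boundary match (an analogy, not the script's actual code):

    import re

    pattern = re.compile(r'\b(print|printf|fprintf|std::cout)\b')
    for line in ['  printing = 1;', '  printf("x");', '  std::cout << x;']:
        print(bool(pattern.search(line)), line)  # False, True, True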