diff --git a/.gitmodules b/.gitmodules index 4c3d2b21424dd6f541f407bf3d62612a3c04e0ba..8c294e25bd6095ee8bbc9f51d82ffebbf1bd7bcf 100644 --- a/.gitmodules +++ b/.gitmodules @@ -30,10 +30,6 @@ path = third_party/xxhash url = https://github.com/Cyan4973/xxHash.git ignore = dirty -[submodule "third_party/eigen3"] - path = third_party/eigen3 - url = https://gitlab.com/libeigen/eigen.git - ignore = dirty [submodule "third_party/leveldb"] path = third_party/leveldb url = https://github.com/google/leveldb @@ -50,3 +46,7 @@ path = third_party/glog url = https://github.com/google/glog.git ignore = dirty +[submodule "third_party/eigen3"] + path = third_party/eigen3 + url = https://gitlab.com/libeigen/eigen.git + ignore = dirty diff --git a/cmake/external/protobuf.cmake b/cmake/external/protobuf.cmake index c6f3eb23e38d6ed9bd26704ea7e048451eb8ad8d..993d079b63f25756b377ce5bca1783b666262a02 100755 --- a/cmake/external/protobuf.cmake +++ b/cmake/external/protobuf.cmake @@ -296,6 +296,8 @@ function(build_protobuf TARGET_NAME BUILD_FOR_HOST) PREFIX ${PROTOBUF_PREFIX_DIR} SOURCE_DIR ${SOURCE_DIR} UPDATE_COMMAND "" + PATCH_COMMAND + COMMAND cd ${SOURCE_DIR} && git checkout ${PROTOBUF_TAG} DEPENDS zlib CONFIGURE_COMMAND ${CMAKE_COMMAND} ${SOURCE_DIR}/cmake ${OPTIONAL_ARGS} diff --git a/cmake/external/xpu.cmake b/cmake/external/xpu.cmake index 61188ae383a2d580f4b1621e6ff69d2a8f4234db..1ba00fe42c6d7660583745a218021ee7b61f699f 100644 --- a/cmake/external/xpu.cmake +++ b/cmake/external/xpu.cmake @@ -8,7 +8,7 @@ set(XPU_API_LIB_NAME "libxpuapi.so") set(XPU_RT_LIB_NAME "libxpurt.so") set(XPU_XFT_LIB_NAME "libxft.so") -set(XPU_BASE_DATE "20230519") +set(XPU_BASE_DATE "20230523") set(XPU_XCCL_BASE_VERSION "1.0.49.2") set(XPU_XFT_BASE_VERSION "latest") diff --git a/paddle/fluid/distributed/auto_parallel/CMakeLists.txt b/paddle/fluid/distributed/auto_parallel/CMakeLists.txt index d4c74fe946c1283c653fc464ec0718f5c9499079..1245aebdf152a42364ab062b9c7513d217faa43f 100644 --- a/paddle/fluid/distributed/auto_parallel/CMakeLists.txt +++ b/paddle/fluid/distributed/auto_parallel/CMakeLists.txt @@ -6,6 +6,3 @@ cc_library( add_subdirectory(test) add_subdirectory(spmd_rules) - -cc_library(auto_parallel DEPS device_mesh process_mesh dist_attr dist_mapper - dist_tensor_spec) diff --git a/paddle/fluid/distributed/auto_parallel/spmd_rules/dist_tensor_spec.cc b/paddle/fluid/distributed/auto_parallel/spmd_rules/dist_tensor_spec.cc index 5775e72527a7591082b30c8f6ed90d8c99331855..c756c54c4adfcafd4621a686468efe80785c9010 100644 --- a/paddle/fluid/distributed/auto_parallel/spmd_rules/dist_tensor_spec.cc +++ b/paddle/fluid/distributed/auto_parallel/spmd_rules/dist_tensor_spec.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/distributed/auto_parallel/spmd_rules/dist_tensor_spec.h" -#include "paddle/fluid/distributed/auto_parallel/process_mesh.h" +#include "paddle/phi/core/distributed/auto_parallel/utils.h" namespace paddle { namespace distributed { @@ -27,28 +27,41 @@ DistTensorSpec::DistTensorSpec(const std::vector& shape, dist_attr_.copy_from(dist_attr); } +DistTensorSpec::DistTensorSpec(const DistTensorSpec& spec) { + std::vector spec_shape = spec.get_shape(); + shape_.assign(spec_shape.begin(), spec_shape.end()); + dist_attr_.copy_from(spec.get_dist_attr()); +} + DistTensorSpec::~DistTensorSpec() {} DistTensorSpec::DistTensorSpec(const Tensor& tensor) { shape_ = tensor.shape(); - std::vector pm_shape, pm_ids; - pm_shape = {4}; - pm_ids = {0, 1, 2, 3}; - std::vector dim_name = {"mp"}; + // std::vector pm_shape, pm_ids; + // pm_shape = {4}; + // pm_ids = {0, 1, 2, 3}; + // std::vector dim_name = {"mp"}; - ProcessMesh pm(pm_shape, pm_ids, dim_name); - std::vector dims_mapping = {-1, 0}; - TensorDistAttr dist_attr; - dist_attr.set_process_mesh(pm); - dist_attr.set_dims_mapping(dims_mapping); + // ProcessMesh pm(pm_shape, pm_ids, dim_name); + // std::vector dims_mapping = {-1, 0}; + // TensorDistAttr dist_attr; + // dist_attr.set_process_mesh(pm); + // dist_attr.set_dims_mapping(dims_mapping); - dist_attr_.copy_from(dist_attr); + // dist_attr_.copy_from(dist_attr); - std::cout << dist_attr_; + // std::cout << dist_attr_; } -const std::vector& DistTensorSpec::get_dims_mapping() { +DistTensorSpec& DistTensorSpec::operator=(const DistTensorSpec& spec) { + std::vector spec_shape = spec.get_shape(); + shape_ = spec_shape; + dist_attr_.copy_from(spec.get_dist_attr()); + return *this; +} + +const std::vector& DistTensorSpec::get_dims_mapping() const { return dist_attr_.dims_mapping(); } @@ -57,7 +70,7 @@ void DistTensorSpec::set_dims_mapping( dist_attr_.set_dims_mapping(dims_mapping); } -const ProcessMesh& DistTensorSpec::get_process_mesh() { +const ProcessMesh& DistTensorSpec::get_process_mesh() const { return dist_attr_.process_mesh(); } @@ -65,7 +78,22 @@ void DistTensorSpec::set_process_mesh(const ProcessMesh& process_mesh) { dist_attr_.set_process_mesh(process_mesh); } -const std::vector& DistTensorSpec::get_shape() { return shape_; } +const std::vector& DistTensorSpec::get_shape() const { return shape_; } + +const TensorDistAttr& DistTensorSpec::get_dist_attr() const { + return dist_attr_; +} + +void DistTensorSpec::set_dist_attr(const TensorDistAttr& dist_attr) { + dist_attr_ = dist_attr; +} + +std::string DistTensorSpec::to_string() const { + using phi::distributed::auto_parallel::str_join; + std::string spec_str = "{tensor_shape:[" + str_join(shape_) + "], "; + spec_str += "dist_attr:" + dist_attr_.to_string() + "}"; + return spec_str; +} } // namespace auto_parallel } // namespace distributed diff --git a/paddle/fluid/distributed/auto_parallel/spmd_rules/dist_tensor_spec.h b/paddle/fluid/distributed/auto_parallel/spmd_rules/dist_tensor_spec.h index 2e79148ab0efb6a291c21731c00771dc82cd09e4..dc1f157ccbfb39e5b98d7b92de8722a2e7fdb374 100644 --- a/paddle/fluid/distributed/auto_parallel/spmd_rules/dist_tensor_spec.h +++ b/paddle/fluid/distributed/auto_parallel/spmd_rules/dist_tensor_spec.h @@ -14,39 +14,55 @@ limitations under the License. 
*/ #pragma once -#include "paddle/fluid/distributed/auto_parallel/dist_attr.h" #include "paddle/phi/api/include/tensor.h" +#include "paddle/phi/core/distributed/auto_parallel/dist_attr.h" namespace paddle { namespace distributed { namespace auto_parallel { +using phi::distributed::auto_parallel::ProcessMesh; +using phi::distributed::auto_parallel::TensorDistAttr; + /** * A unified data class for inferring distributed attributes * in both dygraph mode and static mode */ class DistTensorSpec { public: + DistTensorSpec() = default; + DistTensorSpec(const std::vector& shape, const TensorDistAttr& dist_attr); + DistTensorSpec(const DistTensorSpec& spec); + + // temp function, only for test in dygraph mode explicit DistTensorSpec(const Tensor& tensor); ~DistTensorSpec(); + DistTensorSpec& operator=(const DistTensorSpec& spec); + // get dims_mapping from dist_attr_ - const std::vector& get_dims_mapping(); + const std::vector& get_dims_mapping() const; // set dims_mapping in dist_attr_ void set_dims_mapping(const std::vector& dims_mapping); // get process_mesh from dist_attr_ - const ProcessMesh& get_process_mesh(); + const ProcessMesh& get_process_mesh() const; // set process_mesh in dist_attr_ void set_process_mesh(const ProcessMesh& process_mesh); - const std::vector& get_shape(); + const TensorDistAttr& get_dist_attr() const; + + void set_dist_attr(const TensorDistAttr& dist_attr); + + const std::vector& get_shape() const; + + std::string to_string() const; private: std::vector shape_; diff --git a/paddle/fluid/operators/cinn/cinn_launch_op.cc b/paddle/fluid/operators/cinn/cinn_launch_op.cc index ad74d88f70e1d8e96b74e6451d941d21a67462e6..3ab9f6ba99b58470e780cc65a05af03cd701a7ed 100644 --- a/paddle/fluid/operators/cinn/cinn_launch_op.cc +++ b/paddle/fluid/operators/cinn/cinn_launch_op.cc @@ -17,6 +17,7 @@ #include #include +#include "cinn/common/target.h" #include "cinn/hlir/framework/graph_compiler.h" #include "cinn/runtime/cinn_runtime.h" #include "cinn/runtime/flags.h" @@ -94,6 +95,11 @@ void SetCinnRandomSeed() { ::cinn::runtime::RandomSeed::GetOrSet(seed); } +void SetCinnTarget(const ::cinn::common::Target& target) { + VLOG(4) << "Set CINN compile target to " << target; + ::cinn::runtime::CurrentTarget::SetCurrentTarget(target); +} + } // namespace details class CinnLaunchOp : public framework::OperatorWithKernel { diff --git a/paddle/fluid/operators/cinn/cinn_launch_op.h b/paddle/fluid/operators/cinn/cinn_launch_op.h index 59970412ea6a3b772dd282caa9674ab738b7d7d4..90751c72c605bada307f999ec9cf4177ff1c3671 100644 --- a/paddle/fluid/operators/cinn/cinn_launch_op.h +++ b/paddle/fluid/operators/cinn/cinn_launch_op.h @@ -58,6 +58,9 @@ void SetCinnRuntimeFlags(); template void SetCinnRandomSeed(); +// set CINN compile target +void SetCinnTarget(const ::cinn::common::Target& target); + } // namespace details template @@ -115,6 +118,7 @@ class CinnLaunchOpKernel : public framework::OpKernel { "Step 2. Get compilation result of the graph"); // Step 2. 
Get compilation result of the graph auto target = details::PlaceToCinnTarget(place); + details::SetCinnTarget(target); using ClockType = std::chrono::steady_clock; std::chrono::time_point start_t, end_t; if (VLOG_IS_ON(1)) { diff --git a/paddle/fluid/operators/collective/alltoall_op.cu.cc b/paddle/fluid/operators/collective/alltoall_op.cu.cc index aacd76af4af0586de2cd2c97b439d8c380eaeefc..0ae338c745ae3a890b3e36a86ebc012b0f04636f 100644 --- a/paddle/fluid/operators/collective/alltoall_op.cu.cc +++ b/paddle/fluid/operators/collective/alltoall_op.cu.cc @@ -98,7 +98,7 @@ PD_REGISTER_STRUCT_KERNEL(alltoall, ops::AllToAllOpCUDAKernel, float, double, -#if NCCL_VERSION_CODE >= 21000 +#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000 plat::bfloat16, #endif int, diff --git a/paddle/fluid/operators/collective/c_allgather_op.cu.cc b/paddle/fluid/operators/collective/c_allgather_op.cu.cc index 70b7d70dc93b31b032bf80e9e41121eeb57c4848..c3eff905851e3fff741024d850cda95ef9ec3bcd 100644 --- a/paddle/fluid/operators/collective/c_allgather_op.cu.cc +++ b/paddle/fluid/operators/collective/c_allgather_op.cu.cc @@ -95,7 +95,7 @@ PD_REGISTER_STRUCT_KERNEL(c_allgather, ops::CAllGatherOpCUDAKernel, float, double, -#if NCCL_VERSION_CODE >= 21000 +#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000 plat::bfloat16, #endif int, diff --git a/paddle/fluid/operators/collective/c_allreduce_max_op.cu.cc b/paddle/fluid/operators/collective/c_allreduce_max_op.cu.cc index 9be9674bb082bd1b5f6890f422521626d6da232b..277988b56916f8e682b8e67abd4adf20ef78fed5 100644 --- a/paddle/fluid/operators/collective/c_allreduce_max_op.cu.cc +++ b/paddle/fluid/operators/collective/c_allreduce_max_op.cu.cc @@ -28,7 +28,7 @@ PD_REGISTER_STRUCT_KERNEL(c_allreduce_max, ALL_LAYOUT, ops::CAllReduceMaxCUDAKernel, float, -#if NCCL_VERSION_CODE >= 21000 +#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000 plat::bfloat16, #endif double, diff --git a/paddle/fluid/operators/collective/c_allreduce_sum_op.cu.cc b/paddle/fluid/operators/collective/c_allreduce_sum_op.cu.cc index 8e45b7e43b2ed15b17bd8cf1a5198ef6ff613fe6..76d809cd234f03813fdea62f982757340c85e3f2 100644 --- a/paddle/fluid/operators/collective/c_allreduce_sum_op.cu.cc +++ b/paddle/fluid/operators/collective/c_allreduce_sum_op.cu.cc @@ -28,7 +28,7 @@ PD_REGISTER_STRUCT_KERNEL(c_allreduce_sum, ALL_LAYOUT, ops::CAllReduceSumCUDAKernel, float, -#if NCCL_VERSION_CODE >= 21000 +#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000 plat::bfloat16, #endif double, diff --git a/paddle/fluid/operators/collective/c_broadcast_op.cu.cc b/paddle/fluid/operators/collective/c_broadcast_op.cu.cc index d0d3ebb1394cbbee971e070502bcc3d03a3681ec..e37657a1747dec1b3ccd14ea9b32188d7a636b76 100644 --- a/paddle/fluid/operators/collective/c_broadcast_op.cu.cc +++ b/paddle/fluid/operators/collective/c_broadcast_op.cu.cc @@ -100,7 +100,7 @@ PD_REGISTER_STRUCT_KERNEL(c_broadcast, int64_t, float, double, -#if NCCL_VERSION_CODE >= 21000 +#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000 plat::bfloat16, #endif plat::float16) { diff --git a/paddle/fluid/operators/collective/c_concat_op.cu.cc b/paddle/fluid/operators/collective/c_concat_op.cu.cc index dc47c184c221db33b07ff79599dad1a54fe284a4..1760b6ea3909393c3d8f982de3a6ad5af1891108 100644 --- a/paddle/fluid/operators/collective/c_concat_op.cu.cc +++ b/paddle/fluid/operators/collective/c_concat_op.cu.cc @@ -137,7 +137,7 @@ PD_REGISTER_STRUCT_KERNEL(c_concat, double, int, int64_t, -#if NCCL_VERSION_CODE >= 21000 +#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 
11000 plat::bfloat16, #endif plat::float16) { diff --git a/paddle/fluid/operators/collective/c_embedding_op.cu b/paddle/fluid/operators/collective/c_embedding_op.cu index 4861b5d26ab0f026563305dcda4fa32da1dd0409..758734ada66e83ee46dfc0476628eb8275d5accf 100644 --- a/paddle/fluid/operators/collective/c_embedding_op.cu +++ b/paddle/fluid/operators/collective/c_embedding_op.cu @@ -239,7 +239,7 @@ PD_REGISTER_STRUCT_KERNEL(c_embedding, ops::CEmbeddingCUDAKernel, float, double, -#if NCCL_VERSION_CODE >= 21000 +#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000 plat::bfloat16, #endif plat::float16) { @@ -251,7 +251,7 @@ PD_REGISTER_STRUCT_KERNEL(c_embedding_grad, ops::CEmbeddingGradCUDAKernel, float, double, -#if NCCL_VERSION_CODE >= 21000 +#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000 plat::bfloat16, #endif plat::float16) { diff --git a/paddle/fluid/operators/collective/c_identity_op.cu.cc b/paddle/fluid/operators/collective/c_identity_op.cu.cc index 3d5f16c218c8c5fa5840c5af00a08e9e1c871bd5..9571168db152c61c4cb12461406730ebfc8b27c9 100644 --- a/paddle/fluid/operators/collective/c_identity_op.cu.cc +++ b/paddle/fluid/operators/collective/c_identity_op.cu.cc @@ -25,7 +25,7 @@ PD_REGISTER_STRUCT_KERNEL(c_identity, double, int, int64_t, -#if NCCL_VERSION_CODE >= 21000 +#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000 plat::bfloat16, #endif plat::float16) { diff --git a/paddle/fluid/operators/collective/c_reducescatter_op.cu.cc b/paddle/fluid/operators/collective/c_reducescatter_op.cu.cc index 35053b1511fcc3707aaabba690d01b42eb08e5c6..edae8feb61257b9678724124cedb01a29fac78b7 100644 --- a/paddle/fluid/operators/collective/c_reducescatter_op.cu.cc +++ b/paddle/fluid/operators/collective/c_reducescatter_op.cu.cc @@ -87,7 +87,7 @@ PD_REGISTER_STRUCT_KERNEL(c_reducescatter, ops::CReduceScatterOpCUDAKernel, float, double, -#if NCCL_VERSION_CODE >= 21000 +#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000 plat::bfloat16, #endif int, diff --git a/paddle/fluid/operators/collective/c_split_op.cu b/paddle/fluid/operators/collective/c_split_op.cu index b01ed790e851715063312336b3eeae18b0382a26..0b3e2aaf781dbe227c646c2c2161d49b954d6829 100644 --- a/paddle/fluid/operators/collective/c_split_op.cu +++ b/paddle/fluid/operators/collective/c_split_op.cu @@ -123,7 +123,7 @@ PD_REGISTER_STRUCT_KERNEL(c_split, double, int, int64_t, -#if NCCL_VERSION_CODE >= 21000 +#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000 plat::bfloat16, #endif plat::float16) { diff --git a/paddle/fluid/operators/collective/mp_allreduce_sum_op.cu.cc b/paddle/fluid/operators/collective/mp_allreduce_sum_op.cu.cc index b6af2dbd1c847ca8e347fe3ce99a5b0a6ffc2ccf..b4773a8eb54562f3bb6c6a85e39f31788002c0cc 100644 --- a/paddle/fluid/operators/collective/mp_allreduce_sum_op.cu.cc +++ b/paddle/fluid/operators/collective/mp_allreduce_sum_op.cu.cc @@ -31,7 +31,7 @@ PD_REGISTER_STRUCT_KERNEL(mp_allreduce_sum, double, int, int64_t, -#if NCCL_VERSION_CODE >= 21000 +#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000 plat::bfloat16, #endif plat::float16) { diff --git a/paddle/fluid/operators/collective/partial_allgather_op.cu.cc b/paddle/fluid/operators/collective/partial_allgather_op.cu.cc index 2374f4a4aed8239053a4ccb51803377c0d75b596..d22fd70bd0f61846ec18eda7994ee2a31c9f2d70 100644 --- a/paddle/fluid/operators/collective/partial_allgather_op.cu.cc +++ b/paddle/fluid/operators/collective/partial_allgather_op.cu.cc @@ -108,7 +108,7 @@ PD_REGISTER_STRUCT_KERNEL(partial_allgather, ops::PartialAllGatherOpCUDAKernel, float, 
double, -#if NCCL_VERSION_CODE >= 21000 +#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000 plat::bfloat16, #endif int, diff --git a/paddle/fluid/operators/collective/partial_recv_op.cu.cc b/paddle/fluid/operators/collective/partial_recv_op.cu.cc index b0df94194e4f87801b38a1a6df65236e8f9944a2..0c33ca7c25c3268db652356a2d78d8126dd53a5a 100644 --- a/paddle/fluid/operators/collective/partial_recv_op.cu.cc +++ b/paddle/fluid/operators/collective/partial_recv_op.cu.cc @@ -124,7 +124,7 @@ PD_REGISTER_STRUCT_KERNEL(partial_recv, ops::PartialRecvOpCUDAKernel, float, double, -#if NCCL_VERSION_CODE >= 21000 +#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000 plat::bfloat16, #endif int, diff --git a/paddle/fluid/operators/collective/partial_send_op.cu.cc b/paddle/fluid/operators/collective/partial_send_op.cu.cc index dc24ea01fc98e96f59409f5a0628ba36642cc6c7..4f9fc41bc4e16fc1c8c243de7a329bebbcdc8324 100644 --- a/paddle/fluid/operators/collective/partial_send_op.cu.cc +++ b/paddle/fluid/operators/collective/partial_send_op.cu.cc @@ -123,7 +123,7 @@ PD_REGISTER_STRUCT_KERNEL(partial_send, ops::PartialSendCUDAKernel, float, double, -#if NCCL_VERSION_CODE >= 21000 +#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000 plat::bfloat16, #endif int, diff --git a/paddle/fluid/operators/collective/recv_v2_op.cu.cc b/paddle/fluid/operators/collective/recv_v2_op.cu.cc index bfa12f911946d4b3eb17c99ce75caba3ba436c64..28058aa4868cd688e7470e83fea90d403b19065a 100644 --- a/paddle/fluid/operators/collective/recv_v2_op.cu.cc +++ b/paddle/fluid/operators/collective/recv_v2_op.cu.cc @@ -238,7 +238,7 @@ PD_REGISTER_STRUCT_KERNEL(recv_v2, ops::RecvOpV2CUDAKernel, float, double, -#if NCCL_VERSION_CODE >= 21000 +#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000 plat::bfloat16, #endif int, diff --git a/paddle/fluid/operators/collective/send_v2_op.cu.cc b/paddle/fluid/operators/collective/send_v2_op.cu.cc index adea7db0b8088e61583f88c65a3b4f386177b5cd..a80dc1f91e45d55ac778e0f3a95050f299de30c6 100644 --- a/paddle/fluid/operators/collective/send_v2_op.cu.cc +++ b/paddle/fluid/operators/collective/send_v2_op.cu.cc @@ -223,7 +223,7 @@ PD_REGISTER_STRUCT_KERNEL(send_v2, ops::SendOpV2CUDAKernel, float, double, -#if NCCL_VERSION_CODE >= 21000 +#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000 plat::bfloat16, #endif int, diff --git a/paddle/fluid/platform/device/gpu/nccl_helper.h b/paddle/fluid/platform/device/gpu/nccl_helper.h index 8dd0639ce72f3bd30a43c7f5141852b2338ad4df..6afcd2eb7cd9720c7dfffdfc2625f26ba9910a16 100644 --- a/paddle/fluid/platform/device/gpu/nccl_helper.h +++ b/paddle/fluid/platform/device/gpu/nccl_helper.h @@ -59,7 +59,7 @@ inline ncclDataType_t ToNCCLDataType(framework::proto::VarType::Type type) { return ncclUint8; } else if (type == framework::proto::VarType::BOOL) { return ncclUint8; -#if NCCL_VERSION_CODE >= 21000 +#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000 } else if (type == framework::proto::VarType::BF16) { return ncclBfloat16; #endif @@ -86,7 +86,7 @@ inline ncclDataType_t ToNCCLDataType(phi::DataType type) { return ncclInt8; } else if (type == phi::DataType::BOOL) { return ncclUint8; -#if NCCL_VERSION_CODE >= 21000 +#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000 } else if (type == phi::DataType::BFLOAT16) { return ncclBfloat16; #endif diff --git a/paddle/fluid/pybind/auto_parallel_py.cc b/paddle/fluid/pybind/auto_parallel_py.cc index fdac30be8f07b397996681ac42c87c531ee9a3c1..439aa6a623769c2d07559a36f4571caa3d76a7f4 100644 --- 
a/paddle/fluid/pybind/auto_parallel_py.cc +++ b/paddle/fluid/pybind/auto_parallel_py.cc @@ -15,6 +15,7 @@ #include #include +#include "paddle/fluid/distributed/auto_parallel/spmd_rules/dist_tensor_spec.h" #include "paddle/fluid/framework/op_desc.h" #include "paddle/fluid/framework/var_desc.h" #include "paddle/fluid/pybind/auto_parallel_py.h" @@ -29,6 +30,7 @@ namespace py = pybind11; namespace paddle { namespace pybind { +using paddle::distributed::auto_parallel::DistTensorSpec; using paddle::distributed::auto_parallel::OperatorDistAttr; using paddle::framework::OpDesc; using paddle::framework::VarDesc; @@ -276,6 +278,25 @@ void BindAutoParallel(py::module *m) { py::arg("memo")) .def("__str__", &TensorDistAttr::to_string); + py::class_(*m, "DistTensorSpec") + .def(py::init<>()) + .def(py::init()) + .def(py::init &, const TensorDistAttr &>()) + .def("get_dims_mapping", &DistTensorSpec::get_dims_mapping) + .def("set_dims_mapping", &DistTensorSpec::set_dims_mapping) + .def("get_process_mesh", &DistTensorSpec::get_process_mesh) + .def("set_process_mesh", &DistTensorSpec::set_process_mesh) + .def_property_readonly("shape", &DistTensorSpec::get_shape) + .def("__str__", &DistTensorSpec::to_string) + .def("__copy__", + [](const DistTensorSpec &self) { return DistTensorSpec(self); }) + .def( + "__deepcopy__", + [](const DistTensorSpec &self, py::dict) { + return DistTensorSpec(self); + }, + py::arg("memo")); + py::class_(*m, "OperatorDistAttr") .def(py::init<>()) .def(py::init()) diff --git a/paddle/phi/api/yaml/generator/api_base.py b/paddle/phi/api/yaml/generator/api_base.py index 23d3be56a11ca16cd735b7bf4b94407f800c2595..db858bd85e562c03062090a2653fb3e008dff6a2 100644 --- a/paddle/phi/api/yaml/generator/api_base.py +++ b/paddle/phi/api/yaml/generator/api_base.py @@ -1280,7 +1280,7 @@ PADDLE_API {self.get_return_type(inplace_flag=True)} {api_func_name}({self.get_d def gen_dist_tensor_code(self): # define the DistTensorSpec vector for input and output tensors - api_code = " \nstd::vector input_specs;\n" + api_code = " \n std::vector input_specs;\n" # get DistTensorSpec for each input tensor for tensor_name in self.inputs['names']: @@ -1297,8 +1297,8 @@ PADDLE_API {self.get_return_type(inplace_flag=True)} {api_func_name}({self.get_d PADDLE_API {self.get_return_type(inplace_flag)} {api_func_name}({self.get_define_args(inplace_flag)}) {{ {self.gene_kernel_select()} """ - if api_func_name == 'matmul': - api_code += self.gen_dist_tensor_code() + # if api_func_name == 'matmul': + # api_code += self.gen_dist_tensor_code() if len(self.kernel['func']) > 1: kernel_dispatch_code = '' diff --git a/paddle/phi/backends/xpu/xpu2_op_list.cc b/paddle/phi/backends/xpu/xpu2_op_list.cc index a8bf526cf87b41a3e2a1436a3f23bda1e127af00..5b7c847d76d91b4a872e774c881f6151b280fd8e 100644 --- a/paddle/phi/backends/xpu/xpu2_op_list.cc +++ b/paddle/phi/backends/xpu/xpu2_op_list.cc @@ -525,6 +525,8 @@ XPUOpMap& get_kl2_ops() { phi::DataType::FLOAT16, phi::DataType::INT64})}, {"nearest_interp_v2_grad", XPUKernelSet({phi::DataType::FLOAT32})}, + {"nll_loss", XPUKernelSet({phi::DataType::FLOAT32})}, + {"nll_loss_grad", XPUKernelSet({phi::DataType::FLOAT32})}, {"not_equal", XPUKernelSet({phi::DataType::INT64, phi::DataType::INT32, diff --git a/paddle/phi/core/distributed/auto_parallel/CMakeLists.txt b/paddle/phi/core/distributed/auto_parallel/CMakeLists.txt index 2c4728c5a4c21e4801e67e6ece7776377b066aed..7121d93c05eaa795ac692f3ad6ce9a532324d7ac 100644 --- a/paddle/phi/core/distributed/auto_parallel/CMakeLists.txt +++ 
b/paddle/phi/core/distributed/auto_parallel/CMakeLists.txt @@ -20,4 +20,5 @@ cc_library( SRCS dist_mapper.cc DEPS device_mesh auto_parallel_proto phi_enforce) -cc_library(auto_parallel DEPS device_mesh process_mesh dist_attr dist_mapper) +cc_library(auto_parallel DEPS device_mesh process_mesh dist_attr dist_mapper + dist_tensor_spec) diff --git a/paddle/phi/core/utils/data_type.h b/paddle/phi/core/utils/data_type.h index 16b73e0f2baa6738702971a89101df99ce68c99f..018672e45b5970c4cb26d73cb47a1bb48a2842b2 100644 --- a/paddle/phi/core/utils/data_type.h +++ b/paddle/phi/core/utils/data_type.h @@ -229,7 +229,7 @@ inline ncclDataType_t ToNCCLDataType(DataType type) { return ncclInt8; } else if (type == DataType::BOOL) { return ncclUint8; -#if NCCL_VERSION_CODE >= 21000 +#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000 } else if (type == DataType::BFLOAT16) { return ncclBfloat16; #endif diff --git a/paddle/phi/kernels/gpu/activation_kernel.cu b/paddle/phi/kernels/gpu/activation_kernel.cu index cf3c66f53de2d56663c75e9b5ecbc17798716dfb..794d442ce2acc1114cfb06ab05f7b23d14f92a02 100644 --- a/paddle/phi/kernels/gpu/activation_kernel.cu +++ b/paddle/phi/kernels/gpu/activation_kernel.cu @@ -274,4 +274,5 @@ PD_REGISTER_KERNEL(selu, phi::SeluKernel, float, double, + phi::dtype::float16, phi::dtype::bfloat16) {} diff --git a/paddle/phi/kernels/gpu/selu_grad_kernel.cu b/paddle/phi/kernels/gpu/selu_grad_kernel.cu index c715831ffc7ffcef400eb7ff11551cf5d636f055..68f91aa2b45e730ba22a52f1c193f70455856bcd 100644 --- a/paddle/phi/kernels/gpu/selu_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/selu_grad_kernel.cu @@ -24,4 +24,5 @@ PD_REGISTER_KERNEL(selu_grad, phi::SeluGradKernel, float, double, + phi::dtype::float16, phi::dtype::bfloat16) {} diff --git a/paddle/phi/kernels/xpu/nll_loss_grad_kernel.cc b/paddle/phi/kernels/xpu/nll_loss_grad_kernel.cc new file mode 100644 index 0000000000000000000000000000000000000000..1dbe679e67498e3cd32af71ac5ef75f2058f53f4 --- /dev/null +++ b/paddle/phi/kernels/xpu/nll_loss_grad_kernel.cc @@ -0,0 +1,95 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/phi/kernels/nll_loss_grad_kernel.h" +#include "paddle/phi/backends/xpu/enforce_xpu.h" +#include "paddle/phi/core/kernel_registry.h" + +namespace phi { + +template +void NllLossGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& label, + const paddle::optional& weight, + const DenseTensor& total_weight, + const DenseTensor& d_out, + int64_t ignore_index, + const std::string& reduction, + DenseTensor* d_x) { + using XPUType = typename XPUTypeTrait::Type; + const auto& label_type = label.dtype(); + bool label_type_match = + label_type == phi::DataType::INT32 || label_type == phi::DataType::INT64; + PADDLE_ENFORCE_EQ(label_type_match, + true, + phi::errors::InvalidArgument( + "Input(Label) holds the wrong type, it holds %s, but " + "desires to be %s or %s", + label_type, + phi::DataType::INT32, + phi::DataType::INT64)); + + auto d_out_data = d_out.data(); + auto d_x_data = dev_ctx.template Alloc(d_x); + + auto d_x_dims = d_x->dims(); + std::vector d_x_shape = phi::vectorize(d_x_dims); + + auto weight_data = + weight.get_ptr() ? weight.get_ptr()->data() : nullptr; + + int64_t reduction_id = 0; + if (reduction == "none") { + reduction_id = 0; + } else if (reduction == "mean") { + reduction_id = 1; + } else if (reduction == "sum") { + reduction_id = 2; + } + + auto total_weight_data = total_weight.data(); + + int r; + if (label_type == phi::DataType::INT32) { + const int* label_data = label.data(); + r = xpu::nll_loss_grad(dev_ctx.x_context(), + d_out_data, + d_x_data, + d_x_shape, + label_data, + weight_data, + reduction_id, + ignore_index, + total_weight_data); + } else if (label_type == phi::DataType::INT64) { + const int64_t* label_data = label.data(); + r = xpu::nll_loss_grad(dev_ctx.x_context(), + d_out_data, + d_x_data, + d_x_shape, + label_data, + weight_data, + reduction_id, + ignore_index, + total_weight_data); + } + PADDLE_ENFORCE_XDNN_SUCCESS(r, "nll_loss_grad"); +} + +} // namespace phi + +// TODO(xiongkun): add the non-raw kernel register here. +PD_REGISTER_KERNEL( + nll_loss_grad, XPU, ALL_LAYOUT, phi::NllLossGradKernel, float) {} diff --git a/paddle/phi/kernels/xpu/nll_loss_kernel.cc b/paddle/phi/kernels/xpu/nll_loss_kernel.cc new file mode 100644 index 0000000000000000000000000000000000000000..2d9bf5baf576707a5bb9c6f53e3bb0575e3f777f --- /dev/null +++ b/paddle/phi/kernels/xpu/nll_loss_kernel.cc @@ -0,0 +1,93 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/phi/kernels/nll_loss_kernel.h" +#include "paddle/phi/backends/xpu/enforce_xpu.h" +#include "paddle/phi/core/kernel_registry.h" + +namespace phi { + +template +void NllLossRawKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& label, + const paddle::optional& weight, + int64_t ignore_index, + const std::string& reduction, + DenseTensor* out, + DenseTensor* total_weight) { + using XPUType = typename XPUTypeTrait::Type; + const auto& label_type = label.dtype(); + bool label_type_match = + label_type == phi::DataType::INT32 || label_type == phi::DataType::INT64; + PADDLE_ENFORCE_EQ(label_type_match, + true, + phi::errors::InvalidArgument( + "Input(Label) holds the wrong type, it holds %s, but " + "desires to be %s or %s", + label_type, + phi::DataType::INT32, + phi::DataType::INT64)); + + auto x_data = x.data(); + auto out_data = dev_ctx.template Alloc(out); + + auto weight_data = + weight.get_ptr() ? weight.get_ptr()->data() : nullptr; + + auto total_weight_data = dev_ctx.template Alloc(total_weight); + + auto x_dims = x.dims(); + std::vector x_shape = phi::vectorize(x_dims); + + int64_t reduction_id = 0; + if (reduction == "none") { + reduction_id = 0; + } else if (reduction == "mean") { + reduction_id = 1; + } else if (reduction == "sum") { + reduction_id = 2; + } + + int r; + if (label_type == phi::DataType::INT32) { + const int* label_data = label.data(); + r = xpu::nll_loss(dev_ctx.x_context(), + x_data, + out_data, + total_weight_data, + x_shape, + label_data, + weight_data, + reduction_id, + ignore_index); + } else if (label_type == phi::DataType::INT64) { + const int64_t* label_data = label.data(); + r = xpu::nll_loss(dev_ctx.x_context(), + x_data, + out_data, + total_weight_data, + x_shape, + label_data, + weight_data, + reduction_id, + ignore_index); + } + PADDLE_ENFORCE_XDNN_SUCCESS(r, "nll_loss"); +} + +} // namespace phi + +// TODO(xiongkun): add the non-raw kernel register here. 
+PD_REGISTER_KERNEL(nll_loss, XPU, ALL_LAYOUT, phi::NllLossRawKernel, float) {} diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh index 77ed8e531da0261bf89a078b0ffe531e3d10135d..697b74c39a41ea9d74ff93618dea5aa273931a2b 100644 --- a/paddle/scripts/paddle_build.sh +++ b/paddle/scripts/paddle_build.sh @@ -2235,14 +2235,70 @@ set +x fi done <<< "$test_cases"; card_test "$single_card_tests" 1 + failed_test_lists='' collect_failed_tests + xputest_error=0 + retry_unittests_record='' + retry_time=3 + exec_times=0 + exec_time_array=('first' 'second' 'third') + exec_retry_threshold=10 + is_retry_execuate=0 + if [ -n "$failed_test_lists" ];then + xputest_error=1 + need_retry_ut_str=$(echo "$failed_test_lists" | grep -oEi "\-.+\(" | sed 's/(//' | sed 's/- //' ) + need_retry_ut_arr=(${need_retry_ut_str}) + need_retry_ut_count=${#need_retry_ut_arr[@]} + retry_unittests=$(echo "$failed_test_lists" | grep -oEi "\-.+\(" | sed 's/(//' | sed 's/- //' ) + if [ $need_retry_ut_count -lt $exec_retry_threshold ];then + while ( [ $exec_times -lt $retry_time ] ) + do + set +e + retry_unittests_record="$retry_unittests_record$failed_test_lists" + failed_test_lists_ult=`echo "${failed_test_lists}"` + set -e + if [[ "${exec_times}" == "1" ]];then + if [[ "${failed_test_lists}" == "" ]];then + break + else + retry_unittests=$(echo "$failed_test_lists" | grep -oEi "\-.+\(" | sed 's/(//' | sed 's/- //' ) + fi + fi + echo "=========================================" + echo "This is the ${exec_time_array[$exec_times]} time to re-run" + echo "=========================================" + echo "The following unittest will be re-run:" + echo "${retry_unittests}" + echo "=========================================" + + retry_unittests_regular='' + for line in ${retry_unittests[@]} ; + do + if [[ "$retry_unittests_regular" == "" ]];then + retry_unittests_regular="^$line$" + else + retry_unittests_regular="$retry_unittests_regular|^$line$" + fi + done + rm -f $tmp_dir/* + failed_test_lists='' + ctest -R "($retry_unittests_regular)" --output-on-failure -j $2 | tee $tmpfile + collect_failed_tests + exec_times=$[$exec_times+1] + done + else + # There are more than 10 failed unit tests, so no unit test retry + is_retry_execuate=1 + fi + + fi set -x ut_endTime_s=`date +%s` echo "XPU testCase Time: $[ $ut_endTime_s - $ut_startTime_s ]s" python ${PADDLE_ROOT}/build/test/xpu/get_test_cover_info.py unset XPU_OP_LIST_DIR - if [[ "$EXIT_CODE" != "0" ]]; then - exit 8; + if [ "$xputest_error" != 0 ];then + show_ut_retry_result fi fi } diff --git a/python/paddle/distributed/auto_parallel/dist_attribute.py b/python/paddle/distributed/auto_parallel/dist_attribute.py index 5c7fadf2e20771a263315670d0c4fa325c8296de..d31df134d6b6a0ff25e4ba8bdb93e36d172889d4 100644 --- a/python/paddle/distributed/auto_parallel/dist_attribute.py +++ b/python/paddle/distributed/auto_parallel/dist_attribute.py @@ -12,5 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License +from paddle.fluid.core import DistTensorSpec # noqa: F401 from paddle.fluid.core import OperatorDistAttr # noqa: F401 from paddle.fluid.core import TensorDistAttr # noqa: F401 diff --git a/python/paddle/distributed/auto_parallel/operators/dist_matmul.py b/python/paddle/distributed/auto_parallel/operators/dist_matmul.py index 8825e14d9aba7da62d52e09d4f228be8afc1e056..a7e539d460a7047d8ae0132a56b7d65b8446b704 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_matmul.py +++ 
b/python/paddle/distributed/auto_parallel/operators/dist_matmul.py @@ -105,6 +105,18 @@ def _update_dims_mapping_for_matmul(dist_op): changed = False op_desc = dist_op.serial_op.desc op_dist_attr = dist_op.dist_attr + + # test DistTensorSpec + # input_name_list = [] + # output_name_list = [] + # input_name_list.append(op_desc.input('X')[0]) + # input_name_list.append(op_desc.input('Y')[0]) + # output_name_list.append(op_desc.output('Out')[0]) + # attr_name_list = ['trans_x', 'trans_y'] + # input_specs, output_specs, attrs = wrap_data_for_completion( + # dist_op, input_name_list, output_name_list, attr_name_list + # ) + x_name = op_desc.input('X')[0] y_name = op_desc.input('Y')[0] out_name = op_desc.output('Out')[0] diff --git a/python/paddle/distributed/auto_parallel/utils.py b/python/paddle/distributed/auto_parallel/utils.py index 1a3299e20a48a9b116baed255c079c60d4726a83..43b293b750a9313f6f1cb55f5ae40ed43e676f9c 100644 --- a/python/paddle/distributed/auto_parallel/utils.py +++ b/python/paddle/distributed/auto_parallel/utils.py @@ -26,7 +26,7 @@ from paddle.framework import core from paddle.framework.io_utils import is_belong_to_optimizer, is_parameter from paddle.static import Variable -from .dist_attribute import OperatorDistAttr, TensorDistAttr +from .dist_attribute import DistTensorSpec, OperatorDistAttr, TensorDistAttr from .process_group import get_all_process_groups from .process_mesh import ProcessMesh @@ -2357,50 +2357,64 @@ def is_dep_skip_op(op): return False -# def wrap_data_for_completion( -# dist_op: DistributedOperator, -# input_names: list, -# output_names: list, -# attr_names: list -# ): -# """ -# Get data used in inferring distributed attributes, including: -# 1. DistTensorSpec for each input and output tensor of this dist_op. -# 2. Operator attributes of this dist_op, e.g. transpose_x in matmul op. -# -# Args: -# dist_op: the DistributedOperator -# input_names: list, name of the dist_op's input tensors -# output_names: list, name of the dist_op's output tensors -# attr_names: list, attribute name of the dist_op's corresponding serial op -# -# Returns: -# input_specs: list, DistTensorSpec for each input tensor of the dist_op -# output_specs: list, DistTensorSpec for each output tensor of the dist_op -# attrs: dict, attribute map of the dist op -# """ -# -# input_specs = [] -# output_specs = [] -# attrs = {} -# -# serial_op = dist_op.serial_op -# -# # Construct each input tensor's DistTensorSpec with shape and dist_attr -# for name in input_names: -# tensor_dist_attr = dist_op.dist_attr.get_input_dist_attr(name) -# var = serial_op.block._var_recursive(name) -# tensor_shape = var.shape -# dist_spec = DistTensorSpec(tensor_shape, tensor_dist_attr) -# input_specs.append(dist_spec) -# -# # Construct each output tensor's DistTensorSpec with shape and dist_attr -# for name in output_names: -# tensor_dist_attr = dist_op.dist_attr.get_output_dist_attr(name) -# var = serial_op.block._var_recursive(name) -# tensor_shape = var.shape -# dist_spec = DistTensorSpec(tensor_shape, tensor_dist_attr) -# output_specs.append(dist_spec) -# -# for attr_name in attr_names: -# attrs[attr_name] = serial_op.desc.attr(attr_name) +def wrap_data_for_completion( + dist_op, input_names: list, output_names: list, attr_names: list +): + """ + Get data used in inferring distributed attributes, including: + 1. DistTensorSpec for each input and output tensor of this dist_op. + 2. Operator attributes of this dist_op, e.g. transpose_x in matmul op. 
+ + Args: + dist_op: the DistributedOperator + input_names: list, name of the dist_op's input tensors + output_names: list, name of the dist_op's output tensors + attr_names: list, attribute name of the dist_op's corresponding serial op + + Returns: + input_specs: list, DistTensorSpec for each input tensor of the dist_op + output_specs: list, DistTensorSpec for each output tensor of the dist_op + attrs: dict, attribute map of the dist op + + Usage: + op_desc = dist_op.serial_op.desc + input_name_list = [] + output_name_list = [] + input_name_list.append(op_desc.input('X')[0]) # 'X' is the arg name for op + input_name_list.append(op_desc.input('Y')[0]) + output_name_list.append(op_desc.output('Out')[0]) + attr_name_list = ['trans_x', 'trans_y'] + input_specs, output_specs, attrs = wrap_data_for_completion( + dist_op, + input_name_list, + output_name_list, + attr_name_list) + + """ + + input_specs = [] + output_specs = [] + attrs = {} + + serial_op = dist_op.serial_op + + # Construct each input tensor's DistTensorSpec with shape and dist_attr + for name in input_names: + tensor_dist_attr = dist_op.dist_attr.get_input_dist_attr(name) + var = serial_op.block._var_recursive(name) + tensor_shape = var.shape + dist_spec = DistTensorSpec(tensor_shape, tensor_dist_attr) + input_specs.append(dist_spec) + + # Construct each output tensor's DistTensorSpec with shape and dist_attr + for name in output_names: + tensor_dist_attr = dist_op.dist_attr.get_output_dist_attr(name) + var = serial_op.block._var_recursive(name) + tensor_shape = var.shape + dist_spec = DistTensorSpec(tensor_shape, tensor_dist_attr) + output_specs.append(dist_spec) + + for attr_name in attr_names: + attrs[attr_name] = serial_op.desc.attr(attr_name) + + return input_specs, output_specs, attrs diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index 257e3d1a6b72cce73d579da15c495f1296ba3ca0..d4c50707cbe78238042985aa4141f5ae2099a249 100755 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -1116,7 +1116,15 @@ set(TEST_CINN_OPS test_tile_op test_roll_op test_sum_op - test_elementwise_min_op) + test_elementwise_min_op + test_bitwise_op + test_compare_op + test_shape_op + test_assign_value_op + test_lookup_table_op + test_lookup_table_v2_op + test_norm_op + test_one_hot_v2_op) foreach(TEST_CINN_OPS ${TEST_CINN_OPS}) if(WITH_CINN) diff --git a/python/paddle/fluid/tests/unittests/test_assign_value_op.py b/python/paddle/fluid/tests/unittests/test_assign_value_op.py index 243dccc2422444a87d29248f63c0bfe783779036..7cb5dece346c88476ea4710ce9b60a0f1ff60cf7 100644 --- a/python/paddle/fluid/tests/unittests/test_assign_value_op.py +++ b/python/paddle/fluid/tests/unittests/test_assign_value_op.py @@ -49,7 +49,7 @@ class TestAssignValueOp(eager_op_test.OpTest): self.attrs["fp32_values"] = [float(v) for v in self.value.flat] def test_forward(self): - self.check_output() + self.check_output(check_cinn=True) class TestAssignValueOp2(TestAssignValueOp): diff --git a/python/paddle/fluid/tests/unittests/test_bitwise_op.py b/python/paddle/fluid/tests/unittests/test_bitwise_op.py index 084552e6b1ae653ccc01f54e44c97b6560478204..728ea62dbf2cb182aadadf80e8f71a6f8e229a48 100644 --- a/python/paddle/fluid/tests/unittests/test_bitwise_op.py +++ b/python/paddle/fluid/tests/unittests/test_bitwise_op.py @@ -43,7 +43,7 @@ class TestBitwiseAnd(OpTest): self.outputs = {'Out': out} def test_check_output(self): - self.check_output() + 
self.check_output(check_cinn=True) def test_check_grad(self): pass @@ -150,7 +150,7 @@ class TestBitwiseOr(OpTest): self.outputs = {'Out': out} def test_check_output(self): - self.check_output() + self.check_output(check_cinn=True) def test_check_grad(self): pass @@ -258,7 +258,7 @@ class TestBitwiseXor(OpTest): self.outputs = {'Out': out} def test_check_output(self): - self.check_output() + self.check_output(check_cinn=True) def test_check_grad(self): pass @@ -363,7 +363,7 @@ class TestBitwiseNot(OpTest): self.outputs = {'Out': out} def test_check_output(self): - self.check_output() + self.check_output(check_cinn=True) def test_check_grad(self): pass diff --git a/python/paddle/fluid/tests/unittests/test_compare_op.py b/python/paddle/fluid/tests/unittests/test_compare_op.py index 0b8c4aa8eae4215545a3953fe96a05040e2f557a..2f4e12f2b4e40db3fdf3792a80f4f6b975232bac 100755 --- a/python/paddle/fluid/tests/unittests/test_compare_op.py +++ b/python/paddle/fluid/tests/unittests/test_compare_op.py @@ -35,7 +35,7 @@ def create_test_class(op_type, typename, callback): self.op_type = op_type def test_output(self): - self.check_output() + self.check_output(check_cinn=True) def test_errors(self): paddle.enable_static() @@ -460,7 +460,7 @@ def create_bf16_case(op_type, callback): self.outputs = {'Out': real_result} def test_check_output(self): - self.check_output() + self.check_output(check_cinn=True) cls_name = f"BF16TestCase_{op_type}" TestCompareOpBF16Op.__name__ = cls_name diff --git a/python/paddle/fluid/tests/unittests/test_expand_v2_op.py b/python/paddle/fluid/tests/unittests/test_expand_v2_op.py index 27fc92292f36f976a6049b3ecda73302450c783b..92cf190cb60a21288ec7a3c858aca6f31237092e 100644 --- a/python/paddle/fluid/tests/unittests/test_expand_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_expand_v2_op.py @@ -44,7 +44,7 @@ class TestExpandV2OpRank1(OpTest): self.expand_times = [1] def test_check_output(self): - self.check_output() + self.check_output(check_cinn=self.enable_cinn) def test_check_grad(self): self.check_grad(['X'], 'Out', check_prim=True) @@ -107,10 +107,10 @@ class TestExpandV2OpRank1_tensor_attr(OpTest): self.infer_expand_shape = [-1] def test_check_output(self): - self.check_output() + self.check_output(check_cinn=True) def test_check_grad(self): - self.check_grad(['X'], 'Out') + self.check_grad(['X'], 'Out', check_cinn=True) class TestExpandV2OpRank2_Corner_tensor_attr(TestExpandV2OpRank1_tensor_attr): @@ -144,10 +144,10 @@ class TestExpandV2OpRank1_tensor(OpTest): self.expand_shape = [2, 100] def test_check_output(self): - self.check_output() + self.check_output(check_cinn=True) def test_check_grad(self): - self.check_grad(['X'], 'Out') + self.check_grad(['X'], 'Out', check_cinn=True) # Situation 4: input x is Integer @@ -165,7 +165,7 @@ class TestExpandV2OpInteger(OpTest): self.outputs = {'Out': output} def test_check_output(self): - self.check_output() + self.check_output(check_cinn=True) # Situation 5: input x is Bool @@ -181,7 +181,7 @@ class TestExpandV2OpBoolean(OpTest): self.outputs = {'Out': output} def test_check_output(self): - self.check_output() + self.check_output(check_cinn=True) # Situation 6: input x is Integer @@ -199,7 +199,7 @@ class TestExpandV2OpInt64_t(OpTest): self.outputs = {'Out': output} def test_check_output(self): - self.check_output() + self.check_output(check_cinn=True) # Situation 7: input x is Float16 @@ -218,7 +218,7 @@ class TestExpandV2FP16Op(OpTest): self.outputs = {'Out': output} def test_check_output(self): - self.check_output() + 
self.check_output(check_cinn=True) def test_check_grad(self): self.check_grad(['X'], 'Out', check_prim=True) @@ -245,7 +245,7 @@ class TestExpandV2BF16Op(OpTest): def test_check_output(self): place = core.CUDAPlace(0) - self.check_output_with_place(place) + self.check_output_with_place(place, check_cinn=True) def test_check_grad(self): place = core.CUDAPlace(0) diff --git a/python/paddle/fluid/tests/unittests/test_lookup_table_op.py b/python/paddle/fluid/tests/unittests/test_lookup_table_op.py index de868db11fb1a69244526cee698367899518e819..cd26f390747ee495779dfd8f5ea00eea50a47853 100644 --- a/python/paddle/fluid/tests/unittests/test_lookup_table_op.py +++ b/python/paddle/fluid/tests/unittests/test_lookup_table_op.py @@ -39,10 +39,10 @@ class TestLookupTableOp(OpTest): self.outputs = {'Out': table[ids]} def test_check_output(self): - self.check_output() + self.check_output(check_cinn=True) def test_check_grad(self): - self.check_grad(['W'], 'Out', no_grad_set=set('Ids')) + self.check_grad(['W'], 'Out', no_grad_set=set('Ids'), check_cinn=True) class TestLookupTableOpWithTensorIds(OpTest): @@ -56,10 +56,10 @@ class TestLookupTableOpWithTensorIds(OpTest): self.outputs = {'Out': table[ids.flatten()].reshape((2, 4, 5, 31))} def test_check_output(self): - self.check_output() + self.check_output(check_cinn=True) def test_check_grad(self): - self.check_grad(['W'], 'Out', no_grad_set=set('Ids')) + self.check_grad(['W'], 'Out', no_grad_set=set('Ids'), check_cinn=True) @skip_check_grad_ci( @@ -73,7 +73,7 @@ class TestLookupTableOpWithPadding(TestLookupTableOp): padding_idx = np.random.choice(ids, 1)[0] self.outputs['Out'][ids == padding_idx] = np.zeros(31) self.attrs = {'padding_idx': int(padding_idx)} - self.check_output() + self.check_output(check_cinn=True) @skip_check_grad_ci( @@ -88,7 +88,7 @@ class TestLookupTableOpWithTensorIdsAndPadding(TestLookupTableOpWithTensorIds): padding_idx = np.random.choice(flatten_idx, 1)[0] self.outputs['Out'][np.squeeze(ids == padding_idx)] = np.zeros(31) self.attrs = {'padding_idx': padding_idx} - self.check_output() + self.check_output(check_cinn=True) class TestLookupTableWIsSelectedRows(unittest.TestCase): @@ -212,7 +212,7 @@ class TestLookupTableOpInt8(OpTest): self.outputs = {'Out': table[ids]} def test_check_output(self): - self.check_output() + self.check_output(check_cinn=True) def test_check_grad(self): # since int8 type only be used in test and inference, there is @@ -233,7 +233,7 @@ class TestLookupTableOpWithTensorIdsInt8(OpTest): self.outputs = {'Out': table[ids.flatten()].reshape((2, 4, 5, 31))} def test_check_output(self): - self.check_output() + self.check_output(check_cinn=True) def test_check_grad(self): # since int8 type only be used in test and inference, there is @@ -247,7 +247,7 @@ class TestLookupTableOpWithPaddingInt8(TestLookupTableOpInt8): padding_idx = np.random.choice(ids, 1)[0] self.outputs['Out'][ids == padding_idx] = np.zeros(31) self.attrs = {'padding_idx': int(padding_idx)} - self.check_output() + self.check_output(check_cinn=True) def test_check_grad(self): # Since paddings are not trainable and fixed in forward, the gradient of @@ -264,7 +264,7 @@ class TestLookupTableOpWithTensorIdsAndPaddingInt8( padding_idx = np.random.choice(flatten_idx, 1)[0] self.outputs['Out'][np.squeeze(ids == padding_idx)] = np.zeros(31) self.attrs = {'padding_idx': padding_idx} - self.check_output() + self.check_output(check_cinn=True) def test_check_grad(self): # Since paddings are not trainable and fixed in forward, the gradient of @@ -354,7 +354,7 
@@ class TestLookupTableOpInt16(OpTest): self.outputs = {'Out': table[ids]} def test_check_output(self): - self.check_output() + self.check_output(check_cinn=True) @skip_check_grad_ci(reason="Int16 type only be used in test and inference.") @@ -371,7 +371,7 @@ class TestLookupTableOpWithTensorIdsInt16(OpTest): self.outputs = {'Out': table[ids.flatten()].reshape((2, 4, 5, 31))} def test_check_output(self): - self.check_output() + self.check_output(check_cinn=True) @skip_check_grad_ci(reason="Int16 type only be used in test and inference.") @@ -381,7 +381,7 @@ class TestLookupTableOpWithPaddingInt16(TestLookupTableOpInt16): padding_idx = np.random.choice(ids, 1)[0] self.outputs['Out'][ids == padding_idx] = np.zeros(31) self.attrs = {'padding_idx': int(padding_idx)} - self.check_output() + self.check_output(check_cinn=True) @skip_check_grad_ci(reason="Int16 type only be used in test and inference.") @@ -394,7 +394,7 @@ class TestLookupTableOpWithTensorIdsAndPaddingInt16( padding_idx = np.random.choice(flatten_idx, 1)[0] self.outputs['Out'][np.squeeze(ids == padding_idx)] = np.zeros(31) self.attrs = {'padding_idx': padding_idx} - self.check_output() + self.check_output(check_cinn=True) class TestLookupTableWIsSelectedRowsInt16(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_lookup_table_v2_op.py b/python/paddle/fluid/tests/unittests/test_lookup_table_v2_op.py index 54e9992a13d6baf410290848a7adf4e61866391b..b36f914a25786bc0d8d9ee76da22884f5f791e51 100644 --- a/python/paddle/fluid/tests/unittests/test_lookup_table_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_lookup_table_v2_op.py @@ -56,10 +56,10 @@ class TestLookupTableOp(OpTest): return "int64" def test_check_output(self): - self.check_output() + self.check_output(check_cinn=True) def test_check_grad(self): - self.check_grad(['W'], 'Out', no_grad_set=set('Ids')) + self.check_grad(['W'], 'Out', no_grad_set=set('Ids'), check_cinn=True) class TestLookupTableOpInt16(OpTest): @@ -87,10 +87,10 @@ class TestLookupTableOpWithTensorIds(OpTest): self.outputs = {'Out': table[ids.flatten()].reshape((2, 4, 5, 31))} def test_check_output(self): - self.check_output() + self.check_output(check_cinn=True) def test_check_grad(self): - self.check_grad(['W'], 'Out', no_grad_set=set('Ids')) + self.check_grad(['W'], 'Out', no_grad_set=set('Ids'), check_cinn=True) @skip_check_grad_ci( @@ -104,7 +104,7 @@ class TestLookupTableOpWithPadding(TestLookupTableOp): padding_idx = np.random.choice(ids, 1)[0] self.outputs['Out'][ids == padding_idx] = np.zeros(31) self.attrs = {'padding_idx': int(padding_idx)} - self.check_output() + self.check_output(check_cinn=True) @skip_check_grad_ci( @@ -119,7 +119,7 @@ class TestLookupTableOpWithTensorIdsAndPadding(TestLookupTableOpWithTensorIds): padding_idx = np.random.choice(flatten_idx, 1)[0] self.outputs['Out'][np.squeeze(ids == padding_idx)] = np.zeros(31) self.attrs = {'padding_idx': padding_idx} - self.check_output() + self.check_output(check_cinn=True) class TestLookupTableWIsSelectedRows(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_matmul_op.py b/python/paddle/fluid/tests/unittests/test_matmul_op.py index 30085a841de314588ee07217d5d1d5f1c345930a..c7c870b3c46eb67f6c5635ec83a4acc01852754b 100644 --- a/python/paddle/fluid/tests/unittests/test_matmul_op.py +++ b/python/paddle/fluid/tests/unittests/test_matmul_op.py @@ -100,19 +100,29 @@ class Generator: self.outputs = {'Out': Out} def test_check_output(self): - self.check_output() + self.check_output(check_cinn=True) 
def test_check_grad_normal(self): - self.check_grad(['X', 'Y'], 'Out', max_relative_error=1e-3) + self.check_grad( + ['X', 'Y'], 'Out', max_relative_error=1e-3, check_cinn=True + ) def test_check_grad_ignore_x(self): self.check_grad( - ['Y'], 'Out', max_relative_error=1e-3, no_grad_set=set("X") + ['Y'], + 'Out', + max_relative_error=1e-3, + no_grad_set=set("X"), + check_cinn=True, ) def test_check_grad_ignore_y(self): self.check_grad( - ['X'], 'Out', max_relative_error=1e-3, no_grad_set=set('Y') + ['X'], + 'Out', + max_relative_error=1e-3, + no_grad_set=set('Y'), + check_cinn=True, ) diff --git a/python/paddle/fluid/tests/unittests/test_matmul_v2_op.py b/python/paddle/fluid/tests/unittests/test_matmul_v2_op.py index a0c41b63b05f2e9b2388566f1ddf5f0510c14e80..e0dcc3bfdd35ef896b4b0ea55402468abe7f1931 100644 --- a/python/paddle/fluid/tests/unittests/test_matmul_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_matmul_v2_op.py @@ -103,13 +103,28 @@ class TestMatMulV2Op(OpTest): self.outputs = {'Out': result} def test_check_output(self): - self.check_output() + self.check_output( + check_cinn=self.check_cinn if hasattr(self, 'check_cinn') else True + ) def test_check_grad(self): if core.is_compiled_with_rocm(): - self.check_grad(['X', 'Y'], 'Out', max_relative_error=1e-2) + self.check_grad( + ['X', 'Y'], + 'Out', + max_relative_error=1e-2, + check_cinn=self.check_cinn + if hasattr(self, 'check_cinn') + else True, + ) else: - self.check_grad(['X', 'Y'], 'Out') + self.check_grad( + ['X', 'Y'], + 'Out', + check_cinn=self.check_cinn + if hasattr(self, 'check_cinn') + else True, + ) class TestMatMulOp2(TestMatMulV2Op): @@ -290,6 +305,7 @@ class TestMatMulOp16(TestMatMulV2Op): self.y_shape = (1, 2, 2, 100, 2) self.trans_x = False self.trans_y = False + self.check_cinn = False class TestMatMulOp17(TestMatMulV2Op): @@ -343,7 +359,13 @@ def create_test_fp16_class(parent, atol=0.001, max_relative_error=1.0): if core.is_compiled_with_cuda(): place = core.CUDAPlace(0) if core.is_float16_supported(place): - self.check_output_with_place(place, atol=atol) + self.check_output_with_place( + place, + atol=atol, + check_cinn=self.check_cinn + if hasattr(self, 'check_cinn') + else True, + ) def test_check_grad(self): place = core.CUDAPlace(0) @@ -353,6 +375,9 @@ def create_test_fp16_class(parent, atol=0.001, max_relative_error=1.0): ['X', 'Y'], 'Out', max_relative_error=max_relative_error, + check_cinn=self.check_cinn + if hasattr(self, 'check_cinn') + else True, ) cls_name = "{}_{}".format(parent.__name__, "Fp16") @@ -405,7 +430,13 @@ def create_test_bf16_class(parent, atol=0.01): def test_check_output(self): place = core.CUDAPlace(0) - self.check_output_with_place(place, atol=atol) + self.check_output_with_place( + place, + atol=atol, + check_cinn=self.check_cinn + if hasattr(self, 'check_cinn') + else True, + ) def test_check_grad_x(self): place = core.CUDAPlace(0) @@ -416,6 +447,9 @@ def create_test_bf16_class(parent, atol=0.01): 'Out', no_grad_set={'Y'}, user_defined_grads=[numeric_grads], + check_cinn=self.check_cinn + if hasattr(self, 'check_cinn') + else True, ) def test_check_grad_y(self): @@ -427,6 +461,9 @@ def create_test_bf16_class(parent, atol=0.01): 'Out', no_grad_set={'X'}, user_defined_grads=[numeric_grads], + check_cinn=self.check_cinn + if hasattr(self, 'check_cinn') + else True, ) def test_check_grad(self): @@ -596,7 +633,7 @@ class TestComplexMatMulOp(OpTest): self.grad_y = np.matmul(np.conj(self.x).T, self.grad_out) def test_check_output(self): - self.check_output() + 
self.check_output(check_cinn=False) def test_check_grad_normal(self): self.check_grad( @@ -604,6 +641,7 @@ class TestComplexMatMulOp(OpTest): 'Out', user_defined_grads=[self.grad_x, self.grad_y], user_defined_grad_outputs=[self.grad_out], + check_cinn=False, ) def test_check_grad_ingore_x(self): @@ -613,6 +651,7 @@ class TestComplexMatMulOp(OpTest): no_grad_set=set("X"), user_defined_grads=[self.grad_y], user_defined_grad_outputs=[self.grad_out], + check_cinn=False, ) def test_check_grad_ingore_y(self): @@ -622,6 +661,7 @@ class TestComplexMatMulOp(OpTest): no_grad_set=set('Y'), user_defined_grads=[self.grad_x], user_defined_grad_outputs=[self.grad_out], + check_cinn=False, ) @@ -662,7 +702,7 @@ class TestComplexMatMulOpBroadcast(OpTest): ) def test_check_output(self): - self.check_output() + self.check_output(check_cinn=False) def test_check_grad_normal(self): self.check_grad( @@ -670,6 +710,7 @@ class TestComplexMatMulOpBroadcast(OpTest): 'Out', user_defined_grads=[self.grad_x, self.grad_y], user_defined_grad_outputs=[self.grad_out], + check_cinn=False, ) def test_check_grad_ingore_x(self): @@ -679,6 +720,7 @@ class TestComplexMatMulOpBroadcast(OpTest): no_grad_set=set("X"), user_defined_grads=[self.grad_y], user_defined_grad_outputs=[self.grad_out], + check_cinn=False, ) def test_check_grad_ingore_y(self): @@ -688,6 +730,7 @@ class TestComplexMatMulOpBroadcast(OpTest): no_grad_set=set('Y'), user_defined_grads=[self.grad_x], user_defined_grad_outputs=[self.grad_out], + check_cinn=False, ) diff --git a/python/paddle/fluid/tests/unittests/test_norm_op.py b/python/paddle/fluid/tests/unittests/test_norm_op.py index f87d5250f1cf80037f7493f7085b87f37282c979..3144ec189ed4e16fe819a71deb3956641cfcbc87 100644 --- a/python/paddle/fluid/tests/unittests/test_norm_op.py +++ b/python/paddle/fluid/tests/unittests/test_norm_op.py @@ -48,10 +48,10 @@ class TestNormOp(OpTest): self.python_out_sig = ['Out'] def test_check_output(self): - self.check_output() + self.check_output(check_cinn=True) def test_check_grad(self): - self.check_grad(['X'], 'Out') + self.check_grad(['X'], 'Out', check_cinn=True) def init_test_case(self): self.shape = [2, 3, 4, 5] @@ -109,7 +109,7 @@ class TestNormOp6(TestNormOp): self.dtype = "float32" def test_check_grad(self): - self.check_grad(['X'], 'Out', max_relative_error=0.008) + self.check_grad(['X'], 'Out', max_relative_error=0.008, check_cinn=True) @unittest.skipIf( @@ -120,11 +120,17 @@ class TestNormOp7(TestNormOp): self.dtype = "float16" def test_check_output(self): - self.check_output_with_place(fluid.core.CUDAPlace(0), atol=5e-2) + self.check_output_with_place( + fluid.core.CUDAPlace(0), atol=5e-2, check_cinn=True + ) def test_check_grad(self): self.check_grad_with_place( - fluid.core.CUDAPlace(0), ['X'], 'Out', max_relative_error=0.05 + fluid.core.CUDAPlace(0), + ['X'], + 'Out', + max_relative_error=0.05, + check_cinn=True, ) @@ -147,7 +153,7 @@ class TestNormTestOp(OpTest): def test_check_output(self): # dynamic graph just supports float tensor - self.check_output(check_dygraph=True) + self.check_output(check_dygraph=True, check_cinn=True) def test_check_grad(self): pass @@ -176,11 +182,17 @@ class TestNormBF16Op(OpTest): self.python_out_sig = ['Out'] def test_check_output(self): - self.check_output_with_place(core.CUDAPlace(0), atol=1e-1) + self.check_output_with_place( + core.CUDAPlace(0), atol=1e-1, check_cinn=True + ) def test_check_grad(self): self.check_grad_with_place( - core.CUDAPlace(0), ['X'], 'Out', max_relative_error=1e-2 + core.CUDAPlace(0), + ['X'], + 
'Out', + max_relative_error=1e-2, + check_cinn=True, ) def init_test_case(self): diff --git a/python/paddle/fluid/tests/unittests/test_one_hot_v2_op.py b/python/paddle/fluid/tests/unittests/test_one_hot_v2_op.py index 173dae20ac690c2e552cdb90192d0fc18333d13d..a49060e536de8a1f777efd72fb9d868fe8becb20 100644 --- a/python/paddle/fluid/tests/unittests/test_one_hot_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_one_hot_v2_op.py @@ -49,7 +49,7 @@ class TestOneHotOp(OpTest): self.outputs = {'Out': (out, x_lod)} def test_check_output(self): - self.check_output() + self.check_output(check_cinn=True) class TestOneHotOp_attr(OpTest): @@ -57,6 +57,7 @@ class TestOneHotOp_attr(OpTest): self.op_type = 'one_hot_v2' self.python_api = one_hot_wrapper depth = 10 + depth_np = np.array(10).astype('int32') dimension = 12 x_lod = [[4, 1, 3, 3]] x = [np.random.randint(0, depth - 1) for i in range(sum(x_lod[0]))] @@ -69,12 +70,12 @@ class TestOneHotOp_attr(OpTest): for i in range(np.product(x.shape)): out[i, 0, x[i]] = 1.0 - self.inputs = {'X': (x, x_lod)} + self.inputs = {'X': (x, x_lod), 'depth_tensor': depth_np} self.attrs = {'dtype': int(core.VarDesc.VarType.FP32), 'depth': depth} self.outputs = {'Out': (out, x_lod)} def test_check_output(self): - self.check_output() + self.check_output(check_cinn=True) class TestOneHotOp_default_dtype(OpTest): @@ -98,7 +99,7 @@ class TestOneHotOp_default_dtype(OpTest): self.outputs = {'Out': (out, x_lod)} def test_check_output(self): - self.check_output() + self.check_output(check_cinn=True) class TestOneHotOp_default_dtype_attr(OpTest): @@ -106,6 +107,7 @@ class TestOneHotOp_default_dtype_attr(OpTest): self.op_type = 'one_hot_v2' self.python_api = one_hot_wrapper depth = 10 + depth_np = np.array(depth).astype('int32') dimension = 12 x_lod = [[4, 1, 3, 3]] x = [np.random.randint(0, depth - 1) for i in range(sum(x_lod[0]))] diff --git a/python/paddle/fluid/tests/unittests/test_selu_op.py b/python/paddle/fluid/tests/unittests/test_selu_op.py index 845cf97d7a2c802f56c2d5fe8ba69c5afc08cfbe..acc9214220ef37af3ebb7dafec7ab3972f6844c6 100644 --- a/python/paddle/fluid/tests/unittests/test_selu_op.py +++ b/python/paddle/fluid/tests/unittests/test_selu_op.py @@ -15,7 +15,7 @@ import unittest import numpy as np -from eager_op_test import OpTest +from eager_op_test import OpTest, convert_float_to_uint16 import paddle import paddle.nn.functional as F @@ -43,14 +43,16 @@ class SeluTest(OpTest): self.op_type = "selu" self.python_api = paddle.nn.functional.selu self.x_shape = [3, 5, 5, 10] - self.dtype = np.float64 self.init_x_shape() self.init_dtype() alpha = 1.6732632423543772848170429916717 scale = 1.0507009873554804934193349852946 - x = np.random.normal(size=self.x_shape).astype(self.dtype) + if self.dtype == np.uint16: + x = np.random.normal(size=self.x_shape).astype(np.float32) + else: + x = np.random.normal(size=self.x_shape).astype(self.dtype) # Since zero point in selu is not differentiable, avoid randomize # zero. 
@@ -58,8 +60,12 @@ class SeluTest(OpTest): out = ref_selu(x, scale, alpha) - self.inputs = {'X': x} - self.outputs = {'Out': out} + if self.dtype == np.uint16: + self.inputs = {'X': convert_float_to_uint16(x)} + self.outputs = {'Out': convert_float_to_uint16(out)} + else: + self.inputs = {'X': x} + self.outputs = {'Out': out} self.attrs = { 'alpha': alpha, @@ -70,7 +76,7 @@ class SeluTest(OpTest): pass def init_dtype(self): - pass + self.dtype = np.float64 def test_check_output(self): self.check_output() @@ -79,6 +85,27 @@ class SeluTest(OpTest): self.check_grad(['X'], 'Out') +class SeluTestFP16OP(SeluTest): + def init_dtype(self): + self.dtype = np.float16 + + +@unittest.skipIf( + not core.is_compiled_with_cuda() + or not core.is_bfloat16_supported(core.CUDAPlace(0)), + "core is not compiled with CUDA and do not support bfloat16", +) +class SeluTestBF16OP(SeluTest): + def init_dtype(self): + self.dtype = np.uint16 + + def test_check_output(self): + self.check_output_with_place(core.CUDAPlace(0)) + + def test_check_grad(self): + self.check_grad_with_place(core.CUDAPlace(0), ['X'], 'Out') + + class TestSeluAPI(unittest.TestCase): # test paddle.nn.SELU, paddle.nn.functional.selu def setUp(self): diff --git a/python/paddle/fluid/tests/unittests/test_shape_op.py b/python/paddle/fluid/tests/unittests/test_shape_op.py index 3609370e73cf852df8d107d87f8be70cc1b3073c..d9dade1cf99ce700ea8850e0b6a51768e6a6d36a 100644 --- a/python/paddle/fluid/tests/unittests/test_shape_op.py +++ b/python/paddle/fluid/tests/unittests/test_shape_op.py @@ -36,7 +36,7 @@ class TestShapeOp(OpTest): self.dtype = np.float32 def test_check_output(self): - self.check_output() + self.check_output(check_cinn=True) class case1(TestShapeOp): @@ -125,7 +125,7 @@ class TestShapeOpBf16(OpTest): def test_check_output(self): place = core.CUDAPlace(0) - self.check_output_with_place(place) + self.check_output_with_place(place, check_cinn=True) class case1Bf16(TestShapeOpBf16): diff --git a/python/paddle/fluid/tests/unittests/test_sum_op.py b/python/paddle/fluid/tests/unittests/test_sum_op.py index 99406f4599c837459119f1ea4d47d4c42af4f108..49e42b5434242ba3f24eee4ae61f374f88c4a2c1 100644 --- a/python/paddle/fluid/tests/unittests/test_sum_op.py +++ b/python/paddle/fluid/tests/unittests/test_sum_op.py @@ -62,10 +62,10 @@ class TestSumOp(OpTest): self.dtype = np.float64 def test_check_output(self): - self.check_output(check_prim=True) + self.check_output(check_prim=True, check_cinn=True) def test_check_grad(self): - self.check_grad(['x0'], 'Out', check_prim=True) + self.check_grad(['x0'], 'Out', check_prim=True, check_cinn=True) class TestSelectedRowsSumOp(unittest.TestCase): @@ -299,14 +299,14 @@ class TestFP16SumOp(TestSumOp): def test_check_output(self): place = core.CUDAPlace(0) if core.is_float16_supported(place): - self.check_output_with_place(place) + self.check_output_with_place(place, check_cinn=True) # FIXME: Because of the precision fp16, max_relative_error # should be 0.15 here. 
def test_check_grad(self): place = core.CUDAPlace(0) if core.is_float16_supported(place): - self.check_grad(['x0'], 'Out') + self.check_grad(['x0'], 'Out', check_cinn=True) def create_test_sum_fp16_class(parent): diff --git a/test/ir/inference/inference_pass_test.py b/test/ir/inference/inference_pass_test.py index 4ac3c861213303e70823c3e338a499afac5bd921..cee27eca7d28a08498c47b6f76e984e165bf98aa 100644 --- a/test/ir/inference/inference_pass_test.py +++ b/test/ir/inference/inference_pass_test.py @@ -12,7 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os import random +import tempfile import unittest import numpy as np @@ -41,7 +43,10 @@ class InferencePassTest(unittest.TestCase): self.dynamic_shape_params = None self.enable_lite = False self.lite_parameters = None - self.path = "./inference_pass/" + self.__class__.__name__ + "/" + self.temp_dir = tempfile.TemporaryDirectory() + self.path = os.path.join( + self.temp_dir.name, 'inference_pass', self.__class__.__name__ + ) np.random.seed(1) random.seed(1) diff --git a/test/ir/inference/test_trt_activation_pass.py b/test/ir/inference/test_trt_activation_pass.py index 858a307dd629e881d767d98330f93f2ed14fc3a1..cf63d203224503f4d71f41d8fcec3bc7ff9f3668 100644 --- a/test/ir/inference/test_trt_activation_pass.py +++ b/test/ir/inference/test_trt_activation_pass.py @@ -53,8 +53,9 @@ class TensorRTSubgraphPassActivationTest(InferencePassTest): def test_check_output(self): if core.is_compiled_with_cuda(): use_gpu = True - if os.path.exists(self.path + "_opt_cache"): - shutil.rmtree(self.path + "_opt_cache") + opt_path = os.path.join(self.path, '_opt_cache') + if os.path.exists(opt_path): + shutil.rmtree(opt_path) if ( self.trt_parameters.precision == AnalysisConfig.Precision.Float32 diff --git a/test/ir/inference/test_trt_elementwise_op.py b/test/ir/inference/test_trt_elementwise_op.py index 0f9caee745b9a6af6814521fb9d43e722351c40f..7f4a34db52fbb33941e9c71490aa2559481349a1 100644 --- a/test/ir/inference/test_trt_elementwise_op.py +++ b/test/ir/inference/test_trt_elementwise_op.py @@ -53,8 +53,9 @@ class TensorRTSubgraphPassElementwiseBroadcastTest(InferencePassTest): return paddle.tensor.math.add(x=data1, y=data2) def test_check_output(self): - if os.path.exists(self.path + "_opt_cache"): - shutil.rmtree(self.path + "_opt_cache") + opt_path = os.path.join(self.path, '_opt_cache') + if os.path.exists(opt_path): + shutil.rmtree(opt_path) if core.is_compiled_with_cuda(): use_gpu = True self.check_output_with_option(use_gpu) diff --git a/test/ir/inference/test_trt_instance_norm_op.py b/test/ir/inference/test_trt_instance_norm_op.py index 532c9e8c08a112d2ec0819a4ea3a7c96acbd1fef..fdf3523e880cec53c50ed3c336574d87d32fbadb 100644 --- a/test/ir/inference/test_trt_instance_norm_op.py +++ b/test/ir/inference/test_trt_instance_norm_op.py @@ -55,8 +55,9 @@ class TRTInstanceNormTest(InferencePassTest): self.fetch_list = [out] def check_output(self, remove_cache=False): - if remove_cache and os.path.exists(self.path + "_opt_cache"): - shutil.rmtree(self.path + "_opt_cache") + opt_path = os.path.join(self.path, '_opt_cache') + if remove_cache and os.path.exists(opt_path): + shutil.rmtree(opt_path) if core.is_compiled_with_cuda(): use_gpu = True atol = 1e-5 diff --git a/test/ir/inference/test_trt_pool3d_op.py b/test/ir/inference/test_trt_pool3d_op.py index 886f46dbfd76e62dc4b10c92d99bc727b29d58e3..f64ff97e4e8db1426235d8ee33ddbc121452ecf9 100644 --- a/test/ir/inference/test_trt_pool3d_op.py +++ 
b/test/ir/inference/test_trt_pool3d_op.py @@ -84,8 +84,9 @@ class TensorRTPool3dTest(InferencePassTest): self.fetch_list = [pool_out] def check_output(self): - if os.path.exists(self.path + "_opt_cache"): - shutil.rmtree(self.path + "_opt_cache") + opt_path = os.path.join(self.path, '_opt_cache') + if os.path.exists(opt_path): + shutil.rmtree(opt_path) if core.is_compiled_with_cuda(): use_gpu = True if self.precision == AnalysisConfig.Precision.Float32: @@ -200,8 +201,9 @@ class TensorRTAdaptiveAvgPool3DTest(InferencePassTest): self.fetch_list = [pool_out] def check_output(self): - if os.path.exists(self.path + "_opt_cache"): - shutil.rmtree(self.path + "_opt_cache") + opt_path = os.path.join(self.path, '_opt_cache') + if os.path.exists(opt_path): + shutil.rmtree(opt_path) if core.is_compiled_with_cuda(): use_gpu = True self.check_output_with_option(use_gpu) @@ -300,8 +302,9 @@ class TensorRTAdaptiveMaxPool3DTest(InferencePassTest): self.fetch_list = [pool_out] def check_output(self): - if os.path.exists(self.path + "_opt_cache"): - shutil.rmtree(self.path + "_opt_cache") + opt_path = os.path.join(self.path, '_opt_cache') + if os.path.exists(opt_path): + shutil.rmtree(opt_path) if core.is_compiled_with_cuda(): use_gpu = True self.check_output_with_option(use_gpu) diff --git a/test/ir/inference/test_trt_pool_op.py b/test/ir/inference/test_trt_pool_op.py index 0885ff6acc319af1ba91b20b5144f5413b8ffe50..8826a3f06cd01e7e44e36491c639f78da3820af1 100644 --- a/test/ir/inference/test_trt_pool_op.py +++ b/test/ir/inference/test_trt_pool_op.py @@ -86,8 +86,9 @@ class TensorRTPoolTest(InferencePassTest): self.fetch_list = [out] def check_output(self): - if os.path.exists(self.path + "_opt_cache"): - shutil.rmtree(self.path + "_opt_cache") + opt_path = os.path.join(self.path, '_opt_cache') + if os.path.exists(opt_path): + shutil.rmtree(opt_path) if core.is_compiled_with_cuda(): use_gpu = True if self.precision == AnalysisConfig.Precision.Float32: diff --git a/test/ir/inference/test_trt_skip_layernorm_fuse_pass.py b/test/ir/inference/test_trt_skip_layernorm_fuse_pass.py index a91809584f63c5fb94197113d44a3e506216b953..1bf140a365aaeac004966bd5f8341958c3fc47f5 100644 --- a/test/ir/inference/test_trt_skip_layernorm_fuse_pass.py +++ b/test/ir/inference/test_trt_skip_layernorm_fuse_pass.py @@ -60,8 +60,9 @@ class SkipLayernormFusePassTest0(InferencePassTest): return paddle.add(data1, data2) def test_check_output(self): - if os.path.exists(self.path + "_opt_cache"): - shutil.rmtree(self.path + "_opt_cache") + opt_path = os.path.join(self.path, '_opt_cache') + if os.path.exists(opt_path): + shutil.rmtree(opt_path) if core.is_compiled_with_cuda(): use_gpu = True self.check_output_with_option(use_gpu, atol=0.01, rtol=0.00001) @@ -107,8 +108,9 @@ class SkipLayernormFusePassTest1(InferencePassTest): return paddle.add(data1, data2) def test_check_output(self): - if os.path.exists(self.path + "_opt_cache"): - shutil.rmtree(self.path + "_opt_cache") + opt_path = os.path.join(self.path, '_opt_cache') + if os.path.exists(opt_path): + shutil.rmtree(opt_path) if core.is_compiled_with_cuda(): use_gpu = True self.check_output_with_option(use_gpu, atol=0.01, rtol=0.00001) @@ -154,8 +156,9 @@ class SkipLayernormFusePassTest2(InferencePassTest): return paddle.add(data1, data2) def test_check_output(self): - if os.path.exists(self.path + "_opt_cache"): - shutil.rmtree(self.path + "_opt_cache") + opt_path = os.path.join(self.path, '_opt_cache') + if os.path.exists(opt_path): + shutil.rmtree(opt_path) if 
core.is_compiled_with_cuda(): use_gpu = True self.check_output_with_option(use_gpu, atol=0.1, rtol=0.00001) @@ -201,8 +204,9 @@ class SkipLayernormFusePassTest3(InferencePassTest): return paddle.add(data1, data2) def test_check_output(self): - if os.path.exists(self.path + "_opt_cache"): - shutil.rmtree(self.path + "_opt_cache") + opt_path = os.path.join(self.path, '_opt_cache') + if os.path.exists(opt_path): + shutil.rmtree(opt_path) if core.is_compiled_with_cuda(): use_gpu = True self.check_output_with_option(use_gpu, atol=0.1, rtol=0.00001) diff --git a/test/ir/inference/test_trt_subgraph_pass.py b/test/ir/inference/test_trt_subgraph_pass.py index bc102b60b30ef5c0c6127c9f2f9f76f982e6ecd9..4031a882758b99809843bfeed9d5e4034ee3655e 100644 --- a/test/ir/inference/test_trt_subgraph_pass.py +++ b/test/ir/inference/test_trt_subgraph_pass.py @@ -128,8 +128,9 @@ class TensorRTSubgraphPassSplitSerializeTest(InferencePassTest): def test_check_output(self): if paddle.is_compiled_with_cuda(): use_gpu = True - if os.path.exists(self.path + "_opt_cache"): - shutil.rmtree(self.path + "_opt_cache") + opt_path = os.path.join(self.path, '_opt_cache') + if os.path.exists(opt_path): + shutil.rmtree(opt_path) self.check_output_with_option(use_gpu) self.assertTrue( PassVersionChecker.IsCompatible('tensorrt_subgraph_pass') @@ -164,8 +165,9 @@ class TensorRTSubgraphPassDynamicSplitFp16SerializeTest(InferencePassTest): def test_check_output(self): if paddle.is_compiled_with_cuda(): use_gpu = True - if os.path.exists(self.path + "_opt_cache"): - shutil.rmtree(self.path + "_opt_cache") + opt_path = os.path.join(self.path, '_opt_cache') + if os.path.exists(opt_path): + shutil.rmtree(opt_path) self.check_output_with_option(use_gpu, 1e-3) self.assertTrue( PassVersionChecker.IsCompatible('tensorrt_subgraph_pass') @@ -313,8 +315,9 @@ class TensorRTSubgraphPassLayerNormDynamicTest(InferencePassTest): self.serialize = True def test_check_output(self): - if os.path.exists(self.path + "_opt_cache"): - shutil.rmtree(self.path + "_opt_cache") + opt_path = os.path.join(self.path, '_opt_cache') + if os.path.exists(opt_path): + shutil.rmtree(opt_path) if paddle.is_compiled_with_cuda(): use_gpu = True self.check_output_with_option(use_gpu) @@ -332,8 +335,9 @@ class TensorRTSubgraphPassLayerNormDynamicFP16Test( self.serialize = True def test_check_output(self): - if os.path.exists(self.path + "_opt_cache"): - shutil.rmtree(self.path + "_opt_cache") + opt_path = os.path.join(self.path, '_opt_cache') + if os.path.exists(opt_path): + shutil.rmtree(opt_path) if paddle.is_compiled_with_cuda(): use_gpu = True self.check_output_with_option(use_gpu, atol=0.01, rtol=0.01) @@ -406,8 +410,9 @@ class TensorRTSubgraphPassElementwiseSerializeTest( ) def test_check_output(self): - if os.path.exists(self.path + "_opt_cache"): - shutil.rmtree(self.path + "_opt_cache") + opt_path = os.path.join(self.path, '_opt_cache') + if os.path.exists(opt_path): + shutil.rmtree(opt_path) super().test_check_output() @@ -444,8 +449,9 @@ class TensorRTSubgraphPassElementwiseBroadcastDynamicTest(InferencePassTest): return paddle.add(x=data1, y=data2) def test_check_output(self): - if os.path.exists(self.path + "_opt_cache"): - shutil.rmtree(self.path + "_opt_cache") + opt_path = os.path.join(self.path, '_opt_cache') + if os.path.exists(opt_path): + shutil.rmtree(opt_path) if paddle.is_compiled_with_cuda(): use_gpu = True self.check_output_with_option(use_gpu) diff --git a/test/xpu/test_nll_loss_op_xpu.py b/test/xpu/test_nll_loss_op_xpu.py new file mode 100644 
index 0000000000000000000000000000000000000000..71ce382933449c8af9624c04c1658be0ff8e2dcd --- /dev/null +++ b/test/xpu/test_nll_loss_op_xpu.py @@ -0,0 +1,288 @@ +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import numpy as np +from get_test_cover_info import ( + XPUOpTestWrapper, + create_test_class, + get_xpu_op_support_types, +) +from op_test_xpu import XPUOpTest + +import paddle + +paddle.enable_static() + + +def nll_loss_1d( + logs, dtype, targets, weight=None, reduction='mean', ignore_index=-100 +): + input_shape = logs.shape + N = input_shape[0] + C = input_shape[1] + out = np.zeros_like(targets).astype(dtype) + total_weight = 0 + for i in range(N): + cur_target = targets[i] + if cur_target == ignore_index: + out[i] = 0 + continue + cur_weight = weight[cur_target] if weight is not None else 1 + total_weight += cur_weight + out[i] = -logs[i][cur_target] * cur_weight + if reduction == 'sum': + out = np.sum(out) + total_weight = np.array([total_weight]).astype(dtype) + return {'Out': out, 'Total_weight': total_weight} + elif reduction == 'mean': + out = np.sum(out) + if total_weight != 0: + out /= total_weight + total_weight = np.array([total_weight]).astype(dtype) + return {'Out': out, 'Total_weight': total_weight} + elif reduction == 'none': + total_weight = np.array([0]).astype(dtype) + return {'Out': out, 'Total_weight': total_weight} + + +def nll_loss_2d( + logs, dtype, targets, weight=None, reduction='mean', ignore_index=-100 +): + input_shape = logs.shape + N = input_shape[0] + H = input_shape[2] + W = input_shape[3] + out = np.zeros_like(targets).astype(dtype) + total_weight = 0 + for i in range(N): + for h in range(H): + for w in range(W): + cur_target = targets[i][h][w] + if cur_target == ignore_index: + out[i][h][w] = 0 + continue + cur_weight = weight[cur_target] if weight is not None else 1 + total_weight += cur_weight + out[i][h][w] = -logs[i][cur_target][h][w] * cur_weight + if reduction == 'sum': + out = np.sum(out) + total_weight = np.array([total_weight]).astype(dtype) + return {'Out': out, 'Total_weight': total_weight} + elif reduction == 'mean': + out = np.sum(out) + if total_weight != 0: + out /= total_weight + total_weight = np.array([total_weight]).astype(dtype) + return {'Out': out, 'Total_weight': total_weight} + elif reduction == 'none': + total_weight = np.array([0]).astype(dtype) + return {'Out': out, 'Total_weight': total_weight} + + +class XPUTestNLLLossOP(XPUOpTestWrapper): + def __init__(self): + self.op_name = 'nll_loss' + self.use_dynamic_create_class = False + + class TestNLLLossOpBase1D(XPUOpTest): + op_type = 'nll_loss' + + def setUp(self): + self.dtype = self.in_type + self.place = paddle.XPUPlace(0) + self.set_attrs() + self.set_inputs() + self.inputs = { + 'X': self.x, + 'Label': self.label, + } + if self.weight is not None: + self.inputs['Weight'] = self.weight + self.outputs = nll_loss_1d( + self.x, + self.dtype, + self.label, + self.weight, + 
self.attrs['reduction'], + ) + + def set_attrs(self): + self.attrs = {'reduction': 'none'} + + def set_inputs(self): + self.class_num = 3 + x_shape = [5, self.class_num] + label_shape = [5] + self.x = np.random.random(x_shape).astype(self.dtype) + self.label = np.random.randint( + low=0, high=self.class_num, size=label_shape + ).astype(np.int64) + self.weight = np.random.random(self.class_num).astype(self.dtype) + + def test_check_output(self): + self.check_output_with_place(self.place) + + def test_check_grad(self): + self.check_grad_with_place(self.place, ['X'], 'Out') + + class TestNLLLossOpWithWeightMean1D(TestNLLLossOpBase1D): + def set_attrs(self): + self.attrs = {'reduction': 'mean'} + + class TestNLLLossOpWithWeightSum1D(TestNLLLossOpBase1D): + def set_attrs(self): + self.attrs = {'reduction': 'sum'} + + class TestNLLLossOpWithoutWeightNone1D(TestNLLLossOpBase1D): + def set_inputs(self): + self.class_num = 3 + x_shape = [5, self.class_num] + label_shape = [5] + self.x = np.random.random(x_shape).astype(self.dtype) + self.label = np.random.randint( + low=0, high=self.class_num, size=label_shape + ).astype(np.int64) + self.weight = None + + def set_attrs(self): + self.attrs = {'reduction': 'none'} + + class TestNLLLossOpWithoutWeightMean1D(TestNLLLossOpBase1D): + def set_inputs(self): + self.class_num = 3 + x_shape = [5, self.class_num] + label_shape = [5] + self.x = np.random.random(x_shape).astype(self.dtype) + self.label = np.random.randint( + low=0, high=self.class_num, size=label_shape + ).astype(np.int64) + self.weight = None + + def set_attrs(self): + self.attrs = {'reduction': 'mean'} + + class TestNLLLossOpWithoutWeightSum1D(TestNLLLossOpBase1D): + def set_inputs(self): + self.class_num = 3 + x_shape = [5, self.class_num] + label_shape = [5] + self.x = np.random.random(x_shape).astype(self.dtype) + self.label = np.random.randint( + low=0, high=self.class_num, size=label_shape + ).astype(np.int64) + self.weight = None + + def set_attrs(self): + self.attrs = {'reduction': 'sum'} + + class TestNLLLossOpBase2D(XPUOpTest): + op_type = 'nll_loss' + + def setUp(self): + self.dtype = self.in_type + self.place = paddle.XPUPlace(0) + self.set_attrs() + self.set_inputs() + self.inputs = {'X': self.x, 'Label': self.label} + if self.weight is not None: + self.inputs['Weight'] = self.weight + self.outputs = nll_loss_2d( + self.x, + self.dtype, + self.label, + self.weight, + self.attrs['reduction'], + ) + + def set_attrs(self): + self.attrs = {'reduction': 'none'} + + def set_inputs(self): + self.class_num = 3 + x_shape = [5, self.class_num, 7, 11] + label_shape = [5, 7, 11] + self.x = np.random.random(x_shape).astype(self.dtype) + self.label = np.random.randint( + low=0, high=self.class_num, size=label_shape + ).astype(np.int64) + self.weight = np.random.random(self.class_num).astype(self.dtype) + + def test_check_output(self): + self.check_output_with_place(self.place) + + def test_check_grad(self): + self.check_grad_with_place(self.place, ['X'], 'Out') + + class TestNLLLossOpWithWeightMean2D(TestNLLLossOpBase2D): + def set_attrs(self): + self.attrs = {'reduction': 'mean'} + + class TestNLLLossOpWithWeightSum2D(TestNLLLossOpBase2D): + def set_attrs(self): + self.attrs = {'reduction': 'sum'} + + class TestNLLLossOpWithoutWeightNone2D(TestNLLLossOpBase2D): + def set_inputs(self): + self.dtype = self.in_type + self.class_num = 3 + x_shape = [5, self.class_num, 7, 11] + label_shape = [5, 7, 11] + self.x = np.random.random(x_shape).astype(self.dtype) + self.label = np.random.randint( + low=0, 
high=self.class_num, size=label_shape + ).astype(np.int64) + self.weight = None + + def set_attrs(self): + self.attrs = {'reduction': 'none'} + + class TestNLLLossOpWithoutWeightMean2D(TestNLLLossOpBase2D): + def set_inputs(self): + self.dtype = self.in_type + self.class_num = 3 + x_shape = [5, self.class_num, 7, 11] + label_shape = [5, 7, 11] + self.x = np.random.random(x_shape).astype(self.dtype) + self.label = np.random.randint( + low=0, high=self.class_num, size=label_shape + ).astype(np.int64) + self.weight = None + + def set_attrs(self): + self.attrs = {'reduction': 'mean'} + + class TestNLLLossOpWithoutWeightSum2D(TestNLLLossOpBase2D): + def set_inputs(self): + self.dtype = self.in_type + self.class_num = 3 + x_shape = [5, self.class_num, 7, 11] + label_shape = [5, 7, 11] + self.x = np.random.random(x_shape).astype(self.dtype) + self.label = np.random.randint( + low=0, high=self.class_num, size=label_shape + ).astype(np.int64) + self.weight = None + + def set_attrs(self): + self.attrs = {'reduction': 'sum'} + + +support_types = get_xpu_op_support_types('nll_loss') +for stype in support_types: + create_test_class(globals(), XPUTestNLLLossOP, stype) + +if __name__ == '__main__': + unittest.main() diff --git a/tools/check_file_diff_approvals.sh b/tools/check_file_diff_approvals.sh index 97a5ee146df7008ce16b03ea30f02e117cb33892..80be47871ef4820f0eb4ae0f9f2c3f74175f3dd3 100644 --- a/tools/check_file_diff_approvals.sh +++ b/tools/check_file_diff_approvals.sh @@ -344,7 +344,7 @@ fi OUTPUT_LOG=`echo "$ALL_ADDED_LINES" | grep -Ew "print|printf|fprintf|std::cout" || true` if [ "$OUTPUT_LOG" != "" ];then echo_line="print or std::cout is not recommended for direct use, please use loggin or glog. If it is necessary to use, please contact tianshuo78520a (Recommend) or zhangbo9674 review and approve.\n" - check_approval 1 tianshuo7852a zhangbo9674 + check_approval 1 tianshuo78520a zhangbo9674 fi HAS_MODIFIED_PHI_FILES=`git diff --name-only upstream/$BRANCH | grep "paddle/phi/" || true`
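The matmul hunks above thread a check_cinn flag into every check_output/check_grad call, falling back to True when a subclass does not define the attribute, so individual cases such as TestMatMulOp16 can opt out of CINN checking by setting check_cinn = False. Below is a minimal sketch of that opt-out pattern; the class names are hypothetical and the stand-in base class only mimics the relevant piece of OpTest, whose real check methods take many more arguments. Note that getattr(self, 'check_cinn', True) is an equivalent, shorter spelling of the patch's hasattr expression, not what the patch itself uses.

# Minimal sketch of the check_cinn opt-out pattern (hypothetical names).
class FakeOpTest:
    # Stand-in for OpTest; the real check_output accepts many more arguments.
    def check_output(self, check_cinn=True):
        print(f"check_output(check_cinn={check_cinn})")


class MatMulLikeTest(FakeOpTest):
    def test_check_output(self):
        # Equivalent to the patch's
        # `self.check_cinn if hasattr(self, 'check_cinn') else True`.
        self.check_output(check_cinn=getattr(self, 'check_cinn', True))


class MatMulOp16LikeTest(MatMulLikeTest):
    # Mirrors TestMatMulOp16, which sets check_cinn = False to skip CINN.
    check_cinn = False


if __name__ == '__main__':
    MatMulLikeTest().test_check_output()      # prints check_cinn=True
    MatMulOp16LikeTest().test_check_output()  # prints check_cinn=False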
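The inference-pass hunks replace the hard-coded "./inference_pass/&lt;class name&gt;/" directory with a tempfile.TemporaryDirectory and build the "_opt_cache" path with os.path.join before deleting it. A short self-contained sketch of that lifecycle follows, under the assumption that cleanup happens in tearDown; the hunks shown here do not include a tearDown, so that part is illustrative only.

# Illustrative sketch of the per-test scratch-directory pattern (hypothetical test class).
import os
import shutil
import tempfile
import unittest


class TempPathPassTest(unittest.TestCase):
    def setUp(self):
        # Per-test scratch directory instead of a fixed "./inference_pass/..." path.
        self.temp_dir = tempfile.TemporaryDirectory()
        self.path = os.path.join(
            self.temp_dir.name, 'inference_pass', self.__class__.__name__
        )
        os.makedirs(self.path, exist_ok=True)

    def tearDown(self):
        # Assumed cleanup step; not shown in the diff above.
        self.temp_dir.cleanup()

    def test_remove_opt_cache(self):
        # Same cache-removal idiom as the TensorRT tests above.
        opt_path = os.path.join(self.path, '_opt_cache')
        os.makedirs(opt_path, exist_ok=True)
        if os.path.exists(opt_path):
            shutil.rmtree(opt_path)
        self.assertFalse(os.path.exists(opt_path))


if __name__ == '__main__':
    unittest.main()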
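The new XPU NLL-loss test builds its expected outputs with the NumPy reference nll_loss_1d, where each sample contributes -logs[i][target[i]] * weight[target[i]] and the 'mean' reduction divides the summed losses by the summed weights. A small hand-checked example of that arithmetic, using made-up numbers rather than anything from the test:

# Sanity check of the nll_loss_1d 'mean' reduction with made-up values.
import numpy as np

logs = np.array([[0.2, 0.5, 0.3],
                 [0.1, 0.8, 0.1]], dtype=np.float64)
targets = np.array([1, 2])
weight = np.array([1.0, 2.0, 0.5])

# Per-sample loss: -logs[i, targets[i]] * weight[targets[i]]
per_sample = -logs[np.arange(len(targets)), targets] * weight[targets]
total_weight = weight[targets].sum()

mean_loss = per_sample.sum() / total_weight  # the 'mean' reduction
print(mean_loss)  # (-0.5*2.0 + -0.1*0.5) / 2.5 == -0.42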