Commit 3631f064, authored by liangjianzhong

Merge remote-tracking branch 'zyc/develop' into semi-auto/rule-base

...@@ -30,10 +30,6 @@
path = third_party/xxhash
url = https://github.com/Cyan4973/xxHash.git
ignore = dirty
[submodule "third_party/eigen3"]
path = third_party/eigen3
url = https://gitlab.com/libeigen/eigen.git
ignore = dirty
[submodule "third_party/leveldb"]
path = third_party/leveldb
url = https://github.com/google/leveldb
...@@ -50,3 +46,7 @@
path = third_party/glog
url = https://github.com/google/glog.git
ignore = dirty
[submodule "third_party/eigen3"]
path = third_party/eigen3
url = https://gitlab.com/libeigen/eigen.git
ignore = dirty
...@@ -296,6 +296,8 @@ function(build_protobuf TARGET_NAME BUILD_FOR_HOST)
PREFIX ${PROTOBUF_PREFIX_DIR}
SOURCE_DIR ${SOURCE_DIR}
UPDATE_COMMAND ""
PATCH_COMMAND
COMMAND cd ${SOURCE_DIR} && git checkout ${PROTOBUF_TAG}
DEPENDS zlib
CONFIGURE_COMMAND
${CMAKE_COMMAND} ${SOURCE_DIR}/cmake ${OPTIONAL_ARGS}
......
...@@ -8,7 +8,7 @@ set(XPU_API_LIB_NAME "libxpuapi.so")
set(XPU_RT_LIB_NAME "libxpurt.so")
set(XPU_XFT_LIB_NAME "libxft.so")
-set(XPU_BASE_DATE "20230519")
+set(XPU_BASE_DATE "20230523")
set(XPU_XCCL_BASE_VERSION "1.0.49.2")
set(XPU_XFT_BASE_VERSION "latest")
......
...@@ -6,6 +6,3 @@ cc_library(
add_subdirectory(test)
add_subdirectory(spmd_rules)
cc_library(auto_parallel DEPS device_mesh process_mesh dist_attr dist_mapper
dist_tensor_spec)
...@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/distributed/auto_parallel/spmd_rules/dist_tensor_spec.h"
-#include "paddle/fluid/distributed/auto_parallel/process_mesh.h"
+#include "paddle/phi/core/distributed/auto_parallel/utils.h"
namespace paddle {
namespace distributed {
...@@ -27,28 +27,41 @@ DistTensorSpec::DistTensorSpec(const std::vector<int64_t>& shape,
dist_attr_.copy_from(dist_attr);
}
DistTensorSpec::DistTensorSpec(const DistTensorSpec& spec) {
std::vector<int64_t> spec_shape = spec.get_shape();
shape_.assign(spec_shape.begin(), spec_shape.end());
dist_attr_.copy_from(spec.get_dist_attr());
}
DistTensorSpec::~DistTensorSpec() {}
DistTensorSpec::DistTensorSpec(const Tensor& tensor) {
shape_ = tensor.shape();
// std::vector<int64_t> pm_shape, pm_ids;
// pm_shape = {4};
// pm_ids = {0, 1, 2, 3};
// std::vector<std::string> dim_name = {"mp"};
// ProcessMesh pm(pm_shape, pm_ids, dim_name);
// std::vector<int64_t> dims_mapping = {-1, 0};
// TensorDistAttr dist_attr;
// dist_attr.set_process_mesh(pm);
// dist_attr.set_dims_mapping(dims_mapping);
// dist_attr_.copy_from(dist_attr);
// std::cout << dist_attr_;
}
DistTensorSpec& DistTensorSpec::operator=(const DistTensorSpec& spec) {
std::vector<int64_t> spec_shape = spec.get_shape();
shape_ = spec_shape;
dist_attr_.copy_from(spec.get_dist_attr());
return *this;
}
-const std::vector<int64_t>& DistTensorSpec::get_dims_mapping() {
+const std::vector<int64_t>& DistTensorSpec::get_dims_mapping() const {
return dist_attr_.dims_mapping();
}
...@@ -57,7 +70,7 @@ void DistTensorSpec::set_dims_mapping(
dist_attr_.set_dims_mapping(dims_mapping);
}
-const ProcessMesh& DistTensorSpec::get_process_mesh() {
+const ProcessMesh& DistTensorSpec::get_process_mesh() const {
return dist_attr_.process_mesh();
}
...@@ -65,7 +78,22 @@ void DistTensorSpec::set_process_mesh(const ProcessMesh& process_mesh) {
dist_attr_.set_process_mesh(process_mesh);
}
-const std::vector<int64_t>& DistTensorSpec::get_shape() { return shape_; }
+const std::vector<int64_t>& DistTensorSpec::get_shape() const { return shape_; }
const TensorDistAttr& DistTensorSpec::get_dist_attr() const {
return dist_attr_;
}
void DistTensorSpec::set_dist_attr(const TensorDistAttr& dist_attr) {
dist_attr_ = dist_attr;
}
std::string DistTensorSpec::to_string() const {
using phi::distributed::auto_parallel::str_join;
std::string spec_str = "{tensor_shape:[" + str_join(shape_) + "], ";
spec_str += "dist_attr:" + dist_attr_.to_string() + "}";
return spec_str;
}
} // namespace auto_parallel
} // namespace distributed
......
...@@ -14,39 +14,55 @@ limitations under the License. */
#pragma once
#include "paddle/fluid/distributed/auto_parallel/dist_attr.h"
#include "paddle/phi/api/include/tensor.h"
#include "paddle/phi/core/distributed/auto_parallel/dist_attr.h"
namespace paddle {
namespace distributed {
namespace auto_parallel {
using phi::distributed::auto_parallel::ProcessMesh;
using phi::distributed::auto_parallel::TensorDistAttr;
/**
* A unified data class for inferring distributed attributes
* in both dygraph mode and static mode
*/
class DistTensorSpec {
public:
DistTensorSpec() = default;
DistTensorSpec(const std::vector<int64_t>& shape,
const TensorDistAttr& dist_attr);
DistTensorSpec(const DistTensorSpec& spec);
// temp function, only for test in dygraph mode
explicit DistTensorSpec(const Tensor& tensor);
~DistTensorSpec();
DistTensorSpec& operator=(const DistTensorSpec& spec);
// get dims_mapping from dist_attr_
-const std::vector<int64_t>& get_dims_mapping();
+const std::vector<int64_t>& get_dims_mapping() const;
// set dims_mapping in dist_attr_
void set_dims_mapping(const std::vector<int64_t>& dims_mapping);
// get process_mesh from dist_attr_
-const ProcessMesh& get_process_mesh();
+const ProcessMesh& get_process_mesh() const;
// set process_mesh in dist_attr_
void set_process_mesh(const ProcessMesh& process_mesh);
const TensorDistAttr& get_dist_attr() const;
void set_dist_attr(const TensorDistAttr& dist_attr);
-const std::vector<int64_t>& get_shape();
+const std::vector<int64_t>& get_shape() const;
std::string to_string() const;
private:
std::vector<int64_t> shape_;
......
...@@ -17,6 +17,7 @@
#include <functional>
#include <vector>
#include "cinn/common/target.h"
#include "cinn/hlir/framework/graph_compiler.h"
#include "cinn/runtime/cinn_runtime.h"
#include "cinn/runtime/flags.h"
...@@ -94,6 +95,11 @@ void SetCinnRandomSeed<phi::CPUContext>() {
::cinn::runtime::RandomSeed::GetOrSet(seed);
}
void SetCinnTarget(const ::cinn::common::Target& target) {
VLOG(4) << "Set CINN compile target to " << target;
::cinn::runtime::CurrentTarget::SetCurrentTarget(target);
}
} // namespace details
class CinnLaunchOp : public framework::OperatorWithKernel {
......
...@@ -58,6 +58,9 @@ void SetCinnRuntimeFlags();
template <typename DeviceContext>
void SetCinnRandomSeed();
// set CINN compile target
void SetCinnTarget(const ::cinn::common::Target& target);
} // namespace details
template <typename T, typename DeviceContext>
...@@ -115,6 +118,7 @@ class CinnLaunchOpKernel : public framework::OpKernel<T> {
"Step 2. Get compilation result of the graph");
// Step 2. Get compilation result of the graph
auto target = details::PlaceToCinnTarget(place);
details::SetCinnTarget(target);
using ClockType = std::chrono::steady_clock;
std::chrono::time_point<ClockType> start_t, end_t;
if (VLOG_IS_ON(1)) {
......
...@@ -98,7 +98,7 @@ PD_REGISTER_STRUCT_KERNEL(alltoall, ...@@ -98,7 +98,7 @@ PD_REGISTER_STRUCT_KERNEL(alltoall,
ops::AllToAllOpCUDAKernel, ops::AllToAllOpCUDAKernel,
float, float,
double, double,
-#if NCCL_VERSION_CODE >= 21000
+#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
plat::bfloat16, plat::bfloat16,
#endif #endif
int, int,
......
...@@ -95,7 +95,7 @@ PD_REGISTER_STRUCT_KERNEL(c_allgather, ...@@ -95,7 +95,7 @@ PD_REGISTER_STRUCT_KERNEL(c_allgather,
ops::CAllGatherOpCUDAKernel, ops::CAllGatherOpCUDAKernel,
float, float,
double, double,
-#if NCCL_VERSION_CODE >= 21000
+#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
plat::bfloat16, plat::bfloat16,
#endif #endif
int, int,
......
...@@ -28,7 +28,7 @@ PD_REGISTER_STRUCT_KERNEL(c_allreduce_max, ...@@ -28,7 +28,7 @@ PD_REGISTER_STRUCT_KERNEL(c_allreduce_max,
ALL_LAYOUT, ALL_LAYOUT,
ops::CAllReduceMaxCUDAKernel, ops::CAllReduceMaxCUDAKernel,
float, float,
-#if NCCL_VERSION_CODE >= 21000
+#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
plat::bfloat16, plat::bfloat16,
#endif #endif
double, double,
......
...@@ -28,7 +28,7 @@ PD_REGISTER_STRUCT_KERNEL(c_allreduce_sum, ...@@ -28,7 +28,7 @@ PD_REGISTER_STRUCT_KERNEL(c_allreduce_sum,
ALL_LAYOUT, ALL_LAYOUT,
ops::CAllReduceSumCUDAKernel, ops::CAllReduceSumCUDAKernel,
float, float,
-#if NCCL_VERSION_CODE >= 21000
+#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
plat::bfloat16, plat::bfloat16,
#endif #endif
double, double,
......
...@@ -100,7 +100,7 @@ PD_REGISTER_STRUCT_KERNEL(c_broadcast, ...@@ -100,7 +100,7 @@ PD_REGISTER_STRUCT_KERNEL(c_broadcast,
int64_t, int64_t,
float, float,
double, double,
-#if NCCL_VERSION_CODE >= 21000
+#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
plat::bfloat16, plat::bfloat16,
#endif #endif
plat::float16) { plat::float16) {
......
...@@ -137,7 +137,7 @@ PD_REGISTER_STRUCT_KERNEL(c_concat, ...@@ -137,7 +137,7 @@ PD_REGISTER_STRUCT_KERNEL(c_concat,
double, double,
int, int,
int64_t, int64_t,
-#if NCCL_VERSION_CODE >= 21000
+#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
plat::bfloat16, plat::bfloat16,
#endif #endif
plat::float16) { plat::float16) {
......
...@@ -239,7 +239,7 @@ PD_REGISTER_STRUCT_KERNEL(c_embedding, ...@@ -239,7 +239,7 @@ PD_REGISTER_STRUCT_KERNEL(c_embedding,
ops::CEmbeddingCUDAKernel, ops::CEmbeddingCUDAKernel,
float, float,
double, double,
-#if NCCL_VERSION_CODE >= 21000
+#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
plat::bfloat16, plat::bfloat16,
#endif #endif
plat::float16) { plat::float16) {
...@@ -251,7 +251,7 @@ PD_REGISTER_STRUCT_KERNEL(c_embedding_grad, ...@@ -251,7 +251,7 @@ PD_REGISTER_STRUCT_KERNEL(c_embedding_grad,
ops::CEmbeddingGradCUDAKernel, ops::CEmbeddingGradCUDAKernel,
float, float,
double, double,
-#if NCCL_VERSION_CODE >= 21000
+#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
plat::bfloat16, plat::bfloat16,
#endif #endif
plat::float16) { plat::float16) {
......
...@@ -25,7 +25,7 @@ PD_REGISTER_STRUCT_KERNEL(c_identity, ...@@ -25,7 +25,7 @@ PD_REGISTER_STRUCT_KERNEL(c_identity,
double, double,
int, int,
int64_t, int64_t,
-#if NCCL_VERSION_CODE >= 21000
+#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
plat::bfloat16, plat::bfloat16,
#endif #endif
plat::float16) { plat::float16) {
......
...@@ -87,7 +87,7 @@ PD_REGISTER_STRUCT_KERNEL(c_reducescatter, ...@@ -87,7 +87,7 @@ PD_REGISTER_STRUCT_KERNEL(c_reducescatter,
ops::CReduceScatterOpCUDAKernel, ops::CReduceScatterOpCUDAKernel,
float, float,
double, double,
-#if NCCL_VERSION_CODE >= 21000
+#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
plat::bfloat16, plat::bfloat16,
#endif #endif
int, int,
......
...@@ -123,7 +123,7 @@ PD_REGISTER_STRUCT_KERNEL(c_split, ...@@ -123,7 +123,7 @@ PD_REGISTER_STRUCT_KERNEL(c_split,
double, double,
int, int,
int64_t, int64_t,
-#if NCCL_VERSION_CODE >= 21000
+#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
plat::bfloat16, plat::bfloat16,
#endif #endif
plat::float16) { plat::float16) {
......
...@@ -31,7 +31,7 @@ PD_REGISTER_STRUCT_KERNEL(mp_allreduce_sum, ...@@ -31,7 +31,7 @@ PD_REGISTER_STRUCT_KERNEL(mp_allreduce_sum,
double, double,
int, int,
int64_t, int64_t,
-#if NCCL_VERSION_CODE >= 21000
+#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
plat::bfloat16, plat::bfloat16,
#endif #endif
plat::float16) { plat::float16) {
......
...@@ -108,7 +108,7 @@ PD_REGISTER_STRUCT_KERNEL(partial_allgather, ...@@ -108,7 +108,7 @@ PD_REGISTER_STRUCT_KERNEL(partial_allgather,
ops::PartialAllGatherOpCUDAKernel, ops::PartialAllGatherOpCUDAKernel,
float, float,
double, double,
-#if NCCL_VERSION_CODE >= 21000
+#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
plat::bfloat16, plat::bfloat16,
#endif #endif
int, int,
......
...@@ -124,7 +124,7 @@ PD_REGISTER_STRUCT_KERNEL(partial_recv, ...@@ -124,7 +124,7 @@ PD_REGISTER_STRUCT_KERNEL(partial_recv,
ops::PartialRecvOpCUDAKernel, ops::PartialRecvOpCUDAKernel,
float, float,
double, double,
-#if NCCL_VERSION_CODE >= 21000
+#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
plat::bfloat16, plat::bfloat16,
#endif #endif
int, int,
......
...@@ -123,7 +123,7 @@ PD_REGISTER_STRUCT_KERNEL(partial_send, ...@@ -123,7 +123,7 @@ PD_REGISTER_STRUCT_KERNEL(partial_send,
ops::PartialSendCUDAKernel, ops::PartialSendCUDAKernel,
float, float,
double, double,
-#if NCCL_VERSION_CODE >= 21000
+#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
plat::bfloat16, plat::bfloat16,
#endif #endif
int, int,
......
...@@ -238,7 +238,7 @@ PD_REGISTER_STRUCT_KERNEL(recv_v2, ...@@ -238,7 +238,7 @@ PD_REGISTER_STRUCT_KERNEL(recv_v2,
ops::RecvOpV2CUDAKernel, ops::RecvOpV2CUDAKernel,
float, float,
double, double,
-#if NCCL_VERSION_CODE >= 21000
+#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
plat::bfloat16, plat::bfloat16,
#endif #endif
int, int,
......
...@@ -223,7 +223,7 @@ PD_REGISTER_STRUCT_KERNEL(send_v2, ...@@ -223,7 +223,7 @@ PD_REGISTER_STRUCT_KERNEL(send_v2,
ops::SendOpV2CUDAKernel, ops::SendOpV2CUDAKernel,
float, float,
double, double,
-#if NCCL_VERSION_CODE >= 21000
+#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
plat::bfloat16, plat::bfloat16,
#endif #endif
int, int,
......
...@@ -59,7 +59,7 @@ inline ncclDataType_t ToNCCLDataType(framework::proto::VarType::Type type) { ...@@ -59,7 +59,7 @@ inline ncclDataType_t ToNCCLDataType(framework::proto::VarType::Type type) {
return ncclUint8; return ncclUint8;
} else if (type == framework::proto::VarType::BOOL) { } else if (type == framework::proto::VarType::BOOL) {
return ncclUint8; return ncclUint8;
-#if NCCL_VERSION_CODE >= 21000
+#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
} else if (type == framework::proto::VarType::BF16) { } else if (type == framework::proto::VarType::BF16) {
return ncclBfloat16; return ncclBfloat16;
#endif #endif
...@@ -86,7 +86,7 @@ inline ncclDataType_t ToNCCLDataType(phi::DataType type) { ...@@ -86,7 +86,7 @@ inline ncclDataType_t ToNCCLDataType(phi::DataType type) {
return ncclInt8; return ncclInt8;
} else if (type == phi::DataType::BOOL) { } else if (type == phi::DataType::BOOL) {
return ncclUint8; return ncclUint8;
-#if NCCL_VERSION_CODE >= 21000
+#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
} else if (type == phi::DataType::BFLOAT16) { } else if (type == phi::DataType::BFLOAT16) {
return ncclBfloat16; return ncclBfloat16;
#endif #endif
......
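The guards above tie bfloat16 support in these collective kernels and dtype helpers to both NCCL >= 2.10 and CUDA >= 11. A rough sketch (not part of this commit) of exercising a bfloat16 all-reduce from Python, assuming a CUDA 11+/NCCL 2.10+ build launched via paddle.distributed.launch; whether it routes through these legacy kernels depends on the execution mode:

```python
import paddle
import paddle.distributed as dist

# Each rank contributes (rank + 1); all_reduce sums the tensors element-wise.
dist.init_parallel_env()
x = paddle.full([2, 2], float(dist.get_rank() + 1)).astype('bfloat16')
dist.all_reduce(x)
print(dist.get_rank(), x.astype('float32').numpy())
```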
...@@ -15,6 +15,7 @@
#include <pybind11/operators.h>
#include <pybind11/stl.h>
#include "paddle/fluid/distributed/auto_parallel/spmd_rules/dist_tensor_spec.h"
#include "paddle/fluid/framework/op_desc.h"
#include "paddle/fluid/framework/var_desc.h"
#include "paddle/fluid/pybind/auto_parallel_py.h"
...@@ -29,6 +30,7 @@ namespace py = pybind11;
namespace paddle {
namespace pybind {
using paddle::distributed::auto_parallel::DistTensorSpec;
using paddle::distributed::auto_parallel::OperatorDistAttr;
using paddle::framework::OpDesc;
using paddle::framework::VarDesc;
...@@ -276,6 +278,25 @@ void BindAutoParallel(py::module *m) {
py::arg("memo"))
.def("__str__", &TensorDistAttr::to_string);
py::class_<DistTensorSpec>(*m, "DistTensorSpec")
.def(py::init<>())
.def(py::init<const DistTensorSpec &>())
.def(py::init<const std::vector<int64_t> &, const TensorDistAttr &>())
.def("get_dims_mapping", &DistTensorSpec::get_dims_mapping)
.def("set_dims_mapping", &DistTensorSpec::set_dims_mapping)
.def("get_process_mesh", &DistTensorSpec::get_process_mesh)
.def("set_process_mesh", &DistTensorSpec::set_process_mesh)
.def_property_readonly("shape", &DistTensorSpec::get_shape)
.def("__str__", &DistTensorSpec::to_string)
.def("__copy__",
[](const DistTensorSpec &self) { return DistTensorSpec(self); })
.def(
"__deepcopy__",
[](const DistTensorSpec &self, py::dict) {
return DistTensorSpec(self);
},
py::arg("memo"));
py::class_<OperatorDistAttr>(*m, "OperatorDistAttr")
.def(py::init<>())
.def(py::init<const OpDesc &>())
......
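A minimal sketch (not part of this commit) of the new DistTensorSpec binding as seen from Python, assuming TensorDistAttr is default-constructible from paddle.fluid.core:

```python
from paddle.fluid.core import DistTensorSpec, TensorDistAttr

dist_attr = TensorDistAttr()                 # empty distributed attributes
spec = DistTensorSpec([8, 1024], dist_attr)  # shape + dist_attr constructor
spec.set_dims_mapping([-1, 0])               # shard the second dim along mesh dim 0
print(spec.get_dims_mapping())               # [-1, 0]
print(spec.shape)                            # [8, 1024]
print(spec)                                  # formatted via DistTensorSpec::to_string()
```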
...@@ -1280,7 +1280,7 @@ PADDLE_API {self.get_return_type(inplace_flag=True)} {api_func_name}({self.get_d
def gen_dist_tensor_code(self):
# define the DistTensorSpec vector for input and output tensors
-api_code = " \nstd::vector<paddle::distributed::auto_parallel::DistTensorSpec> input_specs;\n"
+api_code = " \n std::vector<paddle::distributed::auto_parallel::DistTensorSpec> input_specs;\n"
# get DistTensorSpec for each input tensor
for tensor_name in self.inputs['names']:
...@@ -1297,8 +1297,8 @@ PADDLE_API {self.get_return_type(inplace_flag=True)} {api_func_name}({self.get_d
PADDLE_API {self.get_return_type(inplace_flag)} {api_func_name}({self.get_define_args(inplace_flag)}) {{
{self.gene_kernel_select()}
"""
-if api_func_name == 'matmul':
-    api_code += self.gen_dist_tensor_code()
+# if api_func_name == 'matmul':
+#     api_code += self.gen_dist_tensor_code()
if len(self.kernel['func']) > 1:
kernel_dispatch_code = ''
......
...@@ -525,6 +525,8 @@ XPUOpMap& get_kl2_ops() {
phi::DataType::FLOAT16,
phi::DataType::INT64})},
{"nearest_interp_v2_grad", XPUKernelSet({phi::DataType::FLOAT32})},
{"nll_loss", XPUKernelSet({phi::DataType::FLOAT32})},
{"nll_loss_grad", XPUKernelSet({phi::DataType::FLOAT32})},
{"not_equal",
XPUKernelSet({phi::DataType::INT64,
phi::DataType::INT32,
......
...@@ -20,4 +20,5 @@ cc_library(
SRCS dist_mapper.cc
DEPS device_mesh auto_parallel_proto phi_enforce)
-cc_library(auto_parallel DEPS device_mesh process_mesh dist_attr dist_mapper)
+cc_library(auto_parallel DEPS device_mesh process_mesh dist_attr dist_mapper
+           dist_tensor_spec)
...@@ -229,7 +229,7 @@ inline ncclDataType_t ToNCCLDataType(DataType type) { ...@@ -229,7 +229,7 @@ inline ncclDataType_t ToNCCLDataType(DataType type) {
return ncclInt8; return ncclInt8;
} else if (type == DataType::BOOL) { } else if (type == DataType::BOOL) {
return ncclUint8; return ncclUint8;
-#if NCCL_VERSION_CODE >= 21000
+#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
} else if (type == DataType::BFLOAT16) { } else if (type == DataType::BFLOAT16) {
return ncclBfloat16; return ncclBfloat16;
#endif #endif
......
...@@ -274,4 +274,5 @@ PD_REGISTER_KERNEL(selu,
phi::SeluKernel,
float,
double,
phi::dtype::float16,
phi::dtype::bfloat16) {}
...@@ -24,4 +24,5 @@ PD_REGISTER_KERNEL(selu_grad,
phi::SeluGradKernel,
float,
double,
phi::dtype::float16,
phi::dtype::bfloat16) {}
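With float16 now registered for the selu and selu_grad GPU kernels, a half-precision forward/backward pass can run natively; an illustrative sketch (not part of this commit), assuming a CUDA build of Paddle:

```python
import numpy as np
import paddle
import paddle.nn.functional as F

paddle.set_device('gpu')                    # assumes a CUDA build of Paddle
x = paddle.to_tensor(np.random.randn(4, 8).astype('float16'), stop_gradient=False)
y = F.selu(x)                               # should hit the newly registered float16 selu kernel
y.sum().backward()                          # and the float16 selu_grad kernel
print(y.dtype, x.grad.dtype)
```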
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/nll_loss_grad_kernel.h"
#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/core/kernel_registry.h"
namespace phi {
template <typename T, typename Context>
void NllLossGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& label,
const paddle::optional<DenseTensor>& weight,
const DenseTensor& total_weight,
const DenseTensor& d_out,
int64_t ignore_index,
const std::string& reduction,
DenseTensor* d_x) {
using XPUType = typename XPUTypeTrait<T>::Type;
const auto& label_type = label.dtype();
bool label_type_match =
label_type == phi::DataType::INT32 || label_type == phi::DataType::INT64;
PADDLE_ENFORCE_EQ(label_type_match,
true,
phi::errors::InvalidArgument(
"Input(Label) holds the wrong type, it holds %s, but "
"desires to be %s or %s",
label_type,
phi::DataType::INT32,
phi::DataType::INT64));
auto d_out_data = d_out.data<XPUType>();
auto d_x_data = dev_ctx.template Alloc<XPUType>(d_x);
auto d_x_dims = d_x->dims();
std::vector<int64_t> d_x_shape = phi::vectorize<int64_t>(d_x_dims);
auto weight_data =
weight.get_ptr() ? weight.get_ptr()->data<float>() : nullptr;
int64_t reduction_id = 0;
if (reduction == "none") {
reduction_id = 0;
} else if (reduction == "mean") {
reduction_id = 1;
} else if (reduction == "sum") {
reduction_id = 2;
}
auto total_weight_data = total_weight.data<XPUType>();
int r;
if (label_type == phi::DataType::INT32) {
const int* label_data = label.data<int>();
r = xpu::nll_loss_grad(dev_ctx.x_context(),
d_out_data,
d_x_data,
d_x_shape,
label_data,
weight_data,
reduction_id,
ignore_index,
total_weight_data);
} else if (label_type == phi::DataType::INT64) {
const int64_t* label_data = label.data<int64_t>();
r = xpu::nll_loss_grad(dev_ctx.x_context(),
d_out_data,
d_x_data,
d_x_shape,
label_data,
weight_data,
reduction_id,
ignore_index,
total_weight_data);
}
PADDLE_ENFORCE_XDNN_SUCCESS(r, "nll_loss_grad");
}
} // namespace phi
// TODO(xiongkun): add the non-raw kernel register here.
PD_REGISTER_KERNEL(
nll_loss_grad, XPU, ALL_LAYOUT, phi::NllLossGradKernel, float) {}
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/nll_loss_kernel.h"
#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/core/kernel_registry.h"
namespace phi {
template <typename T, typename Context>
void NllLossRawKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& label,
const paddle::optional<DenseTensor>& weight,
int64_t ignore_index,
const std::string& reduction,
DenseTensor* out,
DenseTensor* total_weight) {
using XPUType = typename XPUTypeTrait<T>::Type;
const auto& label_type = label.dtype();
bool label_type_match =
label_type == phi::DataType::INT32 || label_type == phi::DataType::INT64;
PADDLE_ENFORCE_EQ(label_type_match,
true,
phi::errors::InvalidArgument(
"Input(Label) holds the wrong type, it holds %s, but "
"desires to be %s or %s",
label_type,
phi::DataType::INT32,
phi::DataType::INT64));
auto x_data = x.data<XPUType>();
auto out_data = dev_ctx.template Alloc<XPUType>(out);
auto weight_data =
weight.get_ptr() ? weight.get_ptr()->data<XPUType>() : nullptr;
auto total_weight_data = dev_ctx.template Alloc<XPUType>(total_weight);
auto x_dims = x.dims();
std::vector<int64_t> x_shape = phi::vectorize<int64_t>(x_dims);
int64_t reduction_id = 0;
if (reduction == "none") {
reduction_id = 0;
} else if (reduction == "mean") {
reduction_id = 1;
} else if (reduction == "sum") {
reduction_id = 2;
}
int r;
if (label_type == phi::DataType::INT32) {
const int* label_data = label.data<int>();
r = xpu::nll_loss(dev_ctx.x_context(),
x_data,
out_data,
total_weight_data,
x_shape,
label_data,
weight_data,
reduction_id,
ignore_index);
} else if (label_type == phi::DataType::INT64) {
const int64_t* label_data = label.data<int64_t>();
r = xpu::nll_loss(dev_ctx.x_context(),
x_data,
out_data,
total_weight_data,
x_shape,
label_data,
weight_data,
reduction_id,
ignore_index);
}
PADDLE_ENFORCE_XDNN_SUCCESS(r, "nll_loss");
}
} // namespace phi
// TODO(xiongkun): add the non-raw kernel register here.
PD_REGISTER_KERNEL(nll_loss, XPU, ALL_LAYOUT, phi::NllLossRawKernel, float) {}
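An illustrative usage sketch (not part of this commit) for the new XPU nll_loss kernels, which back paddle.nn.functional.nll_loss in float32; it assumes a Paddle build with XPU support:

```python
import numpy as np
import paddle
import paddle.nn.functional as F

paddle.set_device('xpu')                     # assumes a Paddle build with XPU support
logits = paddle.to_tensor(
    np.random.rand(4, 3).astype('float32'), stop_gradient=False
)
log_prob = F.log_softmax(logits, axis=1)     # nll_loss expects log-probabilities
label = paddle.to_tensor([0, 2, 1, 0], dtype='int64')
loss = F.nll_loss(log_prob, label, reduction='mean')
loss.backward()                              # exercises the new nll_loss_grad kernel
print(float(loss))
```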
...@@ -2235,14 +2235,70 @@ set +x
fi
done <<< "$test_cases";
card_test "$single_card_tests" 1
failed_test_lists=''
collect_failed_tests
xputest_error=0
retry_unittests_record=''
retry_time=3
exec_times=0
exec_time_array=('first' 'second' 'third')
exec_retry_threshold=10
is_retry_execuate=0
if [ -n "$failed_test_lists" ];then
xputest_error=1
need_retry_ut_str=$(echo "$failed_test_lists" | grep -oEi "\-.+\(" | sed 's/(//' | sed 's/- //' )
need_retry_ut_arr=(${need_retry_ut_str})
need_retry_ut_count=${#need_retry_ut_arr[@]}
retry_unittests=$(echo "$failed_test_lists" | grep -oEi "\-.+\(" | sed 's/(//' | sed 's/- //' )
if [ $need_retry_ut_count -lt $exec_retry_threshold ];then
while ( [ $exec_times -lt $retry_time ] )
do
set +e
retry_unittests_record="$retry_unittests_record$failed_test_lists"
failed_test_lists_ult=`echo "${failed_test_lists}"`
set -e
if [[ "${exec_times}" == "1" ]];then
if [[ "${failed_test_lists}" == "" ]];then
break
else
retry_unittests=$(echo "$failed_test_lists" | grep -oEi "\-.+\(" | sed 's/(//' | sed 's/- //' )
fi
fi
echo "========================================="
echo "This is the ${exec_time_array[$exec_times]} time to re-run"
echo "========================================="
echo "The following unittest will be re-run:"
echo "${retry_unittests}"
echo "========================================="
retry_unittests_regular=''
for line in ${retry_unittests[@]} ;
do
if [[ "$retry_unittests_regular" == "" ]];then
retry_unittests_regular="^$line$"
else
retry_unittests_regular="$retry_unittests_regular|^$line$"
fi
done
rm -f $tmp_dir/*
failed_test_lists=''
ctest -R "($retry_unittests_regular)" --output-on-failure -j $2 | tee $tmpfile
collect_failed_tests
exec_times=$[$exec_times+1]
done
else
# There are more than 10 failed unit tests, so no unit test retry
is_retry_execuate=1
fi
fi
set -x
ut_endTime_s=`date +%s`
echo "XPU testCase Time: $[ $ut_endTime_s - $ut_startTime_s ]s"
python ${PADDLE_ROOT}/build/test/xpu/get_test_cover_info.py
unset XPU_OP_LIST_DIR
-if [[ "$EXIT_CODE" != "0" ]]; then
-exit 8;
+if [ "$xputest_error" != 0 ];then
+show_ut_retry_result
fi
fi
}
......
...@@ -12,5 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License
from paddle.fluid.core import DistTensorSpec # noqa: F401
from paddle.fluid.core import OperatorDistAttr # noqa: F401
from paddle.fluid.core import TensorDistAttr # noqa: F401
...@@ -105,6 +105,18 @@ def _update_dims_mapping_for_matmul(dist_op):
changed = False
op_desc = dist_op.serial_op.desc
op_dist_attr = dist_op.dist_attr
# test DistTensorSpec
# input_name_list = []
# output_name_list = []
# input_name_list.append(op_desc.input('X')[0])
# input_name_list.append(op_desc.input('Y')[0])
# output_name_list.append(op_desc.output('Out')[0])
# attr_name_list = ['trans_x', 'trans_y']
# input_specs, output_specs, attrs = wrap_data_for_completion(
# dist_op, input_name_list, output_name_list, attr_name_list
# )
x_name = op_desc.input('X')[0]
y_name = op_desc.input('Y')[0]
out_name = op_desc.output('Out')[0]
......
...@@ -26,7 +26,7 @@ from paddle.framework import core
from paddle.framework.io_utils import is_belong_to_optimizer, is_parameter
from paddle.static import Variable
-from .dist_attribute import OperatorDistAttr, TensorDistAttr
+from .dist_attribute import DistTensorSpec, OperatorDistAttr, TensorDistAttr
from .process_group import get_all_process_groups
from .process_mesh import ProcessMesh
...@@ -2357,50 +2357,64 @@ def is_dep_skip_op(op):
return False
def wrap_data_for_completion(
    dist_op, input_names: list, output_names: list, attr_names: list
):
    """
    Get data used in inferring distributed attributes, including:
      1. DistTensorSpec for each input and output tensor of this dist_op.
      2. Operator attributes of this dist_op, e.g. transpose_x in matmul op.

    Args:
        dist_op: the DistributedOperator
        input_names: list, name of the dist_op's input tensors
        output_names: list, name of the dist_op's output tensors
        attr_names: list, attribute name of the dist_op's corresponding serial op

    Returns:
        input_specs: list, DistTensorSpec for each input tensor of the dist_op
        output_specs: list, DistTensorSpec for each output tensor of the dist_op
        attrs: dict, attribute map of the dist op

    Usage:
        op_desc = dist_op.serial_op.desc
        input_name_list = []
        output_name_list = []
        input_name_list.append(op_desc.input('X')[0])  # 'X' is the arg name for op
        input_name_list.append(op_desc.input('Y')[0])
        output_name_list.append(op_desc.output('Out')[0])
        attr_name_list = ['trans_x', 'trans_y']
        input_specs, output_specs, attrs = wrap_data_for_completion(
            dist_op,
            input_name_list,
            output_name_list,
            attr_name_list)

    """

    input_specs = []
    output_specs = []
    attrs = {}

    serial_op = dist_op.serial_op

    # Construct each input tensor's DistTensorSpec with shape and dist_attr
    for name in input_names:
        tensor_dist_attr = dist_op.dist_attr.get_input_dist_attr(name)
        var = serial_op.block._var_recursive(name)
        tensor_shape = var.shape
        dist_spec = DistTensorSpec(tensor_shape, tensor_dist_attr)
        input_specs.append(dist_spec)

    # Construct each output tensor's DistTensorSpec with shape and dist_attr
    for name in output_names:
        tensor_dist_attr = dist_op.dist_attr.get_output_dist_attr(name)
        var = serial_op.block._var_recursive(name)
        tensor_shape = var.shape
        dist_spec = DistTensorSpec(tensor_shape, tensor_dist_attr)
        output_specs.append(dist_spec)

    for attr_name in attr_names:
        attrs[attr_name] = serial_op.desc.attr(attr_name)

    return input_specs, output_specs, attrs
...@@ -1116,7 +1116,15 @@ set(TEST_CINN_OPS
test_tile_op
test_roll_op
test_sum_op
-test_elementwise_min_op)
+test_elementwise_min_op
test_bitwise_op
test_compare_op
test_shape_op
test_assign_value_op
test_lookup_table_op
test_lookup_table_v2_op
test_norm_op
test_one_hot_v2_op)
foreach(TEST_CINN_OPS ${TEST_CINN_OPS})
if(WITH_CINN)
......
...@@ -49,7 +49,7 @@ class TestAssignValueOp(eager_op_test.OpTest): ...@@ -49,7 +49,7 @@ class TestAssignValueOp(eager_op_test.OpTest):
self.attrs["fp32_values"] = [float(v) for v in self.value.flat] self.attrs["fp32_values"] = [float(v) for v in self.value.flat]
def test_forward(self): def test_forward(self):
-self.check_output()
+self.check_output(check_cinn=True)
class TestAssignValueOp2(TestAssignValueOp): class TestAssignValueOp2(TestAssignValueOp):
......
...@@ -43,7 +43,7 @@ class TestBitwiseAnd(OpTest): ...@@ -43,7 +43,7 @@ class TestBitwiseAnd(OpTest):
self.outputs = {'Out': out} self.outputs = {'Out': out}
def test_check_output(self): def test_check_output(self):
-self.check_output()
+self.check_output(check_cinn=True)
def test_check_grad(self): def test_check_grad(self):
pass pass
...@@ -150,7 +150,7 @@ class TestBitwiseOr(OpTest): ...@@ -150,7 +150,7 @@ class TestBitwiseOr(OpTest):
self.outputs = {'Out': out} self.outputs = {'Out': out}
def test_check_output(self): def test_check_output(self):
-self.check_output()
+self.check_output(check_cinn=True)
def test_check_grad(self): def test_check_grad(self):
pass pass
...@@ -258,7 +258,7 @@ class TestBitwiseXor(OpTest): ...@@ -258,7 +258,7 @@ class TestBitwiseXor(OpTest):
self.outputs = {'Out': out} self.outputs = {'Out': out}
def test_check_output(self): def test_check_output(self):
-self.check_output()
+self.check_output(check_cinn=True)
def test_check_grad(self): def test_check_grad(self):
pass pass
...@@ -363,7 +363,7 @@ class TestBitwiseNot(OpTest): ...@@ -363,7 +363,7 @@ class TestBitwiseNot(OpTest):
self.outputs = {'Out': out} self.outputs = {'Out': out}
def test_check_output(self): def test_check_output(self):
-self.check_output()
+self.check_output(check_cinn=True)
def test_check_grad(self): def test_check_grad(self):
pass pass
......
...@@ -35,7 +35,7 @@ def create_test_class(op_type, typename, callback): ...@@ -35,7 +35,7 @@ def create_test_class(op_type, typename, callback):
self.op_type = op_type self.op_type = op_type
def test_output(self): def test_output(self):
-self.check_output()
+self.check_output(check_cinn=True)
def test_errors(self): def test_errors(self):
paddle.enable_static() paddle.enable_static()
...@@ -460,7 +460,7 @@ def create_bf16_case(op_type, callback): ...@@ -460,7 +460,7 @@ def create_bf16_case(op_type, callback):
self.outputs = {'Out': real_result} self.outputs = {'Out': real_result}
def test_check_output(self): def test_check_output(self):
-self.check_output()
+self.check_output(check_cinn=True)
cls_name = f"BF16TestCase_{op_type}" cls_name = f"BF16TestCase_{op_type}"
TestCompareOpBF16Op.__name__ = cls_name TestCompareOpBF16Op.__name__ = cls_name
......
...@@ -44,7 +44,7 @@ class TestExpandV2OpRank1(OpTest): ...@@ -44,7 +44,7 @@ class TestExpandV2OpRank1(OpTest):
self.expand_times = [1] self.expand_times = [1]
def test_check_output(self): def test_check_output(self):
-self.check_output()
+self.check_output(check_cinn=self.enable_cinn)
def test_check_grad(self): def test_check_grad(self):
self.check_grad(['X'], 'Out', check_prim=True) self.check_grad(['X'], 'Out', check_prim=True)
...@@ -107,10 +107,10 @@ class TestExpandV2OpRank1_tensor_attr(OpTest): ...@@ -107,10 +107,10 @@ class TestExpandV2OpRank1_tensor_attr(OpTest):
self.infer_expand_shape = [-1] self.infer_expand_shape = [-1]
def test_check_output(self): def test_check_output(self):
-self.check_output()
+self.check_output(check_cinn=True)
def test_check_grad(self): def test_check_grad(self):
-self.check_grad(['X'], 'Out')
+self.check_grad(['X'], 'Out', check_cinn=True)
class TestExpandV2OpRank2_Corner_tensor_attr(TestExpandV2OpRank1_tensor_attr): class TestExpandV2OpRank2_Corner_tensor_attr(TestExpandV2OpRank1_tensor_attr):
...@@ -144,10 +144,10 @@ class TestExpandV2OpRank1_tensor(OpTest): ...@@ -144,10 +144,10 @@ class TestExpandV2OpRank1_tensor(OpTest):
self.expand_shape = [2, 100] self.expand_shape = [2, 100]
def test_check_output(self): def test_check_output(self):
-self.check_output()
+self.check_output(check_cinn=True)
def test_check_grad(self): def test_check_grad(self):
-self.check_grad(['X'], 'Out')
+self.check_grad(['X'], 'Out', check_cinn=True)
# Situation 4: input x is Integer # Situation 4: input x is Integer
...@@ -165,7 +165,7 @@ class TestExpandV2OpInteger(OpTest): ...@@ -165,7 +165,7 @@ class TestExpandV2OpInteger(OpTest):
self.outputs = {'Out': output} self.outputs = {'Out': output}
def test_check_output(self): def test_check_output(self):
-self.check_output()
+self.check_output(check_cinn=True)
# Situation 5: input x is Bool # Situation 5: input x is Bool
...@@ -181,7 +181,7 @@ class TestExpandV2OpBoolean(OpTest): ...@@ -181,7 +181,7 @@ class TestExpandV2OpBoolean(OpTest):
self.outputs = {'Out': output} self.outputs = {'Out': output}
def test_check_output(self): def test_check_output(self):
-self.check_output()
+self.check_output(check_cinn=True)
# Situation 6: input x is Integer # Situation 6: input x is Integer
...@@ -199,7 +199,7 @@ class TestExpandV2OpInt64_t(OpTest): ...@@ -199,7 +199,7 @@ class TestExpandV2OpInt64_t(OpTest):
self.outputs = {'Out': output} self.outputs = {'Out': output}
def test_check_output(self): def test_check_output(self):
-self.check_output()
+self.check_output(check_cinn=True)
# Situation 7: input x is Float16 # Situation 7: input x is Float16
...@@ -218,7 +218,7 @@ class TestExpandV2FP16Op(OpTest): ...@@ -218,7 +218,7 @@ class TestExpandV2FP16Op(OpTest):
self.outputs = {'Out': output} self.outputs = {'Out': output}
def test_check_output(self): def test_check_output(self):
-self.check_output()
+self.check_output(check_cinn=True)
def test_check_grad(self): def test_check_grad(self):
self.check_grad(['X'], 'Out', check_prim=True) self.check_grad(['X'], 'Out', check_prim=True)
...@@ -245,7 +245,7 @@ class TestExpandV2BF16Op(OpTest): ...@@ -245,7 +245,7 @@ class TestExpandV2BF16Op(OpTest):
def test_check_output(self): def test_check_output(self):
place = core.CUDAPlace(0) place = core.CUDAPlace(0)
-self.check_output_with_place(place)
+self.check_output_with_place(place, check_cinn=True)
def test_check_grad(self): def test_check_grad(self):
place = core.CUDAPlace(0) place = core.CUDAPlace(0)
......
...@@ -39,10 +39,10 @@ class TestLookupTableOp(OpTest): ...@@ -39,10 +39,10 @@ class TestLookupTableOp(OpTest):
self.outputs = {'Out': table[ids]} self.outputs = {'Out': table[ids]}
def test_check_output(self): def test_check_output(self):
-self.check_output()
+self.check_output(check_cinn=True)
def test_check_grad(self): def test_check_grad(self):
-self.check_grad(['W'], 'Out', no_grad_set=set('Ids'))
+self.check_grad(['W'], 'Out', no_grad_set=set('Ids'), check_cinn=True)
class TestLookupTableOpWithTensorIds(OpTest): class TestLookupTableOpWithTensorIds(OpTest):
...@@ -56,10 +56,10 @@ class TestLookupTableOpWithTensorIds(OpTest): ...@@ -56,10 +56,10 @@ class TestLookupTableOpWithTensorIds(OpTest):
self.outputs = {'Out': table[ids.flatten()].reshape((2, 4, 5, 31))} self.outputs = {'Out': table[ids.flatten()].reshape((2, 4, 5, 31))}
def test_check_output(self): def test_check_output(self):
-self.check_output()
+self.check_output(check_cinn=True)
def test_check_grad(self): def test_check_grad(self):
-self.check_grad(['W'], 'Out', no_grad_set=set('Ids'))
+self.check_grad(['W'], 'Out', no_grad_set=set('Ids'), check_cinn=True)
@skip_check_grad_ci( @skip_check_grad_ci(
...@@ -73,7 +73,7 @@ class TestLookupTableOpWithPadding(TestLookupTableOp): ...@@ -73,7 +73,7 @@ class TestLookupTableOpWithPadding(TestLookupTableOp):
padding_idx = np.random.choice(ids, 1)[0] padding_idx = np.random.choice(ids, 1)[0]
self.outputs['Out'][ids == padding_idx] = np.zeros(31) self.outputs['Out'][ids == padding_idx] = np.zeros(31)
self.attrs = {'padding_idx': int(padding_idx)} self.attrs = {'padding_idx': int(padding_idx)}
-self.check_output()
+self.check_output(check_cinn=True)
@skip_check_grad_ci( @skip_check_grad_ci(
...@@ -88,7 +88,7 @@ class TestLookupTableOpWithTensorIdsAndPadding(TestLookupTableOpWithTensorIds): ...@@ -88,7 +88,7 @@ class TestLookupTableOpWithTensorIdsAndPadding(TestLookupTableOpWithTensorIds):
padding_idx = np.random.choice(flatten_idx, 1)[0] padding_idx = np.random.choice(flatten_idx, 1)[0]
self.outputs['Out'][np.squeeze(ids == padding_idx)] = np.zeros(31) self.outputs['Out'][np.squeeze(ids == padding_idx)] = np.zeros(31)
self.attrs = {'padding_idx': padding_idx} self.attrs = {'padding_idx': padding_idx}
-self.check_output()
+self.check_output(check_cinn=True)
class TestLookupTableWIsSelectedRows(unittest.TestCase): class TestLookupTableWIsSelectedRows(unittest.TestCase):
...@@ -212,7 +212,7 @@ class TestLookupTableOpInt8(OpTest): ...@@ -212,7 +212,7 @@ class TestLookupTableOpInt8(OpTest):
self.outputs = {'Out': table[ids]} self.outputs = {'Out': table[ids]}
def test_check_output(self): def test_check_output(self):
-self.check_output()
+self.check_output(check_cinn=True)
def test_check_grad(self): def test_check_grad(self):
# since int8 type only be used in test and inference, there is # since int8 type only be used in test and inference, there is
...@@ -233,7 +233,7 @@ class TestLookupTableOpWithTensorIdsInt8(OpTest): ...@@ -233,7 +233,7 @@ class TestLookupTableOpWithTensorIdsInt8(OpTest):
self.outputs = {'Out': table[ids.flatten()].reshape((2, 4, 5, 31))} self.outputs = {'Out': table[ids.flatten()].reshape((2, 4, 5, 31))}
def test_check_output(self): def test_check_output(self):
-self.check_output()
+self.check_output(check_cinn=True)
def test_check_grad(self): def test_check_grad(self):
# since int8 type only be used in test and inference, there is # since int8 type only be used in test and inference, there is
...@@ -247,7 +247,7 @@ class TestLookupTableOpWithPaddingInt8(TestLookupTableOpInt8): ...@@ -247,7 +247,7 @@ class TestLookupTableOpWithPaddingInt8(TestLookupTableOpInt8):
padding_idx = np.random.choice(ids, 1)[0] padding_idx = np.random.choice(ids, 1)[0]
self.outputs['Out'][ids == padding_idx] = np.zeros(31) self.outputs['Out'][ids == padding_idx] = np.zeros(31)
self.attrs = {'padding_idx': int(padding_idx)} self.attrs = {'padding_idx': int(padding_idx)}
-self.check_output()
+self.check_output(check_cinn=True)
def test_check_grad(self): def test_check_grad(self):
# Since paddings are not trainable and fixed in forward, the gradient of # Since paddings are not trainable and fixed in forward, the gradient of
...@@ -264,7 +264,7 @@ class TestLookupTableOpWithTensorIdsAndPaddingInt8( ...@@ -264,7 +264,7 @@ class TestLookupTableOpWithTensorIdsAndPaddingInt8(
padding_idx = np.random.choice(flatten_idx, 1)[0] padding_idx = np.random.choice(flatten_idx, 1)[0]
self.outputs['Out'][np.squeeze(ids == padding_idx)] = np.zeros(31) self.outputs['Out'][np.squeeze(ids == padding_idx)] = np.zeros(31)
self.attrs = {'padding_idx': padding_idx} self.attrs = {'padding_idx': padding_idx}
-self.check_output()
+self.check_output(check_cinn=True)
def test_check_grad(self): def test_check_grad(self):
# Since paddings are not trainable and fixed in forward, the gradient of # Since paddings are not trainable and fixed in forward, the gradient of
...@@ -354,7 +354,7 @@ class TestLookupTableOpInt16(OpTest): ...@@ -354,7 +354,7 @@ class TestLookupTableOpInt16(OpTest):
self.outputs = {'Out': table[ids]} self.outputs = {'Out': table[ids]}
def test_check_output(self): def test_check_output(self):
-self.check_output()
+self.check_output(check_cinn=True)
@skip_check_grad_ci(reason="Int16 type only be used in test and inference.") @skip_check_grad_ci(reason="Int16 type only be used in test and inference.")
...@@ -371,7 +371,7 @@ class TestLookupTableOpWithTensorIdsInt16(OpTest): ...@@ -371,7 +371,7 @@ class TestLookupTableOpWithTensorIdsInt16(OpTest):
self.outputs = {'Out': table[ids.flatten()].reshape((2, 4, 5, 31))} self.outputs = {'Out': table[ids.flatten()].reshape((2, 4, 5, 31))}
def test_check_output(self): def test_check_output(self):
-self.check_output()
+self.check_output(check_cinn=True)
@skip_check_grad_ci(reason="Int16 type only be used in test and inference.") @skip_check_grad_ci(reason="Int16 type only be used in test and inference.")
...@@ -381,7 +381,7 @@ class TestLookupTableOpWithPaddingInt16(TestLookupTableOpInt16): ...@@ -381,7 +381,7 @@ class TestLookupTableOpWithPaddingInt16(TestLookupTableOpInt16):
padding_idx = np.random.choice(ids, 1)[0] padding_idx = np.random.choice(ids, 1)[0]
self.outputs['Out'][ids == padding_idx] = np.zeros(31) self.outputs['Out'][ids == padding_idx] = np.zeros(31)
self.attrs = {'padding_idx': int(padding_idx)} self.attrs = {'padding_idx': int(padding_idx)}
-self.check_output()
+self.check_output(check_cinn=True)
@skip_check_grad_ci(reason="Int16 type only be used in test and inference.") @skip_check_grad_ci(reason="Int16 type only be used in test and inference.")
...@@ -394,7 +394,7 @@ class TestLookupTableOpWithTensorIdsAndPaddingInt16( ...@@ -394,7 +394,7 @@ class TestLookupTableOpWithTensorIdsAndPaddingInt16(
padding_idx = np.random.choice(flatten_idx, 1)[0] padding_idx = np.random.choice(flatten_idx, 1)[0]
self.outputs['Out'][np.squeeze(ids == padding_idx)] = np.zeros(31) self.outputs['Out'][np.squeeze(ids == padding_idx)] = np.zeros(31)
self.attrs = {'padding_idx': padding_idx} self.attrs = {'padding_idx': padding_idx}
-self.check_output()
+self.check_output(check_cinn=True)
class TestLookupTableWIsSelectedRowsInt16(unittest.TestCase): class TestLookupTableWIsSelectedRowsInt16(unittest.TestCase):
......
...@@ -56,10 +56,10 @@ class TestLookupTableOp(OpTest): ...@@ -56,10 +56,10 @@ class TestLookupTableOp(OpTest):
return "int64" return "int64"
def test_check_output(self): def test_check_output(self):
-self.check_output()
+self.check_output(check_cinn=True)
def test_check_grad(self): def test_check_grad(self):
-self.check_grad(['W'], 'Out', no_grad_set=set('Ids'))
+self.check_grad(['W'], 'Out', no_grad_set=set('Ids'), check_cinn=True)
class TestLookupTableOpInt16(OpTest): class TestLookupTableOpInt16(OpTest):
...@@ -87,10 +87,10 @@ class TestLookupTableOpWithTensorIds(OpTest): ...@@ -87,10 +87,10 @@ class TestLookupTableOpWithTensorIds(OpTest):
self.outputs = {'Out': table[ids.flatten()].reshape((2, 4, 5, 31))} self.outputs = {'Out': table[ids.flatten()].reshape((2, 4, 5, 31))}
def test_check_output(self): def test_check_output(self):
-self.check_output()
+self.check_output(check_cinn=True)
def test_check_grad(self): def test_check_grad(self):
-self.check_grad(['W'], 'Out', no_grad_set=set('Ids'))
+self.check_grad(['W'], 'Out', no_grad_set=set('Ids'), check_cinn=True)
@skip_check_grad_ci( @skip_check_grad_ci(
...@@ -104,7 +104,7 @@ class TestLookupTableOpWithPadding(TestLookupTableOp): ...@@ -104,7 +104,7 @@ class TestLookupTableOpWithPadding(TestLookupTableOp):
padding_idx = np.random.choice(ids, 1)[0] padding_idx = np.random.choice(ids, 1)[0]
self.outputs['Out'][ids == padding_idx] = np.zeros(31) self.outputs['Out'][ids == padding_idx] = np.zeros(31)
self.attrs = {'padding_idx': int(padding_idx)} self.attrs = {'padding_idx': int(padding_idx)}
-self.check_output()
+self.check_output(check_cinn=True)
@skip_check_grad_ci( @skip_check_grad_ci(
...@@ -119,7 +119,7 @@ class TestLookupTableOpWithTensorIdsAndPadding(TestLookupTableOpWithTensorIds): ...@@ -119,7 +119,7 @@ class TestLookupTableOpWithTensorIdsAndPadding(TestLookupTableOpWithTensorIds):
padding_idx = np.random.choice(flatten_idx, 1)[0] padding_idx = np.random.choice(flatten_idx, 1)[0]
self.outputs['Out'][np.squeeze(ids == padding_idx)] = np.zeros(31) self.outputs['Out'][np.squeeze(ids == padding_idx)] = np.zeros(31)
self.attrs = {'padding_idx': padding_idx} self.attrs = {'padding_idx': padding_idx}
-self.check_output()
+self.check_output(check_cinn=True)
class TestLookupTableWIsSelectedRows(unittest.TestCase): class TestLookupTableWIsSelectedRows(unittest.TestCase):
......
...@@ -100,19 +100,29 @@ class Generator: ...@@ -100,19 +100,29 @@ class Generator:
self.outputs = {'Out': Out} self.outputs = {'Out': Out}
def test_check_output(self): def test_check_output(self):
-self.check_output()
+self.check_output(check_cinn=True)
def test_check_grad_normal(self): def test_check_grad_normal(self):
-self.check_grad(['X', 'Y'], 'Out', max_relative_error=1e-3)
+self.check_grad(
['X', 'Y'], 'Out', max_relative_error=1e-3, check_cinn=True
)
def test_check_grad_ignore_x(self):
self.check_grad(
-['Y'], 'Out', max_relative_error=1e-3, no_grad_set=set("X")
+['Y'],
'Out',
max_relative_error=1e-3,
no_grad_set=set("X"),
check_cinn=True,
)
def test_check_grad_ignore_y(self):
self.check_grad(
-['X'], 'Out', max_relative_error=1e-3, no_grad_set=set('Y')
+['X'],
'Out',
max_relative_error=1e-3,
no_grad_set=set('Y'),
check_cinn=True,
)
......
...@@ -103,13 +103,28 @@ class TestMatMulV2Op(OpTest): ...@@ -103,13 +103,28 @@ class TestMatMulV2Op(OpTest):
self.outputs = {'Out': result} self.outputs = {'Out': result}
def test_check_output(self): def test_check_output(self):
-self.check_output()
+self.check_output(
check_cinn=self.check_cinn if hasattr(self, 'check_cinn') else True
)
def test_check_grad(self): def test_check_grad(self):
if core.is_compiled_with_rocm(): if core.is_compiled_with_rocm():
-self.check_grad(['X', 'Y'], 'Out', max_relative_error=1e-2)
+self.check_grad(
['X', 'Y'],
'Out',
max_relative_error=1e-2,
check_cinn=self.check_cinn
if hasattr(self, 'check_cinn')
else True,
)
else: else:
-self.check_grad(['X', 'Y'], 'Out')
+self.check_grad(
['X', 'Y'],
'Out',
check_cinn=self.check_cinn
if hasattr(self, 'check_cinn')
else True,
)
class TestMatMulOp2(TestMatMulV2Op): class TestMatMulOp2(TestMatMulV2Op):
...@@ -290,6 +305,7 @@ class TestMatMulOp16(TestMatMulV2Op): ...@@ -290,6 +305,7 @@ class TestMatMulOp16(TestMatMulV2Op):
self.y_shape = (1, 2, 2, 100, 2) self.y_shape = (1, 2, 2, 100, 2)
self.trans_x = False self.trans_x = False
self.trans_y = False self.trans_y = False
self.check_cinn = False
class TestMatMulOp17(TestMatMulV2Op): class TestMatMulOp17(TestMatMulV2Op):
...@@ -343,7 +359,13 @@ def create_test_fp16_class(parent, atol=0.001, max_relative_error=1.0): ...@@ -343,7 +359,13 @@ def create_test_fp16_class(parent, atol=0.001, max_relative_error=1.0):
if core.is_compiled_with_cuda(): if core.is_compiled_with_cuda():
place = core.CUDAPlace(0) place = core.CUDAPlace(0)
if core.is_float16_supported(place): if core.is_float16_supported(place):
-self.check_output_with_place(place, atol=atol)
+self.check_output_with_place(
place,
atol=atol,
check_cinn=self.check_cinn
if hasattr(self, 'check_cinn')
else True,
)
def test_check_grad(self): def test_check_grad(self):
place = core.CUDAPlace(0) place = core.CUDAPlace(0)
...@@ -353,6 +375,9 @@ def create_test_fp16_class(parent, atol=0.001, max_relative_error=1.0): ...@@ -353,6 +375,9 @@ def create_test_fp16_class(parent, atol=0.001, max_relative_error=1.0):
['X', 'Y'], ['X', 'Y'],
'Out', 'Out',
max_relative_error=max_relative_error, max_relative_error=max_relative_error,
check_cinn=self.check_cinn
if hasattr(self, 'check_cinn')
else True,
) )
cls_name = "{}_{}".format(parent.__name__, "Fp16") cls_name = "{}_{}".format(parent.__name__, "Fp16")
...@@ -405,7 +430,13 @@ def create_test_bf16_class(parent, atol=0.01): ...@@ -405,7 +430,13 @@ def create_test_bf16_class(parent, atol=0.01):
def test_check_output(self): def test_check_output(self):
place = core.CUDAPlace(0) place = core.CUDAPlace(0)
self.check_output_with_place(place, atol=atol) self.check_output_with_place(
place,
atol=atol,
check_cinn=self.check_cinn
if hasattr(self, 'check_cinn')
else True,
)
def test_check_grad_x(self): def test_check_grad_x(self):
place = core.CUDAPlace(0) place = core.CUDAPlace(0)
...@@ -416,6 +447,9 @@ def create_test_bf16_class(parent, atol=0.01): ...@@ -416,6 +447,9 @@ def create_test_bf16_class(parent, atol=0.01):
'Out', 'Out',
no_grad_set={'Y'}, no_grad_set={'Y'},
user_defined_grads=[numeric_grads], user_defined_grads=[numeric_grads],
check_cinn=self.check_cinn
if hasattr(self, 'check_cinn')
else True,
) )
def test_check_grad_y(self): def test_check_grad_y(self):
...@@ -427,6 +461,9 @@ def create_test_bf16_class(parent, atol=0.01): ...@@ -427,6 +461,9 @@ def create_test_bf16_class(parent, atol=0.01):
'Out', 'Out',
no_grad_set={'X'}, no_grad_set={'X'},
user_defined_grads=[numeric_grads], user_defined_grads=[numeric_grads],
check_cinn=self.check_cinn
if hasattr(self, 'check_cinn')
else True,
) )
def test_check_grad(self): def test_check_grad(self):
...@@ -596,7 +633,7 @@ class TestComplexMatMulOp(OpTest): ...@@ -596,7 +633,7 @@ class TestComplexMatMulOp(OpTest):
self.grad_y = np.matmul(np.conj(self.x).T, self.grad_out) self.grad_y = np.matmul(np.conj(self.x).T, self.grad_out)
def test_check_output(self): def test_check_output(self):
self.check_output() self.check_output(check_cinn=False)
def test_check_grad_normal(self): def test_check_grad_normal(self):
self.check_grad( self.check_grad(
...@@ -604,6 +641,7 @@ class TestComplexMatMulOp(OpTest): ...@@ -604,6 +641,7 @@ class TestComplexMatMulOp(OpTest):
'Out', 'Out',
user_defined_grads=[self.grad_x, self.grad_y], user_defined_grads=[self.grad_x, self.grad_y],
user_defined_grad_outputs=[self.grad_out], user_defined_grad_outputs=[self.grad_out],
check_cinn=False,
) )
def test_check_grad_ingore_x(self): def test_check_grad_ingore_x(self):
...@@ -613,6 +651,7 @@ class TestComplexMatMulOp(OpTest): ...@@ -613,6 +651,7 @@ class TestComplexMatMulOp(OpTest):
no_grad_set=set("X"), no_grad_set=set("X"),
user_defined_grads=[self.grad_y], user_defined_grads=[self.grad_y],
user_defined_grad_outputs=[self.grad_out], user_defined_grad_outputs=[self.grad_out],
check_cinn=False,
) )
def test_check_grad_ingore_y(self): def test_check_grad_ingore_y(self):
...@@ -622,6 +661,7 @@ class TestComplexMatMulOp(OpTest): ...@@ -622,6 +661,7 @@ class TestComplexMatMulOp(OpTest):
no_grad_set=set('Y'), no_grad_set=set('Y'),
user_defined_grads=[self.grad_x], user_defined_grads=[self.grad_x],
user_defined_grad_outputs=[self.grad_out], user_defined_grad_outputs=[self.grad_out],
check_cinn=False,
) )
...@@ -662,7 +702,7 @@ class TestComplexMatMulOpBroadcast(OpTest): ...@@ -662,7 +702,7 @@ class TestComplexMatMulOpBroadcast(OpTest):
) )
def test_check_output(self): def test_check_output(self):
self.check_output() self.check_output(check_cinn=False)
def test_check_grad_normal(self): def test_check_grad_normal(self):
self.check_grad( self.check_grad(
...@@ -670,6 +710,7 @@ class TestComplexMatMulOpBroadcast(OpTest): ...@@ -670,6 +710,7 @@ class TestComplexMatMulOpBroadcast(OpTest):
'Out', 'Out',
user_defined_grads=[self.grad_x, self.grad_y], user_defined_grads=[self.grad_x, self.grad_y],
user_defined_grad_outputs=[self.grad_out], user_defined_grad_outputs=[self.grad_out],
check_cinn=False,
) )
def test_check_grad_ingore_x(self): def test_check_grad_ingore_x(self):
...@@ -679,6 +720,7 @@ class TestComplexMatMulOpBroadcast(OpTest): ...@@ -679,6 +720,7 @@ class TestComplexMatMulOpBroadcast(OpTest):
no_grad_set=set("X"), no_grad_set=set("X"),
user_defined_grads=[self.grad_y], user_defined_grads=[self.grad_y],
user_defined_grad_outputs=[self.grad_out], user_defined_grad_outputs=[self.grad_out],
check_cinn=False,
) )
def test_check_grad_ingore_y(self): def test_check_grad_ingore_y(self):
...@@ -688,6 +730,7 @@ class TestComplexMatMulOpBroadcast(OpTest): ...@@ -688,6 +730,7 @@ class TestComplexMatMulOpBroadcast(OpTest):
no_grad_set=set('Y'), no_grad_set=set('Y'),
user_defined_grads=[self.grad_x], user_defined_grads=[self.grad_x],
user_defined_grad_outputs=[self.grad_out], user_defined_grad_outputs=[self.grad_out],
check_cinn=False,
) )
......
...@@ -48,10 +48,10 @@ class TestNormOp(OpTest): ...@@ -48,10 +48,10 @@ class TestNormOp(OpTest):
self.python_out_sig = ['Out'] self.python_out_sig = ['Out']
def test_check_output(self): def test_check_output(self):
self.check_output() self.check_output(check_cinn=True)
def test_check_grad(self): def test_check_grad(self):
self.check_grad(['X'], 'Out') self.check_grad(['X'], 'Out', check_cinn=True)
def init_test_case(self): def init_test_case(self):
self.shape = [2, 3, 4, 5] self.shape = [2, 3, 4, 5]
...@@ -109,7 +109,7 @@ class TestNormOp6(TestNormOp): ...@@ -109,7 +109,7 @@ class TestNormOp6(TestNormOp):
self.dtype = "float32" self.dtype = "float32"
def test_check_grad(self): def test_check_grad(self):
self.check_grad(['X'], 'Out', max_relative_error=0.008) self.check_grad(['X'], 'Out', max_relative_error=0.008, check_cinn=True)
@unittest.skipIf( @unittest.skipIf(
...@@ -120,11 +120,17 @@ class TestNormOp7(TestNormOp): ...@@ -120,11 +120,17 @@ class TestNormOp7(TestNormOp):
self.dtype = "float16" self.dtype = "float16"
def test_check_output(self): def test_check_output(self):
self.check_output_with_place(fluid.core.CUDAPlace(0), atol=5e-2) self.check_output_with_place(
fluid.core.CUDAPlace(0), atol=5e-2, check_cinn=True
)
def test_check_grad(self): def test_check_grad(self):
self.check_grad_with_place( self.check_grad_with_place(
fluid.core.CUDAPlace(0), ['X'], 'Out', max_relative_error=0.05 fluid.core.CUDAPlace(0),
['X'],
'Out',
max_relative_error=0.05,
check_cinn=True,
) )
...@@ -147,7 +153,7 @@ class TestNormTestOp(OpTest): ...@@ -147,7 +153,7 @@ class TestNormTestOp(OpTest):
def test_check_output(self): def test_check_output(self):
# dynamic graph just supports float tensor # dynamic graph just supports float tensor
self.check_output(check_dygraph=True) self.check_output(check_dygraph=True, check_cinn=True)
def test_check_grad(self): def test_check_grad(self):
pass pass
...@@ -176,11 +182,17 @@ class TestNormBF16Op(OpTest): ...@@ -176,11 +182,17 @@ class TestNormBF16Op(OpTest):
self.python_out_sig = ['Out'] self.python_out_sig = ['Out']
def test_check_output(self): def test_check_output(self):
self.check_output_with_place(core.CUDAPlace(0), atol=1e-1) self.check_output_with_place(
core.CUDAPlace(0), atol=1e-1, check_cinn=True
)
def test_check_grad(self): def test_check_grad(self):
self.check_grad_with_place( self.check_grad_with_place(
core.CUDAPlace(0), ['X'], 'Out', max_relative_error=1e-2 core.CUDAPlace(0),
['X'],
'Out',
max_relative_error=1e-2,
check_cinn=True,
) )
def init_test_case(self): def init_test_case(self):
......
...@@ -49,7 +49,7 @@ class TestOneHotOp(OpTest): ...@@ -49,7 +49,7 @@ class TestOneHotOp(OpTest):
self.outputs = {'Out': (out, x_lod)} self.outputs = {'Out': (out, x_lod)}
def test_check_output(self): def test_check_output(self):
self.check_output() self.check_output(check_cinn=True)
class TestOneHotOp_attr(OpTest): class TestOneHotOp_attr(OpTest):
...@@ -57,6 +57,7 @@ class TestOneHotOp_attr(OpTest): ...@@ -57,6 +57,7 @@ class TestOneHotOp_attr(OpTest):
self.op_type = 'one_hot_v2' self.op_type = 'one_hot_v2'
self.python_api = one_hot_wrapper self.python_api = one_hot_wrapper
depth = 10 depth = 10
depth_np = np.array(10).astype('int32')
dimension = 12 dimension = 12
x_lod = [[4, 1, 3, 3]] x_lod = [[4, 1, 3, 3]]
x = [np.random.randint(0, depth - 1) for i in range(sum(x_lod[0]))] x = [np.random.randint(0, depth - 1) for i in range(sum(x_lod[0]))]
...@@ -69,12 +70,12 @@ class TestOneHotOp_attr(OpTest): ...@@ -69,12 +70,12 @@ class TestOneHotOp_attr(OpTest):
for i in range(np.product(x.shape)): for i in range(np.product(x.shape)):
out[i, 0, x[i]] = 1.0 out[i, 0, x[i]] = 1.0
self.inputs = {'X': (x, x_lod)} self.inputs = {'X': (x, x_lod), 'depth_tensor': depth_np}
self.attrs = {'dtype': int(core.VarDesc.VarType.FP32), 'depth': depth} self.attrs = {'dtype': int(core.VarDesc.VarType.FP32), 'depth': depth}
self.outputs = {'Out': (out, x_lod)} self.outputs = {'Out': (out, x_lod)}
def test_check_output(self): def test_check_output(self):
self.check_output() self.check_output(check_cinn=True)
class TestOneHotOp_default_dtype(OpTest): class TestOneHotOp_default_dtype(OpTest):
...@@ -98,7 +99,7 @@ class TestOneHotOp_default_dtype(OpTest): ...@@ -98,7 +99,7 @@ class TestOneHotOp_default_dtype(OpTest):
self.outputs = {'Out': (out, x_lod)} self.outputs = {'Out': (out, x_lod)}
def test_check_output(self): def test_check_output(self):
self.check_output() self.check_output(check_cinn=True)
class TestOneHotOp_default_dtype_attr(OpTest): class TestOneHotOp_default_dtype_attr(OpTest):
...@@ -106,6 +107,7 @@ class TestOneHotOp_default_dtype_attr(OpTest): ...@@ -106,6 +107,7 @@ class TestOneHotOp_default_dtype_attr(OpTest):
self.op_type = 'one_hot_v2' self.op_type = 'one_hot_v2'
self.python_api = one_hot_wrapper self.python_api = one_hot_wrapper
depth = 10 depth = 10
depth_np = np.array(depth).astype('int32')
dimension = 12 dimension = 12
x_lod = [[4, 1, 3, 3]] x_lod = [[4, 1, 3, 3]]
x = [np.random.randint(0, depth - 1) for i in range(sum(x_lod[0]))] x = [np.random.randint(0, depth - 1) for i in range(sum(x_lod[0]))]
......
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
import unittest import unittest
import numpy as np import numpy as np
from eager_op_test import OpTest from eager_op_test import OpTest, convert_float_to_uint16
import paddle import paddle
import paddle.nn.functional as F import paddle.nn.functional as F
...@@ -43,14 +43,16 @@ class SeluTest(OpTest): ...@@ -43,14 +43,16 @@ class SeluTest(OpTest):
self.op_type = "selu" self.op_type = "selu"
self.python_api = paddle.nn.functional.selu self.python_api = paddle.nn.functional.selu
self.x_shape = [3, 5, 5, 10] self.x_shape = [3, 5, 5, 10]
self.dtype = np.float64
self.init_x_shape() self.init_x_shape()
self.init_dtype() self.init_dtype()
alpha = 1.6732632423543772848170429916717 alpha = 1.6732632423543772848170429916717
scale = 1.0507009873554804934193349852946 scale = 1.0507009873554804934193349852946
x = np.random.normal(size=self.x_shape).astype(self.dtype) if self.dtype == np.uint16:
x = np.random.normal(size=self.x_shape).astype(np.float32)
else:
x = np.random.normal(size=self.x_shape).astype(self.dtype)
# Since zero point in selu is not differentiable, avoid randomize # Since zero point in selu is not differentiable, avoid randomize
# zero. # zero.
...@@ -58,8 +60,12 @@ class SeluTest(OpTest): ...@@ -58,8 +60,12 @@ class SeluTest(OpTest):
out = ref_selu(x, scale, alpha) out = ref_selu(x, scale, alpha)
self.inputs = {'X': x} if self.dtype == np.uint16:
self.outputs = {'Out': out} self.inputs = {'X': convert_float_to_uint16(x)}
self.outputs = {'Out': convert_float_to_uint16(out)}
else:
self.inputs = {'X': x}
self.outputs = {'Out': out}
self.attrs = { self.attrs = {
'alpha': alpha, 'alpha': alpha,
...@@ -70,7 +76,7 @@ class SeluTest(OpTest): ...@@ -70,7 +76,7 @@ class SeluTest(OpTest):
pass pass
def init_dtype(self): def init_dtype(self):
pass self.dtype = np.float64
def test_check_output(self): def test_check_output(self):
self.check_output() self.check_output()
...@@ -79,6 +85,27 @@ class SeluTest(OpTest): ...@@ -79,6 +85,27 @@ class SeluTest(OpTest):
self.check_grad(['X'], 'Out') self.check_grad(['X'], 'Out')
class SeluTestFP16OP(SeluTest):
def init_dtype(self):
self.dtype = np.float16
@unittest.skipIf(
not core.is_compiled_with_cuda()
or not core.is_bfloat16_supported(core.CUDAPlace(0)),
"core is not compiled with CUDA and do not support bfloat16",
)
class SeluTestBF16OP(SeluTest):
def init_dtype(self):
self.dtype = np.uint16
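        # OpTest encodes bfloat16 data as uint16, so setUp generates float32
        # inputs and converts them with convert_float_to_uint16 for this dtype.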
def test_check_output(self):
self.check_output_with_place(core.CUDAPlace(0))
def test_check_grad(self):
self.check_grad_with_place(core.CUDAPlace(0), ['X'], 'Out')
class TestSeluAPI(unittest.TestCase): class TestSeluAPI(unittest.TestCase):
# test paddle.nn.SELU, paddle.nn.functional.selu # test paddle.nn.SELU, paddle.nn.functional.selu
def setUp(self): def setUp(self):
......
...@@ -36,7 +36,7 @@ class TestShapeOp(OpTest): ...@@ -36,7 +36,7 @@ class TestShapeOp(OpTest):
self.dtype = np.float32 self.dtype = np.float32
def test_check_output(self): def test_check_output(self):
self.check_output() self.check_output(check_cinn=True)
class case1(TestShapeOp): class case1(TestShapeOp):
...@@ -125,7 +125,7 @@ class TestShapeOpBf16(OpTest): ...@@ -125,7 +125,7 @@ class TestShapeOpBf16(OpTest):
def test_check_output(self): def test_check_output(self):
place = core.CUDAPlace(0) place = core.CUDAPlace(0)
self.check_output_with_place(place) self.check_output_with_place(place, check_cinn=True)
class case1Bf16(TestShapeOpBf16): class case1Bf16(TestShapeOpBf16):
......
...@@ -62,10 +62,10 @@ class TestSumOp(OpTest): ...@@ -62,10 +62,10 @@ class TestSumOp(OpTest):
self.dtype = np.float64 self.dtype = np.float64
def test_check_output(self): def test_check_output(self):
self.check_output(check_prim=True) self.check_output(check_prim=True, check_cinn=True)
def test_check_grad(self): def test_check_grad(self):
self.check_grad(['x0'], 'Out', check_prim=True) self.check_grad(['x0'], 'Out', check_prim=True, check_cinn=True)
class TestSelectedRowsSumOp(unittest.TestCase): class TestSelectedRowsSumOp(unittest.TestCase):
...@@ -299,14 +299,14 @@ class TestFP16SumOp(TestSumOp): ...@@ -299,14 +299,14 @@ class TestFP16SumOp(TestSumOp):
def test_check_output(self): def test_check_output(self):
place = core.CUDAPlace(0) place = core.CUDAPlace(0)
if core.is_float16_supported(place): if core.is_float16_supported(place):
self.check_output_with_place(place) self.check_output_with_place(place, check_cinn=True)
# FIXME: Because of the precision fp16, max_relative_error # FIXME: Because of the precision fp16, max_relative_error
# should be 0.15 here. # should be 0.15 here.
def test_check_grad(self): def test_check_grad(self):
place = core.CUDAPlace(0) place = core.CUDAPlace(0)
if core.is_float16_supported(place): if core.is_float16_supported(place):
self.check_grad(['x0'], 'Out') self.check_grad(['x0'], 'Out', check_cinn=True)
def create_test_sum_fp16_class(parent): def create_test_sum_fp16_class(parent):
......
...@@ -12,7 +12,9 @@ ...@@ -12,7 +12,9 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import os
import random import random
import tempfile
import unittest import unittest
import numpy as np import numpy as np
...@@ -41,7 +43,10 @@ class InferencePassTest(unittest.TestCase): ...@@ -41,7 +43,10 @@ class InferencePassTest(unittest.TestCase):
self.dynamic_shape_params = None self.dynamic_shape_params = None
self.enable_lite = False self.enable_lite = False
self.lite_parameters = None self.lite_parameters = None
self.path = "./inference_pass/" + self.__class__.__name__ + "/" self.temp_dir = tempfile.TemporaryDirectory()
self.path = os.path.join(
self.temp_dir.name, 'inference_pass', self.__class__.__name__
)
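        # The TemporaryDirectory removes itself via cleanup() or when the object
        # is finalized, so cached artifacts no longer leak into the working directory.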
np.random.seed(1) np.random.seed(1)
random.seed(1) random.seed(1)
......
...@@ -53,8 +53,9 @@ class TensorRTSubgraphPassActivationTest(InferencePassTest): ...@@ -53,8 +53,9 @@ class TensorRTSubgraphPassActivationTest(InferencePassTest):
def test_check_output(self): def test_check_output(self):
if core.is_compiled_with_cuda(): if core.is_compiled_with_cuda():
use_gpu = True use_gpu = True
if os.path.exists(self.path + "_opt_cache"): opt_path = os.path.join(self.path, '_opt_cache')
shutil.rmtree(self.path + "_opt_cache") if os.path.exists(opt_path):
shutil.rmtree(opt_path)
if ( if (
self.trt_parameters.precision self.trt_parameters.precision
== AnalysisConfig.Precision.Float32 == AnalysisConfig.Precision.Float32
......
...@@ -53,8 +53,9 @@ class TensorRTSubgraphPassElementwiseBroadcastTest(InferencePassTest): ...@@ -53,8 +53,9 @@ class TensorRTSubgraphPassElementwiseBroadcastTest(InferencePassTest):
return paddle.tensor.math.add(x=data1, y=data2) return paddle.tensor.math.add(x=data1, y=data2)
def test_check_output(self): def test_check_output(self):
if os.path.exists(self.path + "_opt_cache"): opt_path = os.path.join(self.path, '_opt_cache')
shutil.rmtree(self.path + "_opt_cache") if os.path.exists(opt_path):
shutil.rmtree(opt_path)
if core.is_compiled_with_cuda(): if core.is_compiled_with_cuda():
use_gpu = True use_gpu = True
self.check_output_with_option(use_gpu) self.check_output_with_option(use_gpu)
......
...@@ -55,8 +55,9 @@ class TRTInstanceNormTest(InferencePassTest): ...@@ -55,8 +55,9 @@ class TRTInstanceNormTest(InferencePassTest):
self.fetch_list = [out] self.fetch_list = [out]
def check_output(self, remove_cache=False): def check_output(self, remove_cache=False):
if remove_cache and os.path.exists(self.path + "_opt_cache"): opt_path = os.path.join(self.path, '_opt_cache')
shutil.rmtree(self.path + "_opt_cache") if remove_cache and os.path.exists(opt_path):
shutil.rmtree(opt_path)
if core.is_compiled_with_cuda(): if core.is_compiled_with_cuda():
use_gpu = True use_gpu = True
atol = 1e-5 atol = 1e-5
......
...@@ -84,8 +84,9 @@ class TensorRTPool3dTest(InferencePassTest): ...@@ -84,8 +84,9 @@ class TensorRTPool3dTest(InferencePassTest):
self.fetch_list = [pool_out] self.fetch_list = [pool_out]
def check_output(self): def check_output(self):
if os.path.exists(self.path + "_opt_cache"): opt_path = os.path.join(self.path, '_opt_cache')
shutil.rmtree(self.path + "_opt_cache") if os.path.exists(opt_path):
shutil.rmtree(opt_path)
if core.is_compiled_with_cuda(): if core.is_compiled_with_cuda():
use_gpu = True use_gpu = True
if self.precision == AnalysisConfig.Precision.Float32: if self.precision == AnalysisConfig.Precision.Float32:
...@@ -200,8 +201,9 @@ class TensorRTAdaptiveAvgPool3DTest(InferencePassTest): ...@@ -200,8 +201,9 @@ class TensorRTAdaptiveAvgPool3DTest(InferencePassTest):
self.fetch_list = [pool_out] self.fetch_list = [pool_out]
def check_output(self): def check_output(self):
if os.path.exists(self.path + "_opt_cache"): opt_path = os.path.join(self.path, '_opt_cache')
shutil.rmtree(self.path + "_opt_cache") if os.path.exists(opt_path):
shutil.rmtree(opt_path)
if core.is_compiled_with_cuda(): if core.is_compiled_with_cuda():
use_gpu = True use_gpu = True
self.check_output_with_option(use_gpu) self.check_output_with_option(use_gpu)
...@@ -300,8 +302,9 @@ class TensorRTAdaptiveMaxPool3DTest(InferencePassTest): ...@@ -300,8 +302,9 @@ class TensorRTAdaptiveMaxPool3DTest(InferencePassTest):
self.fetch_list = [pool_out] self.fetch_list = [pool_out]
def check_output(self): def check_output(self):
if os.path.exists(self.path + "_opt_cache"): opt_path = os.path.join(self.path, '_opt_cache')
shutil.rmtree(self.path + "_opt_cache") if os.path.exists(opt_path):
shutil.rmtree(opt_path)
if core.is_compiled_with_cuda(): if core.is_compiled_with_cuda():
use_gpu = True use_gpu = True
self.check_output_with_option(use_gpu) self.check_output_with_option(use_gpu)
......
...@@ -86,8 +86,9 @@ class TensorRTPoolTest(InferencePassTest): ...@@ -86,8 +86,9 @@ class TensorRTPoolTest(InferencePassTest):
self.fetch_list = [out] self.fetch_list = [out]
def check_output(self): def check_output(self):
if os.path.exists(self.path + "_opt_cache"): opt_path = os.path.join(self.path, '_opt_cache')
shutil.rmtree(self.path + "_opt_cache") if os.path.exists(opt_path):
shutil.rmtree(opt_path)
if core.is_compiled_with_cuda(): if core.is_compiled_with_cuda():
use_gpu = True use_gpu = True
if self.precision == AnalysisConfig.Precision.Float32: if self.precision == AnalysisConfig.Precision.Float32:
......
...@@ -60,8 +60,9 @@ class SkipLayernormFusePassTest0(InferencePassTest): ...@@ -60,8 +60,9 @@ class SkipLayernormFusePassTest0(InferencePassTest):
return paddle.add(data1, data2) return paddle.add(data1, data2)
def test_check_output(self): def test_check_output(self):
if os.path.exists(self.path + "_opt_cache"): opt_path = os.path.join(self.path, '_opt_cache')
shutil.rmtree(self.path + "_opt_cache") if os.path.exists(opt_path):
shutil.rmtree(opt_path)
if core.is_compiled_with_cuda(): if core.is_compiled_with_cuda():
use_gpu = True use_gpu = True
self.check_output_with_option(use_gpu, atol=0.01, rtol=0.00001) self.check_output_with_option(use_gpu, atol=0.01, rtol=0.00001)
...@@ -107,8 +108,9 @@ class SkipLayernormFusePassTest1(InferencePassTest): ...@@ -107,8 +108,9 @@ class SkipLayernormFusePassTest1(InferencePassTest):
return paddle.add(data1, data2) return paddle.add(data1, data2)
def test_check_output(self): def test_check_output(self):
if os.path.exists(self.path + "_opt_cache"): opt_path = os.path.join(self.path, '_opt_cache')
shutil.rmtree(self.path + "_opt_cache") if os.path.exists(opt_path):
shutil.rmtree(opt_path)
if core.is_compiled_with_cuda(): if core.is_compiled_with_cuda():
use_gpu = True use_gpu = True
self.check_output_with_option(use_gpu, atol=0.01, rtol=0.00001) self.check_output_with_option(use_gpu, atol=0.01, rtol=0.00001)
...@@ -154,8 +156,9 @@ class SkipLayernormFusePassTest2(InferencePassTest): ...@@ -154,8 +156,9 @@ class SkipLayernormFusePassTest2(InferencePassTest):
return paddle.add(data1, data2) return paddle.add(data1, data2)
def test_check_output(self): def test_check_output(self):
if os.path.exists(self.path + "_opt_cache"): opt_path = os.path.join(self.path, '_opt_cache')
shutil.rmtree(self.path + "_opt_cache") if os.path.exists(opt_path):
shutil.rmtree(opt_path)
if core.is_compiled_with_cuda(): if core.is_compiled_with_cuda():
use_gpu = True use_gpu = True
self.check_output_with_option(use_gpu, atol=0.1, rtol=0.00001) self.check_output_with_option(use_gpu, atol=0.1, rtol=0.00001)
...@@ -201,8 +204,9 @@ class SkipLayernormFusePassTest3(InferencePassTest): ...@@ -201,8 +204,9 @@ class SkipLayernormFusePassTest3(InferencePassTest):
return paddle.add(data1, data2) return paddle.add(data1, data2)
def test_check_output(self): def test_check_output(self):
if os.path.exists(self.path + "_opt_cache"): opt_path = os.path.join(self.path, '_opt_cache')
shutil.rmtree(self.path + "_opt_cache") if os.path.exists(opt_path):
shutil.rmtree(opt_path)
if core.is_compiled_with_cuda(): if core.is_compiled_with_cuda():
use_gpu = True use_gpu = True
self.check_output_with_option(use_gpu, atol=0.1, rtol=0.00001) self.check_output_with_option(use_gpu, atol=0.1, rtol=0.00001)
......
...@@ -128,8 +128,9 @@ class TensorRTSubgraphPassSplitSerializeTest(InferencePassTest): ...@@ -128,8 +128,9 @@ class TensorRTSubgraphPassSplitSerializeTest(InferencePassTest):
def test_check_output(self): def test_check_output(self):
if paddle.is_compiled_with_cuda(): if paddle.is_compiled_with_cuda():
use_gpu = True use_gpu = True
if os.path.exists(self.path + "_opt_cache"): opt_path = os.path.join(self.path, '_opt_cache')
shutil.rmtree(self.path + "_opt_cache") if os.path.exists(opt_path):
shutil.rmtree(opt_path)
self.check_output_with_option(use_gpu) self.check_output_with_option(use_gpu)
self.assertTrue( self.assertTrue(
PassVersionChecker.IsCompatible('tensorrt_subgraph_pass') PassVersionChecker.IsCompatible('tensorrt_subgraph_pass')
...@@ -164,8 +165,9 @@ class TensorRTSubgraphPassDynamicSplitFp16SerializeTest(InferencePassTest): ...@@ -164,8 +165,9 @@ class TensorRTSubgraphPassDynamicSplitFp16SerializeTest(InferencePassTest):
def test_check_output(self): def test_check_output(self):
if paddle.is_compiled_with_cuda(): if paddle.is_compiled_with_cuda():
use_gpu = True use_gpu = True
if os.path.exists(self.path + "_opt_cache"): opt_path = os.path.join(self.path, '_opt_cache')
shutil.rmtree(self.path + "_opt_cache") if os.path.exists(opt_path):
shutil.rmtree(opt_path)
self.check_output_with_option(use_gpu, 1e-3) self.check_output_with_option(use_gpu, 1e-3)
self.assertTrue( self.assertTrue(
PassVersionChecker.IsCompatible('tensorrt_subgraph_pass') PassVersionChecker.IsCompatible('tensorrt_subgraph_pass')
...@@ -313,8 +315,9 @@ class TensorRTSubgraphPassLayerNormDynamicTest(InferencePassTest): ...@@ -313,8 +315,9 @@ class TensorRTSubgraphPassLayerNormDynamicTest(InferencePassTest):
self.serialize = True self.serialize = True
def test_check_output(self): def test_check_output(self):
if os.path.exists(self.path + "_opt_cache"): opt_path = os.path.join(self.path, '_opt_cache')
shutil.rmtree(self.path + "_opt_cache") if os.path.exists(opt_path):
shutil.rmtree(opt_path)
if paddle.is_compiled_with_cuda(): if paddle.is_compiled_with_cuda():
use_gpu = True use_gpu = True
self.check_output_with_option(use_gpu) self.check_output_with_option(use_gpu)
...@@ -332,8 +335,9 @@ class TensorRTSubgraphPassLayerNormDynamicFP16Test( ...@@ -332,8 +335,9 @@ class TensorRTSubgraphPassLayerNormDynamicFP16Test(
self.serialize = True self.serialize = True
def test_check_output(self): def test_check_output(self):
if os.path.exists(self.path + "_opt_cache"): opt_path = os.path.join(self.path, '_opt_cache')
shutil.rmtree(self.path + "_opt_cache") if os.path.exists(opt_path):
shutil.rmtree(opt_path)
if paddle.is_compiled_with_cuda(): if paddle.is_compiled_with_cuda():
use_gpu = True use_gpu = True
self.check_output_with_option(use_gpu, atol=0.01, rtol=0.01) self.check_output_with_option(use_gpu, atol=0.01, rtol=0.01)
...@@ -406,8 +410,9 @@ class TensorRTSubgraphPassElementwiseSerializeTest( ...@@ -406,8 +410,9 @@ class TensorRTSubgraphPassElementwiseSerializeTest(
) )
def test_check_output(self): def test_check_output(self):
if os.path.exists(self.path + "_opt_cache"): opt_path = os.path.join(self.path, '_opt_cache')
shutil.rmtree(self.path + "_opt_cache") if os.path.exists(opt_path):
shutil.rmtree(opt_path)
super().test_check_output() super().test_check_output()
...@@ -444,8 +449,9 @@ class TensorRTSubgraphPassElementwiseBroadcastDynamicTest(InferencePassTest): ...@@ -444,8 +449,9 @@ class TensorRTSubgraphPassElementwiseBroadcastDynamicTest(InferencePassTest):
return paddle.add(x=data1, y=data2) return paddle.add(x=data1, y=data2)
def test_check_output(self): def test_check_output(self):
if os.path.exists(self.path + "_opt_cache"): opt_path = os.path.join(self.path, '_opt_cache')
shutil.rmtree(self.path + "_opt_cache") if os.path.exists(opt_path):
shutil.rmtree(opt_path)
if paddle.is_compiled_with_cuda(): if paddle.is_compiled_with_cuda():
use_gpu = True use_gpu = True
self.check_output_with_option(use_gpu) self.check_output_with_option(use_gpu)
......
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy as np
from get_test_cover_info import (
XPUOpTestWrapper,
create_test_class,
get_xpu_op_support_types,
)
from op_test_xpu import XPUOpTest
import paddle
paddle.enable_static()
def nll_loss_1d(
logs, dtype, targets, weight=None, reduction='mean', ignore_index=-100
):
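    # Reference negative log likelihood loss for [N, C] inputs: each sample i
    # with target t = targets[i] contributes -logs[i][t] * weight[t], or zero
    # loss and zero weight when t == ignore_index. 'sum' adds the contributions,
    # 'mean' also divides by the accumulated weight, 'none' keeps the per-sample
    # vector.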
input_shape = logs.shape
N = input_shape[0]
C = input_shape[1]
out = np.zeros_like(targets).astype(dtype)
total_weight = 0
for i in range(N):
cur_target = targets[i]
if cur_target == ignore_index:
out[i] = 0
continue
cur_weight = weight[cur_target] if weight is not None else 1
total_weight += cur_weight
out[i] = -logs[i][cur_target] * cur_weight
if reduction == 'sum':
out = np.sum(out)
total_weight = np.array([total_weight]).astype(dtype)
return {'Out': out, 'Total_weight': total_weight}
elif reduction == 'mean':
out = np.sum(out)
if total_weight != 0:
out /= total_weight
total_weight = np.array([total_weight]).astype(dtype)
return {'Out': out, 'Total_weight': total_weight}
elif reduction == 'none':
total_weight = np.array([0]).astype(dtype)
return {'Out': out, 'Total_weight': total_weight}
def nll_loss_2d(
logs, dtype, targets, weight=None, reduction='mean', ignore_index=-100
):
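    # Same reference computation as nll_loss_1d, applied at every spatial
    # position of a [N, C, H, W] input: loss[i][h][w] uses the class index
    # targets[i][h][w], followed by the same reduction handling.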
input_shape = logs.shape
N = input_shape[0]
H = input_shape[2]
W = input_shape[3]
out = np.zeros_like(targets).astype(dtype)
total_weight = 0
for i in range(N):
for h in range(H):
for w in range(W):
cur_target = targets[i][h][w]
if cur_target == ignore_index:
out[i][h][w] = 0
continue
cur_weight = weight[cur_target] if weight is not None else 1
total_weight += cur_weight
out[i][h][w] = -logs[i][cur_target][h][w] * cur_weight
if reduction == 'sum':
out = np.sum(out)
total_weight = np.array([total_weight]).astype(dtype)
return {'Out': out, 'Total_weight': total_weight}
elif reduction == 'mean':
out = np.sum(out)
if total_weight != 0:
out /= total_weight
total_weight = np.array([total_weight]).astype(dtype)
return {'Out': out, 'Total_weight': total_weight}
elif reduction == 'none':
total_weight = np.array([0]).astype(dtype)
return {'Out': out, 'Total_weight': total_weight}
class XPUTestNLLLossOP(XPUOpTestWrapper):
def __init__(self):
self.op_name = 'nll_loss'
self.use_dynamic_create_class = False
class TestNLLLossOpBase1D(XPUOpTest):
op_type = 'nll_loss'
def setUp(self):
self.dtype = self.in_type
self.place = paddle.XPUPlace(0)
self.set_attrs()
self.set_inputs()
self.inputs = {
'X': self.x,
'Label': self.label,
}
if self.weight is not None:
self.inputs['Weight'] = self.weight
self.outputs = nll_loss_1d(
self.x,
self.dtype,
self.label,
self.weight,
self.attrs['reduction'],
)
def set_attrs(self):
self.attrs = {'reduction': 'none'}
def set_inputs(self):
self.class_num = 3
x_shape = [5, self.class_num]
label_shape = [5]
self.x = np.random.random(x_shape).astype(self.dtype)
self.label = np.random.randint(
low=0, high=self.class_num, size=label_shape
).astype(np.int64)
self.weight = np.random.random(self.class_num).astype(self.dtype)
def test_check_output(self):
self.check_output_with_place(self.place)
def test_check_grad(self):
self.check_grad_with_place(self.place, ['X'], 'Out')
class TestNLLLossOpWithWeightMean1D(TestNLLLossOpBase1D):
def set_attrs(self):
self.attrs = {'reduction': 'mean'}
class TestNLLLossOpWithWeightSum1D(TestNLLLossOpBase1D):
def set_attrs(self):
self.attrs = {'reduction': 'sum'}
class TestNLLLossOpWithoutWeightNone1D(TestNLLLossOpBase1D):
def set_inputs(self):
self.class_num = 3
x_shape = [5, self.class_num]
label_shape = [5]
self.x = np.random.random(x_shape).astype(self.dtype)
self.label = np.random.randint(
low=0, high=self.class_num, size=label_shape
).astype(np.int64)
self.weight = None
def set_attrs(self):
self.attrs = {'reduction': 'none'}
class TestNLLLossOpWithoutWeightMean1D(TestNLLLossOpBase1D):
def set_inputs(self):
self.class_num = 3
x_shape = [5, self.class_num]
label_shape = [5]
self.x = np.random.random(x_shape).astype(self.dtype)
self.label = np.random.randint(
low=0, high=self.class_num, size=label_shape
).astype(np.int64)
self.weight = None
def set_attrs(self):
self.attrs = {'reduction': 'mean'}
class TestNLLLossOpWithoutWeightSum1D(TestNLLLossOpBase1D):
def set_inputs(self):
self.class_num = 3
x_shape = [5, self.class_num]
label_shape = [5]
self.x = np.random.random(x_shape).astype(self.dtype)
self.label = np.random.randint(
low=0, high=self.class_num, size=label_shape
).astype(np.int64)
self.weight = None
def set_attrs(self):
self.attrs = {'reduction': 'sum'}
class TestNLLLossOpBase2D(XPUOpTest):
op_type = 'nll_loss'
def setUp(self):
self.dtype = self.in_type
self.place = paddle.XPUPlace(0)
self.set_attrs()
self.set_inputs()
self.inputs = {'X': self.x, 'Label': self.label}
if self.weight is not None:
self.inputs['Weight'] = self.weight
self.outputs = nll_loss_2d(
self.x,
self.dtype,
self.label,
self.weight,
self.attrs['reduction'],
)
def set_attrs(self):
self.attrs = {'reduction': 'none'}
def set_inputs(self):
self.class_num = 3
x_shape = [5, self.class_num, 7, 11]
label_shape = [5, 7, 11]
self.x = np.random.random(x_shape).astype(self.dtype)
self.label = np.random.randint(
low=0, high=self.class_num, size=label_shape
).astype(np.int64)
self.weight = np.random.random(self.class_num).astype(self.dtype)
def test_check_output(self):
self.check_output_with_place(self.place)
def test_check_grad(self):
self.check_grad_with_place(self.place, ['X'], 'Out')
class TestNLLLossOpWithWeightMean2D(TestNLLLossOpBase2D):
def set_attrs(self):
self.attrs = {'reduction': 'mean'}
class TestNLLLossOpWithWeightSum2D(TestNLLLossOpBase2D):
def set_attrs(self):
self.attrs = {'reduction': 'sum'}
class TestNLLLossOpWithoutWeightNone2D(TestNLLLossOpBase2D):
def set_inputs(self):
self.dtype = self.in_type
self.class_num = 3
x_shape = [5, self.class_num, 7, 11]
label_shape = [5, 7, 11]
self.x = np.random.random(x_shape).astype(self.dtype)
self.label = np.random.randint(
low=0, high=self.class_num, size=label_shape
).astype(np.int64)
self.weight = None
def set_attrs(self):
self.attrs = {'reduction': 'none'}
class TestNLLLossOpWithoutWeightMean2D(TestNLLLossOpBase2D):
def set_inputs(self):
self.dtype = self.in_type
self.class_num = 3
x_shape = [5, self.class_num, 7, 11]
label_shape = [5, 7, 11]
self.x = np.random.random(x_shape).astype(self.dtype)
self.label = np.random.randint(
low=0, high=self.class_num, size=label_shape
).astype(np.int64)
self.weight = None
def set_attrs(self):
self.attrs = {'reduction': 'mean'}
class TestNLLLossOpWithoutWeightSum2D(TestNLLLossOpBase2D):
def set_inputs(self):
self.dtype = self.in_type
self.class_num = 3
x_shape = [5, self.class_num, 7, 11]
label_shape = [5, 7, 11]
self.x = np.random.random(x_shape).astype(self.dtype)
self.label = np.random.randint(
low=0, high=self.class_num, size=label_shape
).astype(np.int64)
self.weight = None
def set_attrs(self):
self.attrs = {'reduction': 'sum'}
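# Register one concrete test class per data type reported as supported by the
# XPU 'nll_loss' kernel, so every supported dtype runs the cases above.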
support_types = get_xpu_op_support_types('nll_loss')
for stype in support_types:
create_test_class(globals(), XPUTestNLLLossOP, stype)
if __name__ == '__main__':
unittest.main()
...@@ -344,7 +344,7 @@ fi ...@@ -344,7 +344,7 @@ fi
OUTPUT_LOG=`echo "$ALL_ADDED_LINES" | grep -Ew "print|printf|fprintf|std::cout" || true` OUTPUT_LOG=`echo "$ALL_ADDED_LINES" | grep -Ew "print|printf|fprintf|std::cout" || true`
if [ "$OUTPUT_LOG" != "" ];then if [ "$OUTPUT_LOG" != "" ];then
echo_line="print or std::cout is not recommended for direct use, please use loggin or glog. If it is necessary to use, please contact tianshuo78520a (Recommend) or zhangbo9674 review and approve.\n" echo_line="print or std::cout is not recommended for direct use, please use loggin or glog. If it is necessary to use, please contact tianshuo78520a (Recommend) or zhangbo9674 review and approve.\n"
check_approval 1 tianshuo7852a zhangbo9674 check_approval 1 tianshuo78520a zhangbo9674
fi fi
HAS_MODIFIED_PHI_FILES=`git diff --name-only upstream/$BRANCH | grep "paddle/phi/" || true` HAS_MODIFIED_PHI_FILES=`git diff --name-only upstream/$BRANCH | grep "paddle/phi/" || true`
......