Commit 3631f064 authored by liangjianzhong

Merge remote-tracking branch 'zyc/develop' into semi-auto/rule-base

......@@ -30,10 +30,6 @@
path = third_party/xxhash
url = https://github.com/Cyan4973/xxHash.git
ignore = dirty
[submodule "third_party/eigen3"]
path = third_party/eigen3
url = https://gitlab.com/libeigen/eigen.git
ignore = dirty
[submodule "third_party/leveldb"]
path = third_party/leveldb
url = https://github.com/google/leveldb
......@@ -50,3 +46,7 @@
path = third_party/glog
url = https://github.com/google/glog.git
ignore = dirty
[submodule "third_party/eigen3"]
path = third_party/eigen3
url = https://gitlab.com/libeigen/eigen.git
ignore = dirty
......@@ -296,6 +296,8 @@ function(build_protobuf TARGET_NAME BUILD_FOR_HOST)
PREFIX ${PROTOBUF_PREFIX_DIR}
SOURCE_DIR ${SOURCE_DIR}
UPDATE_COMMAND ""
PATCH_COMMAND
COMMAND cd ${SOURCE_DIR} && git checkout ${PROTOBUF_TAG}
DEPENDS zlib
CONFIGURE_COMMAND
${CMAKE_COMMAND} ${SOURCE_DIR}/cmake ${OPTIONAL_ARGS}
......
......@@ -8,7 +8,7 @@ set(XPU_API_LIB_NAME "libxpuapi.so")
set(XPU_RT_LIB_NAME "libxpurt.so")
set(XPU_XFT_LIB_NAME "libxft.so")
set(XPU_BASE_DATE "20230519")
set(XPU_BASE_DATE "20230523")
set(XPU_XCCL_BASE_VERSION "1.0.49.2")
set(XPU_XFT_BASE_VERSION "latest")
......
......@@ -6,6 +6,3 @@ cc_library(
add_subdirectory(test)
add_subdirectory(spmd_rules)
cc_library(auto_parallel DEPS device_mesh process_mesh dist_attr dist_mapper
dist_tensor_spec)
......@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/distributed/auto_parallel/spmd_rules/dist_tensor_spec.h"
#include "paddle/fluid/distributed/auto_parallel/process_mesh.h"
#include "paddle/phi/core/distributed/auto_parallel/utils.h"
namespace paddle {
namespace distributed {
......@@ -27,28 +27,41 @@ DistTensorSpec::DistTensorSpec(const std::vector<int64_t>& shape,
dist_attr_.copy_from(dist_attr);
}
DistTensorSpec::DistTensorSpec(const DistTensorSpec& spec) {
std::vector<int64_t> spec_shape = spec.get_shape();
shape_.assign(spec_shape.begin(), spec_shape.end());
dist_attr_.copy_from(spec.get_dist_attr());
}
DistTensorSpec::~DistTensorSpec() {}
DistTensorSpec::DistTensorSpec(const Tensor& tensor) {
shape_ = tensor.shape();
std::vector<int64_t> pm_shape, pm_ids;
pm_shape = {4};
pm_ids = {0, 1, 2, 3};
std::vector<std::string> dim_name = {"mp"};
// std::vector<int64_t> pm_shape, pm_ids;
// pm_shape = {4};
// pm_ids = {0, 1, 2, 3};
// std::vector<std::string> dim_name = {"mp"};
ProcessMesh pm(pm_shape, pm_ids, dim_name);
std::vector<int64_t> dims_mapping = {-1, 0};
TensorDistAttr dist_attr;
dist_attr.set_process_mesh(pm);
dist_attr.set_dims_mapping(dims_mapping);
// ProcessMesh pm(pm_shape, pm_ids, dim_name);
// std::vector<int64_t> dims_mapping = {-1, 0};
// TensorDistAttr dist_attr;
// dist_attr.set_process_mesh(pm);
// dist_attr.set_dims_mapping(dims_mapping);
dist_attr_.copy_from(dist_attr);
// dist_attr_.copy_from(dist_attr);
std::cout << dist_attr_;
// std::cout << dist_attr_;
}
const std::vector<int64_t>& DistTensorSpec::get_dims_mapping() {
DistTensorSpec& DistTensorSpec::operator=(const DistTensorSpec& spec) {
std::vector<int64_t> spec_shape = spec.get_shape();
shape_ = spec_shape;
dist_attr_.copy_from(spec.get_dist_attr());
return *this;
}
const std::vector<int64_t>& DistTensorSpec::get_dims_mapping() const {
return dist_attr_.dims_mapping();
}
......@@ -57,7 +70,7 @@ void DistTensorSpec::set_dims_mapping(
dist_attr_.set_dims_mapping(dims_mapping);
}
const ProcessMesh& DistTensorSpec::get_process_mesh() {
const ProcessMesh& DistTensorSpec::get_process_mesh() const {
return dist_attr_.process_mesh();
}
......@@ -65,7 +78,22 @@ void DistTensorSpec::set_process_mesh(const ProcessMesh& process_mesh) {
dist_attr_.set_process_mesh(process_mesh);
}
const std::vector<int64_t>& DistTensorSpec::get_shape() { return shape_; }
const std::vector<int64_t>& DistTensorSpec::get_shape() const { return shape_; }
const TensorDistAttr& DistTensorSpec::get_dist_attr() const {
return dist_attr_;
}
void DistTensorSpec::set_dist_attr(const TensorDistAttr& dist_attr) {
dist_attr_ = dist_attr;
}
std::string DistTensorSpec::to_string() const {
using phi::distributed::auto_parallel::str_join;
std::string spec_str = "{tensor_shape:[" + str_join(shape_) + "], ";
spec_str += "dist_attr:" + dist_attr_.to_string() + "}";
return spec_str;
}
} // namespace auto_parallel
} // namespace distributed
......
......@@ -14,39 +14,55 @@ limitations under the License. */
#pragma once
#include "paddle/fluid/distributed/auto_parallel/dist_attr.h"
#include "paddle/phi/api/include/tensor.h"
#include "paddle/phi/core/distributed/auto_parallel/dist_attr.h"
namespace paddle {
namespace distributed {
namespace auto_parallel {
using phi::distributed::auto_parallel::ProcessMesh;
using phi::distributed::auto_parallel::TensorDistAttr;
/**
* A unified data class for inferring distributed attributes
* in both dygraph mode and static mode
*/
class DistTensorSpec {
public:
DistTensorSpec() = default;
DistTensorSpec(const std::vector<int64_t>& shape,
const TensorDistAttr& dist_attr);
DistTensorSpec(const DistTensorSpec& spec);
// temp function, only for test in dygraph mode
explicit DistTensorSpec(const Tensor& tensor);
~DistTensorSpec();
DistTensorSpec& operator=(const DistTensorSpec& spec);
// get dims_mapping from dist_attr_
const std::vector<int64_t>& get_dims_mapping();
const std::vector<int64_t>& get_dims_mapping() const;
// set dims_mapping in dist_attr_
void set_dims_mapping(const std::vector<int64_t>& dims_mapping);
// get process_mesh from dist_attr_
const ProcessMesh& get_process_mesh();
const ProcessMesh& get_process_mesh() const;
// set process_mesh in dist_attr_
void set_process_mesh(const ProcessMesh& process_mesh);
const std::vector<int64_t>& get_shape();
const TensorDistAttr& get_dist_attr() const;
void set_dist_attr(const TensorDistAttr& dist_attr);
const std::vector<int64_t>& get_shape() const;
std::string to_string() const;
private:
std::vector<int64_t> shape_;
......
......@@ -17,6 +17,7 @@
#include <functional>
#include <vector>
#include "cinn/common/target.h"
#include "cinn/hlir/framework/graph_compiler.h"
#include "cinn/runtime/cinn_runtime.h"
#include "cinn/runtime/flags.h"
......@@ -94,6 +95,11 @@ void SetCinnRandomSeed<phi::CPUContext>() {
::cinn::runtime::RandomSeed::GetOrSet(seed);
}
void SetCinnTarget(const ::cinn::common::Target& target) {
VLOG(4) << "Set CINN compile target to " << target;
::cinn::runtime::CurrentTarget::SetCurrentTarget(target);
}
} // namespace details
class CinnLaunchOp : public framework::OperatorWithKernel {
......
......@@ -58,6 +58,9 @@ void SetCinnRuntimeFlags();
template <typename DeviceContext>
void SetCinnRandomSeed();
// set CINN compile target
void SetCinnTarget(const ::cinn::common::Target& target);
} // namespace details
template <typename T, typename DeviceContext>
......@@ -115,6 +118,7 @@ class CinnLaunchOpKernel : public framework::OpKernel<T> {
"Step 2. Get compilation result of the graph");
// Step 2. Get compilation result of the graph
auto target = details::PlaceToCinnTarget(place);
details::SetCinnTarget(target);
using ClockType = std::chrono::steady_clock;
std::chrono::time_point<ClockType> start_t, end_t;
if (VLOG_IS_ON(1)) {
......
......@@ -98,7 +98,7 @@ PD_REGISTER_STRUCT_KERNEL(alltoall,
ops::AllToAllOpCUDAKernel,
float,
double,
#if NCCL_VERSION_CODE >= 21000
#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
plat::bfloat16,
#endif
int,
......
......@@ -95,7 +95,7 @@ PD_REGISTER_STRUCT_KERNEL(c_allgather,
ops::CAllGatherOpCUDAKernel,
float,
double,
#if NCCL_VERSION_CODE >= 21000
#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
plat::bfloat16,
#endif
int,
......
......@@ -28,7 +28,7 @@ PD_REGISTER_STRUCT_KERNEL(c_allreduce_max,
ALL_LAYOUT,
ops::CAllReduceMaxCUDAKernel,
float,
#if NCCL_VERSION_CODE >= 21000
#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
plat::bfloat16,
#endif
double,
......
......@@ -28,7 +28,7 @@ PD_REGISTER_STRUCT_KERNEL(c_allreduce_sum,
ALL_LAYOUT,
ops::CAllReduceSumCUDAKernel,
float,
#if NCCL_VERSION_CODE >= 21000
#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
plat::bfloat16,
#endif
double,
......
......@@ -100,7 +100,7 @@ PD_REGISTER_STRUCT_KERNEL(c_broadcast,
int64_t,
float,
double,
#if NCCL_VERSION_CODE >= 21000
#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
plat::bfloat16,
#endif
plat::float16) {
......
......@@ -137,7 +137,7 @@ PD_REGISTER_STRUCT_KERNEL(c_concat,
double,
int,
int64_t,
#if NCCL_VERSION_CODE >= 21000
#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
plat::bfloat16,
#endif
plat::float16) {
......
......@@ -239,7 +239,7 @@ PD_REGISTER_STRUCT_KERNEL(c_embedding,
ops::CEmbeddingCUDAKernel,
float,
double,
#if NCCL_VERSION_CODE >= 21000
#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
plat::bfloat16,
#endif
plat::float16) {
......@@ -251,7 +251,7 @@ PD_REGISTER_STRUCT_KERNEL(c_embedding_grad,
ops::CEmbeddingGradCUDAKernel,
float,
double,
#if NCCL_VERSION_CODE >= 21000
#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
plat::bfloat16,
#endif
plat::float16) {
......
......@@ -25,7 +25,7 @@ PD_REGISTER_STRUCT_KERNEL(c_identity,
double,
int,
int64_t,
#if NCCL_VERSION_CODE >= 21000
#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
plat::bfloat16,
#endif
plat::float16) {
......
......@@ -87,7 +87,7 @@ PD_REGISTER_STRUCT_KERNEL(c_reducescatter,
ops::CReduceScatterOpCUDAKernel,
float,
double,
#if NCCL_VERSION_CODE >= 21000
#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
plat::bfloat16,
#endif
int,
......
......@@ -123,7 +123,7 @@ PD_REGISTER_STRUCT_KERNEL(c_split,
double,
int,
int64_t,
#if NCCL_VERSION_CODE >= 21000
#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
plat::bfloat16,
#endif
plat::float16) {
......
......@@ -31,7 +31,7 @@ PD_REGISTER_STRUCT_KERNEL(mp_allreduce_sum,
double,
int,
int64_t,
#if NCCL_VERSION_CODE >= 21000
#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
plat::bfloat16,
#endif
plat::float16) {
......
......@@ -108,7 +108,7 @@ PD_REGISTER_STRUCT_KERNEL(partial_allgather,
ops::PartialAllGatherOpCUDAKernel,
float,
double,
#if NCCL_VERSION_CODE >= 21000
#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
plat::bfloat16,
#endif
int,
......
......@@ -124,7 +124,7 @@ PD_REGISTER_STRUCT_KERNEL(partial_recv,
ops::PartialRecvOpCUDAKernel,
float,
double,
#if NCCL_VERSION_CODE >= 21000
#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
plat::bfloat16,
#endif
int,
......
......@@ -123,7 +123,7 @@ PD_REGISTER_STRUCT_KERNEL(partial_send,
ops::PartialSendCUDAKernel,
float,
double,
#if NCCL_VERSION_CODE >= 21000
#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
plat::bfloat16,
#endif
int,
......
......@@ -238,7 +238,7 @@ PD_REGISTER_STRUCT_KERNEL(recv_v2,
ops::RecvOpV2CUDAKernel,
float,
double,
#if NCCL_VERSION_CODE >= 21000
#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
plat::bfloat16,
#endif
int,
......
......@@ -223,7 +223,7 @@ PD_REGISTER_STRUCT_KERNEL(send_v2,
ops::SendOpV2CUDAKernel,
float,
double,
#if NCCL_VERSION_CODE >= 21000
#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
plat::bfloat16,
#endif
int,
......
......@@ -59,7 +59,7 @@ inline ncclDataType_t ToNCCLDataType(framework::proto::VarType::Type type) {
return ncclUint8;
} else if (type == framework::proto::VarType::BOOL) {
return ncclUint8;
#if NCCL_VERSION_CODE >= 21000
#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
} else if (type == framework::proto::VarType::BF16) {
return ncclBfloat16;
#endif
......@@ -86,7 +86,7 @@ inline ncclDataType_t ToNCCLDataType(phi::DataType type) {
return ncclInt8;
} else if (type == phi::DataType::BOOL) {
return ncclUint8;
#if NCCL_VERSION_CODE >= 21000
#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
} else if (type == phi::DataType::BFLOAT16) {
return ncclBfloat16;
#endif
......
......@@ -15,6 +15,7 @@
#include <pybind11/operators.h>
#include <pybind11/stl.h>
#include "paddle/fluid/distributed/auto_parallel/spmd_rules/dist_tensor_spec.h"
#include "paddle/fluid/framework/op_desc.h"
#include "paddle/fluid/framework/var_desc.h"
#include "paddle/fluid/pybind/auto_parallel_py.h"
......@@ -29,6 +30,7 @@ namespace py = pybind11;
namespace paddle {
namespace pybind {
using paddle::distributed::auto_parallel::DistTensorSpec;
using paddle::distributed::auto_parallel::OperatorDistAttr;
using paddle::framework::OpDesc;
using paddle::framework::VarDesc;
......@@ -276,6 +278,25 @@ void BindAutoParallel(py::module *m) {
py::arg("memo"))
.def("__str__", &TensorDistAttr::to_string);
py::class_<DistTensorSpec>(*m, "DistTensorSpec")
.def(py::init<>())
.def(py::init<const DistTensorSpec &>())
.def(py::init<const std::vector<int64_t> &, const TensorDistAttr &>())
.def("get_dims_mapping", &DistTensorSpec::get_dims_mapping)
.def("set_dims_mapping", &DistTensorSpec::set_dims_mapping)
.def("get_process_mesh", &DistTensorSpec::get_process_mesh)
.def("set_process_mesh", &DistTensorSpec::set_process_mesh)
.def_property_readonly("shape", &DistTensorSpec::get_shape)
.def("__str__", &DistTensorSpec::to_string)
.def("__copy__",
[](const DistTensorSpec &self) { return DistTensorSpec(self); })
.def(
"__deepcopy__",
[](const DistTensorSpec &self, py::dict) {
return DistTensorSpec(self);
},
py::arg("memo"));
py::class_<OperatorDistAttr>(*m, "OperatorDistAttr")
.def(py::init<>())
.def(py::init<const OpDesc &>())
......
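For orientation, a minimal sketch of driving the new DistTensorSpec binding from Python once this module is rebuilt. It assumes TensorDistAttr is default-constructible from paddle.fluid.core; the shape and dims_mapping values below are illustrative only and not taken from this change.

import copy

from paddle.fluid.core import DistTensorSpec, TensorDistAttr

dist_attr = TensorDistAttr()               # assumed default-constructible
spec = DistTensorSpec([8, 16], dist_attr)  # shape + dist_attr constructor bound above
spec.set_dims_mapping([-1, 0])             # stored on the underlying dist_attr
assert spec.shape == [8, 16]               # read-only "shape" property
print(spec)                                # __str__ -> "{tensor_shape:[...], dist_attr:...}"
spec_copy = copy.deepcopy(spec)            # exercises the __deepcopy__ binding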
......@@ -1280,7 +1280,7 @@ PADDLE_API {self.get_return_type(inplace_flag=True)} {api_func_name}({self.get_d
def gen_dist_tensor_code(self):
# define the DistTensorSpec vector for input and output tensors
api_code = " \nstd::vector<paddle::distributed::auto_parallel::DistTensorSpec> input_specs;\n"
api_code = " \n std::vector<paddle::distributed::auto_parallel::DistTensorSpec> input_specs;\n"
# get DistTensorSpec for each input tensor
for tensor_name in self.inputs['names']:
......@@ -1297,8 +1297,8 @@ PADDLE_API {self.get_return_type(inplace_flag=True)} {api_func_name}({self.get_d
PADDLE_API {self.get_return_type(inplace_flag)} {api_func_name}({self.get_define_args(inplace_flag)}) {{
{self.gene_kernel_select()}
"""
if api_func_name == 'matmul':
api_code += self.gen_dist_tensor_code()
# if api_func_name == 'matmul':
# api_code += self.gen_dist_tensor_code()
if len(self.kernel['func']) > 1:
kernel_dispatch_code = ''
......
......@@ -525,6 +525,8 @@ XPUOpMap& get_kl2_ops() {
phi::DataType::FLOAT16,
phi::DataType::INT64})},
{"nearest_interp_v2_grad", XPUKernelSet({phi::DataType::FLOAT32})},
{"nll_loss", XPUKernelSet({phi::DataType::FLOAT32})},
{"nll_loss_grad", XPUKernelSet({phi::DataType::FLOAT32})},
{"not_equal",
XPUKernelSet({phi::DataType::INT64,
phi::DataType::INT32,
......
......@@ -20,4 +20,5 @@ cc_library(
SRCS dist_mapper.cc
DEPS device_mesh auto_parallel_proto phi_enforce)
cc_library(auto_parallel DEPS device_mesh process_mesh dist_attr dist_mapper)
cc_library(auto_parallel DEPS device_mesh process_mesh dist_attr dist_mapper
dist_tensor_spec)
......@@ -229,7 +229,7 @@ inline ncclDataType_t ToNCCLDataType(DataType type) {
return ncclInt8;
} else if (type == DataType::BOOL) {
return ncclUint8;
#if NCCL_VERSION_CODE >= 21000
#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000
} else if (type == DataType::BFLOAT16) {
return ncclBfloat16;
#endif
......
......@@ -274,4 +274,5 @@ PD_REGISTER_KERNEL(selu,
phi::SeluKernel,
float,
double,
phi::dtype::float16,
phi::dtype::bfloat16) {}
......@@ -24,4 +24,5 @@ PD_REGISTER_KERNEL(selu_grad,
phi::SeluGradKernel,
float,
double,
phi::dtype::float16,
phi::dtype::bfloat16) {}
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/nll_loss_grad_kernel.h"
#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/core/kernel_registry.h"
namespace phi {
template <typename T, typename Context>
void NllLossGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& label,
const paddle::optional<DenseTensor>& weight,
const DenseTensor& total_weight,
const DenseTensor& d_out,
int64_t ignore_index,
const std::string& reduction,
DenseTensor* d_x) {
using XPUType = typename XPUTypeTrait<T>::Type;
const auto& label_type = label.dtype();
bool label_type_match =
label_type == phi::DataType::INT32 || label_type == phi::DataType::INT64;
PADDLE_ENFORCE_EQ(label_type_match,
true,
phi::errors::InvalidArgument(
"Input(Label) holds the wrong type, it holds %s, but "
"desires to be %s or %s",
label_type,
phi::DataType::INT32,
phi::DataType::INT64));
auto d_out_data = d_out.data<XPUType>();
auto d_x_data = dev_ctx.template Alloc<XPUType>(d_x);
auto d_x_dims = d_x->dims();
std::vector<int64_t> d_x_shape = phi::vectorize<int64_t>(d_x_dims);
auto weight_data =
weight.get_ptr() ? weight.get_ptr()->data<float>() : nullptr;
int64_t reduction_id = 0;
if (reduction == "none") {
reduction_id = 0;
} else if (reduction == "mean") {
reduction_id = 1;
} else if (reduction == "sum") {
reduction_id = 2;
}
auto total_weight_data = total_weight.data<XPUType>();
int r;
if (label_type == phi::DataType::INT32) {
const int* label_data = label.data<int>();
r = xpu::nll_loss_grad(dev_ctx.x_context(),
d_out_data,
d_x_data,
d_x_shape,
label_data,
weight_data,
reduction_id,
ignore_index,
total_weight_data);
} else if (label_type == phi::DataType::INT64) {
const int64_t* label_data = label.data<int64_t>();
r = xpu::nll_loss_grad(dev_ctx.x_context(),
d_out_data,
d_x_data,
d_x_shape,
label_data,
weight_data,
reduction_id,
ignore_index,
total_weight_data);
}
PADDLE_ENFORCE_XDNN_SUCCESS(r, "nll_loss_grad");
}
} // namespace phi
// TODO(xiongkun): add the non-raw kernel register here.
PD_REGISTER_KERNEL(
nll_loss_grad, XPU, ALL_LAYOUT, phi::NllLossGradKernel, float) {}
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/nll_loss_kernel.h"
#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/core/kernel_registry.h"
namespace phi {
template <typename T, typename Context>
void NllLossRawKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& label,
const paddle::optional<DenseTensor>& weight,
int64_t ignore_index,
const std::string& reduction,
DenseTensor* out,
DenseTensor* total_weight) {
using XPUType = typename XPUTypeTrait<T>::Type;
const auto& label_type = label.dtype();
bool label_type_match =
label_type == phi::DataType::INT32 || label_type == phi::DataType::INT64;
PADDLE_ENFORCE_EQ(label_type_match,
true,
phi::errors::InvalidArgument(
"Input(Label) holds the wrong type, it holds %s, but "
"desires to be %s or %s",
label_type,
phi::DataType::INT32,
phi::DataType::INT64));
auto x_data = x.data<XPUType>();
auto out_data = dev_ctx.template Alloc<XPUType>(out);
auto weight_data =
weight.get_ptr() ? weight.get_ptr()->data<XPUType>() : nullptr;
auto total_weight_data = dev_ctx.template Alloc<XPUType>(total_weight);
auto x_dims = x.dims();
std::vector<int64_t> x_shape = phi::vectorize<int64_t>(x_dims);
int64_t reduction_id = 0;
if (reduction == "none") {
reduction_id = 0;
} else if (reduction == "mean") {
reduction_id = 1;
} else if (reduction == "sum") {
reduction_id = 2;
}
int r;
if (label_type == phi::DataType::INT32) {
const int* label_data = label.data<int>();
r = xpu::nll_loss(dev_ctx.x_context(),
x_data,
out_data,
total_weight_data,
x_shape,
label_data,
weight_data,
reduction_id,
ignore_index);
} else if (label_type == phi::DataType::INT64) {
const int64_t* label_data = label.data<int64_t>();
r = xpu::nll_loss(dev_ctx.x_context(),
x_data,
out_data,
total_weight_data,
x_shape,
label_data,
weight_data,
reduction_id,
ignore_index);
}
PADDLE_ENFORCE_XDNN_SUCCESS(r, "nll_loss");
}
} // namespace phi
// TODO(xiongkun): add the non-raw kernel register here.
PD_REGISTER_KERNEL(nll_loss, XPU, ALL_LAYOUT, phi::NllLossRawKernel, float) {}
......@@ -2235,14 +2235,70 @@ set +x
fi
done <<< "$test_cases";
card_test "$single_card_tests" 1
failed_test_lists=''
collect_failed_tests
xputest_error=0
retry_unittests_record=''
retry_time=3
exec_times=0
exec_time_array=('first' 'second' 'third')
exec_retry_threshold=10
is_retry_execuate=0
if [ -n "$failed_test_lists" ];then
xputest_error=1
need_retry_ut_str=$(echo "$failed_test_lists" | grep -oEi "\-.+\(" | sed 's/(//' | sed 's/- //' )
need_retry_ut_arr=(${need_retry_ut_str})
need_retry_ut_count=${#need_retry_ut_arr[@]}
retry_unittests=$(echo "$failed_test_lists" | grep -oEi "\-.+\(" | sed 's/(//' | sed 's/- //' )
if [ $need_retry_ut_count -lt $exec_retry_threshold ];then
while ( [ $exec_times -lt $retry_time ] )
do
set +e
retry_unittests_record="$retry_unittests_record$failed_test_lists"
failed_test_lists_ult=`echo "${failed_test_lists}"`
set -e
if [[ "${exec_times}" == "1" ]];then
if [[ "${failed_test_lists}" == "" ]];then
break
else
retry_unittests=$(echo "$failed_test_lists" | grep -oEi "\-.+\(" | sed 's/(//' | sed 's/- //' )
fi
fi
echo "========================================="
echo "This is the ${exec_time_array[$exec_times]} time to re-run"
echo "========================================="
echo "The following unittest will be re-run:"
echo "${retry_unittests}"
echo "========================================="
retry_unittests_regular=''
for line in ${retry_unittests[@]} ;
do
if [[ "$retry_unittests_regular" == "" ]];then
retry_unittests_regular="^$line$"
else
retry_unittests_regular="$retry_unittests_regular|^$line$"
fi
done
rm -f $tmp_dir/*
failed_test_lists=''
ctest -R "($retry_unittests_regular)" --output-on-failure -j $2 | tee $tmpfile
collect_failed_tests
exec_times=$[$exec_times+1]
done
else
# More than 10 unit tests failed, so the retry step is skipped
is_retry_execuate=1
fi
fi
set -x
ut_endTime_s=`date +%s`
echo "XPU testCase Time: $[ $ut_endTime_s - $ut_startTime_s ]s"
python ${PADDLE_ROOT}/build/test/xpu/get_test_cover_info.py
unset XPU_OP_LIST_DIR
if [[ "$EXIT_CODE" != "0" ]]; then
exit 8;
if [ "$xputest_error" != 0 ];then
show_ut_retry_result
fi
fi
}
......
......@@ -12,5 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License
from paddle.fluid.core import DistTensorSpec # noqa: F401
from paddle.fluid.core import OperatorDistAttr # noqa: F401
from paddle.fluid.core import TensorDistAttr # noqa: F401
......@@ -105,6 +105,18 @@ def _update_dims_mapping_for_matmul(dist_op):
changed = False
op_desc = dist_op.serial_op.desc
op_dist_attr = dist_op.dist_attr
# test DistTensorSpec
# input_name_list = []
# output_name_list = []
# input_name_list.append(op_desc.input('X')[0])
# input_name_list.append(op_desc.input('Y')[0])
# output_name_list.append(op_desc.output('Out')[0])
# attr_name_list = ['trans_x', 'trans_y']
# input_specs, output_specs, attrs = wrap_data_for_completion(
# dist_op, input_name_list, output_name_list, attr_name_list
# )
x_name = op_desc.input('X')[0]
y_name = op_desc.input('Y')[0]
out_name = op_desc.output('Out')[0]
......
......@@ -26,7 +26,7 @@ from paddle.framework import core
from paddle.framework.io_utils import is_belong_to_optimizer, is_parameter
from paddle.static import Variable
from .dist_attribute import OperatorDistAttr, TensorDistAttr
from .dist_attribute import DistTensorSpec, OperatorDistAttr, TensorDistAttr
from .process_group import get_all_process_groups
from .process_mesh import ProcessMesh
......@@ -2357,50 +2357,64 @@ def is_dep_skip_op(op):
return False
# def wrap_data_for_completion(
# dist_op: DistributedOperator,
# input_names: list,
# output_names: list,
# attr_names: list
# ):
# """
# Get data used in inferring distributed attributes, including:
# 1. DistTensorSpec for each input and output tensor of this dist_op.
# 2. Operator attributes of this dist_op, e.g. transpose_x in matmul op.
#
# Args:
# dist_op: the DistributedOperator
# input_names: list, name of the dist_op's input tensors
# output_names: list, name of the dist_op's output tensors
# attr_names: list, attribute name of the dist_op's corresponding serial op
#
# Returns:
# input_specs: list, DistTensorSpec for each input tensor of the dist_op
# output_specs: list, DistTensorSpec for each output tensor of the dist_op
# attrs: dict, attribute map of the dist op
# """
#
# input_specs = []
# output_specs = []
# attrs = {}
#
# serial_op = dist_op.serial_op
#
# # Construct each input tensor's DistTensorSpec with shape and dist_attr
# for name in input_names:
# tensor_dist_attr = dist_op.dist_attr.get_input_dist_attr(name)
# var = serial_op.block._var_recursive(name)
# tensor_shape = var.shape
# dist_spec = DistTensorSpec(tensor_shape, tensor_dist_attr)
# input_specs.append(dist_spec)
#
# # Construct each output tensor's DistTensorSpec with shape and dist_attr
# for name in output_names:
# tensor_dist_attr = dist_op.dist_attr.get_output_dist_attr(name)
# var = serial_op.block._var_recursive(name)
# tensor_shape = var.shape
# dist_spec = DistTensorSpec(tensor_shape, tensor_dist_attr)
# output_specs.append(dist_spec)
#
# for attr_name in attr_names:
# attrs[attr_name] = serial_op.desc.attr(attr_name)
def wrap_data_for_completion(
dist_op, input_names: list, output_names: list, attr_names: list
):
"""
Get data used in inferring distributed attributes, including:
1. DistTensorSpec for each input and output tensor of this dist_op.
2. Operator attributes of this dist_op, e.g. transpose_x in matmul op.
Args:
dist_op: the DistributedOperator
input_names: list, name of the dist_op's input tensors
output_names: list, name of the dist_op's output tensors
attr_names: list, attribute name of the dist_op's corresponding serial op
Returns:
input_specs: list, DistTensorSpec for each input tensor of the dist_op
output_specs: list, DistTensorSpec for each output tensor of the dist_op
attrs: dict, attribute map of the dist op
Usage:
op_desc = dist_op.serial_op.desc
input_name_list = []
output_name_list = []
input_name_list.append(op_desc.input('X')[0]) # 'X' is the arg name for op
input_name_list.append(op_desc.input('Y')[0])
output_name_list.append(op_desc.output('Out')[0])
attr_name_list = ['trans_x', 'trans_y']
input_specs, output_specs, attrs = wrap_data_for_completion(
dist_op,
input_name_list,
output_name_list,
attr_name_list)
"""
input_specs = []
output_specs = []
attrs = {}
serial_op = dist_op.serial_op
# Construct each input tensor's DistTensorSpec with shape and dist_attr
for name in input_names:
tensor_dist_attr = dist_op.dist_attr.get_input_dist_attr(name)
var = serial_op.block._var_recursive(name)
tensor_shape = var.shape
dist_spec = DistTensorSpec(tensor_shape, tensor_dist_attr)
input_specs.append(dist_spec)
# Construct each output tensor's DistTensorSpec with shape and dist_attr
for name in output_names:
tensor_dist_attr = dist_op.dist_attr.get_output_dist_attr(name)
var = serial_op.block._var_recursive(name)
tensor_shape = var.shape
dist_spec = DistTensorSpec(tensor_shape, tensor_dist_attr)
output_specs.append(dist_spec)
for attr_name in attr_names:
attrs[attr_name] = serial_op.desc.attr(attr_name)
return input_specs, output_specs, attrs
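# A sketch (not part of this change) of consuming the returned triple inside a
# rule, using only the DistTensorSpec accessors bound in this commit; the
# attribute names follow the matmul example in the docstring above.
#
#   input_specs, output_specs, attrs = wrap_data_for_completion(
#       dist_op, input_name_list, output_name_list, ['trans_x', 'trans_y'])
#   for spec in input_specs:
#       print(spec.shape, spec.get_dims_mapping())
#   trans_x, trans_y = attrs['trans_x'], attrs['trans_y']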
......@@ -1116,7 +1116,15 @@ set(TEST_CINN_OPS
test_tile_op
test_roll_op
test_sum_op
test_elementwise_min_op)
test_elementwise_min_op
test_bitwise_op
test_compare_op
test_shape_op
test_assign_value_op
test_lookup_table_op
test_lookup_table_v2_op
test_norm_op
test_one_hot_v2_op)
foreach(TEST_CINN_OPS ${TEST_CINN_OPS})
if(WITH_CINN)
......
......@@ -49,7 +49,7 @@ class TestAssignValueOp(eager_op_test.OpTest):
self.attrs["fp32_values"] = [float(v) for v in self.value.flat]
def test_forward(self):
self.check_output()
self.check_output(check_cinn=True)
class TestAssignValueOp2(TestAssignValueOp):
......
......@@ -43,7 +43,7 @@ class TestBitwiseAnd(OpTest):
self.outputs = {'Out': out}
def test_check_output(self):
self.check_output()
self.check_output(check_cinn=True)
def test_check_grad(self):
pass
......@@ -150,7 +150,7 @@ class TestBitwiseOr(OpTest):
self.outputs = {'Out': out}
def test_check_output(self):
self.check_output()
self.check_output(check_cinn=True)
def test_check_grad(self):
pass
......@@ -258,7 +258,7 @@ class TestBitwiseXor(OpTest):
self.outputs = {'Out': out}
def test_check_output(self):
self.check_output()
self.check_output(check_cinn=True)
def test_check_grad(self):
pass
......@@ -363,7 +363,7 @@ class TestBitwiseNot(OpTest):
self.outputs = {'Out': out}
def test_check_output(self):
self.check_output()
self.check_output(check_cinn=True)
def test_check_grad(self):
pass
......
......@@ -35,7 +35,7 @@ def create_test_class(op_type, typename, callback):
self.op_type = op_type
def test_output(self):
self.check_output()
self.check_output(check_cinn=True)
def test_errors(self):
paddle.enable_static()
......@@ -460,7 +460,7 @@ def create_bf16_case(op_type, callback):
self.outputs = {'Out': real_result}
def test_check_output(self):
self.check_output()
self.check_output(check_cinn=True)
cls_name = f"BF16TestCase_{op_type}"
TestCompareOpBF16Op.__name__ = cls_name
......
......@@ -44,7 +44,7 @@ class TestExpandV2OpRank1(OpTest):
self.expand_times = [1]
def test_check_output(self):
self.check_output()
self.check_output(check_cinn=self.enable_cinn)
def test_check_grad(self):
self.check_grad(['X'], 'Out', check_prim=True)
......@@ -107,10 +107,10 @@ class TestExpandV2OpRank1_tensor_attr(OpTest):
self.infer_expand_shape = [-1]
def test_check_output(self):
self.check_output()
self.check_output(check_cinn=True)
def test_check_grad(self):
self.check_grad(['X'], 'Out')
self.check_grad(['X'], 'Out', check_cinn=True)
class TestExpandV2OpRank2_Corner_tensor_attr(TestExpandV2OpRank1_tensor_attr):
......@@ -144,10 +144,10 @@ class TestExpandV2OpRank1_tensor(OpTest):
self.expand_shape = [2, 100]
def test_check_output(self):
self.check_output()
self.check_output(check_cinn=True)
def test_check_grad(self):
self.check_grad(['X'], 'Out')
self.check_grad(['X'], 'Out', check_cinn=True)
# Situation 4: input x is Integer
......@@ -165,7 +165,7 @@ class TestExpandV2OpInteger(OpTest):
self.outputs = {'Out': output}
def test_check_output(self):
self.check_output()
self.check_output(check_cinn=True)
# Situation 5: input x is Bool
......@@ -181,7 +181,7 @@ class TestExpandV2OpBoolean(OpTest):
self.outputs = {'Out': output}
def test_check_output(self):
self.check_output()
self.check_output(check_cinn=True)
# Situation 6: input x is Integer
......@@ -199,7 +199,7 @@ class TestExpandV2OpInt64_t(OpTest):
self.outputs = {'Out': output}
def test_check_output(self):
self.check_output()
self.check_output(check_cinn=True)
# Situation 7: input x is Float16
......@@ -218,7 +218,7 @@ class TestExpandV2FP16Op(OpTest):
self.outputs = {'Out': output}
def test_check_output(self):
self.check_output()
self.check_output(check_cinn=True)
def test_check_grad(self):
self.check_grad(['X'], 'Out', check_prim=True)
......@@ -245,7 +245,7 @@ class TestExpandV2BF16Op(OpTest):
def test_check_output(self):
place = core.CUDAPlace(0)
self.check_output_with_place(place)
self.check_output_with_place(place, check_cinn=True)
def test_check_grad(self):
place = core.CUDAPlace(0)
......
......@@ -39,10 +39,10 @@ class TestLookupTableOp(OpTest):
self.outputs = {'Out': table[ids]}
def test_check_output(self):
self.check_output()
self.check_output(check_cinn=True)
def test_check_grad(self):
self.check_grad(['W'], 'Out', no_grad_set=set('Ids'))
self.check_grad(['W'], 'Out', no_grad_set=set('Ids'), check_cinn=True)
class TestLookupTableOpWithTensorIds(OpTest):
......@@ -56,10 +56,10 @@ class TestLookupTableOpWithTensorIds(OpTest):
self.outputs = {'Out': table[ids.flatten()].reshape((2, 4, 5, 31))}
def test_check_output(self):
self.check_output()
self.check_output(check_cinn=True)
def test_check_grad(self):
self.check_grad(['W'], 'Out', no_grad_set=set('Ids'))
self.check_grad(['W'], 'Out', no_grad_set=set('Ids'), check_cinn=True)
@skip_check_grad_ci(
......@@ -73,7 +73,7 @@ class TestLookupTableOpWithPadding(TestLookupTableOp):
padding_idx = np.random.choice(ids, 1)[0]
self.outputs['Out'][ids == padding_idx] = np.zeros(31)
self.attrs = {'padding_idx': int(padding_idx)}
self.check_output()
self.check_output(check_cinn=True)
@skip_check_grad_ci(
......@@ -88,7 +88,7 @@ class TestLookupTableOpWithTensorIdsAndPadding(TestLookupTableOpWithTensorIds):
padding_idx = np.random.choice(flatten_idx, 1)[0]
self.outputs['Out'][np.squeeze(ids == padding_idx)] = np.zeros(31)
self.attrs = {'padding_idx': padding_idx}
self.check_output()
self.check_output(check_cinn=True)
class TestLookupTableWIsSelectedRows(unittest.TestCase):
......@@ -212,7 +212,7 @@ class TestLookupTableOpInt8(OpTest):
self.outputs = {'Out': table[ids]}
def test_check_output(self):
self.check_output()
self.check_output(check_cinn=True)
def test_check_grad(self):
# since int8 type only be used in test and inference, there is
......@@ -233,7 +233,7 @@ class TestLookupTableOpWithTensorIdsInt8(OpTest):
self.outputs = {'Out': table[ids.flatten()].reshape((2, 4, 5, 31))}
def test_check_output(self):
self.check_output()
self.check_output(check_cinn=True)
def test_check_grad(self):
# since int8 type only be used in test and inference, there is
......@@ -247,7 +247,7 @@ class TestLookupTableOpWithPaddingInt8(TestLookupTableOpInt8):
padding_idx = np.random.choice(ids, 1)[0]
self.outputs['Out'][ids == padding_idx] = np.zeros(31)
self.attrs = {'padding_idx': int(padding_idx)}
self.check_output()
self.check_output(check_cinn=True)
def test_check_grad(self):
# Since paddings are not trainable and fixed in forward, the gradient of
......@@ -264,7 +264,7 @@ class TestLookupTableOpWithTensorIdsAndPaddingInt8(
padding_idx = np.random.choice(flatten_idx, 1)[0]
self.outputs['Out'][np.squeeze(ids == padding_idx)] = np.zeros(31)
self.attrs = {'padding_idx': padding_idx}
self.check_output()
self.check_output(check_cinn=True)
def test_check_grad(self):
# Since paddings are not trainable and fixed in forward, the gradient of
......@@ -354,7 +354,7 @@ class TestLookupTableOpInt16(OpTest):
self.outputs = {'Out': table[ids]}
def test_check_output(self):
self.check_output()
self.check_output(check_cinn=True)
@skip_check_grad_ci(reason="Int16 type only be used in test and inference.")
......@@ -371,7 +371,7 @@ class TestLookupTableOpWithTensorIdsInt16(OpTest):
self.outputs = {'Out': table[ids.flatten()].reshape((2, 4, 5, 31))}
def test_check_output(self):
self.check_output()
self.check_output(check_cinn=True)
@skip_check_grad_ci(reason="Int16 type only be used in test and inference.")
......@@ -381,7 +381,7 @@ class TestLookupTableOpWithPaddingInt16(TestLookupTableOpInt16):
padding_idx = np.random.choice(ids, 1)[0]
self.outputs['Out'][ids == padding_idx] = np.zeros(31)
self.attrs = {'padding_idx': int(padding_idx)}
self.check_output()
self.check_output(check_cinn=True)
@skip_check_grad_ci(reason="Int16 type only be used in test and inference.")
......@@ -394,7 +394,7 @@ class TestLookupTableOpWithTensorIdsAndPaddingInt16(
padding_idx = np.random.choice(flatten_idx, 1)[0]
self.outputs['Out'][np.squeeze(ids == padding_idx)] = np.zeros(31)
self.attrs = {'padding_idx': padding_idx}
self.check_output()
self.check_output(check_cinn=True)
class TestLookupTableWIsSelectedRowsInt16(unittest.TestCase):
......
......@@ -56,10 +56,10 @@ class TestLookupTableOp(OpTest):
return "int64"
def test_check_output(self):
self.check_output()
self.check_output(check_cinn=True)
def test_check_grad(self):
self.check_grad(['W'], 'Out', no_grad_set=set('Ids'))
self.check_grad(['W'], 'Out', no_grad_set=set('Ids'), check_cinn=True)
class TestLookupTableOpInt16(OpTest):
......@@ -87,10 +87,10 @@ class TestLookupTableOpWithTensorIds(OpTest):
self.outputs = {'Out': table[ids.flatten()].reshape((2, 4, 5, 31))}
def test_check_output(self):
self.check_output()
self.check_output(check_cinn=True)
def test_check_grad(self):
self.check_grad(['W'], 'Out', no_grad_set=set('Ids'))
self.check_grad(['W'], 'Out', no_grad_set=set('Ids'), check_cinn=True)
@skip_check_grad_ci(
......@@ -104,7 +104,7 @@ class TestLookupTableOpWithPadding(TestLookupTableOp):
padding_idx = np.random.choice(ids, 1)[0]
self.outputs['Out'][ids == padding_idx] = np.zeros(31)
self.attrs = {'padding_idx': int(padding_idx)}
self.check_output()
self.check_output(check_cinn=True)
@skip_check_grad_ci(
......@@ -119,7 +119,7 @@ class TestLookupTableOpWithTensorIdsAndPadding(TestLookupTableOpWithTensorIds):
padding_idx = np.random.choice(flatten_idx, 1)[0]
self.outputs['Out'][np.squeeze(ids == padding_idx)] = np.zeros(31)
self.attrs = {'padding_idx': padding_idx}
self.check_output()
self.check_output(check_cinn=True)
class TestLookupTableWIsSelectedRows(unittest.TestCase):
......
......@@ -100,19 +100,29 @@ class Generator:
self.outputs = {'Out': Out}
def test_check_output(self):
self.check_output()
self.check_output(check_cinn=True)
def test_check_grad_normal(self):
self.check_grad(['X', 'Y'], 'Out', max_relative_error=1e-3)
self.check_grad(
['X', 'Y'], 'Out', max_relative_error=1e-3, check_cinn=True
)
def test_check_grad_ignore_x(self):
self.check_grad(
['Y'], 'Out', max_relative_error=1e-3, no_grad_set=set("X")
['Y'],
'Out',
max_relative_error=1e-3,
no_grad_set=set("X"),
check_cinn=True,
)
def test_check_grad_ignore_y(self):
self.check_grad(
['X'], 'Out', max_relative_error=1e-3, no_grad_set=set('Y')
['X'],
'Out',
max_relative_error=1e-3,
no_grad_set=set('Y'),
check_cinn=True,
)
......
......@@ -103,13 +103,28 @@ class TestMatMulV2Op(OpTest):
self.outputs = {'Out': result}
def test_check_output(self):
self.check_output()
self.check_output(
check_cinn=self.check_cinn if hasattr(self, 'check_cinn') else True
)
def test_check_grad(self):
if core.is_compiled_with_rocm():
self.check_grad(['X', 'Y'], 'Out', max_relative_error=1e-2)
self.check_grad(
['X', 'Y'],
'Out',
max_relative_error=1e-2,
check_cinn=self.check_cinn
if hasattr(self, 'check_cinn')
else True,
)
else:
self.check_grad(['X', 'Y'], 'Out')
self.check_grad(
['X', 'Y'],
'Out',
check_cinn=self.check_cinn
if hasattr(self, 'check_cinn')
else True,
)
class TestMatMulOp2(TestMatMulV2Op):
......@@ -290,6 +305,7 @@ class TestMatMulOp16(TestMatMulV2Op):
self.y_shape = (1, 2, 2, 100, 2)
self.trans_x = False
self.trans_y = False
self.check_cinn = False
class TestMatMulOp17(TestMatMulV2Op):
......@@ -343,7 +359,13 @@ def create_test_fp16_class(parent, atol=0.001, max_relative_error=1.0):
if core.is_compiled_with_cuda():
place = core.CUDAPlace(0)
if core.is_float16_supported(place):
self.check_output_with_place(place, atol=atol)
self.check_output_with_place(
place,
atol=atol,
check_cinn=self.check_cinn
if hasattr(self, 'check_cinn')
else True,
)
def test_check_grad(self):
place = core.CUDAPlace(0)
......@@ -353,6 +375,9 @@ def create_test_fp16_class(parent, atol=0.001, max_relative_error=1.0):
['X', 'Y'],
'Out',
max_relative_error=max_relative_error,
check_cinn=self.check_cinn
if hasattr(self, 'check_cinn')
else True,
)
cls_name = "{}_{}".format(parent.__name__, "Fp16")
......@@ -405,7 +430,13 @@ def create_test_bf16_class(parent, atol=0.01):
def test_check_output(self):
place = core.CUDAPlace(0)
self.check_output_with_place(place, atol=atol)
self.check_output_with_place(
place,
atol=atol,
check_cinn=self.check_cinn
if hasattr(self, 'check_cinn')
else True,
)
def test_check_grad_x(self):
place = core.CUDAPlace(0)
......@@ -416,6 +447,9 @@ def create_test_bf16_class(parent, atol=0.01):
'Out',
no_grad_set={'Y'},
user_defined_grads=[numeric_grads],
check_cinn=self.check_cinn
if hasattr(self, 'check_cinn')
else True,
)
def test_check_grad_y(self):
......@@ -427,6 +461,9 @@ def create_test_bf16_class(parent, atol=0.01):
'Out',
no_grad_set={'X'},
user_defined_grads=[numeric_grads],
check_cinn=self.check_cinn
if hasattr(self, 'check_cinn')
else True,
)
def test_check_grad(self):
......@@ -596,7 +633,7 @@ class TestComplexMatMulOp(OpTest):
self.grad_y = np.matmul(np.conj(self.x).T, self.grad_out)
def test_check_output(self):
self.check_output()
self.check_output(check_cinn=False)
def test_check_grad_normal(self):
self.check_grad(
......@@ -604,6 +641,7 @@ class TestComplexMatMulOp(OpTest):
'Out',
user_defined_grads=[self.grad_x, self.grad_y],
user_defined_grad_outputs=[self.grad_out],
check_cinn=False,
)
def test_check_grad_ingore_x(self):
......@@ -613,6 +651,7 @@ class TestComplexMatMulOp(OpTest):
no_grad_set=set("X"),
user_defined_grads=[self.grad_y],
user_defined_grad_outputs=[self.grad_out],
check_cinn=False,
)
def test_check_grad_ingore_y(self):
......@@ -622,6 +661,7 @@ class TestComplexMatMulOp(OpTest):
no_grad_set=set('Y'),
user_defined_grads=[self.grad_x],
user_defined_grad_outputs=[self.grad_out],
check_cinn=False,
)
......@@ -662,7 +702,7 @@ class TestComplexMatMulOpBroadcast(OpTest):
)
def test_check_output(self):
self.check_output()
self.check_output(check_cinn=False)
def test_check_grad_normal(self):
self.check_grad(
......@@ -670,6 +710,7 @@ class TestComplexMatMulOpBroadcast(OpTest):
'Out',
user_defined_grads=[self.grad_x, self.grad_y],
user_defined_grad_outputs=[self.grad_out],
check_cinn=False,
)
def test_check_grad_ingore_x(self):
......@@ -679,6 +720,7 @@ class TestComplexMatMulOpBroadcast(OpTest):
no_grad_set=set("X"),
user_defined_grads=[self.grad_y],
user_defined_grad_outputs=[self.grad_out],
check_cinn=False,
)
def test_check_grad_ingore_y(self):
......@@ -688,6 +730,7 @@ class TestComplexMatMulOpBroadcast(OpTest):
no_grad_set=set('Y'),
user_defined_grads=[self.grad_x],
user_defined_grad_outputs=[self.grad_out],
check_cinn=False,
)
......
......@@ -48,10 +48,10 @@ class TestNormOp(OpTest):
self.python_out_sig = ['Out']
def test_check_output(self):
self.check_output()
self.check_output(check_cinn=True)
def test_check_grad(self):
self.check_grad(['X'], 'Out')
self.check_grad(['X'], 'Out', check_cinn=True)
def init_test_case(self):
self.shape = [2, 3, 4, 5]
......@@ -109,7 +109,7 @@ class TestNormOp6(TestNormOp):
self.dtype = "float32"
def test_check_grad(self):
self.check_grad(['X'], 'Out', max_relative_error=0.008)
self.check_grad(['X'], 'Out', max_relative_error=0.008, check_cinn=True)
@unittest.skipIf(
......@@ -120,11 +120,17 @@ class TestNormOp7(TestNormOp):
self.dtype = "float16"
def test_check_output(self):
self.check_output_with_place(fluid.core.CUDAPlace(0), atol=5e-2)
self.check_output_with_place(
fluid.core.CUDAPlace(0), atol=5e-2, check_cinn=True
)
def test_check_grad(self):
self.check_grad_with_place(
fluid.core.CUDAPlace(0), ['X'], 'Out', max_relative_error=0.05
fluid.core.CUDAPlace(0),
['X'],
'Out',
max_relative_error=0.05,
check_cinn=True,
)
......@@ -147,7 +153,7 @@ class TestNormTestOp(OpTest):
def test_check_output(self):
# dynamic graph just supports float tensor
self.check_output(check_dygraph=True)
self.check_output(check_dygraph=True, check_cinn=True)
def test_check_grad(self):
pass
......@@ -176,11 +182,17 @@ class TestNormBF16Op(OpTest):
self.python_out_sig = ['Out']
def test_check_output(self):
self.check_output_with_place(core.CUDAPlace(0), atol=1e-1)
self.check_output_with_place(
core.CUDAPlace(0), atol=1e-1, check_cinn=True
)
def test_check_grad(self):
self.check_grad_with_place(
core.CUDAPlace(0), ['X'], 'Out', max_relative_error=1e-2
core.CUDAPlace(0),
['X'],
'Out',
max_relative_error=1e-2,
check_cinn=True,
)
def init_test_case(self):
......
......@@ -49,7 +49,7 @@ class TestOneHotOp(OpTest):
self.outputs = {'Out': (out, x_lod)}
def test_check_output(self):
self.check_output()
self.check_output(check_cinn=True)
class TestOneHotOp_attr(OpTest):
......@@ -57,6 +57,7 @@ class TestOneHotOp_attr(OpTest):
self.op_type = 'one_hot_v2'
self.python_api = one_hot_wrapper
depth = 10
depth_np = np.array(10).astype('int32')
dimension = 12
x_lod = [[4, 1, 3, 3]]
x = [np.random.randint(0, depth - 1) for i in range(sum(x_lod[0]))]
......@@ -69,12 +70,12 @@ class TestOneHotOp_attr(OpTest):
for i in range(np.product(x.shape)):
out[i, 0, x[i]] = 1.0
self.inputs = {'X': (x, x_lod)}
self.inputs = {'X': (x, x_lod), 'depth_tensor': depth_np}
self.attrs = {'dtype': int(core.VarDesc.VarType.FP32), 'depth': depth}
self.outputs = {'Out': (out, x_lod)}
def test_check_output(self):
self.check_output()
self.check_output(check_cinn=True)
class TestOneHotOp_default_dtype(OpTest):
......@@ -98,7 +99,7 @@ class TestOneHotOp_default_dtype(OpTest):
self.outputs = {'Out': (out, x_lod)}
def test_check_output(self):
self.check_output()
self.check_output(check_cinn=True)
class TestOneHotOp_default_dtype_attr(OpTest):
......@@ -106,6 +107,7 @@ class TestOneHotOp_default_dtype_attr(OpTest):
self.op_type = 'one_hot_v2'
self.python_api = one_hot_wrapper
depth = 10
depth_np = np.array(depth).astype('int32')
dimension = 12
x_lod = [[4, 1, 3, 3]]
x = [np.random.randint(0, depth - 1) for i in range(sum(x_lod[0]))]
......
......@@ -15,7 +15,7 @@
import unittest
import numpy as np
from eager_op_test import OpTest
from eager_op_test import OpTest, convert_float_to_uint16
import paddle
import paddle.nn.functional as F
......@@ -43,14 +43,16 @@ class SeluTest(OpTest):
self.op_type = "selu"
self.python_api = paddle.nn.functional.selu
self.x_shape = [3, 5, 5, 10]
self.dtype = np.float64
self.init_x_shape()
self.init_dtype()
alpha = 1.6732632423543772848170429916717
scale = 1.0507009873554804934193349852946
x = np.random.normal(size=self.x_shape).astype(self.dtype)
if self.dtype == np.uint16:
x = np.random.normal(size=self.x_shape).astype(np.float32)
else:
x = np.random.normal(size=self.x_shape).astype(self.dtype)
# Since the zero point of selu is not differentiable, avoid randomizing
# zero.
......@@ -58,8 +60,12 @@ class SeluTest(OpTest):
out = ref_selu(x, scale, alpha)
self.inputs = {'X': x}
self.outputs = {'Out': out}
if self.dtype == np.uint16:
self.inputs = {'X': convert_float_to_uint16(x)}
self.outputs = {'Out': convert_float_to_uint16(out)}
else:
self.inputs = {'X': x}
self.outputs = {'Out': out}
self.attrs = {
'alpha': alpha,
......@@ -70,7 +76,7 @@ class SeluTest(OpTest):
pass
def init_dtype(self):
pass
self.dtype = np.float64
def test_check_output(self):
self.check_output()
......@@ -79,6 +85,27 @@ class SeluTest(OpTest):
self.check_grad(['X'], 'Out')
class SeluTestFP16OP(SeluTest):
def init_dtype(self):
self.dtype = np.float16
@unittest.skipIf(
not core.is_compiled_with_cuda()
or not core.is_bfloat16_supported(core.CUDAPlace(0)),
"core is not compiled with CUDA and do not support bfloat16",
)
class SeluTestBF16OP(SeluTest):
def init_dtype(self):
self.dtype = np.uint16
def test_check_output(self):
self.check_output_with_place(core.CUDAPlace(0))
def test_check_grad(self):
self.check_grad_with_place(core.CUDAPlace(0), ['X'], 'Out')
class TestSeluAPI(unittest.TestCase):
# test paddle.nn.SELU, paddle.nn.functional.selu
def setUp(self):
......
......@@ -36,7 +36,7 @@ class TestShapeOp(OpTest):
self.dtype = np.float32
def test_check_output(self):
self.check_output()
self.check_output(check_cinn=True)
class case1(TestShapeOp):
......@@ -125,7 +125,7 @@ class TestShapeOpBf16(OpTest):
def test_check_output(self):
place = core.CUDAPlace(0)
self.check_output_with_place(place)
self.check_output_with_place(place, check_cinn=True)
class case1Bf16(TestShapeOpBf16):
......
......@@ -62,10 +62,10 @@ class TestSumOp(OpTest):
self.dtype = np.float64
def test_check_output(self):
self.check_output(check_prim=True)
self.check_output(check_prim=True, check_cinn=True)
def test_check_grad(self):
self.check_grad(['x0'], 'Out', check_prim=True)
self.check_grad(['x0'], 'Out', check_prim=True, check_cinn=True)
class TestSelectedRowsSumOp(unittest.TestCase):
......@@ -299,14 +299,14 @@ class TestFP16SumOp(TestSumOp):
def test_check_output(self):
place = core.CUDAPlace(0)
if core.is_float16_supported(place):
self.check_output_with_place(place)
self.check_output_with_place(place, check_cinn=True)
# FIXME: Because of the precision fp16, max_relative_error
# should be 0.15 here.
def test_check_grad(self):
place = core.CUDAPlace(0)
if core.is_float16_supported(place):
self.check_grad(['x0'], 'Out')
self.check_grad(['x0'], 'Out', check_cinn=True)
def create_test_sum_fp16_class(parent):
......
......@@ -12,7 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import random
import tempfile
import unittest
import numpy as np
......@@ -41,7 +43,10 @@ class InferencePassTest(unittest.TestCase):
self.dynamic_shape_params = None
self.enable_lite = False
self.lite_parameters = None
self.path = "./inference_pass/" + self.__class__.__name__ + "/"
self.temp_dir = tempfile.TemporaryDirectory()
self.path = os.path.join(
self.temp_dir.name, 'inference_pass', self.__class__.__name__
)
np.random.seed(1)
random.seed(1)
......
......@@ -53,8 +53,9 @@ class TensorRTSubgraphPassActivationTest(InferencePassTest):
def test_check_output(self):
if core.is_compiled_with_cuda():
use_gpu = True
if os.path.exists(self.path + "_opt_cache"):
shutil.rmtree(self.path + "_opt_cache")
opt_path = os.path.join(self.path, '_opt_cache')
if os.path.exists(opt_path):
shutil.rmtree(opt_path)
if (
self.trt_parameters.precision
== AnalysisConfig.Precision.Float32
......
......@@ -53,8 +53,9 @@ class TensorRTSubgraphPassElementwiseBroadcastTest(InferencePassTest):
return paddle.tensor.math.add(x=data1, y=data2)
def test_check_output(self):
if os.path.exists(self.path + "_opt_cache"):
shutil.rmtree(self.path + "_opt_cache")
opt_path = os.path.join(self.path, '_opt_cache')
if os.path.exists(opt_path):
shutil.rmtree(opt_path)
if core.is_compiled_with_cuda():
use_gpu = True
self.check_output_with_option(use_gpu)
......
......@@ -55,8 +55,9 @@ class TRTInstanceNormTest(InferencePassTest):
self.fetch_list = [out]
def check_output(self, remove_cache=False):
if remove_cache and os.path.exists(self.path + "_opt_cache"):
shutil.rmtree(self.path + "_opt_cache")
opt_path = os.path.join(self.path, '_opt_cache')
if remove_cache and os.path.exists(opt_path):
shutil.rmtree(opt_path)
if core.is_compiled_with_cuda():
use_gpu = True
atol = 1e-5
......
......@@ -84,8 +84,9 @@ class TensorRTPool3dTest(InferencePassTest):
self.fetch_list = [pool_out]
def check_output(self):
if os.path.exists(self.path + "_opt_cache"):
shutil.rmtree(self.path + "_opt_cache")
opt_path = os.path.join(self.path, '_opt_cache')
if os.path.exists(opt_path):
shutil.rmtree(opt_path)
if core.is_compiled_with_cuda():
use_gpu = True
if self.precision == AnalysisConfig.Precision.Float32:
......@@ -200,8 +201,9 @@ class TensorRTAdaptiveAvgPool3DTest(InferencePassTest):
self.fetch_list = [pool_out]
def check_output(self):
if os.path.exists(self.path + "_opt_cache"):
shutil.rmtree(self.path + "_opt_cache")
opt_path = os.path.join(self.path, '_opt_cache')
if os.path.exists(opt_path):
shutil.rmtree(opt_path)
if core.is_compiled_with_cuda():
use_gpu = True
self.check_output_with_option(use_gpu)
......@@ -300,8 +302,9 @@ class TensorRTAdaptiveMaxPool3DTest(InferencePassTest):
self.fetch_list = [pool_out]
def check_output(self):
if os.path.exists(self.path + "_opt_cache"):
shutil.rmtree(self.path + "_opt_cache")
opt_path = os.path.join(self.path, '_opt_cache')
if os.path.exists(opt_path):
shutil.rmtree(opt_path)
if core.is_compiled_with_cuda():
use_gpu = True
self.check_output_with_option(use_gpu)
......
......@@ -86,8 +86,9 @@ class TensorRTPoolTest(InferencePassTest):
self.fetch_list = [out]
def check_output(self):
if os.path.exists(self.path + "_opt_cache"):
shutil.rmtree(self.path + "_opt_cache")
opt_path = os.path.join(self.path, '_opt_cache')
if os.path.exists(opt_path):
shutil.rmtree(opt_path)
if core.is_compiled_with_cuda():
use_gpu = True
if self.precision == AnalysisConfig.Precision.Float32:
......
......@@ -60,8 +60,9 @@ class SkipLayernormFusePassTest0(InferencePassTest):
return paddle.add(data1, data2)
def test_check_output(self):
if os.path.exists(self.path + "_opt_cache"):
shutil.rmtree(self.path + "_opt_cache")
opt_path = os.path.join(self.path, '_opt_cache')
if os.path.exists(opt_path):
shutil.rmtree(opt_path)
if core.is_compiled_with_cuda():
use_gpu = True
self.check_output_with_option(use_gpu, atol=0.01, rtol=0.00001)
......@@ -107,8 +108,9 @@ class SkipLayernormFusePassTest1(InferencePassTest):
return paddle.add(data1, data2)
def test_check_output(self):
if os.path.exists(self.path + "_opt_cache"):
shutil.rmtree(self.path + "_opt_cache")
opt_path = os.path.join(self.path, '_opt_cache')
if os.path.exists(opt_path):
shutil.rmtree(opt_path)
if core.is_compiled_with_cuda():
use_gpu = True
self.check_output_with_option(use_gpu, atol=0.01, rtol=0.00001)
......@@ -154,8 +156,9 @@ class SkipLayernormFusePassTest2(InferencePassTest):
return paddle.add(data1, data2)
def test_check_output(self):
if os.path.exists(self.path + "_opt_cache"):
shutil.rmtree(self.path + "_opt_cache")
opt_path = os.path.join(self.path, '_opt_cache')
if os.path.exists(opt_path):
shutil.rmtree(opt_path)
if core.is_compiled_with_cuda():
use_gpu = True
self.check_output_with_option(use_gpu, atol=0.1, rtol=0.00001)
......@@ -201,8 +204,9 @@ class SkipLayernormFusePassTest3(InferencePassTest):
return paddle.add(data1, data2)
def test_check_output(self):
if os.path.exists(self.path + "_opt_cache"):
shutil.rmtree(self.path + "_opt_cache")
opt_path = os.path.join(self.path, '_opt_cache')
if os.path.exists(opt_path):
shutil.rmtree(opt_path)
if core.is_compiled_with_cuda():
use_gpu = True
self.check_output_with_option(use_gpu, atol=0.1, rtol=0.00001)
......
......@@ -128,8 +128,9 @@ class TensorRTSubgraphPassSplitSerializeTest(InferencePassTest):
def test_check_output(self):
if paddle.is_compiled_with_cuda():
use_gpu = True
if os.path.exists(self.path + "_opt_cache"):
shutil.rmtree(self.path + "_opt_cache")
opt_path = os.path.join(self.path, '_opt_cache')
if os.path.exists(opt_path):
shutil.rmtree(opt_path)
self.check_output_with_option(use_gpu)
self.assertTrue(
PassVersionChecker.IsCompatible('tensorrt_subgraph_pass')
......@@ -164,8 +165,9 @@ class TensorRTSubgraphPassDynamicSplitFp16SerializeTest(InferencePassTest):
def test_check_output(self):
if paddle.is_compiled_with_cuda():
use_gpu = True
if os.path.exists(self.path + "_opt_cache"):
shutil.rmtree(self.path + "_opt_cache")
opt_path = os.path.join(self.path, '_opt_cache')
if os.path.exists(opt_path):
shutil.rmtree(opt_path)
self.check_output_with_option(use_gpu, 1e-3)
self.assertTrue(
PassVersionChecker.IsCompatible('tensorrt_subgraph_pass')
......@@ -313,8 +315,9 @@ class TensorRTSubgraphPassLayerNormDynamicTest(InferencePassTest):
self.serialize = True
def test_check_output(self):
if os.path.exists(self.path + "_opt_cache"):
shutil.rmtree(self.path + "_opt_cache")
opt_path = os.path.join(self.path, '_opt_cache')
if os.path.exists(opt_path):
shutil.rmtree(opt_path)
if paddle.is_compiled_with_cuda():
use_gpu = True
self.check_output_with_option(use_gpu)
......@@ -332,8 +335,9 @@ class TensorRTSubgraphPassLayerNormDynamicFP16Test(
self.serialize = True
def test_check_output(self):
if os.path.exists(self.path + "_opt_cache"):
shutil.rmtree(self.path + "_opt_cache")
opt_path = os.path.join(self.path, '_opt_cache')
if os.path.exists(opt_path):
shutil.rmtree(opt_path)
if paddle.is_compiled_with_cuda():
use_gpu = True
self.check_output_with_option(use_gpu, atol=0.01, rtol=0.01)
......@@ -406,8 +410,9 @@ class TensorRTSubgraphPassElementwiseSerializeTest(
)
def test_check_output(self):
if os.path.exists(self.path + "_opt_cache"):
shutil.rmtree(self.path + "_opt_cache")
opt_path = os.path.join(self.path, '_opt_cache')
if os.path.exists(opt_path):
shutil.rmtree(opt_path)
super().test_check_output()
......@@ -444,8 +449,9 @@ class TensorRTSubgraphPassElementwiseBroadcastDynamicTest(InferencePassTest):
return paddle.add(x=data1, y=data2)
def test_check_output(self):
if os.path.exists(self.path + "_opt_cache"):
shutil.rmtree(self.path + "_opt_cache")
opt_path = os.path.join(self.path, '_opt_cache')
if os.path.exists(opt_path):
shutil.rmtree(opt_path)
if paddle.is_compiled_with_cuda():
use_gpu = True
self.check_output_with_option(use_gpu)
......
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy as np
from get_test_cover_info import (
XPUOpTestWrapper,
create_test_class,
get_xpu_op_support_types,
)
from op_test_xpu import XPUOpTest
import paddle
paddle.enable_static()
def nll_loss_1d(
logs, dtype, targets, weight=None, reduction='mean', ignore_index=-100
):
input_shape = logs.shape
N = input_shape[0]
C = input_shape[1]
out = np.zeros_like(targets).astype(dtype)
total_weight = 0
for i in range(N):
cur_target = targets[i]
if cur_target == ignore_index:
out[i] = 0
continue
cur_weight = weight[cur_target] if weight is not None else 1
total_weight += cur_weight
out[i] = -logs[i][cur_target] * cur_weight
if reduction == 'sum':
out = np.sum(out)
total_weight = np.array([total_weight]).astype(dtype)
return {'Out': out, 'Total_weight': total_weight}
elif reduction == 'mean':
out = np.sum(out)
if total_weight != 0:
out /= total_weight
total_weight = np.array([total_weight]).astype(dtype)
return {'Out': out, 'Total_weight': total_weight}
elif reduction == 'none':
total_weight = np.array([0]).astype(dtype)
return {'Out': out, 'Total_weight': total_weight}
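A small worked check of the 1-D reference above; the inputs are made up purely for illustration:

import numpy as np

# logs: 2 samples, 3 classes; targets pick class 1 and class 2.
logs = np.array([[0.1, 0.7, 0.2],
                 [0.3, 0.4, 0.3]], dtype=np.float32)
targets = np.array([1, 2], dtype=np.int64)
weight = np.array([1.0, 2.0, 0.5], dtype=np.float32)

res = nll_loss_1d(logs, np.float32, targets, weight, reduction='mean')
# Per-sample losses: -0.7 * 2.0 = -1.4 and -0.3 * 0.5 = -0.15;
# total_weight = 2.0 + 0.5 = 2.5, so the mean is (-1.4 - 0.15) / 2.5 = -0.62.
print(res['Out'])            # ~ -0.62
print(res['Total_weight'])   # [2.5]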
def nll_loss_2d(
logs, dtype, targets, weight=None, reduction='mean', ignore_index=-100
):
input_shape = logs.shape
N = input_shape[0]
H = input_shape[2]
W = input_shape[3]
out = np.zeros_like(targets).astype(dtype)
total_weight = 0
for i in range(N):
for h in range(H):
for w in range(W):
cur_target = targets[i][h][w]
if cur_target == ignore_index:
out[i][h][w] = 0
continue
cur_weight = weight[cur_target] if weight is not None else 1
total_weight += cur_weight
out[i][h][w] = -logs[i][cur_target][h][w] * cur_weight
if reduction == 'sum':
out = np.sum(out)
total_weight = np.array([total_weight]).astype(dtype)
return {'Out': out, 'Total_weight': total_weight}
elif reduction == 'mean':
out = np.sum(out)
if total_weight != 0:
out /= total_weight
total_weight = np.array([total_weight]).astype(dtype)
return {'Out': out, 'Total_weight': total_weight}
elif reduction == 'none':
total_weight = np.array([0]).astype(dtype)
return {'Out': out, 'Total_weight': total_weight}
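A corresponding sketch for the 2-D reference, this time exercising the default ignore_index of -100; again the values are made up:

import numpy as np

# 1 sample, 2 classes, a 1x2 spatial grid; the second position is ignored.
logs = np.array([[[[0.6, 0.2]],
                  [[0.4, 0.8]]]], dtype=np.float32)   # shape (1, 2, 1, 2)
targets = np.array([[[1, -100]]], dtype=np.int64)     # shape (1, 1, 2)

res = nll_loss_2d(logs, np.float32, targets, reduction='sum')
# Only position (0, 0, 0) counts: -logs[0][1][0][0] = -0.4; the ignored
# position is written as 0 and total_weight stays 1 (unit weight, no Weight input).
print(res['Out'])            # ~ -0.4
print(res['Total_weight'])   # [1.]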
class XPUTestNLLLossOP(XPUOpTestWrapper):
def __init__(self):
self.op_name = 'nll_loss'
self.use_dynamic_create_class = False
class TestNLLLossOpBase1D(XPUOpTest):
op_type = 'nll_loss'
def setUp(self):
self.dtype = self.in_type
self.place = paddle.XPUPlace(0)
self.set_attrs()
self.set_inputs()
self.inputs = {
'X': self.x,
'Label': self.label,
}
if self.weight is not None:
self.inputs['Weight'] = self.weight
self.outputs = nll_loss_1d(
self.x,
self.dtype,
self.label,
self.weight,
self.attrs['reduction'],
)
def set_attrs(self):
self.attrs = {'reduction': 'none'}
def set_inputs(self):
self.class_num = 3
x_shape = [5, self.class_num]
label_shape = [5]
self.x = np.random.random(x_shape).astype(self.dtype)
self.label = np.random.randint(
low=0, high=self.class_num, size=label_shape
).astype(np.int64)
self.weight = np.random.random(self.class_num).astype(self.dtype)
def test_check_output(self):
self.check_output_with_place(self.place)
def test_check_grad(self):
self.check_grad_with_place(self.place, ['X'], 'Out')
class TestNLLLossOpWithWeightMean1D(TestNLLLossOpBase1D):
def set_attrs(self):
self.attrs = {'reduction': 'mean'}
class TestNLLLossOpWithWeightSum1D(TestNLLLossOpBase1D):
def set_attrs(self):
self.attrs = {'reduction': 'sum'}
class TestNLLLossOpWithoutWeightNone1D(TestNLLLossOpBase1D):
def set_inputs(self):
self.class_num = 3
x_shape = [5, self.class_num]
label_shape = [5]
self.x = np.random.random(x_shape).astype(self.dtype)
self.label = np.random.randint(
low=0, high=self.class_num, size=label_shape
).astype(np.int64)
self.weight = None
def set_attrs(self):
self.attrs = {'reduction': 'none'}
class TestNLLLossOpWithoutWeightMean1D(TestNLLLossOpBase1D):
def set_inputs(self):
self.class_num = 3
x_shape = [5, self.class_num]
label_shape = [5]
self.x = np.random.random(x_shape).astype(self.dtype)
self.label = np.random.randint(
low=0, high=self.class_num, size=label_shape
).astype(np.int64)
self.weight = None
def set_attrs(self):
self.attrs = {'reduction': 'mean'}
class TestNLLLossOpWithoutWeightSum1D(TestNLLLossOpBase1D):
def set_inputs(self):
self.class_num = 3
x_shape = [5, self.class_num]
label_shape = [5]
self.x = np.random.random(x_shape).astype(self.dtype)
self.label = np.random.randint(
low=0, high=self.class_num, size=label_shape
).astype(np.int64)
self.weight = None
def set_attrs(self):
self.attrs = {'reduction': 'sum'}
class TestNLLLossOpBase2D(XPUOpTest):
op_type = 'nll_loss'
def setUp(self):
self.dtype = self.in_type
self.place = paddle.XPUPlace(0)
self.set_attrs()
self.set_inputs()
self.inputs = {'X': self.x, 'Label': self.label}
if self.weight is not None:
self.inputs['Weight'] = self.weight
self.outputs = nll_loss_2d(
self.x,
self.dtype,
self.label,
self.weight,
self.attrs['reduction'],
)
def set_attrs(self):
self.attrs = {'reduction': 'none'}
def set_inputs(self):
self.class_num = 3
x_shape = [5, self.class_num, 7, 11]
label_shape = [5, 7, 11]
self.x = np.random.random(x_shape).astype(self.dtype)
self.label = np.random.randint(
low=0, high=self.class_num, size=label_shape
).astype(np.int64)
self.weight = np.random.random(self.class_num).astype(self.dtype)
def test_check_output(self):
self.check_output_with_place(self.place)
def test_check_grad(self):
self.check_grad_with_place(self.place, ['X'], 'Out')
class TestNLLLossOpWithWeightMean2D(TestNLLLossOpBase2D):
def set_attrs(self):
self.attrs = {'reduction': 'mean'}
class TestNLLLossOpWithWeightSum2D(TestNLLLossOpBase2D):
def set_attrs(self):
self.attrs = {'reduction': 'sum'}
class TestNLLLossOpWithoutWeightNone2D(TestNLLLossOpBase2D):
def set_inputs(self):
self.dtype = self.in_type
self.class_num = 3
x_shape = [5, self.class_num, 7, 11]
label_shape = [5, 7, 11]
self.x = np.random.random(x_shape).astype(self.dtype)
self.label = np.random.randint(
low=0, high=self.class_num, size=label_shape
).astype(np.int64)
self.weight = None
def set_attrs(self):
self.attrs = {'reduction': 'none'}
class TestNLLLossOpWithoutWeightMean2D(TestNLLLossOpBase2D):
def set_inputs(self):
self.dtype = self.in_type
self.class_num = 3
x_shape = [5, self.class_num, 7, 11]
label_shape = [5, 7, 11]
self.x = np.random.random(x_shape).astype(self.dtype)
self.label = np.random.randint(
low=0, high=self.class_num, size=label_shape
).astype(np.int64)
self.weight = None
def set_attrs(self):
self.attrs = {'reduction': 'mean'}
class TestNLLLossOpWithoutWeightSum2D(TestNLLLossOpBase2D):
def set_inputs(self):
self.dtype = self.in_type
self.class_num = 3
x_shape = [5, self.class_num, 7, 11]
label_shape = [5, 7, 11]
self.x = np.random.random(x_shape).astype(self.dtype)
self.label = np.random.randint(
low=0, high=self.class_num, size=label_shape
).astype(np.int64)
self.weight = None
def set_attrs(self):
self.attrs = {'reduction': 'sum'}
support_types = get_xpu_op_support_types('nll_loss')
for stype in support_types:
create_test_class(globals(), XPUTestNLLLossOP, stype)
if __name__ == '__main__':
unittest.main()
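get_xpu_op_support_types and create_test_class come from get_test_cover_info; the file only shows how they are called, so the following is merely a rough, hypothetical sketch of the kind of per-dtype class generation they presumably perform (fixing in_type on a dtype-specific subclass), not the actual Paddle helper:

# Hypothetical illustration only, not the real create_test_class implementation.
def make_dtype_case(base_cls, dtype_name):
    # Derive a subclass whose in_type is pinned to one supported dtype.
    return type(f"{base_cls.__name__}_{dtype_name}", (base_cls,), {'in_type': dtype_name})

# e.g. register a float32 variant of the 1-D base test in this module:
# globals()['TestNLLLossOpBase1D_float32'] = make_dtype_case(
#     XPUTestNLLLossOP.TestNLLLossOpBase1D, 'float32'
# )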
......@@ -344,7 +344,7 @@ fi
OUTPUT_LOG=`echo "$ALL_ADDED_LINES" | grep -Ew "print|printf|fprintf|std::cout" || true`
if [ "$OUTPUT_LOG" != "" ];then
    echo_line="print or std::cout is not recommended for direct use, please use logging or glog. If it is necessary to use, please contact tianshuo78520a (recommended) or zhangbo9674 to review and approve.\n"
check_approval 1 tianshuo7852a zhangbo9674
check_approval 1 tianshuo78520a zhangbo9674
fi
HAS_MODIFIED_PHI_FILES=`git diff --name-only upstream/$BRANCH | grep "paddle/phi/" || true`
......