未验证 提交 23f87442 编写于 作者: S Sonder 提交者: GitHub

Add output defs for some kernels' Phi registration (#52941)

* add register info for eigh and eig_grad

* add sync_batch_norm_op.cu register info

* add lamb output register info

* add unique register info

* change type name

* change type name

* add output register info for check_finite_and_unscale

* update cmake and config file

* add register info for adagrad

* fix build error

* add sync to run_unittests.sh

* add register info for unique_consecutive

* fix build error

* add eigh to STATIC_BUILD_TESTS

* update eig_kernel.cc

* update eig_kernel.cc

* fix infer meta error

* fix unique register error

* fix lamb register info error

* fix lamb register info

* update lamb register info

* fix lamb

* remove one Output Register

* update static build file

* add eigh op to disable_wingpu_test

* update run_unittests
上级 002f2185
......@@ -27,17 +27,6 @@ std::set<std::string> OperatorBasesMustRunInStaticBuild = {
std::set<std::string> OpsCanSkipedFakeAllocInStaticBuild = {
"create_double_buffer_reader", "create_py_reader", "fetch_v2"};
// These Op needs set output dtype when register phi kernel, but they didn't
std::set<std::string> OpsNeedSetOutputDtypeWhenRegisterPhiKernel = {
"eig_grad",
"eigh",
"lamb",
"sync_batch_norm_grad",
"update_loss_scaling",
"unique",
"unique_consecutive_flattened_tensor",
"unique_raw"};
// Cannot static analysis these Ops' output dtype or backend because their
// kernels have not moved to PHI yet.
std::set<std::string> OpsWithFluidKernelNeedMoveToPhi = {
......@@ -73,7 +62,6 @@ bool BlockCanBeStaticBuilt(const framework::BlockDesc& block) {
// has_fluid_kernel = (kernelCode >> 3) & 1
// has_structed_kernel = (kernelCode >> 2) & 1
// need_move_to_phi = (kernelCode >> 1) & 1
// need_set_dtype = KernelCode & 1
using KernelCode = int8_t;
std::set<std::pair<std::string, KernelCode>> invalid_ops;
for (auto& op : block.AllOps()) {
......@@ -98,18 +86,16 @@ bool BlockCanBeStaticBuilt(const framework::BlockDesc& block) {
phi::KernelFactory::Instance().HasStructuredKernel(op_type);
bool need_move_to_phi = (has_fluid_kernel || has_structured_kernel) &&
OpsWithFluidKernelNeedMoveToPhi.count(op_type);
bool need_set_dtype =
OpsNeedSetOutputDtypeWhenRegisterPhiKernel.count(op_type);
KernelCode kernel_code =
(in_black_list << 7) + (is_operator_base << 6) + (is_custom_op << 5) +
(use_mkldnn << 4) + (has_fluid_kernel << 3) +
(has_structured_kernel << 2) + (need_move_to_phi << 1) + need_set_dtype;
(has_structured_kernel << 2) + (need_move_to_phi << 1);
if (!OpsCanSkipedFakeAllocInStaticBuild.count(op_type)) {
if (in_black_list ||
(is_operator_base &&
!OperatorBasesHandledInStaticBuild.count(op_type)) ||
is_custom_op || use_mkldnn || need_move_to_phi || need_set_dtype) {
is_custom_op || use_mkldnn || need_move_to_phi) {
invalid_ops.insert(std::make_pair(op_type, kernel_code));
}
}
......@@ -125,8 +111,7 @@ bool BlockCanBeStaticBuilt(const framework::BlockDesc& block) {
<< ", use_mkldnn = " << (item.second >> 4 & 1)
<< ", has_fluid_kernel = " << (item.second >> 3 & 1)
<< ", has_structed_kerenl = " << (item.second >> 2 & 1)
<< ", need_move_to_phi = " << (item.second >> 1 & 1)
<< ", need_set_dtype = " << (item.second & 1) << "]\n";
<< ", need_move_to_phi = " << (item.second >> 1 & 1) << "]\n";
}
VLOG(1) << ss.str();
}
......@@ -454,18 +439,23 @@ void FakeInitializeOutputsForFunctionKernel(
// analyze dtype
phi::DataType dtype = tensor_arg_def.dtype;
if (dtype == DataType::UNDEFINED ||
OpsNeedSetOutputDtypeWhenRegisterPhiKernel.count(
std::string(op_type))) {
if (dtype == DataType::UNDEFINED) {
// Some OP's InferMeta is sensitive to DDim, so we cannot get their
// output dtype from InferMeta
if (op_type == "adam" || op_type == "adamw") {
dtype = InferMPDType(runtime_ctx, "Param");
} else if (op_type == "arg_min" || op_type == "arg_max" ||
op_type == "coalesce_tensor" || op_type == "one_hot_v2") {
op_type == "coalesce_tensor" || op_type == "one_hot_v2" ||
op_type == "unique") {
dtype = InferDTypeFromAttr(op, runtime_ctx, "dtype");
} else if (op_type == "bincount" || op_type == "reduce_sum_grad") {
dtype = GetInputDType(runtime_ctx, "X");
} else if (op_type == "lamb") {
bool multi_precision = op.Attr<bool>("multi_precision");
dtype = GetInputDType(runtime_ctx, "Moment1");
if (multi_precision && dtype == phi::DataType::FLOAT16) {
dtype = phi::DataType::FLOAT32;
}
} else if (op_type == "layer_norm") {
dtype = InferMPDType(runtime_ctx, "X");
} else if (op_type == "reduce_sum") {
......
......@@ -370,7 +370,12 @@ PD_REGISTER_KERNEL(sync_batch_norm_grad,
ALL_LAYOUT,
phi::SyncBatchNormGradKernel,
float,
phi::dtype::float16) {}
phi::dtype::float16) {
if (kernel_key.dtype() == phi::DataType::FLOAT16) {
kernel->OutputAt(1).SetDataType(phi::DataType::FLOAT32); // scale_grad
kernel->OutputAt(2).SetDataType(phi::DataType::FLOAT32); // bias_grad
}
}
#else
#if CUDNN_VERSION_MIN(8, 1, 0)
PD_REGISTER_KERNEL(sync_batch_norm_grad,
......
......@@ -2045,19 +2045,26 @@ void LambInferMeta(const MetaTensor& param,
PADDLE_ENFORCE_NOT_NULL(
beta2_pow_out,
errors::NotFound("The output beta2_pow_out can not be nullptr"));
param_out->set_dims(param_dims);
param_out->set_dtype(param.dtype());
phi::DataType dtype = param.dtype();
if (multi_precision && param.dtype() == phi::DataType::FLOAT16) {
dtype = phi::DataType::FLOAT32;
}
moment1_out->set_dims(param_dims);
moment1_out->set_dtype(moment1.dtype());
moment1_out->set_dtype(dtype);
moment2_out->set_dims(param_dims);
moment2_out->set_dtype(moment2.dtype());
moment2_out->set_dtype(dtype);
beta1_pow_out->set_dims(beta1_pow_dims);
beta1_pow_out->set_dtype(beta1_pow.dtype());
beta1_pow_out->set_dtype(dtype);
beta2_pow_out->set_dims(beta2_pow_dims);
beta2_pow_out->set_dtype(beta2_pow.dtype());
beta2_pow_out->set_dtype(dtype);
if (master_param_outs) {
master_param_outs->set_dtype(dtype);
}
}
void LogspaceInferMeta(const MetaTensor& start,
......
......@@ -4824,13 +4824,16 @@ void UniqueRawInferMeta(const MetaTensor& x,
out->set_dims(out_dims);
if (return_inverse) {
index->set_dims(phi::make_ddim({x.dims()[axis_value]}));
index->set_dtype(dtype);
}
}
if (return_index) {
indices->set_dims(phi::make_ddim({-1}));
indices->set_dtype(dtype);
}
if (return_counts) {
counts->set_dims(phi::make_ddim({-1}));
counts->set_dtype(dtype);
}
}
......
......@@ -120,4 +120,7 @@ PD_REGISTER_KERNEL(update_loss_scaling,
ALL_LAYOUT,
phi::UpdateLossScalingKernel,
float,
double) {}
double) {
kernel->OutputAt(2).SetDataType(phi::DataType::INT32);
kernel->OutputAt(3).SetDataType(phi::DataType::INT32);
}
......@@ -50,4 +50,5 @@ PD_REGISTER_KERNEL(eig_grad,
phi::dtype::complex<double>) {
kernel->InputAt(0).SetDataType(phi::dtype::ToReal(kernel_key.dtype()));
kernel->InputAt(2).SetDataType(phi::dtype::ToReal(kernel_key.dtype()));
kernel->OutputAt(0).SetDataType(phi::dtype::ToComplex(kernel_key.dtype()));
}
......@@ -105,7 +105,9 @@ PD_REGISTER_KERNEL(eig,
double,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {
const phi::DataType& out_dtype = phi::dtype::ToComplex(kernel_key.dtype());
kernel->OutputAt(0).SetDataType(out_dtype);
kernel->OutputAt(1).SetDataType(out_dtype);
if (kernel_key.dtype() == phi::DataType::FLOAT32 ||
kernel_key.dtype() == phi::DataType::FLOAT64) {
kernel->OutputAt(0).SetDataType(phi::dtype::ToComplex(kernel_key.dtype()));
kernel->OutputAt(1).SetDataType(phi::dtype::ToComplex(kernel_key.dtype()));
}
}
......@@ -42,6 +42,6 @@ PD_REGISTER_KERNEL(eigh,
double,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {
kernel->OutputAt(0).SetDataType(phi::DataType::UNDEFINED);
kernel->OutputAt(1).SetDataType(phi::DataType::UNDEFINED);
kernel->OutputAt(0).SetDataType(phi::dtype::ToReal(kernel_key.dtype()));
kernel->OutputAt(1).SetDataType(phi::dtype::ToReal(kernel_key.dtype()));
}
......@@ -76,4 +76,7 @@ PD_REGISTER_KERNEL(unique_consecutive,
float,
double,
int32_t,
int64_t) {}
int64_t) {
kernel->OutputAt(1).SetDataType(kernel_key.dtype());
kernel->OutputAt(2).SetDataType(kernel_key.dtype());
}
......@@ -123,7 +123,11 @@ PD_REGISTER_KERNEL(unique,
float,
double,
int32_t,
int64_t) {}
int64_t) {
kernel->OutputAt(1).SetDataType(phi::DataType::UNDEFINED);
kernel->OutputAt(2).SetDataType(phi::DataType::UNDEFINED);
kernel->OutputAt(3).SetDataType(phi::DataType::UNDEFINED);
}
PD_REGISTER_KERNEL(unique_raw,
CPU,
......@@ -132,4 +136,8 @@ PD_REGISTER_KERNEL(unique_raw,
float,
double,
int32_t,
int64_t) {}
int64_t) {
kernel->OutputAt(1).SetDataType(phi::DataType::UNDEFINED);
kernel->OutputAt(2).SetDataType(phi::DataType::UNDEFINED);
kernel->OutputAt(3).SetDataType(phi::DataType::UNDEFINED);
}
......@@ -210,7 +210,12 @@ PD_REGISTER_KERNEL(adagrad,
phi::AdagradDenseKernel,
float,
double,
phi::dtype::float16) {}
phi::dtype::float16) {
if (kernel_key.dtype() == phi::DataType::FLOAT16) {
kernel->OutputAt(1).SetDataType(phi::DataType::FLOAT32);
kernel->OutputAt(2).SetDataType(phi::DataType::FLOAT32);
}
}
PD_REGISTER_KERNEL(adagrad_dense_param_sparse_grad,
GPU,
......
......@@ -369,4 +369,10 @@ PD_REGISTER_KERNEL(update_loss_scaling,
phi::dtype::float16,
phi::dtype::bfloat16) {
kernel->InputAt(1).SetBackend(phi::Backend::ALL_BACKEND);
if (kernel_key.dtype() == phi::DataType::FLOAT16 ||
kernel_key.dtype() == phi::DataType::BFLOAT16) {
kernel->OutputAt(1).SetDataType(phi::DataType::FLOAT32);
}
kernel->OutputAt(2).SetDataType(phi::DataType::INT32);
kernel->OutputAt(3).SetDataType(phi::DataType::INT32);
}
......@@ -45,8 +45,8 @@ PD_REGISTER_KERNEL(eigh, // cuda_only
double,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {
kernel->OutputAt(0).SetDataType(phi::DataType::UNDEFINED);
kernel->OutputAt(1).SetDataType(phi::DataType::UNDEFINED);
kernel->OutputAt(0).SetDataType(phi::dtype::ToReal(kernel_key.dtype()));
kernel->OutputAt(1).SetDataType(phi::dtype::ToReal(kernel_key.dtype()));
}
#endif // not PADDLE_WITH_HIP
......@@ -27,4 +27,9 @@ PD_REGISTER_KERNEL(lamb,
double) {
kernel->InputAt(5).SetBackend(phi::Backend::ALL_BACKEND);
kernel->InputAt(6).SetBackend(phi::Backend::ALL_BACKEND);
kernel->OutputAt(1).SetDataType(phi::DataType::UNDEFINED);
kernel->OutputAt(2).SetDataType(phi::DataType::UNDEFINED);
kernel->OutputAt(3).SetDataType(phi::DataType::UNDEFINED);
kernel->OutputAt(4).SetDataType(phi::DataType::UNDEFINED);
kernel->OutputAt(5).SetDataType(phi::DataType::UNDEFINED);
}
......@@ -77,4 +77,7 @@ PD_REGISTER_KERNEL(unique_consecutive,
float,
double,
int32_t,
int64_t) {}
int64_t) {
kernel->OutputAt(1).SetDataType(kernel_key.dtype());
kernel->OutputAt(2).SetDataType(kernel_key.dtype());
}
......@@ -549,7 +549,11 @@ void UniqueKernel(const Context& context,
} // namespace phi
PD_REGISTER_KERNEL(
unique, GPU, ALL_LAYOUT, phi::UniqueKernel, float, double, int64_t, int) {}
unique, GPU, ALL_LAYOUT, phi::UniqueKernel, float, double, int64_t, int) {
kernel->OutputAt(1).SetDataType(phi::DataType::UNDEFINED);
kernel->OutputAt(2).SetDataType(phi::DataType::UNDEFINED);
kernel->OutputAt(3).SetDataType(phi::DataType::UNDEFINED);
}
PD_REGISTER_KERNEL(unique_raw,
GPU,
......@@ -558,4 +562,8 @@ PD_REGISTER_KERNEL(unique_raw,
float,
double,
int64_t,
int) {}
int) {
kernel->OutputAt(1).SetDataType(phi::DataType::UNDEFINED);
kernel->OutputAt(2).SetDataType(phi::DataType::UNDEFINED);
kernel->OutputAt(3).SetDataType(phi::DataType::UNDEFINED);
}
......@@ -225,4 +225,9 @@ PD_REGISTER_KERNEL(
lamb, XPU, ALL_LAYOUT, phi::LambKernel, float, phi::dtype::float16) {
kernel->InputAt(5).SetBackend(phi::Backend::ALL_BACKEND);
kernel->InputAt(6).SetBackend(phi::Backend::ALL_BACKEND);
kernel->OutputAt(1).SetDataType(phi::DataType::UNDEFINED);
kernel->OutputAt(2).SetDataType(phi::DataType::UNDEFINED);
kernel->OutputAt(3).SetDataType(phi::DataType::UNDEFINED);
kernel->OutputAt(4).SetDataType(phi::DataType::UNDEFINED);
kernel->OutputAt(5).SetDataType(phi::DataType::UNDEFINED);
}
......@@ -1165,6 +1165,8 @@ set(STATIC_BUILD_TESTS
test_arg_min_max_op
test_bincount_op
test_decoupled_py_reader
test_eig_op
test_eigh_op
test_fake_quantize_op
test_fetch_lod_tensor_array
test_imperative_optimizer
......@@ -1186,7 +1188,10 @@ set(STATIC_BUILD_TESTS
test_sparse_conv_op
test_sparse_norm_op
test_sparse_pooling_op
test_sync_batch_norm_op
test_tensor_array_to_tensor
test_unique
test_update_loss_scaling_op
test_while_op
test_one_hot_v2_op)
......@@ -1206,3 +1211,7 @@ set_tests_properties(test_layer_norm_op_static_build PROPERTIES TIMEOUT 1500)
set_tests_properties(test_paddle_save_load_binary_static_build
PROPERTIES TIMEOUT 120)
set_tests_properties(test_reduce_op_static_build PROPERTIES TIMEOUT 500)
set_tests_properties(test_sync_batch_norm_op_static_build
PROPERTIES LABELS "RUN_TYPE=DIST")
set_tests_properties(test_sync_batch_norm_op_static_build PROPERTIES TIMEOUT
120)
......@@ -307,7 +307,7 @@ if [ "${HAS_MODIFIED_SETUP}" != "" ] || ([ "${HAS_MODIFIED_SETUP_IN}" != "" ] &&
fi
HAS_MODIFIED_STATIC_BUILD=`git diff --name-only upstream/$BRANCH | grep "new_executor/interpreter/static_build.cc" || true`
if [ "${HAS_MODIFIED_STATIC_BUILD}" != "" ] && [ "${GIT_PR_ID}" != ""]; then
if [ "${HAS_MODIFIED_STATIC_BUILD}" != "" ] && [ "${GIT_PR_ID}" != "" ]; then
echo_line="You must have one RD (From00 or zhiqiu) approval for file changes in new_executor/interpreter/static_build.cc.\n"
check_approval 1 From00 zhiqiu
fi
......
......@@ -39,6 +39,7 @@ disable_wingpu_test="^test_model$|\
^test_reader_reset$|\
^test_imperative_se_resnext$|\
^test_sync_batch_norm_op$|\
^test_sync_batch_norm_op_static_build$|\
^test_dataloader_keep_order$|\
^test_dataloader_unkeep_order$|\
^test_multiprocess_dataloader_iterable_dataset_static$|\
......@@ -86,6 +87,7 @@ disable_win_inference_test="^trt_quant_int8_yolov3_r50_test$|\
^test_full_name_usage$|\
^test_trt_convert_unary$|\
^test_eigh_op$|\
^test_eigh_op_static_build$|\
^test_fc_op$|\
^test_stack_op$|\
^trt_split_converter_test$|\
......@@ -161,6 +163,7 @@ disable_win_inference_test="^trt_quant_int8_yolov3_r50_test$|\
^test_dataloader_keep_order$|\
^test_dataloader_unkeep_order$|\
^test_sync_batch_norm_op$|\
^test_sync_batch_norm_op_static_build$|\
^test_fuse_bn_act_pass$|\
^test_fuse_bn_add_act_pass$|\
^test_decoupled_py_reader_data_check$|\
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册