From d7d9807e8dd45ca11da43c6d0bfd7b84819465b4 Mon Sep 17 00:00:00 2001 From: niuliling123 <51102941+niuliling123@users.noreply.github.com> Date: Mon, 5 Sep 2022 12:20:03 +0800 Subject: [PATCH] Add eager layout autotune (#45409) --- .../forwards/conv2d_fwd_function.cc | 32 ++ .../generator/eager_gen.py | 131 ++++- paddle/fluid/eager/eager_layout_auto_tune.h | 276 ++++++++++ paddle/fluid/eager/eager_layout_transformer.h | 493 ++++++++++++++++++ paddle/fluid/imperative/layout_autotune.cc | 14 +- paddle/fluid/imperative/layout_autotune.h | 12 +- paddle/fluid/imperative/layout_transformer.h | 9 +- paddle/fluid/pybind/eager_properties.cc | 21 + paddle/fluid/pybind/imperative.cc | 9 + paddle/phi/api/lib/data_transform.cc | 26 +- .../tests/unittests/test_layout_autotune.py | 3 - python/paddle/nn/functional/conv.py | 2 + 12 files changed, 997 insertions(+), 31 deletions(-) create mode 100644 paddle/fluid/eager/eager_layout_auto_tune.h create mode 100644 paddle/fluid/eager/eager_layout_transformer.h diff --git a/paddle/fluid/eager/api/manual/eager_manual/forwards/conv2d_fwd_function.cc b/paddle/fluid/eager/api/manual/eager_manual/forwards/conv2d_fwd_function.cc index ee1bfb17b3e..cfa43270b78 100644 --- a/paddle/fluid/eager/api/manual/eager_manual/forwards/conv2d_fwd_function.cc +++ b/paddle/fluid/eager/api/manual/eager_manual/forwards/conv2d_fwd_function.cc @@ -17,6 +17,7 @@ #include "paddle/fluid/eager/api/manual/eager_manual/nodes/nodes.h" #include "paddle/fluid/eager/api/utils/global_utils.h" #include "paddle/fluid/eager/eager_amp_auto_cast.h" +#include "paddle/fluid/eager/eager_layout_auto_tune.h" #include "paddle/fluid/eager/nan_inf_utils.h" #include "paddle/fluid/platform/profiler/event_tracing.h" @@ -73,6 +74,37 @@ paddle::experimental::Tensor conv2d_dygraph_function( } } + // Layout autotune + + if (paddle::imperative::LayoutAutoTune::Instance().UseLayoutAutoTune()) { + VLOG(5) << "Check and Prepare For LAYOUT"; + paddle::small_vector, + egr::kSlotSmallVectorSize> + tensors_vector = {{input}, {filter}}; + + auto op_name = phi::TransToFluidOpName("conv2d"); + auto transformer = egr::EagerLayoutAutotune( + op_name, tensors_vector, &data_format); + auto NEW_input = transformer->TransInTensor("input", input); + + paddle::imperative::LayoutAutoTune::Instance().DisableLayoutAutoTune(); + auto out = conv2d_dygraph_function(NEW_input, + filter, + strides, + paddings, + paddding_algorithm, + groups, + dilations, + data_format, + use_addto, + workspace_size_MB, + exhaustive_search); + transformer->SetOutTensorLayout(&out); + paddle::imperative::LayoutAutoTune::Instance().EnableLayoutAutoTune(); + // Returns + return out; + } + // Get Input AutoGradMeta egr::AutogradMeta* input_autograd_meta = egr::EagerUtils::nullable_autograd_meta(input); diff --git a/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py b/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py index 505dd9377c5..41af2c3f150 100644 --- a/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py +++ b/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py @@ -185,6 +185,8 @@ FORWARD_FUNCTION_TEMPLATE = \ // Dygraph Record Event {} // AMP Logic +{} + // Layout autotune {} // Get Input AutoGradMeta {} @@ -217,7 +219,8 @@ FORWARD_ONLY_FUNCTION_TEMPLATE = \ {} // AMP Logic {} - + // Layout autotune +{} // Forward API Call VLOG(3) << \"Final State Running: \" << \"{}\"; {} @@ -295,7 +298,6 @@ NODE_CC_FILE_TEMPLATE = \ #include "paddle/fluid/eager/api/generated/eager_generated/forwards/dygraph_functions.h" #include 
"paddle/fluid/eager/to_static/run_program_op_node.h" #include "paddle/fluid/eager/nan_inf_utils.h" - #include "paddle/phi/api/include/sparse_api.h" #include "paddle/fluid/eager/api/manual/eager_manual/nodes/nodes.h" DECLARE_bool(check_nan_inf); @@ -317,7 +319,7 @@ FORWARD_CC_FILE_TEMPLATE = \ #include "paddle/phi/api/lib/dygraph_api.h" #include "paddle/fluid/eager/api/generated/eager_generated/forwards/dygraph_functions.h" #include "paddle/fluid/eager/api/generated/eager_generated/backwards/nodes.h" - +#include "paddle/fluid/eager/eager_layout_auto_tune.h" #include "paddle/phi/api/include/strings_api.h" #include "paddle/phi/api/include/sparse_api.h" #include "paddle/fluid/eager/api/utils/global_utils.h" @@ -396,7 +398,21 @@ AMP_LOGIC_TEMPLATE = \ }} }} """ - +LAYOUT_LOGIC_TEMPLATE=\ +""" + if (paddle::imperative::LayoutAutoTune::Instance().UseLayoutAutoTune()) {{ + VLOG(5) << "Check and Prepare For LAYOUT"; + paddle::small_vector, egr::kSlotSmallVectorSize> tensors_vector = {}; + {} + {} + paddle::imperative::LayoutAutoTune::Instance().DisableLayoutAutoTune(); + {} + {} + paddle::imperative::LayoutAutoTune::Instance().EnableLayoutAutoTune(); + // Returns + return {}; + }} +""" CREATE_PLAIN_OPTIONAL_TENSOR_TEMPLATE = \ """ paddle::optional {}_optional; @@ -992,6 +1008,9 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase): amp_tensors_vector_optional_list = [] amp_autocast_list = [] amp_autocast_optional_list = [] + layout_autotune_list = [] + layout_autotune_optional_list = [] + layout_tensors_vector_optional_list = [] for name, (ttype, pos) in forward_inputs_position_map.items(): inputs_call_list[pos] = f"{name}" amp_inputs_call_list[pos] = f"NEW_{name}" @@ -1009,6 +1028,12 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase): amp_autocast_optional_list.append( f"auto NEW_{name} = egr::EagerAmpAutoCast(\"{name}\", {name}, amp_dst_dtype, op_name);\n" ) + layout_tensors_vector_optional_list.append( + f"if ({name}) tensors_vector.push_back({{ *{name} }});\n" + ) + layout_autotune_optional_list.append( + f"auto NEW_{name} = transformer->TransInTensor(\"{name}\", {name});\n" + ) else: if is_inplaced and forward_inplace_map and name in forward_inplace_map.keys( ): @@ -1023,6 +1048,9 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase): amp_autocast_list.append( f"auto NEW_{name} = egr::EagerAmpAutoCast(\"{name}\", {name}, amp_dst_dtype, op_name);\n" ) + layout_autotune_list.append( + f"auto NEW_{name} = transformer->TransInTensor(\"{name}\", {name});\n" + ) else: assert IsVectorTensorType(ttype) if is_optional: @@ -1037,6 +1065,9 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase): amp_autocast_optional_list.append( f"auto NEW_{name} = egr::EagerAmpAutoCasts(\"{name}\", {name}, amp_dst_dtype, op_name);\n" ) + layout_autotune_optional_list.append( + f"auto NEW_{name} = transformer->TransInTensor(\"{name}\", {name});\n" + ) else: if is_inplaced and forward_inplace_map and name in forward_inplace_map.keys( ): @@ -1047,10 +1078,59 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase): amp_autocast_list.append( f"auto NEW_{name} = egr::EagerAmpAutoCasts(\"{name}\", {name}, amp_dst_dtype, op_name);\n" ) + layout_autotune_list.append( + f"auto NEW_{name} = transformer->TransInTensor(\"{name}\", {name});\n" + ) inputs_args_definition_list[pos] = arg_str inputs_args_declaration_list[pos] = arg_str + # for layout autotune attr + lightly_sensitive_attr = [ + 'axis', 'axes', 'dim', 'dims', 'start', 'end', 'stop' + ] + 
heavily_sensitive_attr = ['data_format', 'data_layout'] + layout_autotune_attr = [] + layout_autotune_attr_code_list = [] + layout_autotune_attr_type_list = [] + layout_autotune_attr_code_list.append( + f"auto op_name = phi::TransToFluidOpName(\"{forward_api_name}\");\n" + ) + + lightly_flag = False + heavily_flag = False + for name, atype, default_val, pos in forward_attrs_list: + for attr_name in lightly_sensitive_attr: + if name.find( + attr_name) != -1 and name not in layout_autotune_attr: + lightly_flag = True + layout_autotune_attr.append(name) + layout_autotune_attr_type_list.append(atype) + if lightly_flag is False: + for attr_name in heavily_sensitive_attr: + if name.find(attr_name + ) != -1 and name not in layout_autotune_attr: + layout_autotune_attr.append(name) + layout_autotune_attr_type_list.append(atype) + heavily_flag = True + if len(layout_autotune_attr) == 0: + layout_autotune_attr_code_list.append( + f"auto transformer = egr::EagerLayoutAutotune(op_name, tensors_vector);\n" + ) + elif len(layout_autotune_attr) == 1: + layout_autotune_attr_code_list.append( + f"auto transformer = egr::EagerLayoutAutotune<{layout_autotune_attr_type_list[0]}>(op_name, tensors_vector, &{layout_autotune_attr[0]});\n" + ) + elif len(layout_autotune_attr) == 2: + layout_autotune_attr_code_list.append( + f"auto transformer = egr::EagerLayoutAutotune<{layout_autotune_attr_type_list[0]}, {layout_autotune_attr_type_list[1]}>(op_name, tensors_vector, &{layout_autotune_attr[0]}, &{layout_autotune_attr[1]});\n" + ) + else: + layout_autotune_attr_code_list.append( + f"auto transformer = egr::EagerLayoutAutotune(op_name, tensors_vector, {len(layout_autotune_attr)});\n" + ) + + # forward attrs for name, atype, default_val, pos in forward_attrs_list: inputs_call_list[pos] = name amp_inputs_call_list[pos] = name @@ -1236,6 +1316,35 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase): amp_tensors_vector_optional_list_str, amp_get_dst_dtype_str, amp_autocast_list_str, amp_call_str) + # Forward layout autotune + layout_inputs_call_args_str = amp_inputs_call_args_str + layout_tmp_result_list = [] + layout_autotune_outs_list = "" + if num_outputs == 1: + layout_autotune_outs_list += f"{indent}auto {returns_str} = api_result;\n" + layout_autotune_outs_list += f"{indent}transformer -> SetOutTensorLayout(&{returns_str});\n" + else: + for name, (rtype, pos) in forward_outputs_position_map.items(): + if name in intermediate_outputs: + continue + layout_autotune_outs_list += f"{indent}auto& {name} = std::get<{len(layout_tmp_result_list)}>(api_result);\n" + layout_autotune_outs_list += f"{indent}transformer -> SetOutTensorLayout(&{name});\n" + layout_tmp_result_list.append(f"{name}") + + if returns_type_str == "paddle::experimental::Tensor&" or forward_api_name == "slice" or forward_api_name == "strided_slice" or len( + layout_autotune_attr) == 0: + layout_logic_str = "" + else: + # after_call_str = f"return {forward_function_name}({layout_inputs_call_args_str});\n" + after_call_str = f"auto api_result = {forward_function_name}({layout_inputs_call_args_str});\n" + layout_logic_str = LAYOUT_LOGIC_TEMPLATE.format( + amp_tensors_vector_list_str, + " ".join(layout_tensors_vector_optional_list), + " ".join(layout_autotune_attr_code_list) + " " + + " ".join(layout_autotune_list) + + " ".join(layout_autotune_optional_list), after_call_str, + layout_autotune_outs_list, returns_str) + # Generate forward_definition_str and forward_declaration_str if self.is_forward_only: if len(amp_tensors_vector_list) == 0: 
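[Editorial note] For readers of the generated code: once LAYOUT_LOGIC_TEMPLATE above is filled in for an op with a single heavily layout-sensitive attribute, the emitted block looks roughly like the sketch below. It is modeled on the hand-written conv2d_dygraph_function earlier in this patch; `some_op`, its input `x`, and `some_op_dygraph_function` are placeholder names, not real generator output.

if (paddle::imperative::LayoutAutoTune::Instance().UseLayoutAutoTune()) {
  VLOG(5) << "Check and Prepare For LAYOUT";
  paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                       egr::kSlotSmallVectorSize>
      tensors_vector = {{x}};
  auto op_name = phi::TransToFluidOpName("some_op");
  auto transformer =
      egr::EagerLayoutAutotune<std::string>(op_name, tensors_vector, &data_format);
  auto NEW_x = transformer->TransInTensor("x", x);
  paddle::imperative::LayoutAutoTune::Instance().DisableLayoutAutoTune();
  auto api_result = some_op_dygraph_function(NEW_x, data_format);
  auto out = api_result;
  transformer->SetOutTensorLayout(&out);
  paddle::imperative::LayoutAutoTune::Instance().EnableLayoutAutoTune();
  // Returns
  return out;
}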
@@ -1243,17 +1352,17 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase): self.forward_definition_str += FORWARD_ONLY_FUNCTION_TEMPLATE.format( returns_type_str, forward_function_name, inputs_args_definition_str, dygraph_event_str, amp_logic_str, - forward_function_name, forward_call_str, get_outputs_str, - returns_str) + layout_logic_str, forward_function_name, forward_call_str, + get_outputs_str, returns_str) else: self.forward_definition_str += FORWARD_FUNCTION_TEMPLATE.format( returns_type_str, forward_function_name, inputs_args_definition_str, dygraph_event_str, amp_logic_str, - inputs_autograd_meta_str, forward_function_name, - forward_call_str, check_nan_inf_str, get_outputs_str, - outputs_autograd_meta_str, compute_require_grad_args_str, - check_inplace_str, bump_inplace_version_str, node_creation_str, - returns_str) + layout_logic_str, inputs_autograd_meta_str, + forward_function_name, forward_call_str, check_nan_inf_str, + get_outputs_str, outputs_autograd_meta_str, + compute_require_grad_args_str, check_inplace_str, + bump_inplace_version_str, node_creation_str, returns_str) self.forward_declaration_str += f"{returns_type_str} {forward_function_name}({inputs_args_declaration_str});\n" diff --git a/paddle/fluid/eager/eager_layout_auto_tune.h b/paddle/fluid/eager/eager_layout_auto_tune.h new file mode 100644 index 00000000000..eebdd9caa6d --- /dev/null +++ b/paddle/fluid/eager/eager_layout_auto_tune.h @@ -0,0 +1,276 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
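[Editorial note] The header introduced below dispatches each op to one of three transformer kinds, and the code generator selects the overload from the number and kind of layout-sensitive attributes. A rough mapping for orientation (examples taken from the comments in this file and the generator; `axis` and `data_format` stand for the op's own attributes):

// No layout-sensitive attribute: layout-agnostic ops such as add / relu.
auto t0 = egr::EagerLayoutAutotune(op_name, tensors_vector);
// One lightly sensitive attribute, e.g. transpose2's axis.
auto t1 = egr::EagerLayoutAutotune<std::vector<int>>(op_name, tensors_vector, &axis);
// One heavily sensitive attribute, e.g. conv2d's data_format.
auto t2 = egr::EagerLayoutAutotune<std::string>(op_name, tensors_vector, &data_format);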
+ +#pragma once + +#include "paddle/fluid/eager/api/generated/eager_generated/forwards/dygraph_functions.h" +#include "paddle/fluid/eager/eager_layout_transformer.h" +#include "paddle/fluid/imperative/layout_autotune.h" +#include "paddle/phi/backends/gpu/gpu_info.h" +namespace egr { + +// layout_agnostic_ops_ +// For agnostic op like add / relu +inline std::shared_ptr EagerLayoutAutotune( + const std::string& op_name, + const paddle::small_vector, + kSlotSmallVectorSize>& tensors_vector) { + VLOG(3) << " Optimze Layout agnostic op: " << op_name; + std::shared_ptr transposer = nullptr; + transposer = + std::make_shared(op_name, tensors_vector); + return transposer; +} + +// For lightly op like reduce +template +inline std::shared_ptr EagerLayoutAutotune( + const std::string& op_name, + const paddle::small_vector, + kSlotSmallVectorSize>& tensors_vector, + T* attr) { + std::shared_ptr transposer = nullptr; + bool unstart = + (paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() == + paddle::experimental::DataLayout::UNDEFINED); + if (unstart) { + VLOG(3) << "Optimze Layout was not started" << op_name; + transposer = + std::make_shared(op_name, tensors_vector); + return transposer; + } + transposer = + std::make_shared(op_name); + return transposer; +} + +// For lightly op like argmax +template +inline std::shared_ptr EagerLayoutAutotune( + const std::string& op_name, + const paddle::small_vector, + kSlotSmallVectorSize>& tensors_vector, + T1* axis, + T2* keep_dim) { + return EagerLayoutAutotune(op_name, tensors_vector, axis); +} + +// heavily string data_format data_layout +template <> +inline std::shared_ptr EagerLayoutAutotune( + const std::string& op_name, + const paddle::small_vector, + kSlotSmallVectorSize>& tensors_vector, + std::string* attr) { + VLOG(3) << " Optimze Layout heavily op: " << op_name; + auto transposer = + std::make_shared(op_name, tensors_vector); + if (paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() == + paddle::experimental::DataLayout::UNDEFINED) { + // Layout autotune only supports model with convolutional layers + VLOG(3) << "Optimze Layout was not started" << op_name; + if (op_name != "conv2d") { + return transposer; + } else { +#if defined(PADDLE_WITH_CUDA) + if (paddle::platform::is_gpu_place(tensors_vector[0][0].place()) && + !phi::backends::gpu::TensorCoreAvailable()) { + paddle::imperative::LayoutAutoTune::Instance().DisableLayoutAutoTune(); + return transposer; + } +#endif + auto data_type = tensors_vector[0][0].dtype(); + bool is_tune_fp32 = + (data_type == paddle::experimental::DataType::FLOAT32) && + (*attr == "NHWC"); + bool is_tune_fp16 = + (data_type == paddle::experimental::DataType::FLOAT16) && + (*attr == "NCHW"); + if (is_tune_fp32) { + paddle::imperative::LayoutAutoTune::Instance().SetDesiredLayout( + paddle::experimental::DataLayout::NCHW); + + paddle::imperative::LayoutAutoTune::Instance().SetDefaultLayout( + paddle::experimental::DataLayout::NHWC); + } else if (is_tune_fp16) { + paddle::imperative::LayoutAutoTune::Instance().SetDesiredLayout( + paddle::experimental::DataLayout::NHWC); + paddle::imperative::LayoutAutoTune::Instance().SetDefaultLayout( + paddle::experimental::DataLayout::NCHW); + } else { + paddle::imperative::LayoutAutoTune::Instance().DisableLayoutAutoTune(); + return transposer; + } + VLOG(3) << "Tune the layout from " << attr << " to " + << paddle::framework::DataLayoutToString( + paddle::imperative::LayoutAutoTune::Instance() + .GetDesiredLayout()); + } + } + + if 
(paddle::imperative::LayoutAutoTune::Instance().IsHeavilyLayoutSensitive( + op_name)) { + auto heavily_transposer = + std::make_shared(op_name, + attr); + return heavily_transposer; + } + VLOG(3) << op_name + << "'s LayoutTransformer is unimplemented. Use default " + "LayoutTransformer instead."; + return transposer; +} + +// lightly transpose +template <> +inline std::shared_ptr EagerLayoutAutotune( + const std::string& op_name, + const paddle::small_vector, + kSlotSmallVectorSize>& tensors_vector, + std::vector* attr) { + std::shared_ptr transposer = nullptr; + if (paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() == + paddle::experimental::DataLayout::UNDEFINED) { + VLOG(3) << " Optimze Layout Unstarted : " << op_name; + transposer = + std::make_shared(op_name, tensors_vector); + return transposer; + } + VLOG(3) << " Optimze Layout lightly op: " << op_name; + if (op_name == "transpose2") { + auto trans = std::make_shared(op_name); + if (tensors_vector[0][0].layout() == + paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout()) { + trans->SetAttr(attr, + tensors_vector[0][0].layout() == + paddle::experimental::DataLayout::NHWC); + return trans; + } + } + transposer = + std::make_shared(op_name); + return transposer; +} + +// lightly int argmax +template <> +inline std::shared_ptr +EagerLayoutAutotune( + const std::string& op_name, + const paddle::small_vector, + kSlotSmallVectorSize>& tensors_vector, + paddle::experimental::Scalar* axis, + bool* keep_dim) { + std::shared_ptr transposer = nullptr; + if (paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() == + paddle::experimental::DataLayout::UNDEFINED) { + VLOG(3) << " Optimze Layout Unstarted : " << op_name; + transposer = + std::make_shared(op_name, tensors_vector); + return transposer; + } + auto desired_layout = + paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout(); + if (op_name == "argmax") { + std::shared_ptr argmax_transform = nullptr; + argmax_transform = std::make_shared(op_name); + if ((tensors_vector[0][0].layout() == desired_layout) && (*keep_dim)) { + argmax_transform->SetAttr(axis, + tensors_vector[0][0].layout() == + paddle::experimental::DataLayout::NHWC); + return argmax_transform; + } + } + VLOG(3) << " Optimze Layout lightly op: " << op_name; + transposer = + std::make_shared(op_name); + return transposer; +} + +// lightly int flatten +template <> +inline std::shared_ptr EagerLayoutAutotune( + const std::string& op_name, + const paddle::small_vector, + kSlotSmallVectorSize>& tensors_vector, + int* start_axis, + int* stop_axis) { + std::shared_ptr transposer = nullptr; + if (paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() == + paddle::experimental::DataLayout::UNDEFINED) { + VLOG(3) << " Optimze Layout Unstarted : " << op_name; + transposer = + std::make_shared(op_name, tensors_vector); + return transposer; + } + bool no_tranpose = + tensors_vector[0][0].layout() == + paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout(); + bool is_valid = ((*start_axis) == 1 && (*stop_axis) == 3); + if (op_name == "flatten" || op_name == "flatten_contiguous_range") { + if (no_tranpose && is_valid) { + std::shared_ptr flatten_transform = nullptr; + flatten_transform = std::make_shared(op_name); + return flatten_transform; + } + } + + VLOG(3) << " Optimze Layout lightly op: " << op_name; + transposer = + std::make_shared(op_name); + return transposer; +} + +// lightly int Concat +// lightly T can be int vector vector IntArray +template <> // default int 
+inline std::shared_ptr +EagerLayoutAutotune( + const std::string& op_name, + const paddle::small_vector, + kSlotSmallVectorSize>& tensors_vector, + paddle::experimental::Scalar* axis) { + auto desired_layout = + paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout(); + std::shared_ptr transposer = nullptr; + if (desired_layout == paddle::experimental::DataLayout::UNDEFINED) { + VLOG(3) << " Optimze Layout Unstarted : " << op_name; + transposer = + std::make_shared(op_name, tensors_vector); + return transposer; + } + + bool need_transpose = false; + for (size_t i = 0; i < tensors_vector.size(); i++) { + for (size_t idx = 0; idx < tensors_vector[0].size(); idx++) { + if (desired_layout != tensors_vector[i][idx].layout()) { + need_transpose = true; + } + } + } + + if (need_transpose) { + VLOG(3) << "Concat need transpose to NCHW " << op_name; + transposer = + std::make_shared(op_name); + return transposer; + } else { + VLOG(3) << " Optimze Layout lightly op: " << op_name; + auto trans = std::make_shared(op_name); + trans->SetAttr(axis, desired_layout); + return trans; + } +} + +} // namespace egr diff --git a/paddle/fluid/eager/eager_layout_transformer.h b/paddle/fluid/eager/eager_layout_transformer.h new file mode 100644 index 00000000000..3f2717be6be --- /dev/null +++ b/paddle/fluid/eager/eager_layout_transformer.h @@ -0,0 +1,493 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
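[Editorial note] The file below begins with EagerTraceTransposeOp, which reorders a 4-D tensor by tracing an ordinary transpose op, so the layout conversion is recorded on the autograd graph like any other dygraph call. A quick sketch of the permutations it uses and a hypothetical call site (`input` is a placeholder; shapes are illustrative):

// to NHWC: axis = {0, 2, 3, 1}, e.g. NCHW [8, 3, 224, 224] -> [8, 224, 224, 3]
// to NCHW: axis = {0, 3, 1, 2}, e.g. NHWC [8, 224, 224, 3] -> [8, 3, 224, 224]
// Non-4-D inputs are returned unchanged.
auto desired =
    paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
auto tuned_input = egr::EagerTraceTransposeOp(desired, input);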
+ +#pragma once + +#include "paddle/fluid/eager/api/generated/eager_generated/forwards/dygraph_functions.h" +#include "paddle/fluid/imperative/layout_autotune.h" +#include "paddle/phi/core/dense_tensor.h" +#include "paddle/phi/core/tensor_utils.h" +namespace egr { +inline paddle::experimental::Tensor EagerTraceTransposeOp( + const paddle::experimental::DataLayout layout, + const paddle::experimental::Tensor& in) { + if (in.shape().size() != 4) { + VLOG(4) << "Shape is " << in.shape().size() << " can't transpose to" + << paddle::framework::DataLayoutToString(layout); + return in; + } + std::vector axis; + if (layout == paddle::experimental::DataLayout::NHWC) { + axis = {0, 2, 3, 1}; + } else if (layout == paddle::experimental::DataLayout::NCHW) { + axis = {0, 3, 1, 2}; + } else { + axis = {0, 1, 2, 3}; + } + auto out_tensor = transpose_dygraph_function(in, axis); + VLOG(4) << "AutoTune Transpose from " + << paddle::framework::DataLayoutToString(in.layout()) << " to " + << paddle::framework::DataLayoutToString(layout); + return out_tensor; +} + +// agnostic op +class EagerLayoutTransformer { + public: + EagerLayoutTransformer() : op_name_("") {} + explicit EagerLayoutTransformer( + const std::string& op_name, + const paddle::small_vector, + kSlotSmallVectorSize>& tensors_vector) + : op_name_(op_name) { + final_layout_ = "UNDEFINED"; + auto desired_layout = + paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout(); + for (size_t i = 0; i < tensors_vector.size(); i++) { + for (size_t idx = 0; idx < tensors_vector[0].size(); idx++) { + if (final_layout_ == "UNDEFINED") { + final_layout_ = paddle::framework::DataLayoutToString( + tensors_vector[0][0].layout()); + } else if (tensors_vector[i][idx].layout() == desired_layout) { + final_layout_ = paddle::framework::DataLayoutToString(desired_layout); + break; + } + } + } + VLOG(4) << op_name_ << "final_layout_ is " << final_layout_; + } + + EagerLayoutTransformer(const EagerLayoutTransformer&) = delete; + + EagerLayoutTransformer& operator=(const EagerLayoutTransformer&) = delete; + + virtual ~EagerLayoutTransformer() {} + + virtual paddle::optional TransInTensor( + const std::string& in_name, + const paddle::optional& in) { + VLOG(4) << op_name_ << "is is agnostic, final_layout_ is " << final_layout_; + return in; + } + + virtual paddle::optional> + TransInTensor( + const std::string& in_name, + const paddle::optional>& in) { + return in; + } + + virtual std::vector TransInTensor( + const std::string& in_name, + const std::vector& in) { + return in; + } + + virtual paddle::experimental::Tensor TransInTensor( + const std::string& in_name, const paddle::experimental::Tensor& in) { + return in; + } + + virtual void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) { + bool use_default = (final_layout_ == "Undefined(AnyLayout)" || + final_layout_ == ("UNDEFINED")); + auto layout = paddle::framework::StringToDataLayout(final_layout_); + if (!use_default) { + phi::DenseTensorUtils::GetMutableMeta( + static_cast(out_tensor->impl().get())) + ->layout = layout; + } + VLOG(4) << op_name_ << "is is agnostic, use_default " << use_default; + } + + virtual void SetOutTensorLayout( + std::vector* out_tensor) { + bool use_default = (final_layout_ == "Undefined(AnyLayout)" || + final_layout_ == ("UNDEFINED")); + if (!use_default) { + for (size_t i = 0; i < out_tensor->size(); i++) { + phi::DenseTensorUtils::GetMutableMeta( + static_cast((*out_tensor)[i].impl().get())) + ->layout = + 
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout(); + } + } + VLOG(4) << op_name_ << "is is agnostic, use_default " << use_default; + } + + protected: + std::string op_name_; + std::string final_layout_; +}; + +class EagerHeavilyLayoutSensitiveOpTransformer : public EagerLayoutTransformer { + public: + explicit EagerHeavilyLayoutSensitiveOpTransformer(const std::string& op_name, + std::string* layout) + : op_name_(op_name), + desired_layout_( + paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout()) { + VLOG(3) << "Optimze Layout heavily op: " << op_name; + final_layout_ = paddle::framework::DataLayoutToString(desired_layout_); + if ((*layout) != final_layout_) { + *layout = final_layout_; + } + } + + virtual paddle::optional> + TransInTensor( + const std::string& in_name, + const paddle::optional>& in) { + VLOG(4) << op_name_ << "is is heavily"; + return in; + } + + virtual paddle::optional TransInTensor( + const std::string& in_name, + const paddle::optional& in) { + VLOG(4) << op_name_ << "is is heavily"; + return in; + } + + paddle::experimental::Tensor TransInTensor( + const std::string& in_name, const paddle::experimental::Tensor& in) { + if (heavily_input_.count(in_name) != 0 && in.layout() != desired_layout_) { + VLOG(4) << op_name_ << "'s " << in_name << " need transpose from " + << paddle::framework::DataLayoutToString(in.layout()) << " to " + << final_layout_; + auto out_tensor = EagerTraceTransposeOp(desired_layout_, in); + return out_tensor; + } + return in; + } + + void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) { + if (out_tensor->layout() != desired_layout_) { + VLOG(4) << " Set Out_tensor's layout from " + << paddle::framework::DataLayoutToString(out_tensor->layout()) + << " to " << final_layout_; + phi::DenseTensorUtils::GetMutableMeta( + static_cast(out_tensor->impl().get())) + ->layout = desired_layout_; + } + } + + void SetOutTensorLayout( + std::vector* out_tensor) { + for (size_t i = 0; i < out_tensor->size(); i++) { + SetOutTensorLayout((*out_tensor)[i]); + } + } + + void SetOutTensorLayout( + std::vector* out_tensor) { + for (size_t i = 0; i < out_tensor->size(); i++) { + if ((*out_tensor)[i].layout() != desired_layout_) { + VLOG(4) << " Set Out_tensor's layout from " + << paddle::framework::DataLayoutToString( + (*out_tensor)[i].layout()) + << " to " << final_layout_; + phi::DenseTensorUtils::GetMutableMeta( + static_cast((*out_tensor)[i].impl().get())) + ->layout = desired_layout_; + } + } + } + + protected: + std::string op_name_; + std::string final_layout_; + const paddle::experimental::DataLayout desired_layout_; + std::unordered_set heavily_input_{"x", "y", "input"}; +}; + +class EagerLightlyLayoutSensitiveOpTransformer : public EagerLayoutTransformer { + public: + EagerLightlyLayoutSensitiveOpTransformer() {} + explicit EagerLightlyLayoutSensitiveOpTransformer(const std::string& op_name) + : op_name_(op_name) { + VLOG(3) << "Optimze Layout lightly " << op_name; + auto desired_layout = + paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout(); + final_layout_ = paddle::framework::DataLayoutToString(desired_layout); + } + + // transpose from desired to default + paddle::experimental::Tensor TransInTensor( + const std::string& in_name, const paddle::experimental::Tensor& in) { + std::string input_layout = + paddle::framework::DataLayoutToString(in.layout()); + auto default_layout = + paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout(); + + if (final_layout_ == input_layout && in.shape().size() 
== 4) { + VLOG(4) << op_name_ << "'s " << in_name << " need transpose from " + << input_layout << " to default_layout"; + auto out_tensor = EagerTraceTransposeOp( + paddle::experimental::DataLayout::UNDEFINED, in); + phi::DenseTensorUtils::GetMutableMeta( + static_cast(out_tensor.impl().get())) + ->layout = default_layout; + return out_tensor; + } + VLOG(4) << in_name << "'s layout is " << input_layout; + return in; + } + + virtual std::vector TransInTensor( + const std::string& in_name, + const std::vector& in) { + std::vector result; + auto desired_layout = + paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout(); + auto default_layout = + paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout(); + for (size_t i = 0; i < in.size(); i++) { + auto in_tensor = in[i]; + if (in_tensor.layout() == desired_layout) { + VLOG(4) << op_name_ << "'s " << in_name << " need transpose from " + << final_layout_ << " to default_layout"; + auto out_tensor = EagerTraceTransposeOp( + paddle::experimental::DataLayout::UNDEFINED, in_tensor); + phi::DenseTensorUtils::GetMutableMeta( + static_cast(out_tensor.impl().get())) + ->layout = default_layout; + result.emplace_back(out_tensor); + } else { + result.emplace_back(in_tensor); + } + } + return result; + } + + void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) { + auto out_layout = out_tensor->layout(); + auto default_layout = + paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout(); + if (out_layout != default_layout) { + VLOG(4) << op_name_ << "'s out need transpose to default_layout"; + phi::DenseTensorUtils::GetMutableMeta( + static_cast(out_tensor->impl().get())) + ->layout = default_layout; + } + } + + void SetOutTensorLayout( + std::vector* out_tensor) { + for (size_t i = 0; i < out_tensor->size(); i++) { + VLOG(4) << "out layout is" + << paddle::framework::DataLayoutToString( + (*out_tensor)[i]->layout()); + SetOutTensorLayout((*out_tensor)[i]); + } + } + + void SetOutTensorLayout( + std::vector* out_tensor) { + auto default_layout = + paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout(); + for (size_t i = 0; i < out_tensor->size(); i++) { + VLOG(4) << " out_tensor layout trans to default "; + phi::DenseTensorUtils::GetMutableMeta( + static_cast((*out_tensor)[i].impl().get())) + ->layout = default_layout; + } + } + + protected: + std::string op_name_; + std::string final_layout_; + std::unordered_set heavily_input_{"x", "y", "input"}; +}; + +class EagerTransposeOpTransformer + : public EagerLightlyLayoutSensitiveOpTransformer { + public: + EagerTransposeOpTransformer() {} + explicit EagerTransposeOpTransformer(const std::string& op_name) + : op_name_(op_name) { + VLOG(3) << "Optimze Layout TransposeOpTransformer " << op_name; + auto desired_layout = + paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout(); + std::string desired_layout_str = + paddle::framework::DataLayoutToString(desired_layout); + final_layout_ = desired_layout_str; + } + + void SetAttr(std::vector* axis, bool is_nhwc) { + // input's layout is nhwc and input's layout === desired_layout + std::vector perm_nchw = {0, 2, 3, 1}; + std::vector perm_nhwc = {0, 3, 1, 2}; + auto perm = is_nhwc ? 
perm_nhwc : perm_nchw; + (*axis)[0] = perm[(*axis)[0]]; + (*axis)[1] = perm[(*axis)[1]]; + (*axis)[2] = perm[(*axis)[2]]; + (*axis)[3] = perm[(*axis)[3]]; + VLOG(4) << " EagerTransposeOpTransformer " << op_name_ + << "'s layout is equal to desire: " << is_nhwc; + } + + paddle::experimental::Tensor TransInTensor( + const std::string& in_name, const paddle::experimental::Tensor& in) { + VLOG(4) << "with no transpose: EagerTransposeOpTransformer " << in_name + << "'s layout is " + << paddle::framework::DataLayoutToString(in.layout()); + return in; + } + + void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) { + auto desired_layout = + paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout(); + if (out_tensor->layout() != desired_layout) { + VLOG(4) << " Set Out_tensor's layout from " + << paddle::framework::DataLayoutToString(out_tensor->layout()) + << " to " << final_layout_; + phi::DenseTensorUtils::GetMutableMeta( + static_cast(out_tensor->impl().get())) + ->layout = desired_layout; + } + } + + protected: + std::string op_name_; + std::string final_layout_; + std::unordered_set heavily_input_{"x", "y", "input"}; +}; + +class EagerArgmaxOpTransformer + : public EagerLightlyLayoutSensitiveOpTransformer { + public: + EagerArgmaxOpTransformer() {} + explicit EagerArgmaxOpTransformer(const std::string& op_name) + : op_name_(op_name) { + VLOG(3) << "Optimze Layout lightly " << op_name; + } + + void SetAttr(paddle::experimental::Scalar* axis, bool is_nhwc) { + std::vector perm_nhwc = {0, 3, 1, 2}; + std::vector perm_nchw = {0, 2, 3, 1}; + auto perm = is_nhwc ? perm_nhwc : perm_nchw; + int axes = axis->to(); + (*axis) = static_cast(perm[axes]); + } + + void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) { + VLOG(4) << "EagerArgmaxOpTransformer's out layout is" + << paddle::framework::DataLayoutToString(out_tensor->layout()); + auto desired_layout = + paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout(); + if (desired_layout != out_tensor->layout()) { + VLOG(4) << "Change layout from " + << paddle::framework::DataLayoutToString(out_tensor->layout()) + << " to " << final_layout_; + phi::DenseTensorUtils::GetMutableMeta( + static_cast(out_tensor->impl().get())) + ->layout = desired_layout; + } + } + + protected: + std::string op_name_; + std::string final_layout_; + std::unordered_set heavily_input_{"x", "y", "input"}; +}; + +class EagerFlattenOpTransformer + : public EagerLightlyLayoutSensitiveOpTransformer { + public: + EagerFlattenOpTransformer() {} + explicit EagerFlattenOpTransformer(const std::string& op_name) + : op_name_(op_name) { + VLOG(3) << "Optimze Layout lightly " << op_name; + auto desired_layout = + paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout(); + std::string desired_layout_str = + paddle::framework::DataLayoutToString(desired_layout); + final_layout_ = desired_layout_str; + } + + // transpose from NHWC to NCHW + paddle::experimental::Tensor TransInTensor( + const std::string& in_name, const paddle::experimental::Tensor& in) { + return in; + } + + void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) { + VLOG(4) << "EagerArgmaxOpTransformer's out layout is" + << paddle::framework::DataLayoutToString(out_tensor->layout()); + auto layout = paddle::framework::StringToDataLayout(final_layout_); + if (layout != out_tensor->layout()) { + VLOG(4) << "Change layout from " + << paddle::framework::DataLayoutToString(out_tensor->layout()) + << " to " << final_layout_; + phi::DenseTensorUtils::GetMutableMeta( + 
static_cast(out_tensor->impl().get())) + ->layout = layout; + } + } + + protected: + std::string op_name_; + std::string final_layout_; + std::unordered_set heavily_input_{"x", "y", "input"}; +}; + +class EagerConcatOpTransformer + : public EagerLightlyLayoutSensitiveOpTransformer { + public: + EagerConcatOpTransformer() {} + explicit EagerConcatOpTransformer(const std::string& op_name) + : op_name_(op_name) { + VLOG(3) << "Optimze Layout lightly " << op_name; + auto desired_layout = + paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout(); + std::string desired_layout_str = + paddle::framework::DataLayoutToString(desired_layout); + final_layout_ = desired_layout_str; + } + + void SetAttr(paddle::experimental::Scalar* axis, + paddle::framework::DataLayout layout) { + std::vector perm_nhwc = {0, 3, 1, 2}; + std::vector perm_nchw = {0, 2, 3, 1}; + int axes = axis->to(); + auto perm = + (paddle::framework::DataLayout::NHWC == layout) ? perm_nhwc : perm_nchw; + (*axis) = static_cast(perm[axes]); + } + + virtual std::vector TransInTensor( + const std::string& in_name, + const std::vector& in) { + return in; + } + + void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) { + auto layout = paddle::framework::StringToDataLayout(final_layout_); + if (layout != out_tensor->layout()) { + VLOG(4) << "Change layout from " + << paddle::framework::DataLayoutToString(out_tensor->layout()) + << " to " << final_layout_; + phi::DenseTensorUtils::GetMutableMeta( + static_cast(out_tensor->impl().get())) + ->layout = layout; + } + } + + protected: + std::string op_name_; + std::string final_layout_; + std::unordered_set heavily_input_{"x", "y", "input"}; +}; +} // namespace egr diff --git a/paddle/fluid/imperative/layout_autotune.cc b/paddle/fluid/imperative/layout_autotune.cc index f37105e2581..d5a9ba69010 100644 --- a/paddle/fluid/imperative/layout_autotune.cc +++ b/paddle/fluid/imperative/layout_autotune.cc @@ -25,12 +25,7 @@ namespace imperative { bool LayoutAutoTune::UseLayoutAutoTune() const { #if defined(PADDLE_WITH_CUDA) - if (!phi::backends::gpu::TensorCoreAvailable()) { - LayoutAutoTune::Instance().DisableLayoutAutoTune(); - return false; - } else { - return use_layout_autotune_; - } + return use_layout_autotune_; #else return false; #endif @@ -168,6 +163,12 @@ paddle::imperative::NameVarMap AutoTuneLayout( if (op_type != "conv2d") { return ins; } else { +#if defined(PADDLE_WITH_CUDA) + if (!phi::backends::gpu::TensorCoreAvailable()) { + LayoutAutoTune::Instance().DisableLayoutAutoTune(); + return ins; + } +#endif auto conv_in_type = framework::proto::VarType::FP32; auto& in_vars = ins.at("Input")[0]; if (GetDataType(in_vars) == framework::proto::VarType::FP16) { @@ -213,6 +214,7 @@ paddle::imperative::NameVarMap AutoTuneLayout( return transposer->Apply(ins, outs, attrs, tracer); } } + template paddle::imperative::NameVarMap AutoTuneLayout( const std::string& op_type, const paddle::imperative::NameVarMap& ins, diff --git a/paddle/fluid/imperative/layout_autotune.h b/paddle/fluid/imperative/layout_autotune.h index 2f3d9c38e9c..af7a89123ef 100644 --- a/paddle/fluid/imperative/layout_autotune.h +++ b/paddle/fluid/imperative/layout_autotune.h @@ -53,9 +53,13 @@ class LayoutAutoTune { return layout_agnostic_ops_.count(op_type) != 0; } - DataLayout GetDesiredLayout() const { return layout_; } + DataLayout GetDesiredLayout() const { return desired_layout_; } - void SetDesiredLayout(const DataLayout& layout) { layout_ = layout; } + DataLayout GetDefaultLayout() const { return 
default_layout_; } + + void SetDesiredLayout(const DataLayout& layout) { desired_layout_ = layout; } + + void SetDefaultLayout(const DataLayout& layout) { default_layout_ = layout; } private: LayoutAutoTune(); @@ -69,7 +73,9 @@ class LayoutAutoTune { std::unordered_set lightly_layout_sensitive_ops_{ "instance_norm", "softmax", "transpose", "transpose2", "reshape2"}; - DataLayout layout_{DataLayout::UNDEFINED}; + DataLayout desired_layout_{DataLayout::UNDEFINED}; + + DataLayout default_layout_{DataLayout::UNDEFINED}; }; template diff --git a/paddle/fluid/imperative/layout_transformer.h b/paddle/fluid/imperative/layout_transformer.h index 401b37a428e..3e857c4ec26 100644 --- a/paddle/fluid/imperative/layout_transformer.h +++ b/paddle/fluid/imperative/layout_transformer.h @@ -77,6 +77,9 @@ class LayoutTransformer { for (auto& var : pair.second) { // Once the any input is desired layout, we set in_layout is desired // layout. + if (in_layout == DataLayout::UNDEFINED) { + in_layout = paddle::imperative::GetDataLayout(var); + } if (var != nullptr && (paddle::imperative::GetDataLayout(var) == LayoutAutoTune::Instance().GetDesiredLayout())) { in_layout = LayoutAutoTune::Instance().GetDesiredLayout(); @@ -84,7 +87,11 @@ class LayoutTransformer { } } } - SetVarsLayout(outs, in_layout); + VLOG(3) << "Optimze Layout agnostic op: " << type_ << " " + << paddle::framework::DataLayoutToString(in_layout); + if (in_layout != DataLayout::UNDEFINED) { + SetVarsLayout(outs, in_layout); + } return ins; } diff --git a/paddle/fluid/pybind/eager_properties.cc b/paddle/fluid/pybind/eager_properties.cc index 12e262b3f7c..dfe9e03df5f 100644 --- a/paddle/fluid/pybind/eager_properties.cc +++ b/paddle/fluid/pybind/eager_properties.cc @@ -188,6 +188,25 @@ PyObject* tensor_properties_get_shape(TensorObject* self, void* closure) { EAGER_CATCH_AND_THROW_RETURN_NULL } +PyObject* tensor_properties_get_layout(TensorObject* self, void* closure) { + EAGER_TRY + std::string layout = ""; + if (!self->tensor.defined()) { + return ToPyObject(layout); + } + + if (egr::IsVariableCompatTensor(self->tensor)) { + VLOG(3) << "VariableCompatTensor does not support `layout` method."; + return ToPyObject(layout); + } else { + return ToPyObject( + paddle::framework::DataLayoutToString(self->tensor.layout())); + } + + return ToPyObject(layout); + EAGER_CATCH_AND_THROW_RETURN_NULL +} + PyObject* tensor_properties_get_place(TensorObject* self, void* closure) { EAGER_TRY return ToPyObject(self->tensor.place()); @@ -249,6 +268,7 @@ struct PyGetSetDef variable_properties[] = { nullptr, nullptr}, {"shape", (getter)tensor_properties_get_shape, nullptr, nullptr, nullptr}, + {"layout", (getter)tensor_properties_get_layout, nullptr, nullptr, nullptr}, // {"is_leaf", (getter)tensor_properties_get_is_leaf, nullptr, // nullptr, // nullptr}, @@ -271,6 +291,7 @@ struct PyGetSetDef string_tensor_variable_properties[] = { nullptr, nullptr}, {"shape", (getter)tensor_properties_get_shape, nullptr, nullptr, nullptr}, + {"layout", (getter)tensor_properties_get_layout, nullptr, nullptr, nullptr}, {"place", (getter)tensor_properties_get_place, nullptr, nullptr, nullptr}, {"_place_str", (getter)tensor_properties_get_place_str, diff --git a/paddle/fluid/pybind/imperative.cc b/paddle/fluid/pybind/imperative.cc index 34bfd385d4c..aeaa0dbff78 100644 --- a/paddle/fluid/pybind/imperative.cc +++ b/paddle/fluid/pybind/imperative.cc @@ -2062,6 +2062,15 @@ void BindImperative(py::module *m_ptr) { return std::vector(); } }) + .def_property_readonly( + "layout", + 
[](imperative::VarBase &self) { + if (self.Var().IsType()) { + auto layout = self.Var().Get().layout(); + return paddle::framework::DataLayoutToString(layout); + } + return std::string(""); + }) .def_property_readonly("is_leaf", &imperative::VarBase::IsLeaf, R"DOC( diff --git a/paddle/phi/api/lib/data_transform.cc b/paddle/phi/api/lib/data_transform.cc index 4f5ecf0aee1..10b01f94662 100644 --- a/paddle/phi/api/lib/data_transform.cc +++ b/paddle/phi/api/lib/data_transform.cc @@ -52,12 +52,16 @@ inline bool NeedTransformPlace(const paddle::platform::Place& input, return ret; } -inline bool NeedTransformLayout(const DataLayout& input, +inline bool NeedTransformLayout(const paddle::platform::Place& place, + const DataLayout& input, const DataLayout& target, const TransformFlag& transform_flag) { bool ret = transform_flag.need_trans_layout() && (input != DataLayout::ALL_LAYOUT && target != DataLayout::ALL_LAYOUT && input != target); + if (platform::is_gpu_place(place)) { + return false; + } return ret; } @@ -73,6 +77,7 @@ inline phi::DenseTensor TransDataLayout(const phi::DenseTensor& tensor, PADDLE_THROW(phi::errors::PreconditionNotMet( "Unsupported data layout cast from CPU to GPU.")); } + return tensor; } template @@ -196,8 +201,11 @@ phi::DenseTensor TransformData(phi::DenseTensor* tensor, phi::DenseTensor out = *tensor; bool trans_layout = false; bool trans_dtype = false; - if (NeedTransformLayout( - tensor->layout(), target_args_def.layout, transform_flag)) { + + if (NeedTransformLayout(tensor->place(), + tensor->layout(), + target_args_def.layout, + transform_flag)) { out = TransDataLayout(out, target_args_def.layout); trans_layout = true; } @@ -232,8 +240,10 @@ std::shared_ptr PrepareData( dense_tensor.place(), target_args_def.backend, transform_flag) && !NeedTransformDataType( dense_tensor.dtype(), target_args_def.dtype, transform_flag) && - !NeedTransformLayout( - dense_tensor.layout(), target_args_def.layout, transform_flag))) { + !NeedTransformLayout(dense_tensor.place(), + dense_tensor.layout(), + target_args_def.layout, + transform_flag))) { return std::static_pointer_cast(tensor_in); } phi::DenseTensor out = @@ -267,8 +277,10 @@ std::unique_ptr> PrepareData( tensor_in->place(), target_args_def.backend, transform_flag) && !NeedTransformDataType( tensor_in->dtype(), target_args_def.dtype, transform_flag) && - !NeedTransformLayout( - tensor_in->layout(), target_args_def.layout, transform_flag))) { + !NeedTransformLayout(tensor_in->place(), + tensor_in->layout(), + target_args_def.layout, + transform_flag))) { pt_tensors->emplace_back( *std::dynamic_pointer_cast(tensor_in)); } else { diff --git a/python/paddle/fluid/tests/unittests/test_layout_autotune.py b/python/paddle/fluid/tests/unittests/test_layout_autotune.py index 5cb53437fe9..fb91298c59d 100644 --- a/python/paddle/fluid/tests/unittests/test_layout_autotune.py +++ b/python/paddle/fluid/tests/unittests/test_layout_autotune.py @@ -21,9 +21,6 @@ import numpy import paddle import paddle.nn.functional as F -from paddle.fluid.framework import _enable_legacy_dygraph - -_enable_legacy_dygraph() class SimpleNet(paddle.nn.Layer): diff --git a/python/paddle/nn/functional/conv.py b/python/paddle/nn/functional/conv.py index 5cdd8732f6c..1f2ddb98bfb 100644 --- a/python/paddle/nn/functional/conv.py +++ b/python/paddle/nn/functional/conv.py @@ -129,6 +129,8 @@ def _conv_nd(x, if bias is not None: channel_dim = channel_dim + len( x.shape) if channel_dim < 0 else channel_dim + if pre_bias.layout == "NHWC": + channel_dim = 3 # last dim if 
isinstance(x, tuple): x = x[0] if isinstance(bias, tuple): -- GitLab
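[Editorial note] From the user's side, this patch also exposes a read-only `layout` property on Tensor, returning strings such as "NCHW" / "NHWC" (the same strings the conv.py check above compares against). A rough Python sketch; whether autotune actually rewrites layouts depends on the device, dtype, and the autotune switch, none of which this snippet configures:

import paddle
import paddle.nn.functional as F

x = paddle.rand([4, 3, 32, 32])          # constructed as NCHW
w = paddle.rand([8, 3, 3, 3])
y = F.conv2d(x, w, data_format="NCHW")

print(x.layout)   # "NCHW" by default
print(y.layout)   # "NHWC" only if layout autotune has retuned conv2d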