From 3da3462f3499cb4233cc779118a9ad9670c0ebf0 Mon Sep 17 00:00:00 2001 From: niuliling123 <51102941+niuliling123@users.noreply.github.com> Date: Tue, 11 Oct 2022 10:49:54 +0800 Subject: [PATCH] Update layout autotune for module with no modified (#46541) --- .../generator/eager_gen.py | 2 +- paddle/fluid/eager/eager_layout_auto_tune.h | 194 ++++------- paddle/fluid/eager/eager_layout_transformer.h | 303 +++++++----------- paddle/fluid/imperative/layout_autotune.cc | 2 + paddle/fluid/pybind/eager_properties.cc | 36 +++ paddle/fluid/pybind/imperative.cc | 43 ++- paddle/phi/api/lib/data_transform.cc | 3 +- .../tests/unittests/test_layout_autotune.py | 84 +---- python/paddle/nn/functional/conv.py | 4 +- 9 files changed, 285 insertions(+), 386 deletions(-) diff --git a/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py b/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py index 308e0348a21..8ec8e43cc0b 100644 --- a/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py +++ b/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py @@ -1093,7 +1093,7 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase): tensors_vector_list_str = "{ " + ",".join( amp_tensors_vector_list) + " }" - if len(amp_tensors_vector_list) == 0: + if len(amp_tensors_vector_list) == 0: # or forward_api_name == "shape": layout_logic_str = "" else: after_call_str = f"{returns_type_str} {result_name} = {forward_function_name}({layout_inputs_call_args_str});\n" diff --git a/paddle/fluid/eager/eager_layout_auto_tune.h b/paddle/fluid/eager/eager_layout_auto_tune.h index 5670275e2b7..7e0f916a7d3 100644 --- a/paddle/fluid/eager/eager_layout_auto_tune.h +++ b/paddle/fluid/eager/eager_layout_auto_tune.h @@ -32,70 +32,50 @@ inline bool NeedTransLayout( } return false; } -inline std::shared_ptr BaseTransformer( - const std::string& op_name, - const paddle::small_vector, - kSlotSmallVectorSize>& tensors_vector) { - std::shared_ptr transposer = nullptr; - bool unstart = - (paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() == - paddle::experimental::DataLayout::UNDEFINED); - auto first_layout = tensors_vector[0][0].layout(); - VLOG(3) << "Layout autotune was is start ? " << (!unstart) << op_name - << "'s layout is " << first_layout; - transposer = std::make_shared( - op_name, tensors_vector, first_layout); - return transposer; -} - -// For agnostic op like add, relu, exp inline std::shared_ptr EagerLayoutAutotune( const std::string& op_name, const paddle::small_vector, kSlotSmallVectorSize>& tensors_vector) { - auto desired_layout = - paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout(); - auto default_layout = - paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout(); + // For agnostic op like add, relu, exp auto first_layout = tensors_vector[0][0].layout(); - if (NeedTransLayout(tensors_vector, first_layout)) { + auto desired_layout = DesiredLayout(); + bool is_started = + !(desired_layout == paddle::experimental::DataLayout::UNDEFINED); + if (is_started && NeedTransLayout(tensors_vector, first_layout)) { bool need_trans_back = false; for (size_t i = 0; i < tensors_vector.size(); i++) { for (size_t idx = 0; idx < tensors_vector[0].size(); idx++) { if (4 != tensors_vector[i][idx].shape().size()) { need_trans_back = true; - VLOG(3) << "Agnostic op " << op_name << " shape is " - << tensors_vector[i][idx].shape().size() << " and layout is " - << tensors_vector[i][idx].layout(); } } } - auto final_layout = need_trans_back ? 
default_layout : desired_layout; + auto final_layout = need_trans_back ? DefaultLayout() : desired_layout; + VLOG(4) << op_name << "'s has different layout, need trans to " + << final_layout; return std::make_shared( op_name, tensors_vector, final_layout); } - return BaseTransformer(op_name, tensors_vector); + return std::make_shared( + op_name, tensors_vector, first_layout); } -// For lightly op like reduce template inline std::shared_ptr EagerLayoutAutotune( const std::string& op_name, const paddle::small_vector, kSlotSmallVectorSize>& tensors_vector, T* attr) { - VLOG(3) << "Lightly op " << op_name << "'s shape is " - << tensors_vector[0][0].shape().size() << " and layout is " - << tensors_vector[0][0].layout(); - - std::shared_ptr transposer = nullptr; - transposer = - std::make_shared(op_name); - return transposer; + // For lightly op like reduce + if (!(DesiredLayout() == paddle::experimental::DataLayout::UNDEFINED)) { + VLOG(4) << "LayoutAutotune was unstarted. Current op :" << op_name; + return std::make_shared( + op_name, tensors_vector, tensors_vector[0][0].layout()); + } + return std::make_shared(op_name); } -// For lightly op like argmax template inline std::shared_ptr EagerLayoutAutotune( const std::string& op_name, @@ -103,28 +83,23 @@ inline std::shared_ptr EagerLayoutAutotune( kSlotSmallVectorSize>& tensors_vector, T1* axis, T2* keep_dim) { - VLOG(3) << "Lightly op " << op_name << "'s shape is " - << tensors_vector[0][0].shape().size() << " and layout is " - << tensors_vector[0][0].layout(); - + // For lightly op like argmax return EagerLayoutAutotune(op_name, tensors_vector, axis); } -// heavily string data_format, data_layout template <> inline std::shared_ptr EagerLayoutAutotune( const std::string& op_name, const paddle::small_vector, kSlotSmallVectorSize>& tensors_vector, std::string* attr) { - auto first_layout = tensors_vector[0][0].layout(); + // Heavily op with (string) data_format, data_layout auto transposer = std::make_shared( - op_name, tensors_vector, first_layout); - if (paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() == - paddle::experimental::DataLayout::UNDEFINED) { + op_name, tensors_vector, tensors_vector[0][0].layout()); + if (DesiredLayout() == paddle::experimental::DataLayout::UNDEFINED) { // Layout autotune only supports model with convolutional layers - VLOG(3) << "Optimze Layout was not started " << op_name; if (op_name != "conv2d") { + VLOG(4) << "LayoutAutotune was unstarted. 
Current op :" << op_name; return transposer; } else { auto data_type = tensors_vector[0][0].dtype(); @@ -134,7 +109,8 @@ inline std::shared_ptr EagerLayoutAutotune( bool is_tune_fp16 = (data_type == paddle::experimental::DataType::FLOAT16) && (*attr == "NCHW"); - VLOG(3) << "Conv2d_dy's dtype " << data_type << " format" << (*attr); + VLOG(4) << "LayoutAutoTune assert with dtype and layout, Current op : " + << op_name; if (is_tune_fp32) { paddle::imperative::LayoutAutoTune::Instance().SetDesiredLayout( paddle::experimental::DataLayout::NCHW); @@ -147,58 +123,45 @@ inline std::shared_ptr EagerLayoutAutotune( paddle::imperative::LayoutAutoTune::Instance().SetDefaultLayout( paddle::experimental::DataLayout::NCHW); } else { + VLOG(4) << "DisableLayoutAutoTune accoding to Conv op" + << " dtype : " << data_type << " format : " << (*attr); egr::Controller::Instance().DisableLayoutAutoTune(); return transposer; } - VLOG(3) - << "Tune the layout from " << *attr << " to " - << paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout(); + VLOG(4) << "LayoutAutoTune from " << *attr << " to " << DesiredLayout(); } } if (paddle::imperative::LayoutAutoTune::Instance().IsHeavilyLayoutSensitive( op_name)) { - VLOG(3) - << op_name - << "'s LayoutTransformer is EagerHeavilyLayoutSensitiveOpTransformer"; - auto heavily_transposer = - std::make_shared(op_name, - attr); - return heavily_transposer; + return std::make_shared(op_name, + attr); } - - VLOG(3) << op_name << "'s LayoutTransformer is unimplemented. Use default."; - return transposer; + return std::make_shared(op_name); } -// lightly transpose template <> inline std::shared_ptr EagerLayoutAutotune( const std::string& op_name, const paddle::small_vector, kSlotSmallVectorSize>& tensors_vector, std::vector* attr) { - auto first_layout = tensors_vector[0][0].layout(); - std::shared_ptr transposer = nullptr; - if (paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() == - paddle::experimental::DataLayout::UNDEFINED) { - VLOG(3) << "Optimze Layout was not started" << op_name; - transposer = std::make_shared( - op_name, tensors_vector, first_layout); - return transposer; + // lightly transpose + if (DesiredLayout() == paddle::experimental::DataLayout::UNDEFINED) { + VLOG(4) << "LayoutAutotune was unstarted. Current op :" << op_name; + return std::make_shared( + op_name, tensors_vector, tensors_vector[0][0].layout()); } + if (op_name == "transpose2" && - (tensors_vector[0][0].layout() == - paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout())) { + (tensors_vector[0][0].layout() == DesiredLayout())) { auto trans = std::make_shared(op_name); trans->SetAttr(attr, tensors_vector[0][0].layout() == paddle::experimental::DataLayout::NHWC); return trans; } - transposer = - std::make_shared(op_name); - return transposer; + return std::make_shared(op_name); } // lightly int argmax @@ -210,19 +173,14 @@ EagerLayoutAutotune( kSlotSmallVectorSize>& tensors_vector, paddle::experimental::Scalar* axis, bool* keep_dim) { - auto first_layout = tensors_vector[0][0].layout(); - std::shared_ptr transposer = nullptr; - if (paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() == - paddle::experimental::DataLayout::UNDEFINED) { - VLOG(3) << "Optimze Layout was not started" << op_name; - transposer = std::make_shared( - op_name, tensors_vector, first_layout); - return transposer; + if (DesiredLayout() == paddle::experimental::DataLayout::UNDEFINED) { + VLOG(4) << "LayoutAutotune was unstarted. 
Current op :" << op_name; + return std::make_shared( + op_name, tensors_vector, tensors_vector[0][0].layout()); } - auto desired_layout = - paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout(); + if (op_name == "argmax" && - (tensors_vector[0][0].layout() == desired_layout) && (*keep_dim)) { + (tensors_vector[0][0].layout() == DesiredLayout()) && (*keep_dim)) { std::shared_ptr argmax_transform = nullptr; argmax_transform = std::make_shared(op_name); argmax_transform->SetAttr(axis, @@ -230,12 +188,9 @@ EagerLayoutAutotune( paddle::experimental::DataLayout::NHWC); return argmax_transform; } - transposer = - std::make_shared(op_name); - return transposer; + return std::make_shared(op_name); } -// lightly for flatten template <> inline std::shared_ptr EagerLayoutAutotune( const std::string& op_name, @@ -243,32 +198,22 @@ inline std::shared_ptr EagerLayoutAutotune( kSlotSmallVectorSize>& tensors_vector, int* start_axis, int* stop_axis) { - auto first_layout = tensors_vector[0][0].layout(); - std::shared_ptr transposer = nullptr; - auto desired_layout = - paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout(); - if (desired_layout == paddle::experimental::DataLayout::UNDEFINED) { - VLOG(3) << "Optimze Layout was not started" << op_name; - transposer = std::make_shared( - op_name, tensors_vector, first_layout); - return transposer; + if (DesiredLayout() == paddle::experimental::DataLayout::UNDEFINED) { + VLOG(4) << "Optimze Layout was not started" << op_name; + return std::make_shared( + op_name, tensors_vector, tensors_vector[0][0].layout()); } - bool no_tranpose = tensors_vector[0][0].layout() == desired_layout; + + bool no_tranpose = tensors_vector[0][0].layout() == DesiredLayout(); bool is_valid = ((*start_axis) == 1 && (*stop_axis) == 3); if (op_name == "flatten" || op_name == "flatten_contiguous_range") { if (no_tranpose && is_valid) { - std::shared_ptr flatten_transform = nullptr; - flatten_transform = std::make_shared(op_name); - return flatten_transform; + return std::make_shared(op_name); } } - - transposer = - std::make_shared(op_name); - return transposer; + return std::make_shared(op_name); } -// lightly int Concat template <> inline std::shared_ptr EagerLayoutAutotune( @@ -276,27 +221,26 @@ EagerLayoutAutotune( const paddle::small_vector, kSlotSmallVectorSize>& tensors_vector, paddle::experimental::Scalar* axis) { - auto desired_layout = - paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout(); - auto first_layout = tensors_vector[0][0].layout(); - std::shared_ptr transposer = nullptr; - if (desired_layout == paddle::experimental::DataLayout::UNDEFINED) { - VLOG(3) << "Optimze Layout was not started" << op_name; - transposer = std::make_shared( - op_name, tensors_vector, first_layout); - return transposer; + if (DesiredLayout() == paddle::experimental::DataLayout::UNDEFINED) { + VLOG(4) << "Optimze Layout was not started" << op_name; + return std::make_shared( + op_name, tensors_vector, tensors_vector[0][0].layout()); } + auto desired_layout = DesiredLayout(); if (NeedTransLayout(tensors_vector, desired_layout)) { - VLOG(3) << op_name << " need transpose to default layout"; - transposer = - std::make_shared(op_name); - return transposer; - } else { - auto trans = std::make_shared(op_name); - trans->SetAttr(axis, desired_layout); - return trans; + VLOG(4) << op_name << "'s has different layout"; + return std::make_shared(op_name); + } + if (op_name == "Concat") { + if (desired_layout == tensors_vector[0][0].layout() && + 
tensors_vector[0][0].shape().size() == 4) { + auto trans = std::make_shared(op_name); + trans->SetAttr(axis, desired_layout); + return trans; + } } + return std::make_shared(op_name); } } // namespace egr diff --git a/paddle/fluid/eager/eager_layout_transformer.h b/paddle/fluid/eager/eager_layout_transformer.h index 80398973c4f..4f161d3aa37 100644 --- a/paddle/fluid/eager/eager_layout_transformer.h +++ b/paddle/fluid/eager/eager_layout_transformer.h @@ -23,7 +23,7 @@ inline paddle::experimental::Tensor EagerTraceTransposeOp( const paddle::experimental::DataLayout layout, const paddle::experimental::Tensor& in) { VLOG(4) << "AutoTune Transpose from " << in.layout() << " to " << layout - << ", tensor's shape is " << in.shape().size(); + << ", tensor's dim size is " << in.shape().size(); if (in.shape().size() != 4) { return in; } @@ -36,12 +36,72 @@ inline paddle::experimental::Tensor EagerTraceTransposeOp( axis = {0, 1, 2, 3}; } auto out_tensor = transpose_ad_func(in, axis); - VLOG(4) << "AutoTune Transpose from " - << paddle::framework::DataLayoutToString(in.layout()) << " to " - << paddle::framework::DataLayoutToString(layout); + VLOG(4) << "AutoTune Transpose from " << in.layout() << " to " << layout; return out_tensor; } +inline paddle::experimental::DataLayout DesiredLayout() { + return paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout(); +} + +inline paddle::experimental::DataLayout DefaultLayout() { + return paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout(); +} + +inline void UpdateLayout(paddle::experimental::Tensor* out_tensor, + const paddle::experimental::DataLayout layout) { + if (out_tensor->layout() != layout) { + VLOG(4) << "Update out_tensor's layout from " << out_tensor->layout() + << " to " << layout; + phi::DenseTensorUtils::GetMutableMeta( + static_cast(out_tensor->impl().get())) + ->layout = layout; + } +} + +inline void DealWithShapeOp(paddle::experimental::Tensor* out_tensor, + const paddle::experimental::DataLayout layout, + int dim_size) { + auto des_layout = DesiredLayout(); + auto def_layout = DefaultLayout(); + int32_t* value = + static_cast(out_tensor->impl().get())->data(); + bool change_dim = + (des_layout != def_layout && layout == des_layout && dim_size == 4); + VLOG(6) << "'Shape OP', layout autotune: True" + << " desired_layout: " << des_layout + << " default_layout: " << def_layout + << " tensor layout: " << out_tensor->layout() + << " tensor's shape size is : " << dim_size; + // It's means input tensor has been autotune and tensor's layout is + // desired_layout + std::vector dims; + dims.resize(dim_size); + for (int i = 0; i < dim_size; i++) { + dims[i] = value[i]; + } + auto des_str = paddle::framework::DataLayoutToString(des_layout); + if (change_dim && des_str == "NCHW") { + // NCHW -> NHWC + VLOG(6) << "layout autotune get Shape from NCHW -> NHWC " << value[0] << " " + << value[1] << " " << value[2] << " " << value[3] << " to " + << dims[0] << " " << dims[2] << " " << dims[3] << " " << dims[1]; + value[0] = dims[0]; + value[1] = dims[2]; + value[2] = dims[3]; + value[3] = dims[1]; + } else if (change_dim && des_str == "NHWC") { + // NHWC -> NCHW + VLOG(6) << "layout autotune get Shape from NHWC -> NCHW " << value[0] << " " + << value[1] << " " << value[2] << " " << value[3] << " to " + << dims[0] << " " << dims[3] << " " << dims[1] << " " << dims[2]; + value[0] = dims[0]; + value[1] = dims[3]; + value[2] = dims[1]; + value[3] = dims[2]; + } +} + // agnostic op class EagerLayoutTransformer { using Layout = 
paddle::experimental::DataLayout; @@ -58,27 +118,27 @@ class EagerLayoutTransformer { const paddle::small_vector, kSlotSmallVectorSize>& tensors_vector, const Layout final_layout = Layout::UNDEFINED) - : op_name_(op_name), final_layout_(final_layout) { - VLOG(4) << "Agnostic op : " << op_name_ << " final_layout_ is " - << final_layout_; + : op_name_(op_name), final_layout_(final_layout), dim_size_(1) { + VLOG(4) << "Agnostic op : " << op_name_ << "'s layout is " << final_layout_; } virtual ~EagerLayoutTransformer() {} virtual paddle::experimental::Tensor TransInTensor( const std::string& in_name, const paddle::experimental::Tensor& in) { - if (final_layout_ == Layout::UNDEFINED || final_layout_ == in.layout()) { - VLOG(4) << "EagerLayoutTransformer with no trans"; - return in; - } else { // from NCHW to NHWC - VLOG(4) << "EagerLayoutTransformer with trans from " << in.layout() - << " to " << final_layout_; + // update in shape size + dim_size_ = in.shape().size(); + bool need_trans = + !(final_layout_ == Layout::UNDEFINED || final_layout_ == in.layout()); + // This is for Agnostic op when layout is differnet + if (need_trans) { auto out_tensor = EagerTraceTransposeOp(final_layout_, in); phi::DenseTensorUtils::GetMutableMeta( static_cast(out_tensor.impl().get())) ->layout = final_layout_; return out_tensor; } + return in; } virtual paddle::optional TransInTensor( @@ -90,7 +150,6 @@ class EagerLayoutTransformer { virtual std::vector TransInTensors( const std::string& in_name, const std::vector& in) { - VLOG(4) << " TransInTensor"; return in; } @@ -98,72 +157,59 @@ class EagerLayoutTransformer { TransInTensors( const std::string& in_name, const paddle::optional>& in) { - VLOG(4) << " TransInTensor"; - if (in) { - return TransInTensors(in_name, *in); - } - return in; - } - - virtual void SetOutTensorLayout( - paddle::optional* out_tensor) { - VLOG(4) << "optional out_tensor"; + return (in ? 
TransInTensors(in_name, *in) : in); } virtual void SetOutTensorLayout( std::vector* out_tensor) { - bool use_default = (final_layout_ == Layout::UNDEFINED); - if (!use_default) { + bool update_layout = !(final_layout_ == Layout::UNDEFINED); + if (update_layout) { for (size_t i = 0; i < out_tensor->size(); i++) { phi::DenseTensorUtils::GetMutableMeta( static_cast((*out_tensor)[i].impl().get())) - ->layout = - paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout(); + ->layout = DesiredLayout(); } } - VLOG(4) << op_name_ << "is is agnostic, use_default " << use_default; + } + + virtual void SetOutTensorLayout( + paddle::optional* out_tensor) { + VLOG(4) << "AutoTune out tensor is optional"; } virtual void SetOutTensorLayout( paddle::optional>* out_tensor) { - VLOG(4) << "optional out_tensor"; + VLOG(4) << "AutoTune out tensor is optional"; } virtual void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) { - bool use_default = final_layout_ == Layout::UNDEFINED; - if (!use_default) { - phi::DenseTensorUtils::GetMutableMeta( - static_cast(out_tensor->impl().get())) - ->layout = final_layout_; + if (op_name_ == "shape") { + return DealWithShapeOp(out_tensor, final_layout_, dim_size_); + } + bool need_update = !(final_layout_ == Layout::UNDEFINED); + if (need_update) { + UpdateLayout(out_tensor, final_layout_); } - VLOG(4) << op_name_ << "is is agnostic, use_default " << use_default; } protected: std::string op_name_; const Layout final_layout_; + int dim_size_; }; class EagerHeavilyLayoutSensitiveOpTransformer : public EagerLayoutTransformer { public: explicit EagerHeavilyLayoutSensitiveOpTransformer(const std::string& op_name, std::string* layout) - : op_name_(op_name), - desired_layout_( - paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout()) { - VLOG(3) << "Optimze Layout heavily op: " << op_name; - final_layout_ = paddle::framework::DataLayoutToString(desired_layout_); - if ((*layout) != final_layout_) { - *layout = final_layout_; - } + : op_name_(op_name), desired_layout_(DesiredLayout()) { + VLOG(4) << "Heavily op: " << op_name; + *layout = paddle::framework::DataLayoutToString(DesiredLayout()); } paddle::experimental::Tensor TransInTensor( const std::string& in_name, const paddle::experimental::Tensor& in) { if (heavily_input_.count(in_name) != 0 && in.layout() != desired_layout_) { - VLOG(4) << op_name_ << "'s " << in_name << " need transpose from " - << paddle::framework::DataLayoutToString(in.layout()) << " to " - << final_layout_; auto out_tensor = EagerTraceTransposeOp(desired_layout_, in); return out_tensor; } @@ -171,14 +217,7 @@ class EagerHeavilyLayoutSensitiveOpTransformer : public EagerLayoutTransformer { } void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) { - if (out_tensor->layout() != desired_layout_) { - VLOG(4) << " Set Out_tensor's layout from " - << paddle::framework::DataLayoutToString(out_tensor->layout()) - << " to " << final_layout_; - phi::DenseTensorUtils::GetMutableMeta( - static_cast(out_tensor->impl().get())) - ->layout = desired_layout_; - } + UpdateLayout(out_tensor, desired_layout_); } void SetOutTensorLayout( @@ -192,10 +231,8 @@ class EagerHeavilyLayoutSensitiveOpTransformer : public EagerLayoutTransformer { std::vector* out_tensor) { for (size_t i = 0; i < out_tensor->size(); i++) { if ((*out_tensor)[i].layout() != desired_layout_) { - VLOG(4) << " Set Out_tensor's layout from " - << paddle::framework::DataLayoutToString( - (*out_tensor)[i].layout()) - << " to " << final_layout_; + VLOG(4) << "Update 
out_tensor's layout from " + << (*out_tensor)[i].layout() << " to " << desired_layout_; phi::DenseTensorUtils::GetMutableMeta( static_cast((*out_tensor)[i].impl().get())) ->layout = desired_layout_; @@ -205,7 +242,6 @@ class EagerHeavilyLayoutSensitiveOpTransformer : public EagerLayoutTransformer { protected: std::string op_name_; - std::string final_layout_; const paddle::experimental::DataLayout desired_layout_; std::unordered_set heavily_input_{"x", "y", "input"}; }; @@ -213,11 +249,10 @@ class EagerHeavilyLayoutSensitiveOpTransformer : public EagerLayoutTransformer { class EagerLightlyLayoutSensitiveOpTransformer : public EagerLayoutTransformer { public: EagerLightlyLayoutSensitiveOpTransformer() {} - explicit EagerLightlyLayoutSensitiveOpTransformer(const std::string& op_name) - : op_name_(op_name) { - VLOG(3) << "Optimze Layout lightly " << op_name; - auto desired_layout = - paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout(); + explicit EagerLightlyLayoutSensitiveOpTransformer( + const std::string& op_name) { + VLOG(4) << "Lightly op : " << op_name; + auto desired_layout = DesiredLayout(); final_layout_ = paddle::framework::DataLayoutToString(desired_layout); } @@ -226,11 +261,8 @@ class EagerLightlyLayoutSensitiveOpTransformer : public EagerLayoutTransformer { const std::string& in_name, const paddle::experimental::Tensor& in) { std::string input_layout = paddle::framework::DataLayoutToString(in.layout()); - auto default_layout = - paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout(); + auto default_layout = DefaultLayout(); if (final_layout_ == input_layout && in.shape().size() == 4) { - VLOG(4) << op_name_ << "'s " << in_name << " need transpose from " - << input_layout << " to default_layout"; auto out_tensor = EagerTraceTransposeOp( paddle::experimental::DataLayout::UNDEFINED, in); phi::DenseTensorUtils::GetMutableMeta( @@ -238,7 +270,6 @@ class EagerLightlyLayoutSensitiveOpTransformer : public EagerLayoutTransformer { ->layout = default_layout; return out_tensor; } - VLOG(4) << in_name << "'s layout is " << input_layout; return in; } @@ -246,15 +277,11 @@ class EagerLightlyLayoutSensitiveOpTransformer : public EagerLayoutTransformer { const std::string& in_name, const std::vector& in) { std::vector result; - auto desired_layout = - paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout(); - auto default_layout = - paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout(); + auto desired_layout = DesiredLayout(); + auto default_layout = DefaultLayout(); for (size_t i = 0; i < in.size(); i++) { auto in_tensor = in[i]; if (in_tensor.layout() == desired_layout) { - VLOG(4) << op_name_ << "'s " << in_name << " need transpose from " - << final_layout_ << " to default_layout"; auto out_tensor = EagerTraceTransposeOp( paddle::experimental::DataLayout::UNDEFINED, in_tensor); phi::DenseTensorUtils::GetMutableMeta( @@ -269,33 +296,20 @@ class EagerLightlyLayoutSensitiveOpTransformer : public EagerLayoutTransformer { } void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) { - auto out_layout = out_tensor->layout(); - auto default_layout = - paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout(); - if (out_layout != default_layout) { - VLOG(4) << op_name_ << "'s out need transpose to default_layout"; - phi::DenseTensorUtils::GetMutableMeta( - static_cast(out_tensor->impl().get())) - ->layout = default_layout; - } + UpdateLayout(out_tensor, DefaultLayout()); } void SetOutTensorLayout( std::vector* out_tensor) { for (size_t i 
= 0; i < out_tensor->size(); i++) { - VLOG(4) << "out layout is" - << paddle::framework::DataLayoutToString( - (*out_tensor)[i]->layout()); SetOutTensorLayout((*out_tensor)[i]); } } void SetOutTensorLayout( std::vector* out_tensor) { - auto default_layout = - paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout(); + auto default_layout = DefaultLayout(); for (size_t i = 0; i < out_tensor->size(); i++) { - VLOG(4) << " out_tensor layout trans to default "; phi::DenseTensorUtils::GetMutableMeta( static_cast((*out_tensor)[i].impl().get())) ->layout = default_layout; @@ -303,7 +317,6 @@ class EagerLightlyLayoutSensitiveOpTransformer : public EagerLayoutTransformer { } protected: - std::string op_name_; std::string final_layout_; std::unordered_set heavily_input_{"x", "y", "input"}; }; @@ -312,18 +325,11 @@ class EagerTransposeOpTransformer : public EagerLightlyLayoutSensitiveOpTransformer { public: EagerTransposeOpTransformer() {} - explicit EagerTransposeOpTransformer(const std::string& op_name) - : op_name_(op_name) { - VLOG(3) << "Optimze Layout TransposeOpTransformer " << op_name; - auto desired_layout = - paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout(); - std::string desired_layout_str = - paddle::framework::DataLayoutToString(desired_layout); - final_layout_ = desired_layout_str; + explicit EagerTransposeOpTransformer(const std::string& op_name) { + VLOG(4) << "AutoTuneTransformer op: " << op_name; } void SetAttr(std::vector* axis, bool is_nhwc) { - // input's layout is nhwc and input's layout === desired_layout std::vector perm_nchw = {0, 2, 3, 1}; std::vector perm_nhwc = {0, 3, 1, 2}; auto perm = is_nhwc ? perm_nhwc : perm_nchw; @@ -331,8 +337,6 @@ class EagerTransposeOpTransformer (*axis)[1] = perm[(*axis)[1]]; (*axis)[2] = perm[(*axis)[2]]; (*axis)[3] = perm[(*axis)[3]]; - VLOG(4) << " EagerTransposeOpTransformer " << op_name_ - << "'s layout is equal to desire: " << is_nhwc; } paddle::experimental::Tensor TransInTensor( @@ -341,31 +345,16 @@ class EagerTransposeOpTransformer } void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) { - auto default_layout = - paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout(); - if (out_tensor->layout() != default_layout) { - VLOG(4) << " Set Out_tensor's layout from " - << paddle::framework::DataLayoutToString(out_tensor->layout()) - << " to " << default_layout; - phi::DenseTensorUtils::GetMutableMeta( - static_cast(out_tensor->impl().get())) - ->layout = default_layout; - } + UpdateLayout(out_tensor, DefaultLayout()); } - - protected: - std::string op_name_; - std::string final_layout_; - std::unordered_set heavily_input_{"x", "y", "input"}; }; class EagerArgmaxOpTransformer : public EagerLightlyLayoutSensitiveOpTransformer { public: EagerArgmaxOpTransformer() {} - explicit EagerArgmaxOpTransformer(const std::string& op_name) - : op_name_(op_name) { - VLOG(3) << "Optimze Layout lightly " << op_name; + explicit EagerArgmaxOpTransformer(const std::string& op_name) { + VLOG(4) << "AutoTuneTransformer op: " << op_name; } void SetAttr(paddle::experimental::Scalar* axis, bool is_nhwc) { @@ -377,38 +366,16 @@ class EagerArgmaxOpTransformer } void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) { - VLOG(4) << "EagerArgmaxOpTransformer's out layout is" - << paddle::framework::DataLayoutToString(out_tensor->layout()); - auto default_layout = - paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout(); - if (default_layout != out_tensor->layout()) { - VLOG(4) << "Change layout from " 
- << paddle::framework::DataLayoutToString(out_tensor->layout()) - << " to " << default_layout; - phi::DenseTensorUtils::GetMutableMeta( - static_cast(out_tensor->impl().get())) - ->layout = default_layout; - } + UpdateLayout(out_tensor, DesiredLayout()); } - - protected: - std::string op_name_; - std::string final_layout_; - std::unordered_set heavily_input_{"x", "y", "input"}; }; class EagerFlattenOpTransformer : public EagerLightlyLayoutSensitiveOpTransformer { public: EagerFlattenOpTransformer() {} - explicit EagerFlattenOpTransformer(const std::string& op_name) - : op_name_(op_name) { - VLOG(3) << "Optimze Layout lightly " << op_name; - auto default_layout = - paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout(); - std::string default_layout_str = - paddle::framework::DataLayoutToString(default_layout); - final_layout_ = default_layout_str; + explicit EagerFlattenOpTransformer(const std::string& op_name) { + VLOG(4) << "AutoTuneTransformer op: " << op_name; } // transpose from NHWC to NCHW @@ -418,38 +385,16 @@ class EagerFlattenOpTransformer } void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) { - VLOG(4) << "EagerFlattenOpTransformer's out layout is" - << paddle::framework::DataLayoutToString(out_tensor->layout()); - auto desired_layout = - paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout(); - if (desired_layout != out_tensor->layout()) { - VLOG(4) << "Change layout from " - << paddle::framework::DataLayoutToString(out_tensor->layout()) - << " to " << desired_layout; - phi::DenseTensorUtils::GetMutableMeta( - static_cast(out_tensor->impl().get())) - ->layout = desired_layout; - } + UpdateLayout(out_tensor, DefaultLayout()); } - - protected: - std::string op_name_; - std::string final_layout_; - std::unordered_set heavily_input_{"x", "y", "input"}; }; class EagerConcatOpTransformer : public EagerLightlyLayoutSensitiveOpTransformer { public: EagerConcatOpTransformer() {} - explicit EagerConcatOpTransformer(const std::string& op_name) - : op_name_(op_name) { - VLOG(3) << "Optimze Layout lightly " << op_name; - auto default_layout = - paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout(); - std::string default_layout_str = - paddle::framework::DataLayoutToString(default_layout); - final_layout_ = default_layout_str; + explicit EagerConcatOpTransformer(const std::string& op_name) { + VLOG(4) << "AutoTuneTransformer op : " << op_name; } void SetAttr(paddle::experimental::Scalar* axis, @@ -457,6 +402,7 @@ class EagerConcatOpTransformer std::vector perm_nhwc = {0, 3, 1, 2}; std::vector perm_nchw = {0, 2, 3, 1}; int axes = axis->to(); + axes = axes < 0 ? axes + 4 : axes; auto perm = (paddle::framework::DataLayout::NHWC == layout) ? 
perm_nhwc : perm_nchw; (*axis) = static_cast(perm[axes]); @@ -469,20 +415,7 @@ class EagerConcatOpTransformer } void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) { - auto layout = paddle::framework::StringToDataLayout(final_layout_); - if (layout != out_tensor->layout()) { - VLOG(4) << "Change layout from " - << paddle::framework::DataLayoutToString(out_tensor->layout()) - << " to " << final_layout_; - phi::DenseTensorUtils::GetMutableMeta( - static_cast(out_tensor->impl().get())) - ->layout = layout; - } + UpdateLayout(out_tensor, DesiredLayout()); } - - protected: - std::string op_name_; - std::string final_layout_; - std::unordered_set heavily_input_{"x", "y", "input"}; }; } // namespace egr diff --git a/paddle/fluid/imperative/layout_autotune.cc b/paddle/fluid/imperative/layout_autotune.cc index 24d5e1ee896..1a5a0d9c5d8 100644 --- a/paddle/fluid/imperative/layout_autotune.cc +++ b/paddle/fluid/imperative/layout_autotune.cc @@ -194,8 +194,10 @@ paddle::imperative::NameVarMap AutoTuneLayout( (conv_in_type == framework::proto::VarType::FP16); if (is_tune_fp32) { LayoutAutoTune::Instance().SetDesiredLayout(DataLayout::NCHW); + LayoutAutoTune::Instance().SetDefaultLayout(DataLayout::NHWC); } else if (is_tune_fp16) { LayoutAutoTune::Instance().SetDesiredLayout(DataLayout::NHWC); + LayoutAutoTune::Instance().SetDefaultLayout(DataLayout::NCHW); } else { tracer->DisableLayoutAutoTune(); return ins; diff --git a/paddle/fluid/pybind/eager_properties.cc b/paddle/fluid/pybind/eager_properties.cc index dfe9e03df5f..6987950c9e0 100644 --- a/paddle/fluid/pybind/eager_properties.cc +++ b/paddle/fluid/pybind/eager_properties.cc @@ -184,6 +184,42 @@ PyObject* tensor_properties_get_shape(TensorObject* self, void* closure) { } } + auto desired_layout = + paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout(); + auto default_layout = + paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout(); + bool change_dim = + (desired_layout != default_layout && + self->tensor.layout() == desired_layout && value.size() == 4); + VLOG(6) << "eager_properties 'Shape' method, layout autotune " + << " desired_layout: " << desired_layout + << " default_layout: " << default_layout + << " tensor layout: " << self->tensor.layout() + << " tensor's shape size is : " << value.size(); + std::vector dims = value; + if (change_dim && + paddle::framework::DataLayoutToString(desired_layout) == "NCHW") { + // NCHW -> NHWC + VLOG(6) << "layout autotune get Shape from NCHW -> NHWC " << value[0] << " " + << value[1] << " " << value[2] << " " << value[3] << " to " + << dims[0] << " " << dims[2] << " " << dims[3] << " " << dims[1]; + value[0] = dims[0]; + value[1] = dims[2]; + value[2] = dims[3]; + value[3] = dims[1]; + } else if (change_dim && + paddle::framework::DataLayoutToString(desired_layout) == "NHWC") { + // NHWC -> NCHW + VLOG(6) << "layout autotune get Shape from NHWC -> NCHW " << value[0] << " " + << value[1] << " " << value[2] << " " << value[3] << " to " + << dims[0] << " " << dims[3] << " " << dims[1] << " " << dims[2] + << " " << dims[1]; + value[0] = dims[0]; + value[1] = dims[3]; + value[2] = dims[1]; + value[3] = dims[2]; + } + return ToPyObject(value); EAGER_CATCH_AND_THROW_RETURN_NULL } diff --git a/paddle/fluid/pybind/imperative.cc b/paddle/fluid/pybind/imperative.cc index 5e19c4b557c..420a9839474 100644 --- a/paddle/fluid/pybind/imperative.cc +++ b/paddle/fluid/pybind/imperative.cc @@ -2044,8 +2044,49 @@ void BindImperative(py::module *m_ptr) { "shape", [](imperative::VarBase &self) 
{ if (self.Var().IsType()) { - return phi::vectorize( + auto value = phi::vectorize( self.Var().Get().dims()); + auto tensor = self.Var().Get(); + auto tmp_value = value; + auto desired_layout = + paddle::imperative::LayoutAutoTune::Instance() + .GetDesiredLayout(); + auto default_layout = + paddle::imperative::LayoutAutoTune::Instance() + .GetDefaultLayout(); + bool change_dim = + (desired_layout != default_layout && + tensor.layout() == desired_layout && value.size() == 4); + VLOG(6) << "'Shape' method, layout autotune," + << " desired_layout: " << desired_layout + << " default_layout: " << default_layout + << " tensor layout: " << tensor.layout() + << " tensor's shape size is : " << value.size(); + + if (change_dim && paddle::framework::DataLayoutToString( + desired_layout) == "NCHW") { + VLOG(6) << "layout autotune get Shape from NHWC -> NCHW " + << value[0] << " " << value[1] << " " << value[2] << " " + << value[3] << " to " << tmp_value[3] << " " + << tmp_value[1] << " " << tmp_value[2] << " " + << tmp_value[1]; + // NCHW -> NHWC + value[1] = tmp_value[2]; + value[2] = tmp_value[3]; + value[3] = tmp_value[1]; + } else if (change_dim && paddle::framework::DataLayoutToString( + desired_layout) == "NHWC") { + VLOG(6) << "layout autotune get Shape from NHWC -> NCHW " + << value[0] << " " << value[1] << " " << value[2] << " " + << value[3] << " to " << tmp_value[0] << " " + << tmp_value[3] << " " << tmp_value[1] << " " + << tmp_value[2]; + // NHWC -> NCHW + value[1] = tmp_value[3]; + value[2] = tmp_value[1]; + value[3] = tmp_value[2]; + } + return value; } else if (self.Var().IsType()) { return phi::vectorize( self.Var().Get().value().dims()); diff --git a/paddle/phi/api/lib/data_transform.cc b/paddle/phi/api/lib/data_transform.cc index 04ac701ae0f..048a24ff5e3 100644 --- a/paddle/phi/api/lib/data_transform.cc +++ b/paddle/phi/api/lib/data_transform.cc @@ -205,7 +205,8 @@ phi::DenseTensor TransformData(phi::DenseTensor* tensor, if (NeedTransformLayout(tensor->layout(), target_args_def.layout, tensor->place(), - transform_flag)) { + transform_flag) && + tensor->dims().size() != 1) { out = TransDataLayout(out, target_args_def.layout); trans_layout = true; } diff --git a/python/paddle/fluid/tests/unittests/test_layout_autotune.py b/python/paddle/fluid/tests/unittests/test_layout_autotune.py index 21d6d97617d..497bcd89e17 100644 --- a/python/paddle/fluid/tests/unittests/test_layout_autotune.py +++ b/python/paddle/fluid/tests/unittests/test_layout_autotune.py @@ -93,18 +93,9 @@ class LayoutAutoTune(unittest.TestCase): return conv_out, predict def test_enable_autotune(self): - if self.use_autoune(): - conv_out, predict = self.train(data_format="NCHW") - if paddle.fluid.core.use_layout_autotune(): - self.assertEqual(conv_out.shape, [1, 14, 14, 8]) - self.assertEqual(predict.shape, [1, 2]) - else: - self.assertEqual(conv_out.shape, [1, 8, 14, 14]) - self.assertEqual(predict.shape, [1, 2]) - else: - conv_out, predict = self.train(data_format="NCHW") - self.assertEqual(conv_out.shape, [1, 8, 14, 14]) - self.assertEqual(predict.shape, [1, 2]) + conv_out, predict = self.train(data_format="NCHW") + self.assertEqual(conv_out.shape, [1, 8, 14, 14]) + self.assertEqual(predict.shape, [1, 2]) def test_transpose_op_transposer(self): conv = paddle.nn.Conv2D(3, 8, (3, 3)) @@ -124,12 +115,8 @@ class LayoutAutoTune(unittest.TestCase): scaled.backward() scaler.minimize(optimizer, scaled) - if paddle.fluid.core.use_layout_autotune(): - self.assertEqual(conv_out.shape, [1, 14, 12, 8]) - self.assertEqual(out.shape, 
[1, 12, 8, 14]) - else: - self.assertEqual(conv_out.shape, [1, 8, 14, 12]) - self.assertEqual(out.shape, [1, 12, 8, 14]) + self.assertEqual(conv_out.shape, [1, 8, 14, 12]) + self.assertEqual(out.shape, [1, 12, 8, 14]) def test_flatten_op_transposer(self): conv = paddle.nn.Conv2D(3, 8, (3, 3)) @@ -143,12 +130,8 @@ class LayoutAutoTune(unittest.TestCase): # because it flatten the C and H dimensions. out = flatten(conv_out) - if paddle.fluid.core.use_layout_autotune(): - self.assertEqual(conv_out.shape, [1, 14, 12, 8]) - self.assertEqual(out.shape, [1, 112, 12]) - else: - self.assertEqual(conv_out.shape, [1, 8, 14, 12]) - self.assertEqual(out.shape, [1, 112, 12]) + self.assertEqual(conv_out.shape, [1, 8, 14, 12]) + self.assertEqual(out.shape, [1, 112, 12]) def test_argmax_op_transposer_keep_dims(self): conv = paddle.nn.Conv2D(3, 8, (3, 3)) @@ -157,41 +140,8 @@ class LayoutAutoTune(unittest.TestCase): conv_out = conv(data) # conv_out.shape = [1, 14, 12, 8] with NHWC out = paddle.argmax(conv_out, axis=1, keepdim=True) - if paddle.fluid.core.use_layout_autotune(): - self.assertEqual(conv_out.shape, [1, 14, 12, 8]) - self.assertEqual(out.shape, [1, 14, 12, 1]) - else: - self.assertEqual(conv_out.shape, [1, 8, 14, 12]) - self.assertEqual(out.shape, [1, 1, 14, 12]) - - def test_argmax_op_transposer_ff(self): - conv = paddle.nn.Conv2D(3, 8, (3, 3)) - data = paddle.rand([1, 3, 16, 14]) - with paddle.amp.auto_cast(level="O2"): - conv_out = conv(data) - # conv_out.shape = [1, 14, 12, 8] with NHWC - out = paddle.argmax(conv_out) - if paddle.fluid.core.use_layout_autotune(): - self.assertEqual(conv_out.shape, [1, 14, 12, 8]) - self.assertEqual(out.shape, [1]) - else: - self.assertEqual(conv_out.shape, [1, 8, 14, 12]) - self.assertEqual(out.shape, [1]) - - def test_argmax_op_transposer_t(self): - conv = paddle.nn.Conv2D(3, 8, (3, 3)) - data = paddle.rand([1, 3, 16, 14]) - with paddle.amp.auto_cast(level="O2"): - conv_out = conv(data) - # conv_out.shape = [1, 14, 12, 8] with NHWC - out = paddle.argmax(conv_out) - - if paddle.fluid.core.use_layout_autotune(): - self.assertEqual(conv_out.shape, [1, 14, 12, 8]) - self.assertEqual(out.shape, [1]) - else: - self.assertEqual(conv_out.shape, [1, 8, 14, 12]) - self.assertEqual(out.shape, [1]) + self.assertEqual(conv_out.shape, [1, 8, 14, 12]) + self.assertEqual(out.shape, [1, 1, 14, 12]) def test_concat_op_transposer(self): in1 = paddle.rand([1, 8, 14, 12]) @@ -202,12 +152,8 @@ class LayoutAutoTune(unittest.TestCase): # conv_out.shape = [1, 14, 12, 8] with NHWC out = paddle.concat(x=[conv_out, in1], axis=0) - if paddle.fluid.core.use_layout_autotune(): - self.assertEqual(conv_out.shape, [1, 14, 12, 8]) - self.assertEqual(out.shape, [2, 8, 14, 12]) - else: - self.assertEqual(conv_out.shape, [1, 8, 14, 12]) - self.assertEqual(out.shape, [2, 8, 14, 12]) + self.assertEqual(conv_out.shape, [1, 8, 14, 12]) + self.assertEqual(out.shape, [2, 8, 14, 12]) def test_concat_op_no_transposer(self): conv = paddle.nn.Conv2D(3, 8, (3, 3)) @@ -219,12 +165,8 @@ class LayoutAutoTune(unittest.TestCase): # conv_out.shape = [1, 14, 12, 8] with NHWC out = paddle.concat(x=[conv_out1, conv_out2], axis=0) - if paddle.fluid.core.use_layout_autotune(): - self.assertEqual(conv_out1.shape, [1, 14, 12, 8]) - self.assertEqual(out.shape, [2, 14, 12, 8]) - else: - self.assertEqual(conv_out1.shape, [1, 8, 14, 12]) - self.assertEqual(out.shape, [2, 8, 14, 12]) + self.assertEqual(conv_out1.shape, [1, 8, 14, 12]) + self.assertEqual(out.shape, [2, 8, 14, 12]) class TestAutoTuneAPI(unittest.TestCase): 
diff --git a/python/paddle/nn/functional/conv.py b/python/paddle/nn/functional/conv.py index e70fa037ae2..eacc22975f1 100644 --- a/python/paddle/nn/functional/conv.py +++ b/python/paddle/nn/functional/conv.py @@ -152,8 +152,8 @@ def _conv_nd(x, channel_dim = channel_dim + len( x.shape) if channel_dim < 0 else channel_dim tmp_bias = _C_ops.reshape( - bias, - bias.shape + [1 for i in range(len(x.shape) - channel_dim - 1)]) + bias, [1 for i in range(channel_dim)] + bias.shape + + [1 for i in range(len(x.shape) - channel_dim - 1)]) return _C_ops.add(pre_bias, tmp_bias) else: return pre_bias -- GitLab
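The rewritten agnostic-op overload of EagerLayoutAutotune above only transposes inputs once autotune has started (a desired layout is set) and the inputs disagree on layout, and it falls back to the default layout whenever any input is not 4-D. The sketch below mirrors that decision in plain Python, assuming NeedTransLayout reports a layout mismatch among the inputs; choose_final_layout and its string layout names are illustrative, not Paddle API, and the real code also constructs the EagerLayoutTransformer object.

    # Illustrative sketch (not Paddle API) of the agnostic-op layout decision
    # made by the first EagerLayoutAutotune overload in eager_layout_auto_tune.h.
    def choose_final_layout(input_shapes, input_layouts, desired, default):
        """Return the layout an agnostic op (add, relu, exp, ...) runs in.

        desired is None while layout autotune has not started (UNDEFINED in C++).
        """
        first = input_layouts[0]
        started = desired is not None
        # assumption: NeedTransLayout is true when some input differs from the first
        mixed = any(layout != first for layout in input_layouts)
        if not (started and mixed):
            return first        # keep the incoming layout, no transpose inserted
        if any(len(shape) != 4 for shape in input_shapes):
            return default      # a non-4-D input forces the default layout
        return desired          # all inputs are 4-D: run in the tuned layout

    # A 4-D NHWC conv output combined with a 1-D tensor falls back to NCHW.
    print(choose_final_layout([[1, 14, 12, 8], [8]], ["NHWC", "NCHW"],
                              desired="NHWC", default="NCHW"))  # NCHW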
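DealWithShapeOp in eager_layout_transformer.h, tensor_properties_get_shape in eager_properties.cc, and the imperative shape binding in imperative.cc all apply the same 4-D permutation, so the shape a user sees keeps the order the model was written in even after autotune has transposed the underlying tensor. A minimal sketch of that mapping, using string layout names and the illustrative helper name report_shape (the real code mutates the dims in C++):

    # Illustrative sketch of the 4-D shape remapping used by the shape property
    # and the shape op in this patch.
    def report_shape(stored_dims, desired_layout, default_layout, tensor_layout):
        """Map the stored dims of an autotuned 4-D tensor back to the order
        user code expects (mirrors the change_dim condition in the patch)."""
        change_dim = (desired_layout != default_layout
                      and tensor_layout == desired_layout
                      and len(stored_dims) == 4)
        if not change_dim:
            return list(stored_dims)
        n, d1, d2, d3 = stored_dims
        if desired_layout == "NCHW":    # stored as NCHW, reported as NHWC
            return [n, d2, d3, d1]
        if desired_layout == "NHWC":    # stored as NHWC, reported as NCHW
            return [n, d3, d1, d2]
        return list(stored_dims)

    # A conv output autotuned to NHWC with stored dims [1, 14, 12, 8] is still
    # reported as [1, 8, 14, 12], matching the asserts in test_layout_autotune.py.
    print(report_shape([1, 14, 12, 8], "NHWC", "NCHW", "NHWC"))  # [1, 8, 14, 12]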
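The last hunk, in python/paddle/nn/functional/conv.py, pads the reshaped bias with leading ones as well as trailing ones, so it always has the same rank as the convolution output and broadcasts over the channel axis for either data_format. The lines below only reproduce the shape arithmetic of the new expression; the real code goes through _C_ops.reshape and _C_ops.add.

    # Shape arithmetic of the updated bias reshape in _conv_nd (illustrative only).
    def reshaped_bias_shape(x_rank, channel_dim, bias_shape):
        # [1] * channel_dim + bias.shape + [1] * (rank - channel_dim - 1)
        return ([1] * channel_dim + list(bias_shape)
                + [1] * (x_rank - channel_dim - 1))

    print(reshaped_bias_shape(4, 1, [8]))  # NCHW, channel_dim = 1 -> [1, 8, 1, 1]
    print(reshaped_bias_shape(4, 3, [8]))  # NHWC, channel_dim = 3 -> [1, 1, 1, 8]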