Unverified commit 61273c0e, authored by niuliling123, committed by GitHub

[Cherry-pick]Update layout autotune for module with no modified (#46541) (#46515) (#46880)

Cherry-pick 46541
Ensure that ResNet50, TSM, and DeepLabV3 models get automatic layout tuning with zero model modification
Parent 08d233f9
@@ -32,70 +32,50 @@ inline bool NeedTransLayout(
} }
return false; return false;
} }
inline std::shared_ptr<EagerLayoutTransformer> BaseTransformer(
const std::string& op_name,
const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& tensors_vector) {
std::shared_ptr<EagerLayoutTransformer> transposer = nullptr;
bool unstart =
(paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() ==
paddle::experimental::DataLayout::UNDEFINED);
auto first_layout = tensors_vector[0][0].layout();
VLOG(3) << "Layout autotune was is start ? " << (!unstart) << op_name
<< "'s layout is " << first_layout;
transposer = std::make_shared<EagerLayoutTransformer>(
op_name, tensors_vector, first_layout);
return transposer;
}
// For agnostic op like add, relu, exp
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune( inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
const std::string& op_name, const std::string& op_name,
const paddle::small_vector<std::vector<paddle::experimental::Tensor>, const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& tensors_vector) { kSlotSmallVectorSize>& tensors_vector) {
auto desired_layout = // For agnostic op like add, relu, exp
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
auto default_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
auto first_layout = tensors_vector[0][0].layout(); auto first_layout = tensors_vector[0][0].layout();
if (NeedTransLayout(tensors_vector, first_layout)) { auto desired_layout = DesiredLayout();
bool is_started =
!(desired_layout == paddle::experimental::DataLayout::UNDEFINED);
if (is_started && NeedTransLayout(tensors_vector, first_layout)) {
bool need_trans_back = false; bool need_trans_back = false;
for (size_t i = 0; i < tensors_vector.size(); i++) { for (size_t i = 0; i < tensors_vector.size(); i++) {
for (size_t idx = 0; idx < tensors_vector[0].size(); idx++) { for (size_t idx = 0; idx < tensors_vector[0].size(); idx++) {
if (4 != tensors_vector[i][idx].shape().size()) { if (4 != tensors_vector[i][idx].shape().size()) {
need_trans_back = true; need_trans_back = true;
VLOG(3) << "Agnostic op " << op_name << " shape is "
<< tensors_vector[i][idx].shape().size() << " and layout is "
<< tensors_vector[i][idx].layout();
} }
} }
} }
auto final_layout = need_trans_back ? default_layout : desired_layout; auto final_layout = need_trans_back ? DefaultLayout() : desired_layout;
VLOG(4) << op_name << "'s has different layout, need trans to "
<< final_layout;
return std::make_shared<EagerLayoutTransformer>( return std::make_shared<EagerLayoutTransformer>(
op_name, tensors_vector, final_layout); op_name, tensors_vector, final_layout);
} }
return BaseTransformer(op_name, tensors_vector); return std::make_shared<EagerLayoutTransformer>(
op_name, tensors_vector, first_layout);
} }
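A minimal standalone sketch (hypothetical names, not Paddle API) of the agnostic-op decision above: the tuned layout is kept only while every input tensor is 4-D; any non-4-D input forces the transformer back to the default layout.

```cpp
// Hypothetical sketch of the agnostic-op rule above; not part of the patch.
#include <cstdint>
#include <vector>

enum class Layout { UNDEFINED, NCHW, NHWC };

Layout PickAgnosticFinalLayout(
    const std::vector<std::vector<int64_t>>& input_shapes,
    Layout desired_layout,
    Layout default_layout) {
  // Any non-4-D input sets need_trans_back in the code above, so the op's
  // output must end up in the default layout; otherwise keep the tuned one.
  for (const auto& shape : input_shapes) {
    if (shape.size() != 4) {
      return default_layout;
    }
  }
  return desired_layout;
}
```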
// For lightly op like reduce
template <typename T> template <typename T>
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune( inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
const std::string& op_name, const std::string& op_name,
const paddle::small_vector<std::vector<paddle::experimental::Tensor>, const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& tensors_vector, kSlotSmallVectorSize>& tensors_vector,
T* attr) { T* attr) {
VLOG(3) << "Lightly op " << op_name << "'s shape is " // For lightly op like reduce
<< tensors_vector[0][0].shape().size() << " and layout is " if (!(DesiredLayout() == paddle::experimental::DataLayout::UNDEFINED)) {
<< tensors_vector[0][0].layout(); VLOG(4) << "LayoutAutotune was unstarted. Current op :" << op_name;
return std::make_shared<EagerLayoutTransformer>(
std::shared_ptr<EagerLayoutTransformer> transposer = nullptr; op_name, tensors_vector, tensors_vector[0][0].layout());
transposer = }
std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name); return std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
return transposer;
} }
// For lightly op like argmax
template <typename T1, typename T2> template <typename T1, typename T2>
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune( inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
const std::string& op_name, const std::string& op_name,
@@ -103,28 +83,23 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
kSlotSmallVectorSize>& tensors_vector, kSlotSmallVectorSize>& tensors_vector,
T1* axis, T1* axis,
T2* keep_dim) { T2* keep_dim) {
VLOG(3) << "Lightly op " << op_name << "'s shape is " // For lightly op like argmax
<< tensors_vector[0][0].shape().size() << " and layout is "
<< tensors_vector[0][0].layout();
return EagerLayoutAutotune<T1>(op_name, tensors_vector, axis); return EagerLayoutAutotune<T1>(op_name, tensors_vector, axis);
} }
// heavily string data_format, data_layout
template <> template <>
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune( inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
const std::string& op_name, const std::string& op_name,
const paddle::small_vector<std::vector<paddle::experimental::Tensor>, const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& tensors_vector, kSlotSmallVectorSize>& tensors_vector,
std::string* attr) { std::string* attr) {
auto first_layout = tensors_vector[0][0].layout(); // Heavily op with (string) data_format, data_layout
auto transposer = std::make_shared<EagerLayoutTransformer>( auto transposer = std::make_shared<EagerLayoutTransformer>(
op_name, tensors_vector, first_layout); op_name, tensors_vector, tensors_vector[0][0].layout());
if (paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() == if (DesiredLayout() == paddle::experimental::DataLayout::UNDEFINED) {
paddle::experimental::DataLayout::UNDEFINED) {
// Layout autotune only supports model with convolutional layers // Layout autotune only supports model with convolutional layers
VLOG(3) << "Optimze Layout was not started " << op_name;
if (op_name != "conv2d") { if (op_name != "conv2d") {
VLOG(4) << "LayoutAutotune was unstarted. Current op :" << op_name;
return transposer; return transposer;
} else { } else {
auto data_type = tensors_vector[0][0].dtype(); auto data_type = tensors_vector[0][0].dtype();
@@ -134,7 +109,8 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
bool is_tune_fp16 = bool is_tune_fp16 =
(data_type == paddle::experimental::DataType::FLOAT16) && (data_type == paddle::experimental::DataType::FLOAT16) &&
(*attr == "NCHW"); (*attr == "NCHW");
VLOG(3) << "Conv2d_dy's dtype " << data_type << " format" << (*attr); VLOG(4) << "LayoutAutoTune assert with dtype and layout, Current op : "
<< op_name;
if (is_tune_fp32) { if (is_tune_fp32) {
paddle::imperative::LayoutAutoTune::Instance().SetDesiredLayout( paddle::imperative::LayoutAutoTune::Instance().SetDesiredLayout(
paddle::experimental::DataLayout::NCHW); paddle::experimental::DataLayout::NCHW);
@@ -147,58 +123,45 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
paddle::imperative::LayoutAutoTune::Instance().SetDefaultLayout( paddle::imperative::LayoutAutoTune::Instance().SetDefaultLayout(
paddle::experimental::DataLayout::NCHW); paddle::experimental::DataLayout::NCHW);
} else { } else {
VLOG(4) << "DisableLayoutAutoTune accoding to Conv op"
<< " dtype : " << data_type << " format : " << (*attr);
egr::Controller::Instance().DisableLayoutAutoTune(); egr::Controller::Instance().DisableLayoutAutoTune();
return transposer; return transposer;
} }
VLOG(3) VLOG(4) << "LayoutAutoTune from " << *attr << " to " << DesiredLayout();
<< "Tune the layout from " << *attr << " to "
<< paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
} }
} }
if (paddle::imperative::LayoutAutoTune::Instance().IsHeavilyLayoutSensitive( if (paddle::imperative::LayoutAutoTune::Instance().IsHeavilyLayoutSensitive(
op_name)) { op_name)) {
VLOG(3) return std::make_shared<EagerHeavilyLayoutSensitiveOpTransformer>(op_name,
<< op_name attr);
<< "'s LayoutTransformer is EagerHeavilyLayoutSensitiveOpTransformer";
auto heavily_transposer =
std::make_shared<EagerHeavilyLayoutSensitiveOpTransformer>(op_name,
attr);
return heavily_transposer;
} }
return std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
VLOG(3) << op_name << "'s LayoutTransformer is unimplemented. Use default.";
return transposer;
} }
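The conv2d branch above is what actually starts layout autotune. Combined with the imperative-side change later in this diff (the paired SetDesiredLayout/SetDefaultLayout calls), the rule appears to be: an FP32 conv written with data_format NHWC is tuned towards NCHW, an FP16 conv written with NCHW is tuned towards NHWC, and any other combination disables autotune. The FP32 predicate is partly elided in this view, so treat the following as a hedged sketch rather than the exact implementation.

```cpp
// Hedged sketch of the conv2d autotune kick-off rule; the FP32 predicate is
// inferred from the surrounding diff and may differ in detail.
enum class Layout { NCHW, NHWC };
enum class DType { FP32, FP16, OTHER };

struct AutotunePlan {
  bool enabled;
  Layout desired;   // layout the tensors are transposed into
  Layout fallback;  // layout the model was written in (default layout)
};

AutotunePlan DecideConvAutotune(DType dtype, Layout data_format) {
  if (dtype == DType::FP32 && data_format == Layout::NHWC) {
    return {true, Layout::NCHW, Layout::NHWC};  // FP32 kernels prefer NCHW
  }
  if (dtype == DType::FP16 && data_format == Layout::NCHW) {
    return {true, Layout::NHWC, Layout::NCHW};  // FP16 kernels prefer NHWC
  }
  return {false, data_format, data_format};  // autotune stays disabled
}
```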
// lightly transpose
template <> template <>
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune( inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
const std::string& op_name, const std::string& op_name,
const paddle::small_vector<std::vector<paddle::experimental::Tensor>, const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& tensors_vector, kSlotSmallVectorSize>& tensors_vector,
std::vector<int>* attr) { std::vector<int>* attr) {
auto first_layout = tensors_vector[0][0].layout(); // lightly transpose
std::shared_ptr<EagerLayoutTransformer> transposer = nullptr; if (DesiredLayout() == paddle::experimental::DataLayout::UNDEFINED) {
if (paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() == VLOG(4) << "LayoutAutotune was unstarted. Current op :" << op_name;
paddle::experimental::DataLayout::UNDEFINED) { return std::make_shared<EagerLayoutTransformer>(
VLOG(3) << "Optimze Layout was not started" << op_name; op_name, tensors_vector, tensors_vector[0][0].layout());
transposer = std::make_shared<EagerLayoutTransformer>(
op_name, tensors_vector, first_layout);
return transposer;
} }
if (op_name == "transpose2" && if (op_name == "transpose2" &&
(tensors_vector[0][0].layout() == (tensors_vector[0][0].layout() == DesiredLayout())) {
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout())) {
auto trans = std::make_shared<EagerTransposeOpTransformer>(op_name); auto trans = std::make_shared<EagerTransposeOpTransformer>(op_name);
trans->SetAttr(attr, trans->SetAttr(attr,
tensors_vector[0][0].layout() == tensors_vector[0][0].layout() ==
paddle::experimental::DataLayout::NHWC); paddle::experimental::DataLayout::NHWC);
return trans; return trans;
} }
transposer = return std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
return transposer;
} }
// lightly int argmax // lightly int argmax
@@ -210,19 +173,14 @@ EagerLayoutAutotune<paddle::experimental::Scalar, bool>(
kSlotSmallVectorSize>& tensors_vector, kSlotSmallVectorSize>& tensors_vector,
paddle::experimental::Scalar* axis, paddle::experimental::Scalar* axis,
bool* keep_dim) { bool* keep_dim) {
auto first_layout = tensors_vector[0][0].layout(); if (DesiredLayout() == paddle::experimental::DataLayout::UNDEFINED) {
std::shared_ptr<EagerLayoutTransformer> transposer = nullptr; VLOG(4) << "LayoutAutotune was unstarted. Current op :" << op_name;
if (paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() == return std::make_shared<EagerLayoutTransformer>(
paddle::experimental::DataLayout::UNDEFINED) { op_name, tensors_vector, tensors_vector[0][0].layout());
VLOG(3) << "Optimze Layout was not started" << op_name;
transposer = std::make_shared<EagerLayoutTransformer>(
op_name, tensors_vector, first_layout);
return transposer;
} }
auto desired_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
if (op_name == "argmax" && if (op_name == "argmax" &&
(tensors_vector[0][0].layout() == desired_layout) && (*keep_dim)) { (tensors_vector[0][0].layout() == DesiredLayout()) && (*keep_dim)) {
std::shared_ptr<EagerArgmaxOpTransformer> argmax_transform = nullptr; std::shared_ptr<EagerArgmaxOpTransformer> argmax_transform = nullptr;
argmax_transform = std::make_shared<EagerArgmaxOpTransformer>(op_name); argmax_transform = std::make_shared<EagerArgmaxOpTransformer>(op_name);
argmax_transform->SetAttr(axis, argmax_transform->SetAttr(axis,
@@ -230,12 +188,9 @@ EagerLayoutAutotune<paddle::experimental::Scalar, bool>(
paddle::experimental::DataLayout::NHWC); paddle::experimental::DataLayout::NHWC);
return argmax_transform; return argmax_transform;
} }
transposer = return std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
return transposer;
} }
// lightly for flatten
template <> template <>
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune<int, int>( inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune<int, int>(
const std::string& op_name, const std::string& op_name,
@@ -243,32 +198,22 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune<int, int>(
kSlotSmallVectorSize>& tensors_vector, kSlotSmallVectorSize>& tensors_vector,
int* start_axis, int* start_axis,
int* stop_axis) { int* stop_axis) {
auto first_layout = tensors_vector[0][0].layout(); if (DesiredLayout() == paddle::experimental::DataLayout::UNDEFINED) {
std::shared_ptr<EagerLayoutTransformer> transposer = nullptr; VLOG(4) << "Optimze Layout was not started" << op_name;
auto desired_layout = return std::make_shared<EagerLayoutTransformer>(
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout(); op_name, tensors_vector, tensors_vector[0][0].layout());
if (desired_layout == paddle::experimental::DataLayout::UNDEFINED) {
VLOG(3) << "Optimze Layout was not started" << op_name;
transposer = std::make_shared<EagerLayoutTransformer>(
op_name, tensors_vector, first_layout);
return transposer;
} }
bool no_tranpose = tensors_vector[0][0].layout() == desired_layout;
bool no_tranpose = tensors_vector[0][0].layout() == DesiredLayout();
bool is_valid = ((*start_axis) == 1 && (*stop_axis) == 3); bool is_valid = ((*start_axis) == 1 && (*stop_axis) == 3);
if (op_name == "flatten" || op_name == "flatten_contiguous_range") { if (op_name == "flatten" || op_name == "flatten_contiguous_range") {
if (no_tranpose && is_valid) { if (no_tranpose && is_valid) {
std::shared_ptr<EagerFlattenOpTransformer> flatten_transform = nullptr; return std::make_shared<EagerFlattenOpTransformer>(op_name);
flatten_transform = std::make_shared<EagerFlattenOpTransformer>(op_name);
return flatten_transform;
} }
} }
return std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
transposer =
std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
return transposer;
} }
// lightly int Concat
template <> template <>
inline std::shared_ptr<EagerLayoutTransformer> inline std::shared_ptr<EagerLayoutTransformer>
EagerLayoutAutotune<paddle::experimental::Scalar>( EagerLayoutAutotune<paddle::experimental::Scalar>(
@@ -276,27 +221,26 @@ EagerLayoutAutotune<paddle::experimental::Scalar>(
const paddle::small_vector<std::vector<paddle::experimental::Tensor>, const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& tensors_vector, kSlotSmallVectorSize>& tensors_vector,
paddle::experimental::Scalar* axis) { paddle::experimental::Scalar* axis) {
auto desired_layout = if (DesiredLayout() == paddle::experimental::DataLayout::UNDEFINED) {
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout(); VLOG(4) << "Optimze Layout was not started" << op_name;
auto first_layout = tensors_vector[0][0].layout(); return std::make_shared<EagerLayoutTransformer>(
std::shared_ptr<EagerLayoutTransformer> transposer = nullptr; op_name, tensors_vector, tensors_vector[0][0].layout());
if (desired_layout == paddle::experimental::DataLayout::UNDEFINED) {
VLOG(3) << "Optimze Layout was not started" << op_name;
transposer = std::make_shared<EagerLayoutTransformer>(
op_name, tensors_vector, first_layout);
return transposer;
} }
auto desired_layout = DesiredLayout();
if (NeedTransLayout(tensors_vector, desired_layout)) { if (NeedTransLayout(tensors_vector, desired_layout)) {
VLOG(3) << op_name << " need transpose to default layout"; VLOG(4) << op_name << "'s has different layout";
transposer = return std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name); }
return transposer; if (op_name == "Concat") {
} else { if (desired_layout == tensors_vector[0][0].layout() &&
auto trans = std::make_shared<EagerConcatOpTransformer>(op_name); tensors_vector[0][0].shape().size() == 4) {
trans->SetAttr(axis, desired_layout); auto trans = std::make_shared<EagerConcatOpTransformer>(op_name);
return trans; trans->SetAttr(axis, desired_layout);
return trans;
}
} }
return std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
} }
} // namespace egr } // namespace egr
@@ -23,7 +23,7 @@ inline paddle::experimental::Tensor EagerTraceTransposeOp(
const paddle::experimental::DataLayout layout, const paddle::experimental::DataLayout layout,
const paddle::experimental::Tensor& in) { const paddle::experimental::Tensor& in) {
VLOG(4) << "AutoTune Transpose from " << in.layout() << " to " << layout VLOG(4) << "AutoTune Transpose from " << in.layout() << " to " << layout
<< ", tensor's shape is " << in.shape().size(); << ", tensor's dim size is " << in.shape().size();
if (in.shape().size() != 4) { if (in.shape().size() != 4) {
return in; return in;
} }
@@ -36,12 +36,72 @@ inline paddle::experimental::Tensor EagerTraceTransposeOp(
axis = {0, 1, 2, 3}; axis = {0, 1, 2, 3};
} }
auto out_tensor = transpose_ad_func(in, axis); auto out_tensor = transpose_ad_func(in, axis);
VLOG(4) << "AutoTune Transpose from " VLOG(4) << "AutoTune Transpose from " << in.layout() << " to " << layout;
<< paddle::framework::DataLayoutToString(in.layout()) << " to "
<< paddle::framework::DataLayoutToString(layout);
return out_tensor; return out_tensor;
} }
inline paddle::experimental::DataLayout DesiredLayout() {
return paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
}
inline paddle::experimental::DataLayout DefaultLayout() {
return paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
}
inline void UpdateLayout(paddle::experimental::Tensor* out_tensor,
const paddle::experimental::DataLayout layout) {
if (out_tensor->layout() != layout) {
VLOG(4) << "Update out_tensor's layout from " << out_tensor->layout()
<< " to " << layout;
phi::DenseTensorUtils::GetMutableMeta(
static_cast<phi::DenseTensor*>(out_tensor->impl().get()))
->layout = layout;
}
}
inline void DealWithShapeOp(paddle::experimental::Tensor* out_tensor,
const paddle::experimental::DataLayout layout,
int dim_size) {
auto des_layout = DesiredLayout();
auto def_layout = DefaultLayout();
int32_t* value =
static_cast<phi::DenseTensor*>(out_tensor->impl().get())->data<int32_t>();
bool change_dim =
(des_layout != def_layout && layout == des_layout && dim_size == 4);
VLOG(6) << "'Shape OP', layout autotune: True"
<< " desired_layout: " << des_layout
<< " default_layout: " << def_layout
<< " tensor layout: " << out_tensor->layout()
<< " tensor's shape size is : " << dim_size;
// It's means input tensor has been autotune and tensor's layout is
// desired_layout
std::vector<int32_t> dims;
dims.resize(dim_size);
for (int i = 0; i < dim_size; i++) {
dims[i] = value[i];
}
auto des_str = paddle::framework::DataLayoutToString(des_layout);
if (change_dim && des_str == "NCHW") {
// NCHW -> NHWC
VLOG(6) << "layout autotune get Shape from NCHW -> NHWC " << value[0] << " "
<< value[1] << " " << value[2] << " " << value[3] << " to "
<< dims[0] << " " << dims[2] << " " << dims[3] << " " << dims[1];
value[0] = dims[0];
value[1] = dims[2];
value[2] = dims[3];
value[3] = dims[1];
} else if (change_dim && des_str == "NHWC") {
// NHWC -> NCHW
VLOG(6) << "layout autotune get Shape from NHWC -> NCHW " << value[0] << " "
<< value[1] << " " << value[2] << " " << value[3] << " to "
<< dims[0] << " " << dims[3] << " " << dims[1] << " " << dims[2];
value[0] = dims[0];
value[1] = dims[3];
value[2] = dims[1];
value[3] = dims[2];
}
}
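DealWithShapeOp rewrites the output of the `shape` op so that user code still sees the dimension order of the layout the model was written in, even though the underlying tensor has been transposed. The index permutation it applies is the standard NCHW/NHWC reorder; a standalone restatement:

```cpp
// Standalone restatement of the reorder performed in DealWithShapeOp above.
#include <array>
#include <cstdint>

// Shape computed in NCHW order, reported in NHWC order: {N,C,H,W} -> {N,H,W,C}.
std::array<int32_t, 4> NCHWToNHWC(const std::array<int32_t, 4>& d) {
  return {d[0], d[2], d[3], d[1]};
}

// Shape computed in NHWC order, reported in NCHW order: {N,H,W,C} -> {N,C,H,W}.
std::array<int32_t, 4> NHWCToNCHW(const std::array<int32_t, 4>& d) {
  return {d[0], d[3], d[1], d[2]};
}
```

The same reorder appears again further down in `tensor_properties_get_shape` and in the `shape` property bound on `imperative::VarBase`, so `Tensor.shape` stays consistent with the data_format the user specified.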
// agnostic op // agnostic op
class EagerLayoutTransformer { class EagerLayoutTransformer {
using Layout = paddle::experimental::DataLayout; using Layout = paddle::experimental::DataLayout;
@@ -58,27 +118,27 @@ class EagerLayoutTransformer {
const paddle::small_vector<std::vector<paddle::experimental::Tensor>, const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& tensors_vector, kSlotSmallVectorSize>& tensors_vector,
const Layout final_layout = Layout::UNDEFINED) const Layout final_layout = Layout::UNDEFINED)
: op_name_(op_name), final_layout_(final_layout) { : op_name_(op_name), final_layout_(final_layout), dim_size_(1) {
VLOG(4) << "Agnostic op : " << op_name_ << " final_layout_ is " VLOG(4) << "Agnostic op : " << op_name_ << "'s layout is " << final_layout_;
<< final_layout_;
} }
virtual ~EagerLayoutTransformer() {} virtual ~EagerLayoutTransformer() {}
virtual paddle::experimental::Tensor TransInTensor( virtual paddle::experimental::Tensor TransInTensor(
const std::string& in_name, const paddle::experimental::Tensor& in) { const std::string& in_name, const paddle::experimental::Tensor& in) {
if (final_layout_ == Layout::UNDEFINED || final_layout_ == in.layout()) { // update in shape size
VLOG(4) << "EagerLayoutTransformer with no trans"; dim_size_ = in.shape().size();
return in; bool need_trans =
} else { // from NCHW to NHWC !(final_layout_ == Layout::UNDEFINED || final_layout_ == in.layout());
VLOG(4) << "EagerLayoutTransformer with trans from " << in.layout() // This is for Agnostic op when layout is differnet
<< " to " << final_layout_; if (need_trans) {
auto out_tensor = EagerTraceTransposeOp(final_layout_, in); auto out_tensor = EagerTraceTransposeOp(final_layout_, in);
phi::DenseTensorUtils::GetMutableMeta( phi::DenseTensorUtils::GetMutableMeta(
static_cast<phi::DenseTensor*>(out_tensor.impl().get())) static_cast<phi::DenseTensor*>(out_tensor.impl().get()))
->layout = final_layout_; ->layout = final_layout_;
return out_tensor; return out_tensor;
} }
return in;
} }
virtual paddle::optional<paddle::experimental::Tensor> TransInTensor( virtual paddle::optional<paddle::experimental::Tensor> TransInTensor(
@@ -90,7 +150,6 @@ class EagerLayoutTransformer {
virtual std::vector<paddle::experimental::Tensor> TransInTensors( virtual std::vector<paddle::experimental::Tensor> TransInTensors(
const std::string& in_name, const std::string& in_name,
const std::vector<paddle::experimental::Tensor>& in) { const std::vector<paddle::experimental::Tensor>& in) {
VLOG(4) << " TransInTensor";
return in; return in;
} }
@@ -98,72 +157,59 @@ class EagerLayoutTransformer {
TransInTensors( TransInTensors(
const std::string& in_name, const std::string& in_name,
const paddle::optional<std::vector<paddle::experimental::Tensor>>& in) { const paddle::optional<std::vector<paddle::experimental::Tensor>>& in) {
VLOG(4) << " TransInTensor"; return (in ? TransInTensors(in_name, *in) : in);
if (in) {
return TransInTensors(in_name, *in);
}
return in;
}
virtual void SetOutTensorLayout(
paddle::optional<paddle::experimental::Tensor>* out_tensor) {
VLOG(4) << "optional out_tensor";
} }
virtual void SetOutTensorLayout( virtual void SetOutTensorLayout(
std::vector<paddle::experimental::Tensor>* out_tensor) { std::vector<paddle::experimental::Tensor>* out_tensor) {
bool use_default = (final_layout_ == Layout::UNDEFINED); bool update_layout = !(final_layout_ == Layout::UNDEFINED);
if (!use_default) { if (update_layout) {
for (size_t i = 0; i < out_tensor->size(); i++) { for (size_t i = 0; i < out_tensor->size(); i++) {
phi::DenseTensorUtils::GetMutableMeta( phi::DenseTensorUtils::GetMutableMeta(
static_cast<phi::DenseTensor*>((*out_tensor)[i].impl().get())) static_cast<phi::DenseTensor*>((*out_tensor)[i].impl().get()))
->layout = ->layout = DesiredLayout();
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
} }
} }
VLOG(4) << op_name_ << "is is agnostic, use_default " << use_default; }
virtual void SetOutTensorLayout(
paddle::optional<paddle::experimental::Tensor>* out_tensor) {
VLOG(4) << "AutoTune out tensor is optional";
} }
virtual void SetOutTensorLayout( virtual void SetOutTensorLayout(
paddle::optional<std::vector<paddle::experimental::Tensor>>* out_tensor) { paddle::optional<std::vector<paddle::experimental::Tensor>>* out_tensor) {
VLOG(4) << "optional out_tensor"; VLOG(4) << "AutoTune out tensor is optional";
} }
virtual void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) { virtual void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) {
bool use_default = final_layout_ == Layout::UNDEFINED; if (op_name_ == "shape") {
if (!use_default) { return DealWithShapeOp(out_tensor, final_layout_, dim_size_);
phi::DenseTensorUtils::GetMutableMeta( }
static_cast<phi::DenseTensor*>(out_tensor->impl().get())) bool need_update = !(final_layout_ == Layout::UNDEFINED);
->layout = final_layout_; if (need_update) {
UpdateLayout(out_tensor, final_layout_);
} }
VLOG(4) << op_name_ << "is is agnostic, use_default " << use_default;
} }
protected: protected:
std::string op_name_; std::string op_name_;
const Layout final_layout_; const Layout final_layout_;
int dim_size_;
}; };
class EagerHeavilyLayoutSensitiveOpTransformer : public EagerLayoutTransformer { class EagerHeavilyLayoutSensitiveOpTransformer : public EagerLayoutTransformer {
public: public:
explicit EagerHeavilyLayoutSensitiveOpTransformer(const std::string& op_name, explicit EagerHeavilyLayoutSensitiveOpTransformer(const std::string& op_name,
std::string* layout) std::string* layout)
: op_name_(op_name), : op_name_(op_name), desired_layout_(DesiredLayout()) {
desired_layout_( VLOG(4) << "Heavily op: " << op_name;
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout()) { *layout = paddle::framework::DataLayoutToString(DesiredLayout());
VLOG(3) << "Optimze Layout heavily op: " << op_name;
final_layout_ = paddle::framework::DataLayoutToString(desired_layout_);
if ((*layout) != final_layout_) {
*layout = final_layout_;
}
} }
paddle::experimental::Tensor TransInTensor( paddle::experimental::Tensor TransInTensor(
const std::string& in_name, const paddle::experimental::Tensor& in) { const std::string& in_name, const paddle::experimental::Tensor& in) {
if (heavily_input_.count(in_name) != 0 && in.layout() != desired_layout_) { if (heavily_input_.count(in_name) != 0 && in.layout() != desired_layout_) {
VLOG(4) << op_name_ << "'s " << in_name << " need transpose from "
<< paddle::framework::DataLayoutToString(in.layout()) << " to "
<< final_layout_;
auto out_tensor = EagerTraceTransposeOp(desired_layout_, in); auto out_tensor = EagerTraceTransposeOp(desired_layout_, in);
return out_tensor; return out_tensor;
} }
@@ -171,14 +217,7 @@ class EagerHeavilyLayoutSensitiveOpTransformer : public EagerLayoutTransformer {
} }
void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) { void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) {
if (out_tensor->layout() != desired_layout_) { UpdateLayout(out_tensor, desired_layout_);
VLOG(4) << " Set Out_tensor's layout from "
<< paddle::framework::DataLayoutToString(out_tensor->layout())
<< " to " << final_layout_;
phi::DenseTensorUtils::GetMutableMeta(
static_cast<phi::DenseTensor*>(out_tensor->impl().get()))
->layout = desired_layout_;
}
} }
void SetOutTensorLayout( void SetOutTensorLayout(
@@ -192,10 +231,8 @@ class EagerHeavilyLayoutSensitiveOpTransformer : public EagerLayoutTransformer {
std::vector<paddle::experimental::Tensor>* out_tensor) { std::vector<paddle::experimental::Tensor>* out_tensor) {
for (size_t i = 0; i < out_tensor->size(); i++) { for (size_t i = 0; i < out_tensor->size(); i++) {
if ((*out_tensor)[i].layout() != desired_layout_) { if ((*out_tensor)[i].layout() != desired_layout_) {
VLOG(4) << " Set Out_tensor's layout from " VLOG(4) << "Update out_tensor's layout from "
<< paddle::framework::DataLayoutToString( << (*out_tensor)[i].layout() << " to " << desired_layout_;
(*out_tensor)[i].layout())
<< " to " << final_layout_;
phi::DenseTensorUtils::GetMutableMeta( phi::DenseTensorUtils::GetMutableMeta(
static_cast<phi::DenseTensor*>((*out_tensor)[i].impl().get())) static_cast<phi::DenseTensor*>((*out_tensor)[i].impl().get()))
->layout = desired_layout_; ->layout = desired_layout_;
@@ -205,7 +242,6 @@ class EagerHeavilyLayoutSensitiveOpTransformer : public EagerLayoutTransformer {
protected: protected:
std::string op_name_; std::string op_name_;
std::string final_layout_;
const paddle::experimental::DataLayout desired_layout_; const paddle::experimental::DataLayout desired_layout_;
std::unordered_set<std::string> heavily_input_{"x", "y", "input"}; std::unordered_set<std::string> heavily_input_{"x", "y", "input"};
}; };
@@ -213,11 +249,10 @@ class EagerHeavilyLayoutSensitiveOpTransformer : public EagerLayoutTransformer {
class EagerLightlyLayoutSensitiveOpTransformer : public EagerLayoutTransformer { class EagerLightlyLayoutSensitiveOpTransformer : public EagerLayoutTransformer {
public: public:
EagerLightlyLayoutSensitiveOpTransformer() {} EagerLightlyLayoutSensitiveOpTransformer() {}
explicit EagerLightlyLayoutSensitiveOpTransformer(const std::string& op_name) explicit EagerLightlyLayoutSensitiveOpTransformer(
: op_name_(op_name) { const std::string& op_name) {
VLOG(3) << "Optimze Layout lightly " << op_name; VLOG(4) << "Lightly op : " << op_name;
auto desired_layout = auto desired_layout = DesiredLayout();
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
final_layout_ = paddle::framework::DataLayoutToString(desired_layout); final_layout_ = paddle::framework::DataLayoutToString(desired_layout);
} }
@@ -226,11 +261,8 @@ class EagerLightlyLayoutSensitiveOpTransformer : public EagerLayoutTransformer {
const std::string& in_name, const paddle::experimental::Tensor& in) { const std::string& in_name, const paddle::experimental::Tensor& in) {
std::string input_layout = std::string input_layout =
paddle::framework::DataLayoutToString(in.layout()); paddle::framework::DataLayoutToString(in.layout());
auto default_layout = auto default_layout = DefaultLayout();
paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
if (final_layout_ == input_layout && in.shape().size() == 4) { if (final_layout_ == input_layout && in.shape().size() == 4) {
VLOG(4) << op_name_ << "'s " << in_name << " need transpose from "
<< input_layout << " to default_layout";
auto out_tensor = EagerTraceTransposeOp( auto out_tensor = EagerTraceTransposeOp(
paddle::experimental::DataLayout::UNDEFINED, in); paddle::experimental::DataLayout::UNDEFINED, in);
phi::DenseTensorUtils::GetMutableMeta( phi::DenseTensorUtils::GetMutableMeta(
@@ -238,7 +270,6 @@ class EagerLightlyLayoutSensitiveOpTransformer : public EagerLayoutTransformer {
->layout = default_layout; ->layout = default_layout;
return out_tensor; return out_tensor;
} }
VLOG(4) << in_name << "'s layout is " << input_layout;
return in; return in;
} }
@@ -246,15 +277,11 @@ class EagerLightlyLayoutSensitiveOpTransformer : public EagerLayoutTransformer {
const std::string& in_name, const std::string& in_name,
const std::vector<paddle::experimental::Tensor>& in) { const std::vector<paddle::experimental::Tensor>& in) {
std::vector<paddle::experimental::Tensor> result; std::vector<paddle::experimental::Tensor> result;
auto desired_layout = auto desired_layout = DesiredLayout();
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout(); auto default_layout = DefaultLayout();
auto default_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
for (size_t i = 0; i < in.size(); i++) { for (size_t i = 0; i < in.size(); i++) {
auto in_tensor = in[i]; auto in_tensor = in[i];
if (in_tensor.layout() == desired_layout) { if (in_tensor.layout() == desired_layout) {
VLOG(4) << op_name_ << "'s " << in_name << " need transpose from "
<< final_layout_ << " to default_layout";
auto out_tensor = EagerTraceTransposeOp( auto out_tensor = EagerTraceTransposeOp(
paddle::experimental::DataLayout::UNDEFINED, in_tensor); paddle::experimental::DataLayout::UNDEFINED, in_tensor);
phi::DenseTensorUtils::GetMutableMeta( phi::DenseTensorUtils::GetMutableMeta(
@@ -269,33 +296,20 @@ class EagerLightlyLayoutSensitiveOpTransformer : public EagerLayoutTransformer {
} }
void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) { void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) {
auto out_layout = out_tensor->layout(); UpdateLayout(out_tensor, DefaultLayout());
auto default_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
if (out_layout != default_layout) {
VLOG(4) << op_name_ << "'s out need transpose to default_layout";
phi::DenseTensorUtils::GetMutableMeta(
static_cast<phi::DenseTensor*>(out_tensor->impl().get()))
->layout = default_layout;
}
} }
void SetOutTensorLayout( void SetOutTensorLayout(
std::vector<paddle::experimental::Tensor*>* out_tensor) { std::vector<paddle::experimental::Tensor*>* out_tensor) {
for (size_t i = 0; i < out_tensor->size(); i++) { for (size_t i = 0; i < out_tensor->size(); i++) {
VLOG(4) << "out layout is"
<< paddle::framework::DataLayoutToString(
(*out_tensor)[i]->layout());
SetOutTensorLayout((*out_tensor)[i]); SetOutTensorLayout((*out_tensor)[i]);
} }
} }
void SetOutTensorLayout( void SetOutTensorLayout(
std::vector<paddle::experimental::Tensor>* out_tensor) { std::vector<paddle::experimental::Tensor>* out_tensor) {
auto default_layout = auto default_layout = DefaultLayout();
paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
for (size_t i = 0; i < out_tensor->size(); i++) { for (size_t i = 0; i < out_tensor->size(); i++) {
VLOG(4) << " out_tensor layout trans to default ";
phi::DenseTensorUtils::GetMutableMeta( phi::DenseTensorUtils::GetMutableMeta(
static_cast<phi::DenseTensor*>((*out_tensor)[i].impl().get())) static_cast<phi::DenseTensor*>((*out_tensor)[i].impl().get()))
->layout = default_layout; ->layout = default_layout;
@@ -303,7 +317,6 @@ class EagerLightlyLayoutSensitiveOpTransformer : public EagerLayoutTransformer {
} }
protected: protected:
std::string op_name_;
std::string final_layout_; std::string final_layout_;
std::unordered_set<std::string> heavily_input_{"x", "y", "input"}; std::unordered_set<std::string> heavily_input_{"x", "y", "input"};
}; };
@@ -312,18 +325,11 @@ class EagerTransposeOpTransformer
: public EagerLightlyLayoutSensitiveOpTransformer { : public EagerLightlyLayoutSensitiveOpTransformer {
public: public:
EagerTransposeOpTransformer() {} EagerTransposeOpTransformer() {}
explicit EagerTransposeOpTransformer(const std::string& op_name) explicit EagerTransposeOpTransformer(const std::string& op_name) {
: op_name_(op_name) { VLOG(4) << "AutoTuneTransformer op: " << op_name;
VLOG(3) << "Optimze Layout TransposeOpTransformer " << op_name;
auto desired_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
std::string desired_layout_str =
paddle::framework::DataLayoutToString(desired_layout);
final_layout_ = desired_layout_str;
} }
void SetAttr(std::vector<int>* axis, bool is_nhwc) { void SetAttr(std::vector<int>* axis, bool is_nhwc) {
// input's layout is nhwc and input's layout === desired_layout
std::vector<int> perm_nchw = {0, 2, 3, 1}; std::vector<int> perm_nchw = {0, 2, 3, 1};
std::vector<int> perm_nhwc = {0, 3, 1, 2}; std::vector<int> perm_nhwc = {0, 3, 1, 2};
auto perm = is_nhwc ? perm_nhwc : perm_nchw; auto perm = is_nhwc ? perm_nhwc : perm_nchw;
@@ -331,8 +337,6 @@ class EagerTransposeOpTransformer
(*axis)[1] = perm[(*axis)[1]]; (*axis)[1] = perm[(*axis)[1]];
(*axis)[2] = perm[(*axis)[2]]; (*axis)[2] = perm[(*axis)[2]];
(*axis)[3] = perm[(*axis)[3]]; (*axis)[3] = perm[(*axis)[3]];
VLOG(4) << " EagerTransposeOpTransformer " << op_name_
<< "'s layout is equal to desire: " << is_nhwc;
} }
paddle::experimental::Tensor TransInTensor( paddle::experimental::Tensor TransInTensor(
@@ -341,31 +345,16 @@ class EagerTransposeOpTransformer
} }
void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) { void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) {
auto default_layout = UpdateLayout(out_tensor, DefaultLayout());
paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
if (out_tensor->layout() != default_layout) {
VLOG(4) << " Set Out_tensor's layout from "
<< paddle::framework::DataLayoutToString(out_tensor->layout())
<< " to " << default_layout;
phi::DenseTensorUtils::GetMutableMeta(
static_cast<phi::DenseTensor*>(out_tensor->impl().get()))
->layout = default_layout;
}
} }
protected:
std::string op_name_;
std::string final_layout_;
std::unordered_set<std::string> heavily_input_{"x", "y", "input"};
}; };
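EagerTransposeOpTransformer::SetAttr remaps a transpose2 axis attribute that was written against the model's original layout onto the layout the input has been physically tuned into. A worked restatement, assuming a 4-element axis with entries in [0, 3]:

```cpp
// Worked restatement of EagerTransposeOpTransformer::SetAttr above.
#include <vector>

std::vector<int> RemapTransposeAxis(std::vector<int> axis, bool input_is_nhwc) {
  const std::vector<int> perm_nchw = {0, 2, 3, 1};
  const std::vector<int> perm_nhwc = {0, 3, 1, 2};
  const auto& perm = input_is_nhwc ? perm_nhwc : perm_nchw;
  for (auto& a : axis) {
    a = perm[a];  // translate from the model's layout to the physical layout
  }
  return axis;
}

// Example: a model written in NCHW calls transpose with axis {0, 2, 3, 1}
// (NCHW -> NHWC). If the input has already been autotuned to NHWC,
// RemapTransposeAxis({0, 2, 3, 1}, true) == {0, 1, 2, 3}: the explicit
// transpose collapses into an identity permutation.
```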
class EagerArgmaxOpTransformer class EagerArgmaxOpTransformer
: public EagerLightlyLayoutSensitiveOpTransformer { : public EagerLightlyLayoutSensitiveOpTransformer {
public: public:
EagerArgmaxOpTransformer() {} EagerArgmaxOpTransformer() {}
explicit EagerArgmaxOpTransformer(const std::string& op_name) explicit EagerArgmaxOpTransformer(const std::string& op_name) {
: op_name_(op_name) { VLOG(4) << "AutoTuneTransformer op: " << op_name;
VLOG(3) << "Optimze Layout lightly " << op_name;
} }
void SetAttr(paddle::experimental::Scalar* axis, bool is_nhwc) { void SetAttr(paddle::experimental::Scalar* axis, bool is_nhwc) {
@@ -377,38 +366,16 @@ class EagerArgmaxOpTransformer
} }
void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) { void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) {
VLOG(4) << "EagerArgmaxOpTransformer's out layout is" UpdateLayout(out_tensor, DesiredLayout());
<< paddle::framework::DataLayoutToString(out_tensor->layout());
auto default_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
if (default_layout != out_tensor->layout()) {
VLOG(4) << "Change layout from "
<< paddle::framework::DataLayoutToString(out_tensor->layout())
<< " to " << default_layout;
phi::DenseTensorUtils::GetMutableMeta(
static_cast<phi::DenseTensor*>(out_tensor->impl().get()))
->layout = default_layout;
}
} }
protected:
std::string op_name_;
std::string final_layout_;
std::unordered_set<std::string> heavily_input_{"x", "y", "input"};
}; };
class EagerFlattenOpTransformer class EagerFlattenOpTransformer
: public EagerLightlyLayoutSensitiveOpTransformer { : public EagerLightlyLayoutSensitiveOpTransformer {
public: public:
EagerFlattenOpTransformer() {} EagerFlattenOpTransformer() {}
explicit EagerFlattenOpTransformer(const std::string& op_name) explicit EagerFlattenOpTransformer(const std::string& op_name) {
: op_name_(op_name) { VLOG(4) << "AutoTuneTransformer op: " << op_name;
VLOG(3) << "Optimze Layout lightly " << op_name;
auto default_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
std::string default_layout_str =
paddle::framework::DataLayoutToString(default_layout);
final_layout_ = default_layout_str;
} }
// transpose from NHWC to NCHW // transpose from NHWC to NCHW
@@ -418,38 +385,16 @@ class EagerFlattenOpTransformer
} }
void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) { void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) {
VLOG(4) << "EagerFlattenOpTransformer's out layout is" UpdateLayout(out_tensor, DefaultLayout());
<< paddle::framework::DataLayoutToString(out_tensor->layout());
auto desired_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
if (desired_layout != out_tensor->layout()) {
VLOG(4) << "Change layout from "
<< paddle::framework::DataLayoutToString(out_tensor->layout())
<< " to " << desired_layout;
phi::DenseTensorUtils::GetMutableMeta(
static_cast<phi::DenseTensor*>(out_tensor->impl().get()))
->layout = desired_layout;
}
} }
protected:
std::string op_name_;
std::string final_layout_;
std::unordered_set<std::string> heavily_input_{"x", "y", "input"};
}; };
class EagerConcatOpTransformer class EagerConcatOpTransformer
: public EagerLightlyLayoutSensitiveOpTransformer { : public EagerLightlyLayoutSensitiveOpTransformer {
public: public:
EagerConcatOpTransformer() {} EagerConcatOpTransformer() {}
explicit EagerConcatOpTransformer(const std::string& op_name) explicit EagerConcatOpTransformer(const std::string& op_name) {
: op_name_(op_name) { VLOG(4) << "AutoTuneTransformer op : " << op_name;
VLOG(3) << "Optimze Layout lightly " << op_name;
auto default_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
std::string default_layout_str =
paddle::framework::DataLayoutToString(default_layout);
final_layout_ = default_layout_str;
} }
void SetAttr(paddle::experimental::Scalar* axis, void SetAttr(paddle::experimental::Scalar* axis,
@@ -457,6 +402,7 @@ class EagerConcatOpTransformer
std::vector<int> perm_nhwc = {0, 3, 1, 2}; std::vector<int> perm_nhwc = {0, 3, 1, 2};
std::vector<int> perm_nchw = {0, 2, 3, 1}; std::vector<int> perm_nchw = {0, 2, 3, 1};
int axes = axis->to<int>(); int axes = axis->to<int>();
axes = axes < 0 ? axes + 4 : axes;
auto perm = auto perm =
(paddle::framework::DataLayout::NHWC == layout) ? perm_nhwc : perm_nchw; (paddle::framework::DataLayout::NHWC == layout) ? perm_nhwc : perm_nchw;
(*axis) = static_cast<paddle::experimental::Scalar>(perm[axes]); (*axis) = static_cast<paddle::experimental::Scalar>(perm[axes]);
@@ -469,20 +415,7 @@ class EagerConcatOpTransformer
} }
void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) { void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) {
auto layout = paddle::framework::StringToDataLayout(final_layout_); UpdateLayout(out_tensor, DesiredLayout());
if (layout != out_tensor->layout()) {
VLOG(4) << "Change layout from "
<< paddle::framework::DataLayoutToString(out_tensor->layout())
<< " to " << final_layout_;
phi::DenseTensorUtils::GetMutableMeta(
static_cast<phi::DenseTensor*>(out_tensor->impl().get()))
->layout = layout;
}
} }
protected:
std::string op_name_;
std::string final_layout_;
std::unordered_set<std::string> heavily_input_{"x", "y", "input"};
}; };
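EagerConcatOpTransformer::SetAttr performs the analogous remap for concat's axis, and this commit additionally normalizes negative axes before indexing into the permutation (the newly added `axes = axes < 0 ? axes + 4 : axes;` line). A compact restatement:

```cpp
// Compact restatement of the concat axis handling above, including the
// newly added negative-axis normalization.
int RemapConcatAxis(int axis, bool desired_is_nhwc) {
  const int perm_nhwc[4] = {0, 3, 1, 2};
  const int perm_nchw[4] = {0, 2, 3, 1};
  if (axis < 0) {
    axis += 4;  // e.g. axis = -1 on a 4-D input becomes 3
  }
  return desired_is_nhwc ? perm_nhwc[axis] : perm_nchw[axis];
}

// Example: a model written in NCHW concatenates along axis 1 (channels).
// With inputs tuned to NHWC, RemapConcatAxis(1, true) == 3, so the concat
// still runs along the channel dimension of the physical tensors.
```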
} // namespace egr } // namespace egr
@@ -194,8 +194,10 @@ paddle::imperative::NameVarMap<VarType> AutoTuneLayout(
(conv_in_type == framework::proto::VarType::FP16); (conv_in_type == framework::proto::VarType::FP16);
if (is_tune_fp32) { if (is_tune_fp32) {
LayoutAutoTune::Instance().SetDesiredLayout(DataLayout::NCHW); LayoutAutoTune::Instance().SetDesiredLayout(DataLayout::NCHW);
LayoutAutoTune::Instance().SetDefaultLayout(DataLayout::NHWC);
} else if (is_tune_fp16) { } else if (is_tune_fp16) {
LayoutAutoTune::Instance().SetDesiredLayout(DataLayout::NHWC); LayoutAutoTune::Instance().SetDesiredLayout(DataLayout::NHWC);
LayoutAutoTune::Instance().SetDefaultLayout(DataLayout::NCHW);
} else { } else {
tracer->DisableLayoutAutoTune(); tracer->DisableLayoutAutoTune();
return ins; return ins;
......
@@ -25,7 +25,7 @@ namespace imperative {
template <typename VarType> template <typename VarType>
void SetOutDataLayout(std::shared_ptr<VarType> var, void SetOutDataLayout(std::shared_ptr<VarType> var,
const paddle::experimental::DataLayout layout) { const paddle::experimental::DataLayout layout) {
if (var != nullptr) { if (var != nullptr && var->Var().IsInitialized()) {
paddle::imperative::SetDataLayout(var, layout); paddle::imperative::SetDataLayout(var, layout);
// set out_tensor's layout // set out_tensor's layout
if (var->MutableVar()->IsInitialized()) { if (var->MutableVar()->IsInitialized()) {
......
@@ -184,6 +184,42 @@ PyObject* tensor_properties_get_shape(TensorObject* self, void* closure) {
} }
} }
auto desired_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
auto default_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
bool change_dim =
(desired_layout != default_layout &&
self->tensor.layout() == desired_layout && value.size() == 4);
VLOG(6) << "eager_properties 'Shape' method, layout autotune "
<< " desired_layout: " << desired_layout
<< " default_layout: " << default_layout
<< " tensor layout: " << self->tensor.layout()
<< " tensor's shape size is : " << value.size();
std::vector<int64_t> dims = value;
if (change_dim &&
paddle::framework::DataLayoutToString(desired_layout) == "NCHW") {
// NCHW -> NHWC
VLOG(6) << "layout autotune get Shape from NCHW -> NHWC " << value[0] << " "
<< value[1] << " " << value[2] << " " << value[3] << " to "
<< dims[0] << " " << dims[2] << " " << dims[3] << " " << dims[1];
value[0] = dims[0];
value[1] = dims[2];
value[2] = dims[3];
value[3] = dims[1];
} else if (change_dim &&
paddle::framework::DataLayoutToString(desired_layout) == "NHWC") {
// NHWC -> NCHW
VLOG(6) << "layout autotune get Shape from NHWC -> NCHW " << value[0] << " "
<< value[1] << " " << value[2] << " " << value[3] << " to "
<< dims[0] << " " << dims[3] << " " << dims[1] << " " << dims[2]
<< " " << dims[1];
value[0] = dims[0];
value[1] = dims[3];
value[2] = dims[1];
value[3] = dims[2];
}
return ToPyObject(value); return ToPyObject(value);
EAGER_CATCH_AND_THROW_RETURN_NULL EAGER_CATCH_AND_THROW_RETURN_NULL
} }
......
@@ -2044,8 +2044,49 @@ void BindImperative(py::module *m_ptr) {
"shape", "shape",
[](imperative::VarBase &self) { [](imperative::VarBase &self) {
if (self.Var().IsType<framework::LoDTensor>()) { if (self.Var().IsType<framework::LoDTensor>()) {
return phi::vectorize<int>( auto value = phi::vectorize<int>(
self.Var().Get<framework::LoDTensor>().dims()); self.Var().Get<framework::LoDTensor>().dims());
auto tensor = self.Var().Get<framework::LoDTensor>();
auto tmp_value = value;
auto desired_layout =
paddle::imperative::LayoutAutoTune::Instance()
.GetDesiredLayout();
auto default_layout =
paddle::imperative::LayoutAutoTune::Instance()
.GetDefaultLayout();
bool change_dim =
(desired_layout != default_layout &&
tensor.layout() == desired_layout && value.size() == 4);
VLOG(6) << "'Shape' method, layout autotune,"
<< " desired_layout: " << desired_layout
<< " default_layout: " << default_layout
<< " tensor layout: " << tensor.layout()
<< " tensor's shape size is : " << value.size();
if (change_dim && paddle::framework::DataLayoutToString(
desired_layout) == "NCHW") {
VLOG(6) << "layout autotune get Shape from NHWC -> NCHW "
<< value[0] << " " << value[1] << " " << value[2] << " "
<< value[3] << " to " << tmp_value[3] << " "
<< tmp_value[1] << " " << tmp_value[2] << " "
<< tmp_value[1];
// NCHW -> NHWC
value[1] = tmp_value[2];
value[2] = tmp_value[3];
value[3] = tmp_value[1];
} else if (change_dim && paddle::framework::DataLayoutToString(
desired_layout) == "NHWC") {
VLOG(6) << "layout autotune get Shape from NHWC -> NCHW "
<< value[0] << " " << value[1] << " " << value[2] << " "
<< value[3] << " to " << tmp_value[0] << " "
<< tmp_value[3] << " " << tmp_value[1] << " "
<< tmp_value[2];
// NHWC -> NCHW
value[1] = tmp_value[3];
value[2] = tmp_value[1];
value[3] = tmp_value[2];
}
return value;
} else if (self.Var().IsType<phi::SelectedRows>()) { } else if (self.Var().IsType<phi::SelectedRows>()) {
return phi::vectorize<int>( return phi::vectorize<int>(
self.Var().Get<phi::SelectedRows>().value().dims()); self.Var().Get<phi::SelectedRows>().value().dims());
......
@@ -205,7 +205,8 @@ phi::DenseTensor TransformData(phi::DenseTensor* tensor,
if (NeedTransformLayout(tensor->layout(), if (NeedTransformLayout(tensor->layout(),
target_args_def.layout, target_args_def.layout,
tensor->place(), tensor->place(),
transform_flag)) { transform_flag) &&
tensor->dims().size() != 1) {
out = TransDataLayout(out, target_args_def.layout); out = TransDataLayout(out, target_args_def.layout);
trans_layout = true; trans_layout = true;
} }
......
@@ -93,18 +93,9 @@ class LayoutAutoTune(unittest.TestCase):
return conv_out, predict return conv_out, predict
def test_enable_autotune(self): def test_enable_autotune(self):
if self.use_autoune(): conv_out, predict = self.train(data_format="NCHW")
conv_out, predict = self.train(data_format="NCHW") self.assertEqual(conv_out.shape, [1, 8, 14, 14])
if paddle.fluid.core.use_layout_autotune(): self.assertEqual(predict.shape, [1, 2])
self.assertEqual(conv_out.shape, [1, 14, 14, 8])
self.assertEqual(predict.shape, [1, 2])
else:
self.assertEqual(conv_out.shape, [1, 8, 14, 14])
self.assertEqual(predict.shape, [1, 2])
else:
conv_out, predict = self.train(data_format="NCHW")
self.assertEqual(conv_out.shape, [1, 8, 14, 14])
self.assertEqual(predict.shape, [1, 2])
def test_transpose_op_transposer(self): def test_transpose_op_transposer(self):
conv = paddle.nn.Conv2D(3, 8, (3, 3)) conv = paddle.nn.Conv2D(3, 8, (3, 3))
@@ -124,12 +115,8 @@ class LayoutAutoTune(unittest.TestCase):
scaled.backward() scaled.backward()
scaler.minimize(optimizer, scaled) scaler.minimize(optimizer, scaled)
if paddle.fluid.core.use_layout_autotune(): self.assertEqual(conv_out.shape, [1, 8, 14, 12])
self.assertEqual(conv_out.shape, [1, 14, 12, 8]) self.assertEqual(out.shape, [1, 12, 8, 14])
self.assertEqual(out.shape, [1, 12, 8, 14])
else:
self.assertEqual(conv_out.shape, [1, 8, 14, 12])
self.assertEqual(out.shape, [1, 12, 8, 14])
def test_flatten_op_transposer(self): def test_flatten_op_transposer(self):
conv = paddle.nn.Conv2D(3, 8, (3, 3)) conv = paddle.nn.Conv2D(3, 8, (3, 3))
@@ -143,12 +130,8 @@ class LayoutAutoTune(unittest.TestCase):
# because it flatten the C and H dimensions. # because it flatten the C and H dimensions.
out = flatten(conv_out) out = flatten(conv_out)
if paddle.fluid.core.use_layout_autotune(): self.assertEqual(conv_out.shape, [1, 8, 14, 12])
self.assertEqual(conv_out.shape, [1, 14, 12, 8]) self.assertEqual(out.shape, [1, 112, 12])
self.assertEqual(out.shape, [1, 112, 12])
else:
self.assertEqual(conv_out.shape, [1, 8, 14, 12])
self.assertEqual(out.shape, [1, 112, 12])
def test_argmax_op_transposer_keep_dims(self): def test_argmax_op_transposer_keep_dims(self):
conv = paddle.nn.Conv2D(3, 8, (3, 3)) conv = paddle.nn.Conv2D(3, 8, (3, 3))
@@ -157,41 +140,8 @@ class LayoutAutoTune(unittest.TestCase):
conv_out = conv(data) conv_out = conv(data)
# conv_out.shape = [1, 14, 12, 8] with NHWC # conv_out.shape = [1, 14, 12, 8] with NHWC
out = paddle.argmax(conv_out, axis=1, keepdim=True) out = paddle.argmax(conv_out, axis=1, keepdim=True)
if paddle.fluid.core.use_layout_autotune(): self.assertEqual(conv_out.shape, [1, 8, 14, 12])
self.assertEqual(conv_out.shape, [1, 14, 12, 8]) self.assertEqual(out.shape, [1, 1, 14, 12])
self.assertEqual(out.shape, [1, 14, 12, 1])
else:
self.assertEqual(conv_out.shape, [1, 8, 14, 12])
self.assertEqual(out.shape, [1, 1, 14, 12])
def test_argmax_op_transposer_ff(self):
conv = paddle.nn.Conv2D(3, 8, (3, 3))
data = paddle.rand([1, 3, 16, 14])
with paddle.amp.auto_cast(level="O2"):
conv_out = conv(data)
# conv_out.shape = [1, 14, 12, 8] with NHWC
out = paddle.argmax(conv_out)
if paddle.fluid.core.use_layout_autotune():
self.assertEqual(conv_out.shape, [1, 14, 12, 8])
self.assertEqual(out.shape, [1])
else:
self.assertEqual(conv_out.shape, [1, 8, 14, 12])
self.assertEqual(out.shape, [1])
def test_argmax_op_transposer_t(self):
conv = paddle.nn.Conv2D(3, 8, (3, 3))
data = paddle.rand([1, 3, 16, 14])
with paddle.amp.auto_cast(level="O2"):
conv_out = conv(data)
# conv_out.shape = [1, 14, 12, 8] with NHWC
out = paddle.argmax(conv_out)
if paddle.fluid.core.use_layout_autotune():
self.assertEqual(conv_out.shape, [1, 14, 12, 8])
self.assertEqual(out.shape, [1])
else:
self.assertEqual(conv_out.shape, [1, 8, 14, 12])
self.assertEqual(out.shape, [1])
def test_concat_op_transposer(self): def test_concat_op_transposer(self):
in1 = paddle.rand([1, 8, 14, 12]) in1 = paddle.rand([1, 8, 14, 12])
@@ -202,12 +152,8 @@ class LayoutAutoTune(unittest.TestCase):
# conv_out.shape = [1, 14, 12, 8] with NHWC # conv_out.shape = [1, 14, 12, 8] with NHWC
out = paddle.concat(x=[conv_out, in1], axis=0) out = paddle.concat(x=[conv_out, in1], axis=0)
if paddle.fluid.core.use_layout_autotune(): self.assertEqual(conv_out.shape, [1, 8, 14, 12])
self.assertEqual(conv_out.shape, [1, 14, 12, 8]) self.assertEqual(out.shape, [2, 8, 14, 12])
self.assertEqual(out.shape, [2, 8, 14, 12])
else:
self.assertEqual(conv_out.shape, [1, 8, 14, 12])
self.assertEqual(out.shape, [2, 8, 14, 12])
def test_concat_op_no_transposer(self): def test_concat_op_no_transposer(self):
conv = paddle.nn.Conv2D(3, 8, (3, 3)) conv = paddle.nn.Conv2D(3, 8, (3, 3))
@@ -219,12 +165,8 @@ class LayoutAutoTune(unittest.TestCase):
# conv_out.shape = [1, 14, 12, 8] with NHWC # conv_out.shape = [1, 14, 12, 8] with NHWC
out = paddle.concat(x=[conv_out1, conv_out2], axis=0) out = paddle.concat(x=[conv_out1, conv_out2], axis=0)
if paddle.fluid.core.use_layout_autotune(): self.assertEqual(conv_out1.shape, [1, 8, 14, 12])
self.assertEqual(conv_out1.shape, [1, 14, 12, 8]) self.assertEqual(out.shape, [2, 8, 14, 12])
self.assertEqual(out.shape, [2, 14, 12, 8])
else:
self.assertEqual(conv_out1.shape, [1, 8, 14, 12])
self.assertEqual(out.shape, [2, 8, 14, 12])
class TestAutoTuneAPI(unittest.TestCase): class TestAutoTuneAPI(unittest.TestCase):
......
@@ -153,8 +153,8 @@ def _conv_nd(x,
channel_dim = channel_dim + len( channel_dim = channel_dim + len(
x.shape) if channel_dim < 0 else channel_dim x.shape) if channel_dim < 0 else channel_dim
tmp_bias = _C_ops.reshape( tmp_bias = _C_ops.reshape(
bias, bias, [1 for i in range(channel_dim)] + bias.shape +
bias.shape + [1 for i in range(len(x.shape) - channel_dim - 1)]) [1 for i in range(len(x.shape) - channel_dim - 1)])
return _C_ops.add(pre_bias, tmp_bias) return _C_ops.add(pre_bias, tmp_bias)
else: else:
return pre_bias return pre_bias
......