未验证 提交 3da3462f 编写于 作者: N niuliling123 提交者: GitHub

Update layout autotune for module with no modified (#46541)

上级 20eb6e00
...@@ -1093,7 +1093,7 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase): ...@@ -1093,7 +1093,7 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
tensors_vector_list_str = "{ " + ",".join( tensors_vector_list_str = "{ " + ",".join(
amp_tensors_vector_list) + " }" amp_tensors_vector_list) + " }"
if len(amp_tensors_vector_list) == 0: if len(amp_tensors_vector_list) == 0: # or forward_api_name == "shape":
layout_logic_str = "" layout_logic_str = ""
else: else:
after_call_str = f"{returns_type_str} {result_name} = {forward_function_name}({layout_inputs_call_args_str});\n" after_call_str = f"{returns_type_str} {result_name} = {forward_function_name}({layout_inputs_call_args_str});\n"
......
...@@ -32,70 +32,50 @@ inline bool NeedTransLayout( ...@@ -32,70 +32,50 @@ inline bool NeedTransLayout(
} }
return false; return false;
} }
inline std::shared_ptr<EagerLayoutTransformer> BaseTransformer(
const std::string& op_name,
const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& tensors_vector) {
std::shared_ptr<EagerLayoutTransformer> transposer = nullptr;
bool unstart =
(paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() ==
paddle::experimental::DataLayout::UNDEFINED);
auto first_layout = tensors_vector[0][0].layout();
VLOG(3) << "Layout autotune was is start ? " << (!unstart) << op_name
<< "'s layout is " << first_layout;
transposer = std::make_shared<EagerLayoutTransformer>(
op_name, tensors_vector, first_layout);
return transposer;
}
// For agnostic op like add, relu, exp
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune( inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
const std::string& op_name, const std::string& op_name,
const paddle::small_vector<std::vector<paddle::experimental::Tensor>, const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& tensors_vector) { kSlotSmallVectorSize>& tensors_vector) {
auto desired_layout = // For agnostic op like add, relu, exp
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
auto default_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
auto first_layout = tensors_vector[0][0].layout(); auto first_layout = tensors_vector[0][0].layout();
if (NeedTransLayout(tensors_vector, first_layout)) { auto desired_layout = DesiredLayout();
bool is_started =
!(desired_layout == paddle::experimental::DataLayout::UNDEFINED);
if (is_started && NeedTransLayout(tensors_vector, first_layout)) {
bool need_trans_back = false; bool need_trans_back = false;
for (size_t i = 0; i < tensors_vector.size(); i++) { for (size_t i = 0; i < tensors_vector.size(); i++) {
for (size_t idx = 0; idx < tensors_vector[0].size(); idx++) { for (size_t idx = 0; idx < tensors_vector[0].size(); idx++) {
if (4 != tensors_vector[i][idx].shape().size()) { if (4 != tensors_vector[i][idx].shape().size()) {
need_trans_back = true; need_trans_back = true;
VLOG(3) << "Agnostic op " << op_name << " shape is "
<< tensors_vector[i][idx].shape().size() << " and layout is "
<< tensors_vector[i][idx].layout();
} }
} }
} }
auto final_layout = need_trans_back ? default_layout : desired_layout; auto final_layout = need_trans_back ? DefaultLayout() : desired_layout;
VLOG(4) << op_name << "'s has different layout, need trans to "
<< final_layout;
return std::make_shared<EagerLayoutTransformer>( return std::make_shared<EagerLayoutTransformer>(
op_name, tensors_vector, final_layout); op_name, tensors_vector, final_layout);
} }
return BaseTransformer(op_name, tensors_vector); return std::make_shared<EagerLayoutTransformer>(
op_name, tensors_vector, first_layout);
} }
// For lightly op like reduce
template <typename T> template <typename T>
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune( inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
const std::string& op_name, const std::string& op_name,
const paddle::small_vector<std::vector<paddle::experimental::Tensor>, const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& tensors_vector, kSlotSmallVectorSize>& tensors_vector,
T* attr) { T* attr) {
VLOG(3) << "Lightly op " << op_name << "'s shape is " // For lightly op like reduce
<< tensors_vector[0][0].shape().size() << " and layout is " if (!(DesiredLayout() == paddle::experimental::DataLayout::UNDEFINED)) {
<< tensors_vector[0][0].layout(); VLOG(4) << "LayoutAutotune was unstarted. Current op :" << op_name;
return std::make_shared<EagerLayoutTransformer>(
std::shared_ptr<EagerLayoutTransformer> transposer = nullptr; op_name, tensors_vector, tensors_vector[0][0].layout());
transposer = }
std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name); return std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
return transposer;
} }
// For lightly op like argmax
template <typename T1, typename T2> template <typename T1, typename T2>
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune( inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
const std::string& op_name, const std::string& op_name,
...@@ -103,28 +83,23 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune( ...@@ -103,28 +83,23 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
kSlotSmallVectorSize>& tensors_vector, kSlotSmallVectorSize>& tensors_vector,
T1* axis, T1* axis,
T2* keep_dim) { T2* keep_dim) {
VLOG(3) << "Lightly op " << op_name << "'s shape is " // For lightly op like argmax
<< tensors_vector[0][0].shape().size() << " and layout is "
<< tensors_vector[0][0].layout();
return EagerLayoutAutotune<T1>(op_name, tensors_vector, axis); return EagerLayoutAutotune<T1>(op_name, tensors_vector, axis);
} }
// heavily string data_format, data_layout
template <> template <>
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune( inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
const std::string& op_name, const std::string& op_name,
const paddle::small_vector<std::vector<paddle::experimental::Tensor>, const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& tensors_vector, kSlotSmallVectorSize>& tensors_vector,
std::string* attr) { std::string* attr) {
auto first_layout = tensors_vector[0][0].layout(); // Heavily op with (string) data_format, data_layout
auto transposer = std::make_shared<EagerLayoutTransformer>( auto transposer = std::make_shared<EagerLayoutTransformer>(
op_name, tensors_vector, first_layout); op_name, tensors_vector, tensors_vector[0][0].layout());
if (paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() == if (DesiredLayout() == paddle::experimental::DataLayout::UNDEFINED) {
paddle::experimental::DataLayout::UNDEFINED) {
// Layout autotune only supports model with convolutional layers // Layout autotune only supports model with convolutional layers
VLOG(3) << "Optimze Layout was not started " << op_name;
if (op_name != "conv2d") { if (op_name != "conv2d") {
VLOG(4) << "LayoutAutotune was unstarted. Current op :" << op_name;
return transposer; return transposer;
} else { } else {
auto data_type = tensors_vector[0][0].dtype(); auto data_type = tensors_vector[0][0].dtype();
...@@ -134,7 +109,8 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune( ...@@ -134,7 +109,8 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
bool is_tune_fp16 = bool is_tune_fp16 =
(data_type == paddle::experimental::DataType::FLOAT16) && (data_type == paddle::experimental::DataType::FLOAT16) &&
(*attr == "NCHW"); (*attr == "NCHW");
VLOG(3) << "Conv2d_dy's dtype " << data_type << " format" << (*attr); VLOG(4) << "LayoutAutoTune assert with dtype and layout, Current op : "
<< op_name;
if (is_tune_fp32) { if (is_tune_fp32) {
paddle::imperative::LayoutAutoTune::Instance().SetDesiredLayout( paddle::imperative::LayoutAutoTune::Instance().SetDesiredLayout(
paddle::experimental::DataLayout::NCHW); paddle::experimental::DataLayout::NCHW);
...@@ -147,58 +123,45 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune( ...@@ -147,58 +123,45 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
paddle::imperative::LayoutAutoTune::Instance().SetDefaultLayout( paddle::imperative::LayoutAutoTune::Instance().SetDefaultLayout(
paddle::experimental::DataLayout::NCHW); paddle::experimental::DataLayout::NCHW);
} else { } else {
VLOG(4) << "DisableLayoutAutoTune accoding to Conv op"
<< " dtype : " << data_type << " format : " << (*attr);
egr::Controller::Instance().DisableLayoutAutoTune(); egr::Controller::Instance().DisableLayoutAutoTune();
return transposer; return transposer;
} }
VLOG(3) VLOG(4) << "LayoutAutoTune from " << *attr << " to " << DesiredLayout();
<< "Tune the layout from " << *attr << " to "
<< paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
} }
} }
if (paddle::imperative::LayoutAutoTune::Instance().IsHeavilyLayoutSensitive( if (paddle::imperative::LayoutAutoTune::Instance().IsHeavilyLayoutSensitive(
op_name)) { op_name)) {
VLOG(3) return std::make_shared<EagerHeavilyLayoutSensitiveOpTransformer>(op_name,
<< op_name
<< "'s LayoutTransformer is EagerHeavilyLayoutSensitiveOpTransformer";
auto heavily_transposer =
std::make_shared<EagerHeavilyLayoutSensitiveOpTransformer>(op_name,
attr); attr);
return heavily_transposer;
} }
return std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
VLOG(3) << op_name << "'s LayoutTransformer is unimplemented. Use default.";
return transposer;
} }
// lightly transpose
template <> template <>
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune( inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
const std::string& op_name, const std::string& op_name,
const paddle::small_vector<std::vector<paddle::experimental::Tensor>, const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& tensors_vector, kSlotSmallVectorSize>& tensors_vector,
std::vector<int>* attr) { std::vector<int>* attr) {
auto first_layout = tensors_vector[0][0].layout(); // lightly transpose
std::shared_ptr<EagerLayoutTransformer> transposer = nullptr; if (DesiredLayout() == paddle::experimental::DataLayout::UNDEFINED) {
if (paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() == VLOG(4) << "LayoutAutotune was unstarted. Current op :" << op_name;
paddle::experimental::DataLayout::UNDEFINED) { return std::make_shared<EagerLayoutTransformer>(
VLOG(3) << "Optimze Layout was not started" << op_name; op_name, tensors_vector, tensors_vector[0][0].layout());
transposer = std::make_shared<EagerLayoutTransformer>(
op_name, tensors_vector, first_layout);
return transposer;
} }
if (op_name == "transpose2" && if (op_name == "transpose2" &&
(tensors_vector[0][0].layout() == (tensors_vector[0][0].layout() == DesiredLayout())) {
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout())) {
auto trans = std::make_shared<EagerTransposeOpTransformer>(op_name); auto trans = std::make_shared<EagerTransposeOpTransformer>(op_name);
trans->SetAttr(attr, trans->SetAttr(attr,
tensors_vector[0][0].layout() == tensors_vector[0][0].layout() ==
paddle::experimental::DataLayout::NHWC); paddle::experimental::DataLayout::NHWC);
return trans; return trans;
} }
transposer = return std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
return transposer;
} }
// lightly int argmax // lightly int argmax
...@@ -210,19 +173,14 @@ EagerLayoutAutotune<paddle::experimental::Scalar, bool>( ...@@ -210,19 +173,14 @@ EagerLayoutAutotune<paddle::experimental::Scalar, bool>(
kSlotSmallVectorSize>& tensors_vector, kSlotSmallVectorSize>& tensors_vector,
paddle::experimental::Scalar* axis, paddle::experimental::Scalar* axis,
bool* keep_dim) { bool* keep_dim) {
auto first_layout = tensors_vector[0][0].layout(); if (DesiredLayout() == paddle::experimental::DataLayout::UNDEFINED) {
std::shared_ptr<EagerLayoutTransformer> transposer = nullptr; VLOG(4) << "LayoutAutotune was unstarted. Current op :" << op_name;
if (paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() == return std::make_shared<EagerLayoutTransformer>(
paddle::experimental::DataLayout::UNDEFINED) { op_name, tensors_vector, tensors_vector[0][0].layout());
VLOG(3) << "Optimze Layout was not started" << op_name;
transposer = std::make_shared<EagerLayoutTransformer>(
op_name, tensors_vector, first_layout);
return transposer;
} }
auto desired_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
if (op_name == "argmax" && if (op_name == "argmax" &&
(tensors_vector[0][0].layout() == desired_layout) && (*keep_dim)) { (tensors_vector[0][0].layout() == DesiredLayout()) && (*keep_dim)) {
std::shared_ptr<EagerArgmaxOpTransformer> argmax_transform = nullptr; std::shared_ptr<EagerArgmaxOpTransformer> argmax_transform = nullptr;
argmax_transform = std::make_shared<EagerArgmaxOpTransformer>(op_name); argmax_transform = std::make_shared<EagerArgmaxOpTransformer>(op_name);
argmax_transform->SetAttr(axis, argmax_transform->SetAttr(axis,
...@@ -230,12 +188,9 @@ EagerLayoutAutotune<paddle::experimental::Scalar, bool>( ...@@ -230,12 +188,9 @@ EagerLayoutAutotune<paddle::experimental::Scalar, bool>(
paddle::experimental::DataLayout::NHWC); paddle::experimental::DataLayout::NHWC);
return argmax_transform; return argmax_transform;
} }
transposer = return std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
return transposer;
} }
// lightly for flatten
template <> template <>
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune<int, int>( inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune<int, int>(
const std::string& op_name, const std::string& op_name,
...@@ -243,32 +198,22 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune<int, int>( ...@@ -243,32 +198,22 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune<int, int>(
kSlotSmallVectorSize>& tensors_vector, kSlotSmallVectorSize>& tensors_vector,
int* start_axis, int* start_axis,
int* stop_axis) { int* stop_axis) {
auto first_layout = tensors_vector[0][0].layout(); if (DesiredLayout() == paddle::experimental::DataLayout::UNDEFINED) {
std::shared_ptr<EagerLayoutTransformer> transposer = nullptr; VLOG(4) << "Optimze Layout was not started" << op_name;
auto desired_layout = return std::make_shared<EagerLayoutTransformer>(
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout(); op_name, tensors_vector, tensors_vector[0][0].layout());
if (desired_layout == paddle::experimental::DataLayout::UNDEFINED) {
VLOG(3) << "Optimze Layout was not started" << op_name;
transposer = std::make_shared<EagerLayoutTransformer>(
op_name, tensors_vector, first_layout);
return transposer;
} }
bool no_tranpose = tensors_vector[0][0].layout() == desired_layout;
bool no_tranpose = tensors_vector[0][0].layout() == DesiredLayout();
bool is_valid = ((*start_axis) == 1 && (*stop_axis) == 3); bool is_valid = ((*start_axis) == 1 && (*stop_axis) == 3);
if (op_name == "flatten" || op_name == "flatten_contiguous_range") { if (op_name == "flatten" || op_name == "flatten_contiguous_range") {
if (no_tranpose && is_valid) { if (no_tranpose && is_valid) {
std::shared_ptr<EagerFlattenOpTransformer> flatten_transform = nullptr; return std::make_shared<EagerFlattenOpTransformer>(op_name);
flatten_transform = std::make_shared<EagerFlattenOpTransformer>(op_name);
return flatten_transform;
} }
} }
return std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
transposer =
std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
return transposer;
} }
// lightly int Concat
template <> template <>
inline std::shared_ptr<EagerLayoutTransformer> inline std::shared_ptr<EagerLayoutTransformer>
EagerLayoutAutotune<paddle::experimental::Scalar>( EagerLayoutAutotune<paddle::experimental::Scalar>(
...@@ -276,27 +221,26 @@ EagerLayoutAutotune<paddle::experimental::Scalar>( ...@@ -276,27 +221,26 @@ EagerLayoutAutotune<paddle::experimental::Scalar>(
const paddle::small_vector<std::vector<paddle::experimental::Tensor>, const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& tensors_vector, kSlotSmallVectorSize>& tensors_vector,
paddle::experimental::Scalar* axis) { paddle::experimental::Scalar* axis) {
auto desired_layout = if (DesiredLayout() == paddle::experimental::DataLayout::UNDEFINED) {
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout(); VLOG(4) << "Optimze Layout was not started" << op_name;
auto first_layout = tensors_vector[0][0].layout(); return std::make_shared<EagerLayoutTransformer>(
std::shared_ptr<EagerLayoutTransformer> transposer = nullptr; op_name, tensors_vector, tensors_vector[0][0].layout());
if (desired_layout == paddle::experimental::DataLayout::UNDEFINED) {
VLOG(3) << "Optimze Layout was not started" << op_name;
transposer = std::make_shared<EagerLayoutTransformer>(
op_name, tensors_vector, first_layout);
return transposer;
} }
auto desired_layout = DesiredLayout();
if (NeedTransLayout(tensors_vector, desired_layout)) { if (NeedTransLayout(tensors_vector, desired_layout)) {
VLOG(3) << op_name << " need transpose to default layout"; VLOG(4) << op_name << "'s has different layout";
transposer = return std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name); }
return transposer; if (op_name == "Concat") {
} else { if (desired_layout == tensors_vector[0][0].layout() &&
tensors_vector[0][0].shape().size() == 4) {
auto trans = std::make_shared<EagerConcatOpTransformer>(op_name); auto trans = std::make_shared<EagerConcatOpTransformer>(op_name);
trans->SetAttr(axis, desired_layout); trans->SetAttr(axis, desired_layout);
return trans; return trans;
} }
}
return std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
} }
} // namespace egr } // namespace egr
...@@ -194,8 +194,10 @@ paddle::imperative::NameVarMap<VarType> AutoTuneLayout( ...@@ -194,8 +194,10 @@ paddle::imperative::NameVarMap<VarType> AutoTuneLayout(
(conv_in_type == framework::proto::VarType::FP16); (conv_in_type == framework::proto::VarType::FP16);
if (is_tune_fp32) { if (is_tune_fp32) {
LayoutAutoTune::Instance().SetDesiredLayout(DataLayout::NCHW); LayoutAutoTune::Instance().SetDesiredLayout(DataLayout::NCHW);
LayoutAutoTune::Instance().SetDefaultLayout(DataLayout::NHWC);
} else if (is_tune_fp16) { } else if (is_tune_fp16) {
LayoutAutoTune::Instance().SetDesiredLayout(DataLayout::NHWC); LayoutAutoTune::Instance().SetDesiredLayout(DataLayout::NHWC);
LayoutAutoTune::Instance().SetDefaultLayout(DataLayout::NCHW);
} else { } else {
tracer->DisableLayoutAutoTune(); tracer->DisableLayoutAutoTune();
return ins; return ins;
......
...@@ -184,6 +184,42 @@ PyObject* tensor_properties_get_shape(TensorObject* self, void* closure) { ...@@ -184,6 +184,42 @@ PyObject* tensor_properties_get_shape(TensorObject* self, void* closure) {
} }
} }
auto desired_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
auto default_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
bool change_dim =
(desired_layout != default_layout &&
self->tensor.layout() == desired_layout && value.size() == 4);
VLOG(6) << "eager_properties 'Shape' method, layout autotune "
<< " desired_layout: " << desired_layout
<< " default_layout: " << default_layout
<< " tensor layout: " << self->tensor.layout()
<< " tensor's shape size is : " << value.size();
std::vector<int64_t> dims = value;
if (change_dim &&
paddle::framework::DataLayoutToString(desired_layout) == "NCHW") {
// NCHW -> NHWC
VLOG(6) << "layout autotune get Shape from NCHW -> NHWC " << value[0] << " "
<< value[1] << " " << value[2] << " " << value[3] << " to "
<< dims[0] << " " << dims[2] << " " << dims[3] << " " << dims[1];
value[0] = dims[0];
value[1] = dims[2];
value[2] = dims[3];
value[3] = dims[1];
} else if (change_dim &&
paddle::framework::DataLayoutToString(desired_layout) == "NHWC") {
// NHWC -> NCHW
VLOG(6) << "layout autotune get Shape from NHWC -> NCHW " << value[0] << " "
<< value[1] << " " << value[2] << " " << value[3] << " to "
<< dims[0] << " " << dims[3] << " " << dims[1] << " " << dims[2]
<< " " << dims[1];
value[0] = dims[0];
value[1] = dims[3];
value[2] = dims[1];
value[3] = dims[2];
}
return ToPyObject(value); return ToPyObject(value);
EAGER_CATCH_AND_THROW_RETURN_NULL EAGER_CATCH_AND_THROW_RETURN_NULL
} }
......
...@@ -2044,8 +2044,49 @@ void BindImperative(py::module *m_ptr) { ...@@ -2044,8 +2044,49 @@ void BindImperative(py::module *m_ptr) {
"shape", "shape",
[](imperative::VarBase &self) { [](imperative::VarBase &self) {
if (self.Var().IsType<framework::LoDTensor>()) { if (self.Var().IsType<framework::LoDTensor>()) {
return phi::vectorize<int>( auto value = phi::vectorize<int>(
self.Var().Get<framework::LoDTensor>().dims()); self.Var().Get<framework::LoDTensor>().dims());
auto tensor = self.Var().Get<framework::LoDTensor>();
auto tmp_value = value;
auto desired_layout =
paddle::imperative::LayoutAutoTune::Instance()
.GetDesiredLayout();
auto default_layout =
paddle::imperative::LayoutAutoTune::Instance()
.GetDefaultLayout();
bool change_dim =
(desired_layout != default_layout &&
tensor.layout() == desired_layout && value.size() == 4);
VLOG(6) << "'Shape' method, layout autotune,"
<< " desired_layout: " << desired_layout
<< " default_layout: " << default_layout
<< " tensor layout: " << tensor.layout()
<< " tensor's shape size is : " << value.size();
if (change_dim && paddle::framework::DataLayoutToString(
desired_layout) == "NCHW") {
VLOG(6) << "layout autotune get Shape from NHWC -> NCHW "
<< value[0] << " " << value[1] << " " << value[2] << " "
<< value[3] << " to " << tmp_value[3] << " "
<< tmp_value[1] << " " << tmp_value[2] << " "
<< tmp_value[1];
// NCHW -> NHWC
value[1] = tmp_value[2];
value[2] = tmp_value[3];
value[3] = tmp_value[1];
} else if (change_dim && paddle::framework::DataLayoutToString(
desired_layout) == "NHWC") {
VLOG(6) << "layout autotune get Shape from NHWC -> NCHW "
<< value[0] << " " << value[1] << " " << value[2] << " "
<< value[3] << " to " << tmp_value[0] << " "
<< tmp_value[3] << " " << tmp_value[1] << " "
<< tmp_value[2];
// NHWC -> NCHW
value[1] = tmp_value[3];
value[2] = tmp_value[1];
value[3] = tmp_value[2];
}
return value;
} else if (self.Var().IsType<phi::SelectedRows>()) { } else if (self.Var().IsType<phi::SelectedRows>()) {
return phi::vectorize<int>( return phi::vectorize<int>(
self.Var().Get<phi::SelectedRows>().value().dims()); self.Var().Get<phi::SelectedRows>().value().dims());
......
...@@ -205,7 +205,8 @@ phi::DenseTensor TransformData(phi::DenseTensor* tensor, ...@@ -205,7 +205,8 @@ phi::DenseTensor TransformData(phi::DenseTensor* tensor,
if (NeedTransformLayout(tensor->layout(), if (NeedTransformLayout(tensor->layout(),
target_args_def.layout, target_args_def.layout,
tensor->place(), tensor->place(),
transform_flag)) { transform_flag) &&
tensor->dims().size() != 1) {
out = TransDataLayout(out, target_args_def.layout); out = TransDataLayout(out, target_args_def.layout);
trans_layout = true; trans_layout = true;
} }
......
...@@ -93,15 +93,6 @@ class LayoutAutoTune(unittest.TestCase): ...@@ -93,15 +93,6 @@ class LayoutAutoTune(unittest.TestCase):
return conv_out, predict return conv_out, predict
def test_enable_autotune(self): def test_enable_autotune(self):
if self.use_autoune():
conv_out, predict = self.train(data_format="NCHW")
if paddle.fluid.core.use_layout_autotune():
self.assertEqual(conv_out.shape, [1, 14, 14, 8])
self.assertEqual(predict.shape, [1, 2])
else:
self.assertEqual(conv_out.shape, [1, 8, 14, 14])
self.assertEqual(predict.shape, [1, 2])
else:
conv_out, predict = self.train(data_format="NCHW") conv_out, predict = self.train(data_format="NCHW")
self.assertEqual(conv_out.shape, [1, 8, 14, 14]) self.assertEqual(conv_out.shape, [1, 8, 14, 14])
self.assertEqual(predict.shape, [1, 2]) self.assertEqual(predict.shape, [1, 2])
...@@ -124,10 +115,6 @@ class LayoutAutoTune(unittest.TestCase): ...@@ -124,10 +115,6 @@ class LayoutAutoTune(unittest.TestCase):
scaled.backward() scaled.backward()
scaler.minimize(optimizer, scaled) scaler.minimize(optimizer, scaled)
if paddle.fluid.core.use_layout_autotune():
self.assertEqual(conv_out.shape, [1, 14, 12, 8])
self.assertEqual(out.shape, [1, 12, 8, 14])
else:
self.assertEqual(conv_out.shape, [1, 8, 14, 12]) self.assertEqual(conv_out.shape, [1, 8, 14, 12])
self.assertEqual(out.shape, [1, 12, 8, 14]) self.assertEqual(out.shape, [1, 12, 8, 14])
...@@ -143,10 +130,6 @@ class LayoutAutoTune(unittest.TestCase): ...@@ -143,10 +130,6 @@ class LayoutAutoTune(unittest.TestCase):
# because it flatten the C and H dimensions. # because it flatten the C and H dimensions.
out = flatten(conv_out) out = flatten(conv_out)
if paddle.fluid.core.use_layout_autotune():
self.assertEqual(conv_out.shape, [1, 14, 12, 8])
self.assertEqual(out.shape, [1, 112, 12])
else:
self.assertEqual(conv_out.shape, [1, 8, 14, 12]) self.assertEqual(conv_out.shape, [1, 8, 14, 12])
self.assertEqual(out.shape, [1, 112, 12]) self.assertEqual(out.shape, [1, 112, 12])
...@@ -157,42 +140,9 @@ class LayoutAutoTune(unittest.TestCase): ...@@ -157,42 +140,9 @@ class LayoutAutoTune(unittest.TestCase):
conv_out = conv(data) conv_out = conv(data)
# conv_out.shape = [1, 14, 12, 8] with NHWC # conv_out.shape = [1, 14, 12, 8] with NHWC
out = paddle.argmax(conv_out, axis=1, keepdim=True) out = paddle.argmax(conv_out, axis=1, keepdim=True)
if paddle.fluid.core.use_layout_autotune():
self.assertEqual(conv_out.shape, [1, 14, 12, 8])
self.assertEqual(out.shape, [1, 14, 12, 1])
else:
self.assertEqual(conv_out.shape, [1, 8, 14, 12]) self.assertEqual(conv_out.shape, [1, 8, 14, 12])
self.assertEqual(out.shape, [1, 1, 14, 12]) self.assertEqual(out.shape, [1, 1, 14, 12])
def test_argmax_op_transposer_ff(self):
conv = paddle.nn.Conv2D(3, 8, (3, 3))
data = paddle.rand([1, 3, 16, 14])
with paddle.amp.auto_cast(level="O2"):
conv_out = conv(data)
# conv_out.shape = [1, 14, 12, 8] with NHWC
out = paddle.argmax(conv_out)
if paddle.fluid.core.use_layout_autotune():
self.assertEqual(conv_out.shape, [1, 14, 12, 8])
self.assertEqual(out.shape, [1])
else:
self.assertEqual(conv_out.shape, [1, 8, 14, 12])
self.assertEqual(out.shape, [1])
def test_argmax_op_transposer_t(self):
conv = paddle.nn.Conv2D(3, 8, (3, 3))
data = paddle.rand([1, 3, 16, 14])
with paddle.amp.auto_cast(level="O2"):
conv_out = conv(data)
# conv_out.shape = [1, 14, 12, 8] with NHWC
out = paddle.argmax(conv_out)
if paddle.fluid.core.use_layout_autotune():
self.assertEqual(conv_out.shape, [1, 14, 12, 8])
self.assertEqual(out.shape, [1])
else:
self.assertEqual(conv_out.shape, [1, 8, 14, 12])
self.assertEqual(out.shape, [1])
def test_concat_op_transposer(self): def test_concat_op_transposer(self):
in1 = paddle.rand([1, 8, 14, 12]) in1 = paddle.rand([1, 8, 14, 12])
conv = paddle.nn.Conv2D(3, 8, (3, 3)) conv = paddle.nn.Conv2D(3, 8, (3, 3))
...@@ -202,10 +152,6 @@ class LayoutAutoTune(unittest.TestCase): ...@@ -202,10 +152,6 @@ class LayoutAutoTune(unittest.TestCase):
# conv_out.shape = [1, 14, 12, 8] with NHWC # conv_out.shape = [1, 14, 12, 8] with NHWC
out = paddle.concat(x=[conv_out, in1], axis=0) out = paddle.concat(x=[conv_out, in1], axis=0)
if paddle.fluid.core.use_layout_autotune():
self.assertEqual(conv_out.shape, [1, 14, 12, 8])
self.assertEqual(out.shape, [2, 8, 14, 12])
else:
self.assertEqual(conv_out.shape, [1, 8, 14, 12]) self.assertEqual(conv_out.shape, [1, 8, 14, 12])
self.assertEqual(out.shape, [2, 8, 14, 12]) self.assertEqual(out.shape, [2, 8, 14, 12])
...@@ -219,10 +165,6 @@ class LayoutAutoTune(unittest.TestCase): ...@@ -219,10 +165,6 @@ class LayoutAutoTune(unittest.TestCase):
# conv_out.shape = [1, 14, 12, 8] with NHWC # conv_out.shape = [1, 14, 12, 8] with NHWC
out = paddle.concat(x=[conv_out1, conv_out2], axis=0) out = paddle.concat(x=[conv_out1, conv_out2], axis=0)
if paddle.fluid.core.use_layout_autotune():
self.assertEqual(conv_out1.shape, [1, 14, 12, 8])
self.assertEqual(out.shape, [2, 14, 12, 8])
else:
self.assertEqual(conv_out1.shape, [1, 8, 14, 12]) self.assertEqual(conv_out1.shape, [1, 8, 14, 12])
self.assertEqual(out.shape, [2, 8, 14, 12]) self.assertEqual(out.shape, [2, 8, 14, 12])
......
...@@ -152,8 +152,8 @@ def _conv_nd(x, ...@@ -152,8 +152,8 @@ def _conv_nd(x,
channel_dim = channel_dim + len( channel_dim = channel_dim + len(
x.shape) if channel_dim < 0 else channel_dim x.shape) if channel_dim < 0 else channel_dim
tmp_bias = _C_ops.reshape( tmp_bias = _C_ops.reshape(
bias, bias, [1 for i in range(channel_dim)] + bias.shape +
bias.shape + [1 for i in range(len(x.shape) - channel_dim - 1)]) [1 for i in range(len(x.shape) - channel_dim - 1)])
return _C_ops.add(pre_bias, tmp_bias) return _C_ops.add(pre_bias, tmp_bias)
else: else:
return pre_bias return pre_bias
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册