Unverified commit d7d9807e authored by niuliling123, committed by GitHub

Add eager layout autotune (#45409)

Parent cfaee812
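This change adds a layout auto-tuning guard to the eager (dygraph) forward functions: inputs are collected so the tuner can inspect their layouts, a transformer is chosen for the op, inputs are transposed where needed, autotune is disabled around the nested forward call so the inner call takes the ordinary path, and output layouts are re-tagged before returning. The hand-written conv2d wrapper below shows the real code; the following is only a rough sketch for a hypothetical single-input op my_op with a data_format attribute (my_op, my_op_dygraph_function, x and out are placeholders, not part of this diff):

if (paddle::imperative::LayoutAutoTune::Instance().UseLayoutAutoTune()) {
  // Gather the inputs so the tuner can look at their layouts.
  paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                       egr::kSlotSmallVectorSize>
      tensors_vector = {{x}};
  auto op_name = phi::TransToFluidOpName("my_op");
  // A heavily layout-sensitive string attribute may be rewritten in place.
  auto transformer = egr::EagerLayoutAutotune<std::string>(
      op_name, tensors_vector, &data_format);
  auto NEW_x = transformer->TransInTensor("x", x);
  // Disable autotune for the nested call so this guard is not re-entered.
  paddle::imperative::LayoutAutoTune::Instance().DisableLayoutAutoTune();
  auto out = my_op_dygraph_function(NEW_x, data_format);
  transformer->SetOutTensorLayout(&out);
  paddle::imperative::LayoutAutoTune::Instance().EnableLayoutAutoTune();
  return out;
}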
......@@ -17,6 +17,7 @@
#include "paddle/fluid/eager/api/manual/eager_manual/nodes/nodes.h"
#include "paddle/fluid/eager/api/utils/global_utils.h"
#include "paddle/fluid/eager/eager_amp_auto_cast.h"
#include "paddle/fluid/eager/eager_layout_auto_tune.h"
#include "paddle/fluid/eager/nan_inf_utils.h"
#include "paddle/fluid/platform/profiler/event_tracing.h"
......@@ -73,6 +74,37 @@ paddle::experimental::Tensor conv2d_dygraph_function(
}
}
// Layout autotune
if (paddle::imperative::LayoutAutoTune::Instance().UseLayoutAutoTune()) {
VLOG(5) << "Check and Prepare For LAYOUT";
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
egr::kSlotSmallVectorSize>
tensors_vector = {{input}, {filter}};
auto op_name = phi::TransToFluidOpName("conv2d");
auto transformer = egr::EagerLayoutAutotune<std::string>(
op_name, tensors_vector, &data_format);
auto NEW_input = transformer->TransInTensor("input", input);
paddle::imperative::LayoutAutoTune::Instance().DisableLayoutAutoTune();
auto out = conv2d_dygraph_function(NEW_input,
filter,
strides,
paddings,
paddding_algorithm,
groups,
dilations,
data_format,
use_addto,
workspace_size_MB,
exhaustive_search);
transformer->SetOutTensorLayout(&out);
paddle::imperative::LayoutAutoTune::Instance().EnableLayoutAutoTune();
// Returns
return out;
}
// Get Input AutoGradMeta
egr::AutogradMeta* input_autograd_meta =
egr::EagerUtils::nullable_autograd_meta(input);
......
......@@ -185,6 +185,8 @@ FORWARD_FUNCTION_TEMPLATE = \
// Dygraph Record Event
{}
// AMP Logic
{}
// Layout autotune
{}
// Get Input AutoGradMeta
{}
......@@ -217,7 +219,8 @@ FORWARD_ONLY_FUNCTION_TEMPLATE = \
{}
// AMP Logic
{}
// Layout autotune
{}
// Forward API Call
VLOG(3) << \"Final State Running: \" << \"{}\";
{}
......@@ -295,7 +298,6 @@ NODE_CC_FILE_TEMPLATE = \
#include "paddle/fluid/eager/api/generated/eager_generated/forwards/dygraph_functions.h"
#include "paddle/fluid/eager/to_static/run_program_op_node.h"
#include "paddle/fluid/eager/nan_inf_utils.h"
#include "paddle/phi/api/include/sparse_api.h"
#include "paddle/fluid/eager/api/manual/eager_manual/nodes/nodes.h"
DECLARE_bool(check_nan_inf);
......@@ -317,7 +319,7 @@ FORWARD_CC_FILE_TEMPLATE = \
#include "paddle/phi/api/lib/dygraph_api.h"
#include "paddle/fluid/eager/api/generated/eager_generated/forwards/dygraph_functions.h"
#include "paddle/fluid/eager/api/generated/eager_generated/backwards/nodes.h"
#include "paddle/fluid/eager/eager_layout_auto_tune.h"
#include "paddle/phi/api/include/strings_api.h"
#include "paddle/phi/api/include/sparse_api.h"
#include "paddle/fluid/eager/api/utils/global_utils.h"
......@@ -396,7 +398,21 @@ AMP_LOGIC_TEMPLATE = \
}}
}}
"""
# Layout-autotune guard emitted into every generated forward function.
# Placeholders, in order: the tensors_vector initializer, push_backs for
# optional inputs, transformer construction plus input transforms, the
# nested forward call (made while autotune is disabled), output layout
# tagging, and the return expression.
LAYOUT_LOGIC_TEMPLATE = \
"""
if (paddle::imperative::LayoutAutoTune::Instance().UseLayoutAutoTune()) {{
VLOG(5) << "Check and Prepare For LAYOUT";
paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> tensors_vector = {};
{}
{}
paddle::imperative::LayoutAutoTune::Instance().DisableLayoutAutoTune();
{}
{}
paddle::imperative::LayoutAutoTune::Instance().EnableLayoutAutoTune();
// Returns
return {};
}}
"""
CREATE_PLAIN_OPTIONAL_TENSOR_TEMPLATE = \
"""
paddle::optional<paddle::experimental::Tensor> {}_optional;
......@@ -992,6 +1008,9 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
amp_tensors_vector_optional_list = []
amp_autocast_list = []
amp_autocast_optional_list = []
layout_autotune_list = []
layout_autotune_optional_list = []
layout_tensors_vector_optional_list = []
for name, (ttype, pos) in forward_inputs_position_map.items():
inputs_call_list[pos] = f"{name}"
amp_inputs_call_list[pos] = f"NEW_{name}"
......@@ -1009,6 +1028,12 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
amp_autocast_optional_list.append(
f"auto NEW_{name} = egr::EagerAmpAutoCast(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
)
layout_tensors_vector_optional_list.append(
f"if ({name}) tensors_vector.push_back({{ *{name} }});\n"
)
layout_autotune_optional_list.append(
f"auto NEW_{name} = transformer->TransInTensor(\"{name}\", {name});\n"
)
else:
if is_inplaced and forward_inplace_map and name in forward_inplace_map.keys(
):
......@@ -1023,6 +1048,9 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
amp_autocast_list.append(
f"auto NEW_{name} = egr::EagerAmpAutoCast(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
)
layout_autotune_list.append(
f"auto NEW_{name} = transformer->TransInTensor(\"{name}\", {name});\n"
)
else:
assert IsVectorTensorType(ttype)
if is_optional:
......@@ -1037,6 +1065,9 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
amp_autocast_optional_list.append(
f"auto NEW_{name} = egr::EagerAmpAutoCasts(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
)
layout_autotune_optional_list.append(
f"auto NEW_{name} = transformer->TransInTensor(\"{name}\", {name});\n"
)
else:
if is_inplaced and forward_inplace_map and name in forward_inplace_map.keys(
):
......@@ -1047,10 +1078,59 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
amp_autocast_list.append(
f"auto NEW_{name} = egr::EagerAmpAutoCasts(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
)
layout_autotune_list.append(
f"auto NEW_{name} = transformer->TransInTensor(\"{name}\", {name});\n"
)
inputs_args_definition_list[pos] = arg_str
inputs_args_declaration_list[pos] = arg_str
# for layout autotune attr
lightly_sensitive_attr = [
'axis', 'axes', 'dim', 'dims', 'start', 'end', 'stop'
]
heavily_sensitive_attr = ['data_format', 'data_layout']
layout_autotune_attr = []
layout_autotune_attr_code_list = []
layout_autotune_attr_type_list = []
layout_autotune_attr_code_list.append(
f"auto op_name = phi::TransToFluidOpName(\"{forward_api_name}\");\n"
)
lightly_flag = False
heavily_flag = False
for name, atype, default_val, pos in forward_attrs_list:
for attr_name in lightly_sensitive_attr:
if name.find(
attr_name) != -1 and name not in layout_autotune_attr:
lightly_flag = True
layout_autotune_attr.append(name)
layout_autotune_attr_type_list.append(atype)
if lightly_flag is False:
for attr_name in heavily_sensitive_attr:
if name.find(attr_name
) != -1 and name not in layout_autotune_attr:
layout_autotune_attr.append(name)
layout_autotune_attr_type_list.append(atype)
heavily_flag = True
if len(layout_autotune_attr) == 0:
layout_autotune_attr_code_list.append(
f"auto transformer = egr::EagerLayoutAutotune(op_name, tensors_vector);\n"
)
elif len(layout_autotune_attr) == 1:
layout_autotune_attr_code_list.append(
f"auto transformer = egr::EagerLayoutAutotune<{layout_autotune_attr_type_list[0]}>(op_name, tensors_vector, &{layout_autotune_attr[0]});\n"
)
elif len(layout_autotune_attr) == 2:
layout_autotune_attr_code_list.append(
f"auto transformer = egr::EagerLayoutAutotune<{layout_autotune_attr_type_list[0]}, {layout_autotune_attr_type_list[1]}>(op_name, tensors_vector, &{layout_autotune_attr[0]}, &{layout_autotune_attr[1]});\n"
)
else:
layout_autotune_attr_code_list.append(
f"auto transformer = egr::EagerLayoutAutotune(op_name, tensors_vector, {len(layout_autotune_attr)});\n"
)
# forward attrs
for name, atype, default_val, pos in forward_attrs_list:
inputs_call_list[pos] = name
amp_inputs_call_list[pos] = name
......@@ -1236,6 +1316,35 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
amp_tensors_vector_optional_list_str, amp_get_dst_dtype_str,
amp_autocast_list_str, amp_call_str)
# Forward layout autotune
layout_inputs_call_args_str = amp_inputs_call_args_str
layout_tmp_result_list = []
layout_autotune_outs_list = ""
if num_outputs == 1:
layout_autotune_outs_list += f"{indent}auto {returns_str} = api_result;\n"
layout_autotune_outs_list += f"{indent}transformer -> SetOutTensorLayout(&{returns_str});\n"
else:
for name, (rtype, pos) in forward_outputs_position_map.items():
if name in intermediate_outputs:
continue
layout_autotune_outs_list += f"{indent}auto& {name} = std::get<{len(layout_tmp_result_list)}>(api_result);\n"
layout_autotune_outs_list += f"{indent}transformer -> SetOutTensorLayout(&{name});\n"
layout_tmp_result_list.append(f"{name}")
if returns_type_str == "paddle::experimental::Tensor&" or forward_api_name == "slice" or forward_api_name == "strided_slice" or len(
layout_autotune_attr) == 0:
layout_logic_str = ""
else:
# after_call_str = f"return {forward_function_name}({layout_inputs_call_args_str});\n"
after_call_str = f"auto api_result = {forward_function_name}({layout_inputs_call_args_str});\n"
layout_logic_str = LAYOUT_LOGIC_TEMPLATE.format(
amp_tensors_vector_list_str,
" ".join(layout_tensors_vector_optional_list),
" ".join(layout_autotune_attr_code_list) + " " +
" ".join(layout_autotune_list) +
" ".join(layout_autotune_optional_list), after_call_str,
layout_autotune_outs_list, returns_str)
# Generate forward_definition_str and forward_declaration_str
if self.is_forward_only:
if len(amp_tensors_vector_list) == 0:
......@@ -1243,17 +1352,17 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
self.forward_definition_str += FORWARD_ONLY_FUNCTION_TEMPLATE.format(
returns_type_str, forward_function_name,
inputs_args_definition_str, dygraph_event_str, amp_logic_str,
forward_function_name, forward_call_str, get_outputs_str,
returns_str)
layout_logic_str, forward_function_name, forward_call_str,
get_outputs_str, returns_str)
else:
self.forward_definition_str += FORWARD_FUNCTION_TEMPLATE.format(
returns_type_str, forward_function_name,
inputs_args_definition_str, dygraph_event_str, amp_logic_str,
inputs_autograd_meta_str, forward_function_name,
forward_call_str, check_nan_inf_str, get_outputs_str,
outputs_autograd_meta_str, compute_require_grad_args_str,
check_inplace_str, bump_inplace_version_str, node_creation_str,
returns_str)
layout_logic_str, inputs_autograd_meta_str,
forward_function_name, forward_call_str, check_nan_inf_str,
get_outputs_str, outputs_autograd_meta_str,
compute_require_grad_args_str, check_inplace_str,
bump_inplace_version_str, node_creation_str, returns_str)
self.forward_declaration_str += f"{returns_type_str} {forward_function_name}({inputs_args_declaration_str});\n"
......
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/eager/api/generated/eager_generated/forwards/dygraph_functions.h"
#include "paddle/fluid/eager/eager_layout_transformer.h"
#include "paddle/fluid/imperative/layout_autotune.h"
#include "paddle/phi/backends/gpu/gpu_info.h"
namespace egr {
// For layout-agnostic ops (layout_agnostic_ops_) such as add / relu
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
const std::string& op_name,
const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& tensors_vector) {
VLOG(3) << " Optimze Layout agnostic op: " << op_name;
std::shared_ptr<EagerLayoutTransformer> transposer = nullptr;
transposer =
std::make_shared<EagerLayoutTransformer>(op_name, tensors_vector);
return transposer;
}
// For lightly layout-sensitive ops such as reduce
template <typename T>
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
const std::string& op_name,
const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& tensors_vector,
T* attr) {
std::shared_ptr<EagerLayoutTransformer> transposer = nullptr;
bool unstart =
(paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() ==
paddle::experimental::DataLayout::UNDEFINED);
if (unstart) {
VLOG(3) << "Optimze Layout was not started" << op_name;
transposer =
std::make_shared<EagerLayoutTransformer>(op_name, tensors_vector);
return transposer;
}
transposer =
std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
return transposer;
}
// For lightly layout-sensitive ops with two attributes, such as argmax
template <typename T1, typename T2>
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
const std::string& op_name,
const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& tensors_vector,
T1* axis,
T2* keep_dim) {
return EagerLayoutAutotune<T1>(op_name, tensors_vector, axis);
}
// For heavily layout-sensitive ops with a string attribute (data_format / data_layout)
template <>
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
const std::string& op_name,
const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& tensors_vector,
std::string* attr) {
VLOG(3) << " Optimze Layout heavily op: " << op_name;
auto transposer =
std::make_shared<EagerLayoutTransformer>(op_name, tensors_vector);
if (paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() ==
paddle::experimental::DataLayout::UNDEFINED) {
// Layout autotune only supports model with convolutional layers
VLOG(3) << "Optimze Layout was not started" << op_name;
if (op_name != "conv2d") {
return transposer;
} else {
#if defined(PADDLE_WITH_CUDA)
if (paddle::platform::is_gpu_place(tensors_vector[0][0].place()) &&
!phi::backends::gpu::TensorCoreAvailable()) {
paddle::imperative::LayoutAutoTune::Instance().DisableLayoutAutoTune();
return transposer;
}
#endif
auto data_type = tensors_vector[0][0].dtype();
bool is_tune_fp32 =
(data_type == paddle::experimental::DataType::FLOAT32) &&
(*attr == "NHWC");
bool is_tune_fp16 =
(data_type == paddle::experimental::DataType::FLOAT16) &&
(*attr == "NCHW");
if (is_tune_fp32) {
paddle::imperative::LayoutAutoTune::Instance().SetDesiredLayout(
paddle::experimental::DataLayout::NCHW);
paddle::imperative::LayoutAutoTune::Instance().SetDefaultLayout(
paddle::experimental::DataLayout::NHWC);
} else if (is_tune_fp16) {
paddle::imperative::LayoutAutoTune::Instance().SetDesiredLayout(
paddle::experimental::DataLayout::NHWC);
paddle::imperative::LayoutAutoTune::Instance().SetDefaultLayout(
paddle::experimental::DataLayout::NCHW);
} else {
paddle::imperative::LayoutAutoTune::Instance().DisableLayoutAutoTune();
return transposer;
}
VLOG(3) << "Tune the layout from " << attr << " to "
<< paddle::framework::DataLayoutToString(
paddle::imperative::LayoutAutoTune::Instance()
.GetDesiredLayout());
}
}
if (paddle::imperative::LayoutAutoTune::Instance().IsHeavilyLayoutSensitive(
op_name)) {
auto heavily_transposer =
std::make_shared<EagerHeavilyLayoutSensitiveOpTransformer>(op_name,
attr);
return heavily_transposer;
}
VLOG(3) << op_name
<< "'s LayoutTransformer is unimplemented. Use default "
"LayoutTransformer instead.";
return transposer;
}
// Lightly layout-sensitive: std::vector<int> attribute (e.g. transpose2's axis)
template <>
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
const std::string& op_name,
const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& tensors_vector,
std::vector<int>* attr) {
std::shared_ptr<EagerLayoutTransformer> transposer = nullptr;
if (paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() ==
paddle::experimental::DataLayout::UNDEFINED) {
VLOG(3) << " Optimze Layout Unstarted : " << op_name;
transposer =
std::make_shared<EagerLayoutTransformer>(op_name, tensors_vector);
return transposer;
}
VLOG(3) << " Optimze Layout lightly op: " << op_name;
if (op_name == "transpose2") {
auto trans = std::make_shared<EagerTransposeOpTransformer>(op_name);
if (tensors_vector[0][0].layout() ==
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout()) {
trans->SetAttr(attr,
tensors_vector[0][0].layout() ==
paddle::experimental::DataLayout::NHWC);
return trans;
}
}
transposer =
std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
return transposer;
}
// Lightly layout-sensitive: Scalar axis + bool keep_dim (argmax)
template <>
inline std::shared_ptr<EagerLayoutTransformer>
EagerLayoutAutotune<paddle::experimental::Scalar, bool>(
const std::string& op_name,
const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& tensors_vector,
paddle::experimental::Scalar* axis,
bool* keep_dim) {
std::shared_ptr<EagerLayoutTransformer> transposer = nullptr;
if (paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() ==
paddle::experimental::DataLayout::UNDEFINED) {
VLOG(3) << " Optimze Layout Unstarted : " << op_name;
transposer =
std::make_shared<EagerLayoutTransformer>(op_name, tensors_vector);
return transposer;
}
auto desired_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
if (op_name == "argmax") {
std::shared_ptr<EagerArgmaxOpTransformer> argmax_transform = nullptr;
argmax_transform = std::make_shared<EagerArgmaxOpTransformer>(op_name);
if ((tensors_vector[0][0].layout() == desired_layout) && (*keep_dim)) {
argmax_transform->SetAttr(axis,
tensors_vector[0][0].layout() ==
paddle::experimental::DataLayout::NHWC);
return argmax_transform;
}
}
VLOG(3) << " Optimze Layout lightly op: " << op_name;
transposer =
std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
return transposer;
}
// Lightly layout-sensitive: int start_axis / stop_axis (flatten)
template <>
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune<int, int>(
const std::string& op_name,
const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& tensors_vector,
int* start_axis,
int* stop_axis) {
std::shared_ptr<EagerLayoutTransformer> transposer = nullptr;
if (paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() ==
paddle::experimental::DataLayout::UNDEFINED) {
VLOG(3) << " Optimze Layout Unstarted : " << op_name;
transposer =
std::make_shared<EagerLayoutTransformer>(op_name, tensors_vector);
return transposer;
}
bool no_tranpose =
tensors_vector[0][0].layout() ==
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
bool is_valid = ((*start_axis) == 1 && (*stop_axis) == 3);
if (op_name == "flatten" || op_name == "flatten_contiguous_range") {
if (no_tranpose && is_valid) {
std::shared_ptr<EagerFlattenOpTransformer> flatten_transform = nullptr;
flatten_transform = std::make_shared<EagerFlattenOpTransformer>(op_name);
return flatten_transform;
}
}
VLOG(3) << " Optimze Layout lightly op: " << op_name;
transposer =
std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
return transposer;
}
// Lightly layout-sensitive: Scalar axis (concat); T can be int,
// std::vector<int>, std::vector<int64_t>, or IntArray.
template <>
inline std::shared_ptr<EagerLayoutTransformer>
EagerLayoutAutotune<paddle::experimental::Scalar>(
const std::string& op_name,
const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& tensors_vector,
paddle::experimental::Scalar* axis) {
auto desired_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
std::shared_ptr<EagerLayoutTransformer> transposer = nullptr;
if (desired_layout == paddle::experimental::DataLayout::UNDEFINED) {
VLOG(3) << " Optimze Layout Unstarted : " << op_name;
transposer =
std::make_shared<EagerLayoutTransformer>(op_name, tensors_vector);
return transposer;
}
bool need_transpose = false;
for (size_t i = 0; i < tensors_vector.size(); i++) {
for (size_t idx = 0; idx < tensors_vector[0].size(); idx++) {
if (desired_layout != tensors_vector[i][idx].layout()) {
need_transpose = true;
}
}
}
if (need_transpose) {
VLOG(3) << "Concat need transpose to NCHW " << op_name;
transposer =
std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
return transposer;
} else {
VLOG(3) << " Optimze Layout lightly op: " << op_name;
auto trans = std::make_shared<EagerConcatOpTransformer>(op_name);
trans->SetAttr(axis, desired_layout);
return trans;
}
}
} // namespace egr
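To make the overload selection above concrete, a minimal sketch of the kinds of calls the generated forwards end up making (the op names, attribute types and the contents of ins are illustrative assumptions, not taken from this diff):

paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                     egr::kSlotSmallVectorSize>
    ins = {{x}};  // x: any input tensor (placeholder)
// No layout-relevant attribute: the agnostic transformer is returned.
auto t0 = egr::EagerLayoutAutotune("relu", ins);
// One lightly sensitive attribute (axis/dims/start/...): the lightly
// sensitive transformer once tuning has started, agnostic before that.
std::vector<int> dims = {1};
auto t1 = egr::EagerLayoutAutotune<std::vector<int>>("mean", ins, &dims);
// One heavily sensitive string attribute (data_format/data_layout): may
// start tuning on the first conv2d and rewrites the attribute in place.
std::string data_format = "NCHW";
auto t2 = egr::EagerLayoutAutotune<std::string>("conv2d", ins, &data_format);

The two-attribute template simply forwards to the single-attribute one, so an op such as argmax(axis, keep_dim) reuses the Scalar specialization.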
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/eager/api/generated/eager_generated/forwards/dygraph_functions.h"
#include "paddle/fluid/imperative/layout_autotune.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/tensor_utils.h"
namespace egr {
inline paddle::experimental::Tensor EagerTraceTransposeOp(
const paddle::experimental::DataLayout layout,
const paddle::experimental::Tensor& in) {
if (in.shape().size() != 4) {
VLOG(4) << "Shape is " << in.shape().size() << " can't transpose to"
<< paddle::framework::DataLayoutToString(layout);
return in;
}
std::vector<int> axis;
if (layout == paddle::experimental::DataLayout::NHWC) {
axis = {0, 2, 3, 1};
} else if (layout == paddle::experimental::DataLayout::NCHW) {
axis = {0, 3, 1, 2};
} else {
axis = {0, 1, 2, 3};
}
auto out_tensor = transpose_dygraph_function(in, axis);
VLOG(4) << "AutoTune Transpose from "
<< paddle::framework::DataLayoutToString(in.layout()) << " to "
<< paddle::framework::DataLayoutToString(layout);
return out_tensor;
}
// Default transformer for layout-agnostic ops
class EagerLayoutTransformer {
public:
EagerLayoutTransformer() : op_name_("") {}
explicit EagerLayoutTransformer(
const std::string& op_name,
const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& tensors_vector)
: op_name_(op_name) {
final_layout_ = "UNDEFINED";
auto desired_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
for (size_t i = 0; i < tensors_vector.size(); i++) {
for (size_t idx = 0; idx < tensors_vector[0].size(); idx++) {
if (final_layout_ == "UNDEFINED") {
final_layout_ = paddle::framework::DataLayoutToString(
tensors_vector[0][0].layout());
} else if (tensors_vector[i][idx].layout() == desired_layout) {
final_layout_ = paddle::framework::DataLayoutToString(desired_layout);
break;
}
}
}
VLOG(4) << op_name_ << "final_layout_ is " << final_layout_;
}
EagerLayoutTransformer(const EagerLayoutTransformer&) = delete;
EagerLayoutTransformer& operator=(const EagerLayoutTransformer&) = delete;
virtual ~EagerLayoutTransformer() {}
virtual paddle::optional<paddle::experimental::Tensor> TransInTensor(
const std::string& in_name,
const paddle::optional<paddle::experimental::Tensor>& in) {
VLOG(4) << op_name_ << "is is agnostic, final_layout_ is " << final_layout_;
return in;
}
virtual paddle::optional<std::vector<paddle::experimental::Tensor>>
TransInTensor(
const std::string& in_name,
const paddle::optional<std::vector<paddle::experimental::Tensor>>& in) {
return in;
}
virtual std::vector<paddle::experimental::Tensor> TransInTensor(
const std::string& in_name,
const std::vector<paddle::experimental::Tensor>& in) {
return in;
}
virtual paddle::experimental::Tensor TransInTensor(
const std::string& in_name, const paddle::experimental::Tensor& in) {
return in;
}
virtual void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) {
bool use_default = (final_layout_ == "Undefined(AnyLayout)" ||
final_layout_ == ("UNDEFINED"));
auto layout = paddle::framework::StringToDataLayout(final_layout_);
if (!use_default) {
phi::DenseTensorUtils::GetMutableMeta(
static_cast<phi::DenseTensor*>(out_tensor->impl().get()))
->layout = layout;
}
VLOG(4) << op_name_ << "is is agnostic, use_default " << use_default;
}
virtual void SetOutTensorLayout(
std::vector<paddle::experimental::Tensor>* out_tensor) {
bool use_default = (final_layout_ == "Undefined(AnyLayout)" ||
final_layout_ == ("UNDEFINED"));
if (!use_default) {
for (size_t i = 0; i < out_tensor->size(); i++) {
phi::DenseTensorUtils::GetMutableMeta(
static_cast<phi::DenseTensor*>((*out_tensor)[i].impl().get()))
->layout =
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
}
}
VLOG(4) << op_name_ << "is is agnostic, use_default " << use_default;
}
protected:
std::string op_name_;
std::string final_layout_;
};
class EagerHeavilyLayoutSensitiveOpTransformer : public EagerLayoutTransformer {
public:
explicit EagerHeavilyLayoutSensitiveOpTransformer(const std::string& op_name,
std::string* layout)
: op_name_(op_name),
desired_layout_(
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout()) {
VLOG(3) << "Optimze Layout heavily op: " << op_name;
final_layout_ = paddle::framework::DataLayoutToString(desired_layout_);
if ((*layout) != final_layout_) {
*layout = final_layout_;
}
}
virtual paddle::optional<std::vector<paddle::experimental::Tensor>>
TransInTensor(
const std::string& in_name,
const paddle::optional<std::vector<paddle::experimental::Tensor>>& in) {
VLOG(4) << op_name_ << "is is heavily";
return in;
}
virtual paddle::optional<paddle::experimental::Tensor> TransInTensor(
const std::string& in_name,
const paddle::optional<paddle::experimental::Tensor>& in) {
VLOG(4) << op_name_ << "is is heavily";
return in;
}
paddle::experimental::Tensor TransInTensor(
const std::string& in_name, const paddle::experimental::Tensor& in) {
if (heavily_input_.count(in_name) != 0 && in.layout() != desired_layout_) {
VLOG(4) << op_name_ << "'s " << in_name << " need transpose from "
<< paddle::framework::DataLayoutToString(in.layout()) << " to "
<< final_layout_;
auto out_tensor = EagerTraceTransposeOp(desired_layout_, in);
return out_tensor;
}
return in;
}
void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) {
if (out_tensor->layout() != desired_layout_) {
VLOG(4) << " Set Out_tensor's layout from "
<< paddle::framework::DataLayoutToString(out_tensor->layout())
<< " to " << final_layout_;
phi::DenseTensorUtils::GetMutableMeta(
static_cast<phi::DenseTensor*>(out_tensor->impl().get()))
->layout = desired_layout_;
}
}
void SetOutTensorLayout(
std::vector<paddle::experimental::Tensor*>* out_tensor) {
for (size_t i = 0; i < out_tensor->size(); i++) {
SetOutTensorLayout((*out_tensor)[i]);
}
}
void SetOutTensorLayout(
std::vector<paddle::experimental::Tensor>* out_tensor) {
for (size_t i = 0; i < out_tensor->size(); i++) {
if ((*out_tensor)[i].layout() != desired_layout_) {
VLOG(4) << " Set Out_tensor's layout from "
<< paddle::framework::DataLayoutToString(
(*out_tensor)[i].layout())
<< " to " << final_layout_;
phi::DenseTensorUtils::GetMutableMeta(
static_cast<phi::DenseTensor*>((*out_tensor)[i].impl().get()))
->layout = desired_layout_;
}
}
}
protected:
std::string op_name_;
std::string final_layout_;
const paddle::experimental::DataLayout desired_layout_;
std::unordered_set<std::string> heavily_input_{"x", "y", "input"};
};
class EagerLightlyLayoutSensitiveOpTransformer : public EagerLayoutTransformer {
public:
EagerLightlyLayoutSensitiveOpTransformer() {}
explicit EagerLightlyLayoutSensitiveOpTransformer(const std::string& op_name)
: op_name_(op_name) {
VLOG(3) << "Optimze Layout lightly " << op_name;
auto desired_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
final_layout_ = paddle::framework::DataLayoutToString(desired_layout);
}
// transpose from desired to default
paddle::experimental::Tensor TransInTensor(
const std::string& in_name, const paddle::experimental::Tensor& in) {
std::string input_layout =
paddle::framework::DataLayoutToString(in.layout());
auto default_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
if (final_layout_ == input_layout && in.shape().size() == 4) {
VLOG(4) << op_name_ << "'s " << in_name << " need transpose from "
<< input_layout << " to default_layout";
auto out_tensor = EagerTraceTransposeOp(
paddle::experimental::DataLayout::UNDEFINED, in);
phi::DenseTensorUtils::GetMutableMeta(
static_cast<phi::DenseTensor*>(out_tensor.impl().get()))
->layout = default_layout;
return out_tensor;
}
VLOG(4) << in_name << "'s layout is " << input_layout;
return in;
}
virtual std::vector<paddle::experimental::Tensor> TransInTensor(
const std::string& in_name,
const std::vector<paddle::experimental::Tensor>& in) {
std::vector<paddle::experimental::Tensor> result;
auto desired_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
auto default_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
for (size_t i = 0; i < in.size(); i++) {
auto in_tensor = in[i];
if (in_tensor.layout() == desired_layout) {
VLOG(4) << op_name_ << "'s " << in_name << " need transpose from "
<< final_layout_ << " to default_layout";
auto out_tensor = EagerTraceTransposeOp(
paddle::experimental::DataLayout::UNDEFINED, in_tensor);
phi::DenseTensorUtils::GetMutableMeta(
static_cast<phi::DenseTensor*>(out_tensor.impl().get()))
->layout = default_layout;
result.emplace_back(out_tensor);
} else {
result.emplace_back(in_tensor);
}
}
return result;
}
void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) {
auto out_layout = out_tensor->layout();
auto default_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
if (out_layout != default_layout) {
VLOG(4) << op_name_ << "'s out need transpose to default_layout";
phi::DenseTensorUtils::GetMutableMeta(
static_cast<phi::DenseTensor*>(out_tensor->impl().get()))
->layout = default_layout;
}
}
void SetOutTensorLayout(
std::vector<paddle::experimental::Tensor*>* out_tensor) {
for (size_t i = 0; i < out_tensor->size(); i++) {
VLOG(4) << "out layout is"
<< paddle::framework::DataLayoutToString(
(*out_tensor)[i]->layout());
SetOutTensorLayout((*out_tensor)[i]);
}
}
void SetOutTensorLayout(
std::vector<paddle::experimental::Tensor>* out_tensor) {
auto default_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
for (size_t i = 0; i < out_tensor->size(); i++) {
VLOG(4) << " out_tensor layout trans to default ";
phi::DenseTensorUtils::GetMutableMeta(
static_cast<phi::DenseTensor*>((*out_tensor)[i].impl().get()))
->layout = default_layout;
}
}
protected:
std::string op_name_;
std::string final_layout_;
std::unordered_set<std::string> heavily_input_{"x", "y", "input"};
};
class EagerTransposeOpTransformer
: public EagerLightlyLayoutSensitiveOpTransformer {
public:
EagerTransposeOpTransformer() {}
explicit EagerTransposeOpTransformer(const std::string& op_name)
: op_name_(op_name) {
VLOG(3) << "Optimze Layout TransposeOpTransformer " << op_name;
auto desired_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
std::string desired_layout_str =
paddle::framework::DataLayoutToString(desired_layout);
final_layout_ = desired_layout_str;
}
void SetAttr(std::vector<int>* axis, bool is_nhwc) {
// input's layout is NHWC and input's layout == desired_layout
std::vector<int> perm_nchw = {0, 2, 3, 1};
std::vector<int> perm_nhwc = {0, 3, 1, 2};
auto perm = is_nhwc ? perm_nhwc : perm_nchw;
(*axis)[0] = perm[(*axis)[0]];
(*axis)[1] = perm[(*axis)[1]];
(*axis)[2] = perm[(*axis)[2]];
(*axis)[3] = perm[(*axis)[3]];
VLOG(4) << " EagerTransposeOpTransformer " << op_name_
<< "'s layout is equal to desire: " << is_nhwc;
}
paddle::experimental::Tensor TransInTensor(
const std::string& in_name, const paddle::experimental::Tensor& in) {
VLOG(4) << "with no transpose: EagerTransposeOpTransformer " << in_name
<< "'s layout is "
<< paddle::framework::DataLayoutToString(in.layout());
return in;
}
void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) {
auto desired_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
if (out_tensor->layout() != desired_layout) {
VLOG(4) << " Set Out_tensor's layout from "
<< paddle::framework::DataLayoutToString(out_tensor->layout())
<< " to " << final_layout_;
phi::DenseTensorUtils::GetMutableMeta(
static_cast<phi::DenseTensor*>(out_tensor->impl().get()))
->layout = desired_layout;
}
}
protected:
std::string op_name_;
std::string final_layout_;
std::unordered_set<std::string> heavily_input_{"x", "y", "input"};
};
class EagerArgmaxOpTransformer
: public EagerLightlyLayoutSensitiveOpTransformer {
public:
EagerArgmaxOpTransformer() {}
explicit EagerArgmaxOpTransformer(const std::string& op_name)
: op_name_(op_name) {
VLOG(3) << "Optimze Layout lightly " << op_name;
}
void SetAttr(paddle::experimental::Scalar* axis, bool is_nhwc) {
std::vector<int> perm_nhwc = {0, 3, 1, 2};
std::vector<int> perm_nchw = {0, 2, 3, 1};
auto perm = is_nhwc ? perm_nhwc : perm_nchw;
int axes = axis->to<int>();
(*axis) = static_cast<paddle::experimental::Scalar>(perm[axes]);
}
void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) {
VLOG(4) << "EagerArgmaxOpTransformer's out layout is"
<< paddle::framework::DataLayoutToString(out_tensor->layout());
auto desired_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
if (desired_layout != out_tensor->layout()) {
VLOG(4) << "Change layout from "
<< paddle::framework::DataLayoutToString(out_tensor->layout())
<< " to " << final_layout_;
phi::DenseTensorUtils::GetMutableMeta(
static_cast<phi::DenseTensor*>(out_tensor->impl().get()))
->layout = desired_layout;
}
}
protected:
std::string op_name_;
std::string final_layout_;
std::unordered_set<std::string> heavily_input_{"x", "y", "input"};
};
class EagerFlattenOpTransformer
: public EagerLightlyLayoutSensitiveOpTransformer {
public:
EagerFlattenOpTransformer() {}
explicit EagerFlattenOpTransformer(const std::string& op_name)
: op_name_(op_name) {
VLOG(3) << "Optimze Layout lightly " << op_name;
auto desired_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
std::string desired_layout_str =
paddle::framework::DataLayoutToString(desired_layout);
final_layout_ = desired_layout_str;
}
// Flatten consumes its input in the incoming layout; no transpose is done here.
paddle::experimental::Tensor TransInTensor(
const std::string& in_name, const paddle::experimental::Tensor& in) {
return in;
}
void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) {
VLOG(4) << "EagerArgmaxOpTransformer's out layout is"
<< paddle::framework::DataLayoutToString(out_tensor->layout());
auto layout = paddle::framework::StringToDataLayout(final_layout_);
if (layout != out_tensor->layout()) {
VLOG(4) << "Change layout from "
<< paddle::framework::DataLayoutToString(out_tensor->layout())
<< " to " << final_layout_;
phi::DenseTensorUtils::GetMutableMeta(
static_cast<phi::DenseTensor*>(out_tensor->impl().get()))
->layout = layout;
}
}
protected:
std::string op_name_;
std::string final_layout_;
std::unordered_set<std::string> heavily_input_{"x", "y", "input"};
};
class EagerConcatOpTransformer
: public EagerLightlyLayoutSensitiveOpTransformer {
public:
EagerConcatOpTransformer() {}
explicit EagerConcatOpTransformer(const std::string& op_name)
: op_name_(op_name) {
VLOG(3) << "Optimze Layout lightly " << op_name;
auto desired_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
std::string desired_layout_str =
paddle::framework::DataLayoutToString(desired_layout);
final_layout_ = desired_layout_str;
}
void SetAttr(paddle::experimental::Scalar* axis,
paddle::framework::DataLayout layout) {
std::vector<int> perm_nhwc = {0, 3, 1, 2};
std::vector<int> perm_nchw = {0, 2, 3, 1};
int axes = axis->to<int>();
auto perm =
(paddle::framework::DataLayout::NHWC == layout) ? perm_nhwc : perm_nchw;
(*axis) = static_cast<paddle::experimental::Scalar>(perm[axes]);
}
virtual std::vector<paddle::experimental::Tensor> TransInTensor(
const std::string& in_name,
const std::vector<paddle::experimental::Tensor>& in) {
return in;
}
void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) {
auto layout = paddle::framework::StringToDataLayout(final_layout_);
if (layout != out_tensor->layout()) {
VLOG(4) << "Change layout from "
<< paddle::framework::DataLayoutToString(out_tensor->layout())
<< " to " << final_layout_;
phi::DenseTensorUtils::GetMutableMeta(
static_cast<phi::DenseTensor*>(out_tensor->impl().get()))
->layout = layout;
}
}
protected:
std::string op_name_;
std::string final_layout_;
std::unordered_set<std::string> heavily_input_{"x", "y", "input"};
};
} // namespace egr
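For reference, a minimal sketch of how the heavily layout-sensitive transformer above is driven for a single input, assuming tuning has already started with NHWC as the desired layout (x, out and the kernel call are placeholders):

std::string data_format = "NCHW";
// The constructor rewrites data_format to the desired layout ("NHWC").
egr::EagerHeavilyLayoutSensitiveOpTransformer trans("conv2d", &data_format);
// "input" is in heavily_input_, so a rank-4 NCHW tensor is transposed to
// NHWC through EagerTraceTransposeOp (perm {0, 2, 3, 1}).
auto x_nhwc = trans.TransInTensor("input", x);
// ... run the kernel with x_nhwc and data_format == "NHWC" ...
// The output keeps its data; only its meta is re-tagged with NHWC.
trans.SetOutTensorLayout(&out);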
......@@ -25,12 +25,7 @@ namespace imperative {
bool LayoutAutoTune::UseLayoutAutoTune() const {
#if defined(PADDLE_WITH_CUDA)
if (!phi::backends::gpu::TensorCoreAvailable()) {
LayoutAutoTune::Instance().DisableLayoutAutoTune();
return false;
} else {
return use_layout_autotune_;
}
return use_layout_autotune_;
#else
return false;
#endif
......@@ -168,6 +163,12 @@ paddle::imperative::NameVarMap<VarType> AutoTuneLayout(
if (op_type != "conv2d") {
return ins;
} else {
#if defined(PADDLE_WITH_CUDA)
if (!phi::backends::gpu::TensorCoreAvailable()) {
LayoutAutoTune::Instance().DisableLayoutAutoTune();
return ins;
}
#endif
auto conv_in_type = framework::proto::VarType::FP32;
auto& in_vars = ins.at("Input")[0];
if (GetDataType<VarType>(in_vars) == framework::proto::VarType::FP16) {
......@@ -213,6 +214,7 @@ paddle::imperative::NameVarMap<VarType> AutoTuneLayout(
return transposer->Apply(ins, outs, attrs, tracer);
}
}
template paddle::imperative::NameVarMap<VarBase> AutoTuneLayout<VarBase>(
const std::string& op_type,
const paddle::imperative::NameVarMap<VarBase>& ins,
......
......@@ -53,9 +53,13 @@ class LayoutAutoTune {
return layout_agnostic_ops_.count(op_type) != 0;
}
DataLayout GetDesiredLayout() const { return layout_; }
DataLayout GetDesiredLayout() const { return desired_layout_; }
void SetDesiredLayout(const DataLayout& layout) { layout_ = layout; }
DataLayout GetDefaultLayout() const { return default_layout_; }
void SetDesiredLayout(const DataLayout& layout) { desired_layout_ = layout; }
void SetDefaultLayout(const DataLayout& layout) { default_layout_ = layout; }
private:
LayoutAutoTune();
......@@ -69,7 +73,9 @@ class LayoutAutoTune {
std::unordered_set<std::string> lightly_layout_sensitive_ops_{
"instance_norm", "softmax", "transpose", "transpose2", "reshape2"};
DataLayout layout_{DataLayout::UNDEFINED};
DataLayout desired_layout_{DataLayout::UNDEFINED};
DataLayout default_layout_{DataLayout::UNDEFINED};
};
template <typename VarType>
......
......@@ -77,6 +77,9 @@ class LayoutTransformer {
for (auto& var : pair.second) {
// Once any input is in the desired layout, set in_layout to the
// desired layout.
if (in_layout == DataLayout::UNDEFINED) {
in_layout = paddle::imperative::GetDataLayout(var);
}
if (var != nullptr && (paddle::imperative::GetDataLayout(var) ==
LayoutAutoTune::Instance().GetDesiredLayout())) {
in_layout = LayoutAutoTune::Instance().GetDesiredLayout();
......@@ -84,7 +87,11 @@ class LayoutTransformer {
}
}
}
SetVarsLayout(outs, in_layout);
VLOG(3) << "Optimze Layout agnostic op: " << type_ << " "
<< paddle::framework::DataLayoutToString(in_layout);
if (in_layout != DataLayout::UNDEFINED) {
SetVarsLayout(outs, in_layout);
}
return ins;
}
......
......@@ -188,6 +188,25 @@ PyObject* tensor_properties_get_shape(TensorObject* self, void* closure) {
EAGER_CATCH_AND_THROW_RETURN_NULL
}
PyObject* tensor_properties_get_layout(TensorObject* self, void* closure) {
EAGER_TRY
std::string layout = "";
if (!self->tensor.defined()) {
return ToPyObject(layout);
}
if (egr::IsVariableCompatTensor(self->tensor)) {
VLOG(3) << "VariableCompatTensor does not support `layout` method.";
return ToPyObject(layout);
} else {
return ToPyObject(
paddle::framework::DataLayoutToString(self->tensor.layout()));
}
return ToPyObject(layout);
EAGER_CATCH_AND_THROW_RETURN_NULL
}
PyObject* tensor_properties_get_place(TensorObject* self, void* closure) {
EAGER_TRY
return ToPyObject(self->tensor.place());
......@@ -249,6 +268,7 @@ struct PyGetSetDef variable_properties[] = {
nullptr,
nullptr},
{"shape", (getter)tensor_properties_get_shape, nullptr, nullptr, nullptr},
{"layout", (getter)tensor_properties_get_layout, nullptr, nullptr, nullptr},
// {"is_leaf", (getter)tensor_properties_get_is_leaf, nullptr,
// nullptr,
// nullptr},
......@@ -271,6 +291,7 @@ struct PyGetSetDef string_tensor_variable_properties[] = {
nullptr,
nullptr},
{"shape", (getter)tensor_properties_get_shape, nullptr, nullptr, nullptr},
{"layout", (getter)tensor_properties_get_layout, nullptr, nullptr, nullptr},
{"place", (getter)tensor_properties_get_place, nullptr, nullptr, nullptr},
{"_place_str",
(getter)tensor_properties_get_place_str,
......
......@@ -2062,6 +2062,15 @@ void BindImperative(py::module *m_ptr) {
return std::vector<int>();
}
})
.def_property_readonly(
"layout",
[](imperative::VarBase &self) {
if (self.Var().IsType<framework::LoDTensor>()) {
auto layout = self.Var().Get<framework::LoDTensor>().layout();
return paddle::framework::DataLayoutToString(layout);
}
return std::string("");
})
.def_property_readonly("is_leaf",
&imperative::VarBase::IsLeaf,
R"DOC(
......
......@@ -52,12 +52,16 @@ inline bool NeedTransformPlace(const paddle::platform::Place& input,
return ret;
}
inline bool NeedTransformLayout(const DataLayout& input,
inline bool NeedTransformLayout(const paddle::platform::Place& place,
const DataLayout& input,
const DataLayout& target,
const TransformFlag& transform_flag) {
bool ret = transform_flag.need_trans_layout() &&
(input != DataLayout::ALL_LAYOUT &&
target != DataLayout::ALL_LAYOUT && input != target);
if (platform::is_gpu_place(place)) {
return false;
}
return ret;
}
......@@ -73,6 +77,7 @@ inline phi::DenseTensor TransDataLayout(const phi::DenseTensor& tensor,
PADDLE_THROW(phi::errors::PreconditionNotMet(
"Unsupported data layout cast from CPU to GPU."));
}
return tensor;
}
template <typename Context>
......@@ -196,8 +201,11 @@ phi::DenseTensor TransformData(phi::DenseTensor* tensor,
phi::DenseTensor out = *tensor;
bool trans_layout = false;
bool trans_dtype = false;
if (NeedTransformLayout(
tensor->layout(), target_args_def.layout, transform_flag)) {
if (NeedTransformLayout(tensor->place(),
tensor->layout(),
target_args_def.layout,
transform_flag)) {
out = TransDataLayout(out, target_args_def.layout);
trans_layout = true;
}
......@@ -232,8 +240,10 @@ std::shared_ptr<phi::DenseTensor> PrepareData(
dense_tensor.place(), target_args_def.backend, transform_flag) &&
!NeedTransformDataType(
dense_tensor.dtype(), target_args_def.dtype, transform_flag) &&
!NeedTransformLayout(
dense_tensor.layout(), target_args_def.layout, transform_flag))) {
!NeedTransformLayout(dense_tensor.place(),
dense_tensor.layout(),
target_args_def.layout,
transform_flag))) {
return std::static_pointer_cast<phi::DenseTensor>(tensor_in);
}
phi::DenseTensor out =
......@@ -267,8 +277,10 @@ std::unique_ptr<std::vector<phi::DenseTensor>> PrepareData(
tensor_in->place(), target_args_def.backend, transform_flag) &&
!NeedTransformDataType(
tensor_in->dtype(), target_args_def.dtype, transform_flag) &&
!NeedTransformLayout(
tensor_in->layout(), target_args_def.layout, transform_flag))) {
!NeedTransformLayout(tensor_in->place(),
tensor_in->layout(),
target_args_def.layout,
transform_flag))) {
pt_tensors->emplace_back(
*std::dynamic_pointer_cast<phi::DenseTensor>(tensor_in));
} else {
......
......@@ -21,9 +21,6 @@ import numpy
import paddle
import paddle.nn.functional as F
from paddle.fluid.framework import _enable_legacy_dygraph
_enable_legacy_dygraph()
class SimpleNet(paddle.nn.Layer):
......
......@@ -129,6 +129,8 @@ def _conv_nd(x,
if bias is not None:
channel_dim = channel_dim + len(
x.shape) if channel_dim < 0 else channel_dim
if pre_bias.layout == "NHWC":
channel_dim = 3 # last dim
if isinstance(x, tuple):
x = x[0]
if isinstance(bias, tuple):
......