Unverified commit 16439bb9 authored by niuliling123 and committed by GitHub

Update layoutautotune for inplace (#45826)

Parent: 46e4fb2a
......@@ -51,17 +51,17 @@ paddle::experimental::Tensor conv2d_ad_func(
auto amp_dst_dtype = egr::GetAmpDestDtype(op_name, amp_tensors_vector);
auto NEW_input =
auto new_input =
egr::EagerAmpAutoCast("input", input, amp_dst_dtype, op_name);
auto NEW_filter =
auto new_filter =
egr::EagerAmpAutoCast("filter", filter, amp_dst_dtype, op_name);
{
paddle::imperative::AutoCastGuard guard(
egr::Controller::Instance().GetCurrentTracer(),
paddle::imperative::AmpLevel::O0);
return conv2d_ad_func(NEW_input,
NEW_filter,
return conv2d_ad_func(new_input,
new_filter,
strides,
paddings,
paddding_algorithm,
......@@ -76,7 +76,7 @@ paddle::experimental::Tensor conv2d_ad_func(
// Layout autotune
if (paddle::imperative::LayoutAutoTune::Instance().UseLayoutAutoTune()) {
if (egr::Controller::Instance().UseLayoutAutoTune()) {
VLOG(5) << "Check and Prepare For LAYOUT";
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
egr::kSlotSmallVectorSize>
......@@ -85,11 +85,10 @@ paddle::experimental::Tensor conv2d_ad_func(
auto op_name = phi::TransToFluidOpName("conv2d");
auto transformer = egr::EagerLayoutAutotune<std::string>(
op_name, tensors_vector, &data_format);
auto NEW_input = transformer->TransInTensor("input", input);
bool is_enable_tune =
paddle::imperative::LayoutAutoTune::Instance().UseLayoutAutoTune();
paddle::imperative::LayoutAutoTune::Instance().DisableLayoutAutoTune();
auto out = conv2d_ad_func(NEW_input,
auto new_input = transformer->TransInTensor("input", input);
bool need_tune = egr::Controller::Instance().UseLayoutAutoTune();
egr::Controller::Instance().DisableLayoutAutoTune();
auto out = conv2d_ad_func(new_input,
filter,
strides,
paddings,
......@@ -101,8 +100,8 @@ paddle::experimental::Tensor conv2d_ad_func(
workspace_size_MB,
exhaustive_search);
transformer->SetOutTensorLayout(&out);
if (is_enable_tune) {
paddle::imperative::LayoutAutoTune::Instance().EnableLayoutAutoTune();
if (need_tune) {
egr::Controller::Instance().EnableLayoutAutoTune();
}
// Returns
return out;
......
......@@ -55,6 +55,23 @@ class Controller {
paddle::imperative::AmpLevel GetAMPLevel() const {
return tracer_->GetAmpLevel();
}
bool UseLayoutAutoTune() {
bool use_autotune = false;
#if defined(PADDLE_WITH_CUDA)
auto place = tracer_->ExpectedPlace();
bool is_gpu_place = paddle::platform::is_gpu_place(place);
if (is_gpu_place) {
use_autotune = tracer_->UseLayoutAutoTune();
}
#endif
return use_autotune;
}
void DisableLayoutAutoTune() { tracer_->DisableLayoutAutoTune(); }
void EnableLayoutAutoTune() { tracer_->EnableLayoutAutoTune(); }
bool HasGrad() const { return tracer_->HasGrad(); }
void SetHasGrad(bool has_grad) { tracer_->SetHasGrad(has_grad); }
std::string GenerateUniqueName(std::string key = "eager_in_tmp") {
......
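A minimal sketch of how the new Controller switches are meant to be used around a call that must run with layout autotune turned off, mirroring the conv2d_ad_func change above; the wrapper function name RunWithoutLayoutAutotune is an illustrative assumption, not part of the commit.

// Sketch only: save the current state, disable autotune for the inner call,
// then restore it. Relies on the egr::Controller methods added above.
void RunWithoutLayoutAutotune() {
  bool need_tune = egr::Controller::Instance().UseLayoutAutoTune();
  egr::Controller::Instance().DisableLayoutAutoTune();
  // ... invoke the inner forward function here ...
  if (need_tune) {
    egr::Controller::Instance().EnableLayoutAutoTune();
  }
}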
......@@ -437,15 +437,14 @@ AMP_LOGIC_TEMPLATE = \
"""
LAYOUT_LOGIC_TEMPLATE=\
"""
if (paddle::imperative::LayoutAutoTune::Instance().UseLayoutAutoTune()) {{
VLOG(5) << "Check and Prepare For LAYOUT";
if (egr::Controller::Instance().UseLayoutAutoTune()) {{
paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> tensors_vector = {};
{}
{}
paddle::imperative::LayoutAutoTune::Instance().DisableLayoutAutoTune();
VLOG(5) << "Check and Prepare For LAYOUT "<< op_name;
paddle::imperative::LayoutAutotuneGuard guard(egr::Controller::Instance().GetCurrentTracer(), false);
{}
{}
paddle::imperative::LayoutAutoTune::Instance().EnableLayoutAutoTune();
// Returns
return {};
}}
......@@ -922,6 +921,7 @@ class DygraphFunctionGeneratorBase(FunctionGeneratorBase):
set_grad_in_meta = f"{indent}grad_node->SetGradInMeta({name}, {pos});"
set_retain_grad = f"{indent}egr::EagerUtils::CheckAndRetainGrad({name});"
set_out_rank_list.append(set_out_rank)
set_history_list.append(set_history)
set_grad_in_meta_list.append(set_grad_in_meta)
......@@ -1014,6 +1014,98 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
self.forward_definition_str = ""
self.forward_declaration_str = ""
def GenerateForwardLayoutAutotune(self, forward_api_name,
amp_tensors_vector_list,
layout_tensors_vector_optional_list,
layout_autotune_list_str,
returns_type_str, returns_str,
amp_inputs_call_args_str):
intermediate_outputs = self.intermediate_outputs
forward_attrs_list = self.forward_attrs_list
forward_outputs_position_map = self.forward_outputs_position_map
num_outputs = len(
forward_outputs_position_map.keys()) - len(intermediate_outputs)
# for layout autotune attr
lightly_sensitive_attr = [
'axis', 'axes', 'dim', 'dims', 'start', 'end', 'stop'
]
heavily_sensitive_attr = ['data_format', 'data_layout']
layout_autotune_attr = []
layout_autotune_attr_code_list = []
layout_autotune_attr_type_list = []
layout_autotune_attr_code_list.append(
f"auto op_name = phi::TransToFluidOpName(\"{forward_api_name}\");\n"
)
lightly_flag = False
heavily_flag = False
for name, atype, default_val, pos in forward_attrs_list:
for attr_name in lightly_sensitive_attr:
if name.find(attr_name) != -1 and (name
not in layout_autotune_attr):
lightly_flag = True
layout_autotune_attr.append(name)
layout_autotune_attr_type_list.append(atype)
if lightly_flag is False:
for attr_name in heavily_sensitive_attr:
if name.find(attr_name) != -1 and (
name not in layout_autotune_attr):
layout_autotune_attr.append(name)
layout_autotune_attr_type_list.append(atype)
heavily_flag = True
if len(layout_autotune_attr) == 0:
layout_autotune_attr_code_list.append(
f"auto transformer = egr::EagerLayoutAutotune(op_name, tensors_vector);\n"
)
elif len(layout_autotune_attr) == 1:
layout_autotune_attr_code_list.append(
f"auto transformer = egr::EagerLayoutAutotune<{layout_autotune_attr_type_list[0]}>(op_name, tensors_vector, &{layout_autotune_attr[0]});\n"
)
elif len(layout_autotune_attr) == 2:
layout_autotune_attr_code_list.append(
f"auto transformer = egr::EagerLayoutAutotune<{layout_autotune_attr_type_list[0]}, {layout_autotune_attr_type_list[1]}>(op_name, tensors_vector, &{layout_autotune_attr[0]}, &{layout_autotune_attr[1]});\n"
)
else:
layout_autotune_attr_code_list.append(
f"auto transformer = egr::EagerLayoutAutotune<{layout_autotune_attr_type_list[0]}>(op_name, tensors_vector,&{layout_autotune_attr[0]});\n"
)
# Out tensor
layout_inputs_call_args_str = amp_inputs_call_args_str
forward_function_name = GetDygraphForwardFunctionName(forward_api_name)
layout_tmp_result_list = []
layout_autotune_outs_list = []
result_name = "api_result"
if num_outputs == 1:
result_name = returns_str
layout_autotune_outs_list.append(
f"transformer -> SetOutTensorLayout(&{returns_str});\n")
else:
for name, (rtype, pos) in forward_outputs_position_map.items():
if name in intermediate_outputs:
continue
layout_autotune_outs_list.append(
f" auto& {name} = std::get<{len(layout_tmp_result_list)}>(api_result);\n"
)
layout_autotune_outs_list.append(
f" transformer -> SetOutTensorLayout(&{name});\n")
layout_tmp_result_list.append(f"{name}")
tensors_vector_list_str = "{ " + ",".join(
amp_tensors_vector_list) + " }"
if len(amp_tensors_vector_list) == 0:
layout_logic_str = ""
else:
after_call_str = f"{returns_type_str} {result_name} = {forward_function_name}({layout_inputs_call_args_str});\n"
layout_logic_str = LAYOUT_LOGIC_TEMPLATE.format(
tensors_vector_list_str,
" ".join(layout_tensors_vector_optional_list),
" ".join(layout_autotune_attr_code_list) + " " +
layout_autotune_list_str, after_call_str,
" ".join(layout_autotune_outs_list), returns_str)
return layout_logic_str
def GenerateForwardDefinitionAndDeclaration(self, is_inplaced):
namespace = self.namespace
if self.forward_api_name[-1] == '_' and not is_inplaced:
......@@ -1049,7 +1141,7 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
layout_tensors_vector_optional_list = []
for name, (ttype, pos) in forward_inputs_position_map.items():
inputs_call_list[pos] = f"{name}"
amp_inputs_call_list[pos] = f"NEW_{name}"
amp_inputs_call_list[pos] = f"new_{name}"
is_optional = (name in optional_inputs)
if IsPlainTensorType(ttype):
if is_optional:
......@@ -1062,13 +1154,13 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
f"if ({name}) amp_tensors_vector.push_back({{ *{name} }});\n"
)
amp_autocast_optional_list.append(
f"auto NEW_{name} = egr::EagerAmpAutoCast(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
f"auto new_{name} = egr::EagerAmpAutoCast(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
)
layout_tensors_vector_optional_list.append(
f"if ({name}) tensors_vector.push_back({{ *{name} }});\n"
)
layout_autotune_optional_list.append(
f"auto NEW_{name} = transformer->TransInTensor(\"{name}\", {name});\n"
f"auto new_{name} = transformer->TransInTensor(\"{name}\", {name});\n"
)
else:
if is_inplaced and forward_inplace_map and name in forward_inplace_map.keys(
......@@ -1076,16 +1168,16 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
arg_str = f"paddle::experimental::Tensor& {name}"
amp_tensors_vector_list.append(f"{{{name}}}")
amp_autocast_list.append(
f"auto NEW_{name} = egr::EagerAmpAutoCast(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
f"auto new_{name} = egr::EagerAmpAutoCast(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
)
else:
arg_str = f"const paddle::experimental::Tensor& {name}"
amp_tensors_vector_list.append(f"{{{name}}}")
amp_autocast_list.append(
f"auto NEW_{name} = egr::EagerAmpAutoCast(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
f"auto new_{name} = egr::EagerAmpAutoCast(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
)
layout_autotune_list.append(
f"auto NEW_{name} = transformer->TransInTensor(\"{name}\", {name});\n"
f"auto new_{name} = transformer->TransInTensor(\"{name}\", {name});\n"
)
else:
assert IsVectorTensorType(ttype)
......@@ -1099,10 +1191,10 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
f"if ({name}) amp_tensors_vector.push_back( *{name} );\n"
)
amp_autocast_optional_list.append(
f"auto NEW_{name} = egr::EagerAmpAutoCasts(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
f"auto new_{name} = egr::EagerAmpAutoCasts(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
)
layout_autotune_optional_list.append(
f"auto NEW_{name} = transformer->TransInTensor(\"{name}\", {name});\n"
f"auto new_{name} = transformer->TransInTensors(\"{name}\", {name});\n"
)
else:
if is_inplaced and forward_inplace_map and name in forward_inplace_map.keys(
......@@ -1112,60 +1204,15 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
arg_str = f"const std::vector<paddle::experimental::Tensor>& {name}"
amp_tensors_vector_list.append(f"{name}")
amp_autocast_list.append(
f"auto NEW_{name} = egr::EagerAmpAutoCasts(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
f"auto new_{name} = egr::EagerAmpAutoCasts(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
)
layout_autotune_list.append(
f"auto NEW_{name} = transformer->TransInTensor(\"{name}\", {name});\n"
f"auto new_{name} = transformer->TransInTensors(\"{name}\", {name});\n"
)
inputs_args_definition_list[pos] = arg_str
inputs_args_declaration_list[pos] = arg_str
# for layout autotune attr
lightly_sensitive_attr = [
'axis', 'axes', 'dim', 'dims', 'start', 'end', 'stop'
]
heavily_sensitive_attr = ['data_format', 'data_layout']
layout_autotune_attr = []
layout_autotune_attr_code_list = []
layout_autotune_attr_type_list = []
layout_autotune_attr_code_list.append(
f"auto op_name = phi::TransToFluidOpName(\"{forward_api_name}\");\n"
)
lightly_flag = False
heavily_flag = False
for name, atype, default_val, pos in forward_attrs_list:
for attr_name in lightly_sensitive_attr:
if name.find(
attr_name) != -1 and name not in layout_autotune_attr:
lightly_flag = True
layout_autotune_attr.append(name)
layout_autotune_attr_type_list.append(atype)
if lightly_flag is False:
for attr_name in heavily_sensitive_attr:
if name.find(attr_name
) != -1 and name not in layout_autotune_attr:
layout_autotune_attr.append(name)
layout_autotune_attr_type_list.append(atype)
heavily_flag = True
if len(layout_autotune_attr) == 0:
layout_autotune_attr_code_list.append(
f"auto transformer = egr::EagerLayoutAutotune(op_name, tensors_vector);\n"
)
elif len(layout_autotune_attr) == 1:
layout_autotune_attr_code_list.append(
f"auto transformer = egr::EagerLayoutAutotune<{layout_autotune_attr_type_list[0]}>(op_name, tensors_vector, &{layout_autotune_attr[0]});\n"
)
elif len(layout_autotune_attr) == 2:
layout_autotune_attr_code_list.append(
f"auto transformer = egr::EagerLayoutAutotune<{layout_autotune_attr_type_list[0]}, {layout_autotune_attr_type_list[1]}>(op_name, tensors_vector, &{layout_autotune_attr[0]}, &{layout_autotune_attr[1]});\n"
)
else:
layout_autotune_attr_code_list.append(
f"auto transformer = egr::EagerLayoutAutotune(op_name, tensors_vector, {len(layout_autotune_attr)});\n"
)
# forward attrs
for name, atype, default_val, pos in forward_attrs_list:
inputs_call_list[pos] = name
......@@ -1356,33 +1403,12 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
amp_autocast_list_str, amp_call_str)
# Forward layout autotune
layout_inputs_call_args_str = amp_inputs_call_args_str
layout_tmp_result_list = []
layout_autotune_outs_list = ""
if num_outputs == 1:
layout_autotune_outs_list += f"{indent}auto {returns_str} = api_result;\n"
layout_autotune_outs_list += f"{indent}transformer -> SetOutTensorLayout(&{returns_str});\n"
else:
for name, (rtype, pos) in forward_outputs_position_map.items():
if name in intermediate_outputs:
continue
layout_autotune_outs_list += f"{indent}auto& {name} = std::get<{len(layout_tmp_result_list)}>(api_result);\n"
layout_autotune_outs_list += f"{indent}transformer -> SetOutTensorLayout(&{name});\n"
layout_tmp_result_list.append(f"{name}")
if returns_type_str == "paddle::experimental::Tensor&" or forward_api_name == "slice" or forward_api_name == "strided_slice" or len(
layout_autotune_attr) == 0:
layout_logic_str = ""
else:
# after_call_str = f"return {forward_ad_function_name}({layout_inputs_call_args_str});\n"
after_call_str = f"auto api_result = {forward_ad_function_name}({layout_inputs_call_args_str});\n"
layout_logic_str = LAYOUT_LOGIC_TEMPLATE.format(
amp_tensors_vector_list_str,
" ".join(layout_tensors_vector_optional_list),
" ".join(layout_autotune_attr_code_list) + " " +
" ".join(layout_autotune_list) +
" ".join(layout_autotune_optional_list), after_call_str,
layout_autotune_outs_list, returns_str)
layout_autotune_list_str = " ".join(
layout_autotune_list) + " ".join(layout_autotune_optional_list)
layout_logic_str = self.GenerateForwardLayoutAutotune(
forward_api_name, amp_tensors_vector_list,
layout_tensors_vector_optional_list, layout_autotune_list_str,
returns_type_str, returns_str, amp_inputs_call_args_str)
# For inputs outputs prepare for logging
var_str = f"\n{indent} std::string input_str = \"\";"
......
......@@ -19,20 +19,65 @@
#include "paddle/fluid/imperative/layout_autotune.h"
#include "paddle/phi/backends/gpu/gpu_info.h"
namespace egr {
// layout_agnostic_ops_
// For agnostic op like add / relu
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
inline bool NeedTransLayout(
const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& tensors_vector,
const paddle::experimental::DataLayout& layout) {
for (size_t i = 0; i < tensors_vector.size(); i++) {
for (size_t idx = 0; idx < tensors_vector[0].size(); idx++) {
if (layout != tensors_vector[i][idx].layout()) {
return true;
}
}
}
return false;
}
inline std::shared_ptr<EagerLayoutTransformer> BaseTransformer(
const std::string& op_name,
const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& tensors_vector) {
VLOG(3) << " Optimze Layout agnostic op: " << op_name;
std::shared_ptr<EagerLayoutTransformer> transposer = nullptr;
transposer =
std::make_shared<EagerLayoutTransformer>(op_name, tensors_vector);
bool unstart =
(paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() ==
paddle::experimental::DataLayout::UNDEFINED);
auto first_layout = tensors_vector[0][0].layout();
VLOG(3) << "Layout autotune was is start ? " << (!unstart) << op_name
<< "'s layout is " << first_layout;
transposer = std::make_shared<EagerLayoutTransformer>(
op_name, tensors_vector, first_layout);
return transposer;
}
// For agnostic op like add, relu, exp
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
const std::string& op_name,
const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& tensors_vector) {
auto desired_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
auto default_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
auto first_layout = tensors_vector[0][0].layout();
if (NeedTransLayout(tensors_vector, first_layout)) {
bool need_trans_back = false;
for (size_t i = 0; i < tensors_vector.size(); i++) {
for (size_t idx = 0; idx < tensors_vector[0].size(); idx++) {
if (4 != tensors_vector[i][idx].shape().size()) {
need_trans_back = true;
VLOG(3) << "Agnostic op " << op_name << " shape is "
<< tensors_vector[i][idx].shape().size() << " and layout is "
<< tensors_vector[i][idx].layout();
}
}
}
auto final_layout = need_trans_back ? default_layout : desired_layout;
return std::make_shared<EagerLayoutTransformer>(
op_name, tensors_vector, final_layout);
}
return BaseTransformer(op_name, tensors_vector);
}
// For lightly op like reduce
template <typename T>
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
......@@ -40,16 +85,11 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& tensors_vector,
T* attr) {
VLOG(3) << "Lightly op " << op_name << "'s shape is "
<< tensors_vector[0][0].shape().size() << " and layout is "
<< tensors_vector[0][0].layout();
std::shared_ptr<EagerLayoutTransformer> transposer = nullptr;
bool unstart =
(paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() ==
paddle::experimental::DataLayout::UNDEFINED);
if (unstart) {
VLOG(3) << "Optimze Layout was not started" << op_name;
transposer =
std::make_shared<EagerLayoutTransformer>(op_name, tensors_vector);
return transposer;
}
transposer =
std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
return transposer;
......@@ -63,33 +103,30 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
kSlotSmallVectorSize>& tensors_vector,
T1* axis,
T2* keep_dim) {
VLOG(3) << "Lightly op " << op_name << "'s shape is "
<< tensors_vector[0][0].shape().size() << " and layout is "
<< tensors_vector[0][0].layout();
return EagerLayoutAutotune<T1>(op_name, tensors_vector, axis);
}
// heavily string data_format data_layout
// heavily string data_format, data_layout
template <>
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
const std::string& op_name,
const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& tensors_vector,
std::string* attr) {
VLOG(3) << " Optimze Layout heavily op: " << op_name;
auto transposer =
std::make_shared<EagerLayoutTransformer>(op_name, tensors_vector);
auto first_layout = tensors_vector[0][0].layout();
auto transposer = std::make_shared<EagerLayoutTransformer>(
op_name, tensors_vector, first_layout);
if (paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() ==
paddle::experimental::DataLayout::UNDEFINED) {
// Layout autotune only supports model with convolutional layers
VLOG(3) << "Optimze Layout was not started" << op_name;
VLOG(3) << "Optimze Layout was not started " << op_name;
if (op_name != "conv2d") {
return transposer;
} else {
#if defined(PADDLE_WITH_CUDA)
if (paddle::platform::is_gpu_place(tensors_vector[0][0].place()) &&
!phi::backends::gpu::TensorCoreAvailable()) {
paddle::imperative::LayoutAutoTune::Instance().DisableLayoutAutoTune();
return transposer;
}
#endif
auto data_type = tensors_vector[0][0].dtype();
bool is_tune_fp32 =
(data_type == paddle::experimental::DataType::FLOAT32) &&
......@@ -97,6 +134,7 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
bool is_tune_fp16 =
(data_type == paddle::experimental::DataType::FLOAT16) &&
(*attr == "NCHW");
VLOG(3) << "Conv2d_dy's dtype " << data_type << " format" << (*attr);
if (is_tune_fp32) {
paddle::imperative::LayoutAutoTune::Instance().SetDesiredLayout(
paddle::experimental::DataLayout::NCHW);
......@@ -109,26 +147,27 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
paddle::imperative::LayoutAutoTune::Instance().SetDefaultLayout(
paddle::experimental::DataLayout::NCHW);
} else {
paddle::imperative::LayoutAutoTune::Instance().DisableLayoutAutoTune();
egr::Controller::Instance().DisableLayoutAutoTune();
return transposer;
}
VLOG(3) << "Tune the layout from " << attr << " to "
<< paddle::framework::DataLayoutToString(
paddle::imperative::LayoutAutoTune::Instance()
.GetDesiredLayout());
VLOG(3)
<< "Tune the layout from " << *attr << " to "
<< paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
}
}
if (paddle::imperative::LayoutAutoTune::Instance().IsHeavilyLayoutSensitive(
op_name)) {
VLOG(3)
<< op_name
<< "'s LayoutTransformer is EagerHeavilyLayoutSensitiveOpTransformer";
auto heavily_transposer =
std::make_shared<EagerHeavilyLayoutSensitiveOpTransformer>(op_name,
attr);
return heavily_transposer;
}
VLOG(3) << op_name
<< "'s LayoutTransformer is unimplemented. Use default "
"LayoutTransformer instead.";
VLOG(3) << op_name << "'s LayoutTransformer is unimplemented. Use default.";
return transposer;
}
......@@ -139,24 +178,23 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& tensors_vector,
std::vector<int>* attr) {
auto first_layout = tensors_vector[0][0].layout();
std::shared_ptr<EagerLayoutTransformer> transposer = nullptr;
if (paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() ==
paddle::experimental::DataLayout::UNDEFINED) {
VLOG(3) << " Optimze Layout Unstarted : " << op_name;
transposer =
std::make_shared<EagerLayoutTransformer>(op_name, tensors_vector);
VLOG(3) << "Optimze Layout was not started" << op_name;
transposer = std::make_shared<EagerLayoutTransformer>(
op_name, tensors_vector, first_layout);
return transposer;
}
VLOG(3) << " Optimze Layout lightly op: " << op_name;
if (op_name == "transpose2") {
if (op_name == "transpose2" &&
(tensors_vector[0][0].layout() ==
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout())) {
auto trans = std::make_shared<EagerTransposeOpTransformer>(op_name);
if (tensors_vector[0][0].layout() ==
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout()) {
trans->SetAttr(attr,
tensors_vector[0][0].layout() ==
paddle::experimental::DataLayout::NHWC);
return trans;
}
trans->SetAttr(attr,
tensors_vector[0][0].layout() ==
paddle::experimental::DataLayout::NHWC);
return trans;
}
transposer =
std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
......@@ -172,33 +210,32 @@ EagerLayoutAutotune<paddle::experimental::Scalar, bool>(
kSlotSmallVectorSize>& tensors_vector,
paddle::experimental::Scalar* axis,
bool* keep_dim) {
auto first_layout = tensors_vector[0][0].layout();
std::shared_ptr<EagerLayoutTransformer> transposer = nullptr;
if (paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() ==
paddle::experimental::DataLayout::UNDEFINED) {
VLOG(3) << " Optimze Layout Unstarted : " << op_name;
transposer =
std::make_shared<EagerLayoutTransformer>(op_name, tensors_vector);
VLOG(3) << "Optimze Layout was not started" << op_name;
transposer = std::make_shared<EagerLayoutTransformer>(
op_name, tensors_vector, first_layout);
return transposer;
}
auto desired_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
if (op_name == "argmax") {
if (op_name == "argmax" &&
(tensors_vector[0][0].layout() == desired_layout) && (*keep_dim)) {
std::shared_ptr<EagerArgmaxOpTransformer> argmax_transform = nullptr;
argmax_transform = std::make_shared<EagerArgmaxOpTransformer>(op_name);
if ((tensors_vector[0][0].layout() == desired_layout) && (*keep_dim)) {
argmax_transform->SetAttr(axis,
tensors_vector[0][0].layout() ==
paddle::experimental::DataLayout::NHWC);
return argmax_transform;
}
argmax_transform->SetAttr(axis,
tensors_vector[0][0].layout() ==
paddle::experimental::DataLayout::NHWC);
return argmax_transform;
}
VLOG(3) << " Optimze Layout lightly op: " << op_name;
transposer =
std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
return transposer;
}
// lightly int flatten
// lightly for flatten
template <>
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune<int, int>(
const std::string& op_name,
......@@ -206,17 +243,17 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune<int, int>(
kSlotSmallVectorSize>& tensors_vector,
int* start_axis,
int* stop_axis) {
auto first_layout = tensors_vector[0][0].layout();
std::shared_ptr<EagerLayoutTransformer> transposer = nullptr;
if (paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() ==
paddle::experimental::DataLayout::UNDEFINED) {
VLOG(3) << " Optimze Layout Unstarted : " << op_name;
transposer =
std::make_shared<EagerLayoutTransformer>(op_name, tensors_vector);
auto desired_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
if (desired_layout == paddle::experimental::DataLayout::UNDEFINED) {
VLOG(3) << "Optimze Layout was not started" << op_name;
transposer = std::make_shared<EagerLayoutTransformer>(
op_name, tensors_vector, first_layout);
return transposer;
}
bool no_tranpose =
tensors_vector[0][0].layout() ==
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
bool no_tranpose = tensors_vector[0][0].layout() == desired_layout;
bool is_valid = ((*start_axis) == 1 && (*stop_axis) == 3);
if (op_name == "flatten" || op_name == "flatten_contiguous_range") {
if (no_tranpose && is_valid) {
......@@ -226,15 +263,13 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune<int, int>(
}
}
VLOG(3) << " Optimze Layout lightly op: " << op_name;
transposer =
std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
return transposer;
}
// lightly int Concat
// lightly T can be int vector<int> vector<int64_t> IntArray
template <> // default int
template <>
inline std::shared_ptr<EagerLayoutTransformer>
EagerLayoutAutotune<paddle::experimental::Scalar>(
const std::string& op_name,
......@@ -243,30 +278,21 @@ EagerLayoutAutotune<paddle::experimental::Scalar>(
paddle::experimental::Scalar* axis) {
auto desired_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
auto first_layout = tensors_vector[0][0].layout();
std::shared_ptr<EagerLayoutTransformer> transposer = nullptr;
if (desired_layout == paddle::experimental::DataLayout::UNDEFINED) {
VLOG(3) << " Optimze Layout Unstarted : " << op_name;
transposer =
std::make_shared<EagerLayoutTransformer>(op_name, tensors_vector);
VLOG(3) << "Optimze Layout was not started" << op_name;
transposer = std::make_shared<EagerLayoutTransformer>(
op_name, tensors_vector, first_layout);
return transposer;
}
bool need_transpose = false;
for (size_t i = 0; i < tensors_vector.size(); i++) {
for (size_t idx = 0; idx < tensors_vector[0].size(); idx++) {
if (desired_layout != tensors_vector[i][idx].layout()) {
need_transpose = true;
}
}
}
if (need_transpose) {
VLOG(3) << "Concat need transpose to NCHW " << op_name;
if (NeedTransLayout(tensors_vector, desired_layout)) {
VLOG(3) << op_name << " need transpose to default layout";
transposer =
std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
return transposer;
} else {
VLOG(3) << " Optimze Layout lightly op: " << op_name;
auto trans = std::make_shared<EagerConcatOpTransformer>(op_name);
trans->SetAttr(axis, desired_layout);
return trans;
......
......@@ -22,9 +22,9 @@ namespace egr {
inline paddle::experimental::Tensor EagerTraceTransposeOp(
const paddle::experimental::DataLayout layout,
const paddle::experimental::Tensor& in) {
VLOG(4) << "AutoTune Transpose from " << in.layout() << " to " << layout
<< ", tensor's shape is " << in.shape().size();
if (in.shape().size() != 4) {
VLOG(4) << "Shape is " << in.shape().size() << " can't transpose to"
<< paddle::framework::DataLayoutToString(layout);
return in;
}
std::vector<int> axis;
......@@ -44,77 +44,75 @@ inline paddle::experimental::Tensor EagerTraceTransposeOp(
// agnostic op
class EagerLayoutTransformer {
using Layout = paddle::experimental::DataLayout;
public:
EagerLayoutTransformer() : op_name_("") {}
explicit EagerLayoutTransformer(
const std::string& op_name,
const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& tensors_vector)
: op_name_(op_name) {
final_layout_ = "UNDEFINED";
auto desired_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
for (size_t i = 0; i < tensors_vector.size(); i++) {
for (size_t idx = 0; idx < tensors_vector[0].size(); idx++) {
if (final_layout_ == "UNDEFINED") {
final_layout_ = paddle::framework::DataLayoutToString(
tensors_vector[0][0].layout());
} else if (tensors_vector[i][idx].layout() == desired_layout) {
final_layout_ = paddle::framework::DataLayoutToString(desired_layout);
break;
}
}
}
VLOG(4) << op_name_ << "final_layout_ is " << final_layout_;
}
EagerLayoutTransformer() : op_name_(""), final_layout_(Layout::UNDEFINED) {}
EagerLayoutTransformer(const EagerLayoutTransformer&) = delete;
EagerLayoutTransformer& operator=(const EagerLayoutTransformer&) = delete;
explicit EagerLayoutTransformer(
const std::string& op_name,
const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& tensors_vector,
const Layout final_layout = Layout::UNDEFINED)
: op_name_(op_name), final_layout_(final_layout) {
VLOG(4) << "Agnostic op : " << op_name_ << " final_layout_ is "
<< final_layout_;
}
virtual ~EagerLayoutTransformer() {}
virtual paddle::optional<paddle::experimental::Tensor> TransInTensor(
const std::string& in_name,
const paddle::optional<paddle::experimental::Tensor>& in) {
VLOG(4) << op_name_ << "is is agnostic, final_layout_ is " << final_layout_;
return in;
virtual paddle::experimental::Tensor TransInTensor(
const std::string& in_name, const paddle::experimental::Tensor& in) {
if (final_layout_ == Layout::UNDEFINED || final_layout_ == in.layout()) {
VLOG(4) << "EagerLayoutTransformer with no trans";
return in;
} else { // from NCHW to NHWC
VLOG(4) << "EagerLayoutTransformer with trans from " << in.layout()
<< " to " << final_layout_;
auto out_tensor = EagerTraceTransposeOp(final_layout_, in);
phi::DenseTensorUtils::GetMutableMeta(
static_cast<phi::DenseTensor*>(out_tensor.impl().get()))
->layout = final_layout_;
return out_tensor;
}
}
virtual paddle::optional<std::vector<paddle::experimental::Tensor>>
TransInTensor(
virtual paddle::optional<paddle::experimental::Tensor> TransInTensor(
const std::string& in_name,
const paddle::optional<std::vector<paddle::experimental::Tensor>>& in) {
return in;
const paddle::optional<paddle::experimental::Tensor>& in) {
return in ? TransInTensor(in_name, *in) : in;
}
virtual std::vector<paddle::experimental::Tensor> TransInTensor(
virtual std::vector<paddle::experimental::Tensor> TransInTensors(
const std::string& in_name,
const std::vector<paddle::experimental::Tensor>& in) {
VLOG(4) << " TransInTensor";
return in;
}
virtual paddle::experimental::Tensor TransInTensor(
const std::string& in_name, const paddle::experimental::Tensor& in) {
virtual paddle::optional<std::vector<paddle::experimental::Tensor>>
TransInTensors(
const std::string& in_name,
const paddle::optional<std::vector<paddle::experimental::Tensor>>& in) {
VLOG(4) << " TransInTensor";
if (in) {
return TransInTensors(in_name, *in);
}
return in;
}
virtual void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) {
bool use_default = (final_layout_ == "Undefined(AnyLayout)" ||
final_layout_ == ("UNDEFINED"));
auto layout = paddle::framework::StringToDataLayout(final_layout_);
if (!use_default) {
phi::DenseTensorUtils::GetMutableMeta(
static_cast<phi::DenseTensor*>(out_tensor->impl().get()))
->layout = layout;
}
VLOG(4) << op_name_ << "is is agnostic, use_default " << use_default;
virtual void SetOutTensorLayout(
paddle::optional<paddle::experimental::Tensor>* out_tensor) {
VLOG(4) << "optional out_tensor";
}
virtual void SetOutTensorLayout(
std::vector<paddle::experimental::Tensor>* out_tensor) {
bool use_default = (final_layout_ == "Undefined(AnyLayout)" ||
final_layout_ == ("UNDEFINED"));
bool use_default = (final_layout_ == Layout::UNDEFINED);
if (!use_default) {
for (size_t i = 0; i < out_tensor->size(); i++) {
phi::DenseTensorUtils::GetMutableMeta(
......@@ -126,9 +124,24 @@ class EagerLayoutTransformer {
VLOG(4) << op_name_ << "is is agnostic, use_default " << use_default;
}
virtual void SetOutTensorLayout(
paddle::optional<std::vector<paddle::experimental::Tensor>>* out_tensor) {
VLOG(4) << "optional out_tensor";
}
virtual void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) {
bool use_default = final_layout_ == Layout::UNDEFINED;
if (!use_default) {
phi::DenseTensorUtils::GetMutableMeta(
static_cast<phi::DenseTensor*>(out_tensor->impl().get()))
->layout = final_layout_;
}
VLOG(4) << op_name_ << "is is agnostic, use_default " << use_default;
}
protected:
std::string op_name_;
std::string final_layout_;
const Layout final_layout_;
};
class EagerHeavilyLayoutSensitiveOpTransformer : public EagerLayoutTransformer {
......@@ -145,21 +158,6 @@ class EagerHeavilyLayoutSensitiveOpTransformer : public EagerLayoutTransformer {
}
}
virtual paddle::optional<std::vector<paddle::experimental::Tensor>>
TransInTensor(
const std::string& in_name,
const paddle::optional<std::vector<paddle::experimental::Tensor>>& in) {
VLOG(4) << op_name_ << "is is heavily";
return in;
}
virtual paddle::optional<paddle::experimental::Tensor> TransInTensor(
const std::string& in_name,
const paddle::optional<paddle::experimental::Tensor>& in) {
VLOG(4) << op_name_ << "is is heavily";
return in;
}
paddle::experimental::Tensor TransInTensor(
const std::string& in_name, const paddle::experimental::Tensor& in) {
if (heavily_input_.count(in_name) != 0 && in.layout() != desired_layout_) {
......@@ -230,7 +228,6 @@ class EagerLightlyLayoutSensitiveOpTransformer : public EagerLayoutTransformer {
paddle::framework::DataLayoutToString(in.layout());
auto default_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
if (final_layout_ == input_layout && in.shape().size() == 4) {
VLOG(4) << op_name_ << "'s " << in_name << " need transpose from "
<< input_layout << " to default_layout";
......@@ -245,7 +242,7 @@ class EagerLightlyLayoutSensitiveOpTransformer : public EagerLayoutTransformer {
return in;
}
virtual std::vector<paddle::experimental::Tensor> TransInTensor(
virtual std::vector<paddle::experimental::Tensor> TransInTensors(
const std::string& in_name,
const std::vector<paddle::experimental::Tensor>& in) {
std::vector<paddle::experimental::Tensor> result;
......@@ -340,22 +337,19 @@ class EagerTransposeOpTransformer
paddle::experimental::Tensor TransInTensor(
const std::string& in_name, const paddle::experimental::Tensor& in) {
VLOG(4) << "with no transpose: EagerTransposeOpTransformer " << in_name
<< "'s layout is "
<< paddle::framework::DataLayoutToString(in.layout());
return in;
}
void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) {
auto desired_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
if (out_tensor->layout() != desired_layout) {
auto default_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
if (out_tensor->layout() != default_layout) {
VLOG(4) << " Set Out_tensor's layout from "
<< paddle::framework::DataLayoutToString(out_tensor->layout())
<< " to " << final_layout_;
<< " to " << default_layout;
phi::DenseTensorUtils::GetMutableMeta(
static_cast<phi::DenseTensor*>(out_tensor->impl().get()))
->layout = desired_layout;
->layout = default_layout;
}
}
......@@ -385,15 +379,15 @@ class EagerArgmaxOpTransformer
void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) {
VLOG(4) << "EagerArgmaxOpTransformer's out layout is"
<< paddle::framework::DataLayoutToString(out_tensor->layout());
auto desired_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
if (desired_layout != out_tensor->layout()) {
auto default_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
if (default_layout != out_tensor->layout()) {
VLOG(4) << "Change layout from "
<< paddle::framework::DataLayoutToString(out_tensor->layout())
<< " to " << final_layout_;
<< " to " << default_layout;
phi::DenseTensorUtils::GetMutableMeta(
static_cast<phi::DenseTensor*>(out_tensor->impl().get()))
->layout = desired_layout;
->layout = default_layout;
}
}
......@@ -410,11 +404,11 @@ class EagerFlattenOpTransformer
explicit EagerFlattenOpTransformer(const std::string& op_name)
: op_name_(op_name) {
VLOG(3) << "Optimze Layout lightly " << op_name;
auto desired_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
std::string desired_layout_str =
paddle::framework::DataLayoutToString(desired_layout);
final_layout_ = desired_layout_str;
auto default_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
std::string default_layout_str =
paddle::framework::DataLayoutToString(default_layout);
final_layout_ = default_layout_str;
}
// transpose from NHWC to NCHW
......@@ -424,16 +418,17 @@ class EagerFlattenOpTransformer
}
void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) {
VLOG(4) << "EagerArgmaxOpTransformer's out layout is"
VLOG(4) << "EagerFlattenOpTransformer's out layout is"
<< paddle::framework::DataLayoutToString(out_tensor->layout());
auto layout = paddle::framework::StringToDataLayout(final_layout_);
if (layout != out_tensor->layout()) {
auto desired_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
if (desired_layout != out_tensor->layout()) {
VLOG(4) << "Change layout from "
<< paddle::framework::DataLayoutToString(out_tensor->layout())
<< " to " << final_layout_;
<< " to " << desired_layout;
phi::DenseTensorUtils::GetMutableMeta(
static_cast<phi::DenseTensor*>(out_tensor->impl().get()))
->layout = layout;
->layout = desired_layout;
}
}
......@@ -450,11 +445,11 @@ class EagerConcatOpTransformer
explicit EagerConcatOpTransformer(const std::string& op_name)
: op_name_(op_name) {
VLOG(3) << "Optimze Layout lightly " << op_name;
auto desired_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
std::string desired_layout_str =
paddle::framework::DataLayoutToString(desired_layout);
final_layout_ = desired_layout_str;
auto default_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
std::string default_layout_str =
paddle::framework::DataLayoutToString(default_layout);
final_layout_ = default_layout_str;
}
void SetAttr(paddle::experimental::Scalar* axis,
......@@ -467,7 +462,7 @@ class EagerConcatOpTransformer
(*axis) = static_cast<paddle::experimental::Scalar>(perm[axes]);
}
virtual std::vector<paddle::experimental::Tensor> TransInTensor(
virtual std::vector<paddle::experimental::Tensor> TransInTensors(
const std::string& in_name,
const std::vector<paddle::experimental::Tensor>& in) {
return in;
......
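A minimal sketch of the renamed transformer API after this change: single tensors still go through TransInTensor, while vector inputs now use TransInTensors; the helper function and variable names below are illustrative assumptions, not part of the commit.

// Sketch only: transformer comes from egr::EagerLayoutAutotune(...);
// x, xs and the op being called are placeholders.
paddle::experimental::Tensor CallWithLayoutTransform(
    const std::shared_ptr<egr::EagerLayoutTransformer>& transformer,
    const paddle::experimental::Tensor& x,
    const std::vector<paddle::experimental::Tensor>& xs) {
  auto new_x = transformer->TransInTensor("x", x);      // single tensor input
  auto new_xs = transformer->TransInTensors("xs", xs);  // vector input (renamed)
  (void)new_xs;      // would be forwarded to the real op together with new_x
  auto out = new_x;  // placeholder for the real op call
  transformer->SetOutTensorLayout(&out);                // fix up output layout
  return out;
}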
......@@ -14,23 +14,15 @@
#include "paddle/fluid/imperative/layout_autotune.h"
#include "paddle/fluid/eager/api/utils/global_utils.h"
#include "paddle/fluid/framework/op_info.h"
#include "paddle/fluid/imperative/layout_transformer.h"
#include "paddle/phi/backends/gpu/gpu_info.h"
#include "paddle/phi/core/enforce.h"
#include "paddle/phi/core/errors.h"
namespace paddle {
namespace imperative {
bool LayoutAutoTune::UseLayoutAutoTune() const {
#if defined(PADDLE_WITH_CUDA)
return use_layout_autotune_;
#else
return false;
#endif
}
LayoutAutoTune::LayoutAutoTune() {
const auto& op_info = paddle::framework::OpInfoMap::Instance().map();
for (auto it = op_info.begin(); it != op_info.end(); it++) {
......@@ -140,6 +132,26 @@ paddle::imperative::NameVarMap<VarType> DealLightlyLayoutSensitive(
return transposer->Apply(ins, outs, attrs, tracer);
}
LayoutAutotuneGuard::LayoutAutotuneGuard(std::shared_ptr<Tracer> tracer,
bool use_autotune)
: tracer_(tracer) {
pre_layout_autotune_ = tracer_->UseLayoutAutoTune();
if (pre_layout_autotune_ != use_autotune) {
tracer_->EnableLayoutAutoTune();
if (!use_autotune) {
tracer_->DisableLayoutAutoTune();
}
}
}
LayoutAutotuneGuard::~LayoutAutotuneGuard() {
if (pre_layout_autotune_) {
tracer_->EnableLayoutAutoTune();
} else {
tracer_->DisableLayoutAutoTune();
}
}
template <typename VarType>
paddle::imperative::NameVarMap<VarType> AutoTuneLayout(
const std::string& op_type,
......@@ -147,7 +159,7 @@ paddle::imperative::NameVarMap<VarType> AutoTuneLayout(
const paddle::imperative::NameVarMap<VarType>& outs,
paddle::framework::AttributeMap* attrs,
const std::shared_ptr<imperative::Tracer>& tracer) {
if (!LayoutAutoTune::Instance().UseLayoutAutoTune()) {
if (!tracer->UseLayoutAutoTune()) {
return ins;
}
// When layout autotuning is enabled, the tuner will check the desired layout.
......@@ -165,7 +177,7 @@ paddle::imperative::NameVarMap<VarType> AutoTuneLayout(
} else {
#if defined(PADDLE_WITH_CUDA)
if (!phi::backends::gpu::TensorCoreAvailable()) {
LayoutAutoTune::Instance().DisableLayoutAutoTune();
tracer->DisableLayoutAutoTune();
return ins;
}
#endif
......@@ -185,7 +197,7 @@ paddle::imperative::NameVarMap<VarType> AutoTuneLayout(
} else if (is_tune_fp16) {
LayoutAutoTune::Instance().SetDesiredLayout(DataLayout::NHWC);
} else {
LayoutAutoTune::Instance().DisableLayoutAutoTune();
tracer->DisableLayoutAutoTune();
return ins;
}
VLOG(3) << "Tune the layout from "
......
......@@ -19,8 +19,8 @@
#include <unordered_set>
#include "paddle/fluid/framework/type_defs.h"
#include "paddle/fluid/imperative/tracer.h"
#include "paddle/phi/common/layout.h"
namespace paddle {
namespace imperative {
......@@ -35,12 +35,6 @@ class LayoutAutoTune {
return layout_autoTune;
}
bool UseLayoutAutoTune() const;
void EnableLayoutAutoTune() { use_layout_autotune_ = true; }
void DisableLayoutAutoTune() { use_layout_autotune_ = false; }
bool IsHeavilyLayoutSensitive(const std::string& op_type) const {
return heavily_layout_sensitive_ops_.count(op_type) != 0;
}
......@@ -64,8 +58,6 @@ class LayoutAutoTune {
private:
LayoutAutoTune();
bool use_layout_autotune_{false};
std::unordered_set<std::string> layout_agnostic_ops_{};
std::unordered_set<std::string> heavily_layout_sensitive_ops_{"batch_norm"};
......@@ -73,11 +65,29 @@ class LayoutAutoTune {
std::unordered_set<std::string> lightly_layout_sensitive_ops_{
"instance_norm", "softmax", "transpose", "transpose2", "reshape2"};
// Best Layout in this platform
DataLayout desired_layout_{DataLayout::UNDEFINED};
// Default Layout in this model
DataLayout default_layout_{DataLayout::UNDEFINED};
};
// LayoutAutotuneGuard is used for RAII.
class LayoutAutotuneGuard {
public:
LayoutAutotuneGuard(std::shared_ptr<Tracer> tracer, bool use_autotune);
~LayoutAutotuneGuard();
// forbid copy and operator=
LayoutAutotuneGuard(const LayoutAutotuneGuard& guard) = delete;
LayoutAutotuneGuard& operator=(const LayoutAutotuneGuard& guard) = delete;
private:
std::shared_ptr<Tracer> tracer_;
bool pre_layout_autotune_;
};
template <typename VarType>
paddle::imperative::NameVarMap<VarType> AutoTuneLayout(
const std::string& op_type,
......
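A minimal sketch of the new RAII guard, matching its use in LAYOUT_LOGIC_TEMPLATE above: autotune is forced off for the scope and the previous state is restored when the guard is destroyed. ForwardWithAutotuneOff and RunInnerOp are illustrative placeholders, not part of the commit.

// Sketch only: the guard saves the tracer's current autotune flag, applies
// use_autotune=false for this scope, and restores the old flag on exit.
void ForwardWithAutotuneOff() {
  paddle::imperative::LayoutAutotuneGuard guard(
      egr::Controller::Instance().GetCurrentTracer(), /*use_autotune=*/false);
  RunInnerOp();  // hypothetical inner call, runs with layout autotune disabled
}  // previous autotune state restored here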
......@@ -19,8 +19,24 @@
#include "paddle/fluid/imperative/var_helper.h"
#include "paddle/phi/core/enforce.h"
#include "paddle/phi/core/errors.h"
#include "paddle/phi/core/tensor_utils.h"
namespace paddle {
namespace imperative {
template <typename VarType>
void SetOutDataLayout(std::shared_ptr<VarType> var,
const paddle::experimental::DataLayout layout) {
if (var != nullptr) {
paddle::imperative::SetDataLayout(var, layout);
// set out_tensor's layout
if (var->MutableVar()->IsInitialized()) {
paddle::framework::Variable* tmp_var = var->MutableVar();
auto* out = tmp_var->GetMutable<framework::LoDTensor>();
phi::DenseTensorUtils::GetMutableMeta(
static_cast<framework::LoDTensor*>(out))
->layout = layout;
}
}
}
template <typename VarType>
std::shared_ptr<VarType> TraceTransposeOp(
......@@ -118,7 +134,7 @@ class LayoutTransformer {
auto out_vars = outs.at(name);
for (auto& var : out_vars) {
if (var != nullptr) {
paddle::imperative::SetDataLayout(var, layout);
paddle::imperative::SetOutDataLayout(var, layout);
}
}
not_in_out = false;
......@@ -130,7 +146,7 @@ class LayoutTransformer {
for (auto& pair : outs) {
for (auto& var : pair.second) {
if (var != nullptr) {
paddle::imperative::SetDataLayout(var, layout);
paddle::imperative::SetOutDataLayout(var, layout);
}
}
}
......
......@@ -42,6 +42,8 @@ thread_local bool Tracer::enable_program_desc_tracing_ = false;
thread_local bool Tracer::has_grad_ = true;
thread_local bool Tracer::use_layout_autotune_ = false;
thread_local AmpLevel Tracer::amp_level_ = AmpLevel::O0;
thread_local phi::DataType Tracer::amp_dtype_ = phi::DataType::FLOAT32;
......
......@@ -28,9 +28,9 @@
#include "paddle/fluid/imperative/basic_engine.h"
#include "paddle/fluid/imperative/jit/program_desc_tracer.h"
#include "paddle/fluid/imperative/layer.h"
#include "paddle/fluid/imperative/layout_autotune.h"
#include "paddle/fluid/platform/macros.h"
#include "paddle/phi/core/compat/arg_map_context.h"
namespace paddle {
namespace imperative {
......@@ -184,6 +184,20 @@ class Tracer {
}
}
void DisableLayoutAutoTune() { use_layout_autotune_ = false; }
void EnableLayoutAutoTune() { use_layout_autotune_ = true; }
bool UseLayoutAutoTune() {
#if defined(PADDLE_WITH_CUDA)
if (phi::backends::gpu::TensorCoreAvailable()) {
return use_layout_autotune_;
}
#endif
use_layout_autotune_ = false;
return false;
}
phi::KernelSignature GetExpectedKernelSignature(
const std::string& type,
const NameTensorMap& ins,
......@@ -199,8 +213,8 @@ class Tracer {
std::unique_ptr<UniqueNameGenerator> generator_;
platform::Place expected_place_;
GarbageCollectorMap gcs_;
static thread_local bool enable_program_desc_tracing_;
static thread_local bool use_layout_autotune_;
static thread_local bool has_grad_;
static thread_local AmpLevel amp_level_;
static thread_local phi::DataType amp_dtype_;
......
......@@ -2503,19 +2503,14 @@ All parameter, weight, gradient are variables in Paddle.
return res;
});
m.def("enable_layout_autotune", [] {
return paddle::imperative::LayoutAutoTune::Instance()
.EnableLayoutAutoTune();
});
m.def("enable_layout_autotune",
[] { return egr::Controller::Instance().EnableLayoutAutoTune(); });
m.def("disable_layout_autotune", [] {
return paddle::imperative::LayoutAutoTune::Instance()
.DisableLayoutAutoTune();
});
m.def("disable_layout_autotune",
[] { return egr::Controller::Instance().DisableLayoutAutoTune(); });
m.def("use_layout_autotune", [] {
return paddle::imperative::LayoutAutoTune::Instance().UseLayoutAutoTune();
});
m.def("use_layout_autotune",
[] { return egr::Controller::Instance().UseLayoutAutoTune(); });
BindFleetWrapper(&m);
BindIO(&m);
......
......@@ -52,9 +52,9 @@ inline bool NeedTransformPlace(const paddle::platform::Place& input,
return ret;
}
inline bool NeedTransformLayout(const paddle::platform::Place& place,
const DataLayout& input,
inline bool NeedTransformLayout(const DataLayout& input,
const DataLayout& target,
const paddle::platform::Place& place,
const TransformFlag& transform_flag) {
bool ret = transform_flag.need_trans_layout() &&
(input != DataLayout::ALL_LAYOUT &&
......@@ -202,9 +202,9 @@ phi::DenseTensor TransformData(phi::DenseTensor* tensor,
bool trans_layout = false;
bool trans_dtype = false;
if (NeedTransformLayout(tensor->place(),
tensor->layout(),
if (NeedTransformLayout(tensor->layout(),
target_args_def.layout,
tensor->place(),
transform_flag)) {
out = TransDataLayout(out, target_args_def.layout);
trans_layout = true;
......@@ -240,9 +240,9 @@ std::shared_ptr<phi::DenseTensor> PrepareData(
dense_tensor.place(), target_args_def.backend, transform_flag) &&
!NeedTransformDataType(
dense_tensor.dtype(), target_args_def.dtype, transform_flag) &&
!NeedTransformLayout(dense_tensor.place(),
dense_tensor.layout(),
!NeedTransformLayout(dense_tensor.layout(),
target_args_def.layout,
dense_tensor.place(),
transform_flag))) {
return std::static_pointer_cast<phi::DenseTensor>(tensor_in);
}
......@@ -277,9 +277,9 @@ std::unique_ptr<std::vector<phi::DenseTensor>> PrepareData(
tensor_in->place(), target_args_def.backend, transform_flag) &&
!NeedTransformDataType(
tensor_in->dtype(), target_args_def.dtype, transform_flag) &&
!NeedTransformLayout(tensor_in->place(),
tensor_in->layout(),
!NeedTransformLayout(tensor_in->layout(),
target_args_def.layout,
tensor_in->place(),
transform_flag))) {
pt_tensors->emplace_back(
*std::dynamic_pointer_cast<phi::DenseTensor>(tensor_in));
......
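A minimal sketch of the reordered NeedTransformLayout call after this change: the two layouts come first and the tensor's place moves to the third position. The variables are assumed to be in scope as in PrepareData above.

// Sketch only: new parameter order is (input layout, target layout, place,
// transform_flag), with the place now third instead of first.
bool need_layout_trans = NeedTransformLayout(dense_tensor.layout(),
                                             target_args_def.layout,
                                             dense_tensor.place(),
                                             transform_flag);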
......@@ -46,6 +46,13 @@ class SimpleNet(paddle.nn.Layer):
class LayoutAutoTune(unittest.TestCase):
def test_config(self):
paddle.fluid.core.enable_layout_autotune()
if self.use_autoune():
self.assertEqual(paddle.fluid.core.use_layout_autotune(), True)
paddle.fluid.core.disable_layout_autotune()
self.assertEqual(paddle.fluid.core.use_layout_autotune(), False)
def setUp(self):
self.use_autoune()
......
......@@ -130,15 +130,13 @@ def _conv_nd(x,
if bias is not None:
channel_dim = channel_dim + len(
x.shape) if channel_dim < 0 else channel_dim
if pre_bias.layout == "NHWC":
channel_dim = 3 # last dim
if isinstance(x, tuple):
x = x[0]
if isinstance(bias, tuple):
bias = bias[0]
if len(bias.shape) < len(x.shape):
tmp_bias = _C_ops.reshape(
bias, bias.shape +
bias, [1 for i in range(channel_dim)] + bias.shape +
[1 for i in range(len(x.shape) - channel_dim - 1)])
return _C_ops.add(pre_bias, tmp_bias)
else:
......