Unverified commit 16439bb9, authored by niuliling123, committed by GitHub

Update layoutautotune for inplace (#45826)

Parent 46e4fb2a
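For orientation, here is a minimal illustrative sketch (not part of the diff) of the pattern this change moves the generated forward code to: layout autotune is queried and toggled through egr::Controller, and the temporary disable around the nested call is scoped with the new paddle::imperative::LayoutAutotuneGuard. SketchForward below is a hypothetical stand-in; only the Controller and guard calls mirror interfaces shown in the diff.

// Sketch only; assumes the Controller methods and LayoutAutotuneGuard added in this commit.
#include "paddle/fluid/eager/api/utils/global_utils.h"
#include "paddle/fluid/imperative/layout_autotune.h"

paddle::experimental::Tensor SketchForward(const paddle::experimental::Tensor& input) {
  if (egr::Controller::Instance().UseLayoutAutoTune()) {
    // Turn autotune off for the nested call and let the guard restore the
    // previous tracer state when this scope ends.
    paddle::imperative::LayoutAutotuneGuard guard(
        egr::Controller::Instance().GetCurrentTracer(), /*use_autotune=*/false);
    // ... transform inputs, call the real forward, set output tensor layouts ...
  }
  return input;  // placeholder
}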
@@ -51,17 +51,17 @@ paddle::experimental::Tensor conv2d_ad_func(
auto amp_dst_dtype = egr::GetAmpDestDtype(op_name, amp_tensors_vector);
- auto NEW_input =
+ auto new_input =
egr::EagerAmpAutoCast("input", input, amp_dst_dtype, op_name);
- auto NEW_filter =
+ auto new_filter =
egr::EagerAmpAutoCast("filter", filter, amp_dst_dtype, op_name);
{
paddle::imperative::AutoCastGuard guard(
egr::Controller::Instance().GetCurrentTracer(),
paddle::imperative::AmpLevel::O0);
- return conv2d_ad_func(NEW_input,
- NEW_filter,
+ return conv2d_ad_func(new_input,
+ new_filter,
strides,
paddings,
paddding_algorithm,
@@ -76,7 +76,7 @@ paddle::experimental::Tensor conv2d_ad_func(
// Layout autotune
- if (paddle::imperative::LayoutAutoTune::Instance().UseLayoutAutoTune()) {
+ if (egr::Controller::Instance().UseLayoutAutoTune()) {
VLOG(5) << "Check and Prepare For LAYOUT";
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
egr::kSlotSmallVectorSize>
@@ -85,11 +85,10 @@ paddle::experimental::Tensor conv2d_ad_func(
auto op_name = phi::TransToFluidOpName("conv2d");
auto transformer = egr::EagerLayoutAutotune<std::string>(
op_name, tensors_vector, &data_format);
- auto NEW_input = transformer->TransInTensor("input", input);
- bool is_enable_tune =
- paddle::imperative::LayoutAutoTune::Instance().UseLayoutAutoTune();
- paddle::imperative::LayoutAutoTune::Instance().DisableLayoutAutoTune();
- auto out = conv2d_ad_func(NEW_input,
+ auto new_input = transformer->TransInTensor("input", input);
+ bool need_tune = egr::Controller::Instance().UseLayoutAutoTune();
+ egr::Controller::Instance().DisableLayoutAutoTune();
+ auto out = conv2d_ad_func(new_input,
filter,
strides,
paddings,
@@ -101,8 +100,8 @@ paddle::experimental::Tensor conv2d_ad_func(
workspace_size_MB,
exhaustive_search);
transformer->SetOutTensorLayout(&out);
- if (is_enable_tune) {
- paddle::imperative::LayoutAutoTune::Instance().EnableLayoutAutoTune();
+ if (need_tune) {
+ egr::Controller::Instance().EnableLayoutAutoTune();
}
// Returns
return out;
...
@@ -55,6 +55,23 @@ class Controller {
paddle::imperative::AmpLevel GetAMPLevel() const {
return tracer_->GetAmpLevel();
}
bool UseLayoutAutoTune() {
bool use_autotune = false;
#if defined(PADDLE_WITH_CUDA)
auto place = tracer_->ExpectedPlace();
bool is_gpu_place = paddle::platform::is_gpu_place(place);
if (is_gpu_place) {
use_autotune = tracer_->UseLayoutAutoTune();
}
#endif
return use_autotune;
}
void DisableLayoutAutoTune() { tracer_->DisableLayoutAutoTune(); }
void EnableLayoutAutoTune() { tracer_->EnableLayoutAutoTune(); }
bool HasGrad() const { return tracer_->HasGrad(); }
void SetHasGrad(bool has_grad) { tracer_->SetHasGrad(has_grad); }
std::string GenerateUniqueName(std::string key = "eager_in_tmp") {
...
@@ -437,15 +437,14 @@ AMP_LOGIC_TEMPLATE = \
"""
LAYOUT_LOGIC_TEMPLATE=\
"""
- if (paddle::imperative::LayoutAutoTune::Instance().UseLayoutAutoTune()) {{
- VLOG(5) << "Check and Prepare For LAYOUT";
+ if (egr::Controller::Instance().UseLayoutAutoTune()) {{
paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> tensors_vector = {};
{}
{}
- paddle::imperative::LayoutAutoTune::Instance().DisableLayoutAutoTune();
+ VLOG(5) << "Check and Prepare For LAYOUT "<< op_name;
+ paddle::imperative::LayoutAutotuneGuard guard(egr::Controller::Instance().GetCurrentTracer(), false);
{}
{}
- paddle::imperative::LayoutAutoTune::Instance().EnableLayoutAutoTune();
// Returns
return {};
}}
@@ -922,6 +921,7 @@ class DygraphFunctionGeneratorBase(FunctionGeneratorBase):
set_grad_in_meta = f"{indent}grad_node->SetGradInMeta({name}, {pos});"
set_retain_grad = f"{indent}egr::EagerUtils::CheckAndRetainGrad({name});"
set_out_rank_list.append(set_out_rank)
set_history_list.append(set_history)
set_grad_in_meta_list.append(set_grad_in_meta)
@@ -1014,6 +1014,98 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
self.forward_definition_str = ""
self.forward_declaration_str = ""
def GenerateForwardLayoutAutotune(self, forward_api_name,
amp_tensors_vector_list,
layout_tensors_vector_optional_list,
layout_autotune_list_str,
returns_type_str, returns_str,
amp_inputs_call_args_str):
intermediate_outputs = self.intermediate_outputs
forward_attrs_list = self.forward_attrs_list
forward_outputs_position_map = self.forward_outputs_position_map
num_outputs = len(
forward_outputs_position_map.keys()) - len(intermediate_outputs)
# for layout autotune attr
lightly_sensitive_attr = [
'axis', 'axes', 'dim', 'dims', 'start', 'end', 'stop'
]
heavily_sensitive_attr = ['data_format', 'data_layout']
layout_autotune_attr = []
layout_autotune_attr_code_list = []
layout_autotune_attr_type_list = []
layout_autotune_attr_code_list.append(
f"auto op_name = phi::TransToFluidOpName(\"{forward_api_name}\");\n"
)
lightly_flag = False
heavily_flag = False
for name, atype, default_val, pos in forward_attrs_list:
for attr_name in lightly_sensitive_attr:
if name.find(attr_name) != -1 and (name
not in layout_autotune_attr):
lightly_flag = True
layout_autotune_attr.append(name)
layout_autotune_attr_type_list.append(atype)
if lightly_flag is False:
for attr_name in heavily_sensitive_attr:
if name.find(attr_name) != -1 and (
name not in layout_autotune_attr):
layout_autotune_attr.append(name)
layout_autotune_attr_type_list.append(atype)
heavily_flag = True
if len(layout_autotune_attr) == 0:
layout_autotune_attr_code_list.append(
f"auto transformer = egr::EagerLayoutAutotune(op_name, tensors_vector);\n"
)
elif len(layout_autotune_attr) == 1:
layout_autotune_attr_code_list.append(
f"auto transformer = egr::EagerLayoutAutotune<{layout_autotune_attr_type_list[0]}>(op_name, tensors_vector, &{layout_autotune_attr[0]});\n"
)
elif len(layout_autotune_attr) == 2:
layout_autotune_attr_code_list.append(
f"auto transformer = egr::EagerLayoutAutotune<{layout_autotune_attr_type_list[0]}, {layout_autotune_attr_type_list[1]}>(op_name, tensors_vector, &{layout_autotune_attr[0]}, &{layout_autotune_attr[1]});\n"
)
else:
layout_autotune_attr_code_list.append(
f"auto transformer = egr::EagerLayoutAutotune<{layout_autotune_attr_type_list[0]}>(op_name, tensors_vector,&{layout_autotune_attr[0]});\n"
)
# Out tensor
layout_inputs_call_args_str = amp_inputs_call_args_str
forward_function_name = GetDygraphForwardFunctionName(forward_api_name)
layout_tmp_result_list = []
layout_autotune_outs_list = []
result_name = "api_result"
if num_outputs == 1:
result_name = returns_str
layout_autotune_outs_list.append(
f"transformer -> SetOutTensorLayout(&{returns_str});\n")
else:
for name, (rtype, pos) in forward_outputs_position_map.items():
if name in intermediate_outputs:
continue
layout_autotune_outs_list.append(
f" auto& {name} = std::get<{len(layout_tmp_result_list)}>(api_result);\n"
)
layout_autotune_outs_list.append(
f" transformer -> SetOutTensorLayout(&{name});\n")
layout_tmp_result_list.append(f"{name}")
tensors_vector_list_str = "{ " + ",".join(
amp_tensors_vector_list) + " }"
if len(amp_tensors_vector_list) == 0:
layout_logic_str = ""
else:
after_call_str = f"{returns_type_str} {result_name} = {forward_function_name}({layout_inputs_call_args_str});\n"
layout_logic_str = LAYOUT_LOGIC_TEMPLATE.format(
tensors_vector_list_str,
" ".join(layout_tensors_vector_optional_list),
" ".join(layout_autotune_attr_code_list) + " " +
layout_autotune_list_str, after_call_str,
" ".join(layout_autotune_outs_list), returns_str)
return layout_logic_str
def GenerateForwardDefinitionAndDeclaration(self, is_inplaced):
namespace = self.namespace
if self.forward_api_name[-1] == '_' and not is_inplaced:
@@ -1049,7 +1141,7 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
layout_tensors_vector_optional_list = []
for name, (ttype, pos) in forward_inputs_position_map.items():
inputs_call_list[pos] = f"{name}"
- amp_inputs_call_list[pos] = f"NEW_{name}"
+ amp_inputs_call_list[pos] = f"new_{name}"
is_optional = (name in optional_inputs)
if IsPlainTensorType(ttype):
if is_optional:
@@ -1062,13 +1154,13 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
f"if ({name}) amp_tensors_vector.push_back({{ *{name} }});\n"
)
amp_autocast_optional_list.append(
- f"auto NEW_{name} = egr::EagerAmpAutoCast(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
+ f"auto new_{name} = egr::EagerAmpAutoCast(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
)
layout_tensors_vector_optional_list.append(
f"if ({name}) tensors_vector.push_back({{ *{name} }});\n"
)
layout_autotune_optional_list.append(
- f"auto NEW_{name} = transformer->TransInTensor(\"{name}\", {name});\n"
+ f"auto new_{name} = transformer->TransInTensor(\"{name}\", {name});\n"
)
else:
if is_inplaced and forward_inplace_map and name in forward_inplace_map.keys(
@@ -1076,16 +1168,16 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
arg_str = f"paddle::experimental::Tensor& {name}"
amp_tensors_vector_list.append(f"{{{name}}}")
amp_autocast_list.append(
- f"auto NEW_{name} = egr::EagerAmpAutoCast(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
+ f"auto new_{name} = egr::EagerAmpAutoCast(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
)
else:
arg_str = f"const paddle::experimental::Tensor& {name}"
amp_tensors_vector_list.append(f"{{{name}}}")
amp_autocast_list.append(
- f"auto NEW_{name} = egr::EagerAmpAutoCast(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
+ f"auto new_{name} = egr::EagerAmpAutoCast(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
)
layout_autotune_list.append(
- f"auto NEW_{name} = transformer->TransInTensor(\"{name}\", {name});\n"
+ f"auto new_{name} = transformer->TransInTensor(\"{name}\", {name});\n"
)
else:
assert IsVectorTensorType(ttype)
@@ -1099,10 +1191,10 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
f"if ({name}) amp_tensors_vector.push_back( *{name} );\n"
)
amp_autocast_optional_list.append(
- f"auto NEW_{name} = egr::EagerAmpAutoCasts(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
+ f"auto new_{name} = egr::EagerAmpAutoCasts(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
)
layout_autotune_optional_list.append(
- f"auto NEW_{name} = transformer->TransInTensor(\"{name}\", {name});\n"
+ f"auto new_{name} = transformer->TransInTensors(\"{name}\", {name});\n"
)
else:
if is_inplaced and forward_inplace_map and name in forward_inplace_map.keys(
@@ -1112,60 +1204,15 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
arg_str = f"const std::vector<paddle::experimental::Tensor>& {name}"
amp_tensors_vector_list.append(f"{name}")
amp_autocast_list.append(
- f"auto NEW_{name} = egr::EagerAmpAutoCasts(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
+ f"auto new_{name} = egr::EagerAmpAutoCasts(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
)
layout_autotune_list.append(
- f"auto NEW_{name} = transformer->TransInTensor(\"{name}\", {name});\n"
+ f"auto new_{name} = transformer->TransInTensors(\"{name}\", {name});\n"
)
inputs_args_definition_list[pos] = arg_str
inputs_args_declaration_list[pos] = arg_str
# for layout autotune attr
lightly_sensitive_attr = [
'axis', 'axes', 'dim', 'dims', 'start', 'end', 'stop'
]
heavily_sensitive_attr = ['data_format', 'data_layout']
layout_autotune_attr = []
layout_autotune_attr_code_list = []
layout_autotune_attr_type_list = []
layout_autotune_attr_code_list.append(
f"auto op_name = phi::TransToFluidOpName(\"{forward_api_name}\");\n"
)
lightly_flag = False
heavily_flag = False
for name, atype, default_val, pos in forward_attrs_list:
for attr_name in lightly_sensitive_attr:
if name.find(
attr_name) != -1 and name not in layout_autotune_attr:
lightly_flag = True
layout_autotune_attr.append(name)
layout_autotune_attr_type_list.append(atype)
if lightly_flag is False:
for attr_name in heavily_sensitive_attr:
if name.find(attr_name
) != -1 and name not in layout_autotune_attr:
layout_autotune_attr.append(name)
layout_autotune_attr_type_list.append(atype)
heavily_flag = True
if len(layout_autotune_attr) == 0:
layout_autotune_attr_code_list.append(
f"auto transformer = egr::EagerLayoutAutotune(op_name, tensors_vector);\n"
)
elif len(layout_autotune_attr) == 1:
layout_autotune_attr_code_list.append(
f"auto transformer = egr::EagerLayoutAutotune<{layout_autotune_attr_type_list[0]}>(op_name, tensors_vector, &{layout_autotune_attr[0]});\n"
)
elif len(layout_autotune_attr) == 2:
layout_autotune_attr_code_list.append(
f"auto transformer = egr::EagerLayoutAutotune<{layout_autotune_attr_type_list[0]}, {layout_autotune_attr_type_list[1]}>(op_name, tensors_vector, &{layout_autotune_attr[0]}, &{layout_autotune_attr[1]});\n"
)
else:
layout_autotune_attr_code_list.append(
f"auto transformer = egr::EagerLayoutAutotune(op_name, tensors_vector, {len(layout_autotune_attr)});\n"
)
# forward attrs
for name, atype, default_val, pos in forward_attrs_list:
inputs_call_list[pos] = name
@@ -1356,33 +1403,12 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
amp_autocast_list_str, amp_call_str)
# Forward layout autotune
- layout_inputs_call_args_str = amp_inputs_call_args_str
- layout_tmp_result_list = []
- layout_autotune_outs_list = ""
- if num_outputs == 1:
- layout_autotune_outs_list += f"{indent}auto {returns_str} = api_result;\n"
- layout_autotune_outs_list += f"{indent}transformer -> SetOutTensorLayout(&{returns_str});\n"
- else:
- for name, (rtype, pos) in forward_outputs_position_map.items():
- if name in intermediate_outputs:
- continue
- layout_autotune_outs_list += f"{indent}auto& {name} = std::get<{len(layout_tmp_result_list)}>(api_result);\n"
- layout_autotune_outs_list += f"{indent}transformer -> SetOutTensorLayout(&{name});\n"
- layout_tmp_result_list.append(f"{name}")
- if returns_type_str == "paddle::experimental::Tensor&" or forward_api_name == "slice" or forward_api_name == "strided_slice" or len(
- layout_autotune_attr) == 0:
- layout_logic_str = ""
- else:
- # after_call_str = f"return {forward_ad_function_name}({layout_inputs_call_args_str});\n"
- after_call_str = f"auto api_result = {forward_ad_function_name}({layout_inputs_call_args_str});\n"
- layout_logic_str = LAYOUT_LOGIC_TEMPLATE.format(
- amp_tensors_vector_list_str,
- " ".join(layout_tensors_vector_optional_list),
- " ".join(layout_autotune_attr_code_list) + " " +
- " ".join(layout_autotune_list) +
- " ".join(layout_autotune_optional_list), after_call_str,
- layout_autotune_outs_list, returns_str)
+ layout_autotune_list_str = " ".join(
+ layout_autotune_list) + " ".join(layout_autotune_optional_list)
+ layout_logic_str = self.GenerateForwardLayoutAutotune(
+ forward_api_name, amp_tensors_vector_list,
+ layout_tensors_vector_optional_list, layout_autotune_list_str,
+ returns_type_str, returns_str, amp_inputs_call_args_str)
# For inputs outputs prepare for logging
var_str = f"\n{indent} std::string input_str = \"\";"
...
@@ -19,20 +19,65 @@
#include "paddle/fluid/imperative/layout_autotune.h"
#include "paddle/phi/backends/gpu/gpu_info.h"
namespace egr {
- // layout_agnostic_ops_
- // For agnostic op like add / relu
- inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
+ inline bool NeedTransLayout(
+ const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+ kSlotSmallVectorSize>& tensors_vector,
+ const paddle::experimental::DataLayout& layout) {
+ for (size_t i = 0; i < tensors_vector.size(); i++) {
+ for (size_t idx = 0; idx < tensors_vector[0].size(); idx++) {
+ if (layout != tensors_vector[i][idx].layout()) {
+ return true;
+ }
+ }
+ }
+ return false;
+ }
+ inline std::shared_ptr<EagerLayoutTransformer> BaseTransformer(
const std::string& op_name,
const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& tensors_vector) {
- VLOG(3) << " Optimze Layout agnostic op: " << op_name;
std::shared_ptr<EagerLayoutTransformer> transposer = nullptr;
- transposer =
- std::make_shared<EagerLayoutTransformer>(op_name, tensors_vector);
+ bool unstart =
+ (paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() ==
+ paddle::experimental::DataLayout::UNDEFINED);
+ auto first_layout = tensors_vector[0][0].layout();
+ VLOG(3) << "Layout autotune was is start ? " << (!unstart) << op_name
+ << "'s layout is " << first_layout;
+ transposer = std::make_shared<EagerLayoutTransformer>(
+ op_name, tensors_vector, first_layout);
return transposer;
}
// For agnostic op like add, relu, exp
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
const std::string& op_name,
const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& tensors_vector) {
auto desired_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
auto default_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
auto first_layout = tensors_vector[0][0].layout();
if (NeedTransLayout(tensors_vector, first_layout)) {
bool need_trans_back = false;
for (size_t i = 0; i < tensors_vector.size(); i++) {
for (size_t idx = 0; idx < tensors_vector[0].size(); idx++) {
if (4 != tensors_vector[i][idx].shape().size()) {
need_trans_back = true;
VLOG(3) << "Agnostic op " << op_name << " shape is "
<< tensors_vector[i][idx].shape().size() << " and layout is "
<< tensors_vector[i][idx].layout();
}
}
}
auto final_layout = need_trans_back ? default_layout : desired_layout;
return std::make_shared<EagerLayoutTransformer>(
op_name, tensors_vector, final_layout);
}
return BaseTransformer(op_name, tensors_vector);
}
// For lightly op like reduce
template <typename T>
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
@@ -40,16 +85,11 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& tensors_vector,
T* attr) {
- VLOG(3) << "Lightly op " << op_name << "'s shape is "
- << tensors_vector[0][0].shape().size() << " and layout is "
- << tensors_vector[0][0].layout();
std::shared_ptr<EagerLayoutTransformer> transposer = nullptr;
- bool unstart =
- (paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() ==
- paddle::experimental::DataLayout::UNDEFINED);
- if (unstart) {
- VLOG(3) << "Optimze Layout was not started" << op_name;
- transposer =
- std::make_shared<EagerLayoutTransformer>(op_name, tensors_vector);
- return transposer;
- }
transposer =
std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
return transposer;
@@ -63,33 +103,30 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
kSlotSmallVectorSize>& tensors_vector,
T1* axis,
T2* keep_dim) {
- VLOG(3) << "Lightly op " << op_name << "'s shape is "
- << tensors_vector[0][0].shape().size() << " and layout is "
- << tensors_vector[0][0].layout();
return EagerLayoutAutotune<T1>(op_name, tensors_vector, axis);
}
- // heavily string data_format data_layout
+ // heavily string data_format, data_layout
template <>
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
const std::string& op_name,
const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& tensors_vector,
std::string* attr) {
- VLOG(3) << " Optimze Layout heavily op: " << op_name;
- auto transposer =
- std::make_shared<EagerLayoutTransformer>(op_name, tensors_vector);
+ auto first_layout = tensors_vector[0][0].layout();
+ auto transposer = std::make_shared<EagerLayoutTransformer>(
+ op_name, tensors_vector, first_layout);
if (paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() ==
paddle::experimental::DataLayout::UNDEFINED) {
// Layout autotune only supports model with convolutional layers
- VLOG(3) << "Optimze Layout was not started" << op_name;
+ VLOG(3) << "Optimze Layout was not started " << op_name;
if (op_name != "conv2d") {
return transposer;
} else {
- #if defined(PADDLE_WITH_CUDA)
- if (paddle::platform::is_gpu_place(tensors_vector[0][0].place()) &&
- !phi::backends::gpu::TensorCoreAvailable()) {
- paddle::imperative::LayoutAutoTune::Instance().DisableLayoutAutoTune();
- return transposer;
- }
- #endif
auto data_type = tensors_vector[0][0].dtype();
bool is_tune_fp32 =
(data_type == paddle::experimental::DataType::FLOAT32) &&
@@ -97,6 +134,7 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
bool is_tune_fp16 =
(data_type == paddle::experimental::DataType::FLOAT16) &&
(*attr == "NCHW");
+ VLOG(3) << "Conv2d_dy's dtype " << data_type << " format" << (*attr);
if (is_tune_fp32) {
paddle::imperative::LayoutAutoTune::Instance().SetDesiredLayout(
paddle::experimental::DataLayout::NCHW);
@@ -109,26 +147,27 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
paddle::imperative::LayoutAutoTune::Instance().SetDefaultLayout(
paddle::experimental::DataLayout::NCHW);
} else {
- paddle::imperative::LayoutAutoTune::Instance().DisableLayoutAutoTune();
+ egr::Controller::Instance().DisableLayoutAutoTune();
return transposer;
}
- VLOG(3) << "Tune the layout from " << attr << " to "
- << paddle::framework::DataLayoutToString(
- paddle::imperative::LayoutAutoTune::Instance()
- .GetDesiredLayout());
+ VLOG(3)
+ << "Tune the layout from " << *attr << " to "
+ << paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
}
}
if (paddle::imperative::LayoutAutoTune::Instance().IsHeavilyLayoutSensitive(
op_name)) {
+ VLOG(3)
+ << op_name
+ << "'s LayoutTransformer is EagerHeavilyLayoutSensitiveOpTransformer";
auto heavily_transposer =
std::make_shared<EagerHeavilyLayoutSensitiveOpTransformer>(op_name,
attr);
return heavily_transposer;
}
- VLOG(3) << op_name
- << "'s LayoutTransformer is unimplemented. Use default "
- "LayoutTransformer instead.";
+ VLOG(3) << op_name << "'s LayoutTransformer is unimplemented. Use default.";
return transposer;
}
@@ -139,24 +178,23 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& tensors_vector,
std::vector<int>* attr) {
+ auto first_layout = tensors_vector[0][0].layout();
std::shared_ptr<EagerLayoutTransformer> transposer = nullptr;
if (paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() ==
paddle::experimental::DataLayout::UNDEFINED) {
- VLOG(3) << " Optimze Layout Unstarted : " << op_name;
- transposer =
- std::make_shared<EagerLayoutTransformer>(op_name, tensors_vector);
+ VLOG(3) << "Optimze Layout was not started" << op_name;
+ transposer = std::make_shared<EagerLayoutTransformer>(
+ op_name, tensors_vector, first_layout);
return transposer;
}
- VLOG(3) << " Optimze Layout lightly op: " << op_name;
- if (op_name == "transpose2") {
+ if (op_name == "transpose2" &&
+ (tensors_vector[0][0].layout() ==
+ paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout())) {
auto trans = std::make_shared<EagerTransposeOpTransformer>(op_name);
- if (tensors_vector[0][0].layout() ==
- paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout()) {
- trans->SetAttr(attr,
- tensors_vector[0][0].layout() ==
- paddle::experimental::DataLayout::NHWC);
- return trans;
- }
+ trans->SetAttr(attr,
+ tensors_vector[0][0].layout() ==
+ paddle::experimental::DataLayout::NHWC);
+ return trans;
}
transposer =
std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
@@ -172,33 +210,32 @@ EagerLayoutAutotune<paddle::experimental::Scalar, bool>(
kSlotSmallVectorSize>& tensors_vector,
paddle::experimental::Scalar* axis,
bool* keep_dim) {
+ auto first_layout = tensors_vector[0][0].layout();
std::shared_ptr<EagerLayoutTransformer> transposer = nullptr;
if (paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() ==
paddle::experimental::DataLayout::UNDEFINED) {
- VLOG(3) << " Optimze Layout Unstarted : " << op_name;
- transposer =
- std::make_shared<EagerLayoutTransformer>(op_name, tensors_vector);
+ VLOG(3) << "Optimze Layout was not started" << op_name;
+ transposer = std::make_shared<EagerLayoutTransformer>(
+ op_name, tensors_vector, first_layout);
return transposer;
}
auto desired_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
- if (op_name == "argmax") {
+ if (op_name == "argmax" &&
+ (tensors_vector[0][0].layout() == desired_layout) && (*keep_dim)) {
std::shared_ptr<EagerArgmaxOpTransformer> argmax_transform = nullptr;
argmax_transform = std::make_shared<EagerArgmaxOpTransformer>(op_name);
- if ((tensors_vector[0][0].layout() == desired_layout) && (*keep_dim)) {
- argmax_transform->SetAttr(axis,
- tensors_vector[0][0].layout() ==
- paddle::experimental::DataLayout::NHWC);
- return argmax_transform;
- }
+ argmax_transform->SetAttr(axis,
+ tensors_vector[0][0].layout() ==
+ paddle::experimental::DataLayout::NHWC);
+ return argmax_transform;
}
- VLOG(3) << " Optimze Layout lightly op: " << op_name;
transposer =
std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
return transposer;
}
- // lightly int flatten
+ // lightly for flatten
template <>
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune<int, int>(
const std::string& op_name,
@@ -206,17 +243,17 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune<int, int>(
kSlotSmallVectorSize>& tensors_vector,
int* start_axis,
int* stop_axis) {
+ auto first_layout = tensors_vector[0][0].layout();
std::shared_ptr<EagerLayoutTransformer> transposer = nullptr;
- if (paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() ==
- paddle::experimental::DataLayout::UNDEFINED) {
- VLOG(3) << " Optimze Layout Unstarted : " << op_name;
- transposer =
- std::make_shared<EagerLayoutTransformer>(op_name, tensors_vector);
+ auto desired_layout =
+ paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
+ if (desired_layout == paddle::experimental::DataLayout::UNDEFINED) {
+ VLOG(3) << "Optimze Layout was not started" << op_name;
+ transposer = std::make_shared<EagerLayoutTransformer>(
+ op_name, tensors_vector, first_layout);
return transposer;
}
- bool no_tranpose =
- tensors_vector[0][0].layout() ==
- paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
+ bool no_tranpose = tensors_vector[0][0].layout() == desired_layout;
bool is_valid = ((*start_axis) == 1 && (*stop_axis) == 3);
if (op_name == "flatten" || op_name == "flatten_contiguous_range") {
if (no_tranpose && is_valid) {
@@ -226,15 +263,13 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune<int, int>(
}
}
- VLOG(3) << " Optimze Layout lightly op: " << op_name;
transposer =
std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
return transposer;
}
// lightly int Concat
- // lightly T can be int vector<int> vector<int64_t> IntArray
template <>
+ // default int
inline std::shared_ptr<EagerLayoutTransformer>
EagerLayoutAutotune<paddle::experimental::Scalar>(
const std::string& op_name,
@@ -243,30 +278,21 @@ EagerLayoutAutotune<paddle::experimental::Scalar>(
paddle::experimental::Scalar* axis) {
auto desired_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
+ auto first_layout = tensors_vector[0][0].layout();
std::shared_ptr<EagerLayoutTransformer> transposer = nullptr;
if (desired_layout == paddle::experimental::DataLayout::UNDEFINED) {
- VLOG(3) << " Optimze Layout Unstarted : " << op_name;
- transposer =
- std::make_shared<EagerLayoutTransformer>(op_name, tensors_vector);
+ VLOG(3) << "Optimze Layout was not started" << op_name;
+ transposer = std::make_shared<EagerLayoutTransformer>(
+ op_name, tensors_vector, first_layout);
return transposer;
}
- bool need_transpose = false;
- for (size_t i = 0; i < tensors_vector.size(); i++) {
- for (size_t idx = 0; idx < tensors_vector[0].size(); idx++) {
- if (desired_layout != tensors_vector[i][idx].layout()) {
- need_transpose = true;
- }
- }
- }
- if (need_transpose) {
- VLOG(3) << "Concat need transpose to NCHW " << op_name;
+ if (NeedTransLayout(tensors_vector, desired_layout)) {
+ VLOG(3) << op_name << " need transpose to default layout";
transposer =
std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
return transposer;
} else {
- VLOG(3) << " Optimze Layout lightly op: " << op_name;
auto trans = std::make_shared<EagerConcatOpTransformer>(op_name);
trans->SetAttr(axis, desired_layout);
return trans;
...
@@ -22,9 +22,9 @@ namespace egr {
inline paddle::experimental::Tensor EagerTraceTransposeOp(
const paddle::experimental::DataLayout layout,
const paddle::experimental::Tensor& in) {
- VLOG(4) << "AutoTune Transpose from " << in.layout() << " to " << layout
- << ", tensor's shape is " << in.shape().size();
if (in.shape().size() != 4) {
+ VLOG(4) << "Shape is " << in.shape().size() << " can't transpose to"
+ << paddle::framework::DataLayoutToString(layout);
return in;
}
std::vector<int> axis;
@@ -44,77 +44,75 @@ inline paddle::experimental::Tensor EagerTraceTransposeOp(
// agnostic op
class EagerLayoutTransformer {
+ using Layout = paddle::experimental::DataLayout;
public:
- EagerLayoutTransformer() : op_name_("") {}
- explicit EagerLayoutTransformer(
- const std::string& op_name,
- const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
- kSlotSmallVectorSize>& tensors_vector)
- : op_name_(op_name) {
- final_layout_ = "UNDEFINED";
- auto desired_layout =
- paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
- for (size_t i = 0; i < tensors_vector.size(); i++) {
- for (size_t idx = 0; idx < tensors_vector[0].size(); idx++) {
- if (final_layout_ == "UNDEFINED") {
- final_layout_ = paddle::framework::DataLayoutToString(
- tensors_vector[0][0].layout());
- } else if (tensors_vector[i][idx].layout() == desired_layout) {
- final_layout_ = paddle::framework::DataLayoutToString(desired_layout);
- break;
- }
- }
- }
- VLOG(4) << op_name_ << "final_layout_ is " << final_layout_;
- }
+ EagerLayoutTransformer() : op_name_(""), final_layout_(Layout::UNDEFINED) {}
EagerLayoutTransformer(const EagerLayoutTransformer&) = delete;
EagerLayoutTransformer& operator=(const EagerLayoutTransformer&) = delete;
+ explicit EagerLayoutTransformer(
+ const std::string& op_name,
+ const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+ kSlotSmallVectorSize>& tensors_vector,
+ const Layout final_layout = Layout::UNDEFINED)
+ : op_name_(op_name), final_layout_(final_layout) {
+ VLOG(4) << "Agnostic op : " << op_name_ << " final_layout_ is "
+ << final_layout_;
+ }
virtual ~EagerLayoutTransformer() {}
- virtual paddle::optional<paddle::experimental::Tensor> TransInTensor(
- const std::string& in_name,
- const paddle::optional<paddle::experimental::Tensor>& in) {
- VLOG(4) << op_name_ << "is is agnostic, final_layout_ is " << final_layout_;
- return in;
- }
+ virtual paddle::experimental::Tensor TransInTensor(
+ const std::string& in_name, const paddle::experimental::Tensor& in) {
+ if (final_layout_ == Layout::UNDEFINED || final_layout_ == in.layout()) {
+ VLOG(4) << "EagerLayoutTransformer with no trans";
+ return in;
+ } else {  // from NCHW to NHWC
+ VLOG(4) << "EagerLayoutTransformer with trans from " << in.layout()
+ << " to " << final_layout_;
+ auto out_tensor = EagerTraceTransposeOp(final_layout_, in);
+ phi::DenseTensorUtils::GetMutableMeta(
+ static_cast<phi::DenseTensor*>(out_tensor.impl().get()))
+ ->layout = final_layout_;
+ return out_tensor;
+ }
+ }
- virtual paddle::optional<std::vector<paddle::experimental::Tensor>>
- TransInTensor(
- const std::string& in_name,
- const paddle::optional<std::vector<paddle::experimental::Tensor>>& in) {
- return in;
- }
+ virtual paddle::optional<paddle::experimental::Tensor> TransInTensor(
+ const std::string& in_name,
+ const paddle::optional<paddle::experimental::Tensor>& in) {
+ return in ? TransInTensor(in_name, *in) : in;
+ }
- virtual std::vector<paddle::experimental::Tensor> TransInTensor(
+ virtual std::vector<paddle::experimental::Tensor> TransInTensors(
const std::string& in_name,
const std::vector<paddle::experimental::Tensor>& in) {
- VLOG(4) << " TransInTensor";
return in;
}
- virtual paddle::experimental::Tensor TransInTensor(
- const std::string& in_name, const paddle::experimental::Tensor& in) {
- return in;
- }
+ virtual paddle::optional<std::vector<paddle::experimental::Tensor>>
+ TransInTensors(
+ const std::string& in_name,
+ const paddle::optional<std::vector<paddle::experimental::Tensor>>& in) {
+ VLOG(4) << " TransInTensor";
+ if (in) {
+ return TransInTensors(in_name, *in);
+ }
+ return in;
+ }
- virtual void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) {
- bool use_default = (final_layout_ == "Undefined(AnyLayout)" ||
- final_layout_ == ("UNDEFINED"));
- auto layout = paddle::framework::StringToDataLayout(final_layout_);
- if (!use_default) {
- phi::DenseTensorUtils::GetMutableMeta(
- static_cast<phi::DenseTensor*>(out_tensor->impl().get()))
- ->layout = layout;
- }
- VLOG(4) << op_name_ << "is is agnostic, use_default " << use_default;
- }
+ virtual void SetOutTensorLayout(
+ paddle::optional<paddle::experimental::Tensor>* out_tensor) {
+ VLOG(4) << "optional out_tensor";
+ }
virtual void SetOutTensorLayout(
std::vector<paddle::experimental::Tensor>* out_tensor) {
- bool use_default = (final_layout_ == "Undefined(AnyLayout)" ||
- final_layout_ == ("UNDEFINED"));
+ bool use_default = (final_layout_ == Layout::UNDEFINED);
if (!use_default) {
for (size_t i = 0; i < out_tensor->size(); i++) {
phi::DenseTensorUtils::GetMutableMeta(
@@ -126,9 +124,24 @@ class EagerLayoutTransformer {
VLOG(4) << op_name_ << "is is agnostic, use_default " << use_default;
}
virtual void SetOutTensorLayout(
paddle::optional<std::vector<paddle::experimental::Tensor>>* out_tensor) {
VLOG(4) << "optional out_tensor";
}
virtual void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) {
bool use_default = final_layout_ == Layout::UNDEFINED;
if (!use_default) {
phi::DenseTensorUtils::GetMutableMeta(
static_cast<phi::DenseTensor*>(out_tensor->impl().get()))
->layout = final_layout_;
}
VLOG(4) << op_name_ << "is is agnostic, use_default " << use_default;
}
protected:
std::string op_name_;
- std::string final_layout_;
+ const Layout final_layout_;
};
class EagerHeavilyLayoutSensitiveOpTransformer : public EagerLayoutTransformer {
@@ -145,21 +158,6 @@ class EagerHeavilyLayoutSensitiveOpTransformer : public EagerLayoutTransformer {
}
}
virtual paddle::optional<std::vector<paddle::experimental::Tensor>>
TransInTensor(
const std::string& in_name,
const paddle::optional<std::vector<paddle::experimental::Tensor>>& in) {
VLOG(4) << op_name_ << "is is heavily";
return in;
}
virtual paddle::optional<paddle::experimental::Tensor> TransInTensor(
const std::string& in_name,
const paddle::optional<paddle::experimental::Tensor>& in) {
VLOG(4) << op_name_ << "is is heavily";
return in;
}
paddle::experimental::Tensor TransInTensor(
const std::string& in_name, const paddle::experimental::Tensor& in) {
if (heavily_input_.count(in_name) != 0 && in.layout() != desired_layout_) {
@@ -230,7 +228,6 @@ class EagerLightlyLayoutSensitiveOpTransformer : public EagerLayoutTransformer {
paddle::framework::DataLayoutToString(in.layout());
auto default_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
if (final_layout_ == input_layout && in.shape().size() == 4) {
VLOG(4) << op_name_ << "'s " << in_name << " need transpose from "
<< input_layout << " to default_layout";
@@ -245,7 +242,7 @@ class EagerLightlyLayoutSensitiveOpTransformer : public EagerLayoutTransformer {
return in;
}
- virtual std::vector<paddle::experimental::Tensor> TransInTensor(
+ virtual std::vector<paddle::experimental::Tensor> TransInTensors(
const std::string& in_name,
const std::vector<paddle::experimental::Tensor>& in) {
std::vector<paddle::experimental::Tensor> result;
@@ -340,22 +337,19 @@ class EagerTransposeOpTransformer
paddle::experimental::Tensor TransInTensor(
const std::string& in_name, const paddle::experimental::Tensor& in) {
- VLOG(4) << "with no transpose: EagerTransposeOpTransformer " << in_name
- << "'s layout is "
- << paddle::framework::DataLayoutToString(in.layout());
return in;
}
void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) {
- auto desired_layout =
- paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
- if (out_tensor->layout() != desired_layout) {
+ auto default_layout =
+ paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
+ if (out_tensor->layout() != default_layout) {
VLOG(4) << " Set Out_tensor's layout from "
<< paddle::framework::DataLayoutToString(out_tensor->layout())
- << " to " << final_layout_;
+ << " to " << default_layout;
phi::DenseTensorUtils::GetMutableMeta(
static_cast<phi::DenseTensor*>(out_tensor->impl().get()))
- ->layout = desired_layout;
+ ->layout = default_layout;
}
}
@@ -385,15 +379,15 @@ class EagerArgmaxOpTransformer
void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) {
VLOG(4) << "EagerArgmaxOpTransformer's out layout is"
<< paddle::framework::DataLayoutToString(out_tensor->layout());
- auto desired_layout =
- paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
- if (desired_layout != out_tensor->layout()) {
+ auto default_layout =
+ paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
+ if (default_layout != out_tensor->layout()) {
VLOG(4) << "Change layout from "
<< paddle::framework::DataLayoutToString(out_tensor->layout())
- << " to " << final_layout_;
+ << " to " << default_layout;
phi::DenseTensorUtils::GetMutableMeta(
static_cast<phi::DenseTensor*>(out_tensor->impl().get()))
- ->layout = desired_layout;
+ ->layout = default_layout;
}
}
@@ -410,11 +404,11 @@ class EagerFlattenOpTransformer
explicit EagerFlattenOpTransformer(const std::string& op_name)
: op_name_(op_name) {
VLOG(3) << "Optimze Layout lightly " << op_name;
- auto desired_layout =
- paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
- std::string desired_layout_str =
- paddle::framework::DataLayoutToString(desired_layout);
- final_layout_ = desired_layout_str;
+ auto default_layout =
+ paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
+ std::string default_layout_str =
+ paddle::framework::DataLayoutToString(default_layout);
+ final_layout_ = default_layout_str;
}
// transpose from NHWC to NCHW
@@ -424,16 +418,17 @@ class EagerFlattenOpTransformer
}
void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) {
- VLOG(4) << "EagerArgmaxOpTransformer's out layout is"
+ VLOG(4) << "EagerFlattenOpTransformer's out layout is"
<< paddle::framework::DataLayoutToString(out_tensor->layout());
- auto layout = paddle::framework::StringToDataLayout(final_layout_);
- if (layout != out_tensor->layout()) {
+ auto desired_layout =
+ paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
+ if (desired_layout != out_tensor->layout()) {
VLOG(4) << "Change layout from "
<< paddle::framework::DataLayoutToString(out_tensor->layout())
- << " to " << final_layout_;
+ << " to " << desired_layout;
phi::DenseTensorUtils::GetMutableMeta(
static_cast<phi::DenseTensor*>(out_tensor->impl().get()))
- ->layout = layout;
+ ->layout = desired_layout;
}
}
@@ -450,11 +445,11 @@ class EagerConcatOpTransformer
explicit EagerConcatOpTransformer(const std::string& op_name)
: op_name_(op_name) {
VLOG(3) << "Optimze Layout lightly " << op_name;
- auto desired_layout =
- paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
- std::string desired_layout_str =
- paddle::framework::DataLayoutToString(desired_layout);
- final_layout_ = desired_layout_str;
+ auto default_layout =
+ paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
+ std::string default_layout_str =
+ paddle::framework::DataLayoutToString(default_layout);
+ final_layout_ = default_layout_str;
}
void SetAttr(paddle::experimental::Scalar* axis,
@@ -467,7 +462,7 @@ class EagerConcatOpTransformer
(*axis) = static_cast<paddle::experimental::Scalar>(perm[axes]);
}
- virtual std::vector<paddle::experimental::Tensor> TransInTensor(
+ virtual std::vector<paddle::experimental::Tensor> TransInTensors(
const std::string& in_name,
const std::vector<paddle::experimental::Tensor>& in) {
return in;
...
@@ -14,23 +14,15 @@
#include "paddle/fluid/imperative/layout_autotune.h"
#include "paddle/fluid/eager/api/utils/global_utils.h"
#include "paddle/fluid/framework/op_info.h"
#include "paddle/fluid/imperative/layout_transformer.h"
#include "paddle/phi/backends/gpu/gpu_info.h"
#include "paddle/phi/core/enforce.h"
#include "paddle/phi/core/errors.h"
namespace paddle {
namespace imperative {
- bool LayoutAutoTune::UseLayoutAutoTune() const {
- #if defined(PADDLE_WITH_CUDA)
- return use_layout_autotune_;
- #else
- return false;
- #endif
- }
LayoutAutoTune::LayoutAutoTune() {
const auto& op_info = paddle::framework::OpInfoMap::Instance().map();
for (auto it = op_info.begin(); it != op_info.end(); it++) {
@@ -140,6 +132,26 @@ paddle::imperative::NameVarMap<VarType> DealLightlyLayoutSensitive(
return transposer->Apply(ins, outs, attrs, tracer);
}
LayoutAutotuneGuard::LayoutAutotuneGuard(std::shared_ptr<Tracer> tracer,
bool use_autotune)
: tracer_(tracer) {
pre_layout_autotune_ = tracer_->UseLayoutAutoTune();
if (pre_layout_autotune_ != use_autotune) {
tracer_->EnableLayoutAutoTune();
if (!use_autotune) {
tracer_->DisableLayoutAutoTune();
}
}
}
LayoutAutotuneGuard::~LayoutAutotuneGuard() {
if (pre_layout_autotune_) {
tracer_->EnableLayoutAutoTune();
} else {
tracer_->DisableLayoutAutoTune();
}
}
template <typename VarType>
paddle::imperative::NameVarMap<VarType> AutoTuneLayout(
const std::string& op_type,
@@ -147,7 +159,7 @@ paddle::imperative::NameVarMap<VarType> AutoTuneLayout(
const paddle::imperative::NameVarMap<VarType>& outs,
paddle::framework::AttributeMap* attrs,
const std::shared_ptr<imperative::Tracer>& tracer) {
- if (!LayoutAutoTune::Instance().UseLayoutAutoTune()) {
+ if (!tracer->UseLayoutAutoTune()) {
return ins;
}
// When layout autotuning is enabled, the tuner will check the desired layout.
@@ -165,7 +177,7 @@ paddle::imperative::NameVarMap<VarType> AutoTuneLayout(
} else {
#if defined(PADDLE_WITH_CUDA)
if (!phi::backends::gpu::TensorCoreAvailable()) {
- LayoutAutoTune::Instance().DisableLayoutAutoTune();
+ tracer->DisableLayoutAutoTune();
return ins;
}
#endif
@@ -185,7 +197,7 @@ paddle::imperative::NameVarMap<VarType> AutoTuneLayout(
} else if (is_tune_fp16) {
LayoutAutoTune::Instance().SetDesiredLayout(DataLayout::NHWC);
} else {
- LayoutAutoTune::Instance().DisableLayoutAutoTune();
+ tracer->DisableLayoutAutoTune();
return ins;
}
VLOG(3) << "Tune the layout from "
...
...@@ -19,8 +19,8 @@
#include <unordered_set>
#include "paddle/fluid/framework/type_defs.h"
#include "paddle/fluid/imperative/tracer.h"
#include "paddle/phi/common/layout.h"
namespace paddle {
namespace imperative {
...@@ -35,12 +35,6 @@ class LayoutAutoTune {
    return layout_autoTune;
  }
  bool UseLayoutAutoTune() const;
  void EnableLayoutAutoTune() { use_layout_autotune_ = true; }
  void DisableLayoutAutoTune() { use_layout_autotune_ = false; }
  bool IsHeavilyLayoutSensitive(const std::string& op_type) const {
    return heavily_layout_sensitive_ops_.count(op_type) != 0;
  }
...@@ -64,8 +58,6 @@ class LayoutAutoTune {
 private:
  LayoutAutoTune();
  bool use_layout_autotune_{false};
  std::unordered_set<std::string> layout_agnostic_ops_{};
  std::unordered_set<std::string> heavily_layout_sensitive_ops_{"batch_norm"};
...@@ -73,11 +65,29 @@ class LayoutAutoTune {
  std::unordered_set<std::string> lightly_layout_sensitive_ops_{
      "instance_norm", "softmax", "transpose", "transpose2", "reshape2"};
  // Best Layout in this platform
  DataLayout desired_layout_{DataLayout::UNDEFINED};
  // Default Layout in this model
  DataLayout default_layout_{DataLayout::UNDEFINED};
};

// LayoutAutotuneGuard is used for RAII.
class LayoutAutotuneGuard {
 public:
  LayoutAutotuneGuard(std::shared_ptr<Tracer> tracer, bool use_autotune);
  ~LayoutAutotuneGuard();

  // forbid copy and operator=
  LayoutAutotuneGuard(const LayoutAutotuneGuard& guard) = delete;
  LayoutAutotuneGuard& operator=(const LayoutAutotuneGuard& guard) = delete;

 private:
  std::shared_ptr<Tracer> tracer_;
  bool pre_layout_autotune_;
};

template <typename VarType>
paddle::imperative::NameVarMap<VarType> AutoTuneLayout(
    const std::string& op_type,
......
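A minimal usage sketch of the LayoutAutotuneGuard declared above (illustrative only, not part of this commit; tracer is assumed to be an existing std::shared_ptr<paddle::imperative::Tracer> obtained elsewhere): the guard snapshots the tracer's layout-autotune flag, forces it to the requested value for the enclosing scope, and restores the previous state in its destructor.

  // Illustrative sketch, not part of this change. Assumes tracer is a
  // std::shared_ptr<paddle::imperative::Tracer> that is already available.
  {
    // Run the enclosed tracing with layout autotune turned off.
    paddle::imperative::LayoutAutotuneGuard guard(tracer, /*use_autotune=*/false);
    // ... trace ops whose layout must not be re-tuned ...
  }  // destructor restores the tracer's previous layout-autotune setting

This mirrors how the constructor and destructor added in layout_autotune.cc flip and then restore the thread-local flag on the tracer.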
...@@ -19,8 +19,24 @@
#include "paddle/fluid/imperative/var_helper.h"
#include "paddle/phi/core/enforce.h"
#include "paddle/phi/core/errors.h"
#include "paddle/phi/core/tensor_utils.h"
namespace paddle {
namespace imperative {
template <typename VarType>
void SetOutDataLayout(std::shared_ptr<VarType> var,
                      const paddle::experimental::DataLayout layout) {
  if (var != nullptr) {
    paddle::imperative::SetDataLayout(var, layout);
    // set out_tensor's layout
    if (var->MutableVar()->IsInitialized()) {
      paddle::framework::Variable* tmp_var = var->MutableVar();
      auto* out = tmp_var->GetMutable<framework::LoDTensor>();
      phi::DenseTensorUtils::GetMutableMeta(
          static_cast<framework::LoDTensor*>(out))
          ->layout = layout;
    }
  }
}

template <typename VarType>
std::shared_ptr<VarType> TraceTransposeOp(
...@@ -118,7 +134,7 @@ class LayoutTransformer {
    auto out_vars = outs.at(name);
    for (auto& var : out_vars) {
      if (var != nullptr) {
        paddle::imperative::SetDataLayout(var, layout); paddle::imperative::SetOutDataLayout(var, layout);
      }
    }
    not_in_out = false;
...@@ -130,7 +146,7 @@ class LayoutTransformer {
    for (auto& pair : outs) {
      for (auto& var : pair.second) {
        if (var != nullptr) {
          paddle::imperative::SetDataLayout(var, layout); paddle::imperative::SetOutDataLayout(var, layout);
        }
      }
    }
......
...@@ -42,6 +42,8 @@ thread_local bool Tracer::enable_program_desc_tracing_ = false;
thread_local bool Tracer::has_grad_ = true;
thread_local bool Tracer::use_layout_autotune_ = false;
thread_local AmpLevel Tracer::amp_level_ = AmpLevel::O0;
thread_local phi::DataType Tracer::amp_dtype_ = phi::DataType::FLOAT32;
......
...@@ -28,9 +28,9 @@
#include "paddle/fluid/imperative/basic_engine.h"
#include "paddle/fluid/imperative/jit/program_desc_tracer.h"
#include "paddle/fluid/imperative/layer.h"
#include "paddle/fluid/imperative/layout_autotune.h"
#include "paddle/fluid/platform/macros.h"
#include "paddle/phi/core/compat/arg_map_context.h"
namespace paddle {
namespace imperative {
...@@ -184,6 +184,20 @@ class Tracer {
    }
  }

  void DisableLayoutAutoTune() { use_layout_autotune_ = false; }

  void EnableLayoutAutoTune() { use_layout_autotune_ = true; }

  bool UseLayoutAutoTune() {
#if defined(PADDLE_WITH_CUDA)
    if (phi::backends::gpu::TensorCoreAvailable()) {
      return use_layout_autotune_;
    }
#endif
    use_layout_autotune_ = false;
    return false;
  }

  phi::KernelSignature GetExpectedKernelSignature(
      const std::string& type,
      const NameTensorMap& ins,
...@@ -199,8 +213,8 @@ class Tracer {
  std::unique_ptr<UniqueNameGenerator> generator_;
  platform::Place expected_place_;
  GarbageCollectorMap gcs_;
  static thread_local bool enable_program_desc_tracing_;
  static thread_local bool use_layout_autotune_;
  static thread_local bool has_grad_;
  static thread_local AmpLevel amp_level_;
  static thread_local phi::DataType amp_dtype_;
......
...@@ -2503,19 +2503,14 @@ All parameter, weight, gradient are variables in Paddle.
    return res;
  });
  m.def("enable_layout_autotune", [] { m.def("enable_layout_autotune",
  return paddle::imperative::LayoutAutoTune::Instance() [] { return egr::Controller::Instance().EnableLayoutAutoTune(); });
  .EnableLayoutAutoTune();
  });
  m.def("disable_layout_autotune", [] { m.def("disable_layout_autotune",
  return paddle::imperative::LayoutAutoTune::Instance() [] { return egr::Controller::Instance().DisableLayoutAutoTune(); });
  .DisableLayoutAutoTune();
  });
  m.def("use_layout_autotune", [] { m.def("use_layout_autotune",
  return paddle::imperative::LayoutAutoTune::Instance().UseLayoutAutoTune(); [] { return egr::Controller::Instance().UseLayoutAutoTune(); });
  });
  BindFleetWrapper(&m);
  BindIO(&m);
......
...@@ -52,9 +52,9 @@ inline bool NeedTransformPlace(const paddle::platform::Place& input,
  return ret;
}
inline bool NeedTransformLayout(const paddle::platform::Place& place, inline bool NeedTransformLayout(const DataLayout& input,
                                const DataLayout& input,
                                const DataLayout& target,
                                const paddle::platform::Place& place,
                                const TransformFlag& transform_flag) {
  bool ret = transform_flag.need_trans_layout() &&
             (input != DataLayout::ALL_LAYOUT &&
...@@ -202,9 +202,9 @@ phi::DenseTensor TransformData(phi::DenseTensor* tensor,
  bool trans_layout = false;
  bool trans_dtype = false;
  if (NeedTransformLayout(tensor->place(), if (NeedTransformLayout(tensor->layout(),
                          tensor->layout(),
                          target_args_def.layout,
                          tensor->place(),
                          transform_flag)) {
    out = TransDataLayout(out, target_args_def.layout);
    trans_layout = true;
...@@ -240,9 +240,9 @@ std::shared_ptr<phi::DenseTensor> PrepareData(
          dense_tensor.place(), target_args_def.backend, transform_flag) &&
      !NeedTransformDataType(
          dense_tensor.dtype(), target_args_def.dtype, transform_flag) &&
      !NeedTransformLayout(dense_tensor.place(), !NeedTransformLayout(dense_tensor.layout(),
                           dense_tensor.layout(),
                           target_args_def.layout,
                           dense_tensor.place(),
                           transform_flag))) {
    return std::static_pointer_cast<phi::DenseTensor>(tensor_in);
  }
...@@ -277,9 +277,9 @@ std::unique_ptr<std::vector<phi::DenseTensor>> PrepareData(
          tensor_in->place(), target_args_def.backend, transform_flag) &&
      !NeedTransformDataType(
          tensor_in->dtype(), target_args_def.dtype, transform_flag) &&
      !NeedTransformLayout(tensor_in->place(), !NeedTransformLayout(tensor_in->layout(),
                           tensor_in->layout(),
                           target_args_def.layout,
                           tensor_in->place(),
                           transform_flag))) {
    pt_tensors->emplace_back(
        *std::dynamic_pointer_cast<phi::DenseTensor>(tensor_in));
......
...@@ -46,6 +46,13 @@ class SimpleNet(paddle.nn.Layer):
class LayoutAutoTune(unittest.TestCase):

    def test_config(self):
        paddle.fluid.core.enable_layout_autotune()
        if self.use_autoune():
            self.assertEqual(paddle.fluid.core.use_layout_autotune(), True)
            paddle.fluid.core.disable_layout_autotune()
            self.assertEqual(paddle.fluid.core.use_layout_autotune(), False)

    def setUp(self):
        self.use_autoune()
......
...@@ -130,15 +130,13 @@ def _conv_nd(x,
    if bias is not None:
        channel_dim = channel_dim + len(
            x.shape) if channel_dim < 0 else channel_dim
        if pre_bias.layout == "NHWC":
            channel_dim = 3 # last dim
        if isinstance(x, tuple):
            x = x[0]
        if isinstance(bias, tuple):
            bias = bias[0]
        if len(bias.shape) < len(x.shape):
            tmp_bias = _C_ops.reshape(
                bias, bias.shape + bias, [1 for i in range(channel_dim)] + bias.shape +
                [1 for i in range(len(x.shape) - channel_dim - 1)])
            return _C_ops.add(pre_bias, tmp_bias)
        else:
......