未验证 提交 3da3462f 编写于 作者: N niuliling123 提交者: GitHub

Update layout autotune for module with no modified (#46541)

上级 20eb6e00
......@@ -1093,7 +1093,7 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
tensors_vector_list_str = "{ " + ",".join(
amp_tensors_vector_list) + " }"
if len(amp_tensors_vector_list) == 0:
if len(amp_tensors_vector_list) == 0: # or forward_api_name == "shape":
layout_logic_str = ""
else:
after_call_str = f"{returns_type_str} {result_name} = {forward_function_name}({layout_inputs_call_args_str});\n"
......
......@@ -32,70 +32,50 @@ inline bool NeedTransLayout(
}
return false;
}
// Builds the pass-through transformer for `op_name`: the transformer is
// constructed with the layout of the first tensor in the first slot.
// Also logs whether a desired layout is currently set on LayoutAutoTune.
// Reads tensors_vector[0][0] without a bounds check.
inline std::shared_ptr<EagerLayoutTransformer> BaseTransformer(
    const std::string& op_name,
    const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                               kSlotSmallVectorSize>& tensors_vector) {
  const auto desired =
      paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
  // "Started" means a desired layout other than UNDEFINED has been chosen.
  const bool started =
      !(desired == paddle::experimental::DataLayout::UNDEFINED);
  const auto input_layout = tensors_vector[0][0].layout();
  VLOG(3) << "Layout autotune was is start ? " << started << op_name
          << "'s layout is " << input_layout;
  return std::make_shared<EagerLayoutTransformer>(
      op_name, tensors_vector, input_layout);
}
// For agnostic op like add, relu, exp
// Overload that receives only the op name and its input tensors and returns a
// shared EagerLayoutTransformer built with a chosen final layout.
// NOTE(review): this span shows several statements in two forms (two
// `auto desired_layout` declarations, two consecutive `if (...NeedTransLayout`
// openers, two `auto final_layout` declarations, two trailing returns). It
// looks like removed and added diff lines displayed together, so it is not
// compilable as written -- confirm against the real header before editing.
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
const std::string& op_name,
const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& tensors_vector) {
auto desired_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
auto default_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
// For agnostic op like add, relu, exp
// The layout of the first tensor in the first slot is the reference layout.
auto first_layout = tensors_vector[0][0].layout();
if (NeedTransLayout(tensors_vector, first_layout)) {
auto desired_layout = DesiredLayout();
// is_started is true when the desired layout is anything but UNDEFINED.
bool is_started =
!(desired_layout == paddle::experimental::DataLayout::UNDEFINED);
if (is_started && NeedTransLayout(tensors_vector, first_layout)) {
// need_trans_back becomes true as soon as any inspected tensor has a
// shape().size() other than 4.
bool need_trans_back = false;
// NOTE(review): the inner bound is tensors_vector[0].size() while the element
// read is tensors_vector[i][idx]; confirm every slot has the same length.
for (size_t i = 0; i < tensors_vector.size(); i++) {
for (size_t idx = 0; idx < tensors_vector[0].size(); idx++) {
if (4 != tensors_vector[i][idx].shape().size()) {
need_trans_back = true;
VLOG(3) << "Agnostic op " << op_name << " shape is "
<< tensors_vector[i][idx].shape().size() << " and layout is "
<< tensors_vector[i][idx].layout();
}
}
}
// Use the default layout when need_trans_back is set, otherwise the desired
// layout.
auto final_layout = need_trans_back ? default_layout : desired_layout;
auto final_layout = need_trans_back ? DefaultLayout() : desired_layout;
VLOG(4) << op_name << "'s has different layout, need trans to "
<< final_layout;
return std::make_shared<EagerLayoutTransformer>(
op_name, tensors_vector, final_layout);
}
// Fallback: build the transformer with the first input's layout.
return BaseTransformer(op_name, tensors_vector);
return std::make_shared<EagerLayoutTransformer>(
op_name, tensors_vector, first_layout);
}
// For lightly op like reduce
// Generic single-attribute overload; `attr` is not read anywhere in the
// visible body.
// NOTE(review): two bodies appear back to back here (one ending at
// `return transposer;`, one starting at the DesiredLayout() check). It looks
// like removed and added diff lines displayed together; as written,
// everything after the first return is unreachable -- confirm against the
// real header.
template <typename T>
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
const std::string& op_name,
const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& tensors_vector,
T* attr) {
// Logs the shape().size() and layout of the first tensor of the first slot.
VLOG(3) << "Lightly op " << op_name << "'s shape is "
<< tensors_vector[0][0].shape().size() << " and layout is "
<< tensors_vector[0][0].layout();
std::shared_ptr<EagerLayoutTransformer> transposer = nullptr;
transposer =
std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
return transposer;
// For lightly op like reduce
// NOTE(review): this condition is true when the desired layout is NOT
// UNDEFINED, yet the message logged inside says "unstarted"; the other
// overloads in this file test `== UNDEFINED` before logging that message.
// Confirm the intended polarity.
if (!(DesiredLayout() == paddle::experimental::DataLayout::UNDEFINED)) {
VLOG(4) << "LayoutAutotune was unstarted. Current op :" << op_name;
return std::make_shared<EagerLayoutTransformer>(
op_name, tensors_vector, tensors_vector[0][0].layout());
}
return std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
}
// For lightly op like argmax
template <typename T1, typename T2>
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
const std::string& op_name,
......@@ -103,28 +83,23 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
kSlotSmallVectorSize>& tensors_vector,
T1* axis,
T2* keep_dim) {
VLOG(3) << "Lightly op " << op_name << "'s shape is "
<< tensors_vector[0][0].shape().size() << " and layout is "
<< tensors_vector[0][0].layout();
// For lightly op like argmax
return EagerLayoutAutotune<T1>(op_name, tensors_vector, axis);
}
// heavily string data_format, data_layout
template <>
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
const std::string& op_name,
const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& tensors_vector,
std::string* attr) {
auto first_layout = tensors_vector[0][0].layout();
// Heavily op with (string) data_format, data_layout
auto transposer = std::make_shared<EagerLayoutTransformer>(
op_name, tensors_vector, first_layout);
if (paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() ==
paddle::experimental::DataLayout::UNDEFINED) {
op_name, tensors_vector, tensors_vector[0][0].layout());
if (DesiredLayout() == paddle::experimental::DataLayout::UNDEFINED) {
// Layout autotune only supports model with convolutional layers
VLOG(3) << "Optimze Layout was not started " << op_name;
if (op_name != "conv2d") {
VLOG(4) << "LayoutAutotune was unstarted. Current op :" << op_name;
return transposer;
} else {
auto data_type = tensors_vector[0][0].dtype();
......@@ -134,7 +109,8 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
bool is_tune_fp16 =
(data_type == paddle::experimental::DataType::FLOAT16) &&
(*attr == "NCHW");
VLOG(3) << "Conv2d_dy's dtype " << data_type << " format" << (*attr);
VLOG(4) << "LayoutAutoTune assert with dtype and layout, Current op : "
<< op_name;
if (is_tune_fp32) {
paddle::imperative::LayoutAutoTune::Instance().SetDesiredLayout(
paddle::experimental::DataLayout::NCHW);
......@@ -147,58 +123,45 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
paddle::imperative::LayoutAutoTune::Instance().SetDefaultLayout(
paddle::experimental::DataLayout::NCHW);
} else {
VLOG(4) << "DisableLayoutAutoTune accoding to Conv op"
<< " dtype : " << data_type << " format : " << (*attr);
egr::Controller::Instance().DisableLayoutAutoTune();
return transposer;
}
VLOG(3)
<< "Tune the layout from " << *attr << " to "
<< paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
VLOG(4) << "LayoutAutoTune from " << *attr << " to " << DesiredLayout();
}
}
if (paddle::imperative::LayoutAutoTune::Instance().IsHeavilyLayoutSensitive(
op_name)) {
VLOG(3)
<< op_name
<< "'s LayoutTransformer is EagerHeavilyLayoutSensitiveOpTransformer";
auto heavily_transposer =
std::make_shared<EagerHeavilyLayoutSensitiveOpTransformer>(op_name,
return std::make_shared<EagerHeavilyLayoutSensitiveOpTransformer>(op_name,
attr);
return heavily_transposer;
}
VLOG(3) << op_name << "'s LayoutTransformer is unimplemented. Use default.";
return transposer;
return std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
}
// lightly transpose
// Specialization for ops whose attribute is a std::vector<int>* (the visible
// body special-cases the op named "transpose2").
// NOTE(review): this span shows the UNDEFINED check twice (the first `if` is
// never closed), the "transpose2" condition with two alternative second
// lines, and two trailing returns. It looks like removed and added diff
// lines displayed together, so it is not compilable as written -- confirm
// against the real header.
template <>
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
const std::string& op_name,
const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& tensors_vector,
std::vector<int>* attr) {
auto first_layout = tensors_vector[0][0].layout();
std::shared_ptr<EagerLayoutTransformer> transposer = nullptr;
// When no desired layout is set (UNDEFINED), return a transformer built with
// the first input's own layout.
if (paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() ==
paddle::experimental::DataLayout::UNDEFINED) {
VLOG(3) << "Optimze Layout was not started" << op_name;
transposer = std::make_shared<EagerLayoutTransformer>(
op_name, tensors_vector, first_layout);
return transposer;
// lightly transpose
if (DesiredLayout() == paddle::experimental::DataLayout::UNDEFINED) {
VLOG(4) << "LayoutAutotune was unstarted. Current op :" << op_name;
return std::make_shared<EagerLayoutTransformer>(
op_name, tensors_vector, tensors_vector[0][0].layout());
}
// For "transpose2" whose first input already has the desired layout, use the
// dedicated transpose transformer; SetAttr receives `attr` and a flag telling
// whether that input's layout is NHWC.
if (op_name == "transpose2" &&
(tensors_vector[0][0].layout() ==
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout())) {
(tensors_vector[0][0].layout() == DesiredLayout())) {
auto trans = std::make_shared<EagerTransposeOpTransformer>(op_name);
trans->SetAttr(attr,
tensors_vector[0][0].layout() ==
paddle::experimental::DataLayout::NHWC);
return trans;
}
// Every other case falls back to the lightly-layout-sensitive transformer.
transposer =
std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
return transposer;
return std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
}
// lightly int argmax
......@@ -210,19 +173,14 @@ EagerLayoutAutotune<paddle::experimental::Scalar, bool>(
kSlotSmallVectorSize>& tensors_vector,
paddle::experimental::Scalar* axis,
bool* keep_dim) {
auto first_layout = tensors_vector[0][0].layout();
std::shared_ptr<EagerLayoutTransformer> transposer = nullptr;
if (paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() ==
paddle::experimental::DataLayout::UNDEFINED) {
VLOG(3) << "Optimze Layout was not started" << op_name;
transposer = std::make_shared<EagerLayoutTransformer>(
op_name, tensors_vector, first_layout);
return transposer;
if (DesiredLayout() == paddle::experimental::DataLayout::UNDEFINED) {
VLOG(4) << "LayoutAutotune was unstarted. Current op :" << op_name;
return std::make_shared<EagerLayoutTransformer>(
op_name, tensors_vector, tensors_vector[0][0].layout());
}
auto desired_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
if (op_name == "argmax" &&
(tensors_vector[0][0].layout() == desired_layout) && (*keep_dim)) {
(tensors_vector[0][0].layout() == DesiredLayout()) && (*keep_dim)) {
std::shared_ptr<EagerArgmaxOpTransformer> argmax_transform = nullptr;
argmax_transform = std::make_shared<EagerArgmaxOpTransformer>(op_name);
argmax_transform->SetAttr(axis,
......@@ -230,12 +188,9 @@ EagerLayoutAutotune<paddle::experimental::Scalar, bool>(
paddle::experimental::DataLayout::NHWC);
return argmax_transform;
}
transposer =
std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
return transposer;
return std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
}
// lightly for flatten
template <>
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune<int, int>(
const std::string& op_name,
......@@ -243,32 +198,22 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune<int, int>(
kSlotSmallVectorSize>& tensors_vector,
int* start_axis,
int* stop_axis) {
auto first_layout = tensors_vector[0][0].layout();
std::shared_ptr<EagerLayoutTransformer> transposer = nullptr;
auto desired_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
if (desired_layout == paddle::experimental::DataLayout::UNDEFINED) {
VLOG(3) << "Optimze Layout was not started" << op_name;
transposer = std::make_shared<EagerLayoutTransformer>(
op_name, tensors_vector, first_layout);
return transposer;
if (DesiredLayout() == paddle::experimental::DataLayout::UNDEFINED) {
VLOG(4) << "Optimze Layout was not started" << op_name;
return std::make_shared<EagerLayoutTransformer>(
op_name, tensors_vector, tensors_vector[0][0].layout());
}
bool no_tranpose = tensors_vector[0][0].layout() == desired_layout;
bool no_tranpose = tensors_vector[0][0].layout() == DesiredLayout();
bool is_valid = ((*start_axis) == 1 && (*stop_axis) == 3);
if (op_name == "flatten" || op_name == "flatten_contiguous_range") {
if (no_tranpose && is_valid) {
std::shared_ptr<EagerFlattenOpTransformer> flatten_transform = nullptr;
flatten_transform = std::make_shared<EagerFlattenOpTransformer>(op_name);
return flatten_transform;
return std::make_shared<EagerFlattenOpTransformer>(op_name);
}
}
transposer =
std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
return transposer;
return std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
}
// lightly int Concat
template <>
inline std::shared_ptr<EagerLayoutTransformer>
EagerLayoutAutotune<paddle::experimental::Scalar>(
......@@ -276,27 +221,26 @@ EagerLayoutAutotune<paddle::experimental::Scalar>(
const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& tensors_vector,
paddle::experimental::Scalar* axis) {
auto desired_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
auto first_layout = tensors_vector[0][0].layout();
std::shared_ptr<EagerLayoutTransformer> transposer = nullptr;
if (desired_layout == paddle::experimental::DataLayout::UNDEFINED) {
VLOG(3) << "Optimze Layout was not started" << op_name;
transposer = std::make_shared<EagerLayoutTransformer>(
op_name, tensors_vector, first_layout);
return transposer;
if (DesiredLayout() == paddle::experimental::DataLayout::UNDEFINED) {
VLOG(4) << "Optimze Layout was not started" << op_name;
return std::make_shared<EagerLayoutTransformer>(
op_name, tensors_vector, tensors_vector[0][0].layout());
}
auto desired_layout = DesiredLayout();
if (NeedTransLayout(tensors_vector, desired_layout)) {
VLOG(3) << op_name << " need transpose to default layout";
transposer =
std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
return transposer;
} else {
VLOG(4) << op_name << "'s has different layout";
return std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
}
if (op_name == "Concat") {
if (desired_layout == tensors_vector[0][0].layout() &&
tensors_vector[0][0].shape().size() == 4) {
auto trans = std::make_shared<EagerConcatOpTransformer>(op_name);
trans->SetAttr(axis, desired_layout);
return trans;
}
}
return std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
}
} // namespace egr
......@@ -23,7 +23,7 @@ inline paddle::experimental::Tensor EagerTraceTransposeOp(
const paddle::experimental::DataLayout layout,
const paddle::experimental::Tensor& in) {
VLOG(4) << "AutoTune Transpose from " << in.layout() << " to " << layout
<< ", tensor's shape is " << in.shape().size();
<< ", tensor's dim size is " << in.shape().size();
if (in.shape().size() != 4) {
return in;
}
......@@ -36,12 +36,72 @@ inline paddle::experimental::Tensor EagerTraceTransposeOp(
axis = {0, 1, 2, 3};
}
auto out_tensor = transpose_ad_func(in, axis);
VLOG(4) << "AutoTune Transpose from "
<< paddle::framework::DataLayoutToString(in.layout()) << " to "
<< paddle::framework::DataLayoutToString(layout);
VLOG(4) << "AutoTune Transpose from " << in.layout() << " to " << layout;
return out_tensor;
}
// Shorthand for LayoutAutoTune::Instance().GetDesiredLayout().
inline paddle::experimental::DataLayout DesiredLayout() {
  auto&& tuner = paddle::imperative::LayoutAutoTune::Instance();
  return tuner.GetDesiredLayout();
}
// Shorthand for LayoutAutoTune::Instance().GetDefaultLayout().
inline paddle::experimental::DataLayout DefaultLayout() {
  auto&& tuner = paddle::imperative::LayoutAutoTune::Instance();
  return tuner.GetDefaultLayout();
}
// Overwrites the layout field in `out_tensor`'s metadata with `layout`.
// Does nothing (and logs nothing) when the tensor already reports `layout`.
// The tensor's impl() is cast to phi::DenseTensor without a type check.
inline void UpdateLayout(paddle::experimental::Tensor* out_tensor,
                         const paddle::experimental::DataLayout layout) {
  if (out_tensor->layout() == layout) {
    return;
  }
  VLOG(4) << "Update out_tensor's layout from " << out_tensor->layout()
          << " to " << layout;
  auto* dense = static_cast<phi::DenseTensor*>(out_tensor->impl().get());
  phi::DenseTensorUtils::GetMutableMeta(dense)->layout = layout;
}
// Rewrites, in place, the first four int32 values stored in `out_tensor` so
// that they are reordered between NCHW and NHWC order.
//
// The reorder happens only when all of the following hold: the desired and
// default layouts differ, `layout` equals the desired layout, and `dim_size`
// is 4. Otherwise the buffer is left untouched.
//
// out_tensor: its impl() is cast to phi::DenseTensor and read as int32_t.
// layout:     layout compared against the desired layout.
// dim_size:   number of leading values copied from the buffer.
inline void DealWithShapeOp(paddle::experimental::Tensor* out_tensor,
                            const paddle::experimental::DataLayout layout,
                            int dim_size) {
  const auto desired = DesiredLayout();
  const auto fallback = DefaultLayout();
  auto* dense = static_cast<phi::DenseTensor*>(out_tensor->impl().get());
  int32_t* shape_data = dense->data<int32_t>();
  const bool change_dim =
      desired != fallback && layout == desired && dim_size == 4;
  VLOG(6) << "'Shape OP', layout autotune: True"
          << " desired_layout: " << desired
          << " default_layout: " << fallback
          << " tensor layout: " << out_tensor->layout()
          << " tensor's shape size is : " << dim_size;

  // Snapshot the current values so the permutation below reads from an
  // unmodified copy while writing into the tensor buffer.
  std::vector<int32_t> snapshot(dim_size);
  for (int axis = 0; axis < dim_size; ++axis) {
    snapshot[axis] = shape_data[axis];
  }

  const auto desired_name = paddle::framework::DataLayoutToString(desired);
  if (!change_dim) {
    return;
  }

  if (desired_name == "NCHW") {
    // NCHW -> NHWC
    VLOG(6) << "layout autotune get Shape from NCHW -> NHWC " << shape_data[0]
            << " " << shape_data[1] << " " << shape_data[2] << " "
            << shape_data[3] << " to " << snapshot[0] << " " << snapshot[2]
            << " " << snapshot[3] << " " << snapshot[1];
    shape_data[0] = snapshot[0];
    shape_data[1] = snapshot[2];
    shape_data[2] = snapshot[3];
    shape_data[3] = snapshot[1];
  } else if (desired_name == "NHWC") {
    // NHWC -> NCHW
    VLOG(6) << "layout autotune get Shape from NHWC -> NCHW " << shape_data[0]
            << " " << shape_data[1] << " " << shape_data[2] << " "
            << shape_data[3] << " to " << snapshot[0] << " " << snapshot[3]
            << " " << snapshot[1] << " " << snapshot[2];
    shape_data[0] = snapshot[0];
    shape_data[1] = snapshot[3];
    shape_data[2] = snapshot[1];
    shape_data[3] = snapshot[2];
  }
}
// agnostic op
class EagerLayoutTransformer {
using Layout = paddle::experimental::DataLayout;
......@@ -58,27 +118,27 @@ class EagerLayoutTransformer {
const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& tensors_vector,
const Layout final_layout = Layout::UNDEFINED)
: op_name_(op_name), final_layout_(final_layout) {
VLOG(4) << "Agnostic op : " << op_name_ << " final_layout_ is "
<< final_layout_;
: op_name_(op_name), final_layout_(final_layout), dim_size_(1) {
VLOG(4) << "Agnostic op : " << op_name_ << "'s layout is " << final_layout_;
}
virtual ~EagerLayoutTransformer() {}
virtual paddle::experimental::Tensor TransInTensor(
const std::string& in_name, const paddle::experimental::Tensor& in) {
if (final_layout_ == Layout::UNDEFINED || final_layout_ == in.layout()) {
VLOG(4) << "EagerLayoutTransformer with no trans";
return in;
} else { // from NCHW to NHWC
VLOG(4) << "EagerLayoutTransformer with trans from " << in.layout()
<< " to " << final_layout_;
// update in shape size
dim_size_ = in.shape().size();
bool need_trans =
!(final_layout_ == Layout::UNDEFINED || final_layout_ == in.layout());
// This is for Agnostic op when layout is differnet
if (need_trans) {
auto out_tensor = EagerTraceTransposeOp(final_layout_, in);
phi::DenseTensorUtils::GetMutableMeta(
static_cast<phi::DenseTensor*>(out_tensor.impl().get()))
->layout = final_layout_;
return out_tensor;
}
return in;
}
virtual paddle::optional<paddle::experimental::Tensor> TransInTensor(
......@@ -90,7 +150,6 @@ class EagerLayoutTransformer {
virtual std::vector<paddle::experimental::Tensor> TransInTensors(
const std::string& in_name,
const std::vector<paddle::experimental::Tensor>& in) {
VLOG(4) << " TransInTensor";
return in;
}
......@@ -98,72 +157,59 @@ class EagerLayoutTransformer {
TransInTensors(
const std::string& in_name,
const paddle::optional<std::vector<paddle::experimental::Tensor>>& in) {
VLOG(4) << " TransInTensor";
if (in) {
return TransInTensors(in_name, *in);
}
return in;
}
virtual void SetOutTensorLayout(
paddle::optional<paddle::experimental::Tensor>* out_tensor) {
VLOG(4) << "optional out_tensor";
return (in ? TransInTensors(in_name, *in) : in);
}
virtual void SetOutTensorLayout(
std::vector<paddle::experimental::Tensor>* out_tensor) {
bool use_default = (final_layout_ == Layout::UNDEFINED);
if (!use_default) {
bool update_layout = !(final_layout_ == Layout::UNDEFINED);
if (update_layout) {
for (size_t i = 0; i < out_tensor->size(); i++) {
phi::DenseTensorUtils::GetMutableMeta(
static_cast<phi::DenseTensor*>((*out_tensor)[i].impl().get()))
->layout =
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
->layout = DesiredLayout();
}
}
VLOG(4) << op_name_ << "is is agnostic, use_default " << use_default;
}
virtual void SetOutTensorLayout(
paddle::optional<paddle::experimental::Tensor>* out_tensor) {
VLOG(4) << "AutoTune out tensor is optional";
}
virtual void SetOutTensorLayout(
paddle::optional<std::vector<paddle::experimental::Tensor>>* out_tensor) {
VLOG(4) << "optional out_tensor";
VLOG(4) << "AutoTune out tensor is optional";
}
virtual void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) {
bool use_default = final_layout_ == Layout::UNDEFINED;
if (!use_default) {
phi::DenseTensorUtils::GetMutableMeta(
static_cast<phi::DenseTensor*>(out_tensor->impl().get()))
->layout = final_layout_;
if (op_name_ == "shape") {
return DealWithShapeOp(out_tensor, final_layout_, dim_size_);
}
bool need_update = !(final_layout_ == Layout::UNDEFINED);
if (need_update) {
UpdateLayout(out_tensor, final_layout_);
}
VLOG(4) << op_name_ << "is is agnostic, use_default " << use_default;
}
protected:
std::string op_name_;
const Layout final_layout_;
int dim_size_;
};
class EagerHeavilyLayoutSensitiveOpTransformer : public EagerLayoutTransformer {
public:
explicit EagerHeavilyLayoutSensitiveOpTransformer(const std::string& op_name,
std::string* layout)
: op_name_(op_name),
desired_layout_(
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout()) {
VLOG(3) << "Optimze Layout heavily op: " << op_name;
final_layout_ = paddle::framework::DataLayoutToString(desired_layout_);
if ((*layout) != final_layout_) {
*layout = final_layout_;
}
: op_name_(op_name), desired_layout_(DesiredLayout()) {
VLOG(4) << "Heavily op: " << op_name;
*layout = paddle::framework::DataLayoutToString(DesiredLayout());
}
paddle::experimental::Tensor TransInTensor(
const std::string& in_name, const paddle::experimental::Tensor& in) {
if (heavily_input_.count(in_name) != 0 && in.layout() != desired_layout_) {
VLOG(4) << op_name_ << "'s " << in_name << " need transpose from "
<< paddle::framework::DataLayoutToString(in.layout()) << " to "
<< final_layout_;
auto out_tensor = EagerTraceTransposeOp(desired_layout_, in);
return out_tensor;
}
......@@ -171,14 +217,7 @@ class EagerHeavilyLayoutSensitiveOpTransformer : public EagerLayoutTransformer {
}
void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) {
if (out_tensor->layout() != desired_layout_) {
VLOG(4) << " Set Out_tensor's layout from "
<< paddle::framework::DataLayoutToString(out_tensor->layout())
<< " to " << final_layout_;
phi::DenseTensorUtils::GetMutableMeta(
static_cast<phi::DenseTensor*>(out_tensor->impl().get()))
->layout = desired_layout_;
}
UpdateLayout(out_tensor, desired_layout_);
}
void SetOutTensorLayout(
......@@ -192,10 +231,8 @@ class EagerHeavilyLayoutSensitiveOpTransformer : public EagerLayoutTransformer {
std::vector<paddle::experimental::Tensor>* out_tensor) {
for (size_t i = 0; i < out_tensor->size(); i++) {
if ((*out_tensor)[i].layout() != desired_layout_) {
VLOG(4) << " Set Out_tensor's layout from "
<< paddle::framework::DataLayoutToString(
(*out_tensor)[i].layout())
<< " to " << final_layout_;
VLOG(4) << "Update out_tensor's layout from "
<< (*out_tensor)[i].layout() << " to " << desired_layout_;
phi::DenseTensorUtils::GetMutableMeta(
static_cast<phi::DenseTensor*>((*out_tensor)[i].impl().get()))
->layout = desired_layout_;
......@@ -205,7 +242,6 @@ class EagerHeavilyLayoutSensitiveOpTransformer : public EagerLayoutTransformer {
protected:
std::string op_name_;
std::string final_layout_;
const paddle::experimental::DataLayout desired_layout_;
std::unordered_set<std::string> heavily_input_{"x", "y", "input"};
};
......@@ -213,11 +249,10 @@ class EagerHeavilyLayoutSensitiveOpTransformer : public EagerLayoutTransformer {
class EagerLightlyLayoutSensitiveOpTransformer : public EagerLayoutTransformer {
public:
EagerLightlyLayoutSensitiveOpTransformer() {}
explicit EagerLightlyLayoutSensitiveOpTransformer(const std::string& op_name)
: op_name_(op_name) {
VLOG(3) << "Optimze Layout lightly " << op_name;
auto desired_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
explicit EagerLightlyLayoutSensitiveOpTransformer(
const std::string& op_name) {
VLOG(4) << "Lightly op : " << op_name;
auto desired_layout = DesiredLayout();
final_layout_ = paddle::framework::DataLayoutToString(desired_layout);
}
......@@ -226,11 +261,8 @@ class EagerLightlyLayoutSensitiveOpTransformer : public EagerLayoutTransformer {
const std::string& in_name, const paddle::experimental::Tensor& in) {
std::string input_layout =
paddle::framework::DataLayoutToString(in.layout());
auto default_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
auto default_layout = DefaultLayout();
if (final_layout_ == input_layout && in.shape().size() == 4) {
VLOG(4) << op_name_ << "'s " << in_name << " need transpose from "
<< input_layout << " to default_layout";
auto out_tensor = EagerTraceTransposeOp(
paddle::experimental::DataLayout::UNDEFINED, in);
phi::DenseTensorUtils::GetMutableMeta(
......@@ -238,7 +270,6 @@ class EagerLightlyLayoutSensitiveOpTransformer : public EagerLayoutTransformer {
->layout = default_layout;
return out_tensor;
}
VLOG(4) << in_name << "'s layout is " << input_layout;
return in;
}
......@@ -246,15 +277,11 @@ class EagerLightlyLayoutSensitiveOpTransformer : public EagerLayoutTransformer {
const std::string& in_name,
const std::vector<paddle::experimental::Tensor>& in) {
std::vector<paddle::experimental::Tensor> result;
auto desired_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
auto default_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
auto desired_layout = DesiredLayout();
auto default_layout = DefaultLayout();
for (size_t i = 0; i < in.size(); i++) {
auto in_tensor = in[i];
if (in_tensor.layout() == desired_layout) {
VLOG(4) << op_name_ << "'s " << in_name << " need transpose from "
<< final_layout_ << " to default_layout";
auto out_tensor = EagerTraceTransposeOp(
paddle::experimental::DataLayout::UNDEFINED, in_tensor);
phi::DenseTensorUtils::GetMutableMeta(
......@@ -269,33 +296,20 @@ class EagerLightlyLayoutSensitiveOpTransformer : public EagerLayoutTransformer {
}
void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) {
auto out_layout = out_tensor->layout();
auto default_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
if (out_layout != default_layout) {
VLOG(4) << op_name_ << "'s out need transpose to default_layout";
phi::DenseTensorUtils::GetMutableMeta(
static_cast<phi::DenseTensor*>(out_tensor->impl().get()))
->layout = default_layout;
}
UpdateLayout(out_tensor, DefaultLayout());
}
void SetOutTensorLayout(
std::vector<paddle::experimental::Tensor*>* out_tensor) {
for (size_t i = 0; i < out_tensor->size(); i++) {
VLOG(4) << "out layout is"
<< paddle::framework::DataLayoutToString(
(*out_tensor)[i]->layout());
SetOutTensorLayout((*out_tensor)[i]);
}
}
void SetOutTensorLayout(
std::vector<paddle::experimental::Tensor>* out_tensor) {
auto default_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
auto default_layout = DefaultLayout();
for (size_t i = 0; i < out_tensor->size(); i++) {
VLOG(4) << " out_tensor layout trans to default ";
phi::DenseTensorUtils::GetMutableMeta(
static_cast<phi::DenseTensor*>((*out_tensor)[i].impl().get()))
->layout = default_layout;
......@@ -303,7 +317,6 @@ class EagerLightlyLayoutSensitiveOpTransformer : public EagerLayoutTransformer {
}
protected:
std::string op_name_;
std::string final_layout_;
std::unordered_set<std::string> heavily_input_{"x", "y", "input"};
};
......@@ -312,18 +325,11 @@ class EagerTransposeOpTransformer
: public EagerLightlyLayoutSensitiveOpTransformer {
public:
EagerTransposeOpTransformer() {}
explicit EagerTransposeOpTransformer(const std::string& op_name)
: op_name_(op_name) {
VLOG(3) << "Optimze Layout TransposeOpTransformer " << op_name;
auto desired_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
std::string desired_layout_str =
paddle::framework::DataLayoutToString(desired_layout);
final_layout_ = desired_layout_str;
explicit EagerTransposeOpTransformer(const std::string& op_name) {
VLOG(4) << "AutoTuneTransformer op: " << op_name;
}
void SetAttr(std::vector<int>* axis, bool is_nhwc) {
// input's layout is nhwc and input's layout === desired_layout
std::vector<int> perm_nchw = {0, 2, 3, 1};
std::vector<int> perm_nhwc = {0, 3, 1, 2};
auto perm = is_nhwc ? perm_nhwc : perm_nchw;
......@@ -331,8 +337,6 @@ class EagerTransposeOpTransformer
(*axis)[1] = perm[(*axis)[1]];
(*axis)[2] = perm[(*axis)[2]];
(*axis)[3] = perm[(*axis)[3]];
VLOG(4) << " EagerTransposeOpTransformer " << op_name_
<< "'s layout is equal to desire: " << is_nhwc;
}
paddle::experimental::Tensor TransInTensor(
......@@ -341,31 +345,16 @@ class EagerTransposeOpTransformer
}
void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) {
auto default_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
if (out_tensor->layout() != default_layout) {
VLOG(4) << " Set Out_tensor's layout from "
<< paddle::framework::DataLayoutToString(out_tensor->layout())
<< " to " << default_layout;
phi::DenseTensorUtils::GetMutableMeta(
static_cast<phi::DenseTensor*>(out_tensor->impl().get()))
->layout = default_layout;
}
UpdateLayout(out_tensor, DefaultLayout());
}
protected:
std::string op_name_;
std::string final_layout_;
std::unordered_set<std::string> heavily_input_{"x", "y", "input"};
};
class EagerArgmaxOpTransformer
: public EagerLightlyLayoutSensitiveOpTransformer {
public:
EagerArgmaxOpTransformer() {}
explicit EagerArgmaxOpTransformer(const std::string& op_name)
: op_name_(op_name) {
VLOG(3) << "Optimze Layout lightly " << op_name;
explicit EagerArgmaxOpTransformer(const std::string& op_name) {
VLOG(4) << "AutoTuneTransformer op: " << op_name;
}
void SetAttr(paddle::experimental::Scalar* axis, bool is_nhwc) {
......@@ -377,38 +366,16 @@ class EagerArgmaxOpTransformer
}
void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) {
VLOG(4) << "EagerArgmaxOpTransformer's out layout is"
<< paddle::framework::DataLayoutToString(out_tensor->layout());
auto default_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
if (default_layout != out_tensor->layout()) {
VLOG(4) << "Change layout from "
<< paddle::framework::DataLayoutToString(out_tensor->layout())
<< " to " << default_layout;
phi::DenseTensorUtils::GetMutableMeta(
static_cast<phi::DenseTensor*>(out_tensor->impl().get()))
->layout = default_layout;
UpdateLayout(out_tensor, DesiredLayout());
}
}
protected:
std::string op_name_;
std::string final_layout_;
std::unordered_set<std::string> heavily_input_{"x", "y", "input"};
};
class EagerFlattenOpTransformer
: public EagerLightlyLayoutSensitiveOpTransformer {
public:
EagerFlattenOpTransformer() {}
explicit EagerFlattenOpTransformer(const std::string& op_name)
: op_name_(op_name) {
VLOG(3) << "Optimze Layout lightly " << op_name;
auto default_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
std::string default_layout_str =
paddle::framework::DataLayoutToString(default_layout);
final_layout_ = default_layout_str;
explicit EagerFlattenOpTransformer(const std::string& op_name) {
VLOG(4) << "AutoTuneTransformer op: " << op_name;
}
// transpose from NHWC to NCHW
......@@ -418,38 +385,16 @@ class EagerFlattenOpTransformer
}
void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) {
VLOG(4) << "EagerFlattenOpTransformer's out layout is"
<< paddle::framework::DataLayoutToString(out_tensor->layout());
auto desired_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
if (desired_layout != out_tensor->layout()) {
VLOG(4) << "Change layout from "
<< paddle::framework::DataLayoutToString(out_tensor->layout())
<< " to " << desired_layout;
phi::DenseTensorUtils::GetMutableMeta(
static_cast<phi::DenseTensor*>(out_tensor->impl().get()))
->layout = desired_layout;
}
UpdateLayout(out_tensor, DefaultLayout());
}
protected:
std::string op_name_;
std::string final_layout_;
std::unordered_set<std::string> heavily_input_{"x", "y", "input"};
};
class EagerConcatOpTransformer
: public EagerLightlyLayoutSensitiveOpTransformer {
public:
EagerConcatOpTransformer() {}
explicit EagerConcatOpTransformer(const std::string& op_name)
: op_name_(op_name) {
VLOG(3) << "Optimze Layout lightly " << op_name;
auto default_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
std::string default_layout_str =
paddle::framework::DataLayoutToString(default_layout);
final_layout_ = default_layout_str;
explicit EagerConcatOpTransformer(const std::string& op_name) {
VLOG(4) << "AutoTuneTransformer op : " << op_name;
}
void SetAttr(paddle::experimental::Scalar* axis,
......@@ -457,6 +402,7 @@ class EagerConcatOpTransformer
std::vector<int> perm_nhwc = {0, 3, 1, 2};
std::vector<int> perm_nchw = {0, 2, 3, 1};
int axes = axis->to<int>();
axes = axes < 0 ? axes + 4 : axes;
auto perm =
(paddle::framework::DataLayout::NHWC == layout) ? perm_nhwc : perm_nchw;
(*axis) = static_cast<paddle::experimental::Scalar>(perm[axes]);
......@@ -469,20 +415,7 @@ class EagerConcatOpTransformer
}
// Finalizes the layout metadata of the concat op's output tensor.
// As written here: parses final_layout_ into a DataLayout and, only when it
// differs from the tensor's current layout, logs the change, writes it into
// the DenseTensor meta, and then calls UpdateLayout(out_tensor,
// DesiredLayout()).
// NOTE(review): the manual meta write and the UpdateLayout call look like the
// pre-change and post-change bodies of this commit shown together — confirm
// which one is meant to remain, and whether UpdateLayout should be
// unconditional.
void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) {
auto layout = paddle::framework::StringToDataLayout(final_layout_);
if (layout != out_tensor->layout()) {
VLOG(4) << "Change layout from "
<< paddle::framework::DataLayoutToString(out_tensor->layout())
<< " to " << final_layout_;
// The static_cast assumes impl() holds a phi::DenseTensor — TODO confirm.
phi::DenseTensorUtils::GetMutableMeta(
static_cast<phi::DenseTensor*>(out_tensor->impl().get()))
->layout = layout;
UpdateLayout(out_tensor, DesiredLayout());
}
}
protected:
std::string op_name_;
std::string final_layout_;
std::unordered_set<std::string> heavily_input_{"x", "y", "input"};
};
} // namespace egr
......@@ -194,8 +194,10 @@ paddle::imperative::NameVarMap<VarType> AutoTuneLayout(
(conv_in_type == framework::proto::VarType::FP16);
if (is_tune_fp32) {
LayoutAutoTune::Instance().SetDesiredLayout(DataLayout::NCHW);
LayoutAutoTune::Instance().SetDefaultLayout(DataLayout::NHWC);
} else if (is_tune_fp16) {
LayoutAutoTune::Instance().SetDesiredLayout(DataLayout::NHWC);
LayoutAutoTune::Instance().SetDefaultLayout(DataLayout::NCHW);
} else {
tracer->DisableLayoutAutoTune();
return ins;
......
......@@ -184,6 +184,42 @@ PyObject* tensor_properties_get_shape(TensorObject* self, void* closure) {
}
}
// Permute the reported shape of a 4-D tensor that is stored in the autotune
// "desired" layout (only when desired and default layouts differ):
//   desired == NCHW: value becomes {d0, d2, d3, d1}
//   desired == NHWC: value becomes {d0, d3, d1, d2}
// NOTE(review): presumably this makes the Python-visible shape follow the
// default layout order — confirm against LayoutAutoTune's contract.
const auto desired_layout =
    paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
const auto default_layout =
    paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
const bool change_dim =
    (desired_layout != default_layout &&
     self->tensor.layout() == desired_layout && value.size() == 4);
VLOG(6) << "eager_properties 'Shape' method, layout autotune "
        << " desired_layout: " << desired_layout
        << " default_layout: " << default_layout
        << " tensor layout: " << self->tensor.layout()
        << " tensor's shape size is : " << value.size();
if (change_dim) {
  // Snapshot the dims first: `value` is permuted in place below.
  const std::vector<int64_t> dims = value;
  const auto desired_layout_str =
      paddle::framework::DataLayoutToString(desired_layout);
  if (desired_layout_str == "NCHW") {
    // NCHW -> NHWC
    VLOG(6) << "layout autotune get Shape from NCHW -> NHWC " << value[0]
            << " " << value[1] << " " << value[2] << " " << value[3] << " to "
            << dims[0] << " " << dims[2] << " " << dims[3] << " " << dims[1];
    value[0] = dims[0];
    value[1] = dims[2];
    value[2] = dims[3];
    value[3] = dims[1];
  } else if (desired_layout_str == "NHWC") {
    // NHWC -> NCHW
    // The log prints exactly the four target dims (the original appended a
    // stray fifth value, dims[1]).
    VLOG(6) << "layout autotune get Shape from NHWC -> NCHW " << value[0]
            << " " << value[1] << " " << value[2] << " " << value[3] << " to "
            << dims[0] << " " << dims[3] << " " << dims[1] << " " << dims[2];
    value[0] = dims[0];
    value[1] = dims[3];
    value[2] = dims[1];
    value[3] = dims[2];
  }
}
return ToPyObject(value);
EAGER_CATCH_AND_THROW_RETURN_NULL
}
......
......@@ -2044,8 +2044,49 @@ void BindImperative(py::module *m_ptr) {
"shape",
[](imperative::VarBase &self) {
if (self.Var().IsType<framework::LoDTensor>()) {
return phi::vectorize<int>(
auto value = phi::vectorize<int>(
self.Var().Get<framework::LoDTensor>().dims());
// Permute the reported shape of a 4-D LoDTensor that is stored in the autotune
// "desired" layout (only when desired and default layouts differ):
//   desired == NCHW: value becomes {d0, d2, d3, d1}
//   desired == NHWC: value becomes {d0, d3, d1, d2}
// NOTE(review): presumably this makes the Python-visible shape follow the
// default layout order — confirm against LayoutAutoTune's contract.
// Bind by reference: only the layout is read, so no tensor copy is needed.
const auto& tensor = self.Var().Get<framework::LoDTensor>();
const auto desired_layout =
    paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
const auto default_layout =
    paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
const bool change_dim =
    (desired_layout != default_layout &&
     tensor.layout() == desired_layout && value.size() == 4);
VLOG(6) << "'Shape' method, layout autotune,"
        << " desired_layout: " << desired_layout
        << " default_layout: " << default_layout
        << " tensor layout: " << tensor.layout()
        << " tensor's shape size is : " << value.size();
if (change_dim) {
  // Snapshot the dims first: `value` is permuted in place below.
  const auto tmp_value = value;
  const auto desired_layout_str =
      paddle::framework::DataLayoutToString(desired_layout);
  if (desired_layout_str == "NCHW") {
    // NCHW -> NHWC
    // The log now names the direction actually taken and prints the same
    // target order that is assigned below (the original said
    // "NHWC -> NCHW" and printed indices 3, 1, 2, 1).
    VLOG(6) << "layout autotune get Shape from NCHW -> NHWC " << value[0]
            << " " << value[1] << " " << value[2] << " " << value[3] << " to "
            << tmp_value[0] << " " << tmp_value[2] << " " << tmp_value[3]
            << " " << tmp_value[1];
    value[1] = tmp_value[2];
    value[2] = tmp_value[3];
    value[3] = tmp_value[1];
  } else if (desired_layout_str == "NHWC") {
    // NHWC -> NCHW
    VLOG(6) << "layout autotune get Shape from NHWC -> NCHW " << value[0]
            << " " << value[1] << " " << value[2] << " " << value[3] << " to "
            << tmp_value[0] << " " << tmp_value[3] << " " << tmp_value[1]
            << " " << tmp_value[2];
    value[1] = tmp_value[3];
    value[2] = tmp_value[1];
    value[3] = tmp_value[2];
  }
}
return value;
} else if (self.Var().IsType<phi::SelectedRows>()) {
return phi::vectorize<int>(
self.Var().Get<phi::SelectedRows>().value().dims());
......
......@@ -205,7 +205,8 @@ phi::DenseTensor TransformData(phi::DenseTensor* tensor,
if (NeedTransformLayout(tensor->layout(),
target_args_def.layout,
tensor->place(),
transform_flag)) {
transform_flag) &&
tensor->dims().size() != 1) {
out = TransDataLayout(out, target_args_def.layout);
trans_layout = true;
}
......
......@@ -93,15 +93,6 @@ class LayoutAutoTune(unittest.TestCase):
return conv_out, predict
def test_enable_autotune(self):
if self.use_autoune():
conv_out, predict = self.train(data_format="NCHW")
if paddle.fluid.core.use_layout_autotune():
self.assertEqual(conv_out.shape, [1, 14, 14, 8])
self.assertEqual(predict.shape, [1, 2])
else:
self.assertEqual(conv_out.shape, [1, 8, 14, 14])
self.assertEqual(predict.shape, [1, 2])
else:
conv_out, predict = self.train(data_format="NCHW")
self.assertEqual(conv_out.shape, [1, 8, 14, 14])
self.assertEqual(predict.shape, [1, 2])
......@@ -124,10 +115,6 @@ class LayoutAutoTune(unittest.TestCase):
scaled.backward()
scaler.minimize(optimizer, scaled)
if paddle.fluid.core.use_layout_autotune():
self.assertEqual(conv_out.shape, [1, 14, 12, 8])
self.assertEqual(out.shape, [1, 12, 8, 14])
else:
self.assertEqual(conv_out.shape, [1, 8, 14, 12])
self.assertEqual(out.shape, [1, 12, 8, 14])
......@@ -143,10 +130,6 @@ class LayoutAutoTune(unittest.TestCase):
# because it flattens the C and H dimensions.
out = flatten(conv_out)
if paddle.fluid.core.use_layout_autotune():
self.assertEqual(conv_out.shape, [1, 14, 12, 8])
self.assertEqual(out.shape, [1, 112, 12])
else:
self.assertEqual(conv_out.shape, [1, 8, 14, 12])
self.assertEqual(out.shape, [1, 112, 12])
......@@ -157,42 +140,9 @@ class LayoutAutoTune(unittest.TestCase):
conv_out = conv(data)
# conv_out.shape = [1, 14, 12, 8] with NHWC
out = paddle.argmax(conv_out, axis=1, keepdim=True)
if paddle.fluid.core.use_layout_autotune():
self.assertEqual(conv_out.shape, [1, 14, 12, 8])
self.assertEqual(out.shape, [1, 14, 12, 1])
else:
self.assertEqual(conv_out.shape, [1, 8, 14, 12])
self.assertEqual(out.shape, [1, 1, 14, 12])
def test_argmax_op_transposer_ff(self):
conv = paddle.nn.Conv2D(3, 8, (3, 3))
data = paddle.rand([1, 3, 16, 14])
with paddle.amp.auto_cast(level="O2"):
conv_out = conv(data)
# conv_out.shape = [1, 14, 12, 8] with NHWC
out = paddle.argmax(conv_out)
if paddle.fluid.core.use_layout_autotune():
self.assertEqual(conv_out.shape, [1, 14, 12, 8])
self.assertEqual(out.shape, [1])
else:
self.assertEqual(conv_out.shape, [1, 8, 14, 12])
self.assertEqual(out.shape, [1])
def test_argmax_op_transposer_t(self):
conv = paddle.nn.Conv2D(3, 8, (3, 3))
data = paddle.rand([1, 3, 16, 14])
with paddle.amp.auto_cast(level="O2"):
conv_out = conv(data)
# conv_out.shape = [1, 14, 12, 8] with NHWC
out = paddle.argmax(conv_out)
if paddle.fluid.core.use_layout_autotune():
self.assertEqual(conv_out.shape, [1, 14, 12, 8])
self.assertEqual(out.shape, [1])
else:
self.assertEqual(conv_out.shape, [1, 8, 14, 12])
self.assertEqual(out.shape, [1])
def test_concat_op_transposer(self):
in1 = paddle.rand([1, 8, 14, 12])
conv = paddle.nn.Conv2D(3, 8, (3, 3))
......@@ -202,10 +152,6 @@ class LayoutAutoTune(unittest.TestCase):
# conv_out.shape = [1, 14, 12, 8] with NHWC
out = paddle.concat(x=[conv_out, in1], axis=0)
if paddle.fluid.core.use_layout_autotune():
self.assertEqual(conv_out.shape, [1, 14, 12, 8])
self.assertEqual(out.shape, [2, 8, 14, 12])
else:
self.assertEqual(conv_out.shape, [1, 8, 14, 12])
self.assertEqual(out.shape, [2, 8, 14, 12])
......@@ -219,10 +165,6 @@ class LayoutAutoTune(unittest.TestCase):
# conv_out.shape = [1, 14, 12, 8] with NHWC
out = paddle.concat(x=[conv_out1, conv_out2], axis=0)
if paddle.fluid.core.use_layout_autotune():
self.assertEqual(conv_out1.shape, [1, 14, 12, 8])
self.assertEqual(out.shape, [2, 14, 12, 8])
else:
self.assertEqual(conv_out1.shape, [1, 8, 14, 12])
self.assertEqual(out.shape, [2, 8, 14, 12])
......
......@@ -152,8 +152,8 @@ def _conv_nd(x,
channel_dim = channel_dim + len(
x.shape) if channel_dim < 0 else channel_dim
tmp_bias = _C_ops.reshape(
bias,
bias.shape + [1 for i in range(len(x.shape) - channel_dim - 1)])
bias, [1 for i in range(channel_dim)] + bias.shape +
[1 for i in range(len(x.shape) - channel_dim - 1)])
return _C_ops.add(pre_bias, tmp_bias)
else:
return pre_bias
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册