// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/imperative/layout_autotune.h"

#include "paddle/fluid/eager/api/utils/global_utils.h"
#include "paddle/fluid/framework/op_info.h"
#include "paddle/fluid/imperative/layout_transformer.h"
#include "paddle/phi/backends/gpu/gpu_info.h"
#include "paddle/phi/core/enforce.h"
#include "paddle/phi/core/errors.h"
namespace paddle {
namespace imperative {

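// Classify every registered forward operator into the layout agnostic,
// lightly layout sensitive, or heavily layout sensitive set when the
// singleton is constructed.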
LayoutAutoTune::LayoutAutoTune() {
  const auto& op_info = paddle::framework::OpInfoMap::Instance().map();
  for (auto it = op_info.begin(); it != op_info.end(); it++) {
    // Skip ops that are already in the lightly, heavily, or agnostic sets.
    if (IsLightlyLayoutSensitive(it->first) ||
        IsHeavilyLayoutSensitive(it->first) || IsLayoutAgnostic(it->first)) {
      VLOG(4) << "Already exists in Layout OP: " << it->first;
      continue;
    }

    // Only record forward operators.
    if (it->first.find("_grad") != std::string::npos) {
      continue;
    }

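    // Classify the remaining ops by inspecting their default attributes.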
    auto* attr_checker = it->second.Checker();
    bool layout_agnostic = true;
    if (attr_checker) {
      auto attrs = attr_checker->GetDefaultAttrMap();
      // Attribute names are fuzzy matched, e.g. start also matches start_axis.
      for (auto& attr : attrs) {
        auto attr_name = attr.first;
        VLOG(6) << "OP: " << it->first << " Attr Name: " << attr_name;
        if (attr_name.find("axis") != std::string::npos ||
            attr_name.find("axes") != std::string::npos ||
            attr_name.find("dim") != std::string::npos ||
            attr_name.find("start") != std::string::npos ||
            attr_name.find("end") != std::string::npos) {
          VLOG(4) << "Lightly layout sensitive OP: " << it->first;
          layout_agnostic = false;
          lightly_layout_sensitive_ops_.emplace(it->first);
          break;
        }
      }

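      // Ops that expose a data_format or data_layout attribute are treated as
      // heavily layout sensitive.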
      if ((attrs.find("data_format") != attrs.end() ||
           attrs.find("data_layout") != attrs.end()) &&
          layout_agnostic == true) {
        VLOG(4) << "Heavily layout sensitive OP: " << it->first;
        heavily_layout_sensitive_ops_.emplace(it->first);
        layout_agnostic = false;
        continue;
      }
    }

    // Some normalization operators, such as instance_norm and layer_norm, do
    // not have a data_format attribute but are still layout sensitive.
    if (it->first.find("norm") != std::string::npos && layout_agnostic) {
      lightly_layout_sensitive_ops_.emplace(it->first);
      continue;
    }

    if (layout_agnostic) {
      VLOG(4) << "Layout agnostic_ops: " << it->first;
      layout_agnostic_ops_.emplace(it->first);
    }
  }

  VLOG(3) << "The number of layout agnostic OPs: "
          << layout_agnostic_ops_.size() << ", heavily layout sensitive OPs: "
          << heavily_layout_sensitive_ops_.size()
          << ", lightly layout sensitive OPs: "
          << lightly_layout_sensitive_ops_.size();
}

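// Apply the HeavilyLayoutSensitiveOpTransformer: transpose the op's inputs to
// the desired layout and rewrite its data_format/data_layout attribute.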
template <typename VarType>
paddle::imperative::NameVarMap<VarType> DealHeavilyLayoutSensitive(
    const std::string& op_type,
    const paddle::imperative::NameVarMap<VarType>& ins,
    const paddle::imperative::NameVarMap<VarType>& outs,
    paddle::framework::AttributeMap* attrs,
    const std::shared_ptr<imperative::Tracer>& tracer) {
  std::shared_ptr<LayoutTransformer<VarType>> transposer = nullptr;
  transposer =
      std::make_shared<HeavilyLayoutSensitiveOpTransformer<VarType>>(op_type);
  transposer->SetArguments(
      {"Input", "X"}, {"Output", "Out", "Y"}, {"data_format", "data_layout"});

  return transposer->Apply(ins, outs, attrs, tracer);
}

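// Pick the dedicated transformer for a lightly layout sensitive op; ops
// without a specialized transformer fall back to the generic
// LightlyLayoutSensitiveOpTransformer.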
template <typename VarType>
paddle::imperative::NameVarMap<VarType> DealLightlyLayoutSensitive(
    const std::string& op_type,
    const paddle::imperative::NameVarMap<VarType>& ins,
    const paddle::imperative::NameVarMap<VarType>& outs,
    paddle::framework::AttributeMap* attrs,
    const std::shared_ptr<imperative::Tracer>& tracer) {
  std::shared_ptr<LayoutTransformer<VarType>> transposer = nullptr;
  if (op_type == "transpose2") {
    transposer = std::make_shared<TransposeOpTransformer<VarType>>(op_type);
  } else if (op_type == "flatten_contiguous_range") {
    transposer = std::make_shared<FlattenOpTransformer<VarType>>(op_type);
  } else if (op_type == "arg_max") {
    transposer = std::make_shared<ArgmaxOpTransformer<VarType>>(op_type);
  } else if (op_type == "concat") {
    transposer = std::make_shared<ConcatOpTransformer<VarType>>(op_type);
  } else if (op_type.find("elementwise_") != std::string::npos) {
    transposer = std::make_shared<ElementwiseOpTransformer<VarType>>(op_type);
  } else {
    VLOG(4) << op_type
            << "'s LayoutTransformer is unimplemented. Use default "
               "LightlyLayoutTransformer instead.";
    transposer =
        std::make_shared<LightlyLayoutSensitiveOpTransformer<VarType>>(op_type);
  }
  return transposer->Apply(ins, outs, attrs, tracer);
}

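// RAII guard that temporarily switches the tracer's layout autotune state to
// use_autotune and restores the previous state on destruction.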
LayoutAutotuneGuard::LayoutAutotuneGuard(std::shared_ptr<Tracer> tracer,
                                         bool use_autotune)
    : tracer_(tracer) {
  pre_layout_autotune_ = tracer_->UseLayoutAutoTune();
  if (pre_layout_autotune_ != use_autotune) {
    tracer_->EnableLayoutAutoTune();
    if (!use_autotune) {
      tracer_->DisableLayoutAutoTune();
    }
  }
}

LayoutAutotuneGuard::~LayoutAutotuneGuard() {
  if (pre_layout_autotune_) {
    tracer_->EnableLayoutAutoTune();
  } else {
    tracer_->DisableLayoutAutoTune();
  }
}

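// Entry point of layout autotuning: decide the desired layout on the first
// conv2d op, then dispatch each op to the transformer matching its layout
// sensitivity.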
template <typename VarType>
paddle::imperative::NameVarMap<VarType> AutoTuneLayout(
    const std::string& op_type,
    const paddle::imperative::NameVarMap<VarType>& ins,
    const paddle::imperative::NameVarMap<VarType>& outs,
    paddle::framework::AttributeMap* attrs,
    const std::shared_ptr<imperative::Tracer>& tracer) {
  if (!tracer->UseLayoutAutoTune()) {
    return ins;
  }
  // When layout autotuning is enabled, the tuner checks the desired layout.
  // (1) If the desired layout is undefined and there are no convolutional
  // layers, layout optimization is unnecessary. Otherwise, the desired layout
  // is set to the best layout only when there is a convolutional layer with
  // NCHW layout and Tensor Cores are available.
  // (2) If the desired layout is defined, run the transposer.

  if (LayoutAutoTune::Instance().GetDesiredLayout() == DataLayout::UNDEFINED) {
    // Layout autotune only supports models with convolutional layers.
    if (op_type != "conv2d") {
      return ins;
    } else {
#if defined(PADDLE_WITH_CUDA)
      if (!phi::backends::gpu::TensorCoreAvailable()) {
        tracer->DisableLayoutAutoTune();
        return ins;
      }
#endif
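      // FP32 convolutions in NHWC are tuned to NCHW, while FP16 convolutions
      // in NCHW are tuned to NHWC to make better use of Tensor Cores.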
      auto conv_in_type = framework::proto::VarType::FP32;
      auto& in_vars = ins.at("Input")[0];
      if (GetDataType<VarType>(in_vars) == framework::proto::VarType::FP16) {
        conv_in_type = framework::proto::VarType::FP16;
      }
      bool is_tune_fp32 =
          (PADDLE_GET_CONST(std::string, (*attrs)["data_format"]) == "NHWC") &&
          (conv_in_type == framework::proto::VarType::FP32);
      bool is_tune_fp16 =
          (PADDLE_GET_CONST(std::string, (*attrs)["data_format"]) == "NCHW") &&
          (conv_in_type == framework::proto::VarType::FP16);
      if (is_tune_fp32) {
        LayoutAutoTune::Instance().SetDesiredLayout(DataLayout::NCHW);
        LayoutAutoTune::Instance().SetDefaultLayout(DataLayout::NHWC);
      } else if (is_tune_fp16) {
        LayoutAutoTune::Instance().SetDesiredLayout(DataLayout::NHWC);
        LayoutAutoTune::Instance().SetDefaultLayout(DataLayout::NCHW);
      } else {
        tracer->DisableLayoutAutoTune();
        return ins;
      }
      VLOG(3) << "Tune the layout from "
              << PADDLE_GET_CONST(std::string, (*attrs)["data_format"])
              << " to "
              << paddle::framework::DataLayoutToString(
                     LayoutAutoTune::Instance().GetDesiredLayout());
    }
  }

  if (LayoutAutoTune::Instance().IsHeavilyLayoutSensitive(op_type)) {
    return DealHeavilyLayoutSensitive<VarType>(
        op_type, ins, outs, attrs, tracer);
  } else if (LayoutAutoTune::Instance().IsLightlyLayoutSensitive(op_type)) {
    return DealLightlyLayoutSensitive<VarType>(
        op_type, ins, outs, attrs, tracer);
  } else {
    std::shared_ptr<LayoutTransformer<VarType>> transposer = nullptr;
    if (LayoutAutoTune::Instance().IsLayoutAgnostic(op_type)) {
      transposer = std::make_shared<LayoutTransformer<VarType>>(op_type);
    }
    PADDLE_ENFORCE_NOT_NULL(
        transposer,
        phi::errors::Unimplemented("%s 's LayoutTransformer is unimplemented.",
                                   op_type));
    return transposer->Apply(ins, outs, attrs, tracer);
  }
}

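// Explicit instantiations for the two imperative variable types.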
template paddle::imperative::NameVarMap<VarBase> AutoTuneLayout<VarBase>(
    const std::string& op_type,
    const paddle::imperative::NameVarMap<VarBase>& ins,
    const paddle::imperative::NameVarMap<VarBase>& outs,
    paddle::framework::AttributeMap* attrs,
    const std::shared_ptr<imperative::Tracer>& tracer);
template paddle::imperative::NameVarMap<egr::EagerVariable>
AutoTuneLayout<egr::EagerVariable>(
    const std::string& op_type,
    const paddle::imperative::NameVarMap<egr::EagerVariable>& ins,
    const paddle::imperative::NameVarMap<egr::EagerVariable>& outs,
    paddle::framework::AttributeMap* attrs,
    const std::shared_ptr<imperative::Tracer>& tracer);

}  // namespace imperative
}  // namespace paddle