// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "paddle/fluid/eager/api/generated/eager_generated/forwards/dygraph_functions.h"
#include "paddle/fluid/eager/eager_layout_transformer.h"
#include "paddle/fluid/imperative/layout_autotune.h"
#include "paddle/phi/backends/gpu/gpu_info.h"
namespace egr {
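// Returns true if any tensor in tensors_vector has a layout different from
// `layout`, i.e. the inputs would need a layout transformation to agree.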
inline bool NeedTransLayout(
    const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                               kSlotSmallVectorSize>& tensors_vector,
    const paddle::experimental::DataLayout& layout) {
  for (size_t i = 0; i < tensors_vector.size(); i++) {
    for (size_t idx = 0; idx < tensors_vector[i].size(); idx++) {
      if (layout != tensors_vector[i][idx].layout()) {
        return true;
      }
    }
  }
  return false;
}

inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
    const std::string& op_name,
    const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                               kSlotSmallVectorSize>& tensors_vector) {
  // For layout-agnostic ops such as add, relu and exp: keep the inputs'
  // layout unless the inputs disagree with each other.
  auto first_layout = tensors_vector[0][0].layout();
  auto desired_layout = DesiredLayout();
  bool is_started =
      desired_layout != paddle::experimental::DataLayout::UNDEFINED;
  if (is_started && NeedTransLayout(tensors_vector, first_layout)) {
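    // If any input is not 4-D, fall back to the default layout rather than
    // the tuned one (the NHWC/NCHW autotune presumably only applies to 4-D
    // tensors).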
    bool need_trans_back = false;
    for (size_t i = 0; i < tensors_vector.size(); i++) {
      for (size_t idx = 0; idx < tensors_vector[i].size(); idx++) {
        if (tensors_vector[i][idx].shape().size() != 4) {
          need_trans_back = true;
        }
      }
    }
    auto final_layout = need_trans_back ? DefaultLayout() : desired_layout;
    VLOG(4) << op_name << "'s inputs have different layouts, need trans to "
            << final_layout;
    return std::make_shared<EagerLayoutTransformer>(
        op_name, tensors_vector, final_layout);
  }
  return std::make_shared<EagerLayoutTransformer>(
      op_name, tensors_vector, first_layout);
}

template <typename T>
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
    const std::string& op_name,
    const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                               kSlotSmallVectorSize>& tensors_vector,
    T* attr) {
  // For lightly layout-sensitive ops such as reduce ops.
  if (DesiredLayout() == paddle::experimental::DataLayout::UNDEFINED) {
    VLOG(4) << "LayoutAutotune was not started. Current op: " << op_name;
    return std::make_shared<EagerLayoutTransformer>(
        op_name, tensors_vector, tensors_vector[0][0].layout());
  }
  return std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
}

template <typename T1, typename T2>
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
    const std::string& op_name,
    const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                               kSlotSmallVectorSize>& tensors_vector,
    T1* axis,
    T2* keep_dim) {
  // For lightly layout-sensitive ops with two attributes (e.g. argmax):
  // forward to the single-attribute overload above.
  return EagerLayoutAutotune<T1>(op_name, tensors_vector, axis);
}

template <>
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
    const std::string& op_name,
    const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                               kSlotSmallVectorSize>& tensors_vector,
    std::string* attr) {
  // For ops that carry a string layout attribute (data_format / data_layout),
  // which may be heavily layout-sensitive (e.g. conv2d).
  auto transposer = std::make_shared<EagerLayoutTransformer>(
      op_name, tensors_vector, tensors_vector[0][0].layout());
  if (DesiredLayout() == paddle::experimental::DataLayout::UNDEFINED) {
    // Layout autotune is only enabled for models that contain convolutional
    // layers, so it is started from the first conv2d op encountered.
    if (op_name != "conv2d") {
      VLOG(4) << "LayoutAutotune was unstarted. Current op :" << op_name;
      return transposer;
    } else {
      auto data_type = tensors_vector[0][0].dtype();
      bool is_tune_fp32 =
          (data_type == paddle::experimental::DataType::FLOAT32) &&
          (*attr == "NHWC");
      bool is_tune_fp16 =
          (data_type == paddle::experimental::DataType::FLOAT16) &&
          (*attr == "NCHW");
      VLOG(4) << "LayoutAutoTune assert with dtype and layout, Current op : "
              << op_name;
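      // FP32 convolutions generally run fastest in NCHW while FP16 (Tensor
      // Core) convolutions favor NHWC, so the tuned ("desired") layout is
      // chosen accordingly; the model's original layout becomes the default
      // (fallback) layout.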
      if (is_tune_fp32) {
        paddle::imperative::LayoutAutoTune::Instance().SetDesiredLayout(
            paddle::experimental::DataLayout::NCHW);

        paddle::imperative::LayoutAutoTune::Instance().SetDefaultLayout(
            paddle::experimental::DataLayout::NHWC);
      } else if (is_tune_fp16) {
        paddle::imperative::LayoutAutoTune::Instance().SetDesiredLayout(
            paddle::experimental::DataLayout::NHWC);
        paddle::imperative::LayoutAutoTune::Instance().SetDefaultLayout(
            paddle::experimental::DataLayout::NCHW);
      } else {
        VLOG(4) << "DisableLayoutAutoTune accoding to Conv op"
                << " dtype : " << data_type << " format : " << (*attr);
        egr::Controller::Instance().DisableLayoutAutoTune();
        return transposer;
      }
      VLOG(4) << "LayoutAutoTune from " << *attr << " to " << DesiredLayout();
    }
  }

  if (paddle::imperative::LayoutAutoTune::Instance().IsHeavilyLayoutSensitive(
          op_name)) {
    return std::make_shared<EagerHeavilyLayoutSensitiveOpTransformer>(op_name,
                                                                      attr);
  }
  return std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
}

template <>
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
    const std::string& op_name,
    const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                               kSlotSmallVectorSize>& tensors_vector,
    std::vector<int>* attr) {
  // Lightly layout-sensitive specialization for ops with a vector<int>
  // attribute, e.g. transpose2.
  if (DesiredLayout() == paddle::experimental::DataLayout::UNDEFINED) {
    VLOG(4) << "LayoutAutotune was unstarted. Current op :" << op_name;
    return std::make_shared<EagerLayoutTransformer>(
        op_name, tensors_vector, tensors_vector[0][0].layout());
  }

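  // transpose2 only gets the dedicated transformer when its input already
  // carries the tuned layout; SetAttr receives the axis list together with an
  // is-NHWC flag, presumably to remap the permutation.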
  if (op_name == "transpose2" &&
      (tensors_vector[0][0].layout() == DesiredLayout())) {
    auto trans = std::make_shared<EagerTransposeOpTransformer>(op_name);
    trans->SetAttr(attr,
                   tensors_vector[0][0].layout() ==
                       paddle::experimental::DataLayout::NHWC);
    return trans;
  }
  return std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
}

// Lightly layout-sensitive specialization for argmax (Scalar axis plus a
// keep_dim flag).
template <>
inline std::shared_ptr<EagerLayoutTransformer>
EagerLayoutAutotune<paddle::experimental::Scalar, bool>(
    const std::string& op_name,
    const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                               kSlotSmallVectorSize>& tensors_vector,
    paddle::experimental::Scalar* axis,
    bool* keep_dim) {
  if (DesiredLayout() == paddle::experimental::DataLayout::UNDEFINED) {
    VLOG(4) << "LayoutAutotune was unstarted. Current op :" << op_name;
    return std::make_shared<EagerLayoutTransformer>(
        op_name, tensors_vector, tensors_vector[0][0].layout());
  }

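  // argmax only gets the dedicated transformer when the input already carries
  // the tuned layout and keep_dim is true; SetAttr receives the axis together
  // with an is-NHWC flag, presumably to remap the reduction axis.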
  if (op_name == "argmax" &&
      (tensors_vector[0][0].layout() == DesiredLayout()) && (*keep_dim)) {
    auto argmax_transform =
        std::make_shared<EagerArgmaxOpTransformer>(op_name);
    argmax_transform->SetAttr(axis,
                              tensors_vector[0][0].layout() ==
                                  paddle::experimental::DataLayout::NHWC);
    return argmax_transform;
  }
  return std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
}

template <>
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune<int, int>(
    const std::string& op_name,
    const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                               kSlotSmallVectorSize>& tensors_vector,
    int* start_axis,
    int* stop_axis) {
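  // Specialization for ops with two int attributes; flatten /
  // flatten_contiguous_range over axes 1..3 on an input that already carries
  // the tuned layout gets a dedicated transformer.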
  if (DesiredLayout() == paddle::experimental::DataLayout::UNDEFINED) {
    VLOG(4) << "Optimze Layout was not started" << op_name;
    return std::make_shared<EagerLayoutTransformer>(
        op_name, tensors_vector, tensors_vector[0][0].layout());
  }

  bool no_transpose = tensors_vector[0][0].layout() == DesiredLayout();
  bool is_valid = ((*start_axis) == 1 && (*stop_axis) == 3);
  if (op_name == "flatten" || op_name == "flatten_contiguous_range") {
    if (no_transpose && is_valid) {
      return std::make_shared<EagerFlattenOpTransformer>(op_name);
    }
  }
  return std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
}

template <>
inline std::shared_ptr<EagerLayoutTransformer>
EagerLayoutAutotune<paddle::experimental::Scalar>(
    const std::string& op_name,
    const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                               kSlotSmallVectorSize>& tensors_vector,
    paddle::experimental::Scalar* axis) {
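  // Specialization for ops with a single Scalar attribute; a 4-D concat whose
  // inputs already share the tuned layout gets a dedicated transformer whose
  // SetAttr presumably remaps the concat axis for that layout.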
  if (DesiredLayout() == paddle::experimental::DataLayout::UNDEFINED) {
    VLOG(4) << "Optimze Layout was not started" << op_name;
    return std::make_shared<EagerLayoutTransformer>(
        op_name, tensors_vector, tensors_vector[0][0].layout());
  }

  auto desired_layout = DesiredLayout();
  if (NeedTransLayout(tensors_vector, desired_layout)) {
    VLOG(4) << op_name << "'s inputs have different layouts";
    return std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
  }
  if (op_name == "Concat") {
    if (desired_layout == tensors_vector[0][0].layout() &&
        tensors_vector[0][0].shape().size() == 4) {
      auto trans = std::make_shared<EagerConcatOpTransformer>(op_name);
      trans->SetAttr(axis, desired_layout);
      return trans;
    }
  }
  return std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
}
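// Illustrative usage sketch (hypothetical call site; the names `x`, `filter`,
// `op_name` and `data_format` are placeholders). A generated dygraph forward
// function is expected to pick a transformer roughly like:
//
//   paddle::small_vector<std::vector<paddle::experimental::Tensor>,
//                        kSlotSmallVectorSize> tensors_vector = {{x}, {filter}};
//   std::string data_format = "NCHW";
//   auto transformer =
//       egr::EagerLayoutAutotune(op_name, tensors_vector, &data_format);
//   // The transformer then rewrites input layouts / attributes before the
//   // kernel is dispatched (see eager_layout_transformer.h for its interface).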

}  // namespace egr