// eager_layout_auto_tune.h
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "paddle/fluid/eager/api/generated/eager_generated/forwards/dygraph_functions.h"
#include "paddle/fluid/eager/eager_layout_transformer.h"
#include "paddle/fluid/imperative/layout_autotune.h"
#include "paddle/phi/backends/gpu/gpu_info.h"
namespace egr {
// Returns true if any tensor in tensors_vector has a layout different from
// `layout`.
inline bool NeedTransLayout(
    const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                               kSlotSmallVectorSize>& tensors_vector,
    const phi::DataLayout& layout) {
  for (size_t i = 0; i < tensors_vector.size(); i++) {
    for (size_t idx = 0; idx < tensors_vector[i].size(); idx++) {
      if (layout != tensors_vector[i][idx].layout()) {
        return true;
      }
    }
  }
  return false;
}

inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
    const std::string& op_name,
    const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                               kSlotSmallVectorSize>& tensors_vector) {
  // For layout-agnostic ops like add, relu, exp
  auto first_layout = tensors_vector[0][0].layout();
  auto desired_layout = DesiredLayout();
  bool is_started = !(desired_layout == phi::DataLayout::UNDEFINED);
  if (is_started && NeedTransLayout(tensors_vector, first_layout)) {
    // If any input is not a 4-D tensor, transform everything back to the
    // default layout; otherwise keep the desired (tuned) layout.
    bool need_trans_back = false;
    for (size_t i = 0; i < tensors_vector.size(); i++) {
      for (size_t idx = 0; idx < tensors_vector[i].size(); idx++) {
        if (4 != tensors_vector[i][idx].shape().size()) {
          need_trans_back = true;
        }
      }
    }
    auto final_layout = need_trans_back ? DefaultLayout() : desired_layout;
    VLOG(4) << op_name << "'s inputs have different layouts, need trans to "
            << final_layout;
    return std::make_shared<EagerLayoutTransformer>(
        op_name, tensors_vector, final_layout);
  }
  return std::make_shared<EagerLayoutTransformer>(
      op_name, tensors_vector, first_layout);
}
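
// A minimal usage sketch (hypothetical call site; the real callers are the
// code-generated dygraph forward functions):
//
//   paddle::small_vector<std::vector<paddle::experimental::Tensor>,
//                        kSlotSmallVectorSize> tensors_vector = {{x}, {y}};
//   auto transformer = egr::EagerLayoutAutotune("add", tensors_vector);
//   // The returned transformer is then used to rewrite the op's inputs and
//   // outputs; see eager_layout_transformer.h for the transformer interface.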

template <typename T>
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
    const std::string& op_name,
    const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                               kSlotSmallVectorSize>& tensors_vector,
    T* attr) {
  // For lightly layout-sensitive ops like reduce
  if (DesiredLayout() == phi::DataLayout::UNDEFINED) {
    VLOG(4) << "LayoutAutotune is not started. Current op: " << op_name;
    return std::make_shared<EagerLayoutTransformer>(
        op_name, tensors_vector, tensors_vector[0][0].layout());
  }
  return std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
}

template <typename T1, typename T2>
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
    const std::string& op_name,
    const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                               kSlotSmallVectorSize>& tensors_vector,
    T1* axis,
    T2* keep_dim) {
  // For lightly layout-sensitive ops with two attributes, like argmax
  return EagerLayoutAutotune<T1>(op_name, tensors_vector, axis);
}
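
// Note: the explicit specializations below (std::string*, std::vector<int>*,
// Scalar*, Scalar*/bool*, and int*/int*) take precedence over the two generic
// templates above when the attribute types match exactly.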

template <>
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
    const std::string& op_name,
    const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                               kSlotSmallVectorSize>& tensors_vector,
    std::string* attr) {
  // For heavily layout-sensitive ops with a (string) data_format/data_layout
  // attribute
  auto transposer = std::make_shared<EagerLayoutTransformer>(
      op_name, tensors_vector, tensors_vector[0][0].layout());
  if (DesiredLayout() == phi::DataLayout::UNDEFINED) {
    // Layout autotune only supports models with convolutional layers
    if (op_name != "conv2d") {
      VLOG(4) << "LayoutAutotune is not started. Current op: " << op_name;
      return transposer;
    } else {
      // The first conv2d decides whether and how layout autotune starts.
      auto data_type = tensors_vector[0][0].dtype();
      bool is_tune_fp32 =
          (data_type == paddle::experimental::DataType::FLOAT32) &&
          (*attr == "NHWC");
      bool is_tune_fp16 =
          (data_type == paddle::experimental::DataType::FLOAT16) &&
          (*attr == "NCHW");
      VLOG(4) << "LayoutAutoTune checks dtype and layout, current op: "
              << op_name;
      if (is_tune_fp32) {
        paddle::imperative::LayoutAutoTune::Instance().SetDesiredLayout(
            phi::DataLayout::NCHW);
        paddle::imperative::LayoutAutoTune::Instance().SetDefaultLayout(
            phi::DataLayout::NHWC);
      } else if (is_tune_fp16) {
        paddle::imperative::LayoutAutoTune::Instance().SetDesiredLayout(
            phi::DataLayout::NHWC);
        paddle::imperative::LayoutAutoTune::Instance().SetDefaultLayout(
            phi::DataLayout::NCHW);
      } else {
        VLOG(4) << "DisableLayoutAutoTune according to Conv op"
                << " dtype : " << data_type << " format : " << (*attr);
        egr::Controller::Instance().DisableLayoutAutoTune();
        return transposer;
      }
      VLOG(4) << "LayoutAutoTune from " << *attr << " to " << DesiredLayout();
    }
  }

  if (paddle::imperative::LayoutAutoTune::Instance().IsHeavilyLayoutSensitive(
          op_name)) {
    return std::make_shared<EagerHeavilyLayoutSensitiveOpTransformer>(op_name,
                                                                      attr);
  }
  return std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
}
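
// Summary of the decision made by the first conv2d seen while the desired
// layout is still UNDEFINED (derived from the branch above):
//   dtype     data_format attr   SetDesiredLayout   SetDefaultLayout
//   FLOAT32   "NHWC"             NCHW               NHWC
//   FLOAT16   "NCHW"             NHWC               NCHW
//   otherwise: layout autotune is disabled.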

template <>
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
    const std::string& op_name,
    const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                               kSlotSmallVectorSize>& tensors_vector,
    std::vector<int>* attr) {
  // For lightly layout-sensitive ops like transpose
  if (DesiredLayout() == phi::DataLayout::UNDEFINED) {
    VLOG(4) << "LayoutAutotune is not started. Current op: " << op_name;
    return std::make_shared<EagerLayoutTransformer>(
        op_name, tensors_vector, tensors_vector[0][0].layout());
  }

  if (op_name == "transpose2" &&
      (tensors_vector[0][0].layout() == DesiredLayout())) {
    auto trans = std::make_shared<EagerTransposeOpTransformer>(op_name);
    trans->SetAttr(attr,
                   tensors_vector[0][0].layout() == phi::DataLayout::NHWC);
    return trans;
  }
  return std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
}

// For lightly layout-sensitive ops like argmax (Scalar axis, bool keep_dim)
template <>
inline std::shared_ptr<EagerLayoutTransformer>
EagerLayoutAutotune<paddle::experimental::Scalar, bool>(
    const std::string& op_name,
    const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                               kSlotSmallVectorSize>& tensors_vector,
    paddle::experimental::Scalar* axis,
    bool* keep_dim) {
  if (DesiredLayout() == phi::DataLayout::UNDEFINED) {
    VLOG(4) << "LayoutAutotune is not started. Current op: " << op_name;
    return std::make_shared<EagerLayoutTransformer>(
        op_name, tensors_vector, tensors_vector[0][0].layout());
  }

  if (op_name == "argmax" &&
      (tensors_vector[0][0].layout() == DesiredLayout()) && (*keep_dim)) {
    auto argmax_transform = std::make_shared<EagerArgmaxOpTransformer>(op_name);
    argmax_transform->SetAttr(
        axis, tensors_vector[0][0].layout() == phi::DataLayout::NHWC);
    return argmax_transform;
  }
  return std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
}

template <>
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune<int, int>(
    const std::string& op_name,
    const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                               kSlotSmallVectorSize>& tensors_vector,
    int* start_axis,
    int* stop_axis) {
  if (DesiredLayout() == phi::DataLayout::UNDEFINED) {
    VLOG(4) << "LayoutAutotune is not started. Current op: " << op_name;
    return std::make_shared<EagerLayoutTransformer>(
        op_name, tensors_vector, tensors_vector[0][0].layout());
  }

  bool no_transpose = tensors_vector[0][0].layout() == DesiredLayout();
  bool is_valid = ((*start_axis) == 1 && (*stop_axis) == 3);
  if (op_name == "flatten" || op_name == "flatten_contiguous_range") {
    if (no_transpose && is_valid) {
      return std::make_shared<EagerFlattenOpTransformer>(op_name);
    }
  }
  return std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
}

template <>
inline std::shared_ptr<EagerLayoutTransformer>
EagerLayoutAutotune<paddle::experimental::Scalar>(
    const std::string& op_name,
    const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                               kSlotSmallVectorSize>& tensors_vector,
    paddle::experimental::Scalar* axis) {
  if (DesiredLayout() == phi::DataLayout::UNDEFINED) {
    VLOG(4) << "LayoutAutotune is not started. Current op: " << op_name;
    return std::make_shared<EagerLayoutTransformer>(
        op_name, tensors_vector, tensors_vector[0][0].layout());
  }

  auto desired_layout = DesiredLayout();
  if (NeedTransLayout(tensors_vector, desired_layout)) {
    VLOG(4) << op_name << "'s inputs have different layouts";
    return std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
  }
  if (op_name == "Concat") {
    if (desired_layout == tensors_vector[0][0].layout() &&
        tensors_vector[0][0].shape().size() == 4) {
      auto trans = std::make_shared<EagerConcatOpTransformer>(op_name);
      trans->SetAttr(axis, desired_layout);
      return trans;
    }
  }
  return std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
}

}  // namespace egr