Addition of macro for auto_tune_base.h (#50516)

27281e1f · limingshu · GitHub · 7fe44feb · 27281e1f · 27281e1f
4 changed files
--- a/paddle/phi/kernels/autotune/auto_tune_base.h
+++ b/paddle/phi/kernels/autotune/auto_tune_base.h
@@ -67,13 +67,8 @@ class AutoTuneBase {
           const AlgorithmType& algo,
           const size_t key,
           Args&&... args) {
-    PADDLE_ENFORCE_GT(
-        kernels_.size(),
-        0,
-        phi::errors::InvalidArgument(
-            "kernel num must be greater than 0, now is %d", kernels_.size()));
    is_init_ = true;
+    CheckKernelSize();
    auto& cache = AutoTuneCache::Instance().Get(algo);
    if (cache.Find(key)) {
      auto best_idx = cache.Get(key);
@@ -91,19 +86,22 @@ class AutoTuneBase {
    }
  }
- private:
+ protected:
  bool is_init_{false};
  std::vector<KernelType> kernels_;
  mutable std::mutex mutex_;
-  template <typename Context, typename... Args>
+  void CheckKernelSize() {
-  size_t PickBestKernel(const Context& ctx, Args&&... args) {
-    std::lock_guard<std::mutex> lock(mutex_);
    PADDLE_ENFORCE_GT(
        kernels_.size(),
        0,
        phi::errors::InvalidArgument(
            "kernel num must be greater than 0, now is %d", kernels_.size()));
+  }
+  template <typename Context, typename... Args>
+  size_t PickBestKernel(const Context& ctx, Args&&... args) {
+    std::lock_guard<std::mutex> lock(mutex_);
    size_t best_idx = 0;
    float min_time = std::numeric_limits<float>::max();
@@ -143,36 +141,42 @@ class AutoTuneBase {
  }
 };
-template <typename T, typename ReturnType, typename... Args>
+// To init the auto_tuner object.
-static AutoTuneBase<T, KernelCallback<T, ReturnType, Args...>> MakeAutoTuner(
+#define DEFINE_AUTOTUNER_COMMON_OBJ(name)                                \
-    ReturnType (*func)(Args...)) {
+  template <typename T, typename ReturnType, typename... Args>           \
-  auto obj = MakeCallback<T>(func);
+  class name##AutoTuner                                                  \
-  return AutoTuneBase<T, decltype(obj)>(obj);
+      : public AutoTuneBase<T, KernelCallback<T, ReturnType, Args...>> { \
-}
+   public:                                                               \
+    static name##AutoTuner<T, ReturnType, Args...>* Instance(            \
-template <typename T, typename ReturnType, typename... Args>
+        ReturnType (*func)(Args...)) {                                   \
-class TransposeAutoTuner
+      static std::once_flag name##_init_flag;                            \
-    : public AutoTuneBase<T, KernelCallback<T, ReturnType, Args...>> {
+      static std::unique_ptr<name##AutoTuner<T, ReturnType, Args...>>    \
- public:
+          instance;                                                      \
-  static AutoTuneBase<T, KernelCallback<T, ReturnType, Args...>>* Instance(
+      std::call_once(name##_init_flag, [&] {                             \
-      ReturnType (*func)(Args...)) {
+        auto obj = MakeCallback<T>(func);                                \
-    static std::once_flag transpose_init_flag_;
+        instance.reset(new name##AutoTuner<T, ReturnType, Args...>);     \
-    static std::unique_ptr<
+        instance->AddCallBack(func);                                     \
-        AutoTuneBase<T, KernelCallback<T, ReturnType, Args...>>>
+      });                                                                \
-        instance_;
+      return instance.get();                                             \
-    std::call_once(transpose_init_flag_, [&] {
+    }                                                                    \
-      auto obj = MakeCallback<T>(func);
+  };
-      instance_.reset(new AutoTuneBase<T, decltype(obj)>(obj));
-    });
+// To define the auto_tuner initialization function.
-    return instance_.get();
+#define DEFINE_AUTOTUNER_FN(name)                                    \
+  template <typename T, typename ReturnType, typename... Args>       \
+  static name##AutoTuner<T, ReturnType, Args...>* Make##name##Tuner( \
+      ReturnType (*func)(Args...)) {                                 \
+    return name##AutoTuner<T, ReturnType, Args...>::Instance(func);  \
  }
-};
-template <typename T, typename ReturnType, typename... Args>
+#define DEFINE_AUTOTUNER(name) \
-static AutoTuneBase<T, KernelCallback<T, ReturnType, Args...>>*
+  DEFINE_AUTOTUNER_COMMON_OBJ(name) DEFINE_AUTOTUNER_FN(name)
-MakeTransposeTuner(ReturnType (*func)(Args...)) {
-  return TransposeAutoTuner<T, ReturnType, Args...>::Instance(func);
+DEFINE_AUTOTUNER(Transpose)
-}
+#undef DEFINE_AUTOTUNER_COMMON_OBJ  // name must match the #define, else the macro leaks
+#undef DEFINE_AUTOTUNER_FN
+#undef DEFINE_AUTOTUNER
 }  // namespace autotune
 }  // namespace phi
--- a/paddle/phi/kernels/autotune/cache.cc
+++ b/paddle/phi/kernels/autotune/cache.cc
@@ -25,7 +25,7 @@ size_t TransposeKey(const std::vector<int64_t>& x_dims,
                    const std::vector<int32_t>& perm,
                    phi::DataType dtype) {
  const auto rank = perm.size();
-  return GetKey(x_dims, perm, rank, static_cast<int64_t>(dtype));
+  return GenKey(x_dims, perm, rank, static_cast<int64_t>(dtype));
 }
 std::string AlgorithmTypeString(int64_t algo_type) {

--- a/paddle/phi/kernels/autotune/cache_base.h
+++ b/paddle/phi/kernels/autotune/cache_base.h
@@ -54,7 +54,7 @@ namespace phi {
 namespace autotune {
 template <typename... Args>
-size_t GetKey(Args&&... args) {
+size_t GenKey(Args&&... args) {
  size_t seed = 0;
  HashCombine(&seed, std::forward<Args>(args)...);
  return seed;
@@ -79,7 +79,7 @@ struct ConvCacheKey {
        groups(arg_groups),
        data_layout(arg_data_layout) {}
  size_t hash_value() const {
-    return GetKey(x_dims,
+    return GenKey(x_dims,
                  w_dims,
                  strides,
                  paddings,

--- a/python/paddle/fluid/tests/unittests/test_transpose_op.py
+++ b/python/paddle/fluid/tests/unittests/test_transpose_op.py
@@ -157,6 +157,44 @@ class TestAutoTuneTransposeOp(OpTest):
        self.check_grad(['X'], 'Out')
+class TestAutoTuneTransposeBF16Op(OpTest):
+    def setUp(self):
+        self.init_op_type()
+        self.initTestCase()
+        self.dtype = np.uint16
+        self.python_api = paddle.transpose
+        x = np.random.random(self.shape).astype("float32")
+        self.inputs = {'X': convert_float_to_uint16(x)}
+        self.attrs = {
+            'axis': list(self.axis),
+            'use_mkldnn': self.use_mkldnn,
+        }
+        self.outputs = {
+            'XShape': convert_float_to_uint16(
+                np.random.random(self.shape).astype("float32")
+            ),
+            'Out': self.inputs['X'].transpose(self.axis),
+        }
+    def initTestCase(self):
+        fluid.core.set_autotune_range(0, 3)
+        fluid.core.update_autotune_status()
+        fluid.core.enable_autotune()
+        self.shape = (2, 8, 10)
+        self.axis = (0, 2, 1)
+    def init_op_type(self):
+        self.op_type = "transpose2"
+        self.use_mkldnn = False
+    def test_check_output(self):
+        self.check_output(no_check_set=['XShape'])
+        fluid.core.disable_autotune()
+    def test_check_grad(self):
+        self.check_grad(['X'], 'Out')
 class TestTransposeBF16Op(OpTest):
    def setUp(self):
        self.init_op_type()