op_registry.h.patch 9.7 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215
diff --git a/paddle/fluid/framework/op_registry.h b/paddle/fluid/framework/op_registry.h
index a1f07f9f25..179df3b981 100644
--- a/paddle/fluid/framework/op_registry.h
+++ b/paddle/fluid/framework/op_registry.h
@@ -178,9 +178,8 @@ struct OpKernelRegistrarFunctor<PlaceType, false, I, KernelTypes...> {
     RegisterKernelClass<PlaceType, T>(
         op_type, library_type, customized_type_value,
 
-        [op_type](const framework::ExecutionContext& ctx) {
+        [](const framework::ExecutionContext& ctx) {
           KERNEL_TYPE().Compute(ctx);
-          CheckKernelLaunch<PlaceType>(op_type);
         });
     constexpr auto size = std::tuple_size<std::tuple<KernelTypes...>>::value;
     OpKernelRegistrarFunctor<PlaceType, I + 1 == size, I + 1, KernelTypes...>
@@ -240,13 +239,8 @@ struct OpKernelRegistrarFunctorEx<PlaceType, false, I,
 
   void operator()(const char* op_type, const char* library_type,
                   int customized_type_value) const {
-    RegisterKernelClass<PlaceType, T>(
-        op_type, library_type, customized_type_value,
-
-        [op_type](const framework::ExecutionContext& ctx) {
-          Functor()(ctx);
-          CheckKernelLaunch<PlaceType>(op_type);
-        });
+    RegisterKernelClass<PlaceType, T>(op_type, library_type,
+                                      customized_type_value, Functor());
 
     constexpr auto size =
         std::tuple_size<std::tuple<DataTypeAndKernelType...>>::value;
@@ -275,7 +269,7 @@ struct OpKernelRegistrarFunctorEx<PlaceType, false, I,
     VarTypeInference
     InferShapeBase
 */
-#define REGISTER_OPERATOR(op_type, op_class, ...)                        \
+#define REGISTER_OPERATOR__(op_type, op_class, ...)                        \
   STATIC_ASSERT_GLOBAL_NAMESPACE(                                        \
       __reg_op__##op_type,                                               \
       "REGISTER_OPERATOR must be called in global namespace");           \
@@ -286,15 +280,22 @@ struct OpKernelRegistrarFunctorEx<PlaceType, false, I,
     return 0;                                                            \
   }
 
-#define REGISTER_OP_WITHOUT_GRADIENT(op_type, op_class, ...) \
+#define REGISTER_OPERATOR(op_type, op_class, ...)
+
+#define REGISTER_OP_WITHOUT_GRADIENT__(op_type, op_class, ...) \
   REGISTER_OPERATOR(op_type, op_class, __VA_ARGS__, \
         paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,   \
         paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>)
 
+#define REGISTER_OP_WITHOUT_GRADIENT(op_type, op_class, ...)
 /**
  * Macro to register OperatorKernel.
  */
 #define REGISTER_OP_KERNEL_WITH_CUSTOM_TYPE(op_type, library_type,             \
+                                            place_class, customized_name,      \
+                                            customized_type_value, ...)
+
+#define REGISTER_OP_KERNEL_WITH_CUSTOM_TYPE__(op_type, library_type,             \
                                             place_class, customized_name,      \
                                             customized_type_value, ...)        \
   STATIC_ASSERT_GLOBAL_NAMESPACE(                                              \
@@ -311,18 +312,22 @@ struct OpKernelRegistrarFunctorEx<PlaceType, false, I,
     return 0;                                                                  \
   }
 
-#define REGISTER_OP_KERNEL(op_type, library_type, place_class, ...)   \
-  REGISTER_OP_KERNEL_WITH_CUSTOM_TYPE(                                \
+#define REGISTER_OP_KERNEL(op_type, library_type, place_class, ...)
+
+#define REGISTER_OP_KERNEL__(op_type, library_type, place_class, ...)   \
+  REGISTER_OP_KERNEL_WITH_CUSTOM_TYPE__(                                \
       op_type, library_type, place_class, DEFAULT_TYPE,               \
       ::paddle::framework::OpKernelType::kDefaultCustomizedTypeValue, \
       __VA_ARGS__)
 
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+#define REGISTER_OP_CUDA_KERNEL__(op_type, ...) \
+  REGISTER_OP_KERNEL__(op_type, CUDA, ::paddle::platform::CUDAPlace, __VA_ARGS__)
+
 #define REGISTER_OP_CUDA_KERNEL(op_type, ...) \
   REGISTER_OP_KERNEL(op_type, CUDA, ::paddle::platform::CUDAPlace, __VA_ARGS__)
-#else
-#define REGISTER_OP_CUDA_KERNEL(op_type, ...)
-#endif
+
+#define REGISTER_OP_CPU_KERNEL__(op_type, ...) \
+  REGISTER_OP_KERNEL__(op_type, CPU, ::paddle::platform::CPUPlace, __VA_ARGS__)
 
 #define REGISTER_OP_CPU_KERNEL(op_type, ...) \
   REGISTER_OP_KERNEL(op_type, CPU, ::paddle::platform::CPUPlace, __VA_ARGS__)
@@ -340,6 +345,11 @@ struct OpKernelRegistrarFunctorEx<PlaceType, false, I,
   REGISTER_OP_KERNEL(op_type, MLU, ::paddle::platform::MLUPlace, __VA_ARGS__)
 
 #define REGISTER_OP_KERNEL_EX(op_type, library_type, place_class,  \
+                              customized_name,                     \
+                              customized_type_value,               \
+			      ...)
+
+#define REGISTER_OP_KERNEL_EX__(op_type, library_type, place_class,  \
                               customized_name,                     \
                               customized_type_value,               \
                               ...)                                 \
@@ -357,8 +367,10 @@ struct OpKernelRegistrarFunctorEx<PlaceType, false, I,
     return 0;                                                                  \
   }
 
-#define REGISTER_OP_CUDA_KERNEL_FUNCTOR(op_type, ...)                 \
-  REGISTER_OP_KERNEL_EX(                                              \
+#define REGISTER_OP_CUDA_KERNEL_FUNCTOR(op_type, ...)
+
+#define REGISTER_OP_CUDA_KERNEL_FUNCTOR__(op_type, ...)                 \
+  REGISTER_OP_KERNEL_EX__(                                              \
       op_type, CUDA, ::paddle::platform::CUDAPlace, DEFAULT_TYPE,     \
       ::paddle::framework::OpKernelType::kDefaultCustomizedTypeValue, \
       __VA_ARGS__)
@@ -375,12 +387,6 @@ struct OpKernelRegistrarFunctorEx<PlaceType, false, I,
       ::paddle::framework::OpKernelType::kDefaultCustomizedTypeValue, \
       __VA_ARGS__)
 
-#define REGISTER_OP_NPU_KERNEL_FUNCTOR(op_type, ...)                  \
-  REGISTER_OP_KERNEL_EX(                                              \
-      op_type, NPU, ::paddle::platform::NPUPlace, DEFAULT_TYPE,       \
-      ::paddle::framework::OpKernelType::kDefaultCustomizedTypeValue, \
-      __VA_ARGS__)
-
 #define REGISTER_OP_MLU_KERNEL_FUNCTOR(op_type, ...)                  \
   REGISTER_OP_KERNEL_EX(                                              \
       op_type, MLU, ::paddle::platform::MLUPlace, DEFAULT_TYPE,       \
@@ -392,7 +398,9 @@ struct OpKernelRegistrarFunctorEx<PlaceType, false, I,
  * we will use and tell the compiler to
  * link them into target.
  */
-#define USE_OP_ITSELF(op_type)                             \
+#define USE_OP_ITSELF(op_type)
+
+#define USE_OP_ITSELF__(op_type)                             \
   STATIC_ASSERT_GLOBAL_NAMESPACE(                          \
       __use_op_itself_##op_type,                           \
       "USE_OP_ITSELF must be called in global namespace"); \
@@ -400,6 +408,10 @@ struct OpKernelRegistrarFunctorEx<PlaceType, false, I,
   UNUSED static int use_op_itself_##op_type##_ = TouchOpRegistrar_##op_type()
 
 #define USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(op_type,                     \
+                                              LIBRARY_TYPE,                \
+                                              customized_name)
+
+#define USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE__(op_type,                     \
                                               LIBRARY_TYPE,                \
                                               customized_name)             \
   STATIC_ASSERT_GLOBAL_NAMESPACE(                                          \
@@ -410,33 +422,58 @@ struct OpKernelRegistrarFunctorEx<PlaceType, false, I,
   UNUSED static int use_op_kernel_##op_type##_##LIBRARY_TYPE##_##customized_name##_ = /* NOLINT */ \
       TouchOpKernelRegistrar_##op_type##_##LIBRARY_TYPE##_##customized_name()
 
-#define USE_OP_DEVICE_KERNEL(op_type, LIBRARY_TYPE) \
-  USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(op_type, LIBRARY_TYPE, DEFAULT_TYPE)
+#define USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(op_type,                \
+                                              LIBRARY_TYPE,           \
+                                              customized_name)
+
+#define USE_OP_DEVICE_KERNEL__(op_type, LIBRARY_TYPE) \
+  USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE__(op_type, LIBRARY_TYPE, DEFAULT_TYPE)
+
+#define USE_OP_DEVICE_KERNEL(op_type, LIBRARY_TYPE)
 
 // TODO(fengjiayi): The following macros
 // seems ugly, do we have better method?
 
-#if !defined(PADDLE_WITH_CUDA) && !defined(PADDLE_WITH_HIP)
+#ifndef PADDLE_WITH_CUDA
 #define USE_OP_KERNEL(op_type) USE_OP_DEVICE_KERNEL(op_type, CPU)
+#define USE_OP_KERNEL__(op_type) USE_OP_DEVICE_KERNEL__(op_type, CPU)
 #else
 #define USE_OP_KERNEL(op_type)        \
   USE_OP_DEVICE_KERNEL(op_type, CPU); \
   USE_OP_DEVICE_KERNEL(op_type, CUDA)
+
+#define USE_OP_KERNEL__(op_type)        \
+  USE_OP_DEVICE_KERNEL__(op_type, CPU); \
+  USE_OP_DEVICE_KERNEL__(op_type, CUDA)
 #endif
 
 #define USE_NO_KERNEL_OP(op_type) USE_OP_ITSELF(op_type);
 
+#define USE_NO_KERNEL_OP__(op_type) USE_OP_ITSELF__(op_type);
+
 #define USE_CPU_ONLY_OP(op_type) \
   USE_OP_ITSELF(op_type);        \
   USE_OP_DEVICE_KERNEL(op_type, CPU);
 
+#define USE_CPU_ONLY_OP__(op_type) \
+  USE_OP_ITSELF__(op_type);        \
+  USE_OP_DEVICE_KERNEL__(op_type, CPU);
+
 #define USE_CUDA_ONLY_OP(op_type) \
   USE_OP_ITSELF(op_type);         \
   USE_OP_DEVICE_KERNEL(op_type, CUDA)
 
+#define USE_CUDA_ONLY_OP__(op_type) \
+  USE_OP_ITSELF__(op_type);         \
+  USE_OP_DEVICE_KERNEL__(op_type, CUDA)
+
 #define USE_OP(op_type)   \
   USE_OP_ITSELF(op_type); \
   USE_OP_KERNEL(op_type)
+
+#define USE_OP__(op_type)   \
+  USE_OP_ITSELF__(op_type); \
+  USE_OP_KERNEL__(op_type)
 // clang-format on
 
 }  // namespace framework