op_registry.h.patch 9.6 KB
Newer Older
1
diff --git a/paddle/fluid/framework/op_registry.h b/paddle/fluid/framework/op_registry.h
S
Shang Zhizhou 已提交
2
index d38efbff31..f5bef776d6 100644
3 4
--- a/paddle/fluid/framework/op_registry.h
+++ b/paddle/fluid/framework/op_registry.h
S
Shang Zhizhou 已提交
5 6 7
@@ -186,9 +186,8 @@ struct OpKernelRegistrarFunctor<PlaceType, false, I, KernelTypes...> {
         library_type,
         customized_type_value,
8 9 10 11 12 13 14 15
 
-        [op_type](const framework::ExecutionContext& ctx) {
+        [](const framework::ExecutionContext& ctx) {
           KERNEL_TYPE().Compute(ctx);
-          CheckKernelLaunch<PlaceType>(op_type);
         });
     constexpr auto size = std::tuple_size<std::tuple<KernelTypes...>>::value;
     OpKernelRegistrarFunctor<PlaceType, I + 1 == size, I + 1, KernelTypes...>
S
Shang Zhizhou 已提交
16 17 18
@@ -257,15 +256,8 @@ struct OpKernelRegistrarFunctorEx<PlaceType,
   void operator()(const char* op_type,
                   const char* library_type,
19 20
                   int customized_type_value) const {
-    RegisterKernelClass<PlaceType, T>(
S
Shang Zhizhou 已提交
21 22 23
-        op_type,
-        library_type,
-        customized_type_value,
24 25 26 27 28 29 30 31 32 33
-
-        [op_type](const framework::ExecutionContext& ctx) {
-          Functor()(ctx);
-          CheckKernelLaunch<PlaceType>(op_type);
-        });
+    RegisterKernelClass<PlaceType, T>(op_type, library_type,
+                                      customized_type_value, Functor());
 
     constexpr auto size =
         std::tuple_size<std::tuple<DataTypeAndKernelType...>>::value;
S
Shang Zhizhou 已提交
34
@@ -296,7 +288,7 @@ struct OpKernelRegistrarFunctorEx<PlaceType,
35 36 37 38 39 40 41 42
     VarTypeInference
     InferShapeBase
 */
-#define REGISTER_OPERATOR(op_type, op_class, ...)                        \
+#define REGISTER_OPERATOR__(op_type, op_class, ...)                        \
   STATIC_ASSERT_GLOBAL_NAMESPACE(                                        \
       __reg_op__##op_type,                                               \
       "REGISTER_OPERATOR must be called in global namespace");           \
S
Shang Zhizhou 已提交
43
@@ -307,15 +299,22 @@ struct OpKernelRegistrarFunctorEx<PlaceType,
44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66
     return 0;                                                            \
   }
 
-#define REGISTER_OP_WITHOUT_GRADIENT(op_type, op_class, ...) \
+#define REGISTER_OPERATOR(op_type, op_class, ...)
+
+#define REGISTER_OP_WITHOUT_GRADIENT__(op_type, op_class, ...) \
   REGISTER_OPERATOR(op_type, op_class, __VA_ARGS__, \
         paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,   \
         paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>)
 
+#define REGISTER_OP_WITHOUT_GRADIENT(op_type, op_class, ...)
 /**
  * Macro to register OperatorKernel.
  */
 #define REGISTER_OP_KERNEL_WITH_CUSTOM_TYPE(op_type, library_type,             \
+                                            place_class, customized_name,      \
+                                            customized_type_value, ...)
+
+#define REGISTER_OP_KERNEL_WITH_CUSTOM_TYPE__(op_type, library_type,             \
                                             place_class, customized_name,      \
                                             customized_type_value, ...)        \
   STATIC_ASSERT_GLOBAL_NAMESPACE(                                              \
S
Shang Zhizhou 已提交
67
@@ -332,18 +331,22 @@ struct OpKernelRegistrarFunctorEx<PlaceType,
68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
     return 0;                                                                  \
   }
 
-#define REGISTER_OP_KERNEL(op_type, library_type, place_class, ...)   \
-  REGISTER_OP_KERNEL_WITH_CUSTOM_TYPE(                                \
+#define REGISTER_OP_KERNEL(op_type, library_type, place_class, ...)
+
+#define REGISTER_OP_KERNEL__(op_type, library_type, place_class, ...)   \
+  REGISTER_OP_KERNEL_WITH_CUSTOM_TYPE__(                                \
       op_type, library_type, place_class, DEFAULT_TYPE,               \
       ::paddle::framework::OpKernelType::kDefaultCustomizedTypeValue, \
       __VA_ARGS__)
 
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+#define REGISTER_OP_CUDA_KERNEL__(op_type, ...) \
+  REGISTER_OP_KERNEL__(op_type, CUDA, ::paddle::platform::CUDAPlace, __VA_ARGS__)
+
 #define REGISTER_OP_CUDA_KERNEL(op_type, ...) \
   REGISTER_OP_KERNEL(op_type, CUDA, ::paddle::platform::CUDAPlace, __VA_ARGS__)
-#else
-#define REGISTER_OP_CUDA_KERNEL(op_type, ...)
-#endif
+
+#define REGISTER_OP_CPU_KERNEL__(op_type, ...) \
+  REGISTER_OP_KERNEL__(op_type, CPU, ::paddle::platform::CPUPlace, __VA_ARGS__)
 
 #define REGISTER_OP_CPU_KERNEL(op_type, ...) \
   REGISTER_OP_KERNEL(op_type, CPU, ::paddle::platform::CPUPlace, __VA_ARGS__)
S
Shang Zhizhou 已提交
96
@@ -361,6 +364,11 @@ struct OpKernelRegistrarFunctorEx<PlaceType,
97 98 99 100 101 102 103 104 105 106 107
   REGISTER_OP_KERNEL(op_type, MLU, ::paddle::platform::MLUPlace, __VA_ARGS__)
 
 #define REGISTER_OP_KERNEL_EX(op_type, library_type, place_class,  \
+                              customized_name,                     \
+                              customized_type_value,               \
+			      ...)
+
+#define REGISTER_OP_KERNEL_EX__(op_type, library_type, place_class,  \
                               customized_name,                     \
                               customized_type_value,               \
                               ...)                                 \
S
Shang Zhizhou 已提交
108
@@ -378,8 +386,10 @@ struct OpKernelRegistrarFunctorEx<PlaceType,
109 110 111 112 113 114 115 116 117 118 119 120
     return 0;                                                                  \
   }
 
-#define REGISTER_OP_CUDA_KERNEL_FUNCTOR(op_type, ...)                 \
-  REGISTER_OP_KERNEL_EX(                                              \
+#define REGISTER_OP_CUDA_KERNEL_FUNCTOR(op_type, ...)
+
+#define REGISTER_OP_CUDA_KERNEL_FUNCTOR__(op_type, ...)                 \
+  REGISTER_OP_KERNEL_EX__(                                              \
       op_type, CUDA, ::paddle::platform::CUDAPlace, DEFAULT_TYPE,     \
       ::paddle::framework::OpKernelType::kDefaultCustomizedTypeValue, \
       __VA_ARGS__)
S
Shang Zhizhou 已提交
121
@@ -396,12 +406,6 @@ struct OpKernelRegistrarFunctorEx<PlaceType,
122 123 124 125 126 127 128 129 130 131 132 133
       ::paddle::framework::OpKernelType::kDefaultCustomizedTypeValue, \
       __VA_ARGS__)
 
-#define REGISTER_OP_NPU_KERNEL_FUNCTOR(op_type, ...)                  \
-  REGISTER_OP_KERNEL_EX(                                              \
-      op_type, NPU, ::paddle::platform::NPUPlace, DEFAULT_TYPE,       \
-      ::paddle::framework::OpKernelType::kDefaultCustomizedTypeValue, \
-      __VA_ARGS__)
-
 #define REGISTER_OP_MLU_KERNEL_FUNCTOR(op_type, ...)                  \
   REGISTER_OP_KERNEL_EX(                                              \
       op_type, MLU, ::paddle::platform::MLUPlace, DEFAULT_TYPE,       \
S
Shang Zhizhou 已提交
134
@@ -413,7 +417,9 @@ struct OpKernelRegistrarFunctorEx<PlaceType,
135 136 137 138 139 140 141 142 143 144
  * we will use and tell the compiler to
  * link them into target.
  */
-#define USE_OP_ITSELF(op_type)                             \
+#define USE_OP_ITSELF(op_type)
+
+#define USE_OP_ITSELF__(op_type)                             \
   STATIC_ASSERT_GLOBAL_NAMESPACE(                          \
       __use_op_itself_##op_type,                           \
       "USE_OP_ITSELF must be called in global namespace"); \
S
Shang Zhizhou 已提交
145
@@ -421,6 +427,10 @@ struct OpKernelRegistrarFunctorEx<PlaceType,
146 147 148 149 150 151 152 153 154 155
   UNUSED static int use_op_itself_##op_type##_ = TouchOpRegistrar_##op_type()
 
 #define USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(op_type,                     \
+                                              LIBRARY_TYPE,                \
+                                              customized_name)
+
+#define USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE__(op_type,                     \
                                               LIBRARY_TYPE,                \
                                               customized_name)             \
   STATIC_ASSERT_GLOBAL_NAMESPACE(                                          \
S
Shang Zhizhou 已提交
156
@@ -431,33 +441,58 @@ struct OpKernelRegistrarFunctorEx<PlaceType,
157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217
   UNUSED static int use_op_kernel_##op_type##_##LIBRARY_TYPE##_##customized_name##_ = /* NOLINT */ \
       TouchOpKernelRegistrar_##op_type##_##LIBRARY_TYPE##_##customized_name()
 
-#define USE_OP_DEVICE_KERNEL(op_type, LIBRARY_TYPE) \
-  USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(op_type, LIBRARY_TYPE, DEFAULT_TYPE)
+#define USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(op_type,                \
+                                              LIBRARY_TYPE,           \
+                                              customized_name)
+
+#define USE_OP_DEVICE_KERNEL__(op_type, LIBRARY_TYPE) \
+  USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE__(op_type, LIBRARY_TYPE, DEFAULT_TYPE)
+
+#define USE_OP_DEVICE_KERNEL(op_type, LIBRARY_TYPE)
 
 // TODO(fengjiayi): The following macros
 // seems ugly, do we have better method?
 
-#if !defined(PADDLE_WITH_CUDA) && !defined(PADDLE_WITH_HIP)
+#ifndef PADDLE_WITH_CUDA
 #define USE_OP_KERNEL(op_type) USE_OP_DEVICE_KERNEL(op_type, CPU)
+#define USE_OP_KERNEL__(op_type) USE_OP_DEVICE_KERNEL__(op_type, CPU)
 #else
 #define USE_OP_KERNEL(op_type)        \
   USE_OP_DEVICE_KERNEL(op_type, CPU); \
   USE_OP_DEVICE_KERNEL(op_type, CUDA)
+
+#define USE_OP_KERNEL__(op_type)        \
+  USE_OP_DEVICE_KERNEL__(op_type, CPU); \
+  USE_OP_DEVICE_KERNEL__(op_type, CUDA)
 #endif
 
 #define USE_NO_KERNEL_OP(op_type) USE_OP_ITSELF(op_type);
 
+#define USE_NO_KERNEL_OP__(op_type) USE_OP_ITSELF__(op_type);
+
 #define USE_CPU_ONLY_OP(op_type) \
   USE_OP_ITSELF(op_type);        \
   USE_OP_DEVICE_KERNEL(op_type, CPU);
 
+#define USE_CPU_ONLY_OP__(op_type) \
+  USE_OP_ITSELF__(op_type);        \
+  USE_OP_DEVICE_KERNEL__(op_type, CPU);
+
 #define USE_CUDA_ONLY_OP(op_type) \
   USE_OP_ITSELF(op_type);         \
   USE_OP_DEVICE_KERNEL(op_type, CUDA)
 
+#define USE_CUDA_ONLY_OP__(op_type) \
+  USE_OP_ITSELF__(op_type);         \
+  USE_OP_DEVICE_KERNEL__(op_type, CUDA)
+
 #define USE_OP(op_type)   \
   USE_OP_ITSELF(op_type); \
   USE_OP_KERNEL(op_type)
+
+#define USE_OP__(op_type)   \
+  USE_OP_ITSELF__(op_type); \
+  USE_OP_KERNEL__(op_type)
 // clang-format on
 
 }  // namespace framework