Move the logsumexp_grad CUDA kernel registration into a separate .part.cu file to speed up compilation (#27044)

19ca6d9d · zhupengyang · GitHub · fab8bbf2 · 19ca6d9d · 19ca6d9d
2 changed files
--- a/paddle/fluid/operators/reduce_ops/logsumexp_op.cu
+++ b/paddle/fluid/operators/reduce_ops/logsumexp_op.cu
@@ -12,7 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#include "paddle/fluid/operators/reduce_ops/cub_reduce.h"
 #include "paddle/fluid/operators/reduce_ops/logsumexp_op.h"

 REGISTER_OP_CUDA_KERNEL(logsumexp,
@@ -20,8 +19,3 @@ REGISTER_OP_CUDA_KERNEL(logsumexp,
                                          float, ops::LogsumexpFunctor>,
                        ops::ReduceKernel<paddle::platform::CUDADeviceContext,
                                          double, ops::LogsumexpFunctor>);
-REGISTER_OP_CUDA_KERNEL(
-    logsumexp_grad, ops::ReduceGradKernel<paddle::platform::CUDADeviceContext,
-                                          float, ops::LogsumexpGradFunctor>,
-    ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, double,
-                          ops::LogsumexpGradFunctor>);
--- a/paddle/fluid/operators/reduce_ops/logsumexp_op.part.cu
+++ b/paddle/fluid/operators/reduce_ops/logsumexp_op.part.cu
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
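+// This .part.cu file holds the logsumexp_grad kernel registration separately
+// from logsumexp_op.cu in order to speed up nvcc compilation.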
+#include "paddle/fluid/operators/reduce_ops/logsumexp_op.h"
+
+REGISTER_OP_CUDA_KERNEL(  // CUDA kernels for the op named logsumexp_grad
+    logsumexp_grad, ops::ReduceGradKernel<paddle::platform::CUDADeviceContext,
+                                          float, ops::LogsumexpGradFunctor>,  // float instantiation
+    ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, double,
+                          ops::LogsumexpGradFunctor>);  // double instantiation