diff --git a/paddle/operators/math/selected_rows_functor.cc b/paddle/operators/math/selected_rows_functor.cc index 075196b47eeaf118a588b96532d87a05e4e600c6..514f2adef284c8877e2e74b943b4e6419c6ae721 100644 --- a/paddle/operators/math/selected_rows_functor.cc +++ b/paddle/operators/math/selected_rows_functor.cc @@ -145,6 +145,8 @@ struct SelectedRowsAddTo { template struct SelectedRowsAddTo; template struct SelectedRowsAddTo; +template struct SelectedRowsAddTo; +template struct SelectedRowsAddTo; template struct SelectedRowsAddToTensor { @@ -175,6 +177,8 @@ struct SelectedRowsAddToTensor { template struct SelectedRowsAddToTensor; template struct SelectedRowsAddToTensor; +template struct SelectedRowsAddToTensor; +template struct SelectedRowsAddToTensor; } // namespace math } // namespace operators diff --git a/paddle/operators/math/selected_rows_functor.cu b/paddle/operators/math/selected_rows_functor.cu index 47fe3b44a50fee9f41ae807793187258159b9f29..c40649e55ef93dec852ff6949b5cb134495e4ebf 100644 --- a/paddle/operators/math/selected_rows_functor.cu +++ b/paddle/operators/math/selected_rows_functor.cu @@ -173,6 +173,8 @@ struct SelectedRowsAddTo { template struct SelectedRowsAddTo; template struct SelectedRowsAddTo; +template struct SelectedRowsAddTo; +template struct SelectedRowsAddTo; namespace { template @@ -223,6 +225,8 @@ struct SelectedRowsAddToTensor { template struct SelectedRowsAddToTensor; template struct SelectedRowsAddToTensor; +template struct SelectedRowsAddToTensor; +template struct SelectedRowsAddToTensor; } // namespace math } // namespace operators diff --git a/paddle/operators/sum_op.cc b/paddle/operators/sum_op.cc index c2b7632b2865a3ef66051d815d7722a08c6a8cbd..ddc210c26e69566fef9baa20f49ba1052e993b3f 100644 --- a/paddle/operators/sum_op.cc +++ b/paddle/operators/sum_op.cc @@ -176,4 +176,6 @@ namespace ops = paddle::operators; REGISTER_OPERATOR(sum, ops::SumOp, ops::SumOpMaker, ops::SumGradMaker, ops::SumOpVarTypeInference); 
REGISTER_OP_CPU_KERNEL(sum, ops::SumKernel, - ops::SumKernel); + ops::SumKernel, + ops::SumKernel, + ops::SumKernel); diff --git a/paddle/operators/sum_op.cu b/paddle/operators/sum_op.cu index 5cf05b876b6d6a2ce61d9e10b7ec52ed3cef57d7..5c30dd4d470c2e0acecef18524a4a81f9eb786a9 100644 --- a/paddle/operators/sum_op.cu +++ b/paddle/operators/sum_op.cu @@ -14,4 +14,6 @@ limitations under the License. */ namespace ops = paddle::operators; REGISTER_OP_GPU_KERNEL(sum, ops::SumKernel, - ops::SumKernel); + ops::SumKernel, + ops::SumKernel, + ops::SumKernel); diff --git a/paddle/platform/cuda_helper.h b/paddle/platform/cuda_helper.h index a7d99cde106a0a66f122a8c43f49717c03e60dec..376bb0e6887c797c3c1019e92f738a62d01a9c51 100644 --- a/paddle/platform/cuda_helper.h +++ b/paddle/platform/cuda_helper.h @@ -31,6 +31,16 @@ constexpr int PADDLE_CUDA_NUM_THREADS = 512; // For atomicAdd. USE_CUDA_ATOMIC(Add, float); +USE_CUDA_ATOMIC(Add, int); +USE_CUDA_ATOMIC(Add, unsigned int); +USE_CUDA_ATOMIC(Add, unsigned long long int); + +CUDA_ATOMIC_WRAPPER(Add, int64_t) { + static_assert(sizeof(int64_t) == sizeof(long long int), + "long long should be int64"); + return CudaAtomicAdd(reinterpret_cast<unsigned long long int*>(address), + static_cast<unsigned long long int>(val)); +} #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600 USE_CUDA_ATOMIC(Add, double);