From c077a6d57cf43aa5da1bf7ca378f37bde06b8c11 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Thu, 23 Nov 2017 16:36:10 +0800 Subject: [PATCH] Feature/support int64 for sum (#5832) * Support int64 for sum op * Refine code --- paddle/operators/math/selected_rows_functor.cc | 4 ++++ paddle/operators/math/selected_rows_functor.cu | 4 ++++ paddle/operators/sum_op.cc | 4 +++- paddle/operators/sum_op.cu | 4 +++- paddle/platform/cuda_helper.h | 10 ++++++++++ 5 files changed, 24 insertions(+), 2 deletions(-) diff --git a/paddle/operators/math/selected_rows_functor.cc b/paddle/operators/math/selected_rows_functor.cc index 075196b47ee..514f2adef28 100644 --- a/paddle/operators/math/selected_rows_functor.cc +++ b/paddle/operators/math/selected_rows_functor.cc @@ -145,6 +145,8 @@ struct SelectedRowsAddTo { template struct SelectedRowsAddTo; template struct SelectedRowsAddTo; +template struct SelectedRowsAddTo; +template struct SelectedRowsAddTo; template struct SelectedRowsAddToTensor { @@ -175,6 +177,8 @@ struct SelectedRowsAddToTensor { template struct SelectedRowsAddToTensor; template struct SelectedRowsAddToTensor; +template struct SelectedRowsAddToTensor; +template struct SelectedRowsAddToTensor; } // namespace math } // namespace operators diff --git a/paddle/operators/math/selected_rows_functor.cu b/paddle/operators/math/selected_rows_functor.cu index 47fe3b44a50..c40649e55ef 100644 --- a/paddle/operators/math/selected_rows_functor.cu +++ b/paddle/operators/math/selected_rows_functor.cu @@ -173,6 +173,8 @@ struct SelectedRowsAddTo { template struct SelectedRowsAddTo; template struct SelectedRowsAddTo; +template struct SelectedRowsAddTo; +template struct SelectedRowsAddTo; namespace { template @@ -223,6 +225,8 @@ struct SelectedRowsAddToTensor { template struct SelectedRowsAddToTensor; template struct SelectedRowsAddToTensor; +template struct SelectedRowsAddToTensor; +template struct SelectedRowsAddToTensor; } // namespace math } // namespace operators diff --git a/paddle/operators/sum_op.cc b/paddle/operators/sum_op.cc index c2b7632b286..ddc210c26e6 100644 --- a/paddle/operators/sum_op.cc +++ b/paddle/operators/sum_op.cc @@ -176,4 +176,6 @@ namespace ops = paddle::operators; REGISTER_OPERATOR(sum, ops::SumOp, ops::SumOpMaker, ops::SumGradMaker, ops::SumOpVarTypeInference); REGISTER_OP_CPU_KERNEL(sum, ops::SumKernel, - ops::SumKernel); + ops::SumKernel, + ops::SumKernel, + ops::SumKernel); diff --git a/paddle/operators/sum_op.cu b/paddle/operators/sum_op.cu index 5cf05b876b6..5c30dd4d470 100644 --- a/paddle/operators/sum_op.cu +++ b/paddle/operators/sum_op.cu @@ -14,4 +14,6 @@ limitations under the License. */ namespace ops = paddle::operators; REGISTER_OP_GPU_KERNEL(sum, ops::SumKernel, - ops::SumKernel); + ops::SumKernel, + ops::SumKernel, + ops::SumKernel); diff --git a/paddle/platform/cuda_helper.h b/paddle/platform/cuda_helper.h index a7d99cde106..376bb0e6887 100644 --- a/paddle/platform/cuda_helper.h +++ b/paddle/platform/cuda_helper.h @@ -31,6 +31,16 @@ constexpr int PADDLE_CUDA_NUM_THREADS = 512; // For atomicAdd. USE_CUDA_ATOMIC(Add, float); +USE_CUDA_ATOMIC(Add, int); +USE_CUDA_ATOMIC(Add, unsigned int); +USE_CUDA_ATOMIC(Add, unsigned long long int); + +CUDA_ATOMIC_WRAPPER(Add, int64_t) { + static_assert(sizeof(int64_t) == sizeof(long long int), + "long long should be int64"); + return CudaAtomicAdd(reinterpret_cast(address), + static_cast(val)); +} #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600 USE_CUDA_ATOMIC(Add, double); -- GitLab