From 90dc33b5ff53871563a34b1183dedadcf6b2fce1 Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Wed, 28 Feb 2018 14:25:20 +0800 Subject: [PATCH] Add todo for reduceSum --- paddle/fluid/platform/cuda_helper.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/paddle/fluid/platform/cuda_helper.h b/paddle/fluid/platform/cuda_helper.h index 029ca609a..a4ea4f21e 100644 --- a/paddle/fluid/platform/cuda_helper.h +++ b/paddle/fluid/platform/cuda_helper.h @@ -77,6 +77,12 @@ __forceinline__ __device__ T __shfl_down_sync(unsigned, T val, int delta) { template __device__ T reduceSum(T val, int tid, int len) { + // TODO(zcd): The warp size should be taken from the + // parameters of the GPU rather than simply hard-coded as 32. + // To make reduceSum more efficient, + // I use warp-level parallelism and assume the warp size + // is 32, which may differ between GPUs, + // although most cards' warp size is 32. __shared__ T shm[32]; const int warpSize = 32; unsigned mask = 0u; -- GitLab