From 07923ba006220bf39ebd9fcf19c6b930012e5139 Mon Sep 17 00:00:00 2001 From: dzhwinter <dongzhihong01@baidu.com> Date: Mon, 12 Feb 2018 17:56:15 +0800 Subject: [PATCH] Memory/dropout4 (#8407) * "merge random generator kernel and mul" * "fix dropout" --- paddle/fluid/operators/dropout_op.cu | 42 ++++++++++++++-------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/paddle/fluid/operators/dropout_op.cu b/paddle/fluid/operators/dropout_op.cu index 4ae9f4ce54d..a4a96d48f99 100644 --- a/paddle/fluid/operators/dropout_op.cu +++ b/paddle/fluid/operators/dropout_op.cu @@ -23,24 +23,23 @@ namespace paddle { namespace operators { template <typename T, typename AttrType> -struct MaskGenerator { - AttrType dropout_prob; - int seed; +__global__ void RandomGenerator(const size_t n, const int seed, + const AttrType dropout_prob, const T* src, + T* mask_data, T* dst) { + thrust::minstd_rand rng; + rng.seed(seed); + thrust::uniform_real_distribution<AttrType> dist(0, 1); - __host__ __device__ MaskGenerator(AttrType dropout_prob, int seed) - : dropout_prob(dropout_prob), seed(seed) {} - - inline __host__ __device__ T operator()(const unsigned int n) const { - thrust::minstd_rand rng; - rng.seed(seed); - thrust::uniform_real_distribution<AttrType> dist(0, 1); - rng.discard(n); + int idx = blockDim.x * blockIdx.x + threadIdx.x; + for (; idx < n; idx += blockDim.x * gridDim.x) { if (dist(rng) < dropout_prob) { - return static_cast<T>(0); + mask_data[idx] = static_cast<T>(0); + } else { + mask_data[idx] = static_cast<T>(1); } - return static_cast<T>(1); + dst[idx] = mask_data[idx] * src[idx]; } -}; +} // It seems that Eigen::Tensor::setRandom in GPU will SEGFAULT. // Use std::random and thrust::random(thrust is a std library in CUDA) to @@ -61,18 +60,19 @@ class GPUDropoutKernel : public framework::OpKernel<T> { if (!context.Attr<bool>("is_test")) { auto* mask = context.Output<Tensor>("Mask"); auto* mask_data = mask->mutable_data<T>(context.GetPlace()); - int size = framework::product(mask->dims()); + size_t size = framework::product(mask->dims()); + auto* x_data = x->data<T>(); + auto* y_data = y->mutable_data<T>(context.GetPlace()); std::random_device rnd; int seed = context.Attr<bool>("fix_seed") ? context.Attr<int>("seed") : rnd(); - thrust::counting_iterator<unsigned int> index_sequence_begin(0); - thrust::transform(index_sequence_begin, index_sequence_begin + size, - thrust::device_ptr<T>(mask_data), - MaskGenerator<T, AttrType>(dropout_prob, seed)); - auto M = EigenMatrix<T>::Reshape(*mask, 1); - Y.device(place) = X * M; + int threads = 512; + int grid = (x->numel() + threads - 1) / threads; + RandomGenerator<T, AttrType><<<grid, threads, 0, + context.cuda_device_context().stream()>>>( + size, seed, dropout_prob, x_data, mask_data, y_data); } else { Y.device(place) = X * (1.0f - dropout_prob); } -- GitLab