diff --git a/paddle/fluid/operators/dropout_op.cu b/paddle/fluid/operators/dropout_op.cu index 4ae9f4ce54d27dd1ad0312b5ad8d78a4cb904c79..a4a96d48f992aaab46332aff81b3a5521b481478 100644 --- a/paddle/fluid/operators/dropout_op.cu +++ b/paddle/fluid/operators/dropout_op.cu @@ -23,24 +23,23 @@ namespace paddle { namespace operators { template -struct MaskGenerator { - AttrType dropout_prob; - int seed; +__global__ void RandomGenerator(const size_t n, const int seed, + const AttrType dropout_prob, const T* src, + T* mask_data, T* dst) { + thrust::minstd_rand rng; + rng.seed(seed); + thrust::uniform_real_distribution dist(0, 1); - __host__ __device__ MaskGenerator(AttrType dropout_prob, int seed) - : dropout_prob(dropout_prob), seed(seed) {} - - inline __host__ __device__ T operator()(const unsigned int n) const { - thrust::minstd_rand rng; - rng.seed(seed); - thrust::uniform_real_distribution dist(0, 1); - rng.discard(n); + int idx = blockDim.x * blockIdx.x + threadIdx.x; + for (; idx < n; idx += blockDim.x * gridDim.x) { if (dist(rng) < dropout_prob) { - return static_cast(0); + mask_data[idx] = static_cast(0); + } else { + mask_data[idx] = static_cast(1); } - return static_cast(1); + dst[idx] = mask_data[idx] * src[idx]; } -}; +} // It seems that Eigen::Tensor::setRandom in GPU will SEGFAULT. // Use std::random and thrust::random(thrust is a std library in CUDA) to @@ -61,18 +60,19 @@ class GPUDropoutKernel : public framework::OpKernel { if (!context.Attr("is_test")) { auto* mask = context.Output("Mask"); auto* mask_data = mask->mutable_data(context.GetPlace()); - int size = framework::product(mask->dims()); + size_t size = framework::product(mask->dims()); + auto* x_data = x->data(); + auto* y_data = y->mutable_data(context.GetPlace()); std::random_device rnd; int seed = context.Attr("fix_seed") ? context.Attr("seed") : rnd(); - thrust::counting_iterator index_sequence_begin(0); - thrust::transform(index_sequence_begin, index_sequence_begin + size, - thrust::device_ptr(mask_data), - MaskGenerator(dropout_prob, seed)); - auto M = EigenMatrix::Reshape(*mask, 1); - Y.device(place) = X * M; + int threads = 512; + int grid = (x->numel() + threads - 1) / threads; + RandomGenerator<<>>( + size, seed, dropout_prob, x_data, mask_data, y_data); } else { Y.device(place) = X * (1.0f - dropout_prob); }