Unverified commit 07923ba0, authored by dzhwinter, committed by GitHub

Memory/dropout4 (#8407)

* "merge random generator kernel and mul"

* "fix dropout"
Parent: 91aac572
@@ -23,24 +23,23 @@ namespace paddle {
 namespace operators {
 
 template <typename T, typename AttrType>
-struct MaskGenerator {
-  AttrType dropout_prob;
-  int seed;
-
-  __host__ __device__ MaskGenerator(AttrType dropout_prob, int seed)
-      : dropout_prob(dropout_prob), seed(seed) {}
-
-  inline __host__ __device__ T operator()(const unsigned int n) const {
-    thrust::minstd_rand rng;
-    rng.seed(seed);
-    thrust::uniform_real_distribution<AttrType> dist(0, 1);
-    rng.discard(n);
-    if (dist(rng) < dropout_prob) {
-      return static_cast<T>(0);
-    }
-    return static_cast<T>(1);
-  }
-};
+__global__ void RandomGenerator(const size_t n, const int seed,
+                                const AttrType dropout_prob, const T* src,
+                                T* mask_data, T* dst) {
+  thrust::minstd_rand rng;
+  rng.seed(seed);
+  thrust::uniform_real_distribution<AttrType> dist(0, 1);
+
+  int idx = blockDim.x * blockIdx.x + threadIdx.x;
+  for (; idx < n; idx += blockDim.x * gridDim.x) {
+    if (dist(rng) < dropout_prob) {
+      mask_data[idx] = static_cast<T>(0);
+    } else {
+      mask_data[idx] = static_cast<T>(1);
+    }
+    dst[idx] = mask_data[idx] * src[idx];
+  }
+}
 
 // It seems that Eigen::Tensor::setRandom in GPU will SEGFAULT.
 // Use std::random and thrust::random(thrust is a std library in CUDA) to
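One behavior of the new kernel worth flagging: each thread constructs its own thrust::minstd_rand, seeds it with the same value, and never advances the engine per element, so every thread's first dist(rng) draw within a launch comes from the same stream position. The usual way to decorrelate per-element draws is to skip the engine ahead by the element index. The sketch below is illustrative only, not part of this commit; the kernel name and the rng.discard call are assumptions:

#include <thrust/random.h>

// Illustrative sketch only: one independent uniform draw per element,
// obtained by advancing the engine to stream position idx before sampling.
template <typename T>
__global__ void PerElementUniform(const size_t n, const int seed, T* out) {
  int idx = blockDim.x * blockIdx.x + threadIdx.x;
  for (; idx < n; idx += blockDim.x * gridDim.x) {
    thrust::minstd_rand rng(seed);
    rng.discard(idx);  // element idx reads the idx-th value of the stream
    thrust::uniform_real_distribution<T> dist(0, 1);
    out[idx] = dist(rng);
  }
}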
@@ -61,18 +60,19 @@ class GPUDropoutKernel : public framework::OpKernel<T> {
     if (!context.Attr<bool>("is_test")) {
       auto* mask = context.Output<Tensor>("Mask");
       auto* mask_data = mask->mutable_data<T>(context.GetPlace());
-      int size = framework::product(mask->dims());
+      size_t size = framework::product(mask->dims());
+      auto* x_data = x->data<T>();
+      auto* y_data = y->mutable_data<T>(context.GetPlace());
 
       std::random_device rnd;
       int seed =
           context.Attr<bool>("fix_seed") ? context.Attr<int>("seed") : rnd();
-      thrust::counting_iterator<unsigned int> index_sequence_begin(0);
-      thrust::transform(index_sequence_begin, index_sequence_begin + size,
-                        thrust::device_ptr<T>(mask_data),
-                        MaskGenerator<T, AttrType>(dropout_prob, seed));
-      auto M = EigenMatrix<T>::Reshape(*mask, 1);
-      Y.device(place) = X * M;
+      int threads = 512;
+      int grid = (x->numel() + threads - 1) / threads;
+      RandomGenerator<T, AttrType><<<grid, threads, 0,
+                                     context.cuda_device_context().stream()>>>(
+          size, seed, dropout_prob, x_data, mask_data, y_data);
     } else {
       Y.device(place) = X * (1.0f - dropout_prob);
     }
......
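For reference, the launch configuration above is the standard ceiling-division pattern: enough 512-thread blocks to cover every element, with the kernel's grid-stride loop absorbing any remainder. A minimal host-side sketch of the same computation follows; the wrapper name and the float instantiation are assumptions for illustration, not PaddlePaddle API:

#include <cuda_runtime.h>

// Hypothetical wrapper showing the ceiling-division launch configuration
// used by the kernel call in the diff above.
void LaunchRandomGenerator(size_t n, int seed, float dropout_prob,
                           const float* x_data, float* mask_data,
                           float* y_data, cudaStream_t stream) {
  const int threads = 512;
  const int grid = static_cast<int>((n + threads - 1) / threads);
  RandomGenerator<float, float><<<grid, threads, 0, stream>>>(
      n, seed, dropout_prob, x_data, mask_data, y_data);
}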