未验证 提交 07923ba0 编写于 作者: D dzhwinter 提交者: GitHub

Memory/dropout4 (#8407)

* "merge random generator kernel and mul"

* "fix dropout"
上级 91aac572
......@@ -23,24 +23,23 @@ namespace paddle {
namespace operators {
template <typename T, typename AttrType>
struct MaskGenerator {
AttrType dropout_prob;
int seed;
__global__ void RandomGenerator(const size_t n, const int seed,
const AttrType dropout_prob, const T* src,
T* mask_data, T* dst) {
thrust::minstd_rand rng;
rng.seed(seed);
thrust::uniform_real_distribution<AttrType> dist(0, 1);
__host__ __device__ MaskGenerator(AttrType dropout_prob, int seed)
: dropout_prob(dropout_prob), seed(seed) {}
inline __host__ __device__ T operator()(const unsigned int n) const {
thrust::minstd_rand rng;
rng.seed(seed);
thrust::uniform_real_distribution<AttrType> dist(0, 1);
rng.discard(n);
int idx = blockDim.x * blockIdx.x + threadIdx.x;
for (; idx < n; idx += blockDim.x * gridDim.x) {
if (dist(rng) < dropout_prob) {
return static_cast<T>(0);
mask_data[idx] = static_cast<T>(0);
} else {
mask_data[idx] = static_cast<T>(1);
}
return static_cast<T>(1);
dst[idx] = mask_data[idx] * src[idx];
}
};
}
// It seems that Eigen::Tensor::setRandom in GPU will SEGFAULT.
// Use std::random and thrust::random(thrust is a std library in CUDA) to
......@@ -61,18 +60,19 @@ class GPUDropoutKernel : public framework::OpKernel<T> {
if (!context.Attr<bool>("is_test")) {
auto* mask = context.Output<Tensor>("Mask");
auto* mask_data = mask->mutable_data<T>(context.GetPlace());
int size = framework::product(mask->dims());
size_t size = framework::product(mask->dims());
auto* x_data = x->data<T>();
auto* y_data = y->mutable_data<T>(context.GetPlace());
std::random_device rnd;
int seed =
context.Attr<bool>("fix_seed") ? context.Attr<int>("seed") : rnd();
thrust::counting_iterator<unsigned int> index_sequence_begin(0);
thrust::transform(index_sequence_begin, index_sequence_begin + size,
thrust::device_ptr<T>(mask_data),
MaskGenerator<T, AttrType>(dropout_prob, seed));
auto M = EigenMatrix<T>::Reshape(*mask, 1);
Y.device(place) = X * M;
int threads = 512;
int grid = (x->numel() + threads - 1) / threads;
RandomGenerator<T, AttrType><<<grid, threads, 0,
context.cuda_device_context().stream()>>>(
size, seed, dropout_prob, x_data, mask_data, y_data);
} else {
Y.device(place) = X * (1.0f - dropout_prob);
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册