/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/random.h>
#include <thrust/transform.h>
#include "paddle/fluid/framework/generator.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/operators/amp/fp16_type_traits.h"
#include "paddle/fluid/operators/distribution_helper.h"
#include "paddle/fluid/operators/fill_constant_op.h"

DECLARE_bool(use_curand);

namespace paddle {
namespace operators {

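// Thrust functor mapping a sample index n to one draw from N(mean_, std_).
// Each call re-seeds a minstd_rand, skips ahead to position n + offset_, and
// samples once, so the value depends only on (seed_, offset_, n) and not on
// thread scheduling.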
template <typename T>
struct GaussianGenerator {
  T mean_, std_;
  unsigned int seed_;
  unsigned int offset_ = 0;

  __host__ __device__ GaussianGenerator(T mean, T std, int seed)
      : mean_(mean), std_(std), seed_(seed) {}

  __host__ __device__ GaussianGenerator(T mean, T std, int seed, int offset)
      : mean_(mean), std_(std), seed_(seed), offset_(offset) {}

  __host__ __device__ T operator()(const unsigned int n) const {
    thrust::minstd_rand rng;
    rng.seed(seed_);
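    // Sample in the math type (float when T is platform::float16) and cast
    // the result back to T.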
    using MT = typename details::MPTypeTrait<T>::Type;
    thrust::normal_distribution<MT> dist(mean_, std_);
    unsigned int new_n = n + offset_;
    rng.discard(new_n);
    MT out = dist(rng);
    return static_cast<T>(out);
  }
};

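// CUDA kernel for gaussian_random: resizes Out to the requested shape and
// fills it with samples drawn from N(mean, std).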
template <typename T>
class GPUGaussianRandomKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    auto* tensor = context.Output<framework::Tensor>("Out");
    unsigned int seed = static_cast<unsigned int>(context.Attr<int>("seed"));
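    // A seed attribute of 0 means no fixed seed was requested; draw one from
    // std::random_device and remember this so the global generator can be
    // used below.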
    bool seed_flag = false;
    if (seed == 0) {
      std::random_device rd;
      seed = rd();
      seed_flag = true;
    }
    T mean = static_cast<T>(context.Attr<float>("mean"));
    T std = static_cast<T>(context.Attr<float>("std"));
    thrust::counting_iterator<int64_t> index_sequence_begin(0);
    auto shape = GetShape(context);
    tensor->Resize(shape);

    auto& dev_cxt =
        context.template device_context<platform::CUDADeviceContext>();
    T* data = tensor->mutable_data<T>(dev_cxt.GetPlace());

    int64_t size = tensor->numel();

    int device_id = context.GetPlace().GetDeviceId();
    auto gen_cuda = framework::GetDefaultCUDAGenerator(device_id);

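    // Prefer the per-device generator initialized from the Python side when
    // no fixed seed was requested. FLAGS_use_curand selects the
    // distribution_helper path; otherwise samples are drawn through the
    // Thrust functor with an offset reserved on the generator.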
    if (gen_cuda->GetIsInitPy() && seed_flag) {
      if (FLAGS_use_curand) {
        using MT = typename details::MPTypeTrait<T>::Type;
        distribution::normal_distribution<MT> dist;
        distribution::normal_transform<MT> trans(mean, std);
        distribution::distribution_and_transform<T>(dev_cxt, tensor, dist,
                                                    trans);
      } else {
        auto seed_offset = gen_cuda->IncrementOffset(1);
        int64_t gen_offset = size * seed_offset.second;
        thrust::transform(
            index_sequence_begin, index_sequence_begin + size,
            thrust::device_ptr<T>(data),
            GaussianGenerator<T>(mean, std, seed_offset.first, gen_offset));
      }
    } else {
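      // A fixed seed was given or the global generator is not initialized:
      // sample directly with the per-op seed and zero offset.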
      thrust::transform(index_sequence_begin, index_sequence_begin + size,
                        thrust::device_ptr<T>(data),
                        GaussianGenerator<T>(mean, std, seed));
    }
  }
};

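// Variant used by gaussian_random_batch_size_like. The output tensor is
// already shaped by the op, so the kernel only fills it, using the same
// sampling logic as GPUGaussianRandomKernel.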
template <typename T>
class GPUGaussianRandomBatchSizeLikeKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    auto* tensor = context.Output<framework::Tensor>("Out");
    T* data = tensor->mutable_data<T>(context.GetPlace());
    unsigned int seed = static_cast<unsigned int>(context.Attr<int>("seed"));
    bool seed_flag = false;
    if (seed == 0) {
      std::random_device rd;
      seed = rd();
      seed_flag = true;
    }
    T mean = static_cast<T>(context.Attr<float>("mean"));
    T std = static_cast<T>(context.Attr<float>("std"));
    thrust::counting_iterator<int64_t> index_sequence_begin(0);
    int64_t size = tensor->numel();

    int device_id = context.GetPlace().GetDeviceId();
    auto gen_cuda = framework::GetDefaultCUDAGenerator(device_id);

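    // Same seeding policy as above: use the Python-initialized generator
    // when no fixed seed was given, otherwise fall back to the per-op seed.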
    if (gen_cuda->GetIsInitPy() && seed_flag) {
      auto seed_offset = gen_cuda->IncrementOffset(1);
      int64_t gen_offset = size * seed_offset.second;
      thrust::transform(index_sequence_begin, index_sequence_begin + size,
                        thrust::device_ptr<T>(data),
                        GaussianGenerator<T>(mean, std, seed_offset.first,
                                             seed_offset.second));
    } else {
      thrust::transform(index_sequence_begin, index_sequence_begin + size,
                        thrust::device_ptr<T>(data),
                        GaussianGenerator<T>(mean, std, seed));
    }
  }
};
}  // namespace operators
}  // namespace paddle

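// Register the CUDA kernels for float16, float, and double element types.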
REGISTER_OP_CUDA_KERNEL(
    gaussian_random,
    paddle::operators::GPUGaussianRandomKernel<paddle::platform::float16>,
    paddle::operators::GPUGaussianRandomKernel<float>,
    paddle::operators::GPUGaussianRandomKernel<double>);
REGISTER_OP_CUDA_KERNEL(
    gaussian_random_batch_size_like,
    paddle::operators::GPUGaussianRandomBatchSizeLikeKernel<
        paddle::platform::float16>,
    paddle::operators::GPUGaussianRandomBatchSizeLikeKernel<float>,
    paddle::operators::GPUGaussianRandomBatchSizeLikeKernel<double>);