/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

   http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License. */

#pragma once
#include <thrust/device_ptr.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/random.h>
#include <thrust/transform.h>
#include <random>
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"

namespace paddle {
namespace operators {

using Tensor = framework::Tensor;
template <typename T, int MajorType = Eigen::RowMajor,
          typename IndexType = Eigen::DenseIndex>
using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;

template <typename Place, typename T>
33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87
class CPUDropoutKernel : public framework::OpKernel {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    auto* x = context.Input<Tensor>("X");
    auto* y = context.Output<Tensor>("Out");
    auto* mask = context.Output<Tensor>("Mask");
    T* mask_data = mask->mutable_data<T>(context.GetPlace());
    T* y_data = y->mutable_data<T>(context.GetPlace());
    const T* x_data = x->data<T>();

    float dropout_prob = context.op_.GetAttr<float>("dropout_prob");
    int seed = context.op_.GetAttr<int>("seed");

    std::minstd_rand engine;
    engine.seed(seed);
    std::uniform_real_distribution<T> dist(0, 1);
    size_t size = framework::product(mask->dims());
    for (size_t i = 0; i < size; ++i) {
      if (dist(engine) < dropout_prob) {
        mask_data[i] = 0;
        y_data[i] = 0;
      } else {
        mask_data[i] = 1;
        y_data[i] = (1 - dropout_prob) * x_data[i];
      }
    }
  }
};

// Device-side functor producing the dropout mask: element n evaluates to
// 0 (dropped) with probability dropout_prob_, and 1 (kept) otherwise.
// Each invocation seeds a fresh RNG and discards n draws, so the value at
// index n is deterministic and independent of thread evaluation order.
template <typename T>
struct MaskGenerator {
  float dropout_prob_;
  int seed_;

  __host__ __device__ MaskGenerator(float dropout_prob, int seed)
      : dropout_prob_(dropout_prob), seed_(seed) {}

  __host__ __device__ T operator()(const unsigned int n) const {
    thrust::minstd_rand engine;
    engine.seed(seed_);
    thrust::uniform_real_distribution<T> uniform(0, 1);
    engine.discard(n);
    return uniform(engine) < dropout_prob_ ? static_cast<T>(0)
                                           : static_cast<T>(1);
  }
};

// It seems that Eigen::Tensor::setRandom in GPU will SEGFAULT.
// Use std::random and thrust::random(thrust is a std library in CUDA) to
// implement uniform random.
template <typename Place, typename T>
class GPUDropoutKernel : public framework::OpKernel {
 public:
  // Dropout forward on GPU: the 0/1 mask is generated with thrust (see
  // MaskGenerator above), then Out = X * Mask * (1 - dropout_prob) is
  // evaluated through Eigen on the device.
  void Compute(const framework::ExecutionContext& context) const override {
    auto* x = context.Input<Tensor>("X");
    auto* y = context.Output<Tensor>("Out");
    auto* mask = context.Output<Tensor>("Mask");
    y->mutable_data<T>(context.GetPlace());

    float dropout_prob = context.op_.GetAttr<float>("dropout_prob");
    int seed = context.op_.GetAttr<int>("seed");

    // Fill the mask: one counting-iterator index -> one mask element.
    int size = framework::product(mask->dims());
    T* mask_data = mask->mutable_data<T>(context.GetPlace());
    thrust::counting_iterator<unsigned int> index_begin(0);
    thrust::transform(index_begin, index_begin + size,
                      thrust::device_ptr<T>(mask_data),
                      MaskGenerator<T>(dropout_prob, seed));

    // Flatten every tensor to 2-D ({dims[0], rest}) so EigenMatrix can
    // address it.
    auto dims = x->dims();
    auto flat_dims = framework::make_ddim({dims[0], size / dims[0]});
    auto X = EigenMatrix<T>::From(*x, flat_dims);
    auto Y = EigenMatrix<T>::From(*y, flat_dims);
    auto M = EigenMatrix<T>::From(*mask, flat_dims);

    auto place = context.GetEigenDevice<Place>();
    // NOTE(review): kept activations are scaled by (1 - dropout_prob) at
    // training time, matching the CPU and gradient kernels.
    Y.device(place) = X * M * (1 - dropout_prob);
  }
};

// Dropout backward: dX = dOut * Mask * (1 - dropout_prob), i.e. gradient
// only flows through elements that were kept in the forward pass, scaled
// by the same factor the forward pass applied.
template <typename Place, typename T>
class DropoutGradKernel : public framework::OpKernel {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    auto* dx_tensor = context.Output<Tensor>(framework::GradVarName("X"));
    auto* dout_tensor = context.Input<Tensor>(framework::GradVarName("Out"));
    auto* mask_tensor = context.Input<Tensor>("Mask");
    dx_tensor->mutable_data<T>(context.GetPlace());

    // Flatten to 2-D for EigenMatrix, mirroring the forward kernels.
    auto dims = dx_tensor->dims();
    int numel = static_cast<int>(framework::product(dims));
    auto flat_dims = framework::make_ddim({dims[0], numel / dims[0]});
    auto mask_mat = EigenMatrix<T>::From(*mask_tensor, flat_dims);
    auto dx_mat = EigenMatrix<T>::From(*dx_tensor, flat_dims);
    auto dout_mat = EigenMatrix<T>::From(*dout_tensor, flat_dims);

    auto place = context.GetEigenDevice<Place>();
    float dropout_prob = context.op_.GetAttr<float>("dropout_prob");
    dx_mat.device(place) = dout_mat * mask_mat * (1 - dropout_prob);
  }
};

}  // namespace operators
}  // namespace paddle