/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

   http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License. */

#pragma once
#include <thrust/device_ptr.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/random.h>
#include <thrust/transform.h>
#include <random>
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"

namespace paddle {
namespace operators {

using Tensor = framework::Tensor;
template <typename T, int MajorType = Eigen::RowMajor,
          typename IndexType = Eigen::DenseIndex>
using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;

template <typename Place, typename T>
33 34 35 36 37 38 39 40 41 42
class CPUDropoutKernel : public framework::OpKernel {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    auto* x = context.Input<Tensor>("X");
    auto* y = context.Output<Tensor>("Out");
    auto* mask = context.Output<Tensor>("Mask");
    T* mask_data = mask->mutable_data<T>(context.GetPlace());
    T* y_data = y->mutable_data<T>(context.GetPlace());
    const T* x_data = x->data<T>();

43 44
    float dropout_prob = context.GetAttr<float>("dropout_prob");
    int seed = context.GetAttr<int>("seed");
45 46 47 48 49 50 51 52 53 54 55

    std::minstd_rand engine;
    engine.seed(seed);
    std::uniform_real_distribution<T> dist(0, 1);
    size_t size = framework::product(mask->dims());
    for (size_t i = 0; i < size; ++i) {
      if (dist(engine) < dropout_prob) {
        mask_data[i] = 0;
        y_data[i] = 0;
      } else {
        mask_data[i] = 1;
56
        y_data[i] = x_data[i];
57 58
      }
    }
59
    // TODO: add test time logits.
60 61 62 63 64
  }
};

template <typename T>
struct MaskGenerator {
65 66
  float dropout_prob;
  int seed;
67 68

  __host__ __device__ MaskGenerator(float dropout_prob, int seed)
69
      : dropout_prob(dropout_prob), seed(seed) {}
70 71 72

  __host__ __device__ T operator()(const unsigned int n) const {
    thrust::minstd_rand rng;
73
    rng.seed(seed);
74 75
    thrust::uniform_real_distribution<T> dist(0, 1);
    rng.discard(n);
76
    if (dist(rng) < dropout_prob) {
77 78 79 80 81 82 83 84 85 86 87 88
      return static_cast<T>(0);
    } else {
      return static_cast<T>(1);
    }
  }
};

// It seems that Eigen::Tensor::setRandom in GPU will SEGFAULT.
// Use std::random and thrust::random(thrust is a std library in CUDA) to
// implement uniform random.
template <typename Place, typename T>
class GPUDropoutKernel : public framework::OpKernel {
 public:
  // Forward dropout on GPU: generates the 0/1 mask with thrust (one
  // MaskGenerator evaluation per element), then computes Out = X * Mask
  // as a flattened 2-D Eigen expression on the device.
  void Compute(const framework::ExecutionContext& context) const override {
    auto* x = context.Input<Tensor>("X");
    auto* y = context.Output<Tensor>("Out");
    auto* mask = context.Output<Tensor>("Mask");
    y->mutable_data<T>(context.GetPlace());

    float dropout_prob = context.GetAttr<float>("dropout_prob");
    int seed = context.GetAttr<int>("seed");

    // Fill the mask: each element index is fed through MaskGenerator.
    int num_elements = framework::product(mask->dims());
    T* mask_data = mask->mutable_data<T>(context.GetPlace());
    thrust::counting_iterator<unsigned int> first_index(0);
    thrust::transform(first_index, first_index + num_elements,
                      thrust::device_ptr<T>(mask_data),
                      MaskGenerator<T>(dropout_prob, seed));

    // Flatten all tensors to (batch, rest) so Eigen can multiply them
    // elementwise regardless of the original rank.
    auto dims = x->dims();
    auto new_dims = framework::make_ddim({dims[0], num_elements / dims[0]});
    auto X = EigenMatrix<T>::From(*x, new_dims);
    auto Y = EigenMatrix<T>::From(*y, new_dims);
    auto M = EigenMatrix<T>::From(*mask, new_dims);

    auto place = context.GetEigenDevice<Place>();
    Y.device(place) = X * M;
    // TODO: add test time logits.
  }
};

template <typename Place, typename T>
class DropoutGradKernel : public framework::OpKernel {
 public:
  // Backward dropout: gradient only flows through the elements that were
  // kept in the forward pass, i.e. dX = dOut * Mask.
  void Compute(const framework::ExecutionContext& context) const override {
    auto* grad_x = context.Output<Tensor>(framework::GradVarName("X"));
    auto* grad_y = context.Input<Tensor>(framework::GradVarName("Out"));
    auto* mask = context.Input<Tensor>("Mask");
    grad_x->mutable_data<T>(context.GetPlace());

    // Flatten to (batch, rest) for a rank-agnostic elementwise product.
    auto dims = grad_x->dims();
    int num_elements = static_cast<int>(framework::product(dims));
    auto new_dims = framework::make_ddim({dims[0], num_elements / dims[0]});
    auto M = EigenMatrix<T>::From(*mask, new_dims);
    auto dX = EigenMatrix<T>::From(*grad_x, new_dims);
    auto dY = EigenMatrix<T>::From(*grad_y, new_dims);

    auto place = context.GetEigenDevice<Place>();
    dX.device(place) = dY * M;
    // TODO: add test time logits.
  }
};

}  // namespace operators
}  // namespace paddle