Commit a6e6bc45 authored by phlrain

modify dropout att; test=develop

Parent 049c9c7d
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #include "paddle/fluid/operators/dropout_op.h"
+#include <string>
 namespace paddle {
 namespace operators {
@@ -57,15 +58,29 @@ class DropoutOpMaker : public framework::OpProtoAndCheckerMaker {
              "will be dropped.")
         .SetDefault(false);
     AddAttr<int>("seed", "Dropout random seed.").SetDefault(0);
-    AddAttr<bool>("dropout_implementation",
-                  "When it's True, In the training, after set some value"
-                  "to 0 (probability is dropout_prob),"
-                  "all the value will divide (1-dropout_prob)"
-                  "By using this way, will do nothing in the inference program"
-                  "The dropout op can be removed in the inference program."
-                  "The inference program will be more efficient"
-                  "When it's False, same as original")
-        .SetDefault(false);
+    AddAttr<std::string>(
+        "dropout_implementation",
+        "[\"downgrade_in_infer\"|\"upscale_in_train\"] "
+        "There are two ways to implement dropout "
+        "(the mask below is a tensor with the same shape as the input; "
+        "its values are 0 or 1, and the ratio of 0s is dropout_prob): "
+        "1. downgrade_in_infer (default): downgrade the outcome at inference "
+        "time. "
+        "   train: out = input * mask "
+        "   inference: out = input * (1.0 - dropout_prob) "
+        "2. upscale_in_train: upscale the outcome at training time and do "
+        "nothing at inference. "
+        "   train: out = input * mask / (1.0 - dropout_prob) "
+        "   inference: out = input "
+        "With upscale_in_train, the dropout op can be removed from the "
+        "inference program, making it more efficient.")
+        .SetDefault("downgrade_in_infer")
+        .AddCustomChecker([](const std::string& type) {
+          PADDLE_ENFORCE(
+              type == "downgrade_in_infer" || type == "upscale_in_train",
+              "dropout_implementation can only be downgrade_in_infer or "
+              "upscale_in_train");
+        });
     AddComment(R"DOC(
 Dropout Operator.
......
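For reference, the two modes documented by the new attribute can be sketched in a few lines of NumPy. This is an illustrative reference only; the function name, the plain NumPy RNG, and the exact seeding are assumptions and do not mirror the operator's actual random-number generation.

```python
import numpy as np

def dropout_reference(x, dropout_prob, is_test=False,
                      implementation="downgrade_in_infer", seed=0):
    """Sketch of the dropout_implementation semantics (assumes dropout_prob < 1)."""
    if is_test:
        if implementation == "upscale_in_train":
            return x                            # inference: out = input
        return x * (1.0 - dropout_prob)         # inference: out = input * (1 - dropout_prob)
    rng = np.random.RandomState(seed)
    # mask: 0 with probability dropout_prob, 1 otherwise
    mask = (rng.uniform(size=x.shape) >= dropout_prob).astype(x.dtype)
    if implementation == "upscale_in_train":
        return x * mask / (1.0 - dropout_prob)  # train: out = input * mask / (1 - dropout_prob)
    return x * mask                             # train: out = input * mask
```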
@@ -17,6 +17,7 @@ limitations under the License. */
 #include <thrust/iterator/counting_iterator.h>
 #include <thrust/random.h>
 #include <thrust/transform.h>
+#include <string>
 #include "paddle/fluid/operators/dropout_op.h"
 #include "paddle/fluid/platform/float16.h"
@@ -27,7 +28,7 @@ template <typename T>
 __global__ void RandomGenerator(const size_t n, const int seed,
                                 const float dropout_prob, const T* src,
                                 T* mask_data, T* dst,
-                                bool dropout_implementation) {
+                                bool is_upscale_in_train) {
   thrust::minstd_rand rng;
   rng.seed(seed);
   thrust::uniform_real_distribution<float> dist(0, 1);
@@ -48,7 +49,7 @@ __global__ void RandomGenerator(const size_t n, const int seed,
     if (dist(rng) < dropout_prob) {
       mask = static_cast<T>(0);
     } else {
-      if (dropout_implementation) {
+      if (is_upscale_in_train) {
         mask = static_cast<T>(1.0f / (1.0f - dropout_prob));
       } else {
         mask = static_cast<T>(1);
@@ -72,7 +73,8 @@ class GPUDropoutKernel : public framework::OpKernel<T> {
     y->mutable_data<T>(context.GetPlace());
     float dropout_prob = context.Attr<float>("dropout_prob");
-    auto dropout_implementation = context.Attr<bool>("dropout_implementation");
+    auto dropout_implementation =
+        context.Attr<std::string>("dropout_implementation");
     auto& place = *context.template device_context<Place>().eigen_device();
     if (!context.Attr<bool>("is_test")) {
       auto* mask = context.Output<Tensor>("Mask");
@@ -90,11 +92,11 @@ class GPUDropoutKernel : public framework::OpKernel<T> {
       RandomGenerator<
           T><<<grid, threads, 0, context.cuda_device_context().stream()>>>(
           size, seed, dropout_prob, x_data, mask_data, y_data,
-          dropout_implementation);
+          (dropout_implementation == "upscale_in_train"));
     } else {
       auto X = EigenMatrix<T>::Reshape(*x, 1);
       auto Y = EigenMatrix<T>::Reshape(*y, 1);
-      if (dropout_implementation) {
+      if (dropout_implementation == "upscale_in_train") {
         Y.device(place) = X;
       } else {
         Y.device(place) = X * static_cast<T>(1.0f - dropout_prob);
......
@@ -14,6 +14,7 @@ limitations under the License. */
 #pragma once
 #include <random>
+#include <string>
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
@@ -36,7 +37,8 @@ class CPUDropoutKernel : public framework::OpKernel<T> {
     auto* y_data = y->mutable_data<T>(context.GetPlace());
     float dropout_prob = context.Attr<float>("dropout_prob");
-    auto dropout_implementation = context.Attr<bool>("dropout_implementation");
+    auto dropout_implementation =
+        context.Attr<std::string>("dropout_implementation");
     if (!context.Attr<bool>("is_test")) {
       auto* mask = context.Output<Tensor>("Mask");
       auto* mask_data = mask->mutable_data<T>(context.GetPlace());
@@ -57,7 +59,7 @@ class CPUDropoutKernel : public framework::OpKernel<T> {
         mask_data[i] = 0;
         y_data[i] = 0;
       } else {
-        if (dropout_implementation) {
+        if (dropout_implementation == "upscale_in_train") {
           mask_data[i] = 1.0f / static_cast<T>(1.0f - dropout_prob);
           y_data[i] = x_data[i] / static_cast<T>(1.0f - dropout_prob);
         } else {
@@ -71,7 +73,7 @@ class CPUDropoutKernel : public framework::OpKernel<T> {
       auto Y = EigenMatrix<T>::Reshape(*y, 1);
       auto& place =
           *context.template device_context<DeviceContext>().eigen_device();
-      if (dropout_implementation) {
+      if (dropout_implementation == "upscale_in_train") {
         Y.device(place) = X;
       } else {
         Y.device(place) = X * static_cast<T>(1.0f - dropout_prob);
......
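The 1 / (1 - dropout_prob) scaling in the upscale_in_train training path keeps the expected value of the output equal to the input, which is why the inference path can be a plain identity. A small, hypothetical NumPy check of that property (not part of the commit):

```python
import numpy as np

rng = np.random.RandomState(0)
p = 0.35                                   # dropout_prob
x = np.ones(1000000, dtype=np.float32)
mask = (rng.uniform(size=x.shape) >= p).astype(np.float32)

print((x * mask).mean())              # train-time output without upscaling: ~(1 - p) = 0.65
print((x * mask / (1.0 - p)).mean())  # upscale_in_train at train time: ~1.0, matches identity inference
```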
@@ -985,7 +985,7 @@ def dropout(x,
             is_test=False,
             seed=None,
             name=None,
-            dropout_implementation=False):
+            dropout_implementation="downgrade_in_infer"):
     """
     Computes dropout.
@@ -1005,13 +1005,20 @@ def dropout(x,
                     units will be dropped. DO NOT use a fixed seed in training.
         name (str|None): A name for this layer(optional). If set None, the layer
                          will be named automatically.
-        dropout_implementation(bool): A Flag indicating whether divide (1-dropout_prob).
-                                      When it's True, all the units will divide (1-dropout_prob)
-                                      after set some units to zero in the train program.
-                                      And do nothing in the inference program.
-                                      The dropout op can be removed in the inference program.
-                                      The inference program will be more efficient
-                                      When it's False, same as original
+        dropout_implementation(string): ['downgrade_in_infer' (default) | 'upscale_in_train']
+                                        (mask is a tensor with the same shape as the input;
+                                        its values are 0 or 1, and the ratio of 0s is dropout_prob)
+                                        1. downgrade_in_infer (default): downgrade the outcome at inference
+                                           train: out = input * mask
+                                           inference: out = input * (1.0 - dropout_prob)
+                                        2. upscale_in_train: upscale the outcome at training time
+                                           train: out = input * mask / (1.0 - dropout_prob)
+                                           inference: out = input
+                                        With upscale_in_train, the dropout op can be removed from the
+                                        inference program, making the program more efficient.
     Returns:
......
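A usage sketch of the updated Python API follows. It assumes a Paddle Fluid build that already contains this change; the variable names and shapes are illustrative.

```python
import paddle.fluid as fluid

x = fluid.layers.data(name='x', shape=[32, 64], dtype='float32')

# Scale at training time so dropout becomes an identity at inference and the
# op can be dropped from the inference program.
out = fluid.layers.dropout(
    x, dropout_prob=0.5, dropout_implementation='upscale_in_train')
```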
@@ -93,7 +93,7 @@ class TestDropoutOp6(TestDropoutOp):
             'dropout_prob': 1.0,
             'fix_seed': True,
             'is_test': False,
-            'div_prob_in_train': True
+            'dropout_implementation': 'upscale_in_train'
         }
         self.outputs = {
             'Out': np.zeros((32, 64)).astype('float32'),
@@ -109,7 +109,7 @@ class TestDropoutOp7(TestDropoutOp):
             'dropout_prob': 0.0,
             'fix_seed': True,
             'is_test': False,
-            'div_prob_in_train': True
+            'dropout_implementation': 'upscale_in_train'
         }
         self.outputs = {
             'Out': self.inputs['X'],
@@ -125,7 +125,7 @@ class TestDropoutOp8(OpTest):
             'dropout_prob': 0.35,
             'fix_seed': True,
             'is_test': True,
-            'div_prob_in_train': True
+            'dropout_implementation': 'upscale_in_train'
         }
         self.outputs = {'Out': self.inputs['X']}
@@ -140,7 +140,7 @@ class TestDropoutOp9(OpTest):
         self.attrs = {
             'dropout_prob': 0.75,
             'is_test': True,
-            'div_prob_in_train': True
+            'dropout_implementation': 'upscale_in_train'
         }
         self.outputs = {'Out': self.inputs['X']}
......
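The updated tests exercise the upscale_in_train path at its edge cases; the expected outputs can be restated in plain NumPy as below (an illustrative restatement, not the OpTest harness itself).

```python
import numpy as np

x = np.random.random((32, 64)).astype('float32')

# TestDropoutOp6: dropout_prob = 1.0 at training time -> every unit is dropped,
# so Out (and the mask) is all zeros.
out6 = np.zeros((32, 64), dtype='float32')

# TestDropoutOp7: dropout_prob = 0.0 with upscale_in_train -> the mask is all
# ones and the scale is 1 / (1 - 0.0) = 1, so Out equals X.
out7 = x * 1.0 / (1.0 - 0.0)
assert np.allclose(out7, x)

# TestDropoutOp8 / TestDropoutOp9: is_test = True with upscale_in_train -> the
# op is an identity, so Out equals X.
out89 = x
assert np.allclose(out89, x)
```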