Unverified commit e2d849b9, authored by mapingshuo, committed by GitHub

Dropout with seed (#21590)

* add seed op
Parent: e81f0228
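
A minimal usage sketch of what this change enables (not part of the commit; the program below is hand-built for illustration and assumes the Paddle 1.x `fluid` Program/Block API, with illustrative variable names). A `seed` op writes a single int32 value, and `dropout` reads it through its new dispensable `Seed` input, which takes priority over the `fix_seed`/`seed` attributes, so any two dropout ops wired to the same seed variable produce the same mask:

```python
import paddle.fluid as fluid

# Hand-built program: seed -> dropout, mirroring the op wiring that
# modify_forward_desc_for_recompute() inserts automatically.
program = fluid.Program()
block = program.global_block()

x = block.create_var(name="x", dtype="float32", shape=[4, 8])
seed_var = block.create_var(name="dropout.seed", dtype="int32")
out = block.create_var(name="dropout.out", dtype="float32", shape=[4, 8])
mask = block.create_var(name="dropout.mask", dtype="uint8", shape=[4, 8])

# The seed op emits one int32; attr seed == 0 means "draw a random seed".
block.append_op(type="seed", inputs={}, outputs={"Out": [seed_var]},
                attrs={"seed": 123})
# Dropout now accepts the seed tensor through its dispensable "Seed" input;
# at run time the CPU kernel reads it directly and the GPU kernel copies it
# to the host first.
block.append_op(type="dropout",
                inputs={"X": [x], "Seed": [seed_var]},
                outputs={"Out": [out], "Mask": [mask]},
                attrs={"dropout_prob": 0.5})

print([op.type for op in block.ops])  # ['seed', 'dropout']
```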
@@ -35,12 +35,23 @@ class DropoutOp : public framework::OperatorWithKernel {
     }
     ctx->ShareLoD("X", /*->*/ "Out");
   }
+
+ protected:
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext& ctx) const override {
+    return framework::OpKernelType(
+        OperatorWithKernel::IndicateVarDataType(ctx, "X"), ctx.GetPlace());
+  }
 };

 class DropoutOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   void Make() override {
     AddInput("X", "The input of dropout op.");
+    AddInput("Seed",
+             "The seed of dropout op. It has higher priority than the attrs "
+             "fix_seed and seed.")
+        .AsDispensable();
     AddOutput("Out", "The output of dropout op.");
     AddOutput("Mask", "The random sampled dropout mask.").AsIntermediate();
...
@@ -67,6 +67,8 @@ class GPUDropoutKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
     auto* x = context.Input<Tensor>("X");
+    auto* seed =
+        context.HasInput("Seed") ? context.Input<Tensor>("Seed") : nullptr;
     auto* y = context.Output<Tensor>("Out");
     y->mutable_data<T>(context.GetPlace());
     float dropout_prob = context.Attr<float>("dropout_prob");
@@ -84,6 +86,20 @@ class GPUDropoutKernel : public framework::OpKernel<T> {
       auto* mask_data = mask->mutable_data<uint8_t>(context.GetPlace());
       size_t size = framework::product(mask->dims());
       auto* x_data = x->data<T>();
+      int seed_data;
+      std::random_device rnd;
+      if (seed) {
+        if (platform::is_gpu_place(seed->place())) {
+          framework::Tensor temp;
+          TensorCopySync(*seed, platform::CPUPlace(), &temp);
+          seed_data = *(temp.data<int>());
+        } else {
+          seed_data = *(seed->data<int>());
+        }
+      } else {
+        seed_data =
+            context.Attr<bool>("fix_seed") ? context.Attr<int>("seed") : rnd();
+      }
       auto* y_data = y->mutable_data<T>(context.GetPlace());
       if (dropout_prob == 1.0f) {
         PADDLE_ENFORCE_CUDA_SUCCESS(
@@ -93,14 +109,10 @@ class GPUDropoutKernel : public framework::OpKernel<T> {
         return;
       }
-      std::random_device rnd;
-      int seed =
-          context.Attr<bool>("fix_seed") ? context.Attr<int>("seed") : rnd();
       int threads = 512;
       int grid = (x_numel + threads - 1) / threads;
       RandomGenerator<T, uint8_t><<<grid, threads, 0, stream>>>(
-          size, seed, dropout_prob, x_data, mask_data, y_data,
+          size, seed_data, dropout_prob, x_data, mask_data, y_data,
           upscale_in_train);
     } else {
       auto X = EigenMatrix<T>::Reshape(*x, 1);
...
@@ -33,6 +33,8 @@ class CPUDropoutKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
     auto* x = context.Input<Tensor>("X");
+    auto* seed =
+        context.HasInput("Seed") ? context.Input<Tensor>("Seed") : nullptr;
     auto* y = context.Output<Tensor>("Out");
     const auto* x_data = x->data<T>();
     auto* y_data = y->mutable_data<T>(context.GetPlace());
@@ -57,9 +59,14 @@ class CPUDropoutKernel : public framework::OpKernel<T> {
       // Guarantee to use random seed in training.
       std::random_device rnd;
       std::minstd_rand engine;
-      int seed =
-          context.Attr<bool>("fix_seed") ? context.Attr<int>("seed") : rnd();
-      engine.seed(seed);
+      int seed_data;
+      if (seed) {
+        seed_data = *(seed->data<int>());
+      } else {
+        seed_data =
+            context.Attr<bool>("fix_seed") ? context.Attr<int>("seed") : rnd();
+      }
+      engine.seed(seed_data);
       std::uniform_real_distribution<float> dist(0, 1);
...
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/operators/seed_op.h"

namespace paddle {
namespace operators {

using Tensor = framework::Tensor;

class SeedOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext* ctx) const override {
    ctx->SetOutputDim("Out", {1});
  }

 protected:
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    return framework::OpKernelType(framework::proto::VarType::INT32,
                                   platform::CPUPlace());
  }
};

class SeedOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddOutput("Out", "The output of seed op.");
    AddAttr<int>("seed", "Dropout random seed.").SetDefault(0);
    AddComment(R"DOC(
Seed Operator.
)DOC");
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
REGISTER_OPERATOR(
    seed, ops::SeedOp, ops::SeedOpMaker,
    paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
    paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OP_CPU_KERNEL(
    seed, ops::CPUSeedKernel<paddle::platform::CPUDeviceContext, int>);
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <random>  // for std::random_device

#include "paddle/fluid/framework/op_registry.h"

namespace paddle {
namespace operators {

using Tensor = framework::Tensor;

template <typename DeviceContext, typename T>
class CPUSeedKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    auto* out = context.Output<Tensor>("Out");
    auto* out_data = out->mutable_data<T>(context.GetPlace());
    int user_seed = context.Attr<int>("seed");

    // NOTE: fixed seed should only be used in unittest or for debug.
    // Guarantee to use random seed in training.
    std::random_device rnd;
    int seed;
    if (user_seed != 0) {
      seed = user_seed;
    } else {
      seed = rnd();
    }
    out_data[0] = seed;
  }
};

}  // namespace operators
}  // namespace paddle
@@ -56,7 +56,7 @@ class ProgramStats(object):
     def get_reserved_vars(self):
         var_name = []
         for op in self.ops:
-            if op.desc.type() == "dropout":
+            if op.desc.type() == "seed":
                 var_name.extend(op.desc.output_arg_names())
         return var_name
@@ -136,6 +136,42 @@ class ProgramStats(object):
         sorted_checkpoints = sorted(sorted_checkpoints, key=lambda x: x[1])
         return [x[0] for x in sorted_checkpoints]
+
+    def modify_forward_desc_for_recompute(self):
+        op_types = [op.desc.type() for op in self.ops]
+        if "dropout" not in op_types:
+            return
+
+        op_idx = 0
+        while op_idx < len(self.ops):
+            op = self.ops[op_idx]
+            if op.desc.type() != "dropout":
+                op_idx += 1
+                continue
+            # add a seed op so that the two dropout ops can generate the same output
+            op_unique_name = unique_name.generate("seed")
+            var_unique_name = unique_name.generate_with_ignorable_key(".".join(
+                [op_unique_name, 'tmp']))
+            added_var = self.block.create_var(
+                name=var_unique_name,
+                dtype='int32',
+                type=core.VarDesc.VarType.LOD_TENSOR,
+                persistable=False,
+                stop_gradient=False)
+            seed = 0 if op.attr("fix_seed") is False else int(op.attr("seed"))
+            added_op = self.block._insert_op(
+                index=op.idx,
+                type='seed',
+                inputs={},
+                outputs={'Out': [added_var]},
+                attrs={'seed': seed})
+            self.ops.insert(op_idx, added_op)
+            # modify the dropout op desc so that it accepts a seed var as input
+            op.desc.set_input("Seed", [var_unique_name])
+            op.desc.remove_attr("fix_seed")
+            op.desc.remove_attr("seed")
+            self.block._sync_with_cpp()
+            op_idx += 2

 def _pretty_op_desc_(op_desc, prefix):
     out_s = "%s\tname:[%s]\n%s \tinputs:[%s]\n%s \toutputs:[%s]" % \
@@ -589,6 +625,7 @@ def _append_backward_ops_with_checkpoints_(
         checkpoints: variables that a user defined as checkpoint for forward recomputation
     Algorithms:
+        0) deal with forward recomputing program descs
         1) find ops between checkpoints, i.e. recompute_segments
         2) go through all forward ops and induct all variables that will be held in memory
            a. variables that are used across segments will be held in memory
@@ -609,10 +646,12 @@ def _append_backward_ops_with_checkpoints_(
     checkpoints_name = list(set(checkpoints_name))
     local_block = block.program._create_block()
     buffer_block = block.program._create_block()
+    # 0) deal with forward recomputing program descs
+    # 1) find ops between checkpoints, i.e. recompute_segments
     program_stat = ProgramStats(block, ops)
+    program_stat.modify_forward_desc_for_recompute()
     program_stat.build_stats()
-    # 1) find ops between checkpoints, i.e. recompute_segments
     checkpoints_name = program_stat.sort_checkpoints(checkpoints_name)
     segments = []
...
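
For context, a hedged sketch of how the recompute pass above surfaces through the optimizer API (the network, layer sizes, and checkpoint choice below are illustrative, not part of this change, and assume the fluid 1.x layers/optimizer interfaces also used in the tests below). Because get_reserved_vars now keeps seed-op outputs in memory, the dropout op recomputed in the backward section reads the same seed value and regenerates an identical mask:

```python
import paddle.fluid as fluid
from paddle.fluid import optimizer

main = fluid.Program()
startup = fluid.Program()
with fluid.program_guard(main, startup):
    x = fluid.layers.data(name="x", shape=[64], dtype="float32")
    hidden = fluid.layers.fc(input=x, size=64, act="relu")
    dropped = fluid.layers.dropout(hidden, dropout_prob=0.5)
    pred = fluid.layers.fc(input=dropped, size=1)
    loss = fluid.layers.reduce_mean(pred)

    sgd = optimizer.SGD(learning_rate=0.01)
    recompute = optimizer.RecomputeOptimizer(sgd)
    recompute._set_checkpoints([dropped])
    recompute.minimize(loss)

# The forward dropout is now preceded by a seed op; the dropout recomputed in
# the backward section has no second seed op, because the reserved seed
# output is fed to it again.
print([op.type for op in main.global_block().ops])
```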
@@ -150,6 +150,27 @@ class TestDropoutOp9(OpTest):
         self.check_output()

+class TestDropoutOpWithSeed(OpTest):
+    def setUp(self):
+        self.op_type = "dropout"
+        self.inputs = {
+            "X": np.random.random((32, 64)).astype("float32"),
+            "Seed": np.asarray(
+                [125], dtype="int32")
+        }
+        self.attrs = {'dropout_prob': 0.0, }
+        self.outputs = {
+            'Out': self.inputs['X'],
+            'Mask': np.ones((32, 64)).astype('uint8')
+        }
+
+    def test_check_output(self):
+        self.check_output()
+
+    def test_check_grad_normal(self):
+        self.check_grad(['X'], 'Out', max_relative_error=0.05)
+
 class TestFP16DropoutOp(OpTest):
     def setUp(self):
         self.op_type = "dropout"
...
@@ -614,7 +614,7 @@ class TestLookaheadOptimizer(unittest.TestCase):
 class TestRecomputeOptimizer(unittest.TestCase):
-    def net(self, return_input=False):
+    def net(self, return_input=False, with_dropout=False):
         program = framework.Program()
         block = program.global_block()
         mul_x = block.create_parameter(
@@ -623,6 +623,14 @@ class TestRecomputeOptimizer(unittest.TestCase):
             dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
         mul_out = block.create_var(
             dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
+        if with_dropout == True:
+            mul_out_drop = block.create_var(
+                dtype="float32",
+                shape=[5, 8],
+                lod_level=0,
+                name="mul.out.dropout")
+            mul_out_mask = block.create_var(
+                dtype="uint8", shape=[5, 8], lod_level=0, name="mul.out.mask")
         b1 = block.create_parameter(
             dtype="float32", shape=[5, 8], lod_level=0, name="b1")
         b1_out = block.create_var(
@@ -639,11 +647,24 @@ class TestRecomputeOptimizer(unittest.TestCase):
                     "Y": mul_y},
             outputs={"Out": mul_out},
             attrs={"x_num_col_dims": 1})
-        block.append_op(
-            type="elementwise_add",
-            inputs={"X": mul_out,
-                    "Y": b1},
-            outputs={"Out": b1_out})
+        if with_dropout == True:
+            block.append_op(
+                type='dropout',
+                inputs={'X': [mul_out]},
+                outputs={'Out': [mul_out_drop],
+                         'Mask': [mul_out_mask]},
+                attrs={'dropout_prob': 0.5, })
+            block.append_op(
+                type="elementwise_add",
+                inputs={"X": mul_out_drop,
+                        "Y": b1},
+                outputs={"Out": b1_out})
+        else:
+            block.append_op(
+                type="elementwise_add",
+                inputs={"X": mul_out,
+                        "Y": b1},
+                outputs={"Out": b1_out})
         block.append_op(
             type="elementwise_add",
             inputs={"X": b1_out,
@@ -799,6 +820,29 @@ class TestRecomputeOptimizer(unittest.TestCase):
                 "load function is not supported by Recompute Optimizer for now",
                 cpt.get_exception_message(e))

+    def test_dropout(self):
+        """
+        If there are dropout layers in the forward nets, we should add a
+        seed op so that the recomputed dropout uses the same mask.
+        """
+        mul_out, b1_out, b2_out, mean_out = self.net(with_dropout=True)
+        self.assertEqual(len(mean_out.block.ops), 5)
+        self.assertEqual(
+            [op.type for op in mean_out.block.ops],
+            ["mul", "dropout", "elementwise_add", "elementwise_add", "mean"])
+        sgd_optimizer = optimizer.SGD(learning_rate=1.0)
+        recompute_optimizer = optimizer.RecomputeOptimizer(sgd_optimizer)
+        recompute_optimizer._set_checkpoints([b1_out])
+        opts, params_grads = recompute_optimizer.minimize(mean_out)
+
+        self.assertEqual(len(mean_out.block.ops), 17)
+        self.assertEqual([op.type for op in mean_out.block.ops], [
+            "mul", "seed", "dropout", "elementwise_add", "elementwise_add",
+            "mean", "fill_constant", "mean_grad", "elementwise_add_grad", "mul",
+            "dropout", "elementwise_add_grad", "dropout_grad", "mul_grad",
+            "sgd", "sgd", "sgd"
+        ])
+
 if __name__ == '__main__':
     unittest.main()
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import unittest
import numpy as np
from op_test import OpTest
import paddle.fluid as fluid


class TestSeedOpFixSeed(OpTest):
    def setUp(self):
        self.op_type = "seed"
        self.inputs = {}
        self.attrs = {"seed": 123}
        self.outputs = {"Out": np.asarray((123)).astype('int32')}

    def test_check_output(self):
        self.check_output()


class TestSeedOpDiffSeed(OpTest):
    def setUp(self):
        self.op_type = "seed"
        self.inputs = {}
        self.attrs = {"seed": 0}
        self.outputs = {"Out": np.asarray((123)).astype('int32')}

    def test_check_output(self):
        self.check_output(no_check_set=["Out"])


if __name__ == '__main__':
    unittest.main()