Unverified commit 9a9c690e authored by Xin Pan, committed by GitHub

Merge pull request #15343 from panyx0718/imperative3

add a GAN model in imperative mode
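For orientation, here is a condensed sketch of the imperative (dynamic-graph) training step this PR enables. It is distilled from the `test_imperative_gan.py` file added further down in this diff; every name used below (`fluid.imperative.guard`, `to_variable`, `FC`, `_backward`, `clear_gradients`) comes from that test, and the snippet is an editor's illustration rather than part of the commit.

```python
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.optimizer import SGDOptimizer
from paddle.fluid.imperative.nn import FC
from paddle.fluid.imperative.base import to_variable


class Discriminator(fluid.imperative.Layer):
    def __init__(self):
        super(Discriminator, self).__init__()
        self._fc1 = FC(size=32, act='elu', name="d_fc1")
        self._fc2 = FC(size=1, name="d_fc2")

    def parameters(self):
        return self._fc1.parameters() + self._fc2.parameters()

    def forward(self, inputs):
        return self._fc2(self._fc1(inputs))


with fluid.imperative.guard():
    discriminator = Discriminator()
    sgd = SGDOptimizer(learning_rate=1e-3)

    # Ops execute eagerly and are traced, so the loss can run its own backward.
    d_real = discriminator(to_variable(np.ones([2, 1], np.float32)))
    d_loss = fluid.layers.reduce_mean(
        fluid.layers.sigmoid_cross_entropy_with_logits(
            x=d_real, label=to_variable(np.ones([2, 1], np.float32))))

    d_loss._backward()
    sgd.minimize(d_loss)
    discriminator.clear_gradients()  # new in this PR: reset grads between steps
```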
......@@ -57,15 +57,15 @@ class Autograd {
Autograd() {}
void RunBackward(VarBase* var) {
if (var->stop_gradient_) {
if (var->IsStopGradient()) {
return;
}
VLOG(3) << "start autograd";
std::deque<OpBase*> ready;
ready.push_back(var->pre_op_);
ready.push_back(var->PreOp());
std::map<OpBase*, int> dep_counts = ComputeDepCounts(var->pre_op_);
std::map<OpBase*, int> dep_counts = ComputeDepCounts(var->PreOp());
while (!ready.empty()) {
OpBase* ready_op = ready.front();
......@@ -77,7 +77,7 @@ class Autograd {
const std::vector<VarBase*>& ingrads = it.second;
for (size_t i = 0; i < ingrads.size(); ++i) {
if (!ingrads[i]) continue;
if (ready_op->input_vars_[it.first][i]->stop_gradient_) {
if (ready_op->input_vars_[it.first][i]->IsStopGradient()) {
continue;
}
OpBase* pre_op = ready_op->pre_ops_[it.first][i];
......
......@@ -100,22 +100,20 @@ class VarBase {
// Owns `var` and `grad`
VarBase(framework::Variable* var, VarBase* grad)
: pre_op_(nullptr),
pre_op_out_name_(),
pre_op_out_idx_(-1),
var_desc_(nullptr),
: var_desc_(nullptr),
var_(var),
grads_(grad),
stop_gradient_(false) {}
stop_gradient_(false),
pre_op_(nullptr),
pre_op_out_idx_(-1) {}
explicit VarBase(bool stop_gradient)
: pre_op_(nullptr),
pre_op_out_name_(),
pre_op_out_idx_(-1),
var_desc_(nullptr),
: var_desc_(nullptr),
var_(new framework::Variable()),
grads_(stop_gradient ? nullptr : new VarBase(true)),
stop_gradient_(stop_gradient) {}
stop_gradient_(stop_gradient),
pre_op_(nullptr),
pre_op_out_idx_(-1) {}
virtual ~VarBase() {
if (var_) {
......@@ -127,8 +125,27 @@ class VarBase {
}
}
OpBase* PreOp() const { return pre_op_; }
int PreOpOutIdx() const { return pre_op_out_idx_; }
void SetStopGradient(bool stop_gradient) { stop_gradient_ = stop_gradient; }
bool IsStopGradient() const { return stop_gradient_; }
void RunBackward();
void TrackPreOp(OpBase* pre_op, const std::string& pre_op_out_name,
int pre_op_out_idx, bool stop_gradient) {
pre_op_ = pre_op;
pre_op_out_name_ = pre_op_out_name;
pre_op_out_idx_ = pre_op_out_idx;
stop_gradient_ = stop_gradient;
}
void ClearGradient() {
delete grads_;
grads_ = new VarBase(true);
}
framework::LoDTensor& GradValue();
inline std::string GradName() const {
......@@ -138,16 +155,16 @@ class VarBase {
return string::Sprintf("%s@IGrad", var_desc_->Name());
}
OpBase* pre_op_;
std::string pre_op_out_name_;
int pre_op_out_idx_;
framework::VarDesc* var_desc_;
framework::Variable* var_;
VarBase* grads_;
private:
bool stop_gradient_;
OpBase* pre_op_;
std::string pre_op_out_name_;
int pre_op_out_idx_;
};
/* The wrapper for OpDesc which holds a OpDesc and a OpDesc of its
......
......@@ -63,9 +63,9 @@ void Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
invars.push_back(inp->var_);
vars[inp->var_desc_->Name()] = inp;
if (inp->pre_op_) {
op->pre_ops_[it.first].push_back(inp->pre_op_);
op->pre_ops_out_idx_[it.first].push_back(inp->pre_op_out_idx_);
if (inp->PreOp()) {
op->pre_ops_[it.first].push_back(inp->PreOp());
op->pre_ops_out_idx_[it.first].push_back(inp->PreOpOutIdx());
} else {
op->pre_ops_[it.first].push_back(nullptr);
}
......@@ -89,10 +89,7 @@ void Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
} else {
LOG(ERROR) << "tracer doesn't support yet";
}
out->stop_gradient_ = stop_gradient;
out->pre_op_ = op;
out->pre_op_out_name_ = it.first;
out->pre_op_out_idx_ = i;
out->TrackPreOp(op, it.first, i, stop_gradient);
VLOG(3) << "output vname " << out->var_desc_->Name() << " "
<< out->var_->IsInitialized();
......@@ -167,9 +164,9 @@ std::vector<VarBase*> Tracer::PyTrace(OpBase* op,
op->input_vars_[PyLayer::kFwdInp] = inputs;
op->output_vars_[PyLayer::kFwdOut] = PyLayer::Apply(op->forward_id_, inputs);
for (VarBase* inp : inputs) {
if (inp->pre_op_) {
op->pre_ops_[PyLayer::kFwdInp].push_back(inp->pre_op_);
op->pre_ops_out_idx_[PyLayer::kFwdInp].push_back(inp->pre_op_out_idx_);
if (inp->PreOp()) {
op->pre_ops_[PyLayer::kFwdInp].push_back(inp->PreOp());
op->pre_ops_out_idx_[PyLayer::kFwdInp].push_back(inp->PreOpOutIdx());
} else {
op->pre_ops_[PyLayer::kFwdInp].push_back(nullptr);
}
......@@ -178,10 +175,7 @@ std::vector<VarBase*> Tracer::PyTrace(OpBase* op,
auto& outputs = op->output_vars_[PyLayer::kFwdOut];
for (size_t i = 0; i < outputs.size(); ++i) {
VarBase* out = outputs[i];
out->stop_gradient_ = stop_gradient;
out->pre_op_ = op;
out->pre_op_out_name_ = PyLayer::kFwdOut;
out->pre_op_out_idx_ = i;
out->TrackPreOp(op, PyLayer::kFwdOut, i, stop_gradient);
}
if (!stop_gradient) {
auto& grad_input_vars =
......
......@@ -133,6 +133,7 @@ PYBIND11_MODULE(core, m) {
[](imperative::VarBase &self) { self.RunBackward(); })
.def("_grad_name", &imperative::VarBase::GradName)
.def("_grad_value", &imperative::VarBase::GradValue)
.def("_clear_gradient", &imperative::VarBase::ClearGradient)
.def("_grad_ivar",
[](const imperative::VarBase &self) { return self.grads_; },
py::return_value_policy::reference)
......@@ -147,9 +148,9 @@ PYBIND11_MODULE(core, m) {
py::return_value_policy::reference)
.def_property(
"stop_gradient",
[](const imperative::VarBase &self) { return self.stop_gradient_; },
[](const imperative::VarBase &self) { return self.IsStopGradient(); },
[](imperative::VarBase &self, bool stop_gradient) {
self.stop_gradient_ = stop_gradient;
self.SetStopGradient(stop_gradient);
});
py::class_<imperative::OpBase, PyOpBase>(m, "OpBase", R"DOC()DOC")
......
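A minimal sketch of the Python surface these bindings add, assuming it is used from an imperative `Variable` the way the rest of this diff does (`_ivar` is the wrapped `imperative::VarBase`):

```python
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.imperative.base import to_variable

with fluid.imperative.guard():
    x = to_variable(np.ones([2, 2], np.float32))

    # The stop_gradient property now routes through SetStopGradient()/
    # IsStopGradient() instead of writing the (now private) stop_gradient_ field.
    x._ivar.stop_gradient = True
    print(x._ivar.stop_gradient)  # True

    # New "_clear_gradient" binding -> VarBase::ClearGradient, which replaces
    # grads_ with a fresh VarBase.
    x._clear_gradient()
```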
......@@ -389,6 +389,9 @@ class Variable(object):
def _gradient(self):
return np.array(self._ivar._grad_value())
def _clear_gradient(self):
self._ivar._clear_gradient()
def __str__(self):
return self.to_string(True)
......
......@@ -27,18 +27,25 @@ class Layer(core.Layer):
"""Layers composed of operators."""
def __init__(self, dtype=core.VarDesc.VarType.FP32, name=None):
self._once_built = False
self._built = False
self._dtype = dtype
def parameters(self):
return []
def clear_gradients(self):
for p in self.parameters():
p._clear_gradient()
def _build_once(self, inputs):
pass
def __call__(self, *inputs):
if not self._once_built:
if not self._built:
self._build_once(*inputs)
self._once_built = True
outputs = self.forward(*inputs)
self._built = True
return outputs
def forward(self, *inputs):
......
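A small usage sketch of the renamed `_built` flag and the new `clear_gradients()` helper, under the same imperative setup as the GAN test below (the layer size and input shape here are illustrative):

```python
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.imperative.nn import FC
from paddle.fluid.imperative.base import to_variable

with fluid.imperative.guard():
    fc = FC(size=4, name="demo_fc")
    # Parameters are created lazily: _build_once runs on the first __call__,
    # so parameters() is only meaningful after the layer has seen an input.
    out = fc(to_variable(np.ones([2, 3], np.float32)))
    loss = fluid.layers.reduce_mean(out)

    loss._backward()
    fc.clear_gradients()  # iterates parameters() and calls p._clear_gradient()
```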
......@@ -48,6 +48,7 @@ class Conv2D(layers.Layer):
assert param_attr is not False, "param_attr should not be False here."
super(Conv2D, self).__init__(name=name, dtype=dtype)
# TODO(minqiyang): Move this to the top.
from ..layer_helper import LayerHelper
self._helper = LayerHelper(
type(self).__name__,
......@@ -209,14 +210,25 @@ class FC(layers.Layer):
def __init__(self,
size,
param_attr=None,
bias_attr=None,
num_flatten_dims=1,
dtype=core.VarDesc.VarType.FP32):
dtype=core.VarDesc.VarType.FP32,
act=None,
name=None):
super(FC, self).__init__()
self._size = size
self._num_flatten_dims = num_flatten_dims
self._dtype = dtype
from ..layer_helper import LayerHelper
self._helper = LayerHelper('FC', param_attr=param_attr)
self._helper = LayerHelper(
'FC',
param_attr=param_attr,
bias_attr=bias_attr,
act=act,
name=name)
def parameters(self):
return [self._w, self._b]
def _build_once(self, input):
input_shape = input.shape
......@@ -247,4 +259,22 @@ class FC(layers.Layer):
inputs={"X": [tmp]},
outputs={"Out": out},
attrs={"use_mkldnn": False})
return out
bias_attr = self._helper.bias_attr
if bias_attr:
# add bias
size = list(out.shape[1:])
if not self._built:
self._b = self._helper.create_parameter(
attr=bias_attr, shape=size, dtype=out.dtype, is_bias=True)
bias_out = self._helper.create_variable_for_type_inference(
dtype=out.dtype)
self._helper.append_op(
type='elementwise_add',
inputs={'X': [out],
'Y': [self._b]},
outputs={'Out': [bias_out]},
attrs={'axis': 1})
out = bias_out
# add activation
return self._helper.append_activation(out)
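A construction-only sketch of the widened `FC` signature; the argument values mirror the GAN test below, and the comments reflect the bias/activation handling added in this hunk:

```python
from paddle.fluid.imperative.nn import FC

# The constructor now accepts bias_attr, act and name; bias addition and the
# activation are appended inside FC.forward through the LayerHelper.
d_fc1 = FC(size=32, act='elu', name="d_fc1")  # hidden layer with ELU, default bias
d_fc2 = FC(size=1, name="d_fc2")              # linear output head
```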
......@@ -21,10 +21,11 @@ from paddle.fluid import core
@contextlib.contextmanager
def new_program_scope():
prog = fluid.Program()
startup_prog = fluid.Program()
scope = fluid.core.Scope()
def new_program_scope(main=None, startup=None, scope=None):
prog = main if main else fluid.Program()
startup_prog = startup if startup else fluid.Program()
scope = scope if scope else fluid.core.Scope()
with fluid.scope_guard(scope):
with fluid.program_guard(prog, startup_prog):
yield
with fluid.unique_name.guard():
yield
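A short sketch of the extended `new_program_scope` helper, mirroring how the GAN test below builds two main programs that share one startup program and one scope, so both graphs resolve the same parameter variables:

```python
import paddle.fluid as fluid
from test_imperative_base import new_program_scope

startup = fluid.Program()
discriminate_p = fluid.Program()
generate_p = fluid.Program()
scope = fluid.core.Scope()

with new_program_scope(main=discriminate_p, startup=startup, scope=scope):
    pass  # build the discriminator graph here

with new_program_scope(main=generate_p, startup=startup, scope=scope):
    pass  # build the generator graph here
```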
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import contextlib
import unittest
import numpy as np
import six
import sys
import paddle
import paddle.fluid as fluid
from paddle.fluid.optimizer import SGDOptimizer
from paddle.fluid.imperative.nn import Conv2D, Pool2D, FC
from test_imperative_base import new_program_scope
from paddle.fluid.imperative.base import to_variable
class Discriminator(fluid.imperative.Layer):
def __init__(self):
super(Discriminator, self).__init__()
self._fc1 = FC(size=32, act='elu', name="d_fc1")
self._fc2 = FC(size=1, name="d_fc2")
def parameters(self):
return self._fc1.parameters() + self._fc2.parameters()
def forward(self, inputs):
x = self._fc1(inputs)
return self._fc2(x)
class Generator(fluid.imperative.Layer):
def __init__(self):
super(Generator, self).__init__()
self._fc1 = FC(size=64, act='elu', name="g_fc1")
self._fc2 = FC(size=64, act='elu', name="g_fc2")
self._fc3 = FC(size=1, name="g_fc3")
def parameters(self):
return self._fc1.parameters() + self._fc2.parameters(
) + self._fc3.parameters()
def forward(self, inputs):
x = self._fc1(inputs)
x = self._fc2(x)
return self._fc3(x)
class TestImperativeMnist(unittest.TestCase):
def test_mnist_cpu_float32(self):
seed = 90
startup = fluid.Program()
startup.random_seed = seed
discriminate_p = fluid.Program()
generate_p = fluid.Program()
discriminate_p.random_seed = seed
generate_p.random_seed = seed
scope = fluid.core.Scope()
with new_program_scope(
main=discriminate_p, startup=startup, scope=scope):
discriminator = Discriminator()
generator = Generator()
img = fluid.layers.data(
name="img", shape=[2, 1], append_batch_size=False)
noise = fluid.layers.data(
name="noise", shape=[2, 2], append_batch_size=False)
d_real = discriminator(img)
d_loss_real = fluid.layers.reduce_mean(
fluid.layers.sigmoid_cross_entropy_with_logits(
x=d_real,
label=fluid.layers.fill_constant(
shape=[2, 1], dtype='float32', value=1.0)))
d_fake = discriminator(generator(noise))
d_loss_fake = fluid.layers.reduce_mean(
fluid.layers.sigmoid_cross_entropy_with_logits(
x=d_fake,
label=fluid.layers.fill_constant(
shape=[2, 1], dtype='float32', value=0.0)))
d_loss = d_loss_real + d_loss_fake
sgd = SGDOptimizer(learning_rate=1e-3)
sgd.minimize(d_loss)
with new_program_scope(main=generate_p, startup=startup, scope=scope):
discriminator = Discriminator()
generator = Generator()
noise = fluid.layers.data(
name="noise", shape=[2, 2], append_batch_size=False)
d_fake = discriminator(generator(noise))
g_loss = fluid.layers.reduce_mean(
fluid.layers.sigmoid_cross_entropy_with_logits(
x=d_fake,
label=fluid.layers.fill_constant(
shape=[2, 1], dtype='float32', value=1.0)))
sgd = SGDOptimizer(learning_rate=1e-3)
sgd.minimize(g_loss)
exe = fluid.Executor(fluid.CPUPlace())
static_params = dict()
with fluid.scope_guard(scope):
img = np.ones([2, 1], np.float32)
noise = np.ones([2, 2], np.float32)
exe.run(startup)
static_d_loss = exe.run(discriminate_p,
feed={'img': img,
'noise': noise},
fetch_list=[d_loss])[0]
static_g_loss = exe.run(generate_p,
feed={'noise': noise},
fetch_list=[g_loss])[0]
# generate_p contains all parameters needed.
for param in generate_p.global_block().all_parameters():
static_params[param.name] = np.array(
scope.find_var(param.name).get_tensor())
dy_params = dict()
with fluid.imperative.guard():
fluid.default_startup_program().random_seed = seed
fluid.default_main_program().random_seed = seed
discriminator = Discriminator()
generator = Generator()
sgd = SGDOptimizer(learning_rate=1e-3)
d_real = discriminator(to_variable(np.ones([2, 1], np.float32)))
d_loss_real = fluid.layers.reduce_mean(
fluid.layers.sigmoid_cross_entropy_with_logits(
x=d_real, label=to_variable(np.ones([2, 1], np.float32))))
d_fake = discriminator(
generator(to_variable(np.ones([2, 2], np.float32))))
d_loss_fake = fluid.layers.reduce_mean(
fluid.layers.sigmoid_cross_entropy_with_logits(
x=d_fake, label=to_variable(np.zeros([2, 1], np.float32))))
d_loss = d_loss_real + d_loss_fake
d_loss._backward()
sgd.minimize(d_loss)
discriminator.clear_gradients()
generator.clear_gradients()
d_fake = discriminator(
generator(to_variable(np.ones([2, 2], np.float32))))
g_loss = fluid.layers.reduce_mean(
fluid.layers.sigmoid_cross_entropy_with_logits(
x=d_fake, label=to_variable(np.ones([2, 1], np.float32))))
g_loss._backward()
sgd.minimize(g_loss)
for p in discriminator.parameters():
dy_params[p.name] = p._numpy()
for p in generator.parameters():
dy_params[p.name] = p._numpy()
dy_g_loss = g_loss._numpy()
dy_d_loss = d_loss._numpy()
self.assertEqual(dy_g_loss, static_g_loss)
self.assertEqual(dy_d_loss, static_d_loss)
for k, v in six.iteritems(dy_params):
self.assertTrue(np.allclose(v, static_params[k]))
if __name__ == '__main__':
unittest.main()