Commit 7aab39af authored by minqiyang

Change grads to VarBase

Parent 67093da3
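The core of this change: `VarBase` now holds its gradient as another `VarBase*` (previously a raw `framework::Variable*`), so the gradient can be handed back to Python as a first-class imperative variable via the new `_grad_ivar()` binding. Below is a minimal, self-contained sketch of that ownership pattern; the types are simplified stand-ins for illustration only, not the actual Paddle classes.

```cpp
// Simplified model of the pattern introduced here: a variable owns its
// gradient as another VarBase, and a gradient VarBase is created with
// stop_gradient=true so it does not spawn its own gradient chain
// (mirroring the grads_(new VarBase(true)) initializer in the diff below).
#include <cstddef>
#include <iostream>
#include <vector>

struct VarBase {
  explicit VarBase(bool stop_gradient = false)
      : grads(stop_gradient ? nullptr : new VarBase(/*stop_gradient=*/true)),
        stop_gradient(stop_gradient) {}
  ~VarBase() { delete grads; }
  VarBase(const VarBase&) = delete;
  VarBase& operator=(const VarBase&) = delete;

  std::vector<float> value;  // stands in for the wrapped framework::Variable
  VarBase* grads;            // was a raw framework::Variable* before this commit
  bool stop_gradient;
};

// Element-wise accumulation of one gradient into another, in the spirit of
// AddTo() in the first hunk below.
void AddTo(const std::vector<float>& src, std::vector<float>* dst) {
  if (dst->empty()) dst->assign(src.size(), 0.0f);
  for (std::size_t i = 0; i < src.size(); ++i) (*dst)[i] += src[i];
}

int main() {
  VarBase x;  // a leaf variable: a value plus a gradient VarBase
  x.value = {1.0f, 2.0f, 3.0f};

  // Two gradient contributions accumulated into x's gradient.
  AddTo({0.1f, 0.1f, 0.1f}, &x.grads->value);
  AddTo({0.2f, 0.2f, 0.2f}, &x.grads->value);

  // Because the gradient is itself a VarBase, it can be passed around like
  // any other variable (what _grad_ivar() exposes to Python).
  for (float g : x.grads->value) std::cout << g << ' ';
  std::cout << '\n';  // prints approximately: 0.3 0.3 0.3
  return 0;
}
```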
@@ -42,7 +42,7 @@ void AddTo(Variable* src, Variable* dst) {
                     src_tensor->numel());
   float* dst_data = dst_tensor->mutable_data<float>(platform::CPUPlace());
   const float* src_data = src_tensor->data<float>();
-  for (size_t i = 0; i < src_tensor->numel(); ++i) {
+  for (int64_t i = 0; i < src_tensor->numel(); ++i) {
     dst_data[i] += src_data[i];
   }
 }
@@ -116,7 +116,7 @@ class Autograd {
 framework::LoDTensor& VarBase::Grad() {
   VLOG(3) << "get var grad " << var_desc_->Name();
-  return *grads_->GetMutable<framework::LoDTensor>();
+  return *(grads_->var_->GetMutable<framework::LoDTensor>());
 }
 std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() {
@@ -173,7 +173,7 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() {
 void VarBase::RunBackward() {
   if (!pre_op_) return;
-  auto grads_t = grads_->GetMutable<framework::LoDTensor>();
+  auto grads_t = grads_->var_->GetMutable<framework::LoDTensor>();
   float* data = grads_t->mutable_data<float>(platform::CPUPlace());
   std::fill(data, data + grads_t->numel(), 1.0);
......
@@ -17,11 +17,14 @@
 #include <map>
 #include <string>
 #include <vector>
 #include "paddle/fluid/framework/op_desc.h"
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/var_desc.h"
 #include "paddle/fluid/platform/enforce.h"
+#include "paddle/fluid/imperative/type_defs.h"
 namespace paddle {
 namespace imperative {
@@ -79,6 +82,11 @@ class PreparedOp {
 };
 class OpBase;
+/* The wrapper for Variable which holds a Variable and a VarBase of its
+ * gradient. This object should be managed totally by the Python interpreter.
+ *
+ * Nearly all interfaces should be implemented in C++.
+ */
 class VarBase {
  public:
   VarBase()
@@ -86,7 +94,7 @@ class VarBase {
         pre_op_out_idx_(-1),
         var_desc_(nullptr),
         var_(new framework::Variable()),
-        grads_(new framework::Variable()),
+        grads_(new VarBase(true)),
         stop_gradient_(false) {}
   explicit VarBase(bool stop_gradient)
@@ -94,7 +102,7 @@ class VarBase {
         pre_op_out_idx_(-1),
         var_desc_(nullptr),
         var_(new framework::Variable()),
-        grads_(new framework::Variable()),
+        grads_(stop_gradient ? nullptr : new VarBase(true)),
         stop_gradient_(stop_gradient) {}
   virtual ~VarBase() {}
@@ -116,11 +124,14 @@ class VarBase {
   framework::VarDesc* var_desc_;
   framework::Variable* var_;
-  framework::Variable* grads_;
+  VarBase* grads_;
   bool stop_gradient_;
 };
+/* The wrapper for OpDesc which holds an OpDesc and an OpDesc of its
+ * gradient. This object should be managed totally by the Python interpreter.
+ */
 class OpBase {
  public:
   OpBase() : op_desc_(nullptr), grad_op_desc_(nullptr) {}
@@ -134,13 +145,13 @@ class OpBase {
   framework::OpDesc* op_desc_;
   framework::OpDesc* grad_op_desc_;
-  std::map<std::string, std::vector<VarBase*>> input_vars_;
-  std::map<std::string, std::vector<VarBase*>> output_vars_;
-  std::map<std::string, std::vector<OpBase*>> pre_ops_;
+  VarBasePtrMap input_vars_;
+  VarBasePtrMap output_vars_;
+  OpBasePtrMap pre_ops_;
   std::map<std::string, std::vector<int>> pre_ops_out_idx_;
-  std::map<std::string, std::vector<framework::Variable*>> grad_input_vars_;
-  std::map<std::string, std::vector<framework::Variable*>> grad_output_vars_;
+  framework::VariableValueMap grad_input_vars_;
+  framework::VariableValueMap grad_output_vars_;
   framework::BlockDesc* block_;
 };
......
@@ -146,10 +146,10 @@ class Tracer {
           grad_in_vars.push_back(fwd_var_it->second->var_);
         } else {
           VarBase* var = vars[var_it->second];
-          if (!var->grads_->IsInitialized()) {
-            InitVar(var->var_, var->grads_);
+          if (!var->grads_->var_->IsInitialized()) {
+            InitVar(var->var_, var->grads_->var_);
           }
-          grad_in_vars.push_back(var->grads_);
+          grad_in_vars.push_back(var->grads_->var_);
         }
       }
     }
@@ -161,10 +161,10 @@ class Tracer {
         auto var_it = grad_to_var->find(grad_outvar);
         PADDLE_ENFORCE(var_it != grad_to_var->end());
         VarBase* var = vars[var_it->second];
-        if (!var->grads_->IsInitialized()) {
-          InitVar(var->var_, var->grads_);
+        if (!var->grads_->var_->IsInitialized()) {
+          InitVar(var->var_, var->grads_->var_);
         }
-        grad_out_vars.push_back(var->grads_);
+        grad_out_vars.push_back(var->grads_->var_);
       }
     }
   }
......
@@ -133,17 +133,10 @@ PYBIND11_MODULE(core, m) {
            [](imperative::VarBase &self) { self.RunBackward(); })
       .def("_grad_name", &imperative::VarBase::GradName)
       .def("_grad", &imperative::VarBase::Grad)
-      .def_property("grad_value",
-                    [](const imperative::VarBase &self) { return self.grads_; },
-                    [](imperative::VarBase &self, framework::Variable *grad) {
-                      self.grads_ = grad;
-                    },
-                    py::return_value_policy::reference)
-      .def_property("value",
-                    [](const imperative::VarBase &self) { return self.var_; },
-                    [](imperative::VarBase &self, framework::Variable *var) {
-                      self.var_ = var;
-                    },
-                    py::return_value_policy::reference)
+      .def("_grad_ivar",
+           [](const imperative::VarBase &self) { return self.grads_; },
+           py::return_value_policy::reference)
+      .def("value", [](const imperative::VarBase &self) { return self.var_; },
+           py::return_value_policy::reference)
       .def_property(
           "desc",
......
@@ -365,12 +365,14 @@ class Variable(object):
         self.stop_gradient = stop_gradient
         self.is_data = is_data
         if _in_imperative_mode():
-            self._ivar = core.VarBase()
+            self._ivar = kwargs.get("ivar", None)
+            if not self._ivar:
+                self._ivar = core.VarBase()
             self._ivar.desc = self.desc
             self._ivar.stop_gradient = stop_gradient

     def _numpy(self):
-        tensor = self._ivar.value.get_tensor()
+        tensor = self._ivar.value().get_tensor()
         return np.array(tensor)

     def _backward(self):
@@ -379,14 +381,6 @@ class Variable(object):
     def _gradient(self):
         return np.array(self._ivar._grad())

-    @property
-    def _value(self):
-        return self._ivar.value
-
-    @_value.setter
-    def _value(self, v):
-        self._ivar.value = v
-
     def __str__(self):
         return self.to_string(True)
......
@@ -45,7 +45,8 @@ def to_variable(value, block=None):
             name=None,
             shape=value.shape,
             dtype=value.dtype)
-        var = py_var._ivar.value
+        var = py_var._ivar.value()
+        print(type(var))
         tensor = var.get_tensor()
         tensor.set(value, core.CPUPlace())
         return py_var
......
@@ -314,8 +314,8 @@ class Optimizer(object):
                 grad_var = Variable(
                     block=loss.block,
                     name=param._ivar._grad_name(),
-                    stop_gradient=True)
-                grad_var._value = param._ivar.grad_value
+                    stop_gradient=True,
+                    ivar=param._ivar._grad_ivar())
                 params_grads.append((param, grad_var))
             optimize_ops = self._create_optimization_pass(params_grads, loss,
......
@@ -105,7 +105,6 @@ class TestImperativeMnist(unittest.TestCase):
             fluid.default_startup_program().random_seed = seed
             fluid.default_main_program().random_seed = seed

-            # mnist = Conv2D(1, 20, 5)
             mnist = MNIST()
             sgd = SGDOptimizer(learning_rate=1e-3)
             train_reader = paddle.batch(
@@ -126,16 +125,17 @@ class TestImperativeMnist(unittest.TestCase):
                 label._stop_gradient = True

                 cost = mnist(img)
-                loss = fluid.layers.reduce_mean(cost)
-                dy_out = loss._numpy()
+                # loss = fluid.layers.cross_entropy(cost)
+                avg_loss = fluid.layers.reduce_mean(cost)
+                dy_out = avg_loss._numpy()

                 if batch_id == 0:
                     for param in fluid.default_main_program().global_block(
                     ).all_parameters():
                         dy_param_init_value[param.name] = param._numpy()

-                loss._backward()
-                sgd.minimize(loss)
+                avg_loss._backward()
+                sgd.minimize(avg_loss)
                 dy_param_value = {}
                 for param in fluid.default_main_program().global_block(
                 ).all_parameters():
@@ -147,7 +147,6 @@ class TestImperativeMnist(unittest.TestCase):
             exe = fluid.Executor(fluid.CPUPlace())

-            # mnist = Conv2D(1, 20, 5)
             mnist = MNIST()
             sgd = SGDOptimizer(learning_rate=1e-3)
             train_reader = paddle.batch(
@@ -157,8 +156,9 @@ class TestImperativeMnist(unittest.TestCase):
                 name='pixel', shape=[1, 28, 28], dtype='float32')
             label = fluid.layers.data(name='label', shape=[1], dtype='int64')
             cost = mnist(img)
-            loss = fluid.layers.reduce_mean(cost)
-            sgd.minimize(loss)
+            # loss = fluid.layers.cross_entropy(cost)
+            avg_loss = fluid.layers.reduce_mean(cost)
+            sgd.minimize(avg_loss)

             # initialize params and fetch them
             static_param_init_value = {}
@@ -182,7 +182,7 @@ class TestImperativeMnist(unittest.TestCase):
                 y_data = np.array([x[1] for x in data]).astype('int64').reshape(
                     [128, 1])

-                fetch_list = [loss.name]
+                fetch_list = [avg_loss.name]
                 fetch_list.extend(static_param_name_list)
                 out = exe.run(fluid.default_main_program(),
                               feed={"pixel": x_data,
......