Commit 7aab39af authored by minqiyang

Change grads to VarBase
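
In short: VarBase::grads_ changes type from framework::Variable* to VarBase*, so a variable's gradient is itself a full imperative variable rather than a bare tensor holder, and every consumer reaches the gradient tensor through one extra indirection. A minimal sketch of the shape of the change (member names as in the hunks below; the real classes carry more fields):

    // Stand-in for framework::Variable (sketch only).
    namespace framework { class Variable; }

    struct VarBase {
      framework::Variable* var_;  // forward value, unchanged by this commit
      VarBase* grads_;            // was: framework::Variable* grads_;
    };

    // Access pattern, before:  grads_->GetMutable<framework::LoDTensor>()
    // Access pattern, after:   grads_->var_->GetMutable<framework::LoDTensor>()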

Parent 67093da3
@@ -42,7 +42,7 @@ void AddTo(Variable* src, Variable* dst) {
                     src_tensor->numel());
   float* dst_data = dst_tensor->mutable_data<float>(platform::CPUPlace());
   const float* src_data = src_tensor->data<float>();
-  for (size_t i = 0; i < src_tensor->numel(); ++i) {
+  for (int64_t i = 0; i < src_tensor->numel(); ++i) {
     dst_data[i] += src_data[i];
   }
 }
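
Aside from the indirection change, this commit also fixes the loop index type here: Tensor::numel() returns a signed 64-bit count, so an int64_t index avoids the signed/unsigned comparison the old size_t loop triggered. A self-contained sketch of the fixed accumulation, assuming raw float buffers of equal length as in the hunk above:

    #include <cstdint>

    // Element-wise gradient accumulation: dst += src.
    void AddToSketch(const float* src_data, float* dst_data, int64_t numel) {
      for (int64_t i = 0; i < numel; ++i) {  // int64_t matches numel()'s type
        dst_data[i] += src_data[i];
      }
    }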
@@ -116,7 +116,7 @@ class Autograd {
 framework::LoDTensor& VarBase::Grad() {
   VLOG(3) << "get var grad " << var_desc_->Name();
-  return *grads_->GetMutable<framework::LoDTensor>();
+  return *(grads_->var_->GetMutable<framework::LoDTensor>());
 }
 
 std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() {
@@ -173,7 +173,7 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() {
 void VarBase::RunBackward() {
   if (!pre_op_) return;
 
-  auto grads_t = grads_->GetMutable<framework::LoDTensor>();
+  auto grads_t = grads_->var_->GetMutable<framework::LoDTensor>();
   float* data = grads_t->mutable_data<float>(platform::CPUPlace());
   std::fill(data, data + grads_t->numel(), 1.0);
......
@@ -17,11 +17,14 @@
 #include <map>
 #include <string>
 #include <vector>
 
 #include "paddle/fluid/framework/op_desc.h"
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/var_desc.h"
 #include "paddle/fluid/platform/enforce.h"
 
+#include "paddle/fluid/imperative/type_defs.h"
+
 namespace paddle {
 namespace imperative {
@@ -79,6 +82,11 @@ class PreparedOp {
 };
 
 class OpBase;
 
+/* The wrapper for Variable which holds a Variable and a VarBase of its
+ * gradient. This object should be managed totally by the Python interpreter.
+ *
+ * Nearly all interfaces should be implemented in C++.
+ */
 class VarBase {
  public:
  VarBase()
@@ -86,7 +94,7 @@ class VarBase {
         pre_op_out_idx_(-1),
         var_desc_(nullptr),
         var_(new framework::Variable()),
-        grads_(new framework::Variable()),
+        grads_(new VarBase(true)),
         stop_gradient_(false) {}
 
   explicit VarBase(bool stop_gradient)
@@ -94,7 +102,7 @@ class VarBase {
         pre_op_out_idx_(-1),
         var_desc_(nullptr),
         var_(new framework::Variable()),
-        grads_(new framework::Variable()),
+        grads_(stop_gradient ? nullptr : new VarBase(true)),
         stop_gradient_(stop_gradient) {}
 
   virtual ~VarBase() {}
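
Note the recursion guard in these two constructors: a normal VarBase allocates its gradient holder with new VarBase(true), and a stop-gradient VarBase gets grads_ = nullptr, so the variable-to-gradient chain terminates after one hop rather than allocating gradients of gradients indefinitely. A self-contained sketch of just that mechanism (a simplification; the real constructors initialize several more members):

    // Sketch: only the members needed to show the recursion guard.
    class VarBaseSketch {
     public:
      // A normal variable owns a gradient holder...
      VarBaseSketch() : VarBaseSketch(false) {}

      // ...but the holder is built with stop_gradient=true, so its own
      // grads_ is nullptr and the chain stops at depth one.
      explicit VarBaseSketch(bool stop_gradient)
          : grads_(stop_gradient ? nullptr : new VarBaseSketch(true)),
            stop_gradient_(stop_gradient) {}

      ~VarBaseSketch() { delete grads_; }

      VarBaseSketch* grads_;
      bool stop_gradient_;
    };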
@@ -116,11 +124,14 @@ class VarBase {
   framework::VarDesc* var_desc_;
 
   framework::Variable* var_;
-  framework::Variable* grads_;
+  VarBase* grads_;
 
   bool stop_gradient_;
 };
 
+/* The wrapper for OpDesc which holds an OpDesc and an OpDesc of its
+ * gradient. This object should be managed totally by the Python interpreter.
+ */
 class OpBase {
  public:
   OpBase() : op_desc_(nullptr), grad_op_desc_(nullptr) {}
@@ -134,13 +145,13 @@ class OpBase {
   framework::OpDesc* op_desc_;
   framework::OpDesc* grad_op_desc_;
 
-  std::map<std::string, std::vector<VarBase*>> input_vars_;
-  std::map<std::string, std::vector<VarBase*>> output_vars_;
-  std::map<std::string, std::vector<OpBase*>> pre_ops_;
+  VarBasePtrMap input_vars_;
+  VarBasePtrMap output_vars_;
+  OpBasePtrMap pre_ops_;
   std::map<std::string, std::vector<int>> pre_ops_out_idx_;
 
-  std::map<std::string, std::vector<framework::Variable*>> grad_input_vars_;
-  std::map<std::string, std::vector<framework::Variable*>> grad_output_vars_;
+  framework::VariableValueMap grad_input_vars_;
+  framework::VariableValueMap grad_output_vars_;
 
   framework::BlockDesc* block_;
 };
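
The new member types come from the freshly included paddle/fluid/imperative/type_defs.h, which this diff does not show. Judging from the std::map declarations they replace, the aliases are presumably along these lines (an inference, not the actual header):

    #include <map>
    #include <string>
    #include <vector>

    namespace paddle {
    namespace imperative {

    class VarBase;
    class OpBase;

    // Inferred from the map types these aliases replace above.
    using VarBasePtrMap = std::map<std::string, std::vector<VarBase*>>;
    using OpBasePtrMap = std::map<std::string, std::vector<OpBase*>>;

    }  // namespace imperative
    }  // namespace paddle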
......
@@ -146,10 +146,10 @@ class Tracer {
             grad_in_vars.push_back(fwd_var_it->second->var_);
           } else {
             VarBase* var = vars[var_it->second];
-            if (!var->grads_->IsInitialized()) {
-              InitVar(var->var_, var->grads_);
+            if (!var->grads_->var_->IsInitialized()) {
+              InitVar(var->var_, var->grads_->var_);
             }
-            grad_in_vars.push_back(var->grads_);
+            grad_in_vars.push_back(var->grads_->var_);
           }
         }
       }
@@ -161,10 +161,10 @@ class Tracer {
           auto var_it = grad_to_var->find(grad_outvar);
           PADDLE_ENFORCE(var_it != grad_to_var->end());
           VarBase* var = vars[var_it->second];
-          if (!var->grads_->IsInitialized()) {
-            InitVar(var->var_, var->grads_);
+          if (!var->grads_->var_->IsInitialized()) {
+            InitVar(var->var_, var->grads_->var_);
           }
-          grad_out_vars.push_back(var->grads_);
+          grad_out_vars.push_back(var->grads_->var_);
         }
       }
     }
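
Both Tracer hunks repeat the same lazy-initialization pattern, now with one extra hop through the gradient VarBase: the gradient Variable is materialized from the forward Variable (via InitVar) only when a grad op first needs it. A stripped-down sketch of the pattern with stand-in types (the real framework::Variable and InitVar are not shown in this diff):

    #include <functional>

    // Stand-ins for framework::Variable and imperative::VarBase (sketch only).
    struct Variable {
      bool initialized = false;
      bool IsInitialized() const { return initialized; }
    };
    struct VarBase {
      Variable* var_;
      VarBase* grads_;
    };

    // The gradient tensor now lives one indirection deeper (var->grads_->var_)
    // and is initialized lazily from the forward Variable.
    void EnsureGradInitialized(
        VarBase* var, const std::function<void(Variable*, Variable*)>& init_var) {
      if (!var->grads_->var_->IsInitialized()) {
        init_var(var->var_, var->grads_->var_);  // e.g. Paddle's InitVar
      }
    }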
......
@@ -133,17 +133,10 @@ PYBIND11_MODULE(core, m) {
            [](imperative::VarBase &self) { self.RunBackward(); })
       .def("_grad_name", &imperative::VarBase::GradName)
       .def("_grad", &imperative::VarBase::Grad)
-      .def_property("grad_value",
-                    [](const imperative::VarBase &self) { return self.grads_; },
-                    [](imperative::VarBase &self, framework::Variable *grad) {
-                      self.grads_ = grad;
-                    },
-                    py::return_value_policy::reference)
-      .def_property("value",
-                    [](const imperative::VarBase &self) { return self.var_; },
-                    [](imperative::VarBase &self, framework::Variable *var) {
-                      self.var_ = var;
-                    },
-                    py::return_value_policy::reference)
+      .def("_grad_ivar",
+           [](const imperative::VarBase &self) { return self.grads_; },
+           py::return_value_policy::reference)
+      .def("value", [](const imperative::VarBase &self) { return self.var_; },
+           py::return_value_policy::reference)
       .def_property(
           "desc",
......
@@ -365,12 +365,14 @@ class Variable(object):
         self.stop_gradient = stop_gradient
         self.is_data = is_data
         if _in_imperative_mode():
-            self._ivar = core.VarBase()
+            self._ivar = kwargs.get("ivar", None)
+            if not self._ivar:
+                self._ivar = core.VarBase()
             self._ivar.desc = self.desc
             self._ivar.stop_gradient = stop_gradient
 
     def _numpy(self):
-        tensor = self._ivar.value.get_tensor()
+        tensor = self._ivar.value().get_tensor()
         return np.array(tensor)
 
     def _backward(self):
@@ -379,14 +381,6 @@ class Variable(object):
     def _gradient(self):
         return np.array(self._ivar._grad())
 
-    @property
-    def _value(self):
-        return self._ivar.value
-
-    @_value.setter
-    def _value(self, v):
-        self._ivar.value = v
-
     def __str__(self):
         return self.to_string(True)
......
@@ -45,7 +45,8 @@ def to_variable(value, block=None):
             name=None,
             shape=value.shape,
             dtype=value.dtype)
-        var = py_var._ivar.value
+        var = py_var._ivar.value()
+        print(type(var))
         tensor = var.get_tensor()
         tensor.set(value, core.CPUPlace())
         return py_var
......
@@ -314,8 +314,8 @@ class Optimizer(object):
                 grad_var = Variable(
                     block=loss.block,
                     name=param._ivar._grad_name(),
-                    stop_gradient=True)
-                grad_var._value = param._ivar.grad_value
+                    stop_gradient=True,
+                    ivar=param._ivar._grad_ivar())
                 params_grads.append((param, grad_var))
 
             optimize_ops = self._create_optimization_pass(params_grads, loss,
......
@@ -105,7 +105,6 @@ class TestImperativeMnist(unittest.TestCase):
             fluid.default_startup_program().random_seed = seed
             fluid.default_main_program().random_seed = seed
 
-            # mnist = Conv2D(1, 20, 5)
             mnist = MNIST()
             sgd = SGDOptimizer(learning_rate=1e-3)
             train_reader = paddle.batch(
@@ -126,16 +125,17 @@ class TestImperativeMnist(unittest.TestCase):
                 label._stop_gradient = True
 
                 cost = mnist(img)
-                loss = fluid.layers.reduce_mean(cost)
-                dy_out = loss._numpy()
+                # loss = fluid.layers.cross_entropy(cost)
+                avg_loss = fluid.layers.reduce_mean(cost)
+                dy_out = avg_loss._numpy()
 
                 if batch_id == 0:
                     for param in fluid.default_main_program().global_block(
                     ).all_parameters():
                         dy_param_init_value[param.name] = param._numpy()
 
-                loss._backward()
-                sgd.minimize(loss)
+                avg_loss._backward()
+                sgd.minimize(avg_loss)
                 dy_param_value = {}
                 for param in fluid.default_main_program().global_block(
                 ).all_parameters():
@@ -147,7 +147,6 @@ class TestImperativeMnist(unittest.TestCase):
             exe = fluid.Executor(fluid.CPUPlace())
 
-            # mnist = Conv2D(1, 20, 5)
             mnist = MNIST()
             sgd = SGDOptimizer(learning_rate=1e-3)
             train_reader = paddle.batch(
@@ -157,8 +156,9 @@ class TestImperativeMnist(unittest.TestCase):
                 name='pixel', shape=[1, 28, 28], dtype='float32')
             label = fluid.layers.data(name='label', shape=[1], dtype='int64')
             cost = mnist(img)
-            loss = fluid.layers.reduce_mean(cost)
-            sgd.minimize(loss)
+            # loss = fluid.layers.cross_entropy(cost)
+            avg_loss = fluid.layers.reduce_mean(cost)
+            sgd.minimize(avg_loss)
 
             # initialize params and fetch them
             static_param_init_value = {}
@@ -182,7 +182,7 @@ class TestImperativeMnist(unittest.TestCase):
                 y_data = np.array([x[1] for x in data]).astype('int64').reshape(
                     [128, 1])
 
-                fetch_list = [loss.name]
+                fetch_list = [avg_loss.name]
                 fetch_list.extend(static_param_name_list)
                 out = exe.run(fluid.default_main_program(),
                               feed={"pixel": x_data,
......