Commit 68e9b841 authored by minqiyang

Add support for optimizer

Parent 224c90a8
......@@ -104,7 +104,7 @@ class Autograd {
framework::Variable* CreateVariable(const std::string& name,
const framework::DDim& dim, float val,
framework::Scope* scope,
bool random_name = true) {
bool random_name = false) {
std::string varname = name;
if (random_name) {
std::mt19937 rng;
......
......@@ -45,6 +45,15 @@ class VarBase {
framework::LoDTensor& Grad();
inline framework::Variable* GradVar() { return grads_; }
inline std::string GradName() const {
PADDLE_ENFORCE(
var_desc_,
"Couldn't get gradient variable's name, please call backward() first");
return string::Sprintf("%s@IGrad", var_desc_->Name());
}
OpBase* pre_op_;
int pre_op_out_idx_;
......
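For reference, a Python restatement of the naming convention that `GradName()` above encodes, and that the optimizer changes further down consume via `param._ivar._grad_name()` (the example variable name is arbitrary):

```python
def grad_name(var_name):
    # Gradient variables follow the "<name>@IGrad" suffix convention,
    # matching string::Sprintf("%s@IGrad", var_desc_->Name()) in VarBase::GradName().
    return "%s@IGrad" % var_name

assert grad_name("fc_0.w_0") == "fc_0.w_0@IGrad"
```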
......@@ -52,7 +52,7 @@ class Tracer {
const std::vector<VarBase*>& outputs, framework::BlockDesc* block,
const bool stop_gradient) {
framework::OpDesc* op_desc = op->op_desc_;
VLOG(3) << "tracer tracing " << op_desc->Type();
LOG(ERROR) << "tracer tracing " << op_desc->Type();
op_desc->InferShape(*block);
op_desc->InferVarType(block);
std::unique_ptr<framework::OperatorBase> op_base =
......@@ -61,7 +61,10 @@ class Tracer {
*op->input_vars_ = inputs;
for (VarBase* input : inputs) {
const std::string vname = input->var_desc_->Name();
LOG(ERROR) << "input: " << vname;
LOG(ERROR) << "input var: " << input->var_;
framework::Variable* var = root_scope_->Var(vname);
LOG(ERROR) << "var_ in tracer pointer: " << var;
input->var_ = var;
if (!var->IsInitialized()) {
framework::VarDesc* var_desc = block->FindVar(vname);
......@@ -84,6 +87,7 @@ class Tracer {
*op->output_vars_ = outputs;
for (size_t i = 0; i < outputs.size(); ++i) {
const std::string vname = outputs[i]->var_desc_->Name();
LOG(ERROR) << "output name: " << vname;
framework::Variable* var = root_scope_->Var(vname);
if (!var->IsInitialized()) {
framework::VarDesc* var_desc = block->FindVar(vname);
......@@ -98,7 +102,7 @@ class Tracer {
outputs[i]->pre_op_out_idx_ = i;
}
VLOG(3) << "tracer running " << op_desc->Type();
LOG(ERROR) << "tracer running " << op_desc->Type();
op_base->Run(*root_scope_, platform::CPUPlace());
if (!stop_gradient) {
framework::OpDesc* grad_op_desc;
......
......@@ -29,6 +29,8 @@ class SGDOpKernel : public framework::OpKernel<T> {
const auto *param_var = ctx.InputVar("Param");
const auto *grad_var = ctx.InputVar("Grad");
LOG(ERROR) << "grad_var: " << grad_var;
if (param_var->IsType<framework::LoDTensor>()) {
const auto *param = ctx.Input<framework::Tensor>("Param");
auto *param_out = ctx.Output<framework::Tensor>("ParamOut");
......@@ -39,8 +41,11 @@ class SGDOpKernel : public framework::OpKernel<T> {
const auto *grad = ctx.Input<framework::Tensor>("Grad");
auto p = framework::EigenVector<T>::Flatten(*param);
LOG(ERROR) << "param flattened";
auto g = framework::EigenVector<T>::Flatten(*grad);
LOG(ERROR) << "grad flattened";
auto o = framework::EigenVector<T>::Flatten(*param_out);
LOG(ERROR) << "paramout flattened";
auto *lr = learning_rate->data<T>();
o = p - lr[0] * g;
......
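The dense-tensor branch of the SGD kernel above reduces to the element-wise rule `param_out = param - lr * grad`. A NumPy sketch of the same update that the Eigen expression `o = p - lr[0] * g` performs (the values are made up):

```python
import numpy as np

def sgd_update(param, grad, lr):
    # Element-wise SGD step, same rule as o = p - lr[0] * g in the kernel.
    return param - lr * grad

p = np.array([1.0, 2.0, 3.0], dtype=np.float32)
g = np.array([0.1, 0.1, 0.1], dtype=np.float32)
print(sgd_update(p, g, lr=1e-3))  # approximately [0.9999 1.9999 2.9999]
```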
......@@ -117,10 +117,23 @@ PYBIND11_MODULE(core, m) {
[](imperative::VarBase &self, framework::Scope *scope) {
self.RunBackward(scope);
})
.def("_grad_var",
[](const imperative::VarBase &self) {
LOG(ERROR) << "grad_var_ pointer: " << self.grads_;
return self.grads_;
},
py::return_value_policy::reference)
.def("_grad_name", &imperative::VarBase::GradName)
.def("_grad", &imperative::VarBase::Grad)
.def("_print_var_pointer",
[](const imperative::VarBase &self) {
LOG(ERROR) << self.var_desc_->Name()
<< " print_var pointer: " << self.var_;
})
.def_property("value",
[](const imperative::VarBase &self) { return self.var_; },
[](imperative::VarBase &self, framework::Variable *var) {
LOG(ERROR) << "set var to pointer: " << var;
self.var_ = var;
},
py::return_value_policy::reference)
......
......@@ -19,7 +19,6 @@ import contextlib
import os
import re
import six
import sys
import numpy as np
......@@ -369,6 +368,7 @@ class Variable(object):
self._ivar.stop_gradient = stop_gradient
def _numpy(self):
print("get_variable_tensor", self.desc.name())
scope = _imperative_tracer().get_scope()
tensor = core.get_variable_tensor(scope, self.desc.name())
return np.array(tensor)
......@@ -380,6 +380,14 @@ class Variable(object):
def _gradient(self):
return np.array(self._ivar._grad())
@property
def _value(self):
return self._ivar.value
@_value.setter
def _value(self, v):
self._ivar.value = v
def __str__(self):
return self.to_string(True)
......@@ -632,6 +640,7 @@ class Operator(object):
if inputs is not None:
for in_proto in proto.inputs:
print("create op: find_name", in_proto.name)
found = find_name(inputs, in_proto.name)
assert found or in_proto.dispensable, "Input {} not found".format(
in_proto.name)
......@@ -695,9 +704,11 @@ class Operator(object):
self._update_desc_attr(attr_name, attr_val)
self.desc.check_attrs()
if self._has_kernel(type):
self.desc.infer_var_type(self.block.desc)
self.desc.infer_shape(self.block.desc)
if _in_imperative_mode():
self.iop = core.OpBase()
self.iop.desc = self.desc
......@@ -1167,6 +1178,7 @@ class Block(object):
def create_var(self, *args, **kwargs):
var = Variable(block=self, *args, **kwargs)
if 'initializer' in kwargs:
print("initializer, ", type(kwargs['initializer']))
kwargs['initializer'](var, self)
return var
......@@ -1281,6 +1293,16 @@ class Block(object):
"""
op_desc = self.desc.append_op()
op = Operator(block=self, desc=op_desc, *args, **kwargs)
print("op inputs: ", [v._numpy() for v in op.inputs])
print("op inputs: ", [v for v in op.inputs])
import sys
sys.stdout.flush()
for v in op.inputs:
v._ivar._print_var_pointer()
print("print var pointer end")
import sys
sys.stdout.flush()
if _in_imperative_mode():
_imperative_tracer().trace(op.iop, [v._ivar for v in op.inputs],
[v._ivar for v in op.outputs], self.desc,
......@@ -1338,6 +1360,10 @@ class Block(object):
_imperative_tracer().trace(op.iop, [v._ivar for v in op.inputs],
[v._ivar for v in op.outputs], self.desc,
kwargs.get("stop_gradient", False))
print([v.name for v in op.outputs])
for v in op.outputs:
v._ivar._print_var_pointer()
print("fill_constant end")
self.ops.insert(0, op)
return op
......
......@@ -153,6 +153,7 @@ class ConstantInitializer(Initializer):
assert isinstance(var, framework.Variable)
assert isinstance(block, framework.Block)
# Initialization Ops should be prepended and not appended
print("fill_constant")
op = block._prepend_op(
type="fill_constant",
outputs={"Out": var},
......
......@@ -369,7 +369,7 @@ class LayerHelper(object):
def set_variable_initializer(self, var, initializer):
assert isinstance(var, Variable)
self.startup_program.global_block().create_var(
return self.startup_program.global_block().create_var(
name=var.name,
type=var.type,
dtype=var.dtype,
......
......@@ -20,6 +20,7 @@ from ..framework import convert_np_dtype_to_dtype_
from ..framework import Variable
from ..initializer import Constant, force_init_on_cpu
from ..core import VarDesc
from ..imperative import base as imperative_base
from .layer_function_generator import templatedoc
import numpy
......@@ -126,10 +127,22 @@ def create_global_var(shape,
"""
helper = LayerHelper("global_var", **locals())
var = helper.create_global_variable(
dtype=dtype, shape=shape, persistable=persistable, name=name)
dtype=dtype,
shape=shape,
persistable=persistable,
name=name,
stop_gradient=True)
print("set_variable_initializer, ", var.name)
if imperative_base.enabled():
var = helper.set_variable_initializer(
var, initializer=Constant(
value=float(value), force_cpu=force_cpu))
print("get var", var)
else:
helper.set_variable_initializer(
var, initializer=Constant(
value=float(value), force_cpu=force_cpu))
return var
......
......@@ -30,6 +30,7 @@ from .initializer import Constant
from .layer_helper import LayerHelper
from .layers import ops
from .regularizer import append_regularization_ops
from .imperative import base as imperative_base
__all__ = [
'SGD', 'Momentum', 'Adagrad', 'Adam', 'Adamax', 'DecayedAdagrad', 'Ftrl',
......@@ -108,6 +109,7 @@ class Optimizer(object):
# create learning rate variable for every parameter
param = param_and_grad[0]
param_lr = param.optimize_attr['learning_rate']
print("param_lr: ", param_lr, self._global_learning_rate()._numpy())
if type(param_lr) == Variable:
return param_lr
else:
......@@ -301,6 +303,25 @@ class Optimizer(object):
This method combines interface `append_backward()` and
`create_optimization_pass()` into one.
"""
if imperative_base.enabled:
if parameter_list is not None:
params_grads = parameter_list
else:
program = loss.block.program
parameters = program.global_block().all_parameters()
params_grads = []
for param in parameters:
grad_var = Variable(
block=loss.block,
name=param._ivar._grad_name(),
stop_gradient=True)
grad_var._value = param._ivar._grad_var()
print("create grad var: ", grad_var.name)
print("grad_var value: ", grad_var._numpy())
import sys
sys.stdout.flush()
params_grads.append((param, grad_var))
else:
params_grads = append_backward(loss, parameter_list, no_grad_set,
[error_clip_callback])
......@@ -356,6 +377,10 @@ class SGDOptimizer(Optimizer):
def _append_optimize_op(self, block, param_and_grad):
assert isinstance(block, framework.Block)
print("append sgd")
import sys
sys.stdout.flush()
# create the optimize op
sgd_op = block.append_op(
type=self.type,
......
......@@ -18,6 +18,7 @@ import numpy as np
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.optimizer import SGDOptimizer
from paddle.fluid.imperative.nn import Conv2D, Pool2D, FC
from paddle.fluid.imperative.base import to_variable
......@@ -119,7 +120,11 @@ class TestImperativeMnist(unittest.TestCase):
out._backward()
filter_grad = mnist._simple_img_conv_pool_1._conv2d._filter_param._gradient(
)
print(filter_grad)
# print(filter_grad)
sgd = SGDOptimizer(learning_rate=1e-3)
sgd.minimize(out)
# np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
# with fluid.imperative.guard():
# mlp = MLP()
......