提交 68e9b841 编写于 作者: M minqiyang

Add support for optimizer

上级 224c90a8
...@@ -104,7 +104,7 @@ class Autograd { ...@@ -104,7 +104,7 @@ class Autograd {
framework::Variable* CreateVariable(const std::string& name, framework::Variable* CreateVariable(const std::string& name,
const framework::DDim& dim, float val, const framework::DDim& dim, float val,
framework::Scope* scope, framework::Scope* scope,
bool random_name = true) { bool random_name = false) {
std::string varname = name; std::string varname = name;
if (random_name) { if (random_name) {
std::mt19937 rng; std::mt19937 rng;
......
...@@ -45,6 +45,15 @@ class VarBase { ...@@ -45,6 +45,15 @@ class VarBase {
framework::LoDTensor& Grad(); framework::LoDTensor& Grad();
// Accessor for the gradient variable: hands back the grads_ member pointer
// exactly as stored, without any checks.
inline framework::Variable* GradVar() {
  return grads_;
}
// Builds the name used for this variable's gradient: the name reported by
// var_desc_ with the "@IGrad" suffix appended.
//
// Enforces (via PADDLE_ENFORCE) that var_desc_ is set before it is
// dereferenced; the enforce message is emitted when it is not.
inline std::string GradName() const {
  PADDLE_ENFORCE(
      var_desc_,
      "Couldn't get gradient variable's name, please call backward() first");
  // Bind by const reference so this works whether Name() returns a value or
  // a reference.
  const auto& forward_name = var_desc_->Name();
  return string::Sprintf("%s@IGrad", forward_name);
}
OpBase* pre_op_; OpBase* pre_op_;
int pre_op_out_idx_; int pre_op_out_idx_;
......
...@@ -52,7 +52,7 @@ class Tracer { ...@@ -52,7 +52,7 @@ class Tracer {
const std::vector<VarBase*>& outputs, framework::BlockDesc* block, const std::vector<VarBase*>& outputs, framework::BlockDesc* block,
const bool stop_gradient) { const bool stop_gradient) {
framework::OpDesc* op_desc = op->op_desc_; framework::OpDesc* op_desc = op->op_desc_;
VLOG(3) << "tracer tracing " << op_desc->Type(); LOG(ERROR) << "tracer tracing " << op_desc->Type();
op_desc->InferShape(*block); op_desc->InferShape(*block);
op_desc->InferVarType(block); op_desc->InferVarType(block);
std::unique_ptr<framework::OperatorBase> op_base = std::unique_ptr<framework::OperatorBase> op_base =
...@@ -61,7 +61,10 @@ class Tracer { ...@@ -61,7 +61,10 @@ class Tracer {
*op->input_vars_ = inputs; *op->input_vars_ = inputs;
for (VarBase* input : inputs) { for (VarBase* input : inputs) {
const std::string vname = input->var_desc_->Name(); const std::string vname = input->var_desc_->Name();
LOG(ERROR) << "input: " << vname;
LOG(ERROR) << "input var: " << input->var_;
framework::Variable* var = root_scope_->Var(vname); framework::Variable* var = root_scope_->Var(vname);
LOG(ERROR) << "var_ in tracer pointer: " << var;
input->var_ = var; input->var_ = var;
if (!var->IsInitialized()) { if (!var->IsInitialized()) {
framework::VarDesc* var_desc = block->FindVar(vname); framework::VarDesc* var_desc = block->FindVar(vname);
...@@ -84,6 +87,7 @@ class Tracer { ...@@ -84,6 +87,7 @@ class Tracer {
*op->output_vars_ = outputs; *op->output_vars_ = outputs;
for (size_t i = 0; i < outputs.size(); ++i) { for (size_t i = 0; i < outputs.size(); ++i) {
const std::string vname = outputs[i]->var_desc_->Name(); const std::string vname = outputs[i]->var_desc_->Name();
LOG(ERROR) << "output name: " << vname;
framework::Variable* var = root_scope_->Var(vname); framework::Variable* var = root_scope_->Var(vname);
if (!var->IsInitialized()) { if (!var->IsInitialized()) {
framework::VarDesc* var_desc = block->FindVar(vname); framework::VarDesc* var_desc = block->FindVar(vname);
...@@ -98,7 +102,7 @@ class Tracer { ...@@ -98,7 +102,7 @@ class Tracer {
outputs[i]->pre_op_out_idx_ = i; outputs[i]->pre_op_out_idx_ = i;
} }
VLOG(3) << "tracer running " << op_desc->Type(); LOG(ERROR) << "tracer running " << op_desc->Type();
op_base->Run(*root_scope_, platform::CPUPlace()); op_base->Run(*root_scope_, platform::CPUPlace());
if (!stop_gradient) { if (!stop_gradient) {
framework::OpDesc* grad_op_desc; framework::OpDesc* grad_op_desc;
......
...@@ -29,6 +29,8 @@ class SGDOpKernel : public framework::OpKernel<T> { ...@@ -29,6 +29,8 @@ class SGDOpKernel : public framework::OpKernel<T> {
const auto *param_var = ctx.InputVar("Param"); const auto *param_var = ctx.InputVar("Param");
const auto *grad_var = ctx.InputVar("Grad"); const auto *grad_var = ctx.InputVar("Grad");
LOG(ERROR) << "grad_var: " << grad_var;
if (param_var->IsType<framework::LoDTensor>()) { if (param_var->IsType<framework::LoDTensor>()) {
const auto *param = ctx.Input<framework::Tensor>("Param"); const auto *param = ctx.Input<framework::Tensor>("Param");
auto *param_out = ctx.Output<framework::Tensor>("ParamOut"); auto *param_out = ctx.Output<framework::Tensor>("ParamOut");
...@@ -39,8 +41,11 @@ class SGDOpKernel : public framework::OpKernel<T> { ...@@ -39,8 +41,11 @@ class SGDOpKernel : public framework::OpKernel<T> {
const auto *grad = ctx.Input<framework::Tensor>("Grad"); const auto *grad = ctx.Input<framework::Tensor>("Grad");
auto p = framework::EigenVector<T>::Flatten(*param); auto p = framework::EigenVector<T>::Flatten(*param);
LOG(ERROR) << "param flattened";
auto g = framework::EigenVector<T>::Flatten(*grad); auto g = framework::EigenVector<T>::Flatten(*grad);
LOG(ERROR) << "grad flattened";
auto o = framework::EigenVector<T>::Flatten(*param_out); auto o = framework::EigenVector<T>::Flatten(*param_out);
LOG(ERROR) << "paramout flattened";
auto *lr = learning_rate->data<T>(); auto *lr = learning_rate->data<T>();
o = p - lr[0] * g; o = p - lr[0] * g;
......
...@@ -117,10 +117,23 @@ PYBIND11_MODULE(core, m) { ...@@ -117,10 +117,23 @@ PYBIND11_MODULE(core, m) {
[](imperative::VarBase &self, framework::Scope *scope) { [](imperative::VarBase &self, framework::Scope *scope) {
self.RunBackward(scope); self.RunBackward(scope);
}) })
.def("_grad_var",
[](const imperative::VarBase &self) {
LOG(ERROR) << "grad_var_ pointer: " << self.grads_;
return self.grads_;
},
py::return_value_policy::reference)
.def("_grad_name", &imperative::VarBase::GradName)
.def("_grad", &imperative::VarBase::Grad) .def("_grad", &imperative::VarBase::Grad)
.def("_print_var_pointer",
[](const imperative::VarBase &self) {
LOG(ERROR) << self.var_desc_->Name()
<< " print_var pointer: " << self.var_;
})
.def_property("value", .def_property("value",
[](const imperative::VarBase &self) { return self.var_; }, [](const imperative::VarBase &self) { return self.var_; },
[](imperative::VarBase &self, framework::Variable *var) { [](imperative::VarBase &self, framework::Variable *var) {
LOG(ERROR) << "set var to pointer: " << var;
self.var_ = var; self.var_ = var;
}, },
py::return_value_policy::reference) py::return_value_policy::reference)
......
...@@ -19,7 +19,6 @@ import contextlib ...@@ -19,7 +19,6 @@ import contextlib
import os import os
import re import re
import six import six
import sys
import numpy as np import numpy as np
...@@ -369,6 +368,7 @@ class Variable(object): ...@@ -369,6 +368,7 @@ class Variable(object):
self._ivar.stop_gradient = stop_gradient self._ivar.stop_gradient = stop_gradient
def _numpy(self): def _numpy(self):
print("get_variable_tensor", self.desc.name())
scope = _imperative_tracer().get_scope() scope = _imperative_tracer().get_scope()
tensor = core.get_variable_tensor(scope, self.desc.name()) tensor = core.get_variable_tensor(scope, self.desc.name())
return np.array(tensor) return np.array(tensor)
...@@ -380,6 +380,14 @@ class Variable(object): ...@@ -380,6 +380,14 @@ class Variable(object):
def _gradient(self): def _gradient(self):
return np.array(self._ivar._grad()) return np.array(self._ivar._grad())
# Read accessor: returns the `value` attribute of the wrapped `self._ivar`
# object unchanged.
# NOTE(review): `_ivar` is presumably the imperative VarBase binding whose
# `value` property exposes its underlying variable pointer -- confirm.
@property
def _value(self):
return self._ivar.value
# Write accessor: stores `v` into the `value` attribute of `self._ivar`.
# No validation of `v` is performed here.
@_value.setter
def _value(self, v):
self._ivar.value = v
def __str__(self): def __str__(self):
return self.to_string(True) return self.to_string(True)
...@@ -632,6 +640,7 @@ class Operator(object): ...@@ -632,6 +640,7 @@ class Operator(object):
if inputs is not None: if inputs is not None:
for in_proto in proto.inputs: for in_proto in proto.inputs:
print("create op: find_name", in_proto.name)
found = find_name(inputs, in_proto.name) found = find_name(inputs, in_proto.name)
assert found or in_proto.dispensable, "Input {} not found".format( assert found or in_proto.dispensable, "Input {} not found".format(
in_proto.name) in_proto.name)
...@@ -695,9 +704,11 @@ class Operator(object): ...@@ -695,9 +704,11 @@ class Operator(object):
self._update_desc_attr(attr_name, attr_val) self._update_desc_attr(attr_name, attr_val)
self.desc.check_attrs() self.desc.check_attrs()
if self._has_kernel(type): if self._has_kernel(type):
self.desc.infer_var_type(self.block.desc) self.desc.infer_var_type(self.block.desc)
self.desc.infer_shape(self.block.desc) self.desc.infer_shape(self.block.desc)
if _in_imperative_mode(): if _in_imperative_mode():
self.iop = core.OpBase() self.iop = core.OpBase()
self.iop.desc = self.desc self.iop.desc = self.desc
...@@ -1167,6 +1178,7 @@ class Block(object): ...@@ -1167,6 +1178,7 @@ class Block(object):
def create_var(self, *args, **kwargs): def create_var(self, *args, **kwargs):
var = Variable(block=self, *args, **kwargs) var = Variable(block=self, *args, **kwargs)
if 'initializer' in kwargs: if 'initializer' in kwargs:
print("initializer, ", type(kwargs['initializer']))
kwargs['initializer'](var, self) kwargs['initializer'](var, self)
return var return var
...@@ -1281,6 +1293,16 @@ class Block(object): ...@@ -1281,6 +1293,16 @@ class Block(object):
""" """
op_desc = self.desc.append_op() op_desc = self.desc.append_op()
op = Operator(block=self, desc=op_desc, *args, **kwargs) op = Operator(block=self, desc=op_desc, *args, **kwargs)
print("op inputs: ", [v._numpy() for v in op.inputs])
print("op inputs: ", [v for v in op.inputs])
import sys
sys.stdout.flush()
for v in op.inputs:
v._ivar._print_var_pointer()
print("print var pointer end")
import sys
sys.stdout.flush()
if _in_imperative_mode(): if _in_imperative_mode():
_imperative_tracer().trace(op.iop, [v._ivar for v in op.inputs], _imperative_tracer().trace(op.iop, [v._ivar for v in op.inputs],
[v._ivar for v in op.outputs], self.desc, [v._ivar for v in op.outputs], self.desc,
...@@ -1338,6 +1360,10 @@ class Block(object): ...@@ -1338,6 +1360,10 @@ class Block(object):
_imperative_tracer().trace(op.iop, [v._ivar for v in op.inputs], _imperative_tracer().trace(op.iop, [v._ivar for v in op.inputs],
[v._ivar for v in op.outputs], self.desc, [v._ivar for v in op.outputs], self.desc,
kwargs.get("stop_gradient", False)) kwargs.get("stop_gradient", False))
print([v.name for v in op.outputs])
for v in op.outputs:
v._ivar._print_var_pointer()
print("fill_constant end")
self.ops.insert(0, op) self.ops.insert(0, op)
return op return op
......
...@@ -153,6 +153,7 @@ class ConstantInitializer(Initializer): ...@@ -153,6 +153,7 @@ class ConstantInitializer(Initializer):
assert isinstance(var, framework.Variable) assert isinstance(var, framework.Variable)
assert isinstance(block, framework.Block) assert isinstance(block, framework.Block)
# Initialization Ops should be prepended and not appended # Initialization Ops should be prepended and not appended
print("fill_constant")
op = block._prepend_op( op = block._prepend_op(
type="fill_constant", type="fill_constant",
outputs={"Out": var}, outputs={"Out": var},
......
...@@ -369,7 +369,7 @@ class LayerHelper(object): ...@@ -369,7 +369,7 @@ class LayerHelper(object):
def set_variable_initializer(self, var, initializer): def set_variable_initializer(self, var, initializer):
assert isinstance(var, Variable) assert isinstance(var, Variable)
self.startup_program.global_block().create_var( return self.startup_program.global_block().create_var(
name=var.name, name=var.name,
type=var.type, type=var.type,
dtype=var.dtype, dtype=var.dtype,
......
...@@ -20,6 +20,7 @@ from ..framework import convert_np_dtype_to_dtype_ ...@@ -20,6 +20,7 @@ from ..framework import convert_np_dtype_to_dtype_
from ..framework import Variable from ..framework import Variable
from ..initializer import Constant, force_init_on_cpu from ..initializer import Constant, force_init_on_cpu
from ..core import VarDesc from ..core import VarDesc
from ..imperative import base as imperative_base
from .layer_function_generator import templatedoc from .layer_function_generator import templatedoc
import numpy import numpy
...@@ -126,10 +127,22 @@ def create_global_var(shape, ...@@ -126,10 +127,22 @@ def create_global_var(shape,
""" """
helper = LayerHelper("global_var", **locals()) helper = LayerHelper("global_var", **locals())
var = helper.create_global_variable( var = helper.create_global_variable(
dtype=dtype, shape=shape, persistable=persistable, name=name) dtype=dtype,
shape=shape,
persistable=persistable,
name=name,
stop_gradient=True)
print("set_variable_initializer, ", var.name)
if imperative_base.enabled():
var = helper.set_variable_initializer(
var, initializer=Constant(
value=float(value), force_cpu=force_cpu))
print("get var", var)
else:
helper.set_variable_initializer( helper.set_variable_initializer(
var, initializer=Constant( var, initializer=Constant(
value=float(value), force_cpu=force_cpu)) value=float(value), force_cpu=force_cpu))
return var return var
......
...@@ -30,6 +30,7 @@ from .initializer import Constant ...@@ -30,6 +30,7 @@ from .initializer import Constant
from .layer_helper import LayerHelper from .layer_helper import LayerHelper
from .layers import ops from .layers import ops
from .regularizer import append_regularization_ops from .regularizer import append_regularization_ops
from .imperative import base as imperative_base
__all__ = [ __all__ = [
'SGD', 'Momentum', 'Adagrad', 'Adam', 'Adamax', 'DecayedAdagrad', 'Ftrl', 'SGD', 'Momentum', 'Adagrad', 'Adam', 'Adamax', 'DecayedAdagrad', 'Ftrl',
...@@ -108,6 +109,7 @@ class Optimizer(object): ...@@ -108,6 +109,7 @@ class Optimizer(object):
# create learning rate variable for every parameter # create learning rate variable for every parameter
param = param_and_grad[0] param = param_and_grad[0]
param_lr = param.optimize_attr['learning_rate'] param_lr = param.optimize_attr['learning_rate']
print("param_lr: ", param_lr, self._global_learning_rate()._numpy())
if type(param_lr) == Variable: if type(param_lr) == Variable:
return param_lr return param_lr
else: else:
...@@ -301,6 +303,25 @@ class Optimizer(object): ...@@ -301,6 +303,25 @@ class Optimizer(object):
This method combines interface `append_backward()` and This method combines interface `append_backward()` and
`create_optimization_pass()` into one. `create_optimization_pass()` into one.
""" """
if imperative_base.enabled:
if parameter_list is not None:
params_grads = parameter_list
else:
program = loss.block.program
parameters = program.global_block().all_parameters()
params_grads = []
for param in parameters:
grad_var = Variable(
block=loss.block,
name=param._ivar._grad_name(),
stop_gradient=True)
grad_var._value = param._ivar._grad_var()
print("create grad var: ", grad_var.name)
print("grad_var value: ", grad_var._numpy())
import sys
sys.stdout.flush()
params_grads.append((param, grad_var))
else:
params_grads = append_backward(loss, parameter_list, no_grad_set, params_grads = append_backward(loss, parameter_list, no_grad_set,
[error_clip_callback]) [error_clip_callback])
...@@ -356,6 +377,10 @@ class SGDOptimizer(Optimizer): ...@@ -356,6 +377,10 @@ class SGDOptimizer(Optimizer):
def _append_optimize_op(self, block, param_and_grad): def _append_optimize_op(self, block, param_and_grad):
assert isinstance(block, framework.Block) assert isinstance(block, framework.Block)
print("append sgd")
import sys
sys.stdout.flush()
# create the optimize op # create the optimize op
sgd_op = block.append_op( sgd_op = block.append_op(
type=self.type, type=self.type,
......
...@@ -18,6 +18,7 @@ import numpy as np ...@@ -18,6 +18,7 @@ import numpy as np
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid import core from paddle.fluid import core
from paddle.fluid.optimizer import SGDOptimizer
from paddle.fluid.imperative.nn import Conv2D, Pool2D, FC from paddle.fluid.imperative.nn import Conv2D, Pool2D, FC
from paddle.fluid.imperative.base import to_variable from paddle.fluid.imperative.base import to_variable
...@@ -119,7 +120,11 @@ class TestImperativeMnist(unittest.TestCase): ...@@ -119,7 +120,11 @@ class TestImperativeMnist(unittest.TestCase):
out._backward() out._backward()
filter_grad = mnist._simple_img_conv_pool_1._conv2d._filter_param._gradient( filter_grad = mnist._simple_img_conv_pool_1._conv2d._filter_param._gradient(
) )
print(filter_grad) # print(filter_grad)
sgd = SGDOptimizer(learning_rate=1e-3)
sgd.minimize(out)
# np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32) # np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
# with fluid.imperative.guard(): # with fluid.imperative.guard():
# mlp = MLP() # mlp = MLP()
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册