Commit 68e9b841 authored by minqiyang

Add support for optimizer

Parent 224c90a8
...@@ -104,7 +104,7 @@ class Autograd {
 framework::Variable* CreateVariable(const std::string& name,
                                     const framework::DDim& dim, float val,
                                     framework::Scope* scope,
-                                    bool random_name = true) {
+                                    bool random_name = false) {
   std::string varname = name;
   if (random_name) {
     std::mt19937 rng;
......
...@@ -45,6 +45,15 @@ class VarBase {
   framework::LoDTensor& Grad();

+  inline framework::Variable* GradVar() { return grads_; }
+
+  inline std::string GradName() const {
+    PADDLE_ENFORCE(
+        var_desc_,
+        "Couldn't get gradient variable's name, please call backward() first");
+    return string::Sprintf("%s@IGrad", var_desc_->Name());
+  }
+
   OpBase* pre_op_;
   int pre_op_out_idx_;
......
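For reference, the new `GradName()` helper only derives a name: the gradient of a variable is addressed by the forward variable's name plus an `@IGrad` suffix. A minimal Python sketch of that naming rule (illustrative only; the parameter name below is made up):

```python
def grad_name(var_name):
    # Mirrors string::Sprintf("%s@IGrad", var_desc_->Name()) in VarBase::GradName().
    return "%s@IGrad" % var_name

# Hypothetical parameter name, used only to show the mapping.
assert grad_name("fc_0.w_0") == "fc_0.w_0@IGrad"
```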
...@@ -52,7 +52,7 @@ class Tracer {
              const std::vector<VarBase*>& outputs, framework::BlockDesc* block,
              const bool stop_gradient) {
     framework::OpDesc* op_desc = op->op_desc_;
-    VLOG(3) << "tracer tracing " << op_desc->Type();
+    LOG(ERROR) << "tracer tracing " << op_desc->Type();
     op_desc->InferShape(*block);
     op_desc->InferVarType(block);
     std::unique_ptr<framework::OperatorBase> op_base =
...@@ -61,7 +61,10 @@ class Tracer {
     *op->input_vars_ = inputs;
     for (VarBase* input : inputs) {
       const std::string vname = input->var_desc_->Name();
+      LOG(ERROR) << "input: " << vname;
+      LOG(ERROR) << "input var: " << input->var_;
       framework::Variable* var = root_scope_->Var(vname);
+      LOG(ERROR) << "var_ in tracer pointer: " << var;
       input->var_ = var;
       if (!var->IsInitialized()) {
         framework::VarDesc* var_desc = block->FindVar(vname);
...@@ -84,6 +87,7 @@ class Tracer {
     *op->output_vars_ = outputs;
     for (size_t i = 0; i < outputs.size(); ++i) {
       const std::string vname = outputs[i]->var_desc_->Name();
+      LOG(ERROR) << "output name: " << vname;
       framework::Variable* var = root_scope_->Var(vname);
       if (!var->IsInitialized()) {
         framework::VarDesc* var_desc = block->FindVar(vname);
...@@ -98,7 +102,7 @@ class Tracer {
       outputs[i]->pre_op_out_idx_ = i;
     }
-    VLOG(3) << "tracer running " << op_desc->Type();
+    LOG(ERROR) << "tracer running " << op_desc->Type();
     op_base->Run(*root_scope_, platform::CPUPlace());
     if (!stop_gradient) {
       framework::OpDesc* grad_op_desc;
......
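Besides the extra LOG(ERROR) debugging output, this hunk shows the bookkeeping the tracer keeps for every traced op: inputs and outputs are recorded on the op, and each output remembers which op produced it (`pre_op_`) and at which output index (`pre_op_out_idx_`), which is what the backward pass later walks. A minimal pure-Python sketch of that bookkeeping, with made-up class names rather than the real Paddle types:

```python
# Toy tape mirroring pre_op_ / pre_op_out_idx_ in imperative::VarBase.
# Names are illustrative; this is not the Paddle API.
class ToyVar(object):
    def __init__(self, name):
        self.name = name
        self.pre_op = None          # op that produced this variable
        self.pre_op_out_idx = -1    # position in that op's output list


class ToyOp(object):
    def __init__(self, op_type):
        self.type = op_type
        self.input_vars = []
        self.output_vars = []


def trace(op, inputs, outputs):
    op.input_vars = list(inputs)
    op.output_vars = list(outputs)
    for i, out in enumerate(outputs):
        out.pre_op = op             # backward can walk this producer chain
        out.pre_op_out_idx = i


x, w, y = ToyVar("x"), ToyVar("w"), ToyVar("y")
trace(ToyOp("mul"), [x, w], [y])
print(y.pre_op.type, y.pre_op_out_idx)  # mul 0
```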
...@@ -29,6 +29,8 @@ class SGDOpKernel : public framework::OpKernel<T> {
     const auto *param_var = ctx.InputVar("Param");
     const auto *grad_var = ctx.InputVar("Grad");
+    LOG(ERROR) << "grad_var: " << grad_var;
     if (param_var->IsType<framework::LoDTensor>()) {
       const auto *param = ctx.Input<framework::Tensor>("Param");
       auto *param_out = ctx.Output<framework::Tensor>("ParamOut");
...@@ -39,8 +41,11 @@ class SGDOpKernel : public framework::OpKernel<T> {
       const auto *grad = ctx.Input<framework::Tensor>("Grad");
       auto p = framework::EigenVector<T>::Flatten(*param);
+      LOG(ERROR) << "param flattened";
       auto g = framework::EigenVector<T>::Flatten(*grad);
+      LOG(ERROR) << "grad flattened";
       auto o = framework::EigenVector<T>::Flatten(*param_out);
+      LOG(ERROR) << "paramout flattened";
       auto *lr = learning_rate->data<T>();
       o = p - lr[0] * g;
......
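The kernel body above is the plain SGD update `o = p - lr[0] * g` applied to flattened tensors. The same arithmetic in NumPy, as a quick sanity-check sketch (not the Paddle kernel itself):

```python
import numpy as np

param = np.array([0.5, -1.0, 2.0], dtype=np.float32)
grad = np.array([0.1, 0.2, -0.3], dtype=np.float32)
lr = np.array([0.01], dtype=np.float32)  # the learning-rate input holds one element

# Mirrors `o = p - lr[0] * g` on the flattened vectors.
param_out = param - lr[0] * grad
print(param_out)  # [ 0.499 -1.002  2.003]
```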
...@@ -117,10 +117,23 @@ PYBIND11_MODULE(core, m) {
            [](imperative::VarBase &self, framework::Scope *scope) {
              self.RunBackward(scope);
            })
+      .def("_grad_var",
+           [](const imperative::VarBase &self) {
+             LOG(ERROR) << "grad_var_ pointer: " << self.grads_;
+             return self.grads_;
+           },
+           py::return_value_policy::reference)
+      .def("_grad_name", &imperative::VarBase::GradName)
       .def("_grad", &imperative::VarBase::Grad)
+      .def("_print_var_pointer",
+           [](const imperative::VarBase &self) {
+             LOG(ERROR) << self.var_desc_->Name()
+                        << " print_var pointer: " << self.var_;
+           })
       .def_property("value",
                     [](const imperative::VarBase &self) { return self.var_; },
                     [](imperative::VarBase &self, framework::Variable *var) {
+                      LOG(ERROR) << "set var to pointer: " << var;
                       self.var_ = var;
                     },
                     py::return_value_policy::reference)
......
...@@ -19,7 +19,6 @@ import contextlib
 import os
 import re
 import six
-import sys

 import numpy as np
...@@ -369,6 +368,7 @@ class Variable(object):
             self._ivar.stop_gradient = stop_gradient

     def _numpy(self):
+        print("get_variable_tensor", self.desc.name())
         scope = _imperative_tracer().get_scope()
         tensor = core.get_variable_tensor(scope, self.desc.name())
         return np.array(tensor)
...@@ -380,6 +380,14 @@ class Variable(object):
     def _gradient(self):
         return np.array(self._ivar._grad())

+    @property
+    def _value(self):
+        return self._ivar.value
+
+    @_value.setter
+    def _value(self, v):
+        self._ivar.value = v
+
     def __str__(self):
         return self.to_string(True)
...@@ -632,6 +640,7 @@ class Operator(object):
         if inputs is not None:
             for in_proto in proto.inputs:
+                print("create op: find_name", in_proto.name)
                 found = find_name(inputs, in_proto.name)
                 assert found or in_proto.dispensable, "Input {} not found".format(
                     in_proto.name)
...@@ -695,9 +704,11 @@ class Operator(object):
             self._update_desc_attr(attr_name, attr_val)
         self.desc.check_attrs()
         if self._has_kernel(type):
             self.desc.infer_var_type(self.block.desc)
             self.desc.infer_shape(self.block.desc)
         if _in_imperative_mode():
             self.iop = core.OpBase()
             self.iop.desc = self.desc
...@@ -1167,6 +1178,7 @@ class Block(object):
     def create_var(self, *args, **kwargs):
         var = Variable(block=self, *args, **kwargs)
         if 'initializer' in kwargs:
+            print("initializer, ", type(kwargs['initializer']))
             kwargs['initializer'](var, self)
         return var
...@@ -1281,6 +1293,16 @@ class Block(object):
         """
         op_desc = self.desc.append_op()
         op = Operator(block=self, desc=op_desc, *args, **kwargs)
+        print("op inputs: ", [v._numpy() for v in op.inputs])
+        print("op inputs: ", [v for v in op.inputs])
+        import sys
+        sys.stdout.flush()
+        for v in op.inputs:
+            v._ivar._print_var_pointer()
+        print("print var pointer end")
+        import sys
+        sys.stdout.flush()
         if _in_imperative_mode():
             _imperative_tracer().trace(op.iop, [v._ivar for v in op.inputs],
                                        [v._ivar for v in op.outputs], self.desc,
...@@ -1338,6 +1360,10 @@ class Block(object):
             _imperative_tracer().trace(op.iop, [v._ivar for v in op.inputs],
                                        [v._ivar for v in op.outputs], self.desc,
                                        kwargs.get("stop_gradient", False))
+            print([v.name for v in op.outputs])
+            for v in op.outputs:
+                v._ivar._print_var_pointer()
+            print("fill_constant end")
         self.ops.insert(0, op)
         return op
......
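The new `_value` property on `Variable` is a thin forwarding layer over the `value` property that the pybind layer exposes on the underlying `VarBase` (see the `def_property("value", ...)` hunk above); the optimizer change below uses it to attach an existing gradient `framework::Variable` to a freshly created Python `Variable`. A tiny self-contained sketch of the same forwarding pattern, with stand-in classes instead of the real framework objects:

```python
# Stand-ins only: FakeIVar plays the role of the C++ VarBase exposed via pybind.
class FakeIVar(object):
    def __init__(self):
        self.value = None


class FakeVariable(object):
    def __init__(self):
        self._ivar = FakeIVar()

    @property
    def _value(self):
        return self._ivar.value

    @_value.setter
    def _value(self, v):
        self._ivar.value = v


v = FakeVariable()
v._value = "underlying framework::Variable handle"
print(v._value)
```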
...@@ -153,6 +153,7 @@ class ConstantInitializer(Initializer):
         assert isinstance(var, framework.Variable)
         assert isinstance(block, framework.Block)
         # Initialization Ops should be prepended and not appended
+        print("fill_constant")
         op = block._prepend_op(
             type="fill_constant",
             outputs={"Out": var},
......
...@@ -369,7 +369,7 @@ class LayerHelper(object):
     def set_variable_initializer(self, var, initializer):
         assert isinstance(var, Variable)
-        self.startup_program.global_block().create_var(
+        return self.startup_program.global_block().create_var(
             name=var.name,
             type=var.type,
             dtype=var.dtype,
......
...@@ -20,6 +20,7 @@ from ..framework import convert_np_dtype_to_dtype_
 from ..framework import Variable
 from ..initializer import Constant, force_init_on_cpu
 from ..core import VarDesc
+from ..imperative import base as imperative_base
 from .layer_function_generator import templatedoc
 import numpy
...@@ -104,15 +105,15 @@ def create_global_var(shape,
     Args:
         shape(list[int]): shape of the variable
         value(float): the value of the variable. The new created
             variable will be filled with it.
         dtype(string): data type of the variable
         persistable(bool): if this variable is persistable.
             Default: False
         force_cpu(bool): force this variable to be on CPU.
             Default: False
         name(str|None): The name of the variable. If set to None the variable
             name will be generated automatically.
             Default: None

     Returns:
...@@ -121,21 +122,33 @@ def create_global_var(shape,
     Examples:
         .. code-block:: python

            var = fluid.create_global_var(shape=[2,3], value=1.0, dtype='float32',
                                          persistable=True, force_cpu=True, name='new_var')
     """
     helper = LayerHelper("global_var", **locals())
     var = helper.create_global_variable(
-        dtype=dtype, shape=shape, persistable=persistable, name=name)
-    helper.set_variable_initializer(
-        var, initializer=Constant(
-            value=float(value), force_cpu=force_cpu))
+        dtype=dtype,
+        shape=shape,
+        persistable=persistable,
+        name=name,
+        stop_gradient=True)
+    print("set_variable_initializer, ", var.name)
+    if imperative_base.enabled():
+        var = helper.set_variable_initializer(
+            var, initializer=Constant(
+                value=float(value), force_cpu=force_cpu))
+        print("get var", var)
+    else:
+        helper.set_variable_initializer(
+            var, initializer=Constant(
+                value=float(value), force_cpu=force_cpu))

     return var

 def cast(x, dtype):
     """
     This layer takes in the Variable :attr:`x` with :attr:`x.dtype` and casts
     it to the output with :attr:`dtype`.

     Args:
...@@ -199,9 +212,9 @@ def tensor_array_to_tensor(input, axis=1, name=None):
     and returns that as the output.

     A simple example as below:

     .. code-block:: text

         Given:

         input.data = {[[0.6, 0.1, 0.3],
...@@ -210,9 +223,9 @@ def tensor_array_to_tensor(input, axis=1, name=None):
                        [1.8]],
                       [[2.3, 2.1],
                        [2.5, 2.4]]}

         axis = 1

         Then:

         output.data = [[0.6, 0.1, 0.3, 1.3, 2.3, 2.1],
...@@ -493,12 +506,12 @@ def argmax(x, axis=0):
 def argsort(input, axis=-1, name=None):
     """
     Performs sorting on the input Variable along the given axis, and outputs
     sorted data Varibale and its corresponding index Variable with the same
     shape as :attr:`input`.

     .. code-block:: text

         For example, the given axis is -1 and the input Variable

             input = [[0.15849551, 0.45865775, 0.8563702 ],
...@@ -511,15 +524,15 @@ def argsort(input, axis=-1, name=None):
         and the sorted indices along the given axis turn outs to be

             indices = [[0, 1, 2],
                        [0, 2, 1]]

     Args:
         input(Variable): The input Variable for sorting.
         axis(int): The axis along which to sort the input Variable. When
             :attr:`axis` < 0, the actual axis will be :attr:`axis` +
             rank(:attr:`input`). Default -1, the last dimension.
         name(str|None): (optional) A name for this layer. If set None, the
             layer will be named automatically.

     Returns:
......
...@@ -30,6 +30,7 @@ from .initializer import Constant
 from .layer_helper import LayerHelper
 from .layers import ops
 from .regularizer import append_regularization_ops
+from .imperative import base as imperative_base

 __all__ = [
     'SGD', 'Momentum', 'Adagrad', 'Adam', 'Adamax', 'DecayedAdagrad', 'Ftrl',
...@@ -108,6 +109,7 @@ class Optimizer(object):
         # create learning rate variable for every parameter
         param = param_and_grad[0]
         param_lr = param.optimize_attr['learning_rate']
+        print("param_lr: ", param_lr, self._global_learning_rate()._numpy())
         if type(param_lr) == Variable:
             return param_lr
         else:
...@@ -301,19 +303,38 @@ class Optimizer(object):
         This method combines interface `append_backward()` and
         `create_optimization_pass()` into one.
         """
-        params_grads = append_backward(loss, parameter_list, no_grad_set,
-                                       [error_clip_callback])
+        if imperative_base.enabled:
+            if parameter_list is not None:
+                params_grads = parameter_list
+            else:
+                program = loss.block.program
+                parameters = program.global_block().all_parameters()
+                params_grads = []
+                for param in parameters:
+                    grad_var = Variable(
+                        block=loss.block,
+                        name=param._ivar._grad_name(),
+                        stop_gradient=True)
+                    grad_var._value = param._ivar._grad_var()
+                    print("create grad var: ", grad_var.name)
+                    print("grad_var value: ", grad_var._numpy())
+                    import sys
+                    sys.stdout.flush()
+                    params_grads.append((param, grad_var))
+        else:
+            params_grads = append_backward(loss, parameter_list, no_grad_set,
+                                           [error_clip_callback])

         params_grads = sorted(params_grads, key=lambda x: x[0].name)

         params_grads, table_param_and_grad, table_optimize_op = \
             self._process_distribute_lookuptable(params_grads, loss, startup_program)

         params_grads = append_gradient_clip_ops(params_grads)

         # Add regularization if any
         params_grads = append_regularization_ops(params_grads,
                                                  self.regularization)

         optimize_ops = self._create_optimization_pass(params_grads, loss,
                                                       startup_program)
...@@ -356,6 +377,10 @@ class SGDOptimizer(Optimizer):
     def _append_optimize_op(self, block, param_and_grad):
         assert isinstance(block, framework.Block)
+        print("append sgd")
+        import sys
+        sys.stdout.flush()

         # create the optimize op
         sgd_op = block.append_op(
             type=self.type,
...@@ -477,7 +502,7 @@ class LarsMomentumOptimizer(Optimizer):
         regularization: A Regularizer, such as
             fluid.regularizer.L2DecayRegularizer.
         name: A optional name prefix.

     Examples:
         .. code-block:: python
......
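The core of this change is that, when imperative mode is enabled, `Optimizer.minimize()` no longer calls `append_backward()`; it pairs every parameter with the gradient variable that `backward()` already produced, using the `_grad_name()` / `_grad_var()` bindings added above. A stripped-down sketch of that pairing logic with stand-in objects (not the real `framework.Variable`/`VarBase` classes):

```python
# Stand-in parameter object; the real code reads param._ivar._grad_name()
# and param._ivar._grad_var() as shown in the diff above.
class FakeParam(object):
    def __init__(self, name, grad_value):
        self.name = name
        self.grad_name = name + "@IGrad"  # matches VarBase::GradName()
        self.grad_value = grad_value      # filled in by the backward pass


def build_params_grads(parameters):
    params_grads = []
    for param in parameters:
        # The diff wraps the existing gradient Variable in a new framework.Variable;
        # here a plain dict stands in for that wrapper.
        grad_var = {"name": param.grad_name, "value": param.grad_value}
        params_grads.append((param, grad_var))
    return params_grads


params = [FakeParam("fc_0.w_0", [0.1, 0.2]), FakeParam("fc_0.b_0", [0.05])]
for p, g in build_params_grads(params):
    print(p.name, "->", g["name"], g["value"])
```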
...@@ -18,6 +18,7 @@ import numpy as np
 import paddle.fluid as fluid
 from paddle.fluid import core
+from paddle.fluid.optimizer import SGDOptimizer
 from paddle.fluid.imperative.nn import Conv2D, Pool2D, FC
 from paddle.fluid.imperative.base import to_variable
...@@ -119,7 +120,11 @@ class TestImperativeMnist(unittest.TestCase):
             out._backward()
             filter_grad = mnist._simple_img_conv_pool_1._conv2d._filter_param._gradient(
             )
-            print(filter_grad)
+            # print(filter_grad)
+
+            sgd = SGDOptimizer(learning_rate=1e-3)
+            sgd.minimize(out)

 #        np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
 #        with fluid.imperative.guard():
 #            mlp = MLP()
......