Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
68e9b841
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2302
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
68e9b841
编写于
12月 27, 2018
作者:
M
minqiyang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add support for optimizer
上级
224c90a8
变更
11
隐藏空白更改
内联
并排
Showing
11 changed file
with
139 addition
and
38 deletion
+139
-38
paddle/fluid/imperative/layer.cc
paddle/fluid/imperative/layer.cc
+1
-1
paddle/fluid/imperative/layer.h
paddle/fluid/imperative/layer.h
+9
-0
paddle/fluid/imperative/tracer.h
paddle/fluid/imperative/tracer.h
+6
-2
paddle/fluid/operators/optimizers/sgd_op.h
paddle/fluid/operators/optimizers/sgd_op.h
+5
-0
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+13
-0
python/paddle/fluid/framework.py
python/paddle/fluid/framework.py
+27
-1
python/paddle/fluid/initializer.py
python/paddle/fluid/initializer.py
+1
-0
python/paddle/fluid/layer_helper.py
python/paddle/fluid/layer_helper.py
+1
-1
python/paddle/fluid/layers/tensor.py
python/paddle/fluid/layers/tensor.py
+35
-22
python/paddle/fluid/optimizer.py
python/paddle/fluid/optimizer.py
+35
-10
python/paddle/fluid/tests/unittests/test_imperative_mnist.py
python/paddle/fluid/tests/unittests/test_imperative_mnist.py
+6
-1
未找到文件。
paddle/fluid/imperative/layer.cc
浏览文件 @
68e9b841
...
...
@@ -104,7 +104,7 @@ class Autograd {
framework
::
Variable
*
CreateVariable
(
const
std
::
string
&
name
,
const
framework
::
DDim
&
dim
,
float
val
,
framework
::
Scope
*
scope
,
bool
random_name
=
tru
e
)
{
bool
random_name
=
fals
e
)
{
std
::
string
varname
=
name
;
if
(
random_name
)
{
std
::
mt19937
rng
;
...
...
paddle/fluid/imperative/layer.h
浏览文件 @
68e9b841
...
...
@@ -45,6 +45,15 @@ class VarBase {
framework
::
LoDTensor
&
Grad
();
inline
framework
::
Variable
*
GradVar
()
{
return
grads_
;
}
inline
std
::
string
GradName
()
const
{
PADDLE_ENFORCE
(
var_desc_
,
"Couldn't get gradient variable's name, please call backward() first"
);
return
string
::
Sprintf
(
"%s@IGrad"
,
var_desc_
->
Name
());
}
OpBase
*
pre_op_
;
int
pre_op_out_idx_
;
...
...
paddle/fluid/imperative/tracer.h
浏览文件 @
68e9b841
...
...
@@ -52,7 +52,7 @@ class Tracer {
const
std
::
vector
<
VarBase
*>&
outputs
,
framework
::
BlockDesc
*
block
,
const
bool
stop_gradient
)
{
framework
::
OpDesc
*
op_desc
=
op
->
op_desc_
;
VLOG
(
3
)
<<
"tracer tracing "
<<
op_desc
->
Type
();
LOG
(
ERROR
)
<<
"tracer tracing "
<<
op_desc
->
Type
();
op_desc
->
InferShape
(
*
block
);
op_desc
->
InferVarType
(
block
);
std
::
unique_ptr
<
framework
::
OperatorBase
>
op_base
=
...
...
@@ -61,7 +61,10 @@ class Tracer {
*
op
->
input_vars_
=
inputs
;
for
(
VarBase
*
input
:
inputs
)
{
const
std
::
string
vname
=
input
->
var_desc_
->
Name
();
LOG
(
ERROR
)
<<
"input: "
<<
vname
;
LOG
(
ERROR
)
<<
"input var: "
<<
input
->
var_
;
framework
::
Variable
*
var
=
root_scope_
->
Var
(
vname
);
LOG
(
ERROR
)
<<
"var_ in tracer pointer: "
<<
var
;
input
->
var_
=
var
;
if
(
!
var
->
IsInitialized
())
{
framework
::
VarDesc
*
var_desc
=
block
->
FindVar
(
vname
);
...
...
@@ -84,6 +87,7 @@ class Tracer {
*
op
->
output_vars_
=
outputs
;
for
(
size_t
i
=
0
;
i
<
outputs
.
size
();
++
i
)
{
const
std
::
string
vname
=
outputs
[
i
]
->
var_desc_
->
Name
();
LOG
(
ERROR
)
<<
"output name: "
<<
vname
;
framework
::
Variable
*
var
=
root_scope_
->
Var
(
vname
);
if
(
!
var
->
IsInitialized
())
{
framework
::
VarDesc
*
var_desc
=
block
->
FindVar
(
vname
);
...
...
@@ -98,7 +102,7 @@ class Tracer {
outputs
[
i
]
->
pre_op_out_idx_
=
i
;
}
VLOG
(
3
)
<<
"tracer running "
<<
op_desc
->
Type
();
LOG
(
ERROR
)
<<
"tracer running "
<<
op_desc
->
Type
();
op_base
->
Run
(
*
root_scope_
,
platform
::
CPUPlace
());
if
(
!
stop_gradient
)
{
framework
::
OpDesc
*
grad_op_desc
;
...
...
paddle/fluid/operators/optimizers/sgd_op.h
浏览文件 @
68e9b841
...
...
@@ -29,6 +29,8 @@ class SGDOpKernel : public framework::OpKernel<T> {
const
auto
*
param_var
=
ctx
.
InputVar
(
"Param"
);
const
auto
*
grad_var
=
ctx
.
InputVar
(
"Grad"
);
LOG
(
ERROR
)
<<
"grad_var: "
<<
grad_var
;
if
(
param_var
->
IsType
<
framework
::
LoDTensor
>
())
{
const
auto
*
param
=
ctx
.
Input
<
framework
::
Tensor
>
(
"Param"
);
auto
*
param_out
=
ctx
.
Output
<
framework
::
Tensor
>
(
"ParamOut"
);
...
...
@@ -39,8 +41,11 @@ class SGDOpKernel : public framework::OpKernel<T> {
const
auto
*
grad
=
ctx
.
Input
<
framework
::
Tensor
>
(
"Grad"
);
auto
p
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
param
);
LOG
(
ERROR
)
<<
"param flattened"
;
auto
g
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
grad
);
LOG
(
ERROR
)
<<
"grad flattened"
;
auto
o
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
param_out
);
LOG
(
ERROR
)
<<
"paramout flattened"
;
auto
*
lr
=
learning_rate
->
data
<
T
>
();
o
=
p
-
lr
[
0
]
*
g
;
...
...
paddle/fluid/pybind/pybind.cc
浏览文件 @
68e9b841
...
...
@@ -117,10 +117,23 @@ PYBIND11_MODULE(core, m) {
[](
imperative
::
VarBase
&
self
,
framework
::
Scope
*
scope
)
{
self
.
RunBackward
(
scope
);
})
.
def
(
"_grad_var"
,
[](
const
imperative
::
VarBase
&
self
)
{
LOG
(
ERROR
)
<<
"grad_var_ pointer: "
<<
self
.
grads_
;
return
self
.
grads_
;
},
py
::
return_value_policy
::
reference
)
.
def
(
"_grad_name"
,
&
imperative
::
VarBase
::
GradName
)
.
def
(
"_grad"
,
&
imperative
::
VarBase
::
Grad
)
.
def
(
"_print_var_pointer"
,
[](
const
imperative
::
VarBase
&
self
)
{
LOG
(
ERROR
)
<<
self
.
var_desc_
->
Name
()
<<
" print_var pointer: "
<<
self
.
var_
;
})
.
def_property
(
"value"
,
[](
const
imperative
::
VarBase
&
self
)
{
return
self
.
var_
;
},
[](
imperative
::
VarBase
&
self
,
framework
::
Variable
*
var
)
{
LOG
(
ERROR
)
<<
"set var to pointer: "
<<
var
;
self
.
var_
=
var
;
},
py
::
return_value_policy
::
reference
)
...
...
python/paddle/fluid/framework.py
浏览文件 @
68e9b841
...
...
@@ -19,7 +19,6 @@ import contextlib
import
os
import
re
import
six
import
sys
import
numpy
as
np
...
...
@@ -369,6 +368,7 @@ class Variable(object):
self
.
_ivar
.
stop_gradient
=
stop_gradient
def
_numpy
(
self
):
print
(
"get_variable_tensor"
,
self
.
desc
.
name
())
scope
=
_imperative_tracer
().
get_scope
()
tensor
=
core
.
get_variable_tensor
(
scope
,
self
.
desc
.
name
())
return
np
.
array
(
tensor
)
...
...
@@ -380,6 +380,14 @@ class Variable(object):
def
_gradient
(
self
):
return
np
.
array
(
self
.
_ivar
.
_grad
())
@
property
def
_value
(
self
):
return
self
.
_ivar
.
value
@
_value
.
setter
def
_value
(
self
,
v
):
self
.
_ivar
.
value
=
v
def
__str__
(
self
):
return
self
.
to_string
(
True
)
...
...
@@ -632,6 +640,7 @@ class Operator(object):
if
inputs
is
not
None
:
for
in_proto
in
proto
.
inputs
:
print
(
"create op: find_name"
,
in_proto
.
name
)
found
=
find_name
(
inputs
,
in_proto
.
name
)
assert
found
or
in_proto
.
dispensable
,
"Input {} not found"
.
format
(
in_proto
.
name
)
...
...
@@ -695,9 +704,11 @@ class Operator(object):
self
.
_update_desc_attr
(
attr_name
,
attr_val
)
self
.
desc
.
check_attrs
()
if
self
.
_has_kernel
(
type
):
self
.
desc
.
infer_var_type
(
self
.
block
.
desc
)
self
.
desc
.
infer_shape
(
self
.
block
.
desc
)
if
_in_imperative_mode
():
self
.
iop
=
core
.
OpBase
()
self
.
iop
.
desc
=
self
.
desc
...
...
@@ -1167,6 +1178,7 @@ class Block(object):
def
create_var
(
self
,
*
args
,
**
kwargs
):
var
=
Variable
(
block
=
self
,
*
args
,
**
kwargs
)
if
'initializer'
in
kwargs
:
print
(
"initializer, "
,
type
(
kwargs
[
'initializer'
]))
kwargs
[
'initializer'
](
var
,
self
)
return
var
...
...
@@ -1281,6 +1293,16 @@ class Block(object):
"""
op_desc
=
self
.
desc
.
append_op
()
op
=
Operator
(
block
=
self
,
desc
=
op_desc
,
*
args
,
**
kwargs
)
print
(
"op inputs: "
,
[
v
.
_numpy
()
for
v
in
op
.
inputs
])
print
(
"op inputs: "
,
[
v
for
v
in
op
.
inputs
])
import
sys
sys
.
stdout
.
flush
()
for
v
in
op
.
inputs
:
v
.
_ivar
.
_print_var_pointer
()
print
(
"print var pointer end"
)
import
sys
sys
.
stdout
.
flush
()
if
_in_imperative_mode
():
_imperative_tracer
().
trace
(
op
.
iop
,
[
v
.
_ivar
for
v
in
op
.
inputs
],
[
v
.
_ivar
for
v
in
op
.
outputs
],
self
.
desc
,
...
...
@@ -1338,6 +1360,10 @@ class Block(object):
_imperative_tracer
().
trace
(
op
.
iop
,
[
v
.
_ivar
for
v
in
op
.
inputs
],
[
v
.
_ivar
for
v
in
op
.
outputs
],
self
.
desc
,
kwargs
.
get
(
"stop_gradient"
,
False
))
print
([
v
.
name
for
v
in
op
.
outputs
])
for
v
in
op
.
outputs
:
v
.
_ivar
.
_print_var_pointer
()
print
(
"fill_constant end"
)
self
.
ops
.
insert
(
0
,
op
)
return
op
...
...
python/paddle/fluid/initializer.py
浏览文件 @
68e9b841
...
...
@@ -153,6 +153,7 @@ class ConstantInitializer(Initializer):
assert
isinstance
(
var
,
framework
.
Variable
)
assert
isinstance
(
block
,
framework
.
Block
)
# Initialization Ops should be prepended and not appended
print
(
"fill_constant"
)
op
=
block
.
_prepend_op
(
type
=
"fill_constant"
,
outputs
=
{
"Out"
:
var
},
...
...
python/paddle/fluid/layer_helper.py
浏览文件 @
68e9b841
...
...
@@ -369,7 +369,7 @@ class LayerHelper(object):
def
set_variable_initializer
(
self
,
var
,
initializer
):
assert
isinstance
(
var
,
Variable
)
self
.
startup_program
.
global_block
().
create_var
(
return
self
.
startup_program
.
global_block
().
create_var
(
name
=
var
.
name
,
type
=
var
.
type
,
dtype
=
var
.
dtype
,
...
...
python/paddle/fluid/layers/tensor.py
浏览文件 @
68e9b841
...
...
@@ -20,6 +20,7 @@ from ..framework import convert_np_dtype_to_dtype_
from
..framework
import
Variable
from
..initializer
import
Constant
,
force_init_on_cpu
from
..core
import
VarDesc
from
..imperative
import
base
as
imperative_base
from
.layer_function_generator
import
templatedoc
import
numpy
...
...
@@ -104,15 +105,15 @@ def create_global_var(shape,
Args:
shape(list[int]): shape of the variable
value(float): the value of the variable. The new created
value(float): the value of the variable. The new created
variable will be filled with it.
dtype(string): data type of the variable
persistable(bool): if this variable is persistable.
persistable(bool): if this variable is persistable.
Default: False
force_cpu(bool): force this variable to be on CPU.
force_cpu(bool): force this variable to be on CPU.
Default: False
name(str|None): The name of the variable. If set to None the variable
name will be generated automatically.
name(str|None): The name of the variable. If set to None the variable
name will be generated automatically.
Default: None
Returns:
...
...
@@ -121,21 +122,33 @@ def create_global_var(shape,
Examples:
.. code-block:: python
var = fluid.create_global_var(shape=[2,3], value=1.0, dtype='float32',
var = fluid.create_global_var(shape=[2,3], value=1.0, dtype='float32',
persistable=True, force_cpu=True, name='new_var')
"""
helper
=
LayerHelper
(
"global_var"
,
**
locals
())
var
=
helper
.
create_global_variable
(
dtype
=
dtype
,
shape
=
shape
,
persistable
=
persistable
,
name
=
name
)
helper
.
set_variable_initializer
(
var
,
initializer
=
Constant
(
value
=
float
(
value
),
force_cpu
=
force_cpu
))
dtype
=
dtype
,
shape
=
shape
,
persistable
=
persistable
,
name
=
name
,
stop_gradient
=
True
)
print
(
"set_variable_initializer, "
,
var
.
name
)
if
imperative_base
.
enabled
():
var
=
helper
.
set_variable_initializer
(
var
,
initializer
=
Constant
(
value
=
float
(
value
),
force_cpu
=
force_cpu
))
print
(
"get var"
,
var
)
else
:
helper
.
set_variable_initializer
(
var
,
initializer
=
Constant
(
value
=
float
(
value
),
force_cpu
=
force_cpu
))
return
var
def
cast
(
x
,
dtype
):
"""
This layer takes in the Variable :attr:`x` with :attr:`x.dtype` and casts
This layer takes in the Variable :attr:`x` with :attr:`x.dtype` and casts
it to the output with :attr:`dtype`.
Args:
...
...
@@ -199,9 +212,9 @@ def tensor_array_to_tensor(input, axis=1, name=None):
and returns that as the output.
A simple example as below:
.. code-block:: text
Given:
input.data = {[[0.6, 0.1, 0.3],
...
...
@@ -210,9 +223,9 @@ def tensor_array_to_tensor(input, axis=1, name=None):
[1.8]],
[[2.3, 2.1],
[2.5, 2.4]]}
axis = 1
Then:
output.data = [[0.6, 0.1, 0.3, 1.3, 2.3, 2.1],
...
...
@@ -493,12 +506,12 @@ def argmax(x, axis=0):
def
argsort
(
input
,
axis
=-
1
,
name
=
None
):
"""
Performs sorting on the input Variable along the given axis, and outputs
sorted data Varibale and its corresponding index Variable with the same
Performs sorting on the input Variable along the given axis, and outputs
sorted data Varibale and its corresponding index Variable with the same
shape as :attr:`input`.
.. code-block:: text
For example, the given axis is -1 and the input Variable
input = [[0.15849551, 0.45865775, 0.8563702 ],
...
...
@@ -511,15 +524,15 @@ def argsort(input, axis=-1, name=None):
and the sorted indices along the given axis turn outs to be
indices = [[0, 1, 2],
indices = [[0, 1, 2],
[0, 2, 1]]
Args:
input(Variable): The input Variable for sorting.
axis(int): The axis along which to sort the input Variable. When
:attr:`axis` < 0, the actual axis will be :attr:`axis` +
axis(int): The axis along which to sort the input Variable. When
:attr:`axis` < 0, the actual axis will be :attr:`axis` +
rank(:attr:`input`). Default -1, the last dimension.
name(str|None): (optional) A name for this layer. If set None, the
name(str|None): (optional) A name for this layer. If set None, the
layer will be named automatically.
Returns:
...
...
python/paddle/fluid/optimizer.py
浏览文件 @
68e9b841
...
...
@@ -30,6 +30,7 @@ from .initializer import Constant
from
.layer_helper
import
LayerHelper
from
.layers
import
ops
from
.regularizer
import
append_regularization_ops
from
.imperative
import
base
as
imperative_base
__all__
=
[
'SGD'
,
'Momentum'
,
'Adagrad'
,
'Adam'
,
'Adamax'
,
'DecayedAdagrad'
,
'Ftrl'
,
...
...
@@ -108,6 +109,7 @@ class Optimizer(object):
# create learning rate variable for every parameter
param
=
param_and_grad
[
0
]
param_lr
=
param
.
optimize_attr
[
'learning_rate'
]
print
(
"param_lr: "
,
param_lr
,
self
.
_global_learning_rate
().
_numpy
())
if
type
(
param_lr
)
==
Variable
:
return
param_lr
else
:
...
...
@@ -301,19 +303,38 @@ class Optimizer(object):
This method combines interface `append_backward()` and
`create_optimization_pass()` into one.
"""
params_grads
=
append_backward
(
loss
,
parameter_list
,
no_grad_set
,
[
error_clip_callback
])
if
imperative_base
.
enabled
:
if
parameter_list
is
not
None
:
params_grads
=
parameter_list
else
:
program
=
loss
.
block
.
program
parameters
=
program
.
global_block
().
all_parameters
()
params_grads
=
[]
for
param
in
parameters
:
grad_var
=
Variable
(
block
=
loss
.
block
,
name
=
param
.
_ivar
.
_grad_name
(),
stop_gradient
=
True
)
grad_var
.
_value
=
param
.
_ivar
.
_grad_var
()
print
(
"create grad var: "
,
grad_var
.
name
)
print
(
"grad_var value: "
,
grad_var
.
_numpy
())
import
sys
sys
.
stdout
.
flush
()
params_grads
.
append
((
param
,
grad_var
))
else
:
params_grads
=
append_backward
(
loss
,
parameter_list
,
no_grad_set
,
[
error_clip_callback
])
params_grads
=
sorted
(
params_grads
,
key
=
lambda
x
:
x
[
0
].
name
)
params_grads
=
sorted
(
params_grads
,
key
=
lambda
x
:
x
[
0
].
name
)
params_grads
,
table_param_and_grad
,
table_optimize_op
=
\
self
.
_process_distribute_lookuptable
(
params_grads
,
loss
,
startup_program
)
params_grads
,
table_param_and_grad
,
table_optimize_op
=
\
self
.
_process_distribute_lookuptable
(
params_grads
,
loss
,
startup_program
)
params_grads
=
append_gradient_clip_ops
(
params_grads
)
params_grads
=
append_gradient_clip_ops
(
params_grads
)
# Add regularization if any
params_grads
=
append_regularization_ops
(
params_grads
,
self
.
regularization
)
# Add regularization if any
params_grads
=
append_regularization_ops
(
params_grads
,
self
.
regularization
)
optimize_ops
=
self
.
_create_optimization_pass
(
params_grads
,
loss
,
startup_program
)
...
...
@@ -356,6 +377,10 @@ class SGDOptimizer(Optimizer):
def
_append_optimize_op
(
self
,
block
,
param_and_grad
):
assert
isinstance
(
block
,
framework
.
Block
)
print
(
"append sgd"
)
import
sys
sys
.
stdout
.
flush
()
# create the optimize op
sgd_op
=
block
.
append_op
(
type
=
self
.
type
,
...
...
@@ -477,7 +502,7 @@ class LarsMomentumOptimizer(Optimizer):
regularization: A Regularizer, such as
fluid.regularizer.L2DecayRegularizer.
name: A optional name prefix.
Examples:
.. code-block:: python
...
...
python/paddle/fluid/tests/unittests/test_imperative_mnist.py
浏览文件 @
68e9b841
...
...
@@ -18,6 +18,7 @@ import numpy as np
import
paddle.fluid
as
fluid
from
paddle.fluid
import
core
from
paddle.fluid.optimizer
import
SGDOptimizer
from
paddle.fluid.imperative.nn
import
Conv2D
,
Pool2D
,
FC
from
paddle.fluid.imperative.base
import
to_variable
...
...
@@ -119,7 +120,11 @@ class TestImperativeMnist(unittest.TestCase):
out
.
_backward
()
filter_grad
=
mnist
.
_simple_img_conv_pool_1
.
_conv2d
.
_filter_param
.
_gradient
(
)
print
(
filter_grad
)
# print(filter_grad)
sgd
=
SGDOptimizer
(
learning_rate
=
1e-3
)
sgd
.
minimize
(
out
)
# np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
# with fluid.imperative.guard():
# mlp = MLP()
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录