Commit e42f9b7a (unverified)
Authored by Qiao Longfei on Jul 12, 2018; committed via GitHub on Jul 12, 2018
Merge pull request #12103 from jacquesqiao/fix-optimizer-accumulator
Fix optimizer accumulator
Parents: 72ce4d56 2d2e813d
Showing 2 changed files with 67 additions and 64 deletions (+67 -64):

  python/paddle/fluid/optimizer.py                        +65 -62
  python/paddle/fluid/tests/unittests/test_optimizer.py    +2  -2
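The change replaces the optimizer-level beta1/beta2 power variables (created with helper.create_global_variable) with per-parameter accumulators registered through _add_accumulator, and widens _finish_update(self, block) to _finish_update(self, block, parameters) so each parameter's power term is scaled under its own optimized_guard. A minimal standalone sketch of that bookkeeping pattern follows; the Accumulators class and the plain-float state are hypothetical stand-ins for illustration, not Fluid's actual implementation.

# Hypothetical, plain-Python stand-in for the per-parameter accumulator
# bookkeeping that _add_accumulator/_get_accumulator provide in Fluid.
class Accumulators:
    def __init__(self):
        # {accumulator_name: {parameter_name: value}}
        self._accumulators = {}

    def add(self, name, param_name, fill_value=0.0):
        per_param = self._accumulators.setdefault(name, {})
        if param_name in per_param:
            raise ValueError("accumulator %r already exists for %r" % (name, param_name))
        per_param[param_name] = fill_value

    def get(self, name, param_name):
        return self._accumulators[name][param_name]

    def scale(self, name, param_name, factor):
        # Mirrors the per-parameter "scale" op appended in _finish_update.
        self._accumulators[name][param_name] *= factor


beta1, beta2 = 0.9, 0.999
params = ["fc.w", "fc.b"]
acc = Accumulators()

# _create_accumulators: after this commit Adam registers beta1^t / beta2^t
# per parameter, next to the two moment accumulators.
for p in params:
    acc.add("moment1", p)
    acc.add("moment2", p)
    acc.add("beta1_pow_acc", p, fill_value=beta1)
    acc.add("beta2_pow_acc", p, fill_value=beta2)

# _finish_update: one scale per parameter instead of one global scale op.
for p in params:
    acc.scale("beta1_pow_acc", p, beta1)
    acc.scale("beta2_pow_acc", p, beta2)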
python/paddle/fluid/optimizer.py
@@ -123,7 +123,7 @@ class Optimizer(object):
         """
         pass
 
-    def _finish_update(self, block):
+    def _finish_update(self, block, parameters):
         """Finish any custom updates needed
            before completing an optimization step
 
@@ -132,7 +132,7 @@ class Optimizer(object):
             parameters: list of parameter variables for the optimizer
 
         Returns:
-            list of finish ops or None
+            None
         """
         pass
 
@@ -236,7 +236,8 @@ class Optimizer(object):
 
         # Get custom finish ops for subclasses
         # FIXME: Need to fix this once we figure out how to handle dependencies
-        self._finish_update(loss.block)
+        self._finish_update(loss.block,
+                            [p[0] for p in parameters_and_grads])
 
         end = len(global_block.ops)
         return global_block.slice_ops(start, end)
@@ -486,6 +487,8 @@ class AdamOptimizer(Optimizer):
     """
     _moment1_acc_str = "moment1"
     _moment2_acc_str = "moment2"
+    _beta1_pow_acc_str = "beta1_pow_acc"
+    _beta2_pow_acc_str = "beta2_pow_acc"
 
     def __init__(self,
                  learning_rate=0.001,
@@ -507,32 +510,22 @@ class AdamOptimizer(Optimizer):
     def _create_accumulators(self, block, parameters):
         assert isinstance(block, framework.Block)
 
-        main_block = block.program.global_block()
-        # Create beta1 and beta2 power tensors
-        beta_shape = [1]
-        self._beta1_pow_acc = self.helper.create_global_variable(
-            name=unique_name.generate('beta1_pow_acc'),
-            dtype='float32' if self._dtype == None else self._dtype,
-            shape=beta_shape,
-            lod_level=0,
-            persistable=True)
-        self.helper.set_variable_initializer(
-            self._beta1_pow_acc, initializer=Constant(self._beta1))
-
-        self._beta2_pow_acc = self.helper.create_global_variable(
-            name=unique_name.generate('beta2_pow_acc'),
-            dtype='float32' if self._dtype == None else self._dtype,
-            shape=beta_shape,
-            lod_level=0,
-            persistable=True)
-
-        self.helper.set_variable_initializer(
-            self._beta2_pow_acc, initializer=Constant(self._beta2))
-
         # Create accumulator tensors for first and second moments
         for p in parameters:
             self._add_accumulator(self._moment1_acc_str, p)
             self._add_accumulator(self._moment2_acc_str, p)
+            self._add_accumulator(
+                name=self._beta1_pow_acc_str,
+                param=p,
+                dtype='float32',
+                fill_value=self._beta1,
+                shape=[1])
+            self._add_accumulator(
+                name=self._beta2_pow_acc_str,
+                param=p,
+                dtype='float32',
+                fill_value=self._beta2,
+                shape=[1])
 
     def _append_optimize_op(self, block, param_and_grad):
         assert isinstance(block, framework.Block)
@@ -541,6 +534,11 @@ class AdamOptimizer(Optimizer):
                                         param_and_grad[0])
         moment2 = self._get_accumulator(self._moment2_acc_str,
                                         param_and_grad[0])
+        beta1_pow_acc = self._get_accumulator(self._beta1_pow_acc_str,
+                                              param_and_grad[0])
+        beta2_pow_acc = self._get_accumulator(self._beta2_pow_acc_str,
+                                              param_and_grad[0])
+
         # create the adam optimize op
         adam_op = block.append_op(
             type=self.type,
@@ -550,8 +548,8 @@ class AdamOptimizer(Optimizer):
                 "LearningRate": self._create_param_lr(param_and_grad),
                 "Moment1": moment1,
                 "Moment2": moment2,
-                "Beta1Pow": self._beta1_pow_acc,
-                "Beta2Pow": self._beta2_pow_acc
+                "Beta1Pow": beta1_pow_acc,
+                "Beta2Pow": beta2_pow_acc
             },
             outputs={
                 "ParamOut": param_and_grad[0],
@@ -566,24 +564,28 @@ class AdamOptimizer(Optimizer):
 
         return adam_op
 
-    def _finish_update(self, block):
+    def _finish_update(self, block, parameters):
         """Update Beta1 and Beta2 Power accumulators
         """
         assert isinstance(block, framework.Block)
         main_block = block.program.global_block()
-        scale_beta1 = main_block.append_op(
-            type="scale",
-            inputs={"X": self._beta1_pow_acc},
-            outputs={"Out": self._beta1_pow_acc},
-            attrs={"scale": self._beta1})
-
-        scale_beta2 = main_block.append_op(
-            type="scale",
-            inputs={"X": self._beta2_pow_acc},
-            outputs={"Out": self._beta2_pow_acc},
-            attrs={"scale": self._beta2})
-
-        return [scale_beta1, scale_beta2]
+        for param in parameters:
+            with param.block.program.optimized_guard(param):
+                beta1_pow_acc = self._get_accumulator(self._beta1_pow_acc_str,
+                                                      param)
+                beta2_pow_acc = self._get_accumulator(self._beta2_pow_acc_str,
+                                                      param)
+                main_block.append_op(
+                    type="scale",
+                    inputs={"X": beta1_pow_acc},
+                    outputs={"Out": beta1_pow_acc},
+                    attrs={"scale": self._beta1})
+
+                main_block.append_op(
+                    type="scale",
+                    inputs={"X": beta2_pow_acc},
+                    outputs={"Out": beta2_pow_acc},
+                    attrs={"scale": self._beta2})
 
 
 class AdamaxOptimizer(Optimizer):
@@ -626,6 +628,7 @@ class AdamaxOptimizer(Optimizer):
     """
     _moment_acc_str = "moment"
    _inf_norm_acc_str = "inf_norm"
+    _beta1_pow_acc_str = "beta1_pow_acc"
 
     def __init__(self,
                  learning_rate=0.001,
@@ -645,21 +648,16 @@ class AdamaxOptimizer(Optimizer):
         self._epsilon = epsilon
 
     def _create_accumulators(self, block, parameters):
-        # Create beta1 power accumulator tensor
-        beta_shape = [1]
-        self._beta1_pow_acc = self.helper.create_global_variable(
-            name=unique_name.generate('beta1_pow_acc'),
-            dtype='float32' if self._dtype == None else self._dtype,
-            shape=beta_shape,
-            lod_level=0,
-            persistable=True)
-        self.helper.set_variable_initializer(
-            self._beta1_pow_acc, initializer=Constant(self._beta1))
-
         # Create accumulator tensors for first moment and infinity norm
         for p in parameters:
             self._add_accumulator(self._moment_acc_str, p)
             self._add_accumulator(self._inf_norm_acc_str, p)
+            self._add_accumulator(
+                name=self._beta1_pow_acc_str,
+                param=p,
+                dtype='float32',
+                fill_value=self._beta1,
+                shape=[1])
 
     def _append_optimize_op(self, block, param_and_grad):
         assert isinstance(block, framework.Block)
@@ -667,6 +665,8 @@ class AdamaxOptimizer(Optimizer):
         moment = self._get_accumulator(self._moment_acc_str, param_and_grad[0])
         inf_norm = self._get_accumulator(self._inf_norm_acc_str,
                                          param_and_grad[0])
+        beta1_pow_acc = self._get_accumulator(self._beta1_pow_acc_str,
+                                              param_and_grad[0])
         # create the adamax optimize op
         adamax_op = block.append_op(
             type=self.type,
@@ -676,7 +676,7 @@ class AdamaxOptimizer(Optimizer):
                 "LearningRate": self._create_param_lr(param_and_grad),
                 "Moment": moment,
                 "InfNorm": inf_norm,
-                "Beta1Pow": self._beta1_pow_acc
+                "Beta1Pow": beta1_pow_acc
             },
             outputs={
                 "ParamOut": param_and_grad[0],
@@ -691,18 +691,20 @@ class AdamaxOptimizer(Optimizer):
 
         return adamax_op
 
-    def _finish_update(self, block):
+    def _finish_update(self, block, parameters):
         """Update Beta1 Power accumulator
         """
         assert isinstance(block, framework.Block)
         main_block = block.program.global_block()
-        scale_beta1 = main_block.append_op(
-            type="scale",
-            inputs={"X": self._beta1_pow_acc},
-            outputs={"Out": self._beta1_pow_acc},
-            attrs={"scale": self._beta1})
-
-        return [scale_beta1]
+        for param in parameters:
+            with param.block.program.optimized_guard(param):
+                beta1_pow_acc = self._get_accumulator(self._beta1_pow_acc_str,
+                                                      param)
+                main_block.append_op(
+                    type="scale",
+                    inputs={"X": beta1_pow_acc},
+                    outputs={"Out": beta1_pow_acc},
+                    attrs={"scale": self._beta1})
 
 
 class DecayedAdagradOptimizer(Optimizer):
@@ -1156,7 +1158,8 @@ class ModelAverage(Optimizer):
                 self.params_grads.append((param, grad))
 
         for param, grad in self.params_grads:
-            self._append_average_accumulate_op(param)
+            with param.block.program.optimized_guard(param):
+                self._append_average_accumulate_op(param)
 
         self.apply_program = Program()
         block = self.apply_program.global_block()
python/paddle/fluid/tests/unittests/test_optimizer.py
@@ -287,7 +287,7 @@ class TestAdamOptimizer(unittest.TestCase):
 
         # Check accumulators
         accumulators = adam_optimizer.get_accumulators()
-        self.assertEqual(len(accumulators), 2)
+        self.assertEqual(len(accumulators), 4)
         self.assertTrue(adam_optimizer.get_moment1_str() in accumulators)
         self.assertTrue(adam_optimizer.get_moment2_str() in accumulators)
         moment1_acc = accumulators[adam_optimizer.get_moment1_str()]
@@ -354,7 +354,7 @@ class TestAdamaxOptimizer(unittest.TestCase):
 
         # Check accumulators
         accumulators = adamax_optimizer.get_accumulators()
-        self.assertEqual(len(accumulators), 2)
+        self.assertEqual(len(accumulators), 3)
         self.assertTrue(adamax_optimizer.get_moment_str() in accumulators)
         self.assertTrue(adamax_optimizer.get_inf_norm_str() in accumulators)
         moment_acc = accumulators[adamax_optimizer.get_moment_str()]
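The updated expected counts follow from the accumulator names each optimizer registers per parameter after this change: Adam has moment1, moment2, beta1_pow_acc and beta2_pow_acc (four), Adamax has moment, inf_norm and beta1_pow_acc (three). A tiny standalone sanity check in the same spirit (a hypothetical sketch, not part of the Fluid test suite):

# Hypothetical check mirroring the updated assertions: the per-parameter
# accumulator names registered by each optimizer after this commit.
ADAM_ACCUMULATORS = {"moment1", "moment2", "beta1_pow_acc", "beta2_pow_acc"}
ADAMAX_ACCUMULATORS = {"moment", "inf_norm", "beta1_pow_acc"}

assert len(ADAM_ACCUMULATORS) == 4
assert len(ADAMAX_ACCUMULATORS) == 3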