Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
3d8077e9
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
3d8077e9
编写于
11月 07, 2018
作者:
Q
Qiao Longfei
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
update optimizer
上级
fbcdb29d
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
54 addition
and
44 deletion
+54
-44
python/paddle/fluid/optimizer.py
python/paddle/fluid/optimizer.py
+53
-41
python/paddle/fluid/transpiler/details/__init__.py
python/paddle/fluid/transpiler/details/__init__.py
+1
-0
python/paddle/fluid/transpiler/details/distribute_lookuptable_utils.py
.../fluid/transpiler/details/distribute_lookuptable_utils.py
+0
-3
未找到文件。
python/paddle/fluid/optimizer.py
浏览文件 @
3d8077e9
...
@@ -40,30 +40,6 @@ __all__ = [
...
@@ -40,30 +40,6 @@ __all__ = [
]
]
def
_process_distribute_lookuptable
(
program
,
param_grads
,
learning_rate
):
table_name
=
find_distributed_lookup_table
(
program
)
table_param
=
None
table_grad
=
None
new_param_grads
=
[]
for
p
,
g
in
param_grads
:
if
p
.
name
==
table_name
:
if
table_param
is
not
None
:
raise
RuntimeError
(
"multi dist table var found, only support one now!"
)
table_param
=
p
table_grad
=
g
else
:
new_param_grads
.
append
((
p
,
g
))
sgd_op
=
None
if
table_param
is
not
None
:
with
table_param
.
block
.
program
.
_optimized_guard
(
[
table_param
,
table_grad
]),
framework
.
name_scope
(
"optimizer"
):
sgd_optimizer
=
SGD
(
learning_rate
)
sgd_op
=
sgd_optimizer
.
_append_optimize_op
(
table_param
.
block
,
(
table_param
,
table_grad
))
return
new_param_grads
,
(
table_param
,
table_grad
),
sgd_op
class
Optimizer
(
object
):
class
Optimizer
(
object
):
"""Optimizer Base class.
"""Optimizer Base class.
...
@@ -111,7 +87,7 @@ class Optimizer(object):
...
@@ -111,7 +87,7 @@ class Optimizer(object):
name
=
unique_name
.
generate
(
"learning_rate"
),
name
=
unique_name
.
generate
(
"learning_rate"
),
shape
=
[
1
],
shape
=
[
1
],
value
=
float
(
self
.
_learning_rate
),
value
=
float
(
self
.
_learning_rate
),
dtype
=
'float32'
if
self
.
_dtype
==
None
else
self
.
_dtype
,
dtype
=
'float32'
if
self
.
_dtype
is
None
else
self
.
_dtype
,
persistable
=
True
)
persistable
=
True
)
def
_global_learning_rate
(
self
,
program
=
None
):
def
_global_learning_rate
(
self
,
program
=
None
):
...
@@ -251,7 +227,6 @@ class Optimizer(object):
...
@@ -251,7 +227,6 @@ class Optimizer(object):
self
.
helper
=
LayerHelper
(
self
.
__class__
.
__name__
)
self
.
helper
=
LayerHelper
(
self
.
__class__
.
__name__
)
self
.
_create_accumulators
(
loss
.
block
,
self
.
_create_accumulators
(
loss
.
block
,
[
p
[
0
]
for
p
in
parameters_and_grads
])
[
p
[
0
]
for
p
in
parameters_and_grads
])
self
.
_create_global_learning_rate
()
optimize_ops
=
[]
optimize_ops
=
[]
for
param_and_grad
in
parameters_and_grads
:
for
param_and_grad
in
parameters_and_grads
:
...
@@ -271,6 +246,40 @@ class Optimizer(object):
...
@@ -271,6 +246,40 @@ class Optimizer(object):
end
=
len
(
global_block
.
ops
)
end
=
len
(
global_block
.
ops
)
return
global_block
.
_slice_ops
(
start
,
end
)
return
global_block
.
_slice_ops
(
start
,
end
)
def
_process_distribute_lookuptable
(
self
,
param_grads
,
loss
,
startup_program
):
program
=
loss
.
block
.
program
table_name
=
find_distributed_lookup_table
(
program
)
table_param
=
None
table_grad
=
None
new_param_grads
=
[]
for
p
,
g
in
param_grads
:
if
p
.
name
==
table_name
:
if
table_param
is
not
None
:
raise
RuntimeError
(
"multi dist table var found, only support one now!"
)
table_param
=
p
table_grad
=
g
else
:
new_param_grads
.
append
((
p
,
g
))
sgd_op
=
None
if
table_param
is
not
None
:
with
program_guard
(
program
,
startup_program
):
param_and_grad
=
[
table_param
,
table_grad
]
with
table_param
.
block
.
program
.
_optimized_guard
(
param_and_grad
),
\
framework
.
name_scope
(
"optimizer"
):
# create the optimize op
sgd_op
=
loss
.
block
.
append_op
(
type
=
'sgd'
,
inputs
=
{
"Param"
:
table_param
,
"Grad"
:
table_grad
,
"LearningRate"
:
self
.
_create_param_lr
(
param_and_grad
)
},
outputs
=
{
"ParamOut"
:
param_and_grad
[
0
]})
return
new_param_grads
,
(
table_param
,
table_grad
),
sgd_op
def
minimize
(
self
,
def
minimize
(
self
,
loss
,
loss
,
startup_program
=
None
,
startup_program
=
None
,
...
@@ -281,26 +290,29 @@ class Optimizer(object):
...
@@ -281,26 +290,29 @@ class Optimizer(object):
This method combines interface `append_backward()` and
This method combines interface `append_backward()` and
`create_optimization_pass()` into one.
`create_optimization_pass()` into one.
"""
"""
params_grads
=
append_backward
(
loss
,
parameter_list
,
no_grad_set
,
with
program_guard
(
loss
.
block
.
program
,
startup_program
):
[
error_clip_callback
])
self
.
_create_global_learning_rate
()
params_grads
=
append_backward
(
loss
,
parameter_list
,
no_grad_set
,
[
error_clip_callback
])
params_grads
=
sorted
(
params_grads
,
key
=
lambda
x
:
x
[
0
].
name
)
params_grads
=
sorted
(
params_grads
,
key
=
lambda
x
:
x
[
0
].
name
)
params_grads
,
table_param_and_grad
,
table_optimize_op
=
\
params_grads
,
table_param_and_grad
,
table_optimize_op
=
\
_process_distribute_lookuptable
(
loss
.
block
.
program
,
params_grads
,
self
.
_learning_rate
)
self
.
_process_distribute_lookuptable
(
params_grads
,
loss
,
startup_program
)
params_grads
=
append_gradient_clip_ops
(
params_grads
)
params_grads
=
append_gradient_clip_ops
(
params_grads
)
# Add regularization if any
# Add regularization if any
params_grads
=
append_regularization_ops
(
params_grads
,
params_grads
=
append_regularization_ops
(
params_grads
,
self
.
regularization
)
self
.
regularization
)
optimize_ops
=
self
.
_create_optimization_pass
(
params_grads
,
loss
,
optimize_ops
=
self
.
_create_optimization_pass
(
params_grads
,
loss
,
startup_program
)
startup_program
)
if
table_optimize_op
is
not
None
:
if
table_optimize_op
is
not
None
:
optimize_ops
.
append
(
table_optimize_op
)
optimize_ops
.
append
(
table_optimize_op
)
params_grads
.
append
(
table_param_and_grad
)
params_grads
.
append
(
table_param_and_grad
)
return
optimize_ops
,
params_grads
return
optimize_ops
,
params_grads
class
SGDOptimizer
(
Optimizer
):
class
SGDOptimizer
(
Optimizer
):
...
...
python/paddle/fluid/transpiler/details/__init__.py
浏览文件 @
3d8077e9
...
@@ -17,3 +17,4 @@ from __future__ import print_function
...
@@ -17,3 +17,4 @@ from __future__ import print_function
from
.program_utils
import
*
from
.program_utils
import
*
from
.ufind
import
*
from
.ufind
import
*
from
.checkport
import
*
from
.checkport
import
*
from
.distribute_lookuptable_utils
import
*
python/paddle/fluid/transpiler/details/distribute_lookuptable_utils.py
浏览文件 @
3d8077e9
...
@@ -12,9 +12,6 @@
...
@@ -12,9 +12,6 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
import
paddle.fluid.optimizer
as
optimizer
import
paddle.fluid.framework
as
framework
LOOKUP_TABLE_TYPE
=
"lookup_table"
LOOKUP_TABLE_TYPE
=
"lookup_table"
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录