Unverified commit 051ba1ce
Authored by Qiao Longfei on Feb 09, 2018; committed by GitHub on Feb 09, 2018.

Use force cpu in fill constant op (#8254)

Parent: 222155cc
Showing 5 changed files with 138 additions and 62 deletions (+138 -62).
python/paddle/v2/fluid/initializer.py                              +31  -6
python/paddle/v2/fluid/layers/math_op_patch.py                      +7  -3
python/paddle/v2/fluid/layers/tensor.py                            +23  -4
python/paddle/v2/fluid/learning_rate_decay.py                      +66 -48
python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py     +11  -1
python/paddle/v2/fluid/initializer.py

@@ -14,14 +14,37 @@
 import framework
 import numpy as np
+import contextlib
 
-__all__ = ['Constant', 'Uniform', 'Normal', 'Xavier']
+__all__ = [
+    'Constant', 'Uniform', 'Normal', 'Xavier', 'force_init_on_cpu',
+    'init_on_cpu'
+]
+
+_force_init_on_cpu_ = False
+
+
+def force_init_on_cpu():
+    return _force_init_on_cpu_
+
+
+@contextlib.contextmanager
+def init_on_cpu():
+    """
+    Switch program with `with` statement
+
+    Examples:
+        >>> with init_on_cpu():
+        >>>   step = layers.create_global_var()
+
+    """
+    global _force_init_on_cpu_
+
+    pre_state = force_init_on_cpu()
+    _force_init_on_cpu_ = True
+    yield
+    _force_init_on_cpu_ = pre_state
 
 
 class Initializer(object):
     """Base class for variable initializers

@@ -80,7 +103,7 @@ class ConstantInitializer(Initializer):
     """Implements the constant initializer
     """
 
-    def __init__(self, value=0.0):
+    def __init__(self, value=0.0, force_cpu=False):
         """Constructor for ConstantInitializer
 
         Args:

@@ -89,6 +112,7 @@ class ConstantInitializer(Initializer):
         assert value is not None
         super(ConstantInitializer, self).__init__()
         self._value = value
+        self._force_cpu = force_cpu
 
     def __call__(self, var, block):
         """Add constant initialization ops for a variable

@@ -110,7 +134,8 @@ class ConstantInitializer(Initializer):
             attrs={
                 "shape": var.shape,
                 "dtype": int(var.dtype),
-                "value": self._value
+                "value": float(self._value),
+                'force_cpu': self._force_cpu or force_init_on_cpu()
             })
         var.op = op
         return op
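For orientation, here is a minimal sketch (not part of the commit) of how the new init_on_cpu / force_init_on_cpu pair behaves; the import paths assume the package layout of this branch:

import paddle.v2.fluid.layers as layers
from paddle.v2.fluid.initializer import init_on_cpu, force_init_on_cpu

print(force_init_on_cpu())  # False: the module-level flag defaults to off
with init_on_cpu():
    # Constant initializers created here add force_cpu=True to their
    # fill_constant op, so the variable is materialized on CPU.
    step = layers.create_global_var(shape=[1], value=0.0, dtype='float32')
    print(force_init_on_cpu())  # True
print(force_init_on_cpu())  # False: the previous state is restored on exit

Because the context manager saves and restores the prior flag value rather than unconditionally resetting it to False, nested init_on_cpu() blocks compose correctly.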
python/paddle/v2/fluid/layers/math_op_patch.py

@@ -14,6 +14,7 @@
 
 from ..framework import Variable, unique_name
 from layer_function_generator import OpProtoHolder
+from ..initializer import force_init_on_cpu
 
 __all__ = ['monkey_patch_variable']

@@ -36,9 +37,12 @@ def monkey_patch_variable():
         block.append_op(
             type="fill_constant",
             outputs={'Out': [var]},
-            attrs={'dtype': var.dtype,
-                   'shape': shape,
-                   'value': value})
+            attrs={
+                'dtype': var.dtype,
+                'shape': shape,
+                'value': value,
+                'force_cpu': force_init_on_cpu()
+            })
         return var
 
     def create_scalar(block, value, dtype):
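The practical effect is that scalar operands of the overloaded Variable operators are now routed through the same flag. A small sketch (not from the commit; variable names are illustrative):

import paddle.v2.fluid as fluid
from paddle.v2.fluid.initializer import init_on_cpu

counter = fluid.layers.create_global_var(
    shape=[1], value=0.0, dtype='float32', persistable=True)
with init_on_cpu():
    # The constant 2.0 below is materialized by the monkey-patched
    # fill_constant path, so it is created with force_cpu=True.
    half = counter / 2.0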
python/paddle/v2/fluid/layers/tensor.py

@@ -16,7 +16,7 @@ from ..layer_helper import LayerHelper
 from ..param_attr import ParamAttr
 from ..framework import convert_np_dtype_to_dtype_
 from ..framework import Variable
-from ..initializer import Constant
+from ..initializer import Constant, force_init_on_cpu
 from ..core import DataType
 import numpy

@@ -69,12 +69,30 @@ def create_parameter(shape,
         default_initializer)
 
 
-def create_global_var(shape, value, dtype, persistable=False, name=None):
+def create_global_var(shape,
+                      value,
+                      dtype,
+                      persistable=False,
+                      force_cpu=False,
+                      name=None):
+    """
+    Create a global variable. such as global_step
+    Args:
+        shape(list[int]): shape of the variable
+        value(float): the value of the variable
+        dtype(string): element type of the parameter
+        persistable(bool): if this variable is persistable
+        force_cpu(bool): force this variable to be on CPU
+
+    Returns:
+        Variable: the created Variable
+    """
     helper = LayerHelper("global_var", **locals())
     var = helper.create_global_variable(
         dtype=dtype, shape=shape, persistable=persistable, name=name)
     helper.set_variable_initializer(
-        var, initializer=Constant(value=float(value)))
+        var, initializer=Constant(
+            value=float(value), force_cpu=force_cpu))
+
     return var

@@ -221,6 +239,7 @@ def fill_constant(shape, dtype, value, force_cpu=False, out=None):
         dtype(np.dtype|core.DataType|str): Data type of the output tensor.
         value(float): The constant value used to initialize the output tensor.
         out(Variable): The output tensor.
+        force_cpu(True|False): data should be on CPU if set true.
 
     Returns:
         Variable: The tensor variable storing the output.

@@ -242,7 +261,7 @@ def fill_constant(shape, dtype, value, force_cpu=False, out=None):
             'shape': shape,
             'dtype': out.dtype,
             'value': float(value),
-            'force_cpu': force_cpu
+            'force_cpu': force_cpu or force_init_on_cpu()
         })
     out.stop_gradient = True
     return out
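A usage sketch for the two public entry points touched in this file (not part of the commit):

import paddle.v2.fluid as fluid
from paddle.v2.fluid.initializer import init_on_cpu

# Explicitly pin a global counter to CPU via the new force_cpu argument:
step = fluid.layers.create_global_var(
    shape=[1], value=0.0, dtype='float32', persistable=True, force_cpu=True)

# Or pin every constant created inside the context:
with init_on_cpu():
    zero = fluid.layers.fill_constant(shape=[1], dtype='float32', value=0.0)

Note the or in 'force_cpu': force_cpu or force_init_on_cpu(): an explicit force_cpu=True always wins, and the global flag can only add CPU placement, never remove it.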
python/paddle/v2/fluid/learning_rate_decay.py

@@ -14,6 +14,7 @@
 
 import layers
 from framework import Variable
+from initializer import init_on_cpu
 
 __all__ = [
     'exponential_decay', 'natural_exp_decay', 'inverse_time_decay',

@@ -54,11 +55,14 @@ def exponential_decay(learning_rate,
     if not isinstance(global_step, Variable):
         raise ValueError("global_step is required for exponential_decay.")
 
-    # update learning_rate
-    div_res = global_step / decay_steps
-    if staircase:
-        div_res = layers.floor(x=div_res)
-    return learning_rate * (decay_rate**div_res)
+    with init_on_cpu():
+        # update learning_rate
+        div_res = global_step / decay_steps
+        if staircase:
+            div_res = layers.floor(x=div_res)
+        decayed_lr = learning_rate * (decay_rate**div_res)
+
+    return decayed_lr
 
 
 def natural_exp_decay(learning_rate,

@@ -88,10 +92,13 @@ def natural_exp_decay(learning_rate,
     if not isinstance(global_step, Variable):
         raise ValueError("global_step is required for natural_exp_decay.")
 
-    div_res = global_step / decay_steps
-    if staircase:
-        div_res = layers.floor(x=div_res)
-    return learning_rate * layers.exp(x=(-1 * decay_rate * div_res))
+    with init_on_cpu():
+        div_res = global_step / decay_steps
+        if staircase:
+            div_res = layers.floor(x=div_res)
+        decayed_lr = learning_rate * layers.exp(x=(-1 * decay_rate * div_res))
+
+    return decayed_lr
 
 
 def inverse_time_decay(learning_rate,

@@ -121,11 +128,14 @@ def inverse_time_decay(learning_rate,
     if not isinstance(global_step, Variable):
         raise ValueError("global_step is required for inverse_time_decay.")
 
-    div_res = global_step / decay_steps
-    if staircase:
-        div_res = layers.floor(x=div_res)
+    with init_on_cpu():
+        div_res = global_step / decay_steps
+        if staircase:
+            div_res = layers.floor(x=div_res)
+
+        decayed_lr = learning_rate / (1 + decay_rate * div_res)
 
-    return learning_rate / (1 + decay_rate * div_res)
+    return decayed_lr
 
 
 def polynomial_decay(learning_rate,

@@ -160,22 +170,27 @@ def polynomial_decay(learning_rate,
     if not isinstance(global_step, Variable):
         raise ValueError("global_step is required for inverse_time_decay.")
 
-    if cycle:
-        div_res = layers.ceil(x=(global_step / decay_steps))
-        zero_var = layers.fill_constant(shape=[1], dtype='float32', value=0.0)
-        one_var = layers.fill_constant(shape=[1], dtype='float32', value=1.0)
-
-        with layers.Switch() as switch:
-            with switch.case(layers.equal(x=global_step, y=zero_var)):
-                layers.assign(input=one_var, output=div_res)
-        decay_steps = decay_steps * div_res
-    else:
-        decay_steps_var = layers.fill_constant(
-            shape=[1], dtype='float32', value=float(decay_steps))
-        global_step = layers.elementwise_min(x=global_step, y=decay_steps_var)
-
-    return (learning_rate - end_learning_rate) * \
-           ((1 - global_step / decay_steps)**power) + end_learning_rate
+    with init_on_cpu():
+        if cycle:
+            div_res = layers.ceil(x=(global_step / decay_steps))
+            zero_var = layers.fill_constant(
+                shape=[1], dtype='float32', value=0.0)
+            one_var = layers.fill_constant(
+                shape=[1], dtype='float32', value=1.0)
+
+            with layers.Switch() as switch:
+                with switch.case(layers.equal(x=global_step, y=zero_var)):
+                    layers.assign(input=one_var, output=div_res)
+            decay_steps = decay_steps * div_res
+        else:
+            decay_steps_var = layers.fill_constant(
+                shape=[1], dtype='float32', value=float(decay_steps))
+            global_step = layers.elementwise_min(
+                x=global_step, y=decay_steps_var)
+
+        decayed_lr = (learning_rate - end_learning_rate) * \
+            ((1 - global_step / decay_steps)**power) + end_learning_rate
+    return decayed_lr
 
 
 def piecewise_decay(global_step, boundaries, values):

@@ -200,24 +215,27 @@ def piecewise_decay(global_step, boundaries, values):
     if not isinstance(global_step, Variable):
         raise ValueError("global_step is required for piecewise_decay.")
 
-    lr = layers.create_global_var(
-        shape=[1],
-        value=0.0,
-        dtype='float32',
-        persistable=True,
-        name="learning_rate")
-
-    with layers.Switch() as switch:
-        for i in range(len(boundaries)):
-            boundary_val = layers.fill_constant(
-                shape=[1], dtype='float32', value=float(boundaries[i]))
-            value_var = layers.fill_constant(
-                shape=[1], dtype='float32', value=float(values[i]))
-            with switch.case(layers.less_than(global_step, boundary_val)):
-                layers.assign(value_var, lr)
-        last_value_var = layers.fill_constant(
-            shape=[1], dtype='float32', value=float(values[len(values) - 1]))
-        with switch.default():
-            layers.assign(last_value_var, lr)
+    with init_on_cpu():
+        lr = layers.create_global_var(
+            shape=[1],
+            value=0.0,
+            dtype='float32',
+            persistable=True,
+            name="learning_rate")
+
+        with layers.Switch() as switch:
+            for i in range(len(boundaries)):
+                boundary_val = layers.fill_constant(
+                    shape=[1], dtype='float32', value=float(boundaries[i]))
+                value_var = layers.fill_constant(
+                    shape=[1], dtype='float32', value=float(values[i]))
+                with switch.case(layers.less_than(global_step, boundary_val)):
+                    layers.assign(value_var, lr)
+            last_value_var = layers.fill_constant(
+                shape=[1],
+                dtype='float32',
+                value=float(values[len(values) - 1]))
+            with switch.default():
+                layers.assign(last_value_var, lr)
 
     return lr
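Since each decay function now wraps its body in init_on_cpu(), a whole learning-rate schedule is kept on CPU without any caller-side flags. A sketch for piecewise_decay, with illustrative boundary/value numbers (the test change in the next file shows exponential_decay in a full training script):

import paddle.v2.fluid as fluid

global_step = fluid.layers.create_global_var(
    shape=[1], value=0.0, dtype='float32', force_cpu=True, persistable=True)
# values has one more entry than boundaries: the last value applies
# after the final boundary is passed.
lr = fluid.learning_rate_decay.piecewise_decay(
    global_step=global_step,
    boundaries=[10000, 20000],
    values=[0.01, 0.001, 0.0001])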
python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py

@@ -18,6 +18,7 @@ import numpy as np
 import paddle.v2 as paddle
 import paddle.v2.dataset.conll05 as conll05
 import paddle.v2.fluid as fluid
+from paddle.v2.fluid.initializer import init_on_cpu
 import contextlib
 import time
 import unittest

@@ -167,7 +168,16 @@ def train(use_cuda, save_dirname=None):
     # TODO(qiao)
     # check other optimizers and check why out will be NAN
-    sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.0001)
+    global_step = fluid.layers.create_global_var(
+        shape=[1], value=0, dtype='float32', force_cpu=True, persistable=True)
+    sgd_optimizer = fluid.optimizer.SGD(
+        learning_rate=fluid.learning_rate_decay.exponential_decay(
+            learning_rate=0.0001,
+            global_step=global_step,
+            decay_steps=100000,
+            decay_rate=0.5,
+            staircase=True),
+        global_step=global_step)
     sgd_optimizer.minimize(avg_cost)
 
     # TODO(qiao)