Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
051ba1ce
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2302
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
051ba1ce
编写于
2月 09, 2018
作者:
Q
Qiao Longfei
提交者:
GitHub
2月 09, 2018
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Use force cpu in fill constant op (#8254)
上级
222155cc
变更
5
显示空白变更内容
内联
并排
Showing
5 changed file
with
138 addition
and
62 deletion
+138
-62
python/paddle/v2/fluid/initializer.py
python/paddle/v2/fluid/initializer.py
+31
-6
python/paddle/v2/fluid/layers/math_op_patch.py
python/paddle/v2/fluid/layers/math_op_patch.py
+7
-3
python/paddle/v2/fluid/layers/tensor.py
python/paddle/v2/fluid/layers/tensor.py
+23
-4
python/paddle/v2/fluid/learning_rate_decay.py
python/paddle/v2/fluid/learning_rate_decay.py
+66
-48
python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py
...n/paddle/v2/fluid/tests/book/test_label_semantic_roles.py
+11
-1
未找到文件。
python/paddle/v2/fluid/initializer.py
浏览文件 @
051ba1ce
...
...
@@ -14,14 +14,37 @@
import
framework
import
numpy
as
np
import
contextlib
__all__
=
[
'Constant'
,
'Uniform'
,
'Normal'
,
'Xavier'
,
'Constant'
,
'Uniform'
,
'Normal'
,
'Xavier'
,
'force_init_on_cpu'
,
'init_on_cpu'
]
_force_init_on_cpu_
=
False
def
force_init_on_cpu
():
return
_force_init_on_cpu_
@
contextlib
.
contextmanager
def
init_on_cpu
():
"""
Switch program with `with` statement
Examples:
>>> with init_on_cpu():
>>> step = layers.create_global_var()
"""
global
_force_init_on_cpu_
pre_state
=
force_init_on_cpu
()
_force_init_on_cpu_
=
True
yield
_force_init_on_cpu_
=
pre_state
class
Initializer
(
object
):
"""Base class for variable initializers
...
...
@@ -80,7 +103,7 @@ class ConstantInitializer(Initializer):
"""Implements the constant initializer
"""
def
__init__
(
self
,
value
=
0.0
):
def
__init__
(
self
,
value
=
0.0
,
force_cpu
=
False
):
"""Constructor for ConstantInitializer
Args:
...
...
@@ -89,6 +112,7 @@ class ConstantInitializer(Initializer):
assert
value
is
not
None
super
(
ConstantInitializer
,
self
).
__init__
()
self
.
_value
=
value
self
.
_force_cpu
=
force_cpu
def
__call__
(
self
,
var
,
block
):
"""Add constant initialization ops for a variable
...
...
@@ -110,7 +134,8 @@ class ConstantInitializer(Initializer):
attrs
=
{
"shape"
:
var
.
shape
,
"dtype"
:
int
(
var
.
dtype
),
"value"
:
self
.
_value
"value"
:
float
(
self
.
_value
),
'force_cpu'
:
self
.
_force_cpu
or
force_init_on_cpu
()
})
var
.
op
=
op
return
op
...
...
python/paddle/v2/fluid/layers/math_op_patch.py
浏览文件 @
051ba1ce
...
...
@@ -14,6 +14,7 @@
from
..framework
import
Variable
,
unique_name
from
layer_function_generator
import
OpProtoHolder
from
..initializer
import
force_init_on_cpu
__all__
=
[
'monkey_patch_variable'
]
...
...
@@ -36,9 +37,12 @@ def monkey_patch_variable():
block
.
append_op
(
type
=
"fill_constant"
,
outputs
=
{
'Out'
:
[
var
]},
attrs
=
{
'dtype'
:
var
.
dtype
,
attrs
=
{
'dtype'
:
var
.
dtype
,
'shape'
:
shape
,
'value'
:
value
})
'value'
:
value
,
'force_cpu'
:
force_init_on_cpu
()
})
return
var
def
create_scalar
(
block
,
value
,
dtype
):
...
...
python/paddle/v2/fluid/layers/tensor.py
浏览文件 @
051ba1ce
...
...
@@ -16,7 +16,7 @@ from ..layer_helper import LayerHelper
from
..param_attr
import
ParamAttr
from
..framework
import
convert_np_dtype_to_dtype_
from
..framework
import
Variable
from
..initializer
import
Constant
from
..initializer
import
Constant
,
force_init_on_cpu
from
..core
import
DataType
import
numpy
...
...
@@ -69,12 +69,30 @@ def create_parameter(shape,
default_initializer
)
def
create_global_var
(
shape
,
value
,
dtype
,
persistable
=
False
,
name
=
None
):
def
create_global_var
(
shape
,
value
,
dtype
,
persistable
=
False
,
force_cpu
=
False
,
name
=
None
):
"""
Create a global variable. such as global_step
Args:
shape(list[int]): shape of the variable
value(float): the value of the variable
dtype(string): element type of the parameter
persistable(bool): if this variable is persistable
force_cpu(bool): force this variable to be on CPU
Returns:
Variable: the created Variable
"""
helper
=
LayerHelper
(
"global_var"
,
**
locals
())
var
=
helper
.
create_global_variable
(
dtype
=
dtype
,
shape
=
shape
,
persistable
=
persistable
,
name
=
name
)
helper
.
set_variable_initializer
(
var
,
initializer
=
Constant
(
value
=
float
(
value
)))
var
,
initializer
=
Constant
(
value
=
float
(
value
),
force_cpu
=
force_cpu
))
return
var
...
...
@@ -221,6 +239,7 @@ def fill_constant(shape, dtype, value, force_cpu=False, out=None):
dtype(np.dtype|core.DataType|str): Data type of the output tensor.
value(float): The constant value used to initialize the output tensor.
out(Variable): The output tensor.
force_cpu(True|False): data should be on CPU if set true.
Returns:
Variable: The tensor variable storing the output.
...
...
@@ -242,7 +261,7 @@ def fill_constant(shape, dtype, value, force_cpu=False, out=None):
'shape'
:
shape
,
'dtype'
:
out
.
dtype
,
'value'
:
float
(
value
),
'force_cpu'
:
force_cpu
'force_cpu'
:
force_cpu
or
force_init_on_cpu
()
})
out
.
stop_gradient
=
True
return
out
...
...
python/paddle/v2/fluid/learning_rate_decay.py
浏览文件 @
051ba1ce
...
...
@@ -14,6 +14,7 @@
import
layers
from
framework
import
Variable
from
initializer
import
init_on_cpu
__all__
=
[
'exponential_decay'
,
'natural_exp_decay'
,
'inverse_time_decay'
,
...
...
@@ -54,11 +55,14 @@ def exponential_decay(learning_rate,
if
not
isinstance
(
global_step
,
Variable
):
raise
ValueError
(
"global_step is required for exponential_decay."
)
with
init_on_cpu
():
# update learning_rate
div_res
=
global_step
/
decay_steps
if
staircase
:
div_res
=
layers
.
floor
(
x
=
div_res
)
return
learning_rate
*
(
decay_rate
**
div_res
)
decayed_lr
=
learning_rate
*
(
decay_rate
**
div_res
)
return
decayed_lr
def
natural_exp_decay
(
learning_rate
,
...
...
@@ -88,10 +92,13 @@ def natural_exp_decay(learning_rate,
if
not
isinstance
(
global_step
,
Variable
):
raise
ValueError
(
"global_step is required for natural_exp_decay."
)
with
init_on_cpu
():
div_res
=
global_step
/
decay_steps
if
staircase
:
div_res
=
layers
.
floor
(
x
=
div_res
)
return
learning_rate
*
layers
.
exp
(
x
=
(
-
1
*
decay_rate
*
div_res
))
decayed_lr
=
learning_rate
*
layers
.
exp
(
x
=
(
-
1
*
decay_rate
*
div_res
))
return
decayed_lr
def
inverse_time_decay
(
learning_rate
,
...
...
@@ -121,11 +128,14 @@ def inverse_time_decay(learning_rate,
if
not
isinstance
(
global_step
,
Variable
):
raise
ValueError
(
"global_step is required for inverse_time_decay."
)
with
init_on_cpu
():
div_res
=
global_step
/
decay_steps
if
staircase
:
div_res
=
layers
.
floor
(
x
=
div_res
)
return
learning_rate
/
(
1
+
decay_rate
*
div_res
)
decayed_lr
=
learning_rate
/
(
1
+
decay_rate
*
div_res
)
return
decayed_lr
def
polynomial_decay
(
learning_rate
,
...
...
@@ -160,10 +170,13 @@ def polynomial_decay(learning_rate,
if
not
isinstance
(
global_step
,
Variable
):
raise
ValueError
(
"global_step is required for inverse_time_decay."
)
with
init_on_cpu
():
if
cycle
:
div_res
=
layers
.
ceil
(
x
=
(
global_step
/
decay_steps
))
zero_var
=
layers
.
fill_constant
(
shape
=
[
1
],
dtype
=
'float32'
,
value
=
0.0
)
one_var
=
layers
.
fill_constant
(
shape
=
[
1
],
dtype
=
'float32'
,
value
=
1.0
)
zero_var
=
layers
.
fill_constant
(
shape
=
[
1
],
dtype
=
'float32'
,
value
=
0.0
)
one_var
=
layers
.
fill_constant
(
shape
=
[
1
],
dtype
=
'float32'
,
value
=
1.0
)
with
layers
.
Switch
()
as
switch
:
with
switch
.
case
(
layers
.
equal
(
x
=
global_step
,
y
=
zero_var
)):
...
...
@@ -172,10 +185,12 @@ def polynomial_decay(learning_rate,
else
:
decay_steps_var
=
layers
.
fill_constant
(
shape
=
[
1
],
dtype
=
'float32'
,
value
=
float
(
decay_steps
))
global_step
=
layers
.
elementwise_min
(
x
=
global_step
,
y
=
decay_steps_var
)
global_step
=
layers
.
elementwise_min
(
x
=
global_step
,
y
=
decay_steps_var
)
return
(
learning_rate
-
end_learning_rate
)
*
\
decayed_lr
=
(
learning_rate
-
end_learning_rate
)
*
\
((
1
-
global_step
/
decay_steps
)
**
power
)
+
end_learning_rate
return
decayed_lr
def
piecewise_decay
(
global_step
,
boundaries
,
values
):
...
...
@@ -200,6 +215,7 @@ def piecewise_decay(global_step, boundaries, values):
if
not
isinstance
(
global_step
,
Variable
):
raise
ValueError
(
"global_step is required for piecewise_decay."
)
with
init_on_cpu
():
lr
=
layers
.
create_global_var
(
shape
=
[
1
],
value
=
0.0
,
...
...
@@ -216,7 +232,9 @@ def piecewise_decay(global_step, boundaries, values):
with
switch
.
case
(
layers
.
less_than
(
global_step
,
boundary_val
)):
layers
.
assign
(
value_var
,
lr
)
last_value_var
=
layers
.
fill_constant
(
shape
=
[
1
],
dtype
=
'float32'
,
value
=
float
(
values
[
len
(
values
)
-
1
]))
shape
=
[
1
],
dtype
=
'float32'
,
value
=
float
(
values
[
len
(
values
)
-
1
]))
with
switch
.
default
():
layers
.
assign
(
last_value_var
,
lr
)
...
...
python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py
浏览文件 @
051ba1ce
...
...
@@ -18,6 +18,7 @@ import numpy as np
import
paddle.v2
as
paddle
import
paddle.v2.dataset.conll05
as
conll05
import
paddle.v2.fluid
as
fluid
from
paddle.v2.fluid.initializer
import
init_on_cpu
import
contextlib
import
time
import
unittest
...
...
@@ -167,7 +168,16 @@ def train(use_cuda, save_dirname=None):
# TODO(qiao)
# check other optimizers and check why out will be NAN
sgd_optimizer
=
fluid
.
optimizer
.
SGD
(
learning_rate
=
0.0001
)
global_step
=
fluid
.
layers
.
create_global_var
(
shape
=
[
1
],
value
=
0
,
dtype
=
'float32'
,
force_cpu
=
True
,
persistable
=
True
)
sgd_optimizer
=
fluid
.
optimizer
.
SGD
(
learning_rate
=
fluid
.
learning_rate_decay
.
exponential_decay
(
learning_rate
=
0.0001
,
global_step
=
global_step
,
decay_steps
=
100000
,
decay_rate
=
0.5
,
staircase
=
True
),
global_step
=
global_step
)
sgd_optimizer
.
minimize
(
avg_cost
)
# TODO(qiao)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录