PaddlePaddle / hapi — commit 358f7852 (unverified)
Authored Mar 16, 2020 by qingqing01; committed via GitHub on Mar 16, 2020.

Merge pull request #3 from qingqing01/api_loss

Refine Loss in Model

Parents: 1a2d3b5f, 14a57371
Showing 3 changed files with 241 additions and 180 deletions (+241 −180):

.pre-commit-config.yaml   +27   −0
mnist.py                  +27   −20
model.py                  +187  −160
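At a glance, the refactor replaces the old per-call device/device_ids arguments and the shape_hints decorator with Input declarations passed to Model.prepare(). A minimal usage sketch of the refined API, based on the mnist.py changes below (the guard usage and the random batch data are illustrative placeholders, not part of the commit):

    import numpy as np
    from paddle import fluid
    from paddle.fluid.optimizer import Momentum
    from model import Model, CrossEntropy, Input   # model.py from this commit
    from mnist import MNIST                        # the Model subclass in mnist.py

    with fluid.dygraph.guard():
        model = MNIST()
        optim = Momentum(learning_rate=1e-3, momentum=.9,
                         parameter_list=model.parameters())
        # Inputs and labels are now declared once, up front.
        inputs = [Input([None, 1, 28, 28], 'float32', name='image')]
        labels = [Input([None, 1], 'int64', name='label')]
        model.prepare(optim, CrossEntropy(), inputs, labels)
        # train/eval no longer take device or device_ids per call.
        batch_image = np.random.random((128, 1, 28, 28)).astype('float32')
        batch_label = np.random.randint(0, 10, (128, 1)).astype('int64')
        outputs, losses = model.train(batch_image, batch_label)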
.pre-commit-config.yaml (new file, mode 100644; view file @ 358f7852)  +27 −0

- repo: https://github.com/PaddlePaddle/mirrors-yapf.git
  sha: 0d79c0c469bab64f7229c9aca2b1186ef47f0e37
  hooks:
  - id: yapf
    files: \.py$
- repo: https://github.com/pre-commit/pre-commit-hooks
  sha: a11d9314b22d8f8c7556443875b731ef05965464
  hooks:
  - id: check-merge-conflict
  - id: check-symlinks
  - id: detect-private-key
    files: (?!.*paddle)^.*$
  - id: end-of-file-fixer
    files: \.(md|yml)$
  - id: trailing-whitespace
    files: \.(md|yml)$
- repo: https://github.com/Lucas-C/pre-commit-hooks
  sha: v1.0.1
  hooks:
  - id: forbid-crlf
    files: \.(md|yml)$
  - id: remove-crlf
    files: \.(md|yml)$
  - id: forbid-tabs
    files: \.(md|yml)$
  - id: remove-tabs
    files: \.(md|yml)$
mnist.py (view file @ 358f7852)  +27 −20

@@ -26,7 +26,7 @@ from paddle import fluid
 from paddle.fluid.optimizer import Momentum
 from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear
-from model import Model, CrossEntropy
+from model import Model, CrossEntropy, Input


 class SimpleImgConvPool(fluid.dygraph.Layer):

@@ -78,7 +78,6 @@ class SimpleImgConvPool(fluid.dygraph.Layer):
(one line removed; only context lines are rendered for this hunk)
 class MNIST(Model):
     def __init__(self):
         super(MNIST, self).__init__()
         self._simple_img_conv_pool_1 = SimpleImgConvPool(
             1, 20, 5, 2, 2, act="relu")

@@ -88,12 +87,13 @@ class MNIST(Model):
         pool_2_shape = 50 * 4 * 4
         SIZE = 10
         scale = (2.0 / (pool_2_shape**2 * SIZE))**0.5
-        self._fc = Linear(800,
-                          10,
-                          param_attr=fluid.param_attr.ParamAttr(
-                              initializer=fluid.initializer.NormalInitializer(
-                                  loc=0.0, scale=scale)),
-                          act="softmax")
+        self._fc = Linear(
+            800,
+            10,
+            param_attr=fluid.param_attr.ParamAttr(
+                initializer=fluid.initializer.NormalInitializer(
+                    loc=0.0, scale=scale)),
+            act="softmax")

     def forward(self, inputs):
         x = self._simple_img_conv_pool_1(inputs)

@@ -137,13 +137,15 @@ def main():
         paddle.batch(paddle.dataset.mnist.test(),
                      batch_size=FLAGS.batch_size, drop_last=True), 1, 1)

-    device_ids = list(range(FLAGS.num_devices))
-
     with guard:
         model = MNIST()
-        optim = Momentum(learning_rate=FLAGS.lr, momentum=.9,
-                         parameter_list=model.parameters())
-        model.prepare(optim, CrossEntropy())
+        optim = Momentum(
+            learning_rate=FLAGS.lr,
+            momentum=.9,
+            parameter_list=model.parameters())
+        inputs = [Input([None, 1, 28, 28], 'float32', name='image')]
+        labels = [Input([None, 1], 'int64', name='label')]
+        model.prepare(optim, CrossEntropy(), inputs, labels)
         if FLAGS.resume is not None:
             model.load(FLAGS.resume)

@@ -154,8 +156,7 @@ def main():
         val_acc = 0.0
         print("======== train epoch {} ========".format(e))
         for idx, batch in enumerate(train_loader()):
-            outputs, losses = model.train(batch[0], batch[1], device='gpu',
-                                          device_ids=device_ids)
+            outputs, losses = model.train(batch[0], batch[1])
             acc = accuracy(outputs[0], batch[1])[0]
             train_loss += np.sum(losses)

@@ -166,8 +167,7 @@ def main():
         print("======== eval epoch {} ========".format(e))
         for idx, batch in enumerate(val_loader()):
-            outputs, losses = model.eval(batch[0], batch[1], device='gpu',
-                                         device_ids=device_ids)
+            outputs, losses = model.eval(batch[0], batch[1])
             acc = accuracy(outputs[0], batch[1])[0]
             val_loss += np.sum(losses)

@@ -185,14 +185,21 @@ if __name__ == '__main__':
     parser.add_argument(
         "-e", "--epoch", default=100, type=int, help="number of epoch")
     parser.add_argument(
         '--lr', '--learning-rate', default=1e-3, type=float, metavar='LR',
         help='initial learning rate')
     parser.add_argument(
         "-b", "--batch_size", default=128, type=int, help="batch size")
     parser.add_argument(
-        "-n", "--num_devices", default=4, type=int, help="number of devices")
+        "-n", "--num_devices", default=1, type=int, help="number of devices")
     parser.add_argument(
         "-r", "--resume", default=None, type=str,
         help="checkpoint path to resume")
     FLAGS = parser.parse_args()
     main()
model.py (view file @ 358f7852)  +187 −160

@@ -27,10 +27,12 @@ from paddle.fluid.executor import global_scope
 from paddle.fluid.io import is_belong_to_optimizer
 from paddle.fluid.dygraph.base import to_variable

-__all__ = ['shape_hints', 'Model', 'Loss', 'CrossEntropy']
+__all__ = ['Model', 'Loss', 'CrossEntropy', 'Input']


 def to_list(value):
+    if value is None:
+        return value
     if isinstance(value, (list, tuple)):
         return value
     return [value]
@@ -51,20 +53,14 @@ def extract_args(func):
     return inspect.getargspec(func)[0]


-def shape_hints(**hints):
-    assert hints, "hints can not be empty"
-    assert all(isinstance(h, (list, tuple)) for h in hints.values()), \
-        "shape hint must be a list or tuple"
-
-    def wrapper(func):
-        args = extract_args(func)
-        invalid = set(hints.keys()) - set(args)
-        assert not invalid, \
-            "shape hint for arguments that are not present in forward method" \
-            + ": ({})".format(", ".join(invalid))
-        func.shape_hints = hints
-        return func
-
-    return wrapper
+class Input(fluid.dygraph.Layer):
+    def __init__(self, shape=None, dtype=None, name=None):
+        self.shape = shape
+        self.dtype = dtype
+        self.name = name
+
+    def forward(self):
+        return fluid.data(self.name, shape=self.shape, dtype=self.dtype)


 class Loss(object):
@@ -72,12 +68,6 @@ class Loss(object):
         super(Loss, self).__init__()
         self.average = average

-    def infer_shape(self, outputs):
-        return [o.shape for o in outputs]
-
-    def infer_dtype(self, outputs):
-        return [o.dtype for o in outputs]
-
     def forward(self, outputs, labels):
         raise NotImplementedError()
@@ -86,24 +76,21 @@ class Loss(object):
         if in_dygraph_mode():
             labels = [to_variable(l) for l in labels]
         losses = to_list(self.forward(to_list(outputs), labels))
-        if not self.average:
-            return losses
-        return [fluid.layers.reduce_mean(l) for l in losses]
+        if self.average:
+            losses = [fluid.layers.reduce_mean(l) for l in losses]
+        else:
+            losses = [fluid.layers.reduce_sum(l) for l in losses]
+        return losses


 class CrossEntropy(Loss):
-    def __init__(self):
+    def __init__(self, average=True):
         super(CrossEntropy, self).__init__()

+    def infer_shape(self, outputs):
+        return [o.shape[:-1] + (1, ) for o in outputs]
+
+    def infer_dtype(self, outputs):
+        return ['int64' for _ in outputs]
+
     def forward(self, outputs, labels):
-        return [fluid.layers.cross_entropy(o, l) for o, l in zip(
-            outputs, labels)]
+        return [
+            fluid.layers.cross_entropy(o, l) for o, l in zip(outputs, labels)
+        ]


 class StaticGraphAdapter(object):
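Under the refined interface, reduction stays in the Loss base class (controlled by average), while label shape/dtype inference moves into concrete losses via infer_shape/infer_dtype, which the static-graph adapter uses to build label placeholders. A hypothetical custom loss written against this interface could look like the sketch below; the SoftmaxWithCrossEntropy name and the assumption that Loss.__init__ accepts an average argument are illustrative, not part of this commit:

    from paddle import fluid
    from model import Loss

    class SoftmaxWithCrossEntropy(Loss):
        def __init__(self, average=True):
            # assumes Loss.__init__ takes `average`, as suggested by
            # `self.average = average` in the base class
            super(SoftmaxWithCrossEntropy, self).__init__(average)

        def infer_shape(self, outputs):
            # one int64 class-index label per output, mirroring CrossEntropy
            return [o.shape[:-1] + (1, ) for o in outputs]

        def infer_dtype(self, outputs):
            return ['int64' for _ in outputs]

        def forward(self, outputs, labels):
            return [
                fluid.layers.softmax_with_cross_entropy(o, l)
                for o, l in zip(outputs, labels)
            ]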
@@ -116,21 +103,13 @@ class StaticGraphAdapter(object):
         self._orig_prog = fluid.default_main_program()

         self._label_vars = {}  # label variables
+        self._input_vars = {}  # label variables
         self._endpoints = {}
         self._loss_endpoint = None
         self._executor = None
         self._progs = {}
         self._compiled_progs = {}

-        self._lazy_load_optimizer = None
-
-        # parse shape hints
-        self._input_desc = OrderedDict([(n, None) for n in extract_args(
-            self.model.forward) if n != 'self'])
-        if hasattr(self.model.forward, 'shape_hints'):
-            self._input_desc.update(self.model.forward.shape_hints)
-
     @property
     def mode(self):
         return self.model.mode
@@ -139,21 +118,19 @@ class StaticGraphAdapter(object):
     def mode(self, value):
         self.model.mode = value

-    def train(self, inputs, labels, device='CPU', device_ids=None):
-        assert self.model._optimizer and self.model._loss_function, \
+    def train(self, inputs, labels=None):
+        assert self.model._optimizer, \
             "model not ready, please call `model.prepare()` first"
         self.mode = 'train'
-        return self._run(inputs, labels, device, device_ids)
+        return self._run(inputs, labels)

-    def eval(self, inputs, labels, device='CPU', device_ids=None):
-        assert self.model._loss_function, \
-            "model not ready, please call `model.prepare()` first"
+    def eval(self, inputs, labels=None):
         self.mode = 'eval'
-        return self._run(inputs, labels, device, device_ids)
+        return self._run(inputs, labels)

-    def test(self, inputs, device='CPU', device_ids=None):
+    def test(self, inputs):
         self.mode = 'test'
-        return self._run(inputs, None, device, device_ids)
+        return self._run(inputs, None)

     def parameters(self, *args, **kwargs):
         return None
@@ -162,8 +139,10 @@ class StaticGraphAdapter(object):
         def _save(state, path):
             if not state:
                 return
-            state = {k: to_numpy(v) if isinstance(v, Variable) else v
-                     for k, v in state.items()}
+            state = {
+                k: to_numpy(v) if isinstance(v, Variable) else v
+                for k, v in state.items()
+            }
             with open(path, 'wb') as f:
                 pickle.dump(state, f)
@@ -179,8 +158,10 @@ class StaticGraphAdapter(object):
             return

         # XXX `optimizer.state_dict()` only work in dygraph mode
         optim_path = path + ".pdopt"
-        optim = {p.name: p for p in filter(
-            is_belong_to_optimizer, prog.list_vars())}
+        optim = {
+            p.name: p
+            for p in filter(is_belong_to_optimizer, prog.list_vars())
+        }
         if not optim:
             return
@@ -219,19 +200,15 @@ class StaticGraphAdapter(object):
         if optim_state is None:
             return
-        if self._executor is not None:
-            self._load_optimizer(optim_state)
-        else:
-            self._lazy_load_optimizer = optim_state
+        self._load_optimizer(optim_state, executor)

-    def _load_optimizer(self, state):
+    def _load_optimizer(self, state, executor):
         prog = self._progs.get('train', None)
         optim = list(filter(is_belong_to_optimizer, prog.list_vars()))
         if not optim:
             return

-        fluid.core._create_loaded_parameter(
-            optim, global_scope(), self._executor._default_executor)
+        fluid.core._create_loaded_parameter(optim, global_scope(), executor)

         converted_state = dict(state)
         for var in optim:
@@ -261,16 +238,17 @@ class StaticGraphAdapter(object):
         opt_cls_name = self.model._optimizer.__class__.__name__
         opt_unq_name = None
         for name in self.model._optimizer._accumulators.keys():
-            accum_name = name if opt_name is None else name[
-                len(opt_name) + 1:]
+            accum_name = name if opt_name is None else name[len(
+                opt_name) + 1:]
             for param_name, state_var in self.model._optimizer._accumulators[
                     name].items():
                 if opt_unq_name is None:
                     # can not infer out the exact unique(opt_name),
                     # thus try to extract rather than generate
-                    for state_key in sorted(state.keys(),
-                                            key=lambda x: len(x),
-                                            reverse=True):
+                    for state_key in sorted(
+                            state.keys(),
+                            key=lambda x: len(x),
+                            reverse=True):
                         prefix = param_name + "_" + (
                             opt_cls_name if opt_name is None else
                             opt_name) + "_"
@@ -281,8 +259,8 @@ class StaticGraphAdapter(object):
                             param_name + "_"):prefix_offset]
                     # TODO: assert
                     # assert opt_unq_name is None
                 # gen(param.name + "_" + gen(opt_name) + "_" + accum_name)
                 # always end with "_0" since the unique optimizer._name
                 dy_state_name = (param_name + "_" + opt_unq_name +
                                  "_" + accum_name + "_0")
                 converted_state[
(the rendered old and new lines of this hunk are textually identical; the change is whitespace only)
@@ -307,21 +285,20 @@ class StaticGraphAdapter(object):
             t.set(ndarray, place)

-    def _run(self, inputs, labels=None, device='CPU', device_ids=None):
+    def _run(self, inputs, labels=None):
+        compiled_prog = self._compiled_progs.get(self.mode, None)
+        assert compiled_prog, \
+            "Model is not ready, please call `model.prepare()` first"
+
         inputs = to_list(inputs)
         if labels is not None:
             labels = to_list(labels)
-        assert len(inputs) == len(self._input_desc), "number of inputs" \
+        assert len(inputs) == len(self._input_vars[self.mode]), \
+            "number of inputs" \
             + " does not match number of arguments of `forward` method"

-        if self._progs.get(self.mode, None) is None:
-            self._make_program(self._infer_input_vars(inputs))
-
-        compiled_prog = self._compile_and_initialize(
-            self._progs[self.mode], device, device_ids)
-
         feed = {}
-        input_names = [name for name in self._input_desc.keys()]
+        input_names = [v.name for v in self._input_vars[self.mode]]
         for idx, n in enumerate(input_names):
             # train and test may take different arguments
             if inputs[idx] is not None:
@@ -333,79 +310,76 @@ class StaticGraphAdapter(object):
         endpoints = self._endpoints[self.mode]
         fetch_list = endpoints['output'] + endpoints['loss']
         num_output = len(endpoints['output'])
         out = self._executor.run(
             compiled_prog,
             feed=feed,
             fetch_list=fetch_list)
         if self.mode == 'test':
             return out[:num_output]
         else:
             return out[:num_output], out[num_output:]

-    def _make_program(self, inputs):
-        prog = self._orig_prog.clone()
-        # change inputs to the same var in cloned program
-        inputs = fluid.layers.utils.map_structure(
-            lambda var: prog.global_block().var(var.name), inputs)
-        # NOTE: When defining learning rate scheduling in static-graph, ops to
-        # increase the global step var and calculate learning rate would be
-        # prepended into _orig_prog. test program maked by `_orig_prog.clone`
-        # also would include these ops. Thus must prune these ops in test
-        # program, otherwise the global step would be changed in test.
-        if self.mode != 'train':
-            for op in list(prog.global_block().ops):
-                prog.global_block()._remove_op(0)
-        if self.mode == 'train' and self.model._optimizer._learning_rate_map:
-            # HACK workaround learning rate map issue
-            lr_var = self.model._optimizer._learning_rate_map[self._orig_prog]
-            self.model._optimizer._learning_rate_map[prog] = lr_var
-        losses = []
-        with fluid.program_guard(prog, self._startup_prog):
-            outputs = to_list(self.model.forward(*inputs))
-            if self.mode != 'test':
-                label_vars = self._infer_label_vars(outputs)
-                self._label_vars[self.mode] = label_vars
-                losses = self.model._loss_function(outputs, label_vars)
-                if self.mode == 'train':
-                    self._loss_endpoint = fluid.layers.sum(losses)
-                    self.model._optimizer.minimize(self._loss_endpoint)
-        if self.mode != 'train':  # clone again to put it in test mode
-            prog = prog.clone(for_test=True)
-        self._progs[self.mode] = prog
-        self._endpoints[self.mode] = {"output": outputs, "loss": losses}
-
-    def _infer_input_vars(self, inputs):
-        input_vars = []
-        for idx, i in enumerate(inputs):
-            if i is None:
-                # train and test may take different arguments
-                input_vars.append(None)
-                continue
-            ndarray = np.array(i)
-            name = list(self._input_desc.keys())[idx]
-            shape = list(self._input_desc.values())[idx]
-            if shape is None:
-                shape = (None, ) + ndarray.shape[1:]
-            input_vars.append(fluid.data(name, shape, ndarray.dtype))
-        return input_vars
-
-    def _infer_label_vars(self, outputs):
-        shapes = self.model._loss_function.infer_shape(outputs)
-        dtypes = self.model._loss_function.infer_dtype(outputs)
-        label_vars = []
-        for idx, (shape, dtype) in enumerate(zip(shapes, dtypes)):
-            name = '__label{}'.format(idx)
-            label_vars.append(fluid.data(name, shape, dtype))
-        return label_vars
-
-    def _compile_and_initialize(self, prog, device='CPU', device_ids=None):
-        compiled_prog = self._compiled_progs.get(self.mode, None)
-        if compiled_prog is not None:
-            return compiled_prog
-
-        places = [device.lower() == 'gpu' and fluid.CUDAPlace(i)
-                  or fluid.CPUPlace() for i in device_ids]
+    def prepare(self):
+        modes = ['train', 'eval', 'test']
+        for mode in modes:
+            self._make_program(mode)
+            self._compile_and_initialize(self._progs[mode], mode)
+
+    def _make_program(self, mode):
+        prog = self._progs.get(mode, None)
+        if prog is not None:
+            return
+        prog = self._orig_prog.clone()
+        # NOTE: When defining learning rate scheduling in static-graph, ops to
+        # increase the global step var and calculate learning rate would be
+        # prepended into _orig_prog. test program maked by `_orig_prog.clone`
+        # also would include these ops. Thus must prune these ops in test
+        # program, otherwise the global step would be changed in test.
+        if mode != 'train':
+            for op in list(prog.global_block().ops):
+                prog.global_block()._remove_op(0)
+        if mode == 'train' and self.model._optimizer \
+                and self.model._optimizer._learning_rate_map:
+            # HACK workaround learning rate map issue
+            lr_var = self.model._optimizer._learning_rate_map[self._orig_prog]
+            self.model._optimizer._learning_rate_map[prog] = lr_var
+        losses = []
+        with fluid.program_guard(prog, self._startup_prog):
+            if isinstance(self.model._inputs, dict):
+                ins = [self.model._inputs[n] \
+                    for n in extract_args(self.model.forward) if n != 'self']
+            else:
+                ins = self.model._inputs
+            lbls = self.model._labels if self.model._labels else []
+            inputs = [k.forward() for k in to_list(ins)]
+            labels = [k.forward() for k in to_list(lbls)]
+            outputs = to_list(self.model.forward(*inputs))
+            if mode != 'test':
+                if self.model._loss_function:
+                    losses = self.model._loss_function(outputs, labels)
+                if mode == 'train' and self.model._optimizer:
+                    self._loss_endpoint = fluid.layers.sum(losses)
+                    self.model._optimizer.minimize(self._loss_endpoint)
+        if mode != 'train':  # clone again to put it in test mode
+            prog = prog.clone(for_test=True)
+
+        self._input_vars[mode] = inputs
+        self._label_vars[mode] = labels
+        self._progs[mode] = prog
+        self._endpoints[mode] = {"output": outputs, "loss": losses}
+
+    def _compile_and_initialize(self, prog, mode):
+        compiled_prog = self._compiled_progs.get(mode, None)
+        if compiled_prog is not None:
+            return compiled_prog
+
+        device = self.model._device
+        device_ids = self.model._device_ids
+
+        if device.lower() == 'gpu':
+            places = fluid.cuda_places(device_ids)
+        else:
+            places = fluid.cpu_places(len(device_ids) if device_ids else None)

         # XXX *ALL WEIGHTS* should be initialized upon model construction
         # even if `forward()` may run different code path for different mode
@@ -423,31 +397,14 @@ class StaticGraphAdapter(object):
             startup_prog = self._startup_prog._prune(uninitialized)
             self._executor.run(startup_prog)

-        if self.mode == 'train' and self._lazy_load_optimizer:
-            self._load_optimizer(self._lazy_load_optimizer)
-            self._lazy_load_optimizer = None
-
         compiled_prog = fluid.CompiledProgram(prog)
-        if len(device_ids) > 1:
+        if len(places) > 1:
             loss_name = None
-            if self.mode == 'train' and self._loss_endpoint is not None:
+            if mode == 'train' and self._loss_endpoint is not None:
                 loss_name = self._loss_endpoint.name
-            share_vars_from = None
-            if self.mode == 'eval' and 'train' in self._compiled_progs:
-                share_vars_from = self._compiled_progs['train']
-            # HACK invalidate eval program if is compiled before train program
-            # quite hackish, OTOH, it is generally uncommon that the eval
-            # program will be run before the train program
-            if self.mode == 'train' and 'eval' in self._compiled_progs:
-                del self._compiled_progs['eval']
             compiled_prog = compiled_prog.with_data_parallel(
-                loss_name=loss_name, places=places,
-                share_vars_from=share_vars_from)
-        self._compiled_progs[self.mode] = compiled_prog
-        return compiled_prog
+                loss_name=loss_name, places=places)
+        self._compiled_progs[mode] = compiled_prog


 class DynamicGraphAdapter(object):
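With this change, _compile_and_initialize no longer receives device/device_ids per call; it reads self.model._device and self.model._device_ids, which Model.prepare() fills in, and maps them to executor places with fluid.cuda_places / fluid.cpu_places. A small standalone sketch of that mapping, assuming the same fluid APIs used above (illustrative, not part of the commit):

    from paddle import fluid

    def places_for(device, device_ids=None):
        # Mirrors the selection logic in _compile_and_initialize above.
        if device.lower() == 'gpu':
            # e.g. device_ids=[0, 1] -> [CUDAPlace(0), CUDAPlace(1)];
            # None falls back to FLAGS_selected_gpus / CUDA_VISIBLE_DEVICES.
            return fluid.cuda_places(device_ids)
        # For CPU, None falls back to the CPU_NUM environment variable.
        return fluid.cpu_places(len(device_ids) if device_ids else None)

    # places = places_for('gpu', [0])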
@@ -464,13 +421,14 @@ class DynamicGraphAdapter(object):
         self.model.mode = value

     # TODO multi device in dygraph mode not implemented at present time
-    def train(self, inputs, labels, device='CPU', device_ids=None):
-        assert self.model._optimizer and self.model._loss_function, \
+    def train(self, inputs, labels=None):
+        assert self.model._optimizer, \
             "model not ready, please call `model.prepare()` first"
         super(Model, self.model).train()
         self.mode = 'train'
         inputs = to_list(inputs)
-        labels = to_list(labels)
+        if labels is not None:
+            labels = to_list(labels)
         outputs = self.model.forward(*[to_variable(x) for x in inputs])
         losses = self.model._loss_function(outputs, labels)
         final_loss = fluid.layers.sum(losses)
@@ -480,19 +438,25 @@ class DynamicGraphAdapter(object):
         return [to_numpy(o) for o in to_list(outputs)], \
             [to_numpy(l) for l in losses]

-    def eval(self, inputs, labels, device='CPU', device_ids=None):
-        assert self.model._loss_function, \
-            "model not ready, please call `model.prepare()` first"
+    def eval(self, inputs, labels=None):
         super(Model, self.model).eval()
         self.mode = 'eval'
         inputs = to_list(inputs)
-        labels = to_list(labels)
+        if labels is not None:
+            labels = to_list(labels)
         outputs = self.model.forward(*[to_variable(x) for x in inputs])
-        losses = self.model._loss_function(outputs, labels)
+        if self.model._loss_function:
+            losses = self.model._loss_function(outputs, labels)
+        else:
+            losses = []
+        # To be consistent with static graph
+        # return empty loss if loss_function is None
         return [to_numpy(o) for o in to_list(outputs)], \
             [to_numpy(l) for l in losses]

-    def test(self, inputs, device='CPU', device_ids=None):
+    def test(self, inputs):
         super(Model, self.model).eval()
         self.mode = 'test'
         inputs = [to_variable(x) for x in to_list(inputs)]
@@ -528,9 +492,8 @@ class DynamicGraphAdapter(object):
         opt_cls_name = self.model._optimizer.__class__.__name__
         opt_name = opt_unq_name[:opt_unq_name.rfind("_")]  # remove suffix idx
         param_names = [param.name for param in self.model.parameters()]
-        for var_name, state_var in sorted(optim.items(),
-                                          key=lambda x: len(x[0]),
-                                          reverse=True):
+        for var_name, state_var in sorted(
+                optim.items(), key=lambda x: len(x[0]), reverse=True):
             if var_name in ["@LR_DECAY_COUNTER@", "global_step"]:
                 # NOTE: dygraph saved global_step is 1 larger than that in
                 # static-graph, since the time of global_step to increase is
@@ -564,11 +527,21 @@
 class Model(fluid.dygraph.Layer):
+    """
+    FIXME: add more comments and usage
+    """
+
     def __init__(self):
         super(Model, self).__init__(self.__class__.__name__)
         self.mode = 'train'
+        self._inputs = None
+        self._labels = None
         self._loss_function = None
         self._loss_weights = None
-        self._loss = None
         self._optimizer = None
+        self._device = None
+        self._device_ids = None
+        self._optimizer = None
         if in_dygraph_mode():
             self._adapter = DynamicGraphAdapter(self)
@@ -590,11 +563,65 @@ class Model(fluid.dygraph.Layer):
     def load(self, *args, **kwargs):
         return self._adapter.load(*args, **kwargs)

-    def prepare(self, optimizer, loss_function):
+    def prepare(self,
+                optimizer=None,
+                loss_function=None,
+                inputs=None,
+                labels=None,
+                device=None,
+                device_ids=None):
+        """
+        FIXME: add comments
+        Args:
+            optimizer (Optimizer|None): optimizer must be set in training
+                and should be a Optimizer instance. It can be None in eval
+                and test mode.
+            loss_function (Loss|None): loss function must be set in training
+                and should be a Loss instance. It can be None when there is
+                no loss.
+            inputs (Input|list|dict|None): inputs, entry points of network,
+                could be a Input layer, or lits of Input layers,
+                or dict (name: Input), or None. For static graph,
+                inputs must be set. For dynamic graph, it could be None.
+            labels (Input|list|None): labels, entry points of network,
+                could be a Input layer or lits of Input layers, or None.
+                For static graph, if set loss_function in Model.prepare(), it
+                must be set. Otherwise, it could be None.
+            device (str|None): specify device type, 'CPU' or 'GPU'.
+                If None, automatically select device according to
+                installation package version.
+            device_ids (list[int]|None): specify device index. If None,
+                the available device will be obtained from the environment
+                variable when the model is executed: If the GPU is used, the
+                currently available device ID is obtained from the environment
+                variable FLAGS_selected_gpus or CUDA_VISIBLE_DEVICES when the
+                model is executed; CPU, when the model is executed,
+                the currently available CPU number is obtained from the
+                environment variable CPU_NUM. For example, export CPU_NUM=4,
+                if the environment variable is not set, the executor will add
+                the variable to the environment variable and set its value to
+                1. The default is None.
+        """
         self._optimizer = optimizer
-        assert isinstance(loss_function, Loss), \
-            "'loss_function' must be sub classes of 'Loss'"
+        if loss_function:
+            if not isinstance(loss_function, Loss):
+                raise TypeError(
+                    "'loss_function' must be sub classes of 'Loss'")
         self._loss_function = loss_function
+        if not in_dygraph_mode():
+            if not isinstance(inputs, (list, dict, Input)):
+                raise TypeError(
+                    "'inputs' must be list or dict in static graph mode")
+            if loss_function and not isinstance(labels, (list, Input)):
+                raise TypeError("'labels' must be list in static graph mode")
+        self._inputs = inputs
+        self._labels = labels
+        self._device = device
+        if device is None:
+            self._device = 'GPU' if fluid.is_compiled_with_cuda() else 'CPU'
+        self._device_ids = device_ids
+        if not in_dygraph_mode():
+            self._adapter.prepare()

     def parameters(self, *args, **kwargs):
         return self._adapter.parameters(*args, **kwargs)
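Because optimizer, loss_function, inputs, and labels are now optional in prepare(), a model can be prepared for inference only; in that case the dygraph eval() returns an empty loss list and test() takes only inputs. A hedged sketch of that flow in dygraph mode, assuming Model forwards train/eval/test to its adapter as mnist.py's usage suggests, with illustrative tensor shapes:

    import numpy as np
    from paddle import fluid
    from mnist import MNIST   # the Model subclass from this commit's mnist.py

    with fluid.dygraph.guard():
        model = MNIST()
        model.prepare()                     # no optimizer, no loss function
        images = np.random.random((4, 1, 28, 28)).astype('float32')
        outputs = model.test(images)        # forward pass only
        outs, losses = model.eval(images)   # losses == [] when no loss is set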