PaddlePaddle / PaddleHub
Commit 9c96edfe
Authored on Mar 31, 2019 by Zeyu Chen

refactor save_checkpoint, load_checkpoint, memory_optimization, add more logs

Parent: 4920392b
Showing 10 changed files with 208 additions and 109 deletions
demo/bert-cls/finetune_with_hub.py       +2   -3
demo/bert-cls/run_fintune_with_hub.sh    +6   -6
paddle_hub/common/paddle_helper.py       +2   -3
paddle_hub/finetune/checkpoint.proto     +4   -4
paddle_hub/finetune/checkpoint.py        +41  -11
paddle_hub/finetune/checkpoint_pb2.py    +19  -17
paddle_hub/finetune/config.py            +90  -21
paddle_hub/finetune/finetune.py          +36  -43
paddle_hub/finetune/task.py              +7   -0
paddle_hub/module/module.py              +1   -1
demo/bert-cls/finetune_with_hub.py

@@ -58,7 +58,7 @@ if __name__ == '__main__':
     config = FinetuneConfig(
         log_interval=10,
         eval_interval=100,
-        save_ckpt_interval=200,
+        save_ckpt_interval=50,
         use_cuda=True,
         checkpoint_dir=args.checkpoint_dir,
         learning_rate=args.learning_rate,
@@ -67,8 +67,7 @@ if __name__ == '__main__':
         max_seq_len=args.max_seq_len,
         weight_decay=args.weight_decay,
         finetune_strategy="bert_finetune",
-        with_memory_optimization=True,
-        in_tokens=False,
+        enable_memory_optim=True,
         optimizer=None,
         warmup_proportion=args.warmup_proportion)
demo/bert-cls/run_fintune_with_hub.sh

@@ -3,17 +3,17 @@ export CUDA_VISIBLE_DEVICES=5
 DATA_PATH=./chnsenticorp_data
-# HUB_MODULE_DIR="./hub_module/bert_chinese_L-12_H-768_A-12.hub_module"
-HUB_MODULE_DIR="./hub_module/ernie_stable.hub_module"
+HUB_MODULE_DIR="./hub_module/bert_chinese_L-12_H-768_A-12.hub_module"
+# HUB_MODULE_DIR="./hub_module/ernie_stable.hub_module"
 CKPT_DIR="./ckpt"
-rm -rf $CKPT_DIR
+# rm -rf $CKPT_DIR
 
 python -u finetune_with_hub.py \
-                   --batch_size 64 \
+                   --batch_size 128 \
                    --hub_module_dir=$HUB_MODULE_DIR \
                    --data_dir ${DATA_PATH} \
                    --weight_decay 0.01 \
                    --checkpoint_dir $CKPT_DIR \
                    --warmup_proportion 0.0 \
-                   --epoch 3 \
-                   --max_seq_len 128 \
+                   --epoch 2 \
+                   --max_seq_len 16 \
                    --learning_rate 5e-5
paddle_hub/common/paddle_helper.py

@@ -171,7 +171,7 @@ def connect_program(pre_program, next_program, input_dict=None, inplace=True):
                 outputs={'Out': output_var})
 
     block_map = {0: 0}
-    logger.info("start to connect program")
+    logger.info("Connect program's input tensor")
     for index, block in enumerate(next_program.blocks):
         if block.idx == 0:
             _copy_vars_and_ops_in_blocks(block, output_program.global_block())
@@ -183,14 +183,13 @@ def connect_program(pre_program, next_program, input_dict=None, inplace=True):
             new_block = output_program._create_block(
                 parent_idx=block_map[block.parent_idx])
             _copy_vars_and_ops_in_blocks(block, new_block)
-    logger.info("end of connect program")
+    logger.info("Connect program's input tensor done")
     return output_program
 
 
 def remove_feed_fetch_op(program):
     """ remove feed and fetch operator and variable for fine-tuning
     """
-    logger.info("remove feed fetch op")
     block = program.global_block()
     need_to_remove_op_index = []
     for i, op in enumerate(block.ops):
paddle_hub/finetune/checkpoint.proto

-// Copyright 2018 The Paddle Authors. All Rights Reserved.
+// Copyright 2019 The Paddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.

@@ -19,7 +19,7 @@ option optimize_for = LITE_RUNTIME;
 package paddle_hub_finetune_checkpoint;
 
 message CheckPoint {
-  int64  last_epoch = 1;
-  int64  last_step = 2;
-  string last_model_dir = 3;
+  int64  current_epoch = 1;
+  int64  global_step = 2;
+  string latest_model_dir = 3;
 }
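The renamed fields (current_epoch, global_step, latest_model_dir) are what the ckpt.meta record stores, and checkpoint_pb2.py below is the regenerated binding for this message. A minimal sketch of round-tripping the message through those bindings; the concrete field values here are made up for illustration:

from paddle_hub.finetune import checkpoint_pb2

# build a checkpoint record using the renamed fields (example values)
ckpt = checkpoint_pb2.CheckPoint()
ckpt.current_epoch = 3
ckpt.global_step = 150
ckpt.latest_model_dir = "./ckpt/step_150"

# serialize and parse back, mirroring what save_checkpoint/load_checkpoint do
data = ckpt.SerializeToString()
restored = checkpoint_pb2.CheckPoint()
restored.ParseFromString(data)
assert restored.global_step == 150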
paddle_hub/finetune/checkpoint.py

@@ -16,20 +16,50 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import os
+
+import paddle.fluid as fluid
+
 from paddle_hub.finetune import checkpoint_pb2
+from paddle_hub.common.logger import logger
+
+CKPT_FILE_NAME = "ckpt.meta"
 
 
-def load_checkpoint(checkpoint_path):
-    ckpt = checkpoint_pb2.CheckPoint()
-    with open(checkpoint_path, "rb") as file:
-        ckpt.ParseFromString(file.read())
-    return ckpt.last_epoch, ckpt.last_step, ckpt.last_model_dir
+def load_checkpoint(checkpoint_dir, exe):
+    ckpt_meta_path = os.path.join(checkpoint_dir, CKPT_FILE_NAME)
+    logger.info("Try loading checkpoint from {}".format(ckpt_meta_path))
+    if os.path.exists(ckpt_meta_path):
+        ckpt = checkpoint_pb2.CheckPoint()
+        with open(ckpt_meta_path, "rb") as f:
+            ckpt.ParseFromString(f.read())
+        fluid.io.load_persistables(exe, ckpt.latest_model_dir)
+        logger.info("Checkpoint loaded. current_epoch={},"
+                    "global_step={}".format(ckpt_meta_path, current_epoch,
+                                            global_step))
+        return ckpt.current_epoch, ckpt.global_step
+    else:
+        current_epoch = 1
+        global_step = 0
+        latest_model_dir = None
+        logger.info("Checkpoint not found, start training from scratch...")
+        exe.run(fluid.default_startup_program())
 
+    return current_epoch, global_step
 
-def save_checkpoint(checkpoint_path, last_epoch, last_step, last_model_dir):
+
+def save_checkpoint(checkpoint_dir, current_epoch, global_step, exe):
+    ckpt_meta_path = os.path.join(checkpoint_dir, CKPT_FILE_NAME)
     ckpt = checkpoint_pb2.CheckPoint()
-    ckpt.last_epoch = last_epoch
-    ckpt.last_step = last_step
-    ckpt.last_model_dir = last_model_dir
-    with open(checkpoint_path, "wb") as file:
-        file.write(ckpt.SerializeToString())
+
+    model_saved_dir = os.path.join(checkpoint_dir, "step_%d" % global_step)
+    logger.info("Saving model checkpoint to {}".format(model_saved_dir))
+    fluid.io.save_persistables(exe, dirname=model_saved_dir)
+
+    ckpt.current_epoch = current_epoch
+    ckpt.global_step = global_step
+    ckpt.latest_model_dir = model_saved_dir
+    with open(ckpt_meta_path, "wb") as f:
+        f.write(ckpt.SerializeToString())
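With this refactor, load_checkpoint takes the checkpoint directory plus an executor and decides by itself whether to restore persistables from ckpt.meta or to run the startup program, while save_checkpoint persists the variables and writes the metadata in one call. A minimal usage sketch, assuming a driver script outside of finetune.py; the directory path is a placeholder:

import paddle.fluid as fluid

from paddle_hub.finetune.checkpoint import load_checkpoint, save_checkpoint

checkpoint_dir = "./ckpt"  # placeholder path for illustration
exe = fluid.Executor(fluid.CPUPlace())

# restores the latest persistables if ckpt.meta exists, otherwise runs the
# startup program and returns current_epoch=1, global_step=0
current_epoch, global_step = load_checkpoint(checkpoint_dir, exe)

# ... training steps would go here ...

# saves persistables under <checkpoint_dir>/step_<global_step> and rewrites ckpt.meta
save_checkpoint(
    checkpoint_dir=checkpoint_dir,
    current_epoch=current_epoch,
    global_step=global_step,
    exe=exe)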
paddle_hub/finetune/checkpoint_pb2.py

@@ -7,7 +7,6 @@ from google.protobuf import descriptor as _descriptor
 from google.protobuf import message as _message
 from google.protobuf import reflection as _reflection
 from google.protobuf import symbol_database as _symbol_database
-from google.protobuf import descriptor_pb2
 # @@protoc_insertion_point(imports)
 
 _sym_db = _symbol_database.Default()
@@ -16,10 +15,10 @@ DESCRIPTOR = _descriptor.FileDescriptor(
     name='checkpoint.proto',
     package='paddle_hub_finetune_checkpoint',
     syntax='proto3',
+    serialized_options=_b('H\003'),
     serialized_pb=_b(
-        '\n\x10\x63heckpoint.proto\x12\x1epaddle_hub_finetune_checkpoint\"K\n\nCheckPoint\x12\x12\n\nlast_epoch\x18\x01 \x01(\x03\x12\x11\n\tlast_step\x18\x02 \x01(\x03\x12\x16\n\x0elast_model_dir\x18\x03 \x01(\tB\x02H\x03\x62\x06proto3'
+        '\n\x10\x63heckpoint.proto\x12\x1epaddle_hub_finetune_checkpoint\"R\n\nCheckPoint\x12\x15\n\rcurrent_epoch\x18\x01 \x01(\x03\x12\x13\n\x0bglobal_step\x18\x02 \x01(\x03\x12\x18\n\x10latest_model_dir\x18\x03 \x01(\tB\x02H\x03\x62\x06proto3'
     ))
-_sym_db.RegisterFileDescriptor(DESCRIPTOR)
 
 _CHECKPOINT = _descriptor.Descriptor(
     name='CheckPoint',
@@ -29,8 +28,8 @@ _CHECKPOINT = _descriptor.Descriptor(
     containing_type=None,
     fields=[
         _descriptor.FieldDescriptor(
-            name='last_epoch',
-            full_name='paddle_hub_finetune_checkpoint.CheckPoint.last_epoch',
+            name='current_epoch',
+            full_name='paddle_hub_finetune_checkpoint.CheckPoint.current_epoch',
             index=0,
             number=1,
             type=3,
@@ -43,10 +42,11 @@ _CHECKPOINT = _descriptor.Descriptor(
             containing_type=None,
             is_extension=False,
             extension_scope=None,
-            options=None),
+            serialized_options=None,
+            file=DESCRIPTOR),
         _descriptor.FieldDescriptor(
-            name='last_step',
-            full_name='paddle_hub_finetune_checkpoint.CheckPoint.last_step',
+            name='global_step',
+            full_name='paddle_hub_finetune_checkpoint.CheckPoint.global_step',
             index=1,
             number=2,
             type=3,
@@ -59,10 +59,12 @@ _CHECKPOINT = _descriptor.Descriptor(
             containing_type=None,
             is_extension=False,
             extension_scope=None,
-            options=None),
+            serialized_options=None,
+            file=DESCRIPTOR),
         _descriptor.FieldDescriptor(
-            name='last_model_dir',
-            full_name='paddle_hub_finetune_checkpoint.CheckPoint.last_model_dir',
+            name='latest_model_dir',
+            full_name='paddle_hub_finetune_checkpoint.CheckPoint.latest_model_dir',
            index=2,
             number=3,
             type=9,
@@ -75,21 +77,23 @@ _CHECKPOINT = _descriptor.Descriptor(
             containing_type=None,
             is_extension=False,
             extension_scope=None,
-            options=None),
+            serialized_options=None,
+            file=DESCRIPTOR),
     ],
     extensions=[],
     nested_types=[],
     enum_types=[],
-    options=None,
+    serialized_options=None,
     is_extendable=False,
     syntax='proto3',
     extension_ranges=[],
     oneofs=[],
     serialized_start=52,
-    serialized_end=127,
+    serialized_end=134,
 )
 
 DESCRIPTOR.message_types_by_name['CheckPoint'] = _CHECKPOINT
+_sym_db.RegisterFileDescriptor(DESCRIPTOR)
 
 CheckPoint = _reflection.GeneratedProtocolMessageType(
     'CheckPoint',
@@ -101,7 +105,5 @@ CheckPoint = _reflection.GeneratedProtocolMessageType(
 ))
 _sym_db.RegisterMessage(CheckPoint)
 
-DESCRIPTOR.has_options = True
-DESCRIPTOR._options = _descriptor._ParseOptions(descriptor_pb2.FileOptions(),
-                                                _b('H\003'))
+DESCRIPTOR._options = None
 # @@protoc_insertion_point(module_scope)
paddle_hub/finetune/config.py

@@ -14,24 +14,93 @@
 import collections
 
-FinetuneConfig = collections.namedtuple(
-    'FinetuneConfig',
-    [
-        'log_interval',  # print training log every n step
-        'eval_interval',  # evalution the model every n steps
-        'save_ckpt_interval',  # save the model checkpoint every n steps
-        'use_cuda',  # use gpu or not
-        'learning_rate',
-        'checkpoint_dir',  # model checkpoint directory
-        'num_epoch',  # number of finetune epoch
-        'batch_size',
-        # for bert parameter
-        'max_seq_len',  # for bert
-        'weight_decay',  # for bert
-        'warmup_proportion',  # for bert
-        'in_tokens',  # for bert
-        'finetune_strategy',
-        'with_memory_optimization',
-        # learning rate scheduler
-        'optimizer'
-    ])
+
+class FinetuneConfig(object):
+    """ This class specifies the configurations for PaddleHub to finetune """
+
+    def __init__(self,
+                 log_interval=10,
+                 eval_interval=100,
+                 save_ckpt_interval=None,
+                 use_cuda=False,
+                 learning_rate=1e-4,
+                 checkpoint_dir=None,
+                 num_epoch=10,
+                 batch_size=None,
+                 max_seq_len=128,
+                 weight_decay=None,
+                 warmup_proportion=0.0,
+                 finetune_strategy=None,
+                 enable_memory_optim=True,
+                 optimizer="adam"):
+        """ Construct finetune Config """
+        self._log_interval = log_interval
+        self._eval_interval = eval_interval
+        self._save_ckpt_interval = save_ckpt_interval
+        self._use_cuda = use_cuda
+        self._learning_rate = learning_rate
+        self._checkpoint_dir = checkpoint_dir
+        self._num_epoch = num_epoch
+        self._batch_size = batch_size
+        self._max_seq_len = max_seq_len
+        self._weight_decay = weight_decay
+        self._warmup_proportion = warmup_proportion
+        self._finetune_strategy = finetune_strategy
+        self._enable_memory_optim = enable_memory_optim
+        self._optimizer = optimizer
+
+    @property
+    def log_interval(self):
+        return self._log_interval
+
+    @property
+    def eval_interval(self):
+        return self._eval_interval
+
+    @property
+    def save_ckpt_interval(self):
+        return self._save_ckpt_interval
+
+    @property
+    def use_cuda(self):
+        return self._use_cuda
+
+    @property
+    def learning_rate(self):
+        return self._learning_rate
+
+    @property
+    def checkpoint_dir(self):
+        return self._checkpoint_dir
+
+    @property
+    def num_epoch(self):
+        return self._num_epoch
+
+    @property
+    def batch_size(self):
+        return self._batch_size
+
+    @property
+    def max_seq_len(self):
+        return self._max_seq_len
+
+    @property
+    def weight_decay(self):
+        return self._weight_decay
+
+    @property
+    def warmup_proportion(self):
+        return self._warmup_proportion
+
+    @property
+    def finetune_strategy(self):
+        return self._finetune_strategy
+
+    @property
+    def enable_memory_optim(self):
+        return self._enable_memory_optim
+
+    @property
+    def optimier(self):
+        return self._optimizer
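FinetuneConfig thus changes from a namedtuple, where every field had to be supplied, to a plain class with keyword defaults, so callers only override what they need. A minimal construction sketch mirroring the values used in the demo above; the import path is assumed from the file location and the checkpoint directory is a placeholder:

from paddle_hub.finetune.config import FinetuneConfig

config = FinetuneConfig(
    log_interval=10,
    eval_interval=100,
    save_ckpt_interval=50,
    use_cuda=True,
    checkpoint_dir="./ckpt",      # placeholder path
    learning_rate=5e-5,
    num_epoch=2,
    batch_size=128,
    max_seq_len=16,
    weight_decay=0.01,
    warmup_proportion=0.0,
    finetune_strategy="bert_finetune",
    enable_memory_optim=True)

# values are exposed as read-only properties
print(config.learning_rate, config.enable_memory_optim)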
paddle_hub/finetune/finetune.py

@@ -27,8 +27,6 @@ from paddle_hub.common.logger import logger
 from paddle_hub.finetune.optimization import bert_finetune
 from paddle_hub.finetune.checkpoint import load_checkpoint, save_checkpoint
 
-CKPT_FILE = "ckpt.meta"
-
 
 def _get_running_device_info(config):
     if config.use_cuda:
@@ -41,6 +39,27 @@ def _get_running_device_info(config):
     return place, dev_count
 
 
+def _do_memory_optimization(task, config):
+    if config.enable_memory_optim:
+        logger.info("Memory optimization start...")
+        task_var_name = task.metric_variable_names()
+        logger.info(
+            "Skip memory optimization on variables: {}".format(task_var_name))
+        optimize_time_begin = time.time()
+        fluid.memory_optimize(
+            input_program=fluid.default_main_program(),
+            # skip memory optimization on task metric variables
+            skip_opt_set=task_var_name)
+        time_used = time.time() - optimize_time_begin
+        logger.info("Memory optimization done! Time elapsed %f sec" % time_used)
+
+    lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
+        program=fluid.default_main_program(), batch_size=config.batch_size)
+    logger.info("Theoretical memory usage in training: %.3f - %.3f %s" %
+                (lower_mem, upper_mem, unit)),
+
+
 def _finetune_model(task, data_reader, feed_list, config=None, do_eval=False):
     main_program = task.main_program()
     startup_program = task.startup_program()
@@ -50,14 +69,11 @@ def _finetune_model(task, data_reader, feed_list, config=None, do_eval=False):
     num_epoch = config.num_epoch
     batch_size = config.batch_size
     learning_rate = config.learning_rate
-    with_memory_optimization = config.with_memory_optimization
-    checkpoint_path = os.path.join(config.checkpoint_dir, CKPT_FILE)
     log_writter = LogWriter(
         os.path.join(config.checkpoint_dir, "vdllog"), sync_cycle=10)
 
     place, dev_count = _get_running_device_info(config)
     with fluid.program_guard(main_program, startup_program):
         exe = fluid.Executor(place=place)
         data_feeder = fluid.DataFeeder(feed_list=feed_list, place=place)
@@ -69,33 +85,10 @@ def _finetune_model(task, data_reader, feed_list, config=None, do_eval=False):
         optimizer.minimize(loss)
         #TODO: add more finetune strategy
 
-        if with_memory_optimization:
-            logger.info("Memory optimization start...")
-            optimize_time_begin = time.time()
-            fluid.memory_optimize(
-                input_program=fluid.default_main_program(),
-                skip_opt_set=[
-                    # skip task graph variable memory optimization
-                    loss.name, accuracy.name
-                ])
-            time_used = time.time() - optimize_time_begin
-            logger.info(
-                "Memory optimization done! Time elapsed %f sec" % time_used)
-            lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
-                program=main_program, batch_size=batch_size)
-            logger.info("Theoretical memory usage in training: %.3f - %.3f %s" %
-                        (lower_mem, upper_mem, unit)),
-
-        # initilize
-        if os.path.exists(checkpoint_path):
-            last_epoch, global_step, last_model_dir = load_checkpoint(
-                checkpoint_path)
-            fluid.io.load_persistables(exe, last_model_dir)
-        else:
-            exe.run(fluid.default_startup_program())
-            global_step = 0
-            last_epoch = 1
+        _do_memory_optimization(task, config)
+
+        # Try to restore model training checkpoint
+        current_epoch, global_step = load_checkpoint(config.checkpoint_dir, exe)
 
         best_eval_acc = 0.0
         train_time_used = 0
@@ -109,7 +102,7 @@ def _finetune_model(task, data_reader, feed_list, config=None, do_eval=False):
         eval_loss_scalar = logw.scalar(tag="loss[evaluate]")
         eval_acc_scalar = logw.scalar(tag="accuracy[evaluate]")
 
-        for epoch in range(last_epoch, num_epoch + 1):
+        for epoch in range(current_epoch, num_epoch + 1):
             train_reader = data_reader.data_generator(
                 batch_size=batch_size, phase='train')
             num_trained_examples = acc_sum = loss_sum = 0
@@ -141,16 +134,16 @@ def _finetune_model(task, data_reader, feed_list, config=None, do_eval=False):
                     num_trained_examples = acc_sum = loss_sum = 0
 
                 if global_step % config.save_ckpt_interval == 0:
                     model_saved_dir = os.path.join(
                         config.checkpoint_dir,
-                        "model_in_step_%d" % global_step)
+                        "step_%d" % global_step)
                     fluid.io.save_persistables(exe, dirname=model_saved_dir)
                     # NOTE: current saved checkpoint machanism is not completed,
                     # it can't restore dataset training status
                     save_checkpoint(
-                        checkpoint_path,
-                        last_epoch=epoch,
-                        last_step=global_step,
-                        last_model_dir=model_saved_dir)
+                        checkpoint_dir=config.checkpoint_dir,
+                        current_epoch=epoch,
+                        global_step=global_step,
+                        exe=exe)
 
                 if do_eval and global_step % config.eval_interval == 0:
                     eval_loss, eval_acc, eval_perf = evaluate(
@@ -176,10 +169,10 @@ def _finetune_model(task, data_reader, feed_list, config=None, do_eval=False):
         # NOTE: current saved checkpoint machanism is not completed, it can't
        # resotre dataset training status
         save_checkpoint(
-            checkpoint_path,
-            last_epoch=num_epoch + 1,
-            last_step=global_step,
-            last_model_dir=model_saved_dir)
+            checkpoint_dir=config.checkpoint_dir,
+            current_epoch=num_epoch + 1,
+            global_step=global_step,
+            exe=exe)
 
         if do_eval:
             evaluate(task, data_reader, feed_list, phase="test", config=config)
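Taken together, the resume logic inside _finetune_model collapses into one load_checkpoint call before the epoch loop plus save_checkpoint calls at the save interval and at the end of training. A condensed, hypothetical sketch of that flow, where data_reader, config, exe and run_step stand in for the real objects created inside _finetune_model:

from paddle_hub.finetune.checkpoint import load_checkpoint, save_checkpoint


def _condensed_training_loop(data_reader, config, exe, run_step):
    # hypothetical helper for illustration only, not part of this commit
    current_epoch, global_step = load_checkpoint(config.checkpoint_dir, exe)

    for epoch in range(current_epoch, config.num_epoch + 1):
        train_reader = data_reader.data_generator(
            batch_size=config.batch_size, phase='train')
        for batch in train_reader():
            run_step(batch)  # forward/backward/update for one batch
            global_step += 1
            if global_step % config.save_ckpt_interval == 0:
                save_checkpoint(
                    checkpoint_dir=config.checkpoint_dir,
                    current_epoch=epoch,
                    global_step=global_step,
                    exe=exe)

    # final checkpoint so a later run starts past the last epoch
    save_checkpoint(
        checkpoint_dir=config.checkpoint_dir,
        current_epoch=config.num_epoch + 1,
        global_step=global_step,
        exe=exe)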
paddle_hub/finetune/task.py

@@ -43,3 +43,10 @@ class Task(object):
     def inference_program(self):
         return self._inference_program
+
+    def metric_variable_names(self):
+        metric_variable_names = []
+        for var_name in self.graph_var_dict:
+            metric_variable_names.append(var_name)
+
+        return metric_variable_names
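The new helper exposes every variable name registered in the task's graph_var_dict so callers can keep those variables out of memory optimization, which is how _do_memory_optimization above uses it. A minimal sketch of that pairing, assuming task is a paddle_hub Task whose graph_var_dict keys include names such as "loss" and "accuracy":

import paddle.fluid as fluid


def memory_optimize_skipping_metrics(task):
    # hypothetical wrapper for illustration; mirrors _do_memory_optimization
    skip_vars = task.metric_variable_names()
    # leave metric variables out of in-place reuse so they can still be fetched
    fluid.memory_optimize(
        input_program=fluid.default_main_program(),
        skip_opt_set=skip_vars)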
paddle_hub/module/module.py

@@ -464,7 +464,7 @@ class Module(object):
                 "max_seq_len({}) should be in the range of [1, {}]".format(
                     MAX_SEQ_LENGTH))
             logger.info(
-                "update maximum sequence length of input tensor to {}".format(
+                "Set maximum sequence length of input tensor to {}".format(
                     max_seq_len))
             for tensor_name in [
                     "input_ids", "position_ids", "segment_ids", "input_mask"