Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleHub
提交
57dbe135
P
PaddleHub
项目概览
PaddlePaddle
/
PaddleHub
大约 1 年 前同步成功
通知
282
Star
12117
Fork
2091
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
200
列表
看板
标记
里程碑
合并请求
4
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleHub
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
200
Issue
200
列表
看板
标记
里程碑
合并请求
4
合并请求
4
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
57dbe135
编写于
3月 27, 2019
作者:
Z
Zeyu Chen
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add finetune.py and networks.py
上级
5b884607
变更
8
隐藏空白更改
内联
并排
Showing
8 changed file
with
253 addition
and
477 deletion
+253
-477
demo/bert-cls/finetune_with_hub.py
demo/bert-cls/finetune_with_hub.py
+2
-2
demo/bert-cls/reader/batching.py
demo/bert-cls/reader/batching.py
+0
-0
demo/bert-cls/reader/cls.py
demo/bert-cls/reader/cls.py
+1
-1
demo/bert-cls/reader/pretraining.py
demo/bert-cls/reader/pretraining.py
+0
-290
paddle_hub/__init__.py
paddle_hub/__init__.py
+4
-1
paddle_hub/finetune/finetune.py
paddle_hub/finetune/finetune.py
+165
-0
paddle_hub/finetune/network.py
paddle_hub/finetune/network.py
+81
-0
paddle_hub/finetune/task.py
paddle_hub/finetune/task.py
+0
-183
未找到文件。
demo/bert-cls/finetune_with_hub.py
浏览文件 @
57dbe135
...
...
@@ -101,10 +101,10 @@ def test_hub_api(args, config):
input_dict
[
"sent_ids"
].
name
,
input_dict
[
"input_mask"
].
name
,
label
.
name
]
task
=
hub
.
append_mlp_classifier
(
cls_
task
=
hub
.
append_mlp_classifier
(
pooled_output
,
label
,
num_classes
=
num_labels
)
hub
.
finetune_and_eval
(
task
,
feed_list
,
processor
,
config
)
hub
.
finetune_and_eval
(
cls_
task
,
feed_list
,
processor
,
config
)
if
__name__
==
'__main__'
:
...
...
demo/bert-cls/batching.py
→
demo/bert-cls/
reader/
batching.py
浏览文件 @
57dbe135
文件已移动
demo/bert-cls/reader/cls.py
浏览文件 @
57dbe135
...
...
@@ -17,7 +17,7 @@ import types
import
csv
import
numpy
as
np
import
tokenization
from
batching
import
prepare_batch_data
from
.
batching
import
prepare_batch_data
class
DataProcessor
(
object
):
...
...
demo/bert-cls/reader/pretraining.py
已删除
100644 → 0
浏览文件 @
5b884607
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
from
__future__
import
division
import
os
import
numpy
as
np
import
types
import
gzip
import
logging
import
re
import
six
import
collections
import
tokenization
import
paddle
import
paddle.fluid
as
fluid
from
batching
import
prepare_batch_data
class DataReader(object):
    """Reader for gzipped BERT pre-training data.

    Each data file line holds one sample: ``token_ids;sent_ids;pos_ids;label``
    with space-separated integer ids. The reader can optionally synthesize
    negative next-sentence samples by re-pairing sentence halves of positive
    samples, and yields padded batches via ``prepare_batch_data``.
    """

    def __init__(self,
                 data_dir,
                 vocab_path,
                 batch_size=4096,
                 in_tokens=True,
                 max_seq_len=512,
                 shuffle_files=True,
                 epoch=100,
                 voc_size=0,
                 is_test=False,
                 generate_neg_sample=False):
        # Vocabulary maps token string -> integer id; special ids are cached.
        self.vocab = self.load_vocab(vocab_path)
        self.data_dir = data_dir
        self.batch_size = batch_size
        self.in_tokens = in_tokens
        self.shuffle_files = shuffle_files
        self.epoch = epoch
        # Progress-tracking state, exposed through get_progress().
        self.current_epoch = 0
        self.current_file_index = 0
        self.total_file = 0
        self.current_file = None
        self.voc_size = voc_size
        self.max_seq_len = max_seq_len
        self.pad_id = self.vocab["[PAD]"]
        self.cls_id = self.vocab["[CLS]"]
        self.sep_id = self.vocab["[SEP]"]
        self.mask_id = self.vocab["[MASK]"]
        self.is_test = is_test
        self.generate_neg_sample = generate_neg_sample
        if self.in_tokens:
            # When batch_size counts tokens, one max-length sample must fit.
            assert self.batch_size >= self.max_seq_len, "The number of " \
                "tokens in batch should not be smaller than max seq length."
        if self.is_test:
            # Test mode: a single deterministic pass over the data.
            self.epoch = 1
            self.shuffle_files = False

    def get_progress(self):
        """Return current progress of training data:
        (current_epoch, current_file_index, total_file, current_file).
        """
        return self.current_epoch, self.current_file_index, self.total_file, \
            self.current_file

    def parse_line(self, line, max_seq_len=512):
        """Parse one line into [token_ids, sent_ids, pos_ids, label].

        Returns None when the sample exceeds ``max_seq_len`` tokens.
        """
        line = line.strip().split(";")
        assert len(line) == 4, "One sample must have 4 fields!"
        (token_ids, sent_ids, pos_ids, label) = line
        token_ids = [int(token) for token in token_ids.split(" ")]
        sent_ids = [int(token) for token in sent_ids.split(" ")]
        pos_ids = [int(token) for token in pos_ids.split(" ")]
        assert len(token_ids) == len(sent_ids) == len(
            pos_ids
        ), "[Must be true]len(token_ids) == len(sent_ids) == len(pos_ids)"
        label = int(label)
        if len(token_ids) > max_seq_len:
            return None
        return [token_ids, sent_ids, pos_ids, label]

    def read_file(self, file):
        """Yield parsed samples from one gzipped data file, skipping
        samples longer than ``self.max_seq_len``.
        """
        assert file.endswith('.gz'), "[ERROR] %s is not a gzip file" % file
        file_path = self.data_dir + "/" + file
        # NOTE(review): "rb" yields bytes lines, but parse_line splits on a
        # str ";" — this only works on Python 2; verify before running on 3.
        with gzip.open(file_path, "rb") as f:
            for line in f:
                parsed_line = self.parse_line(
                    line, max_seq_len=self.max_seq_len)
                if parsed_line is None:
                    continue
                yield parsed_line

    def convert_to_unicode(self, text):
        """Converts `text` to Unicode (if it's not already), assuming utf-8 input."""
        if six.PY3:
            if isinstance(text, str):
                return text
            elif isinstance(text, bytes):
                return text.decode("utf-8", "ignore")
            else:
                raise ValueError("Unsupported string type: %s" % (type(text)))
        elif six.PY2:
            if isinstance(text, str):
                return text.decode("utf-8", "ignore")
            elif isinstance(text, unicode):
                return text
            else:
                raise ValueError("Unsupported string type: %s" % (type(text)))
        else:
            raise ValueError("Not running on Python2 or Python 3?")

    def load_vocab(self, vocab_file):
        """Loads a vocabulary file into a dictionary.

        Each line is either "token\\tindex" or a bare token (its line number
        becomes the index). A line with more than two fields stops loading.
        """
        vocab = collections.OrderedDict()
        # Fix: use a context manager so the file handle is always closed
        # (the original left `fin` open).
        with open(vocab_file) as fin:
            for num, line in enumerate(fin):
                items = self.convert_to_unicode(line.strip()).split("\t")
                if len(items) > 2:
                    break
                token = items[0]
                index = items[1] if len(items) == 2 else num
                token = token.strip()
                vocab[token] = int(index)
        return vocab

    def random_pair_neg_samples(self, pos_samples):
        """Randomly generate negative samples using pos_samples.

        Args:
            pos_samples: list of positive samples
        Returns:
            (neg_samples, miss_num): negative samples plus the count of
            candidates dropped for exceeding max_seq_len.
        """
        np.random.shuffle(pos_samples)
        num_sample = len(pos_samples)
        neg_samples = []
        miss_num = 0
        for i in range(num_sample):
            # Pair each sample's first sentence with the next sample's second.
            pair_index = (i + 1) % num_sample
            origin_src_ids = pos_samples[i][0]
            # NOTE(review): `2` is assumed to be the [SEP] token id — confirm
            # against the vocabulary in use.
            origin_sep_index = origin_src_ids.index(2)
            pair_src_ids = pos_samples[pair_index][0]
            pair_sep_index = pair_src_ids.index(2)

            src_ids = origin_src_ids[:origin_sep_index + 1] + \
                pair_src_ids[pair_sep_index + 1:]
            if len(src_ids) >= self.max_seq_len:
                miss_num += 1
                continue
            sent_ids = [0] * len(origin_src_ids[:origin_sep_index + 1]) + \
                [1] * len(pair_src_ids[pair_sep_index + 1:])
            pos_ids = list(range(len(src_ids)))
            neg_sample = [src_ids, sent_ids, pos_ids, 0]
            assert len(src_ids) == len(sent_ids) == len(
                pos_ids
            ), "[ERROR]len(src_id) == lne(sent_id) == len(pos_id) must be True"
            neg_samples.append(neg_sample)
        return neg_samples, miss_num

    def mixin_negtive_samples(self, pos_sample_generator, buffer=1000):
        """1. Generate negative samples by randomly re-pairing sentence_1 and
           sentence_2 of positive samples.
        2. Combine negative samples and positive samples.

        Args:
            pos_sample_generator: a generator producing a parsed positive
                sample, which is a list: [token_ids, sent_ids, pos_ids, 1]
        Returns:
            sample: one sample from shuffled positive and negative samples
        """
        pos_samples = []
        num_total_miss = 0
        pos_sample_num = 0
        try:
            while True:
                # Buffer up positives, then mix in an equal set of negatives.
                while len(pos_samples) < buffer:
                    pos_sample = next(pos_sample_generator)
                    label = pos_sample[3]
                    assert label == 1, "positive sample's label must be 1"
                    pos_samples.append(pos_sample)
                    pos_sample_num += 1

                neg_samples, miss_num = self.random_pair_neg_samples(
                    pos_samples)
                num_total_miss += miss_num
                samples = pos_samples + neg_samples
                pos_samples = []
                np.random.shuffle(samples)
                for sample in samples:
                    yield sample
        except StopIteration:
            print("stopiteration: reach end of file")
            # Flush whatever positives remain in the buffer.
            if len(pos_samples) == 1:
                yield pos_samples[0]
            elif len(pos_samples) == 0:
                yield None
            else:
                neg_samples, miss_num = self.random_pair_neg_samples(
                    pos_samples)
                num_total_miss += miss_num
                samples = pos_samples + neg_samples
                pos_samples = []
                np.random.shuffle(samples)
                for sample in samples:
                    yield sample
            # Fix: guard the miss-rate division — the original raised
            # ZeroDivisionError when no positive sample was ever produced.
            ideal_total = pos_sample_num * 2
            miss_rate = num_total_miss / ideal_total if ideal_total else 0.0
            print("miss_num:%d\tideal_total_sample_num:%d\tmiss_rate:%f" %
                  (num_total_miss, ideal_total, miss_rate))

    def data_generator(self):
        """Return a wrapper whose call yields prepared batches over all data
        files in ``self.data_dir`` for ``self.epoch`` epochs.
        """
        files = os.listdir(self.data_dir)
        self.total_file = len(files)
        assert self.total_file > 0, "[Error] data_dir is empty"

        def wrapper():
            def reader():
                for epoch in range(self.epoch):
                    self.current_epoch = epoch + 1
                    if self.shuffle_files:
                        np.random.shuffle(files)
                    for index, file in enumerate(files):
                        self.current_file_index = index + 1
                        self.current_file = file
                        sample_generator = self.read_file(file)
                        if not self.is_test and self.generate_neg_sample:
                            sample_generator = self.mixin_negtive_samples(
                                sample_generator)
                        for sample in sample_generator:
                            if sample is None:
                                continue
                            yield sample

            def batch_reader(reader, batch_size, in_tokens):
                # Accumulate samples until the batch is full; in token mode a
                # batch is full when (len+1) * longest_sample would overflow.
                batch, total_token_num, max_len = [], 0, 0
                for parsed_line in reader():
                    token_ids, sent_ids, pos_ids, label = parsed_line
                    max_len = max(max_len, len(token_ids))
                    if in_tokens:
                        to_append = (len(batch) + 1) * max_len <= batch_size
                    else:
                        to_append = len(batch) < batch_size
                    if to_append:
                        batch.append(parsed_line)
                        total_token_num += len(token_ids)
                    else:
                        yield batch, total_token_num
                        batch, total_token_num, max_len = [parsed_line], len(
                            token_ids), len(token_ids)
                if len(batch) > 0:
                    yield batch, total_token_num

            for batch_data, total_token_num in batch_reader(
                    reader, self.batch_size, self.in_tokens):
                yield prepare_batch_data(
                    batch_data,
                    total_token_num,
                    voc_size=self.voc_size,
                    pad_id=self.pad_id,
                    cls_id=self.cls_id,
                    sep_id=self.sep_id,
                    mask_id=self.mask_id,
                    return_input_mask=True,
                    return_max_len=False,
                    return_num_token=False)

        return wrapper
if __name__ == "__main__":
    # Module is import-only; nothing to run directly.
    pass
paddle_hub/__init__.py
浏览文件 @
57dbe135
...
...
@@ -26,4 +26,7 @@ from .tools.logger import logger
from
.tools.paddle_helper
import
connect_program
from
.io.type
import
DataType
from
.hub_server
import
default_hub_server
from
.finetune.task
import
append_mlp_classifier
,
finetune_and_eval
from
.finetune.network
import
append_mlp_classifier
from
.finetune.finetune
import
finetune_and_eval
from
.finetune.config
import
FinetuneConfig
from
.finetune.task
import
Task
paddle_hub/finetune/finetune.py
0 → 100644
浏览文件 @
57dbe135
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
os
import
paddle.fluid
as
fluid
import
time
import
numpy
as
np
import
multiprocessing
from
paddle_hub.finetune.optimization
import
bert_optimization
from
paddle_hub.finetune.config
import
FinetuneConfig
def finetune_and_eval(task, feed_list, data_processor, config=None):
    """Fine-tune the program held by ``task`` and periodically evaluate it.

    Args:
        task: Task object exposing variable()/main_program()/startup_program().
        feed_list: variable names fed to fluid.DataFeeder.
        data_processor: provides data_generator(...) and get_num_examples(...).
        config: FinetuneConfig with use_cuda, batch_size, epoch, in_tokens,
            max_seq_len, warmup_proportion, learning_rate, weight_decay,
            stat_interval, eval_interval.
    """
    # environment setup
    if config.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
        dev_count = fluid.core.get_cuda_device_count()
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))

    # data generator
    data_generator = {
        'train': data_processor.data_generator(
            batch_size=config.batch_size,
            phase='train',
            epoch=config.epoch,
            shuffle=False),
        'test': data_processor.data_generator(
            batch_size=config.batch_size, phase='test', shuffle=False),
        'dev': data_processor.data_generator(
            batch_size=config.batch_size, phase='dev', shuffle=False)
    }

    # hub.finetune_and_eval start here
    #TODO: to simplify
    loss = task.variable("loss")
    probs = task.variable("probs")
    accuracy = task.variable("accuracy")
    num_example = task.variable("num_example")

    num_train_examples = data_processor.get_num_examples(phase='train')
    if config.in_tokens:
        # batch_size counts tokens, so divide by max_seq_len to get samples.
        max_train_steps = config.epoch * num_train_examples // (
            config.batch_size // config.max_seq_len) // dev_count
    else:
        max_train_steps = config.epoch * num_train_examples // \
            config.batch_size // dev_count

    warmup_steps = int(max_train_steps * config.warmup_proportion)

    # obtain main program from Task class
    train_program = task.main_program()
    startup_program = task.startup_program()
    # clone test program before optimize
    test_program = train_program.clone(for_test=True)

    bert_optimization(loss, warmup_steps, max_train_steps,
                      config.learning_rate, train_program,
                      config.weight_decay)

    # memory optimization
    fluid.memory_optimize(
        input_program=train_program,
        skip_opt_set=[
            # skip task graph variable memory optimization
            loss.name,
            probs.name,
            accuracy.name,
            num_example.name
        ])

    # FIX: the original unconditionally rebound place = fluid.CUDAPlace(0)
    # here, which broke CPU-only runs; keep the place chosen from config.
    exe = fluid.Executor(place)
    exe.run(startup_program)
    feeder = fluid.DataFeeder(feed_list=feed_list, place=place)

    # Training block
    # prepare training dataset
    train_data_generator = data_generator['train']
    total_loss, total_acc, total_num_example = [], [], []
    step = 0
    time_begin = time.time()
    train_time_used = 0.0
    for example in train_data_generator():
        step += 1
        train_time_begin = time.time()
        np_loss, np_acc, np_num_example = exe.run(
            program=train_program,
            feed=feeder.feed([example]),
            fetch_list=[loss, accuracy, num_example])
        train_time_used += time.time() - train_time_begin

        # Statistic Block
        total_loss.extend(np_loss * np_num_example)
        total_acc.extend(np_acc * np_num_example)
        total_num_example.extend(np_num_example)
        if step % config.stat_interval == 0:
            # get training progress
            accum_num_example = np.sum(total_num_example)
            print("step {}: loss={:.5f} acc={:.5f} [step/sec: {:.2f}]".format(
                step,
                np.sum(total_loss) / accum_num_example,
                np.sum(total_acc) / accum_num_example,
                config.stat_interval / train_time_used))
            # reset statistic variables
            total_loss, total_acc, total_num_example = [], [], []
            train_time_used = 0.0

        # Evaluation block
        if step % config.eval_interval == 0:
            # FIX: the original called evaluate() with only three arguments
            # while its signature takes four — `feeder` was missing.
            evaluate(test_program, exe, feeder, data_generator)

    if step % config.eval_interval == 0:
        # Final Test Block
        total_loss, total_acc, total_num_example = [], [], []
        test_data_generator = data_generator['test']
        for example in test_data_generator():
            np_loss, np_acc, np_num_example = exe.run(
                program=test_program,
                feed=feeder.feed([example]),
                fetch_list=[loss, accuracy, num_example])
            total_loss.extend(np_loss * np_num_example)
            total_acc.extend(np_acc * np_num_example)
            total_num_example.extend(np_num_example)
        accum_num_example = np.sum(total_num_example)
        print("[Final Test] loss={:.5f} acc={:.5f}".format(
            np.sum(total_loss) / accum_num_example,
            np.sum(total_acc) / accum_num_example))
def evaluate(test_program, exe, feeder, data_generator, fetch_list=None):
    """Run one pass over the 'dev' split and print loss/accuracy.

    Args:
        test_program: the for_test clone of the training program.
        exe: fluid.Executor used to run the program.
        feeder: fluid.DataFeeder matching the program's feed list.
        data_generator: dict of split name -> generator factory.
        fetch_list: variables to fetch as [loss, accuracy, num_example].
            Added (backward-compatibly) because the original body referenced
            module-level names that do not exist.
    """
    print("Evaluation start")
    if fetch_list is None:
        # NOTE(review): original bug — `loss`, `accuracy` and `num_example`
        # are not defined in this function or module scope, so the default
        # path raises NameError. Callers should pass fetch_list explicitly.
        fetch_list = [loss, accuracy, num_example]
    total_loss, total_acc, total_num_example = [], [], []
    dev_data_generator = data_generator['dev']
    eval_step = 0
    eval_time_begin = time.time()
    for example in dev_data_generator():
        eval_step += 1
        np_loss, np_acc, np_num_example = exe.run(
            program=test_program,
            feed=feeder.feed([example]),
            fetch_list=fetch_list)
        total_loss.extend(np_loss * np_num_example)
        total_acc.extend(np_acc * np_num_example)
        total_num_example.extend(np_num_example)
        eval_time_used = time.time() - eval_time_begin
    accum_num_example = np.sum(total_num_example)
    print("[Evaluation] loss={:.5f} acc={:.5f} [step/sec: {:.2f}]".format(
        np.sum(total_loss) / accum_num_example,
        np.sum(total_acc) / accum_num_example, eval_step / eval_time_used))
paddle_hub/finetune/network.py
0 → 100644
浏览文件 @
57dbe135
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
os
import
collections
import
paddle.fluid
as
fluid
import
time
import
numpy
as
np
import
multiprocessing
from
paddle_hub.finetune.optimization
import
bert_optimization
from
.task
import
Task
__all__
=
[
'append_mlp_classifier'
]
def append_mlp_classifier(feature, label, num_classes=2, hidden_units=None):
    """
    Append a multi-layer perceptron classifier for binary classification base
    on input feature
    """
    # Regularize the pooled feature before classification.
    cls_feats = fluid.layers.dropout(
        x=feature, dropout_prob=0.1, dropout_implementation="upscale_in_train")

    # append fully connected layer according to hidden_units
    if hidden_units is not None:
        for n_hidden in hidden_units:
            cls_feats = fluid.layers.fc(input=cls_feats, size=n_hidden)

    # Projection to class logits with named, re-loadable parameters.
    logits = fluid.layers.fc(
        input=cls_feats,
        size=num_classes,
        param_attr=fluid.ParamAttr(
            name="cls_out_w",
            initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=fluid.ParamAttr(
            name="cls_out_b", initializer=fluid.initializer.Constant(0.)))

    ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
        logits=logits, label=label, return_softmax=True)
    loss = fluid.layers.mean(x=ce_loss)

    num_example = fluid.layers.create_tensor(dtype='int64')
    accuracy = fluid.layers.accuracy(
        input=probs, label=label, total=num_example)

    # TODO: encapsulate to Task
    graph_var_dict = {
        "loss": loss,
        "probs": probs,
        "accuracy": accuracy,
        "num_example": num_example
    }

    return Task("text_classification", graph_var_dict,
                fluid.default_main_program(),
                fluid.default_startup_program())
def append_mlp_multi_classifier(feature, label, num_classes, hidden_units=None,
                                act=None):
    """Placeholder for a multi-label MLP classifier head; not implemented."""
    pass
def append_sequence_labler(feature, label):
    """Placeholder for a sequence-labeling head; not implemented."""
    pass
paddle_hub/finetune/task.py
浏览文件 @
57dbe135
...
...
@@ -19,192 +19,9 @@ import time
import
numpy
as
np
import
multiprocessing
from
paddle_hub.tools.logger
import
logger
from
paddle_hub.finetune.optimization
import
bert_optimization
from
paddle_hub.finetune.config
import
FinetuneConfig
__all__
=
[
'append_mlp_classifier'
]
def append_mlp_classifier(feature, label, num_classes=2, hidden_units=None):
    """Build an MLP classification head on top of ``feature`` and wrap the
    resulting graph variables in a Task.
    """
    cls_feats = fluid.layers.dropout(
        x=feature, dropout_prob=0.1, dropout_implementation="upscale_in_train")

    # append fully connected layer according to hidden_units
    if hidden_units is not None:
        for n_hidden in hidden_units:
            cls_feats = fluid.layers.fc(input=cls_feats, size=n_hidden)

    logits = fluid.layers.fc(
        input=cls_feats,
        size=num_classes,
        param_attr=fluid.ParamAttr(
            name="cls_out_w",
            initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=fluid.ParamAttr(
            name="cls_out_b", initializer=fluid.initializer.Constant(0.)))

    ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
        logits=logits, label=label, return_softmax=True)
    loss = fluid.layers.mean(x=ce_loss)

    num_example = fluid.layers.create_tensor(dtype='int64')
    accuracy = fluid.layers.accuracy(
        input=probs, label=label, total=num_example)

    # TODO: encapsulate to Task
    graph_var_dict = {
        "loss": loss,
        "probs": probs,
        "accuracy": accuracy,
        "num_example": num_example
    }

    return Task("text_classification", graph_var_dict,
                fluid.default_main_program(),
                fluid.default_startup_program())
def finetune_and_eval(task, feed_list, data_processor, config=None):
    """Fine-tune the program held by ``task``; evaluation on the dev split is
    performed inline every ``config.eval_interval`` steps, followed by a
    final pass over the test split.
    """
    if config.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
        dev_count = fluid.core.get_cuda_device_count()
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))

    # data generator
    data_generator = {
        'train': data_processor.data_generator(
            batch_size=config.batch_size,
            phase='train',
            epoch=config.epoch,
            shuffle=False),
        'test': data_processor.data_generator(
            batch_size=config.batch_size, phase='test', shuffle=False),
        'dev': data_processor.data_generator(
            batch_size=config.batch_size, phase='dev', shuffle=False)
    }

    # hub.finetune_and_eval start here
    #TODO: to simplify
    loss = task.variable("loss")
    probs = task.variable("probs")
    accuracy = task.variable("accuracy")
    num_example = task.variable("num_example")

    num_train_examples = data_processor.get_num_examples(phase='train')
    if config.in_tokens:
        max_train_steps = config.epoch * num_train_examples // (
            config.batch_size // config.max_seq_len) // dev_count
    else:
        max_train_steps = config.epoch * num_train_examples // \
            config.batch_size // dev_count

    warmup_steps = int(max_train_steps * config.warmup_proportion)

    # obtain main program from Task class
    train_program = task.main_program()
    startup_program = task.startup_program()
    # clone test program before optimize
    test_program = train_program.clone(for_test=True)

    bert_optimization(loss, warmup_steps, max_train_steps,
                      config.learning_rate, train_program,
                      config.weight_decay)

    # memory optimization
    fluid.memory_optimize(
        input_program=train_program,
        skip_opt_set=[
            # skip task graph variable memory optimization
            loss.name,
            probs.name,
            accuracy.name,
            num_example.name
        ])

    # NOTE(review): this unconditionally targets GPU 0 and discards the
    # place chosen from config above — breaks CPU-only runs; confirm intent.
    place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(startup_program)
    feeder = fluid.DataFeeder(feed_list=feed_list, place=place)

    # Traning block
    # prepare training dataset
    train_data_generator = data_generator['train']
    total_loss, total_acc, total_num_example = [], [], []
    step = 0
    time_begin = time.time()
    train_time_used = 0.0
    for example in train_data_generator():
        step += 1
        train_time_begin = time.time()
        np_loss, np_acc, np_num_example = exe.run(
            program=train_program,
            feed=feeder.feed([example]),
            fetch_list=[loss, accuracy, num_example])
        train_time_used += time.time() - train_time_begin

        # Statistic Block
        total_loss.extend(np_loss * np_num_example)
        total_acc.extend(np_acc * np_num_example)
        total_num_example.extend(np_num_example)
        if step % config.stat_interval == 0:
            # get training progress
            accum_num_example = np.sum(total_num_example)
            print("step {}: loss={:.5f} acc={:.5f} [step/sec: {:.2f}]".format(
                step,
                np.sum(total_loss) / accum_num_example,
                np.sum(total_acc) / accum_num_example,
                config.stat_interval / train_time_used))
            # reset statistic variables
            total_loss, total_acc, total_num_example = [], [], []
            train_time_used = 0.0

        # Evaluation block
        if step % config.eval_interval == 0:
            print("Evaluation start")
            total_loss, total_acc, total_num_example = [], [], []
            dev_data_generator = data_generator['dev']
            eval_step = 0
            eval_time_begin = time.time()
            for example in dev_data_generator():
                eval_step += 1
                np_loss, np_acc, np_num_example = exe.run(
                    program=test_program,
                    feed=feeder.feed([example]),
                    fetch_list=[loss, accuracy, num_example])
                total_loss.extend(np_loss * np_num_example)
                total_acc.extend(np_acc * np_num_example)
                total_num_example.extend(np_num_example)
                eval_time_used = time.time() - eval_time_begin
            accum_num_example = np.sum(total_num_example)
            print("[Evaluation] loss={:.5f} acc={:.5f} [step/sec: {:.2f}]".
                  format(
                      np.sum(total_loss) / accum_num_example,
                      np.sum(total_acc) / accum_num_example,
                      eval_step / eval_time_used))

    if step % config.eval_interval == 0:
        # Final Test Block
        total_loss, total_acc, total_num_example = [], [], []
        test_data_generator = data_generator['test']
        for example in test_data_generator():
            np_loss, np_acc, np_num_example = exe.run(
                program=test_program,
                feed=feeder.feed([example]),
                fetch_list=[loss, accuracy, num_example])
            total_loss.extend(np_loss * np_num_example)
            total_acc.extend(np_acc * np_num_example)
            total_num_example.extend(np_num_example)
        accum_num_example = np.sum(total_num_example)
        print("[Final Test] loss={:.5f} acc={:.5f}".format(
            np.sum(total_loss) / accum_num_example,
            np.sum(total_acc) / accum_num_example))
class
Task
(
object
):
def
__init__
(
self
,
task_type
,
graph_var_dict
,
main_program
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录