Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleSlim
提交
51044022
P
PaddleSlim
项目概览
PaddlePaddle
/
PaddleSlim
大约 2 年 前同步成功
通知
51
Star
1434
Fork
344
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
53
列表
看板
标记
里程碑
合并请求
16
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleSlim
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
53
Issue
53
列表
看板
标记
里程碑
合并请求
16
合并请求
16
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
51044022
编写于
4月 16, 2020
作者:
W
wanghaoshuang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
1. Add the number of layers to the search space.
2. Add FLOPs and model size to the loss.
上级
0fa4b3c4
变更
5
展开全部
隐藏空白更改
内联
并排
Showing
5 changed file
with
88 addition
and
1242 deletion
+88
-1242
paddleslim/nas/darts/search_space/conv_bert/cls.py
paddleslim/nas/darts/search_space/conv_bert/cls.py
+26
-8
paddleslim/nas/darts/search_space/conv_bert/model/bert.py
paddleslim/nas/darts/search_space/conv_bert/model/bert.py
+9
-3
paddleslim/nas/darts/search_space/conv_bert/model/transformer_encoder.py
...darts/search_space/conv_bert/model/transformer_encoder.py
+53
-7
paddleslim/teachers/bert/reader/pretraining.py
paddleslim/teachers/bert/reader/pretraining.py
+0
-289
paddleslim/teachers/bert/reader/squad.py
paddleslim/teachers/bert/reader/squad.py
+0
-935
未找到文件。
paddleslim/nas/darts/search_space/conv_bert/cls.py
浏览文件 @
51044022
...
@@ -83,23 +83,34 @@ class AdaBERTClassifier(Layer):
...
@@ -83,23 +83,34 @@ class AdaBERTClassifier(Layer):
sentence_ids
=
data_ids
[
2
]
sentence_ids
=
data_ids
[
2
]
input_mask
=
data_ids
[
3
]
input_mask
=
data_ids
[
3
]
labels
=
data_ids
[
4
]
labels
=
data_ids
[
4
]
enc_outputs
,
next_sent_feats
=
self
.
student
(
src_ids
,
position_ids
,
flops
=
[]
sentence_ids
)
model_size
=
[]
enc_outputs
,
next_sent_feats
,
k_i
=
self
.
student
(
src_ids
,
position_ids
,
sentence_ids
,
flops
=
flops
,
model_size
=
model_size
)
self
.
teacher
.
eval
()
self
.
teacher
.
eval
()
total_loss
,
logits
,
losses
,
accuracys
,
num_seqs
=
self
.
teacher
(
total_loss
,
t_logits
,
t_
losses
,
accuracys
,
num_seqs
=
self
.
teacher
(
data_ids
)
data_ids
)
# define kd loss
kd_losses
=
[]
kd_losses
=
[]
for
t_logits
,
t_loss
,
s_sent_feat
,
fc
in
zip
(
for
i
in
range
(
len
(
next_sent_feats
)):
logits
,
losses
,
next_sent_feats
,
self
.
cls_fc
):
j
=
np
.
ceil
(
i
*
(
len
(
next_sent_feats
)
/
len
(
logits
)))
t_logit
=
t_logits
[
j
]
t_loss
=
t_losses
[
j
]
s_sent_feat
=
next_sent_feats
[
i
]
fc
=
self
.
cls_fc
[
i
]
s_sent_feat
=
fluid
.
layers
.
dropout
(
s_sent_feat
=
fluid
.
layers
.
dropout
(
x
=
s_sent_feat
,
x
=
s_sent_feat
,
dropout_prob
=
0.1
,
dropout_prob
=
0.1
,
dropout_implementation
=
"upscale_in_train"
)
dropout_implementation
=
"upscale_in_train"
)
s_logits
=
fc
(
s_sent_feat
)
s_logits
=
fc
(
s_sent_feat
)
t_probs
=
fluid
.
layers
.
softmax
(
t_logit
s
)
t_probs
=
fluid
.
layers
.
softmax
(
t_logit
)
s_probs
=
fluid
.
layers
.
softmax
(
s_logits
)
s_probs
=
fluid
.
layers
.
softmax
(
s_logits
)
t_probs
.
stop_gradient
=
False
t_probs
.
stop_gradient
=
False
kd_loss
=
t_probs
*
fluid
.
layers
.
log
(
s_probs
/
T
)
kd_loss
=
t_probs
*
fluid
.
layers
.
log
(
s_probs
/
T
)
...
@@ -110,9 +121,16 @@ class AdaBERTClassifier(Layer):
...
@@ -110,9 +121,16 @@ class AdaBERTClassifier(Layer):
kd_loss
=
fluid
.
layers
.
sum
(
kd_losses
)
kd_loss
=
fluid
.
layers
.
sum
(
kd_losses
)
# define ce loss
ce_loss
=
fluid
.
layers
.
cross_entropy
(
s_probs
,
labels
)
ce_loss
=
fluid
.
layers
.
cross_entropy
(
s_probs
,
labels
)
ce_loss
=
fluid
.
layers
.
mean
(
x
=
ce_loss
)
ce_loss
=
fluid
.
layers
.
mean
(
x
=
ce_loss
)
*
k_i
e_loss
=
1
# to be done
# define e loss
model_size
=
fluid
.
layers
.
sum
(
model_size
)
flops
=
fluid
.
layers
.
sum
(
flops
)
e_loss
=
(
len
(
next_sent_feats
)
*
k_i
/
self
.
_n_layer
)
*
(
flops
+
model_size
)
# define total loss
loss
=
(
1
-
gamma
)
*
ce_loss
-
gamma
*
kd_loss
+
beta
*
e_loss
loss
=
(
1
-
gamma
)
*
ce_loss
-
gamma
*
kd_loss
+
beta
*
e_loss
return
loss
return
loss
paddleslim/nas/darts/search_space/conv_bert/model/bert.py
浏览文件 @
51044022
...
@@ -85,7 +85,12 @@ class BertModelLayer(Layer):
...
@@ -85,7 +85,12 @@ class BertModelLayer(Layer):
def
arch_parameters
(
self
):
def
arch_parameters
(
self
):
return
[
self
.
_encoder
.
alphas
]
return
[
self
.
_encoder
.
alphas
]
def
forward
(
self
,
src_ids
,
position_ids
,
sentence_ids
):
def
forward
(
self
,
src_ids
,
position_ids
,
sentence_ids
,
flops
=
[],
model_size
=
[]):
"""
"""
forward
forward
"""
"""
...
@@ -96,7 +101,8 @@ class BertModelLayer(Layer):
...
@@ -96,7 +101,8 @@ class BertModelLayer(Layer):
emb_out
=
src_emb
+
pos_emb
emb_out
=
src_emb
+
pos_emb
emb_out
=
emb_out
+
sent_emb
emb_out
=
emb_out
+
sent_emb
enc_outputs
=
self
.
_encoder
(
emb_out
)
enc_outputs
,
k_i
=
self
.
_encoder
(
emb_out
,
flops
=
flops
,
model_size
=
model_size
)
if
not
self
.
return_pooled_out
:
if
not
self
.
return_pooled_out
:
return
enc_outputs
return
enc_outputs
...
@@ -109,4 +115,4 @@ class BertModelLayer(Layer):
...
@@ -109,4 +115,4 @@ class BertModelLayer(Layer):
next_sent_feat
,
shape
=
[
-
1
,
self
.
_emb_size
])
next_sent_feat
,
shape
=
[
-
1
,
self
.
_emb_size
])
next_sent_feats
.
append
(
next_sent_feat
)
next_sent_feats
.
append
(
next_sent_feat
)
return
enc_outputs
,
next_sent_feats
return
enc_outputs
,
next_sent_feats
,
k_i
paddleslim/nas/darts/search_space/conv_bert/model/transformer_encoder.py
浏览文件 @
51044022
...
@@ -29,6 +29,33 @@ PRIMITIVES = [
...
@@ -29,6 +29,33 @@ PRIMITIVES = [
'dil_conv_7'
,
'avg_pool_3'
,
'max_pool_3'
,
'none'
,
'skip_connect'
'dil_conv_7'
,
'avg_pool_3'
,
'max_pool_3'
,
'none'
,
'skip_connect'
]
]
input_size
=
128
*
768
FLOPs
=
{
'std_conv_3'
:
input_size
*
3
*
1
,
'std_conv_5'
:
input_size
*
5
*
1
,
'std_conv_7'
:
input_size
*
7
*
1
,
'dil_conv_3'
:
input_size
*
3
*
1
,
'dil_conv_5'
:
input_size
*
5
*
1
,
'dil_conv_7'
:
input_size
*
7
*
1
,
'avg_pool_3'
:
input_size
*
3
*
1
,
'max_pool_3'
:
input_size
*
3
*
1
,
'none'
:
0
,
'skip_connect'
:
0
,
}
ModelSize
=
{
'std_conv_3'
:
3
*
1
,
'std_conv_5'
:
5
*
1
,
'std_conv_7'
:
7
*
1
,
'dil_conv_3'
:
3
*
1
,
'dil_conv_5'
:
5
*
1
,
'dil_conv_7'
:
7
*
1
,
'avg_pool_3'
:
0
,
'max_pool_3'
:
0
,
'none'
:
0
,
'skip_connect'
:
0
,
}
OPS
=
{
OPS
=
{
'std_conv_3'
:
lambda
:
ConvBN
(
1
,
1
,
filter_size
=
3
,
dilation
=
1
),
'std_conv_3'
:
lambda
:
ConvBN
(
1
,
1
,
filter_size
=
3
,
dilation
=
1
),
...
@@ -50,9 +77,11 @@ class MixedOp(fluid.dygraph.Layer):
...
@@ -50,9 +77,11 @@ class MixedOp(fluid.dygraph.Layer):
ops
=
[
OPS
[
primitive
]()
for
primitive
in
PRIMITIVES
]
ops
=
[
OPS
[
primitive
]()
for
primitive
in
PRIMITIVES
]
self
.
_ops
=
fluid
.
dygraph
.
LayerList
(
ops
)
self
.
_ops
=
fluid
.
dygraph
.
LayerList
(
ops
)
def
forward
(
self
,
x
,
weights
):
def
forward
(
self
,
x
,
weights
,
flops
=
[],
model_size
=
[]
):
for
i
in
range
(
len
(
self
.
_ops
)):
for
i
in
range
(
len
(
self
.
_ops
)):
if
weights
[
i
]
!=
0
:
if
weights
[
i
]
!=
0
:
flops
.
append
(
FLOPs
.
values
()[
i
]
*
weights
[
i
])
model_size
.
append
(
ModelSize
.
values
()[
i
]
*
weights
[
i
])
return
self
.
_ops
[
i
](
x
)
*
weights
[
i
]
return
self
.
_ops
[
i
](
x
)
*
weights
[
i
]
...
@@ -132,13 +161,16 @@ class Cell(fluid.dygraph.Layer):
...
@@ -132,13 +161,16 @@ class Cell(fluid.dygraph.Layer):
ops
.
append
(
op
)
ops
.
append
(
op
)
self
.
_ops
=
fluid
.
dygraph
.
LayerList
(
ops
)
self
.
_ops
=
fluid
.
dygraph
.
LayerList
(
ops
)
def
forward
(
self
,
s0
,
s1
,
weights
,
weights2
=
None
):
def
forward
(
self
,
s0
,
s1
,
weights
,
weights2
=
None
,
flops
=
[],
model_size
=
[]
):
states
=
[
s0
,
s1
]
states
=
[
s0
,
s1
]
offset
=
0
offset
=
0
for
i
in
range
(
self
.
_steps
):
for
i
in
range
(
self
.
_steps
):
s
=
fluid
.
layers
.
sums
([
s
=
fluid
.
layers
.
sums
([
self
.
_ops
[
offset
+
j
](
h
,
weights
[
offset
+
j
])
self
.
_ops
[
offset
+
j
](
h
,
weights
[
offset
+
j
],
flops
=
flops
,
model_size
=
model_size
)
for
j
,
h
in
enumerate
(
states
)
for
j
,
h
in
enumerate
(
states
)
])
])
offset
+=
len
(
states
)
offset
+=
len
(
states
)
...
@@ -173,7 +205,13 @@ class EncoderLayer(Layer):
...
@@ -173,7 +205,13 @@ class EncoderLayer(Layer):
default_initializer
=
NormalInitializer
(
default_initializer
=
NormalInitializer
(
loc
=
0.0
,
scale
=
1e-3
))
loc
=
0.0
,
scale
=
1e-3
))
def
forward
(
self
,
enc_input
):
self
.
k
=
fluid
.
layers
.
create_parameter
(
shape
=
[
1
,
self
.
_n_layer
],
dtype
=
"float32"
,
default_initializer
=
NormalInitializer
(
loc
=
0.0
,
scale
=
1e-3
))
def
forward
(
self
,
enc_input
,
flops
=
[],
model_size
=
[]):
"""
"""
forward
forward
:param enc_input:
:param enc_input:
...
@@ -184,12 +222,20 @@ class EncoderLayer(Layer):
...
@@ -184,12 +222,20 @@ class EncoderLayer(Layer):
[
-
1
,
1
,
enc_input
.
shape
[
1
],
self
.
_d_model
])
[
-
1
,
1
,
enc_input
.
shape
[
1
],
self
.
_d_model
])
alphas
=
gumbel_softmax
(
self
.
alphas
)
alphas
=
gumbel_softmax
(
self
.
alphas
)
k
=
gumbel_softmax
(
self
.
k
)
outputs
=
[]
outputs
=
[]
s0
=
s1
=
tmp
s0
=
s1
=
tmp
for
i
,
cell
in
enumerate
(
self
.
_cells
):
for
i
in
range
(
self
.
_n_layer
):
s0
,
s1
=
s1
,
cell
(
s0
,
s1
,
alphas
)
s0
,
s1
=
s1
,
self
.
_cells
[
i
](
s0
,
s1
,
alphas
,
flops
=
flops
,
model_size
=
model_size
)
enc_output
=
fluid
.
layers
.
reshape
(
enc_output
=
fluid
.
layers
.
reshape
(
s1
,
[
-
1
,
enc_input
.
shape
[
1
],
self
.
_d_model
])
s1
,
[
-
1
,
enc_input
.
shape
[
1
],
self
.
_d_model
])
outputs
.
append
(
enc_output
)
outputs
.
append
(
enc_output
)
return
outputs
if
k
[
i
]
!=
0
:
outputs
[
-
1
]
=
outputs
[
-
1
]
*
k
[
i
]
break
return
outputs
,
k
[
i
]
paddleslim/teachers/bert/reader/pretraining.py
已删除
100644 → 0
浏览文件 @
0fa4b3c4
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
from
__future__
import
division
import
os
import
numpy
as
np
import
types
import
gzip
import
logging
import
re
import
six
import
collections
import
tokenization
import
paddle
import
paddle.fluid
as
fluid
from
batching
import
prepare_batch_data
class DataReader(object):
    """Read pre-tokenized pretraining samples from gzip files and batch them.

    Every line of a data file holds four ``;``-separated fields: token ids,
    sentence ids, position ids (each a space-separated list of integers) and
    an integer label. ``data_generator`` turns the files found in
    ``data_dir`` into batches that are finalized by ``prepare_batch_data``.
    """

    def __init__(self,
                 data_dir,
                 vocab_path,
                 batch_size=4096,
                 in_tokens=True,
                 max_seq_len=512,
                 shuffle_files=True,
                 epoch=100,
                 voc_size=0,
                 is_test=False,
                 generate_neg_sample=False):
        """Store the reader configuration and load the vocabulary.

        Args:
            data_dir: directory whose entries are the ``.gz`` data files.
            vocab_path: path of the vocabulary file; it must define the
                ``[PAD]``, ``[CLS]``, ``[SEP]`` and ``[MASK]`` tokens.
            batch_size: batch capacity. When ``in_tokens`` is true it bounds
                ``number of samples * longest sample`` in a batch, otherwise
                it bounds the number of samples.
            in_tokens: selects how ``batch_size`` is interpreted (see above).
            max_seq_len: samples longer than this are dropped.
            shuffle_files: shuffle the file order at the start of each epoch.
            epoch: number of passes over the files.
            voc_size: forwarded to ``prepare_batch_data``.
            is_test: when true, forces one epoch and disables file shuffling
                and negative-sample generation.
            generate_neg_sample: when true (and not testing), mix generated
                negative samples into the positive ones read from disk.

        Raises:
            AssertionError: if ``in_tokens`` is set and ``batch_size`` is
                smaller than ``max_seq_len``.
            KeyError: if one of the special tokens is missing from the vocab.
        """
        self.vocab = self.load_vocab(vocab_path)
        self.data_dir = data_dir
        self.batch_size = batch_size
        self.in_tokens = in_tokens
        self.shuffle_files = shuffle_files
        self.epoch = epoch
        self.current_epoch = 0
        self.current_file_index = 0
        self.total_file = 0
        self.current_file = None
        self.voc_size = voc_size
        self.max_seq_len = max_seq_len
        self.pad_id = self.vocab["[PAD]"]
        self.cls_id = self.vocab["[CLS]"]
        self.sep_id = self.vocab["[SEP]"]
        self.mask_id = self.vocab["[MASK]"]
        self.is_test = is_test
        self.generate_neg_sample = generate_neg_sample
        if self.in_tokens:
            assert self.batch_size >= self.max_seq_len, "The number of " \
                "tokens in batch should not be smaller than max seq length."

        if self.is_test:
            # Evaluation reads the data exactly once, in a fixed order.
            self.epoch = 1
            self.shuffle_files = False

    def get_progress(self):
        """Return the current progress over the training data.

        Returns:
            A tuple ``(current_epoch, current_file_index, total_file,
            current_file)``.
        """
        return (self.current_epoch, self.current_file_index, self.total_file,
                self.current_file)

    def parse_line(self, line, max_seq_len=512):
        """Parse one raw line into token ids, sentence ids, position ids, label.

        Args:
            line: a ``bytes`` line holding four ``;``-separated fields.
            max_seq_len: maximum number of tokens accepted.

        Returns:
            ``[token_ids, sent_ids, pos_ids, label]``, or ``None`` when the
            sample has more than ``max_seq_len`` tokens.

        Raises:
            AssertionError: if the line does not have exactly four fields or
                the three id lists differ in length.
        """
        line = line.strip().decode().split(";")
        assert len(line) == 4, "One sample must have 4 fields!"
        (token_ids, sent_ids, pos_ids, label) = line
        token_ids = [int(token) for token in token_ids.split(" ")]
        sent_ids = [int(token) for token in sent_ids.split(" ")]
        pos_ids = [int(token) for token in pos_ids.split(" ")]
        assert len(token_ids) == len(sent_ids) == len(
            pos_ids
        ), "[Must be true]len(token_ids) == len(sent_ids) == len(pos_ids)"
        label = int(label)
        if len(token_ids) > max_seq_len:
            return None
        return [token_ids, sent_ids, pos_ids, label]

    def read_file(self, file):
        """Yield the parsed samples of one gzip file located in ``data_dir``.

        Samples that ``parse_line`` rejects as too long are skipped.

        Args:
            file: file name (relative to ``data_dir``) ending in ``.gz``.

        Raises:
            AssertionError: if ``file`` does not end with ``.gz``.
        """
        assert file.endswith('.gz'), "[ERROR] %s is not a gzip file" % file
        file_path = os.path.join(self.data_dir, file)
        with gzip.open(file_path, "rb") as f:
            for line in f:
                parsed_line = self.parse_line(
                    line, max_seq_len=self.max_seq_len)
                if parsed_line is None:
                    continue
                yield parsed_line

    def convert_to_unicode(self, text):
        """Converts `text` to Unicode (if it's not already), assuming utf-8 input."""
        if six.PY3:
            if isinstance(text, str):
                return text
            elif isinstance(text, bytes):
                return text.decode("utf-8", "ignore")
            else:
                raise ValueError("Unsupported string type: %s" % (type(text)))
        elif six.PY2:
            if isinstance(text, str):
                return text.decode("utf-8", "ignore")
            # `unicode` only exists on Python 2, which is the only
            # interpreter that reaches this branch.
            elif isinstance(text, unicode):  # noqa: F821
                return text
            else:
                raise ValueError("Unsupported string type: %s" % (type(text)))
        else:
            raise ValueError("Not running on Python2 or Python 3?")

    def load_vocab(self, vocab_file):
        """Load a vocabulary file into an ordered ``token -> id`` dictionary.

        Each line is either ``token`` (the id is then the zero-based line
        number) or ``token<TAB>id``. Reading stops at the first line that
        has more than two tab-separated fields.
        """
        vocab = collections.OrderedDict()
        # Use a context manager so the file handle is always released.
        with open(vocab_file) as fin:
            for num, line in enumerate(fin):
                items = self.convert_to_unicode(line.strip()).split("\t")
                if len(items) > 2:
                    break
                token = items[0]
                index = items[1] if len(items) == 2 else num
                token = token.strip()
                vocab[token] = int(index)
        return vocab

    def random_pair_neg_samples(self, pos_samples):
        """Randomly generate negative samples from positive samples.

        ``pos_samples`` is shuffled in place. The first segment of sample
        ``i`` (up to and including its first separator) is then joined with
        the second segment of sample ``i + 1`` (wrapping around).

        Args:
            pos_samples: list of positive samples.

        Returns:
            ``(neg_samples, miss_num)``: the generated negative samples
            (label 0) and the number of pairs dropped because the joined
            sequence reached ``self.max_seq_len``.
        """
        np.random.shuffle(pos_samples)
        num_sample = len(pos_samples)
        neg_samples = []
        miss_num = 0

        for i in range(num_sample):
            pair_index = (i + 1) % num_sample
            origin_src_ids = pos_samples[i][0]
            # NOTE(review): 2 is presumably the id of the [SEP] token;
            # confirm it matches self.sep_id for the vocabulary in use.
            origin_sep_index = origin_src_ids.index(2)
            pair_src_ids = pos_samples[pair_index][0]
            pair_sep_index = pair_src_ids.index(2)

            first_segment = origin_src_ids[:origin_sep_index + 1]
            second_segment = pair_src_ids[pair_sep_index + 1:]
            src_ids = first_segment + second_segment
            if len(src_ids) >= self.max_seq_len:
                miss_num += 1
                continue
            sent_ids = [0] * len(first_segment) + [1] * len(second_segment)
            pos_ids = list(range(len(src_ids)))
            neg_sample = [src_ids, sent_ids, pos_ids, 0]
            assert len(src_ids) == len(sent_ids) == len(
                pos_ids
            ), "[ERROR]len(src_id) == lne(sent_id) == len(pos_id) must be True"
            neg_samples.append(neg_sample)
        return neg_samples, miss_num

    def _shuffle_with_negatives(self, pos_samples):
        """Return ``(samples, miss_num)``: positives plus negatives, shuffled."""
        neg_samples, miss_num = self.random_pair_neg_samples(pos_samples)
        samples = pos_samples + neg_samples
        np.random.shuffle(samples)
        return samples, miss_num

    def mixin_negtive_samples(self, pos_sample_generator, buffer=1000):
        """Mix generated negative samples into a stream of positive samples.

        1. Generate negative samples by randomly regrouping sentence_1 and
           sentence_2 of buffered positive samples.
        2. Combine and shuffle the negative and positive samples.

        Args:
            pos_sample_generator: a generator producing parsed positive
                samples, each a list ``[token_ids, sent_ids, pos_ids, 1]``.
            buffer: number of positive samples collected before negatives
                are generated for them.

        Yields:
            One sample at a time from the shuffled positive and negative
            samples. When the input ends with an empty buffer, a single
            ``None`` is yielded; callers are expected to skip it.

        Raises:
            AssertionError: if a positive sample's label is not 1.
        """
        pos_samples = []
        num_total_miss = 0
        pos_sample_num = 0
        try:
            while True:
                while len(pos_samples) < buffer:
                    pos_sample = next(pos_sample_generator)
                    label = pos_sample[3]
                    assert label == 1, "positive sample's label must be 1"
                    pos_samples.append(pos_sample)
                    pos_sample_num += 1

                samples, miss_num = self._shuffle_with_negatives(pos_samples)
                num_total_miss += miss_num
                pos_samples = []
                for sample in samples:
                    yield sample
        except StopIteration:
            print("stopiteration: reach end of file")
            if len(pos_samples) == 1:
                # A single leftover sample cannot be paired with another one.
                yield pos_samples[0]
            elif len(pos_samples) == 0:
                yield None
            else:
                samples, miss_num = self._shuffle_with_negatives(pos_samples)
                num_total_miss += miss_num
                pos_samples = []
                for sample in samples:
                    yield sample
            ideal_total = pos_sample_num * 2
            # An empty input has no samples at all; report a zero miss rate
            # instead of dividing by zero.
            miss_rate = num_total_miss / ideal_total if ideal_total else 0.0
            print("miss_num:%d\tideal_total_sample_num:%d\tmiss_rate:%f" %
                  (num_total_miss, ideal_total, miss_rate))

    def data_generator(self):
        """Build the batch generator over every file in ``data_dir``.

        Returns:
            A zero-argument function; calling it returns a generator that
            yields the result of ``prepare_batch_data`` for each batch.

        Raises:
            AssertionError: if ``data_dir`` contains no entries.
        """
        files = os.listdir(self.data_dir)
        self.total_file = len(files)
        assert self.total_file > 0, "[Error] data_dir is empty"

        def wrapper():
            def reader():
                """Yield samples file by file, epoch by epoch."""
                for epoch in range(self.epoch):
                    self.current_epoch = epoch + 1
                    if self.shuffle_files:
                        np.random.shuffle(files)
                    for index, file in enumerate(files):
                        self.current_file_index = index + 1
                        self.current_file = file
                        sample_generator = self.read_file(file)
                        if not self.is_test and self.generate_neg_sample:
                            sample_generator = self.mixin_negtive_samples(
                                sample_generator)
                        for sample in sample_generator:
                            # mixin_negtive_samples yields None for a file
                            # that ends with an empty buffer.
                            if sample is None:
                                continue
                            yield sample

            def batch_reader(reader, batch_size, in_tokens):
                """Group samples into ``(batch, total_token_num)`` pairs."""
                batch, total_token_num, max_len = [], 0, 0
                for parsed_line in reader():
                    token_ids, sent_ids, pos_ids, label = parsed_line
                    max_len = max(max_len, len(token_ids))
                    if in_tokens:
                        # Capacity counts tokens after padding every sample
                        # to the longest one in the batch.
                        to_append = (len(batch) + 1) * max_len <= batch_size
                    else:
                        to_append = len(batch) < batch_size
                    if to_append:
                        batch.append(parsed_line)
                        total_token_num += len(token_ids)
                    else:
                        yield batch, total_token_num
                        # The sample that did not fit starts the next batch.
                        batch, total_token_num, max_len = [parsed_line], len(
                            token_ids), len(token_ids)

                if len(batch) > 0:
                    yield batch, total_token_num

            for batch_data, total_token_num in batch_reader(
                    reader, self.batch_size, self.in_tokens):
                yield prepare_batch_data(
                    batch_data,
                    total_token_num,
                    voc_size=self.voc_size,
                    pad_id=self.pad_id,
                    cls_id=self.cls_id,
                    sep_id=self.sep_id,
                    mask_id=self.mask_id,
                    return_input_mask=True,
                    return_max_len=False,
                    return_num_token=False)

        return wrapper
# Script entry point: running this module directly does nothing.
if __name__ == "__main__":
    pass
paddleslim/teachers/bert/reader/squad.py
已删除
100644 → 0
浏览文件 @
0fa4b3c4
此差异已折叠。
点击以展开。
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录