PaddlePaddle / PLSC
Commit c56ceffc (unverified)
Authored Jan 09, 2020 by lilong12; committed via GitHub on Jan 09, 2020
fix the compatibility issue between PY2 and PY3 (#21)

1. Be compatible with PY3. 2. Reformat the code.

Parent: a36148cf
Showing 15 changed files with 1371 additions and 1160 deletions (+1371 / -1160).
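Most of the diff below is mechanical: the same few Python 3 fixes recur throughout the package. A minimal, hand-written sketch of those recurring patterns (illustrative only, not code taken from the repository; the metadata values are example numbers) looks like this:

    import json
    import logging

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    # xrange() is Python-2-only; range() works under both interpreters.
    for step in range(3):
        # %-style print statements give way to logger.info with str.format().
        logger.info("step {}".format(step))

    # Training metadata is written as JSON text ('w') instead of a pickled
    # binary blob ('wb'), so it can be read back from either Python version.
    meta = {"pretrain_nranks": 8, "emb_dim": 512, "num_classes": 85742}
    with open("meta.json", "w") as f:
        json.dump(meta, f)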
.gitignore                               +1    -0
plsc/__init__.py                         +1    -0
plsc/config.py                           +2    -2
plsc/entry.py                            +410  -318
plsc/models/__init__.py                  +5    -2
plsc/models/base_model.py                +71   -48
plsc/models/dist_algo.py                 +82   -77
plsc/models/resnet.py                    +72   -43
plsc/utils/__init__.py                   +0    -1
plsc/utils/base64_reader.py              +82   -47
plsc/utils/jpeg_reader.py                +37   -20
plsc/utils/parameter_converter.py        +583  -0
plsc/utils/process_distfc_parameter.py   +0    -572
plsc/version.py                          +1    -1
tools/process_base64_files.py            +24   -29
.gitignore  (@ c56ceffc)

 *.pyc
+.idea
 *.DS_Store
plsc/__init__.py  (@ c56ceffc)

@@ -13,5 +13,6 @@
 # limitations under the License.
 from .entry import Entry
+from .version import plsc_version as __version__

 __all__ = ['Entry']
plsc/config.py  (@ c56ceffc)

@@ -35,9 +35,9 @@ config.warmup_epochs = 0
 config.loss_type = "dist_arcface"
 config.num_classes = 85742
 config.image_shape = (3, 112, 112)
 config.margin = 0.5
 config.scale = 64.0
 config.lr = 0.1
 config.lr_steps = (100000, 160000, 220000)
 config.emb_dim = 512
plsc/entry.py  (@ c56ceffc)

@@ -12,36 +12,38 @@
The import block is regrouped and sorted. Newly imported: json (training metadata is now written as JSON) and ParameterConverter from the new plsc/utils/parameter_converter.py module; DistributedClassificationOptimizer is now imported from .models.dist_algo rather than from .models. The rest of the block — the standard-library imports (argparse, errno, logging, math, os, pickle, shutil, subprocess, sys, tempfile, time), the third-party imports (numpy as np, paddle, paddle.fluid as fluid, role_maker, dist_transpiler, sklearn, fleet / DistributedStrategy, Optimizer, program_to_code) and the package-local imports (config, base_model, resnet, jpeg_reader as reader, lr_warmup, evaluate) — is reordered into those groups. The trailing call is unchanged:

 logging.basicConfig(
     level=logging.INFO,
@@ -59,9 +61,6 @@ class Entry(object):  (_check)
     """
     Check the validation of parameters.
     """
-    assert os.getenv("PADDLE_TRAINERS_NUM") is not None, \
-        "Please start script using paddle.distributed.launch module."
     supported_types = ["softmax", "arcface",
                        "dist_softmax", "dist_arcface"]
     assert self.loss_type in supported_types, \

@@ -70,7 +69,8 @@
     if self.loss_type in ["dist_softmax", "dist_arcface"]:
         assert self.num_trainers > 1, \
-            "At least 2 trainers are required to use distributed fc-layer."
+            "At least 2 trainers are required for distributed fc-layer. " \
+            "You can start your job using paddle.distributed.launch module."

 def __init__(self):
     self.config = config.config

@@ -89,6 +89,7 @@
     self.model = None
     self.train_reader = None
     self.test_reader = None
+    self.predict_reader = None
     self.train_program = fluid.Program()
     self.startup_program = fluid.Program()

@@ -97,7 +98,15 @@
     self.fs_name = None
     self.fs_ugi = None
-    self.fs_dir = None
+    self.fs_dir_for_save = None
+    self.fs_checkpoint_dir = None
+    self.param_attr = None
+    self.bias_attr = None
+    self.has_run_train = False  # Whether has run training or not
+    self.test_initialized = False
+    self.train_pass_id = -1
     self.use_fp16 = False
     self.fp16_user_dict = None
@@ -150,13 +159,13 @@  (set_mixed_precision — arguments unchanged, only re-wrapped)
 def set_mixed_precision(self,
                         use_fp16,
                         init_loss_scaling=1.0,
                         incr_every_n_steps=2000,
                         decr_every_n_nan_or_inf=2,
                         incr_ratio=2.0,
                         decr_ratio=0.5,
                         use_dynamic_loss_scaling=True,
                         amp_lists=None):
     """
     Whether to use mixed precision training.
     """

@@ -178,7 +187,11 @@
     self.global_test_batch_size = batch_size * self.num_trainers
     logger.info("Set test batch size to {}.".format(batch_size))

-def set_hdfs_info(self, fs_name, fs_ugi, directory):
+def set_hdfs_info(self,
+                  fs_name,
+                  fs_ugi,
+                  fs_dir_for_save=None,
+                  fs_checkpoint_dir=None):
     """
     Set the info to download from or upload to hdfs filesystems.
     If the information is provided, we will download pretrained

@@ -187,11 +200,13 @@
     """
     self.fs_name = fs_name
     self.fs_ugi = fs_ugi
-    self.fs_dir = directory
+    self.fs_dir_for_save = fs_dir_for_save
+    self.fs_checkpoint_dir = fs_checkpoint_dir
     logger.info("HDFS Info:")
     logger.info("\tfs_name: {}".format(fs_name))
     logger.info("\tfs_ugi: {}".format(fs_ugi))
-    logger.info("\tremote directory: {}".format(directory))
+    logger.info("\tfs dir for save: {}".format(self.fs_dir_for_save))
+    logger.info("\tfs checkpoint dir: {}".format(self.fs_checkpoint_dir))

 def set_model_save_dir(self, directory):
@@ -207,7 +222,7 @@
     Whether to calcuate acc1 and acc5 during training.
     """
     self.calc_train_acc = calc
-    logger.info("Calcuating acc1 and acc5 during training: {}.".format(calc))
+    logger.info("Calculating acc1 and acc5 during training: {}.".format(calc))

 def set_dataset_dir(self, directory):

@@ -237,8 +252,8 @@
     """
     Set the size of the last hidding layer before the distributed fc-layer.
     """
-    self.emb_size = size
-    logger.info("Set emb_size to {}.".format(size))
+    self.emb_dim = size
+    logger.info("Set emb_dim to {}.".format(size))

 def set_model(self, model):

@@ -270,13 +285,13 @@
     self.warmup_epochs = num
     logger.info("Set warmup_epochs to {}.".format(num))

-def set_loss_type(self, type):
+def set_loss_type(self, loss_type):
     supported_types = ["dist_softmax", "dist_arcface", "softmax", "arcface"]
-    if not type in supported_types:
+    if loss_type not in supported_types:
         raise ValueError("All supported loss types: {}".format(
             supported_types))
-    self.loss_type = type
-    logger.info("Set loss_type to {}.".format(type))
+    self.loss_type = loss_type
+    logger.info("Set loss_type to {}.".format(loss_type))

 def set_image_shape(self, shape):
     if not isinstance(shape, (list, tuple)):
@@ -286,9 +301,21 @@
 def set_optimizer(self, optimizer):
     if not isinstance(optimizer, Optimizer):
-        raise ValueError("Optimizer must be type of Optimizer")
+        raise ValueError("Optimizer must be of type Optimizer")
     self.optimizer = optimizer
-    logger.info("User manually set optimizer")
+    logger.info("User manually set optimizer.")

+def set_with_test(self, with_test):
+    self.with_test = with_test
+    logger.info("Set with_test to {}.".format(with_test))
+
+def set_distfc_attr(self, param_attr=None, bias_attr=None):
+    self.param_attr = param_attr
+    logger.info("Set param_attr for distfc to {}.".format(self.param_attr))
+    if self.bias_attr:
+        self.bias_attr = bias_attr
+        logger.info("Set bias_attr for distfc to {}.".format(self.bias_attr))

 def _get_optimizer(self):
     if not self.optimizer:

@@ -310,7 +337,10 @@  (learning-rate schedule — re-wrapped only)
     logger.info("lr_step: {}".format(lr))
     if self.warmup_epochs:
         lr_val = lr_warmup(fluid.layers.piecewise_decay(boundaries=bd,
                                                         values=lr),
                            warmup_steps, start_lr, base_lr)
     else:
         lr_val = fluid.layers.piecewise_decay(boundaries=bd, values=lr)
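Putting the setters touched above together, a typical configuration of an Entry instance might look like the sketch below. It is illustrative only: the dataset path is a placeholder, while the method names are the ones that appear in this diff (set_dataset_dir, set_loss_type, set_image_shape, the new set_with_test, train).

    import plsc

    ins = plsc.Entry()
    ins.set_dataset_dir("./data/faces_emore")   # placeholder dataset directory
    ins.set_loss_type("dist_arcface")           # softmax, arcface, dist_softmax or dist_arcface
    ins.set_image_shape((3, 112, 112))
    ins.set_with_test(True)                     # run the refactored test() after every pass
    ins.train()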
@@ -321,25 +351,30 @@  (_get_optimizer)
     if self.loss_type in ["dist_softmax", "dist_arcface"]:
         self.optimizer = DistributedClassificationOptimizer(
-            self.optimizer, global_batch_size, use_fp16=self.use_fp16,
+            self.optimizer,
+            self.train_batch_size,
+            use_fp16=self.use_fp16,
             loss_type=self.loss_type,
             fp16_user_dict=self.fp16_user_dict)
     elif self.use_fp16:
         self.optimizer = fluid.contrib.mixed_precision.decorate(
-            optimizer=optimizer,
+            optimizer=self.optimizer,
             init_loss_scaling=self.fp16_user_dict['init_loss_scaling'],
             incr_every_n_steps=self.fp16_user_dict['incr_every_n_steps'],
             decr_every_n_nan_or_inf=self.fp16_user_dict['decr_every_n_nan_or_inf'],
             incr_ratio=self.fp16_user_dict['incr_ratio'],
             decr_ratio=self.fp16_user_dict['decr_ratio'],
             use_dynamic_loss_scaling=self.fp16_user_dict['use_dynamic_loss_scaling'],
             amp_lists=self.fp16_user_dict['amp_lists']
         )
     return self.optimizer

 def build_program(self,
                   is_train=True,
-                  use_parallel_test=False):
+                  use_parallel_test=False,
+                  dist_strategy=None):
     model_name = self.model_name
     assert not (is_train and use_parallel_test), \
         "is_train and use_parallel_test cannot be set simultaneously."

@@ -357,18 +392,23 @@  (build_program)
     with fluid.program_guard(main_program, startup_program):
         with fluid.unique_name.guard():
             image = fluid.layers.data(name='image',
                                       shape=image_shape, dtype='float32')
             label = fluid.layers.data(name='label',
                                       shape=[1], dtype='int64')
             emb, loss, prob = model.get_output(input=image,
                                                label=label,
+                                               num_ranks=num_trainers,
+                                               rank_id=trainer_id,
                                                is_train=is_train,
                                                num_classes=self.num_classes,
                                                loss_type=self.loss_type,
+                                               param_attr=self.param_attr,
+                                               bias_attr=self.bias_attr,
                                                margin=self.margin,
                                                scale=self.scale)

             acc1 = None
             acc5 = None
@@ -377,78 +417,93 @@  (build_program, continued)
The block that all-gathers the shard probabilities and labels to compute acc1/acc5 is unchanged apart from re-wrapping:

             if self.calc_train_acc:
                 shard_prob = loss._get_info("shard_prob")
                 prob_all = fluid.layers.collective._c_allgather(
                     shard_prob, nranks=num_trainers, use_calc_stream=True)
                 prob_list = fluid.layers.split(
                     prob_all, dim=0, num_or_sections=num_trainers)
                 prob = fluid.layers.concat(prob_list, axis=1)
                 label_all = fluid.layers.collective._c_allgather(
                     label, nranks=num_trainers, use_calc_stream=True)
                 acc1 = fluid.layers.accuracy(input=prob, label=label_all, k=1)
                 acc5 = fluid.layers.accuracy(input=prob, label=label_all, k=5)
         else:
             if self.calc_train_acc:
                 acc1 = fluid.layers.accuracy(input=prob, label=label, k=1)
                 acc5 = fluid.layers.accuracy(input=prob, label=label, k=5)

Minimizing the loss now supports single-card runs and uses the new dist_strategy argument instead of the self.fleet / self.strategy attributes:

         optimizer = None
         if is_train:
             # initialize optimizer
             optimizer = self._get_optimizer()
-            dist_optimizer = self.fleet.distributed_optimizer(
-                optimizer, strategy=self.strategy)
-            dist_optimizer.minimize(loss)
+            if self.num_trainers > 1:
+                dist_optimizer = fleet.distributed_optimizer(
+                    optimizer, strategy=dist_strategy)
+                dist_optimizer.minimize(loss)
+            else:
+                # single card training
+                optimizer.minimize(loss)
             if "dist" in self.loss_type or self.use_fp16:
                 optimizer = optimizer._optimizer
         elif use_parallel_test:
             emb = fluid.layers.collective._c_allgather(
                 emb, nranks=num_trainers, use_calc_stream=True)
         return emb, loss, acc1, acc5, optimizer

get_files_from_hdfs() and put_files_to_hdfs() are reworked around the new set_hdfs_info() fields: the download source comes from self.fs_checkpoint_dir and the upload target from self.fs_dir_for_save, replacing the removed self.fs_dir, and each method now asserts that the corresponding field is configured:

+        assert self.fs_checkpoint_dir, \
+            logger.error("Please set the fs_checkpoint_dir paramerters for "
+                         "set_hdfs_info to get models from hdfs.")
+        self.fs_checkpoint_dir = os.path.join(self.fs_checkpoint_dir, '*')
         cmd = "hadoop fs -D fs.default.name="
         cmd += self.fs_name + " "
         cmd += "-D hadoop.job.ugi="
         cmd += self.fs_ugi + " "
-        cmd += "-get " + self.fs_dir
+        cmd += "-get " + self.fs_checkpoint_dir
         ...
+        assert self.fs_dir_for_save, \
+            logger.error("Please set fs_dir_for_save paramerter "
+                         "for set_hdfs_info to save models to hdfs.")
         ...
         cmd += "-put " + local_dir
-        cmd += " " + self.fs_dir
+        cmd += " " + self.fs_dir_for_save
         logger.info("hdfs upload cmd: {}".format(cmd))
         cmd = cmd.split(' ')
         process = subprocess.Popen(cmd,
                                    stdout=sys.stdout,
                                    stderr=subprocess.STDOUT)
         process.wait()

preprocess_distributed_params() is renamed to process_distributed_params() and no longer shells out to plsc.utils.process_distfc_parameter (that module is deleted in this commit); the conversion now happens in-process:

         local_dir = os.path.abspath(local_dir)
         output_dir = tempfile.mkdtemp()
-        cmd = sys.executable + ' -m plsc.utils.process_distfc_parameter '
-        cmd += "--nranks {} ".format(self.num_trainers)
-        cmd += "--num_classes {} ".format(self.num_classes)
-        cmd += "--pretrained_model_dir {} ".format(local_dir)
-        cmd += "--output_dir {}".format(output_dir)
-        cmd = cmd.split(' ')
-        logger.info("Distributed parameters processing cmd: {}".format(cmd))
-        process = subprocess.Popen(cmd,
-                                   stdout=sys.stdout,
-                                   stderr=subprocess.STDOUT)
-        process.wait()
+        converter = ParameterConverter(local_dir, output_dir,
+                                       self.num_trainers)
+        converter.process()
         for file in os.listdir(local_dir):
             if "dist@" in file and "@rank@" in file:
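For reference, a minimal sketch of the new in-process conversion path, using only the ParameterConverter(local_dir, output_dir, num_trainers) constructor and process() method that appear in the hunk above; the paths and trainer count are placeholders:

    import tempfile

    from plsc.utils.parameter_converter import ParameterConverter

    local_dir = "/path/to/pretrained_model"   # placeholder checkpoint directory
    output_dir = tempfile.mkdtemp()           # temporary directory for converted parameters
    num_trainers = 8                          # placeholder trainer count

    converter = ParameterConverter(local_dir, output_dir, num_trainers)
    converter.process()                       # re-shards the distributed fc parameters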
@@ -477,7 +532,6 @@  (_append_broadcast_ops — context only)
             outputs={'Out': var},
             attrs={'use_calc_stream': True})

 def load_checkpoint(self,
                     executor,
                     main_program,

@@ -493,30 +547,28 @@  (load_checkpoint)
     if os.path.exists(checkpoint_dir):
         logger.info("Local dir {} exists, we'll overwrite it.".format(
             checkpoint_dir))

The trainer synchronization around the HDFS download is kept: trainer 0 downloads the checkpoint (via the reworked get_files_from_hdfs) and then creates a '.lock' file that the other trainers poll before continuing; the handling of the local checkpoint_dir (shutil.rmtree / os.makedirs before downloading) is adjusted in the same change:

     # sync all trainers to avoid loading checkpoints before
     # parameters are downloaded
     file_name = os.path.join(checkpoint_dir, '.lock')
     if self.trainer_id == 0:
         self.get_files_from_hdfs(...)
         with open(file_name, 'w') as f:
             pass
         time.sleep(10)
         os.remove(file_name)
     else:
         while True:
             if not os.path.exists(file_name):
                 time.sleep(1)
             else:
                 break

     # Preporcess distributed parameters.
     file_name = os.path.join(checkpoint_dir, '.lock')
     distributed = self.loss_type in ["dist_softmax", "dist_arcface"]
     if load_for_train and self.trainer_id == 0 and distributed:
-        self.preprocess_distributed_params(checkpoint_dir)
+        self.process_distributed_params(checkpoint_dir)
         with open(file_name, 'w') as f:
             pass
         time.sleep(10)

@@ -532,11 +584,13 @@
     def if_exist(var):
         has_var = os.path.exists(os.path.join(checkpoint_dir, var.name))
         if has_var:
-            print('var: %s found' % (var.name))
+            logger.info('var: %s found' % (var.name))
         return has_var

     fluid.io.load_vars(executor,
                        checkpoint_dir,
                        predicate=if_exist,
                        main_program=main_program)

 def convert_for_prediction(self):
     model_name = self.model_name
@@ -545,19 +599,20 @@  (convert_for_prediction)
     model = self.model
     if model is None:
         model = resnet.__dict__[model_name](emb_dim=self.emb_dim)
-    main_program = self.train_program
+    main_program = self.predict_program
     startup_program = self.startup_program
     with fluid.program_guard(main_program, startup_program):
         with fluid.unique_name.guard():
             image = fluid.layers.data(name='image',
                                       shape=image_shape, dtype='float32')
             label = fluid.layers.data(name='label',
                                       shape=[1], dtype='int64')
             emb = model.build_network(input=image,
                                       label=label,
                                       is_train=False)
     gpu_id = int(os.getenv("FLAGS_selected_gpus", 0))
     place = fluid.CUDAPlace(gpu_id)

@@ -565,8 +620,9 @@  (re-wrapped only)
     exe.run(startup_program)
     assert self.checkpoint_dir, "No checkpoint found for converting."
     self.load_checkpoint(executor=exe,
                          main_program=main_program,
                          load_for_train=False)
     assert self.model_save_dir, \
         "Does not set model_save_dir for inference model converting."

@@ -582,6 +638,16 @@
     if self.fs_name:
         self.put_files_to_hdfs(self.model_save_dir)

+def _set_info(self, key, value):
+    if not hasattr(self, '_info'):
+        self._info = {}
+    self._info[key] = value
+
+def _get_info(self, key):
+    if hasattr(self, '_info') and key in self._info:
+        return self._info[key]
+    return None

 def predict(self):
     model_name = self.model_name
     image_shape = [int(m) for m in self.image_shape]
@@ -594,14 +660,15 @@  (predict — re-wrapped only)
     with fluid.program_guard(main_program, startup_program):
         with fluid.unique_name.guard():
             image = fluid.layers.data(name='image',
                                       shape=image_shape, dtype='float32')
             label = fluid.layers.data(name='label',
                                       shape=[1], dtype='int64')
             emb = model.build_network(input=image,
                                       label=label,
                                       is_train=False)
     gpu_id = int(os.getenv("FLAGS_selected_gpus", 0))
     place = fluid.CUDAPlace(gpu_id)

@@ -609,104 +676,77 @@  (predict / test)
     exe.run(startup_program)
     assert self.checkpoint_dir, "No checkpoint found for predicting."
     self.load_checkpoint(executor=exe,
                          main_program=main_program,
                          load_for_train=False)
-    if self.train_reader is None:
+    if self.predict_reader is None:
         predict_reader = paddle.batch(reader.arc_train(self.dataset_dir,
                                                        self.num_classes),
                                       batch_size=self.train_batch_size)
     else:
-        predict_reader = self.train_reader
+        predict_reader = self.predict_reader
     feeder = fluid.DataFeeder(place=place,
                               feed_list=['image', 'label'],
                               program=main_program)
     fetch_list = [emb.name]
     for data in predict_reader():
         emb = exe.run(main_program,
                       feed=feeder.feed(data),
                       fetch_list=fetch_list,
                       use_program_cache=True)
         print("emb: ", emb)

The old test(self, pass_id=0) method is split up. Its evaluation loop becomes the helper _run_test(), while the setup it repeated on every call is removed:

-def test(self, pass_id=0):
-    self._check()
-    num_trainers = self.num_trainers
-    worker_endpoints = os.getenv("PADDLE_TRAINER_ENDPOINTS")
-    current_endpoint = os.getenv("PADDLE_CURRENT_ENDPOINT")
-    emb, loss, acc1, acc5, _ = self.build_program(False,
-                                                  self.num_trainers > 1)
-    config = dist_transpiler.DistributeTranspilerConfig()
-    config.mode = "collective"
-    config.collective_mode = "grad_allreduce"
-    t = dist_transpiler.DistributeTranspiler(config=config)
-    t.transpile(trainer_id=trainer_id,
-                trainers=worker_endpoints,
-                startup_program=self.startup_program,
-                program=self.test_program,
-                current_endpoint=current_endpoint)
-    gpu_id = int(os.getenv("FLAGS_selected_gpus", 0))
-    place = fluid.CUDAPlace(gpu_id)
-    exe = fluid.Executor(place)
-    exe.run(self.startup_program)
-    test_list, test_name_list = reader.test(self.dataset_dir,
-                                            self.val_targets)
-    test_program = self.test_program
-    #test_program = test_program._prune(emb)
-    assert self.checkpoint_dir, "No checkpoint found for test."
-    self.load_checkpoint(executor=exe,
-                         main_program=test_program,
-                         load_for_train=False)
-    feeder = fluid.DataFeeder(place=place,
-                              feed_list=['image', 'label'],
-                              program=test_program)
-    fetch_list = [emb.name]
-    test_start = time.time()
+def _run_test(self, exe, test_list, test_name_list, feeder, fetch_list):
     trainer_id = self.trainer_id
     real_test_batch_size = self.global_test_batch_size
     for i in range(len(test_list)):
         data_list, issame_list = test_list[i]
         embeddings_list = []

The body of the per-target loop is kept, with the Python-2-only xrange() calls replaced by range(), the fetches run against self.test_program, and the slice bookkeeping tidied up (end = start + real_test_batch_size before assigning embeddings[start:end, :]); each computed embeddings array is appended to embeddings_list as before.
@@ -719,44 +759,140 @@  (_run_test reporting, the new test(), and train() setup)
     embeddings = embeddings_list[0] + embeddings_list[1]
     embeddings = sklearn.preprocessing.normalize(embeddings)
     _, _, accuracy, val, val_std, far = evaluate(embeddings, issame_list,
                                                  nrof_folds=10)
     acc, std = np.mean(accuracy), np.std(accuracy)
-    print('[%s][%d]XNorm: %f' % (test_name_list[i], pass_id, xnorm))
-    print('[%s][%d]Accuracy-Flip: %1.5f+-%1.5f' % (test_name_list[i],
-                                                   pass_id, acc, std))
+    if self.train_pass_id >= 0:
+        logger.info('[{}][{}]XNorm: {:.5f}'.format(
+            test_name_list[i], self.train_pass_id, xnorm))
+        logger.info('[{}][{}]Accuracy-Flip: {:.5f}+-{:.5f}'.format(
+            test_name_list[i], self.train_pass_id, acc, std))
+    else:
+        logger.info('[{}]XNorm: {:.5f}'.format(test_name_list[i], xnorm))
+        logger.info('[{}]Accuracy-Flip: {:.5f}+-{:.5f}'.format(
+            test_name_list[i], acc, std))
     sys.stdout.flush()

A new test() method wraps _run_test(). On first use it builds the test program, remembers the embedding name and test data via _set_info()/_get_info(), appends broadcast ops (or runs the collective transpiler when train() has not been run), loads the checkpoint if needed, and times the evaluation:

+def test(self):
+    self._check()
+    trainer_id = self.trainer_id
+    num_trainers = self.num_trainers
+    # if the test program is not built, which means that is the first time
+    # to call the test method, we will first build the test program and
+    # add ops to broadcast bn-related parameters from trainer 0 to other
+    # trainers for distributed tests.
+    if not self.test_initialized:
+        emb, loss, _, _, _ = self.build_program(False,
+                                                self.num_trainers > 1)
+        emb_name = emb.name
+        assert self._get_info(emb_name) is None
+        self._set_info('emb_name', emb.name)
+        if num_trainers > 1 and self.has_run_train:
+            self._append_broadcast_ops(self.test_program)
+        if num_trainers > 1 and not self.has_run_train:
+            worker_endpoints = os.getenv("PADDLE_TRAINER_ENDPOINTS")
+            current_endpoint = os.getenv("PADDLE_CURRENT_ENDPOINT")
+            config = dist_transpiler.DistributeTranspilerConfig()
+            config.mode = "collective"
+            config.collective_mode = "grad_allreduce"
+            t = dist_transpiler.DistributeTranspiler(config=config)
+            t.transpile(trainer_id=trainer_id,
+                        trainers=worker_endpoints,
+                        startup_program=self.startup_program,
+                        program=self.test_program,
+                        current_endpoint=current_endpoint)
+    else:
+        emb_name = self._get_info('emb_name')
+    gpu_id = int(os.getenv("FLAGS_selected_gpus", 0))
+    place = fluid.CUDAPlace(gpu_id)
+    exe = fluid.Executor(place)
+    if not self.has_run_train:
+        exe.run(self.startup_program)
+    if not self.test_reader:
+        test_reader = reader.test
+    else:
+        test_reader = self.test_reader
+    if not self.test_initialized:
+        test_list, test_name_list = test_reader(self.dataset_dir,
+                                                self.val_targets)
+        assert self._get_info('test_list') is None
+        assert self._get_info('test_name_list') is None
+        self._set_info('test_list', test_list)
+        self._set_info('test_name_list', test_name_list)
+    else:
+        test_list = self._get_info('test_list')
+        test_name_list = self._get_info('test_name_list')
+    test_program = self.test_program
+    if not self.has_run_train:
+        assert self.checkpoint_dir, "No checkpoint found for test."
+        self.load_checkpoint(executor=exe,
+                             main_program=test_program,
+                             load_for_train=False)
+    feeder = fluid.DataFeeder(place=place,
+                              feed_list=['image', 'label'],
+                              program=test_program)
+    fetch_list = [emb_name]
+    self.test_initialized = True
+    test_start = time.time()
+    self._run_test(exe, test_list, test_name_list, feeder, fetch_list)
     test_end = time.time()
-    print("test time: {}".format(test_end - test_start))
+    logger.info("test time: {:.4f}".format(test_end - test_start))

train() records that training has run and only initializes fleet and the distributed strategy when more than one trainer is used:

 def train(self):
     self._check()
+    self.has_run_train = True
     trainer_id = self.trainer_id
     num_trainers = self.num_trainers
-    role = role_maker.PaddleCloudRoleMaker(is_collective=True)
-    fleet.init(role)
-    strategy = DistributedStrategy()
-    strategy.mode = "collective"
-    strategy.collective_mode = "grad_allreduce"
-    self.fleet = fleet
-    self.strategy = strategy
+    strategy = None
+    if num_trainers > 1:
+        role = role_maker.PaddleCloudRoleMaker(is_collective=True)
+        fleet.init(role)
+        strategy = DistributedStrategy()
+        strategy.mode = "collective"
+        strategy.collective_mode = "grad_allreduce"
-    train_emb, train_loss, train_acc1, train_acc5, optimizer = \
-        self.build_program(True, False)
-    if self.with_test:
-        test_emb, test_loss, test_acc1, test_acc5, _ = \
-            self.build_program(False, self.num_trainers > 1)
-        test_list, test_name_list = reader.test(self.dataset_dir,
-                                                self.val_targets)
-        test_program = self.test_program
-        self._append_broadcast_ops(test_program)
+    emb, loss, acc1, acc5, optimizer = \
+        self.build_program(True,
+                           False,
+                           dist_strategy=strategy)
     global_lr = optimizer._global_learning_rate(
         program=self.train_program)
-    origin_prog = fleet._origin_program
-    train_prog = fleet.main_program
+    if num_trainers > 1:
+        origin_prog = fleet._origin_program
+        train_prog = fleet.main_program
+    else:
+        origin_prog = self.train_program
+        train_prog = self.train_program
     if trainer_id == 0:
         with open('start.program', 'w') as fout:
             program_to_code(self.startup_program, fout, True)
@@ -764,20 +900,12 @@  (train, continued)
             program_to_code(train_prog, fout, True)
         with open('origin.program', 'w') as fout:
             program_to_code(origin_prog, fout, True)
-        with open('test.program', 'w') as fout:
-            program_to_code(test_program, fout, True)
     gpu_id = int(os.getenv("FLAGS_selected_gpus", 0))
     place = fluid.CUDAPlace(gpu_id)
     exe = fluid.Executor(place)
     exe.run(self.startup_program)
-    if self.with_test:
-        test_feeder = fluid.DataFeeder(place=place,
-                                       feed_list=['image', 'label'],
-                                       program=test_program)
-        fetch_list_test = [test_emb.name]
-        real_test_batch_size = self.global_test_batch_size
     if self.checkpoint_dir:
         load_checkpoint = True
     else:

@@ -793,31 +921,38 @@
     train_reader = self.train_reader
     feeder = fluid.DataFeeder(place=place,
                               feed_list=['image', 'label'],
                               program=origin_prog)
     if self.calc_train_acc:
-        fetch_list = [train_loss.name, global_lr.name,
-                      train_acc1.name, train_acc5.name]
+        fetch_list = [loss.name, global_lr.name,
+                      acc1.name, acc5.name]
     else:
-        fetch_list = [train_loss.name, global_lr.name]
+        fetch_list = [loss.name, global_lr.name]
     local_time = 0.0
     nsamples = 0
     inspect_steps = 200
     global_batch_size = self.global_train_batch_size
     for pass_id in range(self.train_epochs):
+        self.train_pass_id = pass_id
         train_info = [[], [], [], []]
        local_train_info = [[], [], [], []]
         for batch_id, data in enumerate(train_reader()):
             nsamples += global_batch_size
             t1 = time.time()
+            acc1 = None
+            acc5 = None
             if self.calc_train_acc:
                 loss, lr, acc1, acc5 = exe.run(train_prog,
                                                feed=feeder.feed(data),
                                                fetch_list=fetch_list,
                                                use_program_cache=True)
             else:
                 loss, lr = exe.run(train_prog,
                                    feed=feeder.feed(data),
                                    fetch_list=fetch_list,
                                    use_program_cache=True)
             t2 = time.time()
             period = t2 - t1
             local_time += period
@@ -828,83 +963,37 @@  (per-step logging and per-pass test/save)
     if batch_id % inspect_steps == 0:
         avg_loss = np.mean(local_train_info[0])
         avg_lr = np.mean(local_train_info[1])
+        speed = nsamples / local_time
         if self.calc_train_acc:
-            logger.info("Pass:%d batch:%d lr:%f loss:%f qps:%.2f "
-                        "acc1:%.4f acc5:%.4f" % (pass_id, batch_id, avg_lr,
-                                                 avg_loss,
-                                                 nsamples / local_time,
-                                                 acc1, acc5))
+            logger.info("Pass:{} batch:{} lr:{:.8f} loss:{:.6f} "
+                        "qps:{:.2f} acc1:{:.6f} acc5:{:.6f}".format(
+                            pass_id, batch_id, avg_lr, avg_loss, speed,
+                            acc1, acc5))
         else:
-            logger.info("Pass:%d batch:%d lr:%f loss:%f qps:%.2f" % (
-                pass_id, batch_id, avg_lr, avg_loss,
-                nsamples / local_time))
+            logger.info("Pass:{} batch:{} lr:{:.8f} loss:{:.6f} "
+                        "qps:{:.2f}".format(pass_id, batch_id, avg_lr,
+                                            avg_loss, speed))
         local_time = 0
         nsamples = 0
         local_train_info = [[], [], [], []]

     train_loss = np.array(train_info[0]).mean()
-    print("End pass {0}, train_loss {1}".format(pass_id, train_loss))
+    logger.info("End pass {}, train_loss {:.6f}".format(pass_id, train_loss))
     sys.stdout.flush()

The long inline evaluation loop that used to follow (xrange over every validation target, per-batch exe.run on the test program, XNorm / Accuracy-Flip prints and a final "test time" print) is deleted; each pass now simply calls the new test() method:

     if self.with_test:
-        test_start = time.time()
+        self.test()

-    #save model
+    # save model
     if self.model_save_dir:
         model_save_dir = os.path.join(
             self.model_save_dir, str(pass_id))

@@ -919,27 +1008,30 @@  (saving)
     pass
     if trainer_id == 0:
         fluid.io.save_persistables(exe,
                                    model_save_dir,
                                    origin_prog)
     else:
         def save_var(var):
             to_save = "dist@" in var.name and '@rank@' in var.name
             return to_save and var.persistable
         fluid.io.save_vars(exe, model_save_dir, origin_prog,
                            predicate=save_var)

-    #save training info
+    # save training info
     if self.model_save_dir and trainer_id == 0:
         config_file = os.path.join(
-            self.model_save_dir, str(pass_id), 'meta.pickle')
+            self.model_save_dir, str(pass_id), 'meta.json')
         train_info = dict()
         train_info["pretrain_nranks"] = self.num_trainers
         train_info["emb_dim"] = self.emb_dim
         train_info['num_classes'] = self.num_classes
-        with open(config_file, 'wb') as f:
-            pickle.dump(train_info, f)
+        with open(config_file, 'w') as f:
+            json.dump(train_info, f)

-    #upload model
+    # upload model
     if self.model_save_dir and self.fs_name and trainer_id == 0:
         self.put_files_to_hdfs(self.model_save_dir)
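Since the per-pass metadata file is now plain JSON, it can be read back with the standard library from either Python version. A small sketch, assuming a model saved under a hypothetical <model_save_dir>/<pass_id> directory as in the code above:

    import json
    import os

    model_dir = "output/0"   # placeholder: <model_save_dir>/<pass_id>
    with open(os.path.join(model_dir, "meta.json")) as f:
        meta = json.load(f)

    print(meta["pretrain_nranks"], meta["emb_dim"], meta["num_classes"])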
plsc/models/__init__.py  (@ c56ceffc)

@@ -12,11 +12,14 @@
 # limitations under the License.
-from . import resnet
-from .resnet import *
 from . import base_model
+from . import dist_algo
+from . import resnet
 from .base_model import *
+from .dist_algo import *
+from .resnet import *

 __all__ = []
 __all__ += resnet.__all__
 __all__ += base_model.__all__
+__all__ += dist_algo.__all__
plsc/models/base_model.py
浏览文件 @
c56ceffc
...
@@ -13,14 +13,11 @@
...
@@ -13,14 +13,11 @@
# limitations under the License.
# limitations under the License.
import
math
import
math
import
os
import
numpy
as
np
import
paddle
import
paddle.fluid
as
fluid
import
paddle.fluid
as
fluid
from
paddle.fluid
import
unique_name
from
paddle.fluid
import
unique_name
from
.
import
dist_algo
from
.
import
dist_algo
__all__
=
[
"BaseModel"
]
__all__
=
[
"BaseModel"
]
...
@@ -32,21 +29,24 @@ class BaseModel(object):
...
@@ -32,21 +29,24 @@ class BaseModel(object):
which constructs the custom model. And we will add the
which constructs the custom model. And we will add the
distributed fc layer for you automatically.
distributed fc layer for you automatically.
"""
"""
def
__init__
(
self
):
def
__init__
(
self
):
super
(
BaseModel
,
self
).
__init__
()
super
(
BaseModel
,
self
).
__init__
()
def
build_network
(
self
,
input
,
label
,
is_train
=
True
):
def
build_network
(
self
,
input
,
label
,
is_train
=
True
):
"""
"""
Construct the custom model, and we will add the
Construct the custom model, and we will add the
distributed fc layer
distributed fc layer for you
automatically.
at the end of your model
automatically.
"""
"""
raise
NotImplementedError
(
raise
NotImplementedError
(
"You must implement this method in your sub
class."
)
"You must implement this method in your subclass."
)
def
get_output
(
self
,
def
get_output
(
self
,
input
,
input
,
label
,
label
,
num_classes
,
num_classes
,
num_ranks
=
1
,
rank_id
=
0
,
is_train
=
True
,
is_train
=
True
,
param_attr
=
None
,
param_attr
=
None
,
bias_attr
=
None
,
bias_attr
=
None
,
...
@@ -55,6 +55,20 @@ class BaseModel(object):
...
@@ -55,6 +55,20 @@ class BaseModel(object):
scale
=
64.0
):
scale
=
64.0
):
"""
"""
Add the distributed fc layer for the custom model.
Add the distributed fc layer for the custom model.
Params:
input: input for the model
label: label for the input
num_classes: number of classes for the classifier
num_ranks: number of trainers, i.e., GPUs
rank_id: id for the current trainer, from 0 to num_ranks - 1
is_train: build the network for training or not
param_attr: param_attr for the weight parameter of fc
bias_attr: bias_attr for the weight parameter for fc
loss_type: loss type to use, one of dist_softmax, softmax, arcface
and dist_arcface
margin: the margin parameter for arcface and dist_arcface
scale: the scale parameter for arcface and dist_arcface
"""
"""
supported_loss_types
=
[
"dist_softmax"
,
"dist_arcface"
,
supported_loss_types
=
[
"dist_softmax"
,
"dist_arcface"
,
"softmax"
,
"arcface"
]
"softmax"
,
"arcface"
]
...
@@ -62,67 +76,75 @@ class BaseModel(object):
...
@@ -62,67 +76,75 @@ class BaseModel(object):
"Supported loss types: {}, but given: {}"
.
format
(
"Supported loss types: {}, but given: {}"
.
format
(
supported_loss_types
,
loss_type
)
supported_loss_types
,
loss_type
)
nranks
=
int
(
os
.
getenv
(
"PADDLE_TRAINERS_NUM"
,
1
))
rank_id
=
int
(
os
.
getenv
(
"PADDLE_TRAINER_ID"
,
0
))
emb
=
self
.
build_network
(
input
,
label
,
is_train
)
emb
=
self
.
build_network
(
input
,
label
,
is_train
)
prob
=
None
loss
=
None
if
loss_type
==
"softmax"
:
if
loss_type
==
"softmax"
:
loss
,
prob
=
self
.
fc_classify
(
emb
,
loss
,
prob
=
BaseModel
.
_
fc_classify
(
emb
,
label
,
label
,
num_classes
,
num_classes
,
param_attr
,
param_attr
,
bias_attr
)
bias_attr
)
elif
loss_type
==
"arcface"
:
elif
loss_type
==
"arcface"
:
loss
,
prob
=
self
.
arcface
(
emb
,
loss
,
prob
=
BaseModel
.
_
arcface
(
emb
,
label
,
label
,
num_classes
,
num_classes
,
param_attr
,
param_attr
,
margin
,
margin
,
scale
)
scale
)
elif
loss_type
==
"dist_arcface"
:
elif
loss_type
==
"dist_arcface"
:
loss
=
dist_algo
.
_distributed_arcface_classify
(
loss
=
dist_algo
.
distributed_arcface_classify
(
x
=
emb
,
x
=
emb
,
label
=
label
,
class_num
=
num_classes
,
label
=
label
,
nranks
=
nranks
,
rank_id
=
rank_id
,
margin
=
margin
,
class_num
=
num_classes
,
logit_scale
=
scale
,
param_attr
=
param_attr
)
nranks
=
num_ranks
,
prob
=
None
rank_id
=
rank_id
,
margin
=
margin
,
logit_scale
=
scale
,
param_attr
=
param_attr
)
elif
loss_type
==
"dist_softmax"
:
elif
loss_type
==
"dist_softmax"
:
loss
=
dist_algo
.
_distributed_softmax_classify
(
loss
=
dist_algo
.
distributed_softmax_classify
(
x
=
emb
,
x
=
emb
,
label
=
label
,
class_num
=
num_classes
,
label
=
label
,
nranks
=
nranks
,
rank_id
=
rank_id
,
param_attr
=
param_attr
,
class_num
=
num_classes
,
use_bias
=
True
,
bias_attr
=
bias_attr
)
nranks
=
num_ranks
,
prob
=
None
rank_id
=
rank_id
,
param_attr
=
param_attr
,
use_bias
=
True
,
bias_attr
=
bias_attr
)
return
emb
,
loss
,
prob
return
emb
,
loss
,
prob
def
fc_classify
(
self
,
input
,
label
,
out_dim
,
param_attr
,
bias_attr
):
@
staticmethod
def
_fc_classify
(
input
,
label
,
out_dim
,
param_attr
,
bias_attr
):
if
param_attr
is
None
:
if
param_attr
is
None
:
stdv
=
1.0
/
math
.
sqrt
(
input
.
shape
[
1
]
*
1.0
)
std
de
v
=
1.0
/
math
.
sqrt
(
input
.
shape
[
1
]
*
1.0
)
param_attr
=
fluid
.
param_attr
.
ParamAttr
(
param_attr
=
fluid
.
param_attr
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Uniform
(
-
std
v
,
std
v
))
initializer
=
fluid
.
initializer
.
Uniform
(
-
std
dev
,
stdde
v
))
out
=
fluid
.
layers
.
fc
(
input
=
input
,
out
=
fluid
.
layers
.
fc
(
input
=
input
,
size
=
out_dim
,
size
=
out_dim
,
param_attr
=
param_attr
,
param_attr
=
param_attr
,
bias_attr
=
bias_attr
)
bias_attr
=
bias_attr
)
loss
,
prob
=
fluid
.
layers
.
softmax_with_cross_entropy
(
logits
=
out
,
loss
,
prob
=
fluid
.
layers
.
softmax_with_cross_entropy
(
label
=
label
,
return_softmax
=
True
)
logits
=
out
,
label
=
label
,
return_softmax
=
True
)
avg_loss
=
fluid
.
layers
.
mean
(
x
=
loss
)
avg_loss
=
fluid
.
layers
.
mean
(
x
=
loss
)
return
avg_loss
,
prob
return
avg_loss
,
prob
def
arcface
(
self
,
input
,
label
,
out_dim
,
param_attr
,
margin
,
scale
):
@
staticmethod
def
_arcface
(
input
,
label
,
out_dim
,
param_attr
,
margin
,
scale
):
input_norm
=
fluid
.
layers
.
sqrt
(
input_norm
=
fluid
.
layers
.
sqrt
(
fluid
.
layers
.
reduce_sum
(
fluid
.
layers
.
square
(
input
),
dim
=
1
))
fluid
.
layers
.
reduce_sum
(
fluid
.
layers
.
square
(
input
),
dim
=
1
))
input
=
fluid
.
layers
.
elementwise_div
(
input
,
input_norm
,
axis
=
0
)
input
=
fluid
.
layers
.
elementwise_div
(
input
,
input_norm
,
axis
=
0
)
if
param_attr
is
None
:
if
param_attr
is
None
:
param_attr
=
fluid
.
param_attr
.
ParamAttr
(
param_attr
=
fluid
.
param_attr
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Xavier
(
initializer
=
fluid
.
initializer
.
Xavier
(
uniform
=
False
,
fan_in
=
0.0
))
uniform
=
False
,
fan_in
=
0.0
))
weight
=
fluid
.
layers
.
create_parameter
(
weight
=
fluid
.
layers
.
create_parameter
(
shape
=
[
input
.
shape
[
1
],
out_dim
],
shape
=
[
input
.
shape
[
1
],
out_dim
],
dtype
=
'float32'
,
dtype
=
'float32'
,
name
=
unique_name
.
generate
(
'final_fc_w'
),
name
=
unique_name
.
generate
(
'final_fc_w'
),
attr
=
param_attr
)
attr
=
param_attr
)
weight_norm
=
fluid
.
layers
.
sqrt
(
weight_norm
=
fluid
.
layers
.
sqrt
(
fluid
.
layers
.
reduce_sum
(
fluid
.
layers
.
square
(
weight
),
dim
=
0
))
fluid
.
layers
.
reduce_sum
(
fluid
.
layers
.
square
(
weight
),
dim
=
0
))
...
@@ -137,10 +159,11 @@ class BaseModel(object):
...
@@ -137,10 +159,11 @@ class BaseModel(object):
logit
=
fluid
.
layers
.
scale
(
target_cos
,
scale
=
scale
)
logit
=
fluid
.
layers
.
scale
(
target_cos
,
scale
=
scale
)
loss
,
prob
=
fluid
.
layers
.
softmax_with_cross_entropy
(
loss
,
prob
=
fluid
.
layers
.
softmax_with_cross_entropy
(
logits
=
logit
,
label
=
label
,
return_softmax
=
True
)
logits
=
logit
,
label
=
label
,
return_softmax
=
True
)
avg_loss
=
fluid
.
layers
.
mean
(
x
=
loss
)
avg_loss
=
fluid
.
layers
.
mean
(
x
=
loss
)
one_hot
.
stop_gradient
=
True
one_hot
.
stop_gradient
=
True
return
avg_loss
,
prob
return
avg_loss
,
prob
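The _arcface branch normalizes both the feature and the class-weight matrix, adds the angular margin to the target class only, and scales the cosine logits. A minimal NumPy sketch of that computation (illustrative names only, not the fluid code from this file):

import numpy as np

def arcface_logits(emb, label, weight, margin=0.5, scale=64.0):
    """Illustrative ArcFace logits: cos(theta + m) on the target class."""
    emb = emb / np.linalg.norm(emb, axis=1, keepdims=True)          # normalize features
    weight = weight / np.linalg.norm(weight, axis=0, keepdims=True)  # normalize class centers
    cosine = emb.dot(weight)                                         # cos(theta), shape [N, C]
    theta = np.arccos(np.clip(cosine, -1.0, 1.0))
    target = np.cos(theta + margin)                                  # margin only on true class
    one_hot = np.eye(weight.shape[1])[label.reshape(-1)]
    return scale * (one_hot * target + (1.0 - one_hot) * cosine)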
plsc/models/dist_algo.py
...
@@ -12,32 +12,38 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

from __future__ import print_function

import logging
import math

import paddle.fluid as fluid
import paddle.fluid.layers as layers
import paddle.fluid.layers.collective as collective
import paddle.fluid.layers.nn as nn
import paddle.fluid.layers.ops as ops
import paddle.fluid.unique_name as unique_name
from paddle.fluid.framework import Variable, default_startup_program
from paddle.fluid.initializer import Normal
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.optimizer import Optimizer
from paddle.fluid.param_attr import ParamAttr
from six.moves import reduce

from ..utils.fp16_utils import rewrite_program, update_role_var_grad
from ..utils.fp16_utils import update_loss_scaling, move_optimize_ops_back
from ..utils.fp16_lists import AutoMixedPrecisionLists

__all__ = ['distributed_arcface_classify', 'distributed_softmax_classify',
           'DistributedClassificationOptimizer']


class DistributedClassificationOptimizer(Optimizer):
    """
    An optimizer wrapper to generate backward network for distributed
    classification training of model parallelism.
    """

    def init_fp16_params(self, loss_type, fp16_user_dict):
        # set default value for fp16_params_dict
        fp16_params_dict = dict()
...
@@ -261,15 +267,15 @@ class DistributedClassificationOptimizer(Optimizer):
            })

    def insert_commom_backward_op(self,
                                  block,
                                  index,
                                  shard_logit,
                                  shard_prob,
                                  shard_label,
                                  shard_dim,
                                  op_role_key,
                                  backward_role,
                                  loss_backward_role):
        '''
        insert backward ops when not using mixed precision training.
        common use in all lose type.
...
@@ -421,10 +427,10 @@ class DistributedClassificationOptimizer(Optimizer):

class DistributedClassifier(object):
    """
    Tookit for distributed classification, in which the parameter of the last
    full-connected layer is distributed to all trainers
    """

    def __init__(self, nclasses, nranks, rank_id, layer_helper):
        self.nclasses = nclasses
...
@@ -446,29 +452,29 @@ class DistributedClassifier(object):
                         dtype,
                         in_dim,
                         param_attr=None,
                         bias_attr=None,
                         transpose_weight=False,
                         use_bias=True):
        if param_attr is None:
            stddev = math.sqrt(2.0 / (in_dim + self.nclasses))
            param_attr = ParamAttr(initializer=Normal(scale=stddev))
        weight_shape = [self.shard_dim, in_dim
                        ] if transpose_weight else [in_dim, self.shard_dim]
        weight = self._layer_helper.create_parameter(
            shape=weight_shape, dtype=dtype, attr=param_attr, is_bias=False)
        # avoid allreducing gradients for distributed parameters
        weight.is_distributed = True
        # avoid broadcasting distributed parameters in startup program
        default_startup_program().global_block().vars[
            weight.name].is_distributed = True

        bias = None
        if use_bias:
            bias = self._layer_helper.create_parameter(
                shape=[self.shard_dim],
                attr=bias_attr,
                dtype=dtype,
                is_bias=True)
            bias.is_distributed = True
            default_startup_program().global_block().vars[
                bias.name].is_distributed = True
...
@@ -505,12 +511,11 @@ class DistributedClassifier(object):
                         use_bias=True,
                         bias_attr=None):
        flatten_dim = reduce(lambda a, b: a * b, x.shape[1:], 1)
        weight, bias = self.create_parameter(dtype=x.dtype,
                                             in_dim=flatten_dim,
                                             param_attr=param_attr,
                                             bias_attr=bias_attr,
                                             use_bias=use_bias)

        x_all = collective._c_allgather(
            x, nranks=self.nranks, use_calc_stream=True)
...
@@ -551,11 +556,10 @@ class DistributedClassifier(object):
        reference: ArcFace. https://arxiv.org/abs/1801.07698
        '''
        flatten_dim = reduce(lambda a, b: a * b, x.shape[1:], 1)
        weight, bias = self.create_parameter(dtype=x.dtype,
                                             in_dim=flatten_dim,
                                             param_attr=param_attr,
                                             use_bias=False)

        # normalize x
        x_l2 = ops.sqrt(nn.reduce_sum(ops.square(x), dim=1))
...
@@ -566,12 +570,11 @@ class DistributedClassifier(object):
        label_all = collective._c_allgather(
            label, nranks=self.nranks, use_calc_stream=True)
        label_all.stop_gradient = True
        shard_label = nn.shard_index(label_all,
                                     index_num=self.nclasses,
                                     nshards=self.nranks,
                                     shard_id=self.rank_id,
                                     ignore_value=-1)
        # TODO check necessary
        shard_label.stop_gradient = True
...
@@ -605,16 +608,16 @@ class DistributedClassifier(object):
        return avg_loss


def distributed_softmax_classify(x,
                                 label,
                                 class_num,
                                 nranks,
                                 rank_id,
                                 param_attr=None,
                                 use_bias=True,
                                 bias_attr=None,
                                 name=None):
    """
    Classification layer with FC, softmax and cross entropy calculation of
    distibuted version in case of too large number of classes.
...
@@ -652,26 +655,29 @@ def _distributed_softmax_classify(x,
                                      class_num=1000,
                                      nranks=8,
                                      rank_id=0)
    """
    if name is None:
        name = 'dist@softmax@rank@%05d' % rank_id

    helper = LayerHelper(name, **locals())
    classifier = DistributedClassifier(class_num, nranks, rank_id, helper)
    return classifier.softmax_classify(x, label, param_attr, use_bias,
                                       bias_attr)


def distributed_arcface_classify(x,
                                 label,
                                 class_num,
                                 nranks,
                                 rank_id,
                                 margin=0.5,
                                 logit_scale=64.0,
                                 param_attr=None,
                                 name=None):
    """
    Classification layer with ArcFace loss of distibuted version in case of
    too large number of classes. the equation is
...
@@ -719,14 +725,13 @@ def _distributed_arcface_classify(x,
                                      class_num=1000,
                                      nranks=8,
                                      rank_id=0)
    """
    if name is None:
        name = 'dist@arcface@rank@%05d' % rank_id
    helper = LayerHelper(name, **locals())
    classifier = DistributedClassifier(class_num, nranks, rank_id, helper)
    return classifier.arcface_classify(x=x,
                                       label=label,
                                       margin=margin,
                                       logit_scale=logit_scale,
                                       param_attr=param_attr)

In this file the former _distributed_softmax_classify and _distributed_arcface_classify are exported without the leading underscore, the import block is reordered and from __future__ import division is dropped, module and class docstrings switch from ''' to """ ("A optimizer" becomes "An optimizer"), stdv becomes stddev, and the comments on distributed parameters are reworded as shown above.
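The arcface_classify docstring refers to the ArcFace paper; the loss it computes has the standard form

$$L = -\frac{1}{N}\sum_{i=1}^{N}\log\frac{e^{s\,\cos(\theta_{y_i}+m)}}{e^{s\,\cos(\theta_{y_i}+m)}+\sum_{j\neq y_i}e^{s\,\cos\theta_j}}$$

where $s$ is logit_scale, $m$ is margin, and $\theta_j$ is the angle between the normalized feature and the $j$-th class weight; here both the logits and the softmax denominator are computed over class shards spread across nranks trainers.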
plsc/models/resnet.py
...
@@ -12,14 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

import paddle.fluid as fluid

from .base_model import BaseModel

__all__ = ["ResNet", "ResNet50", "ResNet101", "ResNet152"]
...
@@ -33,12 +28,13 @@ class ResNet(BaseModel):
    def build_network(self,
                      input,
                      label,
                      is_train=True):
        layers = self.layers
        supported_layers = [50, 101, 152]
        assert layers in supported_layers, \
            "supported layers {}, but given {}".format(supported_layers, layers)

        depth = None
        if layers == 50:
            depth = [3, 4, 14, 3]
        elif layers == 101:
...
@@ -59,21 +55,26 @@ class ResNet(BaseModel):
                                      stride=2 if i == 0 else 1,
                                      is_train=is_train)
        bn = fluid.layers.batch_norm(input=conv,
                                     act=None,
                                     epsilon=2e-05,
                                     is_test=False if is_train else True)
        drop = fluid.layers.dropout(x=bn,
                                    dropout_prob=0.4,
                                    dropout_implementation='upscale_in_train',
                                    is_test=False if is_train else True)
        fc = fluid.layers.fc(
            input=drop,
            size=self.emb_dim,
            act=None,
            param_attr=fluid.param_attr.ParamAttr(
                initializer=fluid.initializer.Xavier(uniform=False,
                                                     fan_in=0.0)),
            bias_attr=fluid.param_attr.ParamAttr(
                initializer=fluid.initializer.ConstantInitializer()))
        emb = fluid.layers.batch_norm(input=fc,
                                      act=None,
                                      epsilon=2e-05,
                                      is_test=False if is_train else True)
        return emb

    def conv_bn_layer(self,
...
@@ -92,51 +93,79 @@ class ResNet(BaseModel):
                                   stride=stride,
                                   padding=pad,
                                   groups=groups,
                                   act=None,
                                   param_attr=fluid.param_attr.ParamAttr(
                                       initializer=fluid.initializer.Xavier(
                                           uniform=False, fan_in=0.0)),
                                   bias_attr=False)
        if act == 'prelu':
            bn = fluid.layers.batch_norm(input=conv,
                                         act=None,
                                         epsilon=2e-05,
                                         momentum=0.9,
                                         is_test=False if is_train else True)
            return fluid.layers.prelu(
                bn,
                mode="all",
                param_attr=fluid.param_attr.ParamAttr(
                    initializer=fluid.initializer.Constant(0.25)))
        else:
            return fluid.layers.batch_norm(input=conv,
                                           act=act,
                                           epsilon=2e-05,
                                           is_test=False if is_train else True)

    def shortcut(self, input, ch_out, stride, is_train):
        ch_in = input.shape[1]
        if ch_in != ch_out or stride != 1:
            return self.conv_bn_layer(input,
                                      ch_out,
                                      1,
                                      stride,
                                      is_train=is_train)
        else:
            return input

    def bottleneck_block(self, input, num_filters, stride, is_train):
        if self.layers < 101:
            bn1 = fluid.layers.batch_norm(input=input,
                                          act=None,
                                          epsilon=2e-05,
                                          is_test=False if is_train else True)
            conv1 = self.conv_bn_layer(input=bn1,
                                       num_filters=num_filters,
                                       filter_size=3,
                                       pad=1,
                                       act='prelu',
                                       is_train=is_train)
            conv2 = self.conv_bn_layer(input=conv1,
                                       num_filters=num_filters,
                                       filter_size=3,
                                       stride=stride,
                                       pad=1,
                                       is_train=is_train)
        else:
            bn0 = fluid.layers.batch_norm(input=input,
                                          act=None,
                                          epsilon=2e-05,
                                          is_test=False if is_train else True)
            conv0 = self.conv_bn_layer(input=bn0,
                                       num_filters=num_filters / 4,
                                       filter_size=1,
                                       pad=0,
                                       act='prelu',
                                       is_train=is_train)
            conv1 = self.conv_bn_layer(input=conv0,
                                       num_filters=num_filters / 4,
                                       filter_size=3,
                                       pad=1,
                                       act='prelu',
                                       is_train=is_train)
            conv2 = self.conv_bn_layer(input=conv1,
                                       num_filters=num_filters,
                                       filter_size=1,
                                       stride=stride,
                                       pad=0,
                                       is_train=is_train)
        short = self.shortcut(input, num_filters, stride, is_train=is_train)
        return fluid.layers.elementwise_add(x=short, y=conv2, act=None)
...

In this file the unused imports (paddle, math, os, numpy, unique_name) are dropped, build_network gains a default is_train=True, and the long conv/batch_norm/dropout/fc calls are reflowed with one argument per line; the behavior is unchanged.
plsc/utils/__init__.py
...
@@ -11,4 +11,3 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
plsc/utils/base64_reader.py
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import base64
import functools
import math
import os
import pickle
import random

import numpy as np
import paddle
import six
from PIL import Image, ImageEnhance

try:
    from StringIO import StringIO
except ImportError:
    from io import StringIO
from io import BytesIO

random.seed(0)
...
@@ -18,7 +36,6 @@ DATA_DIM = 112
THREAD = 8
BUF_SIZE = 10240

img_mean = np.array([127.5, 127.5, 127.5]).reshape((3, 1, 1))
img_std = np.array([128.0, 128.0, 128.0]).reshape((3, 1, 1))
...
@@ -97,13 +114,13 @@ def RandomResizedCrop(img, size):
    return img


def random_crop(img, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.)):
    aspect_ratio = math.sqrt(random.uniform(*ratio))
    w = 1. * aspect_ratio
    h = 1. / aspect_ratio
    bound = min((float(img.size[0]) / img.size[1]) / (w ** 2),
                (float(img.size[1]) / img.size[0]) / (h ** 2))
    scale_max = min(scale[1], bound)
    scale_min = min(scale[0], bound)
...
@@ -150,12 +167,12 @@ def distort_color(img):
    return img


def process_image(sample,
                  class_dim,
                  color_jitter,
                  rotate,
                  rand_mirror,
                  normalize):
    img_data = base64.b64decode(sample[0])
    img = Image.open(StringIO(img_data))
...
@@ -185,49 +202,62 @@ def process_image_imagepath(sample,
    return img, sample[1]


def arc_iterator(data_dir,
                 file_list,
                 class_dim,
                 color_jitter=False,
                 rotate=False,
                 rand_mirror=False,
                 normalize=False):
    trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0"))
    num_trainers = int(os.getenv("PADDLE_TRAINERS_NUM", "1"))

    def reader():
        with open(file_list, 'r') as f:
            flist = f.readlines()
            assert len(flist) == num_trainers, \
                "Please use process_base64_files.py to pre-process the dataset."
            file = flist[trainer_id]
            file = os.path.join(data_dir, file)
            with open(file, 'r') as f:
                if six.PY2:
                    for line in f.xreadlines():
                        line = line.strip().split('\t')
                        image, label = line[0], line[1]
                        yield image, label
                else:
                    for line in f:
                        line = line.strip().split('\t')
                        image, label = line[0], line[1]
                        yield image, label

    mapper = functools.partial(process_image,
                               class_dim=class_dim,
                               color_jitter=color_jitter,
                               rotate=rotate,
                               rand_mirror=rand_mirror,
                               normalize=normalize)
    return paddle.reader.xmap_readers(mapper, reader, THREAD, BUF_SIZE)


def load_bin(path, image_size):
    if six.PY2:
        bins, issame_list = pickle.load(open(path, 'rb'))
    else:
        bins, issame_list = pickle.load(open(path, 'rb'), encoding='bytes')
    data_list = []
    for flip in [0, 1]:
        data = np.empty((len(issame_list) * 2, 3, image_size[0], image_size[1]))
        data_list.append(data)
    for i in range(len(issame_list) * 2):
        _bin = bins[i]
        if six.PY2:
            if not isinstance(_bin, six.string_types):
                _bin = _bin.tostring()
            img_ori = Image.open(StringIO(_bin))
        else:
            img_ori = Image.open(BytesIO(_bin))
        for flip in [0, 1]:
            img = img_ori.copy()
            if flip == 1:
...
@@ -241,13 +271,18 @@ def load_bin(path, image_size):
        if i % 1000 == 0:
            print('loading bin', i)
    print(data_list[0].shape)
    return data_list, issame_list


def train(data_dir, num_classes):
    file_path = os.path.join(data_dir, 'file_list.txt')
    return arc_iterator(data_dir,
                        file_path,
                        class_dim=num_classes,
                        color_jitter=False,
                        rotate=False,
                        rand_mirror=True,
                        normalize=True)


def test(data_dir, datasets):
...

Compared with the previous version: the Apache license header and the six/BytesIO imports are added, the mutable list defaults of random_crop become tuples, process_image_imagepath is renamed process_image, arc_iterator now takes data_dir and reads exactly one pre-processed base64 file per trainer (the old reader asserted len(flist) % trainer_count == 0 and sliced a per-trainer range of files), xrange becomes range, basestring checks go through six, pickle.load passes encoding='bytes' on Python 3, and train() is simplified to train(data_dir, num_classes) with a fixed 'file_list.txt'.
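The Python 2/3 handling added above can be factored into two small helpers; a hedged sketch of that pattern (the helper names are hypothetical, not part of this file):

import pickle
import six
from io import BytesIO

try:
    from StringIO import StringIO  # Python 2
except ImportError:
    from io import StringIO        # Python 3


def load_bins_compat(path):
    """Load a (bins, issame_list) pickle written by Python 2 from either interpreter."""
    if six.PY2:
        return pickle.load(open(path, 'rb'))
    # Python 3 needs encoding='bytes' to read Python 2 pickles
    return pickle.load(open(path, 'rb'), encoding='bytes')


def open_image_compat(_bin):
    """Wrap one pickled image record in a file-like object PIL can open."""
    if six.PY2:
        if not isinstance(_bin, six.string_types):
            _bin = _bin.tostring()
        return StringIO(_bin)
    return BytesIO(_bin)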
plsc/utils/jpeg_reader.py
import functools
import math
import os
import pickle
import random

import numpy as np
import paddle
import six
from PIL import Image, ImageEnhance

try:
    from StringIO import StringIO
except ImportError:
    from io import StringIO
from io import BytesIO

random.seed(0)
...
@@ -123,13 +126,13 @@ def RandomResizedCrop(img, size):
    return img


def random_crop(img, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.)):
    aspect_ratio = math.sqrt(random.uniform(*ratio))
    w = 1. * aspect_ratio
    h = 1. / aspect_ratio
    bound = min((float(img.size[0]) / img.size[1]) / (w ** 2),
                (float(img.size[1]) / img.size[0]) / (h ** 2))
    scale_max = min(scale[1], bound)
    scale_min = min(scale[0], bound)
...
@@ -222,28 +225,37 @@ def arc_iterator(data,
    def reader():
        if shuffle:
            random.shuffle(data)
        for j in range(len(data)):
            path, label = data[j]
            path = os.path.join(data_dir, path)
            yield path, label

    mapper = functools.partial(process_image_imagepath,
                               class_dim=class_dim,
                               color_jitter=color_jitter,
                               rotate=rotate,
                               rand_mirror=rand_mirror,
                               normalize=normalize)
    return paddle.reader.xmap_readers(mapper, reader, THREAD, BUF_SIZE)


def load_bin(path, image_size):
    if six.PY2:
        bins, issame_list = pickle.load(open(path, 'rb'))
    else:
        bins, issame_list = pickle.load(open(path, 'rb'), encoding='bytes')
    data_list = []
    for flip in [0, 1]:
        data = np.empty((len(issame_list) * 2, 3, image_size[0], image_size[1]))
        data_list.append(data)
    for i in range(len(issame_list) * 2):
        _bin = bins[i]
        if six.PY2:
            if not isinstance(_bin, six.string_types):
                _bin = _bin.tostring()
            img_ori = Image.open(StringIO(_bin))
        else:
            img_ori = Image.open(BytesIO(_bin))
        for flip in [0, 1]:
            img = img_ori.copy()
            if flip == 1:
...
@@ -257,14 +269,19 @@ def load_bin(path, image_size):
        if i % 1000 == 0:
            print('loading bin', i)
    print(data_list[0].shape)
    return data_list, issame_list


def arc_train(data_dir, class_dim):
    train_image_list = get_train_image_list(data_dir)
    return arc_iterator(train_image_list,
                        shuffle=True,
                        class_dim=class_dim,
                        data_dir=data_dir,
                        color_jitter=False,
                        rotate=False,
                        rand_mirror=True,
                        normalize=True)


def test(data_dir, datasets):
...

The changes mirror those in base64_reader.py: imports are regrouped and BytesIO added, list defaults become tuples, xrange becomes range, the Python 2/3 pickle and string handling goes through six, and the arc_iterator/arc_train calls are reflowed with one argument per line.
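Both readers follow the same pipeline: a plain generator yields lightweight records and paddle.reader.xmap_readers runs the heavy decoding in THREAD worker threads. A minimal sketch of that pattern (the sample data and decode function here are placeholders, not this module's code):

import functools
import paddle


def decode(sample, class_dim):
    # stand-in for process_image / process_image_imagepath
    image, label = sample
    return image, int(label)


def make_reader(records, class_dim, threads=8, buf_size=10240):
    def reader():
        for image, label in records:
            yield image, label

    mapper = functools.partial(decode, class_dim=class_dim)
    # decoding runs in `threads` workers, decoupled from the training loop
    return paddle.reader.xmap_readers(mapper, reader, threads, buf_size)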
plsc/utils/parameter_converter.py  (new file, 0 → 100644)
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import json
import logging
import os
import shutil
from functools import cmp_to_key

import paddle.fluid as fluid

logging.basicConfig(
    level=logging.INFO,
    format='[%(levelname)s %(asctime)s line:%(lineno)d] %(message)s',
    datefmt='%d %b %Y %H:%M:%S')
logger = logging.getLogger()


class ParameterConverter(object):
    """
    Tool to convert pre-trained distributed fc parameters for inference or
    fine-tuning. Note that the number of ranks or GPUs for inference or
    fine-tuning can be different from that for pre-training.
    """

    def __init__(self, model_dir, output_dir, num_trainers):
        super(ParameterConverter, self).__init__()
        self.model_dir = model_dir
        self.output_dir = output_dir
        self.pretrain_nranks = -1
        self.emb_dim = -1
        self.num_classes = -1
        self.nranks = num_trainers

        self.load_config()

    def load_config(self):
        """
        Load config file which contains the following information for
        pre-training:
            1. pretrain_nranks (int): number of ranks for pre-training;
            2. emb_dim (int): embedding dim for pre-training;
            3. num_classes (int): number of classes for classification.
        """
        meta_file = os.path.join(self.model_dir, 'meta.json')
        if not os.path.exists(meta_file):
            logger.error("Meta file does not exist, make sure your pre-trained "
                         "models are legal.")
            exit()

        with open(meta_file, 'r') as handle:
            config = json.load(handle)

        self.pretrain_nranks = config['pretrain_nranks']
        assert self.pretrain_nranks > 0
        self.emb_dim = config['emb_dim']
        assert self.emb_dim > 0
        self.num_classes = config['num_classes']
        assert self.num_classes > 0

        logger.info("Parameters for pre-training: pretrain_nranks ({}), "
                    "emb_dim ({}), and num_classes ({}).".format(
                        self.pretrain_nranks, self.emb_dim, self.num_classes))
        logger.debug("Parameters for inference or fine-tuning: "
                     "nranks ({}).".format(self.nranks))

    def find_var_names(self):
        """
        Find all names of pre-trained parameters for the distributed fc layer,
        e.g., dist@softmax@rank@00000.w_0, dist@softmax@rank@00000.b_0 etc.
        We assume that names of distributed fc related parameters start with
        the prefix dist@ and have @rank@ in their names.
        """
        var_names = []
        model_dir = os.path.abspath(self.model_dir)
        if not os.path.exists(model_dir):
            logger.error("The directory for pre-trained model ({}) does not "
                         "exist, please check it.".format(model_dir))
            exit()
        logger.info("The directory for pre-trained model: {}".format(model_dir))
        for file in os.listdir(model_dir):
            if 'dist@' in file and '@rank@' in file:
                var_names.append(file)
        assert len(var_names) > 0, \
            logger.error("No distributed fc parameters found.")
        logger.info("Number of distributed fc parameters: {}.".format(
            len(var_names)))
        logger.info("Distributed fc parameters: {}.".format(var_names))
        return var_names

    def split_load_and_save(self,
                            name_index,
                            param_names,
                            save_rank_id,
                            remainder,
                            as_bias,
                            train_nshards,
                            train_nranks,
                            nshards,
                            dtype="float32"):
        var2 = None
        advance = False
        emb_dim = self.emb_dim
        main_program = fluid.Program()
        startup_program = fluid.Program()
        num_classes = self.num_classes

        load_var_name = param_names[name_index]
        save_var_name_list = load_var_name.split('.')
        save_var_name_list[0] = save_var_name_list[0].split('@')
        save_var_name_list[0][-1] = "%05d" % save_rank_id
        save_var_name_list[0] = '@'.join(save_var_name_list[0])
        save_var_name = '.'.join(save_var_name_list)

        last_train_nshards = num_classes - (train_nranks - 1) * train_nshards

        with fluid.program_guard(main_program, startup_program):
            if name_index == train_nranks - 1:
                var_dim = last_train_nshards
            else:
                var_dim = train_nshards

            shape = [var_dim] if as_bias else [emb_dim, var_dim]
            var = fluid.layers.create_parameter(shape,
                                                dtype=dtype,
                                                name=load_var_name)

            if as_bias:
                var = fluid.layers.slice(var,
                                         axes=[0],
                                         starts=[var.shape[0] - remainder],
                                         ends=[var.shape[0]])
            else:
                var = fluid.layers.split(
                    var, [var.shape[1] - remainder, remainder], dim=1)[1]

            save_var_dim = nshards
            if remainder < nshards:
                if name_index == train_nranks - 1:
                    save_var_dim = remainder
                else:
                    name_index += 1
                    advance = True
                    load_var_name = param_names[name_index]

                    if name_index == train_nranks - 1:
                        var_dim = last_train_nshards
                    else:
                        var_dim = train_nshards
                    shape = [var_dim] if as_bias else [emb_dim, var_dim]
                    var2 = fluid.layers.create_parameter(shape,
                                                         dtype=dtype,
                                                         name=load_var_name)

                    if remainder + var_dim < nshards:
                        # The last train rank
                        save_var_dim = remainder + var_dim
                    else:
                        remainder = remainder + var_dim - nshards
            elif remainder == nshards:
                if name_index == train_nranks - 2:
                    remainder = last_train_nshards
                    advance = True
                elif name_index < train_nranks - 2:
                    remainder = train_nshards
                    advance = True
            else:
                remainder = remainder - nshards

            if var2 is not None:
                var = fluid.layers.concat([var, var2],
                                          axis=0 if as_bias else 1)

            shape = [save_var_dim] if as_bias else [emb_dim, save_var_dim]
            to_save_var = fluid.layers.create_parameter(
                shape, dtype=dtype, name=save_var_name + '_temp')

            if save_var_dim != nshards:
                # get last dim
                if as_bias:
                    temp_var = fluid.layers.slice(
                        var,
                        axes=[0],
                        starts=[var.shape[0] - save_var_dim],
                        ends=[var.shape[0]])
                else:
                    temp_var = fluid.layers.split(
                        var, [var.shape[1] - save_var_dim, save_var_dim],
                        dim=1)[1]
                fluid.layers.assign(temp_var, to_save_var)
            else:
                if as_bias:
                    temp_var = fluid.layers.slice(var,
                                                  axes=[0],
                                                  starts=[0],
                                                  ends=[nshards])
                else:
                    temp_var = fluid.layers.split(
                        var, [nshards, var.shape[1] - nshards], dim=1)[0]
                fluid.layers.assign(temp_var, to_save_var)

        def expected_var(var):
            has_var = os.path.exists(os.path.join(self.model_dir, var.name))
            if has_var:
                return True
            return False

        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        exe.run(startup_program)
        fluid.io.load_vars(exe,
                           dirname=self.model_dir,
                           predicate=expected_var,
                           main_program=main_program)
        exe.run(main_program)
        if not os.path.exists(self.output_dir):
            os.makedirs(self.output_dir)
        fluid.io.save_vars(exe,
                           self.output_dir,
                           vars=[to_save_var],
                           main_program=main_program)
        srcfile = os.path.join(self.output_dir, to_save_var.name)
        dstfile = os.path.join(self.output_dir, save_var_name)
        shutil.move(srcfile, dstfile)
        return remainder, advance

    def split_parameters(self, param_names, as_bias):
        """
        Split parameters whose names are in param_names.
        Params:
            param_names: list of names of parameters to split
            as_bias: whether parameters to split are as bias or not
        """
        num_classes = self.num_classes
        train_nranks = self.pretrain_nranks
        nranks = self.nranks

        train_nshards = (num_classes + train_nranks - 1) // train_nranks
        nshards = (num_classes + nranks - 1) // nranks

        save_rank_id = 0
        # remainder dim that is not split in a var
        remainder_var_dim = train_nshards
        name_index = 0  # index of name of pre-trained parameter to process
        for save_rank_id in range(nranks):
            assert name_index < train_nranks
            remainder_var_dim, advance = self.split_load_and_save(
                name_index, param_names, save_rank_id, remainder_var_dim,
                as_bias, train_nshards, train_nranks, nshards)
            name_index += 1 if advance else 0
        processed_var_count = name_index + 1

        assert processed_var_count == train_nranks, \
            logger.error("Number of pre-trained parameters processed ({}) is "
                         "not equal to the number of ranks ({}) for "
                         "pre-training.".format(processed_var_count,
                                                train_nranks))
        assert save_rank_id == nranks - 1, \
            logger.error("Number of saved parameters ({}) is not equal to the "
                         "number of ranks ({}) for inference or "
                         "fine-tuning.".format(save_rank_id + 1, nranks))

    def split_distfc_parameters(self,
                                weight_param_names,
                                weight_velocity_param_names,
                                bias_param_names,
                                bias_velocity_param_names):
        """
        Split each distributed fc-related parameter according to number of
        ranks for inference or fine-tuning.
        Params:
            weight_param_names: list of names of weight parameters
            bias_param_names: list of names of bias parameters
        """
        self.split_parameters(weight_param_names, as_bias=False)
        self.split_parameters(weight_velocity_param_names, as_bias=False)
        if len(bias_param_names) != 0:
            self.split_parameters(bias_param_names, as_bias=True)
            self.split_parameters(bias_velocity_param_names, as_bias=True)

    def concat_load_and_save(self,
                             name_index,
                             param_names,
                             save_rank_id,
                             remainder,
                             as_bias,
                             train_nshards,
                             train_nranks,
                             nshards,
                             dtype="float32"):
        advance = 0
        emb_dim = self.emb_dim
        main_program = fluid.Program()
        startup_program = fluid.Program()
        num_classes = self.num_classes

        load_var_name = param_names[name_index]
        save_var_name_list = load_var_name.split('.')
        save_var_name_list[0] = save_var_name_list[0].split('@')
        save_var_name_list[0][-1] = "%05d" % save_rank_id
        save_var_name_list[0] = '@'.join(save_var_name_list[0])
        save_var_name = '.'.join(save_var_name_list)

        last_train_nshards = num_classes - (train_nranks - 1) * train_nshards

        with fluid.program_guard(main_program, startup_program):
            if name_index == train_nranks - 1:
                var_dim = last_train_nshards
            else:
                var_dim = train_nshards
            shape = [var_dim] if as_bias else [emb_dim, var_dim]
            var = fluid.layers.create_parameter(shape,
                                                dtype=dtype,
                                                name=load_var_name)

            if as_bias:
                var = fluid.layers.slice(var,
                                         axes=[0],
                                         starts=[var.shape[0] - remainder],
                                         ends=[var.shape[0]])
            else:
                var = fluid.layers.split(
                    var, [var.shape[1] - remainder, remainder], dim=1)[1]

            to_concat_var_list = [var]
            while remainder < nshards and name_index < train_nranks - 1:
                name_index += 1
                advance += 1
                load_var_name = param_names[name_index]
                if name_index == train_nranks - 1:
                    var_dim = last_train_nshards
                else:
                    var_dim = train_nshards
                shape = [var_dim] if as_bias else [emb_dim, var_dim]
                var = fluid.layers.create_parameter(shape,
                                                    dtype=dtype,
                                                    name=load_var_name)

                to_concat_var_list.append(var)
                remainder += var_dim
            if len(to_concat_var_list) > 1:
                var = fluid.layers.concat(to_concat_var_list,
                                          axis=0 if as_bias else 1)
            save_var_dim = nshards
            if remainder > nshards:
                if as_bias:
                    var = fluid.layers.slice(var,
                                             axes=[0],
                                             starts=[0],
                                             ends=[nshards])
                else:
                    var = fluid.layers.split(
                        var, [nshards, var.shape[1] - nshards], dim=1)[0]
                remainder = remainder - nshards
            elif remainder == nshards:
                if name_index == train_nranks - 2:
                    # advance += 1 if len(to_concat_var_list) > 1 else 0
                    # to avoid duplicate add
                    # name_index += 1 if len(to_concat_var_list) > 1 else 0
                    # to avoid duplicate add
                    advance += 1
                    name_index += 1
                    remainder = last_train_nshards
                elif name_index < train_nranks - 2:
                    # advance += 1 if len(to_concat_var_list) > 1 else 0
                    # to avoid duplicate add
                    # name_index += 1 if len(to_concat_var_list) > 1 else 0
                    # to avoid duplicate add
                    advance += 1
                    name_index += 1
                    remainder = train_nshards
            else:
                save_var_dim = remainder

            shape = [save_var_dim] if as_bias else [emb_dim, save_var_dim]
            to_save_var = fluid.layers.create_parameter(
                shape, dtype=dtype, name=save_var_name + '_temp')
            fluid.layers.assign(var, to_save_var)

        def expected_var(var):
            has_var = os.path.exists(os.path.join(self.model_dir, var.name))
            if has_var:
                return True
            return False

        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        exe.run(startup_program)
        fluid.io.load_vars(exe,
                           dirname=self.model_dir,
                           predicate=expected_var,
                           main_program=main_program)
        exe.run(main_program)
        if not os.path.exists(self.output_dir):
            os.makedirs(self.output_dir)
        fluid.io.save_vars(exe,
                           self.output_dir,
                           vars=[to_save_var],
                           main_program=main_program)
        srcfile = os.path.join(self.output_dir, to_save_var.name)
        dstfile = os.path.join(self.output_dir, save_var_name)
        shutil.move(srcfile, dstfile)
        return remainder, advance

    def concat_parameters(self, param_names, as_bias):
        """
        Concat parameters whose names are in param_names.
        Params:
            param_names: list of names of parameters to concat
            as_bias: whether parameters to split are as bias or not
        """
        num_classes = self.num_classes
        train_nranks = self.pretrain_nranks
        nranks = self.nranks

        train_nshards = (num_classes + train_nranks - 1) // train_nranks
        nshards = (num_classes + nranks - 1) // nranks

        save_rank_id = 0
        remainder_dim = train_nshards  # remainder dim that is not concated
        name_index = 0  # index of name of pre-trained parameter to process
        for save_rank_id in range(nranks):
            assert name_index < train_nranks
            remainder_dim, advance = self.concat_load_and_save(
                name_index, param_names, save_rank_id, remainder_dim, as_bias,
                train_nshards, train_nranks, nshards)
            name_index += advance
        processed_var_count = name_index + 1

        assert processed_var_count == train_nranks, \
            logger.error("Number of pre-trained parameters processed ({}) is "
                         "not equal to the number of ranks ({}) for "
                         "pre-training.".format(processed_var_count,
                                                train_nranks))
        assert save_rank_id == nranks - 1, \
            logger.error("Number of saved parameters ({}) is not equal to the "
                         "number of ranks ({}) for inference or "
                         "fine-tuning.".format(save_rank_id + 1, nranks))

    def concat_distfc_parameters(self,
                                 weight_param_names,
                                 weight_velocity_param_names,
                                 bias_param_names,
                                 bias_velocity_param_names):
        """
        Concat distributed fc-related parameters according to number of ranks
        for inference or finetuning.
        Params:
            weight_param_names: list of names of weight parameters
            weight_velocity_param_names: list of names of weight velocity
                parameters
            bias_param_names: list of names of bias parameters
            bias_velocity_param_names: list of names of bias velocity parameters
        """
        self.concat_parameters(weight_param_names, as_bias=False)
        self.concat_parameters(weight_velocity_param_names, as_bias=False)
        if len(bias_param_names) != 0:
            self.concat_parameters(bias_param_names, as_bias=True)
            self.concat_parameters(bias_velocity_param_names, as_bias=True)

    def process(self):
        self.load_config()
        var_names = self.find_var_names()
        weight_param_names = [
            name for name in var_names
            if '.w' in name and 'velocity' not in name
        ]
        weight_velocity_param_names = [
            name for name in var_names
            if '.w' in name and 'velocity' in name
        ]
        bias_param_names = [
            name for name in var_names
            if '.b' in name and 'velocity' not in name
        ]
        bias_velocity_param_names = [
            name for name in var_names
            if '.b' in name and 'velocity' in name
        ]

        def parameter_name_compare(x, y):
            """
            Compare two parameter names depend on their rank id.
            A parameter name is like dist_softmax_rank_00000.w_0,
            where 00000 is the rank id.
            """
            rank_id_x = int(x.split('.')[0].split('@')[-1])
            rank_id_y = int(y.split('.')[0].split('@')[-1])
            if rank_id_x < rank_id_y:
                return -1
            elif rank_id_x == rank_id_y:
                return 0
            else:
                return 1

        weight_param_names.sort(key=cmp_to_key(parameter_name_compare))
        weight_velocity_param_names.sort(
            key=cmp_to_key(parameter_name_compare))
        bias_param_names.sort(key=cmp_to_key(parameter_name_compare))
        bias_velocity_param_names.sort(key=cmp_to_key(parameter_name_compare))

        assert len(weight_param_names) == self.pretrain_nranks, \
            logger.error(
                "Number of distributed fc-related weight parameters ({}) "
                "should be equal to the number of ranks ({}) for "
                "pre-training.".format(len(weight_param_names),
                                       self.pretrain_nranks))
        assert len(weight_velocity_param_names) == self.pretrain_nranks, \
            logger.error(
                "Number of distributed fc-related weight parameters ({}) "
                "should be equal to the number of ranks ({}) for "
                "pre-training.".format(len(weight_velocity_param_names),
                                       self.pretrain_nranks))
        assert (len(bias_param_names) == 0 or
                len(bias_param_names) == self.pretrain_nranks), \
            logger.error(
                "Number of distributed fc-related bias parameters ({}) "
                "should be 0 or equal to the number of ranks ({}) for "
                "pre-training.".format(len(bias_param_names),
                                       self.pretrain_nranks))
        assert (len(bias_velocity_param_names) == 0 or
                len(bias_velocity_param_names) == self.pretrain_nranks), \
            logger.error(
                "Number of distributed fc-related bias parameters ({}) "
                "should be 0 or equal to the number of ranks ({}) for "
                "pre-training.".format(len(bias_velocity_param_names),
                                       self.pretrain_nranks))

        pretrain_nranks = self.pretrain_nranks
        nranks = self.nranks
        if pretrain_nranks == nranks:
            logger.info(
                "Pre-training and inference (or fine-tuning) have the same "
                "number of ranks, nothing to do.")
        elif pretrain_nranks < nranks:
            self.split_distfc_parameters(weight_param_names,
                                         weight_velocity_param_names,
                                         bias_param_names,
                                         bias_velocity_param_names)
        else:
            self.concat_distfc_parameters(weight_param_names,
                                          weight_velocity_param_names,
                                          bias_param_names,
                                          bias_velocity_param_names)

        logger.info("Done.")


if __name__ == "__main__":
    converter = ParameterConverter('./trained_model', "./trained_model_temp", 8)
    converter.process()
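The converter's shard sizes come from ceiling division, nshards = (num_classes + nranks - 1) // nranks. For example, with num_classes = 85742, a model pre-trained on 8 GPUs stores weight shards of 10718 classes (the last rank holds the remaining 10716), and converting it for 4 GPUs regroups them into shards of 21436. A usage sketch (the directory names are illustrative):

from plsc.utils.parameter_converter import ParameterConverter

# ./pretrained_model must contain meta.json plus the dist@...@rank@... parameters
converter = ParameterConverter(model_dir='./pretrained_model',
                               output_dir='./converted_model',
                               num_trainers=4)
converter.process()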
plsc/utils/process_distfc_parameter.py
已删除
100644 → 0
浏览文件 @
a36148cf
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
warnings
import
os
import
six
import
logging
import
argparse
import
shutil
import
pickle
import
numpy
as
np
import
paddle
import
paddle.fluid
as
fluid
from
paddle.fluid.transpiler.details
import
program_to_code
logging
.
basicConfig
(
level
=
logging
.
INFO
,
format
=
'[%(levelname)s %(asctime)s line:%(lineno)d] %(message)s'
,
datefmt
=
'%d %b %Y %H:%M:%S'
)
logger
=
logging
.
getLogger
()
parser
=
argparse
.
ArgumentParser
(
description
=
"""
Tool to convert pretrained distributed fc parameters for inference.
Note that the number of ranks or GPUs for inference can be different
from that for pretraining."""
)
parser
.
add_argument
(
"--name_feature"
,
type
=
str
,
default
=
"@rank@"
,
help
=
"Feature for names of distributed fc parameters. "
"For example, by default the name for the "
"distributed fc weight parameter is like "
"dist@xxx@rank@id.w_0 where xxx is softmax or arcface "
"depending on the loss types used and rank_id is the "
"rank_id generating this parameter, and hence the "
"feature cloud be @rank@."
)
parser
.
add_argument
(
"--pretrain_nranks"
,
type
=
int
,
default
=-
1
,
help
=
"Number of ranks (GPUs) for pre-training."
)
parser
.
add_argument
(
"--nranks"
,
type
=
int
,
required
=
True
,
help
=
"Number of ranks (GPUs) for inference or finetuning."
)
parser
.
add_argument
(
"--num_classes"
,
type
=
int
,
default
=-
1
,
help
=
"Number of classes for classification."
)
parser
.
add_argument
(
"--emb_dim"
,
type
=
int
,
default
=-
1
,
help
=
"Embedding dim."
)
parser
.
add_argument
(
"--pretrained_model_dir"
,
type
=
str
,
required
=
True
,
default
=
None
,
help
=
"Directory for pretrained model."
)
parser
.
add_argument
(
"--output_dir"
,
type
=
str
,
required
=
True
,
default
=
None
,
help
=
"Directory for output."
)
args
=
parser
.
parse_args
()
def
load_config
(
args
):
"""
Load config file which contains the following information for pretraining:
1. pretrain_nranks (int): number of ranks for pretraining;
2. emb_dim (int): embedding dim for pretraining;
3. num_classes (int): number of classes for classification.
"""
meta_file
=
os
.
path
.
join
(
args
.
pretrained_model_dir
,
'meta.pickle'
)
if
not
os
.
path
.
exists
(
meta_file
):
if
args
.
pretrain_nranks
<
0
or
args
.
emb_dim
<
0
or
args
.
num_classes
<
0
:
logger
.
error
(
"Meta file does not exist, you have to set "
"'--pretrain_nranks', '--emb_dim' and '--num_classes "
"parameters manually."
)
exit
()
logger
.
debug
(
"Meta file does not exist, make sure you have correctly "
"set --pretrain_nranks ({}), --emb_dim ({}) and "
"--num_classes ({}) parameters manually."
.
format
(
args
.
pretrain_nranks
,
args
.
emb_dim
,
args
.
num_classes
))
else
:
with
open
(
meta_file
,
'rb'
)
as
handle
:
config
=
pickle
.
load
(
handle
)
if
args
.
pretrain_nranks
<
0
:
args
.
pretrain_nranks
=
config
[
'pretrain_nranks'
]
elif
args
.
pretrain_nranks
!=
config
[
'pretrain_nranks'
]:
logger
.
error
(
"The --pretrain_nranks ({}) parameter you set is not "
"equal to that ({}) for pretraining, please check "
"it."
.
format
(
args
.
pretrain_nranks
,
config
[
'pretrain_nranks'
]))
exit
()
if
args
.
emb_dim
<
0
:
args
.
emb_dim
=
config
[
'emb_dim'
]
elif
args
.
emb_dim
!=
config
[
'emb_dim'
]:
logger
.
error
(
"The --emb_dim ({}) parameter you set is not equal to "
"that ({}) for pretraining, please check it."
.
format
(
args
.
emb_dim
,
config
[
'emb_dim'
]))
exit
()
if
args
.
num_classes
<
0
:
args
.
num_classes
=
config
[
'num_classes'
]
elif
args
.
num_classes
!=
config
[
'num_classes'
]:
logger
.
error
(
"The --num_classes ({}) parameter you set is not equal"
" to that ({}) for pretraining, please check "
"it."
.
format
(
args
.
emb_dim
,
config
[
'emb_dim'
]))
exit
()
logger
.
debug
(
"Parameters for pretraining: pretrain_nranks ({}), emb_dim "
"({}), and num_classes ({})."
.
format
(
args
.
pretrain_nranks
,
args
.
emb_dim
,
args
.
num_classes
))
logger
.
debug
(
"Parameters for inference or finetuning: nranks ({})."
.
format
(
args
.
nranks
))
def
find_distfc_var_names
(
args
):
"""
Find all names of pretrained distfc-related parameters,
e.g., dist_softmax_rank_00000.w_0, dist_softmax_rank_00000.b_0 etc.
We assume that names of distfc-related parameters start with the
prefix 'dist'.
"""
var_names
=
[]
model_dir
=
os
.
path
.
abspath
(
args
.
pretrained_model_dir
)
if
not
os
.
path
.
exists
(
model_dir
):
logger
.
error
(
"The directory for pretrained model ({}) does not exist, "
"please check it."
.
format
(
model_dir
))
exit
()
logger
.
info
(
"The directory for pretrained model: {}"
.
format
(
model_dir
))
args
.
pretrained_model_dir
=
model_dir
for
file
in
os
.
listdir
(
model_dir
):
if
args
.
name_feature
in
file
:
var_names
.
append
(
file
)
assert
len
(
var_names
)
>
0
,
\
logger
.
error
(
"No distributed fc parameters found."
)
logger
.
info
(
"Number of distributed fc parameters: {}."
.
format
(
len
(
var_names
)))
logger
.
debug
(
"Distributed fc parameters: {}."
.
format
(
var_names
))
return
var_names
def split_load_and_save(args,
                        name_index,
                        param_names,
                        save_rank_id,
                        remainder,
                        as_bias,
                        train_nshards,
                        train_nranks,
                        nshards,
                        dtype="float32"):
    var2 = None
    advance = False
    emb_dim = args.emb_dim
    main_program = fluid.Program()
    startup_program = fluid.Program()

    load_var_name = param_names[name_index]
    save_var_name_list = load_var_name.split('.')
    save_var_name_list[0] = save_var_name_list[0].split('@')
    save_var_name_list[0][-1] = "%05d" % save_rank_id
    save_var_name_list[0] = '@'.join(save_var_name_list[0])
    save_var_name = '.'.join(save_var_name_list)

    last_train_nshards = args.num_classes - (train_nranks - 1) * train_nshards

    with fluid.program_guard(main_program, startup_program):
        if name_index == train_nranks - 1:
            var_dim = last_train_nshards
        else:
            var_dim = train_nshards

        shape = [var_dim] if as_bias else [emb_dim, var_dim]
        var = fluid.layers.create_parameter(shape,
                                            dtype=dtype,
                                            name=load_var_name)

        if as_bias:
            var = fluid.layers.slice(var,
                                     axes=[0],
                                     starts=[var.shape[0] - remainder],
                                     ends=[var.shape[0]])
        else:
            var = fluid.layers.split(
                var, [var.shape[1] - remainder, remainder], dim=1)[1]

        save_var_dim = nshards
        if remainder < nshards:
            if name_index == train_nranks - 1:
                save_var_dim = remainder
            else:
                name_index += 1
                advance = True
                load_var_name = param_names[name_index]
                if name_index == train_nranks - 1:
                    var_dim = last_train_nshards
                else:
                    var_dim = train_nshards
                shape = [var_dim] if as_bias else [emb_dim, var_dim]
                var2 = fluid.layers.create_parameter(shape,
                                                     dtype=dtype,
                                                     name=load_var_name)
                if remainder + var_dim < nshards:
                    # The last train rank
                    save_var_dim = remainder + var_dim
                else:
                    remainder = remainder + var_dim - nshards
        elif remainder == nshards:
            if name_index == train_nranks - 2:
                remainder = last_train_nshards
                advance = True
            elif name_index < train_nranks - 2:
                remainder = train_nshards
                advance = True
        else:
            remainder = remainder - nshards

        if var2 is not None:
            var = fluid.layers.concat([var, var2], axis=0 if as_bias else 1)

        shape = [save_var_dim] if as_bias else [emb_dim, save_var_dim]
        to_save_var = fluid.layers.create_parameter(shape,
                                                    dtype=dtype,
                                                    name=save_var_name + '_temp')

        if save_var_dim != nshards:
            # get the last dim
            if as_bias:
                temp_var = fluid.layers.slice(var,
                                              axes=[0],
                                              starts=[var.shape[0] - save_var_dim],
                                              ends=[var.shape[0]])
            else:
                temp_var = fluid.layers.split(
                    var, [var.shape[1] - save_var_dim, save_var_dim], dim=1)[1]
            fluid.layers.assign(temp_var, to_save_var)
        else:
            if as_bias:
                temp_var = fluid.layers.slice(var,
                                              axes=[0],
                                              starts=[0],
                                              ends=[nshards])
            else:
                temp_var = fluid.layers.split(
                    var, [nshards, var.shape[1] - nshards], dim=1)[0]
            fluid.layers.assign(temp_var, to_save_var)

    def expected_var(var):
        has_var = os.path.exists(os.path.join(args.pretrained_model_dir,
                                              var.name))
        if has_var:
            return True
        return False

    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_program)
    fluid.io.load_vars(exe,
                       dirname=args.pretrained_model_dir,
                       predicate=expected_var,
                       main_program=main_program)
    exe.run(main_program)

    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    fluid.io.save_vars(exe,
                       args.output_dir,
                       vars=[to_save_var],
                       main_program=main_program)
    srcfile = os.path.join(args.output_dir, to_save_var.name)
    dstfile = os.path.join(args.output_dir, save_var_name)
    shutil.move(srcfile, dstfile)
    return remainder, advance

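The name remapping at the top of this function keeps the parameter name but swaps in the id of the rank being written. A standalone sketch of just that step, using a hypothetical parameter name:

load_var_name = 'dist@fc@rank@00003.w_0'  # hypothetical pretrained parameter name
save_rank_id = 1
parts = load_var_name.split('.')
head = parts[0].split('@')
head[-1] = "%05d" % save_rank_id
parts[0] = '@'.join(head)
print('.'.join(parts))  # dist@fc@rank@00001.w_0
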
def split_parameters(args, param_names, as_bias):
    """
    Split parameters whose names are in param_names.
    Params:
        args: command line parameters
        param_names: list of names of parameters to split
        as_bias: whether the parameters to split are used as bias or not
    """
    num_classes = args.num_classes
    train_nranks = args.pretrain_nranks
    nranks = args.nranks
    train_nshards = (num_classes + train_nranks - 1) // train_nranks
    nshards = (num_classes + nranks - 1) // nranks  # for inference or finetuning

    save_rank_id = 0
    remainder_var_dim = train_nshards  # remainder dim that is not yet split in a var
    name_index = 0  # index of the name of the pretrained parameter to process
    for save_rank_id in range(nranks):
        assert name_index < train_nranks
        remainder_var_dim, advance = split_load_and_save(
            args, name_index, param_names, save_rank_id, remainder_var_dim,
            as_bias, train_nshards, train_nranks, nshards)
        name_index += 1 if advance else 0
    processed_var_count = name_index + 1

    assert processed_var_count == train_nranks, logger.error(
        "Number of pretrained parameters processed ({}) is not equal to the "
        "number of ranks ({}) for pretraining.".format(processed_var_count,
                                                       train_nranks))
    assert save_rank_id == nranks - 1, logger.error(
        "Number of saved parameters ({}) is not equal to the number of ranks "
        "({}) for inference or finetuning.".format(save_rank_id + 1, nranks))

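To make the shard arithmetic concrete, here is a small worked example with assumed sizes (85742 classes pretrained on 4 ranks, converted for 8 ranks):

num_classes = 85742
train_nranks, nranks = 4, 8  # assumed example values
train_nshards = (num_classes + train_nranks - 1) // train_nranks  # 21436 classes per pretrained shard
nshards = (num_classes + nranks - 1) // nranks                    # 10718 classes per target shard
last_train_nshards = num_classes - (train_nranks - 1) * train_nshards  # 21434, the last pretrained shard is smaller
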
def split_distfc_parameters(args,
                            weight_param_names,
                            weight_velocity_param_names,
                            bias_param_names,
                            bias_velocity_param_names):
    """
    Split each distributed fc-related parameter according to the number of
    ranks for inference or finetuning.
    Params:
        args: command line parameters
        weight_param_names: list of names of weight parameters
        bias_param_names: list of names of bias parameters
    """
    split_parameters(args, weight_param_names, as_bias=False)
    split_parameters(args, weight_velocity_param_names, as_bias=False)
    if len(bias_param_names) != 0:
        split_parameters(args, bias_param_names, as_bias=True)
        split_parameters(args, bias_velocity_param_names, as_bias=True)

def concat_load_and_save(args,
                         name_index,
                         param_names,
                         save_rank_id,
                         remainder,
                         as_bias,
                         train_nshards,
                         train_nranks,
                         nshards,
                         dtype="float32"):
    advance = 0
    orig_nshards = nshards
    emb_dim = args.emb_dim
    main_program = fluid.Program()
    startup_program = fluid.Program()

    load_var_name = param_names[name_index]
    save_var_name_list = load_var_name.split('.')
    save_var_name_list[0] = save_var_name_list[0].split('@')
    save_var_name_list[0][-1] = "%05d" % save_rank_id
    save_var_name_list[0] = '@'.join(save_var_name_list[0])
    save_var_name = '.'.join(save_var_name_list)

    last_train_nshards = args.num_classes - (train_nranks - 1) * train_nshards

    with fluid.program_guard(main_program, startup_program):
        if name_index == train_nranks - 1:
            var_dim = last_train_nshards
        else:
            var_dim = train_nshards

        shape = [var_dim] if as_bias else [emb_dim, var_dim]
        var = fluid.layers.create_parameter(shape,
                                            dtype=dtype,
                                            name=load_var_name)

        if as_bias:
            var = fluid.layers.slice(var,
                                     axes=[0],
                                     starts=[var.shape[0] - remainder],
                                     ends=[var.shape[0]])
        else:
            var = fluid.layers.split(
                var, [var.shape[1] - remainder, remainder], dim=1)[1]

        to_concat_var_list = [var]
        while remainder < nshards and name_index < train_nranks - 1:
            name_index += 1
            advance += 1
            load_var_name = param_names[name_index]
            if name_index == train_nranks - 1:
                var_dim = last_train_nshards
            else:
                var_dim = train_nshards
            shape = [var_dim] if as_bias else [emb_dim, var_dim]
            var = fluid.layers.create_parameter(shape,
                                                dtype=dtype,
                                                name=load_var_name)
            to_concat_var_list.append(var)
            remainder += var_dim
        if len(to_concat_var_list) > 1:
            var = fluid.layers.concat(to_concat_var_list,
                                      axis=0 if as_bias else 1)

        save_var_dim = nshards
        if remainder > nshards:
            if as_bias:
                var = fluid.layers.slice(var,
                                         axes=[0],
                                         starts=[0],
                                         ends=[nshards])
            else:
                var = fluid.layers.split(
                    var, [nshards, var.shape[1] - nshards], dim=1)[0]
            remainder = remainder - nshards
        elif remainder == nshards:
            if name_index == train_nranks - 2:
                # advance += 1 if len(to_concat_var_list) > 1 else 0  # to avoid duplicate add
                # name_index += 1 if len(to_concat_var_list) > 1 else 0  # to avoid duplicate add
                advance += 1
                name_index += 1
                remainder = last_train_nshards
            elif name_index < train_nranks - 2:
                # advance += 1 if len(to_concat_var_list) > 1 else 0  # to avoid duplicate add
                # name_index += 1 if len(to_concat_var_list) > 1 else 0  # to avoid duplicate add
                advance += 1
                name_index += 1
                remainder = train_nshards
        else:
            save_var_dim = remainder

        shape = [save_var_dim] if as_bias else [emb_dim, save_var_dim]
        to_save_var = fluid.layers.create_parameter(shape,
                                                    dtype=dtype,
                                                    name=save_var_name + '_temp')
        fluid.layers.assign(var, to_save_var)

    def expected_var(var):
        has_var = os.path.exists(os.path.join(args.pretrained_model_dir,
                                              var.name))
        if has_var:
            return True
        return False

    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_program)
    fluid.io.load_vars(exe,
                       dirname=args.pretrained_model_dir,
                       predicate=expected_var,
                       main_program=main_program)
    exe.run(main_program)

    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    fluid.io.save_vars(exe,
                       args.output_dir,
                       vars=[to_save_var],
                       main_program=main_program)
    srcfile = os.path.join(args.output_dir, to_save_var.name)
    dstfile = os.path.join(args.output_dir, save_var_name)
    shutil.move(srcfile, dstfile)
    return remainder, advance

def concat_parameters(args, param_names, as_bias):
    """
    Concat parameters whose names are in param_names.
    Params:
        args: command line parameters
        param_names: list of names of parameters to concat
        as_bias: whether the parameters to concat are used as bias or not
    """
    num_classes = args.num_classes
    train_nranks = args.pretrain_nranks
    nranks = args.nranks
    train_nshards = (num_classes + train_nranks - 1) // train_nranks
    nshards = (num_classes + nranks - 1) // nranks  # for inference or finetuning

    save_rank_id = 0
    remainder_dim = train_nshards  # remainder dim that has not been concatenated
    name_index = 0  # index of the name of the pretrained parameter to process
    for save_rank_id in range(nranks):
        assert name_index < train_nranks
        remainder_dim, advance = concat_load_and_save(
            args, name_index, param_names, save_rank_id, remainder_dim,
            as_bias, train_nshards, train_nranks, nshards)
        name_index += advance
    processed_var_count = name_index + 1

    assert processed_var_count == train_nranks, logger.error(
        "Number of pretrained parameters processed ({}) is not equal to the "
        "number of ranks ({}) for pretraining.".format(processed_var_count,
                                                       train_nranks))
    assert save_rank_id == nranks - 1, logger.error(
        "Number of saved parameters ({}) is not equal to the number of ranks "
        "({}) for inference or finetuning.".format(save_rank_id + 1, nranks))

def concat_distfc_parameters(args,
                             weight_param_names,
                             weight_velocity_param_names,
                             bias_param_names,
                             bias_velocity_param_names):
    """
    Concat distributed fc-related parameters according to the number of ranks
    for inference or finetuning.
    Params:
        args: command line parameters
        weight_param_names: list of names of weight parameters
        bias_param_names: list of names of bias parameters
    """
    concat_parameters(args, weight_param_names, as_bias=False)
    concat_parameters(args, weight_velocity_param_names, as_bias=False)
    if len(bias_param_names) != 0:
        concat_parameters(args, bias_param_names, as_bias=True)
        concat_parameters(args, bias_velocity_param_names, as_bias=True)

def parameter_name_compare(x, y):
    """
    Compare two parameter names by their rank ids.
    A parameter name looks like dist_softmax_rank_00000.w_0,
    where 00000 is the rank id.
    """
    rank_id_x = int(x.split('.')[0].split('@')[-1])
    rank_id_y = int(y.split('.')[0].split('@')[-1])
    if rank_id_x < rank_id_y:
        return -1
    elif rank_id_x == rank_id_y:
        return 0
    else:
        return 1

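Note that list.sort() no longer accepts a comparison function under Python 3, so the sort calls in main() below work as written only on Python 2. A hedged sketch of the same ordering for Python 3 would go through functools.cmp_to_key (the parameter names here are hypothetical):

from functools import cmp_to_key

names = ['dist@fc@rank@00002.w_0', 'dist@fc@rank@00000.w_0', 'dist@fc@rank@00001.w_0']
names.sort(key=cmp_to_key(parameter_name_compare))  # ranks 00000, 00001, 00002
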
def main():
    global args
    load_config(args)
    var_names = find_distfc_var_names(args)

    weight_param_names = [
        name for name in var_names
        if '.w' in name and 'velocity' not in name
    ]
    weight_velocity_param_names = [
        name for name in var_names
        if '.w' in name and 'velocity' in name
    ]
    bias_param_names = [
        name for name in var_names
        if '.b' in name and 'velocity' not in name
    ]
    bias_velocity_param_names = [
        name for name in var_names
        if '.b' in name and 'velocity' in name
    ]

    weight_param_names.sort(parameter_name_compare)
    weight_velocity_param_names.sort(parameter_name_compare)
    bias_param_names.sort(parameter_name_compare)
    bias_velocity_param_names.sort(parameter_name_compare)

    assert len(weight_param_names) == args.pretrain_nranks, \
        logger.error("Number of distributed fc-related weight parameters ({}) "
                     "should be equal to the number of ranks ({}) for "
                     "pretraining.".format(len(weight_param_names),
                                           args.pretrain_nranks))
    assert len(weight_velocity_param_names) == args.pretrain_nranks, \
        logger.error("Number of distributed fc-related weight velocity "
                     "parameters ({}) should be equal to the number of ranks "
                     "({}) for pretraining.".format(
                         len(weight_velocity_param_names),
                         args.pretrain_nranks))
    assert len(bias_param_names) == 0 or \
        len(bias_param_names) == args.pretrain_nranks, logger.error(
            "Number of distributed fc-related bias parameters ({}) should be 0 "
            "or equal to the number of ranks ({}) for pretraining.".format(
                len(bias_param_names), args.pretrain_nranks))
    assert len(bias_velocity_param_names) == 0 or \
        len(bias_velocity_param_names) == args.pretrain_nranks, logger.error(
            "Number of distributed fc-related bias velocity parameters ({}) "
            "should be 0 or equal to the number of ranks ({}) for "
            "pretraining.".format(len(bias_velocity_param_names),
                                  args.pretrain_nranks))

    pretrain_nranks = args.pretrain_nranks
    nranks = args.nranks
    if pretrain_nranks == nranks:
        logger.info("Pre-training and inference (or finetuning) have the same "
                    "number of ranks, nothing to do.")
    elif pretrain_nranks < nranks:
        split_distfc_parameters(args, weight_param_names,
                                weight_velocity_param_names, bias_param_names,
                                bias_velocity_param_names)
    else:
        concat_distfc_parameters(args, weight_param_names,
                                 weight_velocity_param_names, bias_param_names,
                                 bias_velocity_param_names)
    logger.info("Done.")


if __name__ == "__main__":
    main()

plsc/version.py
View file @ c56ceffc

@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """ PLSC version string """
-plsc_version = "0.1.0"
+plsc_version = "0.0.0"

tools/process_base64_files.py
View file @ c56ceffc

@@ -12,29 +12,28 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import print_function
-from __future__ import division
-import os
+from __future__ import division
+from __future__ import print_function
 import argparse
-import random
-import time
-import math
 import logging
+import math
+import os
+import random
 import sqlite3
 import tempfile
+import time
 import six

 logging.basicConfig(level=logging.INFO,
-                    format='[%(levelname)s %(asctime)s line:%(lineno)d] %(message)s',
+                    format='[%(asctime)s - %(levelname)s - %(message)s',
                     datefmt='%d %b %Y %H:%M:%S')
 logger = logging.getLogger()

 parser = argparse.ArgumentParser(description="""
 Tool to preprocess dataset in base64 format.""")

 """
 We assume that the directory of dataset contains a file-list file, and one
 or more data files. Each line of the file-list file represents a data file.

@@ -111,9 +110,9 @@ class Base64Preprocessor(object):
                 line = line.strip()
                 file_path = os.path.join(self.data_dir, line)
                 with open(file_path, 'r') as df:
-                    for line in df.xreadlines():
-                        line = line.strip().split('\t')
-                        self.insert_to_db(cnt, line)
+                    for line_local in df.xreadlines():
+                        line_local = line_local.strip().split('\t')
+                        self.insert_to_db(cnt, line_local)
                         cnt += 1
                 os.remove(file_path)
         else:

@@ -121,9 +120,9 @@ class Base64Preprocessor(object):
                 line = line.strip()
                 file_path = os.path.join(self.data_dir, line)
                 with open(file_path, 'r') as df:
-                    for line in df:
-                        line = line.strip().split('\t')
-                        self.insert_to_db(cnt, line)
+                    for line_local in df:
+                        line_local = line_local.strip().split('\t')
+                        self.insert_to_db(cnt, line_local)
                         cnt += 1
                 os.remove(file_path)

@@ -143,19 +142,20 @@ class Base64Preprocessor(object):
         start_time = time.time()
         lines_per_rank = int(math.ceil(num / nranks))
         total_num = lines_per_rank * nranks
         index = index + index[0:total_num - num]
         assert len(index) == total_num

         for rank in range(nranks):
             start = rank * lines_per_rank
             end = (rank + 1) * lines_per_rank  # exclusive
             f_handler = open(os.path.join(self.data_dir,
                                           ".tmp_" + str(rank)), 'w')
             for i in range(start, end):
                 idx = index[i]
                 sql_cmd = "SELECT DATA, LABEL FROM DATASET WHERE ID={};".format(idx)
                 cursor = self.cursor.execute(sql_cmd)
                 for result in cursor:
                     data = result[0]

@@ -174,7 +174,7 @@ class Base64Preprocessor(object):
                 line += '\n'
                 f_t.writelines(line)
             os.rename(os.path.join(data_dir, ".tmp_" + str(rank)),
                       os.path.join(data_dir, "base64_rank_{}".format(rank)))

         os.remove(file_list)
         os.rename(temp_file_list, file_list)

@@ -183,21 +183,16 @@ class Base64Preprocessor(object):
     def close_db(self):
         self.conn.close()
         self.tempfile.close()
+        os.remove(self.sqlite3_file)


 def main():
     global args
     obj = Base64Preprocessor(args.data_dir, args.file_list, args.nranks)
     obj.shuffle_files()
     obj.close_db()
-    #data_dir = args.data_dir
-    #file_list = args.file_list
-    #nranks = args.nranks
-    #names, file_num_map, num = get_image_info(data_dir, file_list)
-    #


 if __name__ == "__main__":
     main()
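The padding arithmetic in shuffle_files relies on true division, which is why the `from __future__ import division` import above matters under Python 2. A small standalone check with assumed numbers (10 samples shuffled across 4 ranks):

import math

num, nranks = 10, 4  # assumed example sizes
index = list(range(num))
lines_per_rank = int(math.ceil(num / nranks))  # 3 with true division (2.5 rounded up)
total_num = lines_per_rank * nranks            # 12
index = index + index[0:total_num - num]       # repeat the first 2 ids so every rank gets 3 lines
assert len(index) == total_num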