Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleClas
提交
6529765a
P
PaddleClas
项目概览
PaddlePaddle
/
PaddleClas
1 年多 前同步成功
通知
116
Star
4999
Fork
1114
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
19
列表
看板
标记
里程碑
合并请求
6
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleClas
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
19
Issue
19
列表
看板
标记
里程碑
合并请求
6
合并请求
6
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
6529765a
编写于
9月 23, 2021
作者:
W
weishengyu
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
update pksampler
上级
af25e256
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
34 addition
and
25 deletion
+34
-25
ppcls/data/dataloader/pk_sampler.py
ppcls/data/dataloader/pk_sampler.py
+34
-25
未找到文件。
ppcls/data/dataloader/pk_sampler.py
浏览文件 @
6529765a
...
...
@@ -48,50 +48,59 @@ class PKSampler(DistributedBatchSampler):
"PKSampler configs error, Sample_per_id must be a divisor of batch_size."
assert
hasattr
(
self
.
dataset
,
"labels"
),
"Dataset must have labels attribute."
self
.
sample_per_
id
=
sample_per_id
self
.
sample_per_
label
=
sample_per_id
self
.
label_dict
=
defaultdict
(
list
)
self
.
sample_method
=
sample_method
for
idx
,
label
in
enumerate
(
self
.
dataset
.
labels
):
self
.
label_dict
[
label
].
append
(
idx
)
self
.
label_list
=
list
(
self
.
label_dict
)
assert
len
(
self
.
label_list
)
*
self
.
sample_per_label
>
self
.
batch_size
,
\
"batch size should be smaller than "
if
self
.
sample_method
==
"id_avg_prob"
:
for
idx
,
label
in
enumerate
(
self
.
dataset
.
labels
):
self
.
label_dict
[
label
].
append
(
idx
)
self
.
id_list
=
list
(
self
.
label_dict
)
self
.
prob_list
=
np
.
array
([
1
/
len
(
self
.
label_list
)]
*
len
(
self
.
label_list
))
elif
self
.
sample_method
==
"sample_avg_prob"
:
self
.
id_list
=
[]
for
idx
,
label
in
enumerate
(
self
.
dataset
.
labels
):
self
.
label_dict
[
label
].
append
(
idx
)
counter
=
[]
for
label_i
in
self
.
label_list
:
counter
.
append
(
len
(
self
.
label_list
[
label_i
]))
self
.
prob_list
=
np
.
array
(
counter
)
/
sum
(
counter
)
else
:
logger
.
error
(
"PKSampler only support id_avg_prob and sample_avg_prob sample method, "
"but receive {}."
.
format
(
self
.
sample_method
))
if
sum
(
np
.
abs
(
self
.
prob_list
-
1
)
>
0.00000001
):
self
.
prob_list
[
-
1
]
=
1
-
sum
(
self
.
prob_list
[:
-
1
])
if
self
.
prob_list
[
-
1
]
>
1
or
self
.
prob_list
[
-
1
]
<
0
:
logger
.
error
(
"PKSampler prob list error"
)
else
:
logger
.
info
(
"PKSampler: sum of prob list not equal to 1, change the last prob"
)
def
__iter__
(
self
):
label_per_batch
=
self
.
batch_size
//
self
.
sample_per_label
if
self
.
shuffle
:
np
.
random
.
RandomState
(
self
.
epoch
).
shuffle
(
self
.
id_list
)
id_list
=
self
.
id_list
[
self
.
local_rank
*
len
(
self
.
id_list
)
//
self
.
nranks
:(
self
.
local_rank
+
1
)
*
len
(
self
.
id_list
)
//
self
.
nranks
]
if
self
.
sample_method
==
"id_avg_prob"
:
id_batch_num
=
len
(
id_list
)
*
self
.
sample_per_id
//
self
.
batch_size
if
id_batch_num
<
len
(
self
):
id_list
=
id_list
*
(
len
(
self
)
//
id_batch_num
+
1
)
id_list
=
id_list
[
0
:
len
(
self
)]
id_per_batch
=
self
.
batch_size
//
self
.
sample_per_id
np
.
random
.
RandomState
(
self
.
epoch
).
shuffle
(
self
.
label_list
)
for
i
in
range
(
len
(
self
)):
batch_index
=
[]
for
label_id
in
id_list
[
i
*
id_per_batch
:(
i
+
1
)
*
id_per_batch
]:
idx_label_list
=
self
.
label_dict
[
label_id
]
if
self
.
sample_per_id
<=
len
(
idx_label_list
):
batch_label_list
=
np
.
random
.
sample
(
self
.
label_list
,
size
=
label_per_batch
,
replace
=
False
,
p
=
self
.
prob_list
)
for
label_i
in
batch_label_list
:
label_i_indexes
=
self
.
label_dict
[
label_i
]
if
self
.
sample_per_label
<=
len
(
label_i_indexes
):
batch_index
.
extend
(
np
.
random
.
choice
(
idx_label_list
,
size
=
self
.
sample_per_
id
,
label_i_indexes
,
size
=
self
.
sample_per_
label
,
replace
=
False
))
else
:
batch_index
.
extend
(
np
.
random
.
choice
(
idx_label_list
,
size
=
self
.
sample_per_
id
,
label_i_indexes
,
size
=
self
.
sample_per_
label
,
replace
=
True
))
if
not
self
.
drop_last
or
len
(
batch_index
)
==
self
.
batch_size
:
yield
batch_index
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录