Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
c67c3916
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
c67c3916
编写于
9月 16, 2020
作者:
Y
yaoxuefeng
提交者:
GitHub
9月 16, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
refine fleet dataset class api (#27133)
上级
c296618c
变更
11
展开全部
显示空白变更内容
内联
并排
Showing
11 changed file
with
835 addition
and
568 deletion
+835
-568
python/paddle/distributed/__init__.py
python/paddle/distributed/__init__.py
+3
-5
python/paddle/distributed/fleet/__init__.py
python/paddle/distributed/fleet/__init__.py
+0
-1
python/paddle/distributed/fleet/dataset/dataset.py
python/paddle/distributed/fleet/dataset/dataset.py
+586
-414
python/paddle/fluid/reader.py
python/paddle/fluid/reader.py
+2
-2
python/paddle/fluid/tests/unittests/dist_fleet_ctr.py
python/paddle/fluid/tests/unittests/dist_fleet_ctr.py
+7
-5
python/paddle/fluid/tests/unittests/dist_fleet_ctr_ps_gpu.py
python/paddle/fluid/tests/unittests/dist_fleet_ctr_ps_gpu.py
+5
-5
python/paddle/fluid/tests/unittests/dist_fleet_heter_ctr.py
python/paddle/fluid/tests/unittests/dist_fleet_heter_ctr.py
+5
-5
python/paddle/fluid/tests/unittests/test_dataset.py
python/paddle/fluid/tests/unittests/test_dataset.py
+212
-116
python/paddle/fluid/tests/unittests/test_dataset_dataloader.py
...n/paddle/fluid/tests/unittests/test_dataset_dataloader.py
+7
-5
python/paddle/fluid/tests/unittests/test_fleet_rolemaker_2.py
...on/paddle/fluid/tests/unittests/test_fleet_rolemaker_2.py
+2
-3
python/paddle/fluid/tests/unittests/test_monitor.py
python/paddle/fluid/tests/unittests/test_monitor.py
+6
-7
未找到文件。
python/paddle/distributed/__init__.py
浏览文件 @
c67c3916
...
@@ -21,6 +21,7 @@ from .parallel import get_rank
...
@@ -21,6 +21,7 @@ from .parallel import get_rank
from
.parallel
import
get_world_size
from
.parallel
import
get_world_size
from
paddle.fluid.dygraph.parallel
import
prepare_context
#DEFINE_ALIAS
from
paddle.fluid.dygraph.parallel
import
prepare_context
#DEFINE_ALIAS
from
paddle.fluid.dygraph.parallel
import
ParallelEnv
#DEFINE_ALIAS
from
paddle.fluid.dygraph.parallel
import
ParallelEnv
#DEFINE_ALIAS
from
paddle.distributed.fleet.dataset
import
*
from
.
import
collective
from
.
import
collective
from
.collective
import
*
from
.collective
import
*
...
@@ -30,11 +31,8 @@ __all__ = ["spawn"]
...
@@ -30,11 +31,8 @@ __all__ = ["spawn"]
# dygraph parallel apis
# dygraph parallel apis
__all__
+=
[
__all__
+=
[
"init_parallel_env"
,
"init_parallel_env"
,
"get_rank"
,
"get_world_size"
,
"prepare_context"
,
"get_rank"
,
"ParallelEnv"
,
"InMemoryDataset"
,
"QueueDataset"
"get_world_size"
,
"prepare_context"
,
"ParallelEnv"
,
]
]
# collective apis
# collective apis
...
...
python/paddle/distributed/fleet/__init__.py
浏览文件 @
c67c3916
...
@@ -23,7 +23,6 @@ from .dataset import *
...
@@ -23,7 +23,6 @@ from .dataset import *
__all__
=
[
__all__
=
[
"DistributedStrategy"
,
"DistributedStrategy"
,
"UtilBase"
,
"UtilBase"
,
"DatasetFactory"
,
"UserDefinedRoleMaker"
,
"UserDefinedRoleMaker"
,
"PaddleCloudRoleMaker"
,
"PaddleCloudRoleMaker"
,
"Fleet"
,
"Fleet"
,
...
...
python/paddle/distributed/fleet/dataset/dataset.py
浏览文件 @
c67c3916
此差异已折叠。
点击以展开。
python/paddle/fluid/reader.py
浏览文件 @
c67c3916
...
@@ -1726,13 +1726,13 @@ class DatasetLoader(DataLoaderBase):
...
@@ -1726,13 +1726,13 @@ class DatasetLoader(DataLoaderBase):
logging
.
warn
(
'thread_num {} which is set in Dataset is ignored'
.
logging
.
warn
(
'thread_num {} which is set in Dataset is ignored'
.
format
(
dataset
.
thread_num
))
format
(
dataset
.
thread_num
))
dataset
.
set_thread
(
thread_num
)
dataset
.
_
set_thread
(
thread_num
)
if
isinstance
(
dataset
,
paddle
.
distributed
.
fleet
.
dataset
.
if
isinstance
(
dataset
,
paddle
.
distributed
.
fleet
.
dataset
.
InMemoryDataset
)
and
dataset
.
queue_num
>
thread_num
:
InMemoryDataset
)
and
dataset
.
queue_num
>
thread_num
:
logging
.
warn
(
"queue_num {} which is set in Dataset is ignored"
.
logging
.
warn
(
"queue_num {} which is set in Dataset is ignored"
.
format
(
dataset
.
queue_num
))
format
(
dataset
.
queue_num
))
dataset
.
set_queue_num
(
thread_num
)
dataset
.
_
set_queue_num
(
thread_num
)
self
.
_dataset
=
dataset
self
.
_dataset
=
dataset
use_slots
=
[
use_slots
=
[
...
...
python/paddle/fluid/tests/unittests/dist_fleet_ctr.py
浏览文件 @
c67c3916
...
@@ -208,14 +208,16 @@ class TestDistCTR2x2(FleetDistRunnerBase):
...
@@ -208,14 +208,16 @@ class TestDistCTR2x2(FleetDistRunnerBase):
filelist
=
train_file_list
filelist
=
train_file_list
# config dataset
# config dataset
dataset
=
paddle
.
distributed
.
fleet
.
DatasetFactory
().
create_dataset
()
dataset
=
paddle
.
distributed
.
QueueDataset
()
dataset
.
set_batch_size
(
batch_size
)
dataset
.
set_use_var
(
self
.
feeds
)
pipe_command
=
'python ctr_dataset_reader.py'
pipe_command
=
'python ctr_dataset_reader.py'
dataset
.
set_pipe_command
(
pipe_command
)
dataset
.
init
(
batch_size
=
batch_size
,
use_var
=
self
.
feeds
,
pipe_command
=
pipe_command
,
thread_num
=
thread_num
)
dataset
.
set_filelist
(
filelist
)
dataset
.
set_filelist
(
filelist
)
dataset
.
set_thread
(
thread_num
)
for
epoch_id
in
range
(
1
):
for
epoch_id
in
range
(
1
):
pass_start
=
time
.
time
()
pass_start
=
time
.
time
()
...
...
python/paddle/fluid/tests/unittests/dist_fleet_ctr_ps_gpu.py
浏览文件 @
c67c3916
...
@@ -114,14 +114,14 @@ class TestDistGpuPsCTR2x2(TestDistCTR2x2):
...
@@ -114,14 +114,14 @@ class TestDistGpuPsCTR2x2(TestDistCTR2x2):
filelist
.
append
(
train_file_path
)
filelist
.
append
(
train_file_path
)
# config dataset
# config dataset
dataset
=
paddle
.
fleet
.
DatasetFactory
().
create_d
ataset
()
dataset
=
paddle
.
distributed
.
QueueD
ataset
()
dataset
.
set_batch_size
(
batch_size
)
dataset
.
_
set_batch_size
(
batch_size
)
dataset
.
set_use_var
(
self
.
feeds
)
dataset
.
_
set_use_var
(
self
.
feeds
)
pipe_command
=
'python ctr_dataset_reader.py'
pipe_command
=
'python ctr_dataset_reader.py'
dataset
.
set_pipe_command
(
pipe_command
)
dataset
.
_
set_pipe_command
(
pipe_command
)
dataset
.
set_filelist
(
filelist
)
dataset
.
set_filelist
(
filelist
)
dataset
.
set_thread
(
thread_num
)
dataset
.
_
set_thread
(
thread_num
)
for
epoch_id
in
range
(
1
):
for
epoch_id
in
range
(
1
):
pass_start
=
time
.
time
()
pass_start
=
time
.
time
()
...
...
python/paddle/fluid/tests/unittests/dist_fleet_heter_ctr.py
浏览文件 @
c67c3916
...
@@ -183,14 +183,14 @@ class TestHeterPsCTR2x2(FleetDistHeterRunnerBase):
...
@@ -183,14 +183,14 @@ class TestHeterPsCTR2x2(FleetDistHeterRunnerBase):
print
(
"filelist: {}"
.
format
(
filelist
))
print
(
"filelist: {}"
.
format
(
filelist
))
# config dataset
# config dataset
dataset
=
paddle
.
distributed
.
fleet
.
DatasetFactory
().
create_d
ataset
()
dataset
=
paddle
.
distributed
.
QueueD
ataset
()
dataset
.
set_batch_size
(
batch_size
)
dataset
.
_
set_batch_size
(
batch_size
)
dataset
.
set_use_var
(
self
.
feeds
)
dataset
.
_
set_use_var
(
self
.
feeds
)
pipe_command
=
'python ctr_dataset_reader.py'
pipe_command
=
'python ctr_dataset_reader.py'
dataset
.
set_pipe_command
(
pipe_command
)
dataset
.
_
set_pipe_command
(
pipe_command
)
dataset
.
set_filelist
(
filelist
)
dataset
.
set_filelist
(
filelist
)
dataset
.
set_thread
(
thread_num
)
dataset
.
_
set_thread
(
thread_num
)
for
epoch_id
in
range
(
1
):
for
epoch_id
in
range
(
1
):
pass_start
=
time
.
time
()
pass_start
=
time
.
time
()
...
...
python/paddle/fluid/tests/unittests/test_dataset.py
浏览文件 @
c67c3916
此差异已折叠。
点击以展开。
python/paddle/fluid/tests/unittests/test_dataset_dataloader.py
浏览文件 @
c67c3916
...
@@ -97,9 +97,11 @@ class DatasetLoaderTestBase(unittest.TestCase):
...
@@ -97,9 +97,11 @@ class DatasetLoaderTestBase(unittest.TestCase):
def
check_batch_number
(
self
,
place
,
randomize_batch_num
=
False
):
def
check_batch_number
(
self
,
place
,
randomize_batch_num
=
False
):
main_prog
,
startup_prog
,
feeds
=
self
.
build_network
()
main_prog
,
startup_prog
,
feeds
=
self
.
build_network
()
dataset
=
paddle
.
distributed
.
fleet
.
DatasetFactory
().
create_dataset
(
if
self
.
dataset_name
==
"QueueDataset"
:
self
.
dataset_name
)
dataset
=
paddle
.
distributed
.
QueueDataset
()
dataset
.
set_batch_size
(
BATCH_SIZE
)
else
:
dataset
=
paddle
.
distributed
.
InMemoryDataset
()
dataset
.
_set_batch_size
(
BATCH_SIZE
)
if
isinstance
(
place
,
fluid
.
CPUPlace
):
if
isinstance
(
place
,
fluid
.
CPUPlace
):
file_num
=
10
file_num
=
10
...
@@ -128,8 +130,8 @@ class DatasetLoaderTestBase(unittest.TestCase):
...
@@ -128,8 +130,8 @@ class DatasetLoaderTestBase(unittest.TestCase):
fake_reader
(
batch_num
=
BATCH_NUM
+
random_delta_batch_size
[
i
]))
fake_reader
(
batch_num
=
BATCH_NUM
+
random_delta_batch_size
[
i
]))
dataset
.
set_filelist
(
filelist
)
dataset
.
set_filelist
(
filelist
)
dataset
.
set_use_var
(
feeds
)
dataset
.
_
set_use_var
(
feeds
)
dataset
.
set_pipe_command
(
"cat"
)
dataset
.
_
set_pipe_command
(
"cat"
)
if
self
.
dataset_name
==
'InMemoryDataset'
:
if
self
.
dataset_name
==
'InMemoryDataset'
:
dataset
.
load_into_memory
()
dataset
.
load_into_memory
()
...
...
python/paddle/fluid/tests/unittests/test_fleet_rolemaker_2.py
浏览文件 @
c67c3916
...
@@ -163,10 +163,9 @@ class TestCloudRoleMaker2(unittest.TestCase):
...
@@ -163,10 +163,9 @@ class TestCloudRoleMaker2(unittest.TestCase):
data
=
"1 1 1 1
\n
"
data
=
"1 1 1 1
\n
"
f
.
write
(
data
)
f
.
write
(
data
)
dataset
=
paddle
.
distributed
.
fleet
.
DatasetFactory
().
create_dataset
(
dataset
=
paddle
.
distributed
.
InMemoryDataset
()
"InMemoryDataset"
)
dataset
.
set_filelist
([
"test_fleet_gloo_role_maker_1.txt"
])
dataset
.
set_filelist
([
"test_fleet_gloo_role_maker_1.txt"
])
dataset
.
set_use_var
([
show
,
label
])
dataset
.
_
set_use_var
([
show
,
label
])
dataset
.
load_into_memory
()
dataset
.
load_into_memory
()
dataset
.
get_memory_data_size
(
fleet
)
dataset
.
get_memory_data_size
(
fleet
)
dataset
.
get_shuffle_data_size
(
fleet
)
dataset
.
get_shuffle_data_size
(
fleet
)
...
...
python/paddle/fluid/tests/unittests/test_monitor.py
浏览文件 @
c67c3916
...
@@ -52,18 +52,17 @@ class TestDatasetWithStat(unittest.TestCase):
...
@@ -52,18 +52,17 @@ class TestDatasetWithStat(unittest.TestCase):
name
=
slot
,
shape
=
[
1
],
dtype
=
"int64"
,
lod_level
=
1
)
name
=
slot
,
shape
=
[
1
],
dtype
=
"int64"
,
lod_level
=
1
)
slots_vars
.
append
(
var
)
slots_vars
.
append
(
var
)
dataset
=
paddle
.
distributed
.
fleet
.
DatasetFactory
().
create_dataset
(
dataset
=
paddle
.
distributed
.
InMemoryDataset
()
"InMemoryDataset"
)
dataset
.
_set_batch_size
(
32
)
dataset
.
set_batch_size
(
32
)
dataset
.
_set_thread
(
3
)
dataset
.
set_thread
(
3
)
dataset
.
set_filelist
([
dataset
.
set_filelist
([
"test_in_memory_dataset_run_a.txt"
,
"test_in_memory_dataset_run_a.txt"
,
"test_in_memory_dataset_run_b.txt"
"test_in_memory_dataset_run_b.txt"
])
])
dataset
.
set_pipe_command
(
"cat"
)
dataset
.
_
set_pipe_command
(
"cat"
)
dataset
.
set_use_var
(
slots_vars
)
dataset
.
_
set_use_var
(
slots_vars
)
dataset
.
load_into_memory
()
dataset
.
load_into_memory
()
dataset
.
set_fea_eval
(
1
,
True
)
dataset
.
_
set_fea_eval
(
1
,
True
)
dataset
.
slots_shuffle
([
"slot1"
])
dataset
.
slots_shuffle
([
"slot1"
])
exe
=
fluid
.
Executor
(
fluid
.
CPUPlace
())
exe
=
fluid
.
Executor
(
fluid
.
CPUPlace
())
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录