Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
a34fe624
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
a34fe624
编写于
3月 20, 2019
作者:
X
xjqbest
提交者:
dongdaxiang
3月 29, 2019
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add some doc
上级
20b76f3d
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
26 addition
and
18 deletion
+26
-18
paddle/fluid/framework/data_set.cc
paddle/fluid/framework/data_set.cc
+11
-18
paddle/fluid/framework/data_set.h
paddle/fluid/framework/data_set.h
+13
-0
paddle/fluid/framework/fleet/fleet_wrapper.h
paddle/fluid/framework/fleet/fleet_wrapper.h
+2
-0
未找到文件。
paddle/fluid/framework/data_set.cc
浏览文件 @
a34fe624
...
...
@@ -24,6 +24,7 @@
namespace
paddle
{
namespace
framework
{
// constructor
template
<
typename
T
>
DatasetImpl
<
T
>::
DatasetImpl
()
{
thread_num_
=
1
;
...
...
@@ -31,37 +32,24 @@ DatasetImpl<T>::DatasetImpl() {
file_idx_
=
0
;
}
// Installs a new file list on the dataset.
//
// A copy of `filelist` is stored in filelist_ and file_idx_ is rewound to 0.
// The number of files is logged at verbosity level 3. thread_num_ is not
// touched here: the thread count is not clamped to the number of files in
// this function.
template <typename T>
void DatasetImpl<T>::SetFileList(const std::vector<std::string>& filelist) {
  const auto file_count = filelist.size();
  VLOG(3) << "filelist size: " << file_count;

  this->filelist_ = filelist;
  this->file_idx_ = 0;
}
// Sets the expected number of threads for this dataset.
//
// The requested value is logged at verbosity level 3 and stored in
// thread_num_ exactly as given; it is not compared against filelist_.size()
// here, so the call does not depend on SetFileList having run first.
// NOTE(review): the original note says the effective thread number "may
// change" afterwards -- that adjustment is not visible in this function;
// confirm where it happens.
template <typename T>
void DatasetImpl<T>::SetThreadNum(int thread_num) {
  VLOG(3) << "SetThreadNum thread_num=" << thread_num;
  this->thread_num_ = thread_num;
}
// If you run distributed training and want to do a global shuffle,
// set the trainer number before calling GlobalShuffle().
// Be sure to call CreateReaders() before SetTrainerNum().
template
<
typename
T
>
void
DatasetImpl
<
T
>::
SetTrainerNum
(
int
trainer_num
)
{
trainer_num_
=
trainer_num
;
...
...
@@ -86,12 +74,16 @@ void DatasetImpl<T>::SetDataFeedDesc(const std::string& data_feed_desc_str) {
&
data_feed_desc_
);
}
// readers_.size() may not be equal to thread_num_,
// it changes when filelist_.size() < thread_num_
template
<
typename
T
>
std
::
vector
<
std
::
shared_ptr
<
paddle
::
framework
::
DataFeed
>>&
DatasetImpl
<
T
>::
GetReaders
()
{
return
readers_
;
}
// Load data into memory. The Dataset holds this memory,
// which will later be fed into the readers' channel.
template
<
typename
T
>
void
DatasetImpl
<
T
>::
LoadIntoMemory
()
{
VLOG
(
3
)
<<
"DatasetImpl<T>::LoadIntoMemory() begin"
;
...
...
@@ -114,6 +106,7 @@ void DatasetImpl<T>::LoadIntoMemory() {
<<
", cost time="
<<
timeline
.
ElapsedSec
()
<<
" seconds"
;
}
// do local shuffle
template
<
typename
T
>
void
DatasetImpl
<
T
>::
LocalShuffle
()
{
VLOG
(
3
)
<<
"DatasetImpl<T>::LocalShuffle() begin"
;
...
...
paddle/fluid/framework/data_set.h
浏览文件 @
a34fe624
...
...
@@ -26,6 +26,16 @@
namespace
paddle
{
namespace
framework
{
// Dataset is an abstract class which defines the user interfaces.
// Example Usage:
// Dataset* dataset = DatasetFactory::CreateDataset("InMemoryDataset");
// dataset->SetFileList(std::vector<std::string>{"a.txt", "b.txt"});
// dataset->SetThreadNum(1);
// dataset->CreateReaders();
// dataset->SetDataFeedDesc(your_data_feed_desc);
// dataset->LoadIntoMemory();
// dataset->SetTrainerNum(2);
// dataset->GlobalShuffle();
class
Dataset
{
public:
Dataset
()
{}
...
...
@@ -53,6 +63,8 @@ class Dataset {
const
std
::
string
&
msg
)
=
0
;
};
// DatasetImpl is the implementation of Dataset,
// it holds memory data if user calls load_into_memory
template
<
typename
T
>
class
DatasetImpl
:
public
Dataset
{
public:
...
...
@@ -95,6 +107,7 @@ class DatasetImpl : public Dataset {
std
::
mutex
mutex_for_pick_file_
;
};
// use std::vector<MultiSlotType> as data type
class
MultiSlotDataset
:
public
DatasetImpl
<
std
::
vector
<
MultiSlotType
>>
{
public:
MultiSlotDataset
()
{}
...
...
paddle/fluid/framework/fleet/fleet_wrapper.h
浏览文件 @
a34fe624
...
...
@@ -146,7 +146,9 @@ class FleetWrapper {
private:
static
std
::
shared_ptr
<
FleetWrapper
>
s_instance_
;
#ifdef PADDLE_WITH_PSLIB
std
::
map
<
uint64_t
,
std
::
vector
<
paddle
::
ps
::
Region
>>
_regions
;
#endif
protected:
static
bool
is_initialized_
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录