Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
45eb6f07
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
45eb6f07
编写于
3月 26, 2019
作者:
D
dongdaxiang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
run pre-commit check files and fix code style problem
test=develop
上级
e57ac5ed
变更
12
隐藏空白更改
内联
并排
Showing
12 changed file
with
66 addition
and
72 deletion
+66
-72
paddle/fluid/framework/data_feed.cc
paddle/fluid/framework/data_feed.cc
+13
-16
paddle/fluid/framework/data_feed.h
paddle/fluid/framework/data_feed.h
+16
-18
paddle/fluid/framework/data_set.cc
paddle/fluid/framework/data_set.cc
+2
-3
paddle/fluid/framework/data_set.h
paddle/fluid/framework/data_set.h
+1
-1
paddle/fluid/framework/dataset_factory.cc
paddle/fluid/framework/dataset_factory.cc
+12
-13
paddle/fluid/framework/executor.h
paddle/fluid/framework/executor.h
+1
-2
paddle/fluid/framework/io/fs.h
paddle/fluid/framework/io/fs.h
+1
-1
paddle/fluid/framework/pull_dense_worker.cc
paddle/fluid/framework/pull_dense_worker.cc
+1
-1
paddle/fluid/pybind/async_executor_py.cc
paddle/fluid/pybind/async_executor_py.cc
+1
-1
paddle/fluid/pybind/data_set_py.cc
paddle/fluid/pybind/data_set_py.cc
+5
-5
paddle/fluid/string/string_helper.h
paddle/fluid/string/string_helper.h
+1
-1
python/paddle/fluid/tests/unittests/test_dataset.py
python/paddle/fluid/tests/unittests/test_dataset.py
+12
-10
未找到文件。
paddle/fluid/framework/data_feed.cc
浏览文件 @
45eb6f07
...
...
@@ -246,8 +246,8 @@ void InMemoryDataFeed<T>::FillMemoryDataToChannel() {
VLOG
(
3
)
<<
"FillMemoryDataToChannel, thread_id="
<<
thread_id_
;
auto
interval
=
GetMemoryDataInterval
();
VLOG
(
3
)
<<
"memory data size="
<<
memory_data_
->
size
()
<<
", fill data from ["
<<
interval
.
first
<<
", "
<<
interval
.
second
<<
"), thread_id="
<<
thread_id_
;
<<
", fill data from ["
<<
interval
.
first
<<
", "
<<
interval
.
second
<<
"), thread_id="
<<
thread_id_
;
for
(
int64_t
i
=
interval
.
first
;
i
<
interval
.
second
;
++
i
)
{
T
&
t
=
(
*
memory_data_
)[
i
];
shuffled_ins_
->
Push
(
std
::
move
(
t
));
...
...
@@ -275,13 +275,13 @@ void InMemoryDataFeed<T>::FillChannelToMemoryData() {
channel
->
Pop
(
&
local_vec
[
i
]);
}
VLOG
(
3
)
<<
"local_vec size="
<<
local_vec
.
size
()
<<
", thread_id="
<<
thread_id_
;
<<
", thread_id="
<<
thread_id_
;
{
std
::
lock_guard
<
std
::
mutex
>
g
(
*
mutex_for_update_memory_data_
);
VLOG
(
3
)
<<
"before insert, memory_data_ size="
<<
memory_data_
->
size
()
<<
", thread_id="
<<
thread_id_
;
memory_data_
->
insert
(
memory_data_
->
end
(),
local_vec
.
begin
(),
local_vec
.
end
());
local_vec
.
end
());
VLOG
(
3
)
<<
"after insert memory_data_ size="
<<
memory_data_
->
size
()
<<
", thread_id="
<<
thread_id_
;
}
...
...
@@ -308,8 +308,8 @@ void InMemoryDataFeed<T>::LoadIntoMemory() {
local_vec
.
push_back
(
instance
);
}
timeline
.
Pause
();
VLOG
(
3
)
<<
"LoadIntoMemory() read all lines, file="
<<
filename
<<
", cost time="
<<
timeline
.
ElapsedSec
()
VLOG
(
3
)
<<
"LoadIntoMemory() read all lines, file="
<<
filename
<<
", cost time="
<<
timeline
.
ElapsedSec
()
<<
" seconds, thread_id="
<<
thread_id_
;
{
std
::
lock_guard
<
std
::
mutex
>
lock
(
*
mutex_for_update_memory_data_
);
...
...
@@ -319,8 +319,7 @@ void InMemoryDataFeed<T>::LoadIntoMemory() {
std
::
make_move_iterator
(
local_vec
.
end
()));
timeline
.
Pause
();
VLOG
(
3
)
<<
"LoadIntoMemory() memory_data insert, cost time="
<<
timeline
.
ElapsedSec
()
<<
" seconds, thread_id="
<<
thread_id_
;
<<
timeline
.
ElapsedSec
()
<<
" seconds, thread_id="
<<
thread_id_
;
}
local_vec
.
clear
();
}
...
...
@@ -358,8 +357,8 @@ void InMemoryDataFeed<T>::GlobalShuffle() {
std
::
string
send_str
;
SerializeIns
(
send_vec
[
j
],
&
send_str
);
VLOG
(
3
)
<<
"send str_length="
<<
send_str
.
length
()
<<
", ins num="
<<
send_vec
[
j
].
size
()
<<
" to node_id="
<<
j
<<
", thread_id="
<<
thread_id_
;
<<
", ins num="
<<
send_vec
[
j
].
size
()
<<
" to node_id="
<<
j
<<
", thread_id="
<<
thread_id_
;
auto
ret
=
fleet_ptr
->
SendClientToClientMsg
(
0
,
j
,
send_str
);
VLOG
(
3
)
<<
"end send, thread_id="
<<
thread_id_
;
send_vec
[
j
].
clear
();
...
...
@@ -371,8 +370,8 @@ void InMemoryDataFeed<T>::GlobalShuffle() {
if
(
send_vec
[
j
].
size
()
!=
0
)
{
std
::
string
send_str
;
SerializeIns
(
send_vec
[
j
],
&
send_str
);
VLOG
(
3
)
<<
"send str_length="
<<
send_str
.
length
()
<<
"
to node_id="
<<
j
<<
"
, thread_id="
<<
thread_id_
;
VLOG
(
3
)
<<
"send str_length="
<<
send_str
.
length
()
<<
" to node_id="
<<
j
<<
", thread_id="
<<
thread_id_
;
auto
ret
=
fleet_ptr
->
SendClientToClientMsg
(
0
,
j
,
send_str
);
VLOG
(
3
)
<<
"end send, thread_id="
<<
thread_id_
;
total_status
.
push_back
(
std
::
move
(
ret
));
...
...
@@ -888,15 +887,13 @@ void MultiSlotInMemoryDataFeed::PutToFeedVec(
// todo serialize ins in global shuffle
void
MultiSlotInMemoryDataFeed
::
SerializeIns
(
const
std
::
vector
<
std
::
vector
<
MultiSlotType
>*>&
ins
,
std
::
string
*
str
)
{
const
std
::
vector
<
std
::
vector
<
MultiSlotType
>*>&
ins
,
std
::
string
*
str
)
{
auto
fleet_ptr
=
FleetWrapper
::
GetInstance
();
fleet_ptr
->
Serialize
(
ins
,
str
);
}
// todo deserialize ins in global shuffle
void
MultiSlotInMemoryDataFeed
::
DeserializeIns
(
std
::
vector
<
std
::
vector
<
MultiSlotType
>>*
ins
,
const
std
::
string
&
str
)
{
std
::
vector
<
std
::
vector
<
MultiSlotType
>>*
ins
,
const
std
::
string
&
str
)
{
auto
fleet_ptr
=
FleetWrapper
::
GetInstance
();
fleet_ptr
->
Deserialize
(
ins
,
str
);
}
...
...
paddle/fluid/framework/data_feed.h
浏览文件 @
45eb6f07
...
...
@@ -15,23 +15,23 @@ limitations under the License. */
#pragma once
#include <fstream>
#include <future> // NOLINT
#include <memory>
#include <mutex> // NOLINT
#include <sstream>
#include <string>
#include <thread> // NOLINT
#include <vector>
#include <sstream>
#include <future> // NOLINT
#include <utility>
#include <vector>
#include "paddle/fluid/framework/blocking_queue.h"
#include "paddle/fluid/framework/data_feed.pb.h"
#include "paddle/fluid/framework/fleet/fleet_wrapper.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/reader.h"
#include "paddle/fluid/framework/variable.h"
#include "paddle/fluid/operators/reader/blocking_queue.h"
#include "paddle/fluid/string/string_helper.h"
#include "paddle/fluid/framework/blocking_queue.h"
#include "paddle/fluid/framework/fleet/fleet_wrapper.h"
namespace
paddle
{
namespace
framework
{
...
...
@@ -85,21 +85,19 @@ class DataFeed {
virtual
void
AddFeedVar
(
Variable
*
var
,
const
std
::
string
&
name
);
// This function will do nothing at default
virtual
void
SetMemoryData
(
void
*
memory_data
)
{
}
virtual
void
SetMemoryData
(
void
*
memory_data
)
{}
// This function will do nothing at default
virtual
void
SetMemoryDataMutex
(
std
::
mutex
*
mutex
)
{
}
virtual
void
SetMemoryDataMutex
(
std
::
mutex
*
mutex
)
{}
// This function will do nothing at default
virtual
void
SetThreadId
(
int
thread_id
)
{
}
virtual
void
SetThreadId
(
int
thread_id
)
{}
// This function will do nothing at default
virtual
void
SetThreadNum
(
int
thread_num
)
{
}
virtual
void
SetThreadNum
(
int
thread_num
)
{}
// This function will do nothing at default
virtual
void
SetTrainerNum
(
int
trainer_num
)
{
}
virtual
void
SetTrainerNum
(
int
trainer_num
)
{}
virtual
void
SetFileListMutex
(
std
::
mutex
*
mutex
)
{
mutex_for_pick_file_
=
mutex
;
}
virtual
void
SetFileListIndex
(
size_t
*
file_index
)
{
file_idx_
=
file_index
;
}
virtual
void
SetFileListIndex
(
size_t
*
file_index
)
{
file_idx_
=
file_index
;
}
virtual
void
LoadIntoMemory
()
{
PADDLE_THROW
(
"This function(LoadIntoMemory) is not implemented."
);
}
...
...
@@ -110,11 +108,11 @@ class DataFeed {
PADDLE_THROW
(
"This function(GlobalShuffle) is not implemented."
);
}
// This function will do nothing at default
virtual
void
FillMemoryDataToChannel
()
{
}
virtual
void
FillMemoryDataToChannel
()
{}
// This function will do nothing at default
virtual
void
FillChannelToMemoryData
()
{
}
virtual
void
FillChannelToMemoryData
()
{}
// This function will do nothing at default
virtual
void
PutInsToChannel
(
const
std
::
string
&
ins_str
)
{
}
virtual
void
PutInsToChannel
(
const
std
::
string
&
ins_str
)
{}
protected:
// The following three functions are used to check if it is executed in this
...
...
@@ -222,8 +220,7 @@ class InMemoryDataFeed : public PrivateQueueDataFeed<T> {
virtual
void
GlobalShuffle
();
protected:
virtual
void
AddInstanceToInsVec
(
T
*
vec_ins
,
const
T
&
instance
,
virtual
void
AddInstanceToInsVec
(
T
*
vec_ins
,
const
T
&
instance
,
int
index
)
=
0
;
virtual
bool
ParseOneInstance
(
T
*
instance
)
=
0
;
virtual
bool
ParseOneInstanceFromPipe
(
T
*
instance
)
=
0
;
...
...
@@ -363,6 +360,7 @@ class MultiSlotInMemoryDataFeed
MultiSlotInMemoryDataFeed
()
{}
virtual
~
MultiSlotInMemoryDataFeed
()
{}
virtual
void
Init
(
const
paddle
::
framework
::
DataFeedDesc
&
data_feed_desc
);
protected:
virtual
void
AddInstanceToInsVec
(
std
::
vector
<
MultiSlotType
>*
vec_ins
,
const
std
::
vector
<
MultiSlotType
>&
instance
,
...
...
paddle/fluid/framework/data_set.cc
浏览文件 @
45eb6f07
...
...
@@ -18,8 +18,8 @@
#include "google/protobuf/message.h"
#include "google/protobuf/text_format.h"
#include "paddle/fluid/framework/data_feed_factory.h"
#include "paddle/fluid/platform/timer.h"
#include "paddle/fluid/framework/io/fs.h"
#include "paddle/fluid/platform/timer.h"
namespace
paddle
{
namespace
framework
{
...
...
@@ -248,8 +248,7 @@ template <typename T>
int
DatasetImpl
<
T
>::
ReceiveFromClient
(
int
msg_type
,
int
client_id
,
const
std
::
string
&
msg
)
{
VLOG
(
3
)
<<
"ReceiveFromClient msg_type="
<<
msg_type
<<
", client_id="
<<
client_id
<<
", msg length="
<<
msg
.
length
();
<<
", client_id="
<<
client_id
<<
", msg length="
<<
msg
.
length
();
auto
fleet_ptr
=
FleetWrapper
::
GetInstance
();
int64_t
index
=
fleet_ptr
->
LocalRandomEngine
()()
%
thread_num_
;
VLOG
(
3
)
<<
"ramdom index="
<<
index
;
...
...
paddle/fluid/framework/data_set.h
浏览文件 @
45eb6f07
...
...
@@ -19,8 +19,8 @@
#include <mutex> // NOLINT
#include <string>
#include <thread> // NOLINT
#include <vector>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/data_feed.h"
...
...
paddle/fluid/framework/dataset_factory.cc
浏览文件 @
45eb6f07
...
...
@@ -25,24 +25,23 @@ typedef std::shared_ptr<Dataset> (*CreateDatasetFunction)();
typedef
std
::
unordered_map
<
std
::
string
,
CreateDatasetFunction
>
datasetMap
;
datasetMap
g_dataset_map
;
#define REGISTER_DATASET_CLASS(dataset_class)
\
namespace {
\
std::shared_ptr<Dataset> Creator_##dataset_class() {
\
return std::shared_ptr<Dataset>(new dataset_class);
\
}
\
class __Registerer_##dataset_class {
\
public:
\
__Registerer_##dataset_class() {
\
#define REGISTER_DATASET_CLASS(dataset_class) \
namespace { \
std::shared_ptr<Dataset> Creator_##dataset_class() { \
return std::shared_ptr<Dataset>(new dataset_class); \
} \
class __Registerer_##dataset_class { \
public: \
__Registerer_##dataset_class() { \
g_dataset_map[#dataset_class] = &Creator_##dataset_class; \
}
\
};
\
__Registerer_##dataset_class g_registerer_##dataset_class;
\
} \
}; \
__Registerer_##dataset_class g_registerer_##dataset_class; \
} // namespace
std
::
string
DatasetFactory
::
DatasetTypeList
()
{
std
::
string
dataset_types
;
for
(
auto
iter
=
g_dataset_map
.
begin
();
iter
!=
g_dataset_map
.
end
();
++
iter
)
{
for
(
auto
iter
=
g_dataset_map
.
begin
();
iter
!=
g_dataset_map
.
end
();
++
iter
)
{
if
(
iter
!=
g_dataset_map
.
begin
())
{
dataset_types
+=
", "
;
}
...
...
paddle/fluid/framework/executor.h
浏览文件 @
45eb6f07
...
...
@@ -113,8 +113,7 @@ class Executor {
void
EnableMKLDNN
(
const
ProgramDesc
&
program
);
void
RunFromDataset
(
const
ProgramDesc
&
main_program
,
Scope
*
scope
,
Dataset
*
dataset
,
const
std
::
string
&
trainer_desc_str
);
Dataset
*
dataset
,
const
std
::
string
&
trainer_desc_str
);
private:
const
platform
::
Place
place_
;
...
...
paddle/fluid/framework/io/fs.h
浏览文件 @
45eb6f07
...
...
@@ -15,9 +15,9 @@
#pragma once
#include <stdio.h>
#include <memory>
#include <string>
#include <vector>
#include <memory>
#include "glog/logging.h"
#include "paddle/fluid/framework/io/shell.h"
#include "paddle/fluid/string/string_helper.h"
...
...
paddle/fluid/framework/pull_dense_worker.cc
浏览文件 @
45eb6f07
...
...
@@ -47,7 +47,7 @@ void PullDenseWorker::Initialize(const TrainerDesc& param) {
int
var_num
=
table
.
dense_value_name_size
();
dense_value_names_
[
tid
].
resize
(
var_num
);
for
(
int
j
=
0
;
j
<
var_num
;
++
j
)
{
dense_value_names_
[
tid
][
j
]
=
table
.
dense_value_name
(
j
);
dense_value_names_
[
tid
][
j
]
=
table
.
dense_value_name
(
j
);
}
// setup training version for each table
training_versions_
[
tid
].
resize
(
thread_num_
,
0
);
...
...
paddle/fluid/pybind/async_executor_py.cc
浏览文件 @
45eb6f07
...
...
@@ -21,9 +21,9 @@ limitations under the License. */
#ifdef _XOPEN_SOURCE
#undef _XOPEN_SOURCE
#endif
#include <memory>
#include <string>
#include <vector>
#include <memory>
#include "google/protobuf/io/zero_copy_stream_impl.h"
#include "google/protobuf/text_format.h"
...
...
paddle/fluid/pybind/data_set_py.cc
浏览文件 @
45eb6f07
...
...
@@ -19,21 +19,21 @@ limitations under the License. */
#ifdef _XOPEN_SOURCE
#undef _XOPEN_SOURCE
#endif
#include <memory>
#include <string>
#include <vector>
#include <memory>
#include "google/protobuf/io/zero_copy_stream_impl.h"
#include "google/protobuf/text_format.h"
#include "paddle/fluid/framework/async_executor.h"
#include "paddle/fluid/framework/data_feed.h"
#include "paddle/fluid/framework/data_feed.pb.h"
#include "paddle/fluid/framework/data_set.h"
#include "paddle/fluid/framework/dataset_factory.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/inference/io.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/variant.h"
#include "paddle/fluid/pybind/data_set_py.h"
#include "paddle/fluid/framework/dataset_factory.h"
namespace
py
=
pybind11
;
namespace
pd
=
paddle
::
framework
;
...
...
@@ -42,8 +42,8 @@ namespace paddle {
namespace
pybind
{
void
BindDataset
(
py
::
module
*
m
)
{
py
::
class_
<
framework
::
Dataset
,
std
::
shared_ptr
<
framework
::
Dataset
>>
(
*
m
,
"Dataset"
)
py
::
class_
<
framework
::
Dataset
,
std
::
shared_ptr
<
framework
::
Dataset
>>
(
*
m
,
"Dataset"
)
.
def
(
py
::
init
([](
const
std
::
string
&
name
=
"MultiSlotDataset"
)
{
return
framework
::
DatasetFactory
::
CreateDataset
(
name
);
}))
...
...
@@ -58,7 +58,7 @@ void BindDataset(py::module* m) {
.
def
(
"get_hdfs_config"
,
&
framework
::
Dataset
::
GetHdfsConfig
)
.
def
(
"get_data_feed_desc"
,
&
framework
::
Dataset
::
GetDataFeedDesc
)
.
def
(
"register_client2client_msg_handler"
,
&
framework
::
Dataset
::
RegisterClientToClientMsgHandler
)
&
framework
::
Dataset
::
RegisterClientToClientMsgHandler
)
.
def
(
"load_into_memory"
,
&
framework
::
Dataset
::
LoadIntoMemory
)
.
def
(
"release_memory"
,
&
framework
::
Dataset
::
ReleaseMemory
)
.
def
(
"local_shuffle"
,
&
framework
::
Dataset
::
LocalShuffle
)
...
...
paddle/fluid/string/string_helper.h
浏览文件 @
45eb6f07
...
...
@@ -18,8 +18,8 @@
#include <stdio.h>
#include <cstring>
#include <string>
#include <vector>
#include <utility>
#include <vector>
#include "boost/lexical_cast.hpp"
#include "glog/logging.h"
...
...
python/paddle/fluid/tests/unittests/test_dataset.py
浏览文件 @
45eb6f07
...
...
@@ -80,18 +80,20 @@ class TestDataset(unittest.TestCase):
data
+=
"1 7 2 3 6 4 8 8 8 8 1 7
\n
"
f
.
write
(
data
)
slots
=
[
"slot1"
,
"slot2"
,
"slot3"
,
"slot4"
]
slots
=
[
"slot1"
,
"slot2"
,
"slot3"
,
"slot4"
]
slots_vars
=
[]
for
slot
in
slots
:
var
=
fluid
.
layers
.
data
(
name
=
slot
,
shape
=
[
1
],
dtype
=
"int64"
,
lod_level
=
1
)
var
=
fluid
.
layers
.
data
(
name
=
slot
,
shape
=
[
1
],
dtype
=
"int64"
,
lod_level
=
1
)
slots_vars
.
append
(
var
)
dataset
=
fluid
.
DatasetFactory
().
create_dataset
(
"InMemoryDataset"
)
dataset
.
set_batch_size
(
32
)
dataset
.
set_thread
(
3
)
dataset
.
set_filelist
([
"test_in_memory_dataset_run_a.txt"
,
"test_in_memory_dataset_run_b.txt"
])
dataset
.
set_filelist
([
"test_in_memory_dataset_run_a.txt"
,
"test_in_memory_dataset_run_b.txt"
])
dataset
.
set_pipe_command
(
"cat"
)
dataset
.
set_use_var
(
slots_vars
)
dataset
.
load_into_memory
()
...
...
@@ -124,18 +126,18 @@ class TestDataset(unittest.TestCase):
data
+=
"1 7 2 3 6 4 8 8 8 8 1 7
\n
"
f
.
write
(
data
)
slots
=
[
"slot1"
,
"slot2"
,
"slot3"
,
"slot4"
]
slots
=
[
"slot1"
,
"slot2"
,
"slot3"
,
"slot4"
]
slots_vars
=
[]
for
slot
in
slots
:
var
=
fluid
.
layers
.
data
(
name
=
slot
,
shape
=
[
1
],
dtype
=
"int64"
,
lod_level
=
1
)
var
=
fluid
.
layers
.
data
(
name
=
slot
,
shape
=
[
1
],
dtype
=
"int64"
,
lod_level
=
1
)
slots_vars
.
append
(
var
)
dataset
=
fluid
.
DatasetFactory
().
create_dataset
(
"QueueDataset"
)
dataset
.
set_batch_size
(
32
)
dataset
.
set_thread
(
3
)
dataset
.
set_filelist
(
[
"test_queue_dataset_run_a.txt"
,
"test_queue_dataset_run_b.txt"
])
dataset
.
set_filelist
(
[
"test_queue_dataset_run_a.txt"
,
"test_queue_dataset_run_b.txt"
])
dataset
.
set_pipe_command
(
"cat"
)
dataset
.
set_use_var
(
slots_vars
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录