Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
b1c458d0
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
b1c458d0
编写于
7月 06, 2021
作者:
T
Thunderbrook
提交者:
GitHub
7月 06, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add so parser (#33969)
* add delta score, scale show * so parser * windows * windows
上级
afddcb97
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
181 addition
and
0 deletion
+181
-0
paddle/fluid/framework/data_feed.cc
paddle/fluid/framework/data_feed.cc
+68
-0
paddle/fluid/framework/data_feed.h
paddle/fluid/framework/data_feed.h
+95
-0
paddle/fluid/framework/data_feed.proto
paddle/fluid/framework/data_feed.proto
+1
-0
python/paddle/fluid/dataset.py
python/paddle/fluid/dataset.py
+17
-0
未找到文件。
paddle/fluid/framework/data_feed.cc
浏览文件 @
b1c458d0
...
...
@@ -31,6 +31,11 @@ USE_INT_STAT(STAT_total_feasign_num_in_mem);
namespace
paddle
{
namespace
framework
{
DLManager
&
global_dlmanager_pool
()
{
static
DLManager
manager
;
return
manager
;
}
void
RecordCandidateList
::
ReSize
(
size_t
length
)
{
mutex_
.
lock
();
capacity_
=
length
;
...
...
@@ -366,6 +371,10 @@ void InMemoryDataFeed<T>::SetParseInsId(bool parse_ins_id) {
template
<
typename
T
>
void
InMemoryDataFeed
<
T
>::
LoadIntoMemory
()
{
#ifdef _LINUX
if
(
!
so_parser_name_
.
empty
())
{
LoadIntoMemoryFromSo
();
return
;
}
VLOG
(
3
)
<<
"LoadIntoMemory() begin, thread_id="
<<
thread_id_
;
std
::
string
filename
;
while
(
this
->
PickOneFile
(
&
filename
))
{
...
...
@@ -408,6 +417,51 @@ void InMemoryDataFeed<T>::LoadIntoMemory() {
#endif
}
template
<
typename
T
>
void
InMemoryDataFeed
<
T
>::
LoadIntoMemoryFromSo
()
{
#ifdef _LINUX
VLOG
(
3
)
<<
"LoadIntoMemoryFromSo() begin, thread_id="
<<
thread_id_
;
string
::
LineFileReader
reader
;
paddle
::
framework
::
CustomParser
*
parser
=
global_dlmanager_pool
().
Load
(
so_parser_name_
,
slot_conf_
);
std
::
string
filename
;
while
(
this
->
PickOneFile
(
&
filename
))
{
VLOG
(
3
)
<<
"PickOneFile, filename="
<<
filename
<<
", thread_id="
<<
thread_id_
;
int
err_no
=
0
;
this
->
fp_
=
fs_open_read
(
filename
,
&
err_no
,
this
->
pipe_command_
);
CHECK
(
this
->
fp_
!=
nullptr
);
__fsetlocking
(
&*
(
this
->
fp_
),
FSETLOCKING_BYCALLER
);
paddle
::
framework
::
ChannelWriter
<
T
>
writer
(
input_channel_
);
T
instance
;
platform
::
Timer
timeline
;
timeline
.
Start
();
while
(
1
)
{
if
(
!
reader
.
getline
(
&*
(
fp_
.
get
())))
{
break
;
}
else
{
const
char
*
str
=
reader
.
get
();
ParseOneInstanceFromSo
(
str
,
&
instance
,
parser
);
}
writer
<<
std
::
move
(
instance
);
instance
=
T
();
}
writer
.
Flush
();
timeline
.
Pause
();
VLOG
(
3
)
<<
"LoadIntoMemoryFromSo() read all lines, file="
<<
filename
<<
", cost time="
<<
timeline
.
ElapsedSec
()
<<
" seconds, thread_id="
<<
thread_id_
;
}
VLOG
(
3
)
<<
"LoadIntoMemoryFromSo() end, thread_id="
<<
thread_id_
;
#endif
}
// explicit instantiation
template
class
InMemoryDataFeed
<
Record
>;
...
...
@@ -827,16 +881,23 @@ void MultiSlotInMemoryDataFeed::Init(
inductive_shape_index_
.
resize
(
all_slot_num
);
use_slots_
.
clear
();
use_slots_is_dense_
.
clear
();
slot_conf_
.
resize
(
all_slot_num
);
for
(
size_t
i
=
0
;
i
<
all_slot_num
;
++
i
)
{
const
auto
&
slot
=
multi_slot_desc
.
slots
(
i
);
all_slots_
[
i
]
=
slot
.
name
();
all_slots_type_
[
i
]
=
slot
.
type
();
use_slots_index_
[
i
]
=
slot
.
is_used
()
?
use_slots_
.
size
()
:
-
1
;
slot_conf_
[
i
].
name
=
slot
.
name
();
slot_conf_
[
i
].
type
=
slot
.
type
();
slot_conf_
[
i
].
use_slots_index
=
use_slots_index_
[
i
];
total_dims_without_inductive_
[
i
]
=
1
;
inductive_shape_index_
[
i
]
=
-
1
;
if
(
slot
.
is_used
())
{
use_slots_
.
push_back
(
all_slots_
[
i
]);
use_slots_is_dense_
.
push_back
(
slot
.
is_dense
());
slot_conf_
[
i
].
use_slots_is_dense
=
slot
.
is_dense
();
std
::
vector
<
int
>
local_shape
;
if
(
slot
.
is_dense
())
{
for
(
int
j
=
0
;
j
<
slot
.
shape_size
();
++
j
)
{
...
...
@@ -869,6 +930,7 @@ void MultiSlotInMemoryDataFeed::Init(
}
visit_
.
resize
(
all_slot_num
,
false
);
pipe_command_
=
data_feed_desc
.
pipe_command
();
so_parser_name_
=
data_feed_desc
.
so_parser_name
();
finish_init_
=
true
;
input_type_
=
data_feed_desc
.
input_type
();
}
...
...
@@ -887,6 +949,12 @@ void MultiSlotInMemoryDataFeed::GetMsgFromLogKey(const std::string& log_key,
*
rank
=
(
uint32_t
)
strtoul
(
rank_str
.
c_str
(),
NULL
,
16
);
}
void
MultiSlotInMemoryDataFeed
::
ParseOneInstanceFromSo
(
const
char
*
str
,
Record
*
instance
,
CustomParser
*
parser
)
{
parser
->
ParseOneInstance
(
str
,
instance
);
}
bool
MultiSlotInMemoryDataFeed
::
ParseOneInstanceFromPipe
(
Record
*
instance
)
{
#ifdef _LINUX
thread_local
string
::
LineFileReader
reader
;
...
...
paddle/fluid/framework/data_feed.h
浏览文件 @
b1c458d0
...
...
@@ -117,6 +117,94 @@ using PvInstance = PvInstanceObject*;
inline
PvInstance
make_pv_instance
()
{
return
new
PvInstanceObject
();
}
struct
SlotConf
{
std
::
string
name
;
std
::
string
type
;
int
use_slots_index
;
int
use_slots_is_dense
;
};
class
CustomParser
{
public:
CustomParser
()
{}
virtual
~
CustomParser
()
{}
virtual
void
Init
(
const
std
::
vector
<
SlotConf
>&
slots
)
=
0
;
virtual
void
ParseOneInstance
(
const
char
*
str
,
Record
*
instance
)
=
0
;
};
typedef
paddle
::
framework
::
CustomParser
*
(
*
CreateParserObjectFunc
)();
class
DLManager
{
struct
DLHandle
{
void
*
module
;
paddle
::
framework
::
CustomParser
*
parser
;
};
public:
DLManager
()
{}
~
DLManager
()
{
#ifdef _LINUX
std
::
lock_guard
<
std
::
mutex
>
lock
(
mutex_
);
for
(
auto
it
=
handle_map_
.
begin
();
it
!=
handle_map_
.
end
();
++
it
)
{
delete
it
->
second
.
parser
;
dlclose
(
it
->
second
.
module
);
}
#endif
}
bool
Close
(
const
std
::
string
&
name
)
{
#ifdef _LINUX
auto
it
=
handle_map_
.
find
(
name
);
if
(
it
==
handle_map_
.
end
())
{
return
true
;
}
delete
it
->
second
.
parser
;
dlclose
(
it
->
second
.
module
);
#endif
VLOG
(
0
)
<<
"Not implement in windows"
;
return
false
;
}
paddle
::
framework
::
CustomParser
*
Load
(
const
std
::
string
&
name
,
std
::
vector
<
SlotConf
>&
conf
)
{
#ifdef _LINUX
std
::
lock_guard
<
std
::
mutex
>
lock
(
mutex_
);
DLHandle
handle
;
std
::
map
<
std
::
string
,
DLHandle
>::
iterator
it
=
handle_map_
.
find
(
name
);
if
(
it
!=
handle_map_
.
end
())
{
return
it
->
second
.
parser
;
}
handle
.
module
=
dlopen
(
name
.
c_str
(),
RTLD_NOW
);
if
(
handle
.
module
==
nullptr
)
{
VLOG
(
0
)
<<
"Create so of "
<<
name
<<
" fail"
;
return
nullptr
;
}
CreateParserObjectFunc
create_parser_func
=
(
CreateParserObjectFunc
)
dlsym
(
handle
.
module
,
"CreateParserObject"
);
handle
.
parser
=
create_parser_func
();
handle
.
parser
->
Init
(
conf
);
handle_map_
.
insert
({
name
,
handle
});
return
handle
.
parser
;
#endif
VLOG
(
0
)
<<
"Not implement in windows"
;
return
nullptr
;
}
paddle
::
framework
::
CustomParser
*
ReLoad
(
const
std
::
string
&
name
,
std
::
vector
<
SlotConf
>&
conf
)
{
Close
(
name
);
return
Load
(
name
,
conf
);
}
private:
std
::
mutex
mutex_
;
std
::
map
<
std
::
string
,
DLHandle
>
handle_map_
;
};
class
DataFeed
{
public:
DataFeed
()
{
...
...
@@ -252,6 +340,8 @@ class DataFeed {
bool
finish_set_filelist_
;
bool
finish_start_
;
std
::
string
pipe_command_
;
std
::
string
so_parser_name_
;
std
::
vector
<
SlotConf
>
slot_conf_
;
std
::
vector
<
std
::
string
>
ins_id_vec_
;
std
::
vector
<
std
::
string
>
ins_content_vec_
;
platform
::
Place
place_
;
...
...
@@ -324,10 +414,13 @@ class InMemoryDataFeed : public DataFeed {
virtual
void
SetEnablePvMerge
(
bool
enable_pv_merge
);
virtual
void
SetCurrentPhase
(
int
current_phase
);
virtual
void
LoadIntoMemory
();
virtual
void
LoadIntoMemoryFromSo
();
protected:
virtual
bool
ParseOneInstance
(
T
*
instance
)
=
0
;
virtual
bool
ParseOneInstanceFromPipe
(
T
*
instance
)
=
0
;
virtual
void
ParseOneInstanceFromSo
(
const
char
*
str
,
T
*
instance
,
CustomParser
*
parser
)
{}
virtual
void
PutToFeedVec
(
const
std
::
vector
<
T
>&
ins_vec
)
=
0
;
int
thread_id_
;
...
...
@@ -688,6 +781,8 @@ class MultiSlotInMemoryDataFeed : public InMemoryDataFeed<Record> {
protected:
virtual
bool
ParseOneInstance
(
Record
*
instance
);
virtual
bool
ParseOneInstanceFromPipe
(
Record
*
instance
);
virtual
void
ParseOneInstanceFromSo
(
const
char
*
str
,
Record
*
instance
,
CustomParser
*
parser
);
virtual
void
PutToFeedVec
(
const
std
::
vector
<
Record
>&
ins_vec
);
virtual
void
GetMsgFromLogKey
(
const
std
::
string
&
log_key
,
uint64_t
*
search_id
,
uint32_t
*
cmatch
,
uint32_t
*
rank
);
...
...
paddle/fluid/framework/data_feed.proto
浏览文件 @
b1c458d0
...
...
@@ -33,4 +33,5 @@ message DataFeedDesc {
optional
string
rank_offset
=
6
;
optional
int32
pv_batch_size
=
7
[
default
=
32
];
optional
int32
input_type
=
8
[
default
=
0
];
optional
string
so_parser_name
=
9
;
}
python/paddle/fluid/dataset.py
浏览文件 @
b1c458d0
...
...
@@ -95,6 +95,23 @@ class DatasetBase(object):
"""
self
.
proto_desc
.
pipe_command
=
pipe_command
def
set_so_parser_name
(
self
,
so_parser_name
):
"""
Set so parser name of current dataset
Examples:
.. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset()
dataset.set_so_parser_name("./abc.so")
Args:
pipe_command(str): pipe command
"""
self
.
proto_desc
.
so_parser_name
=
so_parser_name
def
set_rank_offset
(
self
,
rank_offset
):
"""
Set rank_offset for merge_pv. It set the message of Pv.
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录