Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
9ff558a4
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
9ff558a4
编写于
4月 22, 2020
作者:
H
hutuxian
提交者:
GitHub
4月 22, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Optimize DataFeed (#23957)
* Make batch_float_feasigns & batch_uint64_feasigns member variables
上级
5cccc69f
变更
2
显示空白变更内容
内联
并排
Showing
2 changed files
with
59 additions
and
42 deletions
+59
-42
paddle/fluid/framework/data_feed.cc
paddle/fluid/framework/data_feed.cc
+55
-42
paddle/fluid/framework/data_feed.h
paddle/fluid/framework/data_feed.h
+4
-0
未找到文件。
paddle/fluid/framework/data_feed.cc
浏览文件 @
9ff558a4
...
...
@@ -798,6 +798,19 @@ void MultiSlotInMemoryDataFeed::Init(
}
}
feed_vec_
.
resize
(
use_slots_
.
size
());
const
int
kEstimatedFeasignNumPerSlot
=
5
;
// Magic Number
for
(
size_t
i
=
0
;
i
<
all_slot_num
;
i
++
)
{
batch_float_feasigns_
.
push_back
(
std
::
vector
<
float
>
());
batch_uint64_feasigns_
.
push_back
(
std
::
vector
<
uint64_t
>
());
batch_float_feasigns_
[
i
].
reserve
(
default_batch_size_
*
kEstimatedFeasignNumPerSlot
);
batch_uint64_feasigns_
[
i
].
reserve
(
default_batch_size_
*
kEstimatedFeasignNumPerSlot
);
offset_
.
push_back
(
std
::
vector
<
size_t
>
());
offset_
[
i
].
reserve
(
default_batch_size_
+
1
);
// Each lod info will prepend a zero
}
visit_
.
resize
(
all_slot_num
,
false
);
pipe_command_
=
data_feed_desc
.
pipe_command
();
finish_init_
=
true
;
}
...
...
@@ -989,13 +1002,12 @@ bool MultiSlotInMemoryDataFeed::ParseOneInstance(Record* instance) {
void
MultiSlotInMemoryDataFeed
::
PutToFeedVec
(
const
std
::
vector
<
Record
>&
ins_vec
)
{
#ifdef _LINUX
std
::
vector
<
std
::
vector
<
float
>>
batch_float_feasigns
(
use_slots_
.
size
(),
std
::
vector
<
float
>
());
std
::
vector
<
std
::
vector
<
uint64_t
>>
batch_uint64_feasigns
(
use_slots_
.
size
(),
std
::
vector
<
uint64_t
>
());
std
::
vector
<
std
::
vector
<
size_t
>>
offset
(
use_slots_
.
size
(),
std
::
vector
<
size_t
>
{
0
});
std
::
vector
<
bool
>
visit
(
use_slots_
.
size
(),
false
);
for
(
size_t
i
=
0
;
i
<
batch_float_feasigns_
.
size
();
++
i
)
{
batch_float_feasigns_
[
i
].
clear
();
batch_uint64_feasigns_
[
i
].
clear
();
offset_
[
i
].
clear
();
offset_
[
i
].
push_back
(
0
);
}
ins_content_vec_
.
clear
();
ins_content_vec_
.
reserve
(
ins_vec
.
size
());
ins_id_vec_
.
clear
();
...
...
@@ -1005,30 +1017,31 @@ void MultiSlotInMemoryDataFeed::PutToFeedVec(
ins_id_vec_
.
push_back
(
r
.
ins_id_
);
ins_content_vec_
.
push_back
(
r
.
content_
);
for
(
auto
&
item
:
r
.
float_feasigns_
)
{
batch_float_feasigns
[
item
.
slot
()].
push_back
(
item
.
sign
().
float_feasign_
);
visit
[
item
.
slot
()]
=
true
;
batch_float_feasigns
_
[
item
.
slot
()].
push_back
(
item
.
sign
().
float_feasign_
);
visit
_
[
item
.
slot
()]
=
true
;
}
for
(
auto
&
item
:
r
.
uint64_feasigns_
)
{
batch_uint64_feasigns
[
item
.
slot
()].
push_back
(
item
.
sign
().
uint64_feasign_
);
visit
[
item
.
slot
()]
=
true
;
batch_uint64_feasigns_
[
item
.
slot
()].
push_back
(
item
.
sign
().
uint64_feasign_
);
visit_
[
item
.
slot
()]
=
true
;
}
for
(
size_t
j
=
0
;
j
<
use_slots_
.
size
();
++
j
)
{
const
auto
&
type
=
all_slots_type_
[
j
];
if
(
visit
[
j
])
{
visit
[
j
]
=
false
;
if
(
visit
_
[
j
])
{
visit
_
[
j
]
=
false
;
}
else
{
// fill slot value with default value 0
if
(
type
[
0
]
==
'f'
)
{
// float
batch_float_feasigns
[
j
].
push_back
(
0.0
);
batch_float_feasigns
_
[
j
].
push_back
(
0.0
);
}
else
if
(
type
[
0
]
==
'u'
)
{
// uint64
batch_uint64_feasigns
[
j
].
push_back
(
0
);
batch_uint64_feasigns
_
[
j
].
push_back
(
0
);
}
}
// get offset of this ins in this slot
if
(
type
[
0
]
==
'f'
)
{
// float
offset
[
j
].
push_back
(
batch_float_feasigns
[
j
].
size
());
offset
_
[
j
].
push_back
(
batch_float_feasigns_
[
j
].
size
());
}
else
if
(
type
[
0
]
==
'u'
)
{
// uint64
offset
[
j
].
push_back
(
batch_uint64_feasigns
[
j
].
size
());
offset
_
[
j
].
push_back
(
batch_uint64_feasigns_
[
j
].
size
());
}
}
}
...
...
@@ -1037,21 +1050,21 @@ void MultiSlotInMemoryDataFeed::PutToFeedVec(
if
(
feed_vec_
[
i
]
==
nullptr
)
{
continue
;
}
int
total_instance
=
offset
[
i
].
back
();
int
total_instance
=
offset
_
[
i
].
back
();
const
auto
&
type
=
all_slots_type_
[
i
];
if
(
type
[
0
]
==
'f'
)
{
// float
float
*
feasign
=
batch_float_feasigns
[
i
].
data
();
float
*
feasign
=
batch_float_feasigns
_
[
i
].
data
();
float
*
tensor_ptr
=
feed_vec_
[
i
]
->
mutable_data
<
float
>
({
total_instance
,
1
},
this
->
place_
);
CopyToFeedTensor
(
tensor_ptr
,
feasign
,
total_instance
*
sizeof
(
float
));
}
else
if
(
type
[
0
]
==
'u'
)
{
// uint64
// no uint64_t type in paddlepaddle
uint64_t
*
feasign
=
batch_uint64_feasigns
[
i
].
data
();
uint64_t
*
feasign
=
batch_uint64_feasigns
_
[
i
].
data
();
int64_t
*
tensor_ptr
=
feed_vec_
[
i
]
->
mutable_data
<
int64_t
>
(
{
total_instance
,
1
},
this
->
place_
);
CopyToFeedTensor
(
tensor_ptr
,
feasign
,
total_instance
*
sizeof
(
int64_t
));
}
auto
&
slot_offset
=
offset
[
i
];
auto
&
slot_offset
=
offset
_
[
i
];
LoD
data_lod
{
slot_offset
};
feed_vec_
[
i
]
->
set_lod
(
data_lod
);
if
(
use_slots_is_dense_
[
i
])
{
...
...
@@ -1427,13 +1440,12 @@ int PaddleBoxDataFeed::GetCurrentPhase() {
void
PaddleBoxDataFeed
::
PutToFeedVec
(
const
std
::
vector
<
Record
*>&
ins_vec
)
{
#ifdef _LINUX
std
::
vector
<
std
::
vector
<
float
>>
batch_float_feasigns
(
use_slots_
.
size
(),
std
::
vector
<
float
>
());
std
::
vector
<
std
::
vector
<
uint64_t
>>
batch_uint64_feasigns
(
use_slots_
.
size
(),
std
::
vector
<
uint64_t
>
());
std
::
vector
<
std
::
vector
<
size_t
>>
offset
(
use_slots_
.
size
(),
std
::
vector
<
size_t
>
{
0
});
std
::
vector
<
bool
>
visit
(
use_slots_
.
size
(),
false
);
for
(
size_t
i
=
0
;
i
<
batch_float_feasigns_
.
size
();
++
i
)
{
batch_float_feasigns_
[
i
].
clear
();
batch_uint64_feasigns_
[
i
].
clear
();
offset_
[
i
].
clear
();
offset_
[
i
].
push_back
(
0
);
}
ins_content_vec_
.
clear
();
ins_content_vec_
.
reserve
(
ins_vec
.
size
());
ins_id_vec_
.
clear
();
...
...
@@ -1443,30 +1455,31 @@ void PaddleBoxDataFeed::PutToFeedVec(const std::vector<Record*>& ins_vec) {
ins_id_vec_
.
push_back
(
r
->
ins_id_
);
ins_content_vec_
.
push_back
(
r
->
content_
);
for
(
auto
&
item
:
r
->
float_feasigns_
)
{
batch_float_feasigns
[
item
.
slot
()].
push_back
(
item
.
sign
().
float_feasign_
);
visit
[
item
.
slot
()]
=
true
;
batch_float_feasigns
_
[
item
.
slot
()].
push_back
(
item
.
sign
().
float_feasign_
);
visit
_
[
item
.
slot
()]
=
true
;
}
for
(
auto
&
item
:
r
->
uint64_feasigns_
)
{
batch_uint64_feasigns
[
item
.
slot
()].
push_back
(
item
.
sign
().
uint64_feasign_
);
visit
[
item
.
slot
()]
=
true
;
batch_uint64_feasigns_
[
item
.
slot
()].
push_back
(
item
.
sign
().
uint64_feasign_
);
visit_
[
item
.
slot
()]
=
true
;
}
for
(
size_t
j
=
0
;
j
<
use_slots_
.
size
();
++
j
)
{
const
auto
&
type
=
all_slots_type_
[
j
];
if
(
visit
[
j
])
{
visit
[
j
]
=
false
;
if
(
visit
_
[
j
])
{
visit
_
[
j
]
=
false
;
}
else
{
// fill slot value with default value 0
if
(
type
[
0
]
==
'f'
)
{
// float
batch_float_feasigns
[
j
].
push_back
(
0.0
);
batch_float_feasigns
_
[
j
].
push_back
(
0.0
);
}
else
if
(
type
[
0
]
==
'u'
)
{
// uint64
batch_uint64_feasigns
[
j
].
push_back
(
0
);
batch_uint64_feasigns
_
[
j
].
push_back
(
0
);
}
}
// get offset of this ins in this slot
if
(
type
[
0
]
==
'f'
)
{
// float
offset
[
j
].
push_back
(
batch_float_feasigns
[
j
].
size
());
offset
_
[
j
].
push_back
(
batch_float_feasigns_
[
j
].
size
());
}
else
if
(
type
[
0
]
==
'u'
)
{
// uint64
offset
[
j
].
push_back
(
batch_uint64_feasigns
[
j
].
size
());
offset
_
[
j
].
push_back
(
batch_uint64_feasigns_
[
j
].
size
());
}
}
}
...
...
@@ -1475,21 +1488,21 @@ void PaddleBoxDataFeed::PutToFeedVec(const std::vector<Record*>& ins_vec) {
if
(
feed_vec_
[
i
]
==
nullptr
)
{
continue
;
}
int
total_instance
=
offset
[
i
].
back
();
int
total_instance
=
offset
_
[
i
].
back
();
const
auto
&
type
=
all_slots_type_
[
i
];
if
(
type
[
0
]
==
'f'
)
{
// float
float
*
feasign
=
batch_float_feasigns
[
i
].
data
();
float
*
feasign
=
batch_float_feasigns
_
[
i
].
data
();
float
*
tensor_ptr
=
feed_vec_
[
i
]
->
mutable_data
<
float
>
({
total_instance
,
1
},
this
->
place_
);
CopyToFeedTensor
(
tensor_ptr
,
feasign
,
total_instance
*
sizeof
(
float
));
}
else
if
(
type
[
0
]
==
'u'
)
{
// uint64
// no uint64_t type in paddlepaddle
uint64_t
*
feasign
=
batch_uint64_feasigns
[
i
].
data
();
uint64_t
*
feasign
=
batch_uint64_feasigns
_
[
i
].
data
();
int64_t
*
tensor_ptr
=
feed_vec_
[
i
]
->
mutable_data
<
int64_t
>
(
{
total_instance
,
1
},
this
->
place_
);
CopyToFeedTensor
(
tensor_ptr
,
feasign
,
total_instance
*
sizeof
(
int64_t
));
}
auto
&
slot_offset
=
offset
[
i
];
auto
&
slot_offset
=
offset
_
[
i
];
LoD
data_lod
{
slot_offset
};
feed_vec_
[
i
]
->
set_lod
(
data_lod
);
if
(
use_slots_is_dense_
[
i
])
{
...
...
paddle/fluid/framework/data_feed.h
浏览文件 @
9ff558a4
...
...
@@ -597,6 +597,10 @@ class MultiSlotInMemoryDataFeed : public InMemoryDataFeed<Record> {
virtual
void
PutToFeedVec
(
const
std
::
vector
<
Record
>&
ins_vec
);
virtual
void
GetMsgFromLogKey
(
const
std
::
string
&
log_key
,
uint64_t
*
search_id
,
uint32_t
*
cmatch
,
uint32_t
*
rank
);
std
::
vector
<
std
::
vector
<
float
>>
batch_float_feasigns_
;
std
::
vector
<
std
::
vector
<
uint64_t
>>
batch_uint64_feasigns_
;
std
::
vector
<
std
::
vector
<
size_t
>>
offset_
;
std
::
vector
<
bool
>
visit_
;
};
class
PaddleBoxDataFeed
:
public
MultiSlotInMemoryDataFeed
{
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录