Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
9e32a387
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
9e32a387
编写于
7月 13, 2021
作者:
S
seemingwang
提交者:
GitHub
7月 13, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
speed up random sample of graph engine (#34088)
上级
75fc32e2
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
81 addition
and
52 deletion
+81
-52
paddle/fluid/distributed/table/common_graph_table.cc
paddle/fluid/distributed/table/common_graph_table.cc
+33
-27
paddle/fluid/distributed/table/common_graph_table.h
paddle/fluid/distributed/table/common_graph_table.h
+1
-0
paddle/fluid/distributed/table/graph/graph_node.cc
paddle/fluid/distributed/table/graph/graph_node.cc
+2
-2
paddle/fluid/distributed/table/graph/graph_node.h
paddle/fluid/distributed/table/graph/graph_node.h
+11
-4
paddle/fluid/distributed/table/graph/graph_weighted_sampler.cc
...e/fluid/distributed/table/graph/graph_weighted_sampler.cc
+24
-14
paddle/fluid/distributed/table/graph/graph_weighted_sampler.h
...le/fluid/distributed/table/graph/graph_weighted_sampler.h
+10
-5
未找到文件。
paddle/fluid/distributed/table/common_graph_table.cc
浏览文件 @
9e32a387
...
...
@@ -15,12 +15,15 @@
#include "paddle/fluid/distributed/table/common_graph_table.h"
#include <time.h>
#include <algorithm>
#include <chrono>
#include <set>
#include <sstream>
#include "paddle/fluid/distributed/common/utils.h"
#include "paddle/fluid/distributed/table/graph/graph_node.h"
#include "paddle/fluid/framework/generator.h"
#include "paddle/fluid/string/printf.h"
#include "paddle/fluid/string/string_helper.h"
namespace
paddle
{
namespace
distributed
{
...
...
@@ -399,31 +402,34 @@ int32_t GraphTable::random_sample_neighboors(
uint64_t
&
node_id
=
node_ids
[
idx
];
std
::
unique_ptr
<
char
[]
>
&
buffer
=
buffers
[
idx
];
int
&
actual_size
=
actual_sizes
[
idx
];
tasks
.
push_back
(
_shards_task_pool
[
get_thread_pool_index
(
node_id
)]
->
enqueue
(
[
&
]()
->
int
{
Node
*
node
=
find_node
(
node_id
);
if
(
node
==
nullptr
)
{
actual_size
=
0
;
return
0
;
}
std
::
vector
<
int
>
res
=
node
->
sample_k
(
sample_size
);
actual_size
=
res
.
size
()
*
(
Node
::
id_size
+
Node
::
weight_size
);
int
offset
=
0
;
uint64_t
id
;
float
weight
;
char
*
buffer_addr
=
new
char
[
actual_size
];
buffer
.
reset
(
buffer_addr
);
for
(
int
&
x
:
res
)
{
id
=
node
->
get_neighbor_id
(
x
);
weight
=
node
->
get_neighbor_weight
(
x
);
memcpy
(
buffer_addr
+
offset
,
&
id
,
Node
::
id_size
);
offset
+=
Node
::
id_size
;
memcpy
(
buffer_addr
+
offset
,
&
weight
,
Node
::
weight_size
);
offset
+=
Node
::
weight_size
;
}
return
0
;
}));
int
thread_pool_index
=
get_thread_pool_index
(
node_id
);
auto
rng
=
_shards_task_rng_pool
[
thread_pool_index
];
tasks
.
push_back
(
_shards_task_pool
[
thread_pool_index
]
->
enqueue
([
&
]()
->
int
{
Node
*
node
=
find_node
(
node_id
);
if
(
node
==
nullptr
)
{
actual_size
=
0
;
return
0
;
}
std
::
vector
<
int
>
res
=
node
->
sample_k
(
sample_size
,
rng
);
actual_size
=
res
.
size
()
*
(
Node
::
id_size
+
Node
::
weight_size
);
int
offset
=
0
;
uint64_t
id
;
float
weight
;
char
*
buffer_addr
=
new
char
[
actual_size
];
buffer
.
reset
(
buffer_addr
);
for
(
int
&
x
:
res
)
{
id
=
node
->
get_neighbor_id
(
x
);
weight
=
node
->
get_neighbor_weight
(
x
);
memcpy
(
buffer_addr
+
offset
,
&
id
,
Node
::
id_size
);
offset
+=
Node
::
id_size
;
memcpy
(
buffer_addr
+
offset
,
&
weight
,
Node
::
weight_size
);
offset
+=
Node
::
weight_size
;
}
return
0
;
}));
}
for
(
size_t
idx
=
0
;
idx
<
node_num
;
++
idx
)
{
tasks
[
idx
].
get
();
...
...
@@ -512,7 +518,6 @@ int32_t GraphTable::pull_graph_list(int start, int total_size,
int
end
=
start
+
(
count
-
1
)
*
step
+
1
;
tasks
.
push_back
(
_shards_task_pool
[
i
%
task_pool_size_
]
->
enqueue
(
[
this
,
i
,
start
,
end
,
step
,
size
]()
->
std
::
vector
<
Node
*>
{
return
this
->
shards
[
i
].
get_batch
(
start
-
size
,
end
-
size
,
step
);
}));
start
+=
count
*
step
;
...
...
@@ -546,6 +551,7 @@ int32_t GraphTable::initialize() {
_shards_task_pool
.
resize
(
task_pool_size_
);
for
(
size_t
i
=
0
;
i
<
_shards_task_pool
.
size
();
++
i
)
{
_shards_task_pool
[
i
].
reset
(
new
::
ThreadPool
(
1
));
_shards_task_rng_pool
.
push_back
(
paddle
::
framework
::
GetCPURandomEngine
(
0
));
}
server_num
=
_shard_num
;
// VLOG(0) << "in init graph table server num = " << server_num;
...
...
@@ -586,5 +592,5 @@ int32_t GraphTable::initialize() {
shards
=
std
::
vector
<
GraphShard
>
(
shard_num_per_table
,
GraphShard
(
shard_num
));
return
0
;
}
}
};
}
// namespace distributed
};
// namespace paddle
paddle/fluid/distributed/table/common_graph_table.h
浏览文件 @
9e32a387
...
...
@@ -136,6 +136,7 @@ class GraphTable : public SparseTable {
std
::
string
table_type
;
std
::
vector
<
std
::
shared_ptr
<::
ThreadPool
>>
_shards_task_pool
;
std
::
vector
<
std
::
shared_ptr
<
std
::
mt19937_64
>>
_shards_task_rng_pool
;
};
}
// namespace distributed
...
...
paddle/fluid/distributed/table/graph/graph_node.cc
浏览文件 @
9e32a387
...
...
@@ -113,5 +113,5 @@ void FeatureNode::recover_from_buffer(char* buffer) {
feature
.
push_back
(
std
::
string
(
str
));
}
}
}
}
}
// namespace distributed
}
// namespace paddle
paddle/fluid/distributed/table/graph/graph_node.h
浏览文件 @
9e32a387
...
...
@@ -15,6 +15,7 @@
#pragma once
#include <cstring>
#include <iostream>
#include <memory>
#include <sstream>
#include <vector>
#include "paddle/fluid/distributed/table/graph/graph_weighted_sampler.h"
...
...
@@ -33,7 +34,10 @@ class Node {
virtual
void
build_edges
(
bool
is_weighted
)
{}
virtual
void
build_sampler
(
std
::
string
sample_type
)
{}
virtual
void
add_edge
(
uint64_t
id
,
float
weight
)
{}
virtual
std
::
vector
<
int
>
sample_k
(
int
k
)
{
return
std
::
vector
<
int
>
();
}
virtual
std
::
vector
<
int
>
sample_k
(
int
k
,
const
std
::
shared_ptr
<
std
::
mt19937_64
>
rng
)
{
return
std
::
vector
<
int
>
();
}
virtual
uint64_t
get_neighbor_id
(
int
idx
)
{
return
0
;
}
virtual
float
get_neighbor_weight
(
int
idx
)
{
return
1.
;
}
...
...
@@ -59,7 +63,10 @@ class GraphNode : public Node {
virtual
void
add_edge
(
uint64_t
id
,
float
weight
)
{
edges
->
add_edge
(
id
,
weight
);
}
virtual
std
::
vector
<
int
>
sample_k
(
int
k
)
{
return
sampler
->
sample_k
(
k
);
}
virtual
std
::
vector
<
int
>
sample_k
(
int
k
,
const
std
::
shared_ptr
<
std
::
mt19937_64
>
rng
)
{
return
sampler
->
sample_k
(
k
,
rng
);
}
virtual
uint64_t
get_neighbor_id
(
int
idx
)
{
return
edges
->
get_id
(
idx
);
}
virtual
float
get_neighbor_weight
(
int
idx
)
{
return
edges
->
get_weight
(
idx
);
}
...
...
@@ -123,5 +130,5 @@ class FeatureNode : public Node {
protected:
std
::
vector
<
std
::
string
>
feature
;
};
}
}
}
// namespace distributed
}
// namespace paddle
paddle/fluid/distributed/table/graph/graph_weighted_sampler.cc
浏览文件 @
9e32a387
...
...
@@ -14,24 +14,30 @@
#include "paddle/fluid/distributed/table/graph/graph_weighted_sampler.h"
#include <iostream>
#include <memory>
#include <unordered_map>
#include "paddle/fluid/framework/generator.h"
namespace
paddle
{
namespace
distributed
{
void
RandomSampler
::
build
(
GraphEdgeBlob
*
edges
)
{
this
->
edges
=
edges
;
}
std
::
vector
<
int
>
RandomSampler
::
sample_k
(
int
k
)
{
std
::
vector
<
int
>
RandomSampler
::
sample_k
(
int
k
,
const
std
::
shared_ptr
<
std
::
mt19937_64
>
rng
)
{
int
n
=
edges
->
size
();
if
(
k
>
n
)
{
if
(
k
>
=
n
)
{
k
=
n
;
std
::
vector
<
int
>
sample_result
;
for
(
int
i
=
0
;
i
<
k
;
i
++
)
{
sample_result
.
push_back
(
i
);
}
return
sample_result
;
}
struct
timespec
tn
;
clock_gettime
(
CLOCK_REALTIME
,
&
tn
);
srand
(
tn
.
tv_nsec
);
std
::
vector
<
int
>
sample_result
;
std
::
unordered_map
<
int
,
int
>
replace_map
;
while
(
k
--
)
{
int
rand_int
=
rand
()
%
n
;
std
::
uniform_int_distribution
<
int
>
distrib
(
0
,
n
-
1
);
int
rand_int
=
distrib
(
*
rng
);
auto
iter
=
replace_map
.
find
(
rand_int
);
if
(
iter
==
replace_map
.
end
())
{
sample_result
.
push_back
(
rand_int
);
...
...
@@ -98,19 +104,23 @@ void WeightedSampler::build_one(WeightedGraphEdgeBlob *edges, int start,
count
=
left
->
count
+
right
->
count
;
}
}
std
::
vector
<
int
>
WeightedSampler
::
sample_k
(
int
k
)
{
if
(
k
>
count
)
{
std
::
vector
<
int
>
WeightedSampler
::
sample_k
(
int
k
,
const
std
::
shared_ptr
<
std
::
mt19937_64
>
rng
)
{
if
(
k
>=
count
)
{
k
=
count
;
std
::
vector
<
int
>
sample_result
;
for
(
int
i
=
0
;
i
<
k
;
i
++
)
{
sample_result
.
push_back
(
i
);
}
return
sample_result
;
}
std
::
vector
<
int
>
sample_result
;
float
subtract
;
std
::
unordered_map
<
WeightedSampler
*
,
float
>
subtract_weight_map
;
std
::
unordered_map
<
WeightedSampler
*
,
int
>
subtract_count_map
;
struct
timespec
tn
;
clock_gettime
(
CLOCK_REALTIME
,
&
tn
);
srand
(
tn
.
tv_nsec
);
std
::
uniform_real_distribution
<
float
>
distrib
(
0
,
1.0
);
while
(
k
--
)
{
float
query_weight
=
rand
()
%
100000
/
100000.0
;
float
query_weight
=
distrib
(
*
rng
)
;
query_weight
*=
weight
-
subtract_weight_map
[
this
];
sample_result
.
push_back
(
sample
(
query_weight
,
subtract_weight_map
,
subtract_count_map
,
subtract
));
...
...
@@ -146,5 +156,5 @@ int WeightedSampler::sample(
subtract_count_map
[
this
]
++
;
return
return_idx
;
}
}
}
}
// namespace distributed
}
// namespace paddle
paddle/fluid/distributed/table/graph/graph_weighted_sampler.h
浏览文件 @
9e32a387
...
...
@@ -14,6 +14,8 @@
#pragma once
#include <ctime>
#include <memory>
#include <random>
#include <unordered_map>
#include <vector>
#include "paddle/fluid/distributed/table/graph/graph_edge.h"
...
...
@@ -24,14 +26,16 @@ class Sampler {
public:
virtual
~
Sampler
()
{}
virtual
void
build
(
GraphEdgeBlob
*
edges
)
=
0
;
virtual
std
::
vector
<
int
>
sample_k
(
int
k
)
=
0
;
virtual
std
::
vector
<
int
>
sample_k
(
int
k
,
const
std
::
shared_ptr
<
std
::
mt19937_64
>
rng
)
=
0
;
};
class
RandomSampler
:
public
Sampler
{
public:
virtual
~
RandomSampler
()
{}
virtual
void
build
(
GraphEdgeBlob
*
edges
);
virtual
std
::
vector
<
int
>
sample_k
(
int
k
);
virtual
std
::
vector
<
int
>
sample_k
(
int
k
,
const
std
::
shared_ptr
<
std
::
mt19937_64
>
rng
);
GraphEdgeBlob
*
edges
;
};
...
...
@@ -46,7 +50,8 @@ class WeightedSampler : public Sampler {
GraphEdgeBlob
*
edges
;
virtual
void
build
(
GraphEdgeBlob
*
edges
);
virtual
void
build_one
(
WeightedGraphEdgeBlob
*
edges
,
int
start
,
int
end
);
virtual
std
::
vector
<
int
>
sample_k
(
int
k
);
virtual
std
::
vector
<
int
>
sample_k
(
int
k
,
const
std
::
shared_ptr
<
std
::
mt19937_64
>
rng
);
private:
int
sample
(
float
query_weight
,
...
...
@@ -54,5 +59,5 @@ class WeightedSampler : public Sampler {
std
::
unordered_map
<
WeightedSampler
*
,
int
>
&
subtract_count_map
,
float
&
subtract
);
};
}
}
}
// namespace distributed
}
// namespace paddle
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录