Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
906db719
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2302
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
906db719
编写于
5月 17, 2021
作者:
S
seemingwang
提交者:
GitHub
5月 17, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
remove redundant graph files (#32924)
* delete unused files.
上级
5f1c07da
变更
6
隐藏空白更改
内联
并排
Showing
6 changed files
with
0 additions
and
527 deletions
+0
-527
paddle/fluid/distributed/table/graph_edge.cc
paddle/fluid/distributed/table/graph_edge.cc
+0
-29
paddle/fluid/distributed/table/graph_edge.h
paddle/fluid/distributed/table/graph_edge.h
+0
-46
paddle/fluid/distributed/table/graph_node.cc
paddle/fluid/distributed/table/graph_node.cc
+0
-117
paddle/fluid/distributed/table/graph_node.h
paddle/fluid/distributed/table/graph_node.h
+0
-127
paddle/fluid/distributed/table/graph_weighted_sampler.cc
paddle/fluid/distributed/table/graph_weighted_sampler.cc
+0
-150
paddle/fluid/distributed/table/graph_weighted_sampler.h
paddle/fluid/distributed/table/graph_weighted_sampler.h
+0
-58
未找到文件。
paddle/fluid/distributed/table/graph_edge.cc
已删除
100644 → 0
浏览文件 @
5f1c07da
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/distributed/table/graph_edge.h"
#include <cstring>
namespace
paddle
{
namespace
distributed
{
// Appends neighbor `id` to the edge list. The unweighted blob keeps no
// per-edge weight, so `weight` is accepted only to match the shared signature
// and is otherwise ignored.
void GraphEdgeBlob::add_edge(uint64_t id, float weight = 1) {
  id_arr.emplace_back(id);
}
// Appends neighbor `id` together with its `weight`; the two arrays are grown
// in lock-step so that position i in both refers to the same edge.
void WeightedGraphEdgeBlob::add_edge(uint64_t id, float weight = 1) {
  id_arr.emplace_back(id);
  weight_arr.emplace_back(weight);
}
}
}
paddle/fluid/distributed/table/graph_edge.h
已删除
100644 → 0
浏览文件 @
5f1c07da
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <cstddef>
#include <cstdint>
#include <vector>
namespace
paddle
{
namespace
distributed
{
// Edge list that stores only neighbor ids. Every edge reports a weight of 1.
class GraphEdgeBlob {
 public:
  GraphEdgeBlob() = default;
  virtual ~GraphEdgeBlob() = default;

  // Number of stored edges.
  size_t size() { return id_arr.size(); }

  // Adds an edge to neighbor `id`; implemented out of line.
  virtual void add_edge(uint64_t id, float weight);

  // Neighbor id of the edge at position `idx`. No bounds checking is done.
  uint64_t get_id(int idx) { return id_arr[idx]; }

  // Weight of the edge at position `idx`; constant 1 for this class.
  virtual float get_weight(int idx) { return 1; }

 protected:
  std::vector<uint64_t> id_arr;  // neighbor ids
};
class
WeightedGraphEdgeBlob
:
public
GraphEdgeBlob
{
public:
WeightedGraphEdgeBlob
()
{}
virtual
~
WeightedGraphEdgeBlob
()
{}
virtual
void
add_edge
(
uint64_t
id
,
float
weight
);
virtual
float
get_weight
(
int
idx
)
{
return
weight_arr
[
idx
];
}
protected:
std
::
vector
<
float
>
weight_arr
;
};
}
}
paddle/fluid/distributed/table/graph_node.cc
已删除
100644 → 0
浏览文件 @
5f1c07da
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/distributed/table/graph_node.h"
#include <cstring>
namespace
paddle
{
namespace
distributed
{
// Frees the sampler and the edge blob held by this node.
GraphNode::~GraphNode() {
  // Deleting a null pointer is a no-op, so no explicit null tests are needed.
  delete sampler;
  sampler = nullptr;

  delete edges;
  edges = nullptr;
}
// Byte widths of the fixed-size fields, shared by the buffer (de)serializers.
int Node::weight_size = static_cast<int>(sizeof(float));
int Node::id_size = static_cast<int>(sizeof(uint64_t));
int Node::int_size = static_cast<int>(sizeof(int));
// Serialized size in bytes of a plain node: the id followed by one int (the
// feature count). `need_feature` does not affect the result here.
int Node::get_size(bool need_feature) {
  const int bytes = id_size + int_size;
  return bytes;
}
// Serializes this node into `buffer` as: id (id_size bytes) followed by an
// int feature count that is always 0. `need_feature` is not consulted.
// The caller must supply at least get_size() bytes.
void Node::to_buffer(char* buffer, bool need_feature) {
  const int feat_num = 0;
  char* cursor = buffer;

  memcpy(cursor, &id, id_size);
  cursor += id_size;
  memcpy(cursor, &feat_num, sizeof(int));
}
// Restores the node id from the first id_size bytes of `buffer`. Anything
// after the id is not read by this base implementation.
void Node::recover_from_buffer(char* buffer) {
  const char* src = buffer;
  memcpy(&id, src, id_size);
}
// Serialized size in bytes. Always counts the id and the feature count; when
// `need_feature` is set it also counts, per feature, one int length prefix
// plus the feature's bytes.
int FeatureNode::get_size(bool need_feature) {
  int total = id_size + int_size;  // id, feat_num
  if (!need_feature) {
    return total;
  }

  total += feature.size() * int_size;  // one length prefix per feature
  for (size_t i = 0; i < feature.size(); ++i) {
    total += feature[i].size();
  }
  return total;
}
// Lazily creates the edge container: a weighted blob when `is_weighted` is
// true, a plain id-only blob otherwise. Does nothing if one already exists.
void GraphNode::build_edges(bool is_weighted) {
  if (edges != nullptr) {
    return;  // already built; keep the existing blob
  }

  if (is_weighted) {
    edges = new WeightedGraphEdgeBlob();
  } else {
    edges = new GraphEdgeBlob();
  }
}
void
GraphNode
::
build_sampler
(
std
::
string
sample_type
)
{
if
(
sample_type
==
"random"
)
{
sampler
=
new
RandomSampler
();
}
else
if
(
sample_type
==
"weighted"
)
{
sampler
=
new
WeightedSampler
();
}
sampler
->
build
(
edges
);
}
// Serializes this node into `buffer`. Layout:
//   id (id_size bytes) | feat_num (int) | { feat_len (int) | bytes } * feat_num
// When `need_feature` is false, feat_num is written as 0 and no feature data
// follows. The caller must supply at least get_size(need_feature) bytes.
void FeatureNode::to_buffer(char* buffer, bool need_feature) {
  char* out = buffer;

  memcpy(out, &id, id_size);
  out += id_size;

  int feat_num = need_feature ? static_cast<int>(feature.size()) : 0;
  memcpy(out, &feat_num, sizeof(int));
  if (!need_feature) {
    return;
  }
  out += sizeof(int);

  for (int i = 0; i < feat_num; ++i) {
    const std::string& fea = feature[i];

    int feat_len = fea.size();
    memcpy(out, &feat_len, sizeof(int));
    out += sizeof(int);

    memcpy(out, fea.c_str(), fea.size());
    out += fea.size();
  }
}
// Rebuilds this node from a buffer laid out as
//   id (id_size bytes) | feat_num (int) | { feat_len (int) | bytes } * feat_num
// replacing any features currently held.
//
// Fixes over the previous version:
//  * features are copied with an explicit length, so bytes after an embedded
//    '\0' are no longer dropped (to_buffer writes the full size() bytes, and
//    the byte-packing helpers in FeatureNode can produce zero bytes);
//  * the non-standard variable-length stack array is gone;
//  * a negative length stops parsing instead of invoking undefined behavior.
void FeatureNode::recover_from_buffer(char* buffer) {
  int feat_num = 0;
  int feat_len = 0;

  memcpy(&id, buffer, id_size);
  buffer += id_size;

  memcpy(&feat_num, buffer, sizeof(int));
  buffer += sizeof(int);

  feature.clear();
  if (feat_num > 0) {
    feature.reserve(feat_num);
  }

  for (int i = 0; i < feat_num; ++i) {
    memcpy(&feat_len, buffer, sizeof(int));
    buffer += sizeof(int);

    if (feat_len < 0) {
      break;  // malformed length; do not read further
    }

    // Length-aware construction keeps embedded '\0' bytes intact.
    feature.push_back(std::string(buffer, static_cast<size_t>(feat_len)));
    buffer += feat_len;
  }
}
}
}
paddle/fluid/distributed/table/graph_node.h
已删除
100644 → 0
浏览文件 @
5f1c07da
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <cstring>
#include <iostream>
#include <sstream>
#include <vector>
#include "paddle/fluid/distributed/table/graph_weighted_sampler.h"
namespace
paddle
{
namespace
distributed
{
// Base class for graph nodes. It holds the node id and supplies default
// (mostly no-op) implementations of the edge, sampling and feature operations
// that subclasses override.
class Node {
 public:
  // The id is zero-initialized so get_id() on a default-constructed node no
  // longer reads an indeterminate value.
  Node() : id(0) {}
  Node(uint64_t id) : id(id) {}
  virtual ~Node() {}

  // Byte widths used by the buffer (de)serializers; defined out of line.
  static int id_size, int_size, weight_size;

  uint64_t get_id() { return id; }
  void set_id(uint64_t id) { this->id = id; }

  // Edge / sampling hooks. The base implementations do nothing and report no
  // neighbors.
  virtual void build_edges(bool is_weighted) {}
  virtual void build_sampler(std::string sample_type) {}
  virtual void add_edge(uint64_t id, float weight) {}
  virtual std::vector<int> sample_k(int k) { return std::vector<int>(); }
  virtual uint64_t get_neighbor_id(int idx) { return 0; }
  virtual float get_neighbor_weight(int idx) { return 1.; }

  // Serialization; implemented out of line.
  virtual int get_size(bool need_feature);
  virtual void to_buffer(char* buffer, bool need_feature);
  virtual void recover_from_buffer(char* buffer);

  // Feature hooks. The base implementations store nothing: get_feature
  // returns an empty string and get_feature_size returns 0.
  virtual std::string get_feature(int idx) { return std::string(""); }
  virtual void set_feature(int idx, std::string str) {}
  virtual void set_feature_size(int size) {}
  virtual int get_feature_size() { return 0; }

 protected:
  uint64_t id;
};
class
GraphNode
:
public
Node
{
public:
GraphNode
()
:
Node
(),
sampler
(
nullptr
),
edges
(
nullptr
)
{}
GraphNode
(
uint64_t
id
)
:
Node
(
id
),
sampler
(
nullptr
),
edges
(
nullptr
)
{}
virtual
~
GraphNode
();
virtual
void
build_edges
(
bool
is_weighted
);
virtual
void
build_sampler
(
std
::
string
sample_type
);
virtual
void
add_edge
(
uint64_t
id
,
float
weight
)
{
edges
->
add_edge
(
id
,
weight
);
}
virtual
std
::
vector
<
int
>
sample_k
(
int
k
)
{
return
sampler
->
sample_k
(
k
);
}
virtual
uint64_t
get_neighbor_id
(
int
idx
)
{
return
edges
->
get_id
(
idx
);
}
virtual
float
get_neighbor_weight
(
int
idx
)
{
return
edges
->
get_weight
(
idx
);
}
protected:
Sampler
*
sampler
;
GraphEdgeBlob
*
edges
;
};
// Node that carries a list of feature strings (arbitrary bytes), plus helpers
// for packing typed values into such byte strings and unpacking them again.
class FeatureNode : public Node {
 public:
  FeatureNode() : Node() {}
  FeatureNode(uint64_t id) : Node(id) {}
  virtual ~FeatureNode() {}

  // Serialization; implemented out of line.
  virtual int get_size(bool need_feature);
  virtual void to_buffer(char* buffer, bool need_feature);
  virtual void recover_from_buffer(char* buffer);

  // Returns the feature at `idx`, or an empty string when `idx` is outside
  // [0, feature count). Negative indices are now rejected instead of indexing
  // out of bounds.
  virtual std::string get_feature(int idx) {
    if (idx >= 0 && idx < static_cast<int>(this->feature.size())) {
      return this->feature[idx];
    }
    return std::string("");
  }

  // Stores `str` at `idx`, growing the feature list if needed. A negative
  // `idx` is ignored (it previously indexed out of bounds).
  virtual void set_feature(int idx, std::string str) {
    if (idx < 0) {
      return;
    }
    if (idx >= static_cast<int>(this->feature.size())) {
      this->feature.resize(idx + 1);
    }
    this->feature[idx] = str;
  }

  virtual void set_feature_size(int size) { this->feature.resize(size); }
  virtual int get_feature_size() { return this->feature.size(); }

  // Parses each string in `feat_str` as a T (via stream extraction) and
  // returns the raw bytes of all values concatenated in order.
  //
  // Uses a std::string as the output buffer instead of a non-standard
  // variable-length stack array, and value-initializes `v` so that a token
  // the stream cannot read from never copies an indeterminate value.
  template <typename T>
  static std::string parse_value_to_bytes(std::vector<std::string> feat_str) {
    T v = T();
    const size_t Tsize = sizeof(T) * feat_str.size();
    std::string bytes(Tsize, '\0');
    for (size_t i = 0; i < feat_str.size(); i++) {
      std::stringstream ss(feat_str[i]);
      ss >> v;
      std::memcpy(&bytes[sizeof(T) * i], &v, sizeof(T));
    }
    return bytes;
  }

  // Reinterprets `feat_str` as a packed array of T and returns the values.
  // Only whole elements are decoded: trailing bytes that do not form a full
  // T are ignored (previously they caused a read past the end of the string).
  template <typename T>
  static std::vector<T> parse_bytes_to_array(std::string feat_str) {
    T v;
    std::vector<T> out;
    const size_t count = feat_str.size() / sizeof(T);
    out.reserve(count);
    const char* buffer = feat_str.data();
    for (size_t i = 0; i < count; ++i) {
      std::memcpy(&v, buffer + i * sizeof(T), sizeof(T));
      out.push_back(v);
    }
    return out;
  }

 protected:
  std::vector<std::string> feature;
};
}
}
paddle/fluid/distributed/table/graph_weighted_sampler.cc
已删除
100644 → 0
浏览文件 @
5f1c07da
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/distributed/table/graph_weighted_sampler.h"
#include <iostream>
#include <unordered_map>
namespace
paddle
{
namespace
distributed
{
// Remembers the edge list to sample from. The pointer is stored as-is; the
// sampler neither copies nor frees the blob.
void RandomSampler::build(GraphEdgeBlob* edges) { this->edges = edges; }
// Draws up to `k` distinct edge positions uniformly at random, without
// replacement, from [0, edges->size()). `k` is clamped to the edge count.
//
// Implementation: a sparse Fisher-Yates shuffle. `replace_map` records, for
// each slot that was already drawn, which value now lives there, so no array
// of size n has to be materialized.
//
// Fixes over the previous version:
//  * k <= 0 returns an empty result (a negative k used to keep looping until
//    n reached 0 and then evaluate rand() % 0);
//  * a null `edges` returns an empty result instead of being dereferenced;
//  * the value at the last live slot is read before `replace_map[...]` may
//    insert, so no iterator is used after a potential rehash.
std::vector<int> RandomSampler::sample_k(int k) {
  std::vector<int> sample_result;
  if (edges == nullptr) {
    return sample_result;
  }

  int n = edges->size();
  if (k > n) {
    k = n;
  }
  if (k <= 0) {
    return sample_result;
  }

  struct timespec tn;
  clock_gettime(CLOCK_REALTIME, &tn);
  srand(tn.tv_nsec);

  sample_result.reserve(k);
  std::unordered_map<int, int> replace_map;
  while (k--) {
    const int rand_int = rand() % n;

    // The drawn slot may have been remapped by an earlier draw.
    auto iter = replace_map.find(rand_int);
    sample_result.push_back(iter == replace_map.end() ? rand_int
                                                      : iter->second);

    // Move whatever currently occupies the last live slot into the drawn
    // slot, then shrink the live range by one.
    iter = replace_map.find(n - 1);
    const int last_value = (iter == replace_map.end()) ? n - 1 : iter->second;
    replace_map[rand_int] = last_value;
    --n;
  }
  return sample_result;
}
// Creates an empty, unbuilt sampler node. All members are initialized so that
// sample_k() on an unbuilt sampler sees count == 0 rather than an
// indeterminate value (weight, count and idx were previously left unset).
WeightedSampler::WeightedSampler()
    : left(nullptr),
      right(nullptr),
      weight(0),
      count(0),
      idx(0),
      edges(nullptr) {}
// Recursively frees both subtrees. `edges` is not deleted here.
WeightedSampler::~WeightedSampler() {
  // Deleting a null pointer is a no-op, so no explicit null tests are needed.
  delete left;
  left = nullptr;

  delete right;
  right = nullptr;
}
// (Re)builds the sampling tree over every edge in `edges`, discarding any
// previously built subtrees.
//
// Fixes over the previous version: a null or empty edge list now yields an
// empty sampler (count == 0, weight == 0). Previously a null pointer was
// dereferenced, and an empty list made build_one(…, 0, 0) recurse without
// bound.
//
// NOTE(review): the pointer is downcast to WeightedGraphEdgeBlob* without a
// runtime check, exactly as before; only virtual get_weight() is called on it
// in build_one.
void WeightedSampler::build(GraphEdgeBlob* edges) {
  delete left;
  left = nullptr;
  delete right;
  right = nullptr;

  if (edges == nullptr || edges->size() == 0) {
    this->edges = edges;
    weight = 0;
    count = 0;
    idx = 0;
    return;
  }

  build_one(static_cast<WeightedGraphEdgeBlob*>(edges), 0, edges->size());
}
// Builds the subtree covering edge positions [start, end).
//  * A single position becomes a leaf holding that edge's index and weight.
//  * A larger range is split at its midpoint into two children; this node's
//    weight and count are the sums over both children.
//
// Fix: an empty range (start >= end) now produces an empty leaf with
// count == 0 and weight == 0. Previously it fell into the split branch, whose
// children covered the same empty range, so the recursion never terminated.
void WeightedSampler::build_one(WeightedGraphEdgeBlob* edges, int start,
                                int end) {
  count = 0;
  this->edges = edges;

  if (start >= end) {
    left = right = nullptr;
    idx = start;
    weight = 0;
    return;
  }

  if (start + 1 == end) {
    left = right = nullptr;
    idx = start;
    count = 1;
    weight = edges->get_weight(idx);
    return;
  }

  const int mid = start + (end - start) / 2;
  left = new WeightedSampler();
  right = new WeightedSampler();
  left->build_one(edges, start, mid);
  right->build_one(edges, mid, end);
  weight = left->weight + right->weight;
  count = left->count + right->count;
}
// Draws up to `k` distinct edge positions without replacement, each draw
// choosing among the remaining edges in proportion to their weight. `k` is
// clamped to the number of edges in the tree.
//
// The tree itself is not modified: the weight and count already "removed"
// beneath each tree node are tracked in two per-call maps.
//
// Fixes over the previous version: k <= 0 returns an empty result (a negative
// k made `while (k--)` run until it wrapped around), and `subtract` is
// initialized before it is passed by reference.
std::vector<int> WeightedSampler::sample_k(int k) {
  if (k > count) {
    k = count;
  }
  std::vector<int> sample_result;
  if (k <= 0) {
    return sample_result;
  }
  sample_result.reserve(k);

  float subtract = 0;
  std::unordered_map<WeightedSampler*, float> subtract_weight_map;
  std::unordered_map<WeightedSampler*, int> subtract_count_map;

  struct timespec tn;
  clock_gettime(CLOCK_REALTIME, &tn);
  srand(tn.tv_nsec);

  while (k--) {
    // Uniform value in [0, 1) with 1e-5 resolution, scaled to the weight
    // still available in the whole tree.
    float query_weight = rand() % 100000 / 100000.0;
    query_weight *= weight - subtract_weight_map[this];
    sample_result.push_back(sample(query_weight, subtract_weight_map,
                                   subtract_count_map, subtract));
  }
  return sample_result;
}
// Descends the tree to pick one not-yet-drawn leaf for `query_weight` and
// returns that leaf's edge index.
//
// `subtract_weight_map` / `subtract_count_map` hold, per tree node, the total
// weight / number of leaves already drawn beneath it; both are updated on the
// way back up. `subtract` is set by the chosen leaf to its own weight so that
// every ancestor can add it to its running total.
int WeightedSampler::sample(
    float query_weight,
    std::unordered_map<WeightedSampler*, float>& subtract_weight_map,
    std::unordered_map<WeightedSampler*, int>& subtract_count_map,
    float& subtract) {
  // Leaf: mark it as fully consumed and report its weight upward.
  if (left == nullptr) {
    subtract_weight_map[this] = weight;
    subtract = weight;
    subtract_count_map[this] = 1;
    return idx;
  }

  const int left_remaining = left->count - subtract_count_map[left];
  const int right_remaining = right->count - subtract_count_map[right];
  const float left_subtract = subtract_weight_map[left];

  // Go left when the right side is exhausted, or when the left side still has
  // leaves and its remaining weight covers the query.
  const bool go_left =
      (right_remaining == 0) ||
      (left_remaining > 0 && left->weight - left_subtract >= query_weight);

  int picked;
  if (go_left) {
    picked = left->sample(query_weight, subtract_weight_map,
                          subtract_count_map, subtract);
  } else {
    // Skip past the weight that is still available on the left.
    picked = right->sample(query_weight - (left->weight - left_subtract),
                           subtract_weight_map, subtract_count_map, subtract);
  }

  subtract_weight_map[this] += subtract;
  subtract_count_map[this]++;
  return picked;
}
}
}
paddle/fluid/distributed/table/graph_weighted_sampler.h
已删除
100644 → 0
浏览文件 @
5f1c07da
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <ctime>
#include <unordered_map>
#include <vector>
#include "paddle/fluid/distributed/table/graph_edge.h"
namespace
paddle
{
namespace
distributed
{
// Abstract interface for neighbor samplers.
class Sampler {
 public:
  virtual ~Sampler() = default;

  // Prepares the sampler to draw from `edges`.
  virtual void build(GraphEdgeBlob* edges) = 0;

  // Returns a list of sampled int indices for a request of `k` samples.
  virtual std::vector<int> sample_k(int k) = 0;
};
// Sampler that draws edge positions uniformly at random (see the out-of-line
// definitions of build() and sample_k()).
class RandomSampler : public Sampler {
 public:
  virtual ~RandomSampler() {}
  virtual void build(GraphEdgeBlob* edges);
  virtual std::vector<int> sample_k(int k);

  // Edge list to sample from. Initialized to null so an unbuilt sampler holds
  // a well-defined pointer instead of an indeterminate one.
  GraphEdgeBlob* edges = nullptr;
};
// Sampler organized as a binary tree over edge positions; each tree node is
// itself a WeightedSampler. `left` and `right` are deleted in the destructor.
//
// All data members now carry default initializers, so weight, count and idx
// are well-defined even before build() runs.
//
// NOTE(review): the class deletes raw child pointers but declares no copy
// operations, so copying a WeightedSampler looks unsafe - confirm it is never
// copied.
class WeightedSampler : public Sampler {
 public:
  WeightedSampler();
  virtual ~WeightedSampler();

  WeightedSampler* left = nullptr;   // child subtree; null on a leaf
  WeightedSampler* right = nullptr;  // child subtree; null on a leaf
  float weight = 0;                  // total edge weight under this node
  int count = 0;                     // number of edges under this node
  int idx = 0;                       // edge position; set on leaves
  GraphEdgeBlob* edges = nullptr;    // edge list this tree was built over

  virtual void build(GraphEdgeBlob* edges);
  virtual void build_one(WeightedGraphEdgeBlob* edges, int start, int end);
  virtual std::vector<int> sample_k(int k);

 private:
  // Recursive helper behind sample_k(); implemented out of line.
  int sample(float query_weight,
             std::unordered_map<WeightedSampler*, float>& subtract_weight_map,
             std::unordered_map<WeightedSampler*, int>& subtract_count_map,
             float& subtract);
};
}
}
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录