Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
906db719
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
906db719
编写于
5月 17, 2021
作者:
S
seemingwang
提交者:
GitHub
5月 17, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
remove redundant graph files (#32924)
* delete unused files.
上级
5f1c07da
变更
6
隐藏空白更改
内联
并排
Showing
6 changed files
with
0 additions
and
527 deletions
+0
-527
paddle/fluid/distributed/table/graph_edge.cc
paddle/fluid/distributed/table/graph_edge.cc
+0
-29
paddle/fluid/distributed/table/graph_edge.h
paddle/fluid/distributed/table/graph_edge.h
+0
-46
paddle/fluid/distributed/table/graph_node.cc
paddle/fluid/distributed/table/graph_node.cc
+0
-117
paddle/fluid/distributed/table/graph_node.h
paddle/fluid/distributed/table/graph_node.h
+0
-127
paddle/fluid/distributed/table/graph_weighted_sampler.cc
paddle/fluid/distributed/table/graph_weighted_sampler.cc
+0
-150
paddle/fluid/distributed/table/graph_weighted_sampler.h
paddle/fluid/distributed/table/graph_weighted_sampler.h
+0
-58
未找到文件。
paddle/fluid/distributed/table/graph_edge.cc
已删除
100644 → 0
浏览文件 @
5f1c07da
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/distributed/table/graph_edge.h"
#include <cstring>
namespace
paddle
{
namespace
distributed
{
// Appends one neighbor id to the blob. The weight argument is accepted so the
// signature matches the weighted variant, but this implementation does not
// store it.
void GraphEdgeBlob::add_edge(uint64_t id, float weight = 1) {
  id_arr.emplace_back(id);
}
// Appends one neighbor id together with its weight. Both arrays grow by one
// element per call, so position i in id_arr and weight_arr describe the same
// edge.
void WeightedGraphEdgeBlob::add_edge(uint64_t id, float weight = 1) {
  id_arr.emplace_back(id);
  weight_arr.emplace_back(weight);
}
}
}
paddle/fluid/distributed/table/graph_edge.h
已删除
100644 → 0
浏览文件 @
5f1c07da
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <cstddef>
#include <cstdint>
#include <vector>
namespace
paddle
{
namespace
distributed
{
// Stores the ids of a set of edges in insertion order. This base variant
// keeps no per-edge weight: get_weight() reports 1 for every index.
class GraphEdgeBlob {
 public:
  GraphEdgeBlob() {}
  virtual ~GraphEdgeBlob() {}

  // Number of edge ids currently stored.
  size_t size() { return id_arr.size(); }

  // Adds an edge; defined out of line.
  virtual void add_edge(uint64_t id, float weight);

  // Returns the id stored at position idx. No bounds checking is performed.
  uint64_t get_id(int idx) {
    const uint64_t stored_id = id_arr[idx];
    return stored_id;
  }

  // Weight of the edge at position idx; constant 1 in this class.
  virtual float get_weight(int idx) { return 1.0f; }

 protected:
  std::vector<uint64_t> id_arr;
};
// Edge blob that additionally keeps one float weight per edge in weight_arr.
class WeightedGraphEdgeBlob : public GraphEdgeBlob {
 public:
  WeightedGraphEdgeBlob() {}
  virtual ~WeightedGraphEdgeBlob() {}

  // Adds an edge; defined out of line.
  virtual void add_edge(uint64_t id, float weight);

  // Returns the weight stored at position idx. No bounds checking is
  // performed.
  virtual float get_weight(int idx) {
    const float stored_weight = weight_arr[idx];
    return stored_weight;
  }

 protected:
  std::vector<float> weight_arr;
};
}
}
paddle/fluid/distributed/table/graph_node.cc
已删除
100644 → 0
浏览文件 @
5f1c07da
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/distributed/table/graph_node.h"
#include <cstring>
namespace
paddle
{
namespace
distributed
{
// Releases the sampler and the edge blob held by this node. Deleting a null
// pointer is a no-op, so no explicit null checks are needed.
GraphNode::~GraphNode() {
  delete sampler;
  sampler = nullptr;

  delete edges;
  edges = nullptr;
}
// Byte widths of the primitive types used by the Node (de)serialization
// routines.
int Node::weight_size = static_cast<int>(sizeof(float));
int Node::id_size = static_cast<int>(sizeof(uint64_t));
int Node::int_size = static_cast<int>(sizeof(int));
// Serialized size of a plain node: the id followed by one int (the feature
// count). need_feature is ignored because a plain node carries no features.
int Node::get_size(bool need_feature) {
  const int header_bytes = id_size + int_size;
  return header_bytes;
}
// Writes this node into buffer as [id][feature count], where the feature
// count is always 0 for a plain node. need_feature is ignored. The caller
// must supply at least id_size + sizeof(int) writable bytes.
void Node::to_buffer(char *buffer, bool need_feature) {
  const int zero_features = 0;
  memcpy(buffer, &id, id_size);
  memcpy(buffer + id_size, &zero_features, sizeof(int));
}
// Restores the node id from the first id_size bytes of buffer. Anything that
// follows the id in the buffer is not read by this implementation.
void Node::recover_from_buffer(char *buffer) {
  const char *source = buffer;
  memcpy(&id, source, id_size);
}
// Number of bytes to_buffer() writes for this node.
// The header (id + feature count) is always counted. When need_feature is
// true, each feature adds one int length prefix plus its raw bytes.
int FeatureNode::get_size(bool need_feature) {
  int total = id_size + int_size;  // id, feat_num
  if (!need_feature) {
    return total;
  }

  // One length prefix per feature...
  total += feature.size() * int_size;
  // ...followed by the payload bytes of every feature.
  for (const std::string &payload : feature) {
    total += payload.size();
  }
  return total;
}
// Allocates the edge blob for this node if it does not have one yet.
// A weighted blob is created when is_weighted is true, otherwise an
// unweighted one. Calling this again after a blob exists does nothing.
void GraphNode::build_edges(bool is_weighted) {
  if (edges != nullptr) {
    return;
  }
  if (is_weighted) {
    edges = new WeightedGraphEdgeBlob();
    return;
  }
  edges = new GraphEdgeBlob();
}
void
GraphNode
::
build_sampler
(
std
::
string
sample_type
)
{
if
(
sample_type
==
"random"
)
{
sampler
=
new
RandomSampler
();
}
else
if
(
sample_type
==
"weighted"
)
{
sampler
=
new
WeightedSampler
();
}
sampler
->
build
(
edges
);
}
// Serializes this node into buffer.
// Layout: [id][feat_num], and when need_feature is true, feat_num records of
// [feat_len][feat_len raw bytes] follow. When need_feature is false the
// feature count written is 0 and no feature records are emitted.
// The caller must provide at least get_size(need_feature) writable bytes.
void FeatureNode::to_buffer(char *buffer, bool need_feature) {
  char *cursor = buffer;
  memcpy(cursor, &id, id_size);
  cursor += id_size;

  if (!need_feature) {
    const int no_features = 0;
    memcpy(cursor, &no_features, sizeof(int));
    return;
  }

  const int feat_num = feature.size();
  memcpy(cursor, &feat_num, sizeof(int));
  cursor += sizeof(int);

  for (int pos = 0; pos < feat_num; ++pos) {
    const std::string &payload = feature[pos];
    const int feat_len = payload.size();

    // Length prefix, then the raw bytes (no terminator is written).
    memcpy(cursor, &feat_len, sizeof(int));
    cursor += sizeof(int);
    memcpy(cursor, payload.c_str(), payload.size());
    cursor += payload.size();
  }
}
// Rebuilds this node from a buffer laid out as
// [id][feat_num] followed by feat_num records of [feat_len][feat_len bytes],
// i.e. the layout produced by to_buffer(buffer, true). Any features held
// before the call are discarded.
//
// Fixes over the previous version:
//  * Each feature is constructed from (pointer, length), so payloads that
//    contain NUL bytes are restored in full. The old code copied into a
//    temporary C string and built std::string from it, which cut the feature
//    at the first '\0'.
//  * The temporary variable-length array (a compiler extension, sized by
//    data read from the buffer) is gone.
//  * A negative feat_len stops parsing instead of being used as a size.
void FeatureNode::recover_from_buffer(char *buffer) {
  int feat_num = 0;
  int feat_len = 0;

  memcpy(&id, buffer, id_size);
  buffer += id_size;
  memcpy(&feat_num, buffer, sizeof(int));
  buffer += sizeof(int);

  feature.clear();
  for (int i = 0; i < feat_num; ++i) {
    memcpy(&feat_len, buffer, sizeof(int));
    buffer += sizeof(int);
    if (feat_len < 0) {
      // Corrupt length prefix: the rest of the buffer cannot be trusted.
      break;
    }
    feature.push_back(std::string(buffer, feat_len));
    buffer += feat_len;
  }
}
}
}
paddle/fluid/distributed/table/graph_node.h
已删除
100644 → 0
浏览文件 @
5f1c07da
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <cstring>
#include <iostream>
#include <sstream>
#include <vector>
#include "paddle/fluid/distributed/table/graph_weighted_sampler.h"
namespace
paddle
{
namespace
distributed
{
// Base class of graph nodes. It stores only the node id; every other
// operation is a virtual hook with a neutral default (no-op, empty result,
// 0 or 1) that derived classes override.
class Node {
 public:
  // The id is now zero-initialized. Previously the default constructor left
  // it indeterminate, so get_id()/to_buffer() could read garbage before
  // set_id() or recover_from_buffer() was called.
  Node() : id(0) {}
  Node(uint64_t id) : id(id) {}
  virtual ~Node() {}

  // Byte widths used by the serialization routines; defined out of line.
  static int id_size, int_size, weight_size;

  uint64_t get_id() { return id; }
  void set_id(uint64_t id) { this->id = id; }

  // Edge and sampler hooks: no-ops in the base class.
  virtual void build_edges(bool is_weighted) {}
  virtual void build_sampler(std::string sample_type) {}
  virtual void add_edge(uint64_t id, float weight) {}

  // Sampling and neighbor queries: the base class has no neighbors, so these
  // return an empty vector, id 0 and weight 1 respectively.
  virtual std::vector<int> sample_k(int k) { return std::vector<int>(); }
  virtual uint64_t get_neighbor_id(int idx) { return 0; }
  virtual float get_neighbor_weight(int idx) { return 1.; }

  // Serialization; defined out of line.
  virtual int get_size(bool need_feature);
  virtual void to_buffer(char *buffer, bool need_feature);
  virtual void recover_from_buffer(char *buffer);

  // Feature hooks: the base class stores no features.
  virtual std::string get_feature(int idx) { return std::string(""); }
  virtual void set_feature(int idx, std::string str) {}
  virtual void set_feature_size(int size) {}
  virtual int get_feature_size() { return 0; }

 protected:
  uint64_t id;
};
// Node that holds an edge blob and a sampler over it. Both pointers start
// as nullptr and are populated by build_edges() / build_sampler(); the
// destructor (defined out of line) deletes them.
//
// The inline accessors below dereference `edges` / `sampler` without a null
// check, so build_edges() must have run before add_edge() or the neighbor
// getters, and build_sampler() before sample_k().
class GraphNode : public Node {
 public:
  GraphNode() : Node(), sampler(nullptr), edges(nullptr) {}
  GraphNode(uint64_t id) : Node(id), sampler(nullptr), edges(nullptr) {}
  virtual ~GraphNode();

  virtual void build_edges(bool is_weighted);
  virtual void build_sampler(std::string sample_type);

  // Forwards the new edge to the edge blob.
  virtual void add_edge(uint64_t id, float weight) {
    edges->add_edge(id, weight);
  }

  // Forwards to the sampler and returns whatever it produces.
  virtual std::vector<int> sample_k(int k) {
    return sampler->sample_k(k);
  }

  // Id of the edge stored at position idx in the edge blob.
  virtual uint64_t get_neighbor_id(int idx) {
    return edges->get_id(idx);
  }

  // Weight of the edge stored at position idx in the edge blob.
  virtual float get_neighbor_weight(int idx) {
    return edges->get_weight(idx);
  }

 protected:
  Sampler *sampler;
  GraphEdgeBlob *edges;
};
// Node that carries a list of features, each stored as a std::string of raw
// bytes. Also provides helpers to convert between textual values and the
// packed byte representation.
//
// Fixes over the previous version:
//  * get_feature()/set_feature() no longer index the vector with a negative
//    idx (get returns "", set is ignored).
//  * parse_value_to_bytes() builds its result in a std::string instead of a
//    variable-length stack array (a compiler extension), and no longer reads
//    an uninitialized value when the first token fails to parse.
//  * parse_bytes_to_array() only decodes complete sizeof(T) chunks; it used
//    to read past the end of the input when the size was not a multiple of
//    sizeof(T).
class FeatureNode : public Node {
 public:
  FeatureNode() : Node() {}
  FeatureNode(uint64_t id) : Node(id) {}
  virtual ~FeatureNode() {}

  // Serialization; defined out of line.
  virtual int get_size(bool need_feature);
  virtual void to_buffer(char *buffer, bool need_feature);
  virtual void recover_from_buffer(char *buffer);

  // Returns the feature at idx, or an empty string when idx is out of range.
  virtual std::string get_feature(int idx) {
    if (idx >= 0 && idx < static_cast<int>(this->feature.size())) {
      return this->feature[idx];
    }
    return std::string("");
  }

  // Stores str at slot idx, growing the feature list if idx is beyond the
  // current end. A negative idx is ignored.
  virtual void set_feature(int idx, std::string str) {
    if (idx < 0) {
      return;
    }
    if (idx >= static_cast<int>(this->feature.size())) {
      this->feature.resize(idx + 1);
    }
    this->feature[idx] = str;
  }

  virtual void set_feature_size(int size) { this->feature.resize(size); }
  virtual int get_feature_size() { return this->feature.size(); }

  // Parses every string in feat_str as a T (via stream extraction) and
  // returns the values packed back to back as raw bytes, sizeof(T) each.
  // If a token cannot be extracted at all, the slot keeps whatever the
  // stream left in `v` (the previous value, or T{} for the first token).
  template <typename T>
  static std::string parse_value_to_bytes(std::vector<std::string> feat_str) {
    T v{};
    const size_t Tsize = sizeof(T) * feat_str.size();
    std::string bytes(Tsize, '\0');
    for (size_t i = 0; i < feat_str.size(); i++) {
      std::stringstream ss(feat_str[i]);
      ss >> v;
      std::memcpy(&bytes[0] + sizeof(T) * i, &v, sizeof(T));
    }
    return bytes;
  }

  // Inverse of parse_value_to_bytes: reinterprets feat_str as consecutive
  // T values. Trailing bytes that do not form a complete T are ignored.
  template <typename T>
  static std::vector<T> parse_bytes_to_array(std::string feat_str) {
    const size_t value_count = feat_str.size() / sizeof(T);
    std::vector<T> out;
    out.reserve(value_count);
    const char *buffer = feat_str.data();
    for (size_t i = 0; i < value_count; ++i) {
      T v;
      std::memcpy(&v, buffer + i * sizeof(T), sizeof(T));
      out.push_back(v);
    }
    return out;
  }

 protected:
  std::vector<std::string> feature;
};
}
}
paddle/fluid/distributed/table/graph_weighted_sampler.cc
已删除
100644 → 0
浏览文件 @
5f1c07da
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/distributed/table/graph_weighted_sampler.h"
#include <iostream>
#include <unordered_map>
namespace
paddle
{
namespace
distributed
{
// Remembers the edge blob to sample from. The pointer is stored as-is; this
// function neither copies nor takes ownership of the blob.
void RandomSampler::build(GraphEdgeBlob *edges) {
  GraphEdgeBlob *const source = edges;
  this->edges = source;
}
// Draws up to k distinct edge positions in [0, edges->size()) and returns
// them in draw order. k is clamped to the number of edges.
//
// The draw is a partial shuffle over a virtual array 0..n-1: replace_map
// records, for each position already drawn, which value currently occupies
// it (the value moved there from the shrinking tail).
//
// Fixes over the previous version:
//  * A negative k is treated as 0. It used to keep looping until n reached
//    0 and then evaluate rand() % 0.
//  * The tail value is read into a local before replace_map[...] is written,
//    so the lookup iterator is never used after operator[] may have
//    rehashed the map.
//
// Note: the global C RNG is reseeded from the clock on every call, as before.
std::vector<int> RandomSampler::sample_k(int k) {
  int n = edges->size();
  if (k > n) {
    k = n;
  }
  if (k < 0) {
    k = 0;
  }

  struct timespec tn;
  clock_gettime(CLOCK_REALTIME, &tn);
  srand(tn.tv_nsec);

  std::vector<int> sample_result;
  sample_result.reserve(k);
  std::unordered_map<int, int> replace_map;
  while (k--) {
    const int rand_int = rand() % n;

    // Value currently sitting at the drawn position.
    auto iter = replace_map.find(rand_int);
    sample_result.push_back(iter == replace_map.end() ? rand_int
                                                      : iter->second);

    // Move the value at the tail (position n - 1) into the drawn position,
    // then shrink the range by one.
    iter = replace_map.find(n - 1);
    const int tail_value = (iter == replace_map.end()) ? n - 1 : iter->second;
    replace_map[rand_int] = tail_value;
    --n;
  }
  return sample_result;
}
// Creates an empty sampler with no children and no edge blob.
// weight, count and idx are now zero-initialized as well; previously they
// were left indeterminate, so calling sample_k() before build() compared k
// against an uninitialized count.
WeightedSampler::WeightedSampler()
    : left(nullptr),
      right(nullptr),
      weight(0),
      count(0),
      idx(0),
      edges(nullptr) {}
// Destroys both child samplers (which recursively destroy theirs). The edge
// blob pointer is not deleted here. Deleting a null child is a no-op.
WeightedSampler::~WeightedSampler() {
  delete left;
  left = nullptr;

  delete right;
  right = nullptr;
}
// (Re)builds the sampling tree over all edges in the given blob. Any tree
// built earlier is destroyed first.
//
// Fixes over the previous version:
//  * A null or empty blob now yields an empty sampler (count 0, weight 0).
//    An empty blob used to send build_one(…, 0, 0) into unbounded recursion,
//    and a null blob was dereferenced.
//
// The blob is downcast to WeightedGraphEdgeBlob without a runtime check, as
// before; build_one() only reads weights through the virtual get_weight().
void WeightedSampler::build(GraphEdgeBlob *edges) {
  delete left;
  left = nullptr;
  delete right;
  right = nullptr;

  if (edges == nullptr || edges->size() == 0) {
    this->edges = edges;
    count = 0;
    weight = 0;
    idx = 0;
    return;
  }

  build_one(static_cast<WeightedGraphEdgeBlob *>(edges), 0, edges->size());
}
// Builds the subtree covering edge positions [start, end).
// A single-position range becomes a leaf holding that position and its
// weight. A longer range is split in half into two child samplers, and this
// node stores the sum of their weights and counts.
//
// Fixes over the previous version:
//  * An empty range (start >= end) now produces an empty leaf with count 0
//    and weight 0. It used to recurse without ever reaching a base case.
//  * Children left over from an earlier build are deleted before being
//    replaced, so calling build_one() directly more than once does not leak.
void WeightedSampler::build_one(WeightedGraphEdgeBlob *edges, int start,
                                int end) {
  // Drop any previous subtree; both pointers are null on a fresh sampler.
  delete left;
  delete right;
  left = right = nullptr;

  count = 0;
  this->edges = edges;

  if (start >= end) {
    idx = start;
    weight = 0;
    return;
  }

  if (start + 1 == end) {
    // Leaf: exactly one edge position.
    idx = start;
    count = 1;
    weight = edges->get_weight(idx);
    return;
  }

  const int mid = start + (end - start) / 2;
  left = new WeightedSampler();
  right = new WeightedSampler();
  left->build_one(edges, start, mid);
  right->build_one(edges, mid, end);
  weight = left->weight + right->weight;
  count = left->count + right->count;
}
// Draws up to k edge positions without replacement, each draw weighted by
// the remaining (not yet drawn) edge weights. k is clamped to the number of
// edges in the tree.
//
// The two maps record, per tree node, how much weight and how many leaves
// below it have already been drawn during this call; sample() consults and
// updates them so that a leaf is not returned twice.
//
// Fix over the previous version: a negative k is treated as 0. `while (k--)`
// used to keep iterating on a negative counter.
//
// Note: the global C RNG is reseeded from the clock on every call, as before.
std::vector<int> WeightedSampler::sample_k(int k) {
  if (k > count) {
    k = count;
  }
  if (k < 0) {
    k = 0;
  }

  std::vector<int> sample_result;
  float subtract = 0;  // written by sample() at the leaf it selects
  std::unordered_map<WeightedSampler *, float> subtract_weight_map;
  std::unordered_map<WeightedSampler *, int> subtract_count_map;

  struct timespec tn;
  clock_gettime(CLOCK_REALTIME, &tn);
  srand(tn.tv_nsec);

  while (k--) {
    // Uniform value in [0, 1) with 1e-5 resolution, scaled to the weight
    // that is still available at the root.
    float query_weight = rand() % 100000 / 100000.0;
    query_weight *= weight - subtract_weight_map[this];
    sample_result.push_back(sample(query_weight, subtract_weight_map,
                                   subtract_count_map, subtract));
  }
  return sample_result;
}
// Walks the tree to pick one leaf for query_weight and returns that leaf's
// edge position.
//
// subtract_weight_map / subtract_count_map hold, per node, the weight and
// number of leaves already consumed beneath it. At a leaf, the leaf's own
// weight and a count of 1 are recorded and `subtract` is set to that weight.
// On the way back up, every ancestor adds `subtract` to its consumed weight
// and increments its consumed count.
//
// Descent rule at an inner node: go left when the right subtree has no
// unconsumed leaves, or when the left subtree still has leaves and its
// remaining weight is at least query_weight. Otherwise go right with
// query_weight reduced by the left subtree's remaining weight.
int WeightedSampler::sample(
    float query_weight,
    std::unordered_map<WeightedSampler *, float> &subtract_weight_map,
    std::unordered_map<WeightedSampler *, int> &subtract_count_map,
    float &subtract) {
  const bool is_leaf = (left == nullptr);
  if (is_leaf) {
    subtract_weight_map[this] = weight;
    subtract = weight;
    subtract_count_map[this] = 1;
    return idx;
  }

  const int left_count = left->count - subtract_count_map[left];
  const int right_count = right->count - subtract_count_map[right];
  const float left_subtract = subtract_weight_map[left];

  bool descend_left = true;
  if (right_count != 0) {
    descend_left =
        left_count > 0 && left->weight - left_subtract >= query_weight;
  }

  const int return_idx =
      descend_left
          ? left->sample(query_weight, subtract_weight_map,
                         subtract_count_map, subtract)
          : right->sample(query_weight - (left->weight - left_subtract),
                          subtract_weight_map, subtract_count_map, subtract);

  subtract_weight_map[this] += subtract;
  subtract_count_map[this]++;
  return return_idx;
}
}
}
paddle/fluid/distributed/table/graph_weighted_sampler.h
已删除
100644 → 0
浏览文件 @
5f1c07da
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <ctime>
#include <unordered_map>
#include <vector>
#include "paddle/fluid/distributed/table/graph_edge.h"
namespace
paddle
{
namespace
distributed
{
// Abstract interface for samplers over an edge blob: build() attaches the
// sampler to a blob, sample_k() returns a vector of ints for a request of
// size k.
class Sampler {
 public:
  virtual ~Sampler() = default;

  virtual void build(GraphEdgeBlob *edges) = 0;

  virtual std::vector<int> sample_k(int k) = 0;
};
// Sampler whose build() and sample_k() are defined out of line; it keeps a
// pointer to the edge blob it was built over.
//
// Fix over the previous version: `edges` now defaults to nullptr. The class
// has no user-written constructor, so a default-initialized instance (for
// example a local `RandomSampler s;`) used to hold an indeterminate pointer.
class RandomSampler : public Sampler {
 public:
  virtual ~RandomSampler() {}

  virtual void build(GraphEdgeBlob *edges);

  virtual std::vector<int> sample_k(int k);

  // Blob set by build(); null until then.
  GraphEdgeBlob *edges = nullptr;
};
// Sampler organised as a binary tree of WeightedSampler nodes. Each node
// exposes its two children, an aggregate weight and count, an edge position
// (idx) and the blob it was built over. All member functions are defined out
// of line.
class WeightedSampler : public Sampler {
 public:
  WeightedSampler();
  virtual ~WeightedSampler();

  // Tree structure and per-node data (public, as in the original layout).
  WeightedSampler *left, *right;
  float weight;
  int count;
  int idx;
  GraphEdgeBlob *edges;

  // Builds the tree over a whole blob.
  virtual void build(GraphEdgeBlob *edges);

  // Builds the subtree for positions [start, end) of the blob.
  virtual void build_one(WeightedGraphEdgeBlob *edges, int start, int end);

  virtual std::vector<int> sample_k(int k);

 private:
  // Recursive helper used by sample_k(); the maps and `subtract` carry
  // bookkeeping between draws.
  int sample(
      float query_weight,
      std::unordered_map<WeightedSampler *, float> &subtract_weight_map,
      std::unordered_map<WeightedSampler *, int> &subtract_count_map,
      float &subtract);
};
}
}
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录