From 8c58f9623d0131566b85e4b02d4e4a7574768d96 Mon Sep 17 00:00:00 2001
From: seemingwang
Date: Fri, 29 Apr 2022 14:08:33 +0800
Subject: [PATCH] enable graph-engine to return all id (#42319)

* enable graph-engine to return all id

* change vector's dimension

* change vector's dimension

* enlarge returned ids dimensions
---
 .../ps/table/common_graph_table.cc            | 21 +++++++++++++++++++
 .../distributed/ps/table/common_graph_table.h | 10 ++++++++-
 .../fleet/heter_ps/graph_gpu_wrapper.cu       |  5 +++++
 .../fleet/heter_ps/graph_gpu_wrapper.h        |  2 ++
 paddle/fluid/pybind/fleet_py.cc               |  1 +
 5 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/paddle/fluid/distributed/ps/table/common_graph_table.cc b/paddle/fluid/distributed/ps/table/common_graph_table.cc
index a9cd0021c85..9310e82d23e 100644
--- a/paddle/fluid/distributed/ps/table/common_graph_table.cc
+++ b/paddle/fluid/distributed/ps/table/common_graph_table.cc
@@ -85,6 +85,7 @@ paddle::framework::GpuPsCommGraph GraphTable::make_gpu_ps_graph(
   }
   return res;
 }
+
 int32_t GraphTable::add_node_to_ssd(int type_id, int idx, int64_t src_id,
                                     char *data, int len) {
   if (_db != NULL) {
@@ -1060,6 +1061,26 @@ std::pair<int32_t, std::string> GraphTable::parse_feature(
   return std::make_pair(-1, "");
 }
 
+std::vector<std::vector<int64_t>> GraphTable::get_all_id(int type_id, int idx,
+                                                         int slice_num) {
+  std::vector<std::vector<int64_t>> res(slice_num);
+  auto &search_shards = type_id == 0 ? edge_shards[idx] : feature_shards[idx];
+  std::vector<std::future<std::vector<int64_t>>> tasks;
+  for (int i = 0; i < search_shards.size(); i++) {
+    tasks.push_back(_shards_task_pool[i % task_pool_size_]->enqueue(
+        [&search_shards, i]() -> std::vector<int64_t> {
+          return search_shards[i]->get_all_id();
+        }));
+  }
+  for (size_t i = 0; i < tasks.size(); ++i) {
+    tasks[i].wait();
+  }
+  for (size_t i = 0; i < tasks.size(); i++) {
+    auto ids = tasks[i].get();
+    for (auto &id : ids) res[id % slice_num].push_back(id);
+  }
+  return res;
+}
 int32_t GraphTable::pull_graph_list(int type_id, int idx, int start,
                                     int total_size,
                                     std::unique_ptr<char[]> &buffer,
diff --git a/paddle/fluid/distributed/ps/table/common_graph_table.h b/paddle/fluid/distributed/ps/table/common_graph_table.h
index 059bcb09a0a..f9956c77231 100644
--- a/paddle/fluid/distributed/ps/table/common_graph_table.h
+++ b/paddle/fluid/distributed/ps/table/common_graph_table.h
@@ -63,7 +63,13 @@ class GraphShard {
     }
     return res;
   }
-
+  std::vector<int64_t> get_all_id() {
+    std::vector<int64_t> res;
+    for (int i = 0; i < (int)bucket.size(); i++) {
+      res.push_back(bucket[i]->get_id());
+    }
+    return res;
+  }
   GraphNode *add_graph_node(int64_t id);
   GraphNode *add_graph_node(Node *node);
   FeatureNode *add_feature_node(int64_t id);
@@ -465,6 +471,8 @@ class GraphTable : public Table {
 
   int32_t load_edges(const std::string &path, bool reverse,
                      const std::string &edge_type);
+  std::vector<std::vector<int64_t>> get_all_id(int type, int idx,
+                                               int slice_num);
   int32_t load_nodes(const std::string &path, std::string node_type);
 
   int32_t add_graph_node(int idx, std::vector<int64_t> &id_list,
diff --git a/paddle/fluid/framework/fleet/heter_ps/graph_gpu_wrapper.cu b/paddle/fluid/framework/fleet/heter_ps/graph_gpu_wrapper.cu
index b0899b4a7f5..09d4937d276 100644
--- a/paddle/fluid/framework/fleet/heter_ps/graph_gpu_wrapper.cu
+++ b/paddle/fluid/framework/fleet/heter_ps/graph_gpu_wrapper.cu
@@ -58,6 +58,11 @@ void GraphGpuWrapper::set_device(std::vector<int> ids) {
     device_id_mapping.push_back(device_id);
   }
 }
+std::vector<std::vector<int64_t>> GraphGpuWrapper::get_all_id(int type, int idx,
+                                                              int slice_num) {
+  return ((GpuPsGraphTable *)graph_table)
+      ->cpu_graph_table->get_all_id(type, idx, slice_num);
+}
 void GraphGpuWrapper::set_up_types(std::vector<std::string> &edge_types,
                                    std::vector<std::string> &node_types) {
   id_to_edge = edge_types;
diff --git a/paddle/fluid/framework/fleet/heter_ps/graph_gpu_wrapper.h b/paddle/fluid/framework/fleet/heter_ps/graph_gpu_wrapper.h
index 6972551b896..9472f69a72d 100644
--- a/paddle/fluid/framework/fleet/heter_ps/graph_gpu_wrapper.h
+++ b/paddle/fluid/framework/fleet/heter_ps/graph_gpu_wrapper.h
@@ -34,6 +34,8 @@ class GraphGpuWrapper {
                            std::string feat_dtype, int feat_shape);
   void load_edge_file(std::string name, std::string filepath, bool reverse);
   void load_node_file(std::string name, std::string filepath);
+  std::vector<std::vector<int64_t>> get_all_id(int type, int idx,
+                                               int slice_num);
   NodeQueryResult query_node_list(int gpu_id, int start, int query_size);
   NeighborSampleResult graph_neighbor_sample_v3(NeighborSampleQuery q,
                                                 bool cpu_switch);
diff --git a/paddle/fluid/pybind/fleet_py.cc b/paddle/fluid/pybind/fleet_py.cc
index 4df43dc1a3a..7807adab012 100644
--- a/paddle/fluid/pybind/fleet_py.cc
+++ b/paddle/fluid/pybind/fleet_py.cc
@@ -342,6 +342,7 @@ void BindGraphGpuWrapper(py::module* m) {
       .def("add_table_feat_conf", &GraphGpuWrapper::add_table_feat_conf)
       .def("load_edge_file", &GraphGpuWrapper::load_edge_file)
       .def("upload_batch", &GraphGpuWrapper::upload_batch)
+      .def("get_all_id", &GraphGpuWrapper::get_all_id)
       .def("load_node_file", &GraphGpuWrapper::load_node_file);
 }
 #endif
-- 
GitLab