From 73d0b0e9147d35ad351bf84ec08c645bee6b400d Mon Sep 17 00:00:00 2001
From: seemingwang
Date: Thu, 22 Apr 2021 12:04:20 +0800
Subject: [PATCH] fix count problem (#32415)

* graph engine demo
* upload unsaved changes
* fix dependency error
* fix shard_num problem
* py client
* remove lock and graph-type
* add load direct graph
* add load direct graph
* add load direct graph
* batch random_sample
* batch_sample_k
* fix num_nodes size
* batch brpc
* batch brpc
* add test
* add test
* add load_nodes; change add_node function
* change sample return type to pair
* resolve conflict
* resolved conflict
* resolved conflict
* separate server and client
* merge pair type
* fix
* resolved conflict
* fixed segment fault; high-level VLOG for load edges and load nodes
* random_sample return 0
* rm useless loop
* test:load edge
* fix ret -1
* test: rm sample
* rm sample
* random_sample return future
* random_sample return int
* test fake node
* fixed here
* memory leak
* remove test code
* fix return problem
* add common_graph_table
* random sample node &test & change data-structure from linkedList to vector
* add common_graph_table
* sample with srand
* add node_types
* optimize nodes sample
* recover test
* random sample
* destruct weighted sampler
* GraphEdgeBlob
* WeightedGraphEdgeBlob to GraphEdgeBlob
* WeightedGraphEdgeBlob to GraphEdgeBlob
* pybind sample nodes api
* pull nodes with step
* fixed pull_graph_list bug; add test for pull_graph_list by step
* add graph table;name
* add graph table;name
* add pybind
* add pybind
* add FeatureNode
* add FeatureNode
* add FeatureNode Serialize
* add FeatureNode Serialize
* get_feat_node
* avoid local rpc
* fix get_node_feat
* fix get_node_feat
* remove log
* get_node_feat return py:bytes
* merge develop with graph_engine
* fix threadpool.h head
* fix
* fix typo
* resolve conflict
* fix conflict
* recover lost content
* fix pybind of FeatureNode
* recover cmake
* recover tools
* resolve conflict
* resolve linking problem
* code style
* change test_server port
* fix code problems
* remove shard_num config
* remove redundent threads
* optimize start server
* remove logs
* fix code problems by reviewers' suggestions
* move graph files into a folder
* code style change
* remove graph operations from base table
* optimize get_feat function of graph engine
* fix long long count problem

Co-authored-by: Huang Zhengjie <270018958@qq.com>
Co-authored-by: Weiyue Su
Co-authored-by: suweiyue
Co-authored-by: luobin06
Co-authored-by: liweibin02
Co-authored-by: tangwei12
---
 .../distributed/service/graph_py_service.h  | 12 ------------
 .../distributed/table/common_graph_table.cc |  2 +-
 .../distributed/table/common_graph_table.h  | 20 ++------------------
 3 files changed, 3 insertions(+), 31 deletions(-)

diff --git a/paddle/fluid/distributed/service/graph_py_service.h b/paddle/fluid/distributed/service/graph_py_service.h
index e185f23e3d2..c6657be96ba 100644
--- a/paddle/fluid/distributed/service/graph_py_service.h
+++ b/paddle/fluid/distributed/service/graph_py_service.h
@@ -54,19 +54,7 @@ class GraphPyService {
   std::vector table_feat_conf_feat_dtype;
   std::vector table_feat_conf_feat_shape;
 
-  // std::thread *server_thread, *client_thread;
-
-  // std::shared_ptr pserver_ptr;
-
-  // std::shared_ptr worker_ptr;
-
  public:
-  // std::shared_ptr get_ps_server() {
-  //   return pserver_ptr;
-  // }
-  // std::shared_ptr get_ps_client() {
-  //   return worker_ptr;
-  // }
   int get_shard_num() { return shard_num; }
   void set_shard_num(int shard_num) { this->shard_num = shard_num; }
   void GetDownpourSparseTableProto(
diff --git a/paddle/fluid/distributed/table/common_graph_table.cc b/paddle/fluid/distributed/table/common_graph_table.cc
index 020bcdcc52e..0dc99de1bfe 100644
--- a/paddle/fluid/distributed/table/common_graph_table.cc
+++ b/paddle/fluid/distributed/table/common_graph_table.cc
@@ -171,7 +171,7 @@ int32_t GraphTable::load_nodes(const std::string &path, std::string node_type) {
 
 int32_t GraphTable::load_edges(const std::string &path, bool reverse_edge) {
   auto paths = paddle::string::split_string(path, ";");
-  int count = 0;
+  int64_t count = 0;
   std::string sample_type = "random";
   bool is_weighted = false;
   int valid_count = 0;
diff --git a/paddle/fluid/distributed/table/common_graph_table.h b/paddle/fluid/distributed/table/common_graph_table.h
index 8ddf3c8f904..b18da82abe6 100644
--- a/paddle/fluid/distributed/table/common_graph_table.h
+++ b/paddle/fluid/distributed/table/common_graph_table.h
@@ -33,26 +33,11 @@ namespace paddle {
 namespace distributed {
 class GraphShard {
  public:
-  // static int bucket_low_bound;
-  // static int gcd(int s, int t) {
-  //   if (s % t == 0) return t;
-  //   return gcd(t, s % t);
-  // }
   size_t get_size();
   GraphShard() {}
-  GraphShard(int shard_num) {
-    this->shard_num = shard_num;
-    // bucket_size = init_bucket_size(shard_num);
-    // bucket.resize(bucket_size);
-  }
+  GraphShard(int shard_num) { this->shard_num = shard_num; }
   std::vector &get_bucket() { return bucket; }
   std::vector get_batch(int start, int end, int step);
-  // int init_bucket_size(int shard_num) {
-  //   for (int i = bucket_low_bound;; i++) {
-  //     if (gcd(i, shard_num) == 1) return i;
-  //   }
-  //   return -1;
-  // }
   std::vector get_ids_by_range(int start, int end) {
     std::vector res;
     for (int i = start; i < end && i < bucket.size(); i++) {
@@ -64,7 +49,6 @@ class GraphShard {
   FeatureNode *add_feature_node(uint64_t id);
   Node *find_node(uint64_t id);
   void add_neighboor(uint64_t id, uint64_t dst_id, float weight);
-  // std::unordered_map::iterator>
   std::unordered_map get_node_location() {
     return node_location;
   }
@@ -131,7 +115,7 @@ class GraphTable : public SparseTable {
  protected:
   std::vector shards;
   size_t shard_start, shard_end, server_num, shard_num_per_table, shard_num;
-  const int task_pool_size_ = 11;
+  const int task_pool_size_ = 24;
   const int random_sample_nodes_ranges = 3;
 
   std::vector feat_name;
-- 
GitLab
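
Note on the central change (not part of the patch itself): widening the edge counter in GraphTable::load_edges from int to int64_t matters because a 32-bit signed counter overflows once a graph has more than about 2.1 billion edges, giving a corrupted total in the load log. The following minimal standalone sketch illustrates that overflow; the variable names are hypothetical and do not appear in the Paddle sources.

    // Illustrative only: why a 32-bit counter is too small when counting edges.
    #include <cstdint>
    #include <iostream>
    #include <limits>

    int main() {
      const int64_t total_edges = 3000000000LL;  // e.g. a 3-billion-edge graph

      // Old counter type: cannot represent values above 2,147,483,647,
      // so the value wraps into garbage (implementation-defined before C++20).
      int32_t narrow_count = static_cast<int32_t>(total_edges);

      // New counter type used by the patch: holds the full count.
      int64_t wide_count = total_edges;

      std::cout << "int32_t max:  " << std::numeric_limits<int32_t>::max() << "\n"
                << "narrow count: " << narrow_count << "\n"   // corrupted value
                << "wide count:   " << wide_count << "\n";    // 3000000000
      return 0;
    }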