diff --git a/paddle/fluid/distributed/table/graph_edge.cc b/paddle/fluid/distributed/table/graph_edge.cc deleted file mode 100644 index cc90f4c6516c1873b078b96c550d0d52ac5d3b9c..0000000000000000000000000000000000000000 --- a/paddle/fluid/distributed/table/graph_edge.cc +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/distributed/table/graph_edge.h" -#include -namespace paddle { -namespace distributed { - -void GraphEdgeBlob::add_edge(uint64_t id, float weight = 1) { - id_arr.push_back(id); -} - -void WeightedGraphEdgeBlob::add_edge(uint64_t id, float weight = 1) { - id_arr.push_back(id); - weight_arr.push_back(weight); -} -} -} diff --git a/paddle/fluid/distributed/table/graph_edge.h b/paddle/fluid/distributed/table/graph_edge.h deleted file mode 100644 index 3dfe5a6f357a7cd7d79834a20b6411995665f4fa..0000000000000000000000000000000000000000 --- a/paddle/fluid/distributed/table/graph_edge.h +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once -#include -#include -#include -namespace paddle { -namespace distributed { - -class GraphEdgeBlob { - public: - GraphEdgeBlob() {} - virtual ~GraphEdgeBlob() {} - size_t size() { return id_arr.size(); } - virtual void add_edge(uint64_t id, float weight); - uint64_t get_id(int idx) { return id_arr[idx]; } - virtual float get_weight(int idx) { return 1; } - - protected: - std::vector id_arr; -}; - -class WeightedGraphEdgeBlob : public GraphEdgeBlob { - public: - WeightedGraphEdgeBlob() {} - virtual ~WeightedGraphEdgeBlob() {} - virtual void add_edge(uint64_t id, float weight); - virtual float get_weight(int idx) { return weight_arr[idx]; } - - protected: - std::vector weight_arr; -}; -} -} diff --git a/paddle/fluid/distributed/table/graph_node.cc b/paddle/fluid/distributed/table/graph_node.cc deleted file mode 100644 index 27a2cafaf4f0fec95de818204ebd191a5083e50a..0000000000000000000000000000000000000000 --- a/paddle/fluid/distributed/table/graph_node.cc +++ /dev/null @@ -1,117 +0,0 @@ -// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/distributed/table/graph_node.h" -#include -namespace paddle { -namespace distributed { - -GraphNode::~GraphNode() { - if (sampler != nullptr) { - delete sampler; - sampler = nullptr; - } - if (edges != nullptr) { - delete edges; - edges = nullptr; - } -} - -int Node::weight_size = sizeof(float); -int Node::id_size = sizeof(uint64_t); -int Node::int_size = sizeof(int); - -int Node::get_size(bool need_feature) { return id_size + int_size; } - -void Node::to_buffer(char* buffer, bool need_feature) { - memcpy(buffer, &id, id_size); - buffer += id_size; - - int feat_num = 0; - memcpy(buffer, &feat_num, sizeof(int)); -} - -void Node::recover_from_buffer(char* buffer) { memcpy(&id, buffer, id_size); } - -int FeatureNode::get_size(bool need_feature) { - int size = id_size + int_size; // id, feat_num - if (need_feature) { - size += feature.size() * int_size; - for (const std::string& fea : feature) { - size += fea.size(); - } - } - return size; -} - -void GraphNode::build_edges(bool is_weighted) { - if (edges == nullptr) { - if (is_weighted == true) { - edges = new WeightedGraphEdgeBlob(); - } else { - edges = new GraphEdgeBlob(); - } - } -} -void GraphNode::build_sampler(std::string sample_type) { - if (sample_type == "random") { - sampler = new RandomSampler(); - } else if (sample_type == "weighted") { - sampler = new WeightedSampler(); - } - sampler->build(edges); -} -void FeatureNode::to_buffer(char* buffer, bool need_feature) { - memcpy(buffer, &id, id_size); - buffer += id_size; - - int feat_num = 0; - int feat_len; - if (need_feature) { - feat_num += feature.size(); - memcpy(buffer, &feat_num, sizeof(int)); - buffer += sizeof(int); - for (int i = 0; i < feat_num; ++i) { - feat_len = feature[i].size(); - memcpy(buffer, &feat_len, sizeof(int)); - buffer += sizeof(int); - memcpy(buffer, feature[i].c_str(), feature[i].size()); - buffer += feature[i].size(); - } - } else { - memcpy(buffer, &feat_num, sizeof(int)); - } -} -void FeatureNode::recover_from_buffer(char* buffer) { - int feat_num, feat_len; - memcpy(&id, buffer, id_size); - buffer += id_size; - - memcpy(&feat_num, buffer, sizeof(int)); - buffer += sizeof(int); - - feature.clear(); - for (int i = 0; i < feat_num; ++i) { - memcpy(&feat_len, buffer, sizeof(int)); - buffer += sizeof(int); - - char str[feat_len + 1]; - memcpy(str, buffer, feat_len); - buffer += feat_len; - str[feat_len] = '\0'; - feature.push_back(std::string(str)); - } -} -} -} diff --git a/paddle/fluid/distributed/table/graph_node.h b/paddle/fluid/distributed/table/graph_node.h deleted file mode 100644 index c3e8e3ce5b50d06945857ded1db168f84f955c5f..0000000000000000000000000000000000000000 --- a/paddle/fluid/distributed/table/graph_node.h +++ /dev/null @@ -1,127 +0,0 @@ -// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once -#include -#include -#include -#include -#include "paddle/fluid/distributed/table/graph_weighted_sampler.h" -namespace paddle { -namespace distributed { - -class Node { - public: - Node() {} - Node(uint64_t id) : id(id) {} - virtual ~Node() {} - static int id_size, int_size, weight_size; - uint64_t get_id() { return id; } - void set_id(uint64_t id) { this->id = id; } - - virtual void build_edges(bool is_weighted) {} - virtual void build_sampler(std::string sample_type) {} - virtual void add_edge(uint64_t id, float weight) {} - virtual std::vector sample_k(int k) { return std::vector(); } - virtual uint64_t get_neighbor_id(int idx) { return 0; } - virtual float get_neighbor_weight(int idx) { return 1.; } - - virtual int get_size(bool need_feature); - virtual void to_buffer(char *buffer, bool need_feature); - virtual void recover_from_buffer(char *buffer); - virtual std::string get_feature(int idx) { return std::string(""); } - virtual void set_feature(int idx, std::string str) {} - virtual void set_feature_size(int size) {} - virtual int get_feature_size() { return 0; } - - protected: - uint64_t id; -}; - -class GraphNode : public Node { - public: - GraphNode() : Node(), sampler(nullptr), edges(nullptr) {} - GraphNode(uint64_t id) : Node(id), sampler(nullptr), edges(nullptr) {} - virtual ~GraphNode(); - virtual void build_edges(bool is_weighted); - virtual void build_sampler(std::string sample_type); - virtual void add_edge(uint64_t id, float weight) { - edges->add_edge(id, weight); - } - virtual std::vector sample_k(int k) { return sampler->sample_k(k); } - virtual uint64_t get_neighbor_id(int idx) { return edges->get_id(idx); } - virtual float get_neighbor_weight(int idx) { return edges->get_weight(idx); } - - protected: - Sampler *sampler; - GraphEdgeBlob *edges; -}; - -class FeatureNode : public Node { - public: - FeatureNode() : Node() {} - FeatureNode(uint64_t id) : Node(id) {} - virtual ~FeatureNode() {} - virtual int get_size(bool need_feature); - virtual void to_buffer(char *buffer, bool need_feature); - virtual void recover_from_buffer(char *buffer); - virtual std::string get_feature(int idx) { - if (idx < (int)this->feature.size()) { - return this->feature[idx]; - } else { - return std::string(""); - } - } - - virtual void set_feature(int idx, std::string str) { - if (idx >= (int)this->feature.size()) { - this->feature.resize(idx + 1); - } - this->feature[idx] = str; - } - virtual void set_feature_size(int size) { this->feature.resize(size); } - virtual int get_feature_size() { return this->feature.size(); } - - template - static std::string parse_value_to_bytes(std::vector feat_str) { - T v; - size_t Tsize = sizeof(T) * feat_str.size(); - char buffer[Tsize]; - for (size_t i = 0; i < feat_str.size(); i++) { - std::stringstream ss(feat_str[i]); - ss >> v; - std::memcpy(buffer + sizeof(T) * i, (char *)&v, sizeof(T)); - } - return std::string(buffer, Tsize); - } - - template - static std::vector parse_bytes_to_array(std::string feat_str) { - T v; - std::vector out; - size_t start = 0; - const char *buffer = feat_str.data(); - while (start < feat_str.size()) { - std::memcpy((char *)&v, buffer + start, sizeof(T)); - start += sizeof(T); - out.push_back(v); - } - return out; - } - - protected: - std::vector feature; -}; -} -} diff --git a/paddle/fluid/distributed/table/graph_weighted_sampler.cc b/paddle/fluid/distributed/table/graph_weighted_sampler.cc deleted file mode 100644 index 059a1d64bc392d7ef6936c008bbeec3bef3a5fb9..0000000000000000000000000000000000000000 --- a/paddle/fluid/distributed/table/graph_weighted_sampler.cc +++ /dev/null @@ -1,150 +0,0 @@ -// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/distributed/table/graph_weighted_sampler.h" -#include -#include -namespace paddle { -namespace distributed { - -void RandomSampler::build(GraphEdgeBlob *edges) { this->edges = edges; } - -std::vector RandomSampler::sample_k(int k) { - int n = edges->size(); - if (k > n) { - k = n; - } - struct timespec tn; - clock_gettime(CLOCK_REALTIME, &tn); - srand(tn.tv_nsec); - std::vector sample_result; - std::unordered_map replace_map; - while (k--) { - int rand_int = rand() % n; - auto iter = replace_map.find(rand_int); - if (iter == replace_map.end()) { - sample_result.push_back(rand_int); - } else { - sample_result.push_back(iter->second); - } - - iter = replace_map.find(n - 1); - if (iter == replace_map.end()) { - replace_map[rand_int] = n - 1; - } else { - replace_map[rand_int] = iter->second; - } - --n; - } - return sample_result; -} - -WeightedSampler::WeightedSampler() { - left = nullptr; - right = nullptr; - edges = nullptr; -} - -WeightedSampler::~WeightedSampler() { - if (left != nullptr) { - delete left; - left = nullptr; - } - if (right != nullptr) { - delete right; - right = nullptr; - } -} - -void WeightedSampler::build(GraphEdgeBlob *edges) { - if (left != nullptr) { - delete left; - left = nullptr; - } - if (right != nullptr) { - delete right; - right = nullptr; - } - return build_one((WeightedGraphEdgeBlob *)edges, 0, edges->size()); -} - -void WeightedSampler::build_one(WeightedGraphEdgeBlob *edges, int start, - int end) { - count = 0; - this->edges = edges; - if (start + 1 == end) { - left = right = nullptr; - idx = start; - count = 1; - weight = edges->get_weight(idx); - - } else { - left = new WeightedSampler(); - right = new WeightedSampler(); - left->build_one(edges, start, start + (end - start) / 2); - right->build_one(edges, start + (end - start) / 2, end); - weight = left->weight + right->weight; - count = left->count + right->count; - } -} -std::vector WeightedSampler::sample_k(int k) { - if (k > count) { - k = count; - } - std::vector sample_result; - float subtract; - std::unordered_map subtract_weight_map; - std::unordered_map subtract_count_map; - struct timespec tn; - clock_gettime(CLOCK_REALTIME, &tn); - srand(tn.tv_nsec); - while (k--) { - float query_weight = rand() % 100000 / 100000.0; - query_weight *= weight - subtract_weight_map[this]; - sample_result.push_back(sample(query_weight, subtract_weight_map, - subtract_count_map, subtract)); - } - return sample_result; -} - -int WeightedSampler::sample( - float query_weight, - std::unordered_map &subtract_weight_map, - std::unordered_map &subtract_count_map, - float &subtract) { - if (left == nullptr) { - subtract_weight_map[this] = weight; - subtract = weight; - subtract_count_map[this] = 1; - return idx; - } - int left_count = left->count - subtract_count_map[left]; - int right_count = right->count - subtract_count_map[right]; - float left_subtract = subtract_weight_map[left]; - int return_idx; - if (right_count == 0 || - left_count > 0 && left->weight - left_subtract >= query_weight) { - return_idx = left->sample(query_weight, subtract_weight_map, - subtract_count_map, subtract); - } else { - return_idx = - right->sample(query_weight - (left->weight - left_subtract), - subtract_weight_map, subtract_count_map, subtract); - } - subtract_weight_map[this] += subtract; - subtract_count_map[this]++; - return return_idx; -} -} -} diff --git a/paddle/fluid/distributed/table/graph_weighted_sampler.h b/paddle/fluid/distributed/table/graph_weighted_sampler.h deleted file mode 100644 index cfc341d27c6b766fcee57e8973a4353d4fe93b4e..0000000000000000000000000000000000000000 --- a/paddle/fluid/distributed/table/graph_weighted_sampler.h +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once -#include -#include -#include -#include "paddle/fluid/distributed/table/graph_edge.h" -namespace paddle { -namespace distributed { - -class Sampler { - public: - virtual ~Sampler() {} - virtual void build(GraphEdgeBlob *edges) = 0; - virtual std::vector sample_k(int k) = 0; -}; - -class RandomSampler : public Sampler { - public: - virtual ~RandomSampler() {} - virtual void build(GraphEdgeBlob *edges); - virtual std::vector sample_k(int k); - GraphEdgeBlob *edges; -}; - -class WeightedSampler : public Sampler { - public: - WeightedSampler(); - virtual ~WeightedSampler(); - WeightedSampler *left, *right; - float weight; - int count; - int idx; - GraphEdgeBlob *edges; - virtual void build(GraphEdgeBlob *edges); - virtual void build_one(WeightedGraphEdgeBlob *edges, int start, int end); - virtual std::vector sample_k(int k); - - private: - int sample(float query_weight, - std::unordered_map &subtract_weight_map, - std::unordered_map &subtract_count_map, - float &subtract); -}; -} -}