graph_gpu_ps_table.h 7.9 KB
Newer Older
S
seemingwang 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once
16
#include <thrust/host_vector.h>
17

18
#include <chrono>
19

20 21
#include "paddle/fluid/distributed/ps/table/common_graph_table.h"
#include "paddle/fluid/framework/fleet/heter_ps/gpu_graph_node.h"
L
lxsbupt 已提交
22
#include "paddle/fluid/framework/fleet/heter_ps/heter_comm.h"
S
seemingwang 已提交
23
#include "paddle/fluid/framework/fleet/heter_ps/heter_comm_kernel.h"
S
seemingwang 已提交
24 25
#include "paddle/fluid/platform/enforce.h"
#ifdef PADDLE_WITH_HETERPS
D
danleifeng 已提交
26 27 28

DECLARE_double(gpugraph_hbm_table_load_factor);

S
seemingwang 已提交
29 30
namespace paddle {
namespace framework {
D
danleifeng 已提交
31
// Kind of table addressed by GpuPsGraphTable::get_table_offset().
// The enumerator values are used arithmetically there (the value is converted
// to int and multiplied by the number of graph tables), so EDGE_TABLE must
// stay 0 and FEATURE_TABLE must stay 1.
enum GraphTableType { EDGE_TABLE, FEATURE_TABLE };
D
danleifeng 已提交
32
class GpuPsGraphTable
D
danleifeng 已提交
33
    : public HeterComm<uint64_t, uint64_t, int, CommonFeatureValueAccessor> {
S
seemingwang 已提交
34
 public:
D
danleifeng 已提交
35 36 37 38 39 40 41 42
  int get_table_offset(int gpu_id, GraphTableType type, int idx) const {
    int type_id = type;
    return gpu_id * (graph_table_num_ + feature_table_num_) +
           type_id * graph_table_num_ + idx;
  }
  GpuPsGraphTable(std::shared_ptr<HeterPsResource> resource,
                  int graph_table_num)
      : HeterComm<uint64_t, uint64_t, int, CommonFeatureValueAccessor>(
L
lxsbupt 已提交
43
            0, resource) {
D
danleifeng 已提交
44
    load_factor_ = FLAGS_gpugraph_hbm_table_load_factor;
L
lxsbupt 已提交
45 46
    VLOG(0) << "load_factor = " << load_factor_
            << ", graph_table_num = " << graph_table_num;
D
danleifeng 已提交
47

48
    rw_lock.reset(new pthread_rwlock_t());
D
danleifeng 已提交
49 50
    this->graph_table_num_ = graph_table_num;
    this->feature_table_num_ = 1;
S
seemingwang 已提交
51
    gpu_num = resource_->total_device();
S
seemingwang 已提交
52
    memset(global_device_map, -1, sizeof(global_device_map));
L
lxsbupt 已提交
53

D
danleifeng 已提交
54
    tables_ = std::vector<Table *>(
L
lxsbupt 已提交
55
        gpu_num * (graph_table_num_ + feature_table_num_), NULL);
56
    for (int i = 0; i < gpu_num; i++) {
S
seemingwang 已提交
57
      global_device_map[resource_->dev_id(i)] = i;
L
lxsbupt 已提交
58
      for (int j = 0; j < graph_table_num_; j++) {
D
danleifeng 已提交
59 60
        gpu_graph_list_.push_back(GpuPsCommGraph());
      }
L
lxsbupt 已提交
61
      for (int j = 0; j < feature_table_num_; j++) {
D
danleifeng 已提交
62 63
        gpu_graph_fea_list_.push_back(GpuPsCommGraphFea());
      }
64
    }
65
    cpu_table_status = -1;
L
lxsbupt 已提交
66 67 68 69 70 71 72 73
    device_mutex_.resize(gpu_num);
    for (int i = 0; i < gpu_num; i++) {
      device_mutex_[i] = new std::mutex();
    }
  }
  ~GpuPsGraphTable() {
    for (size_t i = 0; i < device_mutex_.size(); ++i) {
      delete device_mutex_[i];
74
    }
L
lxsbupt 已提交
75
    device_mutex_.clear();
76
  }
D
danleifeng 已提交
77 78 79 80
  void build_graph_on_single_gpu(const GpuPsCommGraph &g, int gpu_id, int idx);
  void build_graph_fea_on_single_gpu(const GpuPsCommGraphFea &g, int gpu_id);
  void clear_graph_info(int gpu_id, int index);
  void clear_graph_info(int index);
L
lxsbupt 已提交
81
  void reset_feature_info(int gpu_id, size_t capacity, size_t feature_size);
D
danleifeng 已提交
82 83 84 85 86 87
  void clear_feature_info(int gpu_id, int index);
  void clear_feature_info(int index);
  void build_graph_from_cpu(const std::vector<GpuPsCommGraph> &cpu_node_list,
                            int idx);
  void build_graph_fea_from_cpu(
      const std::vector<GpuPsCommGraphFea> &cpu_node_list, int idx);
88 89
  NodeQueryResult graph_node_sample(int gpu_id, int sample_size);
  NeighborSampleResult graph_neighbor_sample_v3(NeighborSampleQuery q,
L
lxsbupt 已提交
90 91
                                                bool cpu_switch,
                                                bool compress);
92
  NeighborSampleResult graph_neighbor_sample(int gpu_id,
D
danleifeng 已提交
93
                                             uint64_t *key,
94 95 96
                                             int sample_size,
                                             int len);
  NeighborSampleResult graph_neighbor_sample_v2(int gpu_id,
D
danleifeng 已提交
97 98
                                                int idx,
                                                uint64_t *key,
99 100
                                                int sample_size,
                                                int len,
L
lxsbupt 已提交
101 102 103 104 105 106 107 108 109 110 111
                                                bool cpu_query_switch,
                                                bool compress);
  NeighborSampleResultV2 graph_neighbor_sample_all_edge_type(
      int gpu_id,
      int edge_type_len,
      uint64_t *key,
      int sample_size,
      int len,
      std::vector<std::shared_ptr<phi::Allocation>> edge_type_graphs);
  std::vector<std::shared_ptr<phi::Allocation>> get_edge_type_graph(
      int gpu_id, int edge_type_len);
112 113 114 115 116
  void get_node_degree(int gpu_id,
                       int edge_idx,
                       uint64_t *key,
                       int len,
                       std::shared_ptr<phi::Allocation> node_degree);
L
lxsbupt 已提交
117 118 119 120 121 122 123 124 125 126 127 128 129 130 131
  int get_feature_of_nodes(int gpu_id,
                           uint64_t *d_walk,
                           uint64_t *d_offset,
                           int size,
                           int slot_num,
                           int *d_slot_feature_num_map,
                           int fea_num_per_node);
  int get_feature_info_of_nodes(
      int gpu_id,
      uint64_t *d_nodes,
      int node_num,
      uint32_t *size_list,
      uint32_t *size_list_prefix_sum,
      std::shared_ptr<phi::Allocation> &feature_list,  // NOLINT
      std::shared_ptr<phi::Allocation> &slot_list);    // NOLINT
D
danleifeng 已提交
132 133 134 135 136

  NodeQueryResult query_node_list(int gpu_id,
                                  int idx,
                                  int start,
                                  int query_size);
S
seemingwang 已提交
137
  void display_sample_res(void *key, void *val, int len, int sample_len);
D
danleifeng 已提交
138 139 140 141 142 143 144
  void move_result_to_source_gpu(int gpu_id,
                                 int gpu_num,
                                 int sample_size,
                                 int *h_left,
                                 int *h_right,
                                 uint64_t *src_sample_res,
                                 int *actual_sample_size);
L
lxsbupt 已提交
145 146 147 148 149 150 151 152 153
  void move_result_to_source_gpu(int start_index,
                                 int gpu_num,
                                 int *h_left,
                                 int *h_right,
                                 int *fea_left,
                                 uint32_t *fea_num_list,
                                 uint32_t *actual_feature_size,
                                 uint64_t *feature_list,
                                 uint8_t *slot_list);
154 155
  void move_degree_to_source_gpu(
      int gpu_id, int gpu_num, int *h_left, int *h_right, int *node_degree);
L
lxsbupt 已提交
156 157 158 159 160 161 162 163 164 165 166 167 168 169
  void move_result_to_source_gpu_all_edge_type(int gpu_id,
                                               int gpu_num,
                                               int sample_size,
                                               int *h_left,
                                               int *h_right,
                                               uint64_t *src_sample_res,
                                               int *actual_sample_size,
                                               int edge_type_len,
                                               int len);
  int init_cpu_table(const paddle::distributed::GraphParameter &graph,
                     int gpu_num = 8);
  gpuStream_t get_local_stream(int gpu_id) {
    return resource_->local_stream(gpu_id, 0);
  }
D
danleifeng 已提交
170

171
  int gpu_num;
D
danleifeng 已提交
172 173 174
  int graph_table_num_, feature_table_num_;
  std::vector<GpuPsCommGraph> gpu_graph_list_;
  std::vector<GpuPsCommGraphFea> gpu_graph_fea_list_;
S
seemingwang 已提交
175
  int global_device_map[32];
176 177
  const int parallel_sample_size = 1;
  const int dim_y = 256;
D
danleifeng 已提交
178
  std::shared_ptr<paddle::distributed::GraphTable> cpu_graph_table_;
179 180
  std::shared_ptr<pthread_rwlock_t> rw_lock;
  mutable std::mutex mutex_;
L
lxsbupt 已提交
181
  std::vector<std::mutex *> device_mutex_;
182 183
  std::condition_variable cv_;
  int cpu_table_status;
S
seemingwang 已提交
184
};
L
lxsbupt 已提交
185 186

};  // namespace framework
187
};  // namespace paddle
S
seemingwang 已提交
188
#endif