// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once
#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>

#include "paddle/phi/backends/c_comm_lib.h"
#include "paddle/phi/backends/event.h"
#include "paddle/phi/backends/stream.h"

// Forward declaration only: this header refers to TraceEventCollector solely
// through pointers (see the Profiler* hooks of phi::DeviceInterface), so the
// full definition is not needed here.
namespace paddle {
namespace platform {
class TraceEventCollector;
}  // namespace platform
}  // namespace paddle

namespace phi {
29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75

class DeviceInterface {  // Driver / Runtime
 public:
  DeviceInterface(const std::string& type, uint8_t priority, bool is_custom)
      : type_(type), priority_(priority), is_custom_(is_custom) {}
  uint8_t Priority() { return priority_; }
  std::string Type() { return type_; }
  bool IsCustom() { return is_custom_; }

  virtual ~DeviceInterface() {}

  // Info
  virtual size_t GetComputeCapability();

  virtual size_t GetRuntimeVersion();

  virtual size_t GetDriverVersion();

  // Platform
  //! Initialize
  virtual void Initialize();

  //! Finalize
  virtual void Finalize();

  // Device
  virtual size_t GetDeviceCount() = 0;
  virtual std::vector<size_t> GetDeviceList() = 0;

  //! Wait for compute device to finish.
  virtual void SynchronizeDevice(size_t dev_id);

  //! Initialize device.
  virtual void InitDevice(size_t dev_id);

  //! Deinitialize device.
  virtual void DeInitDevice(size_t dev_id);

  // ! Set device to be used.
  virtual void SetDevice(size_t dev_id);

  // ! Returns which device is currently being used.
  virtual int GetDevice();

  // Stream
  // ! Create an asynchronous stream
  virtual void CreateStream(
76 77
      size_t dev_id,
      stream::Stream* stream,
78 79 80 81 82 83 84 85 86 87 88 89 90 91
      const stream::Stream::Priority& priority =
          stream::Stream::Priority::kNormal,
      const stream::Stream::Flag& flag = stream::Stream::Flag::kDefaultFlag);

  // ! Destroys an asynchronous stream.
  virtual void DestroyStream(size_t dev_id, stream::Stream* stream);

  // ! Waits for stream tasks to complete.
  virtual void SynchronizeStream(size_t dev_id, const stream::Stream* stream);

  // ! Queries an asynchronous stream for completion status.
  virtual bool QueryStream(size_t dev_id, const stream::Stream* stream);

  // ! Add a callback to a compute stream.
92 93
  virtual void AddCallback(size_t dev_id,
                           stream::Stream* stream,
94 95 96 97
                           stream::Stream::Callback* callback);

  // Event
  // ! Create an event.
98 99 100 101
  virtual void CreateEvent(
      size_t dev_id,
      event::Event* event,
      event::Event::Flag flags = event::Event::Flag::Default);
102 103 104 105 106

  // ! Destroy an event.
  virtual void DestroyEvent(size_t dev_id, event::Event* event);

  // ! Records an event.
107 108
  virtual void RecordEvent(size_t dev_id,
                           const event::Event* event,
109 110 111 112 113 114 115 116
                           const stream::Stream* stream);

  // ! Waits for event to complete.
  virtual void SynchronizeEvent(size_t dev_id, const event::Event* event);
  // ! Queries an event for completion status.
  virtual bool QueryEvent(size_t dev_id, const event::Event* event);

  // ! Make a compute stream wait on an event
117 118
  virtual void StreamWaitEvent(size_t dev_id,
                               const stream::Stream* stream,
119 120 121
                               const event::Event* event);

  // Memory
122 123 124
  virtual void MemoryCopyH2D(size_t dev_id,
                             void* dst,
                             const void* src,
125 126 127
                             size_t size,
                             const stream::Stream* stream = nullptr);

128 129 130
  virtual void MemoryCopyD2H(size_t dev_id,
                             void* dst,
                             const void* src,
131 132 133
                             size_t size,
                             const stream::Stream* stream = nullptr);

134 135 136
  virtual void MemoryCopyD2D(size_t dev_id,
                             void* dst,
                             const void* src,
137 138 139
                             size_t size,
                             const stream::Stream* stream = nullptr);

140 141 142 143 144
  virtual void MemoryCopyP2P(const Place& dst_place,
                             void* dst,
                             size_t src_id,
                             const void* src,
                             size_t size,
145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174
                             const stream::Stream* stream = nullptr);

  virtual void* MemoryAllocate(size_t dev_id, size_t size);

  virtual void MemoryDeallocate(size_t dev_id, void* ptr, size_t size);

  virtual void* MemoryAllocateHost(size_t dev_id, size_t size);

  virtual void MemoryDeallocateHost(size_t dev_id, void* ptr, size_t size);

  virtual void* MemoryAllocateUnified(size_t dev_id, size_t size);

  virtual void MemoryDeallocateUnified(size_t dev_id, void* ptr, size_t size);

  virtual void MemorySet(size_t dev_id, void* ptr, uint8_t value, size_t size);

  virtual void MemoryStats(size_t dev_id, size_t* total, size_t* free);

  virtual size_t GetMinChunkSize(size_t dev_id);

  virtual size_t GetInitAllocSize(size_t dev_id);

  virtual size_t GetReallocSize(size_t dev_id);

  virtual size_t GetMaxAllocSize(size_t dev_id);

  virtual size_t GetMaxChunkSize(size_t dev_id);

  virtual size_t GetExtraPaddingSize(size_t dev_id);

175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203
  // CCL
  virtual void CCLDestroyComm(ccl::CCLComm ccl_comm);

  virtual void CCLCommInitRank(size_t num_ranks,
                               ccl::CCLRootId* root_id,
                               size_t rank_id,
                               ccl::CCLComm* ccl_comm);

  virtual void CCLGetUniqueId(ccl::CCLRootId* root_id);

  virtual void CCLBroadcast(void* data,
                            size_t num,
                            ccl::CCLDataType data_type,
                            size_t root,
                            const ccl::CCLComm& ccl_comm,
                            const stream::Stream& stream);

  virtual void CCLAllReduce(void* in_data,
                            void* out_data,
                            size_t num,
                            ccl::CCLDataType data_type,
                            ccl::CCLReduceOp reduce_op,
                            const ccl::CCLComm& ccl_comm,
                            const stream::Stream& stream);
  virtual void CCLReduce(void* in_data,
                         void* out_data,
                         size_t num,
                         ccl::CCLDataType data_type,
                         ccl::CCLReduceOp reduce_op,
204
                         size_t root_id,
205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234
                         const ccl::CCLComm& ccl_comm,
                         const stream::Stream& stream);
  virtual void CCLAllGather(void* in_data,
                            void* out_data,
                            size_t num,
                            ccl::CCLDataType data_type,
                            const ccl::CCLComm& ccl_comm,
                            const stream::Stream& stream);
  virtual void CCLReduceScatter(void* in_data,
                                void* out_data,
                                size_t num,
                                ccl::CCLDataType data_type,
                                ccl::CCLReduceOp op,
                                const ccl::CCLComm& ccl_comm,
                                const stream::Stream& stream);
  virtual void CCLGroupStart();
  virtual void CCLGroupEnd();
  virtual void CCLSend(void* sendbuf,
                       size_t num,
                       ccl::CCLDataType data_type,
                       size_t dst_rank,
                       const ccl::CCLComm& ccl_comm,
                       const stream::Stream& stream);
  virtual void CCLRecv(void* recvbuf,
                       size_t num,
                       ccl::CCLDataType data_type,
                       size_t src_rank,
                       const ccl::CCLComm& ccl_comm,
                       const stream::Stream& stream);

235 236 237 238 239 240 241 242 243 244
  // blas
  virtual void BlasAXPBY(size_t dev_id,
                         const stream::Stream& stream,
                         paddle::experimental::DataType dtype,
                         size_t numel,
                         float alpha,
                         void* x,
                         float beta,
                         void* y);

245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265
  // profiler
  virtual void ProfilerInitialize(
      paddle::platform::TraceEventCollector* collector, void** user_data);

  virtual void ProfilerFinalize(
      paddle::platform::TraceEventCollector* collector, void* user_data);

  virtual void ProfilerPrepareTracing(
      paddle::platform::TraceEventCollector* collector, void* user_data);

  virtual void ProfilerStartTracing(
      paddle::platform::TraceEventCollector* collector, void* user_data);

  virtual void ProfilerStopTracing(
      paddle::platform::TraceEventCollector* collector, void* user_data);

  virtual void ProfilerCollectTraceData(
      paddle::platform::TraceEventCollector* collector,
      uint64_t start_ns,
      void* user_data);

266 267 268 269 270 271 272 273 274 275
 private:
  const std::string type_;
  const uint8_t priority_;
  const bool is_custom_;

  size_t AllocSize(size_t dev_id, bool realloc);

  size_t AvailableAllocSize(size_t dev_id);
};

276
}  // namespace phi