// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once
#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>

#include "paddle/phi/backends/c_comm_lib.h"
#include "paddle/phi/backends/event.h"
#include "paddle/phi/backends/stream.h"

namespace phi {

class TraceEventCollector;

class DeviceInterface {  // Driver / Runtime
 public:
  DeviceInterface(const std::string& type, uint8_t priority, bool is_custom)
      : type_(type), priority_(priority), is_custom_(is_custom) {}
  uint8_t Priority() { return priority_; }
  std::string Type() { return type_; }
  bool IsCustom() { return is_custom_; }

  virtual ~DeviceInterface() {}

  // Info
  virtual size_t GetComputeCapability();

  virtual size_t GetRuntimeVersion();

  virtual size_t GetDriverVersion();

  // Platform
  //! Initialize
  virtual void Initialize();

  //! Finalize
  virtual void Finalize();

  // Device
  virtual size_t GetDeviceCount() = 0;
  virtual std::vector<size_t> GetDeviceList() = 0;

  //! Wait for compute device to finish.
  virtual void SynchronizeDevice(size_t dev_id);

  //! Initialize device.
  virtual void InitDevice(size_t dev_id);

  //! Deinitialize device.
  virtual void DeInitDevice(size_t dev_id);

  // ! Set device to be used.
  virtual void SetDevice(size_t dev_id);

  // ! Returns which device is currently being used.
  virtual int GetDevice();

  // Stream
  // ! Create an asynchronous stream
  virtual void CreateStream(
72 73
      size_t dev_id,
      stream::Stream* stream,
74 75 76 77 78 79 80 81 82 83 84 85 86 87
      const stream::Stream::Priority& priority =
          stream::Stream::Priority::kNormal,
      const stream::Stream::Flag& flag = stream::Stream::Flag::kDefaultFlag);

  // ! Destroys an asynchronous stream.
  virtual void DestroyStream(size_t dev_id, stream::Stream* stream);

  // ! Waits for stream tasks to complete.
  virtual void SynchronizeStream(size_t dev_id, const stream::Stream* stream);

  // ! Queries an asynchronous stream for completion status.
  virtual bool QueryStream(size_t dev_id, const stream::Stream* stream);

  // ! Add a callback to a compute stream.
88 89
  virtual void AddCallback(size_t dev_id,
                           stream::Stream* stream,
90 91 92 93
                           stream::Stream::Callback* callback);

  // Event
  // ! Create an event.
94 95 96 97
  virtual void CreateEvent(
      size_t dev_id,
      event::Event* event,
      event::Event::Flag flags = event::Event::Flag::Default);
98 99 100 101 102

  // ! Destroy an event.
  virtual void DestroyEvent(size_t dev_id, event::Event* event);

  // ! Records an event.
103 104
  virtual void RecordEvent(size_t dev_id,
                           const event::Event* event,
105 106 107 108 109 110 111 112
                           const stream::Stream* stream);

  // ! Waits for event to complete.
  virtual void SynchronizeEvent(size_t dev_id, const event::Event* event);
  // ! Queries an event for completion status.
  virtual bool QueryEvent(size_t dev_id, const event::Event* event);

  // ! Make a compute stream wait on an event
113 114
  virtual void StreamWaitEvent(size_t dev_id,
                               const stream::Stream* stream,
115 116 117
                               const event::Event* event);

  // Memory
118 119 120
  virtual void MemoryCopyH2D(size_t dev_id,
                             void* dst,
                             const void* src,
121 122 123
                             size_t size,
                             const stream::Stream* stream = nullptr);

124 125 126
  virtual void MemoryCopyD2H(size_t dev_id,
                             void* dst,
                             const void* src,
127 128 129
                             size_t size,
                             const stream::Stream* stream = nullptr);

130 131 132
  virtual void MemoryCopyD2D(size_t dev_id,
                             void* dst,
                             const void* src,
133 134 135
                             size_t size,
                             const stream::Stream* stream = nullptr);

136 137 138 139 140
  virtual void MemoryCopyP2P(const Place& dst_place,
                             void* dst,
                             size_t src_id,
                             const void* src,
                             size_t size,
141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170
                             const stream::Stream* stream = nullptr);

  virtual void* MemoryAllocate(size_t dev_id, size_t size);

  virtual void MemoryDeallocate(size_t dev_id, void* ptr, size_t size);

  virtual void* MemoryAllocateHost(size_t dev_id, size_t size);

  virtual void MemoryDeallocateHost(size_t dev_id, void* ptr, size_t size);

  virtual void* MemoryAllocateUnified(size_t dev_id, size_t size);

  virtual void MemoryDeallocateUnified(size_t dev_id, void* ptr, size_t size);

  virtual void MemorySet(size_t dev_id, void* ptr, uint8_t value, size_t size);

  virtual void MemoryStats(size_t dev_id, size_t* total, size_t* free);

  virtual size_t GetMinChunkSize(size_t dev_id);

  virtual size_t GetInitAllocSize(size_t dev_id);

  virtual size_t GetReallocSize(size_t dev_id);

  virtual size_t GetMaxAllocSize(size_t dev_id);

  virtual size_t GetMaxChunkSize(size_t dev_id);

  virtual size_t GetExtraPaddingSize(size_t dev_id);

171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199
  // CCL
  virtual void CCLDestroyComm(ccl::CCLComm ccl_comm);

  virtual void CCLCommInitRank(size_t num_ranks,
                               ccl::CCLRootId* root_id,
                               size_t rank_id,
                               ccl::CCLComm* ccl_comm);

  virtual void CCLGetUniqueId(ccl::CCLRootId* root_id);

  virtual void CCLBroadcast(void* data,
                            size_t num,
                            ccl::CCLDataType data_type,
                            size_t root,
                            const ccl::CCLComm& ccl_comm,
                            const stream::Stream& stream);

  virtual void CCLAllReduce(void* in_data,
                            void* out_data,
                            size_t num,
                            ccl::CCLDataType data_type,
                            ccl::CCLReduceOp reduce_op,
                            const ccl::CCLComm& ccl_comm,
                            const stream::Stream& stream);
  virtual void CCLReduce(void* in_data,
                         void* out_data,
                         size_t num,
                         ccl::CCLDataType data_type,
                         ccl::CCLReduceOp reduce_op,
200
                         size_t root_id,
201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230
                         const ccl::CCLComm& ccl_comm,
                         const stream::Stream& stream);
  virtual void CCLAllGather(void* in_data,
                            void* out_data,
                            size_t num,
                            ccl::CCLDataType data_type,
                            const ccl::CCLComm& ccl_comm,
                            const stream::Stream& stream);
  virtual void CCLReduceScatter(void* in_data,
                                void* out_data,
                                size_t num,
                                ccl::CCLDataType data_type,
                                ccl::CCLReduceOp op,
                                const ccl::CCLComm& ccl_comm,
                                const stream::Stream& stream);
  virtual void CCLGroupStart();
  virtual void CCLGroupEnd();
  virtual void CCLSend(void* sendbuf,
                       size_t num,
                       ccl::CCLDataType data_type,
                       size_t dst_rank,
                       const ccl::CCLComm& ccl_comm,
                       const stream::Stream& stream);
  virtual void CCLRecv(void* recvbuf,
                       size_t num,
                       ccl::CCLDataType data_type,
                       size_t src_rank,
                       const ccl::CCLComm& ccl_comm,
                       const stream::Stream& stream);

231 232 233 234 235 236 237 238 239 240
  virtual void CCLAllToAll(const void** send_buf,
                           const size_t* send_count,
                           const ccl::CCLDataType* send_dtype,
                           void** recv_buf,
                           const size_t* recv_count,
                           const ccl::CCLDataType* recv_dtype,
                           size_t rank,
                           size_t nranks,
                           const ccl::CCLComm& comm,
                           const stream::Stream& stream);
241 242 243
  // blas
  virtual void BlasAXPBY(size_t dev_id,
                         const stream::Stream& stream,
244
                         phi::DataType dtype,
245 246 247 248 249 250
                         size_t numel,
                         float alpha,
                         void* x,
                         float beta,
                         void* y);

251
  // profiler
252 253
  virtual void ProfilerInitialize(phi::TraceEventCollector* collector,
                                  void** user_data);
254

255 256
  virtual void ProfilerFinalize(phi::TraceEventCollector* collector,
                                void* user_data);
257

258 259
  virtual void ProfilerPrepareTracing(phi::TraceEventCollector* collector,
                                      void* user_data);
260

261 262
  virtual void ProfilerStartTracing(phi::TraceEventCollector* collector,
                                    void* user_data);
263

264 265
  virtual void ProfilerStopTracing(phi::TraceEventCollector* collector,
                                   void* user_data);
266

267 268 269
  virtual void ProfilerCollectTraceData(phi::TraceEventCollector* collector,
                                        uint64_t start_ns,
                                        void* user_data);
270

271 272 273 274 275 276 277 278 279 280
 private:
  const std::string type_;
  const uint8_t priority_;
  const bool is_custom_;

  size_t AllocSize(size_t dev_id, bool realloc);

  size_t AvailableAllocSize(size_t dev_id);
};

}  // namespace phi