// device_base.h
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once
#ifdef PADDLE_WITH_CUSTOM_DEVICE
#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>

#include "paddle/phi/backends/c_comm_lib.h"
#include "paddle/phi/backends/event.h"
#include "paddle/phi/backends/stream.h"

namespace phi {
class DeviceInterface {  // Driver / Runtime
 public:
  DeviceInterface(const std::string& type, uint8_t priority, bool is_custom)
      : type_(type), priority_(priority), is_custom_(is_custom) {}
  uint8_t Priority() { return priority_; }
  std::string Type() { return type_; }
  bool IsCustom() { return is_custom_; }

  virtual ~DeviceInterface() {}

  // Info
  virtual size_t GetComputeCapability();

  virtual size_t GetRuntimeVersion();

  virtual size_t GetDriverVersion();

  // Platform
  //! Initialize
  virtual void Initialize();

  //! Finalize
  virtual void Finalize();

  // Device
  virtual size_t GetDeviceCount() = 0;
  virtual std::vector<size_t> GetDeviceList() = 0;

  //! Wait for compute device to finish.
  virtual void SynchronizeDevice(size_t dev_id);

  //! Initialize device.
  virtual void InitDevice(size_t dev_id);

  //! Deinitialize device.
  virtual void DeInitDevice(size_t dev_id);

  // ! Set device to be used.
  virtual void SetDevice(size_t dev_id);

  // ! Returns which device is currently being used.
  virtual int GetDevice();

  // Stream
  // ! Create an asynchronous stream
  virtual void CreateStream(
71 72
      size_t dev_id,
      stream::Stream* stream,
73 74 75 76 77 78 79 80 81 82 83 84 85 86
      const stream::Stream::Priority& priority =
          stream::Stream::Priority::kNormal,
      const stream::Stream::Flag& flag = stream::Stream::Flag::kDefaultFlag);

  // ! Destroys an asynchronous stream.
  virtual void DestroyStream(size_t dev_id, stream::Stream* stream);

  // ! Waits for stream tasks to complete.
  virtual void SynchronizeStream(size_t dev_id, const stream::Stream* stream);

  // ! Queries an asynchronous stream for completion status.
  virtual bool QueryStream(size_t dev_id, const stream::Stream* stream);

  // ! Add a callback to a compute stream.
87 88
  virtual void AddCallback(size_t dev_id,
                           stream::Stream* stream,
89 90 91 92
                           stream::Stream::Callback* callback);

  // Event
  // ! Create an event.
93 94
  virtual void CreateEvent(size_t dev_id,
                           event::Event* event,
95 96 97 98 99 100
                           event::Event::Flag flags);

  // ! Destroy an event.
  virtual void DestroyEvent(size_t dev_id, event::Event* event);

  // ! Records an event.
101 102
  virtual void RecordEvent(size_t dev_id,
                           const event::Event* event,
103 104 105 106 107 108 109 110
                           const stream::Stream* stream);

  // ! Waits for event to complete.
  virtual void SynchronizeEvent(size_t dev_id, const event::Event* event);
  // ! Queries an event for completion status.
  virtual bool QueryEvent(size_t dev_id, const event::Event* event);

  // ! Make a compute stream wait on an event
111 112
  virtual void StreamWaitEvent(size_t dev_id,
                               const stream::Stream* stream,
113 114 115
                               const event::Event* event);

  // Memory
116 117 118
  virtual void MemoryCopyH2D(size_t dev_id,
                             void* dst,
                             const void* src,
119 120 121
                             size_t size,
                             const stream::Stream* stream = nullptr);

122 123 124
  virtual void MemoryCopyD2H(size_t dev_id,
                             void* dst,
                             const void* src,
125 126 127
                             size_t size,
                             const stream::Stream* stream = nullptr);

128 129 130
  virtual void MemoryCopyD2D(size_t dev_id,
                             void* dst,
                             const void* src,
131 132 133
                             size_t size,
                             const stream::Stream* stream = nullptr);

134 135 136 137 138
  virtual void MemoryCopyP2P(const Place& dst_place,
                             void* dst,
                             size_t src_id,
                             const void* src,
                             size_t size,
139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168
                             const stream::Stream* stream = nullptr);

  virtual void* MemoryAllocate(size_t dev_id, size_t size);

  virtual void MemoryDeallocate(size_t dev_id, void* ptr, size_t size);

  virtual void* MemoryAllocateHost(size_t dev_id, size_t size);

  virtual void MemoryDeallocateHost(size_t dev_id, void* ptr, size_t size);

  virtual void* MemoryAllocateUnified(size_t dev_id, size_t size);

  virtual void MemoryDeallocateUnified(size_t dev_id, void* ptr, size_t size);

  virtual void MemorySet(size_t dev_id, void* ptr, uint8_t value, size_t size);

  virtual void MemoryStats(size_t dev_id, size_t* total, size_t* free);

  virtual size_t GetMinChunkSize(size_t dev_id);

  virtual size_t GetInitAllocSize(size_t dev_id);

  virtual size_t GetReallocSize(size_t dev_id);

  virtual size_t GetMaxAllocSize(size_t dev_id);

  virtual size_t GetMaxChunkSize(size_t dev_id);

  virtual size_t GetExtraPaddingSize(size_t dev_id);

169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197
  // CCL
  virtual void CCLDestroyComm(ccl::CCLComm ccl_comm);

  virtual void CCLCommInitRank(size_t num_ranks,
                               ccl::CCLRootId* root_id,
                               size_t rank_id,
                               ccl::CCLComm* ccl_comm);

  virtual void CCLGetUniqueId(ccl::CCLRootId* root_id);

  virtual void CCLBroadcast(void* data,
                            size_t num,
                            ccl::CCLDataType data_type,
                            size_t root,
                            const ccl::CCLComm& ccl_comm,
                            const stream::Stream& stream);

  virtual void CCLAllReduce(void* in_data,
                            void* out_data,
                            size_t num,
                            ccl::CCLDataType data_type,
                            ccl::CCLReduceOp reduce_op,
                            const ccl::CCLComm& ccl_comm,
                            const stream::Stream& stream);
  virtual void CCLReduce(void* in_data,
                         void* out_data,
                         size_t num,
                         ccl::CCLDataType data_type,
                         ccl::CCLReduceOp reduce_op,
198
                         size_t root_id,
199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228
                         const ccl::CCLComm& ccl_comm,
                         const stream::Stream& stream);
  virtual void CCLAllGather(void* in_data,
                            void* out_data,
                            size_t num,
                            ccl::CCLDataType data_type,
                            const ccl::CCLComm& ccl_comm,
                            const stream::Stream& stream);
  virtual void CCLReduceScatter(void* in_data,
                                void* out_data,
                                size_t num,
                                ccl::CCLDataType data_type,
                                ccl::CCLReduceOp op,
                                const ccl::CCLComm& ccl_comm,
                                const stream::Stream& stream);
  virtual void CCLGroupStart();
  virtual void CCLGroupEnd();
  virtual void CCLSend(void* sendbuf,
                       size_t num,
                       ccl::CCLDataType data_type,
                       size_t dst_rank,
                       const ccl::CCLComm& ccl_comm,
                       const stream::Stream& stream);
  virtual void CCLRecv(void* recvbuf,
                       size_t num,
                       ccl::CCLDataType data_type,
                       size_t src_rank,
                       const ccl::CCLComm& ccl_comm,
                       const stream::Stream& stream);

229 230 231 232 233 234 235 236 237 238
  // blas
  virtual void BlasAXPBY(size_t dev_id,
                         const stream::Stream& stream,
                         paddle::experimental::DataType dtype,
                         size_t numel,
                         float alpha,
                         void* x,
                         float beta,
                         void* y);

239 240 241 242 243 244 245 246 247 248
 private:
  const std::string type_;
  const uint8_t priority_;
  const bool is_custom_;

  size_t AllocSize(size_t dev_id, bool realloc);

  size_t AvailableAllocSize(size_t dev_id);
};

}  // namespace phi

#endif