// device_base.h
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once
16 17
#include <vector>

18
#include "paddle/phi/backends/c_comm_lib.h"
19 20
#include "paddle/phi/backends/event.h"
#include "paddle/phi/backends/stream.h"
21

22 23
#include "paddle/phi/api/profiler/trace_event_collector.h"

24 25 26 27 28 29
// Forward declaration of paddle::platform::TraceEventCollector, so the name
// can be referred to by pointer/reference without its full definition.
// NOTE(review): the profiler hooks declared on phi::DeviceInterface in this
// header take phi::TraceEventCollector*, not this type -- confirm whether
// this declaration is still needed.
namespace paddle {
namespace platform {
class TraceEventCollector;
}  // namespace platform
}  // namespace paddle

30
namespace phi {
31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77

class DeviceInterface {  // Driver / Runtime
 public:
  DeviceInterface(const std::string& type, uint8_t priority, bool is_custom)
      : type_(type), priority_(priority), is_custom_(is_custom) {}
  uint8_t Priority() { return priority_; }
  std::string Type() { return type_; }
  bool IsCustom() { return is_custom_; }

  virtual ~DeviceInterface() {}

  // Info
  virtual size_t GetComputeCapability();

  virtual size_t GetRuntimeVersion();

  virtual size_t GetDriverVersion();

  // Platform
  //! Initialize
  virtual void Initialize();

  //! Finalize
  virtual void Finalize();

  // Device
  virtual size_t GetDeviceCount() = 0;
  virtual std::vector<size_t> GetDeviceList() = 0;

  //! Wait for compute device to finish.
  virtual void SynchronizeDevice(size_t dev_id);

  //! Initialize device.
  virtual void InitDevice(size_t dev_id);

  //! Deinitialize device.
  virtual void DeInitDevice(size_t dev_id);

  // ! Set device to be used.
  virtual void SetDevice(size_t dev_id);

  // ! Returns which device is currently being used.
  virtual int GetDevice();

  // Stream
  // ! Create an asynchronous stream
  virtual void CreateStream(
78 79
      size_t dev_id,
      stream::Stream* stream,
80 81 82 83 84 85 86 87 88 89 90 91 92 93
      const stream::Stream::Priority& priority =
          stream::Stream::Priority::kNormal,
      const stream::Stream::Flag& flag = stream::Stream::Flag::kDefaultFlag);

  // ! Destroys an asynchronous stream.
  virtual void DestroyStream(size_t dev_id, stream::Stream* stream);

  // ! Waits for stream tasks to complete.
  virtual void SynchronizeStream(size_t dev_id, const stream::Stream* stream);

  // ! Queries an asynchronous stream for completion status.
  virtual bool QueryStream(size_t dev_id, const stream::Stream* stream);

  // ! Add a callback to a compute stream.
94 95
  virtual void AddCallback(size_t dev_id,
                           stream::Stream* stream,
96 97 98 99
                           stream::Stream::Callback* callback);

  // Event
  // ! Create an event.
100 101 102 103
  virtual void CreateEvent(
      size_t dev_id,
      event::Event* event,
      event::Event::Flag flags = event::Event::Flag::Default);
104 105 106 107 108

  // ! Destroy an event.
  virtual void DestroyEvent(size_t dev_id, event::Event* event);

  // ! Records an event.
109 110
  virtual void RecordEvent(size_t dev_id,
                           const event::Event* event,
111 112 113 114 115 116 117 118
                           const stream::Stream* stream);

  // ! Waits for event to complete.
  virtual void SynchronizeEvent(size_t dev_id, const event::Event* event);
  // ! Queries an event for completion status.
  virtual bool QueryEvent(size_t dev_id, const event::Event* event);

  // ! Make a compute stream wait on an event
119 120
  virtual void StreamWaitEvent(size_t dev_id,
                               const stream::Stream* stream,
121 122 123
                               const event::Event* event);

  // Memory
124 125 126
  virtual void MemoryCopyH2D(size_t dev_id,
                             void* dst,
                             const void* src,
127 128 129
                             size_t size,
                             const stream::Stream* stream = nullptr);

130 131 132
  virtual void MemoryCopyD2H(size_t dev_id,
                             void* dst,
                             const void* src,
133 134 135
                             size_t size,
                             const stream::Stream* stream = nullptr);

136 137 138
  virtual void MemoryCopyD2D(size_t dev_id,
                             void* dst,
                             const void* src,
139 140 141
                             size_t size,
                             const stream::Stream* stream = nullptr);

142 143 144 145 146
  virtual void MemoryCopyP2P(const Place& dst_place,
                             void* dst,
                             size_t src_id,
                             const void* src,
                             size_t size,
147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176
                             const stream::Stream* stream = nullptr);

  virtual void* MemoryAllocate(size_t dev_id, size_t size);

  virtual void MemoryDeallocate(size_t dev_id, void* ptr, size_t size);

  virtual void* MemoryAllocateHost(size_t dev_id, size_t size);

  virtual void MemoryDeallocateHost(size_t dev_id, void* ptr, size_t size);

  virtual void* MemoryAllocateUnified(size_t dev_id, size_t size);

  virtual void MemoryDeallocateUnified(size_t dev_id, void* ptr, size_t size);

  virtual void MemorySet(size_t dev_id, void* ptr, uint8_t value, size_t size);

  virtual void MemoryStats(size_t dev_id, size_t* total, size_t* free);

  virtual size_t GetMinChunkSize(size_t dev_id);

  virtual size_t GetInitAllocSize(size_t dev_id);

  virtual size_t GetReallocSize(size_t dev_id);

  virtual size_t GetMaxAllocSize(size_t dev_id);

  virtual size_t GetMaxChunkSize(size_t dev_id);

  virtual size_t GetExtraPaddingSize(size_t dev_id);

177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205
  // CCL
  virtual void CCLDestroyComm(ccl::CCLComm ccl_comm);

  virtual void CCLCommInitRank(size_t num_ranks,
                               ccl::CCLRootId* root_id,
                               size_t rank_id,
                               ccl::CCLComm* ccl_comm);

  virtual void CCLGetUniqueId(ccl::CCLRootId* root_id);

  virtual void CCLBroadcast(void* data,
                            size_t num,
                            ccl::CCLDataType data_type,
                            size_t root,
                            const ccl::CCLComm& ccl_comm,
                            const stream::Stream& stream);

  virtual void CCLAllReduce(void* in_data,
                            void* out_data,
                            size_t num,
                            ccl::CCLDataType data_type,
                            ccl::CCLReduceOp reduce_op,
                            const ccl::CCLComm& ccl_comm,
                            const stream::Stream& stream);
  virtual void CCLReduce(void* in_data,
                         void* out_data,
                         size_t num,
                         ccl::CCLDataType data_type,
                         ccl::CCLReduceOp reduce_op,
206
                         size_t root_id,
207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236
                         const ccl::CCLComm& ccl_comm,
                         const stream::Stream& stream);
  virtual void CCLAllGather(void* in_data,
                            void* out_data,
                            size_t num,
                            ccl::CCLDataType data_type,
                            const ccl::CCLComm& ccl_comm,
                            const stream::Stream& stream);
  virtual void CCLReduceScatter(void* in_data,
                                void* out_data,
                                size_t num,
                                ccl::CCLDataType data_type,
                                ccl::CCLReduceOp op,
                                const ccl::CCLComm& ccl_comm,
                                const stream::Stream& stream);
  virtual void CCLGroupStart();
  virtual void CCLGroupEnd();
  virtual void CCLSend(void* sendbuf,
                       size_t num,
                       ccl::CCLDataType data_type,
                       size_t dst_rank,
                       const ccl::CCLComm& ccl_comm,
                       const stream::Stream& stream);
  virtual void CCLRecv(void* recvbuf,
                       size_t num,
                       ccl::CCLDataType data_type,
                       size_t src_rank,
                       const ccl::CCLComm& ccl_comm,
                       const stream::Stream& stream);

237 238 239 240 241 242 243 244 245 246
  // blas
  virtual void BlasAXPBY(size_t dev_id,
                         const stream::Stream& stream,
                         paddle::experimental::DataType dtype,
                         size_t numel,
                         float alpha,
                         void* x,
                         float beta,
                         void* y);

247
  // profiler
248 249
  virtual void ProfilerInitialize(phi::TraceEventCollector* collector,
                                  void** user_data);
250

251 252
  virtual void ProfilerFinalize(phi::TraceEventCollector* collector,
                                void* user_data);
253

254 255
  virtual void ProfilerPrepareTracing(phi::TraceEventCollector* collector,
                                      void* user_data);
256

257 258
  virtual void ProfilerStartTracing(phi::TraceEventCollector* collector,
                                    void* user_data);
259

260 261
  virtual void ProfilerStopTracing(phi::TraceEventCollector* collector,
                                   void* user_data);
262

263 264 265
  virtual void ProfilerCollectTraceData(phi::TraceEventCollector* collector,
                                        uint64_t start_ns,
                                        void* user_data);
266

267 268 269 270 271 272 273 274 275 276
 private:
  const std::string type_;
  const uint8_t priority_;
  const bool is_custom_;

  size_t AllocSize(size_t dev_id, bool realloc);

  size_t AvailableAllocSize(size_t dev_id);
};

277
}  // namespace phi