/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#ifdef PADDLE_WITH_ASCEND_CL
#include <stddef.h>
#include <stdint.h>

#include <string>
#include <vector>

#include "acl/acl.h"
#include "paddle/fluid/platform/device/npu/enforce_npu.h"
#include "paddle/phi/backends/npu/npu_info.h"

namespace paddle {
namespace platform {

//! Get the total number of NPU devices in system.
int GetNPUDeviceCount();

//! Get the runtime version of the NPU with index `id`, as a string.
//! NOTE(review): the format of the returned version string is not visible in
//! this header - confirm against the implementation.
std::string GetNPURuntimeVersion(int id);

//! Check if device `src` can access peer device `dst` or not.
//! NOTE(review): the answer is returned as an int rather than a bool;
//! presumably non-zero means access is possible - confirm against the
//! implementation.
int NPUCanAccessPeer(int src, int dst);

//! Get the current NPU device id in system.
int GetCurrentNPUDeviceId();

42 43 44
//! Get the current NPU context.
void GetCurrentNPUContext(aclrtContext *context);

//! Get the current NPU stream.
//! NOTE(review): declared to return int rather than aclrtStream - confirm
//! what the returned value represents before relying on it.
int GetCurrentStream();

//! Get a list of device ids from environment variable or use all.
//! NOTE(review): which environment variable is consulted is not visible in
//! this header - see the implementation.
std::vector<int> GetSelectedNPUDevices();

//! Set the NPU device id for next execution.
void SetNPUDeviceId(int device_id);

//! Reset the NPU device id for next execution.
//! NOTE(review): how this differs from SetNPUDeviceId is not visible in this
//! header - confirm against the implementation.
void ResetNPUDeviceId(int device_id);

//! Get the memory usage of current NPU device.
//! The function returns void; the figures are presumably reported through the
//! `available` and `total` pointers (unit presumably bytes - confirm).
void NPUMemoryUsage(size_t *available, size_t *total);

//! Get the available memory to allocate, which is the size of available npu
//! memory minus the reserved amount.
size_t NPUAvailableMemToAlloc();

//! Get the maximum allocation size of current NPU device.
size_t NPUMaxAllocSize();

//! Get the initial allocation size of current NPU device.
size_t NPUInitAllocSize();

//! Get the re-allocation size of current NPU device.
size_t NPUReallocSize();

73
using phi::backends::npu::NPUMinChunkSize;
74 75 76 77 78

//! Get the maximum chunk size for NPU buddy allocator.
size_t NPUMaxChunkSize();

//! Copy memory from address src to dst asynchronously.
79 80 81 82 83
void NPUMemcpyAsync(void *dst,
                    const void *src,
                    size_t count,
                    enum aclrtMemcpyKind kind,
                    aclrtStream stream,
84 85 86
                    size_t dst_max_count = 0);

//! Copy memory from address src to dst synchronously.
87 88 89 90 91
void NPUMemcpySync(void *dst,
                   const void *src,
                   size_t count,
                   enum aclrtMemcpyKind kind,
                   size_t dst_max_count = 0);
92

//! Set memory at dst to value for count size, synchronously.
//! NOTE(review): `max_count` defaults to 0; the meaning of 0 is not visible
//! in this header - confirm against the implementation.
void NPUMemsetSync(void *dst, int value, size_t count, size_t max_count = 0);

96
//! Set memory dst with value count size asynchronously
97 98 99 100
void NPUMemsetAsync(void *dst,
                    int value,
                    size_t count,
                    aclrtStream stream,
101 102 103
                    size_t max_count = 0);

//! Copy memory from one device to another device asynchronously.
104 105 106 107 108 109
void NPUMemcpyPeerAsync(void *dst,
                        int dst_device,
                        const void *src,
                        int src_device,
                        size_t count,
                        aclrtStream stream,
110 111 112
                        size_t max_count = 0);

//! Copy memory from one device to another device synchronously.
//! `src` on device `src_device` is the source, `dst` on device `dst_device`
//! the destination and `count` the amount to copy.
//! NOTE(review): `max_count` defaults to 0; the meaning of 0 is not visible
//! in this header - confirm against the implementation.
void NPUMemcpyPeerSync(void *dst,
                       int dst_device,
                       const void *src,
                       int src_device,
                       size_t count,
                       size_t max_count = 0);

120 121 122
//! Create NPU stream.
void NPUStreamCreate(aclrtStream *stream);

123 124 125
//! Blocks until stream has completed all operations.
void NPUStreamSync(aclrtStream stream);

126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
//! Destroy NPU stream.
void NPUStreamDestroy(aclrtStream stream);

//! Create NPU Event.
//! The function returns void; the new event handle is presumably stored
//! through the `event` pointer - confirm against the implementation.
void NPUEventCreate(aclrtEvent *event);

//! Destroy NPU Event.
void NPUEventDestroy(aclrtEvent event);

//! Query NPU event status.
//! The function returns void; the status is presumably reported through the
//! `status` pointer - confirm against the implementation.
void NPUEventQuery(aclrtEvent event, aclrtEventStatus *status);

//! Record NPU event in the stream.
void NPUEventRecord(aclrtEvent event, aclrtStream stream);

141 142 143
//! Synchronize NPU event.
void NPUEventSynchronize(aclrtEvent event);

144 145 146 147 148 149 150 151 152
//! Makes a stream wait on an event.
void NPUStreamWaitEvent(aclrtStream stream, aclrtEvent event);

//! Alloc host or device memory.
//! Takes the requested `size` and a pointer-to-pointer `ptr` (presumably
//! where the allocated address is stored - confirm). Returns an aclError.
aclError NPUHostMalloc(void **ptr, size_t size);

//! Frees host or device memory.
//! Returns an aclError.
aclError NPUHostFree(void *ptr);

153 154 155 156 157 158 159
//! aclrtMalloc with recorded info
aclError RecordedNPUMalloc(void **ptr, size_t size, int dev_id);

//! aclrtFree with recorded info
void RecordedNPUFree(void *p, size_t size, int dev_id);

//! Get available and total NPU memory of device `dev_id`, taking the
//! limitation into account.
//! Four size_t pointers are taken: `avail`/`total` and
//! `actual_avail`/`actual_total`. NOTE(review): presumably the first pair is
//! the limited view and the second pair the unrestricted figures, and the
//! bool result tells whether a limit applied - confirm against the
//! implementation.
bool RecordedNPUMemGetInfo(size_t *avail,
                           size_t *total,
                           size_t *actual_avail,
                           size_t *actual_total,
                           int dev_id);

//! Get recorded aclrtMalloc size for device `dev_id`. If record is disabled,
//! return 0.
uint64_t RecordedNPUMallocSize(int dev_id);

//! NOTE(review): presumably reports whether aclrtMalloc recording is active
//! for device `dev_id` - inferred from the name, confirm against the
//! implementation.
bool IsNPUMallocRecorded(int dev_id);

171
//! Adds a callback function executed on the host or device to the stream.
172 173 174 175
void NPULaunchCallback(aclrtCallback fn,
                       void *userData,
                       aclrtCallbackBlockType blockType,
                       aclrtStream stream);
176

177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219
class NPUDeviceGuard {
 public:
  explicit inline NPUDeviceGuard(int dev_id) {
    int prev_id = platform::GetCurrentNPUDeviceId();
    if (prev_id != dev_id) {
      prev_id_ = prev_id;
      platform::SetNPUDeviceId(dev_id);
    }
  }

  inline ~NPUDeviceGuard() {
    if (prev_id_ != -1) {
      platform::SetNPUDeviceId(prev_id_);
    }
  }

  NPUDeviceGuard(const NPUDeviceGuard &o) = delete;
  NPUDeviceGuard &operator=(const NPUDeviceGuard &o) = delete;

 private:
  int prev_id_{-1};
};

//! Holder type that can only be obtained through Instance(): the default
//! constructor is private and copying is disabled.
//! NOTE(review): presumably a process-wide singleton that owns ACL
//! initialization/finalization - confirm against the implementation.
class AclInstance {
 public:
  // NOTE(zhiiu): Commonly, exception in destructor is not recommended, so
  // no PADDLE_ENFORCE here, call acl API directly.
  ~AclInstance();

  AclInstance(const AclInstance &) = delete;
  AclInstance &operator=(const AclInstance &) = delete;

  //! Access the AclInstance object by reference.
  static AclInstance &Instance();

  //! NOTE(review): presumably releases the ACL resources held by this
  //! object - the behavior is defined in the implementation file, confirm
  //! there.
  void Finalize();

 private:
  // forbid calling default constructor
  AclInstance();

  // NOTE(review): presumably the ids of the devices managed by this
  // instance - confirm against the implementation.
  std::vector<int> devices_;
};

}  // namespace platform
}  // namespace paddle

#endif