hl_cuda.h 8.2 KB
Newer Older
1
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Z
zhangjinchao01 已提交
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#ifndef HL_CUDA_H_
#define HL_CUDA_H_

#include <string>
L
liaogang 已提交
19
#include "hl_base.h"
Z
zhangjinchao01 已提交
20 21 22 23

/**
 * @brief   HPPL event handle.
 *
 * Pointer to struct _hl_event_st, which is only forward-declared here.
 */
24
typedef struct _hl_event_st *hl_event_t;
Z
zhangjinchao01 已提交
25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42

/**
 * @brief   Return the cuda runtime api version.
 *
 * @return  cuda runtime api version.
 */
extern int hl_get_cuda_lib_version();

/**
 * @brief   HPPL start (initialize all GPUs).
 */
extern void hl_start();

/**
 * @brief   HPPL start (initialize the specified GPUs).
 *
 * @param[in]   device  device id(s) (0, 1, ...).
 *                      If device is NULL, all GPUs are started.
 * @param[in]   number  number of devices.
 */
43
extern void hl_specify_devices_start(int *device, int number);
Z
zhangjinchao01 已提交
44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126

/**
 * @brief   Query whether a device may directly access a peer device's memory.
 *
 * @param[in]   device      Device from which allocations on peerDevice are
 *                          to be directly accessed.
 * @param[in]   peerDevice  Device on which the allocations to be directly
 *                          accessed by device reside.
 *
 * @return  true if device is capable of directly accessing memory from
 *          peerDevice, false otherwise.
 */
bool hl_device_can_access_peer(int device, int peerDevice);

/**
 * @brief   Enable direct access to memory allocations on a peer device.
 *
 * @param[in]   peerDevice  Peer device to enable direct access to from the
 *                          current device.
 */
void hl_device_enable_peer_access(int peerDevice);

/**
 * @brief   Initialize a worker thread.
 *
 * @param[in]   device  device id.
 */
extern void hl_init(int device);

/**
 * @brief   Finish a worker thread.
 */
extern void hl_fini();

/**
 * @brief   Set the synchronous/asynchronous flag.
 *
 * @param[in]   flag    true (default): set the synchronous flag.
 *                      false: set the asynchronous flag.
 *
 * @note    This setting is only valid for the current worker thread.
 */
extern void hl_set_sync_flag(bool flag);

/**
 * @brief   Get the synchronous/asynchronous flag.
 *
 * @return  true for synchronous calls, false for asynchronous calls.
 */
extern bool hl_get_sync_flag();

/**
 * @brief   Return the number of compute-capable devices.
 *
 * @return  number of compute-capable devices.
 */
extern int hl_get_device_count();

/**
 * @brief   Set the device to be used.
 *
 * @param[in]   device  device id.
 */
extern void hl_set_device(int device);

/**
 * @brief   Return the device that is currently being used.
 *
 * @return  device id.
 */
extern int hl_get_device();

/**
 * @brief   Allocate device memory.
 *
 * @param[in]   size    size in bytes to allocate.
 *
 * @return  pointer to device memory.
 */
127
extern void *hl_malloc_device(size_t size);
Z
zhangjinchao01 已提交
128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143

/**
 * @brief   Free device memory.
 *
 * @param[in]   dest_d  pointer to the device memory to free.
 */
extern void hl_free_mem_device(void *dest_d);

/**
 * @brief   Allocate page-locked host memory.
 *
 * @param[in]   size    size in bytes to allocate.
 *
 * @return  pointer to host memory.
 */
144
extern void *hl_malloc_host(size_t size);
Z
zhangjinchao01 已提交
145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228

/**
 * @brief   Free page-locked host memory.
 *
 * @param[in]   dest_h  pointer to the host memory to free.
 */
extern void hl_free_mem_host(void *dest_h);

/**
 * @brief   Copy data.
 *
 * @param[in]   dst     destination memory address (host or device).
 * @param[in]   src     source memory address (host or device).
 * @param[in]   size    size in bytes to copy.
 */
extern void hl_memcpy(void *dst, void *src, size_t size);

/**
 * @brief   Set device memory to a value.
 *
 * @param[in]   dest_d  pointer to device memory.
 * @param[in]   value   value to set for each byte of the specified memory.
 * @param[in]   size    size in bytes to set.
 */
extern void hl_memset_device(void *dest_d, int value, size_t size);

/**
 * @brief   Copy host memory to device memory.
 *
 * @param[in]   dest_d  destination memory address.
 * @param[in]   src_h   source memory address.
 * @param[in]   size    size in bytes to copy.
 */
extern void hl_memcpy_host2device(void *dest_d, void *src_h, size_t size);

/**
 * @brief   Copy device memory to host memory.
 *
 * @param[in]   dest_h  destination memory address.
 * @param[in]   src_d   source memory address.
 * @param[in]   size    size in bytes to copy.
 */
extern void hl_memcpy_device2host(void *dest_h, void *src_d, size_t size);

/**
 * @brief   Copy device memory to device memory.
 *
 * @param[in]   dest_d  destination memory address.
 * @param[in]   src_d   source memory address.
 * @param[in]   size    size in bytes to copy.
 */
extern void hl_memcpy_device2device(void *dest_d, void *src_d, size_t size);

/**
 * @brief   Generate uniformly distributed floats in (0, 1.0].
 *
 * @param[in]   dest_d  pointer to device memory to store results.
 * @param[in]   num     number of floats to generate.
 */
extern void hl_rand(real *dest_d, size_t num);

/**
 * @brief   Set the seed value of the random number generator.
 *
 * @param[in]   seed    seed value.
 */
extern void hl_srand(unsigned int seed);

/**
 * @brief   Copy data using the given stream.
 *
 * @param[in]   dst     destination memory address (host or device).
 * @param[in]   src     source memory address (host or device).
 * @param[in]   size    size in bytes to copy.
 * @param[in]   stream  stream id.
 */
extern void hl_memcpy_async(void *dst,
229 230 231
                            void *src,
                            size_t size,
                            hl_stream_t stream);
Z
zhangjinchao01 已提交
232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261

/**
 * @brief   Wait for the tasks of a stream to complete.
 *
 * @param[in]   stream  stream id.
 */
extern void hl_stream_synchronize(hl_stream_t stream);

/**
 * @brief   Create an event object.
 *
 * @param[out]  event   New event.
 */
extern void hl_create_event(hl_event_t *event);

/**
 * @brief   Destroy an event object.
 *
 * @param[in]   event   Event to destroy.
 */
extern void hl_destroy_event(hl_event_t event);

/**
 * @brief   Compute the elapsed time between two events.
 *
 * @param[in]   start   Starting event.
 * @param[in]   end     Ending event.
 *
 * @return  time between start and end in ms.
 */
262
extern float hl_event_elapsed_time(hl_event_t start, hl_event_t end);
Z
zhangjinchao01 已提交
263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299

/**
 * @brief   Record an event.
 *
 * @param[in]   stream  Stream in which to insert the event.
 * @param[in]   event   Event waiting to be recorded as completed.
 */
extern void hl_stream_record_event(hl_stream_t stream, hl_event_t event);

/**
 * @brief   Make a compute stream wait on an event.
 *
 * @param[in]   stream  Stream that is made to wait.
 * @param[in]   event   Event to wait on.
 */
extern void hl_stream_wait_event(hl_stream_t stream, hl_event_t event);

/**
 * @brief   Wait for an event to complete.
 *
 * @param[in]   event   Event to wait for.
 */
extern void hl_event_synchronize(hl_event_t event);

/**
 * @brief   Set block flags to be used for device executions.
 *
 * @note    This interface must be called before hl_start().
 */
extern void hl_set_device_flags_block();

/**
 * @brief   Return the last error string from a cuda runtime call.
 *
 * @return  last error string.
 */
300
extern const char *hl_get_device_error_string();
Z
zhangjinchao01 已提交
301 302 303 304 305 306 307 308

/**
 * @brief     Return the cuda runtime error string for an error number.
 *
 * NOTE(review): the previous brief was identical to the no-argument
 * overload ("last error string"); presumably this overload describes
 * err instead -- confirm against the implementation.
 *
 * @param[in] err  error number.
 *
 * @see       hl_get_device_last_error()
 */
309
extern const char *hl_get_device_error_string(size_t err);
Z
zhangjinchao01 已提交
310 311 312 313 314 315 316 317 318 319 320

/**
 * @brief   Return the last error number.
 *
 * @return  error number.
 *
 * @see     hl_get_device_error_string()
 */
extern int hl_get_device_last_error();

/**
 * @brief   Check whether a cuda event is ready.
 *
 * @param[in]   event   cuda event to query.
 *
 * @return  true if the cuda event is ready,
 *          false if the cuda event is not ready.
 */
L
liaogang 已提交
328
extern bool hl_cuda_event_is_ready(hl_event_t event);
Z
zhangjinchao01 已提交
329 330 331 332 333 334

/**
 * @brief   HPPL device synchronization.
 */
extern void hl_device_synchronize();

L
liaogang 已提交
335 336 337 338 339 340 341 342 343 344
/**
 * @brief   Start the GPU profiler.
 */
extern void hl_profiler_start();

/**
 * @brief   Stop the GPU profiler.
 */
extern void hl_profiler_end();

Z
zhangjinchao01 已提交
345
#endif  // HL_CUDA_H_