/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */


#ifndef HL_CUDA_H_
#define HL_CUDA_H_

#include "hl_base.h"
#include <string>

/**
 * @brief   HPPL event handle.
 *
 * Declared as a pointer to struct _hl_event_st; the struct definition is
 * not visible in this header, so callers treat the handle as opaque.
 */
typedef struct _hl_event_st *  hl_event_t;


/**
 * @brief   Get the CUDA runtime API version.
 *
 * @return  CUDA runtime API version.
 */
extern int hl_get_cuda_lib_version();

/**
 * @brief   HPPL start (initialize all GPUs).
 */
extern void hl_start();

/**
 * @brief   HPPL start (initialize the specified GPUs).
 *
 * @param[in]   device  device ids (0, 1, ...).
 *                      If device is NULL, all GPUs are started.
 * @param[in]   number  number of devices.
 */
extern void hl_specify_devices_start(int* device, int number);

/**
 * @brief   Query whether a device can directly access a peer device's memory.
 *
 * @param[in]   device      Device from which allocations on peerDevice are
 *                          to be directly accessed.
 * @param[in]   peerDevice  Device on which the allocations to be directly
 *                          accessed by device reside.
 *
 * @return  true if device is capable of directly accessing memory
 *          from peerDevice, false otherwise.
 */
bool hl_device_can_access_peer(int device, int peerDevice);

/**
 * @brief   Enable direct access to memory allocations on a peer device.
 *
 * @param[in]   peerDevice  Peer device to enable direct access to from the
 *                          current device.
 */
void hl_device_enable_peer_access(int peerDevice);

/**
 * @brief   Initialize a worker thread.
 *
 * @param[in]   device  device id.
 */
extern void hl_init(int device);

/**
 * @brief   Finish a worker thread.
 */
extern void hl_fini();

/**
 * @brief   Set the synchronous/asynchronous flag.
 *
 * @param[in]   flag    true (default): set the synchronous flag.
 *                      false: set the asynchronous flag.
 *
 * @note    This setting is only valid for the current worker thread.
 */
extern void hl_set_sync_flag(bool flag);

/**
 * @brief   Get the synchronous/asynchronous flag.
 *
 * @return  true for synchronous calls,
 *          false for asynchronous calls.
 */
extern bool hl_get_sync_flag();

/**
 * @brief   Get the number of compute-capable devices.
 *
 * @return  number of compute-capable devices.
 */
extern int hl_get_device_count();

/**
 * @brief   Set the device to be used.
 *
 * @param[in]   device  device id.
 */
extern void hl_set_device(int device);

/**
 * @brief   Get the device that is currently being used.
 *
 * @return  id of the device currently in use.
 */
extern int hl_get_device();

/**
 * @brief   Allocate device memory.
 *
 * @param[in]   size     size in bytes to allocate.
 *
 * @return      pointer to the allocated device memory.
 */
extern void* hl_malloc_device(size_t size);

/**
 * @brief   Free device memory.
 *
 * @param[in]   dest_d  pointer to the device memory to free.
 */
extern void hl_free_mem_device(void *dest_d);

/**
 * @brief   Allocate page-locked host memory.
 *
 * @param[in]   size     size in bytes to allocate.
 *
 * @return      pointer to the allocated host memory.
 */
extern void* hl_malloc_host(size_t size);

/**
 * @brief   Free page-locked host memory.
 *
 * @param[in]   dest_h  pointer to the host memory to free.
 */
extern void hl_free_mem_host(void *dest_h);

/**
 * @brief   Copy data between memory regions.
 *
 * @param[in]   dst     destination memory address (host or device).
 * @param[in]   src     source memory address (host or device).
 * @param[in]   size    size in bytes to copy.
 */
extern void hl_memcpy(void *dst, void *src, size_t size);

/**
 * @brief   Set device memory to a value.
 *
 * @param[in]   dest_d  pointer to device memory.
 * @param[in]   value   value to set for each byte of the specified memory.
 * @param[in]   size    size in bytes to set.
 */
extern void hl_memset_device(void *dest_d, int value, size_t size);

/**
 * @brief   Copy host memory to device memory.
 *
 * @param[in]   dest_d  destination (device) memory address.
 * @param[in]   src_h   source (host) memory address.
 * @param[in]   size    size in bytes to copy.
 */
extern void hl_memcpy_host2device(void *dest_d, void *src_h, size_t size);

/**
 * @brief   Copy device memory to host memory.
 *
 * @param[in]   dest_h  destination (host) memory address.
 * @param[in]   src_d   source (device) memory address.
 * @param[in]   size    size in bytes to copy.
 */
extern void hl_memcpy_device2host(void *dest_h, void *src_d, size_t size);

/**
 * @brief   Copy device memory to device memory.
 *
 * @param[in]   dest_d  destination (device) memory address.
 * @param[in]   src_d   source (device) memory address.
 * @param[in]   size    size in bytes to copy.
 */
extern void hl_memcpy_device2device(void *dest_d, void *src_d, size_t size);

/**
 * @brief   Generate uniformly distributed floats in (0, 1.0].
 *
 * @param[in]   dest_d  pointer to device memory to store the results.
 * @param[in]   num     number of values to generate.
 *
 * NOTE(review): the element type is `real`, which is declared outside this
 * header, while this comment speaks of floats -- confirm the precision.
 */
extern void hl_rand(real *dest_d, size_t num);

/**
 * @brief   Set the seed of the random number generator.
 *
 * @param[in]   seed    seed value.
 */
extern void hl_srand(unsigned int seed);

/**
 * @brief   Copy data on the given stream.
 *
 * @param[in]   dst     destination memory address (host or device).
 * @param[in]   src     source memory address (host or device).
 * @param[in]   size    size in bytes to copy.
 * @param[in]   stream  stream id.
 */
extern void hl_memcpy_async(void *dst,
                            void *src,
                            size_t size,
                            hl_stream_t stream);

/**
 * @brief   Wait for the tasks of a stream to complete.
 *
 * @param[in]   stream  stream id.
 */
extern void hl_stream_synchronize(hl_stream_t stream);

/**
 * @brief   Create an event object.
 *
 * @param[out]  event   receives the newly created event.
 */
extern void hl_create_event(hl_event_t *event);

/**
 * @brief   Destroy an event object.
 *
 * @param[in]   event   event to destroy.
 */
extern void hl_destroy_event(hl_event_t event);

/**
 * @brief   Compute the elapsed time between two events.
 *
 * @param[in]   start  starting event.
 * @param[in]   end    ending event.
 *
 * @return      time between start and end, in milliseconds.
 */
extern float hl_event_elapsed_time(hl_event_t start,
                                   hl_event_t end);

/**
 * @brief   Record an event in a stream.
 *
 * @param[in]   stream   stream in which to insert the event.
 * @param[in]   event    event to record.
 */
extern void hl_stream_record_event(hl_stream_t stream, hl_event_t event);

/**
 * @brief   Make a compute stream wait on an event.
 *
 * @param[in]   stream   stream that is made to wait on the event.
 * @param[in]   event    event to wait on.
 */
extern void hl_stream_wait_event(hl_stream_t stream, hl_event_t event);

/**
 * @brief   Wait for an event to complete.
 *
 * @param[in]   event   event to wait for.
 */
extern void hl_event_synchronize(hl_event_t event);

/**
 * @brief   Set the block flags to be used for device executions.
 *
 * @note    This function must be called before hl_start.
 */
extern void hl_set_device_flags_block();

/**
 * @brief   Get the last error string from a CUDA runtime call.
 *
 * @return  the last error string.
 */
extern const char* hl_get_device_error_string();

/**
 * @brief     Get the error string for a given error number.
 *
 * @param[in] err  error number.
 *
 * @return    the error string for err.
 *
 * @see       hl_get_device_last_error()
 *
 * NOTE(review): err is a size_t here while hl_get_device_last_error()
 * returns int -- confirm the conversion is intended.
 */
extern const char* hl_get_device_error_string(size_t err);

/**
 * @brief   Get the last error number.
 *
 * @return  the last error number.
 *
 * @see     hl_get_device_error_string()
 */
extern int hl_get_device_last_error();

/**
 * @brief   Check whether a CUDA event is ready.
 *
 * @param[in]  event   CUDA event to query.
 *
 * @return     true    the CUDA event is ready.
 *             false   the CUDA event is not ready.
 */
extern bool hl_cuda_event_is_ready(hl_event_t event);

/**
 * @brief   HPPL device synchronization.
 */
extern void hl_device_synchronize();

/**
 * @brief   Start the GPU profiler.
 */
extern void hl_profiler_start();

/**
 * @brief   Stop the GPU profiler.
 */
extern void hl_profiler_end();

#endif  // HL_CUDA_H_