Matrix¶
Base Matrix¶
hl_matrix.h¶
Functions
-
void
hl_matrix_add
(real *A_d, real *B_d, real *C_d, int dimM, int dimN, real alpha, real beta)¶ Matrix addition: C_d[i] = alpha * A_d[i] + beta * B_d[i].
- Parameters
A_d
-input matrix (M x N).
B_d
-input matrix (M x N).
C_d
-output matrix (M x N).
dimM
-matrix height.
dimN
-matrix width.
alpha
-scalar used for addition.
beta
-scalar used for addition.
-
void
hl_matrix_softmax
(real *A_d, real *C_d, int dimM, int dimN)¶ Matrix Softmax.
- Parameters
A_d
-input matrix (M x N).
C_d
-output matrix (M x N).
dimM
-matrix height.
dimN
-matrix width.
-
void
hl_matrix_softmax_derivative
(real *grad_d, real *output_d, real *sftmaxSum_d, int dimM, int dimN)¶ Matrix softmax derivative.
- Parameters
grad_d
-input matrix (M x N).
output_d
-output matrix (M x N).
sftmaxSum_d
-softmax sum (M x 1).
dimM
-matrix height.
dimN
-matrix width.
-
void
hl_sequence_softmax_forward
(real *A_d, real *C_d, const int *index, int numSequence)¶ Sequence softmax.
- Parameters
A_d
-input vector.
C_d
-output vector.
index
-start positions of sequence.
numSequence
-number of sequences.
-
void
hl_matrix_classification_error
(real *A_d, int *B_d, real *C_d, int dimM, int dimN)¶ Matrix classification error.
- Parameters
A_d
-input matrix (M x N).
B_d
-input vector (M x 1).
C_d
-output vector (M x 1).
dimM
-matrix height.
dimN
-matrix width.
-
void
hl_matrix_cross_entropy
(real *A_d, real *C_d, int *label_d, int dimM, int dimN)¶ Matrix cross entropy.
- Parameters
A_d
-input matrix (M x N).
C_d
-output matrix (M x 1).
label_d
-input matrix (M x 1).
dimM
-matrix height.
dimN
-matrix width.
-
void
hl_matrix_cross_entropy_bp
(real *grad_d, real *output_d, int *label_d, int dimM, int dimN)¶ Matrix cross entropy back propagation.
- Parameters
grad_d
-output matrix (M x N).
output_d
-input matrix (M x N).
label_d
-input vector (M x 1).
dimM
-matrix height.
dimN
-matrix width.
-
void
hl_matrix_zero_mem
(real *data, int num)¶ Matrix zero memory.
- Parameters
data
-input data.
num
-length of data.
-
void
hl_param_relu_forward
(real *output, real *input, real *w, int width, int height, int partial_sum)¶ parameter relu forward
- Parameters
output
-output data
input
-input data
w
-parameter data
width
-matrix width
height
-matrix height
partial_sum
-
-
void
hl_param_relu_backward_w
(real *grad_w, real *grad_o, real *input, int width, int height, int partial_sum)¶ parameter relu backward w
- Parameters
grad_w
-w grad
grad_o
-output grad
input
-input data
width
-matrix width
height
-matrix height
partial_sum
-
-
void
hl_param_relu_backward_diff
(real *grad_o, real *input, real *w, real *diff, int width, int height, int partial_sum)¶ parameter relu backward diff
- Parameters
grad_o
-output grad
input
-input data
w
-parameter
diff
-diff
width
-matrix width
height
-matrix height
partial_sum
-
-
void
hl_cossim
(real *output, real *input1, real *input2, int width, int input1_height, int input2_height, real scale)¶ cos sim forward
- Parameters
output
-output data
input1
-input1 data(matrix)
input2
-input2 data(matrix or vector)
width
-matrix width
input1_height
-input1_height
input2_height
-input2_height
scale
-scale factor
-
void
hl_cossim_derivative
(real *grad, real *output, real *prevOutX, real *prevOutY, real *prevGradX, real *prevGradY, int width, int input1_height, int input2_height, real scale)¶ cos sim derivative
- Parameters
grad
-output grad
output
-output data
prevOutX
-input1 data
prevOutY
-input2 data
prevGradX
-input1 grad
prevGradY
-input2 grad
width
-matrix width
input1_height
-input1 height
input2_height
-input2 height
scale
-scale factor
hl_matrix_base.h¶
hl_matrix_apply.cuh¶
hl_matrix_ops.cuh¶
hl_matrix_type.cuh¶
hl_sse_matrix_kernel.cuh¶
hl_batch_transpose.h¶
Functions
-
void
batchTranspose
(const real *input, real *output, int width, int height, int batchSize)¶ Perform matrix transpose for each data in the batch.
- Note
- Both the input and output are arranged in batch-first order. Each batch has height * width data, which are arranged in height-first (or row-first) manner.
- Parameters
input
-height * width elements in batch.
output
-height * width elements in batch.
width
-width of batch data.
height
-height of batch data.
batchSize
-batch size
Sparse Matrix¶
hl_sparse.h¶
Functions
-
void
hl_malloc_sparse_matrix
(hl_sparse_matrix_s *A_d, hl_matrix_format_t format, hl_matrix_value_t value_type, int dimM, int dimN, int nnz)¶ Malloc a sparse matrix.
- Parameters
A_d
-sparse matrix.
format
-format.
value_type
-valueType.
dimM
-height.
dimN
-width.
nnz
-number of non-zero elements.
-
void
hl_free_sparse_matrix
(hl_sparse_matrix_s A_d)¶ Free a sparse matrix.
- Parameters
A_d
-GPU sparse matrix.
-
void
hl_construct_sparse_matrix
(hl_sparse_matrix_s *A_d, void *dest_d, size_t size, hl_matrix_format_t format, hl_matrix_value_t value_type, int dimM, int dimN, int nnz)¶ Construct a sparse matrix using input gpu memory.
- Note
- Destruct api is hl_destruct_sparse_matrix.
- Parameters
A_d
-sparse matrix.
dest_d
-gpu memory.
size
-size of dest_d.
format
-format.
value_type
-valueType.
dimM
-height.
dimN
-width.
nnz
-number of non-zero elements.
-
void
hl_construct_sparse_matrix
(hl_sparse_matrix_s *A_d, real *value_d, int *rows_d, int *cols_d, hl_matrix_format_t format, hl_matrix_value_t value_type, int dimM, int dimN, int nnz)¶ Use three arrays to construct sparse matrix.
if format is HL_SPARSE_CSR, size of rows_d is dimM + 1, and size of cols_d is nnz;
if format is HL_SPARSE_CSC, size of rows_d is nnz, and size of cols_d is dimN + 1.
if valueType is HL_NO_VALUE, size of value_d is zero, else size of value_d is nnz.
- Note
- The corresponding destructor interface is hl_destruct_sparse_matrix.
- Parameters
A_d
-sparse matrix.
value_d
-value.
rows_d
-row.
cols_d
-col.
format
-format.
value_type
-valueType.
dimM
-height.
dimN
-width.
nnz
-number of non-zero elements.
-
void
hl_destruct_sparse_matrix
(hl_sparse_matrix_s A_d)¶ Destruct sparse matrix.
- Parameters
A_d
-sparse matrix.
-
void
hl_memcpy_csr_matrix
(hl_sparse_matrix_s csr_matrix, real *csr_val, int *csr_row, int *csr_col, hl_stream_t stream)¶ Copy value & index to sparse matrix.
if csr_matrix is HL_FLOAT_VALUE.
- csr_val, csr_row, csr_col three pointers are not null.
- csr_val is not null, csr_row and csr_col are null.
if csr_matrix is HL_NO_VALUE.
- csr_val will be ignored, csr_row and csr_col are not null.
- Parameters
csr_matrix
-sparse matrix.
csr_val
-point to csr value array(nnz).
csr_row
-point to csr row indices array(dimM+1).
csr_col
-point to csr col indices array(nnz).
stream
-hl_stream_t type.
-
void
hl_memcpy_csc_matrix
(hl_sparse_matrix_s csc_matrix, real *csc_val, int *csc_row, int *csc_col, hl_stream_t stream)¶ Copy value & index to sparse matrix.
if csc_matrix is HL_FLOAT_VALUE.
- csc_val, csc_row, csc_col three pointers are not null.
- csc_val is not null, csc_row and csc_col are null.
if csc_matrix is HL_NO_VALUE.
- csc_val will be ignored, csc_row and csc_col are not null.
- Parameters
csc_matrix
-sparse matrix.
csc_val
-point to csc value array(nnz).
csc_row
-point to csc row indices array(nnz).
csc_col
-point to csc col indices array(dimN+1).
stream
-hl_stream_t type.
-
void
hl_memcpy_sparse_matrix
(hl_sparse_matrix_s dst, hl_sparse_matrix_s src, hl_stream_t stream)¶ Copy sparse matrix to sparse matrix.
- Note
- 1. Format of the src matrix and dst matrix needs to be consistent.
- 2. If the source matrix has values, the destination matrix may either have values or have no values; if the source matrix has no values, the destination matrix must also have no values.
- Parameters
dst
-sparse matrix.
src
-sparse matrix.
stream
-hl_stream_t type.
-
void
hl_matrix_csr2dense
(hl_sparse_matrix_s A_d, real *C_d, int dimM, int dimN)¶ csr matrix to dense matrix.
- Parameters
A_d
-csr matrix.
C_d
-dense matrix.
dimM
-height.
dimN
-width.
-
void
hl_matrix_csc2dense
(hl_sparse_matrix_s A_d, real *C_d, int dimM, int dimN)¶ csc matrix to dense matrix.
- Parameters
A_d
-csc matrix.
C_d
-dense matrix.
dimM
-height.
dimN
-width.
-
void
hl_matrix_csr_mul_dense
(hl_sparse_matrix_s A_d, hl_trans_op_t transa, real *B_d, hl_trans_op_t transb, real *C_d, int dimM, int dimN, int dimK, real alpha, real beta)¶ C_d = alpha*(op(A_d) * op(B_d)) + beta*C_d.
- Note
- transb does not support HPPL_OP_T.
- Parameters
A_d
-csr sparse matrix.
transa
-operation op(A): either non-transpose or transpose.
B_d
-dense matrix.
transb
-operation op(B): either non-transpose or transpose.
C_d
-dense matrix.
dimM
-matrix height of op(A) & C
dimN
-matrix width of op(B) & C
dimK
-width of op(A) & height of op(B)
alpha
-scalar used for multiplication.
beta
-scalar used for multiplication.
-
void
hl_matrix_csc_mul_dense
(hl_sparse_matrix_s A_d, hl_trans_op_t transa, real *B_d, hl_trans_op_t transb, real *C_d, int dimM, int dimN, int dimK, real alpha, real beta)¶ C_d = alpha*(op(A_d) * op(B_d)) + beta*C_d.
- Note
- transb does not support HPPL_OP_T.
- Parameters
A_d
-sparse matrix.
transa
-operation op(A): either non-transpose or transpose.
B_d
-dense matrix.
transb
-operation op(B): either non-transpose or transpose.
C_d
-dense matrix.
dimM
-matrix height of op(A) & C
dimN
-matrix width of op(B) & C
dimK
-width of op(A) & height of op(B)
alpha
-scalar used for multiplication.
beta
-scalar used for multiplication.
-
void
hl_matrix_dense_mul_csc
(real *A_d, hl_trans_op_t transa, hl_sparse_matrix_s B_d, hl_trans_op_t transb, real *C_d, int dimM, int dimN, int dimK, real alpha, real beta)¶ C_d = alpha*(op(A_d) * op(B_d)) + beta*C_d.
- Note
- transa does not support HPPL_OP_T.
- Parameters
A_d
-dense matrix.
transa
-operation op(A): either non-transpose or transpose.
B_d
-csc sparse matrix.
transb
-operation op(B): either non-transpose or transpose.
C_d
-dense matrix.
dimM
-matrix height of op(A) & C
dimN
-matrix width of op(B) & C
dimK
-width of op(A) & height of op(B)
alpha
-scalar used for multiplication.
beta
-scalar used for multiplication.
-
void
hl_sparse_matrix_mul
(real *A_d, hl_trans_op_t transa, real *B_d, hl_trans_op_t transb, hl_sparse_matrix_s C_d, int dimM, int dimN, int dimK, real alpha, real beta)¶ C_d = alpha*(op(A_d) * op(B_d)) + beta*C_d. Calculated based on the non-zero elements of the matrix C.
- Note
- transb does not support HPPL_OP_T.
- Parameters
A_d
-dense matrix.
transa
-operation op(A): either non-transpose or transpose.
B_d
-dense matrix.
transb
-operation op(B): either non-transpose or transpose.
C_d
-sparse matrix.
dimM
-matrix height of op(A) & C
dimN
-matrix width of op(B) & C
dimK
-width of op(A) & height of op(B)
alpha
-scalar used for multiplication.
beta
-scalar used for multiplication.
-
void
hl_matrix_dense_mul_csr
(real *A_d, hl_trans_op_t transa, hl_sparse_matrix_s B_d, hl_trans_op_t transb, real *C_d, int dimM, int dimN, int dimK, real alpha, real beta)¶ C_d = alpha*(op(A_d) * op(B_d)) + beta*C_d.
- Note
- transa does not support HPPL_OP_T.
- Parameters
A_d
-dense matrix.
transa
-operation op(A): either non-transpose or transpose.
B_d
-sparse matrix.
transb
-operation op(B): either non-transpose or transpose.
C_d
-dense matrix.
dimM
-matrix height of op(A) & C
dimN
-matrix width of op(B) & C
dimK
-width of op(A) & height of op(B)
alpha
-scalar used for multiplication.
beta
-scalar used for multiplication.
-
void
hl_memcpy_from_csc_matrix
(real *csc_val, size_t val_size, int *csc_row, size_t row_size, int *csc_col, size_t col_size, hl_sparse_matrix_s csc_matrix, hl_stream_t stream)¶ Memcpy csc_matrix to host.
a. according to csc_matrix, update three arrays
- csc_val, csc_row, csc_col are dest Address.
- if type of csc_matrix is HL_NO_VALUE, update csc_row and csc_col
- if type of csc_matrix is HL_FLOAT_VALUE, update csc_row, csc_col and csc_val.
b. The interface is asynchronous copy. To ensure that the data is copied please call the synchronous interface;
- Parameters
csc_val
-point to csc value array(nnz).
val_size
-csc value size.
csc_row
-point to csc row indices array(nnz).
row_size
-csc row size.
csc_col
-point to csc col indices array(dimN + 1).
col_size
-csc column size.
csc_matrix
-sparse matrix.
stream
-hl_stream_t type.
-
void
hl_memcpy_from_csr_matrix
(real *csr_val, size_t val_size, int *csr_row, size_t row_size, int *csr_col, size_t col_size, hl_sparse_matrix_s csr_matrix, hl_stream_t stream)¶ Memcpy sparse matrix to host.
a. according to csr_matrix, update three arrays
- csr_val, csr_row, csr_col are dest Address.
- if type of csr_matrix is HL_NO_VALUE, update csr_row and csr_col
- if type of csr_matrix is HL_FLOAT_VALUE, update csr_row, csr_col and csr_val.
b. The interface is asynchronous copy. To ensure that the data is copied please call the synchronous interface;
- Parameters
csr_val
-point to csr value array(nnz).
val_size
-csr value size.
csr_row
-point to csr row indices array(dimM + 1).
row_size
-csr row size.
csr_col
-point to csr col indices array(nnz).
col_size
-csr column size.
csr_matrix
-sparse matrix.
stream
-hl_stream_t type.
-
void
hl_sparse_matrix_column_sum
(real *A_d, hl_sparse_matrix_s B_d, int dimM, int dimN, real scale)¶ A_d[j] += B_d[i,j] for i in range(height)
- Parameters
A_d
-vector, size = width.
B_d
-sparse matrix.
dimM
-height.
dimN
-width.
scale
-scale of B_d
-
void
hl_matrix_csr_column_sum
(real *A_d, hl_sparse_matrix_s B_d, int dimM, int dimN, real scale)¶ implementation of csr sparse matrix in hl_sparse_matrix_column_sum
-
void
hl_sparse_matrix_add_bias
(hl_sparse_matrix_s A_d, real *B_d, real scale)¶ A_d[i,j] += B_d[j].
- Parameters
A_d
-sparse matrix.
B_d
-vector, size = A_d.width.
scale
-scale of B_d.
-
void
hl_matrix_csr_add_bias
(hl_sparse_matrix_s A_d, real *B_d, real scale)¶ implementation of csr sparse matrix in hl_sparse_matrix_add_bias
-
void
hl_sparse_matrix_add_dense
(hl_sparse_matrix_s A_d, real *B_d, int dimM, int dimN, real alpha, real beta)¶ sparseMatrix = alpha * denseMatrix + beta * sparseMatrix, i.e. A_d[i,j] = alpha * B_d[i,j] + beta * A_d[i,j]. Only values of the dense matrix at (row, col) positions present in the sparse matrix are added; dense values whose positions are not in the sparse matrix are not used.
- Parameters
A_d
-sparse matrix.
B_d
-dense matrix.
dimM
-height of B_d.
dimN
-width of B_d.
alpha
-scale of B_d.
beta
-scale of A_d.
-
void
hl_matrix_csr_add_dense
(hl_sparse_matrix_s A_d, real *B_d, int dimM, int dimN, real alpha, real beta)¶ implementation of csr sparse matrix in hl_sparse_matrix_add_dense
-
int *
hl_sparse_matrix_get_rows
(hl_sparse_matrix_s sMat)¶ get rows pointer of GpuSparseMatrix
- Return
- return rows pointer, which is gpu address
- Parameters
sMat
-sparse matrix
-
int *
hl_sparse_matrix_get_cols
(hl_sparse_matrix_s sMat)¶ get cols pointer of GpuSparseMatrix
- Return
- return cols pointer, which is gpu address
- Parameters
sMat
-sparse matrix
-
real *
hl_sparse_matrix_get_value
(hl_sparse_matrix_s sMat)¶ get value pointer of GpuSparseMatrix
- Return
- return value pointer, which is gpu address
- Parameters
sMat
-sparse matrix
hl_sparse.ph¶
Others¶
hl_aggregate.h¶
Functions
-
void
hl_matrix_row_sum
(real *A_d, real *C_d, int dimM, int dimN)¶ Calculate the sum of each row of the matrix A_d.
- Parameters
A_d
-input matrix (M x N).
C_d
-output matrix (M x 1).
dimM
-matrix height.
dimN
-matrix width.
-
void
hl_matrix_row_max
(real *A_d, real *C_d, int dimM, int dimN)¶ Calculate the maximum value of each row of the matrix A_d.
- Parameters
A_d
-input matrix (M x N).
C_d
-output matrix (M x 1).
dimM
-matrix height.
dimN
-matrix width.
-
void
hl_matrix_row_min
(real *A_d, real *C_d, int dimM, int dimN)¶ Calculate the minimum value of each row of the matrix A_d.
- Parameters
A_d
-input matrix (M x N).
C_d
-output matrix (M x 1).
dimM
-matrix height.
dimN
-matrix width.
-
void
hl_matrix_column_sum
(real *A_d, real *C_d, int dimM, int dimN)¶ Calculate the sum of each column of the matrix A_d.
- Parameters
A_d
-input matrix (M x N).
C_d
-output Matrix (1 x N).
dimM
-matrix height.
dimN
-matrix width.
-
void
hl_matrix_column_max
(real *A_d, real *C_d, int dimM, int dimN)¶ Calculate the maximum value of each column of the matrix A_d.
- Parameters
A_d
-input matrix (M x N).
C_d
-output matrix (1 x N).
dimM
-matrix height.
dimN
-matrix width.
-
void
hl_matrix_column_min
(real *A_d, real *C_d, int dimM, int dimN)¶ Calculate the minimum value of each column of the matrix A_d.
- Parameters
A_d
-input matrix (M x N).
C_d
-output matrix (1 x N).
dimM
-matrix height.
dimN
-matrix width.
-
void
hl_vector_sum
(real *A_d, real *C_h, int dimM)¶ C_h = sum(A_d[i]).
- Parameters
A_d
-input(m).
C_h
-output(host memory).
dimM
-size of vector.
-
void
hl_vector_abs_sum
(real *A_d, real *C_h, int dimM)¶ C_h = sum(abs(A_d[i])).
- Parameters
A_d
-input(m).
C_h
-output(host memory).
dimM
-size of vector.
hl_table_apply.h¶
Functions
-
void
hl_matrix_select_rows
(real *output, int ldo, real *table, int ldt, int *ids, int numSamples, int tableSize, int dim)¶ Get row from table. output[i] += table[ids[i]]; if ids[i] == -1, it will be ignored.
- Parameters
output
-output matrix.
ldo
-leading dimension of output.
table
-table matrix.
ldt
-leading dimension of table.
ids
-ids vector.
numSamples
-height of output.
tableSize
-height of table.
dim
-width of table.
-
void
hl_matrix_add_to_rows
(real *table, int ldt, real *input, int ldi, int *ids, int numSamples, int tableSize, int dim)¶ Add row to table. table[ids[i]] += input[i]; if ids[i] == -1, it will be ignored.
- Parameters
table
-table matrix.
ldt
-leading dimension of table.
input
-input matrix.
ldi
-leading dimension of input.
ids
-ids vector.
numSamples
-height of input.
tableSize
-height of table.
dim
-width of table.
- template <class T>
-
void
hl_vector_select_from
(T *dst, int sized, const T *src, int sizes, const int *ids, int sizei)¶ Select element from vector.
- Parameters
dst
-output vector.
sized
-size of dst.
src
-input vector.
sizes
-size of src.
ids
-index vector.
sizei
-size of ids.
hl_top_k.h¶
Functions
-
void
hl_matrix_top_k
(real *topVal, int ldv, int *topIds, real *src, int lds, int dim, int beamSize, int numSamples)¶ find top k element.
- Parameters
topVal
-top k element.
ldv
-leading dimension of topVal.
topIds
-top k index.
src
-input value.
lds
-leading dimension of src.
dim
-width of input value.
beamSize
-beam size.
numSamples
-height of input value.
-
void
hl_sparse_matrix_top_k
(real *topVal, int ldv, int *topIds, hl_sparse_matrix_s src, int beamSize, int numSamples)¶ find top k element for each row in sparse matrix.
- Note
- Only support HL_SPARSE_CSR format.
- Parameters
topVal
-top k element.
ldv
-leading dimension of topVal.
topIds
-top k index.
src
-sparse matrix.
beamSize
-beam size.
numSamples
-height of input value.