Matrix
Base Matrix
hl_matrix.h
Functions
- void hl_matrix_add(real *A_d, real *B_d, real *C_d, int dimM, int dimN, real alpha, real beta)
  Matrix addition: C_d[i] = alpha * A_d[i] + beta * B_d[i].
  Parameters:
  - A_d: input matrix (M x N).
  - B_d: input matrix (M x N).
  - C_d: output matrix (M x N).
  - dimM: matrix height.
  - dimN: matrix width.
  - alpha: scalar used for addition.
  - beta: scalar used for addition.
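  A minimal usage sketch (not part of the header: it assumes the default single-precision build where real is float, and device buffers allocated with the CUDA runtime; names are illustrative):

    #include <cuda_runtime.h>
    #include "hl_matrix.h"

    /* Compute C = 0.5 * A + 2.0 * B for a 4 x 8 matrix on the GPU. */
    void add_example(void) {
        const int dimM = 4, dimN = 8;
        const size_t bytes = (size_t)dimM * dimN * sizeof(real);
        real *A_d, *B_d, *C_d;
        cudaMalloc((void **)&A_d, bytes);
        cudaMalloc((void **)&B_d, bytes);
        cudaMalloc((void **)&C_d, bytes);
        /* ... fill A_d and B_d, e.g. via cudaMemcpy from host data ... */
        hl_matrix_add(A_d, B_d, C_d, dimM, dimN, 0.5, 2.0);
        cudaFree(A_d);
        cudaFree(B_d);
        cudaFree(C_d);
    }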
- void hl_matrix_softmax(real *A_d, real *C_d, int dimM, int dimN)
  Matrix softmax.
  Parameters:
  - A_d: input matrix (M x N).
  - C_d: output matrix (M x N).
  - dimM: matrix height.
  - dimN: matrix width.
- void hl_matrix_softmax_derivative(real *grad_d, real *output_d, real *sftmaxSum_d, int dimM, int dimN)
  Matrix softmax derivative.
  Parameters:
  - grad_d: input matrix (M x N).
  - output_d: output matrix (M x N).
  - sftmaxSum_d: softmax sum (M x 1).
  - dimM: matrix height.
  - dimN: matrix width.
- void hl_sequence_softmax_forward(real *A_d, real *C_d, const int *index, int numSequence)
  Sequence softmax.
  Parameters:
  - A_d: input vector.
  - C_d: output vector.
  - index: start positions of the sequences.
  - numSequence: number of sequences.
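  To illustrate the index layout, a sketch assuming the usual packed-sequence convention (the start offset of each sequence plus a terminating end offset; the header does not state whether index must live in host or device memory, so treat the placement as an assumption):

    /* Two packed sequences: elements [0, 3) and [3, 5) of A_d. */
    const int numSequence = 2;
    const int index[] = {0, 3, 5};
    hl_sequence_softmax_forward(A_d, C_d, index, numSequence);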
- void hl_matrix_classification_error(real *A_d, int *B_d, real *C_d, int dimM, int dimN)
  Matrix classification error.
  Parameters:
  - A_d: input matrix (M x N).
  - B_d: input vector (M x 1).
  - C_d: output vector (M x 1).
  - dimM: matrix height.
  - dimN: matrix width.
- void hl_matrix_cross_entropy(real *A_d, real *C_d, int *label_d, int dimM, int dimN)
  Matrix cross entropy.
  Parameters:
  - A_d: input matrix (M x N).
  - C_d: output vector (M x 1).
  - label_d: input vector (M x 1).
  - dimM: matrix height.
  - dimN: matrix width.
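  A sketch of the expected semantics; the header documents only the shapes, so the per-row formula below is the standard cross-entropy definition and is an assumption here:

    /* For each row i of the M x N probability matrix A_d:
     *   C_d[i] = -log(A_d[i * dimN + label_d[i]])
     * where label_d holds one class index per row. */
    hl_matrix_cross_entropy(A_d, C_d, label_d, dimM, dimN);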
- void hl_matrix_cross_entropy_bp(real *grad_d, real *output_d, int *label_d, int dimM, int dimN)
  Matrix cross entropy back propagation.
  Parameters:
  - grad_d: output matrix (M x N).
  - output_d: input matrix (M x N).
  - label_d: input vector (M x 1).
  - dimM: matrix height.
  - dimN: matrix width.
- void hl_matrix_multi_binary_cross_entropy(real *output, real *entropy, hl_sparse_matrix_s mat, int dimM, int dimN)
  Matrix multi-binary label cross entropy.
  Parameters:
  - output: input matrix (M x N).
  - entropy: output matrix (M x 1).
  - mat: input sparse matrix.
  - dimM: matrix height.
  - dimN: matrix width.
- void hl_matrix_multi_binary_cross_entropy_bp(real *output, real *grad, hl_sparse_matrix_s mat, int dimM, int dimN)
  Matrix multi-binary label cross entropy back propagation.
  Parameters:
  - output: input matrix (M x N).
  - grad: output matrix (M x N).
  - mat: input sparse matrix.
  - dimM: matrix height.
  - dimN: matrix width.
- void hl_matrix_zero_mem(real *data, int num)
  Zero a matrix's memory.
  Parameters:
  - data: input data.
  - num: length of data.
- void hl_param_relu_forward(real *output, real *input, real *w, int width, int height, int partial_sum)
  Parametric ReLU forward.
  Parameters:
  - output: output data.
  - input: input data.
  - w: parameter data.
  - width: matrix width.
  - height: matrix height.
  - partial_sum:
- void hl_param_relu_backward_w(real *grad_w, real *grad_o, real *input, int width, int height, int partial_sum)
  Parametric ReLU backward with respect to w.
  Parameters:
  - grad_w: gradient of w.
  - grad_o: output gradient.
  - input: input data.
  - width: matrix width.
  - height: matrix height.
  - partial_sum:
- void hl_param_relu_backward_diff(real *grad_o, real *input, real *w, real *diff, int width, int height, int partial_sum)
  Parametric ReLU backward with respect to the input (diff).
  Parameters:
  - grad_o: output gradient.
  - input: input data.
  - w: parameter.
  - diff: input gradient.
  - width: matrix width.
  - height: matrix height.
  - partial_sum:
- void hl_cossim(real *output, real *input1, real *input2, int width, int input1_height, int input2_height, real scale)
  Cosine similarity forward.
  Parameters:
  - output: output data.
  - input1: input1 data (matrix).
  - input2: input2 data (matrix or vector).
  - width: matrix width.
  - input1_height: input1 height.
  - input2_height: input2 height.
  - scale: scale factor.
- void hl_cossim_derivative(real *grad, real *output, real *prevOutX, real *prevOutY, real *prevGradX, real *prevGradY, int width, int input1_height, int input2_height, real scale)
  Cosine similarity derivative.
  Parameters:
  - grad: output gradient.
  - output: output data.
  - prevOutX: input1 data.
  - prevOutY: input2 data.
  - prevGradX: input1 gradient.
  - prevGradY: input2 gradient.
  - width: matrix width.
  - input1_height: input1 height.
  - input2_height: input2 height.
  - scale: scale factor.
- Matrix addition: A_d[i][j] += scale * B_d[j/channel].
  Parameters:
  - A_d: input matrix (M x N).
  - B_d: input matrix (1 x channel).
  - channel: width of B.
  - dimM: height of A.
  - dimN: width of A.
  - scale: scalar used for addition.
- Matrix addition: A_d[i][j] += scale * B_d[j/channel].
  Parameters:
  - B_d: input matrix (1 x channel).
  - A_d: input matrix (M x N).
  - channel: width of B.
  - dimM: height of A.
  - dimN: width of A.
  - scale: scalar used for addition.
hl_matrix_base.h
- class BaseOp
  Public Functions
  - BaseOp()
  - BaseOp(const real s1)
  - BaseOp(const real s1, const real s2)
  - INLINE vecType BaseOp::vecOp(const vecType a) const
  - INLINE vecType BaseOp::vecOp(const vecType a, const vecType b) const
  Public Static Attributes
  - const bool sse
- namespace aggregate
  - class sum
    Inherits from aggregate::SSESum
    Public Functions
    - INLINE real aggregate::sum::init()
    - INLINE real aggregate::sum::operator()(const real a, const real b) const
  - class max
    Inherits from aggregate::SSEMax
    Public Functions
    - INLINE real aggregate::max::init()
    - INLINE real aggregate::max::operator()(const real a, const real b) const
  - class min
    Inherits from aggregate::SSEMin
    Public Functions
    - INLINE real aggregate::min::init()
    - INLINE real aggregate::min::operator()(const real a, const real b) const
- namespace base
  - namespace binary
    - class add
      Inherits from base::binary::SSEAdd
      Public Functions
      - INLINE real base::binary::add::operator()(const real a, const real b) const
    - class add2
      Inherits from base::binary::SSEAdd2
      Public Functions
      - add2(const real s1, const real s2)
      - INLINE real base::binary::add2::operator()(const real a, const real b) const
    - class sub
      Inherits from base::binary::SSESub
      Public Functions
      - INLINE real base::binary::sub::operator()(const real a, const real b) const
    - class mul
      Inherits from base::binary::SSEMul
      Public Functions
      - INLINE real base::binary::mul::operator()(const real a, const real b) const
    - class div
      Inherits from base::binary::SSEDiv
      Public Functions
      - INLINE real base::binary::div::operator()(const real a, const real b) const
    - class squaredDiff
      Inherits from base::binary::SSESquaredDiff
      Public Functions
      - INLINE real base::binary::squaredDiff::operator()(const real a, const real b) const
    - class first
      Inherits from base::binary::SSEFirst
      Public Functions
      - INLINE real base::binary::first::operator()(const real a, const real b) const
    - class second
      Inherits from base::binary::SSESecond
      Public Functions
      - INLINE real base::binary::second::operator()(const real a, const real b) const
hl_matrix_apply.cuh
Defines
- HL_MATRIX_APPLY_H_
Functions
- template <class T, class Op>
  void hl_cpu_apply_unary_op(Op op, T *A_h, int dimM, int dimN, int lda)
  CPU element-wise unary operator: applies op(a) for 0 <= i < dimM and 0 <= j < dimN.
  Parameters:
  - op: unary op; see namespace unary.
  - A_h: matrix.
  - dimM: matrix height.
  - dimN: matrix width.
  - lda: leading dimension of A.
- template <class T, class Op, bool BAsRowVector, bool BAsColVector>
  void hl_cpu_apply_binary_op(Op op, T *A_h, T *B_h, int dimM, int dimN, int lda, int ldb)
  CPU element-wise binary operator: applies op(a, b) for 0 <= i < dimM and 0 <= j < dimN.
  - if (BAsRowVector == 0 && BAsColVector == 0): op(A[i * lda + j], B[i * ldb + j])
  - if (BAsRowVector == 1 && BAsColVector == 0): op(A[i * lda + j], B[j])
  - if (BAsRowVector == 0 && BAsColVector == 1): op(A[i * lda + j], B[i * ldb])
  - if (BAsRowVector == 1 && BAsColVector == 1): op(A[i * lda + j], B[0])
  Parameters:
  - op: binary op; see namespace binary.
  - A_h: matrix.
  - B_h: matrix.
  - dimM: matrix height.
  - dimN: matrix width.
  - lda: leading dimension of A.
  - ldb: leading dimension of B.
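  A sketch tying this to the functors from hl_matrix_base.h above: element-wise A += B using base::binary::add with both broadcast flags off. Contiguous row-major storage is assumed (lda = ldb = dimN), and default-constructibility of base::binary::add is inferred from the class list above:

    #include "hl_matrix_base.h"
    #include "hl_matrix_apply.cuh"

    /* A_h[i][j] += B_h[i][j] over an M x N matrix. */
    void elementwise_add(real *A_h, real *B_h, int dimM, int dimN) {
        hl_cpu_apply_binary_op<real, base::binary::add, false, false>(
            base::binary::add(), A_h, B_h, dimM, dimN, dimN, dimN);
    }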
- template <class T, class Op, bool CAsRowVector, bool CAsColVector>
  void hl_cpu_apply_ternary_op(Op op, T *A_h, T *B_h, T *C_h, int dimM, int dimN, int lda, int ldb, int ldc)
  CPU element-wise ternary operator: applies op(a, b, c) for 0 <= i < dimM and 0 <= j < dimN.
  - if (CAsRowVector == 0 && CAsColVector == 0): op(A[i*lda + j], B[i*ldb + j], C[i*ldc + j])
  - if (CAsRowVector == 1 && CAsColVector == 0): op(A[i*lda + j], B[i*ldb + j], C[j])
  - if (CAsRowVector == 0 && CAsColVector == 1): op(A[i*lda + j], B[i*ldb + j], C[i*ldc])
  - if (CAsRowVector == 1 && CAsColVector == 1): op(A[i*lda + j], B[i*ldb + j], C[0])
  Parameters:
  - op: ternary op; see namespace ternary.
  - A_h: matrix.
  - B_h: matrix.
  - C_h: matrix.
  - dimM: matrix height.
  - dimN: matrix width.
  - lda: leading dimension of A.
  - ldb: leading dimension of B.
  - ldc: leading dimension of C.
- template <class T, class Op>
  void hl_cpu_apply_quaternary_op(Op op, T *A_h, T *B_h, T *C_h, T *D_h, int dimM, int dimN, int lda, int ldb, int ldc, int ldd)
  CPU element-wise quaternary operator: applies op(a, b, c, d) for 0 <= i < dimM and 0 <= j < dimN.
  Parameters:
  - op: quaternary op; see namespace quaternary.
  - A_h: matrix.
  - B_h: matrix.
  - C_h: matrix.
  - D_h: matrix.
  - dimM: matrix height.
  - dimN: matrix width.
  - lda: leading dimension of A.
  - ldb: leading dimension of B.
  - ldc: leading dimension of C.
  - ldd: leading dimension of D.
- template <class T, class Op>
  void hl_gpu_apply_unary_op(Op op, T *A_d, int dimM, int dimN, int lda)
  GPU element-wise unary operator: applies op(a) for 0 <= i < dimM and 0 <= j < dimN.
  Parameters:
  - op: unary op; see namespace unary.
  - A_d: matrix.
  - dimM: matrix height.
  - dimN: matrix width.
  - lda: leading dimension of A.
- template <class T, class Op, bool BAsRowVector, bool BAsColVector>
  void hl_gpu_apply_binary_op(Op op, T *A_d, T *B_d, int dimM, int dimN, int lda, int ldb)
  GPU element-wise binary operator: applies op(a, b) for 0 <= i < dimM and 0 <= j < dimN.
  - if (BAsRowVector == 0 && BAsColVector == 0): op(A[i * lda + j], B[i * ldb + j])
  - if (BAsRowVector == 1 && BAsColVector == 0): op(A[i * lda + j], B[j])
  - if (BAsRowVector == 0 && BAsColVector == 1): op(A[i * lda + j], B[i * ldb])
  - if (BAsRowVector == 1 && BAsColVector == 1): op(A[i * lda + j], B[0])
  Parameters:
  - op: binary op; see namespace binary.
  - A_d: matrix.
  - B_d: matrix.
  - dimM: matrix height.
  - dimN: matrix width.
  - lda: leading dimension of A.
  - ldb: leading dimension of B.
- template <class T, class Op, bool CAsRowVector, bool CAsColVector>
  void hl_gpu_apply_ternary_op(Op op, T *A_d, T *B_d, T *C_d, int dimM, int dimN, int lda, int ldb, int ldc)
  GPU element-wise ternary operator: applies op(a, b, c) for 0 <= i < dimM and 0 <= j < dimN.
  - if (CAsRowVector == 0 && CAsColVector == 0): op(A[i*lda + j], B[i*ldb + j], C[i*ldc + j])
  - if (CAsRowVector == 1 && CAsColVector == 0): op(A[i*lda + j], B[i*ldb + j], C[j])
  - if (CAsRowVector == 0 && CAsColVector == 1): op(A[i*lda + j], B[i*ldb + j], C[i*ldc])
  - if (CAsRowVector == 1 && CAsColVector == 1): op(A[i*lda + j], B[i*ldb + j], C[0])
  Parameters:
  - op: ternary op; see namespace ternary.
  - A_d: matrix.
  - B_d: matrix.
  - C_d: matrix.
  - dimM: matrix height.
  - dimN: matrix width.
  - lda: leading dimension of A.
  - ldb: leading dimension of B.
  - ldc: leading dimension of C.
- template <class T, class Op>
  void hl_gpu_apply_quaternary_op(Op op, T *A_d, T *B_d, T *C_d, T *D_d, int dimM, int dimN, int lda, int ldb, int ldc, int ldd)
  GPU element-wise quaternary operator: applies op(a, b, c, d) for 0 <= i < dimM and 0 <= j < dimN.
  Parameters:
  - op: quaternary op; see namespace quaternary.
  - A_d: matrix.
  - B_d: matrix.
  - C_d: matrix.
  - D_d: matrix.
  - dimM: matrix height.
  - dimN: matrix width.
  - lda: leading dimension of A.
  - ldb: leading dimension of B.
  - ldc: leading dimension of C.
  - ldd: leading dimension of D.
- template <class Agg, class Op, class Saver>
  void hl_cpu_matrix_row_op(Agg agg, Op op, Saver sv, int dimM, int dimN, real *dst, int ld, real *A, int lda)
  CPU matrix row operator.
- template <class Saver, class Agg, class Op>
  void hl_cpu_matrix_row_op(Agg agg, Op op, int dimM, int dimN, real *dst, int ld, real *A, int lda, real *B, int ldb)
  CPU matrix row operator.
  Parameters:
  - agg: aggregate operator expression.
  - op: operator expression.
  - dimM: matrix height.
  - dimN: matrix width.
  - dst: destination matrix.
  - ld: leading dimension of the dst matrix.
  - A: matrix A.
  - lda: leading dimension of matrix A.
  - B: matrix B.
  - ldb: leading dimension of matrix B.
- template <class Agg, class Op, class Saver>
  void hl_cpu_matrix_column_op(Agg agg, Op op, Saver sv, int dimM, int dimN, real *dst, real *A, int lda)
  CPU matrix column operator.
  Parameters:
  - agg: aggregate operator expression.
  - op: operator expression.
  - sv: assignment operator expression.
  - dimM: matrix height.
  - dimN: matrix width.
  - dst: destination matrix.
  - A: matrix A.
  - lda: leading dimension of matrix A.
- template <class Agg, class Op, class Saver>
  void hl_cpu_matrix_column_op(Agg agg, Op op, Saver sv, int dimM, int dimN, real *dst, real *A, int lda, real *B, int ldb)
  CPU matrix column operator.
  Parameters:
  - agg: aggregate operator expression.
  - op: operator expression.
  - sv: assignment operator expression.
  - dimM: matrix height.
  - dimN: matrix width.
  - dst: destination matrix.
  - A: matrix A.
  - lda: leading dimension of matrix A.
  - B: matrix B.
  - ldb: leading dimension of matrix B.
- template <class Agg, class Op, class Saver>
  void hl_gpu_matrix_row_op(Agg agg, Op op, Saver sv, int dimM, int dimN, real *dst, int ld, real *A, int lda)
  GPU matrix row operator.
  Parameters:
  - agg: aggregate operator expression.
  - op: operator expression.
  - sv: assignment operator expression.
  - dimM: matrix height.
  - dimN: matrix width.
  - dst: destination matrix.
  - ld: leading dimension of dst.
  - A: matrix A.
  - lda: leading dimension of matrix A.
- template <class Saver, class Agg, class Op>
  void hl_gpu_matrix_row_op(Agg agg, Op op, int dimM, int dimN, real *dst, int ld, real *A, int lda, real *B, int ldb)
  GPU matrix row operator.
  Parameters:
  - agg: aggregate operator expression.
  - op: operator expression.
  - dimM: matrix height.
  - dimN: matrix width.
  - dst: destination matrix.
  - ld: leading dimension of the dst matrix.
  - A: matrix A.
  - lda: leading dimension of matrix A.
  - B: matrix B.
  - ldb: leading dimension of matrix B.
- template <class Agg, class Op, class Saver>
  void hl_gpu_matrix_column_op(Agg agg, Op op, Saver sv, int dimM, int dimN, real *dst, real *A, int lda)
  GPU matrix column operator.
  Parameters:
  - agg: aggregate operator expression.
  - op: operator expression.
  - sv: assignment operator expression.
  - dimM: matrix height.
  - dimN: matrix width.
  - dst: destination matrix.
  - A: matrix A.
  - lda: leading dimension of matrix A.
- template <class Agg, class Op, class Saver>
  void hl_gpu_matrix_column_op(Agg agg, Op op, Saver sv, int dimM, int dimN, real *dst, real *A, int lda, real *B, int ldb)
  GPU matrix column operator.
  Parameters:
  - agg: aggregate operator expression.
  - op: operator expression.
  - sv: assignment operator expression.
  - dimM: matrix height.
  - dimN: matrix width.
  - dst: destination matrix.
  - A: matrix A.
  - lda: leading dimension of matrix A.
  - B: matrix B.
  - ldb: leading dimension of matrix B.
hl_matrix_ops.cuh
Defines
- HL_MATRIX_OPS_CUH_
- HL_DEVICE
- ONE_PARAMETER(name)
  Parameter macro.
- TWO_PARAMETER(name)
- THREE_PARAMETER(name)
- FOUR_PARAMETER(name)
- DEFINE_MATRIX_UNARY_OP(name, op)
  Unary operator macro.
  Note: op supports multiple expressions separated by commas, e.g. a, b.
  See: hl_gpu_apply_unary_op, hl_cpu_apply_unary_op.
  Parameters:
  - name: operator name.
  - op: operator expression.
- DEFINE_MATRIX_UNARY_PARAMETER_OP(name, PARA_MACRO, op)
  Unary operator macro.
  Note: op supports multiple expressions separated by commas, e.g. a, b.
  See: hl_gpu_apply_unary_op, hl_cpu_apply_unary_op.
  Parameters:
  - name: operator name.
  - PARA_MACRO: parameter macro.
  - op: operator expression.
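  For example, a sketch of defining and applying a parameterized unary operator, modeled on how these macros are used elsewhere in the codebase (the unary namespace, the <real> template argument, and the parameter name p injected by ONE_PARAMETER are assumptions based on that usage):

    #include "hl_matrix_ops.cuh"
    #include "hl_matrix_apply.cuh"

    /* Define Scale: multiply every element by the runtime constant p. */
    DEFINE_MATRIX_UNARY_PARAMETER_OP(Scale, ONE_PARAMETER, a = a * p);

    /* Scale a contiguous M x N CPU matrix in place by 2.5. */
    void scale_matrix(real *A_h, int dimM, int dimN) {
        hl_cpu_apply_unary_op(unary::Scale<real>(2.5), A_h, dimM, dimN, dimN);
    }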
- DEFINE_MATRIX_BINARY_OP(name, op)
  Binary operator macro.
  Note: op supports multiple expressions separated by commas, e.g. a, b.
  See: hl_gpu_apply_binary_op, hl_cpu_apply_binary_op.
  Parameters:
  - name: operator name.
  - op: operator expression.
- DEFINE_MATRIX_BINARY_PARAMETER_OP(name, PARA_MACRO, op)
  Binary operator macro.
  Note: op supports multiple expressions separated by commas, e.g. a, b.
  See: hl_gpu_apply_binary_op, hl_cpu_apply_binary_op.
  Parameters:
  - name: operator name.
  - PARA_MACRO: parameter macro.
  - op: operator expression.
- DEFINE_MATRIX_TERNARY_OP(name, op)
  Ternary operator macro.
  Note: op supports multiple expressions separated by commas, e.g. a, b, c.
  See: hl_gpu_apply_ternary_op, hl_cpu_apply_ternary_op.
  Parameters:
  - name: operator name.
  - op: operator expression.
- DEFINE_MATRIX_TERNARY_PARAMETER_OP(name, PARA_MACRO, op)
  Ternary operator macro.
  Note: op supports multiple expressions separated by commas, e.g. a, b, c.
  See: hl_gpu_apply_ternary_op, hl_cpu_apply_ternary_op.
  Parameters:
  - name: operator name.
  - PARA_MACRO: parameter macro.
  - op: operator expression.
- DEFINE_MATRIX_QUATERNARY_OP(name, op)
  Quaternary operator macro.
  Note: op supports multiple expressions separated by commas, e.g. a, b, c, d.
  See: hl_gpu_apply_quaternary_op, hl_cpu_apply_quaternary_op.
  Parameters:
  - name: operator name.
  - op: operator expression.
- DEFINE_MATRIX_QUATERNARY_PARAMETER_OP(name, PARA_MACRO, op)
  Quaternary operator macro.
  Note: op supports multiple expressions separated by commas, e.g. a, b, c, d.
  See: hl_gpu_apply_quaternary_op, hl_cpu_apply_quaternary_op.
  Parameters:
  - name: operator name.
  - PARA_MACRO: parameter macro.
  - op: operator expression.
hl_sse_matrix_kernel.cuh
Functions
- bool hl_check_align(size_t size)
- bool hl_check_align(void *ptr)
- template <class Agg, class Op, class Saver>
  void hl_sse_matrix_row_op(Agg agg, Op op, Saver sv, int dimM, int dimN, real *dst, int ld, real *A, int lda)
- template <class Agg, class Op, class Saver>
  void hl_sse_matrix_row_op(Agg agg, Op op, Saver sv, int dimM, int dimN, real *dst, int ld, real *A, int lda, real *B, int ldb)
- template <class Agg, class Op, class Saver>
  void hl_matrix_column_op(Agg agg, Op op, Saver sv, int dimM, int dimN, real *dst, real *A, int lda)
- template <class Agg, class Op, class Saver>
  void hl_matrix_column_op(Agg agg, Op op, Saver sv, int dimM, int dimN, real *dst, real *A, int lda, real *B, int ldb)
- template <int MaxRow, class Agg, class Op, class Saver>
  void hl_sse_column_op_with_rem(Agg agg, Op op, Saver sv, int dimM, int dimN, real *dst, real *A, int lda)
- template <int Step, class Agg, class Op, class Saver>
  void hl_sse_matrix_column_op(Agg agg, Op op, Saver sv, int dimM, int dimN, real *dst, real *A, int lda)
- template <class Agg, class Op, class Saver>
  void hl_sse_matrix_column_op(Agg agg, Op op, Saver sv, int dimM, int dimN, real *dst, real *A, int lda)
- template <int MaxRow, class Agg, class Op, class Saver>
  void hl_sse_column_op_with_rem(Agg agg, Op op, Saver sv, int dimM, int dimN, real *dst, real *A, int lda, real *B, int ldb)
- template <int Step, class Agg, class Op, class Saver>
  void hl_sse_matrix_column_op(Agg agg, Op op, Saver sv, int dimM, int dimN, real *dst, real *A, int lda, real *B, int ldb)
- template <class Agg, class Op, class Saver>
  void hl_sse_matrix_column_op(Agg agg, Op op, Saver sv, int dimM, int dimN, real *dst, real *A, int lda, real *B, int ldb)
hl_batch_transpose.h
Functions
- void batchTranspose(const real *input, real *output, int width, int height, int batchSize)
  Perform a matrix transpose for each matrix in the batch.
  Note: both the input and the output are arranged in batch-first order. Each batch entry holds height * width elements, arranged in height-first (row-first) order.
  Parameters:
  - input: height * width elements per batch entry.
  - output: height * width elements per batch entry.
  - width: width of batch data.
  - height: height of batch data.
  - batchSize: batch size.
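  A usage sketch (the device buffers and their initialization are assumed to exist elsewhere):

    #include "hl_batch_transpose.h"

    /* Transpose a batch of 8 matrices of size 2 x 3 each. input_d and
     * output_d each hold batchSize * height * width reals; afterwards
     * each output matrix is the 3 x 2 transpose of its input. */
    void transpose_example(const real *input_d, real *output_d) {
        const int width = 3, height = 2, batchSize = 8;
        batchTranspose(input_d, output_d, width, height, batchSize);
    }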
Sparse Matrix
hl_sparse.h
Functions
- void hl_malloc_sparse_matrix(hl_sparse_matrix_s *A_d, hl_matrix_format_t format, hl_matrix_value_t value_type, int dimM, int dimN, int nnz)
  Allocate a sparse matrix.
  Parameters:
  - A_d: sparse matrix.
  - format: format.
  - value_type: value type.
  - dimM: height.
  - dimN: width.
  - nnz: number of non-zero elements.
- void hl_free_sparse_matrix(hl_sparse_matrix_s A_d)
  Free a sparse matrix.
  Parameters:
  - A_d: GPU sparse matrix.
- void hl_construct_sparse_matrix(hl_sparse_matrix_s *A_d, void *dest_d, size_t size, hl_matrix_format_t format, hl_matrix_value_t value_type, int dimM, int dimN, int nnz)
  Construct a sparse matrix from a block of input GPU memory.
  Note: the matching destructor is hl_destruct_sparse_matrix.
  Parameters:
  - A_d: sparse matrix.
  - dest_d: GPU memory.
  - size: size of dest_d.
  - format: format.
  - value_type: value type.
  - dimM: height.
  - dimN: width.
  - nnz: number of non-zero elements.
- void hl_construct_sparse_matrix(hl_sparse_matrix_s *A_d, real *value_d, int *rows_d, int *cols_d, hl_matrix_format_t format, hl_matrix_value_t value_type, int dimM, int dimN, int nnz)
  Construct a sparse matrix from three arrays.
  If format is HL_SPARSE_CSR, the size of rows_d is dimM + 1 and the size of cols_d is nnz.
  If format is HL_SPARSE_CSC, the size of rows_d is nnz and the size of cols_d is dimN + 1.
  If value_type is HL_NO_VALUE, the size of value_d is zero; otherwise it is nnz.
  Note: the matching destructor is hl_destruct_sparse_matrix.
  Parameters:
  - A_d: sparse matrix.
  - value_d: values.
  - rows_d: row indices.
  - cols_d: column indices.
  - format: format.
  - value_type: value type.
  - dimM: height.
  - dimN: width.
  - nnz: number of non-zero elements.
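  A sketch constructing a CSR handle around existing device arrays for the 2 x 3 matrix [[0, 1, 0], [2, 0, 3]]; the array contents shown in the comment would have been copied to the GPU beforehand:

    #include "hl_sparse.h"

    /* value_d = {1, 2, 3}, rows_d = {0, 1, 3}, cols_d = {1, 0, 2},
     * all resident in GPU memory. nnz = 3; rows_d has dimM + 1 entries. */
    void construct_example(real *value_d, int *rows_d, int *cols_d) {
        const int dimM = 2, dimN = 3, nnz = 3;
        hl_sparse_matrix_s A_d;
        hl_construct_sparse_matrix(&A_d, value_d, rows_d, cols_d,
                                   HL_SPARSE_CSR, HL_FLOAT_VALUE,
                                   dimM, dimN, nnz);
        /* ... use A_d ... */
        hl_destruct_sparse_matrix(A_d);
    }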
- void hl_destruct_sparse_matrix(hl_sparse_matrix_s A_d)
  Destruct a sparse matrix.
  Parameters:
  - A_d: sparse matrix.
- void hl_memcpy_csr_matrix(hl_sparse_matrix_s csr_matrix, real *csr_val, int *csr_row, int *csr_col, hl_stream_t stream)
  Copy values and indices to a sparse matrix.
  If csr_matrix is HL_FLOAT_VALUE, either all three pointers csr_val, csr_row, and csr_col are non-null, or csr_val is non-null while csr_row and csr_col are null.
  If csr_matrix is HL_NO_VALUE, csr_val is ignored and csr_row and csr_col must be non-null.
  Parameters:
  - csr_matrix: sparse matrix.
  - csr_val: pointer to the CSR value array (nnz).
  - csr_row: pointer to the CSR row index array (dimM + 1).
  - csr_col: pointer to the CSR column index array (nnz).
  - stream: hl_stream_t type.
- void hl_memcpy_csc_matrix(hl_sparse_matrix_s csc_matrix, real *csc_val, int *csc_row, int *csc_col, hl_stream_t stream)
  Copy values and indices to a sparse matrix.
  If csc_matrix is HL_FLOAT_VALUE, either all three pointers csc_val, csc_row, and csc_col are non-null, or csc_val is non-null while csc_row and csc_col are null.
  If csc_matrix is HL_NO_VALUE, csc_val is ignored and csc_row and csc_col must be non-null.
  Parameters:
  - csc_matrix: sparse matrix.
  - csc_val: pointer to the CSC value array (nnz).
  - csc_row: pointer to the CSC row index array (nnz).
  - csc_col: pointer to the CSC column index array (dimN + 1).
  - stream: hl_stream_t type.
- void hl_memcpy_sparse_matrix(hl_sparse_matrix_s dst, hl_sparse_matrix_s src, hl_stream_t stream)
  Copy a sparse matrix to another sparse matrix.
  Note:
  1. The formats of the src and dst matrices must be consistent.
  2. If the source matrix has values, the destination matrix may have values or not; if the source matrix has no values, the destination matrix must have no values either.
  Parameters:
  - dst: sparse matrix.
  - src: sparse matrix.
  - stream: hl_stream_t type.
- void hl_matrix_csr2dense(hl_sparse_matrix_s A_d, real *C_d, int dimM, int dimN)
  Convert a CSR matrix to a dense matrix.
  Parameters:
  - A_d: CSR matrix.
  - C_d: dense matrix.
  - dimM: height.
  - dimN: width.
- void hl_matrix_csc2dense(hl_sparse_matrix_s A_d, real *C_d, int dimM, int dimN)
  Convert a CSC matrix to a dense matrix.
  Parameters:
  - A_d: CSC matrix.
  - C_d: dense matrix.
  - dimM: height.
  - dimN: width.
- void hl_matrix_csr_mul_dense(hl_sparse_matrix_s A_d, hl_trans_op_t transa, real *B_d, hl_trans_op_t transb, real *C_d, int dimM, int dimN, int dimK, real alpha, real beta)
  C_d = alpha * (op(A_d) * op(B_d)) + beta * C_d.
  Note: transb does not support HPPL_OP_T.
  Parameters:
  - A_d: CSR sparse matrix.
  - transa: operation op(A), either no transpose or transpose.
  - B_d: dense matrix.
  - transb: operation op(B), either no transpose or transpose.
  - C_d: dense matrix.
  - dimM: matrix height of op(A) and C.
  - dimN: matrix width of op(B) and C.
  - dimK: width of op(A) and height of op(B).
  - alpha: scalar used for multiplication.
  - beta: scalar used for multiplication. If beta is zero, C does not have to be a valid input.
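  A sketch of a plain C = A * B product with an M x K CSR matrix A_d and a K x N dense B_d (HPPL_OP_N is assumed to be the no-transpose value of hl_trans_op_t, by analogy with HPPL_OP_T above; beta = 0, so C_d need not be initialized):

    hl_matrix_csr_mul_dense(A_d, HPPL_OP_N, B_d, HPPL_OP_N, C_d,
                            dimM, dimN, dimK, 1.0, 0.0);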
- void hl_matrix_csc_mul_dense(hl_sparse_matrix_s A_d, hl_trans_op_t transa, real *B_d, hl_trans_op_t transb, real *C_d, int dimM, int dimN, int dimK, real alpha, real beta)
  C_d = alpha * (op(A_d) * op(B_d)) + beta * C_d.
  Note: transb does not support HPPL_OP_T.
  Parameters:
  - A_d: sparse matrix.
  - transa: operation op(A), either no transpose or transpose.
  - B_d: dense matrix.
  - transb: operation op(B), either no transpose or transpose.
  - C_d: dense matrix.
  - dimM: matrix height of op(A) and C.
  - dimN: matrix width of op(B) and C.
  - dimK: width of op(A) and height of op(B).
  - alpha: scalar used for multiplication.
  - beta: scalar used for multiplication. If beta is zero, C does not have to be a valid input.
- void hl_matrix_dense_mul_csc(real *A_d, hl_trans_op_t transa, hl_sparse_matrix_s B_d, hl_trans_op_t transb, real *C_d, int dimM, int dimN, int dimK, real alpha, real beta)
  C_d = alpha * (op(A_d) * op(B_d)) + beta * C_d.
  Note: transa does not support HPPL_OP_T.
  Parameters:
  - A_d: dense matrix.
  - transa: operation op(A), either no transpose or transpose.
  - B_d: CSC sparse matrix.
  - transb: operation op(B), either no transpose or transpose.
  - C_d: dense matrix.
  - dimM: matrix height of op(A) and C.
  - dimN: matrix width of op(B) and C.
  - dimK: width of op(A) and height of op(B).
  - alpha: scalar used for multiplication.
  - beta: scalar used for multiplication. If beta is zero, C does not have to be a valid input.
- void hl_sparse_matrix_mul(real *A_d, hl_trans_op_t transa, real *B_d, hl_trans_op_t transb, hl_sparse_matrix_s C_d, int dimM, int dimN, int dimK, real alpha, real beta)
  C_d = alpha * (op(A_d) * op(B_d)) + beta * C_d, computed only at the non-zero positions of the sparse matrix C.
  Note: transb does not support HPPL_OP_T.
  Parameters:
  - A_d: dense matrix.
  - transa: operation op(A), either no transpose or transpose.
  - B_d: dense matrix.
  - transb: operation op(B), either no transpose or transpose.
  - C_d: sparse matrix.
  - dimM: matrix height of op(A) and C.
  - dimN: matrix width of op(B) and C.
  - dimK: width of op(A) and height of op(B).
  - alpha: scalar used for multiplication.
  - beta: scalar used for multiplication.
- void hl_matrix_dense_mul_csr(real *A_d, hl_trans_op_t transa, hl_sparse_matrix_s B_d, hl_trans_op_t transb, real *C_d, int dimM, int dimN, int dimK, real alpha, real beta)
  C_d = alpha * (op(A_d) * op(B_d)) + beta * C_d.
  Note: transa does not support HPPL_OP_T.
  Parameters:
  - A_d: dense matrix.
  - transa: operation op(A), either no transpose or transpose.
  - B_d: sparse matrix.
  - transb: operation op(B), either no transpose or transpose.
  - C_d: dense matrix.
  - dimM: matrix height of op(A) and C.
  - dimN: matrix width of op(B) and C.
  - dimK: width of op(A) and height of op(B).
  - alpha: scalar used for multiplication.
  - beta: scalar used for multiplication. If beta is zero, C does not have to be a valid input.
- void hl_memcpy_from_csc_matrix(real *csc_val, size_t val_size, int *csc_row, size_t row_size, int *csc_col, size_t col_size, hl_sparse_matrix_s csc_matrix, hl_stream_t stream)
  Copy a CSC matrix to host memory.
  a. Updates the three arrays according to csc_matrix:
  - csc_val, csc_row, and csc_col are destination addresses;
  - if the type of csc_matrix is HL_NO_VALUE, only csc_row and csc_col are updated;
  - if the type of csc_matrix is HL_FLOAT_VALUE, csc_row, csc_col, and csc_val are updated.
  b. The copy is asynchronous. To ensure the data has been copied, call the synchronous interface.
  Parameters:
  - csc_val: pointer to the CSC value array (nnz).
  - val_size: CSC value size.
  - csc_row: pointer to the CSC row index array (nnz).
  - row_size: CSC row size.
  - csc_col: pointer to the CSC column index array (dimN + 1).
  - col_size: CSC column size.
  - csc_matrix: sparse matrix.
  - stream: hl_stream_t type.
- void hl_memcpy_from_csr_matrix(real *csr_val, size_t val_size, int *csr_row, size_t row_size, int *csr_col, size_t col_size, hl_sparse_matrix_s csr_matrix, hl_stream_t stream)
  Copy a CSR matrix to host memory.
  a. Updates the three arrays according to csr_matrix:
  - csr_val, csr_row, and csr_col are destination addresses;
  - if the type of csr_matrix is HL_NO_VALUE, only csr_row and csr_col are updated;
  - if the type of csr_matrix is HL_FLOAT_VALUE, csr_row, csr_col, and csr_val are updated.
  b. The copy is asynchronous. To ensure the data has been copied, call the synchronous interface.
  Parameters:
  - csr_val: pointer to the CSR value array (nnz).
  - val_size: CSR value size.
  - csr_row: pointer to the CSR row index array (dimM + 1).
  - row_size: CSR row size.
  - csr_col: pointer to the CSR column index array (nnz).
  - col_size: CSR column size.
  - csr_matrix: sparse matrix.
  - stream: hl_stream_t type.
- void hl_sparse_matrix_column_sum(real *A_d, hl_sparse_matrix_s B_d, int dimM, int dimN, real scale)
  A_d[j] += B_d[i,j] for i in range(height).
  Parameters:
  - A_d: vector, size = width.
  - B_d: sparse matrix.
  - dimM: height.
  - dimN: width.
  - scale: scale of B_d.
- void hl_matrix_csr_column_sum(real *A_d, hl_sparse_matrix_s B_d, int dimM, int dimN, real scale)
  CSR implementation of hl_sparse_matrix_column_sum.
- void hl_sparse_matrix_add_bias(hl_sparse_matrix_s A_d, real *B_d, real scale)
  A_d[i,j] += B_d[j].
  Parameters:
  - A_d: sparse matrix.
  - B_d: vector, size = A_d.width.
  - scale: scale of B_d.
- void hl_matrix_csr_add_bias(hl_sparse_matrix_s A_d, real *B_d, real scale)
  CSR implementation of hl_sparse_matrix_add_bias.
- void hl_sparse_matrix_add_dense(hl_sparse_matrix_s A_d, real *B_d, int dimM, int dimN, real alpha, real beta)
  sparseMatrix = alpha * denseMatrix + beta * sparseMatrix, i.e. A_d[i,j] = alpha * B_d[i,j] + beta * A_d[i,j]. Only dense values at (row, col) positions present in the sparse matrix are added; values at other positions are ignored.
  Parameters:
  - A_d: sparse matrix.
  - B_d: dense matrix.
  - dimM: height of B_d.
  - dimN: width of B_d.
  - alpha: scale of B_d.
  - beta: scale of A_d.
- void hl_matrix_csr_add_dense(hl_sparse_matrix_s A_d, real *B_d, int dimM, int dimN, real alpha, real beta)
  CSR implementation of hl_sparse_matrix_add_dense.
- int *hl_sparse_matrix_get_rows(hl_sparse_matrix_s sMat)
  Get the rows pointer of a GpuSparseMatrix.
  Return: the rows pointer, which is a GPU address.
  Parameters:
  - sMat: sparse matrix.
- int *hl_sparse_matrix_get_cols(hl_sparse_matrix_s sMat)
  Get the cols pointer of a GpuSparseMatrix.
  Return: the cols pointer, which is a GPU address.
  Parameters:
  - sMat: sparse matrix.
- real *hl_sparse_matrix_get_value(hl_sparse_matrix_s sMat)
  Get the value pointer of a GpuSparseMatrix.
  Return: the value pointer, which is a GPU address.
  Parameters:
  - sMat: sparse matrix.
hl_sparse.ph
Defines
- HL_SPARSE_PH_
- __sparse_get_type_return__(mat, type, field)
- __sparse_get_return__(mat, field)
Typedefs
- typedef struct _hl_csr_matrix *hl_csr_matrix
- typedef struct _hl_csc_matrix *hl_csc_matrix
- struct _hl_csr_matrix
  Sparse matrix, CSR format.
  Parameters:
  - csr_val: non-zero values of the matrix.
  - csr_row: row indices.
  - csr_col: column indices.
  - nnz_s: size of csr_val and csr_col.
  - row_s: size of csr_row.
  - sparsity: sparsity pattern.
- struct _hl_csc_matrix
  Sparse matrix, CSC format.
  Parameters:
  - csc_val: non-zero values of the matrix.
  - csc_row: row indices.
  - csc_col: column indices.
  - nnz_s: size of csc_val and csc_row.
  - col_s: size of csc_col.
  - sparsity: sparsity pattern.
Others
hl_aggregate.h
Functions
- void hl_matrix_row_sum(real *A_d, real *C_d, int dimM, int dimN)
  Calculate the sum of each row of the matrix A_d.
  Parameters:
  - A_d: input matrix (M x N).
  - C_d: output matrix (M x 1).
  - dimM: matrix height.
  - dimN: matrix width.
- void hl_matrix_row_max(real *A_d, real *C_d, int dimM, int dimN)
  Calculate the maximum value of each row of the matrix A_d.
  Parameters:
  - A_d: input matrix (M x N).
  - C_d: output matrix (M x 1).
  - dimM: matrix height.
  - dimN: matrix width.
- void hl_matrix_row_min(real *A_d, real *C_d, int dimM, int dimN)
  Calculate the minimum value of each row of the matrix A_d.
  Parameters:
  - A_d: input matrix (M x N).
  - C_d: output matrix (M x 1).
  - dimM: matrix height.
  - dimN: matrix width.
- void hl_matrix_column_sum(real *A_d, real *C_d, int dimM, int dimN)
  Calculate the sum of each column of the matrix A_d.
  Parameters:
  - A_d: input matrix (M x N).
  - C_d: output matrix (1 x N).
  - dimM: matrix height.
  - dimN: matrix width.
- void hl_matrix_column_max(real *A_d, real *C_d, int dimM, int dimN)
  Calculate the maximum value of each column of the matrix A_d.
  Parameters:
  - A_d: input matrix (M x N).
  - C_d: output matrix (1 x N).
  - dimM: matrix height.
  - dimN: matrix width.
- void hl_matrix_column_min(real *A_d, real *C_d, int dimM, int dimN)
  Calculate the minimum value of each column of the matrix A_d.
  Parameters:
  - A_d: input matrix (M x N).
  - C_d: output matrix (1 x N).
  - dimM: matrix height.
  - dimN: matrix width.
- void hl_vector_sum(real *A_d, real *C_h, int dimM)
  C_h = sum(A_d[i]).
  Parameters:
  - A_d: input vector (M).
  - C_h: output (host memory).
  - dimM: size of the vector.
- void hl_vector_abs_sum(real *A_d, real *C_h, int dimM)
  C_h = sum(abs(A_d[i])).
  Parameters:
  - A_d: input vector (M).
  - C_h: output (host memory).
  - dimM: size of the vector.
hl_table_apply.h
Functions
- void hl_matrix_select_rows(real *output, int ldo, real *table, int ldt, int *ids, int numSamples, int tableSize, int dim)
  Get rows from the table: output[i] += table[ids[i]]. If ids[i] == -1, it is ignored.
  Parameters:
  - output: output matrix.
  - ldo: leading dimension of output.
  - table: table matrix.
  - ldt: leading dimension of table.
  - ids: ids vector.
  - numSamples: height of output.
  - tableSize: height of table.
  - dim: width of table.
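  An embedding-style lookup sketch (contiguous storage is assumed, so both leading dimensions equal dim; all pointers are device memory and the names are illustrative):

    /* output_d[i] += table_d[ids_d[i]] for each of numSamples rows of width dim. */
    hl_matrix_select_rows(output_d, dim, table_d, dim, ids_d,
                          numSamples, tableSize, dim);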
- void hl_matrix_add_to_rows(real *table, int ldt, real *input, int ldi, int *ids, int numSamples, int tableSize, int dim)
  Add rows to the table: table[ids[i]] += input[i]. If ids[i] == -1, it is ignored.
  Parameters:
  - table: table matrix.
  - ldt: leading dimension of table.
  - input: input matrix.
  - ldi: leading dimension of input.
  - ids: ids vector.
  - numSamples: height of input.
  - tableSize: height of table.
  - dim: width of table.
- template <class T>
  void hl_vector_select_from(T *dst, int sized, const T *src, int sizes, const int *ids, int sizei)
  Select elements from a vector.
  Parameters:
  - dst: output vector.
  - sized: size of dst.
  - src: input vector.
  - sizes: size of src.
  - ids: index vector.
  - sizei: size of ids.
hl_top_k.h
Functions
- void hl_matrix_top_k(real *topVal, int ldv, int *topIds, real *src, int lds, int dim, int beamSize, int numSamples)
  Find the top k elements.
  Parameters:
  - topVal: top k values.
  - ldv: leading dimension of topVal.
  - topIds: top k indices.
  - src: input values.
  - lds: leading dimension of src.
  - dim: width of the input values.
  - beamSize: beam size.
  - numSamples: height of the input values.
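  A sketch taking the top 5 values per row of a numSamples x dim matrix (contiguous storage is assumed, so lds = dim and ldv = beamSize; buffer names are illustrative):

    const int beamSize = 5;
    /* topVal_d: numSamples x beamSize values; topIds_d: matching indices. */
    hl_matrix_top_k(topVal_d, beamSize, topIds_d, src_d, dim,
                    dim, beamSize, numSamples);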
- void hl_sparse_matrix_top_k(real *topVal, int ldv, int *topIds, hl_sparse_matrix_s src, int beamSize, int numSamples)
  Find the top k elements of each row of a sparse matrix.
  Note: only the HL_SPARSE_CSR format is supported.
  Parameters:
  - topVal: top k values.
  - ldv: leading dimension of topVal.
  - topIds: top k indices.
  - src: sparse matrix.
  - beamSize: beam size.
  - numSamples: height of the input values.