Matrix¶
Base Matrix¶
hl_matrix.h¶
Functions
-
void
hl_matrix_add
(real *A_d, real *B_d, real *C_d, int dimM, int dimN, real alpha, real beta)¶ Matrix addition: C_d[i] = alpha * A_d[i] + beta * B_d[i].
- Parameters
  - A_d : input matrix (M x N).
  - B_d : input matrix (M x N).
  - C_d : output matrix (M x N).
  - dimM : matrix height.
  - dimN : matrix width.
  - alpha : scalar used for addition.
  - beta : scalar used for addition.
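A minimal usage sketch for hl_matrix_add, computing C_d = 1.0 * A_d + 0.5 * B_d. It assumes A_d, B_d and C_d already point to device buffers of dimM * dimN reals, allocated and filled by routines outside this header.

    #include "hl_matrix.h"

    /* C_d = 1.0 * A_d + 0.5 * B_d for a 32 x 64 matrix.
     * A_d, B_d, C_d: device pointers to 32 * 64 reals each,
     * allocated elsewhere. */
    void example_matrix_add(real *A_d, real *B_d, real *C_d) {
      const int dimM = 32;
      const int dimN = 64;
      hl_matrix_add(A_d, B_d, C_d, dimM, dimN, /* alpha */ 1.0, /* beta */ 0.5);
    }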
-
void
hl_matrix_softmax
(real *A_d, real *C_d, int dimM, int dimN)¶ Matrix Softmax.
- Parameters
  - A_d : input matrix (M x N).
  - C_d : output matrix (M x N).
  - dimM : matrix height.
  - dimN : matrix width.
-
void
hl_matrix_softmax_derivative
(real *grad_d, real *output_d, real *sftmaxSum_d, int dimM, int dimN)¶ Matrix softmax derivative.
- Parameters
  - grad_d : input matrix (M x N).
  - output_d : output matrix (M x N).
  - sftmaxSum_d : softmax sum (M x 1).
  - dimM : matrix height.
  - dimN : matrix width.
-
void
hl_sequence_softmax_forward
(real *A_d, real *C_d, const int *index, int numSequence)¶ Sequence softmax.
- Parameters
  - A_d : input vector.
  - C_d : output vector.
  - index : start positions of the sequences.
  - numSequence : number of sequences.
-
void
hl_matrix_classification_error
(real *A_d, int *B_d, real *C_d, int dimM, int dimN)¶ Matrix classification error.
- Parameters
  - A_d : input matrix (M x N).
  - B_d : input vector (M x 1).
  - C_d : output vector (M x 1).
  - dimM : matrix height.
  - dimN : matrix width.
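A short sketch of the classification-error call: A_d holds one row of class scores per sample, B_d the integer label per sample, and C_d receives one error value per sample. The device buffers are assumed to be allocated elsewhere.

    #include "hl_matrix.h"

    /* Per-sample classification error for 128 samples and 10 classes.
     * scores_d : 128 x 10 class scores (device memory).
     * labels_d : 128 ground-truth labels (device memory).
     * errors_d : 128 output values (device memory). */
    void example_classification_error(real *scores_d, int *labels_d, real *errors_d) {
      hl_matrix_classification_error(scores_d, labels_d, errors_d, 128, 10);
    }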
-
void
hl_matrix_cross_entropy
(real *A_d, real *C_d, int *label_d, int dimM, int dimN)¶ Matrix cross entropy.
- Parameters
  - A_d : input matrix (M x N).
  - C_d : output matrix (M x 1).
  - label_d : input vector (M x 1).
  - dimM : matrix height.
  - dimN : matrix width.
-
void
hl_matrix_cross_entropy_bp
(real *grad_d, real *output_d, int *label_d, int dimM, int dimN)¶ Matrix cross entropy back propagation.
- Parameters
  - grad_d : output matrix (M x N).
  - output_d : input matrix (M x N).
  - label_d : input vector (M x 1).
  - dimM : matrix height.
  - dimN : matrix width.
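The two cross-entropy entries are typically used as a pair: the forward call reduces each row of probabilities to one loss value, and the backward call writes the gradient with respect to the probabilities. A minimal sketch, assuming the device buffers were allocated and filled elsewhere:

    #include "hl_matrix.h"

    /* Cross entropy over dimM samples with dimN classes.
     * prob_d  : dimM x dimN probabilities (e.g. softmax output).
     * label_d : dimM ground-truth class indices.
     * loss_d  : dimM x 1 per-sample loss (forward output).
     * grad_d  : dimM x dimN gradient (backward output). */
    void example_cross_entropy(real *prob_d, int *label_d, real *loss_d,
                               real *grad_d, int dimM, int dimN) {
      hl_matrix_cross_entropy(prob_d, loss_d, label_d, dimM, dimN);
      hl_matrix_cross_entropy_bp(grad_d, prob_d, label_d, dimM, dimN);
    }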
-
void
hl_matrix_multi_binary_cross_entropy
(real *output, real *entropy, hl_sparse_matrix_s mat, int dimM, int dimN)¶ Matrix multi-binary label cross entropy.
- Parameters
  - output : input matrix (M x N).
  - entropy : output matrix (M x 1).
  - mat : input sparse matrix.
  - dimM : matrix height.
  - dimN : matrix width.
-
void
hl_matrix_multi_binary_cross_entropy_bp
(real *output, real *grad, hl_sparse_matrix_s mat, int dimM, int dimN)¶ Matrix multi-binary label cross entropy backprop.
- Parameters
  - output : input matrix (M x N).
  - grad : output matrix (M x N).
  - mat : input sparse matrix.
  - dimM : matrix height.
  - dimN : matrix width.
-
void
hl_matrix_zero_mem
(real *data, int num)¶ Matrix zero memory.
- Parameters
  - data : input data.
  - num : length of data.
-
void
hl_param_relu_forward
(real *output, real *input, real *w, int width, int height, int partial_sum)¶ Parametric ReLU forward.
- Parameters
  - output : output data.
  - input : input data.
  - w : parameter data.
  - width : matrix width.
  - height : matrix height.
  - partial_sum :
-
void
hl_param_relu_backward_w
(real *grad_w, real *grad_o, real *input, int width, int height, int partial_sum)¶ Parametric ReLU backward for the parameter w.
- Parameters
  - grad_w : w grad.
  - grad_o : output grad.
  - input : input data.
  - width : matrix width.
  - height : matrix height.
  - partial_sum :
-
void
hl_param_relu_backward_diff
(real *grad_o, real *input, real *w, real *diff, int width, int height, int partial_sum)¶ Parametric ReLU backward for the input diff.
- Parameters
  - grad_o : output grad.
  - input : input data.
  - w : parameter.
  - diff : diff.
  - width : matrix width.
  - height : matrix height.
  - partial_sum :
-
void
hl_cossim
(real *output, real *input1, real *input2, int width, int input1_height, int input2_height, real scale)¶ Cosine similarity forward.
- Parameters
  - output : output data.
  - input1 : input1 data (matrix).
  - input2 : input2 data (matrix or vector).
  - width : matrix width.
  - input1_height : input1 height.
  - input2_height : input2 height.
  - scale : scale factor.
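A sketch of the common case where input1 holds one row per sample and input2 is a single row compared against every row. Treating input2_height = 1 as the vector/broadcast case is an assumption based on the "matrix or vector" description; the buffers are assumed to be device memory allocated elsewhere.

    #include "hl_matrix.h"

    /* Cosine similarity between each of the 64 rows of x_d and the
     * single row held in y_d, written to out_d (64 values). */
    void example_cossim(real *out_d, real *x_d, real *y_d) {
      const int width = 256;         /* columns of both inputs       */
      const int input1_height = 64;  /* rows of x_d                  */
      const int input2_height = 1;   /* y_d is a single row (vector) */
      hl_cossim(out_d, x_d, y_d, width, input1_height, input2_height, /* scale */ 1.0);
    }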
-
void
hl_cossim_derivative
(real *grad, real *output, real *prevOutX, real *prevOutY, real *prevGradX, real *prevGradY, int width, int input1_height, int input2_height, real scale)¶ Cosine similarity derivative.
- Parameters
  - grad : output grad.
  - output : output data.
  - prevOutX : input1 data.
  - prevOutY : input2 data.
  - prevGradX : input1 grad.
  - prevGradY : input2 grad.
  - width : matrix width.
  - input1_height : input1 height.
  - input2_height : input2 height.
  - scale : scale factor.
Matrix addition: A_d[i][j] += scale * B_d[j/channel].
- Parameters
  - A_d : input matrix (M x N).
  - B_d : input matrix (1 x channel).
  - channel : width of B.
  - dimM : height of A.
  - dimN : width of A.
  - scale : scalar used for addition.
Matrix addition: A_d[i][j] += scale * B_d[j/channel].
- Parameters
  - B_d : input matrix (1 x channel).
  - A_d : input matrix (M x N).
  - channel : width of B.
  - dimM : height of A.
  - dimN : width of A.
  - scale : scalar used for addition.
hl_matrix_base.h¶
-
class
BaseOp
¶ Public Functions
-
BaseOp
()¶
-
BaseOp
(const real s1)¶
-
BaseOp
(const real s1, const real s2)¶
-
INLINE vecType BaseOp::vecOp(const vecType a) const
-
INLINE vecType BaseOp::vecOp(const vecType a, const vecType b) const
Public Static Attributes
-
const bool
sse
¶
-
-
namespace
aggregate
¶ -
class
sum
¶ Inherits from aggregate::SSESum
Public Functions
-
INLINE real aggregate::sum::init()
-
INLINE real aggregate::sum::operator()(const real a, const real b) const
-
-
class
max
¶ Inherits from aggregate::SSEMax
Public Functions
-
INLINE real aggregate::max::init()
-
INLINE real aggregate::max::operator()(const real a, const real b) const
-
-
class
min
¶ Inherits from aggregate::SSEMin
Public Functions
-
INLINE real aggregate::min::init()
-
INLINE real aggregate::min::operator()(const real a, const real b) const
-
namespace
base
¶ -
namespace
binary
¶ -
class
add
¶ Inherits from base::binary::SSEAdd
Public Functions
-
INLINE real base::binary::add::operator()(const real a, const real b) const
-
-
class
add2
¶ Inherits from base::binary::SSEAdd2
Public Functions
-
add2
(const real s1, const real s2)¶
-
INLINE real base::binary::add2::operator()(const real a, const real b) const
-
-
class
sub
¶ Inherits from base::binary::SSESub
Public Functions
-
INLINE real base::binary::sub::operator()(const real a, const real b) const
-
-
class
mul
¶ Inherits from base::binary::SSEMul
Public Functions
-
INLINE real base::binary::mul::operator()(const real a, const real b) const
-
-
class
div
¶ Inherits from base::binary::SSEDiv
Public Functions
-
INLINE real base::binary::div::operator()(const real a, const real b) const
-
-
class
squaredDiff
¶ Inherits from base::binary::SSESquaredDiff
Public Functions
-
INLINE real base::binary::squaredDiff::operator()(const real a, const real b) const
-
-
class
first
¶ Inherits from base::binary::SSEFirst
Public Functions
-
INLINE real base::binary::first::operator()(const real a, const real b) const
-
-
class
second
¶ Inherits from base::binary::SSESecond
Public Functions
-
INLINE real base::binary::second::operator()(const real a, const real b) const
-
hl_matrix_apply.cuh¶
Defines
-
HL_MATRIX_APPLY_H_
¶
Functions
- template <class T, class Op>
-
void
hl_cpu_apply_unary_op
(Op op, T *A_h, int dimM, int dimN, int lda)¶ CPU element wise unary operator.
element wise op(a) for 0 <= i < dimM & for 0 <= j < dimN.
- Parameters
  - op : unary op. See namespace unary.
  - A_h : matrix.
  - dimM : matrix height.
  - dimN : matrix width.
  - lda : leading dimension of A.
- template <class T, class Op, bool BAsRowVector, bool BAsColVector>
-
void
hl_cpu_apply_binary_op
(Op op, T *A_h, T *B_h, int dimM, int dimN, int lda, int ldb)¶ CPU element wise binary operator.
element wise op(a, b) for 0 <= i < dimM & for 0 <= j < dimN.
if (BAsRowVector == 0 && BAsColVector == 0) op(A[i * lda + j], B[i * ldb + j])
if (BAsRowVector == 1 && BAsColVector == 0) op(A[i * lda + j], B[j])
if (BAsRowVector == 0 && BAsColVector == 1) op(A[i * lda + j], B[i * ldb])
if (BAsRowVector == 1 && BAsColVector == 1) op(A[i * lda + j], B[0])
- Parameters
  - op : binary op. See namespace binary.
  - A_h : matrix.
  - B_h : matrix.
  - dimM : matrix height.
  - dimN : matrix width.
  - lda : leading dimension of A.
  - ldb : leading dimension of B.
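The template flags BAsRowVector / BAsColVector select the broadcast pattern listed above. The sketch below applies a hypothetical functor AddInto to every element of A, treating B as a row vector. The functor is illustrative only, and its mutate-the-first-argument operator() signature is an assumption based on the op-expression format documented for DEFINE_MATRIX_BINARY_OP under hl_matrix_ops.cuh below; the ops shipped with the library live in namespace binary.

    #include "hl_matrix_apply.cuh"

    /* Hypothetical element-wise op: a += b. */
    class AddInto {
    public:
      inline void operator()(real &a, real &b) const { a += b; }
    };

    /* Adds the row vector b_h (length dimN) to every row of the
     * dimM x dimN host matrix a_h, using the BAsRowVector broadcast. */
    void example_add_row_vector(real *a_h, real *b_h, int dimM, int dimN) {
      hl_cpu_apply_binary_op<real, AddInto, /* BAsRowVector */ true,
                             /* BAsColVector */ false>(
          AddInto(), a_h, b_h, dimM, dimN, /* lda */ dimN, /* ldb */ dimN);
    }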
- template <class T, class Op, bool CAsRowVector, bool CAsColVector>
-
void
hl_cpu_apply_ternary_op
(Op op, T *A_h, T *B_h, T *C_h, int dimM, int dimN, int lda, int ldb, int ldc)¶ CPU element wise ternary operator.
element wise op(a, b, c) for 0 <= i < dimM & for 0 <= j < dimN.
if (CAsRowVector == 0 && CAsColVector == 0) op(A[i*lda + j], B[i*ldb + j], C[i*ldc + j])
if (CAsRowVector == 1 && CAsColVector == 0) op(A[i*lda + j], B[i*ldb + j], C[j])
if (CAsRowVector == 0 && CAsColVector == 1) op(A[i*lda + j], B[i*ldb + j], C[i*ldc])
if (CAsRowVector == 1 && CAsColVector == 1) op(A[i*lda + j], B[i*ldb + j], C[0])
- Parameters
  - op : ternary op. See namespace ternary.
  - A_h : matrix.
  - B_h : matrix.
  - C_h : matrix.
  - dimM : matrix height.
  - dimN : matrix width.
  - lda : leading dimension of A.
  - ldb : leading dimension of B.
  - ldc : leading dimension of C.
- template <class T, class Op>
-
void
hl_cpu_apply_quaternary_op
(Op op, T *A_h, T *B_h, T *C_h, T *D_h, int dimM, int dimN, int lda, int ldb, int ldc, int ldd)¶ CPU element wise quaternary operator. element wise op(a, b, c, d) for 0 <= i < dimM & for 0 <= j < dimN.
- Parameters
  - op : quaternary op. See namespace ternary.
  - A_h : matrix.
  - B_h : matrix.
  - C_h : matrix.
  - D_h : matrix.
  - dimM : matrix height.
  - dimN : matrix width.
  - lda : leading dimension of A.
  - ldb : leading dimension of B.
  - ldc : leading dimension of C.
  - ldd : leading dimension of D.
- template <class T, class Op>
-
void
hl_gpu_apply_unary_op
(Op op, T *A_d, int dimM, int dimN, int lda)¶ GPU element wise unary operator. element wise op(a) for 0 <= i < dimM & for 0 <= j < dimN.
- Parameters
  - op : unary op. See namespace unary.
  - A_d : matrix.
  - dimM : matrix height.
  - dimN : matrix width.
  - lda : leading dimension of A.
- template <class T, class Op, bool BAsRowVector, bool BAsColVector>
-
void
hl_gpu_apply_binary_op
(Op op, T *A_d, T *B_d, int dimM, int dimN, int lda, int ldb)¶ GPU element wise binary operator.
element wise op(a, b) for 0 <= i < dimM & for 0 <= j < dimN
if (BAsRowVector == 0 && BAsColVector == 0) op(A[i * lda + j], B[i * ldb + j])
if (BAsRowVector == 1 && BAsColVector == 0) op(A[i * lda + j], B[j])
if (BAsRowVector == 0 && BAsColVector == 1) op(A[i * lda + j], B[i * ldb])
if (BAsRowVector == 1 && BAsColVector == 1) op(A[i * lda + j], B[0])
- Parameters
  - op : binary op. See namespace binary.
  - A_d : matrix.
  - B_d : matrix.
  - dimM : matrix height.
  - dimN : matrix width.
  - lda : leading dimension of A.
  - ldb : leading dimension of B.
- template <class T, class Op, bool CAsRowVector, bool CAsColVector>
-
void
hl_gpu_apply_ternary_op
(Op op, T *A_d, T *B_d, T *C_d, int dimM, int dimN, int lda, int ldb, int ldc)¶ GPU element wise ternary operator.
element wise op(a, b, c) for 0 <= i < dimM & for 0 <= j < dimN.
if (CAsRowVector == 0 && CAsColVector == 0) op(A[i*lda + j], B[i*ldb + j], C[i*ldc + j])
if (CAsRowVector == 1 && CAsColVector == 0) op(A[i*lda + j], B[i*ldb + j], C[j])
if (CAsRowVector == 0 && CAsColVector == 1) op(A[i*lda + j], B[i*ldb + j], C[i*ldc])
if (CAsRowVector == 1 && CAsColVector == 1) op(A[i*lda + j], B[i*ldb + j], C[0])
- Parameters
  - op : ternary op. See namespace ternary.
  - A_d : matrix.
  - B_d : matrix.
  - C_d : matrix.
  - dimM : matrix height.
  - dimN : matrix width.
  - lda : leading dimension of A.
  - ldb : leading dimension of B.
  - ldc : leading dimension of C.
- template <class T, class Op>
-
void
hl_gpu_apply_quaternary_op
(Op op, T *A_d, T *B_d, T *C_d, T *D_d, int dimM, int dimN, int lda, int ldb, int ldc, int ldd)¶ GPU element wise quaternary operator. element wise op(a, b, c, d) for 0 <= i < dimM & for 0 <= j < dimN.
- Parameters
  - op : quaternary op. See namespace ternary.
  - A_d : matrix.
  - B_d : matrix.
  - C_d : matrix.
  - D_d : matrix.
  - dimM : matrix height.
  - dimN : matrix width.
  - lda : leading dimension of A.
  - ldb : leading dimension of B.
  - ldc : leading dimension of C.
  - ldd : leading dimension of D.
- template <class Agg, class Op, class Saver>
-
void
hl_cpu_matrix_row_op
(Agg agg, Op op, Saver sv, int dimM, int dimN, real *dst, int ld, real *A, int lda)¶ CPU matrix row operator.
- template <class Saver, class Agg, class Op>
-
void
hl_cpu_matrix_row_op
(Agg agg, Op op, int dimM, int dimN, real *dst, int ld, real *A, int lda, real *B, int ldb)¶ CPU matrix row operator.
- Parameters
  - agg : aggregate operator expression.
  - op : operator expression.
  - dimM : matrix height.
  - dimN : matrix width.
  - dst : destination matrix.
  - ld : leading dimension of dst matrix.
  - *A : matrix A.
  - lda : leading dimension of matrix A.
  - *B : matrix B.
  - ldb : leading dimension of matrix B.
- template <class Agg, class Op, class Saver>
-
void
hl_cpu_matrix_column_op
(Agg agg, Op op, Saver sv, int dimM, int dimN, real *dst, real *A, int lda)¶ CPU matrix column operator.
- Parameters
  - agg : aggregate operator expression.
  - op : operator expression.
  - sv : assignment operator expression.
  - dimM : matrix height.
  - dimN : matrix width.
  - dst : destination matrix.
  - *A : matrix A.
  - lda : leading dimension of matrix A.
- template <class Agg, class Op, class Saver>
-
void
hl_cpu_matrix_column_op
(Agg agg, Op op, Saver sv, int dimM, int dimN, real *dst, real *A, int lda, real *B, int ldb)¶ CPU matrix column operator.
- Parameters
  - agg : aggregate operator expression.
  - op : operator expression.
  - sv : assignment operator expression.
  - dimM : matrix height.
  - dimN : matrix width.
  - dst : destination matrix.
  - *A : matrix A.
  - lda : leading dimension of matrix A.
  - *B : matrix B.
  - ldb : leading dimension of matrix B.
- template <class Agg, class Op, class Saver>
-
void
hl_gpu_matrix_row_op
(Agg agg, Op op, Saver sv, int dimM, int dimN, real *dst, int ld, real *A, int lda)¶ GPU matrix row operator.
- Parameters
  - agg : aggregate operator expression.
  - op : operator expression.
  - sv : assignment operator expression.
  - dimM : matrix height.
  - dimN : matrix width.
  - dst : destination matrix.
  - ld : leading dimension of dst.
  - *A : matrix A.
  - lda : leading dimension of matrix A.
- template <class Saver, class Agg, class Op>
-
void
hl_gpu_matrix_row_op
(Agg agg, Op op, int dimM, int dimN, real *dst, int ld, real *A, int lda, real *B, int ldb)¶ GPU matrix row operator.
- Parameters
  - agg : aggregate operator expression.
  - op : operator expression.
  - dimM : matrix height.
  - dimN : matrix width.
  - dst : destination matrix.
  - ld : leading dimension of dst matrix.
  - *A : matrix A.
  - lda : leading dimension of matrix A.
  - *B : matrix B.
  - ldb : leading dimension of matrix B.
- template <class Agg, class Op, class Saver>
-
void
hl_gpu_matrix_column_op
(Agg agg, Op op, Saver sv, int dimM, int dimN, real *dst, real *A, int lda)¶ GPU matrix column operator.
- Parameters
  - agg : aggregate operator expression.
  - op : operator expression.
  - sv : assignment operator expression.
  - dimM : matrix height.
  - dimN : matrix width.
  - dst : destination matrix.
  - *A : matrix A.
  - lda : leading dimension of matrix A.
- template <class Agg, class Op, class Saver>
-
void
hl_gpu_matrix_column_op
(Agg agg, Op op, Saver sv, int dimM, int dimN, real *dst, real *A, int lda, real *B, int ldb)¶ GPU matrix column operator.
- Parameters
  - agg : aggregate operator expression.
  - op : operator expression.
  - sv : assignment operator expression.
  - dimM : matrix height.
  - dimN : matrix width.
  - dst : destination matrix.
  - *A : matrix A.
  - lda : leading dimension of matrix A.
  - *B : matrix B.
  - ldb : leading dimension of matrix B.
hl_matrix_ops.cuh¶
Defines
-
HL_MATRIX_OPS_CUH_
¶
-
HL_DEVICE
¶
-
ONE_PARAMETER
(name)¶ parameter macro.
-
TWO_PARAMETER
(name)¶
-
THREE_PARAMETER
(name)¶
-
FOUR_PARAMETER
(name)¶
-
DEFINE_MATRIX_UNARY_OP
(name, op)¶ unary operator macro.
- Note
- op format: op supports multiple expressions that are separated by a comma. e.g. a, b
- See
hl_gpu_apply_unary_op
hl_cpu_apply_unary_op
- Parameters
  - name : operator name.
  - op : operator expression.
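A sketch of how this macro is intended to pair with the apply functions documented above. Per the See references it assumes the generated functor can be passed as the op argument of hl_cpu_apply_unary_op / hl_gpu_apply_unary_op and that it lands in namespace unary; the exact generated name unary::Relu<real> is an assumption.

    #include "hl_matrix_ops.cuh"
    #include "hl_matrix_apply.cuh"

    /* Define an element-wise op that clamps negatives to zero.
     * The op argument is an expression over 'a', as described in the Note. */
    DEFINE_MATRIX_UNARY_OP(Relu, a = (a > 0) ? a : 0);

    /* Apply it in place to a dimM x dimN host matrix (lda = dimN, no padding). */
    void example_relu_inplace(real *a_h, int dimM, int dimN) {
      hl_cpu_apply_unary_op(unary::Relu<real>(), a_h, dimM, dimN, dimN);
    }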
-
DEFINE_MATRIX_UNARY_PARAMETER_OP
(name, PARA_MACRO, op)¶ unary operator macro.
- Note
- op format: op supports multiple expressions that are separated by a comma. e.g. a, b
- See
hl_gpu_apply_unary_op
hl_cpu_apply_unary_op
- Parameters
  - name : operator name.
  - PARA_MACRO : parameter macro.
  - op : operator expression.
-
DEFINE_MATRIX_BINARY_OP
(name, op)¶ binary operator macro.
- Note
- op format: op supports multiple expressions that are separated by a comma. e.g. a, b
- See
hl_gpu_apply_unary_op
hl_cpu_apply_unary_op
- Parameters
  - name : operator name.
  - op : operator expression.
-
DEFINE_MATRIX_BINARY_PARAMETER_OP
(name, PARA_MACRO, op)¶ binary operator macro.
- Note
- op format: op supports multiple expressions that are separated by a comma. e.g. a, b
- See
hl_gpu_apply_binary_op
hl_cpu_apply_binary_op
- Parameters
  - name : operator name.
  - PARA_MACRO : parameter macro.
  - op : operator expression.
-
DEFINE_MATRIX_TERNARY_OP
(name, op)¶ ternary operator macro.
- Note
- op format: op supports multiple expressions that are separated by a comma. e.g. a, b, c
- See
hl_gpu_apply_ternary_op
hl_cpu_apply_ternary_op
- Parameters
  - name : operator name.
  - op : operator expression.
-
DEFINE_MATRIX_TERNARY_PARAMETER_OP
(name, PARA_MACRO, op)¶ ternary operator macro.
- Note
- op format: op supports multiple expressions that are separated by a comma. e.g. a, b, c
- See
hl_gpu_apply_ternary_op
hl_cpu_apply_ternary_op
- Parameters
  - name : operator name.
  - PARA_MACRO : parameter macro.
  - op : operator expression.
-
DEFINE_MATRIX_QUATERNARY_OP
(name, op)¶ quaternary operator macro.
- Note
- op format: op supports multiple expressions that are separated by a comma. e.g. a, b, c, d
- See
hl_gpu_apply_quaternary_op
hl_cpu_apply_quaternary_op
- Parameters
  - name : operator name.
  - op : operator expression.
-
DEFINE_MATRIX_QUATERNARY_PARAMETER_OP
(name, PARA_MACRO, op)¶ quaternary operator macro.
- Note
- op format: op supports multiple expressions that are separated by a comma. e.g. a, b, c, d
- See
hl_gpu_apply_quaternary_op
hl_cpu_apply_quaternary_op
- Parameters
  - name : operator name.
  - PARA_MACRO : parameter macro.
  - op : operator expression.
hl_sse_matrix_kernel.cuh¶
Functions
-
bool
hl_check_align
(size_t size)¶
-
bool
hl_check_align
(void *ptr)¶
- template <class Agg, class Op, class Saver>
-
void
hl_sse_matrix_row_op
(Agg agg, Op op, Saver sv, int dimM, int dimN, real *dst, int ld, real *A, int lda)¶
- template <class Agg, class Op, class Saver>
-
void
hl_sse_matrix_row_op
(Agg agg, Op op, Saver sv, int dimM, int dimN, real *dst, int ld, real *A, int lda, real *B, int ldb)¶
- template <class Agg, class Op, class Saver>
-
void
hl_matrix_column_op
(Agg agg, Op op, Saver sv, int dimM, int dimN, real *dst, real *A, int lda)¶
- template <class Agg, class Op, class Saver>
-
void
hl_matrix_column_op
(Agg agg, Op op, Saver sv, int dimM, int dimN, real *dst, real *A, int lda, real *B, int ldb)¶
- template <int MaxRow, class Agg, class Op, class Saver>
-
void
hl_sse_column_op_with_rem
(Agg agg, Op op, Saver sv, int dimM, int dimN, real *dst, real *A, int lda)¶
- template <int Step, class Agg, class Op, class Saver>
-
void
hl_sse_matrix_column_op
(Agg agg, Op op, Saver sv, int dimM, int dimN, real *dst, real *A, int lda)¶
- template <class Agg, class Op, class Saver>
-
void
hl_sse_matrix_column_op
(Agg agg, Op op, Saver sv, int dimM, int dimN, real *dst, real *A, int lda)¶
- template <int MaxRow, class Agg, class Op, class Saver>
-
void
hl_sse_column_op_with_rem
(Agg agg, Op op, Saver sv, int dimM, int dimN, real *dst, real *A, int lda, real *B, int ldb)¶
- template <int Step, class Agg, class Op, class Saver>
-
void
hl_sse_matrix_column_op
(Agg agg, Op op, Saver sv, int dimM, int dimN, real *dst, real *A, int lda, real *B, int ldb)¶
- template <class Agg, class Op, class Saver>
-
void
hl_sse_matrix_column_op
(Agg agg, Op op, Saver sv, int dimM, int dimN, real *dst, real *A, int lda, real *B, int ldb)¶
hl_batch_transpose.h¶
Functions
-
void
batchTranspose
(const real *input, real *output, int width, int height, int batchSize)¶ Perform a matrix transpose for each matrix in the batch.
- Note
- Both the input and output are arranged in batch-first order. Each batch has height * width data, which are arranged in height-first (or row-first) manner.
- Parameters
  - input : height * width elements per batch.
  - output : height * width elements per batch.
  - width : width of batch data.
  - height : height of batch data.
  - batchSize : batch size.
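A small sketch: transpose each height x width slice of the batch independently. The buffers are assumed to be allocated elsewhere and to hold batchSize * height * width reals in batch-first, row-first order, as the Note describes; whether they must be device or host memory is not stated here, so the sketch only shows the call shape.

    #include "hl_batch_transpose.h"

    /* Transpose each 4 x 8 matrix in a batch of 16.
     * input/output hold 16 * 4 * 8 reals each. */
    void example_batch_transpose(const real *input, real *output) {
      const int width = 8;
      const int height = 4;
      const int batchSize = 16;
      batchTranspose(input, output, width, height, batchSize);
    }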
Sparse Matrix¶
hl_sparse.h¶
Functions
-
void
hl_malloc_sparse_matrix
(hl_sparse_matrix_s *A_d, hl_matrix_format_t format, hl_matrix_value_t value_type, int dimM, int dimN, int nnz)¶ Malloc a sparse matrix.
- Parameters
  - A_d : sparse matrix.
  - format : format.
  - value_type : value type.
  - dimM : height.
  - dimN : width.
  - nnz : number of nonzero elements.
-
void
hl_free_sparse_matrix
(hl_sparse_matrix_s A_d)¶ Free a sparse matrix.
- Parameters
  - A_d : GPU sparse matrix.
-
void
hl_construct_sparse_matrix
(hl_sparse_matrix_s *A_d, void *dest_d, size_t size, hl_matrix_format_t format, hl_matrix_value_t value_type, int dimM, int dimN, int nnz)¶ Construct a sparse matrix using the given GPU memory.
- Note
- The corresponding destructor is hl_destruct_sparse_matrix.
- Parameters
  - A_d : sparse matrix.
  - dest_d : GPU memory.
  - size : size of dest_d.
  - format : format.
  - value_type : value type.
  - dimM : height.
  - dimN : width.
  - nnz : number of nonzero elements.
-
void
hl_construct_sparse_matrix
(hl_sparse_matrix_s *A_d, real *value_d, int *rows_d, int *cols_d, hl_matrix_format_t format, hl_matrix_value_t value_type, int dimM, int dimN, int nnz)¶ Construct a sparse matrix from three arrays.
If format is HL_SPARSE_CSR, the size of rows_d is dimM + 1 and the size of cols_d is nnz;
if format is HL_SPARSE_CSC, the size of rows_d is nnz and the size of cols_d is dimN + 1.
If valueType is HL_NO_VALUE, the size of value_d is zero; otherwise the size of value_d is nnz.
- Note
- The corresponding destructor interface is hl_destruct_sparse_matrix.
- Parameters
  - A_d : sparse matrix.
  - value_d : values.
  - rows_d : row indices.
  - cols_d : column indices.
  - format : format.
  - value_type : value type.
  - dimM : height.
  - dimN : width.
  - nnz : number of nonzero elements.
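A sketch of wrapping existing CSR arrays in an hl_sparse_matrix_s handle and releasing it with the documented destructor. HL_SPARSE_CSR and HL_FLOAT_VALUE are the enumerators named above; treating value_d, rows_d and cols_d as device pointers is an assumption here.

    #include "hl_sparse.h"

    /* Wrap existing CSR arrays (assumed device pointers) in a handle:
     * a dimM x dimN matrix with nnz nonzeros. For HL_SPARSE_CSR,
     * rows_d has dimM + 1 entries and cols_d has nnz entries. */
    void example_construct_csr(real *value_d, int *rows_d, int *cols_d,
                               int dimM, int dimN, int nnz) {
      hl_sparse_matrix_s A_d;
      hl_construct_sparse_matrix(&A_d, value_d, rows_d, cols_d,
                                 HL_SPARSE_CSR, HL_FLOAT_VALUE,
                                 dimM, dimN, nnz);

      /* ... use A_d with the sparse routines below ... */

      hl_destruct_sparse_matrix(A_d);
    }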
-
void
hl_destruct_sparse_matrix
(hl_sparse_matrix_s A_d)¶ Destruct sparse matrix.
- Parameters
  - A_d : sparse matrix.
-
void
hl_memcpy_csr_matrix
(hl_sparse_matrix_s csr_matrix, real *csr_val, int *csr_row, int *csr_col, hl_stream_t stream)¶ Copy value & index to sparse matrix.
If csr_matrix is HL_FLOAT_VALUE, either
- csr_val, csr_row and csr_col are all non-null, or
- csr_val is non-null while csr_row and csr_col are null.
If csr_matrix is HL_NO_VALUE,
- csr_val is ignored, and csr_row and csr_col must be non-null.
- Parameters
  - csr_matrix : sparse matrix.
  - csr_val : pointer to the CSR value array (nnz).
  - csr_row : pointer to the CSR row index array (dimM + 1).
  - csr_col : pointer to the CSR column index array (nnz).
  - stream : hl_stream_t type.
-
void
hl_memcpy_csc_matrix
(hl_sparse_matrix_s csc_matrix, real *csc_val, int *csc_row, int *csc_col, hl_stream_t stream)¶ Copy value & index to sparse matrix.
If csc_matrix is HL_FLOAT_VALUE, either
- csc_val, csc_row and csc_col are all non-null, or
- csc_val is non-null while csc_row and csc_col are null.
If csc_matrix is HL_NO_VALUE,
- csc_val is ignored, and csc_row and csc_col must be non-null.
- Parameters
  - csc_matrix : sparse matrix.
  - csc_val : pointer to the CSC value array (nnz).
  - csc_row : pointer to the CSC row index array (nnz).
  - csc_col : pointer to the CSC column index array (dimN + 1).
  - stream : hl_stream_t type.
-
void
hl_memcpy_sparse_matrix
(hl_sparse_matrix_s dst, hl_sparse_matrix_s src, hl_stream_t stream)¶ Copy sparse matrix to sparse matrix.
- Note
- 1. The formats of the src and dst matrices must be consistent.
- 2. If the source matrix has values, the destination matrix may have values or no values; if the source matrix has no values, the destination matrix must also have no values.
- Parameters
  - dst : sparse matrix.
  - src : sparse matrix.
  - stream : hl_stream_t type.
-
void
hl_matrix_csr2dense
(hl_sparse_matrix_s A_d, real *C_d, int dimM, int dimN)¶ Convert a CSR matrix to a dense matrix.
- Parameters
  - A_d : CSR matrix.
  - C_d : dense matrix.
  - dimM : height.
  - dimN : width.
-
void
hl_matrix_csc2dense
(hl_sparse_matrix_s A_d, real *C_d, int dimM, int dimN)¶ Convert a CSC matrix to a dense matrix.
- Parameters
  - A_d : CSC matrix.
  - C_d : dense matrix.
  - dimM : height.
  - dimN : width.
-
void
hl_matrix_csr_mul_dense
(hl_sparse_matrix_s A_d, hl_trans_op_t transa, real *B_d, hl_trans_op_t transb, real *C_d, int dimM, int dimN, int dimK, real alpha, real beta)¶ C_d = alpha*(op(A_d) * op(B_d)) + beta*C_d.
- Note
- transb does not support HPPL_OP_T.
- Parameters
  - A_d : CSR sparse matrix.
  - transa : operation op(A): non-transpose or transpose.
  - B_d : dense matrix.
  - transb : operation op(B): non-transpose or transpose.
  - C_d : dense matrix.
  - dimM : matrix height of op(A) and C.
  - dimN : matrix width of op(B) and C.
  - dimK : width of op(A) and height of op(B).
  - alpha : scalar used for multiplication.
  - beta : scalar used for multiplication. If beta is zero, C does not have to be a valid input.
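A sketch of a sparse-times-dense product C = A * B with alpha = 1 and beta = 0, where A is dimM x dimK in CSR form and B, C are dense device matrices allocated elsewhere. HPPL_OP_N as the "no transpose" enumerator is an assumption (only HPPL_OP_T is named in the Note, which also forbids it for transb).

    #include "hl_sparse.h"

    /* C_d = 1.0 * (A_d * B_d) + 0.0 * C_d
     * A_d : dimM x dimK CSR sparse matrix.
     * B_d : dimK x dimN dense matrix (device memory).
     * C_d : dimM x dimN dense matrix (device memory). */
    void example_csr_mul_dense(hl_sparse_matrix_s A_d, real *B_d, real *C_d,
                               int dimM, int dimN, int dimK) {
      hl_matrix_csr_mul_dense(A_d, HPPL_OP_N, B_d, HPPL_OP_N, C_d,
                              dimM, dimN, dimK, /* alpha */ 1.0, /* beta */ 0.0);
    }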
-
void
hl_matrix_csc_mul_dense
(hl_sparse_matrix_s A_d, hl_trans_op_t transa, real *B_d, hl_trans_op_t transb, real *C_d, int dimM, int dimN, int dimK, real alpha, real beta)¶ C_d = alpha*(op(A_d) * op(B_d)) + beta*C_d.
- Note
- transb does not support HPPL_OP_T.
- Parameters
  - A_d : CSC sparse matrix.
  - transa : operation op(A): non-transpose or transpose.
  - B_d : dense matrix.
  - transb : operation op(B): non-transpose or transpose.
  - C_d : dense matrix.
  - dimM : matrix height of op(A) and C.
  - dimN : matrix width of op(B) and C.
  - dimK : width of op(A) and height of op(B).
  - alpha : scalar used for multiplication.
  - beta : scalar used for multiplication. If beta is zero, C does not have to be a valid input.
-
void
hl_matrix_dense_mul_csc
(real *A_d, hl_trans_op_t transa, hl_sparse_matrix_s B_d, hl_trans_op_t transb, real *C_d, int dimM, int dimN, int dimK, real alpha, real beta)¶ C_d = alpha*(op(A_d) * op(B_d)) + beta*C_d.
- Note
- transa does not support HPPL_OP_T.
- Parameters
  - A_d : dense matrix.
  - transa : operation op(A): non-transpose or transpose.
  - B_d : CSC sparse matrix.
  - transb : operation op(B): non-transpose or transpose.
  - C_d : dense matrix.
  - dimM : matrix height of op(A) and C.
  - dimN : matrix width of op(B) and C.
  - dimK : width of op(A) and height of op(B).
  - alpha : scalar used for multiplication.
  - beta : scalar used for multiplication. If beta is zero, C does not have to be a valid input.
-
void
hl_sparse_matrix_mul
(real *A_d, hl_trans_op_t transa, real *B_d, hl_trans_op_t transb, hl_sparse_matrix_s C_d, int dimM, int dimN, int dimK, real alpha, real beta)¶ C_d = alpha*(op(A_d) * op(B_d)) + beta*C_d. The result is computed only at the nonzero positions of C.
- Note
- transb does not support HPPL_OP_T.
- Parameters
  - A_d : dense matrix.
  - transa : operation op(A): non-transpose or transpose.
  - B_d : dense matrix.
  - transb : operation op(B): non-transpose or transpose.
  - C_d : sparse matrix.
  - dimM : matrix height of op(A) and C.
  - dimN : matrix width of op(B) and C.
  - dimK : width of op(A) and height of op(B).
  - alpha : scalar used for multiplication.
  - beta : scalar used for multiplication.
-
void
hl_matrix_dense_mul_csr
(real *A_d, hl_trans_op_t transa, hl_sparse_matrix_s B_d, hl_trans_op_t transb, real *C_d, int dimM, int dimN, int dimK, real alpha, real beta)¶ C_d = alpha*(op(A_d) * op(B_d)) + beta*C_d.
- Note
- transa does not support HPPL_OP_T.
- Parameters
  - A_d : dense matrix.
  - transa : operation op(A): non-transpose or transpose.
  - B_d : CSR sparse matrix.
  - transb : operation op(B): non-transpose or transpose.
  - C_d : dense matrix.
  - dimM : matrix height of op(A) and C.
  - dimN : matrix width of op(B) and C.
  - dimK : width of op(A) and height of op(B).
  - alpha : scalar used for multiplication.
  - beta : scalar used for multiplication. If beta is zero, C does not have to be a valid input.
-
void
hl_memcpy_from_csc_matrix
(real *csc_val, size_t val_size, int *csc_row, size_t row_size, int *csc_col, size_t col_size, hl_sparse_matrix_s csc_matrix, hl_stream_t stream)¶ Memcpy csc_matrix to host.
a. According to csc_matrix, update the three arrays:
- csc_val, csc_row and csc_col are the destination addresses.
- If the type of csc_matrix is HL_NO_VALUE, update csc_row and csc_col.
- If the type of csc_matrix is HL_FLOAT_VALUE, update csc_row, csc_col and csc_val.
b. This is an asynchronous copy. To ensure the data has been copied, call the synchronization interface.
- Parameters
  - csc_val : pointer to the CSC value array (nnz).
  - val_size : CSC value size.
  - csc_row : pointer to the CSC row index array (nnz).
  - row_size : CSC row size.
  - csc_col : pointer to the CSC column index array (dimN + 1).
  - col_size : CSC column size.
  - csc_matrix : sparse matrix.
  - stream : hl_stream_t type.
-
void
hl_memcpy_from_csr_matrix
(real *csr_val, size_t val_size, int *csr_row, size_t row_size, int *csr_col, size_t col_size, hl_sparse_matrix_s csr_matrix, hl_stream_t stream)¶ Memcpy sparse matrix to host.
a. According to csr_matrix, update the three arrays:
- csr_val, csr_row and csr_col are the destination addresses.
- If the type of csr_matrix is HL_NO_VALUE, update csr_row and csr_col.
- If the type of csr_matrix is HL_FLOAT_VALUE, update csr_row, csr_col and csr_val.
b. This is an asynchronous copy. To ensure the data has been copied, call the synchronization interface.
- Parameters
  - csr_val : pointer to the CSR value array (nnz).
  - val_size : CSR value size.
  - csr_row : pointer to the CSR row index array (dimM + 1).
  - row_size : CSR row size.
  - csr_col : pointer to the CSR column index array (nnz).
  - col_size : CSR column size.
  - csr_matrix : sparse matrix.
  - stream : hl_stream_t type.
-
void
hl_sparse_matrix_column_sum
(real *A_d, hl_sparse_matrix_s B_d, int dimM, int dimN, real scale)¶ A_d[j] += B_d[i,j] for i in range(height)
- Parameters
  - A_d : vector, size = width.
  - B_d : sparse matrix.
  - dimM : height.
  - dimN : width.
  - scale : scale of B_d.
-
void
hl_matrix_csr_column_sum
(real *A_d, hl_sparse_matrix_s B_d, int dimM, int dimN, real scale)¶ CSR implementation of hl_sparse_matrix_column_sum.
-
void
hl_sparse_matrix_add_bias
(hl_sparse_matrix_s A_d, real *B_d, real scale)¶ A_d[i,j] += B_d[j].
- Parameters
  - A_d : sparse matrix.
  - B_d : vector, size = A_d.width.
  - scale : scale of B_d.
-
void
hl_matrix_csr_add_bias
(hl_sparse_matrix_s A_d, real *B_d, real scale)¶ CSR implementation of hl_sparse_matrix_add_bias.
-
void
hl_sparse_matrix_add_dense
(hl_sparse_matrix_s A_d, real *B_d, int dimM, int dimN, real alpha, real beta)¶ sparseMatrix = alpha * denseMatrix + beta * sparseMatrix, i.e. A_d[i,j] = alpha * B_d[i,j] + beta * A_d[i,j]. Only values of the dense matrix at (row, col) positions present in the sparse matrix are added; all other dense values are ignored.
- Parameters
  - A_d : sparse matrix.
  - B_d : dense matrix.
  - dimM : height of B_d.
  - dimN : width of B_d.
  - alpha : scale of B_d.
  - beta : scale of A_d.
-
void
hl_matrix_csr_add_dense
(hl_sparse_matrix_s A_d, real *B_d, int dimM, int dimN, real alpha, real beta)¶ CSR implementation of hl_sparse_matrix_add_dense.
-
int *
hl_sparse_matrix_get_rows
(hl_sparse_matrix_s sMat)¶ Get the rows pointer of a GpuSparseMatrix.
- Return
- The rows pointer, which is a GPU address.
- Parameters
  - sMat : sparse matrix.
-
int *
hl_sparse_matrix_get_cols
(hl_sparse_matrix_s sMat)¶ Get the cols pointer of a GpuSparseMatrix.
- Return
- The cols pointer, which is a GPU address.
- Parameters
  - sMat : sparse matrix.
-
real *
hl_sparse_matrix_get_value
(hl_sparse_matrix_s sMat)¶ Get the value pointer of a GpuSparseMatrix.
- Return
- The value pointer, which is a GPU address.
- Parameters
  - sMat : sparse matrix.
hl_sparse.ph¶
Defines
-
HL_SPARSE_PH_
¶
-
__sparse_get_type_return__
(mat, type, field)¶
-
__sparse_get_return__
(mat, field)¶
Typedefs
-
typedef struct _hl_csr_matrix *
hl_csr_matrix
¶
-
typedef struct _hl_csc_matrix *
hl_csc_matrix
¶
-
struct
_hl_csr_matrix
¶ Sparse matrix in CSR format.
- Parameters
  - *csr_val : nonzero values of the matrix.
  - *csr_row : row indices.
  - *csr_col : column indices.
  - nnz_s : size of csr_val & csr_col.
  - row_s : size of csr_row.
  - sparsity : sparsity pattern.
-
struct
_hl_csc_matrix
¶ Sparse matrix in CSC format.
- Parameters
  - *csc_val : nonzero values of the matrix.
  - *csc_row : row indices.
  - *csc_col : column indices.
  - nnz_s : size of csc_val & csc_row.
  - col_s : size of csc_col.
  - sparsity : sparsity pattern.
Others¶
hl_aggregate.h¶
Functions
-
void
hl_matrix_row_sum
(real *A_d, real *C_d, int dimM, int dimN)¶ Calculate the sum of each row of the matrix A_d.
- Parameters
  - A_d : input matrix (M x N).
  - C_d : output matrix (M x 1).
  - dimM : matrix height.
  - dimN : matrix width.
-
void
hl_matrix_row_max
(real *A_d, real *C_d, int dimM, int dimN)¶ Calculate the maximum value of each row of the matrix A_d.
- Parameters
  - A_d : input matrix (M x N).
  - C_d : output matrix (M x 1).
  - dimM : matrix height.
  - dimN : matrix width.
-
void
hl_matrix_row_min
(real *A_d, real *C_d, int dimM, int dimN)¶ Calculate the minimum value of each row of the matrix A_d.
- Parameters
  - A_d : input matrix (M x N).
  - C_d : output matrix (M x 1).
  - dimM : matrix height.
  - dimN : matrix width.
-
void
hl_matrix_column_sum
(real *A_d, real *C_d, int dimM, int dimN)¶ Calculate the sum of each column of the matrix A_d.
- Parameters
  - A_d : input matrix (M x N).
  - C_d : output matrix (1 x N).
  - dimM : matrix height.
  - dimN : matrix width.
-
void
hl_matrix_column_max
(real *A_d, real *C_d, int dimM, int dimN)¶ Calculate the maximum value of each column of the matrix A_d.
- Parameters
  - A_d : input matrix (M x N).
  - C_d : output matrix (1 x N).
  - dimM : matrix height.
  - dimN : matrix width.
-
void
hl_matrix_column_min
(real *A_d, real *C_d, int dimM, int dimN)¶ Calculate the minimum value of each column of the matrix A_d.
- Parameters
  - A_d : input matrix (M x N).
  - C_d : output matrix (1 x N).
  - dimM : matrix height.
  - dimN : matrix width.
-
void
hl_vector_sum
(real *A_d, real *C_h, int dimM)¶ C_h = sum(A_d[i]).
- Parameters
  - A_d : input vector (dimM).
  - C_h : output (host memory).
  - dimM : size of the vector.
-
void
hl_vector_abs_sum
(real *A_d, real *C_h, int dimM)¶ C_h = sum(abs(A_d[i])).
- Parameters
  - A_d : input vector (dimM).
  - C_h : output (host memory).
  - dimM : size of the vector.
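These two reductions write a single scalar directly to host memory, so no extra device-to-host copy is needed for the result. A minimal sketch, assuming A_d is a device vector allocated elsewhere:

    #include "hl_aggregate.h"

    /* Sum and L1 norm of a device vector of length dimM.
     * The results land directly in host variables. */
    void example_vector_sums(real *A_d, int dimM) {
      real total = 0;
      real l1 = 0;
      hl_vector_sum(A_d, &total, dimM);     /* total = sum(A_d[i])   */
      hl_vector_abs_sum(A_d, &l1, dimM);    /* l1 = sum(|A_d[i]|)    */
      /* ... use total and l1 here ... */
    }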
hl_table_apply.h¶
Functions
-
void
hl_matrix_select_rows
(real *output, int ldo, real *table, int ldt, int *ids, int numSamples, int tableSize, int dim)¶ Gather rows from a table: output[i] += table[ids[i]]. If ids[i] == -1, that row is ignored.
- Parameters
  - output : output matrix.
  - ldo : leading dimension of output.
  - table : table matrix.
  - ldt : leading dimension of table.
  - ids : ids vector.
  - numSamples : height of output.
  - tableSize : height of table.
  - dim : width of table.
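This is effectively an embedding-table lookup with accumulation: row ids[i] of the table is added into row i of the output, and entries of -1 are skipped. A sketch, assuming the device buffers were allocated elsewhere; since the operation accumulates, the output can be cleared first, for example with hl_matrix_zero_mem from hl_matrix.h.

    #include "hl_table_apply.h"

    /* Accumulate table rows into output: output[i] += table[ids[i]].
     * output : numSamples x dim (device memory).
     * table  : tableSize x dim (device memory).
     * ids    : numSamples indices (device memory); -1 entries are skipped. */
    void example_select_rows(real *output, real *table, int *ids,
                             int numSamples, int tableSize, int dim) {
      hl_matrix_select_rows(output, /* ldo */ dim, table, /* ldt */ dim,
                            ids, numSamples, tableSize, dim);
    }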
-
void
hl_matrix_add_to_rows
(real *table, int ldt, real *input, int ldi, int *ids, int numSamples, int tableSize, int dim)¶ Add rows to a table: table[ids[i]] += input[i]. If ids[i] == -1, that row is ignored.
- Parameters
  - table : table matrix.
  - ldt : leading dimension of table.
  - input : input matrix.
  - ldi : leading dimension of input.
  - ids : ids vector.
  - numSamples : height of input.
  - tableSize : height of table.
  - dim : width of table.
- template <class T>
-
void
hl_vector_select_from
(T *dst, int sized, const T *src, int sizes, const int *ids, int sizei)¶ Select elements from a vector.
- Parameters
  - dst : output vector.
  - sized : size of dst.
  - src : input vector.
  - sizes : size of src.
  - ids : index vector.
  - sizei : size of ids.
hl_top_k.h¶
Functions
-
void
hl_matrix_top_k
(real *topVal, int ldv, int *topIds, real *src, int lds, int dim, int beamSize, int numSamples)¶ Find the top k elements.
- Parameters
  - topVal : top k values.
  - ldv : leading dimension of topVal.
  - topIds : top k indices.
  - src : input values.
  - lds : leading dimension of src.
  - dim : width of the input.
  - beamSize : beam size.
  - numSamples : height of the input.
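A sketch of selecting the beamSize largest entries of each row of a dense score matrix. That the selection is per row is inferred from the parameter layout (numSamples rows, dim columns) and from the sparse variant below, so treat it as an assumption; the buffers are assumed to be device memory allocated elsewhere.

    #include "hl_top_k.h"

    /* Top-5 values and indices for each of numSamples rows of src.
     * topVal : numSamples x 5 output values (device memory).
     * topIds : numSamples x 5 output indices (device memory).
     * src    : numSamples x dim input scores (device memory). */
    void example_top_k(real *topVal, int *topIds, real *src,
                       int numSamples, int dim) {
      const int beamSize = 5;
      hl_matrix_top_k(topVal, /* ldv */ beamSize, topIds,
                      src, /* lds */ dim, dim, beamSize, numSamples);
    }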
-
void
hl_sparse_matrix_top_k
(real *topVal, int ldv, int *topIds, hl_sparse_matrix_s src, int beamSize, int numSamples)¶ Find the top k elements for each row of a sparse matrix.
- Note
- Only the HL_SPARSE_CSR format is supported.
- Parameters
  - topVal : top k values.
  - ldv : leading dimension of topVal.
  - topIds : top k indices.
  - src : sparse matrix.
  - beamSize : beam size.
  - numSamples : height of the input.