Matrix
Base Matrix
hl_matrix.h
Functions
- void hl_matrix_add(real *A_d, real *B_d, real *C_d, int dimM, int dimN, real alpha, real beta)
  Matrix addition: C_d[i] = alpha * A_d[i] + beta * B_d[i].
  Parameters:
  - A_d: input matrix (M x N).
  - B_d: input matrix (M x N).
  - C_d: output matrix (M x N).
  - dimM: matrix height.
  - dimN: matrix width.
  - alpha: scalar used for addition.
  - beta: scalar used for addition.
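  A minimal usage sketch (not part of the header: it assumes the default single-precision build where real is float, and device buffers allocated with the CUDA runtime; names are illustrative):

    #include <cuda_runtime.h>
    #include "hl_matrix.h"

    /* Compute C = 0.5 * A + 2.0 * B for a 4 x 8 matrix on the GPU. */
    void add_example(void) {
        const int dimM = 4, dimN = 8;
        const size_t bytes = (size_t)dimM * dimN * sizeof(real);
        real *A_d, *B_d, *C_d;
        cudaMalloc((void **)&A_d, bytes);
        cudaMalloc((void **)&B_d, bytes);
        cudaMalloc((void **)&C_d, bytes);
        /* ... fill A_d and B_d, e.g. via cudaMemcpy from host data ... */
        hl_matrix_add(A_d, B_d, C_d, dimM, dimN, 0.5, 2.0);
        cudaFree(A_d);
        cudaFree(B_d);
        cudaFree(C_d);
    }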
- void hl_matrix_softmax(real *A_d, real *C_d, int dimM, int dimN)
  Matrix softmax.
  Parameters:
  - A_d: input matrix (M x N).
  - C_d: output matrix (M x N).
  - dimM: matrix height.
  - dimN: matrix width.
- void hl_matrix_softmax_derivative(real *grad_d, real *output_d, real *sftmaxSum_d, int dimM, int dimN)
  Matrix softmax derivative.
  Parameters:
  - grad_d: input matrix (M x N).
  - output_d: output matrix (M x N).
  - sftmaxSum_d: softmax sum (M x 1).
  - dimM: matrix height.
  - dimN: matrix width.
- void hl_sequence_softmax_forward(real *A_d, real *C_d, const int *index, int numSequence)
  Sequence softmax.
  Parameters:
  - A_d: input vector.
  - C_d: output vector.
  - index: start positions of the sequences.
  - numSequence: number of sequences.
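  To illustrate the index layout, a sketch assuming the usual packed-sequence convention (the start offset of each sequence plus a terminating end offset; the header does not state whether index must live in host or device memory, so treat the placement as an assumption):

    /* Two packed sequences: elements [0, 3) and [3, 5) of A_d. */
    const int numSequence = 2;
    const int index[] = {0, 3, 5};
    hl_sequence_softmax_forward(A_d, C_d, index, numSequence);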
- void hl_matrix_classification_error(real *A_d, int *B_d, real *C_d, int dimM, int dimN)
  Matrix classification error.
  Parameters:
  - A_d: input matrix (M x N).
  - B_d: input vector (M x 1).
  - C_d: output vector (M x 1).
  - dimM: matrix height.
  - dimN: matrix width.
- void hl_matrix_cross_entropy(real *A_d, real *C_d, int *label_d, int dimM, int dimN)
  Matrix cross entropy.
  Parameters:
  - A_d: input matrix (M x N).
  - C_d: output vector (M x 1).
  - label_d: input vector (M x 1).
  - dimM: matrix height.
  - dimN: matrix width.
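  A sketch of the expected semantics; the header documents only the shapes, so the per-row formula below is the standard cross-entropy definition and is an assumption here:

    /* For each row i of the M x N probability matrix A_d:
     *   C_d[i] = -log(A_d[i * dimN + label_d[i]])
     * where label_d holds one class index per row. */
    hl_matrix_cross_entropy(A_d, C_d, label_d, dimM, dimN);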
- void hl_matrix_cross_entropy_bp(real *grad_d, real *output_d, int *label_d, int dimM, int dimN)
  Matrix cross entropy back propagation.
  Parameters:
  - grad_d: output matrix (M x N).
  - output_d: input matrix (M x N).
  - label_d: input vector (M x 1).
  - dimM: matrix height.
  - dimN: matrix width.
- void hl_matrix_multi_binary_cross_entropy(real *output, real *entropy, hl_sparse_matrix_s mat, int dimM, int dimN)
  Matrix multi-binary label cross entropy.
  Parameters:
  - output: input matrix (M x N).
  - entropy: output matrix (M x 1).
  - mat: input sparse matrix.
  - dimM: matrix height.
  - dimN: matrix width.
- void hl_matrix_multi_binary_cross_entropy_bp(real *output, real *grad, hl_sparse_matrix_s mat, int dimM, int dimN)
  Matrix multi-binary label cross entropy back propagation.
  Parameters:
  - output: input matrix (M x N).
  - grad: output matrix (M x N).
  - mat: input sparse matrix.
  - dimM: matrix height.
  - dimN: matrix width.
- void hl_matrix_zero_mem(real *data, int num)
  Zero a matrix's memory.
  Parameters:
  - data: input data.
  - num: length of data.
- void hl_param_relu_forward(real *output, real *input, real *w, int width, int height, int partial_sum)
  Parametric ReLU forward.
  Parameters:
  - output: output data.
  - input: input data.
  - w: parameter data.
  - width: matrix width.
  - height: matrix height.
  - partial_sum:
- void hl_param_relu_backward_w(real *grad_w, real *grad_o, real *input, int width, int height, int partial_sum)
  Parametric ReLU backward with respect to w.
  Parameters:
  - grad_w: gradient of w.
  - grad_o: output gradient.
  - input: input data.
  - width: matrix width.
  - height: matrix height.
  - partial_sum:
- void hl_param_relu_backward_diff(real *grad_o, real *input, real *w, real *diff, int width, int height, int partial_sum)
  Parametric ReLU backward with respect to the input (diff).
  Parameters:
  - grad_o: output gradient.
  - input: input data.
  - w: parameter.
  - diff: input gradient.
  - width: matrix width.
  - height: matrix height.
  - partial_sum:
- void hl_cossim(real *output, real *input1, real *input2, int width, int input1_height, int input2_height, real scale)
  Cosine similarity forward.
  Parameters:
  - output: output data.
  - input1: input1 data (matrix).
  - input2: input2 data (matrix or vector).
  - width: matrix width.
  - input1_height: input1 height.
  - input2_height: input2 height.
  - scale: scale factor.
- void hl_cossim_derivative(real *grad, real *output, real *prevOutX, real *prevOutY, real *prevGradX, real *prevGradY, int width, int input1_height, int input2_height, real scale)
  Cosine similarity derivative.
  Parameters:
  - grad: output gradient.
  - output: output data.
  - prevOutX: input1 data.
  - prevOutY: input2 data.
  - prevGradX: input1 gradient.
  - prevGradY: input2 gradient.
  - width: matrix width.
  - input1_height: input1 height.
  - input2_height: input2 height.
  - scale: scale factor.
- Matrix addition: A_d[i][j] += scale * B_d[j/channel].
  Parameters:
  - A_d: input matrix (M x N).
  - B_d: input matrix (1 x channel).
  - channel: width of B.
  - dimM: height of A.
  - dimN: width of A.
  - scale: scalar used for addition.
- Matrix addition: A_d[i][j] += scale * B_d[j/channel].
  Parameters:
  - B_d: input matrix (1 x channel).
  - A_d: input matrix (M x N).
  - channel: width of B.
  - dimM: height of A.
  - dimN: width of A.
  - scale: scalar used for addition.
hl_matrix_base.h
- class BaseOp
  Public Functions
  - BaseOp()
  - BaseOp(const real s1)
  - BaseOp(const real s1, const real s2)
  - INLINE vecType BaseOp::vecOp(const vecType a) const
  - INLINE vecType BaseOp::vecOp(const vecType a, const vecType b) const
  Public Static Attributes
  - const bool sse
- namespace aggregate
  - class sum
    Inherits from aggregate::SSESum
    Public Functions
    - INLINE real aggregate::sum::init()
    - INLINE real aggregate::sum::operator()(const real a, const real b) const
  - class max
    Inherits from aggregate::SSEMax
    Public Functions
    - INLINE real aggregate::max::init()
    - INLINE real aggregate::max::operator()(const real a, const real b) const
  - class min
    Inherits from aggregate::SSEMin
    Public Functions
    - INLINE real aggregate::min::init()
    - INLINE real aggregate::min::operator()(const real a, const real b) const
- namespace base
  - namespace binary
    - class add
      Inherits from base::binary::SSEAdd
      Public Functions
      - INLINE real base::binary::add::operator()(const real a, const real b) const
    - class add2
      Inherits from base::binary::SSEAdd2
      Public Functions
      - add2(const real s1, const real s2)
      - INLINE real base::binary::add2::operator()(const real a, const real b) const
    - class sub
      Inherits from base::binary::SSESub
      Public Functions
      - INLINE real base::binary::sub::operator()(const real a, const real b) const
    - class mul
      Inherits from base::binary::SSEMul
      Public Functions
      - INLINE real base::binary::mul::operator()(const real a, const real b) const
    - class div
      Inherits from base::binary::SSEDiv
      Public Functions
      - INLINE real base::binary::div::operator()(const real a, const real b) const
    - class squaredDiff
      Inherits from base::binary::SSESquaredDiff
      Public Functions
      - INLINE real base::binary::squaredDiff::operator()(const real a, const real b) const
    - class first
      Inherits from base::binary::SSEFirst
      Public Functions
      - INLINE real base::binary::first::operator()(const real a, const real b) const
    - class second
      Inherits from base::binary::SSESecond
      Public Functions
      - INLINE real base::binary::second::operator()(const real a, const real b) const
hl_matrix_apply.cuh
Defines
- HL_MATRIX_APPLY_H_
Functions
- template <class T, class Op>
  void hl_cpu_apply_unary_op(Op op, T *A_h, int dimM, int dimN, int lda)
  CPU element-wise unary operator: applies op(a) for 0 <= i < dimM and 0 <= j < dimN.
  Parameters:
  - op: unary op; see namespace unary.
  - A_h: matrix.
  - dimM: matrix height.
  - dimN: matrix width.
  - lda: leading dimension of A.
- template <class T, class Op, bool BAsRowVector, bool BAsColVector>
  void hl_cpu_apply_binary_op(Op op, T *A_h, T *B_h, int dimM, int dimN, int lda, int ldb)
  CPU element-wise binary operator: applies op(a, b) for 0 <= i < dimM and 0 <= j < dimN.
  - if (BAsRowVector == 0 && BAsColVector == 0): op(A[i * lda + j], B[i * ldb + j])
  - if (BAsRowVector == 1 && BAsColVector == 0): op(A[i * lda + j], B[j])
  - if (BAsRowVector == 0 && BAsColVector == 1): op(A[i * lda + j], B[i * ldb])
  - if (BAsRowVector == 1 && BAsColVector == 1): op(A[i * lda + j], B[0])
  Parameters:
  - op: binary op; see namespace binary.
  - A_h: matrix.
  - B_h: matrix.
  - dimM: matrix height.
  - dimN: matrix width.
  - lda: leading dimension of A.
  - ldb: leading dimension of B.
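  A sketch tying this to the functors from hl_matrix_base.h above: element-wise A += B using base::binary::add with both broadcast flags off. Contiguous row-major storage is assumed (lda = ldb = dimN), and default-constructibility of base::binary::add is inferred from the class list above:

    #include "hl_matrix_base.h"
    #include "hl_matrix_apply.cuh"

    /* A_h[i][j] += B_h[i][j] over an M x N matrix. */
    void elementwise_add(real *A_h, real *B_h, int dimM, int dimN) {
        hl_cpu_apply_binary_op<real, base::binary::add, false, false>(
            base::binary::add(), A_h, B_h, dimM, dimN, dimN, dimN);
    }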
- template <class T, class Op, bool CAsRowVector, bool CAsColVector>
  void hl_cpu_apply_ternary_op(Op op, T *A_h, T *B_h, T *C_h, int dimM, int dimN, int lda, int ldb, int ldc)
  CPU element-wise ternary operator: applies op(a, b, c) for 0 <= i < dimM and 0 <= j < dimN.
  - if (CAsRowVector == 0 && CAsColVector == 0): op(A[i*lda + j], B[i*ldb + j], C[i*ldc + j])
  - if (CAsRowVector == 1 && CAsColVector == 0): op(A[i*lda + j], B[i*ldb + j], C[j])
  - if (CAsRowVector == 0 && CAsColVector == 1): op(A[i*lda + j], B[i*ldb + j], C[i*ldc])
  - if (CAsRowVector == 1 && CAsColVector == 1): op(A[i*lda + j], B[i*ldb + j], C[0])
  Parameters:
  - op: ternary op; see namespace ternary.
  - A_h: matrix.
  - B_h: matrix.
  - C_h: matrix.
  - dimM: matrix height.
  - dimN: matrix width.
  - lda: leading dimension of A.
  - ldb: leading dimension of B.
  - ldc: leading dimension of C.
- template <class T, class Op>
  void hl_cpu_apply_quaternary_op(Op op, T *A_h, T *B_h, T *C_h, T *D_h, int dimM, int dimN, int lda, int ldb, int ldc, int ldd)
  CPU element-wise quaternary operator: applies op(a, b, c, d) for 0 <= i < dimM and 0 <= j < dimN.
  Parameters:
  - op: quaternary op; see namespace quaternary.
  - A_h: matrix.
  - B_h: matrix.
  - C_h: matrix.
  - D_h: matrix.
  - dimM: matrix height.
  - dimN: matrix width.
  - lda: leading dimension of A.
  - ldb: leading dimension of B.
  - ldc: leading dimension of C.
  - ldd: leading dimension of D.
- template <class T, class Op>
  void hl_gpu_apply_unary_op(Op op, T *A_d, int dimM, int dimN, int lda)
  GPU element-wise unary operator: applies op(a) for 0 <= i < dimM and 0 <= j < dimN.
  Parameters:
  - op: unary op; see namespace unary.
  - A_d: matrix.
  - dimM: matrix height.
  - dimN: matrix width.
  - lda: leading dimension of A.
- template <class T, class Op, bool BAsRowVector, bool BAsColVector>
  void hl_gpu_apply_binary_op(Op op, T *A_d, T *B_d, int dimM, int dimN, int lda, int ldb)
  GPU element-wise binary operator: applies op(a, b) for 0 <= i < dimM and 0 <= j < dimN.
  - if (BAsRowVector == 0 && BAsColVector == 0): op(A[i * lda + j], B[i * ldb + j])
  - if (BAsRowVector == 1 && BAsColVector == 0): op(A[i * lda + j], B[j])
  - if (BAsRowVector == 0 && BAsColVector == 1): op(A[i * lda + j], B[i * ldb])
  - if (BAsRowVector == 1 && BAsColVector == 1): op(A[i * lda + j], B[0])
  Parameters:
  - op: binary op; see namespace binary.
  - A_d: matrix.
  - B_d: matrix.
  - dimM: matrix height.
  - dimN: matrix width.
  - lda: leading dimension of A.
  - ldb: leading dimension of B.
- template <class T, class Op, bool CAsRowVector, bool CAsColVector>
  void hl_gpu_apply_ternary_op(Op op, T *A_d, T *B_d, T *C_d, int dimM, int dimN, int lda, int ldb, int ldc)
  GPU element-wise ternary operator: applies op(a, b, c) for 0 <= i < dimM and 0 <= j < dimN.
  - if (CAsRowVector == 0 && CAsColVector == 0): op(A[i*lda + j], B[i*ldb + j], C[i*ldc + j])
  - if (CAsRowVector == 1 && CAsColVector == 0): op(A[i*lda + j], B[i*ldb + j], C[j])
  - if (CAsRowVector == 0 && CAsColVector == 1): op(A[i*lda + j], B[i*ldb + j], C[i*ldc])
  - if (CAsRowVector == 1 && CAsColVector == 1): op(A[i*lda + j], B[i*ldb + j], C[0])
  Parameters:
  - op: ternary op; see namespace ternary.
  - A_d: matrix.
  - B_d: matrix.
  - C_d: matrix.
  - dimM: matrix height.
  - dimN: matrix width.
  - lda: leading dimension of A.
  - ldb: leading dimension of B.
  - ldc: leading dimension of C.
- template <class T, class Op>
  void hl_gpu_apply_quaternary_op(Op op, T *A_d, T *B_d, T *C_d, T *D_d, int dimM, int dimN, int lda, int ldb, int ldc, int ldd)
  GPU element-wise quaternary operator: applies op(a, b, c, d) for 0 <= i < dimM and 0 <= j < dimN.
  Parameters:
  - op: quaternary op; see namespace quaternary.
  - A_d: matrix.
  - B_d: matrix.
  - C_d: matrix.
  - D_d: matrix.
  - dimM: matrix height.
  - dimN: matrix width.
  - lda: leading dimension of A.
  - ldb: leading dimension of B.
  - ldc: leading dimension of C.
  - ldd: leading dimension of D.
- template <class Agg, class Op, class Saver>
  void hl_cpu_matrix_row_op(Agg agg, Op op, Saver sv, int dimM, int dimN, real *dst, int ld, real *A, int lda)
  CPU matrix row operator.
- template <class Saver, class Agg, class Op>
  void hl_cpu_matrix_row_op(Agg agg, Op op, int dimM, int dimN, real *dst, int ld, real *A, int lda, real *B, int ldb)
  CPU matrix row operator.
  Parameters:
  - agg: aggregate operator expression.
  - op: operator expression.
  - dimM: matrix height.
  - dimN: matrix width.
  - dst: destination matrix.
  - ld: leading dimension of the dst matrix.
  - A: matrix A.
  - lda: leading dimension of matrix A.
  - B: matrix B.
  - ldb: leading dimension of matrix B.
- template <class Agg, class Op, class Saver>
  void hl_cpu_matrix_column_op(Agg agg, Op op, Saver sv, int dimM, int dimN, real *dst, real *A, int lda)
  CPU matrix column operator.
  Parameters:
  - agg: aggregate operator expression.
  - op: operator expression.
  - sv: assignment operator expression.
  - dimM: matrix height.
  - dimN: matrix width.
  - dst: destination matrix.
  - A: matrix A.
  - lda: leading dimension of matrix A.
- template <class Agg, class Op, class Saver>
  void hl_cpu_matrix_column_op(Agg agg, Op op, Saver sv, int dimM, int dimN, real *dst, real *A, int lda, real *B, int ldb)
  CPU matrix column operator.
  Parameters:
  - agg: aggregate operator expression.
  - op: operator expression.
  - sv: assignment operator expression.
  - dimM: matrix height.
  - dimN: matrix width.
  - dst: destination matrix.
  - A: matrix A.
  - lda: leading dimension of matrix A.
  - B: matrix B.
  - ldb: leading dimension of matrix B.
- template <class Agg, class Op, class Saver>
  void hl_gpu_matrix_row_op(Agg agg, Op op, Saver sv, int dimM, int dimN, real *dst, int ld, real *A, int lda)
  GPU matrix row operator.
  Parameters:
  - agg: aggregate operator expression.
  - op: operator expression.
  - sv: assignment operator expression.
  - dimM: matrix height.
  - dimN: matrix width.
  - dst: destination matrix.
  - ld: leading dimension of dst.
  - A: matrix A.
  - lda: leading dimension of matrix A.
- template <class Saver, class Agg, class Op>
  void hl_gpu_matrix_row_op(Agg agg, Op op, int dimM, int dimN, real *dst, int ld, real *A, int lda, real *B, int ldb)
  GPU matrix row operator.
  Parameters:
  - agg: aggregate operator expression.
  - op: operator expression.
  - dimM: matrix height.
  - dimN: matrix width.
  - dst: destination matrix.
  - ld: leading dimension of the dst matrix.
  - A: matrix A.
  - lda: leading dimension of matrix A.
  - B: matrix B.
  - ldb: leading dimension of matrix B.
- template <class Agg, class Op, class Saver>
  void hl_gpu_matrix_column_op(Agg agg, Op op, Saver sv, int dimM, int dimN, real *dst, real *A, int lda)
  GPU matrix column operator.
  Parameters:
  - agg: aggregate operator expression.
  - op: operator expression.
  - sv: assignment operator expression.
  - dimM: matrix height.
  - dimN: matrix width.
  - dst: destination matrix.
  - A: matrix A.
  - lda: leading dimension of matrix A.
- template <class Agg, class Op, class Saver>
  void hl_gpu_matrix_column_op(Agg agg, Op op, Saver sv, int dimM, int dimN, real *dst, real *A, int lda, real *B, int ldb)
  GPU matrix column operator.
  Parameters:
  - agg: aggregate operator expression.
  - op: operator expression.
  - sv: assignment operator expression.
  - dimM: matrix height.
  - dimN: matrix width.
  - dst: destination matrix.
  - A: matrix A.
  - lda: leading dimension of matrix A.
  - B: matrix B.
  - ldb: leading dimension of matrix B.
hl_matrix_ops.cuh
Defines
- HL_MATRIX_OPS_CUH_
- HL_DEVICE
- ONE_PARAMETER(name)
  Parameter macro.
- TWO_PARAMETER(name)
- THREE_PARAMETER(name)
- FOUR_PARAMETER(name)
- DEFINE_MATRIX_UNARY_OP(name, op)
  Unary operator macro.
  Note: op supports multiple expressions separated by commas, e.g. a, b.
  See: hl_gpu_apply_unary_op, hl_cpu_apply_unary_op.
  Parameters:
  - name: operator name.
  - op: operator expression.
- DEFINE_MATRIX_UNARY_PARAMETER_OP(name, PARA_MACRO, op)
  Unary operator macro.
  Note: op supports multiple expressions separated by commas, e.g. a, b.
  See: hl_gpu_apply_unary_op, hl_cpu_apply_unary_op.
  Parameters:
  - name: operator name.
  - PARA_MACRO: parameter macro.
  - op: operator expression.
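  For example, a sketch of defining and applying a parameterized unary operator, modeled on how these macros are used elsewhere in the codebase (the unary namespace, the <real> template argument, and the parameter name p injected by ONE_PARAMETER are assumptions based on that usage):

    #include "hl_matrix_ops.cuh"
    #include "hl_matrix_apply.cuh"

    /* Define Scale: multiply every element by the runtime constant p. */
    DEFINE_MATRIX_UNARY_PARAMETER_OP(Scale, ONE_PARAMETER, a = a * p);

    /* Scale a contiguous M x N CPU matrix in place by 2.5. */
    void scale_matrix(real *A_h, int dimM, int dimN) {
        hl_cpu_apply_unary_op(unary::Scale<real>(2.5), A_h, dimM, dimN, dimN);
    }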
- DEFINE_MATRIX_BINARY_OP(name, op)
  Binary operator macro.
  Note: op supports multiple expressions separated by commas, e.g. a, b.
  See: hl_gpu_apply_binary_op, hl_cpu_apply_binary_op.
  Parameters:
  - name: operator name.
  - op: operator expression.
- DEFINE_MATRIX_BINARY_PARAMETER_OP(name, PARA_MACRO, op)
  Binary operator macro.
  Note: op supports multiple expressions separated by commas, e.g. a, b.
  See: hl_gpu_apply_binary_op, hl_cpu_apply_binary_op.
  Parameters:
  - name: operator name.
  - PARA_MACRO: parameter macro.
  - op: operator expression.
- DEFINE_MATRIX_TERNARY_OP(name, op)
  Ternary operator macro.
  Note: op supports multiple expressions separated by commas, e.g. a, b, c.
  See: hl_gpu_apply_ternary_op, hl_cpu_apply_ternary_op.
  Parameters:
  - name: operator name.
  - op: operator expression.
- DEFINE_MATRIX_TERNARY_PARAMETER_OP(name, PARA_MACRO, op)
  Ternary operator macro.
  Note: op supports multiple expressions separated by commas, e.g. a, b, c.
  See: hl_gpu_apply_ternary_op, hl_cpu_apply_ternary_op.
  Parameters:
  - name: operator name.
  - PARA_MACRO: parameter macro.
  - op: operator expression.
- DEFINE_MATRIX_QUATERNARY_OP(name, op)
  Quaternary operator macro.
  Note: op supports multiple expressions separated by commas, e.g. a, b, c, d.
  See: hl_gpu_apply_quaternary_op, hl_cpu_apply_quaternary_op.
  Parameters:
  - name: operator name.
  - op: operator expression.
- DEFINE_MATRIX_QUATERNARY_PARAMETER_OP(name, PARA_MACRO, op)
  Quaternary operator macro.
  Note: op supports multiple expressions separated by commas, e.g. a, b, c, d.
  See: hl_gpu_apply_quaternary_op, hl_cpu_apply_quaternary_op.
  Parameters:
  - name: operator name.
  - PARA_MACRO: parameter macro.
  - op: operator expression.
hl_sse_matrix_kernel.cuh
Functions
- bool hl_check_align(size_t size)
- bool hl_check_align(void *ptr)
- template <class Agg, class Op, class Saver>
  void hl_sse_matrix_row_op(Agg agg, Op op, Saver sv, int dimM, int dimN, real *dst, int ld, real *A, int lda)
- template <class Agg, class Op, class Saver>
  void hl_sse_matrix_row_op(Agg agg, Op op, Saver sv, int dimM, int dimN, real *dst, int ld, real *A, int lda, real *B, int ldb)
- template <class Agg, class Op, class Saver>
  void hl_matrix_column_op(Agg agg, Op op, Saver sv, int dimM, int dimN, real *dst, real *A, int lda)
- template <class Agg, class Op, class Saver>
  void hl_matrix_column_op(Agg agg, Op op, Saver sv, int dimM, int dimN, real *dst, real *A, int lda, real *B, int ldb)
- template <int MaxRow, class Agg, class Op, class Saver>
  void hl_sse_column_op_with_rem(Agg agg, Op op, Saver sv, int dimM, int dimN, real *dst, real *A, int lda)
- template <int Step, class Agg, class Op, class Saver>
  void hl_sse_matrix_column_op(Agg agg, Op op, Saver sv, int dimM, int dimN, real *dst, real *A, int lda)
- template <class Agg, class Op, class Saver>
  void hl_sse_matrix_column_op(Agg agg, Op op, Saver sv, int dimM, int dimN, real *dst, real *A, int lda)
- template <int MaxRow, class Agg, class Op, class Saver>
  void hl_sse_column_op_with_rem(Agg agg, Op op, Saver sv, int dimM, int dimN, real *dst, real *A, int lda, real *B, int ldb)
- template <int Step, class Agg, class Op, class Saver>
  void hl_sse_matrix_column_op(Agg agg, Op op, Saver sv, int dimM, int dimN, real *dst, real *A, int lda, real *B, int ldb)
- template <class Agg, class Op, class Saver>
  void hl_sse_matrix_column_op(Agg agg, Op op, Saver sv, int dimM, int dimN, real *dst, real *A, int lda, real *B, int ldb)
hl_batch_transpose.h
Functions
- void batchTranspose(const real *input, real *output, int width, int height, int batchSize)
  Perform a matrix transpose for each matrix in the batch.
  Note: both the input and the output are arranged in batch-first order. Each batch entry holds height * width elements, arranged in height-first (row-first) order.
  Parameters:
  - input: height * width elements per batch entry.
  - output: height * width elements per batch entry.
  - width: width of batch data.
  - height: height of batch data.
  - batchSize: batch size.
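  A usage sketch (the device buffers and their initialization are assumed to exist elsewhere):

    #include "hl_batch_transpose.h"

    /* Transpose a batch of 8 matrices of size 2 x 3 each. input_d and
     * output_d each hold batchSize * height * width reals; afterwards
     * each output matrix is the 3 x 2 transpose of its input. */
    void transpose_example(const real *input_d, real *output_d) {
        const int width = 3, height = 2, batchSize = 8;
        batchTranspose(input_d, output_d, width, height, batchSize);
    }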
Sparse Matrix
hl_sparse.h
Functions
- void hl_malloc_sparse_matrix(hl_sparse_matrix_s *A_d, hl_matrix_format_t format, hl_matrix_value_t value_type, int dimM, int dimN, int nnz)
  Allocate a sparse matrix.
  Parameters:
  - A_d: sparse matrix.
  - format: format.
  - value_type: value type.
  - dimM: height.
  - dimN: width.
  - nnz: number of non-zero elements.
- void hl_free_sparse_matrix(hl_sparse_matrix_s A_d)
  Free a sparse matrix.
  Parameters:
  - A_d: GPU sparse matrix.
- void hl_construct_sparse_matrix(hl_sparse_matrix_s *A_d, void *dest_d, size_t size, hl_matrix_format_t format, hl_matrix_value_t value_type, int dimM, int dimN, int nnz)
  Construct a sparse matrix from a block of input GPU memory.
  Note: the matching destructor is hl_destruct_sparse_matrix.
  Parameters:
  - A_d: sparse matrix.
  - dest_d: GPU memory.
  - size: size of dest_d.
  - format: format.
  - value_type: value type.
  - dimM: height.
  - dimN: width.
  - nnz: number of non-zero elements.
- void hl_construct_sparse_matrix(hl_sparse_matrix_s *A_d, real *value_d, int *rows_d, int *cols_d, hl_matrix_format_t format, hl_matrix_value_t value_type, int dimM, int dimN, int nnz)
  Construct a sparse matrix from three arrays.
  If format is HL_SPARSE_CSR, the size of rows_d is dimM + 1 and the size of cols_d is nnz.
  If format is HL_SPARSE_CSC, the size of rows_d is nnz and the size of cols_d is dimN + 1.
  If value_type is HL_NO_VALUE, the size of value_d is zero; otherwise it is nnz.
  Note: the matching destructor is hl_destruct_sparse_matrix.
  Parameters:
  - A_d: sparse matrix.
  - value_d: values.
  - rows_d: row indices.
  - cols_d: column indices.
  - format: format.
  - value_type: value type.
  - dimM: height.
  - dimN: width.
  - nnz: number of non-zero elements.
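  A sketch constructing a CSR handle around existing device arrays for the 2 x 3 matrix [[0, 1, 0], [2, 0, 3]]; the array contents shown in the comment would have been copied to the GPU beforehand:

    #include "hl_sparse.h"

    /* value_d = {1, 2, 3}, rows_d = {0, 1, 3}, cols_d = {1, 0, 2},
     * all resident in GPU memory. nnz = 3; rows_d has dimM + 1 entries. */
    void construct_example(real *value_d, int *rows_d, int *cols_d) {
        const int dimM = 2, dimN = 3, nnz = 3;
        hl_sparse_matrix_s A_d;
        hl_construct_sparse_matrix(&A_d, value_d, rows_d, cols_d,
                                   HL_SPARSE_CSR, HL_FLOAT_VALUE,
                                   dimM, dimN, nnz);
        /* ... use A_d ... */
        hl_destruct_sparse_matrix(A_d);
    }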
- void hl_destruct_sparse_matrix(hl_sparse_matrix_s A_d)
  Destruct a sparse matrix.
  Parameters:
  - A_d: sparse matrix.
- void hl_memcpy_csr_matrix(hl_sparse_matrix_s csr_matrix, real *csr_val, int *csr_row, int *csr_col, hl_stream_t stream)
  Copy values and indices to a sparse matrix.
  If csr_matrix is HL_FLOAT_VALUE, either all three pointers csr_val, csr_row, and csr_col are non-null, or csr_val is non-null while csr_row and csr_col are null.
  If csr_matrix is HL_NO_VALUE, csr_val is ignored and csr_row and csr_col must be non-null.
  Parameters:
  - csr_matrix: sparse matrix.
  - csr_val: pointer to the CSR value array (nnz).
  - csr_row: pointer to the CSR row index array (dimM + 1).
  - csr_col: pointer to the CSR column index array (nnz).
  - stream: hl_stream_t type.
- void hl_memcpy_csc_matrix(hl_sparse_matrix_s csc_matrix, real *csc_val, int *csc_row, int *csc_col, hl_stream_t stream)
  Copy values and indices to a sparse matrix.
  If csc_matrix is HL_FLOAT_VALUE, either all three pointers csc_val, csc_row, and csc_col are non-null, or csc_val is non-null while csc_row and csc_col are null.
  If csc_matrix is HL_NO_VALUE, csc_val is ignored and csc_row and csc_col must be non-null.
  Parameters:
  - csc_matrix: sparse matrix.
  - csc_val: pointer to the CSC value array (nnz).
  - csc_row: pointer to the CSC row index array (nnz).
  - csc_col: pointer to the CSC column index array (dimN + 1).
  - stream: hl_stream_t type.
- void hl_memcpy_sparse_matrix(hl_sparse_matrix_s dst, hl_sparse_matrix_s src, hl_stream_t stream)
  Copy a sparse matrix to another sparse matrix.
  Note:
  1. The formats of the src and dst matrices must be consistent.
  2. If the source matrix has values, the destination matrix may have values or not; if the source matrix has no values, the destination matrix must have no values either.
  Parameters:
  - dst: sparse matrix.
  - src: sparse matrix.
  - stream: hl_stream_t type.
- void hl_matrix_csr2dense(hl_sparse_matrix_s A_d, real *C_d, int dimM, int dimN)
  Convert a CSR matrix to a dense matrix.
  Parameters:
  - A_d: CSR matrix.
  - C_d: dense matrix.
  - dimM: height.
  - dimN: width.
- void hl_matrix_csc2dense(hl_sparse_matrix_s A_d, real *C_d, int dimM, int dimN)
  Convert a CSC matrix to a dense matrix.
  Parameters:
  - A_d: CSC matrix.
  - C_d: dense matrix.
  - dimM: height.
  - dimN: width.
- void hl_matrix_csr_mul_dense(hl_sparse_matrix_s A_d, hl_trans_op_t transa, real *B_d, hl_trans_op_t transb, real *C_d, int dimM, int dimN, int dimK, real alpha, real beta)
  C_d = alpha * (op(A_d) * op(B_d)) + beta * C_d.
  Note: transb does not support HPPL_OP_T.
  Parameters:
  - A_d: CSR sparse matrix.
  - transa: operation op(A), either no transpose or transpose.
  - B_d: dense matrix.
  - transb: operation op(B), either no transpose or transpose.
  - C_d: dense matrix.
  - dimM: matrix height of op(A) and C.
  - dimN: matrix width of op(B) and C.
  - dimK: width of op(A) and height of op(B).
  - alpha: scalar used for multiplication.
  - beta: scalar used for multiplication. If beta is zero, C does not have to be a valid input.
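  A sketch of a plain C = A * B product with an M x K CSR matrix A_d and a K x N dense B_d (HPPL_OP_N is assumed to be the no-transpose value of hl_trans_op_t, by analogy with HPPL_OP_T above; beta = 0, so C_d need not be initialized):

    hl_matrix_csr_mul_dense(A_d, HPPL_OP_N, B_d, HPPL_OP_N, C_d,
                            dimM, dimN, dimK, 1.0, 0.0);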
- void hl_matrix_csc_mul_dense(hl_sparse_matrix_s A_d, hl_trans_op_t transa, real *B_d, hl_trans_op_t transb, real *C_d, int dimM, int dimN, int dimK, real alpha, real beta)
  C_d = alpha * (op(A_d) * op(B_d)) + beta * C_d.
  Note: transb does not support HPPL_OP_T.
  Parameters:
  - A_d: sparse matrix.
  - transa: operation op(A), either no transpose or transpose.
  - B_d: dense matrix.
  - transb: operation op(B), either no transpose or transpose.
  - C_d: dense matrix.
  - dimM: matrix height of op(A) and C.
  - dimN: matrix width of op(B) and C.
  - dimK: width of op(A) and height of op(B).
  - alpha: scalar used for multiplication.
  - beta: scalar used for multiplication. If beta is zero, C does not have to be a valid input.
- void hl_matrix_dense_mul_csc(real *A_d, hl_trans_op_t transa, hl_sparse_matrix_s B_d, hl_trans_op_t transb, real *C_d, int dimM, int dimN, int dimK, real alpha, real beta)
  C_d = alpha * (op(A_d) * op(B_d)) + beta * C_d.
  Note: transa does not support HPPL_OP_T.
  Parameters:
  - A_d: dense matrix.
  - transa: operation op(A), either no transpose or transpose.
  - B_d: CSC sparse matrix.
  - transb: operation op(B), either no transpose or transpose.
  - C_d: dense matrix.
  - dimM: matrix height of op(A) and C.
  - dimN: matrix width of op(B) and C.
  - dimK: width of op(A) and height of op(B).
  - alpha: scalar used for multiplication.
  - beta: scalar used for multiplication. If beta is zero, C does not have to be a valid input.
- void hl_sparse_matrix_mul(real *A_d, hl_trans_op_t transa, real *B_d, hl_trans_op_t transb, hl_sparse_matrix_s C_d, int dimM, int dimN, int dimK, real alpha, real beta)
  C_d = alpha * (op(A_d) * op(B_d)) + beta * C_d, computed only at the non-zero positions of the sparse matrix C.
  Note: transb does not support HPPL_OP_T.
  Parameters:
  - A_d: dense matrix.
  - transa: operation op(A), either no transpose or transpose.
  - B_d: dense matrix.
  - transb: operation op(B), either no transpose or transpose.
  - C_d: sparse matrix.
  - dimM: matrix height of op(A) and C.
  - dimN: matrix width of op(B) and C.
  - dimK: width of op(A) and height of op(B).
  - alpha: scalar used for multiplication.
  - beta: scalar used for multiplication.
- void hl_matrix_dense_mul_csr(real *A_d, hl_trans_op_t transa, hl_sparse_matrix_s B_d, hl_trans_op_t transb, real *C_d, int dimM, int dimN, int dimK, real alpha, real beta)
  C_d = alpha * (op(A_d) * op(B_d)) + beta * C_d.
  Note: transa does not support HPPL_OP_T.
  Parameters:
  - A_d: dense matrix.
  - transa: operation op(A), either no transpose or transpose.
  - B_d: sparse matrix.
  - transb: operation op(B), either no transpose or transpose.
  - C_d: dense matrix.
  - dimM: matrix height of op(A) and C.
  - dimN: matrix width of op(B) and C.
  - dimK: width of op(A) and height of op(B).
  - alpha: scalar used for multiplication.
  - beta: scalar used for multiplication. If beta is zero, C does not have to be a valid input.
- void hl_memcpy_from_csc_matrix(real *csc_val, size_t val_size, int *csc_row, size_t row_size, int *csc_col, size_t col_size, hl_sparse_matrix_s csc_matrix, hl_stream_t stream)
  Copy a CSC matrix to host memory.
  a. Updates the three arrays according to csc_matrix:
  - csc_val, csc_row, and csc_col are destination addresses;
  - if the type of csc_matrix is HL_NO_VALUE, only csc_row and csc_col are updated;
  - if the type of csc_matrix is HL_FLOAT_VALUE, csc_row, csc_col, and csc_val are updated.
  b. The copy is asynchronous. To ensure the data has been copied, call the synchronous interface.
  Parameters:
  - csc_val: pointer to the CSC value array (nnz).
  - val_size: CSC value size.
  - csc_row: pointer to the CSC row index array (nnz).
  - row_size: CSC row size.
  - csc_col: pointer to the CSC column index array (dimN + 1).
  - col_size: CSC column size.
  - csc_matrix: sparse matrix.
  - stream: hl_stream_t type.
- void hl_memcpy_from_csr_matrix(real *csr_val, size_t val_size, int *csr_row, size_t row_size, int *csr_col, size_t col_size, hl_sparse_matrix_s csr_matrix, hl_stream_t stream)
  Copy a CSR matrix to host memory.
  a. Updates the three arrays according to csr_matrix:
  - csr_val, csr_row, and csr_col are destination addresses;
  - if the type of csr_matrix is HL_NO_VALUE, only csr_row and csr_col are updated;
  - if the type of csr_matrix is HL_FLOAT_VALUE, csr_row, csr_col, and csr_val are updated.
  b. The copy is asynchronous. To ensure the data has been copied, call the synchronous interface.
  Parameters:
  - csr_val: pointer to the CSR value array (nnz).
  - val_size: CSR value size.
  - csr_row: pointer to the CSR row index array (dimM + 1).
  - row_size: CSR row size.
  - csr_col: pointer to the CSR column index array (nnz).
  - col_size: CSR column size.
  - csr_matrix: sparse matrix.
  - stream: hl_stream_t type.
- void hl_sparse_matrix_column_sum(real *A_d, hl_sparse_matrix_s B_d, int dimM, int dimN, real scale)
  A_d[j] += B_d[i,j] for i in range(height).
  Parameters:
  - A_d: vector, size = width.
  - B_d: sparse matrix.
  - dimM: height.
  - dimN: width.
  - scale: scale of B_d.
- void hl_matrix_csr_column_sum(real *A_d, hl_sparse_matrix_s B_d, int dimM, int dimN, real scale)
  CSR implementation of hl_sparse_matrix_column_sum.
- void hl_sparse_matrix_add_bias(hl_sparse_matrix_s A_d, real *B_d, real scale)
  A_d[i,j] += B_d[j].
  Parameters:
  - A_d: sparse matrix.
  - B_d: vector, size = A_d.width.
  - scale: scale of B_d.
- void hl_matrix_csr_add_bias(hl_sparse_matrix_s A_d, real *B_d, real scale)
  CSR implementation of hl_sparse_matrix_add_bias.
- void hl_sparse_matrix_add_dense(hl_sparse_matrix_s A_d, real *B_d, int dimM, int dimN, real alpha, real beta)
  sparseMatrix = alpha * denseMatrix + beta * sparseMatrix, i.e. A_d[i,j] = alpha * B_d[i,j] + beta * A_d[i,j]. Only dense values at (row, col) positions present in the sparse matrix are added; values at other positions are ignored.
  Parameters:
  - A_d: sparse matrix.
  - B_d: dense matrix.
  - dimM: height of B_d.
  - dimN: width of B_d.
  - alpha: scale of B_d.
  - beta: scale of A_d.
- void hl_matrix_csr_add_dense(hl_sparse_matrix_s A_d, real *B_d, int dimM, int dimN, real alpha, real beta)
  CSR implementation of hl_sparse_matrix_add_dense.
- int *hl_sparse_matrix_get_rows(hl_sparse_matrix_s sMat)
  Get the rows pointer of a GpuSparseMatrix.
  Return: the rows pointer, which is a GPU address.
  Parameters:
  - sMat: sparse matrix.
- int *hl_sparse_matrix_get_cols(hl_sparse_matrix_s sMat)
  Get the cols pointer of a GpuSparseMatrix.
  Return: the cols pointer, which is a GPU address.
  Parameters:
  - sMat: sparse matrix.
- real *hl_sparse_matrix_get_value(hl_sparse_matrix_s sMat)
  Get the value pointer of a GpuSparseMatrix.
  Return: the value pointer, which is a GPU address.
  Parameters:
  - sMat: sparse matrix.
hl_sparse.ph
Defines
- HL_SPARSE_PH_
- __sparse_get_type_return__(mat, type, field)
- __sparse_get_return__(mat, field)
Typedefs
- typedef struct _hl_csr_matrix *hl_csr_matrix
- typedef struct _hl_csc_matrix *hl_csc_matrix
- struct _hl_csr_matrix
  Sparse matrix, CSR format.
  Parameters:
  - csr_val: non-zero values of the matrix.
  - csr_row: row indices.
  - csr_col: column indices.
  - nnz_s: size of csr_val and csr_col.
  - row_s: size of csr_row.
  - sparsity: sparsity pattern.
- struct _hl_csc_matrix
  Sparse matrix, CSC format.
  Parameters:
  - csc_val: non-zero values of the matrix.
  - csc_row: row indices.
  - csc_col: column indices.
  - nnz_s: size of csc_val and csc_row.
  - col_s: size of csc_col.
  - sparsity: sparsity pattern.
Others
hl_aggregate.h
Functions
- void hl_matrix_row_sum(real *A_d, real *C_d, int dimM, int dimN)
  Calculate the sum of each row of the matrix A_d.
  Parameters:
  - A_d: input matrix (M x N).
  - C_d: output matrix (M x 1).
  - dimM: matrix height.
  - dimN: matrix width.
- void hl_matrix_row_max(real *A_d, real *C_d, int dimM, int dimN)
  Calculate the maximum value of each row of the matrix A_d.
  Parameters:
  - A_d: input matrix (M x N).
  - C_d: output matrix (M x 1).
  - dimM: matrix height.
  - dimN: matrix width.
- void hl_matrix_row_min(real *A_d, real *C_d, int dimM, int dimN)
  Calculate the minimum value of each row of the matrix A_d.
  Parameters:
  - A_d: input matrix (M x N).
  - C_d: output matrix (M x 1).
  - dimM: matrix height.
  - dimN: matrix width.
- void hl_matrix_column_sum(real *A_d, real *C_d, int dimM, int dimN)
  Calculate the sum of each column of the matrix A_d.
  Parameters:
  - A_d: input matrix (M x N).
  - C_d: output matrix (1 x N).
  - dimM: matrix height.
  - dimN: matrix width.
- void hl_matrix_column_max(real *A_d, real *C_d, int dimM, int dimN)
  Calculate the maximum value of each column of the matrix A_d.
  Parameters:
  - A_d: input matrix (M x N).
  - C_d: output matrix (1 x N).
  - dimM: matrix height.
  - dimN: matrix width.
- void hl_matrix_column_min(real *A_d, real *C_d, int dimM, int dimN)
  Calculate the minimum value of each column of the matrix A_d.
  Parameters:
  - A_d: input matrix (M x N).
  - C_d: output matrix (1 x N).
  - dimM: matrix height.
  - dimN: matrix width.
- void hl_vector_sum(real *A_d, real *C_h, int dimM)
  C_h = sum(A_d[i]).
  Parameters:
  - A_d: input vector (M).
  - C_h: output (host memory).
  - dimM: size of the vector.
- void hl_vector_abs_sum(real *A_d, real *C_h, int dimM)
  C_h = sum(abs(A_d[i])).
  Parameters:
  - A_d: input vector (M).
  - C_h: output (host memory).
  - dimM: size of the vector.
hl_table_apply.h
Functions
- void hl_matrix_select_rows(real *output, int ldo, real *table, int ldt, int *ids, int numSamples, int tableSize, int dim)
  Get rows from the table: output[i] += table[ids[i]]. If ids[i] == -1, it is ignored.
  Parameters:
  - output: output matrix.
  - ldo: leading dimension of output.
  - table: table matrix.
  - ldt: leading dimension of table.
  - ids: ids vector.
  - numSamples: height of output.
  - tableSize: height of table.
  - dim: width of table.
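  An embedding-style lookup sketch (contiguous storage is assumed, so both leading dimensions equal dim; all pointers are device memory and the names are illustrative):

    /* output_d[i] += table_d[ids_d[i]] for each of numSamples rows of width dim. */
    hl_matrix_select_rows(output_d, dim, table_d, dim, ids_d,
                          numSamples, tableSize, dim);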
- void hl_matrix_add_to_rows(real *table, int ldt, real *input, int ldi, int *ids, int numSamples, int tableSize, int dim)
  Add rows to the table: table[ids[i]] += input[i]. If ids[i] == -1, it is ignored.
  Parameters:
  - table: table matrix.
  - ldt: leading dimension of table.
  - input: input matrix.
  - ldi: leading dimension of input.
  - ids: ids vector.
  - numSamples: height of input.
  - tableSize: height of table.
  - dim: width of table.
- template <class T>
  void hl_vector_select_from(T *dst, int sized, const T *src, int sizes, const int *ids, int sizei)
  Select elements from a vector.
  Parameters:
  - dst: output vector.
  - sized: size of dst.
  - src: input vector.
  - sizes: size of src.
  - ids: index vector.
  - sizei: size of ids.
hl_top_k.h
Functions
- void hl_matrix_top_k(real *topVal, int ldv, int *topIds, real *src, int lds, int dim, int beamSize, int numSamples)
  Find the top k elements.
  Parameters:
  - topVal: top k values.
  - ldv: leading dimension of topVal.
  - topIds: top k indices.
  - src: input values.
  - lds: leading dimension of src.
  - dim: width of the input values.
  - beamSize: beam size.
  - numSamples: height of the input values.
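  A sketch taking the top 5 values per row of a numSamples x dim matrix (contiguous storage is assumed, so lds = dim and ldv = beamSize; buffer names are illustrative):

    const int beamSize = 5;
    /* topVal_d: numSamples x beamSize values; topIds_d: matching indices. */
    hl_matrix_top_k(topVal_d, beamSize, topIds_d, src_d, dim,
                    dim, beamSize, numSamples);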
- void hl_sparse_matrix_top_k(real *topVal, int ldv, int *topIds, hl_sparse_matrix_s src, int beamSize, int numSamples)
  Find the top k elements of each row of a sparse matrix.
  Note: only the HL_SPARSE_CSR format is supported.
  Parameters:
  - topVal: top k values.
  - ldv: leading dimension of topVal.
  - topIds: top k indices.
  - src: sparse matrix.
  - beamSize: beam size.
  - numSamples: height of the input values.