/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#ifndef HL_SPARSE_H_
#define HL_SPARSE_H_

#include "hl_base.h"

/**
 * @brief   Malloc a sparse matrix.
 *
 * @param[out]  A_d        sparse matrix.
 * @param[in]   format     format.
 * @param[in]   value_type valueType.
 * @param[in]   dimM       height.
 * @param[in]   dimN       width.
 * @param[in]   nnz        number of none zero element.
 *
 */
extern void hl_malloc_sparse_matrix(hl_sparse_matrix_s *A_d,
                                    hl_matrix_format_t format,
33
                                    hl_matrix_value_t value_type,
Z
zhangjinchao01 已提交
34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61
                                    int dimM,
                                    int dimN,
                                    int nnz);

/**
 * @brief   Free a sparse matrix.
 *
 * @param[in]  A_d  GPU sparse matrix to free.
 *
 * @note    NOTE(review): presumably the counterpart of
 *          hl_malloc_sparse_matrix -- confirm against the implementation.
 *
 */
extern void hl_free_sparse_matrix(hl_sparse_matrix_s A_d);

/**
 * @brief   Construct a sparse matrix use input gpu memory.
 *
 * @param[out]  A_d         sparse matrix.
 * @param[in]   dest_d      gpu memory.
 * @param[in]   size        size of dest_d.
 * @param[in]   format      format.
 * @param[in]   value_type  valueType.
 * @param[in]   dimM        height.
 * @param[in]   dimN        width.
 * @param[in]   nnz         number of none zero element.
 *
 * @note    Destruct api is hl_destruct_sparse_matrix.
 *
 */
extern void hl_construct_sparse_matrix(hl_sparse_matrix_s *A_d,
62
                                       void *dest_d,
Z
zhangjinchao01 已提交
63 64
                                       size_t size,
                                       hl_matrix_format_t format,
65
                                       hl_matrix_value_t value_type,
Z
zhangjinchao01 已提交
66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
                                       int dimM,
                                       int dimN,
                                       int nnz);

/**
 * @brief   Use three arrays to construct sparse matrix.
 *
 * if format is HL_SPARSE_CSR, size of rows_d is dimM + 1,
 * and size of cols_d is nnz;
 *
 * if format is HL_SPARSE_CSC, size of rows_d is nnz, and size of
 * cols_d is dimN + 1.
 *
 * if valueType is HL_NO_VALUE, size of value_d is zero,
 * else size of value_d is nnz.
 *
 * @param[out]  A_d        sparse matrix.
 * @param[in]   value_d    value.
 * @param[in]   rows_d     row.
 * @param[in]   cols_d     col.
 * @param[in]   format     format.
 * @param[in]   value_type valueType.
 * @param[in]   dimM       height.
 * @param[in]   dimN       width.
 * @param[in]   nnz        number of none zero element.
 *
 * @note    The corresponding destructor interface is hl_destruct_sparse_matrix.
 *
 */
extern void hl_construct_sparse_matrix(hl_sparse_matrix_s *A_d,
96 97 98
                                       real *value_d,
                                       int *rows_d,
                                       int *cols_d,
Z
zhangjinchao01 已提交
99
                                       hl_matrix_format_t format,
100
                                       hl_matrix_value_t value_type,
Z
zhangjinchao01 已提交
101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224
                                       int dimM,
                                       int dimN,
                                       int nnz);

/**
 * @brief   Destruct a sparse matrix.
 *
 * This is the destructor interface paired with hl_construct_sparse_matrix.
 *
 * @param[in] A_d  sparse matrix to destruct.
 *
 */
extern void hl_destruct_sparse_matrix(hl_sparse_matrix_s A_d);

/**
 * @brief   Copy values and indices into a CSR sparse matrix.
 *
 * If csr_matrix is HL_FLOAT_VALUE, either:
 *
 *  1. csr_val, csr_row and csr_col are all non-null; or
 *
 *  2. csr_val is non-null, and csr_row and csr_col are null.
 *
 * If csr_matrix is HL_NO_VALUE:
 *
 *  1. csr_val is ignored; csr_row and csr_col are non-null.
 *
 *
 * @param[in,out]   csr_matrix sparse matrix.
 * @param[in]       csr_val    pointer to the csr value array (nnz).
 * @param[in]       csr_row    pointer to the csr row indices array (dimM + 1).
 * @param[in]       csr_col    pointer to the csr col indices array (nnz).
 * @param[in]       stream     hl_stream_t type.
 *
 */
extern void hl_memcpy_csr_matrix(hl_sparse_matrix_s csr_matrix,
                                 real *csr_val,
                                 int *csr_row,
                                 int *csr_col,
                                 hl_stream_t stream);

/**
 * @brief   Copy values and indices into a CSC sparse matrix.
 *
 * If csc_matrix is HL_FLOAT_VALUE, either:
 *
 *   1. csc_val, csc_row and csc_col are all non-null; or
 *
 *   2. csc_val is non-null, and csc_row and csc_col are null.
 *
 * If csc_matrix is HL_NO_VALUE:
 *
 *   1. csc_val is ignored; csc_row and csc_col are non-null.
 *
 * @param[in,out]   csc_matrix sparse matrix.
 * @param[in]       csc_val    pointer to the csc value array (nnz).
 * @param[in]       csc_row    pointer to the csc row indices array (nnz).
 * @param[in]       csc_col    pointer to the csc col indices array (dimN + 1).
 * @param[in]       stream     hl_stream_t type.
 *
 *
 */
extern void hl_memcpy_csc_matrix(hl_sparse_matrix_s csc_matrix,
                                 real *csc_val,
                                 int *csc_row,
                                 int *csc_col,
                                 hl_stream_t stream);

/**
 * @brief   Copy one sparse matrix to another sparse matrix.
 *
 * @param[out]  dst     destination sparse matrix.
 * @param[in]   src     source sparse matrix.
 * @param[in]   stream  hl_stream_t type.
 *
 *
 * @note    1. The formats of the src and dst matrices must be the same.
 *          2. If the source matrix has values, the destination matrix may
 *             either have values or have no values; if the source matrix
 *             has no values, the destination matrix must also have no
 *             values.
 */
extern void hl_memcpy_sparse_matrix(hl_sparse_matrix_s dst,
                                    hl_sparse_matrix_s src,
                                    hl_stream_t stream);

/**
 * @brief   Convert a csr matrix to a dense matrix.
 *
 * @param[in]   A_d     csr matrix (source).
 * @param[out]  C_d     dense matrix (destination).
 * @param[in]   dimM    height.
 * @param[in]   dimN    width.
 *
 */
extern void hl_matrix_csr2dense(hl_sparse_matrix_s A_d,
                                real *C_d,
                                int dimM,
                                int dimN);

/**
 * @brief   Convert a csc matrix to a dense matrix.
 *
 * @param[in]   A_d     csc matrix (source).
 * @param[out]  C_d     dense matrix (destination).
 * @param[in]   dimM    height.
 * @param[in]   dimN    width.
 *
 */
extern void hl_matrix_csc2dense(hl_sparse_matrix_s A_d,
                                real *C_d,
                                int dimM,
                                int dimN);

/**
 * @brief   C_d = alpha*(op(A_d) * op(B_d)) + beta*C_d.
 *
 * @param[in]   A_d     csr sparse matrix.
 * @param[in]   transa  operation op(A): non-transpose or transpose.
 * @param[in]   B_d     dense matrix.
 * @param[in]   transb  operation op(B): non-transpose or transpose.
 * @param[out]  C_d     dense matrix.
 * @param[in]   dimM    matrix height of op(A) & C
 * @param[in]   dimN    matrix width of op(B) & C
 * @param[in]   dimK    width of op(A) & height of op(B)
 * @param[in]   alpha   scalar used for multiplication.
 * @param[in]   beta    scalar used for multiplication.
 *                      If beta is zero, C does not have to be a valid input.
 *
 * @note    transb does not support HPPL_OP_T.
 *
 */
extern void hl_matrix_csr_mul_dense(hl_sparse_matrix_s A_d,
                                    hl_trans_op_t transa,
                                    real *B_d,
                                    hl_trans_op_t transb,
                                    real *C_d,
                                    int dimM,
                                    int dimN,
                                    int dimK,
                                    real alpha,
                                    real beta);

/**
 * @brief   C_d = alpha*(op(A_d) * op(B_d)) + beta*C_d.
 *
 * @param[in]   A_d     sparse matrix.
 * @param[in]   transa  operation op(A) that is non-or transpose.
 * @param[in]   B_d     dense matrix.
 * @param[in]   transb  operation op(B) that is non-or transpose.
 * @param[out]  C_d     dense matrix.
 * @param[in]   dimM    matrix height of op(A) & C
 * @param[in]   dimN    matrix width of op(B) & C
 * @param[in]   dimK    width of op(A) & height of op(B)
 * @param[in]   alpha   scalar used for multiplication.
 * @param[in]   beta    scalar used for multiplication.
254
 *                      If beta is zero, C does not have to be a valid input.
Z
zhangjinchao01 已提交
255 256 257 258 259 260
 *
 * @note    transb is not support HPPL_OP_T.
 *
 */
extern void hl_matrix_csc_mul_dense(hl_sparse_matrix_s A_d,
                                    hl_trans_op_t transa,
261 262
                                    real *B_d,
                                    hl_trans_op_t transb,
Z
zhangjinchao01 已提交
263
                                    real *C_d,
264 265 266 267 268
                                    int dimM,
                                    int dimN,
                                    int dimK,
                                    real alpha,
                                    real beta);
Z
zhangjinchao01 已提交
269 270 271 272 273 274 275 276 277 278 279 280 281 282

/**
 * @brief   C_d = alpha*(op(A_d) * op(B_d)) + beta*C_d.
 *
 * @param[in]   A_d     dense matrix.
 * @param[in]   transa  operation op(A): non-transpose or transpose.
 * @param[in]   B_d     csc sparse matrix.
 * @param[in]   transb  operation op(B): non-transpose or transpose.
 * @param[out]  C_d     dense matrix.
 * @param[in]   dimM    matrix height of op(A) & C
 * @param[in]   dimN    matrix width of op(B) & C
 * @param[in]   dimK    width of op(A) & height of op(B)
 * @param[in]   alpha   scalar used for multiplication.
 * @param[in]   beta    scalar used for multiplication.
 *                      If beta is zero, C does not have to be a valid input.
 *
 * @note    transa does not support HPPL_OP_T.
 *
 */
extern void hl_matrix_dense_mul_csc(real *A_d,
                                    hl_trans_op_t transa,
                                    hl_sparse_matrix_s B_d,
                                    hl_trans_op_t transb,
                                    real *C_d,
                                    int dimM,
                                    int dimN,
                                    int dimK,
                                    real alpha,
                                    real beta);

/**
 * @brief   C_d = alpha*(op(A_d) * op(B_d)) + beta*C_d.
 *          Calculated based on the non-zero elements of the matrix C.
 *
 * @param[in]     A_d     dense matrix.
 * @param[in]     transa  operation op(A) that is non-or transpose.
 * @param[in]     B_d     dense matrix.
 * @param[in]     transb  operation op(B) that is non-or transpose.
 * @param[in,out] C_d     sparse matrix.
 * @param[in]     dimM    matrix height of op(A) & C
 * @param[in]     dimN    matrix width of op(B) & C
 * @param[in]     dimK    width of op(A) & height of op(B)
 * @param[in]     alpha   scalar used for multiplication.
 * @param[in]     beta    scalar used for multiplication.
 *
 * @note    transb is not support HPPL_OP_T.
 *
 */
317 318 319 320
extern void hl_sparse_matrix_mul(real *A_d,
                                 hl_trans_op_t transa,
                                 real *B_d,
                                 hl_trans_op_t transb,
Z
zhangjinchao01 已提交
321
                                 hl_sparse_matrix_s C_d,
322 323 324 325 326
                                 int dimM,
                                 int dimN,
                                 int dimK,
                                 real alpha,
                                 real beta);
Z
zhangjinchao01 已提交
327 328 329 330 331 332 333 334 335 336 337 338 339 340

/**
 * @brief   C_d = alpha*(op(A_d) * op(B_d)) + beta*C_d
 *
 * @param[in]   A_d     dense matrix.
 * @param[in]   transa  operation op(A) that is non-or transpose.
 * @param[in]   B_d     sparse matrix.
 * @param[in]   transb  operation op(B) that is non-or transpose.
 * @param[out]  C_d     dense matrix.
 * @param[in]   dimM    matrix height of op(A) & C
 * @param[in]   dimN    matrix width of op(B) & C
 * @param[in]   dimK    width of op(A) & height of op(B)
 * @param[in]   alpha   scalar used for multiplication.
 * @param[in]   beta    scalar used for multiplication.
341
 *                      If beta is zero, C does not have to be a valid input.
Z
zhangjinchao01 已提交
342 343 344 345 346
 *
 *
 * @note    transa is not support HPPL_OP_T.
 *
 */
347 348
extern void hl_matrix_dense_mul_csr(real *A_d,
                                    hl_trans_op_t transa,
Z
zhangjinchao01 已提交
349 350 351
                                    hl_sparse_matrix_s B_d,
                                    hl_trans_op_t transb,
                                    real *C_d,
352 353 354 355 356
                                    int dimM,
                                    int dimN,
                                    int dimK,
                                    real alpha,
                                    real beta);
Z
zhangjinchao01 已提交
357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436

/**
 * @brief   Copy a csc_matrix to host.
 *
 * a. Three arrays are updated according to csc_matrix:
 *
 *  1. csc_val, csc_row and csc_col are the destination addresses.
 *
 *  2. If the type of csc_matrix is HL_NO_VALUE, csc_row and csc_col
 *     are updated.
 *
 *  3. If the type of csc_matrix is HL_FLOAT_VALUE, csc_row,
 *     csc_col and csc_val are updated.
 *
 * b. The copy is asynchronous. To make sure the data has been copied,
 *    call the synchronous interface.
 *
 *
 * @param[out]  csc_val     pointer to the csc value array (nnz).
 * @param[in]   val_size    csc value size.
 * @param[out]  csc_row     pointer to the csc row indices array (nnz).
 * @param[in]   row_size    csc row size.
 * @param[out]  csc_col     pointer to the csc col indices array (dimN + 1).
 * @param[in]   col_size    csc column size.
 * @param[in]   csc_matrix  sparse matrix.
 * @param[in]   stream      hl_stream_t type.
 *
 */
extern void hl_memcpy_from_csc_matrix(real *csc_val,
                                      size_t val_size,
                                      int *csc_row,
                                      size_t row_size,
                                      int *csc_col,
                                      size_t col_size,
                                      hl_sparse_matrix_s csc_matrix,
                                      hl_stream_t stream);

/**
 * @brief   Copy a csr_matrix to host.
 *
 * a. Three arrays are updated according to csr_matrix:
 *
 *  1. csr_val, csr_row and csr_col are the destination addresses.
 *
 *  2. If the type of csr_matrix is HL_NO_VALUE, csr_row and csr_col
 *     are updated.
 *
 *  3. If the type of csr_matrix is HL_FLOAT_VALUE, csr_row,
 *     csr_col and csr_val are updated.
 *
 * b. The copy is asynchronous. To make sure the data has been copied,
 *    call the synchronous interface.
 *
 * @param[out]  csr_val     pointer to the csr value array (nnz).
 * @param[in]   val_size    csr value size.
 * @param[out]  csr_row     pointer to the csr row indices array (dimM + 1).
 * @param[in]   row_size    csr row size.
 * @param[out]  csr_col     pointer to the csr col indices array (nnz).
 * @param[in]   col_size    csr column size.
 * @param[in]   csr_matrix  sparse matrix.
 * @param[in]   stream      hl_stream_t type.
 *
 */
extern void hl_memcpy_from_csr_matrix(real *csr_val,
                                      size_t val_size,
                                      int *csr_row,
                                      size_t row_size,
                                      int *csr_col,
                                      size_t col_size,
                                      hl_sparse_matrix_s csr_matrix,
                                      hl_stream_t stream);

/**
 * @brief   A_d[j] += B_d[i,j] for i in range(height)
 *
 * @param[in,out]   A_d    vector, size = width.
 * @param[in]       B_d    sparse matrix.
 * @param[in]       dimM   height.
 * @param[in]       dimN   width.
 * @param[in]       scale  scale of B_d
 *
 */
437 438
extern void hl_sparse_matrix_column_sum(
    real *A_d, hl_sparse_matrix_s B_d, int dimM, int dimN, real scale);
Z
zhangjinchao01 已提交
439 440 441
/**
 * @brief implementation of csr sparse matrix in hl_sparse_matirx_column_sum
 */
442 443
extern void hl_matrix_csr_column_sum(
    real *A_d, hl_sparse_matrix_s B_d, int dimM, int dimN, real scale);
Z
zhangjinchao01 已提交
444 445 446 447 448 449 450 451 452 453

/**
 * @brief   A_d[i,j] += B_d[j]
 *
 * @param[in,out]   A_d    sprare matrix.
 * @param[in]       B_d    vector, size = A_d.width.
 * @param[in]       scale  scale of B_d.
 *
 */
extern void hl_sparse_matrix_add_bias(hl_sparse_matrix_s A_d,
454
                                      real *B_d,
Z
zhangjinchao01 已提交
455 456 457 458 459
                                      real scale);
/**
 * @brief implementation of csr sparse matrix in hl_sparse_matrix_add_bias
 */
extern void hl_matrix_csr_add_bias(hl_sparse_matrix_s A_d,
460
                                   real *B_d,
Z
zhangjinchao01 已提交
461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477
                                   real scale);

/**
 * @brief   sparseMatrix = alpha * denseMatrix + beta *sparseMatrix
 *          A_d[i,j] = alpha * B_d[i,j] + beta * A_d[i,j]
 *          Only add value of same (row, col) index in dense matrix and
 *          do not use others values whoes postions are not in sparse matirx.
 *
 * @param[in,out]   A_d    sprare matrix.
 * @param[in]       B_d    dense matrix.
 * @param[in]       dimM   height of B_d.
 * @param[in]       dimN   width of B_d.
 * @param[in]       alpha  scale of B_d.
 * @param[in]       beta   scale of A_d.
 *
 */
extern void hl_sparse_matrix_add_dense(hl_sparse_matrix_s A_d,
478
                                       real *B_d,
Z
zhangjinchao01 已提交
479 480 481 482 483 484 485 486
                                       int dimM,
                                       int dimN,
                                       real alpha,
                                       real beta);
/**
 * @brief implementation of csr sparse matrix in hl_sparse_matrix_add_dense
 */
extern void hl_matrix_csr_add_dense(hl_sparse_matrix_s A_d,
487
                                    real *B_d,
Z
zhangjinchao01 已提交
488 489 490 491 492 493 494 495 496 497 498 499 500
                                    int dimM,
                                    int dimN,
                                    real alpha,
                                    real beta);

/**
 * @brief get rows pionter of GpuSparseMatrix
 *
 * @param[in]    sMat  sparse matrix
 *
 * @return   return rows pointer, which is gpu address
 *
 */
501
extern int *hl_sparse_matrix_get_rows(hl_sparse_matrix_s sMat);
Z
zhangjinchao01 已提交
502 503 504 505 506 507 508 509 510

/**
 * @brief get cols pionter of GpuSparseMatrix
 *
 * @param[in]    sMat  sparse matrix
 *
 * @return   return cols pointer, which is gpu address
 *
 */
511
extern int *hl_sparse_matrix_get_cols(hl_sparse_matrix_s sMat);
Z
zhangjinchao01 已提交
512 513 514 515 516 517 518 519 520

/**
 * @brief get value pionter of GpuSparseMatrix
 *
 * @param[in]    sMat  sparse matrix
 *
 * @return   return value pointer, which is gpu address
 *
 */
521
extern real *hl_sparse_matrix_get_value(hl_sparse_matrix_s sMat);
Z
zhangjinchao01 已提交
522 523

#endif /* HL_SPARSE_H_ */