Matrix¶
Base¶
Defines
- 
CAL_MATRIX_START_ADDRESS(address, height, width, ld, col, row)¶
- Calculate matrix element address. - For instance, address of A[i][j] = i * ld + j. 
- 
namespace paddle¶
- Typedefs - 
typedef bool_constant<bool, false> false_type¶
 - 
typedef bool_constant<bool, true> true_type¶
 - 
typedef BaseMatrixT<real> BaseMatrix¶
 - 
typedef BaseMatrixT<int> IBaseMatrix¶
 - 
class MatrixOffset¶
- Public Functions - 
MatrixOffset(size_t aCol = 0, size_t aRow = 0, size_t bCol = 0, size_t bRow = 0, size_t cCol = 0, size_t cRow = 0, size_t dCol = 0, size_t dRow = 0)¶
 
- 
 - template <class T>
- 
class BaseMatrixT¶
- Subclassed by paddle::BaseVector< T >, paddle::Matrix - Public Functions - 
virtual ~BaseMatrixT()¶
 - 
BaseMatrixT(size_t height, size_t width, T *data, bool trans, bool useGpu)¶
 - 
BaseMatrixT(BaseMatrixT &mat, bool useGpu)¶
- Note
- This constructor is for temporarily making a matrix with different useGpu flag as the original matrix so that mixed gpu/cpu operations can be performed successfully.
 
 - 
BaseMatrixT(size_t height, size_t width, size_t stride, T *data, bool trans, bool use_gpu)¶
 - 
void setData(T *data)¶
- caller should make sure that the size of data is at least height*width 
 - template <class Op>
- 
int applyUnary(Op op)¶
- unary operator: element wise op(a). - for 0 <= i < this->height_ & for 0 <= j < this->width_. 
 - template <class Op>
- 
int applyUnary(Op op, int numRows, int numCols, MatrixOffset &offset)¶
- unary operator: element wise op(a). - for 0 <= i < numRows & for 0 <= j < numCols. While matrix start address is: A = this->data_ + offset.aRow_*ld + offset.aCol_; 
 - template <class Op>
- 
int applyBinary(Op op, BaseMatrixT &b)¶
- binary operator: element wise op(a, b). - for 0 <= i < this->height_ & for 0 <= j < this->width_. While this->height_ == b.height_ && this->width_ == b.width_. 
 - template <class Op, class bAsRowVector, class bAsColVector>
- 
int applyBinary(Op op, BaseMatrixT &b, int numRows, int numCols, MatrixOffset &offset, bAsRowVector, bAsColVector)¶
- binary operator: element wise op(a, b) - for 0 <= i < numRows & for 0 <= j < numCols. While matrix start address is: A = this->data_ + offset.aRow_*lda + offset.aCol_; B = b->data_ + offset.bRow_*ldb + offset.bCol_; if (bAsRowVector == false_type && bAsColVector == false_type) op(A[i * lda + j], B[i * ldb + j]) if (bAsRowVector == true_type && bAsColVector == false_type) op(A[i * lda + j], B[j]) if (bAsRowVector == false_type && bAsColVector == true_type) op(A[i * lda + j], B[i * ldb]) if (bAsRowVector == true_type && bAsColVector == true_type) op(A[i * lda + j], B[0]) 
 - template <class Op>
- 
int applyBinary(Op op, BaseMatrixT &b, int numRows, int numCols, MatrixOffset &offset)¶
 - template <class Op>
- 
int applyTernary(Op op, BaseMatrixT &b, BaseMatrixT &c)¶
- ternary operator: element wise op(a, b, c). - for 0 <= i < this->height_ & for 0 <= j < this->width_. While this->height_ == b.height_ && this->width_ == b.width_ && this->height_ == c.height_ && this->width_ == c.width_ 
 - template <class Op, class cAsRowVector, class cAsColVector>
- 
int applyTernary(Op op, BaseMatrixT &b, BaseMatrixT &c, int numRows, int numCols, MatrixOffset &offset, cAsRowVector, cAsColVector)¶
- ternary operator: element wise op(a, b, c). - for 0 <= i < numRows & for 0 <= j < numCols. While matrix start address is: A = this->data_ + offset.aRow_*lda + offset.aCol_; B = b->data_ + offset.bRow_*ldb + offset.bCol_; C = c->data_ + offset.cRow_*ldc + offset.cCol_; if (cAsRowVector == false_type && cAsColVector == false_type) op(A[i*lda + j], B[i*ldb + j], C[i*ldc + j]) if (cAsRowVector == true_type && cAsColVector == false_type) op(A[i*lda + j], B[i*ldb + j], C[j]) if (cAsRowVector == false_type && cAsColVector == true_type) op(A[i*lda + j], B[i*ldb + j], C[i*ldc]) if (cAsRowVector == true_type && cAsColVector == true_type) op(A[i*lda + j], B[i*ldb + j], C[0]) 
 - template <class Op>
- 
int applyTernary(Op op, BaseMatrixT &b, BaseMatrixT &c, int numRows, int numCols, MatrixOffset &offset)¶
 - template <class Op>
- 
int applyQuaternary(Op op, BaseMatrixT &b, BaseMatrixT &c, BaseMatrixT &d)¶
- quaternary operator: element wise op(a, b, c, d). - for 0 <= i < this->height_ & for 0 <= j < this->width_. While this->height_ == b.height_ && this->width_ == b.width_ && this->height_ == c.height_ && this->width_ == c.width_ && this->height_ == d.height_ && this->width_ == d.width_ 
 - template <class Op>
- 
int applyQuaternary(Op op, BaseMatrixT &b, BaseMatrixT &c, BaseMatrixT &d, int numRows, int numCols, MatrixOffset &offset)¶
- quaternary operator: element wise op(a, b, c, d). - for 0 <= i < numRows & for 0 <= j < numCols. While matrix start address is: A = this->data_ + offset.aRow_*lda + offset.aCol_; B = b->data_ + offset.bRow_*ldb + offset.bCol_; C = c->data_ + offset.cRow_*ldc + offset.cCol_; D = d->data_ + offset.dRow_*ldd + offset.dCol_; 
 - template <class Agg, class Op, class Saver, class aAsRowVector, class aAsColVector>
- 
int aggregate(Agg agg, Op op, Saver sv, BaseMatrixT &b, int numRows, int numCols, MatrixOffset &offset, aAsRowVector, aAsColVector)¶
- An aggregate expression that is applied to each row (or column) of matrix b. op and sv are element-wise operators. - if (aAsRowVector == true_type && aAsColVector == false_type) for each column j & 0 <= i < numRows, do: dst = agg(op(b[i*ldb + j])) a[j] = sv(a[j], dst) if (aAsRowVector == false_type && aAsColVector == true_type) for each row i & 0 <= j < numCols, do: dst = agg(op(b[i*ldb + j])) a[i] = sv(a[i], dst) 
 - template <class Agg, class Op, class Saver, class aAsRowVector, class aAsColVector>
- 
int aggregate(Agg agg, Op op, Saver sv, BaseMatrixT &b, BaseMatrixT &c, int numRows, int numCols, MatrixOffset &offset, aAsRowVector, aAsColVector)¶
- An aggregate expression that is applied to each row (or column) of matrices b and c. - op and sv are element-wise operators. - if (aAsRowVector == true_type && aAsColVector == false_type) for each column j & 0 <= i < numRows, do: dst = agg(op(b[i*ldb + j], c[i*ldc + j])) a[j] = sv(a[j], dst) if (aAsRowVector == false_type && aAsColVector == true_type) for each row i & 0 <= j < numCols, do: dst = agg(op(b[i*ldb + j], c[i*ldc + j])) a[i] = sv(a[i], dst) 
 - template <class Agg>
- 
int applyRow(Agg agg, BaseMatrixT &b)¶
- An aggregate expression that is applied to each row of matrix b. - for each row i & 0 <= j < b.width_, do: this[i] = agg(b[i*ldb + j]) 
 - template <class Agg, class Saver>
- 
int applyRow(Agg agg, Saver sv, BaseMatrixT &b)¶
- An aggregate expression that is applied to each row of matrix b. - for each row i & 0 <= j < b.width_, do: dst = agg(b[i*ldb + j]) this[i] = sv(this[i], dst) 
 - template <class Agg>
- 
int applyCol(Agg agg, BaseMatrixT &b)¶
- An aggregate expression that is applied to each column of matrix b. - for each column j & 0 <= i < b.height_, do: this[j] = agg(b[i*ldb + j]) 
 - template <class Agg, class Saver>
- 
int applyCol(Agg agg, Saver sv, BaseMatrixT &b)¶
- An aggregate expression that is applied to each column of matrix b. - for each column j & 0 <= i < b.height_, do: dst = agg(b[i*ldb + j]) this[j] = sv(this[j], dst) 
 - 
bool useGpu() const¶
 - 
const T *rowBuf(size_t row) const¶
 - 
T *rowBuf(size_t row)¶
 - 
void neg()¶
- unary operator. 
 - 
void exp()¶
 - 
void pow(T p)¶
 - 
void log()¶
 - 
void sqrt()¶
 - 
void square()¶
 - 
void reciprocal()¶
 - 
void abs()¶
 - 
void sign()¶
 - 
void zero()¶
 - 
void zeroAtOffset(int64_t columnOffset, int64_t numColumns)¶
- this(row, col + columnOffset) = 0 for 0 <= col < numColumns 
 - 
void one()¶
 - 
void subScalar(T p)¶
 - 
void mulScalar(T p)¶
 - 
void divScalar(T p)¶
 - 
void assign(T p)¶
- this = p 
 - 
void add(T p)¶
- this = this + p 
 - 
void add(T p1, T p2)¶
- this = this*p1 + p2 
 - 
void clip(T p1, T p2)¶
- this = this < low ? low : this - this = this > high ? high : this (where low = p1 and high = p2) 
 - 
void biggerThanScalar(T p)¶
- a = a > p ? 1.0f : 0.0f 
 - 
void downClip(T p)¶
- a = a > p ? a : p 
 - 
void assign(BaseMatrixT &b)¶
- this = b 
 - 
void assignAtOffset(BaseMatrixT &b, int64_t columnOffset)¶
- If b.width + columnOffset <= this.width this(row, col + columnOffset) = b(row, col) for 0 <= col < b.width If this.width + columnOffset <= b.width this(row, col) = b(row, col + columnOffset) for 0 <= col < this.width Otherwise, FATAL 
 - 
void add(BaseMatrixT &b)¶
- this = this + b 
 - 
void addAtOffset(BaseMatrixT &b, int64_t columnOffset)¶
- If b.width + columnOffset <= this.width this(row, col + columnOffset) += b(row, col) for 0 <= col < b.width If this.width + columnOffset <= b.width this(row, col) += b(row, col + columnOffset) for 0 <= col < this.width Otherwise, FATAL 
 - 
void addColVector(BaseMatrixT &b)¶
 - 
void addRowVector(BaseMatrixT &b)¶
 - 
void addBias(BaseMatrixT &b, T scale)¶
 - 
void mulRowVector(BaseMatrixT &b)¶
 - 
void divRowVector(BaseMatrixT &b)¶
 - 
void addP2P(BaseMatrixT &b)¶
 - 
void add(BaseMatrixT &b, T p)¶
- this = this + b*p 
 - 
void add(BaseMatrixT &b, T p1, T p2)¶
- this = p1*this + p2*b 
 - 
void sub(BaseMatrixT &b)¶
- this = this - b 
 - 
void sub(BaseMatrixT &b, T p)¶
- this = this - b*p 
 - 
void relu(BaseMatrixT &b)¶
- b = max(0, this) 
 - 
void reluDerivative(BaseMatrixT &b)¶
 - 
void softrelu(BaseMatrixT &b)¶
- b = log(1.0 + exp(this)) 
 - 
void softreluDerivative(BaseMatrixT &b)¶
 - 
void brelu(BaseMatrixT &b)¶
- b = min(max(this, p1), p2) 
 - 
void breluDerivative(BaseMatrixT &b)¶
 - 
void square(BaseMatrixT &b)¶
- b = this * this 
 - 
void squareDerivative(BaseMatrixT &b)¶
 - 
void tanh(BaseMatrixT &b)¶
- b = tanh(this) 
 - 
void tanhDerivative(BaseMatrixT &b)¶
 - 
void scaledTanh(BaseMatrixT &b, T p1, T p2)¶
- b = p1 * tanh(p2 * this) 
 - 
void scaledTanhDerivative(BaseMatrixT &b, T p1, T p2)¶
 - 
void reciprocal(BaseMatrixT &b)¶
- b = 1.0f / this 
 - 
void reciprocalDerivative(BaseMatrixT &b)¶
 - 
void abs(BaseMatrixT &b)¶
- b = this > 0.0f ? this : -this 
 - 
void absDerivative(BaseMatrixT &b)¶
 - 
void sigmoid(BaseMatrixT &b)¶
- b = 1.0f / (1.0f + exp(-this)) 
 - 
void sigmoidDerivative(BaseMatrixT &b)¶
 - 
void expDerivative(BaseMatrixT &b)¶
- b = a 
 - 
void sign(BaseMatrixT &b)¶
 - 
void exp(BaseMatrixT &b)¶
 - 
void pow(BaseMatrixT &b, T p)¶
 - 
void log(BaseMatrixT &b)¶
 - 
void sqrt(BaseMatrixT &b)¶
 - 
void addScalar(BaseMatrixT &b, T p)¶
 - 
void subScalar(BaseMatrixT &b, T p)¶
 - 
void mulScalar(BaseMatrixT &b, T p)¶
 - 
void divScalar(BaseMatrixT &b, T p)¶
 - 
void scalarDiv(BaseMatrixT &b, T p)¶
 - 
void invSqrt(BaseMatrixT &b)¶
- this = 1.0f / sqrt(b) 
 - 
void isEqualTo(BaseMatrixT &b, T value)¶
- this = (b == value) 
 - 
void softCrossEntropy(BaseMatrixT &b, BaseMatrixT &c)¶
- ternary operator. 
 - 
void softCrossEntropyBp(BaseMatrixT &b, BaseMatrixT &c)¶
 - 
void binaryLabelCrossEntropy(BaseMatrixT &b, BaseMatrixT &c)¶
 - 
void binaryLabelCrossEntropyBp(BaseMatrixT &b, BaseMatrixT &c)¶
 - 
void add(BaseMatrixT &b, BaseMatrixT &c)¶
- this = b + c 
 - 
void add(BaseMatrixT &b, T p1, BaseMatrixT &c, T p2)¶
- this = b*p1 + c*p2 
 - 
void sub(BaseMatrixT &b, BaseMatrixT &c)¶
- this = b - c 
 - 
void sub(BaseMatrixT &b, T p1, BaseMatrixT &c, T p2)¶
- this = b*p1 - c*p2 
 - 
void add2(BaseMatrixT &b, BaseMatrixT &c)¶
- this = this + b + c 
 - 
void add2(BaseMatrixT &b, BaseMatrixT &c, T p1, T p2, T p3)¶
- this = this*p1 + b*p2 + c*p3 
 - 
void add3(BaseMatrixT &b, BaseMatrixT &c, BaseMatrixT &d, T p1, T p2, T p3)¶
- this = b*p1 + c*p2 + d*p3 
 - 
void sgdUpdate(BaseMatrixT &b, BaseMatrixT &c, T p1, T p2, T p3)¶
- c = p2 * c - p1 * (b + p3 * this); this += c (c is the momentum term, "mom") 
 - 
void sgdUpdate(BaseMatrixT &b, BaseMatrixT &c, BaseMatrixT &d, T p1, T p2, T p3)¶
- c = p2 * c - p1 * d * (b + p3 * this); this += c (c is the momentum term, "mom") 
 - 
void applyL1(T learningRate, T decayRate)¶
- apply L1/L2 to this 
 - 
void applyL1(BaseMatrixT &lr, T learningRate, T decayRate)¶
 - 
void applyL2(T learningRate, T decayRate)¶
 - 
void applyL2(BaseMatrixT &lr, T learningRate, T decayRate)¶
 - 
void dotMul(BaseMatrixT &b)¶
- this *= b 
 - 
void dotMul(BaseMatrixT &b, BaseMatrixT &c)¶
- this = b * c 
 - 
void dotDiv(BaseMatrixT &b, BaseMatrixT &c)¶
- this = b / c 
 - 
void dotDiv(BaseMatrixT &b, BaseMatrixT &c, T p1, T p2)¶
- this = (b + p1) / (c + p2) 
 - 
void rankLoss(BaseMatrixT &b, BaseMatrixT &c, BaseMatrixT &d)¶
- this = log(1 + exp(b - c)) - d * (b - c) 
 - 
void rankLossBp(BaseMatrixT &b, BaseMatrixT &c, BaseMatrixT &d)¶
 - 
void logisticRegressionLoss(BaseMatrixT &b, BaseMatrixT &c)¶
- this = log(1 + exp(b)) - c * b 
 - 
void logisticRegressionLossBp(BaseMatrixT &b, BaseMatrixT &c)¶
- this += exp(b)/(1+exp(b)) - c 
 - 
void biggerThan(BaseMatrixT &b, BaseMatrixT &c)¶
- this = b > c ? 1.0 : 0.0 
 - 
void biggerThan(BaseMatrixT &b, BaseMatrixT &c, BaseMatrixT &d)¶
- this = ((b>c && d>0.5) || (b<c && d<0.5)) ? 1 : 0 
 - 
void max(BaseMatrixT &b, BaseMatrixT &c)¶
- this = b>c ? b : c 
 - 
void binaryClassificationError(size_t destCol, BaseMatrixT &b, BaseMatrixT &c, T p)¶
- this[destCol] += ((b>p) == (c>p)) ? 0 : 1 
 - 
void binaryClassificationError2(size_t destCol, BaseMatrixT &b, BaseMatrixT &c, T p)¶
 - 
void dotMulSquare(BaseMatrixT &b)¶
- this = this * b * b 
 - 
void dotSquareMul(BaseMatrixT &b)¶
- this = this * this * b 
 - 
void dotMulSquare(BaseMatrixT &b, BaseMatrixT &c)¶
- this = b * c * c 
 - 
void dotSquareSquare(BaseMatrixT &b, BaseMatrixT &c)¶
- this = b * b * c * c 
 - 
void dotMulSquareSum(BaseMatrixT &b, BaseMatrixT &c, T p1, T p2)¶
- this = this * (p1*b + p2*c)^2 
 - 
void dotSquareSum(BaseMatrixT &b, BaseMatrixT &c, T p1, T p2)¶
- this = (p1*b + p2*c)^2 
 - 
void dotMulSum(BaseMatrixT &b, BaseMatrixT &c, T p1, T p2)¶
- this= this * (p1*b + p2*c) 
 - 
void addSquareSum(BaseMatrixT &b, BaseMatrixT &c, BaseMatrixT d, T p1, T p2, T p3)¶
- this += sqr(p1*b + p2*c + p3*d) 
 - 
void addSquare(BaseMatrixT &b, T p)¶
- this += p * sqr(b) 
 - 
void decayAddSquare(BaseMatrixT &b, T p1, T p2)¶
- this = p1 * this + p2 * sqr(b) 
 - 
void decayAddSquareMul(BaseMatrixT &b, BaseMatrixT &c, T p1, T p2)¶
- this = p1 * this + p2 * sqr(b * c) 
 - 
void reciprocal(BaseMatrixT &b, T p1, T p2)¶
- this = 1 / (p1 * b + p2) 
 - 
void reciprocalSum(BaseMatrixT &b, BaseMatrixT &c, T p1, T p2, T p3)¶
- this = 1 / (p1 * b + p2 * c + p3) 
 - 
void copyAndClear(BaseMatrixT &b)¶
- b = this; this = 0 
 - 
void rowDotMul(size_t destCol, BaseMatrixT &b, BaseMatrixT &c)¶
- this_row[destCol] += dotprod(b_row, c_row) 
 - 
void rowDotMul2(size_t destCol, BaseMatrixT &b, BaseMatrixT &c)¶
 - 
void addDotMulVMM(BaseMatrixT &b, BaseMatrixT &c)¶
- this is vector (one row matrix) - for each row i, do: this_row += dotmul(b_row_i, c_row_i) 
 - 
void addDotMulVMM2(BaseMatrixT &b, BaseMatrixT &c)¶
 - 
void addDotMulMMV(BaseMatrixT &b, BaseMatrixT &c)¶
- c is vector (one row matrix) - for each row i, do: this_row_i += dotmul(b_row_i, c_row) 
 - 
void addDotMulMMV2(BaseMatrixT &b, BaseMatrixT &c)¶
 - 
void addDotMul(BaseMatrixT &b, BaseMatrixT &c, T p1, T p2)¶
- this = p1 * this + p2 * b * c 
 - 
void rowScale(size_t cCol, BaseMatrixT &b, BaseMatrixT &c)¶
- this_row = b_row * c_row[cCol] 
 - 
void rowScale2(size_t cCol, BaseMatrixT &b, BaseMatrixT &c)¶
 - 
void colScale(size_t cRow, BaseMatrixT &b, BaseMatrixT &c)¶
- this_col = b_col * c_col[cRow] 
 - 
void addColScale(size_t cRow, BaseMatrixT &b, BaseMatrixT &c)¶
- this_col += b_col * c_col[cRow] 
 - 
void addRowScale(size_t cCol, BaseMatrixT &b, BaseMatrixT &c)¶
- this_row += b_row * c_row[cCol] 
 - 
void sumRows(BaseMatrixT &b)¶
- calculate the sum of each row of the matrix b. 
 - 
void maxRows(BaseMatrixT &b)¶
- calculate the maximum value of each row of the matrix b. 
 - 
void minRows(BaseMatrixT &b)¶
- calculate the minimum value of each row of the matrix b. 
 - 
void sumCols(BaseMatrixT &b)¶
- calculate the sum of each column of the matrix b. 
 - 
void maxCols(BaseMatrixT &b)¶
- calculate the maximum value of each column of the matrix b. 
 - 
void minCols(BaseMatrixT &b)¶
- calculate the minimum value of each column of the matrix b. 
 - 
void sumCols(BaseMatrixT &b, T scale)¶
 - 
void sumOfSquares(BaseMatrixT &b, BaseMatrixT &c)¶
- calculate the sum of each row of (b - c)^2. 
 - 
void rowAdd(size_t cCol, BaseMatrixT &b, BaseMatrixT &c, T p)¶
- this_row = b_row + p * ones * c_row[cCol] 
 - 
void rowPow(size_t cCol, BaseMatrixT &b, BaseMatrixT &c)¶
- this_row = pow(b_row, c_row[cCol]) 
 - 
virtual bool isSparse() const¶
 
- 
virtual 
 
- 
typedef bool_constant<bool, false> 
Sparse Matrix¶
- 
namespace paddle¶
- Typedefs - 
typedef std::shared_ptr<GpuSparseMatrix> GpuSparseMatrixPtr¶
 - 
typedef std::shared_ptr<CpuSparseMatrix> CpuSparseMatrixPtr¶
 - Enums - 
enum SparseFormat¶
- matrix sparse_format. - nnz represents the number of nonzero elements in the sparse matrix. - SPARSE_CSR: row major matrix. length of row is height_ + 1, each element represents row start index in Matrix. length of col and value are nnz. - SPARSE_CSC: col major matrix. length of col is width_ + 1, each element represents col start index in Matrix. length of row and value are nnz. - for example: [0, 1, 0, 2, 0; 1, 0, 0, 0, 0; 0, 0, 0, 2, 5]; SPARSE_CSR row [0, 2, 3, 5]; col [1, 3, 0, 3, 4]; value [1, 2, 1, 2, 5] SPARSE_CSC col [0, 1, 2, 2, 4, 5]; row [1, 0, 0, 2, 2]; value [1, 1, 2, 2, 5] - Values: - 
SPARSE_CSR= 0¶
 - 
SPARSE_CSC= 1¶
 
- 
 - 
class Matrix¶
- #include <Matrix.h> Copy or assignment constructor will share the data as opposed to making a copy of the original data. To make a copy of the original data, use copyFrom() instead. Inherits from paddle::BaseMatrixT< real > Subclassed by paddle::CpuMatrix, paddle::CpuSparseMatrix, paddle::GpuMatrix, paddle::GpuSparseMatrix Public Functions - 
virtual ~Matrix()¶
 - 
void setData(real *data)¶
- set the data buffer used to hold the matrix data. - caller should make sure that the size of data is at least sizeof(real)*height*width. 
 - 
void setData(real *data, size_t newHeight, size_t newWidth)¶
- the data should be contiguous 
 - 
size_t getWidth() const¶
 - 
size_t getHeight() const¶
 - 
size_t getStride() const¶
 - 
size_t getElementCnt() const¶
 - 
virtual real *getData()¶
 - 
virtual const real *getData() const¶
 - 
bool isTransposed() const¶
 - 
bool isContiguous() const¶
 - 
virtual int *getRows() const¶
 - 
virtual int *getCols() const¶
 - 
virtual SparseFormat getFormat() const¶
 - 
virtual SparseValueType getValueType() const¶
 - 
virtual void add3(MatrixPtr b)¶
- matrix element-wise add - Named add3 just because add/add2 have been used in BaseMatrix.cu and they are not virtual functions. 
 - 
MemoryHandlePtr getMemoryHandle() const¶
 - 
virtual void zeroMem()¶
 - 
virtual void resetOne()¶
 - 
virtual void trimFrom(const CpuSparseMatrix &src)¶
 - 
virtual void copyFrom(const Matrix &src, hl_stream_t stream)¶
 - 
MatrixPtr subMatrix(size_t startRow, size_t endRow, size_t startCol, size_t endCol)¶
 - 
virtual void copyFrom(const real *src, size_t size)¶
- If this is GpuMatrix, src is assumed to be CPU memory - If this is CpuMatrix, src is assumed to be CPU memory 
 - 
virtual void copyFrom(const real *src, const int64_t *seq)¶
 - 
virtual void copyFrom(const IVector &src)¶
- convert an int vector to a real matrix. - (1) source and dest are both in CPU. - (2) sizes must match exactly. 
 - 
virtual MatrixPtr clone(size_t height = 0, size_t width = 0, bool useGpu = false)¶
- Create a matrix with the same type (GpuMatrix, CpuMatrix, NonValueSparseMatrix, etc.) as this. - If height and width are zero, the new matrix will have the same size as this, otherwise the new matrix will have the specified size. 
 - 
virtual real *getRowBuf(size_t row)¶
 - 
virtual real getElement(size_t x, size_t y) const¶
 - 
virtual real getSum()¶
 - 
virtual real getAbsSum()¶
 - 
virtual void resize(size_t newHeight, size_t newWidth) = 0¶
- Note
- Original data may not be preserved after resize().
 
 - 
virtual void resize(size_t newHeight, size_t newWidth, size_t newNnz, SparseValueType valueType, SparseFormat format) = 0¶
- Note
- This should only be used for sparse matrix.
 
 - 
virtual void setRow(size_t row, size_t colNum, const unsigned int *cols, const real *values) = 0¶
- This should only be used for sparse matrix. - Currently must be called for each row in order. The matrix is not valid until setRow is called for the last row. 
 - 
virtual void transpose(MatrixPtr matTrans, bool memAlloc)¶
- hard transpose. - allocate matTrans’ memory outside, then set memAlloc as false; else set as true. 
 - 
virtual void clear()¶
- Only set all variables to 0 or NULL but not free them. 
 - 
void reshape(size_t height, size_t width)¶
 - 
virtual void mul(const MatrixPtr a, const MatrixPtr b, real scaleAB, real scaleT)¶
- this = scaleAB*(a*b) + scaleT*this 
 - 
virtual void addColumnVector(const Matrix &b)¶
- Add a vector (column) b to matrix a, column by column. 
 - 
virtual void addByBitCode(size_t numClasses, const IVector &codes, const Matrix &vec)¶
- For j < codeLength: this(i, j) += vec(index(i, j), 0) where index(i, j) = ((codes(i) + numClasses) >> (j + 1)) - 1 
 - 
virtual void addByBitCodeBackward(size_t numClasses, const IVector &codes, Matrix &vec)¶
- For j < codeLength: vec(index(i, j), 0) += this(i, j) where index is same as the index for addByBitCode 
 - 
virtual void mulByBitCode(size_t numClasses, const IVector &codes, const Matrix &mat, const Matrix &input)¶
- For j < codeLength: this(i, j) += <mat.row(index(i, j)), input.row(i)> where index is same as the index for addByBitCode 
 - 
virtual void mulByBitCodeBackwardWeight(size_t numClasses, const IVector &codes, Matrix &mat, const Matrix &input)¶
- For j < codeLength: mat.row(index(i, j)) += this(i, j) * input.row(i) where index is same as the index for addByBitCode 
 - 
virtual void mulByBitCodeBackwardError(size_t numClasses, const IVector &codes, const Matrix &mat, Matrix &input)¶
- For j < codeLength: input.row(i) += this(i, j) * mat.row(index(i, j)) where index is same as the index for addByBitCode 
 - 
virtual void sumByBitCode(size_t numClasses, IVector &codes, Matrix &sum, real scaleSum)¶
- For j < codeLength sum(i, 0) = scaleSum * \sum_j bit(i, j) * this(i, j) where bit(i, j) = ((codes(i) + numClasses) & 2^j) ? 1 : 0 
 - 
virtual void subByBitCode(size_t numClasses_, IVector &codes)¶
- For j < codeLength this(i, j) -= bit(i, j) where bit(i, j) is same as that for sumByBitCode 
 - 
virtual void rowMax(IVector &maxIds, Matrix &max)¶
- Get the top k elements of each row of this matrix. - The column ids and values of these elements are stored in maxIds and max respectively. Note that the top k elements are not sorted. 
 - 
virtual void oneHotCrossEntropy(Matrix &output, IVector &label)¶
- copy -log(output[label]) to this->data[i]. 
 - 
virtual void oneHotCrossEntropyBp(Matrix &outputV, IVector &label)¶
- calculate the error of outputV according to label. 
 - 
virtual void oneHotCrossEntropyWithSelfNorm(Matrix &output, IVector &label, real alpha)¶
- copy -log(output[label]) to this->data[i]. 
 - 
virtual void oneHotCrossEntropyWithSelfNormBp(Matrix &outputV, IVector &label, real alpha)¶
- calculate the error of outputV according to label. 
 - 
virtual void circularConv(Matrix &b, Matrix &c)¶
- \[ a[i] = \sum_{j=-(N-1)/2}^{(N-1)/2} b_{i+j} * c_{j} \]b contains M elements, c contains N elements (N is odd), b’s index arithmetic is computed modulo M, c’s index arithmetic is computed modulo N. 
 - 
virtual void circularConvDerivative(Matrix &output, Matrix &prevOut1, Matrix &prevOut2, Matrix &prevGrad1, Matrix &prevGrad2)¶
 - 
virtual void cosSim(Matrix &output1, Matrix &output2, real scale = 1.0f)¶
- cosine similarity, for each row i, this[i] = cos(output1[i], output2[i]) - output2 can only have one row, then for each row i, this[i] = cos(output1[i], output2[0]) 
 - 
virtual void cosSimDerivative(Matrix &output, Matrix &prevOut1, Matrix &prevOut2, Matrix &prevGrad1, Matrix &prevGrad2, real scale = 1.0f)¶
 - 
virtual void print(std::ostream &os) const¶
- print out the values of elements to os 
 - 
virtual void print(std::ostream &os, size_t height, size_t width) const¶
- print a part of the matrix from the (top,left) value to the (height, width) value (not included) 
 - 
virtual void printOneRow(std::ostream &os, size_t idx) const¶
- print one row to os 
 - 
virtual real getMin()¶
 - 
virtual real getMax()¶
 - 
virtual void randomizeUniform()¶
 - 
virtual void classificationError(MatrixPtr output, IVectorPtr label)¶
- calculate the error of classification - output[i] = 1 if row i is an error. - output[i] = 0 if row i is correct. 
 - 
virtual void convExpand(Matrix &feature, int feaImgHeight, int feaImgWidth, int channels, int blockH, int blockW, int strideH, int strideW, int paddingH, int paddingW, int outputH, int outputW)¶
- This function is used to calculate the convolution: - It will expand a feature matrix according to the convolution filters 
 - 
virtual void convShrink(Matrix &expandColMat, int thisImgHeight, int thisImgWidth, int channels, int blockH, int blockW, int strideH, int strideW, int paddingH, int paddingW, int outputH, int outputW, real alpha = 1.0f, real beta = 0.0f)¶
- This function is the reverse implementation of convExpand: - Its function is to restore an expanded matrix into a feature matrix 
 - 
virtual void maxPoolForward(Matrix &inputMat, size_t imgSizeH, size_t imgSizeW, size_t channels, size_t sizeX, size_t sizeY, size_t strideH, size_t strideW, size_t outputH, size_t outputW, size_t paddingH, size_t paddingW)¶
- Pooling forward operation, pick out the largest element in the sizeX of value 
 - 
virtual void maxPoolBackward(Matrix &image, size_t imgSizeH, size_t imgSizeW, Matrix &outGrad, Matrix &outV, size_t sizeX, size_t sizeY, size_t strideH, size_t strideW, size_t outputH, size_t outputW, real scaleTargets, real scaleOutput, size_t paddingH, size_t paddingW)¶
- Pooling backward operation. 
 - 
virtual void avgPoolForward(Matrix &input, size_t imgSizeH, size_t imgSizeW, size_t channels, size_t sizeX, size_t sizeY, size_t strideH, size_t strideW, size_t outputH, size_t outputW, size_t paddingH, size_t paddingW)¶
- Pooling forward operation, calculate the average of sizeX elements. 
 - 
virtual void avgPoolBackward(Matrix &input, size_t imgSizeH, size_t imgSizeW, size_t sizeX, size_t sizeY, size_t strideH, size_t strideW, size_t outputH, size_t outputW, real scaleTargets, real scaleOutput, size_t paddingH, size_t paddingW)¶
 - 
virtual void crossMapNormalFwd(Matrix &input, size_t imgSizeH, size_t imgSizeW, Matrix &denoms, size_t channels, size_t sizeX, float scale, float pow)¶
- normalize-operation. 
 - 
virtual void crossMapNormalBwd(Matrix &localGrad, Matrix &denoms, Matrix &preOutV, Matrix &localOutV, size_t channels, size_t imgSizeH, size_t imgSizeW, size_t size, float scale, float pow)¶
 - 
virtual void maxSequenceForward(Matrix &input, const IVector &sequence, IVector &index)¶
- Input: one or more sequences. Each sequence contains some instances. - Output: output size is the number of input sequences (NOT input instances). - output[i] is set to max_input[i]. 
 - 
virtual void contextProjectionForward(MatrixPtr input, MatrixPtr weight, const IVector &sequence, int contextLength, int contextStart, size_t beginPad, bool isPadding)¶
 - 
virtual void contextProjectionBackward(MatrixPtr inputGrad, MatrixPtr weightGrad, const IVector &sequence, int contextLength, int contextStart, size_t beginPad, bool isPadding)¶
 - 
virtual void contextProjectionBackwardData(MatrixPtr inputGrad, const IVector &sequence, int contextLength, int contextStart)¶
 - 
virtual void contextProjectionBackwardWeight(MatrixPtr weightGrad, const IVector &sequence, int contextLength, int contextStart, int totalPad, size_t beginPad)¶
 - 
virtual void selectRows(Matrix &table, IVector &ids)¶
- this.row[i] += table.row[ids[i]] if ids[i] == -1, it will be ignored 
 - 
virtual void addToRows(Matrix &table, IVector &ids)¶
- table.row[ids[i]] += this.row[i] if ids[i] == -1, it will be ignored 
 - 
virtual void multiBinaryLabelCrossEntropy(Matrix &output, Matrix &label)¶
- cross entropy for multi binary labels - this[i] = -sum(label[i][j]*log(output[i][j]) + (1-label[i][j])*log(1-output[i][j])) 
 - 
virtual void multiBinaryLabelCrossEntropyBp(Matrix &output, Matrix &label)¶
- The gradient of cross entropy for multi binary labels on output. - this[i][j] = -label[i][j]/output[i][j] + (1-label[i][j])/(1-output[i][j]) 
 Public Static Functions - 
MatrixPtr create(MemoryHandlePtr memHandle, size_t height, size_t width, bool trans = false)¶
 - 
MatrixPtr create(size_t height, size_t width, bool trans = false, bool useGpu = false)¶
 - 
MatrixPtr create(real *data, size_t height, size_t width, bool trans = false, bool useGpu = false)¶
 - 
MatrixPtr create(real *data, size_t height, size_t width, size_t stride, bool trans = false, bool useGpu = false)¶
 - 
MatrixPtr createSparseMatrix(size_t height, size_t width, size_t nnz, SparseValueType valueType = FLOAT_VALUE, bool trans = false, bool useGpu = false)¶
 - 
MatrixPtr createSparseMatrix(size_t height, size_t width, size_t nnz, SparseValueType valueType = FLOAT_VALUE, SparseFormat foramt = SPARSE_CSR, bool trans = false, bool useGpu = false)¶
 - 
MatrixPtr createSparseMatrix(real *data, int *row, int *col, size_t height, size_t width, size_t nnz, SparseValueType valueType, SparseFormat format, bool trans, bool useGpu)¶
 - 
void resizeOrCreateSparseMatrix(MatrixPtr &matrix, size_t height, size_t width, size_t nnz, SparseValueType valueType = FLOAT_VALUE, SparseFormat foramt = SPARSE_CSR, bool trans = false, bool useGpu = false)¶
 - 
void resizeOrCreate(MatrixPtr &a, size_t height, size_t width, bool trans = false, bool useGpu = false)¶
 
- 
virtual 
 - 
class GpuMatrix¶
- Inherits from paddle::Matrix - Public Functions - 
GpuMatrix()¶
 - 
GpuMatrix(size_t height, size_t width, bool trans = false)¶
 - 
GpuMatrix(real *data, size_t height, size_t width, bool trans = false)¶
 - 
GpuMatrix(real *data, size_t height, size_t width, size_t stride, bool trans = false)¶
 - 
GpuMatrix(GpuMemHandlePtr dataHandle, size_t height, size_t width, bool trans = false)¶
 - 
~GpuMatrix()¶
 - 
virtual void zeroMem()¶
 - 
virtual void resetOne()¶
 - 
virtual void resize(size_t newHeight, size_t newWidth)¶
- Note
- Original data may not be preserved after resize().
 
 - 
virtual void resize(size_t newHeight, size_t newWidth, size_t newNnz, SparseValueType valueType, SparseFormat format)¶
- Note
- This should only be used for sparse matrix.
 
 - 
virtual void setRow(size_t row, size_t colNum, const unsigned int *cols, const real *values)¶
- This should only be used for sparse matrix. - Currently must be called for each row in order. The matrix is not valid until setRow is called for the last row. 
 - 
virtual void copyFrom(const real *hostSrc, size_t size)¶
- Copy the data from cpu_memory buffer 
 - 
virtual void copyFrom(const real *hostSrc, const int64_t *seq)¶
 - 
virtual void copyFrom(const Matrix &src, hl_stream_t stream)¶
 - 
virtual void copyFrom(const IVector &src)¶
- convert an int vector to a real matrix. - (1) source and dest are both in CPU. - (2) sizes must match exactly. 
 - 
virtual MatrixPtr clone(size_t height, size_t width, bool useGpu = false)¶
- Create a matrix with the same type (GpuMatrix, CpuMatrix, NonValueSparseMatrix, etc.) as this. - If height and width are zero, the new matrix will have the same size as this, otherwise the new matrix will have the specified size. 
 - 
virtual real getElement(size_t x, size_t y) const¶
 - 
real *getRow(size_t row)¶
 - 
virtual real *getRowBuf(size_t row)¶
 - 
virtual real getSum()¶
 - 
virtual real getAbsSum()¶
 - 
virtual void transpose(MatrixPtr matTrans, bool memAlloc)¶
- hard transpose. - allocate matTrans’ memory outside, then set memAlloc as false; else set as true. 
 - 
virtual void addColumnVector(const Matrix &b)¶
- Add a vector (column) b to matrix a, column by column. 
 - 
virtual void mul(const MatrixPtr a, const MatrixPtr b, real scaleAB, real scaleT)¶
- this = scaleAB*(a*b) + scaleT*this 
 - 
void mul(const GpuSparseMatrix &a, const GpuMatrix &b, real scaleAB, real scaleT)¶
 - 
void mul(const GpuMatrix &a, const GpuSparseMatrix &b, real scaleAB, real scaleT)¶
 - 
virtual void rowMax(IVector &maxIds, Matrix &max)¶
- Get the top k elements of each row of this matrix. - The column ids and values of these elements are stored in maxIds and max respectively. Note that the top k elements are not sorted. 
 - 
virtual void oneHotCrossEntropy(Matrix &output, IVector &label)¶
- copy -log(output[label]) to this->data[i]. 
 - 
virtual void oneHotCrossEntropyBp(Matrix &outputV, IVector &label)¶
- calculate the error of outputV according to label. 
 - 
virtual void oneHotCrossEntropyWithSelfNorm(Matrix &output, IVector &label, real alpha)¶
- copy -log(output[label]) to this->data[i]. 
 - 
virtual void oneHotCrossEntropyWithSelfNormBp(Matrix &outputV, IVector &label, real alpha)¶
- calculate the error of outputV according to label. 
 - 
virtual void cosSim(Matrix &output1, Matrix &output2, real scale)¶
- cosine similarity, for each row i, this[i] = cos(output1[i], output2[i]) - output2 can only have one row, then for each row i, this[i] = cos(output1[i], output2[0]) 
 - 
virtual void cosSimDerivative(Matrix &output, Matrix &prevOut1, Matrix &prevOut2, Matrix &prevGrad1, Matrix &prevGrad2, real scale)¶
 - 
virtual void print(std::ostream &os) const¶
- print out the values of elements to os 
 - 
virtual void print(std::ostream &os, size_t height, size_t width) const¶
- print a part of the matrix from the (top,left) value to the (height, width) value (not included) 
 - 
virtual void randomizeUniform()¶
 - 
virtual void classificationError(MatrixPtr output, IVectorPtr label)¶
- Calculate the error of classification - output[i] = 1 if row i is an error. - output[i] = 0 if row i is correct. 
 - 
virtual void convExpand(Matrix &feature, int feaImgHeight, int feaImgWidth, int channels, int blockH, int blockW, int strideH, int strideW, int paddingH, int paddingW, int outputH, int outputW)¶
- This function is used to calculate the convolution: - It will expand a feature matrix according to the convolution filters 
 - 
virtual void convShrink(Matrix &expandColMat, int thisImgHeight, int thisImgWidth, int channels, int blockH, int blockW, int strideH, int strideW, int paddingH, int paddingW, int outputH, int outputW, real alpha = 1.0f, real beta = 0.0f)¶
- This function is the reverse implementation of convExpand: - Its function is to restore an expanded matrix into a feature matrix 
 - 
virtual void maxPoolForward(Matrix &inputMat, size_t imgSizeH, size_t imgSizeW, size_t channels, size_t sizeX, size_t sizeY, size_t strideH, size_t strideW, size_t outputH, size_t outputW, size_t paddingH, size_t paddingW)¶
- Pooling forward operation, pick out the largest value among sizeX elements. 
 - 
virtual void maxPoolBackward(Matrix &image, size_t imgSizeH, size_t imgSizeW, Matrix &outGrad, Matrix &outV, size_t sizeX, size_t sizeY, size_t strideH, size_t strideW, size_t outputH, size_t outputW, real scaleTargets, real scaleOutput, size_t paddingH, size_t paddingW)¶
- Pooling backward operation. 
 - 
virtual void avgPoolForward(Matrix &input, size_t imgSizeH, size_t imgSizeW, size_t channels, size_t sizeX, size_t sizeY, size_t strideH, size_t strideW, size_t outputH, size_t outputW, size_t paddingH, size_t paddingW)¶
- Pooling forward operation, calculate the average of sizeX elements. 
 - 
virtual void avgPoolBackward(Matrix &input, size_t imgSizeH, size_t imgSizeW, size_t sizeX, size_t sizeY, size_t strideH, size_t strideW, size_t outputH, size_t outputW, real scaleTargets, real scaleOutput, size_t paddingH, size_t paddingW)¶
 - 
virtual void crossMapNormalFwd(Matrix &input, size_t imgSizeH, size_t imgSizeW, Matrix &denoms, size_t channels, size_t sizeX, float scale, float pow)¶
- normalize-operation. 
 - 
virtual void crossMapNormalBwd(Matrix &localGrad, Matrix &denoms, Matrix &preOutV, Matrix &localOutV, size_t channels, size_t imgSizeH, size_t imgSizeW, size_t sizeX, float scale, float pow)¶
 - 
virtual void maxSequenceForward(Matrix &input, const IVector &sequence, IVector &index)¶
- Input: one or more sequences. Each sequence contains some instances. - Output: output size is the number of input sequences (NOT input instances). - output[i] is set to max_{for each instance in this sequence}{input[i]}. 
 - 
virtual void contextProjectionForward(MatrixPtr input, MatrixPtr weight, const IVector &sequence, int contextLength, int contextStart, size_t beginPad, bool isPadding)¶
 
- 
 - 
class CpuMatrix¶
- Inherits from paddle::Matrix - Subclassed by paddle::SharedCpuMatrix, paddle::SparseRowCpuMatrix, paddle::SparseRowIdsCpuMatrix - Public Functions - 
CpuMatrix(real *data, size_t height, size_t width, bool trans = false)¶
 - 
CpuMatrix(real *data, size_t height, size_t width, size_t stride, bool trans = false)¶
 - 
CpuMatrix(CpuMemHandlePtr dataHandle, size_t height, size_t width, bool trans = false)¶
 - 
~CpuMatrix()¶
 - 
virtual void zeroMem()¶
 - 
virtual void resetOne()¶
 - 
virtual void resize(size_t newHeight, size_t newWidth)¶
- Note
- Original data may not be preserved after resize().
 
 - 
virtual void resize(size_t newHeight, size_t newWidth, size_t newNnz, SparseValueType valueType, SparseFormat format)¶
- Note
- This should only be used for sparse matrix.
 
 - 
virtual void setRow(size_t row, size_t colNum, const unsigned int *cols, const real *values)¶
- This should only be used for sparse matrix. - Currently must be called for each row in order. The matrix is not valid until setRow is called for the last row. 
 - 
virtual real getElement(size_t x, size_t y) const¶
 - 
virtual real getSum()¶
 - 
virtual real getAbsSum()¶
 - 
virtual void transpose(MatrixPtr matTrans, bool memAlloc)¶
- hard transpose. - allocate matTrans’ memory outside, then set memAlloc as false; else set as true. 
 - 
virtual void copyFrom(const Matrix &src, hl_stream_t stream)¶
 - 
virtual void copyFrom(const real *src, size_t size)¶
- If this is GpuMatrix, src is assumed to be CPU memory - If this is CpuMatrix, src is assumed to be CPU memory 
 - 
virtual void copyFrom(const real *cpuSrc, const int64_t *seq)¶
 - 
virtual void copyFrom(const IVector &src)¶
- Convert an int vector to a real matrix. - (1) Source and dest are both in CPU. - (2) Sizes must match exactly. 
 - 
void copyFrom(CpuSparseMatrix &src)¶
 - 
virtual MatrixPtr clone(size_t height, size_t width, bool useGpu = false)¶
- Create a matrix with the same type (GpuMatrix, CpuMatrix, NonValueSparseMatrix, etc.) as this. - If height and width are zero, the new matrix will have the same size as this, otherwise the new matrix will have the specified size. 
 - 
virtual void convExpand(Matrix &feature, int feaImgHeight, int feaImgWidth, int channels, int blockH, int blockW, int strideH, int strideW, int paddingH, int paddingW, int outputH, int outputW)¶
- This function is used to calculate the convolution: - It will expand a feature matrix according to the convolution filters 
 - 
virtual void convShrink(Matrix &expandColMat, int thisImgHeight, int thisImgWidth, int channels, int blockH, int blockW, int strideH, int strideW, int paddingH, int paddingW, int outputH, int outputW, real alpha = 1.0f, real beta = 0.0f)¶
- This function is the reverse implementation of convExpand: - Its function is to restore an expanded matrix into a feature matrix 
 - 
virtual void maxPoolForward(Matrix &inputMat, size_t imgSizeH, size_t imgSizeW, size_t channels, size_t sizeX, size_t sizeY, size_t strideH, size_t strideW, size_t outputH, size_t outputW, size_t paddingH, size_t paddingW)¶
- Pooling forward operation, pick out the largest value among sizeX elements. 
 - 
virtual void maxPoolBackward(Matrix &image, size_t imgSizeH, size_t imgSizeW, Matrix &outGrad, Matrix &outV, size_t sizeX, size_t sizeY, size_t strideH, size_t strideW, size_t outputH, size_t outputW, real scaleTargets, real scaleOutput, size_t paddingH, size_t paddingW)¶
- Pooling backward operation. 
 - 
virtual void avgPoolForward(Matrix &input, size_t imgSizeH, size_t imgSizeW, size_t channels, size_t sizeX, size_t sizeY, size_t strideH, size_t strideW, size_t outputH, size_t outputW, size_t paddingH, size_t paddingW)¶
- Pooling forward operation, calculate the average of sizeX elements. 
 - 
virtual void avgPoolBackward(Matrix &input, size_t imgSizeH, size_t imgSizeW, size_t sizeX, size_t sizeY, size_t strideH, size_t strideW, size_t outputH, size_t outputW, real scaleTargets, real scaleOutput, size_t paddingH, size_t paddingW)¶
 - 
virtual void crossMapNormalFwd(Matrix &input, size_t imgSizeH, size_t imgSizeW, Matrix &denoms, size_t channels, size_t sizeX, float scale, float pow)¶
- normalize-operation. 
 - 
virtual void crossMapNormalBwd(Matrix &localGrad, Matrix &denoms, Matrix &preOutV, Matrix &localOutV, size_t channels, size_t imgSizeH, size_t imgSizeW, size_t sizeX, float scale, float pow)¶
 - 
virtual void maxSequenceForward(Matrix &input, const IVector &sequence, IVector &index)¶
- Input: one or more sequences. Each sequence contains some instances. Output: output size is the number of input sequences (NOT input instances). output[i] is set to max_{for each instance in this sequence}{input[i]} 
 - 
virtual void contextProjectionForward(MatrixPtr input, MatrixPtr weight, const IVector &sequence, int contextLength, int contextStart, size_t beginPad, bool isPadding)¶
 - 
virtual void contextProjectionBackward(MatrixPtr inputGrad, MatrixPtr weightGrad, const IVector &sequence, int contextLength, int contextStart, size_t beginPad, bool isPadding)¶
 - 
real *getRow(size_t row)¶
 - 
virtual real *getRowBuf(size_t row)¶
 - template <typename TableMatType>
- 
void selectRowsImp(TableMatType &table, IVector &ids)¶
- Use abstract getRow() to get row from table. - Define table as template instead of virtual class for performance sake. Used internally by the above two virtual functions. 
 - 
virtual void addColumnVector(const Matrix &b)¶
- Add a vector (column) b to matrix a, column by column. 
 - 
virtual void mul(const MatrixPtr a, const MatrixPtr b, real scaleAB, real scaleT)¶
- this = scaleAB*(a*b) + scaleT*this 
 - 
void mul(CpuMatrix *a, CpuSparseMatrix *b, real scaleAB, real scaleT)¶
 - 
virtual void mul(CpuSparseMatrix *a, CpuMatrix *b, real scaleAB, real scaleT)¶
 - 
virtual void rowMax(IVector &maxIds, Matrix &max)¶
- Get the top k elements of each row of this matrix. - The column ids and values of these elements are stored in maxIds and max respectively. Note that the top k elements are not sorted. 
 - 
virtual void oneHotCrossEntropy(Matrix &output, IVector &label)¶
- copy -log(output[label]) to this->data[i]. 
 - 
virtual void oneHotCrossEntropyBp(Matrix &outputV, IVector &label)¶
- calculate the error of outputV according to label. 
 - 
virtual void oneHotCrossEntropyWithSelfNorm(Matrix &output, IVector &label, real alpha)¶
- copy -log(output[label]) to this->data[i]. 
 - 
virtual void oneHotCrossEntropyWithSelfNormBp(Matrix &outputV, IVector &label, real alpha)¶
- calculate the error of outputV according to label. 
 - 
virtual void circularConv(Matrix &b, Matrix &c)¶
- \[ a[i] = \sum_{j=-(N-1)/2}^{(N-1)/2} b_{i+j} * c_{j} \]b contains M elements, c contains N elements (N is odd), b’s index arithmetic is computed modulo M, c’s index arithmetic is computed modulo N. 
 - 
virtual void circularConvDerivative(Matrix &output, Matrix &prevOut1, Matrix &prevOut2, Matrix &prevGrad1, Matrix &prevGrad2)¶
 - 
virtual void cosSim(Matrix &output1, Matrix &output2, real scale)¶
- cosine similarity, for each row i, this[i] = cos(output1[i], output2[i]) - output2 can only have one row, then for each row i, this[i] = cos(output1[i], output2[0]) 
 - 
virtual void cosSimDerivative(Matrix &output, Matrix &prevOut1, Matrix &prevOut2, Matrix &prevGrad1, Matrix &prevGrad2, real scale)¶
 - 
virtual void print(std::ostream &os) const¶
- print out the values of elements to os 
 - 
virtual void print(std::ostream &os, size_t height, size_t width) const¶
- print a part of the matrix from the (top,left) value to the (height, width) value (not included) 
 - 
virtual void printOneRow(std::ostream &os, size_t idx) const¶
- print one row to os 
 - 
virtual real getMin()¶
 - 
virtual real getMax()¶
 - 
virtual void randomizeUniform()¶
 - 
virtual void classificationError(MatrixPtr output, IVectorPtr label)¶
- Calculate the error of classification - output[i] = 1 if row i is an error. - output[i] = 0 if row i is correct. 
 - 
virtual void addByBitCode(size_t numClasses, const IVector &codes, const Matrix &vec)¶
- For j < codeLength: this(i, j) += vec(index(i, j), 0) where index(i, j) = ((codes(i) + numClasses) >> (j + 1)) - 1 
 - 
virtual void addByBitCodeBackward(size_t numClasses, const IVector &codes, Matrix &vec)¶
- For j < codeLength: vec(index(i, j), 0) += this(i, j) where index is same as the index for addByBitCode 
 - 
virtual void mulByBitCode(size_t numClasses, const IVector &codes, const Matrix &mat, const Matrix &input)¶
- For j < codeLength: this(i, j) += <mat.row(index(i, j)), input.row(i)> where index is same as the index for addByBitCode 
 - 
virtual void mulByBitCodeBackwardWeight(size_t numClasses, const IVector &codes, Matrix &mat, const Matrix &input)¶
- For j < codeLength: mat.row(index(i, j)) += this(i, j) * input.row(i) where index is same as the index for addByBitCode 
 - 
virtual void mulByBitCodeBackwardError(size_t numClasses, const IVector &codes, const Matrix &mat, Matrix &input)¶
- For j < codeLength: input.row(i) += this(i, j) * mat.row(index(i, j)) where index is same as the index for addByBitCode 
 - 
virtual void sumByBitCode(size_t numClasses, IVector &codes, Matrix &sum, real scaleSum)¶
- For j < codeLength sum(i, 0) = scaleSum * \sum_j bit(i, j) * this(i, j) where bit(i, j) = ((codes(i) + numClasses) & 2^j) ? 1 : 0 
 - 
virtual void subByBitCode(size_t numClasses_, IVector &codes)¶
- For j < codeLength this(i, j) -= bit(i, j) where bit(i, j) is same as that for sumByBitCode 
 - 
virtual void multiBinaryLabelCrossEntropy(Matrix &output, Matrix &label)¶
- cross entropy for multi binary labels - this[i] = -sum(label[i][j]*log(output[i][j]) + (1-label[i][j])*log(1-output[i][j])) 
 - Public Static Functions - 
void mul(CpuMatrix *a, CpuMatrix *b, CpuSparseMatrix *c, real scaleAB, real scaleT)¶
 - template <typename MatBType, typename MatCType>
- 
void mul(CpuSparseMatrix *a, MatBType *b, MatCType *c, real scaleAB, real scaleT)¶
- c = a * b - use abstract getRow() to get row from B,C. Define B,C as template instead of virtual class for performance sake. 
 
- 
 - Inherits from paddle::CpuMatrix - Public Functions - Private Functions - Private Members 
 - 
struct sparse_float_value_t¶
 
- 
typedef std::shared_ptr<GpuSparseMatrix> 
- 
namespace paddle
- Typedefs - 
typedef CpuVectorT<real> CpuVector¶
 - 
typedef GpuVectorT<real> GpuVector¶
 - 
typedef CpuVectorT<int> CpuIVector¶
 - 
typedef GpuVectorT<int> GpuIVector¶
 - 
typedef std::shared_ptr<CpuIVector> CpuIVectorPtr¶
 - 
typedef std::shared_ptr<GpuIVector> GpuIVectorPtr¶
 - 
typedef CpuGpuVectorT<real> CpuGpuVector¶
 - 
typedef CpuGpuVectorT<int> ICpuGpuVector¶
 - 
typedef std::shared_ptr<CpuGpuVector> CpuGpuVectorPtr¶
 - 
typedef std::shared_ptr<ICpuGpuVector> ICpuGpuVectorPtr¶
 - template <class T>
- 
class GpuVectorT¶
- Inherits from paddle::VectorT< T > - Public Functions - 
GpuVectorT(size_t size)¶
 - 
GpuVectorT(size_t size, GpuMemHandlePtr memHandle, size_t offset)¶
 - 
GpuVectorT(size_t size, T *data)¶
 - 
virtual MemoryHandlePtr newMemory(size_t size)¶
 - 
virtual void zeroMem()¶
 - 
virtual void reset(const T &value)¶
 - 
virtual void fillSequence()¶
 - 
virtual void copyFrom(const T *src, size_t size)¶
- copy size elements from src - If this is GpuVector, src can be cpu or gpu memory - If this is CpuVector, src is assumed to be cpu memory 
 - 
virtual void copyFrom(const T *src, size_t size, hl_stream_t stream)¶
- copy size elements from src - If this is GpuVector, src can be cpu or gpu memory - If this is CpuVector, src is assumed to be cpu memory, 
 - 
virtual void copyFrom(const VectorT<T> &src)¶
- This function will crash if the size of src and dest is different. 
 - 
virtual void copyFrom(const VectorT<T> &src, hl_stream_t stream)¶
- If use_gpu, this function will push the copy-task to the specified stream and return immediately. - If not use GPU, this function is the same as copyFrom(const VectorT<T>& src), which uses stream HPPL_STREAM_DEFAULT. 
 - 
virtual T getElement(size_t i) const¶
- Get the value for the i’th element. 
 - 
virtual void setElement(size_t i, const T &value)¶
 - 
virtual T *getPoint(const uint64_t beginPos)¶
- Get the buffer point with beginPos. 
 - 
virtual T getAbsSum()¶
 - 
virtual T getSum()¶
 - 
virtual T getMax()¶
 - 
virtual T getAbsMax()¶
 - 
virtual T getMin()¶
 - 
virtual void selectFrom(const VectorT<T> &src, const VectorT<int> &ids)¶
- select elements indexed by ids from vector src 
 - 
virtual void histogram(std::ostream &os, int type)¶
- print histogram of vector values - Note
- only exponent histogram supported currently
 
 - 
virtual void rand()¶
- generate uniform random value for each element 
 - 
virtual void rand(size_t classes)¶
- generate uniform random value for each element, data range is from 0 to (classes - 1). 
 - 
virtual void randnorm(real mean, real standardDeviation)¶
- generate univariate Gaussian distributed random numbers with given mean and standardDeviation. 
 - 
virtual void uniform(real left, real right)¶
- generate uniform distributed random numbers with given range. 
 - 
virtual T get(size_t pos)¶
- Debug use only. Very inefficient for GPU vector. get the value at pos. 
 - Protected Functions - 
virtual void copyTo(CpuVectorT<T> *dest) const¶
 - 
virtual void copyTo(GpuVectorT<T> *dest) const¶
 
- 
 - template <class T>
- 
class CpuVectorT¶
- Inherits from paddle::VectorT< T > - Subclassed by paddle::ParallelCpuVectorT< T > - Public Functions - 
CpuVectorT(size_t size)¶
 - 
CpuVectorT(size_t size, MemoryHandlePtr memoryHandle, size_t offset)¶
 - 
CpuVectorT(size_t size, T *data)¶
 - 
CpuVectorT(const VectorT<T> &src)¶
- If src is a CpuVector, the new CpuVector will share the data with src - If src is a GpuVector, the new CpuVector will copy data from src 
 - 
virtual MemoryHandlePtr newMemory(size_t size)¶
 - 
virtual void zeroMem()¶
 - 
virtual void reset(const T &value)¶
 - 
virtual void fillSequence()¶
 - 
virtual void copyFrom(const T *src, size_t size)¶
- copy size elements from src - If this is GpuVector, src can be cpu or gpu memory - If this is CpuVector, src is assumed to be cpu memory 
 - 
virtual void copyFrom(const T *src, size_t size, hl_stream_t stream)¶
- copy size elements from src - If this is GpuVector, src can be cpu or gpu memory - If this is CpuVector, src is assumed to be cpu memory, 
 - 
virtual void copyFrom(const VectorT<T> &src)¶
- This function will crash if the size of src and dest is different. 
 - 
virtual void copyFrom(const VectorT<T> &src, hl_stream_t stream)¶
- If use_gpu, this function will push the copy-task to the specified stream and return immediately. - If not use GPU, this function is the same as copyFrom(const VectorT<T>& src), which uses stream HPPL_STREAM_DEFAULT. 
 - 
virtual void copyTo(CpuVectorT<T> *dest) const¶
 - 
virtual void copyTo(GpuVectorT<T> *dest) const¶
 - 
virtual T *getPoint(const uint64_t beginPos)¶
- Get the buffer point with beginPos. 
 - 
virtual T getElement(size_t i) const¶
- Get the value for the i’th element. 
 - 
virtual void setElement(size_t i, const T &value)¶
 - 
virtual T getAbsSum()¶
 - 
virtual T getSum()¶
 - 
virtual T getMax()¶
 - 
virtual T getAbsMax()¶
 - 
virtual T getMin()¶
 - 
virtual void selectFrom(const VectorT<T> &src, const VectorT<int> &ids)¶
- select elements indexed by ids from vector src 
 - 
virtual void histogram(std::ostream &os, int type)¶
- print histogram of vector values - Note
- only exponent histogram supported currently
 
 - 
virtual void rand()¶
- generate uniform random value for each element 
 - 
virtual void rand(size_t classes)¶
- generate uniform random value for each element, data range is from 0 to (classes - 1). 
 - 
virtual void randnorm(real mean, real standardDeviation)¶
- generate univariate Gaussian distributed random numbers with given mean and standardDeviation. 
 - 
virtual void uniform(real left, real right)¶
- generate uniform distributed random numbers with given range. 
 - 
virtual T get(size_t pos)¶
- Debug use only. Very inefficient for GPU vector. get the value at pos. 
 
- 
 - template <class T>
- 
class BaseVector¶
- Inherits from paddle::BaseMatrixT< T > - Subclassed by paddle::VectorT< T > - Protected Attributes - 
size_t &size_¶
 
- 
size_t &
 - template <class T>
- 
class VectorT¶
- #include <Vector.h> Copy or assignment constructor will share the data as opposed to making a copy of the original data. To make a copy of the original data, use copyFrom() instead. Inherits from paddle::BaseVector< T > Subclassed by paddle::CpuVectorT< T >, paddle::GpuVectorT< T > Public Functions - 
virtual ~VectorT()¶
 - 
size_t getSize() const¶
 - 
const T *getData() const¶
 - 
T *getData()¶
 - 
virtual void zeroMem() = 0¶
 - 
virtual void reset(const T &value) = 0¶
 - 
virtual void fillSequence() = 0¶
 - 
MemoryHandlePtr getMemoryHandle() const¶
 - 
void resize(size_t newSize)¶
- Resizing to a bigger vector will not preserve old values. 
 - 
virtual MemoryHandlePtr newMemory(size_t size) = 0¶
 - 
void subVecFrom(const VectorT<T> &src, size_t start, size_t size)¶
- form sub vector from src, shallow copy 
 - 
void subVecFrom(const T *src, size_t start, size_t size)¶
- form sub vector from src, shallow copy 
 - 
void subVecFrom(const VectorT<T> &src, std::pair<size_t, size_t> interval)¶
- form sub vector from src, shallow copy in interval [interval.first, interval.second) 
 - 
virtual void copyFrom(const VectorT<T> &src) = 0¶
- This function will crash if the size of src and dest is different. 
 - 
virtual void copyFrom(const VectorT<T> &src, hl_stream_t stream) = 0¶
- If use_gpu, this function will push the copy-task to the specified stream and return immediately. - If not use GPU, this function is the same as copyFrom(const VectorT<T>& src), which uses stream HPPL_STREAM_DEFAULT. 
 - 
virtual void copyFrom(const T *src, size_t size) = 0¶
- copy size elements from src - If this is GpuVector, src can be cpu or gpu memory - If this is CpuVector, src is assumed to be cpu memory 
 - 
virtual void copyFrom(const T *src, size_t size, hl_stream_t stream) = 0¶
- copy size elements from src - If this is GpuVector, src can be cpu or gpu memory - If this is CpuVector, src is assumed to be cpu memory, 
 - 
virtual void exec(SyncThreadPool::JobFunc func)¶
- exec a func in single/multi thread 
 - 
virtual T *getPoint(const uint64_t beginPos) = 0¶
- Get the buffer point with beginPos. 
 - 
virtual T getElement(size_t i) const = 0¶
- Get the value for the i’th element. 
 - 
virtual void setElement(size_t i, const T &value) = 0¶
 - 
virtual T getAbsSum() = 0¶
 - 
virtual T getSum() = 0¶
 - 
virtual T getMax() = 0¶
 - 
virtual T getAbsMax() = 0¶
 - 
virtual T getMin() = 0¶
 - 
virtual void isEqualTo(const VectorT<T> &b, const T &value) = 0¶
- element-wise calc: this = (b == value) 
 - 
virtual void selectFrom(const VectorT<T> &src, const VectorT<int> &ids) = 0¶
- select elements indexed by ids from vector src 
 - 
virtual void histogram(std::ostream &os, int type = HISTOGRAM_EXPONENT) = 0¶
- print histogram of vector values - Note
- only exponent histogram supported currently
 
 - 
virtual void rand() = 0¶
- generate uniform random value for each element 
 - 
virtual void rand(size_t classes) = 0¶
- generate uniform random value for each element, data range is from 0 to (classes - 1). 
 - 
virtual T get(size_t pos) = 0¶
- Debug use only. Very inefficient for GPU vector. get the value at pos. 
 - 
virtual void randnorm(real mean, real standardDeviation) = 0¶
- generate univariate Gaussian distributed random numbers with given mean and standardDeviation. 
 - 
virtual void uniform(real left, real right) = 0¶
- generate uniform distributed random numbers with given range. 
 Public Static Functions - 
std::shared_ptr<VectorT<T>> create(size_t size, MemoryHandlePtr memoryHandle, size_t offset = 0)¶
 - 
std::shared_ptr<VectorT<T>> createParallelVector(size_t size, bool useGpu, SyncThreadPool *pool = nullptr)¶
 Protected Functions - 
VectorT(size_t size, MemoryHandlePtr memoryHandle, size_t offset, bool useGpu)¶
 - 
VectorT(size_t size, T *data, bool useGpu)¶
 - 
virtual void copyTo(CpuVectorT<T> *dest) const = 0¶
 - 
virtual void copyTo(GpuVectorT<T> *dest) const = 0¶
 Protected Attributes - 
MemoryHandlePtr memoryHandle_¶
 Friends - 
friend paddle::GpuVectorT< T >
 - 
friend paddle::CpuVectorT< T >
 
- 
virtual 
 - template <class T>
- 
class ParallelCpuVectorT¶
- Inherits from paddle::CpuVectorT< T > - Public Functions - 
ParallelCpuVectorT(size_t size, SyncThreadPool *pool)¶
 - 
virtual void zeroMem()¶
 - 
virtual void randnorm(real mean, real standardDeviation)¶
- generate univariate Gaussian distributed random numbers with given mean and standardDeviation. 
 - 
virtual void uniform(real left, real right)¶
- generate uniform distributed random numbers with given range. 
 - 
virtual void exec(SyncThreadPool::JobFunc func)¶
- exec a func in single/multi thread 
 - Private Types - 
typedef std::function<void(CpuVectorT<T> &vec)> ExecFunc¶
 - Private Members - 
SyncThreadPool *pool_¶
 
- 
 - template <class T>
- 
class CpuGpuVectorT¶
- #include <Vector.h> A class to do conversion between CpuVector and GpuVector automatically. Public Types Public Functions - 
CpuGpuVectorT(size_t size, bool useGpu)¶
- A constructor, create cpuVectorT_ or gpuVectorT_. - Parameters
- size-- data size. 
- useGpu-- use gpu or not. 
 
 
 - A constructor, create CpuGpuVectorT by VectorT. - If src is CpuVector, cpuVectorT_ is shared data with src. - If src is GpuVector, gpuVectorT_ is shared data with src. 
 - 
CpuGpuVectorT(size_t size, T *data, bool useGpu)¶
- A constructor. - If useGpu is true, data should be located in device and create gpuVectorT_ with data. - If useGpu is false, data should be located in host and create cpuVectorT_ with data. - Note
- Data is owned by the caller and should be valid during the life of this vector. Caller is responsible for releasing the memory.
 
 - 
CpuGpuVectorT(CpuGpuVectorT<T> &src, size_t offset, size_t size)¶
 - 
virtual ~CpuGpuVectorT()¶
 - 
void resize(size_t size, bool useGpu)¶
- resize vector. - If useGpu is true, resize gpuVectorT_ and set syncFlag_ to DATA_AT_GPU, - otherwise resize cpuVectorT_ and set syncFlag_ to DATA_AT_CPU. 
 - 
std::shared_ptr<const VectorT<T>> getVector(bool useGpu) const¶
- return a const cpuVectorT_ or gpuVectorT_. - If useGpu is true, return gpuVectorT_. - If useGpu is false, return cpuVectorT_. - Note
- Caller should not change the data. If caller changes const attribute, should set syncFlag_.
 
 - 
std::shared_ptr<VectorT<T>> &getMutableVector(bool useGpu)¶
- return a mutable cpuVectorT_ or gpuVectorT_. - Note
- This interface will change syncFlag_, so if you will not change the data, you should call getVector.
 
 - 
const T *getData(bool useGpu) const¶
- return const T* data. - If useGpu is true, return device data. - If useGpu is false, return host data. 
 - 
T *getMutableData(bool useGpu)¶
 - 
void zeroMem(bool useGpu)¶
- If useGpu is true, gpuVectorT_->Op(). - If useGpu is false, cpuVectorT_->Op(). - Op is zeroMem, fillSequence, ... 
 - 
void fillSequence(bool useGpu)¶
 - 
void setElement(size_t i, const T &value, bool useGpu)¶
 - 
T getElement(size_t i) const¶
- return i-th element. 
 - 
size_t getSize() const¶
- return vector size. 
 - 
void copyToCpu(const T *data, size_t size)¶
- copy data to cpuVectorT_. 
 - 
void copyToCpu(const T *data, size_t size, hl_stream_t stream)¶
- copy data to cpuVectorT_ using the specified stream. 
 - 
void copyToGpu(const T *data, size_t size)¶
- copy data to gpuVectorT_. 
 - 
void copyToGpu(const T *data, size_t size, hl_stream_t stream)¶
- copy data to gpuVectorT_ using the specified stream. 
 - 
void copyFrom(const VectorT<T> &src, hl_stream_t stream)¶
- copy from src using the specified stream. - If src is CpuVectorT, copy to cpuVectorT_. - If src is GpuVectorT, copy to gpuVectorT_. 
 - 
void copyFrom(const T *data, size_t size, bool useGpu)¶
- copy data. - If useGpu is false, copy host data to cpuVectorT_. - If useGpu is true, copy device data to gpuVectorT_. - Note
- data address should be consistent with useGpu.
 
 - 
void copyFrom(const T *data, size_t size, hl_stream_t stream, bool useGpu)¶
 - 
void copyFrom(CpuGpuVectorT<T> &src, size_t offset, size_t size, bool useGpu, hl_stream_t stream)¶
- copy from (src + offset) using the specified stream. 
 - 
void copyFrom(CpuGpuVectorT<T> &src, hl_stream_t stream)¶
- copy from src using the specified stream. 
 - 
SyncedFlag *getSync() const¶
- return sync_. 
 - 
void setSync(SyncedFlag *sync)¶
- set sync_. 
 - 
void setSync(SyncedFlag syncFlag)¶
 - 
void setSync(bool useGpu)¶
 Public Static Functions - 
std::shared_ptr<CpuGpuVectorT<T>> create(size_t size, bool useGpu)¶
 - resize or create CpuGpuVectorT. 
 Protected Functions - 
void resizeOrCreate(size_t size, bool useGpu)¶
 - 
void copyToCpu()¶
- copy between cpuVectorT_ and gpuVectorT_. - If syncFlag_ is DATA_AT_CPU and SYNCED, do nothing. - If syncFlag_ is DATA_AT_GPU, copy gpuVectorT_ to cpuVectorT_ and set syncFlag_ to SYNCED. 
 - 
void copyToGpu()¶
- copy between cpuVectorT_ and gpuVectorT_. - If syncFlag_ is DATA_AT_GPU and SYNCED, do nothing. - If syncFlag_ is DATA_AT_CPU, copy cpuVectorT_ to gpuVectorT_ and set syncFlag_ to SYNCED. 
 
- 
 
- 
typedef CpuVectorT<real> 
- 
namespace paddle
- Typedefs - 
typedef std::shared_ptr<_hl_sparse_matrix_s> hl_sparse_matrix_s_ptr¶
 - 
class GpuSparseMatrix¶
- Inherits from paddle::Matrix - Public Functions - 
GpuSparseMatrix(size_t height, size_t width, size_t nnz, SparseValueType valueType = FLOAT_VALUE, SparseFormat format_ = SPARSE_CSR, bool trans = false)¶
 - 
GpuSparseMatrix(GpuMemHandlePtr dataHandle, hl_sparse_matrix_s_ptr sMatrix, size_t height, size_t width, size_t nnz, SparseValueType valueType = FLOAT_VALUE, SparseFormat format_ = SPARSE_CSR, bool trans = false, MemoryHandlePtr sMemoryHandle = NULL)¶
 - 
GpuSparseMatrix(real *value, int *rows, int *cols, size_t height, size_t width, size_t nnz, SparseValueType valueType, SparseFormat format, bool trans)¶
 - 
GpuSparseMatrix(hl_sparse_matrix_s_ptr sMatrix, size_t height, size_t width, size_t nnz, SparseValueType valueType, SparseFormat format, bool trans, MemoryHandlePtr sMemoryHandle)¶
 - 
~GpuSparseMatrix()¶
 - 
virtual void resize(size_t newHeight, size_t newWidth, size_t newNnz, SparseValueType valueType, SparseFormat format)¶
- Note
- This should only be used for sparse matrix.
 
 - 
virtual void resize(size_t newHeight, size_t newWidth)¶
- Note
- Original data may not be preserved after resize().
 
 - 
void sparseResizeCSR()¶
 - 
void sparseResizeCSC()¶
 - 
void resizeCSR(size_t newHeight, size_t newWidth, size_t newNnz, SparseValueType valueType)¶
 - 
void resizeCSC(size_t newHeight, size_t newWidth, size_t newNnz, SparseValueType valueType)¶
 - 
void mul(const GpuMatrixPtr a, const GpuMatrixPtr b, real scaleAB, real scaleT)¶
 - 
virtual void copyFrom(const Matrix &src, hl_stream_t stream)¶
 - 
void copyFromCSR(CpuSparseMatrix &src, hl_stream_t stream)¶
 - 
void copyFromCSC(CpuSparseMatrix &src, hl_stream_t stream)¶
 - 
virtual void copyFrom(const IVector &src)¶
- convert a int vector to a real matrix. - (1) source and dest are both in CPU. - (2) sizes are exactly match. 
 - 
void copyFrom(const IVector &src, hl_stream_t stream)¶
 - template <class T>
- 
void copyFrom(int64_t *ids, int64_t *indices, T *data, hl_stream_t stream)¶
 - 
virtual void setRow(size_t row, size_t colNum, const unsigned int *cols, const real *values)¶
- This should only be used for sparse matrix. - Currently must be called for each row in order. The matrix is not valid until setRow is called for the last row. 
 - 
virtual SparseValueType getValueType() const¶
 - 
virtual SparseFormat getFormat() const¶
 - 
const int *getRowCols(size_t x) const¶
 - 
const real *getRowValues(size_t x) const¶
 - 
size_t getColNum(size_t x) const¶
 - 
virtual void print(std::ostream &os) const¶
- print out the values of elements to os 
 - 
virtual void zeroMem()¶
- only set value_ of FLOAT_VALUE sparse matrix to zero 
 - 
void add3(GpuMatrix *b)¶
- sparseMatrix += denseMatrix - Named add3 just because add/add2 have been used in BaseMatrix.cu and they are not virtual functions. - Only add values at the same (row, col) index in the dense matrix; other values are not used. - Parameters
- b-- dense matrix 
 
 
 - 
virtual void add3(MatrixPtr b)¶
- matrix element-wise add - Named add3 just because add/add2 have been used in BaseMatrix.cu and they are not virtual functions. 
 - 
virtual void addBias(Matrix &b, real scale)¶
- sparseMatrix[i,j] += bias[j], (j is the col index of sparse matrix) - Parameters
- b-- bias, dense matrix and height = 1 
- scale-- scale of b 
 
 
 - 
virtual int *getRows() const¶
- return rows, which is gpu address 
 - 
virtual int *getCols() const¶
- return cols, which is gpu address 
 - 
real *getValue() const¶
- return value, which is gpu address 
 - 
virtual real *getData()¶
- return value_ of sparse matrix - Sometimes a CpuSparseMatrix is held as a Matrix; calling getValue would then require a dynamic_cast to CpuSparseMatrix, so getData is a convenient way to get the value 
 - 
virtual const real *getData() const¶
 - 
virtual void rowMax(IVector &maxIds, Matrix &maxVal)¶
- Get the top k values of each row in the sparse matrix. - Store the values in maxVal and their indices in maxIds. k = maxVal.width - Parameters
- maxIds-- index of top k 
- maxVal-- value of top k 
 
 
 - 
virtual void mul(const MatrixPtr a, const MatrixPtr b, real scaleAB, real scaleT)¶
- this = scaleAB*(a*b) + scaleT*this 
 - 
void copyFrom(CpuSparseMatrix &src, hl_stream_t stream)¶
 - 
void copyFrom(GpuSparseMatrix &src, hl_stream_t stream)¶
 - 
virtual void trimFrom(const CpuSparseMatrix &src)¶
 - 
void trimFromCSR(const CpuSparseMatrix &src)¶
 - 
void trimFromCSC(const CpuSparseMatrix &src)¶
 - 
virtual bool isSparse() const¶
 - Public Members - 
MemoryHandlePtr sMemoryHandle_¶
 - 
int *rows_¶
 - 
int *cols_¶
 - 
real *value_¶
 - 
const char *end_¶
 - 
hl_sparse_matrix_s_ptr sMatrix_¶
 - 
SparseValueType valueType_¶
 - 
SparseFormat format_¶
 - Protected Functions - 
void sparseResize()¶
 - 
void copyRow(int offsets, size_t colNum, const sparse_non_value_t *row)¶
 - 
void copyRow(int offsets, size_t colNum, const sparse_float_value_t *row)¶
 
- 
 
- 
typedef std::shared_ptr<_hl_sparse_matrix_s> 
Functions
- 
P_DECLARE_bool(allow_inefficient_sparse_update)¶
- 
namespace paddle
- 
class SparseRowCpuMatrix¶
- #include <SparseRowMatrix.h>Sparse Row Inherits from paddle::CpuMatrix Subclassed by paddle::SparseAutoGrowRowCpuMatrix, paddle::SparsePrefetchRowCpuMatrix Public Functions - 
SparseRowCpuMatrix(CpuMemHandlePtr dataHandle, size_t height, size_t width, IndexDictPtr indexDictHandle = nullptr, bool trans = false)¶
- heightStore is max number of rows of the sparse matrix. 
 - 
virtual ~SparseRowCpuMatrix()¶
 - 
real *getRow(size_t row)¶
- Get the row buf - Parameters
- row-- row id in the original matrix 
 
 
 - 
real *getLocalRow(size_t row)¶
- Get the row buf - Parameters
- row-- row id in local storage 
 
 
 - 
void reserveStore()¶
- reserve the storage for rows according to current size of indexDictHandle. - This is only used when SparseRowCpuMatrix is constructed with indexDictHandle. 
 - 
virtual real *getRowBuf(size_t row)¶
 - 
virtual void mul(CpuSparseMatrix *a, CpuMatrix *b, real scaleAB, real scaleT)¶
 - 
virtual void copyFrom(const real *src, size_t size)¶
- Fill data according to the row indices added, and set up indices inside. - src and size are the data and size of a normal dense CpuMatrix. 
 - 
virtual void zeroMem()¶
 - 
void applyL1Decay(real learningRate, real decayRate)¶
- apply L1 to all sparse rows; should be applied after indices are ready. 
 - 
void clearIndices()¶
 - 
void zeroMemThread(size_t tid, size_t numThreads)¶
 - 
void sgdUpdate(BaseMatrix &value, IVector &t0, real learningRate, int currentTime, real decayRate, bool useL1, bool fini = false)¶
- value -= grad * learningRate, where this matrix is the gradient. - If L1 decay is set, use L1; else if L2 is set, use L2; otherwise apply no decay at all. - t0 is an int vector used by L1/L2 decay; its size equals the height of the parameter matrix, and it stores the time at which each weight row was last updated. - Time is batchId; currentTime is the current batchId. - When a pass finishes, the caller should call this function one more time with fini=true to let weight decay catch up to the current time. 
 - 
void addTo(BaseMatrix &dest, std::vector<uint32_t> &ids, size_t tid, size_t numThreads)¶
- merge rows in this to dest for the designated thread - values are added to the dest matrix - ids that occurred in this are appended to ids, filtered by (id % numThreads == tid) 
 - 
void addTo(SparseRowCpuMatrix &dest, size_t tid, size_t numThreads)¶
- the second version of addTo(); dest is a SparseRowCpuMatrix. - The dest’s indices should already be set up; addTo() will check that the src ids exist in dest’s indices. 
 - 
const IndexDictPtr &getIndexDictHandle() const¶
 - 
void checkIndices()¶
- check all local and global indices consistency 
 - 
void checkIndex(size_t i)¶
- check whether row i exists in indices 
 - 
std::vector<unsigned int> &getLocalIndices() const¶
 Protected Functions - template <typename Func>
- 
void apply(Func f)¶
 - 
void init(size_t height, size_t width)¶
 - 
void clearRows()¶
- clear row indices. 
 - 
void checkStoreSize()¶
 Protected Attributes - 
std::vector<real, AlignedAllocator<real, 32>> rowStore_¶
 - 
IndexDictPtr indexDictHandle_¶
 - 
std::vector<unsigned int> *localIndices_¶
 - 
unsigned int *globalIndices_¶
 Protected Static Attributes - 
const unsigned int kUnusedId_¶
 - 
struct IndexDict¶
 
- 
 - 
class SparsePrefetchRowCpuMatrix¶
- #include <SparseRowMatrix.h>For prefetching parameters from remote Parameter server. Inherits from paddle::SparseRowCpuMatrix Public Functions - 
SparsePrefetchRowCpuMatrix(CpuMemHandlePtr dataHandle, size_t height, size_t width, IndexDictPtr indexDictHandle = nullptr, SyncThreadPool *pool = nullptr, bool trans = false)¶
 - 
void addRows(MatrixPtr input)¶
- Extract feature ids from input to fill the row indices. - input must be a sparse matrix. - Can be called many times before setup. 
 - 
void addRows(IVectorPtr ids)¶
 - 
void setupIndices()¶
- setup global indices of SparseRowMatrix after finish add rows. 
 Protected Functions - 
void addRows(const unsigned int *ids, size_t len)¶
 Protected Attributes - 
SyncThreadPool *pool_¶
 
- 
 - 
class SparseAutoGrowRowCpuMatrix¶
- Inherits from paddle::SparseRowCpuMatrix - Subclassed by paddle::CacheRowCpuMatrix 
 - 
class CacheRowCpuMatrix¶
- Inherits from paddle::SparseAutoGrowRowCpuMatrix - Public Functions - 
CacheRowCpuMatrix(size_t height, size_t width, IndexDictPtr indexDictHandle = nullptr, bool trans = false)¶
 - 
void setSourceData(CpuVectorPtr sourceVec)¶
 - 
real *getRow(size_t row)¶
 - 
virtual real *getRowBuf(size_t row)¶
 - 
virtual void mul(CpuSparseMatrix *a, CpuMatrix *b, real scaleAB, real scaleT)¶
 
- 
 - 
class SparseRowIdsCpuMatrix¶
- #include <SparseRowMatrix.h> Sparse Row Ids Matrix. Mostly the same as CpuMatrix, but maintains the sparse row ids that occurred; ids are hashed by worker thread id. Inherits from paddle::CpuMatrix Public Functions - 
SparseRowIdsCpuMatrix(CpuMemHandlePtr dataHandle, size_t height, size_t width, bool trans = false)¶
 - 
void setNumOfThreads(size_t numOfThreads)¶
 - 
std::vector<uint32_t> &getIds(size_t threadId)¶
 Private Members - 
std::vector<std::vector<uint32_t>> idsArray_¶
 
- 
 
- 
class 
- 
namespace paddle
- 
class CpuSparseMatrix¶
- Inherits from paddle::Matrix - Public Functions - 
CpuSparseMatrix(size_t height, size_t width, size_t nnz, SparseValueType valueType = FLOAT_VALUE, SparseFormat format = SPARSE_CSR, bool trans = false)¶
 - 
CpuSparseMatrix(CpuMemHandlePtr memHandle, size_t height, size_t width, size_t nnz, SparseValueType valueType, SparseFormat format, bool trans)¶
 - 
CpuSparseMatrix(real *data, int *rows, int *cols, size_t height, size_t width, size_t nnz, SparseValueType valueType, SparseFormat format, bool trans)¶
 - 
~CpuSparseMatrix()¶
 - 
virtual void resize(size_t newHeight, size_t newWidth, size_t newNnz, SparseValueType valueType, SparseFormat format)¶
- Note
- This should only be used for sparse matrix.
 
 - 
virtual void resize(size_t newHeight, size_t newWidth)¶
- Note
- Original data may not be preserved after resize().
 
 - 
SparseValueType getValueType()¶
 - 
real *getRowValues(size_t i) const¶
 - 
int *getRowCols(size_t i) const¶
 - 
void fillRowIndices(IVectorPtr &outVec) const¶
- fill row indices of each value in CSR matrix 
 - 
size_t getColNum(size_t i) const¶
 - 
real *getColumn(size_t i) const¶
 - 
size_t getColStartIdx(size_t i) const¶
 - 
size_t getRowStartIdx(size_t i) const¶
 - 
size_t getRowNum(size_t i) const¶
 - 
virtual real getSum()¶
 - 
virtual void square()¶
 - 
virtual real getMin()¶
- only consider nonzero values. the actual min value should compare with 0.0. 
 - 
virtual real getMax()¶
- only consider nonzero values. the actual max value should compare with 0.0. 
 - 
virtual void rowMax(IVector &maxIds, Matrix &max)¶
- Get the top k elements of each row of this matrix. - The column ids and values of these elements are stored in maxIds and max respectively. Note that the top k elements are not sorted. 
 - 
virtual int *getRows() const¶
 - 
virtual int *getCols() const¶
 - 
real *getValue() const¶
 - 
virtual SparseFormat getFormat() const¶
 - 
virtual SparseValueType getValueType() const¶
 - 
virtual real *getData()¶
- return value_ of sparse matrix - Sometimes a CpuSparseMatrix is held as a Matrix; calling getValue would then require a dynamic_cast to CpuSparseMatrix, so getData is a convenient way to get the value 
 - 
virtual const real *getData() const¶
 - 
virtual void zeroMem()¶
- only set value_ of FLOAT_VALUE sparse matrix to zero 
 - 
virtual void transpose(MatrixPtr matTrans, bool memAlloc)¶
- mem MUST be allocated outside (memAlloc=false) 
 - 
virtual void mul(MatrixPtr a, MatrixPtr b, real scaleAB, real scaleT)¶
- this = scaleAB*(a*b) + scaleT*this 
 - 
void add3(CpuMatrix *b)¶
- sparseMatrix += denseMatrix - Named add3 just because add/add2 have been used in BaseMatrix.cu and they are not virtual functions. - Only add values at the same (row, col) index in the dense matrix; other values, whose positions are not in the sparse matrix, are not used. - Parameters
- b-- dense matrix 
 
 
 - 
virtual void add3(MatrixPtr b)¶
- matrix element-wise add - Named add3 just because add/add2 have been used in BaseMatrix.cu and they are not virtual functions. 
 - 
virtual void addBias(Matrix &b, real scale)¶
- sparseMatrix[i,j] += bias[j], (j is the col index of sparse matrix) - Parameters
- b-- bias, dense matrix and height = 1 
- scale-- scale of b 
 
 
 - 
virtual void print(std::ostream &os) const¶
- print out the values of elements to os 
 - 
virtual void printOneRow(std::ostream &os, size_t idx) const¶
- print one row to os 
 - 
virtual void setRow(size_t row, size_t colNum, const unsigned int *cols, const real *values)¶
- This should only be used for sparse matrix. - Currently must be called for each row in order. The matrix is not valid until setRow is called for the last row. 
 - 
virtual void randomizeUniform()¶
 - 
void copyFrom(const GpuSparseMatrix &src, hl_stream_t stream)¶
 - 
virtual void copyFrom(const Matrix &src, hl_stream_t stream = HPPL_STREAM_DEFAULT)¶
 - 
CpuSparseMatrixPtr getTmpSparseMatrix(size_t height, size_t width)¶
- Get a temporary matrix. This is threadsafe. It should be only used temporarily, i.e. do not store it or use it as return value. - Note
- Do NOT use large amount of tmp matrix.
 
 - 
void copyFrom(std::vector<int> &rows, std::vector<int> &cols, std::vector<real> &values)¶
 - 
void copyFrom(const CpuSparseMatrix &src)¶
 - 
virtual void trimFrom(const CpuSparseMatrix &src)¶
 - 
void copyRow(int offsets, size_t colNum, const sparse_non_value_t *row)¶
 - 
void copyRow(int offsets, size_t colNum, const sparse_float_value_t *row)¶
 - template <class T>
- 
void copyFrom(int64_t *ids, int64_t *indices, T *data)¶
 - template <class T>
- 
void copyFrom(int64_t *indices, T *data)¶
 - 
virtual void copyFrom(const real *src, size_t size)¶
- If this is GpuMatrix, src is assumed to be CPU memory - If this is CpuMatrix, src is assumed to be CPU memory 
 - 
virtual bool isSparse() const¶
 - Protected Functions - 
void sparseResize()¶
 - Protected Attributes - 
int *rows_¶
 - 
int *cols_¶
 - 
real *value_¶
 - 
SparseFormat format_¶
 - 
SparseValueType valueType_¶
 - Protected Static Attributes - 
const size_t DEFAULT_AVG_WIDTH¶
 - 
ThreadLocal<std::vector<CpuSparseMatrixPtr>> cpuLocalMats_¶
 - Private Functions - 
virtual MatrixPtr clone(size_t height = 0, size_t width = 0, bool useGpu = false)¶
- Create a matrix with the same type (GpuMatrix, CpuMatrix, NonValueSparseMatrix, etc.) as this. - If height and width are zero, the new matrix will have the same size as this, otherwise the new matrix will have the specified size. 
 
- 
 
- 
class 
Others¶
- 
namespace paddle
- Functions - template <class T>
- 
void gemm(const CBLAS_TRANSPOSE transA, const CBLAS_TRANSPOSE transB, const int M, const int N, const int K, const T alpha, const T *A, const int lda, const T *B, const int ldb, const T beta, T *C, const int ldc)¶
 - template <class T>
- 
void axpy(const int n, const T alpha, const T *x, T *y)¶
 - template <class T>
- 
T dotProduct(const int n, const T *x, const T *y)¶
 
- 
namespace paddle
- 
namespace simd¶
- Functions - template <typename Type>
- 
void addTo(Type *a, const Type *b, size_t len)¶
 - template <typename Type>
- 
void batchAddTo(Type *a, const Type *b[], int batch, size_t len)¶
 - template <typename Type>
- 
void colMax(Type *result, const Type *data, int dim, int numSamples)¶
 - template <typename Type>
- 
void decayL1(Type *dst, Type *src, Type *lr, Type lambda, size_t len)¶
 - template <typename Type>
- 
void decayL1(Type *dst, Type *src, Type lambda, size_t len)¶
 - template <size_t AlignSize>
- 
bool isPointerAlign(void *ptr)¶
 - 
bool vec_check(size_t len)¶
 - template <>
- 
void addTo(float *a, const float *b, size_t len)¶
 - template <>
- 
void batchAddTo(float *a, const float *b[], int batch, size_t len)¶
 - template <>
- 
void colMax(float *result, const float *data, int dim, int numSamples)¶
 - template <>
- 
void decayL1(float *dst, float *src, float lambda, size_t len)¶
 - template <>
- 
void decayL1(float *dst, float *src, float *lr, float lambda, size_t len)¶
 - 
namespace naive¶
- Functions - template <typename Type>
- 
void addTo(Type *a, const Type *b, size_t len)¶
 - template <typename Type>
- 
void batchAddTo(Type *a, const Type *b[], int batch, size_t len)¶
 - template <typename Type>
- 
void colMax(Type *result, const Type *data, int dim, int numSamples)¶
- Note
- this method is unused in paddle.
 
 - template <typename Type>
- 
void decayL1(Type *dst, Type *src, Type *lr, Type lambda, size_t len)¶
 - template <class Type>
- 
void decayL1(Type *dst, Type *src, Type lambda, size_t len)¶
 
 
 
- 
namespace