Matrix¶
Base¶
Defines
-
CAL_MATRIX_START_ADDRESS
(address, height, width, ld, col, row)¶ Calculate matrix element address.
For instance, the address of A[i][j] is the base address plus i * ld + j.
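As a plain illustration of this row-major addressing (a sketch, not the actual macro body; the helper name is hypothetical):

    #include <cstddef>

    // Sketch: address of element (row, col) of a row-major matrix whose rows
    // are separated by `ld` elements (the leading dimension).
    inline float* elementAddress(float* base, size_t ld, size_t row, size_t col) {
      return base + row * ld + col;  // A[row][col] == *(base + row * ld + col)
    }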
-
namespace
paddle
¶ Typedefs
-
typedef bool_constant<bool, false>
false_type
¶
-
typedef bool_constant<bool, true>
true_type
¶
-
typedef BaseMatrixT<real>
BaseMatrix
¶
-
typedef BaseMatrixT<int>
IBaseMatrix
¶
- template <class T>
-
class
BaseMatrixT
¶ Subclassed by paddle::BaseVector< T >, paddle::Matrix
Public Functions
-
virtual
~BaseMatrixT
()¶
-
BaseMatrixT
(size_t height, size_t width, T *data, bool trans, bool useGpu)¶
-
BaseMatrixT
(BaseMatrixT &mat, bool useGpu)¶ - Note
- This constructor is for temporarily making a matrix with a different useGpu flag from the original matrix, so that mixed GPU/CPU operations can be performed successfully.
-
BaseMatrixT
(size_t height, size_t width, size_t stride, T *data, bool trans, bool use_gpu)¶
-
void
setData
(T *data)¶ caller should make sure that the size of data is at least height*width
- template <class Op>
-
int
applyUnary
(Op op)¶ unary operator: element wise op(a).
for 0 <= i < this->height_ & for 0 <= j < this->width_.
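A minimal standalone sketch of what an element-wise unary functor and the loop behind applyUnary amount to (the functor and helper names are hypothetical; the real implementation dispatches to CPU/GPU kernels):

    #include <cstddef>

    // Hypothetical unary functor: op(a) negates an element in place.
    struct NegOp {
      void operator()(float& a) const { a = -a; }
    };

    // Reference loop equivalent to applyUnary(op) on a height x width matrix
    // with leading dimension ld.
    template <class Op>
    void applyUnaryRef(float* data, size_t height, size_t width, size_t ld, Op op) {
      for (size_t i = 0; i < height; ++i)
        for (size_t j = 0; j < width; ++j)
          op(data[i * ld + j]);
    }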
- template <class Op>
-
int
applyUnary
(Op op, int numRows, int numCols, MatrixOffset &offset)¶ unary operator: element wise op(a).
for 0 <= i < numRows & for 0 <= j < numCols. While matrix start address is: A = this->data_ + offset.aRow_*ld + offset.aCol_;
- template <class Op>
-
int
applyBinary
(Op op, BaseMatrixT &b)¶ binary operator: element wise op(a, b).
for 0 <= i < this->height_ & for 0 <= j < this->width_. While this->height_ == b.height_ && this->width_ == b.width_.
- template <class Op, class bAsRowVector, class bAsColVector>
-
int
applyBinary
(Op op, BaseMatrixT &b, int numRows, int numCols, MatrixOffset &offset, bAsRowVector, bAsColVector)¶ binary operator: element wise op(a, b)
for 0 <= i < numRows & for 0 <= j < numCols. While matrix start address is: A = this->data_ + offset.aRow_*lda + offset.aCol_; B = b->data_ + offset.bRow_*ldb + offset.bCol_;
if (bAsRowVector == false_type && bAsColVector == false_type) op(A[i * lda + j], B[i * ldb + j])
if (bAsRowVector == true_type && bAsColVector == false_type) op(A[i * lda + j], B[j])
if (bAsRowVector == false_type && bAsColVector == true_type) op(A[i * lda + j], B[i * ldb])
if (bAsRowVector == true_type && bAsColVector == true_type) op(A[i * lda + j], B[0])
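The four bAsRowVector/bAsColVector cases are just different broadcasts of b. A reference loop sketch with plain pointers (not the actual kernel):

    #include <cstddef>

    // Sketch of the four broadcast cases of applyBinary(op, b, ...).
    // rowVec / colVec mirror bAsRowVector / bAsColVector.
    template <class Op>
    void applyBinaryRef(float* A, size_t lda, const float* B, size_t ldb,
                        size_t numRows, size_t numCols,
                        bool rowVec, bool colVec, Op op) {
      for (size_t i = 0; i < numRows; ++i) {
        for (size_t j = 0; j < numCols; ++j) {
          const float* b;
          if (!rowVec && !colVec)      b = &B[i * ldb + j];  // full matrix
          else if (rowVec && !colVec)  b = &B[j];            // one row, broadcast down rows
          else if (!rowVec && colVec)  b = &B[i * ldb];      // one column, broadcast across columns
          else                         b = &B[0];            // single element
          op(A[i * lda + j], *b);
        }
      }
    }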
- template <class Op>
-
int
applyBinary
(Op op, BaseMatrixT &b, int numRows, int numCols, MatrixOffset &offset)¶
- template <class Op>
-
int
applyTernary
(Op op, BaseMatrixT &b, BaseMatrixT &c)¶ ternary operator: element wise op(a, b, c).
for 0 <= i < this->height_ & for 0 <= j < this->width_. While this->height_ == b.height_ && this->width_ == b.width_ && this->height_ == c.height_ && this->width_ == c.width_
- template <class Op, class cAsRowVector, class cAsColVector>
-
int
applyTernary
(Op op, BaseMatrixT &b, BaseMatrixT &c, int numRows, int numCols, MatrixOffset &offset, cAsRowVector, cAsColVector)¶ ternary operator: element wise op(a, b, c).
for 0 <= i < numRows & for 0 <= j < numCols. While matrix start address is: A = this->data_ + offset.aRow_*lda + offset.aCol_; B = b->data_ + offset.bRow_*ldb + offset.bCol_; C = c->data_ + offset.cRow_*ldc + offset.cCol_;
if (cAsRowVector == false_type && cAsColVector == false_type) op(A[i*lda + j], B[i*ldb + j], C[i*ldc + j])
if (cAsRowVector == true_type && cAsColVector == false_type) op(A[i*lda + j], B[i*ldb + j], C[j])
if (cAsRowVector == false_type && cAsColVector == true_type) op(A[i*lda + j], B[i*ldb + j], C[i*ldc])
if (cAsRowVector == true_type && cAsColVector == true_type) op(A[i*lda + j], B[i*ldb + j], C[0])
- template <class Op>
-
int
applyTernary
(Op op, BaseMatrixT &b, BaseMatrixT &c, int numRows, int numCols, MatrixOffset &offset)¶
- template <class Op>
-
int
applyQuaternary
(Op op, BaseMatrixT &b, BaseMatrixT &c, BaseMatrixT &d)¶ quaternary operator: element wise op(a, b, c, d).
for 0 <= i < this->height_ & for 0 <= j < this->width_. While this->height_ == b.height_ && this->width_ == b.width_ && this->height_ == c.height_ && this->width_ == c.width_ && this->height_ == d.height_ && this->width_ == d.width_
- template <class Op>
-
int
applyQuaternary
(Op op, BaseMatrixT &b, BaseMatrixT &c, BaseMatrixT &d, int numRows, int numCols, MatrixOffset &offset)¶ quaternary operator: element wise op(a, b, c, d).
for 0 <= i < numRows & for 0 <= j < numCols. While matrix start address is: A = this->data_ + offset.aRow_*lda + offset.aCol_; B = b->data_ + offset.bRow_*ldb + offset.bCol_; C = c->data_ + offset.cRow_*ldc + offset.cCol_; D = d->data_ + offset.dRow_*ldd + offset.dCol_;
- template <class Agg, class Op, class Saver, class aAsRowVector, class aAsColVector>
-
int
aggregate
(Agg agg, Op op, Saver sv, BaseMatrixT &b, int numRows, int numCols, MatrixOffset &offset, aAsRowVector, aAsColVector)¶ an aggregate expression applied to each row (or column) of matrix b. op and sv are element wise operators.
if (aAsRowVector == true_type && aAsColVector == false_type) for each column j & 0 <= i < numRows, do: dst = agg(op(b[i*ldb + j])) a[j] = sv(a[j], dst)
if (aAsRowVector == false_type && aAsColVector == true_type) for each row i & 0 <= j < numCols, do: dst = agg(op(b[i*ldb + j])) a[i] = sv(a[i], dst)
- template <class Agg, class Op, class Saver, class aAsRowVector, class aAsColVector>
-
int
aggregate
(Agg agg, Op op, Saver sv, BaseMatrixT &b, BaseMatrixT &c, int numRows, int numCols, MatrixOffset &offset, aAsRowVector, aAsColVector)¶ an aggregate expression applied to each row (or column) of matrices b and c.
op and sv are element wise operators (see the plain-loop sketch below).
if (aAsRowVector == true_type && aAsColVector == false_type) for each column j & 0 <= i < numRows, do: dst = agg(op(b[i*ldb + j], c[i*ldc + j])) a[j] = sv(a[j], dst)
if (aAsRowVector == false_type && aAsColVector == true_type) for each row i & 0 <= j < numCols, do: dst = agg(op(b[i*ldb + j], c[i*ldc + j])) a[i] = sv(a[i], dst)
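A plain-loop sketch of the aggregation pattern used by both overloads, shown for the single-matrix, aAsColVector case (reduce each row of b into a[i]); agg combines partial results, op transforms each element, sv merges the reduction into a. The explicit init value for the running aggregate is an assumption of this sketch:

    #include <cstddef>

    template <class Agg, class Op, class Saver>
    void aggregateRowsRef(float* a, const float* b, size_t ldb,
                          size_t numRows, size_t numCols,
                          Agg agg, Op op, Saver sv, float init) {
      for (size_t i = 0; i < numRows; ++i) {
        float dst = init;                      // identity element of agg (assumption)
        for (size_t j = 0; j < numCols; ++j)
          dst = agg(dst, op(b[i * ldb + j]));  // dst = agg(op(b[i*ldb + j]))
        a[i] = sv(a[i], dst);                  // a[i] = sv(a[i], dst)
      }
    }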
- template <class Agg>
-
int
applyRow
(Agg agg, BaseMatrixT &b)¶ an aggregate expression applied to each row of matrix b.
for each row i & 0 <= j < b.width_, do: this[i] = agg(b[i*ldb + j])
- template <class Agg, class Saver>
-
int
applyRow
(Agg agg, Saver sv, BaseMatrixT &b)¶ an aggregate expression applied to each row of matrix b.
for each row i & 0 <= j < b.width_, do: dst = agg(b[i*ldb + j]) this[i] = sv(this[i], dst)
- template <class Agg>
-
int
applyCol
(Agg agg, BaseMatrixT &b)¶ an aggregate expression applied to each column of matrix b.
for each column j & 0 <= i < b.height_, do: this[j] = agg(b[i*ldb + j])
- template <class Agg, class Saver>
-
int
applyCol
(Agg agg, Saver sv, BaseMatrixT &b)¶ an aggregate expression applied to each column of matrix b.
for each column j & 0 <= i < b.height_, do: dst = agg(b[i*ldb + j]) this[j] = sv(this[j], dst)
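In plain loops, applyRow reduces each row of b into one element of this (a column vector), and applyCol reduces each column into one element of this (a row vector). A standalone sketch with a sum aggregator (any Agg works; the helper names are hypothetical):

    #include <cstddef>
    #include <vector>

    // applyRow-like reduction: out[i] = sum over j of b[i][j]
    std::vector<float> sumRowsRef(const std::vector<float>& b, size_t height, size_t width) {
      std::vector<float> out(height, 0.0f);
      for (size_t i = 0; i < height; ++i)
        for (size_t j = 0; j < width; ++j)
          out[i] += b[i * width + j];
      return out;
    }

    // applyCol-like reduction: out[j] = sum over i of b[i][j]
    std::vector<float> sumColsRef(const std::vector<float>& b, size_t height, size_t width) {
      std::vector<float> out(width, 0.0f);
      for (size_t i = 0; i < height; ++i)
        for (size_t j = 0; j < width; ++j)
          out[j] += b[i * width + j];
      return out;
    }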
-
bool
useGpu
() const¶
-
const T *
rowBuf
(size_t row) const¶
-
T *
rowBuf
(size_t row)¶
-
void
neg
()¶ unary operator.
-
void
exp
()¶
-
void
pow
(T p)¶
-
void
log
()¶
-
void
sqrt
()¶
-
void
square
()¶
-
void
reciprocal
()¶
-
void
abs
()¶
-
void
sign
()¶
-
void
zero
()¶
-
void
zeroAtOffset
(int64_t columnOffset, int64_t numColumns)¶ this(row, col + columnOffset) = 0 for 0 <= col < numColumns
-
void
one
()¶
-
void
subScalar
(T p)¶
-
void
mulScalar
(T p)¶
-
void
divScalar
(T p)¶
-
void
assign
(T p)¶ this = p
-
void
add
(T p)¶ this = this + p
-
void
add
(T p1, T p2)¶ this = this*p1 + p2
-
void
clip
(T p1, T p2)¶ this = this < p1 ? p1 : this
this = this > p2 ? p2 : this (clip this into the range [p1, p2])
-
void
biggerThanScalar
(T p)¶ a = a > p ? 1.0f : 0.0f
-
void
downClip
(T p)¶ a = a > p ? a : p
-
void
assign
(BaseMatrixT &b)¶ this = b
-
void
assignAtOffset
(BaseMatrixT &b, int64_t columnOffset)¶ If b.width + columnOffset <= this.width: this(row, col + columnOffset) = b(row, col) for 0 <= col < b.width. If this.width + columnOffset <= b.width: this(row, col) = b(row, col + columnOffset) for 0 <= col < this.width. Otherwise, FATAL.
-
void
add
(BaseMatrixT &b)¶ this = this + b
-
void
addAtOffset
(BaseMatrixT &b, int64_t columnOffset)¶ If b.width + columnOffset <= this.width: this(row, col + columnOffset) += b(row, col) for 0 <= col < b.width. If this.width + columnOffset <= b.width: this(row, col) += b(row, col + columnOffset) for 0 <= col < this.width. Otherwise, FATAL.
-
void
addColVector
(BaseMatrixT &b)¶
-
void
addRowVector
(BaseMatrixT &b)¶
-
void
addBias
(BaseMatrixT &b, T scale)¶
-
void
mulRowVector
(BaseMatrixT &b)¶
-
void
divRowVector
(BaseMatrixT &b)¶
-
void
addP2P
(BaseMatrixT &b)¶
-
void
add
(BaseMatrixT &b, T p)¶ this = this + b*p
-
void
add
(BaseMatrixT &b, T p1, T p2)¶ this = p1*this + p2*b
-
void
sub
(BaseMatrixT &b)¶ this = this - b
-
void
sub
(BaseMatrixT &b, T p)¶ this = this - b*p
-
void
relu
(BaseMatrixT &b)¶ b = max(0, this)
-
void
reluDerivative
(BaseMatrixT &b)¶
-
void
softrelu
(BaseMatrixT &b)¶ b = log(1.0 + exp(this))
-
void
softreluDerivative
(BaseMatrixT &b)¶
-
void
brelu
(BaseMatrixT &b)¶ b = min(max(this, p1), p2)
-
void
breluDerivative
(BaseMatrixT &b)¶
-
void
square
(BaseMatrixT &b)¶ b = this * this
-
void
squareDerivative
(BaseMatrixT &b)¶
-
void
tanh
(BaseMatrixT &b)¶ b = tanh(this)
-
void
tanhDerivative
(BaseMatrixT &b)¶
-
void
scaledTanh
(BaseMatrixT &b, T p1, T p2)¶ b = p1 * tanh(p2 * this)
-
void
scaledTanhDerivative
(BaseMatrixT &b, T p1, T p2)¶
-
void
reciprocal
(BaseMatrixT &b)¶ b = 1.0f / this
-
void
reciprocalDerivative
(BaseMatrixT &b)¶
-
void
abs
(BaseMatrixT &b)¶ b = this > 0.0f ? this : -this
-
void
absDerivative
(BaseMatrixT &b)¶
-
void
sigmoid
(BaseMatrixT &b)¶ b = 1.0f / (1.0f + exp(-this))
-
void
sigmoidDerivative
(BaseMatrixT &b)¶
-
void
expDerivative
(BaseMatrixT &b)¶ b = a
-
void
sign
(BaseMatrixT &b)¶
-
void
exp
(BaseMatrixT &b)¶
-
void
pow
(BaseMatrixT &b, T p)¶
-
void
log
(BaseMatrixT &b)¶
-
void
sqrt
(BaseMatrixT &b)¶
-
void
addScalar
(BaseMatrixT &b, T p)¶
-
void
subScalar
(BaseMatrixT &b, T p)¶
-
void
mulScalar
(BaseMatrixT &b, T p)¶
-
void
divScalar
(BaseMatrixT &b, T p)¶
-
void
scalarDiv
(BaseMatrixT &b, T p)¶
-
void
invSqrt
(BaseMatrixT &b)¶ this = 1.0f / sqrt(b)
-
void
isEqualTo
(BaseMatrixT &b, T value)¶ this = (b == value)
-
void
softCrossEntropy
(BaseMatrixT &b, BaseMatrixT &c)¶ ternary operator.
-
void
softCrossEntropyBp
(BaseMatrixT &b, BaseMatrixT &c)¶
-
void
binaryLabelCrossEntropy
(BaseMatrixT &b, BaseMatrixT &c)¶
-
void
binaryLabelCrossEntropyBp
(BaseMatrixT &b, BaseMatrixT &c)¶
-
void
add
(BaseMatrixT &b, BaseMatrixT &c)¶ this = b + c
-
void
add
(BaseMatrixT &b, T p1, BaseMatrixT &c, T p2)¶ this = b*p1 + c*p2
-
void
sub
(BaseMatrixT &b, BaseMatrixT &c)¶ this = b - c
-
void
sub
(BaseMatrixT &b, T p1, BaseMatrixT &c, T p2)¶ this = b*p1 - c*p2
-
void
add2
(BaseMatrixT &b, BaseMatrixT &c)¶ this = this + b + c
-
void
add2
(BaseMatrixT &b, BaseMatrixT &c, T p1, T p2, T p3)¶ this = this*p1 + b*p2 + c*p3
-
void
add3
(BaseMatrixT &b, BaseMatrixT &c, BaseMatrixT &d, T p1, T p2, T p3)¶ this = b*p1 + c*p2 + d*p3
-
void
sgdUpdate
(BaseMatrixT &b, BaseMatrixT &c, T p1, T p2, T p3)¶ c = p2 * c - p1 * (b + p3 * this); this += mom (the momentum, i.e. the updated c)
-
void
sgdUpdate
(BaseMatrixT &b, BaseMatrixT &c, BaseMatrixT &d, T p1, T p2, T p3)¶ c = p2 * c - p1 * d * (b + p3 * this); this += mom (the momentum, i.e. the updated c)
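Reading p1 as the learning rate, p2 as the momentum coefficient and p3 as the weight-decay factor (an interpretation, not stated by the signatures), the two sgdUpdate overloads correspond to the classic momentum update. A standalone sketch of the first overload, where value plays the role of this, b is the gradient and c is the momentum buffer:

    #include <cstddef>

    // Sketch of sgdUpdate(b, c, p1, p2, p3):
    //   c     = p2 * c - p1 * (b + p3 * value)
    //   value += c
    void sgdUpdateRef(float* value, const float* grad, float* mom, size_t n,
                      float p1 /*lr*/, float p2 /*momentum*/, float p3 /*decay*/) {
      for (size_t i = 0; i < n; ++i) {
        mom[i] = p2 * mom[i] - p1 * (grad[i] + p3 * value[i]);
        value[i] += mom[i];
      }
    }

The second overload additionally scales the gradient term element-wise by d (e.g. a per-element learning rate).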
-
void
applyL1
(T learningRate, T decayRate)¶ apply L1/L2 to this
-
void
applyL1
(BaseMatrixT &lr, T learningRate, T decayRate)¶
-
void
applyL2
(T learningRate, T decayRate)¶
-
void
applyL2
(BaseMatrixT &lr, T learningRate, T decayRate)¶
-
void
dotMul
(BaseMatrixT &b)¶ this *= b
-
void
dotMul
(BaseMatrixT &b, BaseMatrixT &c)¶ this = b * c
-
void
dotDiv
(BaseMatrixT &b, BaseMatrixT &c)¶ this = b / c
-
void
dotDiv
(BaseMatrixT &b, BaseMatrixT &c, T p1, T p2)¶ this = (b + p1) / (c + p2)
-
void
rankLoss
(BaseMatrixT &b, BaseMatrixT &c, BaseMatrixT &d)¶ this = log(1 + exp(b - c)) - d * (b - c)
-
void
rankLossBp
(BaseMatrixT &b, BaseMatrixT &c, BaseMatrixT &d)¶
-
void
logisticRegressionLoss
(BaseMatrixT &b, BaseMatrixT &c)¶ this = log(1 + exp(b)) - c * b
-
void
logisticRegressionLossBp
(BaseMatrixT &b, BaseMatrixT &c)¶ this += exp(b)/(1+exp(b)) - c
-
void
biggerThan
(BaseMatrixT &b, BaseMatrixT &c)¶ this = b > c ? 1.0 : 0.0
-
void
biggerThan
(BaseMatrixT &b, BaseMatrixT &c, BaseMatrixT &d)¶ this = ((b > c && d > 0.5) || (b < c && d < 0.5)) ? 1 : 0
-
void
max
(BaseMatrixT &b, BaseMatrixT &c)¶ this = b>c ? b : c
-
void
binaryClassificationError
(size_t destCol, BaseMatrixT &b, BaseMatrixT &c, T p)¶ this[destCol] += ((b > p) == (c > p)) ? 0 : 1
-
void
binaryClassificationError2
(size_t destCol, BaseMatrixT &b, BaseMatrixT &c, T p)¶
-
void
dotMulSquare
(BaseMatrixT &b)¶ this = this * b * b
-
void
dotSquareMul
(BaseMatrixT &b)¶ this = this * this * b
-
void
dotMulSquare
(BaseMatrixT &b, BaseMatrixT &c)¶ this = b * c * c
-
void
dotSquareSquare
(BaseMatrixT &b, BaseMatrixT &c)¶ this = b * b * c * c
-
void
dotMulSquareSum
(BaseMatrixT &b, BaseMatrixT &c, T p1, T p2)¶ this = this * (p1*b + p2*c)^2
-
void
dotSquareSum
(BaseMatrixT &b, BaseMatrixT &c, T p1, T p2)¶ this = (p1*b + p2*c)^2
-
void
dotMulSum
(BaseMatrixT &b, BaseMatrixT &c, T p1, T p2)¶ this= this * (p1*b + p2*c)
-
void
addSquareSum
(BaseMatrixT &b, BaseMatrixT &c, BaseMatrixT d, T p1, T p2, T p3)¶ this += sqr(p1*b + p2*c + p3*d)
-
void
addSquare
(BaseMatrixT &b, T p)¶ this += p * sqr(b)
-
void
decayAddSquare
(BaseMatrixT &b, T p1, T p2)¶ this = p1 * this + p2 * sqr(b)
-
void
decayAddSquareMul
(BaseMatrixT &b, BaseMatrixT &c, T p1, T p2)¶ this = p1 * this + p2 * sqr(b * c)
-
void
reciprocal
(BaseMatrixT &b, T p1, T p2)¶ this = 1 / (p1 * b + p2)
-
void
reciprocalSum
(BaseMatrixT &b, BaseMatrixT &c, T p1, T p2, T p3)¶ this = 1 / (p1 * b + p2 * c + p3)
-
void
copyAndClear
(BaseMatrixT &b)¶ b = this; this = 0
-
void
rowDotMul
(size_t destCol, BaseMatrixT &b, BaseMatrixT &c)¶ this_row[destCol] += dotprod(b_row, c_row)
-
void
rowDotMul2
(size_t destCol, BaseMatrixT &b, BaseMatrixT &c)¶
-
void
addDotMulVMM
(BaseMatrixT &b, BaseMatrixT &c)¶ this is vector (one row matrix)
for each row i, do: this_row += dotmul(b_row_i, c_row_i)
-
void
addDotMulVMM2
(BaseMatrixT &b, BaseMatrixT &c)¶
-
void
addDotMulMMV
(BaseMatrixT &b, BaseMatrixT &c)¶ c is vector (one row matrix)
for each row i, do: this_row_i += dotmul(b_row_i, c_row)
-
void
addDotMulMMV2
(BaseMatrixT &b, BaseMatrixT &c)¶
-
void
addDotMul
(BaseMatrixT &b, BaseMatrixT &c, T p1, T p2)¶ this = p1 * this + p2 * b * c
-
void
rowScale
(size_t cCol, BaseMatrixT &b, BaseMatrixT &c)¶ this_row = b_row * c_row[cCol]
-
void
rowScale2
(size_t cCol, BaseMatrixT &b, BaseMatrixT &c)¶
-
void
colScale
(size_t cRow, BaseMatrixT &b, BaseMatrixT &c)¶ this_col = b_col * c_col[cRow]
-
void
addColScale
(size_t cRow, BaseMatrixT &b, BaseMatrixT &c)¶ this_col += b_col * c_col[cRow]
-
void
addRowScale
(size_t cCol, BaseMatrixT &b, BaseMatrixT &c)¶ this_row += b_row * c_row[cCol]
-
void
sumRows
(BaseMatrixT &b)¶ calculate the sum of each row of the matrix b.
-
void
maxRows
(BaseMatrixT &b)¶ calculate the maximum value of each row of the matrix b.
-
void
minRows
(BaseMatrixT &b)¶ calculate the minimum value of each row of the matrix b.
-
void
sumCols
(BaseMatrixT &b)¶ calculate the sum of each column of the matrix b.
-
void
maxCols
(BaseMatrixT &b)¶ calculate the maximum value of each column of the matrix b.
-
void
minCols
(BaseMatrixT &b)¶ calculate the minimum value of each column of the matrix b.
-
void
sumCols
(BaseMatrixT &b, T scale)¶
-
void
sumOfSquares
(BaseMatrixT &b, BaseMatrixT &c)¶ calculate the sum of each row of (b - c)^2.
-
void
rowAdd
(size_t cCol, BaseMatrixT &b, BaseMatrixT &c, T p)¶ this_row = b_row + p * ones * c_row[cCol]
-
void
rowPow
(size_t cCol, BaseMatrixT &b, BaseMatrixT &c)¶ this_row = pow(b_row, c_row[cCol])
-
virtual bool
isSparse
() const¶
Sparse Matrix¶
-
namespace
paddle
¶ Typedefs
-
typedef std::shared_ptr<GpuSparseMatrix>
GpuSparseMatrixPtr
¶
-
typedef std::shared_ptr<CpuSparseMatrix>
CpuSparseMatrixPtr
¶
Enums
-
enum
SparseFormat
¶ Matrix sparse format.
nnz represents the number of nonzero elements in the sparse matrix.
SPARSE_CSR: row major matrix. The length of row is height_ + 1, and each element is the start index of that row in the matrix; the lengths of col and value are nnz.
SPARSE_CSC: col major matrix. The length of col is width_ + 1, and each element is the start index of that column in the matrix; the lengths of row and value are nnz.
For example, for [0, 1, 0, 2, 0; 1, 0, 0, 0, 0; 0, 0, 0, 2, 5]: SPARSE_CSR has row [0, 2, 3, 5], col [1, 3, 0, 3, 4], value [1, 2, 1, 2, 5]; SPARSE_CSC has col [0, 1, 2, 2, 4, 5], row [1, 0, 0, 2, 2], value [1, 1, 2, 2, 5]. (These arrays are written out as code after the enumerator list below.)
Values:
-
SPARSE_CSR
= 0¶
-
SPARSE_CSC
= 1¶
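The example matrix above, written out as plain arrays (a sketch of the storage layout only, not of the CpuSparseMatrix/GpuSparseMatrix constructors):

    // 3 x 5 matrix [0 1 0 2 0; 1 0 0 0 0; 0 0 0 2 5], nnz = 5.
    // SPARSE_CSR: row offsets have height_ + 1 entries; col/value have nnz entries.
    int   csrRow[]   = {0, 2, 3, 5};
    int   csrCol[]   = {1, 3, 0, 3, 4};
    float csrValue[] = {1, 2, 1, 2, 5};

    // SPARSE_CSC: col offsets have width_ + 1 entries; row/value have nnz entries.
    int   cscCol[]   = {0, 1, 2, 2, 4, 5};
    int   cscRow[]   = {1, 0, 0, 2, 2};
    float cscValue[] = {1, 1, 2, 2, 5};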
-
-
class
CpuMatrix
¶ Inherits from paddle::Matrix
Subclassed by paddle::SharedCpuMatrix, paddle::SparseRowCpuMatrix, paddle::SparseRowIdsCpuMatrix
Public Functions
-
CpuMatrix
(real *data, size_t height, size_t width, bool trans = false)¶
-
CpuMatrix
(real *data, size_t height, size_t width, size_t stride, bool trans = false)¶
-
CpuMatrix
(CpuMemHandlePtr dataHandle, size_t height, size_t width, bool trans = false)¶
-
~CpuMatrix
()¶
-
virtual void
zeroMem
()¶
-
virtual void
resetOne
()¶
-
virtual void
resize
(size_t newHeight, size_t newWidth)¶ - Note
- Original data may not be preserved after resize().
-
virtual void
resize
(size_t newHeight, size_t newWidth, size_t newNnz, SparseValueType valueType, SparseFormat format)¶ - Note
- This should only be used for sparse matrix.
-
virtual void
setRow
(size_t row, size_t colNum, const unsigned int *cols, const real *values)¶ This should only be used for sparse matrix.
Currently must be called for each row in order. The matrix is not valid until setRow is called for the last row.
-
virtual real
getElement
(size_t x, size_t y) const¶
-
virtual real
getSum
()¶
-
virtual real
getAbsSum
()¶
-
virtual void
transpose
(MatrixPtr matTrans, bool memAlloc)¶ hard transpose.
If matTrans’ memory is allocated outside, set memAlloc to false; otherwise set it to true.
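A "hard" transpose physically rearranges the data (as opposed to merely flipping the trans flag). A standalone reference loop (sketch, row-major layout assumed):

    #include <cstddef>
    #include <vector>

    // Hard transpose: out (width x height) receives the physically rearranged
    // data of in (height x width).
    std::vector<float> hardTransposeRef(const std::vector<float>& in,
                                        size_t height, size_t width) {
      std::vector<float> out(width * height);
      for (size_t i = 0; i < height; ++i)
        for (size_t j = 0; j < width; ++j)
          out[j * height + i] = in[i * width + j];
      return out;
    }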
-
virtual void
copyFrom
(const Matrix &src, hl_stream_t stream)¶
-
virtual void
copyFrom
(const real *src, size_t size)¶ If this is GpuMatrix, src is assumed to be CPU memory
If this is CpuMatrix, src is assumed to be CPU memory
-
virtual void
copyFrom
(const real *cpuSrc, const int64_t *seq)¶
-
virtual void
copyFrom
(const IVector &src)¶ convert an int vector to a real matrix.
(1) source and dest are both in CPU.
(2) the sizes match exactly.
-
void
copyFrom
(CpuSparseMatrix &src)¶
-
virtual MatrixPtr
clone
(size_t height, size_t width, bool useGpu = false)¶ Create a matrix with the same type (GpuMatrix, CpuMatrix, NonValueSparseMatrix, etc.) as this.
If height and width are zero, the new matrix will have the same size as this; otherwise the new matrix will have the specified size.
-
virtual void
convExpand
(Matrix &feature, int feaImgHeight, int feaImgWidth, int channels, int blockH, int blockW, int strideH, int strideW, int paddingH, int paddingW, int outputH, int outputW)¶ This function is used to calculate the convolution:
It will expand a feature matrix according to the convolution filters
-
virtual void
convShrink
(Matrix &expandColMat, int thisImgHeight, int thisImgWidth, int channels, int blockH, int blockW, int strideH, int strideW, int paddingH, int paddingW, int outputH, int outputW, real alpha = 1.0f, real beta = 0.0f)¶ This function is the reverse implementation of convExpand:
Its function is to restore an expanded matrix into a feature matrix
-
virtual void
maxPoolForward
(Matrix &inputMat, size_t imgSizeH, size_t imgSizeW, size_t channels, size_t sizeX, int start_, size_t stride, size_t outputH, size_t outputW)¶ Pooling forward operation: pick out the largest element within each sizeX window.
-
virtual void
maxPoolBackward
(Matrix &image, size_t imgSizeH, size_t imgSizeW, Matrix &outGrad, Matrix &outV, size_t sizeX, int start, size_t stride, size_t outputH, size_t outputW, real scaleTargets, real scaleOutput)¶ Pooling backward operation.
-
virtual void
avgPoolForward
(Matrix &input, size_t imgSizeH, size_t imgSizeW, size_t channels, size_t sizeX, int start, size_t stride, size_t outputH, size_t outputW)¶ Pooling forward operation, calculate the average of sizeX elements.
-
virtual void
avgPoolBackward
(Matrix &input, size_t imgSizeH, size_t imgSizeW, size_t sizeX, int start, size_t stride, size_t outputH, size_t outputW, real scaleTargets, real scaleOutput)¶
-
virtual void
crossMapNormalFwd
(Matrix &input, size_t imgSizeH, size_t imgSizeW, Matrix &denoms, size_t channels, size_t sizeX, float scale, float pow, bool blocked)¶ normalize-operation.
-
virtual void
crossMapNormalBwd
(Matrix &localGrad, Matrix &denoms, Matrix &preOutV, Matrix &localOutV, size_t channels, size_t imgSizeH, size_t imgSizeW, size_t sizeX, float scale, float pow, bool blocked)¶
-
virtual void
maxSequenceForward
(Matrix &input, const IVector &sequence, IVector &index)¶ Input: one or more sequences. Each sequence contains some instances. Output: the output size is the number of input sequences (NOT input instances). output[i] is set to the maximum, over all instances in sequence i, of the corresponding input values.
-
virtual void
contextProjectionForward
(MatrixPtr input, MatrixPtr weight, const IVector &sequence, int contextLength, int contextStart, size_t beginPad, bool isPadding)¶
-
virtual void
contextProjectionBackward
(MatrixPtr inputGrad, MatrixPtr weightGrad, const IVector &sequence, int contextLength, int contextStart, size_t beginPad, bool isPadding)¶
-
real *
getRow
(size_t row)¶
-
virtual real *
getRowBuf
(size_t row)¶
- template <typename TableMatType>
-
void
selectRowsImp
(TableMatType &table, IVector &ids)¶ uses the abstract getRow() to get rows from table.
table is defined as a template parameter instead of a virtual class for performance; used internally by the two virtual functions above.
-
virtual void
addColumnVector
(const Matrix &b)¶ Add a vector (column) b to matrix a, column by column.
-
virtual void
mul
(const MatrixPtr a, const MatrixPtr b, real scaleAB, real scaleT)¶ this = scaleAB*(a*b) + scaleT*this
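The contract this = scaleAB*(a*b) + scaleT*this is a GEMM-style update. A naive standalone reference loop (sketch, row-major, no transposition or sparse handling):

    #include <cstddef>

    // t (M x N) = scaleAB * (a (M x K) * b (K x N)) + scaleT * t
    void mulRef(float* t, const float* a, const float* b,
                size_t M, size_t N, size_t K, float scaleAB, float scaleT) {
      for (size_t i = 0; i < M; ++i) {
        for (size_t j = 0; j < N; ++j) {
          float acc = 0.0f;
          for (size_t k = 0; k < K; ++k) acc += a[i * K + k] * b[k * N + j];
          t[i * N + j] = scaleAB * acc + scaleT * t[i * N + j];
        }
      }
    }

With scaleT = 0 this is a plain matrix product; with scaleAB = scaleT = 1 it accumulates into the destination.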
-
void
mul
(CpuMatrix *a, CpuSparseMatrix *b, real scaleAB, real scaleT)¶
-
virtual void
mul
(CpuSparseMatrix *a, CpuMatrix *b, real scaleAB, real scaleT)¶
-
virtual void
rowMax
(IVector &maxIds, Matrix &max)¶ Get the top k elements of each row of this matrix.
The column ids and values of these elements are stored in maxIds and max respectively. Note that the top k elements are not sorted.
-
virtual void
oneHotCrossEntropy
(Matrix &output, IVector &label)¶ copy -log(output[label]) to this->data[i].
-
virtual void
oneHotCrossEntropyBp
(Matrix &outputV, IVector &label)¶ calculate the error of outputV according to label.
-
virtual void
oneHotCrossEntropyWithSelfNorm
(Matrix &output, IVector &label, real alpha)¶ copy -log(output[label]) to this->data[i].
-
virtual void
oneHotCrossEntropyWithSelfNormBp
(Matrix &outputV, IVector &label, real alpha)¶ calculate the error of outputV according to label.
-
virtual void
circularConv
(Matrix &b, Matrix &c)¶ - \[ a[i] = \sum_{j=-(N-1)/2}^{(N-1)/2} b_{i+j} * c_{j} \]
b contains M elements, c contains N elements (N is odd), b’s index arithmetic is computed modulo M, c’s index arithmetic is computed modulo N.
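A standalone reference implementation of the circular-convolution formula above, for a single row (sketch; the member function operates row by row on the matrices):

    #include <cstddef>
    #include <vector>

    // a[i] = sum_{j = -(N-1)/2 .. (N-1)/2} b[(i + j) mod M] * c[j mod N]
    std::vector<float> circularConvRef(const std::vector<float>& b,
                                       const std::vector<float>& c) {
      const int M = static_cast<int>(b.size());
      const int N = static_cast<int>(c.size());  // N is odd
      const int half = (N - 1) / 2;
      std::vector<float> a(M, 0.0f);
      for (int i = 0; i < M; ++i)
        for (int j = -half; j <= half; ++j)
          a[i] += b[((i + j) % M + M) % M] * c[((j % N) + N) % N];
      return a;
    }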
-
virtual void
circularConvDerivative
(Matrix &output, Matrix &prevOut1, Matrix &prevOut2, Matrix &prevGrad1, Matrix &prevGrad2)¶
-
virtual void
cosSim
(Matrix &output1, Matrix &output2, real scale)¶ cosine similarity, for each row i, this[i] = cos(output1[i], output2[i])
output2 can only have one row, then for each row i, this[i] = cos(output1[i], output2[0])
-
virtual void
cosSimDerivative
(Matrix &output, Matrix &prevOut1, Matrix &prevOut2, Matrix &prevGrad1, Matrix &prevGrad2, real scale)¶
-
virtual void
print
(std::ostream &os) const¶ print out the values of elements to os
-
virtual void
print
(std::ostream &os, size_t height, size_t width) const¶ print a part of the matrix from the (top,left) value to the (height, width) value (not included)
-
virtual void
printOneRow
(std::ostream &os, size_t idx) const¶ print one row to os
-
virtual real
getMin
()¶
-
virtual real
getMax
()¶
-
virtual void
randomizeUniform
()¶
-
virtual void
classificationError
(MatrixPtr output, IVectorPtr label)¶ calculate the error of classification
output[i] = 1 if row i is an error.
output[i] = 0 if row i is correct.
-
virtual void
addByBitCode
(size_t numClasses, const IVector &codes, const Matrix &vec)¶ For j < codeLength: this(i, j) += vec(index(i, j), 0) where index(i, j) = ((codes(i) + numClasses) >> (j + 1)) - 1
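The index and bit expressions shared by the *ByBitCode family, extracted into standalone helpers (a sketch; they read codes(i) + numClasses as a node id in a binary coding tree, which is an interpretation):

    #include <cstddef>

    // index(i, j) = ((codes(i) + numClasses) >> (j + 1)) - 1
    inline size_t bitCodeIndex(size_t code, size_t numClasses, size_t j) {
      return ((code + numClasses) >> (j + 1)) - 1;
    }

    // bit(i, j) = ((codes(i) + numClasses) & 2^j) ? 1 : 0, as used by sumByBitCode/subByBitCode.
    inline int bitCodeBit(size_t code, size_t numClasses, size_t j) {
      return ((code + numClasses) & (static_cast<size_t>(1) << j)) ? 1 : 0;
    }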
-
virtual void
addByBitCodeBackward
(size_t numClasses, const IVector &codes, Matrix &vec)¶ For j < codeLength: vec(index(i, j), 0) += this(i, j) where index is same as the index for addByBitCode
-
virtual void
mulByBitCode
(size_t numClasses, const IVector &codes, const Matrix &mat, const Matrix &input)¶ For j < codeLength: this(i, j) += <mat.row(index(i, j)), input.row(i)> where index is same as the index for addByBitCode
-
virtual void
mulByBitCodeBackwardWeight
(size_t numClasses, const IVector &codes, Matrix &mat, const Matrix &input)¶ For j < codeLength: mat.row(index(i, j)) += this(i, j) * input.row(i) where index is same as the index for addByBitCode
-
virtual void
mulByBitCodeBackwardError
(size_t numClasses, const IVector &codes, const Matrix &mat, Matrix &input)¶ For j < codeLength: input.row(i) += this(i, j) * mat.row(index(i, j)) where index is same as the index for addByBitCode
-
virtual void
sumByBitCode
(size_t numClasses, IVector &codes, Matrix &sum, real scaleSum)¶ For j < codeLength sum(i, 0) = scaleSum * \sum_j bit(i, j) * this(i, j) where bit(i, j) = ((codes(i) + numClasses) & 2^j) ? 1 : 0
-
virtual void
subByBitCode
(size_t numClasses_, IVector &codes)¶ For j < codeLength this(i, j) -= bit(i, j) where bit(i, j) is same as that for sumByBitCode
-
virtual void
multiBinaryLabelCrossEntropy
(Matrix &output, Matrix &label)¶ cross entropy for multi binary labels
this[i] = -sum(label[i][j]*log(output[i][j]) + (1-label[i][j])*log(1-output[i][j]))
Public Static Functions
-
void
mul
(CpuMatrix *a, CpuMatrix *b, CpuSparseMatrix *c, real scaleAB, real scaleT)¶
- template <typename MatBType, typename MatCType>
-
void
mul
(CpuSparseMatrix *a, MatBType *b, MatCType *c, real scaleAB, real scaleT)¶ c = a * b
uses the abstract getRow() to get rows from B and C. B and C are defined as template parameters instead of virtual classes for performance.
-
-
class
GpuMatrix
¶ Inherits from paddle::Matrix
Public Functions
-
GpuMatrix
()¶
-
GpuMatrix
(size_t height, size_t width, bool trans = false)¶
-
GpuMatrix
(real *data, size_t height, size_t width, bool trans = false)¶
-
GpuMatrix
(real *data, size_t height, size_t width, size_t stride, bool trans = false)¶
-
GpuMatrix
(GpuMemHandlePtr dataHandle, size_t height, size_t width, bool trans = false)¶
-
~GpuMatrix
()¶
-
virtual void
zeroMem
()¶
-
virtual void
resetOne
()¶
-
virtual void
resize
(size_t newHeight, size_t newWidth)¶ - Note
- Original data may not be preserved after resize().
-
virtual void
resize
(size_t newHeight, size_t newWidth, size_t newNnz, SparseValueType valueType, SparseFormat format)¶ - Note
- This should only be used for sparse matrix.
-
virtual void
setRow
(size_t row, size_t colNum, const unsigned int *cols, const real *values)¶ This should only be used for sparse matrix.
Currently must be called for each row in order. The matrix is not valid until setRow is called for the last row.
-
virtual void
copyFrom
(const real *hostSrc, size_t size)¶ Copy the data from cpu_memory buffer
-
virtual void
copyFrom
(const real *hostSrc, const int64_t *seq)¶
-
virtual void
copyFrom
(const Matrix &src, hl_stream_t stream)¶
-
virtual void
copyFrom
(const IVector &src)¶ convert an int vector to a real matrix.
(1) source and dest are both in CPU.
(2) the sizes match exactly.
-
virtual MatrixPtr
clone
(size_t height, size_t width, bool useGpu = false)¶ Create a matrix with the same type (GpuMatrix, CpuMatrix, NonValueSparseMatrix, etc.) as this.
If height and width are zero, the new matrix will have the same size as this; otherwise the new matrix will have the specified size.
-
virtual real
getElement
(size_t x, size_t y) const¶
-
real *
getRow
(size_t row)¶
-
virtual real *
getRowBuf
(size_t row)¶
-
virtual real
getSum
()¶
-
virtual real
getAbsSum
()¶
-
virtual void
transpose
(MatrixPtr matTrans, bool memAlloc)¶ hard transpose.
If matTrans’ memory is allocated outside, set memAlloc to false; otherwise set it to true.
-
virtual void
addColumnVector
(const Matrix &b)¶ Add a vector (column) b to matrix a, column by column.
-
virtual void
mul
(const MatrixPtr a, const MatrixPtr b, real scaleAB, real scaleT)¶ this = scaleAB*(a*b) + scaleT*this
-
void
mul
(const GpuSparseMatrix &a, const GpuMatrix &b, real scaleAB, real scaleT)¶
-
void
mul
(const GpuMatrix &a, const GpuSparseMatrix &b, real scaleAB, real scaleT)¶
-
virtual void
rowMax
(IVector &maxIds, Matrix &max)¶ Get the top k elements of each row of this matrix.
The column ids and values of these elements are stored in maxIds and max respectively. Note that the top k elements are not sorted.
-
virtual void
oneHotCrossEntropy
(Matrix &output, IVector &label)¶ copy -log(output[label]) to this->data[i].
-
virtual void
oneHotCrossEntropyBp
(Matrix &outputV, IVector &label)¶ calculate the error of outputV according to label.
-
virtual void
oneHotCrossEntropyWithSelfNorm
(Matrix &output, IVector &label, real alpha)¶ copy -log(output[label]) to this->data[i].
-
virtual void
oneHotCrossEntropyWithSelfNormBp
(Matrix &outputV, IVector &label, real alpha)¶ calculate the error of outputV according to label.
-
virtual void
cosSim
(Matrix &output1, Matrix &output2, real scale)¶ cosine similarity, for each row i, this[i] = cos(output1[i], output2[i])
output2 can only have one row, then for each row i, this[i] = cos(output1[i], output2[0])
-
virtual void
cosSimDerivative
(Matrix &output, Matrix &prevOut1, Matrix &prevOut2, Matrix &prevGrad1, Matrix &prevGrad2, real scale)¶
-
virtual void
print
(std::ostream &os) const¶ print out the values of elements to os
-
virtual void
print
(std::ostream &os, size_t height, size_t width) const¶ print a part of the matrix from the (top,left) value to the (height, width) value (not included)
-
virtual void
randomizeUniform
()¶
-
virtual void
classificationError
(MatrixPtr output, IVectorPtr label)¶ calculate the error of classification
output[i] = 1 if row i is an error.
output[i] = 0 if row i is correct.
-
virtual void
convExpand
(Matrix &feature, int feaImgHeight, int feaImgWidth, int channels, int blockH, int blockW, int strideH, int strideW, int paddingH, int paddingW, int outputH, int outputW)¶ This function is used to calculate the convolution:
It will expand a feature matrix according to the convolution filters
-
virtual void
convShrink
(Matrix &expandColMat, int thisImgHeight, int thisImgWidth, int channels, int blockH, int blockW, int strideH, int strideW, int paddingH, int paddingW, int outputH, int outputW, real alpha = 1.0f, real beta = 0.0f)¶ This function is the reverse implementation of convExpand:
Its function is to restore an expanded matrix into a feature matrix
-
virtual void
maxPoolForward
(Matrix &inputMat, size_t imgSizeH, size_t imgSizeW, size_t channels, size_t sizeX, int start_, size_t stride, size_t outputH, size_t outputW)¶ Pooling forward operation: pick out the largest element within each sizeX window.
-
virtual void
maxPoolBackward
(Matrix &image, size_t imgSizeH, size_t imgSizeW, Matrix &outGrad, Matrix &outV, size_t sizeX, int start, size_t stride, size_t outputH, size_t outputW, real scaleTargets, real scaleOutput)¶ Pooling backward operation.
-
virtual void
avgPoolForward
(Matrix &input, size_t imgSizeH, size_t imgSizeW, size_t channels, size_t sizeX, int start, size_t stride, size_t outputH, size_t outputW)¶ Pooling forward operation, calculate the average of sizeX elements.
-
virtual void
avgPoolBackward
(Matrix &input, size_t imgSizeH, size_t imgSizeW, size_t sizeX, int start, size_t stride, size_t outputH, size_t outputW, real scaleTargets, real scaleOutput)¶
-
virtual void
crossMapNormalFwd
(Matrix &input, size_t imgSizeH, size_t imgSizeW, Matrix &denoms, size_t channels, size_t sizeX, float scale, float pow, bool blocked)¶ normalize-operation.
-
virtual void
crossMapNormalBwd
(Matrix &localGrad, Matrix &denoms, Matrix &preOutV, Matrix &localOutV, size_t channels, size_t imgSizeH, size_t imgSizeW, size_t sizeX, float scale, float pow, bool blocked)¶
-
virtual void
maxSequenceForward
(Matrix &input, const IVector &sequence, IVector &index)¶ Input: one or more sequences. Each sequence contains some instances.
Output: output size is the number of input sequences (NOT input instances).
output[i] is set to the maximum over the instances of input sequence i.
-
virtual void
contextProjectionForward
(MatrixPtr input, MatrixPtr weight, const IVector &sequence, int contextLength, int contextStart, size_t beginPad, bool isPadding)¶
-
-
class
Matrix
¶ - #include <Matrix.h>
Copy or assignment constructors will share the data rather than making a copy of the original data. To make a copy of the original data, use copyFrom() instead.
Inherits from paddle::BaseMatrixT< real >
Subclassed by paddle::CpuMatrix, paddle::CpuSparseMatrix, paddle::GpuMatrix, paddle::GpuSparseMatrix
Public Functions
-
virtual
~Matrix
()¶
-
void
setData
(real *data)¶ set the data buffer used to hold the matrix data.
caller should make sure that the size of data is at least sizeof(real)*height*width.
-
void
setData
(real *data, size_t newHeight, size_t newWidth)¶ the data should be contiguous
-
size_t
getWidth
() const¶
-
size_t
getHeight
() const¶
-
size_t
getStride
() const¶
-
size_t
getElementCnt
() const¶
-
virtual real *
getData
()¶
-
virtual const real *
getData
() const¶
-
bool
isTransposed
() const¶
-
bool
isContiguous
() const¶
-
virtual int *
getRows
() const¶
-
virtual int *
getCols
() const¶
-
virtual SparseFormat
getFormat
() const¶
-
virtual SparseValueType
getValueType
() const¶
-
virtual void
add3
(MatrixPtr b)¶ matrix element-wise add.
Named add3 because add and add2 are already used in BaseMatrix.cu and are not virtual functions.
-
MemoryHandlePtr
getMemoryHandle
() const¶
-
virtual void
zeroMem
()¶
-
virtual void
resetOne
()¶
-
virtual void
trimFrom
(const CpuSparseMatrix &src)¶
-
virtual void
copyFrom
(const Matrix &src, hl_stream_t stream)¶
-
MatrixPtr
subMatrix
(size_t startRow, size_t endRow, size_t startCol, size_t endCol)¶
-
virtual void
copyFrom
(const real *src, size_t size)¶ If this is GpuMatrix, src is assumed to be CPU memory
If this is CpuMatrix, src is assumed to be CPU memory
-
virtual void
copyFrom
(const real *src, const int64_t *seq)¶
-
virtual void
copyFrom
(const IVector &src)¶ convert an int vector to a real matrix.
(1) source and dest are both in CPU.
(2) the sizes match exactly.
-
virtual MatrixPtr
clone
(size_t height = 0, size_t width = 0, bool useGpu = false)¶ Create a matrix with the same type (GpuMatrix, CpuMatrix, NonValueSparseMatrix, etc.) as this.
If height and width are zero, the new matrix will have the same size as this; otherwise the new matrix will have the specified size.
-
virtual real *
getRowBuf
(size_t row)¶
-
virtual real
getElement
(size_t x, size_t y) const¶
-
virtual real
getSum
()¶
-
virtual real
getAbsSum
()¶
-
virtual void
resize
(size_t newHeight, size_t newWidth) = 0¶ - Note
- Original data may not be preserved after resize().
-
virtual void
resize
(size_t newHeight, size_t newWidth, size_t newNnz, SparseValueType valueType, SparseFormat format) = 0¶ - Note
- This should only be used for sparse matrix.
-
virtual void
setRow
(size_t row, size_t colNum, const unsigned int *cols, const real *values) = 0¶ This should only be used for sparse matrix.
Currently must be called for each row in order. The matrix is not valid until setRow is called for the last row.
-
virtual void
transpose
(MatrixPtr matTrans, bool memAlloc)¶ hard transpose.
If matTrans’ memory is allocated outside, set memAlloc to false; otherwise set it to true.
-
virtual void
clear
()¶ Only set all variables to 0 or NULL but not free them.
-
void
reshape
(size_t height, size_t width)¶
-
virtual void
mul
(const MatrixPtr a, const MatrixPtr b, real scaleAB, real scaleT)¶ this = scaleAB*(a*b) + scaleT*this
-
virtual void
addColumnVector
(const Matrix &b)¶ Add a vector (column) b to matrix a, column by column.
-
virtual void
addByBitCode
(size_t numClasses, const IVector &codes, const Matrix &vec)¶ For j < codeLength: this(i, j) += vec(index(i, j), 0) where index(i, j) = ((codes(i) + numClasses) >> (j + 1)) - 1
-
virtual void
addByBitCodeBackward
(size_t numClasses, const IVector &codes, Matrix &vec)¶ For j < codeLength: vec(index(i, j), 0) += this(i, j) where index is same as the index for addByBitCode
-
virtual void
mulByBitCode
(size_t numClasses, const IVector &codes, const Matrix &mat, const Matrix &input)¶ For j < codeLength: this(i, j) += <mat.row(index(i, j)), input.row(i)> where index is same as the index for addByBitCode
-
virtual void
mulByBitCodeBackwardWeight
(size_t numClasses, const IVector &codes, Matrix &mat, const Matrix &input)¶ For j < codeLength: mat.row(index(i, j)) += this(i, j) * input.row(i) where index is same as the index for addByBitCode
-
virtual void
mulByBitCodeBackwardError
(size_t numClasses, const IVector &codes, const Matrix &mat, Matrix &input)¶ For j < codeLength: input.row(i) += this(i, j) * mat.row(index(i, j)) where index is same as the index for addByBitCode
-
virtual void
sumByBitCode
(size_t numClasses, IVector &codes, Matrix &sum, real scaleSum)¶ For j < codeLength sum(i, 0) = scaleSum * \sum_j bit(i, j) * this(i, j) where bit(i, j) = ((codes(i) + numClasses) & 2^j) ? 1 : 0
-
virtual void
subByBitCode
(size_t numClasses_, IVector &codes)¶ For j < codeLength this(i, j) -= bit(i, j) where bit(i, j) is same as that for sumByBitCode
-
virtual void
rowMax
(IVector &maxIds, Matrix &max)¶ Get the top k elements of each row of this matrix.
The column ids and values of these elements are stored in maxIds and max respectively. Note that the top k elements are not sorted.
-
virtual void
oneHotCrossEntropy
(Matrix &output, IVector &label)¶ copy -log(output[label]) to this->data[i].
-
virtual void
oneHotCrossEntropyBp
(Matrix &outputV, IVector &label)¶ calculate the error of outputV according to label.
-
virtual void
oneHotCrossEntropyWithSelfNorm
(Matrix &output, IVector &label, real alpha)¶ copy -log(output[label]) to this->data[i].
-
virtual void
oneHotCrossEntropyWithSelfNormBp
(Matrix &outputV, IVector &label, real alpha)¶ calculate the error of outputV according to label.
-
virtual void
circularConv
(Matrix &b, Matrix &c)¶ - \[ a[i] = \sum_{j=-(N-1)/2}^{(N-1)/2} b_{i+j} * c_{j} \]
b contains M elements, c contains N elements (N is odd), b’s index arithmetic is computed modulo M, c’s index arithmetic is computed modulo N.
-
virtual void
circularConvDerivative
(Matrix &output, Matrix &prevOut1, Matrix &prevOut2, Matrix &prevGrad1, Matrix &prevGrad2)¶
-
virtual void
cosSim
(Matrix &output1, Matrix &output2, real scale = 1.0f)¶ cosine similarity, for each row i, this[i] = cos(output1[i], output2[i])
output2 can only have one row, then for each row i, this[i] = cos(output1[i], output2[0])
-
virtual void
cosSimDerivative
(Matrix &output, Matrix &prevOut1, Matrix &prevOut2, Matrix &prevGrad1, Matrix &prevGrad2, real scale = 1.0f)¶
-
virtual void
print
(std::ostream &os) const¶ print out the values of elements to os
-
virtual void
print
(std::ostream &os, size_t height, size_t width) const¶ print a part of the matrix from the (top,left) value to the (height, width) value (not included)
-
virtual void
printOneRow
(std::ostream &os, size_t idx) const¶ print one row to os
-
virtual real
getMin
()¶
-
virtual real
getMax
()¶
-
virtual void
randomizeUniform
()¶
-
virtual void
classificationError
(MatrixPtr output, IVectorPtr label)¶ calculate the error of classification
output[i] = 1 if row i is an error.
output[i] = 0 if row i is correct.
-
virtual void
convExpand
(Matrix &feature, int feaImgHeight, int feaImgWidth, int channels, int blockH, int blockW, int strideH, int strideW, int paddingH, int paddingW, int outputH, int outputW)¶ This function is used to calculate the convolution:
It will expand a feature matrix according to the convolution filters
-
virtual void
convShrink
(Matrix &expandColMat, int thisImgHeight, int thisImgWidth, int channels, int blockH, int blockW, int strideH, int strideW, int paddingH, int paddingW, int outputH, int outputW, real alpha = 1.0f, real beta = 0.0f)¶ This function is the reverse implementation of convExpand:
Its function is to restore an expanded matrix into a feature matrix
-
virtual void
maxPoolForward
(Matrix &inputMat, size_t imgSizeH, size_t imgSizeW, size_t channels, size_t sizeX, int start_, size_t stride, size_t outputH, size_t outputW)¶ Pooling forward operation: pick out the largest element within each sizeX window.
-
virtual void
maxPoolBackward
(Matrix &image, size_t imgSizeH, size_t imgSizeW, Matrix &outGrad, Matrix &outV, size_t sizeX, int start, size_t stride, size_t outputH, size_t outputW, real scaleTargets, real scaleOutput)¶ Pooling backward operation.
-
virtual void
avgPoolForward
(Matrix &input, size_t imgSizeH, size_t imgSizeW, size_t channels, size_t sizeX, int start, size_t stride, size_t outputH, size_t outputW)¶ Pooling forward operation, calculate the average of sizeX elements.
-
virtual void
avgPoolBackward
(Matrix &input, size_t imgSizeH, size_t imgSizeW, size_t sizeX, int start, size_t stride, size_t outputH, size_t outputW, real scaleTargets, real scaleOutput)¶
-
virtual void
crossMapNormalFwd
(Matrix &input, size_t imgSizeH, size_t imgSizeW, Matrix &denoms, size_t channels, size_t sizeX, float scale, float pow, bool blocked)¶ normalize-operation.
-
virtual void
crossMapNormalBwd
(Matrix &localGrad, Matrix &denoms, Matrix &preOutV, Matrix &localOutV, size_t channels, size_t imgSizeH, size_t imgSizeW, size_t size, float scale, float pow, bool blocked)¶
-
virtual void
maxSequenceForward
(Matrix &input, const IVector &sequence, IVector &index)¶ Input: one or more sequences. Each sequence contains some instances.
Output: output size is the number of input sequences (NOT input instances).
output[i] is set to the maximum over the instances of input sequence i.
-
virtual void
contextProjectionForward
(MatrixPtr input, MatrixPtr weight, const IVector &sequence, int contextLength, int contextStart, size_t beginPad, bool isPadding)¶
-
virtual void
contextProjectionBackward
(MatrixPtr inputGrad, MatrixPtr weightGrad, const IVector &sequence, int contextLength, int contextStart, size_t beginPad, bool isPadding)¶
-
virtual void
contextProjectionBackwardData
(MatrixPtr inputGrad, const IVector &sequence, int contextLength, int contextStart)¶
-
virtual void
contextProjectionBackwardWeight
(MatrixPtr weightGrad, const IVector &sequence, int contextLength, int contextStart, int totalPad, size_t beginPad)¶
-
virtual void
selectRows
(Matrix &table, IVector &ids)¶ this.row[i] += table.row[ids[i]] if ids[i] == -1, it will be ignored
-
virtual void
addToRows
(Matrix &table, IVector &ids)¶ table.row[ids[i]] += this.row[i] if ids[i] == -1, it will be ignored
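selectRows gathers rows of table into this, while addToRows scatters rows of this back into table; in both, ids[i] == -1 is skipped. A reference sketch of the gather direction (the scatter direction is the same loop with the += reversed):

    #include <cstddef>
    #include <vector>

    // selectRows: dst.row[i] += table.row[ids[i]]; ids[i] == -1 is ignored.
    // width is the common row width of dst and table.
    void selectRowsRef(std::vector<float>& dst, const std::vector<float>& table,
                       const std::vector<int>& ids, size_t width) {
      for (size_t i = 0; i < ids.size(); ++i) {
        if (ids[i] == -1) continue;
        for (size_t j = 0; j < width; ++j)
          dst[i * width + j] += table[static_cast<size_t>(ids[i]) * width + j];
      }
    }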
-
virtual void
multiBinaryLabelCrossEntropy
(Matrix &output, Matrix &label)¶ cross entropy for multi binary labels
this[i] = -sum(label[i][j]*log(output[i][j]) + (1-label[i][j])*log(1-output[i][j]))
-
virtual void
multiBinaryLabelCrossEntropyBp
(Matrix &output, Matrix &label)¶ The gradient of cross entropy for multi binary labels on output.
this[i][j] = -label[i][j]/output[i][j] + (1-label[i][j])/(1-output[i][j])
Public Static Functions
-
MatrixPtr
create
(MemoryHandlePtr memHandle, size_t height, size_t width, bool trans = false)¶
-
MatrixPtr
create
(size_t height, size_t width, bool trans = false, bool useGpu = false)¶
-
MatrixPtr
create
(real *data, size_t height, size_t width, bool trans = false, bool useGpu = false)¶
-
MatrixPtr
create
(real *data, size_t height, size_t width, size_t stride, bool trans = false, bool useGpu = false)¶
-
MatrixPtr
createSparseMatrix
(size_t height, size_t width, size_t nnz, SparseValueType valueType = FLOAT_VALUE, bool trans = false, bool useGpu = false)¶
-
MatrixPtr
createSparseMatrix
(size_t height, size_t width, size_t nnz, SparseValueType valueType = FLOAT_VALUE, SparseFormat foramt = SPARSE_CSR, bool trans = false, bool useGpu = false)¶
-
MatrixPtr
createSparseMatrix
(real *data, int *row, int *col, size_t height, size_t width, size_t nnz, SparseValueType valueType, SparseFormat format, bool trans, bool useGpu)¶
-
void
resizeOrCreateSparseMatrix
(MatrixPtr &matrix, size_t height, size_t width, size_t nnz, SparseValueType valueType = FLOAT_VALUE, SparseFormat foramt = SPARSE_CSR, bool trans = false, bool useGpu = false)¶
-
void
resizeOrCreate
(MatrixPtr &a, size_t height, size_t width, bool trans = false, bool useGpu = false)¶
-
struct
sparse_float_value_t
¶
-
namespace
paddle
¶ Typedefs
-
typedef CpuVectorT<real>
CpuVector
¶
-
typedef GpuVectorT<real>
GpuVector
¶
-
typedef CpuVectorT<int>
CpuIVector
¶
-
typedef GpuVectorT<int>
GpuIVector
¶
-
typedef std::shared_ptr<CpuIVector>
CpuIVectorPtr
¶
-
typedef std::shared_ptr<GpuIVector>
GpuIVectorPtr
¶
-
typedef CpuGpuVectorT<real>
CpuGpuVector
¶
-
typedef CpuGpuVectorT<int>
ICpuGpuVector
¶
-
typedef std::shared_ptr<CpuGpuVector>
CpuGpuVectorPtr
¶
-
typedef std::shared_ptr<ICpuGpuVector>
ICpuGpuVectorPtr
¶
- template <class T>
-
class
BaseVector
¶ Inherits from paddle::BaseMatrixT< T >
Subclassed by paddle::VectorT< T >
Protected Attributes
-
size_t &
size_
¶
- template <class T>
-
class
CpuGpuVectorT
¶ - #include <Vector.h>
A class to do conversion between CpuVector and GpuVector automatically.
Public Types
Public Functions
-
CpuGpuVectorT
(size_t size, bool useGpu)¶ A constructor, create cpuVectorT_ or gpuVectorT_.
- Parameters
size - data size.
useGpu - use GPU or not.
Another constructor creates a CpuGpuVectorT from a VectorT.
If src is a CpuVector, cpuVectorT_ shares its data with src.
If src is a GpuVector, gpuVectorT_ shares its data with src.
-
CpuGpuVectorT
(size_t size, T *data, bool useGpu)¶ A constructor.
If useGpu is true, data should be located in device and create gpuVectorT_ with data.
If useGpu is false, data should be located in host and create cpuVectorT_ with data.
- Note
- Data is owned by the caller and should remain valid during the life of this vector. The caller is responsible for releasing the memory.
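A usage sketch based only on the constructor and accessors documented in this section (the header path is an assumption, and the float instantiation is chosen for illustration):

    #include <cstddef>
    #include <paddle/math/Vector.h>  // assumed header location

    void cpuGpuVectorSketch(float* hostData, size_t n) {
      // Wrap caller-owned host data (useGpu = false); per the Note above,
      // the caller keeps ownership and must keep the buffer alive.
      paddle::CpuGpuVectorT<float> vec(n, hostData, /*useGpu=*/false);

      const float* host   = vec.getData(false);  // host-side data
      const float* device = vec.getData(true);   // device-side data; the class is
                                                 // described above as converting
                                                 // between CPU and GPU automatically
      (void)host; (void)device;
    }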
-
CpuGpuVectorT
(CpuGpuVectorT<T> &src, size_t offset, size_t size)¶
-
virtual
~CpuGpuVectorT
()¶
-
void
resize
(size_t size, bool useGpu)¶ resize vector.
If useGpu is true, resize gpuVectorT_ and set syncFlag_ to DATA_AT_GPU,
otherwise resize cpuVectorT_ and set syncFlag_ to DATA_AT_CPU.
-
std::shared_ptr<const VectorT<T>>
getVector
(bool useGpu) const¶ return a const cpuVectorT_ or gpuVectorT_.
If useGpu is true, return gpuVectorT_.
If useGpu is false, return cpuVectorT_.
- Note
- Caller should not change the data. If caller changes const attribute, should set syncFlag_.
-
std::shared_ptr<VectorT<T>> &
getMutableVector
(bool useGpu)¶ return a mutable cpuVectorT_ or gpuVectorT_.
- Note
- : This interface will change syncFlag_, so if you will not change the data, you should call getVector.
-
const T *
getData
(bool useGpu) const¶ return const T* data.
If useGpu is true, return device data.
If useGpu is false, return host data.
-
T *
getMutableData
(bool useGpu)¶
-
void
zeroMem
(bool useGpu)¶ If useGpu is true, gpuVectorT_->Op().
If useGpu is false, cpuVectorT_->Op().
Op is zeroMem, fillSequence, ...
-
void
fillSequence
(bool useGpu)¶
-
void
setElement
(size_t i, const T &value, bool useGpu)¶
-
T
getElement
(size_t i) const¶ return i-th element.
-
size_t
getSize
() const¶ return vector size.
-
void
copyToCpu
(const T *data, size_t size)¶ copy data to cpuVectorT_.
-
void
copyToCpu
(const T *data, size_t size, hl_stream_t stream)¶ copy data to cpuVectorT_ using the specified stream.
-
void
copyToGpu
(const T *data, size_t size)¶ copy data to gpuVectorT_.
-
void
copyToGpu
(const T *data, size_t size, hl_stream_t stream)¶ copy data to gpuVectorT_ using the specified stream.
-
void
copyFrom
(const VectorT<T> &src, hl_stream_t stream)¶ copy from src using the specified stream.
If src is CpuVectorT, copy to cpuVectorT_.
If src is GpuVectorT, copy to gpuVectorT_.
-
void
copyFrom
(const T *data, size_t size, bool useGpu)¶ copy data.
If useGpu is false, copy host data to cpuVectorT_.
If useGpu is true, copy device data to gpuVectorT_.
- Note
- The data address should be consistent with useGpu.
-
void
copyFrom
(const T *data, size_t size, hl_stream_t stream, bool useGpu)¶
-
void
copyFrom
(CpuGpuVectorT<T> &src, size_t offset, size_t size, bool useGpu, hl_stream_t stream)¶ copy from (src + offset) using the specified stream.
-
void
copyFrom
(CpuGpuVectorT<T> &src, hl_stream_t stream)¶ copy from src using the specified stream.
-
SyncedFlag *
getSync
() const¶ return sync_.
-
void
setSync
(SyncedFlag *sync)¶ set sync_.
-
void
setSync
(SyncedFlag syncFlag)¶
-
void
setSync
(bool useGpu)¶
Public Static Functions
-
std::shared_ptr<CpuGpuVectorT<T>>
create
(size_t size, bool useGpu)¶
resize or create CpuGpuVectorT.
Protected Functions
-
void
resizeOrCreate
(size_t size, bool useGpu)¶
-
void
copyToCpu
()¶ copy between cpuVectorT_ and gpuVectorT_.
If syncFlag_ is DATA_AT_CPU or SYNCED, do nothing.
If syncFlag_ is DATA_AT_GPU, copy gpuVectorT_ to cpuVectorT_ and set syncFlag_ to SYNCED.
-
void
copyToGpu
()¶ copy between cpuVectorT_ and gpuVectorT_.
If syncFlag_ is DATA_AT_GPU or SYNCED, do nothing.
If syncFlag_ is DATA_AT_CPU, copy cpuVectorT_ to gpuVectorT_ and set syncFlag_ to SYNCED.
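A minimal sketch of the lazy-synchronization pattern these two protected functions describe, with hypothetical names (the real class keeps the flag in syncFlag_ and the data in cpuVectorT_/gpuVectorT_; the memcpy calls are placeholders):

    #include <cstddef>

    enum SyncedFlag { DATA_AT_CPU, DATA_AT_GPU, SYNCED };

    struct TwoCopies {
      float* cpu; float* gpu; size_t n; SyncedFlag flag;

      void copyToCpu() {            // make the CPU copy valid
        if (flag == DATA_AT_GPU) {
          // device-to-host copy of n elements would happen here
          flag = SYNCED;
        }                           // DATA_AT_CPU or SYNCED: nothing to do
      }
      void copyToGpu() {            // make the GPU copy valid
        if (flag == DATA_AT_CPU) {
          // host-to-device copy of n elements would happen here
          flag = SYNCED;
        }                           // DATA_AT_GPU or SYNCED: nothing to do
      }
    };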
-
- template <class T>
-
class
CpuVectorT
¶ Inherits from paddle::VectorT< T >
Subclassed by paddle::ParallelCpuVectorT< T >
Public Functions
-
CpuVectorT
(size_t size)¶
-
CpuVectorT
(size_t size, MemoryHandlePtr memoryHandle, size_t offset)¶
-
CpuVectorT
(size_t size, T *data)¶
-
CpuVectorT
(const VectorT<T> &src)¶ If src is a CpuVector, the new CpuVector will share the data with src
If src is a GpuVector, the new CpuVector will copy data from src
-
virtual MemoryHandlePtr
newMemory
(size_t size)¶
-
virtual void
zeroMem
()¶
-
virtual void
reset
(const T &value)¶
-
virtual void
fillSequence
()¶
-
virtual void
copyFrom
(const T *src, size_t size)¶ copy size elements from src
If this is GpuVector, src can be cpu or gpu memory
If this is CpuVector, src is assumed to be cpu memory
-
virtual void
copyFrom
(const T *src, size_t size, hl_stream_t stream)¶ copy size elements from src
If this is GpuVector, src can be cpu or gpu memory
If this is CpuVector, src is assumed to be cpu memory,
-
virtual void
copyFrom
(const VectorT<T> &src)¶ This function will crash if the size of src and dest is different.
-
virtual void
copyFrom
(const VectorT<T> &src, hl_stream_t stream)¶ If using GPU, this function will push the copy task to the specified stream and return immediately.
If not using GPU, this function is the same as copyFrom(const VectorT<T>& src), which uses stream HPPL_STREAM_DEFAULT.
-
virtual void
copyTo
(CpuVectorT<T> *dest) const¶
-
virtual void
copyTo
(GpuVectorT<T> *dest) const¶
-
virtual T *
getPoint
(const uint64_t beginPos)¶ Get the buffer pointer starting at beginPos.
-
virtual T
getElement
(size_t i) const¶ Get the value for the i’th element.
-
virtual void
setElement
(size_t i, const T &value)¶
-
virtual T
getAbsSum
()¶
-
virtual T
getSum
()¶
-
virtual T
getMax
()¶
-
virtual T
getAbsMax
()¶
-
virtual T
getMin
()¶
-
virtual void
selectFrom
(const VectorT<T> &src, const VectorT<int> &ids)¶ select elements indexed by ids from vector src
-
virtual void
histogram
(std::ostream &os, int type)¶ print histogram of vector values
- Note
- only exponent histogram supported currently
-
virtual void
rand
()¶ generate uniform random value for each element
-
virtual void
rand
(size_t classes)¶ generate uniform random value for each element, data range is from 0 to (classes - 1).
-
virtual void
randnorm
(real mean, real standardDeviation)¶ generate univariate Gaussian distributed random numbers with given mean and standardDeviation.
-
virtual void
uniform
(real left, real right)¶ generate uniform distributed random numbers with given range.
-
virtual T
get
(size_t pos)¶ Debug use only. Very inefficient for GPU vector. get the value at pos.
-
- template <class T>
-
class
GpuVectorT
¶ Inherits from paddle::VectorT< T >
Public Functions
-
GpuVectorT
(size_t size)¶
-
GpuVectorT
(size_t size, GpuMemHandlePtr memHandle, size_t offset)¶
-
GpuVectorT
(size_t size, T *data)¶
-
virtual MemoryHandlePtr
newMemory
(size_t size)¶
-
virtual void
zeroMem
()¶
-
virtual void
reset
(const T &value)¶
-
virtual void
fillSequence
()¶
-
virtual void
copyFrom
(const T *src, size_t size)¶ copy size elements from src
If this is GpuVector, src can be cpu or gpu memory
If this is CpuVector, src is assumed to be cpu memory
-
virtual void
copyFrom
(const T *src, size_t size, hl_stream_t stream)¶ copy size elements from src
If this is GpuVector, src can be cpu or gpu memory
If this is CpuVector, src is assumed to be cpu memory,
-
virtual void
copyFrom
(const VectorT<T> &src)¶ This function will crash if the size of src and dest is different.
-
virtual void
copyFrom
(const VectorT<T> &src, hl_stream_t stream)¶ If using GPU, this function will push the copy task to the specified stream and return immediately.
If not using GPU, this function is the same as copyFrom(const VectorT<T>& src), which uses stream HPPL_STREAM_DEFAULT.
-
virtual T
getElement
(size_t i) const¶ Get the value for the i’th element.
-
virtual void
setElement
(size_t i, const T &value)¶
-
virtual T *
getPoint
(const uint64_t beginPos)¶ Get the buffer pointer starting at beginPos.
-
virtual T
getAbsSum
()¶
-
virtual T
getSum
()¶
-
virtual T
getMax
()¶
-
virtual T
getAbsMax
()¶
-
virtual T
getMin
()¶
-
virtual void
selectFrom
(const VectorT<T> &src, const VectorT<int> &ids)¶ select elements indexed by ids from vector src
-
virtual void
histogram
(std::ostream &os, int type)¶ print histogram of vector values
- Note
- only exponent histogram supported currently
-
virtual void
rand
()¶ generate uniform random value for each element
-
virtual void
rand
(size_t classes)¶ generate uniform random value for each element, data range is from 0 to (classes - 1).
-
virtual void
randnorm
(real mean, real standardDeviation)¶ generate univariate Gaussian distributed random numbers with given mean and standardDeviation.
-
virtual void
uniform
(real left, real right)¶ generate uniform distributed random numbers with given range.
-
virtual T
get
(size_t pos)¶ Debug use only. Very inefficient for GPU vector. get the value at pos.
Protected Functions
-
virtual void
copyTo
(CpuVectorT<T> *dest) const¶
-
virtual void
copyTo
(GpuVectorT<T> *dest) const¶
-
- template <class T>
-
class
ParallelCpuVectorT
¶ Inherits from paddle::CpuVectorT< T >
Public Functions
-
ParallelCpuVectorT
(size_t size, SyncThreadPool *pool)¶
-
virtual void
zeroMem
()¶
-
virtual void
randnorm
(real mean, real standardDeviation)¶ generate univariate Gaussian distributed random numbers with given mean and standardDeviation.
-
virtual void
uniform
(real left, real right)¶ generate uniform distributed random numbers with given range.
-
virtual void
exec
(SyncThreadPool::JobFunc func)¶ exec a func in single/multi thread
Private Types
-
typedef std::function<void(CpuVectorT<T>& vec)>
ExecFunc
¶
Private Members
-
SyncThreadPool *
pool_
¶
-
- template <class T>
-
class
VectorT
¶ - #include <Vector.h>
Copy or assignment constructors will share the data rather than making a copy of the original data. To make a copy of the original data, use copyFrom() instead.
Inherits from paddle::BaseVector< T >
Subclassed by paddle::CpuVectorT< T >, paddle::GpuVectorT< T >
Public Functions
-
virtual
~VectorT
()¶
-
size_t
getSize
() const¶
-
const T *
getData
() const¶
-
T *
getData
()¶
-
virtual void
zeroMem
() = 0¶
-
virtual void
reset
(const T &value) = 0¶
-
virtual void
fillSequence
() = 0¶
-
MemoryHandlePtr
getMemoryHandle
() const¶
-
void
resize
(size_t newSize)¶ resizing to a big vector will not preserve old values.
-
virtual MemoryHandlePtr
newMemory
(size_t size) = 0¶
-
void
subVecFrom
(const VectorT<T> &src, size_t start, size_t size)¶ form sub vector from src, shallow copy
-
void
subVecFrom
(const T *src, size_t start, size_t size)¶ form sub vector from src, shallow copy
-
void
subVecFrom
(const VectorT<T> &src, std::pair<size_t, size_t> interval)¶ form sub vector from src, shallow copy in interval [interval.first, interval.second)
-
virtual void
copyFrom
(const VectorT<T> &src) = 0¶ This function will crash if the size of src and dest is different.
-
virtual void
copyFrom
(const VectorT<T> &src, hl_stream_t stream) = 0¶ If using GPU, this function will push the copy task to the specified stream and return immediately.
If not using GPU, this function is the same as copyFrom(const VectorT<T>& src), which uses stream HPPL_STREAM_DEFAULT.
-
virtual void
copyFrom
(const T *src, size_t size) = 0¶ copy size elements from src
If this is GpuVector, src can be cpu or gpu memory
If this is CpuVector, src is assumed to be cpu memory
-
virtual void
copyFrom
(const T *src, size_t size, hl_stream_t stream) = 0¶ copy size elements from src
If this is GpuVector, src can be cpu or gpu memory
If this is CpuVector, src is assumed to be cpu memory
-
virtual void
exec
(SyncThreadPool::JobFunc func)¶ exec a func in single/multi thread
-
virtual T *
getPoint
(const uint64_t beginPos) = 0¶ Get the buffer point with beginPos.
-
virtual T
getElement
(size_t i) const = 0¶ Get the value for the i’th element.
-
virtual void
setElement
(size_t i, const T &value) = 0¶
-
virtual T
getAbsSum
() = 0¶
-
virtual T
getSum
() = 0¶
-
virtual T
getMax
() = 0¶
-
virtual T
getAbsMax
() = 0¶
-
virtual T
getMin
() = 0¶
-
virtual void
isEqualTo
(const VectorT<T> &b, const T &value) = 0¶ element-wise calc: this = (b == value)
-
virtual void
selectFrom
(const VectorT<T> &src, const VectorT<int> &ids) = 0¶ select elements indexed by ids from vector src
-
virtual void
histogram
(std::ostream &os, int type = HISTOGRAM_EXPONENT) = 0¶ print histogram of vector values
- Note
- only the exponent histogram is currently supported
-
virtual void
rand
() = 0¶ generate uniform random value for each element
-
virtual void
rand
(size_t classes) = 0¶ generate uniform random value for each element, data range is from 0 to (classes - 1).
-
virtual T
get
(size_t pos) = 0¶ Debug use only. Very inefficient for GPU vectors. Get the value at pos.
-
virtual void
randnorm
(real mean, real standardDeviation) = 0¶ generate univariate Gaussian distributed random numbers with given mean and standardDeviation.
-
virtual void
uniform
(real left, real right) = 0¶ generate uniform distributed random numbers with given range.
Public Static Functions
-
std::shared_ptr<VectorT<T>>
create
(size_t size, MemoryHandlePtr memoryHandle, size_t offset = 0)¶
-
std::shared_ptr<VectorT<T>>
createParallelVector
(size_t size, bool useGpu, SyncThreadPool *pool = nullptr)¶
Protected Functions
-
VectorT
(size_t size, MemoryHandlePtr memoryHandle, size_t offset, bool useGpu)¶
-
VectorT
(size_t size, T *data, bool useGpu)¶
-
virtual void
copyTo
(CpuVectorT<T> *dest) const = 0¶
-
virtual void
copyTo
(GpuVectorT<T> *dest) const = 0¶
Protected Attributes
-
MemoryHandlePtr
memoryHandle_
¶
Friends
-
friend
paddle::GpuVectorT< T >
-
friend
paddle::CpuVectorT< T >
-
namespace
paddle
¶ Typedefs
-
typedef std::shared_ptr<_hl_sparse_matrix_s>
hl_sparse_matrix_s_ptr
¶
-
class
GpuSparseMatrix
¶ Inherits from paddle::Matrix
Public Functions
-
GpuSparseMatrix
(size_t height, size_t width, size_t nnz, SparseValueType valueType = FLOAT_VALUE, SparseFormat format_ = SPARSE_CSR, bool trans = false)¶
-
GpuSparseMatrix
(GpuMemHandlePtr dataHandle, hl_sparse_matrix_s_ptr sMatrix, size_t height, size_t width, size_t nnz, SparseValueType valueType = FLOAT_VALUE, SparseFormat format_ = SPARSE_CSR, bool trans = false, MemoryHandlePtr sMemoryHandle = NULL)¶
-
GpuSparseMatrix
(real *value, int *rows, int *cols, size_t height, size_t width, size_t nnz, SparseValueType valueType, SparseFormat format, bool trans)¶
-
GpuSparseMatrix
(hl_sparse_matrix_s_ptr sMatrix, size_t height, size_t width, size_t nnz, SparseValueType valueType, SparseFormat format, bool trans, MemoryHandlePtr sMemoryHandle)¶
-
~GpuSparseMatrix
()¶
-
virtual void
resize
(size_t newHeight, size_t newWidth, size_t newNnz, SparseValueType valueType, SparseFormat format)¶ - Note
- This should only be used for sparse matrix.
-
virtual void
resize
(size_t newHeight, size_t newWidth)¶ - Note
- Original data may not be preserved after resize().
-
void
sparseResizeCSR
()¶
-
void
sparseResizeCSC
()¶
-
void
resizeCSR
(size_t newHeight, size_t newWidth, size_t newNnz, SparseValueType valueType)¶
-
void
resizeCSC
(size_t newHeight, size_t newWidth, size_t newNnz, SparseValueType valueType)¶
-
void
mul
(const GpuMatrixPtr a, const GpuMatrixPtr b, real scaleAB, real scaleT)¶
-
virtual void
copyFrom
(const Matrix &src, hl_stream_t stream)¶
-
void
copyFromCSR
(CpuSparseMatrix &src, hl_stream_t stream)¶
-
void
copyFromCSC
(CpuSparseMatrix &src, hl_stream_t stream)¶
-
virtual void
copyFrom
(const IVector &src)¶ convert an int vector to a real matrix.
(1) source and dest are both in CPU memory.
(2) sizes match exactly.
-
void
copyFrom
(const IVector &src, hl_stream_t stream)¶
- template <class T>
-
void
copyFrom
(int64_t *ids, int64_t *indices, T *data, hl_stream_t stream)¶
-
virtual void
setRow
(size_t row, size_t colNum, const unsigned int *cols, const real *values)¶ This should only be used for sparse matrix.
Currently must be called for each row in order. The matrix is not valid until setRow is called for the last row.
-
virtual SparseValueType
getValueType
() const¶
-
virtual SparseFormat
getFormat
() const¶
-
const int *
getRowCols
(size_t x) const¶
-
const real *
getRowValues
(size_t x) const¶
-
size_t
getColNum
(size_t x) const¶
-
virtual void
print
(std::ostream &os) const¶ print out the values of elements to os
-
virtual void
zeroMem
()¶ only set value_ of FLOAT_VALUE sparse matrix to zero
-
void
add3
(GpuMatrix *b)¶ sparseMatrix += denseMatrix
Named add3 only because add/add2 are already used in BaseMatrix.cu and they are not virtual functions.
Only values in the dense matrix at (row, col) positions that exist in the sparse matrix are added; other values are ignored.
- Parameters
b
-dense matrix
-
virtual void
add3
(MatrixPtr b)¶ matrix element-wise add
Named add3 only because add/add2 are already used in BaseMatrix.cu and they are not virtual functions.
-
virtual void
addBias
(Matrix &b, real scale)¶ sparseMatrix[i,j] += bias[j], (j is the col index of sparse matrix)
- Parameters
b
-bias, dense matrix and height = 1
scale
-scale of b
-
virtual int *
getRows
() const¶ return rows, which is gpu address
-
virtual int *
getCols
() const¶ return cols, which is gpu address
-
real *
getValue
() const¶ return value, which is gpu address
-
virtual real *
getData
()¶ return value_ of sparse matrix
Sometimes a CpuSparseMatrix is held as a Matrix; using getValue would then require a dynamic_cast to CpuSparseMatrix, so getData is a convenient way to get the values
-
virtual const real *
getData
() const¶
-
virtual void
rowMax
(IVector &maxIds, Matrix &maxVal)¶ Get the top k values of each row in the sparse matrix.
Store the values in maxVal and their indices in maxIds. k = maxVal.width
- Parameters
maxIds
-index of top k
maxVal
-value of top k
-
virtual void
mul
(const MatrixPtr a, const MatrixPtr b, real scaleAB, real scaleT)¶ this = scaleAB*(a*b) + scaleT*this
-
void
copyFrom
(CpuSparseMatrix &src, hl_stream_t stream)¶
-
void
copyFrom
(GpuSparseMatrix &src, hl_stream_t stream)¶
-
virtual void
trimFrom
(const CpuSparseMatrix &src)¶
-
void
trimFromCSR
(const CpuSparseMatrix &src)¶
-
void
trimFromCSC
(const CpuSparseMatrix &src)¶
-
virtual bool
isSparse
() const¶
Public Members
-
MemoryHandlePtr
sMemoryHandle_
¶
-
int *
rows_
¶
-
int *
cols_
¶
-
real *
value_
¶
-
const char *
end_
¶
-
hl_sparse_matrix_s_ptr
sMatrix_
¶
-
SparseValueType
valueType_
¶
-
SparseFormat
format_
¶
Protected Functions
-
void
sparseResize
()¶
-
void
copyRow
(int offsets, size_t colNum, const sparse_non_value_t *row)¶
-
void
copyRow
(int offsets, size_t colNum, const sparse_float_value_t *row)¶
-
Functions
-
P_DECLARE_bool
(allow_inefficient_sparse_update)¶
-
namespace
paddle
¶ -
class
CacheRowCpuMatrix
¶ Inherits from paddle::SparseAutoGrowRowCpuMatrix
Public Functions
-
CacheRowCpuMatrix
(size_t height, size_t width, IndexDictPtr indexDictHandle = nullptr, bool trans = false)¶
-
void
setSourceData
(CpuVectorPtr sourceVec)¶
-
real *
getRow
(size_t row)¶
-
virtual real *
getRowBuf
(size_t row)¶
-
virtual void
mul
(CpuSparseMatrix *a, CpuMatrix *b, real scaleAB, real scaleT)¶
-
-
class
SparseAutoGrowRowCpuMatrix
¶ Inherits from paddle::SparseRowCpuMatrix
Subclassed by paddle::CacheRowCpuMatrix
-
class
SparsePrefetchRowCpuMatrix
¶ - #include <SparseRowMatrix.h>
For prefetching parameters from remote Parameter server.
Inherits from paddle::SparseRowCpuMatrix
Public Functions
-
SparsePrefetchRowCpuMatrix
(CpuMemHandlePtr dataHandle, size_t height, size_t width, IndexDictPtr indexDictHandle = nullptr, SyncThreadPool *pool = nullptr, bool trans = false)¶
-
void
addRows
(MatrixPtr input)¶ Extract feature ids from input to fill the row indices.
input must be a sparse matrix.
Can be called many times before setupIndices(); a usage sketch follows after setupIndices().
-
void
addRows
(IVectorPtr ids)¶
-
void
setupIndices
()¶ set up the global indices of SparseRowMatrix after all rows have been added.
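A minimal usage sketch of the addRows()/setupIndices() protocol above; dataHandle, height, width and the sparse input batches are placeholders, not values taken from the library.
// Sketch: collect feature ids from several sparse input batches, then
// finalize the global index mapping exactly once.
SparsePrefetchRowCpuMatrix prefetchMat(dataHandle, height, width);
prefetchMat.addRows(sparseInput1);   // MatrixPtr holding a sparse matrix
prefetchMat.addRows(sparseInput2);   // may be called many times before setup
prefetchMat.setupIndices();          // global indices are valid from here on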
Protected Functions
-
void
addRows
(const unsigned int *ids, size_t len)¶
Protected Attributes
-
SyncThreadPool *
pool_
¶
-
-
class
SparseRowCpuMatrix
¶ - #include <SparseRowMatrix.h>
Sparse Row
Inherits from paddle::CpuMatrix
Subclassed by paddle::SparseAutoGrowRowCpuMatrix, paddle::SparsePrefetchRowCpuMatrix
Public Functions
-
SparseRowCpuMatrix
(CpuMemHandlePtr dataHandle, size_t height, size_t width, IndexDictPtr indexDictHandle = nullptr, bool trans = false)¶ heightStore is max number of rows of the sparse matrix.
-
virtual
~SparseRowCpuMatrix
()¶
-
real *
getRow
(size_t row)¶ Get the row buf
- Parameters
row
-row id in the original matrix
-
real *
getLocalRow
(size_t row)¶ Get the row buf
- Parameters
row
-row id in local storage
-
void
reserveStore
()¶ reserve the storage for rows according to current size of indexDictHandle.
This is only used when SparseRowCpuMatrix is constructed with indexDictHandle.
-
virtual real *
getRowBuf
(size_t row)¶
-
virtual void
mul
(CpuSparseMatrix *a, CpuMatrix *b, real scaleAB, real scaleT)¶
-
virtual void
copyFrom
(const real *src, size_t size)¶ Fill data according to the row indices added; indices are set up inside.
src and size are the data and size of a normal dense CpuMatrix.
-
virtual void
zeroMem
()¶
-
void
applyL1Decay
(real learningRate, real decayRate)¶ apply L1 decay to all sparse rows; should be applied after the indices are ready.
-
void
clearIndices
()¶
-
void
zeroMemThread
(size_t tid, size_t numThreads)¶
-
void
sgdUpdate
(BaseMatrix &value, IVector &t0, real learningRate, int currentTime, real decayRate, bool useL1, bool fini = false)¶ value -= grad * learningRate, where this matrix holds the gradient.
If L1 decay is set, L1 is used; else if L2 is set, L2 is used; otherwise no decay is applied at all.
t0 is an int vector used by L1/L2 decay; its size equals the height of the parameter matrix, and it stores the time at which each weight row was last updated.
Time is the batchId, and currentTime is the current batchId.
When a pass has finished, the caller should call this function one more time with fini=true to let the weight decay catch up to the current time.
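A sketch of the calling convention described above; grad (this matrix), value, t0, and the hyper-parameters are placeholders.
// Sketch: per-batch update during a pass, then a final fini=true call.
grad.sgdUpdate(value, t0, learningRate, currentBatchId,
               decayRate, /*useL1=*/true, /*fini=*/false);
// ... one such call per batch ...
// After the pass, let the weight decay catch up on rows not touched recently:
grad.sgdUpdate(value, t0, learningRate, lastBatchId,
               decayRate, /*useL1=*/true, /*fini=*/true);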
-
void
addTo
(BaseMatrix &dest, std::vector<uint32_t> &ids, size_t tid, size_t numThreads)¶ merge the rows in this into dest for the designated thread
Values are added to the dest matrix.
Ids occurring in this are appended to ids, filtered by (id % numThreads == tid).
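A sketch of how the (id % numThreads == tid) partition might be used, with each worker thread merging a disjoint subset of rows; localGrad, dest, idsForThread, tid, and numThreads are placeholders.
// Sketch: worker tid merges only the rows whose id % numThreads == tid,
// so concurrent workers write disjoint rows of dest. In practice one such
// call runs per worker thread.
void mergeWorker(SparseRowCpuMatrix& localGrad, BaseMatrix& dest,
                 std::vector<uint32_t>& idsForThread,
                 size_t tid, size_t numThreads) {
  localGrad.addTo(dest, idsForThread, tid, numThreads);
}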
-
void
addTo
(SparseRowCpuMatrix &dest, size_t tid, size_t numThreads)¶ the second version of addTo(), where dest is a SparseRowCpuMatrix.
The dest's indices should already be set up; addTo() will check that the src ids exist in dest's indices.
-
const IndexDictPtr &
getIndexDictHandle
() const¶
-
void
checkIndices
()¶ check the consistency of all local and global indices
-
void
checkIndex
(size_t i)¶ check whether row i exists in the indices
-
std::vector<unsigned int> &
getLocalIndices
() const¶
Protected Functions
- template <typename Func>
-
void
apply
(Func f)¶
-
void
init
(size_t height, size_t width)¶
-
void
clearRows
()¶ clear row indices.
-
void
checkStoreSize
()¶
Protected Attributes
-
std::vector<real, AlignedAllocator<real, 32>>
rowStore_
¶
-
IndexDictPtr
indexDictHandle_
¶
-
std::vector<unsigned int> *
localIndices_
¶
-
unsigned int *
globalIndices_
¶
Protected Static Attributes
-
const unsigned int
kUnusedId_
¶
-
struct
IndexDict
¶
-
-
class
SparseRowIdsCpuMatrix
¶ - #include <SparseRowMatrix.h>
Sparse Row Ids Matrix.
Mostly the same as CpuMatrix, but maintains the sparse row ids that occurred; the ids are hashed by worker thread id.
Inherits from paddle::CpuMatrix
Public Functions
-
SparseRowIdsCpuMatrix
(CpuMemHandlePtr dataHandle, size_t height, size_t width, bool trans = false)¶
-
void
setNumOfThreads
(size_t numOfThreads)¶
-
std::vector<uint32_t> &
getIds
(size_t threadId)¶
Private Members
-
std::vector<std::vector<uint32_t>>
idsArray_
¶
-
-
namespace
paddle
¶ -
class
CpuSparseMatrix
¶ Inherits from paddle::Matrix
Public Functions
-
CpuSparseMatrix
(size_t height, size_t width, size_t nnz, SparseValueType valueType = FLOAT_VALUE, SparseFormat format = SPARSE_CSR, bool trans = false)¶
-
CpuSparseMatrix
(CpuMemHandlePtr memHandle, size_t height, size_t width, size_t nnz, SparseValueType valueType, SparseFormat format, bool trans)¶
-
CpuSparseMatrix
(real *data, int *rows, int *cols, size_t height, size_t width, size_t nnz, SparseValueType valueType, SparseFormat format, bool trans)¶
-
~CpuSparseMatrix
()¶
-
virtual void
resize
(size_t newHeight, size_t newWidth, size_t newNnz, SparseValueType valueType, SparseFormat format)¶ - Note
- This should only be used for sparse matrix.
-
virtual void
resize
(size_t newHeight, size_t newWidth)¶ - Note
- Original data may not be preserved after resize().
-
SparseValueType
getValueType
()¶
-
real *
getRowValues
(size_t i) const¶
-
int *
getRowCols
(size_t i) const¶
-
void
fillRowIndices
(IVectorPtr &outVec) const¶ fill row indices of each value in CSR matrix
-
size_t
getColNum
(size_t i) const¶
-
real *
getColumn
(size_t i) const¶
-
size_t
getColStartIdx
(size_t i) const¶
-
size_t
getRowStartIdx
(size_t i) const¶
-
size_t
getRowNum
(size_t i) const¶
-
virtual real
getSum
()¶
-
virtual void
square
()¶
-
virtual real
getMin
()¶ only nonzero values are considered; the actual min value should also be compared with 0.0.
-
virtual real
getMax
()¶ only nonzero values are considered; the actual max value should also be compared with 0.0.
-
virtual void
rowMax
(IVector &maxIds, Matrix &max)¶ Get the top k elements of each row of this matrix.
The column ids and values of these elements are stored in maxIds and max respectively. Note that the top k elements are not sorted.
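A sketch of retrieving the top-2 entries per row. It assumes the usual Matrix::create(height, width, trans, useGpu) and IVector::create(size, useGpu) factories, which are not listed in this section, as well as placeholder variables height and sparseMat; treat these as assumptions.
// Sketch: top-2 values per row of a (height x width) CpuSparseMatrix.
// maxIds receives the column ids, maxVal the values (unsorted within a row).
size_t k = 2;
IVectorPtr maxIds = IVector::create(height * k, /*useGpu=*/false);
MatrixPtr  maxVal = Matrix::create(height, k, /*trans=*/false, /*useGpu=*/false);
sparseMat.rowMax(*maxIds, *maxVal);   // k is taken from maxVal's width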
-
virtual int *
getRows
() const¶
-
virtual int *
getCols
() const¶
-
real *
getValue
() const¶
-
virtual SparseFormat
getFormat
() const¶
-
virtual SparseValueType
getValueType
() const¶
-
virtual real *
getData
()¶ return value_ of sparse matrix
Sometimes a CpuSparseMatrix is held as a Matrix; using getValue would then require a dynamic_cast to CpuSparseMatrix, so getData is a convenient way to get the values
-
virtual const real *
getData
() const¶
-
virtual void
zeroMem
()¶ only set value_ of FLOAT_VALUE sparse matrix to zero
-
virtual void
transpose
(MatrixPtr matTrans, bool memAlloc)¶ memory MUST be allocated outside when memAlloc=false
-
virtual void
mul
(MatrixPtr a, MatrixPtr b, real scaleAB, real scaleT)¶ this = scaleAB*(a*b) + scaleT*this
-
void
add3
(CpuMatrix *b)¶ sparseMatrix += denseMatrix
Named add3 only because add/add2 are already used in BaseMatrix.cu and they are not virtual functions.
Only values in the dense matrix at (row, col) positions that also exist in the sparse matrix are added; values whose positions are not in the sparse matrix are ignored.
- Parameters
b
-dense matrix
-
virtual void
add3
(MatrixPtr b)¶ matrix element-wise add
Named add3 only because add/add2 are already used in BaseMatrix.cu and they are not virtual functions.
-
virtual void
addBias
(Matrix &b, real scale)¶ sparseMatrix[i,j] += bias[j], (j is the col index of sparse matrix)
- Parameters
b
-bias, dense matrix and height = 1
scale
-scale of b
-
virtual void
print
(std::ostream &os) const¶ print out the values of elements to os
-
virtual void
printOneRow
(std::ostream &os, size_t idx) const¶ print one row to os
-
virtual void
setRow
(size_t row, size_t colNum, const unsigned int *cols, const real *values)¶ This should only be used for sparse matrix.
Currently must be called for each row in order. The matrix is not valid until setRow is called for the last row.
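A sketch of the row-by-row filling protocol above, using the CpuSparseMatrix constructor listed at the top of this class; the concrete sizes and values are illustrative only.
// Sketch: fill a 3x4 CSR matrix with 4 nonzeros, one row at a time.
// The matrix is only valid after setRow() has been called for the last row.
CpuSparseMatrix m(/*height=*/3, /*width=*/4, /*nnz=*/4,
                  FLOAT_VALUE, SPARSE_CSR, /*trans=*/false);
const unsigned int cols0[] = {0, 2};  const real vals0[] = {1.0f, 2.0f};
const unsigned int cols1[] = {1};     const real vals1[] = {3.0f};
const unsigned int cols2[] = {3};     const real vals2[] = {4.0f};
m.setRow(0, 2, cols0, vals0);
m.setRow(1, 1, cols1, vals1);
m.setRow(2, 1, cols2, vals2);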
-
virtual void
randomizeUniform
()¶
-
void
copyFrom
(const GpuSparseMatrix &src, hl_stream_t stream)¶
-
virtual void
copyFrom
(const Matrix &src, hl_stream_t stream = HPPL_STREAM_DEFAULT)¶
-
CpuSparseMatrixPtr
getTmpSparseMatrix
(size_t height, size_t width)¶ Get a temporary matrix. This is thread-safe. It should only be used temporarily, i.e. do not store it or use it as a return value.
- Note
- Do NOT use a large number of tmp matrices.
-
void
copyFrom
(std::vector<int> &rows, std::vector<int> &cols, std::vector<real> &values)¶
-
void
copyFrom
(const CpuSparseMatrix &src)¶
-
virtual void
trimFrom
(const CpuSparseMatrix &src)¶
-
void
copyRow
(int offsets, size_t colNum, const sparse_non_value_t *row)¶
-
void
copyRow
(int offsets, size_t colNum, const sparse_float_value_t *row)¶
- template <class T>
-
void
copyFrom
(int64_t *ids, int64_t *indices, T *data)¶
- template <class T>
-
void
copyFrom
(int64_t *indices, T *data)¶
-
virtual void
copyFrom
(const real *src, size_t size)¶ If this is GpuMatrix, src is assumed to be CPU memory
If this is CpuMatrix, src is assumed to be CPU memory
-
virtual bool
isSparse
() const¶
Protected Functions
-
void
sparseResize
()¶
Protected Attributes
-
int *
rows_
¶
-
int *
cols_
¶
-
real *
value_
¶
-
SparseFormat
format_
¶
-
SparseValueType
valueType_
¶
Protected Static Attributes
-
const size_t
DEFAULT_AVG_WIDTH
¶
-
ThreadLocal<std::vector<CpuSparseMatrixPtr>>
cpuLocalMats_
¶
Private Functions
-
virtual MatrixPtr
clone
(size_t height = 0, size_t width = 0, bool useGpu = false)¶ Create a matrix with the same type (GpuMatrix, CpuMatrix, NonValueSparseMatrix, etc.) as this.
If height and width are zero, the new matrix will have the same size as this; otherwise the new matrix will have the specified size.
-
Others¶
-
namespace
paddle
¶ Functions
- template <class T>
-
void
gemm
(const CBLAS_TRANSPOSE transA, const CBLAS_TRANSPOSE transB, const int M, const int N, const int K, const T alpha, const T *A, const int lda, const T *B, const int ldb, const T beta, T *C, const int ldc)¶
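A sketch of a dense gemm call with the BLAS-style semantics C = alpha * op(A) * op(B) + beta * C. It assumes row-major storage and the standard CblasNoTrans enumerator from the CBLAS header, neither of which is restated in this section.
// Sketch: C(2x2) = 1.0 * A(2x3) * B(3x2) + 0.0 * C, row-major leading dims.
const int M = 2, N = 2, K = 3;
float A[] = {1, 2, 3,
             4, 5, 6};          // 2x3, lda = K
float B[] = {1, 0,
             0, 1,
             1, 1};             // 3x2, ldb = N
float C[M * N] = {0};
paddle::gemm<float>(CblasNoTrans, CblasNoTrans, M, N, K,
                    1.0f, A, K, B, N, 0.0f, C, N);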
- template <class T>
-
void
axpy
(const int n, const T alpha, const T *x, T *y)¶
- template <class T>
-
T
dotProduct
(const int n, const T *x, const T *y)¶
-
namespace
paddle
¶ -
namespace
simd
¶ Functions
- template <typename Type>
-
void
addTo
(Type *a, const Type *b, size_t len)¶
- template <typename Type>
-
void
batchAddTo
(Type *a, const Type *b[], int batch, size_t len)¶
- template <typename Type>
-
void
colMax
(Type *result, const Type *data, int dim, int numSamples)¶
- template <typename Type>
-
void
decayL1
(Type *dst, Type *src, Type *lr, Type lambda, size_t len)¶
- template <typename Type>
-
void
decayL1
(Type *dst, Type *src, Type lambda, size_t len)¶
- template <size_t AlignSize>
-
bool
isPointerAlign
(void *ptr)¶
-
bool
vec_check
(size_t len)¶
- template <>
-
void
addTo
(float *a, const float *b, size_t len)¶
- template <>
-
void
batchAddTo
(float *a, const float *b[], int batch, size_t len)¶
- template <>
-
void
colMax
(float *result, const float *data, int dim, int numSamples)¶
- template <>
-
void
decayL1
(float *dst, float *src, float lambda, size_t len)¶
- template <>
-
void
decayL1
(float *dst, float *src, float *lr, float lambda, size_t len)¶
-
namespace
naive
¶ Functions
- template <typename Type>
-
void
addTo
(Type *a, const Type *b, size_t len)¶
- template <typename Type>
-
void
batchAddTo
(Type *a, const Type *b[], int batch, size_t len)¶
- template <typename Type>
-
void
colMax
(Type *result, const Type *data, int dim, int numSamples)¶ - Note
- this method is unused in paddle.
- template <typename Type>
-
void
decayL1
(Type *dst, Type *src, Type *lr, Type lambda, size_t len)¶
- template <class Type>
-
void
decayL1
(Type *dst, Type *src, Type lambda, size_t len)¶
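The naive namespace provides scalar reference versions of the simd routines above. As a reading aid only, the following sketch shows the element-wise behaviour these names conventionally denote (plain a[i] += b[i] accumulation and soft-threshold style L1 decay); this is an assumption about the intent, not a copy of the library's implementation, and the helper names are hypothetical.
// Sketch (assumed semantics, for orientation only):
// addTo:   a[i] += b[i]                          for i in [0, len)
// decayL1: dst[i] = x - lambda if x > lambda,
//          x + lambda if x < -lambda, else 0,    where x = src[i]
template <typename Type>
void addToRef(Type* a, const Type* b, size_t len) {
  for (size_t i = 0; i < len; ++i) a[i] += b[i];
}
template <typename Type>
void decayL1Ref(Type* dst, const Type* src, Type lambda, size_t len) {
  for (size_t i = 0; i < len; ++i) {
    Type x = src[i];
    dst[i] = (x > lambda) ? x - lambda : ((x < -lambda) ? x + lambda : Type(0));
  }
}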
-
namespace