Matrix¶
Base¶
Defines
- 
CAL_MATRIX_START_ADDRESS(address, height, width, ld, col, row)¶ Calculate matrix element address.
For instance, address of A[i][j] = i * ld + j.
- 
namespace 
paddle¶ Typedefs
- 
typedef bool_constant<bool, false> 
false_type¶ 
- 
typedef bool_constant<bool, true> 
true_type¶ 
- 
typedef BaseMatrixT<real> 
BaseMatrix¶ 
- 
typedef BaseMatrixT<int> 
IBaseMatrix¶ 
- 
class 
MatrixOffset¶ Public Functions
- 
MatrixOffset(size_t aCol = 0, size_t aRow = 0, size_t bCol = 0, size_t bRow = 0, size_t cCol = 0, size_t cRow = 0, size_t dCol = 0, size_t dRow = 0)¶ 
- 
 
- template <class T>
 - 
class 
BaseMatrixT¶ Subclassed by paddle::BaseVector< T >, paddle::Matrix
Public Functions
- 
virtual 
~BaseMatrixT()¶ 
- 
BaseMatrixT(size_t height, size_t width, T *data, bool trans, bool useGpu)¶ 
- 
BaseMatrixT(BaseMatrixT &mat, bool useGpu)¶ - Note
 - This constructor is for temporarily making a matrix with different useGpu flag as the original matrix so that mixed gpu/cpu operations can be performed successfully.
 
- 
BaseMatrixT(size_t height, size_t width, size_t stride, T *data, bool trans, bool use_gpu)¶ 
- 
void 
setData(T *data)¶ caller should make sure that the size of data is at least height*width
- template <class Op>
 - 
int 
applyUnary(Op op)¶ unary operator: element wise op(a).
for 0 <= i < this->height_ & for 0 <= j < this->width_.
- template <class Op>
 - 
int 
applyUnary(Op op, int numRows, int numCols, MatrixOffset &offset)¶ unary operator: element wise op(a).
for 0 <= i < numRows & for 0 <= j < numCols. While matrix start address is: A = this->data_ + offset.aRow_*ld + offset.aCol_;
- template <class Op>
 - 
int 
applyBinary(Op op, BaseMatrixT &b)¶ binary operator: element wise op(a, b).
for 0 <= i < this->height_ & for 0 <= j < this->width_. While this->height_ == b.height_ && this->width_ == b.width_.
- template <class Op, class bAsRowVector, class bAsColVector>
 - 
int 
applyBinary(Op op, BaseMatrixT &b, int numRows, int numCols, MatrixOffset &offset, bAsRowVector, bAsColVector)¶ binary operator: element wise op(a, b)
for 0 <= i < numRows & for 0 <= j < numCols. While matrix start address is: A = this->data_ + offset.aRow_*lda + offset.aCol_; B = b->data_ + offset.bRow_*ldb + offset.bCol_; if (bAsRowVector == false_type && bAsColVector == false_type) op(A[i * lda + j], B[i * ldb + j]) if (bAsRowVector == true_type && bAsColVector == false_type) op(A[i * lda + j], B[j]) if (bAsRowVector == false_type && bAsColVector == true_type) op(A[i * lda + j], B[i * ldb]) if (bAsRowVector == true_type && bAsColVector == true_type) op(A[i * lda + j], B[0])
- template <class Op>
 - 
int 
applyBinary(Op op, BaseMatrixT &b, int numRows, int numCols, MatrixOffset &offset)¶ 
- template <class Op>
 - 
int 
applyTernary(Op op, BaseMatrixT &b, BaseMatrixT &c)¶ ternary operator: element wise op(a, b, c).
for 0 <= i < this->height_ & for 0 <= j < this->width_. While this->height_ == b.height_ && this->width_ == b.width_ && this->height_ == c.height_ && this->width_ == c.width_
- template <class Op, class cAsRowVector, class cAsColVector>
 - 
int 
applyTernary(Op op, BaseMatrixT &b, BaseMatrixT &c, int numRows, int numCols, MatrixOffset &offset, cAsRowVector, cAsColVector)¶ ternary operator: element wise op(a, b, c).
for 0 <= i < numRows & for 0 <= j < numCols. While matrix start address is: A = this->data_ + offset.aRow_*lda + offset.aCol_; B = b->data_ + offset.bRow_*ldb + offset.bCol_; C = c->data_ + offset.cRow_*ldc + offset.cCol_; if (cAsRowVector == false_type && cAsColVector == false_type) op(A[i*lda + j], B[i*ldb + j], C[i*ldc + j]) if (cAsRowVector == true_type && cAsColVector == false_type) op(A[i*lda + j], B[i*ldb + j], C[j]) if (cAsRowVector == false_type && cAsColVector == true_type) op(A[i*lda + j], B[i*ldb + j], C[i*ldc]) if (cAsRowVector == true_type && cAsColVector == true_type) op(A[i*lda + j], B[i*ldb + j], C[0])
- template <class Op>
 - 
int 
applyTernary(Op op, BaseMatrixT &b, BaseMatrixT &c, int numRows, int numCols, MatrixOffset &offset)¶ 
- template <class Op>
 - 
int 
applyQuaternary(Op op, BaseMatrixT &b, BaseMatrixT &c, BaseMatrixT &d)¶ quaternary operator: element wise op(a, b, c, d).
for 0 <= i < this->height_ & for 0 <= j < this->width_. While this->height_ == b.height_ && this->width_ == b.width_ && this->height_ == c.height_ && this->width_ == c.width_ && this->height_ == d.height_ && this->width_ == d.width_
- template <class Op>
 - 
int 
applyQuaternary(Op op, BaseMatrixT &b, BaseMatrixT &c, BaseMatrixT &d, int numRows, int numCols, MatrixOffset &offset)¶ quaternary operator: element wise op(a, b, c, d).
for 0 <= i < numRows & for 0 <= j < numCols. While matrix start address is: A = this->data_ + offset.aRow_*lda + offset.aCol_; B = b->data_ + offset.bRow_*ldb + offset.bCol_; C = c->data_ + offset.cRow_*ldc + offset.cCol_; D = d->data_ + offset.dRow_*ldd + offset.dCol_;
- template <class Agg, class Op, class Saver, class aAsRowVector, class aAsColVector>
 - 
int 
aggregate(Agg agg, Op op, Saver sv, BaseMatrixT &b, int numRows, int numCols, MatrixOffset &offset, aAsRowVector, aAsColVector)¶ an aggregate expression applied to each row (or column) of matrix b. op and sv are element-wise operators.
if (aAsRowVector == true_type && aAsColVector == false_type) for each column j & 0 <= i < numRows, do: dst = agg(op(b[i*ldb + j])) a[j] = sv(a[j], dst) if (aAsRowVector == false_type && aAsColVector == true_type) for each row i & 0 <= j < numCols, do: dst = agg(op(b[i*ldb + j])) a[i] = sv(a[i], dst)
- template <class Agg, class Op, class Saver, class aAsRowVector, class aAsColVector>
 - 
int 
aggregate(Agg agg, Op op, Saver sv, BaseMatrixT &b, BaseMatrixT &c, int numRows, int numCols, MatrixOffset &offset, aAsRowVector, aAsColVector)¶ an aggregate expression applied to each row (or column) of matrices b and c.
op and sv are element-wise operators.
if (aAsRowVector == true_type && aAsColVector == false_type) for each column j & 0 <= i < numRows, do: dst = agg(op(b[i*ldb + j], c[i*ldc + j])) a[j] = sv(a[j], dst) if (aAsRowVector == false_type && aAsColVector == true_type) for each row i & 0 <= j < numCols, do: dst = agg(op(b[i*ldb + j], c[i*ldc + j])) a[i] = sv(a[i], dst)
- template <class Agg>
 - 
int 
applyRow(Agg agg, BaseMatrixT &b)¶ an aggregate expression applied to each row of matrix b.
for each row i & 0 <= j < b.width_, do: this[i] = agg(b[i*ldb + j])
- template <class Agg, class Saver>
 - 
int 
applyRow(Agg agg, Saver sv, BaseMatrixT &b)¶ an aggregate expression applied to each row of matrix b.
for each row i & 0 <= j < b.width_, do: dst = agg(b[i*ldb + j]) this[i] = sv(this[i], dst)
- template <class Agg>
 - 
int 
applyCol(Agg agg, BaseMatrixT &b)¶ an aggregate expression applied to each column of matrix b.
for each column j & 0 <= i < b.height_, do: this[j] = agg(b[i*ldb + j])
- template <class Agg, class Saver>
 - 
int 
applyCol(Agg agg, Saver sv, BaseMatrixT &b)¶ an aggregate expression applied to each column of matrix b.
for each column j & 0 <= i < b.height_, do: dst = agg(b[i*ldb + j]) this[j] = sv(this[j], dst)
- 
bool 
useGpu() const¶ 
- 
const T *
rowBuf(size_t row) const¶ 
- 
T *
rowBuf(size_t row)¶ 
- 
void 
neg()¶ unary operator.
- 
void 
exp()¶ 
- 
void 
pow(T p)¶ 
- 
void 
log()¶ 
- 
void 
sqrt()¶ 
- 
void 
square()¶ 
- 
void 
reciprocal()¶ 
- 
void 
abs()¶ 
- 
void 
sign()¶ 
- 
void 
zero()¶ 
- 
void 
zeroAtOffset(int64_t columnOffset, int64_t numColumns)¶ this(row, col + columnOffset) = 0 for 0 <= col < numColumns
- 
void 
one()¶ 
- 
void 
subScalar(T p)¶ 
- 
void 
mulScalar(T p)¶ 
- 
void 
divScalar(T p)¶ 
- 
void 
assign(T p)¶ this = p
- 
void 
add(T p)¶ this = this + p
- 
void 
add(T p1, T p2)¶ this = this*p1 + p2
- 
void 
clip(T p1, T p2)¶ this = this < low ? low : this
this = this > high ? high : this
- 
void 
biggerThanScalar(T p)¶ a = a > p ? 1.0f : 0.0f
- 
void 
downClip(T p)¶ a = a > p ? a : p
- 
void 
assign(BaseMatrixT &b)¶ this = b
- 
void 
assignAtOffset(BaseMatrixT &b, int64_t columnOffset)¶ If b.width + columnOffset <= this.width: this(row, col + columnOffset) = b(row, col) for 0 <= col < b.width. If this.width + columnOffset <= b.width: this(row, col) = b(row, col + columnOffset) for 0 <= col < this.width. Otherwise, FATAL.
- 
void 
add(BaseMatrixT &b)¶ this = this + b
- 
void 
addAtOffset(BaseMatrixT &b, int64_t columnOffset)¶ If b.width + columnOffset <= this.width: this(row, col + columnOffset) += b(row, col) for 0 <= col < b.width. If this.width + columnOffset <= b.width: this(row, col) += b(row, col + columnOffset) for 0 <= col < this.width. Otherwise, FATAL.
- 
void 
addColVector(BaseMatrixT &b)¶ 
- 
void 
addRowVector(BaseMatrixT &b)¶ 
- 
void 
addBias(BaseMatrixT &b, T scale)¶ 
- 
void 
mulRowVector(BaseMatrixT &b)¶ 
- 
void 
divRowVector(BaseMatrixT &b)¶ 
- 
void 
addP2P(BaseMatrixT &b)¶ 
- 
void 
add(BaseMatrixT &b, T p)¶ this = this + b*p
- 
void 
add(BaseMatrixT &b, T p1, T p2)¶ this = p1*this + p2*b
- 
void 
sub(BaseMatrixT &b)¶ this = this - b
- 
void 
sub(BaseMatrixT &b, T p)¶ this = this - b*p
- 
void 
relu(BaseMatrixT &b)¶ b = max(0, this)
- 
void 
reluDerivative(BaseMatrixT &b)¶ 
- 
void 
softrelu(BaseMatrixT &b)¶ b = log(1.0 + exp(this))
- 
void 
softreluDerivative(BaseMatrixT &b)¶ 
- 
void 
brelu(BaseMatrixT &b)¶ b = min(max(this, p1), p2)
- 
void 
breluDerivative(BaseMatrixT &b)¶ 
- 
void 
square(BaseMatrixT &b)¶ b = this * this
- 
void 
squareDerivative(BaseMatrixT &b)¶ 
- 
void 
tanh(BaseMatrixT &b)¶ b = tanh(this)
- 
void 
tanhDerivative(BaseMatrixT &b)¶ 
- 
void 
scaledTanh(BaseMatrixT &b, T p1, T p2)¶ b = p1 * tanh(p2 * this)
- 
void 
scaledTanhDerivative(BaseMatrixT &b, T p1, T p2)¶ 
- 
void 
reciprocal(BaseMatrixT &b)¶ b = 1.0f / this
- 
void 
reciprocalDerivative(BaseMatrixT &b)¶ 
- 
void 
abs(BaseMatrixT &b)¶ b = this > 0.0f ? this : -this
- 
void 
absDerivative(BaseMatrixT &b)¶ 
- 
void 
sigmoid(BaseMatrixT &b)¶ b = 1.0f / (1.0f + exp(-this))
- 
void 
sigmoidDerivative(BaseMatrixT &b)¶ 
- 
void 
expDerivative(BaseMatrixT &b)¶ b = a
- 
void 
sign(BaseMatrixT &b)¶ 
- 
void 
exp(BaseMatrixT &b)¶ 
- 
void 
pow(BaseMatrixT &b, T p)¶ 
- 
void 
log(BaseMatrixT &b)¶ 
- 
void 
sqrt(BaseMatrixT &b)¶ 
- 
void 
addScalar(BaseMatrixT &b, T p)¶ 
- 
void 
subScalar(BaseMatrixT &b, T p)¶ 
- 
void 
mulScalar(BaseMatrixT &b, T p)¶ 
- 
void 
divScalar(BaseMatrixT &b, T p)¶ 
- 
void 
scalarDiv(BaseMatrixT &b, T p)¶ 
- 
void 
invSqrt(BaseMatrixT &b)¶ this = 1.0f / sqrt(b)
- 
void 
isEqualTo(BaseMatrixT &b, T value)¶ this = (b == value)
- 
void 
softCrossEntropy(BaseMatrixT &b, BaseMatrixT &c)¶ ternary operator.
- 
void 
softCrossEntropyBp(BaseMatrixT &b, BaseMatrixT &c)¶ 
- 
void 
binaryLabelCrossEntropy(BaseMatrixT &b, BaseMatrixT &c)¶ 
- 
void 
binaryLabelCrossEntropyBp(BaseMatrixT &b, BaseMatrixT &c)¶ 
- 
void 
add(BaseMatrixT &b, BaseMatrixT &c)¶ this = b + c
- 
void 
add(BaseMatrixT &b, T p1, BaseMatrixT &c, T p2)¶ this = b*p1 + c*p2
- 
void 
sub(BaseMatrixT &b, BaseMatrixT &c)¶ this = b - c
- 
void 
sub(BaseMatrixT &b, T p1, BaseMatrixT &c, T p2)¶ this = b*p1 - c*p2
- 
void 
add2(BaseMatrixT &b, BaseMatrixT &c)¶ this = this + b + c
- 
void 
add2(BaseMatrixT &b, BaseMatrixT &c, T p1, T p2, T p3)¶ this = this*p1 + b*p2 + c*p3
- 
void 
add3(BaseMatrixT &b, BaseMatrixT &c, BaseMatrixT &d, T p1, T p2, T p3)¶ this = a*p1 + b*p2 + c*p3
- 
void 
sgdUpdate(BaseMatrixT &b, BaseMatrixT &c, T p1, T p2, T p3)¶ c = p2 * c - p1 * (b + p3 * this) this += mom
- 
void 
sgdUpdate(BaseMatrixT &b, BaseMatrixT &c, BaseMatrixT &d, T p1, T p2, T p3)¶ c = p2 * c - p1 * d * (b + p3 * this) this += mom
- 
void 
applyL1(T learningRate, T decayRate)¶ apply L1/L2 to this
- 
void 
applyL1(BaseMatrixT &lr, T learningRate, T decayRate)¶ 
- 
void 
applyL2(T learningRate, T decayRate)¶ 
- 
void 
applyL2(BaseMatrixT &lr, T learningRate, T decayRate)¶ 
- 
void 
dotMul(BaseMatrixT &b)¶ this *= b
- 
void 
dotMul(BaseMatrixT &b, BaseMatrixT &c)¶ this = b * c
- 
void 
dotDiv(BaseMatrixT &b, BaseMatrixT &c)¶ this = b / c
- 
void 
dotDiv(BaseMatrixT &b, BaseMatrixT &c, T p1, T p2)¶ this = (b + p1) / (c + p2)
- 
void 
rankLoss(BaseMatrixT &b, BaseMatrixT &c, BaseMatrixT &d)¶ this = log(1 + exp(b - c)) - d * (b - c)
- 
void 
rankLossBp(BaseMatrixT &b, BaseMatrixT &c, BaseMatrixT &d)¶ 
- 
void 
logisticRegressionLoss(BaseMatrixT &b, BaseMatrixT &c)¶ this = log(1 + exp(b)) - c * b
- 
void 
logisticRegressionLossBp(BaseMatrixT &b, BaseMatrixT &c)¶ this += exp(b)/(1+exp(b)) - c
- 
void 
biggerThan(BaseMatrixT &b, BaseMatrixT &c)¶ this = b > c ? 1.0 : 0.0
- 
void 
biggerThan(BaseMatrixT &b, BaseMatrixT &c, BaseMatrixT &d)¶ this = ((b>c && d>0.5) || (b<c && d<0.5)) ? 1 : 0
- 
void 
max(BaseMatrixT &b, BaseMatrixT &c)¶ this = b>c ? b : c
- 
void 
binaryClassificationError(size_t destCol, BaseMatrixT &b, BaseMatrixT &c, T p)¶ this[destCol] += ((b>p) == (c>p)) ? 0 : 1
- 
void 
binaryClassificationError2(size_t destCol, BaseMatrixT &b, BaseMatrixT &c, T p)¶ 
- 
void 
dotMulSquare(BaseMatrixT &b)¶ this = this * b * b
- 
void 
dotSquareMul(BaseMatrixT &b)¶ this = this * this * b
- 
void 
dotMulSquare(BaseMatrixT &b, BaseMatrixT &c)¶ this = b * c * c
- 
void 
dotSquareSquare(BaseMatrixT &b, BaseMatrixT &c)¶ this = b * b * c * c
- 
void 
dotMulSquareSum(BaseMatrixT &b, BaseMatrixT &c, T p1, T p2)¶ this = this * (p1*b + p2*c)^2
- 
void 
dotSquareSum(BaseMatrixT &b, BaseMatrixT &c, T p1, T p2)¶ this = (p1*b + p2*c)^2
- 
void 
dotMulSum(BaseMatrixT &b, BaseMatrixT &c, T p1, T p2)¶ this= this * (p1*b + p2*c)
- 
void 
addSquareSum(BaseMatrixT &b, BaseMatrixT &c, BaseMatrixT d, T p1, T p2, T p3)¶ this += sqr(p1*b + p2*c + p3*d)
- 
void 
addSquare(BaseMatrixT &b, T p)¶ this += p * sqr(b)
- 
void 
decayAddSquare(BaseMatrixT &b, T p1, T p2)¶ this = p1 * this + p2 * sqr(b)
- 
void 
decayAddSquareMul(BaseMatrixT &b, BaseMatrixT &c, T p1, T p2)¶ this = p1 * this + p2 * sqr(b * c)
- 
void 
reciprocal(BaseMatrixT &b, T p1, T p2)¶ this = 1 / (p1 * b + p2)
- 
void 
reciprocalSum(BaseMatrixT &b, BaseMatrixT &c, T p1, T p2, T p3)¶ this = 1 / (p1 * b + p2 * c + p3)
- 
void 
copyAndClear(BaseMatrixT &b)¶ b = this; this = 0
- 
void 
rowDotMul(size_t destCol, BaseMatrixT &b, BaseMatrixT &c)¶ this_row[destCol] += dotprod(b_row, c_row)
- 
void 
rowDotMul2(size_t destCol, BaseMatrixT &b, BaseMatrixT &c)¶ 
- 
void 
addDotMulVMM(BaseMatrixT &b, BaseMatrixT &c)¶ this is vector (one row matrix)
for each row i, do: this_row += dotmul(b_row_i, c_row_i)
- 
void 
addDotMulVMM2(BaseMatrixT &b, BaseMatrixT &c)¶ 
- 
void 
addDotMulMMV(BaseMatrixT &b, BaseMatrixT &c)¶ c is vector (one row matrix)
for each row i, do: this_row_i += dotmul(b_row_i, c_row)
- 
void 
addDotMulMMV2(BaseMatrixT &b, BaseMatrixT &c)¶ 
- 
void 
addDotMul(BaseMatrixT &b, BaseMatrixT &c, T p1, T p2)¶ this = p1 * this + p2 * b * c
- 
void 
rowScale(size_t cCol, BaseMatrixT &b, BaseMatrixT &c)¶ this_row = b_row * c_row[cCol]
- 
void 
rowScale2(size_t cCol, BaseMatrixT &b, BaseMatrixT &c)¶ 
- 
void 
colScale(size_t cRow, BaseMatrixT &b, BaseMatrixT &c)¶ this_col = b_col * c_col[cRow]
- 
void 
addColScale(size_t cRow, BaseMatrixT &b, BaseMatrixT &c)¶ this_col += b_col * c_col[cRow]
- 
void 
addRowScale(size_t cCol, BaseMatrixT &b, BaseMatrixT &c)¶ this_row += b_row * c_row[cCol]
- 
void 
sumRows(BaseMatrixT &b)¶ calculate the sum of each row of the matrix b.
- 
void 
maxRows(BaseMatrixT &b)¶ calculate the maximum value of each row of the matrix b.
- 
void 
minRows(BaseMatrixT &b)¶ calculate the minimum value of each row of the matrix b.
- 
void 
sumCols(BaseMatrixT &b)¶ calculate the sum of each column of the matrix b.
- 
void 
maxCols(BaseMatrixT &b)¶ calculate the maximum value of each column of the matrix b.
- 
void 
minCols(BaseMatrixT &b)¶ calculate the minimum value of each column of the matrix b.
- 
void 
sumCols(BaseMatrixT &b, T scale)¶ 
- 
void 
sumOfSquares(BaseMatrixT &b, BaseMatrixT &c)¶ calculate the sum of each row of (b - c)^2.
- 
void 
rowAdd(size_t cCol, BaseMatrixT &b, BaseMatrixT &c, T p)¶ this_row = b_row + p * ones * c_row[cCol]
- 
void 
rowPow(size_t cCol, BaseMatrixT &b, BaseMatrixT &c)¶ this_row = pow(b_row, c_row[cCol])
- 
virtual bool 
isSparse() const¶ 
- 
virtual 
 
- 
typedef bool_constant<bool, false> 
 
Sparse Matrix¶
- 
namespace 
paddle¶ Typedefs
- 
typedef std::shared_ptr<GpuSparseMatrix> 
GpuSparseMatrixPtr¶ 
- 
typedef std::shared_ptr<CpuSparseMatrix> 
CpuSparseMatrixPtr¶ 
Enums
- 
enum 
SparseFormat¶ Sparse matrix storage format.
nnz represents nonzero number in sparse matrix.
SPARSE_CSR: row major matrix. length of row is height_ + 1, each element represents row start index in Matrix. length of col and value are nnz.
SPARSE_CSC: col major matrix. length of col is width_ + 1, each element represents col start index in Matrix. length of row and value are nnz.
for example: [0, 1, 0, 2, 0; 1, 0, 0, 0, 0; 0, 0, 0, 2, 5]; SPARSE_CSR row [0, 2, 3, 5]; col [1, 3, 0, 3, 4]; value [1, 2, 1, 2, 5] SPARSE_CSC col [0, 1, 2, 2, 4, 5]; row [1, 0, 0, 2, 2]; value [1, 1, 2, 2, 5]
Values:
- 
SPARSE_CSR= 0¶ 
- 
SPARSE_CSC= 1¶ 
- 
 
- 
class 
Matrix¶ - #include <Matrix.h>
The copy constructor or assignment operator will share the data as opposed to making a copy of the original data. To make a copy of the original data, use copyFrom() instead.
Inherits from paddle::BaseMatrixT< real >
Subclassed by paddle::CpuMatrix, paddle::CpuSparseMatrix, paddle::GpuMatrix, paddle::GpuSparseMatrix
Public Functions
- 
virtual 
~Matrix()¶ 
- 
void 
setData(real *data)¶ set the data buffer used to hold the matrix data.
caller should make sure that the size of data is at least sizeof(real)*height*width.
- 
void 
setData(real *data, size_t newHeight, size_t newWidth)¶ the data should be contiguous
- 
size_t 
getWidth() const¶ 
- 
size_t 
getHeight() const¶ 
- 
size_t 
getStride() const¶ 
- 
size_t 
getElementCnt() const¶ 
- 
virtual real *
getData()¶ 
- 
virtual const real *
getData() const¶ 
- 
bool 
isTransposed() const¶ 
- 
bool 
isContiguous() const¶ 
- 
virtual int *
getRows() const¶ 
- 
virtual int *
getCols() const¶ 
- 
virtual SparseFormat 
getFormat() const¶ 
- 
virtual SparseValueType 
getValueType() const¶ 
- 
virtual void 
add3(MatrixPtr b)¶ matrix element-wise add
Named add3 just because add/add2 have been used in BaseMatrix.cu and they are not virtual functions.
- 
MemoryHandlePtr 
getMemoryHandle() const¶ 
- 
virtual void 
zeroMem()¶ 
- 
virtual void 
resetOne()¶ 
- 
virtual void 
trimFrom(const CpuSparseMatrix &src)¶ 
- 
virtual void 
copyFrom(const Matrix &src, hl_stream_t stream)¶ 
- 
MatrixPtr 
subMatrix(size_t startRow, size_t endRow, size_t startCol, size_t endCol)¶ 
- 
virtual void 
copyFrom(const real *src, size_t size)¶ If this is GpuMatrix, src is assumed to be CPU memory
If this is CpuMatrix, src is assumed to be CPU memory
- 
virtual void 
copyFrom(const real *src, const int64_t *seq)¶ 
- 
virtual void 
copyFrom(const IVector &src)¶ convert an int vector to a real matrix.
(1) source and dest are both in CPU.
(2) sizes match exactly.
- 
virtual MatrixPtr 
clone(size_t height = 0, size_t width = 0, bool useGpu = false)¶ Create a matrix with the same type (GpuMatrix, CpuMatrix, NonValueSparseMatrix, etc.) as this.
If height and width are zero, the new matrix will have the same size as this, otherwise the new matrix will have the specified size.
- 
virtual real *
getRowBuf(size_t row)¶ 
- 
virtual real 
getElement(size_t x, size_t y) const¶ 
- 
virtual real 
getSum()¶ 
- 
virtual real 
getAbsSum()¶ 
- 
virtual void 
resize(size_t newHeight, size_t newWidth) = 0¶ - Note
 - Original data may not be preserved after resize().
 
- 
virtual void 
resize(size_t newHeight, size_t newWidth, size_t newNnz, SparseValueType valueType, SparseFormat format) = 0¶ - Note
 - This should only be used for sparse matrix.
 
- 
virtual void 
setRow(size_t row, size_t colNum, const unsigned int *cols, const real *values) = 0¶ This should only be used for sparse matrix.
Currently must be called for each row in order. The matrix is not valid until setRow is called for the last row.
- 
virtual void 
transpose(MatrixPtr matTrans, bool memAlloc)¶ hard transpose.
allocate matTrans’ memory outside, then set memAlloc as false; else set as true.
- 
virtual void 
clear()¶ Only set all variables to 0 or NULL but not free them.
- 
void 
reshape(size_t height, size_t width)¶ 
- 
virtual void 
mul(const MatrixPtr a, const MatrixPtr b, real scaleAB, real scaleT)¶ this = scaleAB*(a*b) + scaleT*this
- 
virtual void 
addColumnVector(const Matrix &b)¶ Add a vector (column) b to matrix a, column by column.
- 
virtual void 
addByBitCode(size_t numClasses, const IVector &codes, const Matrix &vec)¶ For j < codeLength: this(i, j) += vec(index(i, j), 0) where index(i, j) = ((codes(i) + numClasses) >> (j + 1)) - 1
- 
virtual void 
addByBitCodeBackward(size_t numClasses, const IVector &codes, Matrix &vec)¶ For j < codeLength: vec(index(i, j), 0) += this(i, j) where index is same as the index for addByBitCode
- 
virtual void 
mulByBitCode(size_t numClasses, const IVector &codes, const Matrix &mat, const Matrix &input)¶ For j < codeLength: this(i, j) += <mat.row(index(i, j)), input.row(i)> where index is same as the index for addByBitCode
- 
virtual void 
mulByBitCodeBackwardWeight(size_t numClasses, const IVector &codes, Matrix &mat, const Matrix &input)¶ For j < codeLength: mat.row(index(i, j)) += this(i, j) * input.row(i) where index is same as the index for addByBitCode
- 
virtual void 
mulByBitCodeBackwardError(size_t numClasses, const IVector &codes, const Matrix &mat, Matrix &input)¶ For j < codeLength: input.row(i) += this(i, j) * mat.row(index(i, j)) where index is same as the index for addByBitCode
- 
virtual void 
sumByBitCode(size_t numClasses, IVector &codes, Matrix &sum, real scaleSum)¶ For j < codeLength sum(i, 0) = scaleSum * \sum_j bit(i, j) * this(i, j) where bit(i, j) = ((codes(i) + numClasses) & 2^j) ? 1 : 0
- 
virtual void 
subByBitCode(size_t numClasses_, IVector &codes)¶ For j < codeLength this(i, j) -= bit(i, j) where bit(i, j) is same as that for sumByBitCode
- 
virtual void 
colMax(IVector &maxIds, Matrix &maxVal)¶ Get the top k elements of each column of this matrix.
The row ids and values of these elements are stored in maxIds and maxVal respectively, where k is the size of maxIds. Note that the top k elements are not sorted.
- 
virtual void 
rowMax(IVector &maxIds, Matrix &max)¶ Get the top k elements of each row of this matrix.
The column ids and values of these elements are stored in maxIds and max respectively. where k is the size of maxIds. And note that the top k elements are not sorted.
- 
virtual void 
oneHotCrossEntropy(Matrix &output, IVector &label)¶ copy -log(output[label]) to this->data[i].
- 
virtual void 
oneHotCrossEntropyBp(Matrix &outputV, IVector &label)¶ calculate the error of outputV according to label.
- 
virtual void 
oneHotCrossEntropyWithSelfNorm(Matrix &output, IVector &label, real alpha)¶ copy -log(output[label]) to this->data[i].
- 
virtual void 
oneHotCrossEntropyWithSelfNormBp(Matrix &outputV, IVector &label, real alpha)¶ calculate the error of outputV according to label.
- 
virtual void 
circularConv(Matrix &b, Matrix &c)¶ - \[ a[i] = \sum_{j=-(N-1)/2}^{(N-1)/2} b_{i+j} * c_{j} \]
b contains M elements, c contains N elements (N is odd), b’s index arithmetic is computed modulo M, c’s index arithmetic is computed modulo N.
 
- 
virtual void 
circularConvDerivative(Matrix &output, Matrix &prevOut1, Matrix &prevOut2, Matrix &prevGrad1, Matrix &prevGrad2)¶ 
- 
virtual void 
cosSim(Matrix &output1, Matrix &output2, real scale = 1.0f)¶ cosine similarity, for each row i, this[i] = cos(output1[i], output2[i])
output2 can only have one row, then for each row i, this[i] = cos(output1[i], output2[0])
- 
virtual void 
cosSimDerivative(Matrix &output, Matrix &prevOut1, Matrix &prevOut2, Matrix &prevGrad1, Matrix &prevGrad2, real scale = 1.0f)¶ 
- 
virtual void 
print(std::ostream &os) const¶ print out the values of elements to os
- 
virtual void 
print(std::ostream &os, size_t height, size_t width) const¶ print a part of the matrix from the (top,left) value to the (height, width) value (not included)
- 
virtual void 
printOneRow(std::ostream &os, size_t idx) const¶ print one row to os
- 
virtual real 
getMin()¶ 
- 
virtual real 
getMax()¶ 
- 
virtual void 
randomizeUniform()¶ 
- 
virtual void 
classificationError(MatrixPtr output, IVectorPtr label)¶ calculate the error of classification
output[i] = 1 if row i is an error.
output[i] = 0 if row i is correct.
- 
virtual void 
convExpand(Matrix &feature, int feaImgHeight, int feaImgWidth, int channels, int blockH, int blockW, int strideH, int strideW, int paddingH, int paddingW, int outputH, int outputW)¶ This function is used to calculate the convolution:
It will expand a feature matrix according to the convolution filters
- 
virtual void 
convShrink(Matrix &expandColMat, int thisImgHeight, int thisImgWidth, int channels, int blockH, int blockW, int strideH, int strideW, int paddingH, int paddingW, int outputH, int outputW, real alpha = 1.0f, real beta = 0.0f)¶ This function is the reverse implementation of convExpand:
Its function is to restore an expanded matrix into a feature matrix
- 
virtual void 
maxPoolForward(Matrix &inputMat, size_t imgSizeH, size_t imgSizeW, size_t channels, size_t sizeX, size_t sizeY, size_t strideH, size_t strideW, size_t outputH, size_t outputW, size_t paddingH, size_t paddingW)¶ Pooling forward operation, pick out the largest element in the sizeX of value
- 
virtual void 
maxPoolBackward(Matrix &image, size_t imgSizeH, size_t imgSizeW, Matrix &outGrad, Matrix &outV, size_t sizeX, size_t sizeY, size_t strideH, size_t strideW, size_t outputH, size_t outputW, real scaleTargets, real scaleOutput, size_t paddingH, size_t paddingW)¶ Pooling backward operation.
- 
virtual void 
avgPoolForward(Matrix &input, size_t imgSizeH, size_t imgSizeW, size_t channels, size_t sizeX, size_t sizeY, size_t strideH, size_t strideW, size_t outputH, size_t outputW, size_t paddingH, size_t paddingW)¶ Pooling forward operation, calculate the average of sizeX elements.
- 
virtual void 
avgPoolBackward(Matrix &input, size_t imgSizeH, size_t imgSizeW, size_t sizeX, size_t sizeY, size_t strideH, size_t strideW, size_t outputH, size_t outputW, real scaleTargets, real scaleOutput, size_t paddingH, size_t paddingW)¶ 
- 
virtual void 
crossMapNormalFwd(Matrix &input, size_t imgSizeH, size_t imgSizeW, Matrix &denoms, size_t channels, size_t sizeX, float scale, float pow)¶ normalize-operation.
- 
virtual void 
crossMapNormalBwd(Matrix &localGrad, Matrix &denoms, Matrix &preOutV, Matrix &localOutV, size_t channels, size_t imgSizeH, size_t imgSizeW, size_t size, float scale, float pow)¶ 
- 
virtual void 
maxSequenceForward(Matrix &input, const IVector &sequence, IVector &index)¶ Input: one or more sequences. Each sequence contains some instances.
Output: output size is the number of input sequences (NOT input instances).
output[i] is set to max_input[i].
- 
virtual void 
contextProjectionForward(MatrixPtr input, MatrixPtr weight, const IVector &sequence, int contextLength, int contextStart, size_t beginPad, bool isPadding)¶ 
- 
virtual void 
contextProjectionBackward(MatrixPtr inputGrad, MatrixPtr weightGrad, const IVector &sequence, int contextLength, int contextStart, size_t beginPad, bool isPadding)¶ 
- 
virtual void 
contextProjectionBackwardData(MatrixPtr inputGrad, const IVector &sequence, int contextLength, int contextStart)¶ 
- 
virtual void 
contextProjectionBackwardWeight(MatrixPtr weightGrad, const IVector &sequence, int contextLength, int contextStart, int totalPad, size_t beginPad)¶ 
- 
virtual void 
selectRows(Matrix &table, IVector &ids)¶ this.row[i] += table.row[ids[i]] if ids[i] == -1, it will be ignored
- 
virtual void 
addToRows(Matrix &table, IVector &ids)¶ table.row[ids[i]] += this.row[i] if ids[i] == -1, it will be ignored
- 
virtual void 
multiBinaryLabelCrossEntropy(Matrix &output, Matrix &label)¶ cross entropy for multi binary labels
this[i] = -sum(label[i][j]*log(output[i][j]) + (1-label[i][j])*log(1-output[i][j]))
- 
virtual void 
multiBinaryLabelCrossEntropyBp(Matrix &output, Matrix &label)¶ The gradient of cross entropy for multi binary labels on output.
this[i][j] = -label[i][j]/output[i][j] + (1-label[i][j])/(1-output[i][j])
Public Static Functions
- 
MatrixPtr 
create(MemoryHandlePtr memHandle, size_t height, size_t width, bool trans = false)¶ 
- 
MatrixPtr 
create(size_t height, size_t width, bool trans = false, bool useGpu = false)¶ 
- 
MatrixPtr 
create(real *data, size_t height, size_t width, bool trans = false, bool useGpu = false)¶ 
- 
MatrixPtr 
create(real *data, size_t height, size_t width, size_t stride, bool trans = false, bool useGpu = false)¶ 
- 
MatrixPtr 
createSparseMatrix(size_t height, size_t width, size_t nnz, SparseValueType valueType = FLOAT_VALUE, bool trans = false, bool useGpu = false)¶ 
- 
MatrixPtr 
createSparseMatrix(size_t height, size_t width, size_t nnz, SparseValueType valueType = FLOAT_VALUE, SparseFormat foramt = SPARSE_CSR, bool trans = false, bool useGpu = false)¶ 
- 
MatrixPtr 
createSparseMatrix(real *data, int *row, int *col, size_t height, size_t width, size_t nnz, SparseValueType valueType, SparseFormat format, bool trans, bool useGpu)¶ 
- 
void 
resizeOrCreateSparseMatrix(MatrixPtr &matrix, size_t height, size_t width, size_t nnz, SparseValueType valueType = FLOAT_VALUE, SparseFormat foramt = SPARSE_CSR, bool trans = false, bool useGpu = false)¶ 
- 
void 
resizeOrCreate(MatrixPtr &a, size_t height, size_t width, bool trans = false, bool useGpu = false)¶ 
 - 
virtual 
 
- 
class 
GpuMatrix¶ Inherits from paddle::Matrix
Public Functions
- 
GpuMatrix()¶ 
- 
GpuMatrix(size_t height, size_t width, bool trans = false)¶ 
- 
GpuMatrix(real *data, size_t height, size_t width, bool trans = false)¶ 
- 
GpuMatrix(real *data, size_t height, size_t width, size_t stride, bool trans = false)¶ 
- 
GpuMatrix(GpuMemHandlePtr dataHandle, size_t height, size_t width, bool trans = false)¶ 
- 
~GpuMatrix()¶ 
- 
virtual void 
zeroMem()¶ 
- 
virtual void 
resetOne()¶ 
- 
virtual void 
resize(size_t newHeight, size_t newWidth)¶ - Note
 - Original data may not be preserved after resize().
 
- 
virtual void 
resize(size_t newHeight, size_t newWidth, size_t newNnz, SparseValueType valueType, SparseFormat format)¶ - Note
 - This should only be used for sparse matrix.
 
- 
virtual void 
setRow(size_t row, size_t colNum, const unsigned int *cols, const real *values)¶ This should only be used for sparse matrix.
Currently must be called for each row in order. The matrix is not valid until setRow is called for the last row.
- 
virtual void 
copyFrom(const real *hostSrc, size_t size)¶ Copy the data from cpu_memory buffer
- 
virtual void 
copyFrom(const real *hostSrc, const int64_t *seq)¶ 
- 
virtual void 
copyFrom(const Matrix &src, hl_stream_t stream)¶ 
- 
virtual void 
copyFrom(const IVector &src)¶ convert an int vector to a real matrix.
(1) source and dest are both in CPU.
(2) sizes match exactly.
- 
virtual MatrixPtr 
clone(size_t height, size_t width, bool useGpu = false)¶ Create a matrix with the same type (GpuMatrix, CpuMatrix, NonValueSparseMatrix, etc.) as this.
If height and width are zero, the new matrix will have the same size as this, otherwise the new matrix will have the specified size.
- 
virtual real 
getElement(size_t x, size_t y) const¶ 
- 
real *
getRow(size_t row)¶ 
- 
virtual real *
getRowBuf(size_t row)¶ 
- 
virtual real 
getSum()¶ 
- 
virtual real 
getAbsSum()¶ 
- 
virtual void 
transpose(MatrixPtr matTrans, bool memAlloc)¶ hard transpose.
allocate matTrans’ memory outside, then set memAlloc as false; else set as true.
- 
virtual void 
addColumnVector(const Matrix &b)¶ Add a vector (column) b to matrix a, column by column.
- 
virtual void 
mul(const MatrixPtr a, const MatrixPtr b, real scaleAB, real scaleT)¶ this = scaleAB*(a*b) + scaleT*this
- 
void 
mul(const GpuSparseMatrix &a, const GpuMatrix &b, real scaleAB, real scaleT)¶ 
- 
void 
mul(const GpuMatrix &a, const GpuSparseMatrix &b, real scaleAB, real scaleT)¶ 
- 
virtual void 
rowMax(IVector &maxIds, Matrix &max)¶ Get the top k elements of each row of this matrix.
The column ids and values of these elements are stored in maxIds and max respectively, where k is the size of maxIds. Note that the top k elements are not sorted.
- 
virtual void 
colMax(IVector &maxIds, Matrix &maxVal)¶ Get the top k elements of each column of this matrix.
The row ids and values of these elements are stored in maxIds and maxVal respectively, where k is the size of maxIds. Note that the top k elements are not sorted.
- 
virtual void 
oneHotCrossEntropy(Matrix &output, IVector &label)¶ copy -log(output[label]) to this->data[i].
- 
virtual void 
oneHotCrossEntropyBp(Matrix &outputV, IVector &label)¶ calculate the error of outputV according to label.
- 
virtual void 
oneHotCrossEntropyWithSelfNorm(Matrix &output, IVector &label, real alpha)¶ copy -log(output[label]) to this->data[i].
- 
virtual void 
oneHotCrossEntropyWithSelfNormBp(Matrix &outputV, IVector &label, real alpha)¶ calculate the error of outputV according to label.
- 
virtual void 
cosSim(Matrix &output1, Matrix &output2, real scale)¶ cosine similarity, for each row i, this[i] = cos(output1[i], output2[i])
output2 can only have one row, then for each row i, this[i] = cos(output1[i], output2[0])
- 
virtual void 
cosSimDerivative(Matrix &output, Matrix &prevOut1, Matrix &prevOut2, Matrix &prevGrad1, Matrix &prevGrad2, real scale)¶ 
- 
virtual void 
print(std::ostream &os) const¶ print out the values of elements to os
- 
virtual void 
print(std::ostream &os, size_t height, size_t width) const¶ print a part of the matrix from the (top,left) value to the (height, width) value (not included)
- 
virtual void 
randomizeUniform()¶ 
- 
virtual void 
classificationError(MatrixPtr output, IVectorPtr label)¶ calculate the error of classification
output[i] = 1 if row i is an error.
output[i] = 0 if row i is correct.
- 
virtual void 
convExpand(Matrix &feature, int feaImgHeight, int feaImgWidth, int channels, int blockH, int blockW, int strideH, int strideW, int paddingH, int paddingW, int outputH, int outputW)¶ This function is used to calculate the convolution:
It will expand a feature matrix according to the convolution filters
- 
virtual void 
convShrink(Matrix &expandColMat, int thisImgHeight, int thisImgWidth, int channels, int blockH, int blockW, int strideH, int strideW, int paddingH, int paddingW, int outputH, int outputW, real alpha = 1.0f, real beta = 0.0f)¶ This function is the reverse implementation of convExpand:
Its function is to restore an expanded matrix into a feature matrix
- 
virtual void 
maxPoolForward(Matrix &inputMat, size_t imgSizeH, size_t imgSizeW, size_t channels, size_t sizeX, size_t sizeY, size_t strideH, size_t strideW, size_t outputH, size_t outputW, size_t paddingH, size_t paddingW)¶ Pooling forward operation, pick out the largest element in the sizeX of value
- 
virtual void 
maxPoolBackward(Matrix &image, size_t imgSizeH, size_t imgSizeW, Matrix &outGrad, Matrix &outV, size_t sizeX, size_t sizeY, size_t strideH, size_t strideW, size_t outputH, size_t outputW, real scaleTargets, real scaleOutput, size_t paddingH, size_t paddingW)¶ Pooling backward operation.
- 
virtual void 
avgPoolForward(Matrix &input, size_t imgSizeH, size_t imgSizeW, size_t channels, size_t sizeX, size_t sizeY, size_t strideH, size_t strideW, size_t outputH, size_t outputW, size_t paddingH, size_t paddingW)¶ Pooling forward operation, calculate the average of sizeX elements.
- 
virtual void 
avgPoolBackward(Matrix &input, size_t imgSizeH, size_t imgSizeW, size_t sizeX, size_t sizeY, size_t strideH, size_t strideW, size_t outputH, size_t outputW, real scaleTargets, real scaleOutput, size_t paddingH, size_t paddingW)¶ 
- 
virtual void 
crossMapNormalFwd(Matrix &input, size_t imgSizeH, size_t imgSizeW, Matrix &denoms, size_t channels, size_t sizeX, float scale, float pow)¶ normalize-operation.
- 
virtual void 
crossMapNormalBwd(Matrix &localGrad, Matrix &denoms, Matrix &preOutV, Matrix &localOutV, size_t channels, size_t imgSizeH, size_t imgSizeW, size_t sizeX, float scale, float pow)¶ 
- 
virtual void 
maxSequenceForward(Matrix &input, const IVector &sequence, IVector &index)¶ Input: one or more sequences. Each sequence contains some instances.
Output: output size is the number of input sequences (NOT input instances).
output[i] is set to max_{for each instance in this sequence}{input[i]}.
- 
virtual void 
contextProjectionForward(MatrixPtr input, MatrixPtr weight, const IVector &sequence, int contextLength, int contextStart, size_t beginPad, bool isPadding)¶ 
- 
 
- 
class 
CpuMatrix¶ Inherits from paddle::Matrix
Subclassed by paddle::SharedCpuMatrix, paddle::SparseRowCpuMatrix, paddle::SparseRowIdsCpuMatrix
Public Functions
- 
CpuMatrix(real *data, size_t height, size_t width, bool trans = false)¶ 
- 
CpuMatrix(real *data, size_t height, size_t width, size_t stride, bool trans = false)¶ 
- 
CpuMatrix(CpuMemHandlePtr dataHandle, size_t height, size_t width, bool trans = false)¶ 
- 
~CpuMatrix()¶ 
- 
virtual void 
zeroMem()¶ 
- 
virtual void 
resetOne()¶ 
- 
virtual void 
resize(size_t newHeight, size_t newWidth)¶ - Note
 - Original data may not be preserved after resize().
 
- 
virtual void 
resize(size_t newHeight, size_t newWidth, size_t newNnz, SparseValueType valueType, SparseFormat format)¶ - Note
 - This should only be used for sparse matrix.
 
- 
virtual void 
setRow(size_t row, size_t colNum, const unsigned int *cols, const real *values)¶ This should only be used for sparse matrix.
Currently must be called for each row in order. The matrix is not valid until setRow is called for the last row.
- 
virtual real 
getElement(size_t x, size_t y) const¶ 
- 
virtual real 
getSum()¶ 
- 
virtual real 
getAbsSum()¶ 
- 
virtual void 
transpose(MatrixPtr matTrans, bool memAlloc)¶ hard transpose.
allocate matTrans’ memory outside, then set memAlloc as false; else set as true.
- 
virtual void 
copyFrom(const Matrix &src, hl_stream_t stream)¶ 
- 
virtual void 
copyFrom(const real *src, size_t size)¶ If this is GpuMatrix, src is assumed to be CPU memory
If this is CpuMatrix, src is assumed to be CPU memory
- 
virtual void 
copyFrom(const real *cpuSrc, const int64_t *seq)¶ 
- 
virtual void 
copyFrom(const IVector &src)¶ convert an int vector to a real matrix.
(1) source and dest are both in CPU.
(2) sizes match exactly.
- 
void 
copyFrom(CpuSparseMatrix &src)¶ 
- 
virtual MatrixPtr 
clone(size_t height, size_t width, bool useGpu = false)¶ Create a matrix with the same type (GpuMatrix, CpuMatrix, NonValueSparseMatrix, etc.) as this.
If height and width are zero, the new matrix will have the same size as this, otherwise the new matrix will have the specified size.
- 
virtual void 
convExpand(Matrix &feature, int feaImgHeight, int feaImgWidth, int channels, int blockH, int blockW, int strideH, int strideW, int paddingH, int paddingW, int outputH, int outputW)¶ This function is used to calculate the convolution:
It will expand a feature matrix according to the convolution filters
- 
virtual void 
convShrink(Matrix &expandColMat, int thisImgHeight, int thisImgWidth, int channels, int blockH, int blockW, int strideH, int strideW, int paddingH, int paddingW, int outputH, int outputW, real alpha = 1.0f, real beta = 0.0f)¶ This function is the reverse implementation of convExpand:
Its function is to restore an expanded matrix into a feature matrix
- 
virtual void 
maxPoolForward(Matrix &inputMat, size_t imgSizeH, size_t imgSizeW, size_t channels, size_t sizeX, size_t sizeY, size_t strideH, size_t strideW, size_t outputH, size_t outputW, size_t paddingH, size_t paddingW)¶ Pooling forward operation, pick out the largest element in the sizeX of value
- 
virtual void 
maxPoolBackward(Matrix &image, size_t imgSizeH, size_t imgSizeW, Matrix &outGrad, Matrix &outV, size_t sizeX, size_t sizeY, size_t strideH, size_t strideW, size_t outputH, size_t outputW, real scaleTargets, real scaleOutput, size_t paddingH, size_t paddingW)¶ Pooling backward operation.
- 
virtual void 
avgPoolForward(Matrix &input, size_t imgSizeH, size_t imgSizeW, size_t channels, size_t sizeX, size_t sizeY, size_t strideH, size_t strideW, size_t outputH, size_t outputW, size_t paddingH, size_t paddingW)¶ Pooling forward operation, calculate the average of sizeX elements.
- 
virtual void 
avgPoolBackward(Matrix &input, size_t imgSizeH, size_t imgSizeW, size_t sizeX, size_t sizeY, size_t strideH, size_t strideW, size_t outputH, size_t outputW, real scaleTargets, real scaleOutput, size_t paddingH, size_t paddingW)¶ 
- 
virtual void 
crossMapNormalFwd(Matrix &input, size_t imgSizeH, size_t imgSizeW, Matrix &denoms, size_t channels, size_t sizeX, float scale, float pow)¶ normalize-operation.
- 
virtual void 
crossMapNormalBwd(Matrix &localGrad, Matrix &denoms, Matrix &preOutV, Matrix &localOutV, size_t channels, size_t imgSizeH, size_t imgSizeW, size_t sizeX, float scale, float pow)¶ 
- 
virtual void 
maxSequenceForward(Matrix &input, const IVector &sequence, IVector &index)¶ Input: one or more sequences. Each sequence contains some instances. Output: output size is the number of input sequences (NOT input instances). output[i] is set to max_{for each instance in this sequence}{input[i]}
- 
virtual void 
contextProjectionForward(MatrixPtr input, MatrixPtr weight, const IVector &sequence, int contextLength, int contextStart, size_t beginPad, bool isPadding)¶ 
- 
virtual void 
contextProjectionBackward(MatrixPtr inputGrad, MatrixPtr weightGrad, const IVector &sequence, int contextLength, int contextStart, size_t beginPad, bool isPadding)¶ 
- 
real *
getRow(size_t row)¶ 
- 
virtual real *
getRowBuf(size_t row)¶ 
- template <typename TableMatType>
 - 
void 
selectRowsImp(TableMatType &table, IVector &ids)¶ use abstract getRow() to get row from table.
The table is defined as a template parameter instead of a virtual class for the sake of performance. Used internally by the above two virtual functions.
- 
virtual void 
addColumnVector(const Matrix &b)¶ Add a vector (column) b to matrix a, column by column.
- 
virtual void 
mul(const MatrixPtr a, const MatrixPtr b, real scaleAB, real scaleT)¶ this = scaleAB*(a*b) + scaleT*this
- 
void 
mul(CpuMatrix *a, CpuSparseMatrix *b, real scaleAB, real scaleT)¶ 
- 
virtual void 
mul(CpuSparseMatrix *a, CpuMatrix *b, real scaleAB, real scaleT)¶ 
- 
virtual void 
rowMax(IVector &maxIds, Matrix &max)¶ Get the top k elements of each row of this matrix.
The column ids and values of these elements are stored in maxIds and max respectively, where k is the size of maxIds. Note that the top k elements are not sorted.
- 
virtual void 
colMax(IVector &maxIds, Matrix &maxVal)¶ Get the top k elements of each column of this matrix.
The row ids and values of these elements are stored in maxIds and maxVal respectively, where k is the size of maxIds. Note that the top k elements are not sorted.
- 
virtual void 
oneHotCrossEntropy(Matrix &output, IVector &label)¶ copy -log(output[label]) to this->data[i].
- 
virtual void 
oneHotCrossEntropyBp(Matrix &outputV, IVector &label)¶ calculate the error of outputV according to label.
- 
virtual void 
oneHotCrossEntropyWithSelfNorm(Matrix &output, IVector &label, real alpha)¶ copy -log(output[label]) to this->data[i].
- 
virtual void 
oneHotCrossEntropyWithSelfNormBp(Matrix &outputV, IVector &label, real alpha)¶ calculate the error of outputV according to label.
- 
virtual void 
circularConv(Matrix &b, Matrix &c)¶ - \[ a[i] = \sum_{j=-(N-1)/2}^{(N-1)/2} b_{i+j} * c_{j} \]
b contains M elements, c contains N elements (N is odd), b’s index arithmetic is computed modulo M, c’s index arithmetic is computed modulo N.
 
- 
virtual void 
circularConvDerivative(Matrix &output, Matrix &prevOut1, Matrix &prevOut2, Matrix &prevGrad1, Matrix &prevGrad2)¶ 
- 
virtual void 
cosSim(Matrix &output1, Matrix &output2, real scale)¶ cosine similarity, for each row i, this[i] = cos(output1[i], output2[i])
output2 can only have one row, then for each row i, this[i] = cos(output1[i], output2[0])
- 
virtual void 
cosSimDerivative(Matrix &output, Matrix &prevOut1, Matrix &prevOut2, Matrix &prevGrad1, Matrix &prevGrad2, real scale)¶ 
- 
virtual void 
print(std::ostream &os) const¶ print out the values of elements to os
- 
virtual void 
print(std::ostream &os, size_t height, size_t width) const¶ print a part of the matrix from the (top,left) value to the (height, width) value (not included)
- 
virtual void 
printOneRow(std::ostream &os, size_t idx) const¶ print one row to os
- 
virtual real 
getMin()¶ 
- 
virtual real 
getMax()¶ 
- 
virtual void 
randomizeUniform()¶ 
- 
virtual void 
classificationError(MatrixPtr output, IVectorPtr label)¶ calculate the error of classification
output[i] = 1 if row i is an error.
output[i] = 0 if row i is correct.
- 
virtual void 
addByBitCode(size_t numClasses, const IVector &codes, const Matrix &vec)¶ For j < codeLength: this(i, j) += vec(index(i, j), 0) where index(i, j) = ((codes(i) + numClasses) >> (j + 1)) - 1
- 
virtual void 
addByBitCodeBackward(size_t numClasses, const IVector &codes, Matrix &vec)¶ For j < codeLength: vec(index(i, j), 0) += this(i, j) where index is same as the index for addByBitCode
- 
virtual void 
mulByBitCode(size_t numClasses, const IVector &codes, const Matrix &mat, const Matrix &input)¶ For j < codeLength: this(i, j) += <mat.row(index(i, j)), input.row(i)> where index is same as the index for addByBitCode
- 
virtual void 
mulByBitCodeBackwardWeight(size_t numClasses, const IVector &codes, Matrix &mat, const Matrix &input)¶ For j < codeLength: mat.row(index(i, j)) += this(i, j) * input.row(i) where index is same as the index for addByBitCode
- 
virtual void 
mulByBitCodeBackwardError(size_t numClasses, const IVector &codes, const Matrix &mat, Matrix &input)¶ For j < codeLength: input.row(i) += this(i, j) * mat.row(index(i, j)) where index is same as the index for addByBitCode
- 
virtual void 
sumByBitCode(size_t numClasses, IVector &codes, Matrix &sum, real scaleSum)¶ For j < codeLength sum(i, 0) = scaleSum * \sum_j bit(i, j) * this(i, j) where bit(i, j) = ((codes(i) + numClasses) & 2^j) ? 1 : 0
- 
virtual void 
subByBitCode(size_t numClasses_, IVector &codes)¶ For j < codeLength this(i, j) -= bit(i, j) where bit(i, j) is same as that for sumByBitCode
- 
virtual void 
multiBinaryLabelCrossEntropy(Matrix &output, Matrix &label)¶ cross entropy for multi binary labels
this[i] = -sum(label[i][j]*log(output[i][j]) + (1-label[i][j])*log(1-output[i][j]))
Public Static Functions
- 
void 
mul(CpuMatrix *a, CpuMatrix *b, CpuSparseMatrix *c, real scaleAB, real scaleT)¶ 
- template <typename MatBType, typename MatCType>
 - 
void 
mul(CpuSparseMatrix *a, MatBType *b, MatCType *c, real scaleAB, real scaleT)¶ c = a * b
Use the abstract getRow() to get rows from B and C. B and C are defined as template parameters instead of virtual classes for the sake of performance.
- 
 
Inherits from paddle::CpuMatrix
Public Functions
Private Functions
Private Members
- 
struct 
sparse_float_value_t¶ 
- 
typedef std::shared_ptr<GpuSparseMatrix> 
 
- 
namespace 
paddle Typedefs
- 
typedef CpuVectorT<real> 
CpuVector¶ 
- 
typedef GpuVectorT<real> 
GpuVector¶ 
- 
typedef CpuVectorT<int> 
CpuIVector¶ 
- 
typedef GpuVectorT<int> 
GpuIVector¶ 
- 
typedef std::shared_ptr<CpuIVector> 
CpuIVectorPtr¶ 
- 
typedef std::shared_ptr<GpuIVector> 
GpuIVectorPtr¶ 
- 
typedef CpuGpuVectorT<real> 
CpuGpuVector¶ 
- 
typedef CpuGpuVectorT<int> 
ICpuGpuVector¶ 
- 
typedef std::shared_ptr<CpuGpuVector> 
CpuGpuVectorPtr¶ 
- 
typedef std::shared_ptr<ICpuGpuVector> 
ICpuGpuVectorPtr¶ 
- template <class T>
 - 
class 
GpuVectorT¶ Inherits from paddle::VectorT< T >
Public Functions
- 
GpuVectorT(size_t size)¶ 
- 
GpuVectorT(size_t size, GpuMemHandlePtr memHandle, size_t offset)¶ 
- 
GpuVectorT(size_t size, T *data)¶ 
- 
virtual MemoryHandlePtr 
newMemory(size_t size)¶ 
- 
virtual void 
zeroMem()¶ 
- 
virtual void 
reset(const T &value)¶ 
- 
virtual void 
fillSequence()¶ 
- 
virtual void 
copyFrom(const T *src, size_t size)¶ copy size elements from src
If this is GpuVector, src can be cpu or gpu memory
If this is CpuVector, src is assumed to be cpu memory
- 
virtual void 
copyFrom(const T *src, size_t size, hl_stream_t stream)¶ copy size elements from src
If this is GpuVector, src can be cpu or gpu memory
If this is CpuVector, src is assumed to be cpu memory,
- 
virtual void 
copyFrom(const VectorT<T> &src)¶ This function will crash if the size of src and dest is different.
- 
virtual void 
copyFrom(const VectorT<T> &src, hl_stream_t stream)¶ If use_gpu, this function will push the copy task to the specified stream and return immediately.
If GPU is not used, this function is the same as copyFrom(const VectorT<T>& src), which uses stream HPPL_STREAM_DEFAULT.
- 
virtual T 
getElement(size_t i) const¶ Get the value for the i’th element.
- 
virtual void 
setElement(size_t i, const T &value)¶ 
- 
virtual T *
getPoint(const uint64_t beginPos)¶ Get the buffer point with beginPos.
- 
virtual T 
getAbsSum()¶ 
- 
virtual T 
getSum()¶ 
- 
virtual T 
getMax()¶ 
- 
virtual T 
getAbsMax()¶ 
- 
virtual T 
getMin()¶ 
- 
virtual void 
selectFrom(const VectorT<T> &src, const VectorT<int> &ids)¶ select elements indexed by ids from vector src
- 
virtual void 
histogram(std::ostream &os, int type)¶ print histogram of vector values
- Note
 - only exponent histogram supported currently
 
- 
virtual void 
rand()¶ generate uniform random value for each element
- 
virtual void 
rand(size_t classes)¶ generate uniform random value for each element, data range is from 0 to (classes - 1).
- 
virtual void 
randnorm(real mean, real standardDeviation)¶ generate univariate Gaussian distributed random numbers with given mean and standardDeviation.
- 
virtual void 
uniform(real left, real right)¶ generate uniform distributed random numbers with given range.
- 
virtual T 
get(size_t pos)¶ Debug use only. Very inefficient for GPU vector. get the value at pos.
Protected Functions
- 
virtual void 
copyTo(CpuVectorT<T> *dest) const¶ 
- 
virtual void 
copyTo(GpuVectorT<T> *dest) const¶ 
- 
 
- template <class T>
 - 
class 
CpuVectorT¶ Inherits from paddle::VectorT< T >
Subclassed by paddle::ParallelCpuVectorT< T >
Public Functions
- 
CpuVectorT(size_t size)¶ 
- 
CpuVectorT(size_t size, MemoryHandlePtr memoryHandle, size_t offset)¶ 
- 
CpuVectorT(size_t size, T *data)¶ 
- 
CpuVectorT(const VectorT<T> &src)¶ If src is a CpuVector, the new CpuVector will share the data with src
If src is a GpuVector, the new CpuVector will copy data from src
- 
virtual MemoryHandlePtr 
newMemory(size_t size)¶ 
- 
virtual void 
zeroMem()¶ 
- 
virtual void 
reset(const T &value)¶ 
- 
virtual void 
fillSequence()¶ 
- 
virtual void 
copyFrom(const T *src, size_t size)¶ copy size elements from src
If this is GpuVector, src can be cpu or gpu memory
If this is CpuVector, src is assumed to be cpu memory
- 
virtual void 
copyFrom(const T *src, size_t size, hl_stream_t stream)¶ copy size elements from src
If this is GpuVector, src can be cpu or gpu memory
If this is CpuVector, src is assumed to be cpu memory,
- 
virtual void 
copyFrom(const VectorT<T> &src)¶ This function will crash if the size of src and dest is different.
- 
virtual void 
copyFrom(const VectorT<T> &src, hl_stream_t stream)¶ If use_gpu, this function will push the copy task to the specified stream and return immediately.
If GPU is not used, this function is the same as copyFrom(const VectorT<T>& src), which uses stream HPPL_STREAM_DEFAULT.
- 
virtual void 
copyTo(CpuVectorT<T> *dest) const¶ 
- 
virtual void 
copyTo(GpuVectorT<T> *dest) const¶ 
- 
virtual T *
getPoint(const uint64_t beginPos)¶ Get the buffer point with beginPos.
- 
virtual T 
getElement(size_t i) const¶ Get the value for the i’th element.
- 
virtual void 
setElement(size_t i, const T &value)¶ 
- 
virtual T 
getAbsSum()¶ 
- 
virtual T 
getSum()¶ 
- 
virtual T 
getMax()¶ 
- 
virtual T 
getAbsMax()¶ 
- 
virtual T 
getMin()¶ 
- 
virtual void 
selectFrom(const VectorT<T> &src, const VectorT<int> &ids)¶ select elements indexed by ids from vector src
- 
virtual void 
histogram(std::ostream &os, int type)¶ print histogram of vector values
- Note
 - only exponent histogram supported currently
 
- 
virtual void 
rand()¶ generate uniform random value for each element
- 
virtual void 
rand(size_t classes)¶ generate uniform random value for each element, data range is from 0 to (classes - 1).
- 
virtual void 
randnorm(real mean, real standardDeviation)¶ generate univariate Gaussian distributed random numbers with given mean and standardDeviation.
- 
virtual void 
uniform(real left, real right)¶ generate uniform distributed random numbers with given range.
- 
virtual T 
get(size_t pos)¶ Debug use only. Very inefficient for GPU vector. get the value at pos.
- 
 
- template <class T>
 - 
class 
BaseVector¶ Inherits from paddle::BaseMatrixT< T >
Subclassed by paddle::VectorT< T >
Protected Attributes
- 
size_t &
size_¶ 
- 
size_t &
 
- template <class T>
 - 
class 
VectorT¶ - #include <Vector.h>
Copy construction or assignment will share the data as opposed to making a copy of the original data. To make a copy of the original data, use copyFrom() instead.
Inherits from paddle::BaseVector< T >
Subclassed by paddle::CpuVectorT< T >, paddle::GpuVectorT< T >
Public Functions
- 
virtual 
~VectorT()¶ 
- 
size_t 
getSize() const¶ 
- 
const T *
getData() const¶ 
- 
T *
getData()¶ 
- 
virtual void 
zeroMem() = 0¶ 
- 
virtual void 
reset(const T &value) = 0¶ 
- 
virtual void 
fillSequence() = 0¶ 
- 
MemoryHandlePtr 
getMemoryHandle() const¶ 
- 
void 
resize(size_t newSize)¶ resizing to a big vector will not preserve old values.
- 
virtual MemoryHandlePtr 
newMemory(size_t size) = 0¶ 
- 
void 
subVecFrom(const VectorT<T> &src, size_t start, size_t size)¶ form sub vector from src, shallow copy
- 
void 
subVecFrom(const T *src, size_t start, size_t size)¶ form sub vector from src, shallow copy
- 
void 
subVecFrom(const VectorT<T> &src, std::pair<size_t, size_t> interval)¶ form sub vector from src, shallow copy in interval [interval.first, interval.second)
- 
virtual void 
copyFrom(const VectorT<T> &src) = 0¶ This function will crash if the size of src and dest is different.
- 
virtual void 
copyFrom(const VectorT<T> &src, hl_stream_t stream) = 0¶ If use_gpu, this function will push the copy task to the specified stream and return immediately.
If GPU is not used, this function is the same as copyFrom(const VectorT<T>& src), which uses stream HPPL_STREAM_DEFAULT.
- 
virtual void 
copyFrom(const T *src, size_t size) = 0¶ copy size elements from src
If this is GpuVector, src can be cpu or gpu memory
If this is CpuVector, src is assumed to be cpu memory
- 
virtual void 
copyFrom(const T *src, size_t size, hl_stream_t stream) = 0¶ copy size elements from src
If this is GpuVector, src can be cpu or gpu memory
If this is CpuVector, src is assumed to be cpu memory,
- 
virtual void 
exec(SyncThreadPool::JobFunc func)¶ exec a func in single/multi thread
- 
virtual T *
getPoint(const uint64_t beginPos) = 0¶ Get the buffer point with beginPos.
- 
virtual T 
getElement(size_t i) const = 0¶ Get the value for the i’th element.
- 
virtual void 
setElement(size_t i, const T &value) = 0¶ 
- 
virtual T 
getAbsSum() = 0¶ 
- 
virtual T 
getSum() = 0¶ 
- 
virtual T 
getMax() = 0¶ 
- 
virtual T 
getAbsMax() = 0¶ 
- 
virtual T 
getMin() = 0¶ 
- 
virtual void 
isEqualTo(const VectorT<T> &b, const T &value) = 0¶ element-wise calc: this = (b == value)
- 
virtual void 
selectFrom(const VectorT<T> &src, const VectorT<int> &ids) = 0¶ select elements indexed by ids from vector src
- 
virtual void 
histogram(std::ostream &os, int type = HISTOGRAM_EXPONENT) = 0¶ print histogram of vector values
- Note
 - only exponent histogram supported currently
 
- 
virtual void 
rand() = 0¶ generate uniform random value for each element
- 
virtual void 
rand(size_t classes) = 0¶ generate uniform random value for each element, data range is from 0 to (classes - 1).
- 
virtual T 
get(size_t pos) = 0¶ Debug use only. Very inefficient for GPU vector. get the value at pos.
- 
virtual void 
randnorm(real mean, real standardDeviation) = 0¶ generate univariate Gaussian distributed random numbers with given mean and standardDeviation.
- 
virtual void 
uniform(real left, real right) = 0¶ generate uniform distributed random numbers with given range.
Public Static Functions
- 
std::shared_ptr<VectorT<T>> 
create(size_t size, MemoryHandlePtr memoryHandle, size_t offset = 0)¶ 
- 
std::shared_ptr<VectorT<T>> 
createParallelVector(size_t size, bool useGpu, SyncThreadPool *pool = nullptr)¶ 
Protected Functions
- 
VectorT(size_t size, MemoryHandlePtr memoryHandle, size_t offset, bool useGpu)¶ 
- 
VectorT(size_t size, T *data, bool useGpu)¶ 
- 
virtual void 
copyTo(CpuVectorT<T> *dest) const = 0¶ 
- 
virtual void 
copyTo(GpuVectorT<T> *dest) const = 0¶ 
Protected Attributes
- 
MemoryHandlePtr 
memoryHandle_¶ 
Friends
- 
friend 
paddle::GpuVectorT< T > 
- 
friend 
paddle::CpuVectorT< T > 
 - 
virtual 
 
- template <class T>
 - 
class 
ParallelCpuVectorT¶ Inherits from paddle::CpuVectorT< T >
Public Functions
- 
ParallelCpuVectorT(size_t size, SyncThreadPool *pool)¶ 
- 
virtual void 
zeroMem()¶ 
- 
virtual void 
randnorm(real mean, real standardDeviation)¶ generate univariate Gaussian distributed random numbers with given mean and standardDeviation.
- 
virtual void 
uniform(real left, real right)¶ generate uniform distributed random numbers with given range.
- 
virtual void 
exec(SyncThreadPool::JobFunc func)¶ exec a func in single/multi thread
Private Types
- 
typedef std::function<void(CpuVectorT<T> &vec)> 
ExecFunc¶ 
Private Members
- 
SyncThreadPool *
pool_¶ 
- 
 
- template <class T>
 - 
class 
CpuGpuVectorT¶ - #include <Vector.h>
A class to do conversion between CpuVector and GpuVector automatically.
Public Types
Public Functions
- 
CpuGpuVectorT(size_t size, bool useGpu)¶ A constructor, create cpuVectorT_ or gpuVectorT_.
- Parameters
 size-data size.
useGpu-use gpu or not.
A constructor, create CpuGpuVectorT by VectorT.
If src is CpuVector, cpuVectorT_ is shared data with src.
If src is GpuVector, gpuVectorT_ is shared data with src.
- 
CpuGpuVectorT(size_t size, T *data, bool useGpu)¶ A constructor.
If useGpu is true, data should be located in device and create gpuVectorT_ with data.
If useGpu is false, data should be located in host and create cpuVectorT_ with data.
- Note
 - Data is owned by the caller and should be valid during the life of this vector. The caller is responsible for releasing the memory.
 
- 
CpuGpuVectorT(CpuGpuVectorT<T> &src, size_t offset, size_t size)¶ 
- 
virtual 
~CpuGpuVectorT()¶ 
- 
void 
resize(size_t size, bool useGpu)¶ resize vector.
If useGpu is true, resize gpuVectorT_ and set syncFlag_ to DATA_AT_GPU,
otherwise resize cpuVectorT_ and set syncFlag_ to DATA_AT_CPU.
- 
std::shared_ptr<const VectorT<T>> 
getVector(bool useGpu) const¶ return a const cpuVectorT_ or gpuVectorT_.
If useGpu is true, return gpuVectorT_.
If useGpu is false, return cpuVectorT_.
- Note
 - Caller should not change the data. If caller changes const attribute, should set syncFlag_.
 
- 
std::shared_ptr<VectorT<T>> &
getMutableVector(bool useGpu)¶ return a mutable cpuVectorT_ or gpuVectorT_.
- Note
 - This interface will change syncFlag_, so if you will not change the data, you should call getVector instead.
 
- 
const T *
getData(bool useGpu) const¶ return const T* data.
If useGpu is true, return device data.
If useGpu is false, return host data.
- 
T *
getMutableData(bool useGpu)¶ 
- 
void 
zeroMem(bool useGpu)¶ If useGpu is true, gpuVectorT_->Op().
If useGpu is false, cpuVectorT_->Op().
Op is zeroMem, fillSequence, ...
- 
void 
fillSequence(bool useGpu)¶ 
- 
void 
setElement(size_t i, const T &value, bool useGpu)¶ 
- 
T 
getElement(size_t i) const¶ return i-th element.
- 
size_t 
getSize() const¶ return vector size.
- 
void 
copyToCpu(const T *data, size_t size)¶ copy data to cpuVectorT_.
- 
void 
copyToCpu(const T *data, size_t size, hl_stream_t stream)¶ copy data to cpuVectorT_ using the specified stream.
- 
void 
copyToGpu(const T *data, size_t size)¶ copy data to gpuVectorT_.
- 
void 
copyToGpu(const T *data, size_t size, hl_stream_t stream)¶ copy data to gpuVectorT_ using the specified stream.
- 
void 
copyFrom(const VectorT<T> &src, hl_stream_t stream)¶ copy from src using the specified stream.
If src is CpuVectorT, copy to cpuVectorT_.
If src is GpuVectorT, copy to gpuVectorT_.
- 
void 
copyFrom(const T *data, size_t size, bool useGpu)¶ copy data.
If useGpu is false, copy host data to cpuVectorT_.
If useGpu is true, copy device data to gpuVectorT_.
- Note
 - data address should be consistent with useGpu.
 
- 
void 
copyFrom(const T *data, size_t size, hl_stream_t stream, bool useGpu)¶ 
- 
void 
copyFrom(CpuGpuVectorT<T> &src, size_t offset, size_t size, bool useGpu, hl_stream_t stream)¶ copy from (src + offset) using the specified stream.
- 
void 
copyFrom(CpuGpuVectorT<T> &src, hl_stream_t stream)¶ copy from src using the specified stream.
- 
SyncedFlag *
getSync() const¶ return sync_.
- 
void 
setSync(SyncedFlag *sync)¶ set sync_.
- 
void 
setSync(SyncedFlag syncFlag)¶ 
- 
void 
setSync(bool useGpu)¶ 
Public Static Functions
- 
std::shared_ptr<CpuGpuVectorT<T>> 
create(size_t size, bool useGpu)¶ 
resize or create CpuGpuVectorT.
Protected Functions
- 
void 
resizeOrCreate(size_t size, bool useGpu)¶ 
- 
void 
copyToCpu()¶ copy between cpuVectorT_ and gpuVectorT_.
If syncFlag_ is DATA_AT_CPU and SYNCED, do nothing.
If syncFlag_ is DATA_AT_GPU, copy gpuVectorT_ to cpuVectorT_ and set syncFlag_ to SYNCED.
- 
void 
copyToGpu()¶ copy between cpuVectorT_ and gpuVectorT_.
If syncFlag_ is DATA_AT_GPU and SYNCED, do nothing.
If syncFlag_ is DATA_AT_CPU, copy cpuVectorT_ to gpuVectorT_ and set syncFlag_ to SYNCED.
 - 
 
- 
typedef CpuVectorT<real> 
 
- 
namespace 
paddle Typedefs
- 
typedef std::shared_ptr<_hl_sparse_matrix_s> 
hl_sparse_matrix_s_ptr¶ 
- 
class 
GpuSparseMatrix¶ Inherits from paddle::Matrix
Public Functions
- 
GpuSparseMatrix(size_t height, size_t width, size_t nnz, SparseValueType valueType = FLOAT_VALUE, SparseFormat format_ = SPARSE_CSR, bool trans = false)¶ 
- 
GpuSparseMatrix(GpuMemHandlePtr dataHandle, hl_sparse_matrix_s_ptr sMatrix, size_t height, size_t width, size_t nnz, SparseValueType valueType = FLOAT_VALUE, SparseFormat format_ = SPARSE_CSR, bool trans = false, MemoryHandlePtr sMemoryHandle = NULL)¶ 
- 
GpuSparseMatrix(real *value, int *rows, int *cols, size_t height, size_t width, size_t nnz, SparseValueType valueType, SparseFormat format, bool trans)¶ 
- 
GpuSparseMatrix(hl_sparse_matrix_s_ptr sMatrix, size_t height, size_t width, size_t nnz, SparseValueType valueType, SparseFormat format, bool trans, MemoryHandlePtr sMemoryHandle)¶ 
- 
~GpuSparseMatrix()¶ 
- 
virtual void 
resize(size_t newHeight, size_t newWidth, size_t newNnz, SparseValueType valueType, SparseFormat format)¶ - Note
 - This should only be used for sparse matrix.
 
- 
virtual void 
resize(size_t newHeight, size_t newWidth)¶ - Note
 - Original data may not be preserved after resize().
 
- 
void 
sparseResizeCSR()¶ 
- 
void 
sparseResizeCSC()¶ 
- 
void 
resizeCSR(size_t newHeight, size_t newWidth, size_t newNnz, SparseValueType valueType)¶ 
- 
void 
resizeCSC(size_t newHeight, size_t newWidth, size_t newNnz, SparseValueType valueType)¶ 
- 
void 
mul(const GpuMatrixPtr a, const GpuMatrixPtr b, real scaleAB, real scaleT)¶ 
- 
virtual void 
copyFrom(const Matrix &src, hl_stream_t stream)¶ 
- 
void 
copyFromCSR(CpuSparseMatrix &src, hl_stream_t stream)¶ 
- 
void 
copyFromCSC(CpuSparseMatrix &src, hl_stream_t stream)¶ 
- 
virtual void 
copyFrom(const IVector &src)¶ convert an int vector to a real matrix.
(1) source and dest are both in CPU.
(2) sizes match exactly.
- 
void 
copyFrom(const IVector &src, hl_stream_t stream)¶ 
- template <class T>
 - 
void 
copyFrom(int64_t *ids, int64_t *indices, T *data, hl_stream_t stream)¶ 
- 
virtual void 
setRow(size_t row, size_t colNum, const unsigned int *cols, const real *values)¶ This should only be used for sparse matrix.
Currently must be called for each row in order. The matrix is not valid until setRow is called for the last row.
- 
virtual SparseValueType 
getValueType() const¶ 
- 
virtual SparseFormat 
getFormat() const¶ 
- 
const int *
getRowCols(size_t x) const¶ 
- 
const real *
getRowValues(size_t x) const¶ 
- 
size_t 
getColNum(size_t x) const¶ 
- 
virtual void 
print(std::ostream &os) const¶ print out the values of elements to os
- 
virtual void 
zeroMem()¶ only set value_ of FLOAT_VALUE sparse matrix to zero
- 
void 
add3(GpuMatrix *b)¶ sparseMatrix += denseMatrix
Named add3 just because add/add2 have been used in BaseMatrix.cu and they are not virtual functions.
Only adds the values of the dense matrix at the same (row, col) indices and does not use the other values.
- Parameters
 b-dense matrix
- 
virtual void 
add3(MatrixPtr b)¶ matrix element-wise add
Named add3 just because add/add2 have been used in BaseMatrix.cu and they are not virtual functions.
- 
virtual void 
addBias(Matrix &b, real scale)¶ sparseMatrix[i,j] += bias[j], (j is the col index of sparse matrix)
- Parameters
 b-bias, dense matrix and height = 1
scale-scale of b
- 
virtual int *
getRows() const¶ return rows, which is gpu address
- 
virtual int *
getCols() const¶ return cols, which is gpu address
- 
real *
getValue() const¶ return value, which is gpu address
- 
virtual real *
getData()¶ return value_ of sparse matrix
Sometimes a CpuSparseMatrix may be held as a Matrix; calling getValue would then require a dynamic_cast to CpuSparseMatrix, so getData is a convenient way to get the value.
- 
virtual const real *
getData() const¶ 
- 
virtual void 
rowMax(IVector &maxIds, Matrix &maxVal)¶ Get the top k values of each row in the sparse matrix.
Store the values in maxVal and their indices in maxIds. k = maxVal.width
- Parameters
 maxIds-index of top k
maxVal-value of top k
- 
virtual void 
mul(const MatrixPtr a, const MatrixPtr b, real scaleAB, real scaleT)¶ this = scaleAB*(a*b) + scaleT*this
- 
void 
copyFrom(CpuSparseMatrix &src, hl_stream_t stream)¶ 
- 
void 
copyFrom(GpuSparseMatrix &src, hl_stream_t stream)¶ 
- 
virtual void 
trimFrom(const CpuSparseMatrix &src)¶ 
- 
void 
trimFromCSR(const CpuSparseMatrix &src)¶ 
- 
void 
trimFromCSC(const CpuSparseMatrix &src)¶ 
- 
virtual bool 
isSparse() const¶ 
Public Members
- 
MemoryHandlePtr 
sMemoryHandle_¶ 
- 
int *
rows_¶ 
- 
int *
cols_¶ 
- 
real *
value_¶ 
- 
const char *
end_¶ 
- 
hl_sparse_matrix_s_ptr 
sMatrix_¶ 
- 
SparseValueType 
valueType_¶ 
- 
SparseFormat 
format_¶ 
Protected Functions
- 
void 
sparseResize()¶ 
- 
void 
copyRow(int offsets, size_t colNum, const sparse_non_value_t *row)¶ 
- 
void 
copyRow(int offsets, size_t colNum, const sparse_float_value_t *row)¶ 
- 
 
- 
typedef std::shared_ptr<_hl_sparse_matrix_s> 
 
Functions
- 
P_DECLARE_bool(allow_inefficient_sparse_update)¶ 
- 
namespace 
paddle - 
class 
SparseRowCpuMatrix¶ - #include <SparseRowMatrix.h>
Sparse Row
Inherits from paddle::CpuMatrix
Subclassed by paddle::SparseAutoGrowRowCpuMatrix, paddle::SparsePrefetchRowCpuMatrix
Public Functions
- 
SparseRowCpuMatrix(CpuMemHandlePtr dataHandle, size_t height, size_t width, IndexDictPtr indexDictHandle = nullptr, bool trans = false)¶ heightStore is max number of rows of the sparse matrix.
- 
virtual 
~SparseRowCpuMatrix()¶ 
- 
real *
getRow(size_t row)¶ Get the row buf
- Parameters
 row-row id in the original matrix
- 
real *
getLocalRow(size_t row)¶ Get the row buf
- Parameters
 row-row id in local storage
- 
void 
reserveStore()¶ reserve the storage for rows according to current size of indexDictHandle.
This is only used when SparseRowCpuMatrix is constructed with indexDictHandle.
- 
virtual real *
getRowBuf(size_t row)¶ 
- 
virtual void 
mul(CpuSparseMatrix *a, CpuMatrix *b, real scaleAB, real scaleT)¶ 
- 
virtual void 
copyFrom(const real *src, size_t size)¶ Fill data according to the row indices added, and set up the indices inside.
src and size are data and size of normal dense CpuMatrix.
- 
virtual void 
zeroMem()¶ 
- 
void 
applyL1Decay(real learningRate, real decayRate)¶ Apply L1 decay to all sparse rows; should be applied after the indices are ready.
- 
void 
clearIndices()¶ 
- 
void 
zeroMemThread(size_t tid, size_t numThreads)¶ 
- 
void 
sgdUpdate(BaseMatrix &value, IVector &t0, real learningRate, int currentTime, real decayRate, bool useL1, bool fini = false)¶ value -= grad * learningRate, where this matrix is the gradient.
If L1 decay is set, use L1; else if L2 decay is set, use L2; otherwise no decay at all.
t0 is an int vector used by L1/L2 decay, size = height of parameter matrix, storing the time at which each weight row was last updated.
Time is measured in batchId; currentTime is the current batchId.
When a pass has finished, the caller should call this function one more time with (fini=true) to let weight decay catch up to the current time.
- 
void 
addTo(BaseMatrix &dest, std::vector<uint32_t> &ids, size_t tid, size_t numThreads)¶ Merge rows in this into dest for the designated thread.
Values are added to the dest matrix.
Ids that occurred in this are appended to ids, filtered by (id % numThreads == tid).
- 
void 
addTo(SparseRowCpuMatrix &dest, size_t tid, size_t numThreads)¶ The second version of addTo(); dest is a SparseRowCpuMatrix.
The dest’s indices should be set up already; addTo() will check that the src ids exist in dest’s indices.
- 
const IndexDictPtr &
getIndexDictHandle() const¶ 
- 
void 
checkIndices()¶ check the consistency of all local and global indices
- 
void 
checkIndex(size_t i)¶ check whether row i exists in indices
- 
std::vector<unsigned int> &
getLocalIndices() const¶ 
Protected Functions
- template <typename Func>
 - 
void 
apply(Func f)¶ 
- 
void 
init(size_t height, size_t width)¶ 
- 
void 
clearRows()¶ clear row indices.
- 
void 
checkStoreSize()¶ 
Protected Attributes
- 
std::vector<real, AlignedAllocator<real, 32>> 
rowStore_¶ 
- 
IndexDictPtr 
indexDictHandle_¶ 
- 
std::vector<unsigned int> *
localIndices_¶ 
- 
unsigned int *
globalIndices_¶ 
Protected Static Attributes
- 
const unsigned int 
kUnusedId_¶ 
- 
struct 
IndexDict¶ 
 - 
 
- 
class 
SparsePrefetchRowCpuMatrix¶ - #include <SparseRowMatrix.h>
For prefetching parameters from remote Parameter server.
Inherits from paddle::SparseRowCpuMatrix
Public Functions
- 
SparsePrefetchRowCpuMatrix(CpuMemHandlePtr dataHandle, size_t height, size_t width, IndexDictPtr indexDictHandle = nullptr, SyncThreadPool *pool = nullptr, bool trans = false)¶ 
- 
void 
addRows(MatrixPtr input)¶ Extract feature ids from input to fill the row indices.
input must be a sparse matrix.
Can be called many times before setup.
- 
void 
addRows(IVectorPtr ids)¶ 
- 
void 
setupIndices()¶ Set up the global indices of SparseRowMatrix after all rows have been added.
Protected Functions
- 
void 
addRows(const unsigned int *ids, size_t len)¶ 
Protected Attributes
- 
SyncThreadPool *
pool_¶ 
 - 
 
- 
class 
SparseAutoGrowRowCpuMatrix¶ Inherits from paddle::SparseRowCpuMatrix
Subclassed by paddle::CacheRowCpuMatrix
- 
class 
CacheRowCpuMatrix¶ Inherits from paddle::SparseAutoGrowRowCpuMatrix
Public Functions
- 
CacheRowCpuMatrix(size_t height, size_t width, IndexDictPtr indexDictHandle = nullptr, bool trans = false)¶ 
- 
void 
setSourceData(CpuVectorPtr sourceVec)¶ 
- 
real *
getRow(size_t row)¶ 
- 
virtual real *
getRowBuf(size_t row)¶ 
- 
virtual void 
mul(CpuSparseMatrix *a, CpuMatrix *b, real scaleAB, real scaleT)¶ 
- 
 
- 
class 
SparseRowIdsCpuMatrix¶ - #include <SparseRowMatrix.h>
Sparse Row Ids Matrix.
Mostly the same as CpuMatrix, but maintains the sparse row ids that occurred; ids are hashed by worker thread id.
Inherits from paddle::CpuMatrix
Public Functions
- 
SparseRowIdsCpuMatrix(CpuMemHandlePtr dataHandle, size_t height, size_t width, bool trans = false)¶ 
- 
void 
setNumOfThreads(size_t numOfThreads)¶ 
- 
std::vector<uint32_t> &
getIds(size_t threadId)¶ 
Private Members
- 
std::vector<std::vector<uint32_t>> 
idsArray_¶ 
 - 
 
- 
class 
 
- 
namespace 
paddle - 
class 
CpuSparseMatrix¶ Inherits from paddle::Matrix
Public Functions
- 
CpuSparseMatrix(size_t height, size_t width, size_t nnz, SparseValueType valueType = FLOAT_VALUE, SparseFormat format = SPARSE_CSR, bool trans = false)¶ 
- 
CpuSparseMatrix(CpuMemHandlePtr memHandle, size_t height, size_t width, size_t nnz, SparseValueType valueType, SparseFormat format, bool trans)¶ 
- 
CpuSparseMatrix(real *data, int *rows, int *cols, size_t height, size_t width, size_t nnz, SparseValueType valueType, SparseFormat format, bool trans)¶ 
- 
~CpuSparseMatrix()¶ 
- 
virtual void 
resize(size_t newHeight, size_t newWidth, size_t newNnz, SparseValueType valueType, SparseFormat format)¶ - Note
 - This should only be used for sparse matrix.
 
- 
virtual void 
resize(size_t newHeight, size_t newWidth)¶ - Note
 - Original data may not be preserved after resize().
 
- 
SparseValueType 
getValueType()¶ 
- 
real *
getRowValues(size_t i) const¶ 
- 
int *
getRowCols(size_t i) const¶ 
- 
void 
fillRowIndices(IVectorPtr &outVec) const¶ fill row indices of each value in CSR matrix
- 
size_t 
getColNum(size_t i) const¶ 
- 
real *
getColumn(size_t i) const¶ 
- 
size_t 
getColStartIdx(size_t i) const¶ 
- 
size_t 
getRowStartIdx(size_t i) const¶ 
- 
size_t 
getRowNum(size_t i) const¶ 
- 
virtual real 
getSum()¶ 
- 
virtual void 
square()¶ 
- 
virtual real 
getMin()¶ only consider nonzero values. the actual min value should compare with 0.0.
- 
virtual real 
getMax()¶ only consider nonzero values. the actual max value should compare with 0.0.
- 
virtual void 
rowMax(IVector &maxIds, Matrix &max)¶ Get the top k elements of each row of this matrix.
The column ids and values of these elements are stored in maxIds and max respectively, where k is the size of maxIds. Note that the top k elements are not sorted.
- 
virtual int *
getRows() const¶ 
- 
virtual int *
getCols() const¶ 
- 
real *
getValue() const¶ 
- 
virtual SparseFormat 
getFormat() const¶ 
- 
virtual SparseValueType 
getValueType() const¶ 
- 
virtual real *
getData()¶ return value_ of sparse matrix
Sometimes a CpuSparseMatrix may be held as a Matrix; calling getValue would then require a dynamic_cast to CpuSparseMatrix, so getData is a convenient way to get the value.
- 
virtual const real *
getData() const¶ 
- 
virtual void 
zeroMem()¶ only set value_ of FLOAT_VALUE sparse matrix to zero
- 
virtual void 
transpose(MatrixPtr matTrans, bool memAlloc)¶ mem MUST be allocated outside (memAlloc=false)
- 
virtual void 
mul(MatrixPtr a, MatrixPtr b, real scaleAB, real scaleT)¶ this = scaleAB*(a*b) + scaleT*this
- 
void 
add3(CpuMatrix *b)¶ sparseMatrix += denseMatrix
Named add3 just because add/add2 have been used in BaseMatrix.cu and they are not virtual functions.
Only adds the values of the dense matrix at the same (row, col) indices and does not use the other values, whose positions are not in the sparse matrix.
- Parameters
 b-dense matrix
- 
virtual void 
add3(MatrixPtr b)¶ matrix element-wise add
Named add3 just because add/add2 have been used in BaseMatrix.cu and they are not virtual functions.
- 
virtual void 
addBias(Matrix &b, real scale)¶ sparseMatrix[i,j] += bias[j], (j is the col index of sparse matrix)
- Parameters
 b-bias, dense matrix and height = 1
scale-scale of b
- 
virtual void 
print(std::ostream &os) const¶ print out the values of elements to os
- 
virtual void 
printOneRow(std::ostream &os, size_t idx) const¶ print one row to os
- 
virtual void 
setRow(size_t row, size_t colNum, const unsigned int *cols, const real *values)¶ This should only be used for sparse matrix.
Currently must be called for each row in order. The matrix is not valid until setRow is called for the last row.
- 
virtual void 
randomizeUniform()¶ 
- 
void 
copyFrom(const GpuSparseMatrix &src, hl_stream_t stream)¶ 
- 
virtual void 
copyFrom(const Matrix &src, hl_stream_t stream = HPPL_STREAM_DEFAULT)¶ 
- 
CpuSparseMatrixPtr 
getTmpSparseMatrix(size_t height, size_t width)¶ Get a temporary matrix. This is threadsafe. It should be only used temporarily, i.e. do not store it or use it as return value.
- Note
 - Do NOT use large amount of tmp matrix.
 
- 
void 
copyFrom(std::vector<int> &rows, std::vector<int> &cols, std::vector<real> &values)¶ 
- 
void 
copyFrom(const CpuSparseMatrix &src)¶ 
- 
virtual void 
trimFrom(const CpuSparseMatrix &src)¶ 
- 
void 
copyRow(int offsets, size_t colNum, const sparse_non_value_t *row)¶ 
- 
void 
copyRow(int offsets, size_t colNum, const sparse_float_value_t *row)¶ 
- template <class T>
 - 
void 
copyFrom(int64_t *ids, int64_t *indices, T *data)¶ 
- template <class T>
 - 
void 
copyFrom(int64_t *indices, T *data)¶ 
- 
virtual void 
copyFrom(const real *src, size_t size)¶ If this is GpuMatrix, src is assumed to be CPU memory
If this is CpuMatrix, src is assumed to be CPU memory
- 
virtual bool 
isSparse() const¶ 
Protected Functions
- 
void 
sparseResize()¶ 
Protected Attributes
- 
int *
rows_¶ 
- 
int *
cols_¶ 
- 
real *
value_¶ 
- 
SparseFormat 
format_¶ 
- 
SparseValueType 
valueType_¶ 
Protected Static Attributes
- 
const size_t 
DEFAULT_AVG_WIDTH¶ 
- 
ThreadLocal<std::vector<CpuSparseMatrixPtr>> 
cpuLocalMats_¶ 
Private Functions
- 
virtual MatrixPtr 
clone(size_t height = 0, size_t width = 0, bool useGpu = false)¶ Create a matrix with the same type (GpuMatrix, CpuMatrix, NonValueSparseMatrix, etc.) as this.
If height and width are zero, the new matrix will have the same size as this, otherwise the new matrix will have the specified size.
- 
 
- 
class 
 
Others¶
- 
namespace 
paddle Functions
- template <class T>
 - 
void 
gemm(const CBLAS_TRANSPOSE transA, const CBLAS_TRANSPOSE transB, const int M, const int N, const int K, const T alpha, const T *A, const int lda, const T *B, const int ldb, const T beta, T *C, const int ldc)¶ 
- template <class T>
 - 
void 
axpy(const int n, const T alpha, const T *x, T *y)¶ 
- template <class T>
 - 
T 
dotProduct(const int n, const T *x, const T *y)¶ 
- 
namespace 
paddle - 
namespace 
simd¶ Functions
- template <typename Type>
 - 
void 
addTo(Type *a, const Type *b, size_t len)¶ 
- template <typename Type>
 - 
void 
batchAddTo(Type *a, const Type *b[], int batch, size_t len)¶ 
- template <typename Type>
 - 
void 
colMax(Type *result, const Type *data, int dim, int numSamples)¶ 
- template <typename Type>
 - 
void 
decayL1(Type *dst, Type *src, Type *lr, Type lambda, size_t len)¶ 
- template <typename Type>
 - 
void 
decayL1(Type *dst, Type *src, Type lambda, size_t len)¶ 
- template <size_t AlignSize>
 - 
bool 
isPointerAlign(void *ptr)¶ 
- 
bool 
vec_check(size_t len)¶ 
- template <>
 - 
void 
addTo(float *a, const float *b, size_t len)¶ 
- template <>
 - 
void 
batchAddTo(float *a, const float *b[], int batch, size_t len)¶ 
- template <>
 - 
void 
colMax(float *result, const float *data, int dim, int numSamples)¶ 
- template <>
 - 
void 
decayL1(float *dst, float *src, float lambda, size_t len)¶ 
- template <>
 - 
void 
decayL1(float *dst, float *src, float *lr, float lambda, size_t len)¶ 
- 
namespace 
naive¶ Functions
- template <typename Type>
 - 
void 
addTo(Type *a, const Type *b, size_t len)¶ 
- template <typename Type>
 - 
void 
batchAddTo(Type *a, const Type *b[], int batch, size_t len)¶ 
- template <typename Type>
 - 
void 
colMax(Type *result, const Type *data, int dim, int numSamples)¶ - Note
 - this method is unused in paddle.
 
- template <typename Type>
 - 
void 
decayL1(Type *dst, Type *src, Type *lr, Type lambda, size_t len)¶ 
- template <class Type>
 - 
void 
decayL1(Type *dst, Type *src, Type lambda, size_t len)¶ 
- 
namespace