diff --git a/paddle/function/BufferArg.h b/paddle/function/BufferArg.h
index d787d2814d88bfc8753620e5f51d7a99936e432f..3d28249f69c2bbc6efbd539bcc66dfa1282275bd 100644
--- a/paddle/function/BufferArg.h
+++ b/paddle/function/BufferArg.h
@@ -38,16 +38,40 @@ enum SparseDataType {
 enum SparseDataFormat { SPARSE_CSR_FORMAT = 0, SPARSE_CSC_FORMAT = 1 };
 
-/**
- * BufferArg used as the argument type for Function.
- */
 class BufferArg;
 class SequenceArg;
 class SparseMatrixArg;
 typedef std::shared_ptr<BufferArg> BufferArgPtr;
 
-// an array of arbitrary dimensions
+/**
+ * \brief BufferArg is used as the argument type of Function.
+ *
+ * The arguments of a Paddle Function can be of four Buffer types.
+ * 1. BufferArg for a dense Buffer of any dimension.
+ * 2. SequenceIdArg for a Buffer of sequence start positions.
+ * 3. SequenceArg for a Buffer of sequence data.
+ * 4. SparseMatrixArg for a Buffer of a sparse matrix.
+ *
+ * A BufferArg used as a Function output carries an ArgType property.
+ * The argType_ of an output BufferArg determines whether the result of
+ * the Function calculation is assigned to the output Buffer or added
+ * to it.
+ */
 class BufferArg {
+public:
+  // ArgType is only used by an output BufferArg.
+  // For an input argument, argType_ is ignored.
+  // For an output argument, argType_ needs to be set.
+  enum ArgType {
+    UNSPECIFIED = 0,
+    ASSIGN_TO = 1,
+    ADD_TO = 2,
+  };
+
+  void setArgType(ArgType argType) { argType_ = argType; }
+
+  ArgType getArgType() const { return argType_; }
+
 public:
   BufferArg(void* buf, ValueType valueType, const TensorShape& shape)
       : buf_(buf), valueType_(valueType), shape_(shape) {}
@@ -56,7 +80,8 @@ public:
       : buf_(buf), valueType_(valueType) {}
 
   BufferArg(const Matrix& matrix)
-      : buf_(reinterpret_cast<void*>(matrix.getData())),
+      : buf_(
+            const_cast<void*>(reinterpret_cast<const void*>(matrix.getData()))),
         valueType_(DataType<real>::value),
         shape_(2) {
     shape_.setDim(0, matrix.getHeight());
@@ -64,21 +89,24 @@ public:
   }
 
   BufferArg(const Matrix& matrix, const TensorShape& shape)
-      : buf_(reinterpret_cast<void*>(matrix.getData())),
+      : buf_(
+            const_cast<void*>(reinterpret_cast<const void*>(matrix.getData()))),
         valueType_(DataType<real>::value),
         shape_(shape) {
     CHECK_EQ(matrix.getElementCnt(), shape.getElements());
   }
 
   BufferArg(const Vector& vector)
-      : buf_(reinterpret_cast<void*>(vector.getData())),
+      : buf_(
+            const_cast<void*>(reinterpret_cast<const void*>(vector.getData()))),
         valueType_(DataType<real>::value),
         shape_(1) {
     shape_.setDim(0, vector.getSize());
   }
 
   BufferArg(const IVector& vector)
-      : buf_(reinterpret_cast<void*>(vector.getData())),
+      : buf_(
+            const_cast<void*>(reinterpret_cast<const void*>(vector.getData()))),
         valueType_(VALUE_TYPE_INT32),
         shape_(1) {
     shape_.setDim(0, vector.getSize());
@@ -124,6 +152,7 @@ protected:
   ValueType valueType_;
   TensorShape shape_;
   BufferType bufferType_;
+  ArgType argType_ = UNSPECIFIED;
   // leading dimensions. The size is dims_.size()
   // Dims lds_;
 };
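A minimal caller-side sketch of the new argument contract, combining the BufferArg interface above with the BufferArgs::addArg overloads from Function.h below. The matrix sizes and the NCHW shape are invented for illustration, and TensorShape is assumed to accept an initializer list of dimensions:

void bufferArgUsageSketch() {
  // Inputs: argType_ is ignored, so a plain addArg() is enough. The extra
  // TensorShape reinterprets the 2-D Matrix as a 4-D (NCHW) image buffer.
  // Assumes TensorShape(std::initializer_list<size_t>); shape is hypothetical.
  CpuMatrix image(64, 3 * 32 * 32);  // one flattened image per row
  BufferArgs inputs;
  inputs.addArg(image, TensorShape({64, 3, 32, 32}));

  // Outputs: the caller declares how the Function must write the buffer.
  CpuMatrix result(64, 3 * 32 * 32);
  BufferArg output(result);
  output.setArgType(BufferArg::ADD_TO);  // ADD_TO: result += Function(inputs)
  CHECK_EQ(output.getArgType(), BufferArg::ADD_TO);
}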
diff --git a/paddle/function/Function.h b/paddle/function/Function.h
index 27ebe808aaf446771afb931e3a4611519cf340f0..88d6824aa393933f29eb62975627b80133a8783c 100644
--- a/paddle/function/Function.h
+++ b/paddle/function/Function.h
@@ -56,12 +56,18 @@ public:
   BufferArgs() {}
   size_t size() const { return args_.size(); }
 
-  // add argument into BufferArgss
+  // add argument into BufferArgs
+  // Tensor can be Matrix, Vector, IVector.
   template <typename Tensor>
   void addArg(const Tensor& arg) {
     args_.push_back(std::make_shared<BufferArg>(arg));
   }
 
+  // Add arg into BufferArgs and reshape the arg.
+  //
+  // For example, arg may represent an image buffer, but a Matrix can only
+  // represent a two-dimensional Tensor, so an extra argument is needed to
+  // describe the shape of the image buffer.
   void addArg(const Matrix& arg, const TensorShape& shape);
 
   void addArg(const CpuSparseMatrix& arg);
@@ -78,10 +84,20 @@ private:
 };
 
 /**
- * Base class for Function.
+ * \brief Base class for Function.
  * The basic Function implementation requires override init and calc interfaces.
- * Need to pay attention to the inouts argument. For the input argument
- * that will be modified, it needs to be passed through inouts.
+ *
+ * Function inputs are read-only, while Function outputs have two modes:
+ * ASSIGN_TO and ADD_TO.
+ * If output.getArgType() == ASSIGN_TO, this is assign mode: the calculation
+ * result of the Function is assigned to the output BufferArg.
+ * If output.getArgType() == ADD_TO, this is add mode: the calculation
+ * result of the Function is added to the output BufferArg.
+ *
+ * For example:
+ * ASSIGN_TO: output = Function(inputs)
+ * ADD_TO:    output += Function(inputs)
+ * If a Function has more than one output, each output can use a different mode.
 */
class FunctionBase {
public:
@@ -89,9 +105,7 @@ public:
 
   virtual void init(const FuncConfig& config) {}
 
-  virtual void calc(const BufferArgs& inputs,
-                    const BufferArgs& outputs,
-                    const BufferArgs& inouts) {}
+  virtual void calc(const BufferArgs& inputs, const BufferArgs& outputs) {}
 
   static ClassRegistrar<FunctionBase> funcRegistrar_;
 };
diff --git a/paddle/function/FunctionTest.cpp b/paddle/function/FunctionTest.cpp
index 7c3d6684cded1db1801cb18d637bbd4a976298ca..7ce908320a6f6f764e8fdacc96432aca78d7b2df 100644
--- a/paddle/function/FunctionTest.cpp
+++ b/paddle/function/FunctionTest.cpp
@@ -35,7 +35,7 @@ void FunctionApi(GpuMatrix& output, const GpuMatrix& input) {
 
 template <DeviceType DType>
 void Function(const BufferArgs& arguments) {
-  auto input = arguments[0].matrix<DType>();
+  const auto input = arguments[0].matrix<DType>();
   auto output = arguments[1].matrix<DType>();
   FunctionApi<DType>(output, input);
 }
diff --git a/paddle/math/Matrix.h~RFbb8b484f.TMP b/paddle/math/Matrix.h~RFbb8b484f.TMP
deleted file mode 100644
index d89b0f67b3c98d7a651f297d703a397c3461b6d8..0000000000000000000000000000000000000000
--- a/paddle/math/Matrix.h~RFbb8b484f.TMP
+++ /dev/null
@@ -1,1870 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#pragma once
-
-#include <stdint.h>
-#include <memory>
-#include <thread>
-
-#include "paddle/utils/Logging.h"
-#include "paddle/utils/ThreadLocal.h"
-
-#include <hl_gpu.h>
-
-#include "BaseMatrix.h"
-#include "MemoryHandle.h"
-#include "Vector.h"
-#include "paddle/utils/ThreadLocal.h"
-#include "paddle/utils/common.h"
-
-namespace paddle {
-
-enum SparseValueType { NO_VALUE = 0, FLOAT_VALUE = 1 };
-
-/**
- * @brief matrix sparse_format .
- *
- * nnz represents nonzero number in sparse matrix.
- *
- * SPARSE_CSR: row major matrix. length of row is height_ + 1, each element
- * represents row start index in Matrix. length of col and value are nnz.
- * - * SPARSE_CSC: col major matrix. length of col is width_ + 1, each element - * represents col start index in Matrix. length of col and value are nnz. - * - * @code - * for example: [0, 1, 0, 2, 0; - * 1, 0, 0, 0, 0; - * 0, 0, 0, 2, 5]; - * SPARSE_CSR row [0, 2, 3, 5]; - * col [1, 3, 0, 3, 4]; - * value [1, 2, 1, 2, 5] - * SPARSE_CSC col [0, 1, 2, 2, 4, 5]; - * row [1, 0, 0, 2, 2]; - * value [1, 1, 2, 2, 5] - * @endcode - */ -enum SparseFormat { SPARSE_CSR = 0, SPARSE_CSC = 1 }; - -class Matrix; -class GpuMatrix; -class CpuMatrix; -class CpuSparseMatrix; -class GpuSparseMatrix; -typedef std::shared_ptr MatrixPtr; -typedef std::shared_ptr GpuMatrixPtr; -typedef std::shared_ptr CpuMatrixPtr; -typedef std::shared_ptr GpuSparseMatrixPtr; -typedef std::shared_ptr CpuSparseMatrixPtr; - -/** - * Copy or assignemnt constructor will share the data as opposed to making a - * copy of the original data. To make a copy of the orinal data, use copyFrom() - * instead. - */ -class Matrix : public BaseMatrix { -protected: - Matrix(MemoryHandlePtr memHandle, - size_t height, - size_t width, - bool trans, - bool use_gpu); - - Matrix(real* data, size_t height, size_t width, bool trans, bool use_gpu); - - Matrix(real* data, - size_t height, - size_t width, - size_t stride, - bool trans, - bool use_gpu); - - static ThreadLocal tmpMat_; - -public: - size_t elementCnt_; // maximal number of elements which can be held in data_ - MemoryHandlePtr memoryHandle_; - -public: - virtual ~Matrix() {} - - static MatrixPtr create(MemoryHandlePtr memHandle, - size_t height, - size_t width, - bool trans = false); - static MatrixPtr create(size_t height, - size_t width, - bool trans = false, - bool useGpu = false); - static MatrixPtr create(real* data, - size_t height, - size_t width, - bool trans = false, - bool useGpu = false); - static MatrixPtr create(real* data, - size_t height, - size_t width, - size_t stride, - bool trans = false, - bool useGpu = false); - - static MatrixPtr createSparseMatrix(size_t height, - size_t width, - size_t nnz, - SparseValueType valueType = FLOAT_VALUE, - bool trans = false, - bool useGpu = false); - static MatrixPtr createSparseMatrix(size_t height, - size_t width, - size_t nnz, - SparseValueType valueType = FLOAT_VALUE, - SparseFormat foramt = SPARSE_CSR, - bool trans = false, - bool useGpu = false); - - static MatrixPtr createSparseMatrix(real* data, - int* row, - int* col, - size_t height, - size_t width, - size_t nnz, /* used to allocate space */ - SparseValueType valueType, /*value type*/ - SparseFormat format, - bool trans, - bool useGpu); - - static void resizeOrCreateSparseMatrix( - MatrixPtr& matrix, - size_t height, - size_t width, - size_t nnz, - SparseValueType valueType = FLOAT_VALUE, - SparseFormat foramt = SPARSE_CSR, - bool trans = false, - bool useGpu = false); - - static void resizeOrCreate(MatrixPtr& a, - size_t height, - size_t width, - bool trans = false, - bool useGpu = false); - - /** - * @brief set the data buffer used to hold the matrix data. - * - * caller should make sure that the size of data is at least - * sizeof(real)*height*width. 
- */ - void setData(real* data) { - BaseMatrix::setData(data); - memoryHandle_.reset(); - } - - /// the data should be contiguous - void setData(real* data, size_t newHeight, size_t newWidth) { - setData(data); - height_ = newHeight; - width_ = newWidth; - elementCnt_ = newHeight * newWidth; - stride_ = width_; - } - - size_t getWidth() const { return width_; } - size_t getHeight() const { return height_; } - size_t getStride() const { return stride_; } - size_t getElementCnt() const { return elementCnt_; } - virtual real* getData() { return data_; } - virtual const real* getData() const { return data_; } - bool isTransposed() const { return trans_; } - bool isContiguous() const { return stride_ == width_ || height_ == 1; } - - // If sparse matrix, need to dynamic_cast to CpuSparseMatrix/GpuSparseMatrix - // befor call the following functions. - // Declare these functions in the base class just easy to call them. - // And these declarations should be moved to base class of sparse matrix - // if refactor sparse matrix - virtual int* getRows() const { - LOG(FATAL) << "Not implemented"; - return nullptr; //! suppress warning for no return value. - } - - virtual int* getCols() const { - LOG(FATAL) << "Not implemented"; - return nullptr; //! suppress warning for no return value. - } - - virtual SparseFormat getFormat() const { - LOG(FATAL) << "Not implemented"; - return SPARSE_CSR; //! suppress warning for no return value. - } - - virtual SparseValueType getValueType() const { - LOG(FATAL) << "Not implemented"; - return NO_VALUE; //! suppress warning for no return value. - } - - /** - * @brief matrix elment-wise add - * - * Named add3 just because add/add2 has been used in BaseMatrix.cu - * and they are not virtual function. - */ - virtual void add3(MatrixPtr b) { LOG(FATAL) << "Not implemented"; } - - MemoryHandlePtr getMemoryHandle() const { return memoryHandle_; } - - virtual void zeroMem() { LOG(FATAL) << "Not implemented"; } - - virtual void resetOne() { LOG(FATAL) << "Not implemented"; } - - void setDiag(real value); - - virtual void copyFrom(const Matrix& src) { LOG(FATAL) << "Not implemented"; } - - virtual void trimFrom(const CpuSparseMatrix& src) { - LOG(FATAL) << "Not implemented"; - } - - // asynchronous copy - virtual void copyFrom(const Matrix& src, hl_stream_t stream) { - LOG(FATAL) << "Not implemented"; - } - - MatrixPtr subMatrix(size_t startRow, - size_t endRow, - size_t startCol, - size_t endCol); - - MatrixPtr subRowMatrix(size_t startRow, size_t endRow) { - return subMatrix(startRow, endRow, 0, getWidth()); - } - - MatrixPtr subColMatrix(size_t startCol, size_t endCol) { - return subMatrix(0, getHeight(), startCol, endCol); - } - - virtual MatrixPtr subMatrix(size_t startRow, size_t numRows) { - CHECK_LE(startRow + numRows, getHeight()); - return Matrix::create(getData() + startRow * getWidth(), - numRows, - getWidth(), - trans_, - useGpu_); - } - virtual MatrixPtr subMatrix(size_t startRow, size_t numRows, MatrixPtr dest) { - CHECK_LE(startRow + numRows, getHeight()); - CHECK_EQ(useGpu_, dest->useGpu_); - dest->setData(this->rowBuf(startRow), numRows, getWidth()); - return dest; - } - - /** - * If this is GpuMatrix, src is assumed to be CPU memory - * - * If this is CpuMatrix, src is assumed to be CPU memory - */ - virtual void copyFrom(const real* src, size_t size) { - LOG(FATAL) << "Not implemented"; - } - - virtual void copyFrom(const real* src, const int64_t* seq) { - LOG(FATAL) << "Not implemented"; - } - - /** - * @brief convert a int vector to a real matrix. 
- * - * (1) source and dest are both in CPU. - * - * (2) sizes are exactly match. - */ - virtual void copyFrom(const IVector& src) { - LOG(FATAL) << "copy data from int vector only available on CpuMatrix."; - } - - virtual void copyByRowIndex(Matrix& b, const IVector& rowIndex) { - LOG(FATAL) << "Not implemented"; - } - - /** - * @brief Create a matrix with the same type (GpuMatrix, CpuMatrix, - * NonValueSparseMatrix, etc.) as this. - * - * If height and width is zero, the new matrix will have the same size - * as this, otherwise the new matrix will have the specified size. - * - */ - virtual MatrixPtr clone(size_t height = 0, - size_t width = 0, - bool useGpu = false) { - LOG(FATAL) << "Not implemented"; - return nullptr; - } - - virtual real* getRowBuf(size_t row) { - LOG(FATAL) << "Not implemented"; - return nullptr; - } - - virtual real getElement(size_t x, size_t y) const { - LOG(FATAL) << "Not implemented"; - return 0; - } - - virtual real getSum() { - LOG(FATAL) << "Not implemented"; - return 0; - } - - virtual void accumulateColSum(Matrix& src) { - LOG(FATAL) << "Not implemented"; - } - - virtual real getAbsSum() { - LOG(FATAL) << "Not implemented"; - return 0; - } - - /** - * @note Original data may not be preserved after resize(). - */ - virtual void resize(size_t newHeight, size_t newWidth) = 0; - - /** - * @note This should only be used for sparse matrix. - */ - virtual void resize(size_t newHeight, - size_t newWidth, - size_t newNnz, /* total item used to allocate space */ - SparseValueType valueType, - SparseFormat format) = 0; - - /** - * @brief This should only be used for sparse matrix. - * - * Currently must be called for each row in order. - * The matrix is not valid until setRow is called for the last row. - */ - virtual void setRow(size_t row, - size_t colNum, - const unsigned int* cols, - const real* values) = 0; - - virtual MatrixPtr getTranspose() = 0; - - /** - * @brief hard transpose. - * - * allocate matTrans' memory outside, then set memAlloc as false; - * else set as true. - */ - virtual void transpose(MatrixPtr matTrans, bool memAlloc) { - LOG(FATAL) << "Not implemented"; - } - - virtual MatrixPtr getInverse() { - LOG(FATAL) << "Not implemented"; - return nullptr; - } - - /** - * @brief inverse. - * - * if allocate matInv's memory outside, then set memAlloc as false; - * else set as true. - */ - virtual void inverse(MatrixPtr matInv, bool memAlloc) { - LOG(FATAL) << "Not implemented"; - } - -public: - /// Only set all variables to 0 or NULL but not free them. - virtual void clear() { - height_ = 0; - width_ = 0; - data_ = NULL; - } - - void reshape(size_t height, size_t width); - - /// add b to each sample of this. - virtual void addBias(Matrix& b, real scale) { - LOG(FATAL) << "Not implemented"; - } - - virtual void addSharedBias(Matrix& b, real scale) { - LOG(FATAL) << "Not implemented"; - } - - void addBias(Matrix& b, real scale, bool sharedBias) { - if (!sharedBias) { - addBias(b, scale); - } else { - addSharedBias(b, scale); - } - } - - /// add each sample from a to this. 
- virtual void collectBias(Matrix& a, real scale) { - LOG(FATAL) << "Not implemented"; - } - - virtual void collectSharedBias(Matrix& a, real scale) { - LOG(FATAL) << "Not implemented"; - } - - void collectBias(Matrix& a, real scale, bool sharedBias) { - if (!sharedBias) { - collectBias(a, scale); - } else { - collectSharedBias(a, scale); - } - } - - virtual void sequenceAvgForward(Matrix& a, - const IVector& startsPos, - int mode) { - LOG(FATAL) << "Not implemented"; - } - - /** - * @code - * this = scaleAB*(a*b) + scaleT*this - * @endcode - */ - virtual void mul(const Matrix& a, - const Matrix& b, - real scaleAB, - real scaleT) { - LOG(FATAL) << "Not implemented"; - } - - /// Add a vector (column) b to matrix a, column by column. - virtual void addColumnVector(const Matrix& b) { - LOG(FATAL) << "Not implemented"; - } - - /** - * @code - * For j < codeLength: - * this(i, j) += vec(index(i, j), 0) - * where index(i, j) = ((codes(i) + numClasses) >> (j + 1)) - 1 - * @endcode - */ - virtual void addByBitCode(size_t numClasses, - const IVector& codes, - const Matrix& vec) { - (void)numClasses; - (void)codes; - (void)vec; - LOG(FATAL) << "Not implemeted"; - } - - /** - * @code - * For j < codeLength: - * vec(index(i, j), 0) += this(i, j) - * where index is same as the index for addByBitCode - * @endcode - */ - virtual void addByBitCodeBackward(size_t numClasses, - const IVector& codes, - Matrix& vec) { - (void)numClasses; - (void)codes; - (void)vec; - LOG(FATAL) << "Not implemeted"; - } - - /** - * @code - * For j < codeLength: - * this(i, j) += - * where index is same as the index for addByBitCode - * @endcode - */ - virtual void mulByBitCode(size_t numClasses, - const IVector& codes, - const Matrix& mat, - const Matrix& input) { - (void)numClasses; - (void)codes; - (void)mat; - (void)input; - LOG(FATAL) << "Not implemeted"; - } - - /** - * @code - * For j < codeLength: - * mat.row(index(i, j)) += this(i, j) * input.row(i) - * where index is same as the index for addByBitCode - * @endcode - */ - virtual void mulByBitCodeBackwardWeight(size_t numClasses, - const IVector& codes, - Matrix& mat, - const Matrix& input) { - (void)numClasses; - (void)codes; - (void)mat; - (void)input; - LOG(FATAL) << "Not implemeted"; - } - - /** - * @code - * For j < codeLength: - * input.row(i) += this(i, j) * mat.row(index(i, j)) - * where index is same as the index for addByBitCode - * @endcode - */ - virtual void mulByBitCodeBackwardError(size_t numClasses, - const IVector& codes, - const Matrix& mat, - Matrix& input) { - (void)numClasses; - (void)codes; - (void)mat; - (void)input; - LOG(FATAL) << "Not implemeted"; - } - - /** - * @code - * For j < codeLength - * sum(i, 0) = scaleSum * \sum_j bit(i, j) * this(i, j) - * where bit(i, j) = ((codes(i) + numClasses) & 2^j) ? 
1 : 0 - * @endcode - */ - virtual void sumByBitCode(size_t numClasses, - IVector& codes, - Matrix& sum, - real scaleSum) { - (void)numClasses; - (void)codes; - (void)sum; - (void)scaleSum; - LOG(FATAL) << "Not implemeted"; - } - - /** - * @code - * For j < codeLength - * this(i, j) -= bit(i, j) - * where bit(i, j) is same as that for sumByBitCode - * @endcode - */ - virtual void subByBitCode(size_t numClasses_, IVector& codes) { - (void)numClasses_; - (void)codes; - LOG(FATAL) << "Not implemeted"; - } - - /** - * add the sum of each row of this to mat - */ - virtual void rowSum(Matrix& sum) { - (void)sum; - LOG(FATAL) << "Not implemeted"; - } - - /** - * set the max of each row of this to mat - */ - virtual void rowMax(Matrix& max) { - (void)max; - LOG(FATAL) << "Not implemeted"; - } - - /** - * set the max of each column of this to mat - */ - virtual void colMax(Matrix& max) { LOG(FATAL) << "not implemented"; } - - /** - * @brief Get the top k elements of each column of this matrix. - * - * The row ids and values of these elements are stored in - * maxIds and max respectively. where k is the size of maxIds. - * And note that the top k elements are not sorted. - */ - virtual void colMax(IVector& maxIds, Matrix& maxVal) { - LOG(FATAL) << "not implemented"; - } - - virtual void maxoutForward(Matrix& a, - IVector& id, - size_t channels, - size_t groups) { - LOG(FATAL) << "not implemented"; - } - - virtual void maxoutBackward(Matrix& a, - IVector& id, - size_t channels, - size_t groups) { - LOG(FATAL) << "not implemented"; - } - - virtual void rowMaxId(IVector& maxIds) { LOG(FATAL) << "Not implemented"; } - - /** - * @brief Get the top k elements of each row of this matrix. - * - * The column ids and values of these elements are stored in - * maxIds and max respectively. where k is the size of maxIds. - * And note that the top k elements are not sorted. - */ - virtual void rowMax(IVector& maxIds, Matrix& max) { - LOG(FATAL) << "Not implemented"; - } - - /// normalize each row so that the sum of each row is 1. - virtual void rowNormalizeL1(Matrix& out) { - (void)out; - LOG(FATAL) << "Not implemeted"; - } - - /** - * @code - * this = a*b - * @endcode - */ - virtual void mul(const Matrix& a, const Matrix& b) { - LOG(FATAL) << "Not implemented"; - } - - /** - * @code - * this = scaleAB*(this*b) + scaleT*this - * @endcode - */ - virtual void rightMul(Matrix& b, real scaleAB, real scaleT) { - LOG(FATAL) << "Not implemented"; - } - - /** - * @code - * this = this* b - * @endcode - */ - virtual void rightMul(Matrix& b) { LOG(FATAL) << "Not implemented"; } - - /** - * @code - * this = scaleAB*(a*this) + scaleT*this - * @endcode - */ - virtual void leftMul(Matrix& a, real scaleAB, real scaleT) { - LOG(FATAL) << "Not implemented"; - } - - /** - * @code - * this = a*this) - * @endcode - */ - virtual void leftMul(Matrix& a) { LOG(FATAL) << "Not implemented"; } - - /// merge the element for each col. - virtual void colMerge(Matrix& src) { LOG(FATAL) << "Not implemented"; } - - /// copy -log(output[label]) to this->data[i]. - virtual void oneHotCrossEntropy(Matrix& output, IVector& label) { - LOG(FATAL) << "Not implemented"; - } - - /// calculate the error of outputV according to label. - virtual void oneHotCrossEntropyBp(Matrix& outputV, IVector& label) { - LOG(FATAL) << "Not implemented"; - } - - /// copy -log(output[label]) to this->data[i]. 
- virtual void oneHotCrossEntropyWithSelfNorm(Matrix& output, - IVector& label, - real alpha) { - LOG(FATAL) << "Not implemented"; - } - - /// calculate the error of outputV according to label. - virtual void oneHotCrossEntropyWithSelfNormBp(Matrix& outputV, - IVector& label, - real alpha) { - LOG(FATAL) << "Not implemented"; - } - - /** - * \f[ - * a[i] = \sum_{j=-(N-1)/2}^{(N-1)/2} b_{i+j} * c_{j} - * \f] - * - * b contains M elements, - * c contains N elements (N is odd), - * b's index arithmetic is computed modulo M, - * c's index arithmetic is computed modulo N. - */ - virtual void circularConv(Matrix& b, Matrix& c) { - LOG(FATAL) << "Not implemented"; - } - - virtual void circularConvDerivative(Matrix& output, - Matrix& prevOut1, - Matrix& prevOut2, - Matrix& prevGrad1, - Matrix& prevGrad2) { - LOG(FATAL) << "Not implemented"; - } - - /* output_ij = exp(this_{ij}) / (sum_j exp(this_ij)) */ - virtual void softmax(Matrix& output) { - (void)output; - LOG(FATAL) << "Not implemeted"; - } - virtual void sequenceSoftmax(Matrix& output, const IVector& index) { - (void)output; - LOG(FATAL) << "Not implemeted"; - } - - virtual void softmaxBackward(Matrix& outputV) { - (void)outputV; - LOG(FATAL) << "Not implemeted"; - } - - /* - sum_i = sum_j this_ij * output_ij - this_ij = output_ij* (this_ij - sum_i) - */ - virtual void softmaxDerivative(Matrix& output, Matrix& sftmaxSum) { - LOG(FATAL) << "Not implemented"; - } - - /// calculate the sum of squares diff cost. - virtual void sumOfSquares(Matrix& output, Matrix& label) { - LOG(FATAL) << "Not implemented"; - } - - /// gradient of sumOfSquares. - virtual void sumOfSquaresBp(Matrix& outputV, Matrix& label) { - LOG(FATAL) << "Not implemented"; - } - - virtual void tanh(Matrix& output) { LOG(FATAL) << "Not implemented"; } - - virtual void tanhDerivative(Matrix& output) { - LOG(FATAL) << "Not implemented"; - } - - virtual void softrelu(Matrix& output) { LOG(FATAL) << "Not implemented"; } - - virtual void softreluDerivative(Matrix& output) { - LOG(FATAL) << "Not implemented"; - } - - virtual void scaledTanh(Matrix& output, real p1, real p2) { - LOG(FATAL) << "Not implemented"; - } - - /** - * cosine similarity, for each row i, - * this[i] = cos(output1[i], output2[i]) - * - * output2 can only have one row, then for each row i, - * this[i] = cos(output1[i], output2[0]) - */ - virtual void cosSim(Matrix& output1, Matrix& output2, real scale = 1.0f) { - LOG(FATAL) << "Not implemented"; - } - - virtual void cosSimDerivative(Matrix& output, - Matrix& prevOut1, - Matrix& prevOut2, - Matrix& prevGrad1, - Matrix& prevGrad2, - real scale = 1.0f) { - LOG(FATAL) << "Not implemented"; - } - - /// print out the values of elements to os - virtual void print(std::ostream& os) const { - LOG(FATAL) << "Not implemented"; - } - - /** - * print a part of the matrix - * from the (top,left) value to the (height, width) value (not included) - */ - virtual void print(std::ostream& os, size_t height, size_t width) const { - LOG(FATAL) << "Not implemented"; - } - - /// print one row to os - virtual void printOneRow(std::ostream& os, size_t idx) const { - LOG(FATAL) << "Not implemented"; - } - - virtual void check(std::ostream& os, Matrix& refMat, bool printDiff = true) {} - - virtual real getMin() { - LOG(FATAL) << "Not implemented"; - return 0; - } - virtual real getMax() { - LOG(FATAL) << "Not implemented"; - return 0; - } - - virtual void randomizeUniform() { LOG(FATAL) << "Not implemented"; } - - /** - * @brief calulate the error of classification - * - * output[i] = 1 if 
row i is an error. - * - * output[i] = 0 if row i is correct. - */ - virtual void classificationError(Matrix& output, IVector& label) { - LOG(FATAL) << "Not implemented"; - } - - /** - * This function is used to calculate the convolution: - * - * It will expand a feature matrix according to the - * convolution filters - */ - virtual void convExpand(Matrix& feature, - int feaImgHeight, - int feaImgWidth, - int channels, - int blockH, - int blockW, - int strideH, - int strideW, - int paddingH, - int paddingW, - int outputH, - int outputW) { - LOG(FATAL) << "Not implemeted"; - } - - /** - * This function is the reverse implementation of convExpand: - * - * Its function is to restore a expanded-matrix into a feature matrix - */ - virtual void convShrink(Matrix& expandColMat, - int thisImgHeight, - int thisImgWidth, - int channels, - int blockH, - int blockW, - int strideH, - int strideW, - int paddingH, - int paddingW, - int outputH, - int outputW, - real alpha = 1.0f, - real beta = 0.0f) { - LOG(FATAL) << "Not implemeted"; - } - - /** - * Pooling forward operation, pick out the largest element - * in the sizeX of value - */ - virtual void maxPoolForward(Matrix& inputMat, - size_t imgSizeH, - size_t imgSizeW, - size_t channels, - size_t sizeX, - size_t sizeY, - size_t strideH, - size_t strideW, - size_t outputH, - size_t outputW, - size_t paddingH, - size_t paddingW) { - LOG(FATAL) << "Not implemeted"; - } - - /// Pooling backward operation. - virtual void maxPoolBackward(Matrix& image, - size_t imgSizeH, - size_t imgSizeW, - Matrix& outGrad, - Matrix& outV, - size_t sizeX, - size_t sizeY, - size_t strideH, - size_t strideW, - size_t outputH, - size_t outputW, - real scaleTargets, - real scaleOutput, - size_t paddingH, - size_t paddingW) { - LOG(FATAL) << "Not implemeted"; - } - - /// Pooling forward operation, caculate the average of sizeX elements. - virtual void avgPoolForward(Matrix& input, - size_t imgSizeH, - size_t imgSizeW, - size_t channels, - size_t sizeX, - size_t sizeY, - size_t strideH, - size_t strideW, - size_t outputH, - size_t outputW, - size_t paddingH, - size_t paddingW) { - LOG(FATAL) << "Not implemeted"; - } - - virtual void avgPoolBackward(Matrix& input, - size_t imgSizeH, - size_t imgSizeW, - size_t sizeX, - size_t sizeY, - size_t strideH, - size_t strideW, - size_t outputH, - size_t outputW, - real scaleTargets, - real scaleOutput, - size_t paddingH, - size_t paddingW) { - LOG(FATAL) << "Not implemeted"; - } - - /** - * Input: one or more sequences. Each sequence contains some instances. - * - * Output: output size is the number of input sequences (NOT input - * instances). - * - * output[i] is set to max_input[i]. 
- */ - virtual void maxSequenceForward(Matrix& input, - const IVector& sequence, - IVector& index) { - LOG(FATAL) << "Not implemeted"; - } - - virtual void maxSequenceBackward(Matrix& outputGrad, - const IVector& sequence, - IVector& index) { - LOG(FATAL) << "Not implemeted"; - } - - /** - * @code - * this.row[i] += table.row[ids[i]] - * if ids[i] == -1, it will be ignored - * @endcode - */ - virtual void selectRows(Matrix& table, IVector& ids) { - (void)table; - (void)ids; - LOG(FATAL) << "Not implemented"; - } - - /** - * @code - * this[i] = table[i, id[i]] - * @endcode - */ - virtual void selectElements(Matrix& table, IVector& ids) { - LOG(FATAL) << "Not implemented"; - } - - /** - * @code - * table.row[ids[i]] += this.row[i] - * if ids[i] == -1, it will be ignored - * @endcode - */ - virtual void addToRows(Matrix& table, IVector& ids) { - (void)table; - (void)ids; - LOG(FATAL) << "Not implemented"; - } - - /** - * @code - * table[i, id[i]] += this[i] - * @endcode - */ - virtual void addElements(Matrix& table, IVector& ids) { - LOG(FATAL) << "Not implemented"; - } - /** - * @brief cross entropy for multi binary labels - * - * @code - * this[i] = -sum(label[i][j]*log(output[i][j]) - * + (1-label[i][j])*log(1-output[i][j])) - * @endcode - */ - virtual void multiBinaryLabelCrossEntropy(Matrix& output, Matrix& label) { - LOG(FATAL) << "Not implemented"; - } - - /** - * @brief The gradient of cross entropy for multi binary labels on output - * - * @code - * this[i][j] = -label[i][j]/output[i][j] - * + (1-label[i][j])/(1-output[i][j]) - * @endcode - */ - virtual void multiBinaryLabelCrossEntropyBp(Matrix& output, Matrix& label) { - LOG(FATAL) << "Not implemented"; - } - - /** - * @brief Calculate the classification error for multi binary labels - * - * @code - * this[i] = sum((output[i][j] >= threshold && label[i][j] == 0) - * || (output[i][j] < threshold && label[i][j] == 1)) - * / output->getWidth() - * @endcode - */ - virtual void classificationErrorMulti(Matrix& output, - Matrix& label, - real threshold) { - LOG(FATAL) << "Not implemented"; - } - - virtual void paramReluForward(Matrix& data, Matrix& W) { - LOG(FATAL) << "Not implemented"; - } - virtual void paramReluBackwardW(Matrix& oGrad, Matrix& data) { - LOG(FATAL) << "Not implemented"; - } - virtual void paramReluBackwardDiff(Matrix& oGrad, Matrix& data, Matrix& W) { - LOG(FATAL) << "Not implemented"; - } - - virtual void bilinearForward(const Matrix& in, - const size_t inImgH, - const size_t inImgW, - const size_t outImgH, - const size_t outImgW, - const size_t numChannels, - const real ratioH, - const real ratioW) { - LOG(FATAL) << "Not implemented"; - } - virtual void bilinearBackward(const Matrix& out, - const size_t outImgH, - const size_t outImgW, - const size_t inImgH, - const size_t inImgW, - const size_t numChannels, - const real ratioH, - const real ratioW) { - LOG(FATAL) << "Not implemented"; - } - - template - void operator=(const ExpressionType& expr) { - if (useGpu_) { - TensorGpuApply(*this, expr); - } else { - TensorCpuApply(*this, expr); - } - } - - bool isEmpty() const { - return data_ == nullptr; - } - - explicit operator bool() const { - return !isEmpty(); - } -}; - -inline std::ostream& operator<<(std::ostream& os, const Matrix& mat) { - mat.print(os); - return os; -} - -class GpuMatrix : public Matrix { -public: - GpuMatrix(); - - GpuMatrix(size_t height, size_t width, bool trans = false); - GpuMatrix(real* data, size_t height, size_t width, bool trans = false) - : Matrix(data, height, width, trans, true) {} - 
GpuMatrix(real* data, - size_t height, - size_t width, - size_t stride, - bool trans = false) - : Matrix(data, height, width, stride, trans, true) {} - GpuMatrix(GpuMemHandlePtr dataHandle, - size_t height, - size_t width, - bool trans = false) - : Matrix(dataHandle, height, width, trans, true) {} - ~GpuMatrix(); - - void zeroMem(); - void resetOne(); - void setDiag(real value); - - void resize(size_t newHeight, size_t newWidth); - void resize(size_t newHeight, - size_t newWidth, - size_t newNnz, /* used to allocate space */ - SparseValueType valueType, - SparseFormat format) { - LOG(FATAL) << "Only Support Sparse Matrix"; - } - void setRow(size_t row, - size_t colNum, - const unsigned int* cols, - const real* values) { - LOG(FATAL) << "Only Support Sparse Matrix"; - } - - /** - * Copy the data from cpu_memory buffer - */ - void copyFrom(const real* hostSrc, size_t size); - - void copyFrom(const real* hostSrc, const int64_t* seq); - - void copyFrom(const Matrix& src, hl_stream_t stream); - - void copyFrom(const Matrix& src); - - void copyFrom(const IVector& src); - - void copyByRowIndex(Matrix& b, const IVector& rowIndex); - - MatrixPtr clone(size_t height, size_t width, bool useGpu = false); - - real getElement(size_t x, size_t y) const; - - real* getRow(size_t row) { return BaseMatrix::rowBuf(row); } - virtual real* getRowBuf(size_t row) { return getRow(row); } - - real getSum(); - void accumulateColSum(Matrix& src); - real getAbsSum(); - - MatrixPtr getTranspose(); - void transpose(MatrixPtr matTrans, bool memAlloc); - - MatrixPtr getInverse(); - void inverse(MatrixPtr matInv, bool memAlloc); - - /// add b to each sample of this. - void addBias(Matrix& b, real scale); - void addSharedBias(Matrix& b, real scale); - - /** - * @code - * add each sample from a to this. 
- * @endcode - */ - void collectBias(Matrix& a, real scale); - void collectSharedBias(Matrix& a, real scale); - - void sequenceAvgForward(Matrix& a, const IVector& startsPos, int mode); - - /** - * @code - * this.row[i] += table.row[ids[i]] - * @endcode - */ - virtual void selectRows(Matrix& table, IVector& ids); - - /** - * @code - * this[i] = table[i, id[i]] - * @endcode - */ - virtual void selectElements(Matrix& table, IVector& ids); - - /** - * @code - * table.row[ids[i]] += this.row[i] - * @endcode - */ - virtual void addToRows(Matrix& table, IVector& ids); - - void addColumnVector(const Matrix& b); - - /** - * @code - * this = scaleAB*(a*b) + scaleT*this - * @endcode - */ - void mul(const Matrix& a, const Matrix& b, real scaleAB, real scaleT); - - /** - * @code - * this = a*b - * @endcode - */ - void mul(const Matrix& a, const Matrix& b); - - void mul(const GpuMatrix& a, const GpuMatrix& b, real scaleAB, real scaleT); - - void mul(const GpuSparseMatrix& a, - const GpuMatrix& b, - real scaleAB, - real scaleT); - - void mul(const GpuMatrix& a, - const GpuSparseMatrix& b, - real scaleAB, - real scaleT); - - /** - * @code - * this = scaleAB*(this*b) + scaleT*this - * @endcode - */ - void rightMul(Matrix& b, real scaleAB, real scaleT); - - /** - * @code - * this = this* b - * @endcode - */ - void rightMul(Matrix& b); - - /** - * @code - * this = scaleAB*(a*this) + scaleT*this - * @endcode - */ - void leftMul(Matrix& a, real scaleAB, real scaleT); - - /** - * @code - * this = a*this - * @endcode - */ - void leftMul(Matrix& a); - - void colMerge(Matrix& src); - void rowSum(Matrix& sum); - void rowMax(Matrix& max); - void rowMax(IVector& maxIds, Matrix& max); - void colMax(Matrix& max); - void colMax(IVector& maxIds, Matrix& max); - void maxoutForward(Matrix& a, IVector& id, size_t channels, size_t groups); - void maxoutBackward(Matrix& a, IVector& id, size_t channels, size_t groups); - - void oneHotCrossEntropy(Matrix& output, IVector& label); - void oneHotCrossEntropyBp(Matrix& outputV, IVector& label); - void oneHotCrossEntropyWithSelfNorm(Matrix& output, - IVector& label, - real alpha); - void oneHotCrossEntropyWithSelfNormBp(Matrix& outputV, - IVector& label, - real alpha); - - void softmax(Matrix& output); - void sequenceSoftmax(Matrix& output, const IVector& index); - void softmaxBackward(Matrix& outputV); - void softmaxDerivative(Matrix& output, Matrix& sftmaxSum); - - /// calculate the sum of squares diff cost. - void sumOfSquares(Matrix& output, Matrix& label); - - /// gradient of sumOfSquares. 
- void sumOfSquaresBp(Matrix& outputV, Matrix& label); - void tanh(Matrix& output); - void tanhDerivative(Matrix& output); - void softrelu(Matrix& output); - void softreluDerivative(Matrix& output); - void scaledTanh(Matrix& output, real p1, real p2); - - void cosSim(Matrix& output1, Matrix& output2, real scale); - void cosSimDerivative(Matrix& output, - Matrix& prevOut1, - Matrix& prevOut2, - Matrix& prevGrad1, - Matrix& prevGrad2, - real scale); - - virtual void print(std::ostream& os) const; - virtual void print(std::ostream& os, size_t height, size_t width) const; - - void paramReluForward(Matrix& data, Matrix& W); - void paramReluBackwardW(Matrix& oGrad, Matrix& data); - void paramReluBackwardDiff(Matrix& oGrad, Matrix& data, Matrix& W); - - void check(std::ostream& os, Matrix& refMat, bool printDiff = true); - void randomizeUniform(); - - void classificationError(Matrix& output, IVector& label); - - void convExpand(Matrix& feature, - int feaImgHeight, - int feaImgWidth, - int channels, - int blockH, - int blockW, - int strideH, - int strideW, - int paddingH, - int paddingW, - int outputH, - int outputW); - - void convShrink(Matrix& expandColMat, - int thisImgHeight, - int thisImgWidth, - int channels, - int blockH, - int blochW, - int strideH, - int strideW, - int paddingH, - int paddingWreal, - int outputH, - int outputW, - real alpha = 1.0f, - real beta = 0.0f); - - void maxPoolForward(Matrix& inputMat, - size_t imgSizeH, - size_t imgSizeW, - size_t channels, - size_t sizeX, - size_t sizeY, - size_t strideH, - size_t strideW, - size_t outputH, - size_t outputW, - size_t paddingH, - size_t paddingW); - - void maxPoolBackward(Matrix& image, - size_t imgSizeH, - size_t imgSizeW, - Matrix& outGrad, - Matrix& outV, - size_t sizeX, - size_t sizeY, - size_t strideH, - size_t strideW, - size_t outputH, - size_t outputW, - real scaleTargets, - real scaleOutput, - size_t paddingH, - size_t paddingW); - - void avgPoolForward(Matrix& input, - size_t imgSizeH, - size_t imgSizeW, - size_t channels, - size_t sizeX, - size_t sizeY, - size_t strideH, - size_t strideW, - size_t outputH, - size_t outputW, - size_t paddingH, - size_t paddingW); - - void avgPoolBackward(Matrix& input, - size_t imgSizeH, - size_t imgSizeW, - size_t sizeX, - size_t sizeY, - size_t strideH, - size_t strideW, - size_t outputH, - size_t outputW, - real scaleTargets, - real scaleOutput, - size_t paddingH, - size_t paddingW); - - void maxSequenceForward(Matrix& input, - const IVector& sequence, - IVector& index); - - void maxSequenceBackward(Matrix& outputGrad, - const IVector& sequence, - IVector& index); - - void bilinearForward(const Matrix& in, - const size_t inImgH, - const size_t inImgW, - const size_t outImgH, - const size_t outImgW, - const size_t numChannels, - const real ratioH, - const real ratioW); - - void bilinearBackward(const Matrix& out, - const size_t outImgH, - const size_t outImgW, - const size_t inImgH, - const size_t inImgW, - const size_t numChannels, - const real ratioH, - const real ratioW); - - void multiBinaryLabelCrossEntropy(Matrix& output, Matrix& label); - - void multiBinaryLabelCrossEntropyBp(Matrix& output, Matrix& label); - - template - void operator=(const ExpressionType& expr) { - TensorGpuApply(*this, expr); - } -}; - -class CpuMatrix : public Matrix { -public: - CpuMatrix(size_t height, size_t width, bool trans = false); - CpuMatrix(real* data, size_t height, size_t width, bool trans = false) - : Matrix(data, height, width, trans, false) {} - CpuMatrix(real* data, - size_t height, - 
size_t width, - size_t stride, - bool trans = false) - : Matrix(data, height, width, stride, trans, false) {} - - CpuMatrix(CpuMemHandlePtr dataHandle, - size_t height, - size_t width, - bool trans = false) - : Matrix(dataHandle, height, width, trans, false) {} - - ~CpuMatrix(); - - void zeroMem(); - void resetOne(); - void setDiag(real value); - - void resize(size_t newHeight, size_t newWidth); - void resize(size_t newHeight, - size_t newWidth, - size_t newNnz, /* used to allocate space */ - SparseValueType valueType, - SparseFormat format) { - LOG(FATAL) << "Only Support Sparse Matrix"; - } - void setRow(size_t row, - size_t colNum, - const unsigned int* cols, - const real* values) { - LOG(FATAL) << "Only Support Sparse Matrix"; - } - - real getElement(size_t x, size_t y) const; - real getSum(); - void accumulateColSum(Matrix& src); - real getAbsSum(); - - MatrixPtr getTranspose(); - void transpose(MatrixPtr matTrans, bool memAlloc); - - MatrixPtr getInverse(); - void inverse(MatrixPtr matInv, bool memAlloc); - - void copyFrom(const Matrix& src); - - void copyFrom(const Matrix& src, hl_stream_t stream); - - void copyFrom(const real* cpuSrc, size_t size); - - void copyFrom(const real* cpuSrc, const int64_t* seq); - - void copyFrom(const IVector& src); - - void copyFrom(CpuSparseMatrix& src); - - void copyByRowIndex(Matrix& b, const IVector& rowIndex); - - MatrixPtr clone(size_t height, size_t width, bool useGpu = false); - - void convExpand(Matrix& feature, - int feaImgHeight, - int feaImgWidth, - int channels, - int blcokH, - int blockW, - int strideH, - int strideW, - int paddingH, - int paddingW, - int outputH, - int outputW); - - void convShrink(Matrix& expandFeat, - int thisImgHeight, - int thisImgWidth, - int channels, - int blockH, - int blockW, - int strideH, - int strideW, - int paddingH, - int paddingW, - int outputH, - int outputW, - real alpha = 1.0f, - real beta = 0.0f); - - void maxPoolForward(Matrix& inputMat, - size_t imgSizeH, - size_t imgSizeW, - size_t channels, - size_t sizeX, - size_t sizeY, - size_t strideH, - size_t strideW, - size_t outputH, - size_t outputW, - size_t paddingH, - size_t paddingW); - - void maxPoolBackward(Matrix& image, - size_t imgSizeH, - size_t imgSizeW, - Matrix& outGrad, - Matrix& outV, - size_t sizeX, - size_t sizeY, - size_t strideH, - size_t strideW, - size_t outputH, - size_t outputW, - real scaleTargets, - real scaleOutput, - size_t paddingH, - size_t paddingW); - - void avgPoolForward(Matrix& input, - size_t imgSizeH, - size_t imgSizeW, - size_t channels, - size_t sizeX, - size_t sizeY, - size_t strideH, - size_t strideW, - size_t outputH, - size_t outputW, - size_t paddingH, - size_t paddingW); - - void avgPoolBackward(Matrix& input, - size_t imgSizeH, - size_t imgSizeW, - size_t sizeX, - size_t sizeY, - size_t strideH, - size_t strideW, - size_t outputH, - size_t outputW, - real scaleTargets, - real scaleOutput, - size_t paddingH, - size_t paddingW); - - void maxSequenceForward(Matrix& input, - const IVector& sequence, - IVector& index); - - void maxSequenceBackward(Matrix& outputGrad, - const IVector& sequence, - IVector& index); - - real* getRow(size_t row) { return BaseMatrix::rowBuf(row); } - virtual real* getRowBuf(size_t row) { return getRow(row); } - -public: - /// add b to each sample of this. - void addBias(Matrix& b, real scale); - void addSharedBias(Matrix& b, real scale); - - /// add each sample of a to this. 
- void collectBias(Matrix& a, real scale); - void collectSharedBias(Matrix& a, real scale); - - void sequenceAvgForward(Matrix& a, const IVector& startsPos, int mode); - - /** - * @code - * this.row[i] += table.row[ids[i]] - * @endcode - */ - virtual void selectRows(Matrix& table, IVector& ids); - - /** - * @code - * table.row[ids[i]] += this.row[i] - * @endcode - */ - virtual void addToRows(Matrix& table, IVector& ids); - - /** - * @code - * this[i] = table[i, id[i]] - * @endcode - */ - virtual void selectElements(Matrix& table, IVector& ids); - - /** - * @code - * table[i, id[i]] += this[i] - * @endcode - */ - virtual void addElements(Matrix& table, IVector& ids); - - /** - * use abstract getRow() to get row from table. - * - * Define table as template instead of virtual class for performance sake. - * internal used by above two virtual funcs. - */ - template - void selectRowsImp(TableMatType& table, IVector& ids); - template - void addToRowsImp(TableMatType& table, IVector& ids); - - void addColumnVector(const Matrix& b); - - void mul(const Matrix& a, const Matrix& b, real scaleAB, real scaleT); - void mul(CpuMatrix* a, CpuMatrix* b, real scaleAB, real scaleT); - - void mul(CpuMatrix* a, CpuSparseMatrix* b, real scaleAB, real scaleT); - - static void mul(CpuMatrix* a, - CpuMatrix* b, - CpuSparseMatrix* c, - real scaleAB, - real scaleT); - - /** - * c = a * b - * - * use abstract getRow() to get row from B,C. - * Define B,C as template instead of virtual class for performance sake. - */ - template - static void mul( - CpuSparseMatrix* a, MatBType* b, MatCType* c, real scaleAB, real scaleT); - - virtual void mul(CpuSparseMatrix* a, CpuMatrix* b, real scaleAB, real scaleT); - - void mul(const Matrix& a, const Matrix& b); - - void rightMul(Matrix& b, real scaleAB, real scaleT); - void rightMul(Matrix& b); - - void leftMul(Matrix& a, real scaleAB, real scaleT); - void leftMul(Matrix& a); - void colMerge(Matrix& src); - void rowSum(Matrix& sum); - void rowMaxId(IVector& maxIds); - void rowMax(Matrix& max); - void rowMax(IVector& maxIds, Matrix& maxVal); - void colMax(Matrix& max); - void colMax(IVector& maxIds, Matrix& maxVal); - void maxoutForward(Matrix& a, IVector& id, size_t channels, size_t groups); - void maxoutBackward(Matrix& a, IVector& id, size_t channels, size_t groups); - void rowNormalizeL1(Matrix& out); - - void oneHotCrossEntropy(Matrix& output, IVector& label); - void oneHotCrossEntropyBp(Matrix& outputV, IVector& label); - void oneHotCrossEntropyWithSelfNorm(Matrix& output, - IVector& label, - real alpha); - void oneHotCrossEntropyWithSelfNormBp(Matrix& outputV, - IVector& label, - real alpha); - - void circularConv(Matrix& b, Matrix& c); - void circularConvDerivative(Matrix& output, - Matrix& prevOut1, - Matrix& prevOut2, - Matrix& prevGrad1, - Matrix& prevGrad2); - - void softmax(Matrix& output); - void sequenceSoftmax(Matrix& output, const IVector& index); - void softmaxDerivative(Matrix& output, Matrix& sftmaxSum); - - /// calculate the sum of squares diff cost. - void sumOfSquares(Matrix& output, Matrix& label); - - /// gradient of sumOfSquares. 
- void sumOfSquaresBp(Matrix& outputV, Matrix& label); - - void tanh(Matrix& output); - void tanhDerivative(Matrix& output); - - void softrelu(Matrix& output); - void softreluDerivative(Matrix& output); - void scaledTanh(Matrix& output, real p1, real p2); - - void cosSim(Matrix& output1, Matrix& output2, real scale); - void cosSimDerivative(Matrix& output, - Matrix& prevOut1, - Matrix& prevOut2, - Matrix& prevGrad1, - Matrix& prevGrad2, - real scale); - - void print(std::ostream& os) const; - void print(std::ostream& os, size_t height, size_t width) const; - void printOneRow(std::ostream& os, size_t idx) const; - - void paramReluForward(Matrix& data, Matrix& W); - void paramReluBackwardW(Matrix& oGrad, Matrix& data); - void paramReluBackwardDiff(Matrix& oGrad, Matrix& data, Matrix& W); - - void check(std::ostream& os, Matrix& refMat, bool printDiff = true); - - real getMin(); - real getMax(); - - void randomizeUniform(); - - void classificationError(Matrix& output, IVector& label); - - void addByBitCode(size_t numClasses, const IVector& codes, const Matrix& vec); - - void addByBitCodeBackward(size_t numClasses, - const IVector& codes, - Matrix& vec); - - void mulByBitCode(size_t numClasses, - const IVector& codes, - const Matrix& mat, - const Matrix& input); - - void mulByBitCodeBackwardWeight(size_t numClasses, - const IVector& codes, - Matrix& mat, - const Matrix& input); - - void mulByBitCodeBackwardError(size_t numClasses, - const IVector& codes, - const Matrix& mat, - Matrix& input); - - void sumByBitCode(size_t numClasses, - IVector& codes, - Matrix& sum, - real scaleSum); - - void subByBitCode(size_t numClasses_, IVector& codes); - - void multiBinaryLabelCrossEntropy(Matrix& output, Matrix& label); - void multiBinaryLabelCrossEntropyBp(Matrix& output, Matrix& label); - void classificationErrorMulti(Matrix& output, Matrix& label, real threshold); - - void bilinearForward(const Matrix& in, - const size_t inImgH, - const size_t inImgW, - const size_t outImgH, - const size_t outImgW, - const size_t numChannels, - const real ratioH, - const real ratioW); - - void bilinearBackward(const Matrix& out, - const size_t outImgH, - const size_t outImgW, - const size_t inImgH, - const size_t inImgW, - const size_t numChannels, - const real ratioH, - const real ratioW); - - template - void operator=(const ExpressionType& expr) { - TensorCpuApply(*this, expr); - } -}; - -class SharedCpuMatrix : public CpuMatrix { -public: - /* blockNum is number of partitions of the matrix */ - SharedCpuMatrix(int blockNum, size_t height, size_t width, bool trans = false) - : CpuMatrix(height, width, trans) { - initShared(blockNum); - } - SharedCpuMatrix( - int blockNum, real* data, size_t height, size_t width, bool trans = false) - : CpuMatrix(data, height, width, trans) { - initShared(blockNum); - } - - SharedCpuMatrix(int blockNum, - CpuMemHandlePtr dataHandle, - size_t height, - size_t width, - bool trans = false) - : CpuMatrix(dataHandle, height, width, trans) { - initShared(blockNum); - } - - SharedCpuMatrix(CpuMemHandlePtr dataHandle, - size_t height, - size_t width, - bool trans = false) - : CpuMatrix(dataHandle, height, width, trans) { - initBlock(1); - } - - ~SharedCpuMatrix() {} - -public: - virtual void mul(CpuSparseMatrix* a, CpuMatrix* b, real scaleAB, real scaleT); - virtual void add(Matrix& b, real p1, real p2); - virtual void add(real p1, real p2); - -private: - using Matrix::mul; - void initShared(int blockNum); - void initBlock(int blockNum); - - int blockNum_; - std::vector> blockLocks_; - 
ThreadLocal<CpuMatrixPtr> localBuf_;
-  ThreadLocal<std::vector<int>> localBufRows_;
-  ThreadLocal<std::vector<int>> blockSeq_;
-};
-
-typedef struct { unsigned int col; } sparse_non_value_t;
-
-typedef struct {
-  unsigned int col;
-  float value;
-} sparse_float_value_t;
-
-}  // namespace paddle
-#include "ExecViaCpu.h"
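To make the interface change in Function.h concrete, here is a hedged sketch of a FunctionBase subclass under the new two-argument calc and the output ArgType contract. ScaleFunc, the "scale" config key, and the element-wise operation are invented for illustration; only the FunctionBase and BufferArg interfaces come from the headers above:

class ScaleFunc : public FunctionBase {
public:
  void init(const FuncConfig& config) override {
    scale_ = config.get<real>("scale");  // assumes FuncConfig::get<T>(key)
  }

  // The two-argument calc replaces the old (inputs, outputs, inouts) form;
  // how the output buffer is written is now driven by its ArgType.
  void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
    CHECK_EQ(inputs.size(), 1UL);
    CHECK_EQ(outputs.size(), 1UL);
    const auto in = inputs[0].matrix<DEVICE_TYPE_CPU>();
    auto out = outputs[0].matrix<DEVICE_TYPE_CPU>();
    CHECK_EQ(in.getElementCnt(), out.getElementCnt());

    const real* src = in.getData();
    real* dst = out.getData();
    const bool addTo = (outputs[0].getArgType() == BufferArg::ADD_TO);
    for (size_t i = 0; i < in.getElementCnt(); ++i) {
      // ADD_TO accumulates into the output buffer; ASSIGN_TO overwrites it.
      dst[i] = (addTo ? dst[i] : real(0)) + scale_ * src[i];
    }
  }

private:
  real scale_;
};

A real Function would additionally be templated on DeviceType, as FunctionTest.cpp does, and registered through the funcRegistrar_ declared in Function.h.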