diff --git a/paddle/math/BaseMatrix.cu b/paddle/math/BaseMatrix.cu index 5435808fb7f70fdf1ac98815f7fe8890fb85527c..ba2b47d6cc6961a380b7db2781b4d214dea829db 100644 --- a/paddle/math/BaseMatrix.cu +++ b/paddle/math/BaseMatrix.cu @@ -12,21 +12,21 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include -#include #include +#include +#include #include "BaseMatrix.h" -#include "MathFunctions.h" -#include "SIMDFunctions.h" -#include "hl_matrix_apply.cuh" -#include "hl_matrix_base.cuh" #include "hl_matrix_ops.cuh" +#include "hl_matrix_base.cuh" +#include "hl_matrix_apply.cuh" +#include "SIMDFunctions.h" +#include "MathFunctions.h" namespace paddle { const char* SPARSE_SUPPORT_ERROR = "Sparse Matrix/Vector is not supported."; -template +template template int BaseMatrixT::applyUnary(Op op) { MatrixOffset offset(0, 0); @@ -34,11 +34,9 @@ int BaseMatrixT::applyUnary(Op op) { return 0; } -template +template template -int BaseMatrixT::applyUnary(Op op, - int numRows, - int numCols, +int BaseMatrixT::applyUnary(Op op, int numRows, int numCols, MatrixOffset& offset) { CHECK(!this->isSparse()) << SPARSE_SUPPORT_ERROR; int dimM = numRows; @@ -58,7 +56,7 @@ int BaseMatrixT::applyUnary(Op op, return 0; } -template +template template int BaseMatrixT::applyBinary(Op op, BaseMatrixT& b) { CHECK(height_ == b.height_ && width_ == b.width_) @@ -69,23 +67,18 @@ int BaseMatrixT::applyBinary(Op op, BaseMatrixT& b) { return 0; } -template +template template -int BaseMatrixT::applyBinary( - Op op, BaseMatrixT& b, int numRows, int numCols, MatrixOffset& offset) { +int BaseMatrixT::applyBinary(Op op, BaseMatrixT& b, int numRows, int numCols, + MatrixOffset& offset) { applyBinary(op, b, numRows, numCols, offset, false_type(), false_type()); return 0; } -template +template template -int BaseMatrixT::applyBinary(Op op, - BaseMatrixT& b, - int numRows, - int numCols, - MatrixOffset& offset, - bAsRowVector, - bAsColVector) { +int BaseMatrixT::applyBinary(Op op, BaseMatrixT& b, int numRows, int numCols, + MatrixOffset& offset, bAsRowVector, bAsColVector) { CHECK(!this->isSparse()) << SPARSE_SUPPORT_ERROR; CHECK(!b.isSparse()) << SPARSE_SUPPORT_ERROR; CHECK(useGpu_ == b.useGpu_) << "Matrix type mismatch"; @@ -98,8 +91,8 @@ int BaseMatrixT::applyBinary(Op op, T* A = data_; T* B = b.data_; CAL_MATRIX_START_ADDRESS(A, height_, width_, lda, offset.aCol_, offset.aRow_); - CAL_MATRIX_START_ADDRESS( - B, b.height_, b.width_, ldb, offset.bCol_, offset.bRow_); + CAL_MATRIX_START_ADDRESS(B, b.height_, b.width_, ldb, offset.bCol_, + offset.bRow_); CHECK_LE(dimM + offset.aRow_, this->height_); CHECK_LE(dimN + offset.aCol_, this->width_); if (!bAsRowVector::value && !bAsColVector::value) { @@ -122,7 +115,7 @@ int BaseMatrixT::applyBinary(Op op, return 0; } -template +template template int BaseMatrixT::applyTernary(Op op, BaseMatrixT& b, BaseMatrixT& c) { CHECK_EQ(height_, b.height_); @@ -136,29 +129,21 @@ int BaseMatrixT::applyTernary(Op op, BaseMatrixT& b, BaseMatrixT& c) { return 0; } -template +template template -int BaseMatrixT::applyTernary(Op op, - BaseMatrixT& b, - BaseMatrixT& c, - int numRows, - int numCols, +int BaseMatrixT::applyTernary(Op op, BaseMatrixT& b, BaseMatrixT& c, + int numRows, int numCols, MatrixOffset& offset) { applyTernary(op, b, c, numRows, numCols, offset, false_type(), false_type()); return 0; } -template +template template -int BaseMatrixT::applyTernary(Op op, - BaseMatrixT& b, - 
BaseMatrixT& c, - int numRows, - int numCols, - MatrixOffset& offset, - cAsRowVector, - cAsColVector) { +int BaseMatrixT::applyTernary(Op op, BaseMatrixT& b, BaseMatrixT& c, + int numRows, int numCols, MatrixOffset& offset, + cAsRowVector, cAsColVector) { CHECK(!this->isSparse()) << SPARSE_SUPPORT_ERROR; CHECK(!b.isSparse()) << SPARSE_SUPPORT_ERROR; CHECK(!c.isSparse()) << SPARSE_SUPPORT_ERROR; @@ -175,10 +160,10 @@ int BaseMatrixT::applyTernary(Op op, T* B = b.data_; T* C = c.data_; CAL_MATRIX_START_ADDRESS(A, height_, width_, lda, offset.aCol_, offset.aRow_); - CAL_MATRIX_START_ADDRESS( - B, b.height_, b.width_, ldb, offset.bCol_, offset.bRow_); - CAL_MATRIX_START_ADDRESS( - C, c.height_, c.width_, ldc, offset.cCol_, offset.cRow_); + CAL_MATRIX_START_ADDRESS(B, b.height_, b.width_, ldb, offset.bCol_, + offset.bRow_); + CAL_MATRIX_START_ADDRESS(C, c.height_, c.width_, ldc, offset.cCol_, + offset.cRow_); CHECK_LE(dimM + offset.aRow_, this->height_); CHECK_LE(dimN + offset.aCol_, this->width_); @@ -195,21 +180,21 @@ int BaseMatrixT::applyTernary(Op op, } if (true == useGpu_) { - hl_gpu_apply_ternary_op( + hl_gpu_apply_ternary_op + ( op, A, B, C, dimM, dimN, lda, ldb, ldc); } else { - hl_cpu_apply_ternary_op( + hl_cpu_apply_ternary_op + ( op, A, B, C, dimM, dimN, lda, ldb, ldc); } return 0; } -template +template template -int BaseMatrixT::applyQuaternary(Op op, - BaseMatrixT& b, - BaseMatrixT& c, +int BaseMatrixT::applyQuaternary(Op op, BaseMatrixT& b, BaseMatrixT& c, BaseMatrixT& d) { CHECK_EQ(height_, b.height_); CHECK_EQ(width_, b.width_); @@ -224,14 +209,10 @@ int BaseMatrixT::applyQuaternary(Op op, return 0; } -template +template template -int BaseMatrixT::applyQuaternary(Op op, - BaseMatrixT& b, - BaseMatrixT& c, - BaseMatrixT& d, - int numRows, - int numCols, +int BaseMatrixT::applyQuaternary(Op op, BaseMatrixT& b, BaseMatrixT& c, + BaseMatrixT& d, int numRows, int numCols, MatrixOffset& offset) { CHECK(!this->isSparse()) << SPARSE_SUPPORT_ERROR; CHECK(!b.isSparse()) << SPARSE_SUPPORT_ERROR; @@ -253,12 +234,12 @@ int BaseMatrixT::applyQuaternary(Op op, T* C = c.data_; T* D = d.data_; CAL_MATRIX_START_ADDRESS(A, height_, width_, lda, offset.aCol_, offset.aRow_); - CAL_MATRIX_START_ADDRESS( - B, b.height_, b.width_, ldb, offset.bCol_, offset.bRow_); - CAL_MATRIX_START_ADDRESS( - C, c.height_, c.width_, ldc, offset.cCol_, offset.cRow_); - CAL_MATRIX_START_ADDRESS( - D, d.height_, d.width_, ldd, offset.dCol_, offset.dRow_); + CAL_MATRIX_START_ADDRESS(B, b.height_, b.width_, ldb, offset.bCol_, + offset.bRow_); + CAL_MATRIX_START_ADDRESS(C, c.height_, c.width_, ldc, offset.cCol_, + offset.cRow_); + CAL_MATRIX_START_ADDRESS(D, d.height_, d.width_, ldd, offset.dCol_, + offset.dRow_); CHECK_LE(dimM + offset.aRow_, this->height_); CHECK_LE(dimN + offset.aCol_, this->width_); @@ -269,29 +250,22 @@ int BaseMatrixT::applyQuaternary(Op op, CHECK_LE(dimM + offset.dRow_, d.height_); CHECK_LE(dimN + offset.dCol_, d.width_); if (true == useGpu_) { - hl_gpu_apply_quaternary_op(op, A, B, C, D, dimM, dimN, lda, ldb, ldc, ldd); + hl_gpu_apply_quaternary_op(op, A, B, C, D, dimM, dimN, lda, ldb, + ldc, ldd); } else { - hl_cpu_apply_quaternary_op(op, A, B, C, D, dimM, dimN, lda, ldb, ldc, ldd); + hl_cpu_apply_quaternary_op(op, A, B, C, D, dimM, dimN, lda, ldb, + ldc, ldd); } return 0; } -template -template +template -int BaseMatrixT::aggregate(Agg agg, - Op op, - Saver sv, - BaseMatrixT& b, - int numRows, - int numCols, - MatrixOffset& offset, - aAsRowVector, - aAsColVector) { +int BaseMatrixT::aggregate(Agg 
agg, Op op, Saver sv, BaseMatrixT& b, + int numRows, int numCols, MatrixOffset& offset, + aAsRowVector, aAsColVector) { CHECK_EQ(useGpu_, b.useGpu_); int ld = stride_; @@ -299,10 +273,10 @@ int BaseMatrixT::aggregate(Agg agg, T* dst = data_; T* B = b.data_; - CAL_MATRIX_START_ADDRESS( - dst, height_, width_, ld, offset.aCol_, offset.aRow_); - CAL_MATRIX_START_ADDRESS( - B, b.height_, b.width_, ldb, offset.bCol_, offset.bRow_); + CAL_MATRIX_START_ADDRESS(dst, height_, width_, ld, offset.aCol_, + offset.aRow_); + CAL_MATRIX_START_ADDRESS(B, b.height_, b.width_, ldb, offset.bCol_, + offset.bRow_); if (aAsRowVector::value && !aAsColVector::value) { if (useGpu_) { @@ -323,21 +297,12 @@ int BaseMatrixT::aggregate(Agg agg, return 0; } -template -template +template -int BaseMatrixT::aggregate(Agg agg, - Op op, - Saver sv, - BaseMatrixT& b, - BaseMatrixT& c, - int numRows, - int numCols, - MatrixOffset& offset, - aAsRowVector, +int BaseMatrixT::aggregate(Agg agg, Op op, Saver sv, BaseMatrixT& b, + BaseMatrixT& c, int numRows, int numCols, + MatrixOffset& offset, aAsRowVector, aAsColVector) { CHECK_EQ(useGpu_, b.useGpu_); CHECK_EQ(useGpu_, c.useGpu_); @@ -349,28 +314,28 @@ int BaseMatrixT::aggregate(Agg agg, T* dst = data_; T* B = b.data_; T* C = c.data_; - CAL_MATRIX_START_ADDRESS( - dst, height_, width_, ld, offset.aCol_, offset.aRow_); - CAL_MATRIX_START_ADDRESS( - B, b.height_, b.width_, ldb, offset.bCol_, offset.bRow_); - CAL_MATRIX_START_ADDRESS( - C, c.height_, c.width_, ldc, offset.cCol_, offset.cRow_); + CAL_MATRIX_START_ADDRESS(dst, height_, width_, ld, offset.aCol_, + offset.aRow_); + CAL_MATRIX_START_ADDRESS(B, b.height_, b.width_, ldb, offset.bCol_, + offset.bRow_); + CAL_MATRIX_START_ADDRESS(C, c.height_, c.width_, ldc, offset.cCol_, + offset.cRow_); if (aAsRowVector::value && !aAsColVector::value) { if (useGpu_) { - hl_gpu_matrix_column_op( - agg, op, sv, numRows, numCols, dst, B, ldb, C, ldc); + hl_gpu_matrix_column_op(agg, op, sv, numRows, numCols, dst, B, + ldb, C, ldc); } else { - hl_cpu_matrix_column_op( - agg, op, sv, numRows, numCols, dst, B, ldb, C, ldc); + hl_cpu_matrix_column_op(agg, op, sv, numRows, numCols, dst, B, + ldb, C, ldc); } } else if (!aAsRowVector::value && aAsColVector::value) { if (useGpu_) { - hl_gpu_matrix_row_op( - agg, op, sv, numRows, numCols, dst, ld, B, ldb, C, ldc); + hl_gpu_matrix_row_op(agg, op, sv, numRows, numCols, dst, ld, B, + ldb, C, ldc); } else { - hl_cpu_matrix_row_op( - agg, op, sv, numRows, numCols, dst, ld, B, ldb, C, ldc); + hl_cpu_matrix_row_op(agg, op, sv, numRows, numCols, dst, ld, B, + ldb, C, ldc); } } else { LOG(FATAL) << "not supported"; @@ -385,19 +350,15 @@ int BaseMatrixT::aggregate(Agg agg, */ DEFINE_MATRIX_UNARY_OP(Neg, a = -a); -template -void BaseMatrixT::neg() { - applyUnary(unary::Neg()); -} +template +void BaseMatrixT::neg() { applyUnary(unary::Neg()); } DEFINE_MATRIX_UNARY_OP(Exp, a = exp(a)); -template <> -void BaseMatrixT::exp2() { - applyUnary(unary::Exp()); -} +template<> +void BaseMatrixT::exp2() { applyUnary(unary::Exp()); } DEFINE_MATRIX_UNARY_OP(Log, a = log(a)); -template <> +template<> void BaseMatrixT::log2() { if (useGpu_) { applyUnary(unary::Log()); @@ -407,42 +368,30 @@ void BaseMatrixT::log2() { } DEFINE_MATRIX_UNARY_OP(Sqrt, a = sqrt(a)); -template <> -void BaseMatrixT::sqrt2() { - applyUnary(unary::Sqrt()); -} +template<> +void BaseMatrixT::sqrt2() { applyUnary(unary::Sqrt()); } DEFINE_MATRIX_UNARY_OP(Square, a = a * a); -template -void BaseMatrixT::square2() { - applyUnary(unary::Square()); -} +template 
+void BaseMatrixT::square2() { applyUnary(unary::Square()); } DEFINE_MATRIX_UNARY_OP(Reciprocal, a = 1.0f / a); -template -void BaseMatrixT::reciprocal2() { - applyUnary(unary::Reciprocal()); -} +template +void BaseMatrixT::reciprocal2() { applyUnary(unary::Reciprocal()); } DEFINE_MATRIX_UNARY_OP(Abs, a = a > 0 ? a : -a); -template -void BaseMatrixT::abs2() { - applyUnary(unary::Abs()); -} +template +void BaseMatrixT::abs2() { applyUnary(unary::Abs()); } DEFINE_MATRIX_UNARY_OP(Sign, a = (a > 0) - (a < 0)); -template -void BaseMatrixT::sign2() { - applyUnary(unary::Sign()); -} +template +void BaseMatrixT::sign2() { applyUnary(unary::Sign()); } DEFINE_MATRIX_UNARY_OP(Zero, a = 0); -template -void BaseMatrixT::zero() { - applyUnary(unary::Zero()); -} +template +void BaseMatrixT::zero() { applyUnary(unary::Zero()); } -template +template void BaseMatrixT::zeroAtOffset(int64_t columnOffset, int64_t numColumns) { int numRows = height_; int numCols = numColumns; @@ -451,13 +400,11 @@ void BaseMatrixT::zeroAtOffset(int64_t columnOffset, int64_t numColumns) { } DEFINE_MATRIX_UNARY_OP(One, a = 1); -template -void BaseMatrixT::one() { - applyUnary(unary::One()); -} +template +void BaseMatrixT::one() { applyUnary(unary::One()); } DEFINE_MATRIX_UNARY_PARAMETER_OP(Pow, ONE_PARAMETER, a = pow(a, p)); -template <> +template<> void BaseMatrixT::pow2(real p) { if (useGpu_) { applyUnary(unary::Pow(p)); @@ -467,67 +414,51 @@ void BaseMatrixT::pow2(real p) { } DEFINE_MATRIX_UNARY_PARAMETER_OP(SubScalar, ONE_PARAMETER, a -= p); -template -void BaseMatrixT::subScalar(T p) { - applyUnary(unary::SubScalar(p)); -} +template +void BaseMatrixT::subScalar(T p) { applyUnary(unary::SubScalar(p)); } DEFINE_MATRIX_UNARY_PARAMETER_OP(MulScalar, ONE_PARAMETER, a *= p); -template -void BaseMatrixT::mulScalar(T p) { - applyUnary(unary::MulScalar(p)); -} +template +void BaseMatrixT::mulScalar(T p) { applyUnary(unary::MulScalar(p)); } DEFINE_MATRIX_UNARY_PARAMETER_OP(DivScalar, ONE_PARAMETER, a /= p); -template -void BaseMatrixT::divScalar(T p) { - applyUnary(unary::DivScalar(p)); -} +template +void BaseMatrixT::divScalar(T p) { applyUnary(unary::DivScalar(p)); } DEFINE_MATRIX_UNARY_PARAMETER_OP(Assign, ONE_PARAMETER, a = p); -template -void BaseMatrixT::assign(T p) { - applyUnary(unary::Assign(p)); -} +template +void BaseMatrixT::assign(T p) { applyUnary(unary::Assign(p)); } DEFINE_MATRIX_UNARY_PARAMETER_OP(Add, ONE_PARAMETER, a += p); -template -void BaseMatrixT::add(T p) { - applyUnary(unary::Add(p)); -} +template +void BaseMatrixT::add(T p) { applyUnary(unary::Add(p)); } DEFINE_MATRIX_UNARY_PARAMETER_OP(Add2, TWO_PARAMETER, a = a * p1 + p2); -template -void BaseMatrixT::add(T p1, T p2) { - applyUnary(unary::Add2(p1, p2)); -} +template +void BaseMatrixT::add(T p1, T p2) { applyUnary(unary::Add2(p1, p2)); } -DEFINE_MATRIX_UNARY_PARAMETER_OP(Clip, - TWO_PARAMETER, +DEFINE_MATRIX_UNARY_PARAMETER_OP(Clip, TWO_PARAMETER, a = a < p1 ? p1 : (a > p2 ? p2 : a)); -template -void BaseMatrixT::clip(T p1, T p2) { - applyUnary(unary::Clip(p1, p2)); -} +template +void BaseMatrixT::clip(T p1, T p2) { applyUnary(unary::Clip(p1, p2)); } -DEFINE_MATRIX_BINARY_PARAMETER_OP(ClipDerivative, - TWO_PARAMETER, - a = b < p1 ? 0 : (b > p2 ? 0 : 1)); -template +DEFINE_MATRIX_BINARY_PARAMETER_OP(ClipDerivative, TWO_PARAMETER, + a = b < p1 ? 0 : (b > p2 ? 
0 : 1)); +template void BaseMatrixT::clipDerivative(BaseMatrixT& b, T p1, T p2) { applyBinary(binary::ClipDerivative(p1, p2), b); } -DEFINE_MATRIX_UNARY_PARAMETER_OP(BiggerThanScalar, - ONE_PARAMETER, +DEFINE_MATRIX_UNARY_PARAMETER_OP(BiggerThanScalar, ONE_PARAMETER, a = a > p ? 1.0f : 0.0f); -template +template void BaseMatrixT::biggerThanScalar(T p) { applyUnary(unary::BiggerThanScalar(p)); } -DEFINE_MATRIX_UNARY_PARAMETER_OP(DownClip, ONE_PARAMETER, a = a > p ? a : p); -template +DEFINE_MATRIX_UNARY_PARAMETER_OP(DownClip, ONE_PARAMETER, + a = a > p ? a : p); +template void BaseMatrixT::downClip(T p) { applyUnary(unary::DownClip(p)); } @@ -538,12 +469,12 @@ void BaseMatrixT::downClip(T p) { */ DEFINE_MATRIX_BINARY_OP(Add, a += b); -template +template void BaseMatrixT::add(BaseMatrixT& b) { applyBinary(binary::Add(), b); } -template <> +template<> void BaseMatrixT::add(BaseMatrixT& b) { if (useGpu_) { applyBinary(binary::Add(), b); @@ -554,7 +485,7 @@ void BaseMatrixT::add(BaseMatrixT& b) { } } -template +template void BaseMatrixT::addAtOffset(BaseMatrixT& b, int64_t columnOffset) { if (columnOffset + b.width_ <= width_) { int numRows = height_; @@ -573,53 +504,43 @@ void BaseMatrixT::addAtOffset(BaseMatrixT& b, int64_t columnOffset) { } } -template +template void BaseMatrixT::addP2P(BaseMatrixT& b) { T* A = data_; T* B = b.data_; int dimM = height_; int dimN = width_; - hl_gpu_apply_binary_op, 0, 0>( - binary::Add(), A, B, dimM, dimN, dimN, dimN); + hl_gpu_apply_binary_op, 0, 0> + (binary::Add(), A, B, dimM, dimN, dimN, dimN); } -template +template void BaseMatrixT::addColVector(BaseMatrixT& b) { MatrixOffset offset(0, 0, 0, 0); int numRows = height_; int numCols = width_; - applyBinary(binary::Add(), - b, - numRows, - numCols, - offset, - false_type(), + applyBinary(binary::Add(), b, numRows, numCols, offset, false_type(), true_type() /* bAsColVector */); } -template +template void BaseMatrixT::addRowVector(BaseMatrixT& b) { MatrixOffset offset(0, 0, 0, 0); int numRows = height_; int numCols = width_; - applyBinary(binary::Add(), - b, - numRows, - numCols, - offset, - true_type() /* bAsRowVector */, - false_type()); + applyBinary(binary::Add(), b, numRows, numCols, offset, + true_type() /* bAsRowVector */, false_type()); } DEFINE_MATRIX_BINARY_PARAMETER_OP(Add1, ONE_PARAMETER, a += b * p); -template +template void BaseMatrixT::add(BaseMatrixT& b, T p) { applyBinary(binary::Add1(p), b); } DEFINE_MATRIX_BINARY_PARAMETER_OP(Pow, ONE_PARAMETER, a = pow(b, p)); -template <> +template<> void BaseMatrixT::pow2(BaseMatrixT& b, real p) { if (useGpu_) { applyBinary(binary::Pow(p), b); @@ -629,45 +550,36 @@ void BaseMatrixT::pow2(BaseMatrixT& b, real p) { } DEFINE_MATRIX_BINARY_PARAMETER_OP(Add2, TWO_PARAMETER, a = p1 * a + p2 * b); -template +template void BaseMatrixT::add(BaseMatrixT& b, T p1, T p2) { applyBinary(binary::Add2(p1, p2), b); } -template +template void BaseMatrixT::addBias(BaseMatrixT& b, T scale) { MatrixOffset offset(0, 0, 0, 0); int numRows = height_; int numCols = width_; - applyBinary(binary::Add1(scale), - b, - numRows, - numCols, - offset, - true_type() /* bAsRowVector */, - false_type()); + applyBinary(binary::Add1(scale), b, numRows, numCols, offset, + true_type() /* bAsRowVector */, false_type()); } DEFINE_MATRIX_BINARY_OP(Sub, a -= b); -template -void BaseMatrixT::sub(BaseMatrixT& b) { - applyBinary(binary::Sub(), b); -} +template +void BaseMatrixT::sub(BaseMatrixT& b) { applyBinary(binary::Sub(), b); } DEFINE_MATRIX_BINARY_PARAMETER_OP(Sub1, ONE_PARAMETER, a -= b * p); 
-template +template void BaseMatrixT::sub(BaseMatrixT& b, T p) { applyBinary(binary::Sub1(p), b); } DEFINE_MATRIX_BINARY_OP(Relu, b = a > 0.0f ? a : 0.0f); -template -void BaseMatrixT::relu(BaseMatrixT& b) { - applyBinary(binary::Relu(), b); -} +template +void BaseMatrixT::relu(BaseMatrixT& b) { applyBinary(binary::Relu(), b); } DEFINE_MATRIX_BINARY_OP(ReluDerivative, a *= (b > 0.0f ? 1.0f : 0.0f)); -template +template void BaseMatrixT::reluDerivative(BaseMatrixT& b) { applyBinary(binary::ReluDerivative(), b); } @@ -677,7 +589,7 @@ DEFINE_MATRIX_BINARY_OP(Softrelu, const T THRESHOLD = 40.0; ? THRESHOLD : ((a < -THRESHOLD) ? (-THRESHOLD) : a)))); -template <> +template<> void BaseMatrixT::softrelu(BaseMatrixT& b) { applyBinary(binary::Softrelu(), b); } @@ -687,100 +599,97 @@ DEFINE_MATRIX_BINARY_OP( a *= (1.0 - exp(-1.0 * ((b > THRESHOLD) ? THRESHOLD : ((b < -THRESHOLD) ? (-THRESHOLD) : b))))); -template <> +template<> void BaseMatrixT::softreluDerivative(BaseMatrixT& b) { applyBinary(binary::SoftreluDerivative(), b); } DEFINE_MATRIX_BINARY_PARAMETER_OP(Brelu, TWO_PARAMETER, b = a > p1 ? a : p1; b = b < p2 ? b : p2); -template +template void BaseMatrixT::brelu(BaseMatrixT& b) { - int p1 = 0, p2 = 24; //! TODO(yuyang18): Make p1,p2 configuable. + int p1 = 0, p2 = 24; //! TODO(yuyang18): Make p1,p2 configuable. applyBinary(binary::Brelu(p1, p2), b); } -DEFINE_MATRIX_BINARY_PARAMETER_OP(BreluDerivative, - TWO_PARAMETER, +DEFINE_MATRIX_BINARY_PARAMETER_OP(BreluDerivative, TWO_PARAMETER, a *= (b > p1 && b < p2) ? 1.0 : 0.0); -template +template void BaseMatrixT::breluDerivative(BaseMatrixT& b) { int p1 = 0, p2 = 24; applyBinary(binary::BreluDerivative(p1, p2), b); } DEFINE_MATRIX_BINARY_OP(Square, b = a * a); -template +template void BaseMatrixT::square2(BaseMatrixT& b) { applyBinary(binary::Square(), b); } DEFINE_MATRIX_BINARY_OP(SquareDerivative, a *= 2.0 * b); -template +template void BaseMatrixT::squareDerivative(BaseMatrixT& b) { applyBinary(binary::SquareDerivative(), b); } -DEFINE_MATRIX_BINARY_OP(Tanh, T tmp = -2.0 * a; - tmp = (tmp > EXP_MAX_INPUT) ? EXP_MAX_INPUT : tmp; - b = 2.0 / (1.0 + std::exp(tmp)) - 1.0); -template <> +DEFINE_MATRIX_BINARY_OP(Tanh, + T tmp = -2.0 * a; + tmp = (tmp > EXP_MAX_INPUT) ? 
EXP_MAX_INPUT : tmp; + b = 2.0 / (1.0 + std::exp(tmp)) - 1.0); +template<> void BaseMatrixT::tanh(BaseMatrixT& b) { applyBinary(binary::Tanh(), b); } DEFINE_MATRIX_BINARY_OP(TanhDerivative, a *= 1 - b * b); -template +template void BaseMatrixT::tanhDerivative(BaseMatrixT& b) { applyBinary(binary::TanhDerivative(), b); } -DEFINE_MATRIX_BINARY_PARAMETER_OP( - ScaledTanh, TWO_PARAMETER, b = p1 * (2.0 / (1.0 + exp(-2 * p2 * a)) - 1.0)); -template <> +DEFINE_MATRIX_BINARY_PARAMETER_OP(ScaledTanh, TWO_PARAMETER, + b = p1 * + (2.0 / (1.0 + exp(-2 * p2 * a)) - 1.0)); +template<> void BaseMatrixT::scaledTanh(BaseMatrixT& b, real p1, real p2) { applyBinary(binary::ScaledTanh(p1, p2), b); } -DEFINE_MATRIX_BINARY_PARAMETER_OP(ScaledTanhDerivative, - TWO_PARAMETER, +DEFINE_MATRIX_BINARY_PARAMETER_OP(ScaledTanhDerivative, TWO_PARAMETER, a *= p2 * (p1 - b * b)); -template +template void BaseMatrixT::scaledTanhDerivative(BaseMatrixT& b, T p1, T p2) { applyBinary(binary::ScaledTanhDerivative(p1 * p1, p2 / p1), b); } DEFINE_MATRIX_BINARY_OP(Reciprocal, b = 1.0f / a); -template +template void BaseMatrixT::reciprocal2(BaseMatrixT& b) { applyBinary(binary::Reciprocal(), b); } DEFINE_MATRIX_BINARY_OP(ReciprocalDerivative, a *= -b * b); -template +template void BaseMatrixT::reciprocalDerivative(BaseMatrixT& b) { applyBinary(binary::ReciprocalDerivative(), b); } DEFINE_MATRIX_BINARY_OP(Abs, b = a > 0.0f ? a : -a); -template -void BaseMatrixT::abs2(BaseMatrixT& b) { - applyBinary(binary::Abs(), b); -} +template +void BaseMatrixT::abs2(BaseMatrixT& b) { applyBinary(binary::Abs(), b); } DEFINE_MATRIX_BINARY_OP(AbsDerivative, a = (b > 0) ? a : (b < 0) ? -a : 0); -template +template void BaseMatrixT::absDerivative(BaseMatrixT& b) { applyBinary(binary::AbsDerivative(), b); } -DEFINE_MATRIX_BINARY_OP(Sigmoid, const T THRESHOLD_MIN = -40.0; - const T THRESHOLD_MAX = 13.0; - T tmp = (a < THRESHOLD_MIN) - ? THRESHOLD_MIN - : ((a > THRESHOLD_MAX) ? THRESHOLD_MAX : a); - b = 1.0f / (1.0f + exp(-tmp))); -template <> +DEFINE_MATRIX_BINARY_OP( + Sigmoid, const T THRESHOLD_MIN = -40.0; const T THRESHOLD_MAX = 13.0; + T tmp = (a < THRESHOLD_MIN) ? THRESHOLD_MIN + : ((a > THRESHOLD_MAX) ? THRESHOLD_MAX : a); + b = 1.0f / (1.0f + exp(-tmp))); +template<> void BaseMatrixT::sigmoid(BaseMatrixT& b) { if (useGpu_) { applyBinary(binary::Sigmoid(), b); @@ -814,31 +723,31 @@ void BaseMatrixT::sigmoid(BaseMatrixT& b) { } DEFINE_MATRIX_BINARY_OP(SigmoidDerivative, a *= b * (1 - b)); -template +template void BaseMatrixT::sigmoidDerivative(BaseMatrixT& b) { applyBinary(binary::SigmoidDerivative(), b); } DEFINE_MATRIX_BINARY_OP(ExpDerivative, a *= b); -template +template void BaseMatrixT::expDerivative(BaseMatrixT& b) { applyBinary(binary::ExpDerivative(), b); } DEFINE_MATRIX_BINARY_OP(Sign, b = a > 0.0f ? 
1.0f : -1.0f); -template +template void BaseMatrixT::sign2(BaseMatrixT& b) { applyBinary(binary::Sign(), b); } DEFINE_MATRIX_BINARY_OP(Exp, a = exp(b)); -template <> +template<> void BaseMatrixT::exp2(BaseMatrixT& b) { applyBinary(binary::Exp(), b); } DEFINE_MATRIX_BINARY_OP(Log, a = log(b)); -template <> +template<> void BaseMatrixT::log2(BaseMatrixT& b) { if (useGpu_) { applyBinary(binary::Log(), b); @@ -848,13 +757,13 @@ void BaseMatrixT::log2(BaseMatrixT& b) { } DEFINE_MATRIX_BINARY_OP(Sqrt, a = sqrt(b)); -template <> +template<> void BaseMatrixT::sqrt2(BaseMatrixT& b) { applyBinary(binary::Sqrt(), b); } DEFINE_MATRIX_BINARY_OP(InvSqrt, a = 1.0f / sqrt(b)); -template <> +template<> void BaseMatrixT::invSqrt(BaseMatrixT& b) { if (useGpu_) { applyBinary(binary::InvSqrt(), b); @@ -866,37 +775,37 @@ void BaseMatrixT::invSqrt(BaseMatrixT& b) { } DEFINE_MATRIX_BINARY_PARAMETER_OP(IsEqual, ONE_PARAMETER, a = (b == p)); -template +template void BaseMatrixT::isEqualTo(BaseMatrixT& b, T value) { applyBinary(binary::IsEqual(value), b); } DEFINE_MATRIX_BINARY_PARAMETER_OP(AddScalar, ONE_PARAMETER, a = b + p); -template +template void BaseMatrixT::addScalar(BaseMatrixT& b, T p) { applyBinary(binary::AddScalar(p), b); } DEFINE_MATRIX_BINARY_PARAMETER_OP(SubScalar, ONE_PARAMETER, a = b - p); -template +template void BaseMatrixT::subScalar(BaseMatrixT& b, T p) { applyBinary(binary::SubScalar(p), b); } DEFINE_MATRIX_BINARY_PARAMETER_OP(MulScalar, ONE_PARAMETER, a = b * p); -template +template void BaseMatrixT::mulScalar(BaseMatrixT& b, T p) { applyBinary(binary::MulScalar(p), b); } DEFINE_MATRIX_BINARY_PARAMETER_OP(DivScalar, ONE_PARAMETER, a = b / p); -template +template void BaseMatrixT::divScalar(BaseMatrixT& b, T p) { applyBinary(binary::DivScalar(p), b); } DEFINE_MATRIX_BINARY_PARAMETER_OP(ScalarDiv, ONE_PARAMETER, a = p / b); -template +template void BaseMatrixT::scalarDiv(BaseMatrixT& b, T p) { applyBinary(binary::ScalarDiv(p), b); } @@ -908,20 +817,20 @@ void BaseMatrixT::scalarDiv(BaseMatrixT& b, T p) { DEFINE_MATRIX_TERNARY_OP(SoftCrossEntropy, a = -c * log(b) - (1 - c) * log(1 - b)); -template <> +template<> void BaseMatrixT::softCrossEntropy(BaseMatrixT& b, BaseMatrixT& c) { applyTernary(ternary::SoftCrossEntropy(), b, c); } DEFINE_MATRIX_TERNARY_OP(SoftCrossEntropyBp, a += (b - c) / (b * (1 - b))); -template +template void BaseMatrixT::softCrossEntropyBp(BaseMatrixT& b, BaseMatrixT& c) { applyTernary(ternary::SoftCrossEntropyBp(), b, c); } DEFINE_MATRIX_TERNARY_OP(BinaryCrossEntropy, a = c > 0.5 ? -log(b) : -log(1.0 - b)); -template <> +template<> void BaseMatrixT::binaryLabelCrossEntropy(BaseMatrixT& b, BaseMatrixT& c) { if (useGpu_) { @@ -949,73 +858,70 @@ void BaseMatrixT::binaryLabelCrossEntropy(BaseMatrixT& b, DEFINE_MATRIX_TERNARY_OP(BinaryCrossEntropyBp, a += c > 0.5 ? 
-1.0 / b : 1.0 / (1.0 - b)); -template +template void BaseMatrixT::binaryLabelCrossEntropyBp(BaseMatrixT& b, BaseMatrixT& c) { applyTernary(ternary::BinaryCrossEntropyBp(), b, c); } DEFINE_MATRIX_TERNARY_OP(Add, a = b + c); -template +template void BaseMatrixT::add(BaseMatrixT& b, BaseMatrixT& c) { applyTernary(ternary::Add(), b, c); } DEFINE_MATRIX_TERNARY_PARAMETER_OP(Add1, TWO_PARAMETER, a = p1 * b + p2 * c); -template +template void BaseMatrixT::add(BaseMatrixT& b, T p1, BaseMatrixT& c, T p2) { applyTernary(ternary::Add1(p1, p2), b, c); } DEFINE_MATRIX_TERNARY_OP(Sub, a = b - c); -template +template void BaseMatrixT::sub(BaseMatrixT& b, BaseMatrixT& c) { applyTernary(ternary::Sub(), b, c); } DEFINE_MATRIX_TERNARY_PARAMETER_OP(Sub1, TWO_PARAMETER, a = p1 * b - p2 * c); -template +template void BaseMatrixT::sub(BaseMatrixT& b, T p1, BaseMatrixT& c, T p2) { applyTernary(ternary::Sub1(p1, p2), b, c); } DEFINE_MATRIX_TERNARY_OP(Add2, a = a + b + c); -template +template void BaseMatrixT::add2(BaseMatrixT& b, BaseMatrixT& c) { applyTernary(ternary::Add2(), b, c); } -DEFINE_MATRIX_TERNARY_PARAMETER_OP(Add3, - THREE_PARAMETER, +DEFINE_MATRIX_TERNARY_PARAMETER_OP(Add3, THREE_PARAMETER, a = p1 * a + p2 * b + p3 * c); -template +template void BaseMatrixT::add2(BaseMatrixT& b, BaseMatrixT& c, T p1, T p2, T p3) { applyTernary(ternary::Add3(p1, p2, p3), b, c); } -DEFINE_MATRIX_TERNARY_PARAMETER_OP(SgdUpdate, - THREE_PARAMETER, +DEFINE_MATRIX_TERNARY_PARAMETER_OP(SgdUpdate, THREE_PARAMETER, c = p2 * c - p1 * (b + p3 * a); a = a + c); -template +template void BaseMatrixT::sgdUpdate(BaseMatrixT& b, // grad BaseMatrixT& c, // mom - T p1, // learningRate, - T p2, // momentum, - T p3) { // decayRate + T p1, // learningRate, + T p2, // momentum, + T p3) { // decayRate applyTernary(ternary::SgdUpdate(p1, p2, p3), b, c); } -DEFINE_MATRIX_QUATERNARY_PARAMETER_OP(SgdUpdate, - THREE_PARAMETER, +DEFINE_MATRIX_QUATERNARY_PARAMETER_OP(SgdUpdate, THREE_PARAMETER, c = p2 * c - p1 * d * (b + p3 * a); a += c); -template +template void BaseMatrixT::sgdUpdate(BaseMatrixT& b, // grad, BaseMatrixT& c, // mom, BaseMatrixT& d, // lr, - T p1, // learningRate, - T p2, // momentum, - T p3) { // decayRate + T p1, // learningRate, + T p2, // momentum, + T p3) { // decayRate applyQuaternary(quaternary::SgdUpdate(p1, p2, p3), b, c, d); } @@ -1023,22 +929,19 @@ DEFINE_MATRIX_BINARY_PARAMETER_OP(ApplyL1, ONE_PARAMETER, T lambda = p * b; a = (a > lambda) ? (a - lambda) : (a < -lambda) ? (a + lambda) : 0); -template +template void BaseMatrixT::applyL1(BaseMatrixT& lr, T learningRate, T decayRate) { applyBinary(binary::ApplyL1(learningRate * decayRate), lr); } -template <> +template<> void BaseMatrixT::applyL1(BaseMatrixT& lr, real learningRate, real decayRate) { if (useGpu_) { applyBinary(binary::ApplyL1(learningRate * decayRate), lr); } else { - simd::decayL1(this->data_, - this->data_, - lr.data_, - learningRate * decayRate, + simd::decayL1(this->data_, this->data_, lr.data_, learningRate * decayRate, height_ * width_); } } @@ -1047,25 +950,24 @@ DEFINE_MATRIX_UNARY_PARAMETER_OP(ApplyL1, ONE_PARAMETER, T lambda = p; a = (a > lambda) ? (a - lambda) : (a < -lambda) ? 
(a + lambda) : 0); -template +template void BaseMatrixT::applyL1(T learningRate, T decayRate) { applyUnary(unary::ApplyL1(learningRate * decayRate)); } -template <> +template<> void BaseMatrixT::applyL1(real learningRate, real decayRate) { if (useGpu_) { applyUnary(unary::ApplyL1(learningRate * decayRate)); } else { - simd::decayL1( - this->data_, this->data_, learningRate * decayRate, height_ * width_); + simd::decayL1(this->data_, this->data_, learningRate * decayRate, + height_ * width_); } } -DEFINE_MATRIX_BINARY_PARAMETER_OP(ApplyL2, - ONE_PARAMETER, +DEFINE_MATRIX_BINARY_PARAMETER_OP(ApplyL2, ONE_PARAMETER, a *= (1.0f / (1.0f + p * b))); -template +template void BaseMatrixT::applyL2(BaseMatrixT& lr, T learningRate, T decayRate) { if (useGpu_) { applyBinary(binary::ApplyL2(learningRate * decayRate), lr); @@ -1078,33 +980,32 @@ void BaseMatrixT::applyL2(BaseMatrixT& lr, T learningRate, T decayRate) { } } -template +template void BaseMatrixT::applyL2(T learningRate, T decayRate) { BaseMatrixT::mulScalar(1.0f / (1.0f + learningRate * decayRate)); } DEFINE_MATRIX_BINARY_OP(DotMul, a *= b); -template +template void BaseMatrixT::dotMul(BaseMatrixT& b) { applyBinary(binary::DotMul(), b); } DEFINE_MATRIX_TERNARY_OP(DotMul, a = b * c); -template +template void BaseMatrixT::dotMul(BaseMatrixT& b, BaseMatrixT& c) { applyTernary(ternary::DotMul(), b, c); } DEFINE_MATRIX_TERNARY_OP(DotDiv, a = (b == 0.0) ? 0.0 : b / c); -template +template void BaseMatrixT::dotDiv(BaseMatrixT& b, BaseMatrixT& c) { applyTernary(ternary::DotDiv(), b, c); } -DEFINE_MATRIX_TERNARY_PARAMETER_OP(DotDiv2P, - TWO_PARAMETER, +DEFINE_MATRIX_TERNARY_PARAMETER_OP(DotDiv2P, TWO_PARAMETER, a = (b + p1) / (c + p2)); -template +template void BaseMatrixT::dotDiv(BaseMatrixT& b, BaseMatrixT& c, T p1, T p2) { applyTernary(ternary::DotDiv2P(p1, p2), b, c); } @@ -1114,7 +1015,7 @@ DEFINE_MATRIX_QUATERNARY_OP(RankLoss, const T THRESHOLD = 40.0; a = b - c; ? THRESHOLD : ((a < -THRESHOLD) ? (-THRESHOLD) : a); a = log(1 + exp(a)) - a * d); -template <> +template<> void BaseMatrixT::rankLoss(BaseMatrixT& b, BaseMatrixT& c, BaseMatrixT& d) { @@ -1125,9 +1026,8 @@ DEFINE_MATRIX_QUATERNARY_OP(RankLossBp, const T THRESHOLD = 40.0; a = b - c; a = (a > THRESHOLD) ? THRESHOLD : ((a < -THRESHOLD) ? (-THRESHOLD) : a); - a = exp(a); - a = (a / (1 + a) - d)); -template <> + a = exp(a); a = (a / (1 + a) - d)); +template<> void BaseMatrixT::rankLossBp(BaseMatrixT& b, BaseMatrixT& c, BaseMatrixT& d) { @@ -1140,7 +1040,7 @@ DEFINE_MATRIX_TERNARY_OP(LogisticRegressionLoss, const T THRESHOLD = 40.0; ? -THRESHOLD : b; a = log(1 + exp(x)) - c * x); -template <> +template<> void BaseMatrixT::logisticRegressionLoss(BaseMatrixT& b, BaseMatrixT& c) { applyTernary(ternary::LogisticRegressionLoss(), b, c); } @@ -1150,23 +1050,22 @@ DEFINE_MATRIX_TERNARY_OP(LogisticRegressionLossBp, const T THRESHOLD = 40.0; T x = (b > THRESHOLD) ? THRESHOLD : (b < -THRESHOLD) ? -THRESHOLD : b; - x = exp(x); - a = x / (1 + x) - c); -template <> + x = exp(x); a = x / (1 + x) - c); +template<> void BaseMatrixT::logisticRegressionLossBp(BaseMatrixT& b, BaseMatrixT& c) { applyTernary(ternary::LogisticRegressionLossBp(), b, c); } DEFINE_MATRIX_TERNARY_OP(BiggerThan, a = (b > c) ? 1.0f : 0.0f); -template +template void BaseMatrixT::biggerThan(BaseMatrixT& b, BaseMatrixT& c) { applyTernary(ternary::BiggerThan(), b, c); } DEFINE_MATRIX_QUATERNARY_OP( BiggerThan, a = ((b > c && d > 0.5f) || (b < c && d < 0.5f)) ? 
1.0f : 0.0f); -template +template void BaseMatrixT::biggerThan(BaseMatrixT& b, BaseMatrixT& c, BaseMatrixT& d) { @@ -1174,34 +1073,25 @@ void BaseMatrixT::biggerThan(BaseMatrixT& b, } DEFINE_MATRIX_TERNARY_OP(Max, a = (b > c) ? b : c); -template +template void BaseMatrixT::max2(BaseMatrixT& b, BaseMatrixT& c) { applyTernary(ternary::Max(), b, c); } -DEFINE_MATRIX_TERNARY_PARAMETER_OP(BinaryClassificationError, - ONE_PARAMETER, +DEFINE_MATRIX_TERNARY_PARAMETER_OP(BinaryClassificationError, ONE_PARAMETER, c += ((a > p) == (b > p)) ? 0.0f : 1.0f); -template -void BaseMatrixT::binaryClassificationError2(size_t destCol, - BaseMatrixT& b, - BaseMatrixT& c, - T p) { +template +void BaseMatrixT::binaryClassificationError2(size_t destCol, BaseMatrixT& b, + BaseMatrixT& c, T p) { CHECK(!useGpu_) << "do not support gpu"; MatrixOffset offset(0, 0, 0, 0, destCol, 0); int numRows = b.height_; int numCols = b.width_; - b.applyTernary(ternary::BinaryClassificationError(p), - c, - *this, - numRows, - numCols, - offset, - false_type(), - true_type() /*cAsColVector*/); + b.applyTernary(ternary::BinaryClassificationError(p), c, *this, numRows, + numCols, offset, false_type(), true_type() /*cAsColVector*/); } -template <> +template<> void BaseMatrixT::binaryClassificationError(size_t destCol, BaseMatrixT& b, BaseMatrixT& c, @@ -1209,148 +1099,127 @@ void BaseMatrixT::binaryClassificationError(size_t destCol, MatrixOffset offset(destCol, 0, 0, 0, 0, 0); int numRows = b.height_; int numCols = b.width_; - aggregate(aggregate::sum(), - base::binary::classificationError(p), - base::binary::add(), - b, - c, - numRows, - numCols, - offset, - false_type(), + aggregate(aggregate::sum(), base::binary::classificationError(p), + base::binary::add(), b, c, numRows, numCols, offset, false_type(), true_type() /*aAsColVector*/); } -DEFINE_MATRIX_QUATERNARY_PARAMETER_OP(Add3, - THREE_PARAMETER, +DEFINE_MATRIX_QUATERNARY_PARAMETER_OP(Add3, THREE_PARAMETER, a = p1 * b + p2 * c + p3 * d); -template -void BaseMatrixT::add3( - BaseMatrixT& b, BaseMatrixT& c, BaseMatrixT& d, T p1, T p2, T p3) { +template +void BaseMatrixT::add3(BaseMatrixT& b, BaseMatrixT& c, BaseMatrixT& d, T p1, + T p2, T p3) { applyQuaternary(quaternary::Add3(p1, p2, p3), b, c, d); } DEFINE_MATRIX_TERNARY_OP(DotMulSquare, a = b * c * c); -template +template void BaseMatrixT::dotMulSquare(BaseMatrixT& b, BaseMatrixT& c) { applyTernary(ternary::DotMulSquare(), b, c); } DEFINE_MATRIX_TERNARY_OP(DotSquareSquare, a = b * b * c * c); -template +template void BaseMatrixT::dotSquareSquare(BaseMatrixT& b, BaseMatrixT& c) { applyTernary(ternary::DotSquareSquare(), b, c); } DEFINE_MATRIX_BINARY_OP(DotMulSquare, a *= b * b); -template +template void BaseMatrixT::dotMulSquare(BaseMatrixT& b) { applyBinary(binary::DotMulSquare(), b); } DEFINE_MATRIX_BINARY_OP(DotSquareMul, a = a * a * b); -template +template void BaseMatrixT::dotSquareMul(BaseMatrixT& b) { applyBinary(binary::DotSquareMul(), b); } -DEFINE_MATRIX_QUATERNARY_PARAMETER_OP(AddSquareSum, - THREE_PARAMETER, +DEFINE_MATRIX_QUATERNARY_PARAMETER_OP(AddSquareSum, THREE_PARAMETER, T tmp = p1 * b + p2 * c + p3 * d; a += tmp * tmp); -template -void BaseMatrixT::addSquareSum( - BaseMatrixT& b, BaseMatrixT& c, BaseMatrixT d, T p1, T p2, T p3) { +template +void BaseMatrixT::addSquareSum(BaseMatrixT& b, BaseMatrixT& c, BaseMatrixT d, + T p1, T p2, T p3) { applyQuaternary(quaternary::AddSquareSum(p1, p2, p3), b, c, d); } DEFINE_MATRIX_BINARY_PARAMETER_OP(AddSquare, ONE_PARAMETER, a += p * b * b); -template +template void 
BaseMatrixT::addSquare(BaseMatrixT& b, T p) { applyBinary(binary::AddSquare(p), b); } -DEFINE_MATRIX_BINARY_PARAMETER_OP(DecayAddSquare, - TWO_PARAMETER, +DEFINE_MATRIX_BINARY_PARAMETER_OP(DecayAddSquare, TWO_PARAMETER, a = p1 * a + p2 * b * b); -template +template void BaseMatrixT::decayAddSquare(BaseMatrixT& b, T p1, T p2) { applyBinary(binary::DecayAddSquare(p1, p2), b); } -DEFINE_MATRIX_TERNARY_PARAMETER_OP(DecayAddSquareMul, - TWO_PARAMETER, +DEFINE_MATRIX_TERNARY_PARAMETER_OP(DecayAddSquareMul, TWO_PARAMETER, a = p1 * a + p2 * b * b * c * c); -template -void BaseMatrixT::decayAddSquareMul(BaseMatrixT& b, - BaseMatrixT& c, - T p1, +template +void BaseMatrixT::decayAddSquareMul(BaseMatrixT& b, BaseMatrixT& c, T p1, T p2) { applyTernary(ternary::DecayAddSquareMul(p1, p2), b, c); } -DEFINE_MATRIX_TERNARY_PARAMETER_OP(ReciprocalSum, - THREE_PARAMETER, +DEFINE_MATRIX_TERNARY_PARAMETER_OP(ReciprocalSum, THREE_PARAMETER, a = 1 / (p1 * b + p2 * c + p3)); -template -void BaseMatrixT::reciprocalSum( - BaseMatrixT& b, BaseMatrixT& c, T p1, T p2, T p3) { +template +void BaseMatrixT::reciprocalSum(BaseMatrixT& b, BaseMatrixT& c, T p1, T p2, + T p3) { applyTernary(ternary::ReciprocalSum(p1, p2, p3), b, c); } -DEFINE_MATRIX_BINARY_PARAMETER_OP(Reciprocal2, - TWO_PARAMETER, +DEFINE_MATRIX_BINARY_PARAMETER_OP(Reciprocal2, TWO_PARAMETER, a = 1 / (p1 * b + p2)); -template +template void BaseMatrixT::reciprocal2(BaseMatrixT& b, T p1, T p2) { applyBinary(binary::Reciprocal2(p1, p2), b); } -DEFINE_MATRIX_TERNARY_PARAMETER_OP(DotMulSquareSum, - TWO_PARAMETER, +DEFINE_MATRIX_TERNARY_PARAMETER_OP(DotMulSquareSum, TWO_PARAMETER, T tmp = p1 * b + p2 * c; a *= tmp * tmp); -template -void BaseMatrixT::dotMulSquareSum(BaseMatrixT& b, - BaseMatrixT& c, - T p1, +template +void BaseMatrixT::dotMulSquareSum(BaseMatrixT& b, BaseMatrixT& c, T p1, T p2) { applyTernary(ternary::DotMulSquareSum(p1, p2), b, c); } -DEFINE_MATRIX_TERNARY_PARAMETER_OP(DotSquareSum, - TWO_PARAMETER, +DEFINE_MATRIX_TERNARY_PARAMETER_OP(DotSquareSum, TWO_PARAMETER, T tmp = p1 * b + p2 * c; a = tmp * tmp); -template +template void BaseMatrixT::dotSquareSum(BaseMatrixT& b, BaseMatrixT& c, T p1, T p2) { applyTernary(ternary::DotSquareSum(p1, p2), b, c); } -DEFINE_MATRIX_TERNARY_PARAMETER_OP(DotMulSum, - TWO_PARAMETER, +DEFINE_MATRIX_TERNARY_PARAMETER_OP(DotMulSum, TWO_PARAMETER, a *= p1 * b + p2 * c); -template +template void BaseMatrixT::dotMulSum(BaseMatrixT& b, BaseMatrixT& c, T p1, T p2) { applyTernary(ternary::DotMulSum(p1, p2), b, c); } DEFINE_MATRIX_BINARY_OP(CopyAndClear, b = a; a = 0); -template +template void BaseMatrixT::copyAndClear(BaseMatrixT& b) { applyBinary(binary::CopyAndClear(), b); } -DEFINE_MATRIX_TERNARY_PARAMETER_OP(AddDotMul, - TWO_PARAMETER, +DEFINE_MATRIX_TERNARY_PARAMETER_OP(AddDotMul, TWO_PARAMETER, a = p1 * a + p2 * b * c); -template +template void BaseMatrixT::addDotMul(BaseMatrixT& b, BaseMatrixT& c, T p1, T p2) { applyTernary(ternary::AddDotMul(p1, p2), b, c); } DEFINE_MATRIX_BINARY_OP(Assign, a = b;); -template +template void BaseMatrixT::assign(BaseMatrixT& b) { if (useGpu_) { applyBinary(binary::Assign(), b); @@ -1361,7 +1230,7 @@ void BaseMatrixT::assign(BaseMatrixT& b) { } } -template +template void BaseMatrixT::assignAtOffset(BaseMatrixT& b, int64_t columnOffset) { if (columnOffset + b.width_ <= width_) { int numRows = height_; @@ -1381,31 +1250,24 @@ void BaseMatrixT::assignAtOffset(BaseMatrixT& b, int64_t columnOffset) { } DEFINE_MATRIX_BINARY_OP(DeepSwap, T tmp = a; a = b; b = tmp); -template +template void 
BaseMatrixT::deepSwap(BaseMatrixT& b) { - applyBinary(binary::DeepSwap(), b); + applyBinary(binary::DeepSwap(), b); } -template <> +template<> void BaseMatrixT::rowDotMul(size_t destCol, BaseMatrixT& b, BaseMatrixT& c) { int numRows = b.height_; int numCols = b.width_; MatrixOffset offset(destCol, 0, 0, 0, 0, 0); - aggregate(aggregate::sum(), - base::binary::mul(), - base::binary::add(), - b, - c, - numRows, - numCols, - offset, - false_type(), + aggregate(aggregate::sum(), base::binary::mul(), base::binary::add(), b, c, + numRows, numCols, offset, false_type(), true_type() /*aAsColVector*/); } -template +template void BaseMatrixT::rowDotMul2(size_t destCol, BaseMatrixT& b, BaseMatrixT& c) { @@ -1428,24 +1290,17 @@ void BaseMatrixT::rowDotMul2(size_t destCol, } } -template <> +template<> void BaseMatrixT::addDotMulVMM(BaseMatrixT& b, BaseMatrixT& c) { MatrixOffset offset(0, 0, 0, 0, 0, 0); int numRows = b.height_; int numCols = b.width_; - aggregate(aggregate::sum(), - base::binary::mul(), - base::binary::add(), - b, - c, - numRows, - numCols, - offset, - true_type() /*aAsRowVector*/, + aggregate(aggregate::sum(), base::binary::mul(), base::binary::add(), b, c, + numRows, numCols, offset, true_type() /*aAsRowVector*/, false_type()); } -template +template void BaseMatrixT::addDotMulVMM2(BaseMatrixT& b, BaseMatrixT& c) { CHECK(!useGpu_) << "do not support gpu"; @@ -1466,22 +1321,16 @@ void BaseMatrixT::addDotMulVMM2(BaseMatrixT& b, BaseMatrixT& c) { } DEFINE_MATRIX_TERNARY_OP(addDotMulMMV, a += b * c); -template +template void BaseMatrixT::addDotMulMMV(BaseMatrixT& b, BaseMatrixT& c) { MatrixOffset offset(0, 0, 0, 0, 0, 0); int numRows = height_; int numCols = width_; - applyTernary(ternary::addDotMulMMV(), - b, - c, - numRows, - numCols, - offset, - true_type() /*cAsRowVector*/, - false_type()); + applyTernary(ternary::addDotMulMMV(), b, c, numRows, numCols, offset, + true_type() /*cAsRowVector*/, false_type()); } -template +template void BaseMatrixT::addDotMulMMV2(BaseMatrixT& b, BaseMatrixT& c) { CHECK(!useGpu_) << "do not support gpu"; @@ -1501,22 +1350,16 @@ void BaseMatrixT::addDotMulMMV2(BaseMatrixT& b, BaseMatrixT& c) { } } -template +template void BaseMatrixT::rowScale(size_t cCol, BaseMatrixT& b, BaseMatrixT& c) { MatrixOffset offset(0, 0, 0, 0, cCol, 0); int numRows = height_; int numCols = width_; - applyTernary(ternary::DotMul(), - b, - c, - numRows, - numCols, - offset, - false_type(), - true_type() /*cAsColVector*/); + applyTernary(ternary::DotMul(), b, c, numRows, numCols, offset, + false_type(), true_type() /*cAsColVector*/); } -template +template void BaseMatrixT::rowScale2(size_t cCol, BaseMatrixT& b, BaseMatrixT& c) { CHECK(!useGpu_) << "do not support gpu"; @@ -1536,82 +1379,52 @@ void BaseMatrixT::rowScale2(size_t cCol, BaseMatrixT& b, BaseMatrixT& c) { } } -template +template void BaseMatrixT::colScale(size_t cRow, BaseMatrixT& b, BaseMatrixT& c) { MatrixOffset offset(0, 0, 0, 0, 0, cRow); int numRows = height_; int numCols = width_; - applyTernary(ternary::DotMul(), - b, - c, - numRows, - numCols, - offset, - true_type() /* cAsRowVector */, - false_type() /* cAsColVector */); + applyTernary(ternary::DotMul(), b, c, numRows, numCols, offset, + true_type() /* cAsRowVector */, false_type() /* cAsColVector */); } -template +template void BaseMatrixT::addColScale(size_t cRow, BaseMatrixT& b, BaseMatrixT& c) { MatrixOffset offset(0, 0, 0, 0, 0, cRow); int numRows = height_; int numCols = width_; - applyTernary(ternary::addDotMulMMV(), - b, - c, - numRows, - numCols, - 
offset, - true_type() /* cAsRowVector */, - false_type() /* cAsColVector */); + applyTernary(ternary::addDotMulMMV(), b, c, numRows, numCols, offset, + true_type() /* cAsRowVector */, false_type() /* cAsColVector */); } -template +template void BaseMatrixT::addRowScale(size_t cCol, BaseMatrixT& b, BaseMatrixT& c) { MatrixOffset offset(0, 0, 0, 0, cCol, 0); int numRows = height_; int numCols = width_; - applyTernary(ternary::addDotMulMMV(), - b, - c, - numRows, - numCols, - offset, - false_type(), - true_type() /*cAsColVector*/); + applyTernary(ternary::addDotMulMMV(), b, c, numRows, numCols, offset, + false_type(), true_type() /*cAsColVector*/); } DEFINE_MATRIX_TERNARY_PARAMETER_OP(RowAdd, ONE_PARAMETER, a = b + p * c); -template +template void BaseMatrixT::rowAdd(size_t cCol, BaseMatrixT& b, BaseMatrixT& c, T p) { MatrixOffset offset(0, 0, 0, 0, cCol, 0); int numRows = height_; int numCols = width_; - applyTernary(ternary::RowAdd(p), - b, - c, - numRows, - numCols, - offset, - false_type(), - true_type() /*cAsColVector*/); + applyTernary(ternary::RowAdd(p), b, c, numRows, numCols, offset, + false_type(), true_type() /*cAsColVector*/); } DEFINE_MATRIX_TERNARY_OP(RowPow, a = pow(b, c)); -template <> +template<> void BaseMatrixT::rowPow(size_t cCol, BaseMatrixT& b, BaseMatrixT& c) { if (useGpu_) { MatrixOffset offset(0, 0, 0, 0, cCol, 0); int numRows = height_; int numCols = width_; - applyTernary(ternary::RowPow(), - b, - c, - numRows, - numCols, - offset, - false_type(), - true_type() /*cAsColVector*/); + applyTernary(ternary::RowPow(), b, c, numRows, numCols, offset, + false_type(), true_type() /*cAsColVector*/); } else { size_t height = this->height_; size_t width = this->width_; @@ -1628,64 +1441,44 @@ void BaseMatrixT::rowPow(size_t cCol, BaseMatrixT& b, BaseMatrixT& c) { } } -template +template void BaseMatrixT::mulRowVector(BaseMatrixT& b) { MatrixOffset offset(0, 0, 0, 0); int numRows = height_; int numCols = width_; - applyBinary(binary::DotMul(), - b, - numRows, - numCols, - offset, - true_type() /* bAsRowVector */, - false_type()); + applyBinary(binary::DotMul(), b, numRows, numCols, offset, + true_type() /* bAsRowVector */, false_type()); } DEFINE_MATRIX_BINARY_OP(DotDiv, a /= b); -template +template void BaseMatrixT::divRowVector(BaseMatrixT& b) { MatrixOffset offset(0, 0, 0, 0); int numRows = height_; int numCols = width_; - applyBinary(binary::DotDiv(), - b, - numRows, - numCols, - offset, - true_type() /* bAsRowVector */, - false_type()); + applyBinary(binary::DotDiv(), b, numRows, numCols, offset, + true_type() /* bAsRowVector */, false_type()); } -template +template void BaseMatrixT::mulColVector(BaseMatrixT& b) { MatrixOffset offset(0, 0, 0, 0); int numRows = height_; int numCols = width_; - applyBinary(binary::DotMul(), - b, - numRows, - numCols, - offset, - false_type(), - true_type() /* bAsColVector */); + applyBinary(binary::DotMul(), b, numRows, numCols, offset, + false_type(), true_type() /* bAsColVector */); } -template +template void BaseMatrixT::divColVector(BaseMatrixT& b) { MatrixOffset offset(0, 0, 0, 0); int numRows = height_; int numCols = width_; - applyBinary(binary::DotDiv(), - b, - numRows, - numCols, - offset, - false_type(), - true_type() /* bAsColVector */); + applyBinary(binary::DotDiv(), b, numRows, numCols, offset, + false_type(), true_type() /* bAsColVector */); } -template <> +template<> template int BaseMatrixT::applyRow(Agg agg, BaseMatrixT& b) { MatrixOffset offset(0, 0, 0, 0, 0, 0); @@ -1693,20 +1486,13 @@ int BaseMatrixT::applyRow(Agg agg, 
BaseMatrixT& b) { size_t numCols = b.width_; CHECK_EQ(height_, numRows); CHECK_EQ(width_, 1UL); - aggregate(agg, - base::unary::identity(), - base::binary::second(), - b, - numRows, - numCols, - offset, - false_type(), - true_type() /*aAsColVector*/); + aggregate(agg, base::unary::identity(), base::binary::second(), b, numRows, + numCols, offset, false_type(), true_type() /*aAsColVector*/); return 0; } -template <> +template<> template int BaseMatrixT::applyRow(Agg agg, Saver sv, BaseMatrixT& b) { MatrixOffset offset(0, 0, 0, 0, 0, 0); @@ -1714,25 +1500,16 @@ int BaseMatrixT::applyRow(Agg agg, Saver sv, BaseMatrixT& b) { size_t numCols = b.width_; CHECK_EQ(height_, numRows); CHECK_EQ(width_, 1UL); - aggregate(agg, - base::unary::identity(), - sv, - b, - numRows, - numCols, - offset, - false_type(), - true_type() /*aAsColVector*/); + aggregate(agg, base::unary::identity(), sv, b, numRows, numCols, offset, + false_type(), true_type() /*aAsColVector*/); return 0; } -template <> +template<> template -int BaseMatrixT::applyRow(Agg agg, - real scaleDest, - real scaleAgg, - BaseMatrixT& b) { +int BaseMatrixT::applyRow( + Agg agg, real scaleDest, real scaleAgg, BaseMatrixT& b) { if (scaleDest != 0) { applyRow(agg, base::binary::add2(scaleDest, scaleAgg), b); } else { @@ -1744,10 +1521,10 @@ int BaseMatrixT::applyRow(Agg agg, return 0; } -template <> +template<> template -int BaseMatrixT::applyRow( - Agg agg, Op op, Saver sv, BaseMatrixT& b, BaseMatrixT& c) { +int BaseMatrixT::applyRow(Agg agg, Op op, Saver sv, + BaseMatrixT& b, BaseMatrixT& c) { MatrixOffset offset(0, 0, 0, 0, 0, 0); size_t numRows = b.height_; size_t numCols = b.width_; @@ -1755,27 +1532,16 @@ int BaseMatrixT::applyRow( CHECK_EQ(width_, 1UL); CHECK_EQ(c.height_, numRows); CHECK_EQ(c.width_, numCols); - aggregate(agg, - op, - sv, - b, - c, - numRows, - numCols, - offset, - false_type(), - true_type() /*aAsColVector*/); + aggregate(agg, op, sv, + b, c, numRows, numCols, offset, + false_type(), true_type() /*aAsColVector*/); return 0; } -template <> +template<> template -int BaseMatrixT::applyRow(Agg agg, - Op op, - real scaleDest, - real scaleAgg, - BaseMatrixT& b, - BaseMatrixT& c) { +int BaseMatrixT::applyRow(Agg agg, Op op, real scaleDest, real scaleAgg, + BaseMatrixT& b, BaseMatrixT& c) { if (scaleDest != 0) { applyRow(agg, op, base::binary::add2(scaleDest, scaleAgg), b, c); } else { @@ -1787,7 +1553,7 @@ int BaseMatrixT::applyRow(Agg agg, return 0; } -template <> +template<> template int BaseMatrixT::applyCol(Agg agg, BaseMatrixT& b) { MatrixOffset offset(0, 0, 0, 0, 0, 0); @@ -1795,20 +1561,13 @@ int BaseMatrixT::applyCol(Agg agg, BaseMatrixT& b) { size_t numCols = b.width_; CHECK_EQ(width_, numCols); CHECK_EQ(height_, 1UL); - aggregate(agg, - base::unary::identity(), - base::binary::second(), - b, - numRows, - numCols, - offset, - true_type() /*aAsRowVector*/, - false_type()); + aggregate(agg, base::unary::identity(), base::binary::second(), b, numRows, + numCols, offset, true_type() /*aAsRowVector*/, false_type()); return 0; } -template <> +template<> template int BaseMatrixT::applyCol(Agg agg, Saver sv, BaseMatrixT& b) { MatrixOffset offset(0, 0, 0, 0, 0, 0); @@ -1816,25 +1575,16 @@ int BaseMatrixT::applyCol(Agg agg, Saver sv, BaseMatrixT& b) { size_t numCols = b.width_; CHECK_EQ(width_, numCols); CHECK_EQ(height_, 1UL); - aggregate(agg, - base::unary::identity(), - sv, - b, - numRows, - numCols, - offset, - true_type() /*aAsRowVector*/, - false_type()); + aggregate(agg, base::unary::identity(), sv, b, numRows, numCols, 
offset, + true_type() /*aAsRowVector*/, false_type()); return 0; } -template <> +template<> template -int BaseMatrixT::applyCol(Agg agg, - real scaleDest, - real scaleAgg, - BaseMatrixT& b) { +int BaseMatrixT::applyCol( + Agg agg, real scaleDest, real scaleAgg, BaseMatrixT& b) { if (scaleDest != 0) { applyCol(agg, base::binary::add2(scaleDest, scaleAgg), b); } else { @@ -1846,51 +1596,48 @@ int BaseMatrixT::applyCol(Agg agg, return 0; } -template <> +template<> void BaseMatrixT::sumRows(BaseMatrixT& b, real scaleSum, real scaleDest) { applyRow(aggregate::sum(), scaleDest, scaleSum, b); } -template <> +template<> void BaseMatrixT::maxRows(BaseMatrixT& b) { applyRow(aggregate::max(), b); } -template <> +template<> void BaseMatrixT::minRows(BaseMatrixT& b) { applyRow(aggregate::min(), b); } -template <> +template<> void BaseMatrixT::maxCols(BaseMatrixT& b) { applyCol(aggregate::max(), b); } -template <> +template<> void BaseMatrixT::minCols(BaseMatrixT& b) { applyCol(aggregate::min(), b); } -template <> +template<> void BaseMatrixT::sumCols(BaseMatrixT& b, real scaleSum, real scaleDest) { applyCol(aggregate::sum(), scaleDest, scaleSum, b); } -template <> -void BaseMatrixT::sumOfSquaredDiffs(BaseMatrixT& b, - BaseMatrixT& c, - real scaleSum, - real scaleDest) { - applyRow( - aggregate::sum(), base::binary::squaredDiff(), scaleDest, scaleSum, b, c); +template<> +void BaseMatrixT::sumOfSquaredDiffs( + BaseMatrixT& b, BaseMatrixT& c, real scaleSum, real scaleDest) { + applyRow(aggregate::sum(), base::binary::squaredDiff(), + scaleDest, scaleSum, b, c); } -template <> -void BaseMatrixT::sumOfProducts(BaseMatrixT& b, - BaseMatrixT& c, - real scaleSum, - real scaleDest) { - applyRow(aggregate::sum(), base::binary::mul(), scaleDest, scaleSum, b, c); +template<> +void BaseMatrixT::sumOfProducts( + BaseMatrixT& b, BaseMatrixT& c, real scaleSum, real scaleDest) { + applyRow(aggregate::sum(), base::binary::mul(), + scaleDest, scaleSum, b, c); } template class BaseMatrixT;
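
Note for reviewers: every routine touched above (applyUnary/applyBinary/applyTernary/applyQuaternary, the aggregate overloads, and the DEFINE_MATRIX_*_OP macros) follows the same shape — an element-wise functor is applied over a dimM x dimN block described by a data pointer, a leading dimension, and a MatrixOffset, and the call is dispatched to an hl_gpu_apply_* or hl_cpu_apply_* kernel depending on useGpu_. The following is a minimal CPU-only sketch of that pattern for orientation; AddOp and cpu_apply_binary_op are hypothetical stand-ins written for this note, not Paddle's actual functors or hl_* kernels.

    // Minimal CPU-only sketch (assumed names, not Paddle's real API) of the
    // functor-dispatch pattern used by BaseMatrixT::applyBinary in this file.
    #include <cassert>
    #include <vector>

    // Stand-in for a functor generated by DEFINE_MATRIX_BINARY_OP(Add, a += b).
    struct AddOp {
      void operator()(float& a, const float& b) const { a += b; }
    };

    // Stand-in for hl_cpu_apply_binary_op: walks a dimM x dimN block of A and B,
    // honoring their leading dimensions lda/ldb (stride between rows).
    template <class Op>
    void cpu_apply_binary_op(Op op, float* A, const float* B,
                             int dimM, int dimN, int lda, int ldb) {
      for (int i = 0; i < dimM; ++i) {
        for (int j = 0; j < dimN; ++j) {
          op(A[i * lda + j], B[i * ldb + j]);
        }
      }
    }

    int main() {
      const int dimM = 2, dimN = 3;
      std::vector<float> a(dimM * dimN, 1.0f);
      std::vector<float> b(dimM * dimN, 2.0f);
      // With lda == ldb == dimN the block is densely packed, the common case
      // applyBinary handles after computing the start addresses from the offset.
      cpu_apply_binary_op(AddOp(), a.data(), b.data(), dimM, dimN, dimN, dimN);
      assert(a[0] == 3.0f);  // 1 + 2, as binary::Add would produce
      return 0;
    }

In the real code the GPU branch passes the same functor, dimensions, and leading dimensions to the corresponding hl_gpu_apply_* kernel, which is why this diff only re-wraps argument lists and does not change behavior.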