diff --git a/paddle/math/BaseMatrix.cu b/paddle/math/BaseMatrix.cu index 344cad496a98cc2d1924ee6e206ca928279e9fde..5435808fb7f70fdf1ac98815f7fe8890fb85527c 100644 --- a/paddle/math/BaseMatrix.cu +++ b/paddle/math/BaseMatrix.cu @@ -12,21 +12,21 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include -#include #include +#include +#include #include "BaseMatrix.h" -#include "hl_matrix_ops.cuh" -#include "hl_matrix_base.cuh" -#include "hl_matrix_apply.cuh" -#include "SIMDFunctions.h" #include "MathFunctions.h" +#include "SIMDFunctions.h" +#include "hl_matrix_apply.cuh" +#include "hl_matrix_base.cuh" +#include "hl_matrix_ops.cuh" namespace paddle { const char* SPARSE_SUPPORT_ERROR = "Sparse Matrix/Vector is not supported."; -template +template template int BaseMatrixT::applyUnary(Op op) { MatrixOffset offset(0, 0); @@ -34,9 +34,11 @@ int BaseMatrixT::applyUnary(Op op) { return 0; } -template +template template -int BaseMatrixT::applyUnary(Op op, int numRows, int numCols, +int BaseMatrixT::applyUnary(Op op, + int numRows, + int numCols, MatrixOffset& offset) { CHECK(!this->isSparse()) << SPARSE_SUPPORT_ERROR; int dimM = numRows; @@ -56,7 +58,7 @@ int BaseMatrixT::applyUnary(Op op, int numRows, int numCols, return 0; } -template +template template int BaseMatrixT::applyBinary(Op op, BaseMatrixT& b) { CHECK(height_ == b.height_ && width_ == b.width_) @@ -67,18 +69,23 @@ int BaseMatrixT::applyBinary(Op op, BaseMatrixT& b) { return 0; } -template +template template -int BaseMatrixT::applyBinary(Op op, BaseMatrixT& b, int numRows, int numCols, - MatrixOffset& offset) { +int BaseMatrixT::applyBinary( + Op op, BaseMatrixT& b, int numRows, int numCols, MatrixOffset& offset) { applyBinary(op, b, numRows, numCols, offset, false_type(), false_type()); return 0; } -template +template template -int BaseMatrixT::applyBinary(Op op, BaseMatrixT& b, int numRows, int numCols, - MatrixOffset& offset, bAsRowVector, bAsColVector) { +int BaseMatrixT::applyBinary(Op op, + BaseMatrixT& b, + int numRows, + int numCols, + MatrixOffset& offset, + bAsRowVector, + bAsColVector) { CHECK(!this->isSparse()) << SPARSE_SUPPORT_ERROR; CHECK(!b.isSparse()) << SPARSE_SUPPORT_ERROR; CHECK(useGpu_ == b.useGpu_) << "Matrix type mismatch"; @@ -91,8 +98,8 @@ int BaseMatrixT::applyBinary(Op op, BaseMatrixT& b, int numRows, int numCols, T* A = data_; T* B = b.data_; CAL_MATRIX_START_ADDRESS(A, height_, width_, lda, offset.aCol_, offset.aRow_); - CAL_MATRIX_START_ADDRESS(B, b.height_, b.width_, ldb, offset.bCol_, - offset.bRow_); + CAL_MATRIX_START_ADDRESS( + B, b.height_, b.width_, ldb, offset.bCol_, offset.bRow_); CHECK_LE(dimM + offset.aRow_, this->height_); CHECK_LE(dimN + offset.aCol_, this->width_); if (!bAsRowVector::value && !bAsColVector::value) { @@ -115,7 +122,7 @@ int BaseMatrixT::applyBinary(Op op, BaseMatrixT& b, int numRows, int numCols, return 0; } -template +template template int BaseMatrixT::applyTernary(Op op, BaseMatrixT& b, BaseMatrixT& c) { CHECK_EQ(height_, b.height_); @@ -129,21 +136,29 @@ int BaseMatrixT::applyTernary(Op op, BaseMatrixT& b, BaseMatrixT& c) { return 0; } -template +template template -int BaseMatrixT::applyTernary(Op op, BaseMatrixT& b, BaseMatrixT& c, - int numRows, int numCols, +int BaseMatrixT::applyTernary(Op op, + BaseMatrixT& b, + BaseMatrixT& c, + int numRows, + int numCols, MatrixOffset& offset) { applyTernary(op, b, c, numRows, numCols, offset, false_type(), false_type()); return 0; } -template +template template -int BaseMatrixT::applyTernary(Op op, BaseMatrixT& b, BaseMatrixT& c, - int numRows, int numCols, MatrixOffset& offset, - cAsRowVector, cAsColVector) { +int BaseMatrixT::applyTernary(Op op, + BaseMatrixT& b, + BaseMatrixT& c, + int numRows, + int numCols, + MatrixOffset& offset, + cAsRowVector, + cAsColVector) { CHECK(!this->isSparse()) << SPARSE_SUPPORT_ERROR; CHECK(!b.isSparse()) << SPARSE_SUPPORT_ERROR; CHECK(!c.isSparse()) << SPARSE_SUPPORT_ERROR; @@ -160,10 +175,10 @@ int BaseMatrixT::applyTernary(Op op, BaseMatrixT& b, BaseMatrixT& c, T* B = b.data_; T* C = c.data_; CAL_MATRIX_START_ADDRESS(A, height_, width_, lda, offset.aCol_, offset.aRow_); - CAL_MATRIX_START_ADDRESS(B, b.height_, b.width_, ldb, offset.bCol_, - offset.bRow_); - CAL_MATRIX_START_ADDRESS(C, c.height_, c.width_, ldc, offset.cCol_, - offset.cRow_); + CAL_MATRIX_START_ADDRESS( + B, b.height_, b.width_, ldb, offset.bCol_, offset.bRow_); + CAL_MATRIX_START_ADDRESS( + C, c.height_, c.width_, ldc, offset.cCol_, offset.cRow_); CHECK_LE(dimM + offset.aRow_, this->height_); CHECK_LE(dimN + offset.aCol_, this->width_); @@ -180,21 +195,21 @@ int BaseMatrixT::applyTernary(Op op, BaseMatrixT& b, BaseMatrixT& c, } if (true == useGpu_) { - hl_gpu_apply_ternary_op - ( + hl_gpu_apply_ternary_op( op, A, B, C, dimM, dimN, lda, ldb, ldc); } else { - hl_cpu_apply_ternary_op - ( + hl_cpu_apply_ternary_op( op, A, B, C, dimM, dimN, lda, ldb, ldc); } return 0; } -template +template template -int BaseMatrixT::applyQuaternary(Op op, BaseMatrixT& b, BaseMatrixT& c, +int BaseMatrixT::applyQuaternary(Op op, + BaseMatrixT& b, + BaseMatrixT& c, BaseMatrixT& d) { CHECK_EQ(height_, b.height_); CHECK_EQ(width_, b.width_); @@ -209,10 +224,14 @@ int BaseMatrixT::applyQuaternary(Op op, BaseMatrixT& b, BaseMatrixT& c, return 0; } -template +template template -int BaseMatrixT::applyQuaternary(Op op, BaseMatrixT& b, BaseMatrixT& c, - BaseMatrixT& d, int numRows, int numCols, +int BaseMatrixT::applyQuaternary(Op op, + BaseMatrixT& b, + BaseMatrixT& c, + BaseMatrixT& d, + int numRows, + int numCols, MatrixOffset& offset) { CHECK(!this->isSparse()) << SPARSE_SUPPORT_ERROR; CHECK(!b.isSparse()) << SPARSE_SUPPORT_ERROR; @@ -234,12 +253,12 @@ int BaseMatrixT::applyQuaternary(Op op, BaseMatrixT& b, BaseMatrixT& c, T* C = c.data_; T* D = d.data_; CAL_MATRIX_START_ADDRESS(A, height_, width_, lda, offset.aCol_, offset.aRow_); - CAL_MATRIX_START_ADDRESS(B, b.height_, b.width_, ldb, offset.bCol_, - offset.bRow_); - CAL_MATRIX_START_ADDRESS(C, c.height_, c.width_, ldc, offset.cCol_, - offset.cRow_); - CAL_MATRIX_START_ADDRESS(D, d.height_, d.width_, ldd, offset.dCol_, - offset.dRow_); + CAL_MATRIX_START_ADDRESS( + B, b.height_, b.width_, ldb, offset.bCol_, offset.bRow_); + CAL_MATRIX_START_ADDRESS( + C, c.height_, c.width_, ldc, offset.cCol_, offset.cRow_); + CAL_MATRIX_START_ADDRESS( + D, d.height_, d.width_, ldd, offset.dCol_, offset.dRow_); CHECK_LE(dimM + offset.aRow_, this->height_); CHECK_LE(dimN + offset.aCol_, this->width_); @@ -250,22 +269,29 @@ int BaseMatrixT::applyQuaternary(Op op, BaseMatrixT& b, BaseMatrixT& c, CHECK_LE(dimM + offset.dRow_, d.height_); CHECK_LE(dimN + offset.dCol_, d.width_); if (true == useGpu_) { - hl_gpu_apply_quaternary_op(op, A, B, C, D, dimM, dimN, lda, ldb, - ldc, ldd); + hl_gpu_apply_quaternary_op(op, A, B, C, D, dimM, dimN, lda, ldb, ldc, ldd); } else { - hl_cpu_apply_quaternary_op(op, A, B, C, D, dimM, dimN, lda, ldb, - ldc, ldd); + hl_cpu_apply_quaternary_op(op, A, B, C, D, dimM, dimN, lda, ldb, ldc, ldd); } return 0; } -template -template +template -int BaseMatrixT::aggregate(Agg agg, Op op, Saver sv, BaseMatrixT& b, - int numRows, int numCols, MatrixOffset& offset, - aAsRowVector, aAsColVector) { +int BaseMatrixT::aggregate(Agg agg, + Op op, + Saver sv, + BaseMatrixT& b, + int numRows, + int numCols, + MatrixOffset& offset, + aAsRowVector, + aAsColVector) { CHECK_EQ(useGpu_, b.useGpu_); int ld = stride_; @@ -273,10 +299,10 @@ int BaseMatrixT::aggregate(Agg agg, Op op, Saver sv, BaseMatrixT& b, T* dst = data_; T* B = b.data_; - CAL_MATRIX_START_ADDRESS(dst, height_, width_, ld, offset.aCol_, - offset.aRow_); - CAL_MATRIX_START_ADDRESS(B, b.height_, b.width_, ldb, offset.bCol_, - offset.bRow_); + CAL_MATRIX_START_ADDRESS( + dst, height_, width_, ld, offset.aCol_, offset.aRow_); + CAL_MATRIX_START_ADDRESS( + B, b.height_, b.width_, ldb, offset.bCol_, offset.bRow_); if (aAsRowVector::value && !aAsColVector::value) { if (useGpu_) { @@ -297,12 +323,21 @@ int BaseMatrixT::aggregate(Agg agg, Op op, Saver sv, BaseMatrixT& b, return 0; } -template -template +template -int BaseMatrixT::aggregate(Agg agg, Op op, Saver sv, BaseMatrixT& b, - BaseMatrixT& c, int numRows, int numCols, - MatrixOffset& offset, aAsRowVector, +int BaseMatrixT::aggregate(Agg agg, + Op op, + Saver sv, + BaseMatrixT& b, + BaseMatrixT& c, + int numRows, + int numCols, + MatrixOffset& offset, + aAsRowVector, aAsColVector) { CHECK_EQ(useGpu_, b.useGpu_); CHECK_EQ(useGpu_, c.useGpu_); @@ -314,28 +349,28 @@ int BaseMatrixT::aggregate(Agg agg, Op op, Saver sv, BaseMatrixT& b, T* dst = data_; T* B = b.data_; T* C = c.data_; - CAL_MATRIX_START_ADDRESS(dst, height_, width_, ld, offset.aCol_, - offset.aRow_); - CAL_MATRIX_START_ADDRESS(B, b.height_, b.width_, ldb, offset.bCol_, - offset.bRow_); - CAL_MATRIX_START_ADDRESS(C, c.height_, c.width_, ldc, offset.cCol_, - offset.cRow_); + CAL_MATRIX_START_ADDRESS( + dst, height_, width_, ld, offset.aCol_, offset.aRow_); + CAL_MATRIX_START_ADDRESS( + B, b.height_, b.width_, ldb, offset.bCol_, offset.bRow_); + CAL_MATRIX_START_ADDRESS( + C, c.height_, c.width_, ldc, offset.cCol_, offset.cRow_); if (aAsRowVector::value && !aAsColVector::value) { if (useGpu_) { - hl_gpu_matrix_column_op(agg, op, sv, numRows, numCols, dst, B, - ldb, C, ldc); + hl_gpu_matrix_column_op( + agg, op, sv, numRows, numCols, dst, B, ldb, C, ldc); } else { - hl_cpu_matrix_column_op(agg, op, sv, numRows, numCols, dst, B, - ldb, C, ldc); + hl_cpu_matrix_column_op( + agg, op, sv, numRows, numCols, dst, B, ldb, C, ldc); } } else if (!aAsRowVector::value && aAsColVector::value) { if (useGpu_) { - hl_gpu_matrix_row_op(agg, op, sv, numRows, numCols, dst, ld, B, - ldb, C, ldc); + hl_gpu_matrix_row_op( + agg, op, sv, numRows, numCols, dst, ld, B, ldb, C, ldc); } else { - hl_cpu_matrix_row_op(agg, op, sv, numRows, numCols, dst, ld, B, - ldb, C, ldc); + hl_cpu_matrix_row_op( + agg, op, sv, numRows, numCols, dst, ld, B, ldb, C, ldc); } } else { LOG(FATAL) << "not supported"; @@ -350,15 +385,19 @@ int BaseMatrixT::aggregate(Agg agg, Op op, Saver sv, BaseMatrixT& b, */ DEFINE_MATRIX_UNARY_OP(Neg, a = -a); -template -void BaseMatrixT::neg() { applyUnary(unary::Neg()); } +template +void BaseMatrixT::neg() { + applyUnary(unary::Neg()); +} DEFINE_MATRIX_UNARY_OP(Exp, a = exp(a)); -template<> -void BaseMatrixT::exp2() { applyUnary(unary::Exp()); } +template <> +void BaseMatrixT::exp2() { + applyUnary(unary::Exp()); +} DEFINE_MATRIX_UNARY_OP(Log, a = log(a)); -template<> +template <> void BaseMatrixT::log2() { if (useGpu_) { applyUnary(unary::Log()); @@ -368,30 +407,42 @@ void BaseMatrixT::log2() { } DEFINE_MATRIX_UNARY_OP(Sqrt, a = sqrt(a)); -template<> -void BaseMatrixT::sqrt2() { applyUnary(unary::Sqrt()); } +template <> +void BaseMatrixT::sqrt2() { + applyUnary(unary::Sqrt()); +} DEFINE_MATRIX_UNARY_OP(Square, a = a * a); -template -void BaseMatrixT::square2() { applyUnary(unary::Square()); } +template +void BaseMatrixT::square2() { + applyUnary(unary::Square()); +} DEFINE_MATRIX_UNARY_OP(Reciprocal, a = 1.0f / a); -template -void BaseMatrixT::reciprocal2() { applyUnary(unary::Reciprocal()); } +template +void BaseMatrixT::reciprocal2() { + applyUnary(unary::Reciprocal()); +} DEFINE_MATRIX_UNARY_OP(Abs, a = a > 0 ? a : -a); -template -void BaseMatrixT::abs2() { applyUnary(unary::Abs()); } +template +void BaseMatrixT::abs2() { + applyUnary(unary::Abs()); +} DEFINE_MATRIX_UNARY_OP(Sign, a = (a > 0) - (a < 0)); -template -void BaseMatrixT::sign2() { applyUnary(unary::Sign()); } +template +void BaseMatrixT::sign2() { + applyUnary(unary::Sign()); +} DEFINE_MATRIX_UNARY_OP(Zero, a = 0); -template -void BaseMatrixT::zero() { applyUnary(unary::Zero()); } +template +void BaseMatrixT::zero() { + applyUnary(unary::Zero()); +} -template +template void BaseMatrixT::zeroAtOffset(int64_t columnOffset, int64_t numColumns) { int numRows = height_; int numCols = numColumns; @@ -400,11 +451,13 @@ void BaseMatrixT::zeroAtOffset(int64_t columnOffset, int64_t numColumns) { } DEFINE_MATRIX_UNARY_OP(One, a = 1); -template -void BaseMatrixT::one() { applyUnary(unary::One()); } +template +void BaseMatrixT::one() { + applyUnary(unary::One()); +} DEFINE_MATRIX_UNARY_PARAMETER_OP(Pow, ONE_PARAMETER, a = pow(a, p)); -template<> +template <> void BaseMatrixT::pow2(real p) { if (useGpu_) { applyUnary(unary::Pow(p)); @@ -414,51 +467,67 @@ void BaseMatrixT::pow2(real p) { } DEFINE_MATRIX_UNARY_PARAMETER_OP(SubScalar, ONE_PARAMETER, a -= p); -template -void BaseMatrixT::subScalar(T p) { applyUnary(unary::SubScalar(p)); } +template +void BaseMatrixT::subScalar(T p) { + applyUnary(unary::SubScalar(p)); +} DEFINE_MATRIX_UNARY_PARAMETER_OP(MulScalar, ONE_PARAMETER, a *= p); -template -void BaseMatrixT::mulScalar(T p) { applyUnary(unary::MulScalar(p)); } +template +void BaseMatrixT::mulScalar(T p) { + applyUnary(unary::MulScalar(p)); +} DEFINE_MATRIX_UNARY_PARAMETER_OP(DivScalar, ONE_PARAMETER, a /= p); -template -void BaseMatrixT::divScalar(T p) { applyUnary(unary::DivScalar(p)); } +template +void BaseMatrixT::divScalar(T p) { + applyUnary(unary::DivScalar(p)); +} DEFINE_MATRIX_UNARY_PARAMETER_OP(Assign, ONE_PARAMETER, a = p); -template -void BaseMatrixT::assign(T p) { applyUnary(unary::Assign(p)); } +template +void BaseMatrixT::assign(T p) { + applyUnary(unary::Assign(p)); +} DEFINE_MATRIX_UNARY_PARAMETER_OP(Add, ONE_PARAMETER, a += p); -template -void BaseMatrixT::add(T p) { applyUnary(unary::Add(p)); } +template +void BaseMatrixT::add(T p) { + applyUnary(unary::Add(p)); +} DEFINE_MATRIX_UNARY_PARAMETER_OP(Add2, TWO_PARAMETER, a = a * p1 + p2); -template -void BaseMatrixT::add(T p1, T p2) { applyUnary(unary::Add2(p1, p2)); } +template +void BaseMatrixT::add(T p1, T p2) { + applyUnary(unary::Add2(p1, p2)); +} -DEFINE_MATRIX_UNARY_PARAMETER_OP(Clip, TWO_PARAMETER, +DEFINE_MATRIX_UNARY_PARAMETER_OP(Clip, + TWO_PARAMETER, a = a < p1 ? p1 : (a > p2 ? p2 : a)); -template -void BaseMatrixT::clip(T p1, T p2) { applyUnary(unary::Clip(p1, p2)); } +template +void BaseMatrixT::clip(T p1, T p2) { + applyUnary(unary::Clip(p1, p2)); +} -DEFINE_MATRIX_BINARY_PARAMETER_OP(ClipDerivative, TWO_PARAMETER, - a = b < p1 ? 0 : (b > p2 ? 0 : 1)); -template +DEFINE_MATRIX_BINARY_PARAMETER_OP(ClipDerivative, + TWO_PARAMETER, + a = b < p1 ? 0 : (b > p2 ? 0 : 1)); +template void BaseMatrixT::clipDerivative(BaseMatrixT& b, T p1, T p2) { applyBinary(binary::ClipDerivative(p1, p2), b); } -DEFINE_MATRIX_UNARY_PARAMETER_OP(BiggerThanScalar, ONE_PARAMETER, +DEFINE_MATRIX_UNARY_PARAMETER_OP(BiggerThanScalar, + ONE_PARAMETER, a = a > p ? 1.0f : 0.0f); -template +template void BaseMatrixT::biggerThanScalar(T p) { applyUnary(unary::BiggerThanScalar(p)); } -DEFINE_MATRIX_UNARY_PARAMETER_OP(DownClip, ONE_PARAMETER, - a = a > p ? a : p); -template +DEFINE_MATRIX_UNARY_PARAMETER_OP(DownClip, ONE_PARAMETER, a = a > p ? a : p); +template void BaseMatrixT::downClip(T p) { applyUnary(unary::DownClip(p)); } @@ -469,12 +538,12 @@ void BaseMatrixT::downClip(T p) { */ DEFINE_MATRIX_BINARY_OP(Add, a += b); -template +template void BaseMatrixT::add(BaseMatrixT& b) { applyBinary(binary::Add(), b); } -template<> +template <> void BaseMatrixT::add(BaseMatrixT& b) { if (useGpu_) { applyBinary(binary::Add(), b); @@ -485,7 +554,7 @@ void BaseMatrixT::add(BaseMatrixT& b) { } } -template +template void BaseMatrixT::addAtOffset(BaseMatrixT& b, int64_t columnOffset) { if (columnOffset + b.width_ <= width_) { int numRows = height_; @@ -504,43 +573,53 @@ void BaseMatrixT::addAtOffset(BaseMatrixT& b, int64_t columnOffset) { } } -template +template void BaseMatrixT::addP2P(BaseMatrixT& b) { T* A = data_; T* B = b.data_; int dimM = height_; int dimN = width_; - hl_gpu_apply_binary_op, 0, 0> - (binary::Add(), A, B, dimM, dimN, dimN, dimN); + hl_gpu_apply_binary_op, 0, 0>( + binary::Add(), A, B, dimM, dimN, dimN, dimN); } -template +template void BaseMatrixT::addColVector(BaseMatrixT& b) { MatrixOffset offset(0, 0, 0, 0); int numRows = height_; int numCols = width_; - applyBinary(binary::Add(), b, numRows, numCols, offset, false_type(), + applyBinary(binary::Add(), + b, + numRows, + numCols, + offset, + false_type(), true_type() /* bAsColVector */); } -template +template void BaseMatrixT::addRowVector(BaseMatrixT& b) { MatrixOffset offset(0, 0, 0, 0); int numRows = height_; int numCols = width_; - applyBinary(binary::Add(), b, numRows, numCols, offset, - true_type() /* bAsRowVector */, false_type()); + applyBinary(binary::Add(), + b, + numRows, + numCols, + offset, + true_type() /* bAsRowVector */, + false_type()); } DEFINE_MATRIX_BINARY_PARAMETER_OP(Add1, ONE_PARAMETER, a += b * p); -template +template void BaseMatrixT::add(BaseMatrixT& b, T p) { applyBinary(binary::Add1(p), b); } DEFINE_MATRIX_BINARY_PARAMETER_OP(Pow, ONE_PARAMETER, a = pow(b, p)); -template<> +template <> void BaseMatrixT::pow2(BaseMatrixT& b, real p) { if (useGpu_) { applyBinary(binary::Pow(p), b); @@ -550,36 +629,45 @@ void BaseMatrixT::pow2(BaseMatrixT& b, real p) { } DEFINE_MATRIX_BINARY_PARAMETER_OP(Add2, TWO_PARAMETER, a = p1 * a + p2 * b); -template +template void BaseMatrixT::add(BaseMatrixT& b, T p1, T p2) { applyBinary(binary::Add2(p1, p2), b); } -template +template void BaseMatrixT::addBias(BaseMatrixT& b, T scale) { MatrixOffset offset(0, 0, 0, 0); int numRows = height_; int numCols = width_; - applyBinary(binary::Add1(scale), b, numRows, numCols, offset, - true_type() /* bAsRowVector */, false_type()); + applyBinary(binary::Add1(scale), + b, + numRows, + numCols, + offset, + true_type() /* bAsRowVector */, + false_type()); } DEFINE_MATRIX_BINARY_OP(Sub, a -= b); -template -void BaseMatrixT::sub(BaseMatrixT& b) { applyBinary(binary::Sub(), b); } +template +void BaseMatrixT::sub(BaseMatrixT& b) { + applyBinary(binary::Sub(), b); +} DEFINE_MATRIX_BINARY_PARAMETER_OP(Sub1, ONE_PARAMETER, a -= b * p); -template +template void BaseMatrixT::sub(BaseMatrixT& b, T p) { applyBinary(binary::Sub1(p), b); } DEFINE_MATRIX_BINARY_OP(Relu, b = a > 0.0f ? a : 0.0f); -template -void BaseMatrixT::relu(BaseMatrixT& b) { applyBinary(binary::Relu(), b); } +template +void BaseMatrixT::relu(BaseMatrixT& b) { + applyBinary(binary::Relu(), b); +} DEFINE_MATRIX_BINARY_OP(ReluDerivative, a *= (b > 0.0f ? 1.0f : 0.0f)); -template +template void BaseMatrixT::reluDerivative(BaseMatrixT& b) { applyBinary(binary::ReluDerivative(), b); } @@ -589,7 +677,7 @@ DEFINE_MATRIX_BINARY_OP(Softrelu, const T THRESHOLD = 40.0; ? THRESHOLD : ((a < -THRESHOLD) ? (-THRESHOLD) : a)))); -template<> +template <> void BaseMatrixT::softrelu(BaseMatrixT& b) { applyBinary(binary::Softrelu(), b); } @@ -599,97 +687,100 @@ DEFINE_MATRIX_BINARY_OP( a *= (1.0 - exp(-1.0 * ((b > THRESHOLD) ? THRESHOLD : ((b < -THRESHOLD) ? (-THRESHOLD) : b))))); -template<> +template <> void BaseMatrixT::softreluDerivative(BaseMatrixT& b) { applyBinary(binary::SoftreluDerivative(), b); } DEFINE_MATRIX_BINARY_PARAMETER_OP(Brelu, TWO_PARAMETER, b = a > p1 ? a : p1; b = b < p2 ? b : p2); -template +template void BaseMatrixT::brelu(BaseMatrixT& b) { - int p1 = 0, p2 = 24; //! TODO(yuyang18): Make p1,p2 configuable. + int p1 = 0, p2 = 24; //! TODO(yuyang18): Make p1,p2 configuable. applyBinary(binary::Brelu(p1, p2), b); } -DEFINE_MATRIX_BINARY_PARAMETER_OP(BreluDerivative, TWO_PARAMETER, +DEFINE_MATRIX_BINARY_PARAMETER_OP(BreluDerivative, + TWO_PARAMETER, a *= (b > p1 && b < p2) ? 1.0 : 0.0); -template +template void BaseMatrixT::breluDerivative(BaseMatrixT& b) { int p1 = 0, p2 = 24; applyBinary(binary::BreluDerivative(p1, p2), b); } DEFINE_MATRIX_BINARY_OP(Square, b = a * a); -template +template void BaseMatrixT::square2(BaseMatrixT& b) { applyBinary(binary::Square(), b); } DEFINE_MATRIX_BINARY_OP(SquareDerivative, a *= 2.0 * b); -template +template void BaseMatrixT::squareDerivative(BaseMatrixT& b) { applyBinary(binary::SquareDerivative(), b); } -DEFINE_MATRIX_BINARY_OP(Tanh, - T tmp = -2.0 * a; - tmp = (tmp > EXP_MAX_INPUT) ? EXP_MAX_INPUT : tmp; - b = 2.0 / (1.0 + std::exp(tmp)) - 1.0); -template<> +DEFINE_MATRIX_BINARY_OP(Tanh, T tmp = -2.0 * a; + tmp = (tmp > EXP_MAX_INPUT) ? EXP_MAX_INPUT : tmp; + b = 2.0 / (1.0 + std::exp(tmp)) - 1.0); +template <> void BaseMatrixT::tanh(BaseMatrixT& b) { applyBinary(binary::Tanh(), b); } DEFINE_MATRIX_BINARY_OP(TanhDerivative, a *= 1 - b * b); -template +template void BaseMatrixT::tanhDerivative(BaseMatrixT& b) { applyBinary(binary::TanhDerivative(), b); } -DEFINE_MATRIX_BINARY_PARAMETER_OP(ScaledTanh, TWO_PARAMETER, - b = p1 * - (2.0 / (1.0 + exp(-2 * p2 * a)) - 1.0)); -template<> +DEFINE_MATRIX_BINARY_PARAMETER_OP( + ScaledTanh, TWO_PARAMETER, b = p1 * (2.0 / (1.0 + exp(-2 * p2 * a)) - 1.0)); +template <> void BaseMatrixT::scaledTanh(BaseMatrixT& b, real p1, real p2) { applyBinary(binary::ScaledTanh(p1, p2), b); } -DEFINE_MATRIX_BINARY_PARAMETER_OP(ScaledTanhDerivative, TWO_PARAMETER, +DEFINE_MATRIX_BINARY_PARAMETER_OP(ScaledTanhDerivative, + TWO_PARAMETER, a *= p2 * (p1 - b * b)); -template +template void BaseMatrixT::scaledTanhDerivative(BaseMatrixT& b, T p1, T p2) { applyBinary(binary::ScaledTanhDerivative(p1 * p1, p2 / p1), b); } DEFINE_MATRIX_BINARY_OP(Reciprocal, b = 1.0f / a); -template +template void BaseMatrixT::reciprocal2(BaseMatrixT& b) { applyBinary(binary::Reciprocal(), b); } DEFINE_MATRIX_BINARY_OP(ReciprocalDerivative, a *= -b * b); -template +template void BaseMatrixT::reciprocalDerivative(BaseMatrixT& b) { applyBinary(binary::ReciprocalDerivative(), b); } DEFINE_MATRIX_BINARY_OP(Abs, b = a > 0.0f ? a : -a); -template -void BaseMatrixT::abs2(BaseMatrixT& b) { applyBinary(binary::Abs(), b); } +template +void BaseMatrixT::abs2(BaseMatrixT& b) { + applyBinary(binary::Abs(), b); +} DEFINE_MATRIX_BINARY_OP(AbsDerivative, a = (b > 0) ? a : (b < 0) ? -a : 0); -template +template void BaseMatrixT::absDerivative(BaseMatrixT& b) { applyBinary(binary::AbsDerivative(), b); } -DEFINE_MATRIX_BINARY_OP( - Sigmoid, const T THRESHOLD_MIN = -40.0; const T THRESHOLD_MAX = 13.0; - T tmp = (a < THRESHOLD_MIN) ? THRESHOLD_MIN - : ((a > THRESHOLD_MAX) ? THRESHOLD_MAX : a); - b = 1.0f / (1.0f + exp(-tmp))); -template<> +DEFINE_MATRIX_BINARY_OP(Sigmoid, const T THRESHOLD_MIN = -40.0; + const T THRESHOLD_MAX = 13.0; + T tmp = (a < THRESHOLD_MIN) + ? THRESHOLD_MIN + : ((a > THRESHOLD_MAX) ? THRESHOLD_MAX : a); + b = 1.0f / (1.0f + exp(-tmp))); +template <> void BaseMatrixT::sigmoid(BaseMatrixT& b) { if (useGpu_) { applyBinary(binary::Sigmoid(), b); @@ -723,31 +814,31 @@ void BaseMatrixT::sigmoid(BaseMatrixT& b) { } DEFINE_MATRIX_BINARY_OP(SigmoidDerivative, a *= b * (1 - b)); -template +template void BaseMatrixT::sigmoidDerivative(BaseMatrixT& b) { applyBinary(binary::SigmoidDerivative(), b); } DEFINE_MATRIX_BINARY_OP(ExpDerivative, a *= b); -template +template void BaseMatrixT::expDerivative(BaseMatrixT& b) { applyBinary(binary::ExpDerivative(), b); } DEFINE_MATRIX_BINARY_OP(Sign, b = a > 0.0f ? 1.0f : -1.0f); -template +template void BaseMatrixT::sign2(BaseMatrixT& b) { applyBinary(binary::Sign(), b); } DEFINE_MATRIX_BINARY_OP(Exp, a = exp(b)); -template<> +template <> void BaseMatrixT::exp2(BaseMatrixT& b) { applyBinary(binary::Exp(), b); } DEFINE_MATRIX_BINARY_OP(Log, a = log(b)); -template<> +template <> void BaseMatrixT::log2(BaseMatrixT& b) { if (useGpu_) { applyBinary(binary::Log(), b); @@ -757,13 +848,13 @@ void BaseMatrixT::log2(BaseMatrixT& b) { } DEFINE_MATRIX_BINARY_OP(Sqrt, a = sqrt(b)); -template<> +template <> void BaseMatrixT::sqrt2(BaseMatrixT& b) { applyBinary(binary::Sqrt(), b); } DEFINE_MATRIX_BINARY_OP(InvSqrt, a = 1.0f / sqrt(b)); -template<> +template <> void BaseMatrixT::invSqrt(BaseMatrixT& b) { if (useGpu_) { applyBinary(binary::InvSqrt(), b); @@ -775,37 +866,37 @@ void BaseMatrixT::invSqrt(BaseMatrixT& b) { } DEFINE_MATRIX_BINARY_PARAMETER_OP(IsEqual, ONE_PARAMETER, a = (b == p)); -template +template void BaseMatrixT::isEqualTo(BaseMatrixT& b, T value) { applyBinary(binary::IsEqual(value), b); } DEFINE_MATRIX_BINARY_PARAMETER_OP(AddScalar, ONE_PARAMETER, a = b + p); -template +template void BaseMatrixT::addScalar(BaseMatrixT& b, T p) { applyBinary(binary::AddScalar(p), b); } DEFINE_MATRIX_BINARY_PARAMETER_OP(SubScalar, ONE_PARAMETER, a = b - p); -template +template void BaseMatrixT::subScalar(BaseMatrixT& b, T p) { applyBinary(binary::SubScalar(p), b); } DEFINE_MATRIX_BINARY_PARAMETER_OP(MulScalar, ONE_PARAMETER, a = b * p); -template +template void BaseMatrixT::mulScalar(BaseMatrixT& b, T p) { applyBinary(binary::MulScalar(p), b); } DEFINE_MATRIX_BINARY_PARAMETER_OP(DivScalar, ONE_PARAMETER, a = b / p); -template +template void BaseMatrixT::divScalar(BaseMatrixT& b, T p) { applyBinary(binary::DivScalar(p), b); } DEFINE_MATRIX_BINARY_PARAMETER_OP(ScalarDiv, ONE_PARAMETER, a = p / b); -template +template void BaseMatrixT::scalarDiv(BaseMatrixT& b, T p) { applyBinary(binary::ScalarDiv(p), b); } @@ -817,20 +908,20 @@ void BaseMatrixT::scalarDiv(BaseMatrixT& b, T p) { DEFINE_MATRIX_TERNARY_OP(SoftCrossEntropy, a = -c * log(b) - (1 - c) * log(1 - b)); -template<> +template <> void BaseMatrixT::softCrossEntropy(BaseMatrixT& b, BaseMatrixT& c) { applyTernary(ternary::SoftCrossEntropy(), b, c); } DEFINE_MATRIX_TERNARY_OP(SoftCrossEntropyBp, a += (b - c) / (b * (1 - b))); -template +template void BaseMatrixT::softCrossEntropyBp(BaseMatrixT& b, BaseMatrixT& c) { applyTernary(ternary::SoftCrossEntropyBp(), b, c); } DEFINE_MATRIX_TERNARY_OP(BinaryCrossEntropy, a = c > 0.5 ? -log(b) : -log(1.0 - b)); -template<> +template <> void BaseMatrixT::binaryLabelCrossEntropy(BaseMatrixT& b, BaseMatrixT& c) { if (useGpu_) { @@ -858,70 +949,73 @@ void BaseMatrixT::binaryLabelCrossEntropy(BaseMatrixT& b, DEFINE_MATRIX_TERNARY_OP(BinaryCrossEntropyBp, a += c > 0.5 ? -1.0 / b : 1.0 / (1.0 - b)); -template +template void BaseMatrixT::binaryLabelCrossEntropyBp(BaseMatrixT& b, BaseMatrixT& c) { applyTernary(ternary::BinaryCrossEntropyBp(), b, c); } DEFINE_MATRIX_TERNARY_OP(Add, a = b + c); -template +template void BaseMatrixT::add(BaseMatrixT& b, BaseMatrixT& c) { applyTernary(ternary::Add(), b, c); } DEFINE_MATRIX_TERNARY_PARAMETER_OP(Add1, TWO_PARAMETER, a = p1 * b + p2 * c); -template +template void BaseMatrixT::add(BaseMatrixT& b, T p1, BaseMatrixT& c, T p2) { applyTernary(ternary::Add1(p1, p2), b, c); } DEFINE_MATRIX_TERNARY_OP(Sub, a = b - c); -template +template void BaseMatrixT::sub(BaseMatrixT& b, BaseMatrixT& c) { applyTernary(ternary::Sub(), b, c); } DEFINE_MATRIX_TERNARY_PARAMETER_OP(Sub1, TWO_PARAMETER, a = p1 * b - p2 * c); -template +template void BaseMatrixT::sub(BaseMatrixT& b, T p1, BaseMatrixT& c, T p2) { applyTernary(ternary::Sub1(p1, p2), b, c); } DEFINE_MATRIX_TERNARY_OP(Add2, a = a + b + c); -template +template void BaseMatrixT::add2(BaseMatrixT& b, BaseMatrixT& c) { applyTernary(ternary::Add2(), b, c); } -DEFINE_MATRIX_TERNARY_PARAMETER_OP(Add3, THREE_PARAMETER, +DEFINE_MATRIX_TERNARY_PARAMETER_OP(Add3, + THREE_PARAMETER, a = p1 * a + p2 * b + p3 * c); -template +template void BaseMatrixT::add2(BaseMatrixT& b, BaseMatrixT& c, T p1, T p2, T p3) { applyTernary(ternary::Add3(p1, p2, p3), b, c); } -DEFINE_MATRIX_TERNARY_PARAMETER_OP(SgdUpdate, THREE_PARAMETER, +DEFINE_MATRIX_TERNARY_PARAMETER_OP(SgdUpdate, + THREE_PARAMETER, c = p2 * c - p1 * (b + p3 * a); a = a + c); -template +template void BaseMatrixT::sgdUpdate(BaseMatrixT& b, // grad BaseMatrixT& c, // mom - T p1, // learningRate, - T p2, // momentum, - T p3) { // decayRate + T p1, // learningRate, + T p2, // momentum, + T p3) { // decayRate applyTernary(ternary::SgdUpdate(p1, p2, p3), b, c); } -DEFINE_MATRIX_QUATERNARY_PARAMETER_OP(SgdUpdate, THREE_PARAMETER, +DEFINE_MATRIX_QUATERNARY_PARAMETER_OP(SgdUpdate, + THREE_PARAMETER, c = p2 * c - p1 * d * (b + p3 * a); a += c); -template +template void BaseMatrixT::sgdUpdate(BaseMatrixT& b, // grad, BaseMatrixT& c, // mom, BaseMatrixT& d, // lr, - T p1, // learningRate, - T p2, // momentum, - T p3) { // decayRate + T p1, // learningRate, + T p2, // momentum, + T p3) { // decayRate applyQuaternary(quaternary::SgdUpdate(p1, p2, p3), b, c, d); } @@ -929,19 +1023,22 @@ DEFINE_MATRIX_BINARY_PARAMETER_OP(ApplyL1, ONE_PARAMETER, T lambda = p * b; a = (a > lambda) ? (a - lambda) : (a < -lambda) ? (a + lambda) : 0); -template +template void BaseMatrixT::applyL1(BaseMatrixT& lr, T learningRate, T decayRate) { applyBinary(binary::ApplyL1(learningRate * decayRate), lr); } -template<> +template <> void BaseMatrixT::applyL1(BaseMatrixT& lr, real learningRate, real decayRate) { if (useGpu_) { applyBinary(binary::ApplyL1(learningRate * decayRate), lr); } else { - simd::decayL1(this->data_, this->data_, lr.data_, learningRate * decayRate, + simd::decayL1(this->data_, + this->data_, + lr.data_, + learningRate * decayRate, height_ * width_); } } @@ -950,24 +1047,25 @@ DEFINE_MATRIX_UNARY_PARAMETER_OP(ApplyL1, ONE_PARAMETER, T lambda = p; a = (a > lambda) ? (a - lambda) : (a < -lambda) ? (a + lambda) : 0); -template +template void BaseMatrixT::applyL1(T learningRate, T decayRate) { applyUnary(unary::ApplyL1(learningRate * decayRate)); } -template<> +template <> void BaseMatrixT::applyL1(real learningRate, real decayRate) { if (useGpu_) { applyUnary(unary::ApplyL1(learningRate * decayRate)); } else { - simd::decayL1(this->data_, this->data_, learningRate * decayRate, - height_ * width_); + simd::decayL1( + this->data_, this->data_, learningRate * decayRate, height_ * width_); } } -DEFINE_MATRIX_BINARY_PARAMETER_OP(ApplyL2, ONE_PARAMETER, +DEFINE_MATRIX_BINARY_PARAMETER_OP(ApplyL2, + ONE_PARAMETER, a *= (1.0f / (1.0f + p * b))); -template +template void BaseMatrixT::applyL2(BaseMatrixT& lr, T learningRate, T decayRate) { if (useGpu_) { applyBinary(binary::ApplyL2(learningRate * decayRate), lr); @@ -980,32 +1078,33 @@ void BaseMatrixT::applyL2(BaseMatrixT& lr, T learningRate, T decayRate) { } } -template +template void BaseMatrixT::applyL2(T learningRate, T decayRate) { BaseMatrixT::mulScalar(1.0f / (1.0f + learningRate * decayRate)); } DEFINE_MATRIX_BINARY_OP(DotMul, a *= b); -template +template void BaseMatrixT::dotMul(BaseMatrixT& b) { applyBinary(binary::DotMul(), b); } DEFINE_MATRIX_TERNARY_OP(DotMul, a = b * c); -template +template void BaseMatrixT::dotMul(BaseMatrixT& b, BaseMatrixT& c) { applyTernary(ternary::DotMul(), b, c); } DEFINE_MATRIX_TERNARY_OP(DotDiv, a = (b == 0.0) ? 0.0 : b / c); -template +template void BaseMatrixT::dotDiv(BaseMatrixT& b, BaseMatrixT& c) { applyTernary(ternary::DotDiv(), b, c); } -DEFINE_MATRIX_TERNARY_PARAMETER_OP(DotDiv2P, TWO_PARAMETER, +DEFINE_MATRIX_TERNARY_PARAMETER_OP(DotDiv2P, + TWO_PARAMETER, a = (b + p1) / (c + p2)); -template +template void BaseMatrixT::dotDiv(BaseMatrixT& b, BaseMatrixT& c, T p1, T p2) { applyTernary(ternary::DotDiv2P(p1, p2), b, c); } @@ -1015,7 +1114,7 @@ DEFINE_MATRIX_QUATERNARY_OP(RankLoss, const T THRESHOLD = 40.0; a = b - c; ? THRESHOLD : ((a < -THRESHOLD) ? (-THRESHOLD) : a); a = log(1 + exp(a)) - a * d); -template<> +template <> void BaseMatrixT::rankLoss(BaseMatrixT& b, BaseMatrixT& c, BaseMatrixT& d) { @@ -1026,8 +1125,9 @@ DEFINE_MATRIX_QUATERNARY_OP(RankLossBp, const T THRESHOLD = 40.0; a = b - c; a = (a > THRESHOLD) ? THRESHOLD : ((a < -THRESHOLD) ? (-THRESHOLD) : a); - a = exp(a); a = (a / (1 + a) - d)); -template<> + a = exp(a); + a = (a / (1 + a) - d)); +template <> void BaseMatrixT::rankLossBp(BaseMatrixT& b, BaseMatrixT& c, BaseMatrixT& d) { @@ -1040,7 +1140,7 @@ DEFINE_MATRIX_TERNARY_OP(LogisticRegressionLoss, const T THRESHOLD = 40.0; ? -THRESHOLD : b; a = log(1 + exp(x)) - c * x); -template<> +template <> void BaseMatrixT::logisticRegressionLoss(BaseMatrixT& b, BaseMatrixT& c) { applyTernary(ternary::LogisticRegressionLoss(), b, c); } @@ -1050,22 +1150,23 @@ DEFINE_MATRIX_TERNARY_OP(LogisticRegressionLossBp, const T THRESHOLD = 40.0; T x = (b > THRESHOLD) ? THRESHOLD : (b < -THRESHOLD) ? -THRESHOLD : b; - x = exp(x); a = x / (1 + x) - c); -template<> + x = exp(x); + a = x / (1 + x) - c); +template <> void BaseMatrixT::logisticRegressionLossBp(BaseMatrixT& b, BaseMatrixT& c) { applyTernary(ternary::LogisticRegressionLossBp(), b, c); } DEFINE_MATRIX_TERNARY_OP(BiggerThan, a = (b > c) ? 1.0f : 0.0f); -template +template void BaseMatrixT::biggerThan(BaseMatrixT& b, BaseMatrixT& c) { applyTernary(ternary::BiggerThan(), b, c); } DEFINE_MATRIX_QUATERNARY_OP( BiggerThan, a = ((b > c && d > 0.5f) || (b < c && d < 0.5f)) ? 1.0f : 0.0f); -template +template void BaseMatrixT::biggerThan(BaseMatrixT& b, BaseMatrixT& c, BaseMatrixT& d) { @@ -1073,25 +1174,34 @@ void BaseMatrixT::biggerThan(BaseMatrixT& b, } DEFINE_MATRIX_TERNARY_OP(Max, a = (b > c) ? b : c); -template +template void BaseMatrixT::max2(BaseMatrixT& b, BaseMatrixT& c) { applyTernary(ternary::Max(), b, c); } -DEFINE_MATRIX_TERNARY_PARAMETER_OP(BinaryClassificationError, ONE_PARAMETER, +DEFINE_MATRIX_TERNARY_PARAMETER_OP(BinaryClassificationError, + ONE_PARAMETER, c += ((a > p) == (b > p)) ? 0.0f : 1.0f); -template -void BaseMatrixT::binaryClassificationError2(size_t destCol, BaseMatrixT& b, - BaseMatrixT& c, T p) { +template +void BaseMatrixT::binaryClassificationError2(size_t destCol, + BaseMatrixT& b, + BaseMatrixT& c, + T p) { CHECK(!useGpu_) << "do not support gpu"; MatrixOffset offset(0, 0, 0, 0, destCol, 0); int numRows = b.height_; int numCols = b.width_; - b.applyTernary(ternary::BinaryClassificationError(p), c, *this, numRows, - numCols, offset, false_type(), true_type() /*cAsColVector*/); + b.applyTernary(ternary::BinaryClassificationError(p), + c, + *this, + numRows, + numCols, + offset, + false_type(), + true_type() /*cAsColVector*/); } -template<> +template <> void BaseMatrixT::binaryClassificationError(size_t destCol, BaseMatrixT& b, BaseMatrixT& c, @@ -1099,127 +1209,148 @@ void BaseMatrixT::binaryClassificationError(size_t destCol, MatrixOffset offset(destCol, 0, 0, 0, 0, 0); int numRows = b.height_; int numCols = b.width_; - aggregate(aggregate::sum(), base::binary::classificationError(p), - base::binary::add(), b, c, numRows, numCols, offset, false_type(), + aggregate(aggregate::sum(), + base::binary::classificationError(p), + base::binary::add(), + b, + c, + numRows, + numCols, + offset, + false_type(), true_type() /*aAsColVector*/); } -DEFINE_MATRIX_QUATERNARY_PARAMETER_OP(Add3, THREE_PARAMETER, +DEFINE_MATRIX_QUATERNARY_PARAMETER_OP(Add3, + THREE_PARAMETER, a = p1 * b + p2 * c + p3 * d); -template -void BaseMatrixT::add3(BaseMatrixT& b, BaseMatrixT& c, BaseMatrixT& d, T p1, - T p2, T p3) { +template +void BaseMatrixT::add3( + BaseMatrixT& b, BaseMatrixT& c, BaseMatrixT& d, T p1, T p2, T p3) { applyQuaternary(quaternary::Add3(p1, p2, p3), b, c, d); } DEFINE_MATRIX_TERNARY_OP(DotMulSquare, a = b * c * c); -template +template void BaseMatrixT::dotMulSquare(BaseMatrixT& b, BaseMatrixT& c) { applyTernary(ternary::DotMulSquare(), b, c); } DEFINE_MATRIX_TERNARY_OP(DotSquareSquare, a = b * b * c * c); -template +template void BaseMatrixT::dotSquareSquare(BaseMatrixT& b, BaseMatrixT& c) { applyTernary(ternary::DotSquareSquare(), b, c); } DEFINE_MATRIX_BINARY_OP(DotMulSquare, a *= b * b); -template +template void BaseMatrixT::dotMulSquare(BaseMatrixT& b) { applyBinary(binary::DotMulSquare(), b); } DEFINE_MATRIX_BINARY_OP(DotSquareMul, a = a * a * b); -template +template void BaseMatrixT::dotSquareMul(BaseMatrixT& b) { applyBinary(binary::DotSquareMul(), b); } -DEFINE_MATRIX_QUATERNARY_PARAMETER_OP(AddSquareSum, THREE_PARAMETER, +DEFINE_MATRIX_QUATERNARY_PARAMETER_OP(AddSquareSum, + THREE_PARAMETER, T tmp = p1 * b + p2 * c + p3 * d; a += tmp * tmp); -template -void BaseMatrixT::addSquareSum(BaseMatrixT& b, BaseMatrixT& c, BaseMatrixT d, - T p1, T p2, T p3) { +template +void BaseMatrixT::addSquareSum( + BaseMatrixT& b, BaseMatrixT& c, BaseMatrixT d, T p1, T p2, T p3) { applyQuaternary(quaternary::AddSquareSum(p1, p2, p3), b, c, d); } DEFINE_MATRIX_BINARY_PARAMETER_OP(AddSquare, ONE_PARAMETER, a += p * b * b); -template +template void BaseMatrixT::addSquare(BaseMatrixT& b, T p) { applyBinary(binary::AddSquare(p), b); } -DEFINE_MATRIX_BINARY_PARAMETER_OP(DecayAddSquare, TWO_PARAMETER, +DEFINE_MATRIX_BINARY_PARAMETER_OP(DecayAddSquare, + TWO_PARAMETER, a = p1 * a + p2 * b * b); -template +template void BaseMatrixT::decayAddSquare(BaseMatrixT& b, T p1, T p2) { applyBinary(binary::DecayAddSquare(p1, p2), b); } -DEFINE_MATRIX_TERNARY_PARAMETER_OP(DecayAddSquareMul, TWO_PARAMETER, +DEFINE_MATRIX_TERNARY_PARAMETER_OP(DecayAddSquareMul, + TWO_PARAMETER, a = p1 * a + p2 * b * b * c * c); -template -void BaseMatrixT::decayAddSquareMul(BaseMatrixT& b, BaseMatrixT& c, T p1, +template +void BaseMatrixT::decayAddSquareMul(BaseMatrixT& b, + BaseMatrixT& c, + T p1, T p2) { applyTernary(ternary::DecayAddSquareMul(p1, p2), b, c); } -DEFINE_MATRIX_TERNARY_PARAMETER_OP(ReciprocalSum, THREE_PARAMETER, +DEFINE_MATRIX_TERNARY_PARAMETER_OP(ReciprocalSum, + THREE_PARAMETER, a = 1 / (p1 * b + p2 * c + p3)); -template -void BaseMatrixT::reciprocalSum(BaseMatrixT& b, BaseMatrixT& c, T p1, T p2, - T p3) { +template +void BaseMatrixT::reciprocalSum( + BaseMatrixT& b, BaseMatrixT& c, T p1, T p2, T p3) { applyTernary(ternary::ReciprocalSum(p1, p2, p3), b, c); } -DEFINE_MATRIX_BINARY_PARAMETER_OP(Reciprocal2, TWO_PARAMETER, +DEFINE_MATRIX_BINARY_PARAMETER_OP(Reciprocal2, + TWO_PARAMETER, a = 1 / (p1 * b + p2)); -template +template void BaseMatrixT::reciprocal2(BaseMatrixT& b, T p1, T p2) { applyBinary(binary::Reciprocal2(p1, p2), b); } -DEFINE_MATRIX_TERNARY_PARAMETER_OP(DotMulSquareSum, TWO_PARAMETER, +DEFINE_MATRIX_TERNARY_PARAMETER_OP(DotMulSquareSum, + TWO_PARAMETER, T tmp = p1 * b + p2 * c; a *= tmp * tmp); -template -void BaseMatrixT::dotMulSquareSum(BaseMatrixT& b, BaseMatrixT& c, T p1, +template +void BaseMatrixT::dotMulSquareSum(BaseMatrixT& b, + BaseMatrixT& c, + T p1, T p2) { applyTernary(ternary::DotMulSquareSum(p1, p2), b, c); } -DEFINE_MATRIX_TERNARY_PARAMETER_OP(DotSquareSum, TWO_PARAMETER, +DEFINE_MATRIX_TERNARY_PARAMETER_OP(DotSquareSum, + TWO_PARAMETER, T tmp = p1 * b + p2 * c; a = tmp * tmp); -template +template void BaseMatrixT::dotSquareSum(BaseMatrixT& b, BaseMatrixT& c, T p1, T p2) { applyTernary(ternary::DotSquareSum(p1, p2), b, c); } -DEFINE_MATRIX_TERNARY_PARAMETER_OP(DotMulSum, TWO_PARAMETER, +DEFINE_MATRIX_TERNARY_PARAMETER_OP(DotMulSum, + TWO_PARAMETER, a *= p1 * b + p2 * c); -template +template void BaseMatrixT::dotMulSum(BaseMatrixT& b, BaseMatrixT& c, T p1, T p2) { applyTernary(ternary::DotMulSum(p1, p2), b, c); } DEFINE_MATRIX_BINARY_OP(CopyAndClear, b = a; a = 0); -template +template void BaseMatrixT::copyAndClear(BaseMatrixT& b) { applyBinary(binary::CopyAndClear(), b); } -DEFINE_MATRIX_TERNARY_PARAMETER_OP(AddDotMul, TWO_PARAMETER, +DEFINE_MATRIX_TERNARY_PARAMETER_OP(AddDotMul, + TWO_PARAMETER, a = p1 * a + p2 * b * c); -template +template void BaseMatrixT::addDotMul(BaseMatrixT& b, BaseMatrixT& c, T p1, T p2) { applyTernary(ternary::AddDotMul(p1, p2), b, c); } DEFINE_MATRIX_BINARY_OP(Assign, a = b;); -template +template void BaseMatrixT::assign(BaseMatrixT& b) { if (useGpu_) { applyBinary(binary::Assign(), b); @@ -1230,7 +1361,7 @@ void BaseMatrixT::assign(BaseMatrixT& b) { } } -template +template void BaseMatrixT::assignAtOffset(BaseMatrixT& b, int64_t columnOffset) { if (columnOffset + b.width_ <= width_) { int numRows = height_; @@ -1250,24 +1381,31 @@ void BaseMatrixT::assignAtOffset(BaseMatrixT& b, int64_t columnOffset) { } DEFINE_MATRIX_BINARY_OP(DeepSwap, T tmp = a; a = b; b = tmp); -template +template void BaseMatrixT::deepSwap(BaseMatrixT& b) { - applyBinary(binary::DeepSwap(), b); + applyBinary(binary::DeepSwap(), b); } -template<> +template <> void BaseMatrixT::rowDotMul(size_t destCol, BaseMatrixT& b, BaseMatrixT& c) { int numRows = b.height_; int numCols = b.width_; MatrixOffset offset(destCol, 0, 0, 0, 0, 0); - aggregate(aggregate::sum(), base::binary::mul(), base::binary::add(), b, c, - numRows, numCols, offset, false_type(), + aggregate(aggregate::sum(), + base::binary::mul(), + base::binary::add(), + b, + c, + numRows, + numCols, + offset, + false_type(), true_type() /*aAsColVector*/); } -template +template void BaseMatrixT::rowDotMul2(size_t destCol, BaseMatrixT& b, BaseMatrixT& c) { @@ -1290,17 +1428,24 @@ void BaseMatrixT::rowDotMul2(size_t destCol, } } -template<> +template <> void BaseMatrixT::addDotMulVMM(BaseMatrixT& b, BaseMatrixT& c) { MatrixOffset offset(0, 0, 0, 0, 0, 0); int numRows = b.height_; int numCols = b.width_; - aggregate(aggregate::sum(), base::binary::mul(), base::binary::add(), b, c, - numRows, numCols, offset, true_type() /*aAsRowVector*/, + aggregate(aggregate::sum(), + base::binary::mul(), + base::binary::add(), + b, + c, + numRows, + numCols, + offset, + true_type() /*aAsRowVector*/, false_type()); } -template +template void BaseMatrixT::addDotMulVMM2(BaseMatrixT& b, BaseMatrixT& c) { CHECK(!useGpu_) << "do not support gpu"; @@ -1321,16 +1466,22 @@ void BaseMatrixT::addDotMulVMM2(BaseMatrixT& b, BaseMatrixT& c) { } DEFINE_MATRIX_TERNARY_OP(addDotMulMMV, a += b * c); -template +template void BaseMatrixT::addDotMulMMV(BaseMatrixT& b, BaseMatrixT& c) { MatrixOffset offset(0, 0, 0, 0, 0, 0); int numRows = height_; int numCols = width_; - applyTernary(ternary::addDotMulMMV(), b, c, numRows, numCols, offset, - true_type() /*cAsRowVector*/, false_type()); + applyTernary(ternary::addDotMulMMV(), + b, + c, + numRows, + numCols, + offset, + true_type() /*cAsRowVector*/, + false_type()); } -template +template void BaseMatrixT::addDotMulMMV2(BaseMatrixT& b, BaseMatrixT& c) { CHECK(!useGpu_) << "do not support gpu"; @@ -1350,16 +1501,22 @@ void BaseMatrixT::addDotMulMMV2(BaseMatrixT& b, BaseMatrixT& c) { } } -template +template void BaseMatrixT::rowScale(size_t cCol, BaseMatrixT& b, BaseMatrixT& c) { MatrixOffset offset(0, 0, 0, 0, cCol, 0); int numRows = height_; int numCols = width_; - applyTernary(ternary::DotMul(), b, c, numRows, numCols, offset, - false_type(), true_type() /*cAsColVector*/); + applyTernary(ternary::DotMul(), + b, + c, + numRows, + numCols, + offset, + false_type(), + true_type() /*cAsColVector*/); } -template +template void BaseMatrixT::rowScale2(size_t cCol, BaseMatrixT& b, BaseMatrixT& c) { CHECK(!useGpu_) << "do not support gpu"; @@ -1379,52 +1536,82 @@ void BaseMatrixT::rowScale2(size_t cCol, BaseMatrixT& b, BaseMatrixT& c) { } } -template +template void BaseMatrixT::colScale(size_t cRow, BaseMatrixT& b, BaseMatrixT& c) { MatrixOffset offset(0, 0, 0, 0, 0, cRow); int numRows = height_; int numCols = width_; - applyTernary(ternary::DotMul(), b, c, numRows, numCols, offset, - true_type() /* cAsRowVector */, false_type() /* cAsColVector */); + applyTernary(ternary::DotMul(), + b, + c, + numRows, + numCols, + offset, + true_type() /* cAsRowVector */, + false_type() /* cAsColVector */); } -template +template void BaseMatrixT::addColScale(size_t cRow, BaseMatrixT& b, BaseMatrixT& c) { MatrixOffset offset(0, 0, 0, 0, 0, cRow); int numRows = height_; int numCols = width_; - applyTernary(ternary::addDotMulMMV(), b, c, numRows, numCols, offset, - true_type() /* cAsRowVector */, false_type() /* cAsColVector */); + applyTernary(ternary::addDotMulMMV(), + b, + c, + numRows, + numCols, + offset, + true_type() /* cAsRowVector */, + false_type() /* cAsColVector */); } -template +template void BaseMatrixT::addRowScale(size_t cCol, BaseMatrixT& b, BaseMatrixT& c) { MatrixOffset offset(0, 0, 0, 0, cCol, 0); int numRows = height_; int numCols = width_; - applyTernary(ternary::addDotMulMMV(), b, c, numRows, numCols, offset, - false_type(), true_type() /*cAsColVector*/); + applyTernary(ternary::addDotMulMMV(), + b, + c, + numRows, + numCols, + offset, + false_type(), + true_type() /*cAsColVector*/); } DEFINE_MATRIX_TERNARY_PARAMETER_OP(RowAdd, ONE_PARAMETER, a = b + p * c); -template +template void BaseMatrixT::rowAdd(size_t cCol, BaseMatrixT& b, BaseMatrixT& c, T p) { MatrixOffset offset(0, 0, 0, 0, cCol, 0); int numRows = height_; int numCols = width_; - applyTernary(ternary::RowAdd(p), b, c, numRows, numCols, offset, - false_type(), true_type() /*cAsColVector*/); + applyTernary(ternary::RowAdd(p), + b, + c, + numRows, + numCols, + offset, + false_type(), + true_type() /*cAsColVector*/); } DEFINE_MATRIX_TERNARY_OP(RowPow, a = pow(b, c)); -template<> +template <> void BaseMatrixT::rowPow(size_t cCol, BaseMatrixT& b, BaseMatrixT& c) { if (useGpu_) { MatrixOffset offset(0, 0, 0, 0, cCol, 0); int numRows = height_; int numCols = width_; - applyTernary(ternary::RowPow(), b, c, numRows, numCols, offset, - false_type(), true_type() /*cAsColVector*/); + applyTernary(ternary::RowPow(), + b, + c, + numRows, + numCols, + offset, + false_type(), + true_type() /*cAsColVector*/); } else { size_t height = this->height_; size_t width = this->width_; @@ -1441,44 +1628,64 @@ void BaseMatrixT::rowPow(size_t cCol, BaseMatrixT& b, BaseMatrixT& c) { } } -template +template void BaseMatrixT::mulRowVector(BaseMatrixT& b) { MatrixOffset offset(0, 0, 0, 0); int numRows = height_; int numCols = width_; - applyBinary(binary::DotMul(), b, numRows, numCols, offset, - true_type() /* bAsRowVector */, false_type()); + applyBinary(binary::DotMul(), + b, + numRows, + numCols, + offset, + true_type() /* bAsRowVector */, + false_type()); } DEFINE_MATRIX_BINARY_OP(DotDiv, a /= b); -template +template void BaseMatrixT::divRowVector(BaseMatrixT& b) { MatrixOffset offset(0, 0, 0, 0); int numRows = height_; int numCols = width_; - applyBinary(binary::DotDiv(), b, numRows, numCols, offset, - true_type() /* bAsRowVector */, false_type()); + applyBinary(binary::DotDiv(), + b, + numRows, + numCols, + offset, + true_type() /* bAsRowVector */, + false_type()); } -template +template void BaseMatrixT::mulColVector(BaseMatrixT& b) { MatrixOffset offset(0, 0, 0, 0); int numRows = height_; int numCols = width_; - applyBinary(binary::DotMul(), b, numRows, numCols, offset, - false_type(), true_type() /* bAsColVector */); + applyBinary(binary::DotMul(), + b, + numRows, + numCols, + offset, + false_type(), + true_type() /* bAsColVector */); } -template +template void BaseMatrixT::divColVector(BaseMatrixT& b) { MatrixOffset offset(0, 0, 0, 0); int numRows = height_; int numCols = width_; - applyBinary(binary::DotDiv(), b, numRows, numCols, offset, - false_type(), true_type() /* bAsColVector */); + applyBinary(binary::DotDiv(), + b, + numRows, + numCols, + offset, + false_type(), + true_type() /* bAsColVector */); } -template<> +template <> template int BaseMatrixT::applyRow(Agg agg, BaseMatrixT& b) { MatrixOffset offset(0, 0, 0, 0, 0, 0); @@ -1486,13 +1693,20 @@ int BaseMatrixT::applyRow(Agg agg, BaseMatrixT& b) { size_t numCols = b.width_; CHECK_EQ(height_, numRows); CHECK_EQ(width_, 1UL); - aggregate(agg, base::unary::identity(), base::binary::second(), b, numRows, - numCols, offset, false_type(), true_type() /*aAsColVector*/); + aggregate(agg, + base::unary::identity(), + base::binary::second(), + b, + numRows, + numCols, + offset, + false_type(), + true_type() /*aAsColVector*/); return 0; } -template<> +template <> template int BaseMatrixT::applyRow(Agg agg, Saver sv, BaseMatrixT& b) { MatrixOffset offset(0, 0, 0, 0, 0, 0); @@ -1500,16 +1714,25 @@ int BaseMatrixT::applyRow(Agg agg, Saver sv, BaseMatrixT& b) { size_t numCols = b.width_; CHECK_EQ(height_, numRows); CHECK_EQ(width_, 1UL); - aggregate(agg, base::unary::identity(), sv, b, numRows, numCols, offset, - false_type(), true_type() /*aAsColVector*/); + aggregate(agg, + base::unary::identity(), + sv, + b, + numRows, + numCols, + offset, + false_type(), + true_type() /*aAsColVector*/); return 0; } -template<> +template <> template -int BaseMatrixT::applyRow( - Agg agg, real scaleDest, real scaleAgg, BaseMatrixT& b) { +int BaseMatrixT::applyRow(Agg agg, + real scaleDest, + real scaleAgg, + BaseMatrixT& b) { if (scaleDest != 0) { applyRow(agg, base::binary::add2(scaleDest, scaleAgg), b); } else { @@ -1521,10 +1744,10 @@ int BaseMatrixT::applyRow( return 0; } -template<> +template <> template -int BaseMatrixT::applyRow(Agg agg, Op op, Saver sv, - BaseMatrixT& b, BaseMatrixT& c) { +int BaseMatrixT::applyRow( + Agg agg, Op op, Saver sv, BaseMatrixT& b, BaseMatrixT& c) { MatrixOffset offset(0, 0, 0, 0, 0, 0); size_t numRows = b.height_; size_t numCols = b.width_; @@ -1532,16 +1755,27 @@ int BaseMatrixT::applyRow(Agg agg, Op op, Saver sv, CHECK_EQ(width_, 1UL); CHECK_EQ(c.height_, numRows); CHECK_EQ(c.width_, numCols); - aggregate(agg, op, sv, - b, c, numRows, numCols, offset, - false_type(), true_type() /*aAsColVector*/); + aggregate(agg, + op, + sv, + b, + c, + numRows, + numCols, + offset, + false_type(), + true_type() /*aAsColVector*/); return 0; } -template<> +template <> template -int BaseMatrixT::applyRow(Agg agg, Op op, real scaleDest, real scaleAgg, - BaseMatrixT& b, BaseMatrixT& c) { +int BaseMatrixT::applyRow(Agg agg, + Op op, + real scaleDest, + real scaleAgg, + BaseMatrixT& b, + BaseMatrixT& c) { if (scaleDest != 0) { applyRow(agg, op, base::binary::add2(scaleDest, scaleAgg), b, c); } else { @@ -1553,7 +1787,7 @@ int BaseMatrixT::applyRow(Agg agg, Op op, real scaleDest, real scaleAgg, return 0; } -template<> +template <> template int BaseMatrixT::applyCol(Agg agg, BaseMatrixT& b) { MatrixOffset offset(0, 0, 0, 0, 0, 0); @@ -1561,13 +1795,20 @@ int BaseMatrixT::applyCol(Agg agg, BaseMatrixT& b) { size_t numCols = b.width_; CHECK_EQ(width_, numCols); CHECK_EQ(height_, 1UL); - aggregate(agg, base::unary::identity(), base::binary::second(), b, numRows, - numCols, offset, true_type() /*aAsRowVector*/, false_type()); + aggregate(agg, + base::unary::identity(), + base::binary::second(), + b, + numRows, + numCols, + offset, + true_type() /*aAsRowVector*/, + false_type()); return 0; } -template<> +template <> template int BaseMatrixT::applyCol(Agg agg, Saver sv, BaseMatrixT& b) { MatrixOffset offset(0, 0, 0, 0, 0, 0); @@ -1575,16 +1816,25 @@ int BaseMatrixT::applyCol(Agg agg, Saver sv, BaseMatrixT& b) { size_t numCols = b.width_; CHECK_EQ(width_, numCols); CHECK_EQ(height_, 1UL); - aggregate(agg, base::unary::identity(), sv, b, numRows, numCols, offset, - true_type() /*aAsRowVector*/, false_type()); + aggregate(agg, + base::unary::identity(), + sv, + b, + numRows, + numCols, + offset, + true_type() /*aAsRowVector*/, + false_type()); return 0; } -template<> +template <> template -int BaseMatrixT::applyCol( - Agg agg, real scaleDest, real scaleAgg, BaseMatrixT& b) { +int BaseMatrixT::applyCol(Agg agg, + real scaleDest, + real scaleAgg, + BaseMatrixT& b) { if (scaleDest != 0) { applyCol(agg, base::binary::add2(scaleDest, scaleAgg), b); } else { @@ -1596,48 +1846,51 @@ int BaseMatrixT::applyCol( return 0; } -template<> +template <> void BaseMatrixT::sumRows(BaseMatrixT& b, real scaleSum, real scaleDest) { applyRow(aggregate::sum(), scaleDest, scaleSum, b); } -template<> +template <> void BaseMatrixT::maxRows(BaseMatrixT& b) { applyRow(aggregate::max(), b); } -template<> +template <> void BaseMatrixT::minRows(BaseMatrixT& b) { applyRow(aggregate::min(), b); } -template<> +template <> void BaseMatrixT::maxCols(BaseMatrixT& b) { applyCol(aggregate::max(), b); } -template<> +template <> void BaseMatrixT::minCols(BaseMatrixT& b) { applyCol(aggregate::min(), b); } -template<> +template <> void BaseMatrixT::sumCols(BaseMatrixT& b, real scaleSum, real scaleDest) { applyCol(aggregate::sum(), scaleDest, scaleSum, b); } -template<> -void BaseMatrixT::sumOfSquaredDiffs( - BaseMatrixT& b, BaseMatrixT& c, real scaleSum, real scaleDest) { - applyRow(aggregate::sum(), base::binary::squaredDiff(), - scaleDest, scaleSum, b, c); +template <> +void BaseMatrixT::sumOfSquaredDiffs(BaseMatrixT& b, + BaseMatrixT& c, + real scaleSum, + real scaleDest) { + applyRow( + aggregate::sum(), base::binary::squaredDiff(), scaleDest, scaleSum, b, c); } -template<> -void BaseMatrixT::sumOfProducts( - BaseMatrixT& b, BaseMatrixT& c, real scaleSum, real scaleDest) { - applyRow(aggregate::sum(), base::binary::mul(), - scaleDest, scaleSum, b, c); +template <> +void BaseMatrixT::sumOfProducts(BaseMatrixT& b, + BaseMatrixT& c, + real scaleSum, + real scaleDest) { + applyRow(aggregate::sum(), base::binary::mul(), scaleDest, scaleSum, b, c); } template class BaseMatrixT;