提交 1a34becf 编写于 作者: Y Yu Yang

Reset develop BaseMatrix.cu

上级 aaf2d66d
...@@ -12,21 +12,21 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,21 +12,21 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include <paddle/utils/Logging.h>
#include <string.h>
#include <cmath> #include <cmath>
#include <string.h>
#include <paddle/utils/Logging.h>
#include "BaseMatrix.h" #include "BaseMatrix.h"
#include "MathFunctions.h"
#include "SIMDFunctions.h"
#include "hl_matrix_apply.cuh"
#include "hl_matrix_base.cuh"
#include "hl_matrix_ops.cuh" #include "hl_matrix_ops.cuh"
#include "hl_matrix_base.cuh"
#include "hl_matrix_apply.cuh"
#include "SIMDFunctions.h"
#include "MathFunctions.h"
namespace paddle { namespace paddle {
const char* SPARSE_SUPPORT_ERROR = "Sparse Matrix/Vector is not supported."; const char* SPARSE_SUPPORT_ERROR = "Sparse Matrix/Vector is not supported.";
template <class T> template<class T>
template <class Op> template <class Op>
int BaseMatrixT<T>::applyUnary(Op op) { int BaseMatrixT<T>::applyUnary(Op op) {
MatrixOffset offset(0, 0); MatrixOffset offset(0, 0);
...@@ -34,11 +34,9 @@ int BaseMatrixT<T>::applyUnary(Op op) { ...@@ -34,11 +34,9 @@ int BaseMatrixT<T>::applyUnary(Op op) {
return 0; return 0;
} }
template <class T> template<class T>
template <class Op> template <class Op>
int BaseMatrixT<T>::applyUnary(Op op, int BaseMatrixT<T>::applyUnary(Op op, int numRows, int numCols,
int numRows,
int numCols,
MatrixOffset& offset) { MatrixOffset& offset) {
CHECK(!this->isSparse()) << SPARSE_SUPPORT_ERROR; CHECK(!this->isSparse()) << SPARSE_SUPPORT_ERROR;
int dimM = numRows; int dimM = numRows;
...@@ -58,7 +56,7 @@ int BaseMatrixT<T>::applyUnary(Op op, ...@@ -58,7 +56,7 @@ int BaseMatrixT<T>::applyUnary(Op op,
return 0; return 0;
} }
template <class T> template<class T>
template <class Op> template <class Op>
int BaseMatrixT<T>::applyBinary(Op op, BaseMatrixT& b) { int BaseMatrixT<T>::applyBinary(Op op, BaseMatrixT& b) {
CHECK(height_ == b.height_ && width_ == b.width_) CHECK(height_ == b.height_ && width_ == b.width_)
...@@ -69,23 +67,18 @@ int BaseMatrixT<T>::applyBinary(Op op, BaseMatrixT& b) { ...@@ -69,23 +67,18 @@ int BaseMatrixT<T>::applyBinary(Op op, BaseMatrixT& b) {
return 0; return 0;
} }
template <class T> template<class T>
template <class Op> template <class Op>
int BaseMatrixT<T>::applyBinary( int BaseMatrixT<T>::applyBinary(Op op, BaseMatrixT& b, int numRows, int numCols,
Op op, BaseMatrixT& b, int numRows, int numCols, MatrixOffset& offset) { MatrixOffset& offset) {
applyBinary(op, b, numRows, numCols, offset, false_type(), false_type()); applyBinary(op, b, numRows, numCols, offset, false_type(), false_type());
return 0; return 0;
} }
template <class T> template<class T>
template <class Op, class bAsRowVector, class bAsColVector> template <class Op, class bAsRowVector, class bAsColVector>
int BaseMatrixT<T>::applyBinary(Op op, int BaseMatrixT<T>::applyBinary(Op op, BaseMatrixT& b, int numRows, int numCols,
BaseMatrixT& b, MatrixOffset& offset, bAsRowVector, bAsColVector) {
int numRows,
int numCols,
MatrixOffset& offset,
bAsRowVector,
bAsColVector) {
CHECK(!this->isSparse()) << SPARSE_SUPPORT_ERROR; CHECK(!this->isSparse()) << SPARSE_SUPPORT_ERROR;
CHECK(!b.isSparse()) << SPARSE_SUPPORT_ERROR; CHECK(!b.isSparse()) << SPARSE_SUPPORT_ERROR;
CHECK(useGpu_ == b.useGpu_) << "Matrix type mismatch"; CHECK(useGpu_ == b.useGpu_) << "Matrix type mismatch";
...@@ -98,8 +91,8 @@ int BaseMatrixT<T>::applyBinary(Op op, ...@@ -98,8 +91,8 @@ int BaseMatrixT<T>::applyBinary(Op op,
T* A = data_; T* A = data_;
T* B = b.data_; T* B = b.data_;
CAL_MATRIX_START_ADDRESS(A, height_, width_, lda, offset.aCol_, offset.aRow_); CAL_MATRIX_START_ADDRESS(A, height_, width_, lda, offset.aCol_, offset.aRow_);
CAL_MATRIX_START_ADDRESS( CAL_MATRIX_START_ADDRESS(B, b.height_, b.width_, ldb, offset.bCol_,
B, b.height_, b.width_, ldb, offset.bCol_, offset.bRow_); offset.bRow_);
CHECK_LE(dimM + offset.aRow_, this->height_); CHECK_LE(dimM + offset.aRow_, this->height_);
CHECK_LE(dimN + offset.aCol_, this->width_); CHECK_LE(dimN + offset.aCol_, this->width_);
if (!bAsRowVector::value && !bAsColVector::value) { if (!bAsRowVector::value && !bAsColVector::value) {
...@@ -122,7 +115,7 @@ int BaseMatrixT<T>::applyBinary(Op op, ...@@ -122,7 +115,7 @@ int BaseMatrixT<T>::applyBinary(Op op,
return 0; return 0;
} }
template <class T> template<class T>
template <class Op> template <class Op>
int BaseMatrixT<T>::applyTernary(Op op, BaseMatrixT& b, BaseMatrixT& c) { int BaseMatrixT<T>::applyTernary(Op op, BaseMatrixT& b, BaseMatrixT& c) {
CHECK_EQ(height_, b.height_); CHECK_EQ(height_, b.height_);
...@@ -136,29 +129,21 @@ int BaseMatrixT<T>::applyTernary(Op op, BaseMatrixT& b, BaseMatrixT& c) { ...@@ -136,29 +129,21 @@ int BaseMatrixT<T>::applyTernary(Op op, BaseMatrixT& b, BaseMatrixT& c) {
return 0; return 0;
} }
template <class T> template<class T>
template <class Op> template <class Op>
int BaseMatrixT<T>::applyTernary(Op op, int BaseMatrixT<T>::applyTernary(Op op, BaseMatrixT& b, BaseMatrixT& c,
BaseMatrixT& b, int numRows, int numCols,
BaseMatrixT& c,
int numRows,
int numCols,
MatrixOffset& offset) { MatrixOffset& offset) {
applyTernary(op, b, c, numRows, numCols, offset, false_type(), false_type()); applyTernary(op, b, c, numRows, numCols, offset, false_type(), false_type());
return 0; return 0;
} }
template <class T> template<class T>
template <class Op, class cAsRowVector, class cAsColVector> template <class Op, class cAsRowVector, class cAsColVector>
int BaseMatrixT<T>::applyTernary(Op op, int BaseMatrixT<T>::applyTernary(Op op, BaseMatrixT& b, BaseMatrixT& c,
BaseMatrixT& b, int numRows, int numCols, MatrixOffset& offset,
BaseMatrixT& c, cAsRowVector, cAsColVector) {
int numRows,
int numCols,
MatrixOffset& offset,
cAsRowVector,
cAsColVector) {
CHECK(!this->isSparse()) << SPARSE_SUPPORT_ERROR; CHECK(!this->isSparse()) << SPARSE_SUPPORT_ERROR;
CHECK(!b.isSparse()) << SPARSE_SUPPORT_ERROR; CHECK(!b.isSparse()) << SPARSE_SUPPORT_ERROR;
CHECK(!c.isSparse()) << SPARSE_SUPPORT_ERROR; CHECK(!c.isSparse()) << SPARSE_SUPPORT_ERROR;
...@@ -175,10 +160,10 @@ int BaseMatrixT<T>::applyTernary(Op op, ...@@ -175,10 +160,10 @@ int BaseMatrixT<T>::applyTernary(Op op,
T* B = b.data_; T* B = b.data_;
T* C = c.data_; T* C = c.data_;
CAL_MATRIX_START_ADDRESS(A, height_, width_, lda, offset.aCol_, offset.aRow_); CAL_MATRIX_START_ADDRESS(A, height_, width_, lda, offset.aCol_, offset.aRow_);
CAL_MATRIX_START_ADDRESS( CAL_MATRIX_START_ADDRESS(B, b.height_, b.width_, ldb, offset.bCol_,
B, b.height_, b.width_, ldb, offset.bCol_, offset.bRow_); offset.bRow_);
CAL_MATRIX_START_ADDRESS( CAL_MATRIX_START_ADDRESS(C, c.height_, c.width_, ldc, offset.cCol_,
C, c.height_, c.width_, ldc, offset.cCol_, offset.cRow_); offset.cRow_);
CHECK_LE(dimM + offset.aRow_, this->height_); CHECK_LE(dimM + offset.aRow_, this->height_);
CHECK_LE(dimN + offset.aCol_, this->width_); CHECK_LE(dimN + offset.aCol_, this->width_);
...@@ -195,21 +180,21 @@ int BaseMatrixT<T>::applyTernary(Op op, ...@@ -195,21 +180,21 @@ int BaseMatrixT<T>::applyTernary(Op op,
} }
if (true == useGpu_) { if (true == useGpu_) {
hl_gpu_apply_ternary_op<T, Op, cAsRowVector::value, cAsColVector::value>( hl_gpu_apply_ternary_op
<T, Op, cAsRowVector::value, cAsColVector::value>(
op, A, B, C, dimM, dimN, lda, ldb, ldc); op, A, B, C, dimM, dimN, lda, ldb, ldc);
} else { } else {
hl_cpu_apply_ternary_op<T, Op, cAsRowVector::value, cAsColVector::value>( hl_cpu_apply_ternary_op
<T, Op, cAsRowVector::value, cAsColVector::value>(
op, A, B, C, dimM, dimN, lda, ldb, ldc); op, A, B, C, dimM, dimN, lda, ldb, ldc);
} }
return 0; return 0;
} }
template <class T> template<class T>
template <class Op> template <class Op>
int BaseMatrixT<T>::applyQuaternary(Op op, int BaseMatrixT<T>::applyQuaternary(Op op, BaseMatrixT& b, BaseMatrixT& c,
BaseMatrixT& b,
BaseMatrixT& c,
BaseMatrixT& d) { BaseMatrixT& d) {
CHECK_EQ(height_, b.height_); CHECK_EQ(height_, b.height_);
CHECK_EQ(width_, b.width_); CHECK_EQ(width_, b.width_);
...@@ -224,14 +209,10 @@ int BaseMatrixT<T>::applyQuaternary(Op op, ...@@ -224,14 +209,10 @@ int BaseMatrixT<T>::applyQuaternary(Op op,
return 0; return 0;
} }
template <class T> template<class T>
template <class Op> template <class Op>
int BaseMatrixT<T>::applyQuaternary(Op op, int BaseMatrixT<T>::applyQuaternary(Op op, BaseMatrixT& b, BaseMatrixT& c,
BaseMatrixT& b, BaseMatrixT& d, int numRows, int numCols,
BaseMatrixT& c,
BaseMatrixT& d,
int numRows,
int numCols,
MatrixOffset& offset) { MatrixOffset& offset) {
CHECK(!this->isSparse()) << SPARSE_SUPPORT_ERROR; CHECK(!this->isSparse()) << SPARSE_SUPPORT_ERROR;
CHECK(!b.isSparse()) << SPARSE_SUPPORT_ERROR; CHECK(!b.isSparse()) << SPARSE_SUPPORT_ERROR;
...@@ -253,12 +234,12 @@ int BaseMatrixT<T>::applyQuaternary(Op op, ...@@ -253,12 +234,12 @@ int BaseMatrixT<T>::applyQuaternary(Op op,
T* C = c.data_; T* C = c.data_;
T* D = d.data_; T* D = d.data_;
CAL_MATRIX_START_ADDRESS(A, height_, width_, lda, offset.aCol_, offset.aRow_); CAL_MATRIX_START_ADDRESS(A, height_, width_, lda, offset.aCol_, offset.aRow_);
CAL_MATRIX_START_ADDRESS( CAL_MATRIX_START_ADDRESS(B, b.height_, b.width_, ldb, offset.bCol_,
B, b.height_, b.width_, ldb, offset.bCol_, offset.bRow_); offset.bRow_);
CAL_MATRIX_START_ADDRESS( CAL_MATRIX_START_ADDRESS(C, c.height_, c.width_, ldc, offset.cCol_,
C, c.height_, c.width_, ldc, offset.cCol_, offset.cRow_); offset.cRow_);
CAL_MATRIX_START_ADDRESS( CAL_MATRIX_START_ADDRESS(D, d.height_, d.width_, ldd, offset.dCol_,
D, d.height_, d.width_, ldd, offset.dCol_, offset.dRow_); offset.dRow_);
CHECK_LE(dimM + offset.aRow_, this->height_); CHECK_LE(dimM + offset.aRow_, this->height_);
CHECK_LE(dimN + offset.aCol_, this->width_); CHECK_LE(dimN + offset.aCol_, this->width_);
...@@ -269,29 +250,22 @@ int BaseMatrixT<T>::applyQuaternary(Op op, ...@@ -269,29 +250,22 @@ int BaseMatrixT<T>::applyQuaternary(Op op,
CHECK_LE(dimM + offset.dRow_, d.height_); CHECK_LE(dimM + offset.dRow_, d.height_);
CHECK_LE(dimN + offset.dCol_, d.width_); CHECK_LE(dimN + offset.dCol_, d.width_);
if (true == useGpu_) { if (true == useGpu_) {
hl_gpu_apply_quaternary_op(op, A, B, C, D, dimM, dimN, lda, ldb, ldc, ldd); hl_gpu_apply_quaternary_op(op, A, B, C, D, dimM, dimN, lda, ldb,
ldc, ldd);
} else { } else {
hl_cpu_apply_quaternary_op(op, A, B, C, D, dimM, dimN, lda, ldb, ldc, ldd); hl_cpu_apply_quaternary_op(op, A, B, C, D, dimM, dimN, lda, ldb,
ldc, ldd);
} }
return 0; return 0;
} }
template <class T> template<class T>
template <class Agg, template <class Agg, class Op, class Saver, class aAsRowVector,
class Op,
class Saver,
class aAsRowVector,
class aAsColVector> class aAsColVector>
int BaseMatrixT<T>::aggregate(Agg agg, int BaseMatrixT<T>::aggregate(Agg agg, Op op, Saver sv, BaseMatrixT& b,
Op op, int numRows, int numCols, MatrixOffset& offset,
Saver sv, aAsRowVector, aAsColVector) {
BaseMatrixT& b,
int numRows,
int numCols,
MatrixOffset& offset,
aAsRowVector,
aAsColVector) {
CHECK_EQ(useGpu_, b.useGpu_); CHECK_EQ(useGpu_, b.useGpu_);
int ld = stride_; int ld = stride_;
...@@ -299,10 +273,10 @@ int BaseMatrixT<T>::aggregate(Agg agg, ...@@ -299,10 +273,10 @@ int BaseMatrixT<T>::aggregate(Agg agg,
T* dst = data_; T* dst = data_;
T* B = b.data_; T* B = b.data_;
CAL_MATRIX_START_ADDRESS( CAL_MATRIX_START_ADDRESS(dst, height_, width_, ld, offset.aCol_,
dst, height_, width_, ld, offset.aCol_, offset.aRow_); offset.aRow_);
CAL_MATRIX_START_ADDRESS( CAL_MATRIX_START_ADDRESS(B, b.height_, b.width_, ldb, offset.bCol_,
B, b.height_, b.width_, ldb, offset.bCol_, offset.bRow_); offset.bRow_);
if (aAsRowVector::value && !aAsColVector::value) { if (aAsRowVector::value && !aAsColVector::value) {
if (useGpu_) { if (useGpu_) {
...@@ -323,21 +297,12 @@ int BaseMatrixT<T>::aggregate(Agg agg, ...@@ -323,21 +297,12 @@ int BaseMatrixT<T>::aggregate(Agg agg,
return 0; return 0;
} }
template <class T> template<class T>
template <class Agg, template <class Agg, class Op, class Saver, class aAsRowVector,
class Op,
class Saver,
class aAsRowVector,
class aAsColVector> class aAsColVector>
int BaseMatrixT<T>::aggregate(Agg agg, int BaseMatrixT<T>::aggregate(Agg agg, Op op, Saver sv, BaseMatrixT& b,
Op op, BaseMatrixT& c, int numRows, int numCols,
Saver sv, MatrixOffset& offset, aAsRowVector,
BaseMatrixT& b,
BaseMatrixT& c,
int numRows,
int numCols,
MatrixOffset& offset,
aAsRowVector,
aAsColVector) { aAsColVector) {
CHECK_EQ(useGpu_, b.useGpu_); CHECK_EQ(useGpu_, b.useGpu_);
CHECK_EQ(useGpu_, c.useGpu_); CHECK_EQ(useGpu_, c.useGpu_);
...@@ -349,28 +314,28 @@ int BaseMatrixT<T>::aggregate(Agg agg, ...@@ -349,28 +314,28 @@ int BaseMatrixT<T>::aggregate(Agg agg,
T* dst = data_; T* dst = data_;
T* B = b.data_; T* B = b.data_;
T* C = c.data_; T* C = c.data_;
CAL_MATRIX_START_ADDRESS( CAL_MATRIX_START_ADDRESS(dst, height_, width_, ld, offset.aCol_,
dst, height_, width_, ld, offset.aCol_, offset.aRow_); offset.aRow_);
CAL_MATRIX_START_ADDRESS( CAL_MATRIX_START_ADDRESS(B, b.height_, b.width_, ldb, offset.bCol_,
B, b.height_, b.width_, ldb, offset.bCol_, offset.bRow_); offset.bRow_);
CAL_MATRIX_START_ADDRESS( CAL_MATRIX_START_ADDRESS(C, c.height_, c.width_, ldc, offset.cCol_,
C, c.height_, c.width_, ldc, offset.cCol_, offset.cRow_); offset.cRow_);
if (aAsRowVector::value && !aAsColVector::value) { if (aAsRowVector::value && !aAsColVector::value) {
if (useGpu_) { if (useGpu_) {
hl_gpu_matrix_column_op( hl_gpu_matrix_column_op(agg, op, sv, numRows, numCols, dst, B,
agg, op, sv, numRows, numCols, dst, B, ldb, C, ldc); ldb, C, ldc);
} else { } else {
hl_cpu_matrix_column_op( hl_cpu_matrix_column_op(agg, op, sv, numRows, numCols, dst, B,
agg, op, sv, numRows, numCols, dst, B, ldb, C, ldc); ldb, C, ldc);
} }
} else if (!aAsRowVector::value && aAsColVector::value) { } else if (!aAsRowVector::value && aAsColVector::value) {
if (useGpu_) { if (useGpu_) {
hl_gpu_matrix_row_op( hl_gpu_matrix_row_op(agg, op, sv, numRows, numCols, dst, ld, B,
agg, op, sv, numRows, numCols, dst, ld, B, ldb, C, ldc); ldb, C, ldc);
} else { } else {
hl_cpu_matrix_row_op( hl_cpu_matrix_row_op(agg, op, sv, numRows, numCols, dst, ld, B,
agg, op, sv, numRows, numCols, dst, ld, B, ldb, C, ldc); ldb, C, ldc);
} }
} else { } else {
LOG(FATAL) << "not supported"; LOG(FATAL) << "not supported";
...@@ -385,19 +350,15 @@ int BaseMatrixT<T>::aggregate(Agg agg, ...@@ -385,19 +350,15 @@ int BaseMatrixT<T>::aggregate(Agg agg,
*/ */
DEFINE_MATRIX_UNARY_OP(Neg, a = -a); DEFINE_MATRIX_UNARY_OP(Neg, a = -a);
template <class T> template<class T>
void BaseMatrixT<T>::neg() { void BaseMatrixT<T>::neg() { applyUnary(unary::Neg<T>()); }
applyUnary(unary::Neg<T>());
}
DEFINE_MATRIX_UNARY_OP(Exp, a = exp(a)); DEFINE_MATRIX_UNARY_OP(Exp, a = exp(a));
template <> template<>
void BaseMatrixT<real>::exp2() { void BaseMatrixT<real>::exp2() { applyUnary(unary::Exp<real>()); }
applyUnary(unary::Exp<real>());
}
DEFINE_MATRIX_UNARY_OP(Log, a = log(a)); DEFINE_MATRIX_UNARY_OP(Log, a = log(a));
template <> template<>
void BaseMatrixT<real>::log2() { void BaseMatrixT<real>::log2() {
if (useGpu_) { if (useGpu_) {
applyUnary(unary::Log<real>()); applyUnary(unary::Log<real>());
...@@ -407,42 +368,30 @@ void BaseMatrixT<real>::log2() { ...@@ -407,42 +368,30 @@ void BaseMatrixT<real>::log2() {
} }
DEFINE_MATRIX_UNARY_OP(Sqrt, a = sqrt(a)); DEFINE_MATRIX_UNARY_OP(Sqrt, a = sqrt(a));
template <> template<>
void BaseMatrixT<real>::sqrt2() { void BaseMatrixT<real>::sqrt2() { applyUnary(unary::Sqrt<real>()); }
applyUnary(unary::Sqrt<real>());
}
DEFINE_MATRIX_UNARY_OP(Square, a = a * a); DEFINE_MATRIX_UNARY_OP(Square, a = a * a);
template <class T> template<class T>
void BaseMatrixT<T>::square2() { void BaseMatrixT<T>::square2() { applyUnary(unary::Square<T>()); }
applyUnary(unary::Square<T>());
}
DEFINE_MATRIX_UNARY_OP(Reciprocal, a = 1.0f / a); DEFINE_MATRIX_UNARY_OP(Reciprocal, a = 1.0f / a);
template <class T> template<class T>
void BaseMatrixT<T>::reciprocal2() { void BaseMatrixT<T>::reciprocal2() { applyUnary(unary::Reciprocal<T>()); }
applyUnary(unary::Reciprocal<T>());
}
DEFINE_MATRIX_UNARY_OP(Abs, a = a > 0 ? a : -a); DEFINE_MATRIX_UNARY_OP(Abs, a = a > 0 ? a : -a);
template <class T> template<class T>
void BaseMatrixT<T>::abs2() { void BaseMatrixT<T>::abs2() { applyUnary(unary::Abs<T>()); }
applyUnary(unary::Abs<T>());
}
DEFINE_MATRIX_UNARY_OP(Sign, a = (a > 0) - (a < 0)); DEFINE_MATRIX_UNARY_OP(Sign, a = (a > 0) - (a < 0));
template <class T> template<class T>
void BaseMatrixT<T>::sign2() { void BaseMatrixT<T>::sign2() { applyUnary(unary::Sign<T>()); }
applyUnary(unary::Sign<T>());
}
DEFINE_MATRIX_UNARY_OP(Zero, a = 0); DEFINE_MATRIX_UNARY_OP(Zero, a = 0);
template <class T> template<class T>
void BaseMatrixT<T>::zero() { void BaseMatrixT<T>::zero() { applyUnary(unary::Zero<T>()); }
applyUnary(unary::Zero<T>());
}
template <class T> template<class T>
void BaseMatrixT<T>::zeroAtOffset(int64_t columnOffset, int64_t numColumns) { void BaseMatrixT<T>::zeroAtOffset(int64_t columnOffset, int64_t numColumns) {
int numRows = height_; int numRows = height_;
int numCols = numColumns; int numCols = numColumns;
...@@ -451,13 +400,11 @@ void BaseMatrixT<T>::zeroAtOffset(int64_t columnOffset, int64_t numColumns) { ...@@ -451,13 +400,11 @@ void BaseMatrixT<T>::zeroAtOffset(int64_t columnOffset, int64_t numColumns) {
} }
DEFINE_MATRIX_UNARY_OP(One, a = 1); DEFINE_MATRIX_UNARY_OP(One, a = 1);
template <class T> template<class T>
void BaseMatrixT<T>::one() { void BaseMatrixT<T>::one() { applyUnary(unary::One<T>()); }
applyUnary(unary::One<T>());
}
DEFINE_MATRIX_UNARY_PARAMETER_OP(Pow, ONE_PARAMETER, a = pow(a, p)); DEFINE_MATRIX_UNARY_PARAMETER_OP(Pow, ONE_PARAMETER, a = pow(a, p));
template <> template<>
void BaseMatrixT<real>::pow2(real p) { void BaseMatrixT<real>::pow2(real p) {
if (useGpu_) { if (useGpu_) {
applyUnary(unary::Pow<real>(p)); applyUnary(unary::Pow<real>(p));
...@@ -467,67 +414,51 @@ void BaseMatrixT<real>::pow2(real p) { ...@@ -467,67 +414,51 @@ void BaseMatrixT<real>::pow2(real p) {
} }
DEFINE_MATRIX_UNARY_PARAMETER_OP(SubScalar, ONE_PARAMETER, a -= p); DEFINE_MATRIX_UNARY_PARAMETER_OP(SubScalar, ONE_PARAMETER, a -= p);
template <class T> template<class T>
void BaseMatrixT<T>::subScalar(T p) { void BaseMatrixT<T>::subScalar(T p) { applyUnary(unary::SubScalar<T>(p)); }
applyUnary(unary::SubScalar<T>(p));
}
DEFINE_MATRIX_UNARY_PARAMETER_OP(MulScalar, ONE_PARAMETER, a *= p); DEFINE_MATRIX_UNARY_PARAMETER_OP(MulScalar, ONE_PARAMETER, a *= p);
template <class T> template<class T>
void BaseMatrixT<T>::mulScalar(T p) { void BaseMatrixT<T>::mulScalar(T p) { applyUnary(unary::MulScalar<T>(p)); }
applyUnary(unary::MulScalar<T>(p));
}
DEFINE_MATRIX_UNARY_PARAMETER_OP(DivScalar, ONE_PARAMETER, a /= p); DEFINE_MATRIX_UNARY_PARAMETER_OP(DivScalar, ONE_PARAMETER, a /= p);
template <class T> template<class T>
void BaseMatrixT<T>::divScalar(T p) { void BaseMatrixT<T>::divScalar(T p) { applyUnary(unary::DivScalar<T>(p)); }
applyUnary(unary::DivScalar<T>(p));
}
DEFINE_MATRIX_UNARY_PARAMETER_OP(Assign, ONE_PARAMETER, a = p); DEFINE_MATRIX_UNARY_PARAMETER_OP(Assign, ONE_PARAMETER, a = p);
template <class T> template<class T>
void BaseMatrixT<T>::assign(T p) { void BaseMatrixT<T>::assign(T p) { applyUnary(unary::Assign<T>(p)); }
applyUnary(unary::Assign<T>(p));
}
DEFINE_MATRIX_UNARY_PARAMETER_OP(Add, ONE_PARAMETER, a += p); DEFINE_MATRIX_UNARY_PARAMETER_OP(Add, ONE_PARAMETER, a += p);
template <class T> template<class T>
void BaseMatrixT<T>::add(T p) { void BaseMatrixT<T>::add(T p) { applyUnary(unary::Add<T>(p)); }
applyUnary(unary::Add<T>(p));
}
DEFINE_MATRIX_UNARY_PARAMETER_OP(Add2, TWO_PARAMETER, a = a * p1 + p2); DEFINE_MATRIX_UNARY_PARAMETER_OP(Add2, TWO_PARAMETER, a = a * p1 + p2);
template <class T> template<class T>
void BaseMatrixT<T>::add(T p1, T p2) { void BaseMatrixT<T>::add(T p1, T p2) { applyUnary(unary::Add2<T>(p1, p2)); }
applyUnary(unary::Add2<T>(p1, p2));
}
DEFINE_MATRIX_UNARY_PARAMETER_OP(Clip, DEFINE_MATRIX_UNARY_PARAMETER_OP(Clip, TWO_PARAMETER,
TWO_PARAMETER,
a = a < p1 ? p1 : (a > p2 ? p2 : a)); a = a < p1 ? p1 : (a > p2 ? p2 : a));
template <class T> template<class T>
void BaseMatrixT<T>::clip(T p1, T p2) { void BaseMatrixT<T>::clip(T p1, T p2) { applyUnary(unary::Clip<T>(p1, p2)); }
applyUnary(unary::Clip<T>(p1, p2));
}
DEFINE_MATRIX_BINARY_PARAMETER_OP(ClipDerivative, DEFINE_MATRIX_BINARY_PARAMETER_OP(ClipDerivative, TWO_PARAMETER,
TWO_PARAMETER,
a = b < p1 ? 0 : (b > p2 ? 0 : 1)); a = b < p1 ? 0 : (b > p2 ? 0 : 1));
template <class T> template<class T>
void BaseMatrixT<T>::clipDerivative(BaseMatrixT& b, T p1, T p2) { void BaseMatrixT<T>::clipDerivative(BaseMatrixT& b, T p1, T p2) {
applyBinary(binary::ClipDerivative<T>(p1, p2), b); applyBinary(binary::ClipDerivative<T>(p1, p2), b);
} }
DEFINE_MATRIX_UNARY_PARAMETER_OP(BiggerThanScalar, DEFINE_MATRIX_UNARY_PARAMETER_OP(BiggerThanScalar, ONE_PARAMETER,
ONE_PARAMETER,
a = a > p ? 1.0f : 0.0f); a = a > p ? 1.0f : 0.0f);
template <class T> template<class T>
void BaseMatrixT<T>::biggerThanScalar(T p) { void BaseMatrixT<T>::biggerThanScalar(T p) {
applyUnary(unary::BiggerThanScalar<T>(p)); applyUnary(unary::BiggerThanScalar<T>(p));
} }
DEFINE_MATRIX_UNARY_PARAMETER_OP(DownClip, ONE_PARAMETER, a = a > p ? a : p); DEFINE_MATRIX_UNARY_PARAMETER_OP(DownClip, ONE_PARAMETER,
template <class T> a = a > p ? a : p);
template<class T>
void BaseMatrixT<T>::downClip(T p) { void BaseMatrixT<T>::downClip(T p) {
applyUnary(unary::DownClip<T>(p)); applyUnary(unary::DownClip<T>(p));
} }
...@@ -538,12 +469,12 @@ void BaseMatrixT<T>::downClip(T p) { ...@@ -538,12 +469,12 @@ void BaseMatrixT<T>::downClip(T p) {
*/ */
DEFINE_MATRIX_BINARY_OP(Add, a += b); DEFINE_MATRIX_BINARY_OP(Add, a += b);
template <class T> template<class T>
void BaseMatrixT<T>::add(BaseMatrixT& b) { void BaseMatrixT<T>::add(BaseMatrixT& b) {
applyBinary(binary::Add<T>(), b); applyBinary(binary::Add<T>(), b);
} }
template <> template<>
void BaseMatrixT<real>::add(BaseMatrixT& b) { void BaseMatrixT<real>::add(BaseMatrixT& b) {
if (useGpu_) { if (useGpu_) {
applyBinary(binary::Add<real>(), b); applyBinary(binary::Add<real>(), b);
...@@ -554,7 +485,7 @@ void BaseMatrixT<real>::add(BaseMatrixT& b) { ...@@ -554,7 +485,7 @@ void BaseMatrixT<real>::add(BaseMatrixT& b) {
} }
} }
template <class T> template<class T>
void BaseMatrixT<T>::addAtOffset(BaseMatrixT& b, int64_t columnOffset) { void BaseMatrixT<T>::addAtOffset(BaseMatrixT& b, int64_t columnOffset) {
if (columnOffset + b.width_ <= width_) { if (columnOffset + b.width_ <= width_) {
int numRows = height_; int numRows = height_;
...@@ -573,53 +504,43 @@ void BaseMatrixT<T>::addAtOffset(BaseMatrixT& b, int64_t columnOffset) { ...@@ -573,53 +504,43 @@ void BaseMatrixT<T>::addAtOffset(BaseMatrixT& b, int64_t columnOffset) {
} }
} }
template <class T> template<class T>
void BaseMatrixT<T>::addP2P(BaseMatrixT& b) { void BaseMatrixT<T>::addP2P(BaseMatrixT& b) {
T* A = data_; T* A = data_;
T* B = b.data_; T* B = b.data_;
int dimM = height_; int dimM = height_;
int dimN = width_; int dimN = width_;
hl_gpu_apply_binary_op<T, binary::Add<T>, 0, 0>( hl_gpu_apply_binary_op<T, binary::Add<T>, 0, 0>
binary::Add<T>(), A, B, dimM, dimN, dimN, dimN); (binary::Add<T>(), A, B, dimM, dimN, dimN, dimN);
} }
template <class T> template<class T>
void BaseMatrixT<T>::addColVector(BaseMatrixT& b) { void BaseMatrixT<T>::addColVector(BaseMatrixT& b) {
MatrixOffset offset(0, 0, 0, 0); MatrixOffset offset(0, 0, 0, 0);
int numRows = height_; int numRows = height_;
int numCols = width_; int numCols = width_;
applyBinary(binary::Add<T>(), applyBinary(binary::Add<T>(), b, numRows, numCols, offset, false_type(),
b,
numRows,
numCols,
offset,
false_type(),
true_type() /* bAsColVector */); true_type() /* bAsColVector */);
} }
template <class T> template<class T>
void BaseMatrixT<T>::addRowVector(BaseMatrixT& b) { void BaseMatrixT<T>::addRowVector(BaseMatrixT& b) {
MatrixOffset offset(0, 0, 0, 0); MatrixOffset offset(0, 0, 0, 0);
int numRows = height_; int numRows = height_;
int numCols = width_; int numCols = width_;
applyBinary(binary::Add<T>(), applyBinary(binary::Add<T>(), b, numRows, numCols, offset,
b, true_type() /* bAsRowVector */, false_type());
numRows,
numCols,
offset,
true_type() /* bAsRowVector */,
false_type());
} }
DEFINE_MATRIX_BINARY_PARAMETER_OP(Add1, ONE_PARAMETER, a += b * p); DEFINE_MATRIX_BINARY_PARAMETER_OP(Add1, ONE_PARAMETER, a += b * p);
template <class T> template<class T>
void BaseMatrixT<T>::add(BaseMatrixT& b, T p) { void BaseMatrixT<T>::add(BaseMatrixT& b, T p) {
applyBinary(binary::Add1<T>(p), b); applyBinary(binary::Add1<T>(p), b);
} }
DEFINE_MATRIX_BINARY_PARAMETER_OP(Pow, ONE_PARAMETER, a = pow(b, p)); DEFINE_MATRIX_BINARY_PARAMETER_OP(Pow, ONE_PARAMETER, a = pow(b, p));
template <> template<>
void BaseMatrixT<real>::pow2(BaseMatrixT& b, real p) { void BaseMatrixT<real>::pow2(BaseMatrixT& b, real p) {
if (useGpu_) { if (useGpu_) {
applyBinary(binary::Pow<real>(p), b); applyBinary(binary::Pow<real>(p), b);
...@@ -629,45 +550,36 @@ void BaseMatrixT<real>::pow2(BaseMatrixT& b, real p) { ...@@ -629,45 +550,36 @@ void BaseMatrixT<real>::pow2(BaseMatrixT& b, real p) {
} }
DEFINE_MATRIX_BINARY_PARAMETER_OP(Add2, TWO_PARAMETER, a = p1 * a + p2 * b); DEFINE_MATRIX_BINARY_PARAMETER_OP(Add2, TWO_PARAMETER, a = p1 * a + p2 * b);
template <class T> template<class T>
void BaseMatrixT<T>::add(BaseMatrixT& b, T p1, T p2) { void BaseMatrixT<T>::add(BaseMatrixT& b, T p1, T p2) {
applyBinary(binary::Add2<T>(p1, p2), b); applyBinary(binary::Add2<T>(p1, p2), b);
} }
template <class T> template<class T>
void BaseMatrixT<T>::addBias(BaseMatrixT& b, T scale) { void BaseMatrixT<T>::addBias(BaseMatrixT& b, T scale) {
MatrixOffset offset(0, 0, 0, 0); MatrixOffset offset(0, 0, 0, 0);
int numRows = height_; int numRows = height_;
int numCols = width_; int numCols = width_;
applyBinary(binary::Add1<T>(scale), applyBinary(binary::Add1<T>(scale), b, numRows, numCols, offset,
b, true_type() /* bAsRowVector */, false_type());
numRows,
numCols,
offset,
true_type() /* bAsRowVector */,
false_type());
} }
DEFINE_MATRIX_BINARY_OP(Sub, a -= b); DEFINE_MATRIX_BINARY_OP(Sub, a -= b);
template <class T> template<class T>
void BaseMatrixT<T>::sub(BaseMatrixT& b) { void BaseMatrixT<T>::sub(BaseMatrixT& b) { applyBinary(binary::Sub<T>(), b); }
applyBinary(binary::Sub<T>(), b);
}
DEFINE_MATRIX_BINARY_PARAMETER_OP(Sub1, ONE_PARAMETER, a -= b * p); DEFINE_MATRIX_BINARY_PARAMETER_OP(Sub1, ONE_PARAMETER, a -= b * p);
template <class T> template<class T>
void BaseMatrixT<T>::sub(BaseMatrixT& b, T p) { void BaseMatrixT<T>::sub(BaseMatrixT& b, T p) {
applyBinary(binary::Sub1<T>(p), b); applyBinary(binary::Sub1<T>(p), b);
} }
DEFINE_MATRIX_BINARY_OP(Relu, b = a > 0.0f ? a : 0.0f); DEFINE_MATRIX_BINARY_OP(Relu, b = a > 0.0f ? a : 0.0f);
template <class T> template<class T>
void BaseMatrixT<T>::relu(BaseMatrixT& b) { void BaseMatrixT<T>::relu(BaseMatrixT& b) { applyBinary(binary::Relu<T>(), b); }
applyBinary(binary::Relu<T>(), b);
}
DEFINE_MATRIX_BINARY_OP(ReluDerivative, a *= (b > 0.0f ? 1.0f : 0.0f)); DEFINE_MATRIX_BINARY_OP(ReluDerivative, a *= (b > 0.0f ? 1.0f : 0.0f));
template <class T> template<class T>
void BaseMatrixT<T>::reluDerivative(BaseMatrixT& b) { void BaseMatrixT<T>::reluDerivative(BaseMatrixT& b) {
applyBinary(binary::ReluDerivative<T>(), b); applyBinary(binary::ReluDerivative<T>(), b);
} }
...@@ -677,7 +589,7 @@ DEFINE_MATRIX_BINARY_OP(Softrelu, const T THRESHOLD = 40.0; ...@@ -677,7 +589,7 @@ DEFINE_MATRIX_BINARY_OP(Softrelu, const T THRESHOLD = 40.0;
? THRESHOLD ? THRESHOLD
: ((a < -THRESHOLD) ? (-THRESHOLD) : ((a < -THRESHOLD) ? (-THRESHOLD)
: a)))); : a))));
template <> template<>
void BaseMatrixT<real>::softrelu(BaseMatrixT& b) { void BaseMatrixT<real>::softrelu(BaseMatrixT& b) {
applyBinary(binary::Softrelu<real>(), b); applyBinary(binary::Softrelu<real>(), b);
} }
...@@ -687,100 +599,97 @@ DEFINE_MATRIX_BINARY_OP( ...@@ -687,100 +599,97 @@ DEFINE_MATRIX_BINARY_OP(
a *= (1.0 - exp(-1.0 * ((b > THRESHOLD) a *= (1.0 - exp(-1.0 * ((b > THRESHOLD)
? THRESHOLD ? THRESHOLD
: ((b < -THRESHOLD) ? (-THRESHOLD) : b))))); : ((b < -THRESHOLD) ? (-THRESHOLD) : b)))));
template <> template<>
void BaseMatrixT<real>::softreluDerivative(BaseMatrixT& b) { void BaseMatrixT<real>::softreluDerivative(BaseMatrixT& b) {
applyBinary(binary::SoftreluDerivative<real>(), b); applyBinary(binary::SoftreluDerivative<real>(), b);
} }
DEFINE_MATRIX_BINARY_PARAMETER_OP(Brelu, TWO_PARAMETER, b = a > p1 ? a : p1; DEFINE_MATRIX_BINARY_PARAMETER_OP(Brelu, TWO_PARAMETER, b = a > p1 ? a : p1;
b = b < p2 ? b : p2); b = b < p2 ? b : p2);
template <class T> template<class T>
void BaseMatrixT<T>::brelu(BaseMatrixT& b) { void BaseMatrixT<T>::brelu(BaseMatrixT& b) {
int p1 = 0, p2 = 24; //! TODO(yuyang18): Make p1,p2 configuable. int p1 = 0, p2 = 24; //! TODO(yuyang18): Make p1,p2 configuable.
applyBinary(binary::Brelu<T>(p1, p2), b); applyBinary(binary::Brelu<T>(p1, p2), b);
} }
DEFINE_MATRIX_BINARY_PARAMETER_OP(BreluDerivative, DEFINE_MATRIX_BINARY_PARAMETER_OP(BreluDerivative, TWO_PARAMETER,
TWO_PARAMETER,
a *= (b > p1 && b < p2) ? 1.0 : 0.0); a *= (b > p1 && b < p2) ? 1.0 : 0.0);
template <class T> template<class T>
void BaseMatrixT<T>::breluDerivative(BaseMatrixT& b) { void BaseMatrixT<T>::breluDerivative(BaseMatrixT& b) {
int p1 = 0, p2 = 24; int p1 = 0, p2 = 24;
applyBinary(binary::BreluDerivative<T>(p1, p2), b); applyBinary(binary::BreluDerivative<T>(p1, p2), b);
} }
DEFINE_MATRIX_BINARY_OP(Square, b = a * a); DEFINE_MATRIX_BINARY_OP(Square, b = a * a);
template <class T> template<class T>
void BaseMatrixT<T>::square2(BaseMatrixT& b) { void BaseMatrixT<T>::square2(BaseMatrixT& b) {
applyBinary(binary::Square<T>(), b); applyBinary(binary::Square<T>(), b);
} }
DEFINE_MATRIX_BINARY_OP(SquareDerivative, a *= 2.0 * b); DEFINE_MATRIX_BINARY_OP(SquareDerivative, a *= 2.0 * b);
template <class T> template<class T>
void BaseMatrixT<T>::squareDerivative(BaseMatrixT& b) { void BaseMatrixT<T>::squareDerivative(BaseMatrixT& b) {
applyBinary(binary::SquareDerivative<T>(), b); applyBinary(binary::SquareDerivative<T>(), b);
} }
DEFINE_MATRIX_BINARY_OP(Tanh, T tmp = -2.0 * a; DEFINE_MATRIX_BINARY_OP(Tanh,
T tmp = -2.0 * a;
tmp = (tmp > EXP_MAX_INPUT) ? EXP_MAX_INPUT : tmp; tmp = (tmp > EXP_MAX_INPUT) ? EXP_MAX_INPUT : tmp;
b = 2.0 / (1.0 + std::exp(tmp)) - 1.0); b = 2.0 / (1.0 + std::exp(tmp)) - 1.0);
template <> template<>
void BaseMatrixT<real>::tanh(BaseMatrixT& b) { void BaseMatrixT<real>::tanh(BaseMatrixT& b) {
applyBinary(binary::Tanh<real>(), b); applyBinary(binary::Tanh<real>(), b);
} }
DEFINE_MATRIX_BINARY_OP(TanhDerivative, a *= 1 - b * b); DEFINE_MATRIX_BINARY_OP(TanhDerivative, a *= 1 - b * b);
template <class T> template<class T>
void BaseMatrixT<T>::tanhDerivative(BaseMatrixT& b) { void BaseMatrixT<T>::tanhDerivative(BaseMatrixT& b) {
applyBinary(binary::TanhDerivative<T>(), b); applyBinary(binary::TanhDerivative<T>(), b);
} }
DEFINE_MATRIX_BINARY_PARAMETER_OP( DEFINE_MATRIX_BINARY_PARAMETER_OP(ScaledTanh, TWO_PARAMETER,
ScaledTanh, TWO_PARAMETER, b = p1 * (2.0 / (1.0 + exp(-2 * p2 * a)) - 1.0)); b = p1 *
template <> (2.0 / (1.0 + exp(-2 * p2 * a)) - 1.0));
template<>
void BaseMatrixT<real>::scaledTanh(BaseMatrixT& b, real p1, real p2) { void BaseMatrixT<real>::scaledTanh(BaseMatrixT& b, real p1, real p2) {
applyBinary(binary::ScaledTanh<real>(p1, p2), b); applyBinary(binary::ScaledTanh<real>(p1, p2), b);
} }
DEFINE_MATRIX_BINARY_PARAMETER_OP(ScaledTanhDerivative, DEFINE_MATRIX_BINARY_PARAMETER_OP(ScaledTanhDerivative, TWO_PARAMETER,
TWO_PARAMETER,
a *= p2 * (p1 - b * b)); a *= p2 * (p1 - b * b));
template <class T> template<class T>
void BaseMatrixT<T>::scaledTanhDerivative(BaseMatrixT& b, T p1, T p2) { void BaseMatrixT<T>::scaledTanhDerivative(BaseMatrixT& b, T p1, T p2) {
applyBinary(binary::ScaledTanhDerivative<T>(p1 * p1, p2 / p1), b); applyBinary(binary::ScaledTanhDerivative<T>(p1 * p1, p2 / p1), b);
} }
DEFINE_MATRIX_BINARY_OP(Reciprocal, b = 1.0f / a); DEFINE_MATRIX_BINARY_OP(Reciprocal, b = 1.0f / a);
template <class T> template<class T>
void BaseMatrixT<T>::reciprocal2(BaseMatrixT& b) { void BaseMatrixT<T>::reciprocal2(BaseMatrixT& b) {
applyBinary(binary::Reciprocal<T>(), b); applyBinary(binary::Reciprocal<T>(), b);
} }
DEFINE_MATRIX_BINARY_OP(ReciprocalDerivative, a *= -b * b); DEFINE_MATRIX_BINARY_OP(ReciprocalDerivative, a *= -b * b);
template <class T> template<class T>
void BaseMatrixT<T>::reciprocalDerivative(BaseMatrixT& b) { void BaseMatrixT<T>::reciprocalDerivative(BaseMatrixT& b) {
applyBinary(binary::ReciprocalDerivative<T>(), b); applyBinary(binary::ReciprocalDerivative<T>(), b);
} }
DEFINE_MATRIX_BINARY_OP(Abs, b = a > 0.0f ? a : -a); DEFINE_MATRIX_BINARY_OP(Abs, b = a > 0.0f ? a : -a);
template <class T> template<class T>
void BaseMatrixT<T>::abs2(BaseMatrixT& b) { void BaseMatrixT<T>::abs2(BaseMatrixT& b) { applyBinary(binary::Abs<T>(), b); }
applyBinary(binary::Abs<T>(), b);
}
DEFINE_MATRIX_BINARY_OP(AbsDerivative, a = (b > 0) ? a : (b < 0) ? -a : 0); DEFINE_MATRIX_BINARY_OP(AbsDerivative, a = (b > 0) ? a : (b < 0) ? -a : 0);
template <class T> template<class T>
void BaseMatrixT<T>::absDerivative(BaseMatrixT& b) { void BaseMatrixT<T>::absDerivative(BaseMatrixT& b) {
applyBinary(binary::AbsDerivative<T>(), b); applyBinary(binary::AbsDerivative<T>(), b);
} }
DEFINE_MATRIX_BINARY_OP(Sigmoid, const T THRESHOLD_MIN = -40.0; DEFINE_MATRIX_BINARY_OP(
const T THRESHOLD_MAX = 13.0; Sigmoid, const T THRESHOLD_MIN = -40.0; const T THRESHOLD_MAX = 13.0;
T tmp = (a < THRESHOLD_MIN) T tmp = (a < THRESHOLD_MIN) ? THRESHOLD_MIN
? THRESHOLD_MIN
: ((a > THRESHOLD_MAX) ? THRESHOLD_MAX : a); : ((a > THRESHOLD_MAX) ? THRESHOLD_MAX : a);
b = 1.0f / (1.0f + exp(-tmp))); b = 1.0f / (1.0f + exp(-tmp)));
template <> template<>
void BaseMatrixT<real>::sigmoid(BaseMatrixT& b) { void BaseMatrixT<real>::sigmoid(BaseMatrixT& b) {
if (useGpu_) { if (useGpu_) {
applyBinary(binary::Sigmoid<real>(), b); applyBinary(binary::Sigmoid<real>(), b);
...@@ -814,31 +723,31 @@ void BaseMatrixT<real>::sigmoid(BaseMatrixT& b) { ...@@ -814,31 +723,31 @@ void BaseMatrixT<real>::sigmoid(BaseMatrixT& b) {
} }
DEFINE_MATRIX_BINARY_OP(SigmoidDerivative, a *= b * (1 - b)); DEFINE_MATRIX_BINARY_OP(SigmoidDerivative, a *= b * (1 - b));
template <class T> template<class T>
void BaseMatrixT<T>::sigmoidDerivative(BaseMatrixT& b) { void BaseMatrixT<T>::sigmoidDerivative(BaseMatrixT& b) {
applyBinary(binary::SigmoidDerivative<T>(), b); applyBinary(binary::SigmoidDerivative<T>(), b);
} }
DEFINE_MATRIX_BINARY_OP(ExpDerivative, a *= b); DEFINE_MATRIX_BINARY_OP(ExpDerivative, a *= b);
template <class T> template<class T>
void BaseMatrixT<T>::expDerivative(BaseMatrixT& b) { void BaseMatrixT<T>::expDerivative(BaseMatrixT& b) {
applyBinary(binary::ExpDerivative<T>(), b); applyBinary(binary::ExpDerivative<T>(), b);
} }
DEFINE_MATRIX_BINARY_OP(Sign, b = a > 0.0f ? 1.0f : -1.0f); DEFINE_MATRIX_BINARY_OP(Sign, b = a > 0.0f ? 1.0f : -1.0f);
template <class T> template<class T>
void BaseMatrixT<T>::sign2(BaseMatrixT& b) { void BaseMatrixT<T>::sign2(BaseMatrixT& b) {
applyBinary(binary::Sign<T>(), b); applyBinary(binary::Sign<T>(), b);
} }
DEFINE_MATRIX_BINARY_OP(Exp, a = exp(b)); DEFINE_MATRIX_BINARY_OP(Exp, a = exp(b));
template <> template<>
void BaseMatrixT<real>::exp2(BaseMatrixT& b) { void BaseMatrixT<real>::exp2(BaseMatrixT& b) {
applyBinary(binary::Exp<real>(), b); applyBinary(binary::Exp<real>(), b);
} }
DEFINE_MATRIX_BINARY_OP(Log, a = log(b)); DEFINE_MATRIX_BINARY_OP(Log, a = log(b));
template <> template<>
void BaseMatrixT<real>::log2(BaseMatrixT& b) { void BaseMatrixT<real>::log2(BaseMatrixT& b) {
if (useGpu_) { if (useGpu_) {
applyBinary(binary::Log<real>(), b); applyBinary(binary::Log<real>(), b);
...@@ -848,13 +757,13 @@ void BaseMatrixT<real>::log2(BaseMatrixT& b) { ...@@ -848,13 +757,13 @@ void BaseMatrixT<real>::log2(BaseMatrixT& b) {
} }
DEFINE_MATRIX_BINARY_OP(Sqrt, a = sqrt(b)); DEFINE_MATRIX_BINARY_OP(Sqrt, a = sqrt(b));
template <> template<>
void BaseMatrixT<real>::sqrt2(BaseMatrixT& b) { void BaseMatrixT<real>::sqrt2(BaseMatrixT& b) {
applyBinary(binary::Sqrt<real>(), b); applyBinary(binary::Sqrt<real>(), b);
} }
DEFINE_MATRIX_BINARY_OP(InvSqrt, a = 1.0f / sqrt(b)); DEFINE_MATRIX_BINARY_OP(InvSqrt, a = 1.0f / sqrt(b));
template <> template<>
void BaseMatrixT<real>::invSqrt(BaseMatrixT& b) { void BaseMatrixT<real>::invSqrt(BaseMatrixT& b) {
if (useGpu_) { if (useGpu_) {
applyBinary(binary::InvSqrt<real>(), b); applyBinary(binary::InvSqrt<real>(), b);
...@@ -866,37 +775,37 @@ void BaseMatrixT<real>::invSqrt(BaseMatrixT& b) { ...@@ -866,37 +775,37 @@ void BaseMatrixT<real>::invSqrt(BaseMatrixT& b) {
} }
DEFINE_MATRIX_BINARY_PARAMETER_OP(IsEqual, ONE_PARAMETER, a = (b == p)); DEFINE_MATRIX_BINARY_PARAMETER_OP(IsEqual, ONE_PARAMETER, a = (b == p));
template <class T> template<class T>
void BaseMatrixT<T>::isEqualTo(BaseMatrixT& b, T value) { void BaseMatrixT<T>::isEqualTo(BaseMatrixT& b, T value) {
applyBinary(binary::IsEqual<T>(value), b); applyBinary(binary::IsEqual<T>(value), b);
} }
DEFINE_MATRIX_BINARY_PARAMETER_OP(AddScalar, ONE_PARAMETER, a = b + p); DEFINE_MATRIX_BINARY_PARAMETER_OP(AddScalar, ONE_PARAMETER, a = b + p);
template <class T> template<class T>
void BaseMatrixT<T>::addScalar(BaseMatrixT& b, T p) { void BaseMatrixT<T>::addScalar(BaseMatrixT& b, T p) {
applyBinary(binary::AddScalar<T>(p), b); applyBinary(binary::AddScalar<T>(p), b);
} }
DEFINE_MATRIX_BINARY_PARAMETER_OP(SubScalar, ONE_PARAMETER, a = b - p); DEFINE_MATRIX_BINARY_PARAMETER_OP(SubScalar, ONE_PARAMETER, a = b - p);
template <class T> template<class T>
void BaseMatrixT<T>::subScalar(BaseMatrixT& b, T p) { void BaseMatrixT<T>::subScalar(BaseMatrixT& b, T p) {
applyBinary(binary::SubScalar<T>(p), b); applyBinary(binary::SubScalar<T>(p), b);
} }
DEFINE_MATRIX_BINARY_PARAMETER_OP(MulScalar, ONE_PARAMETER, a = b * p); DEFINE_MATRIX_BINARY_PARAMETER_OP(MulScalar, ONE_PARAMETER, a = b * p);
template <class T> template<class T>
void BaseMatrixT<T>::mulScalar(BaseMatrixT& b, T p) { void BaseMatrixT<T>::mulScalar(BaseMatrixT& b, T p) {
applyBinary(binary::MulScalar<T>(p), b); applyBinary(binary::MulScalar<T>(p), b);
} }
DEFINE_MATRIX_BINARY_PARAMETER_OP(DivScalar, ONE_PARAMETER, a = b / p); DEFINE_MATRIX_BINARY_PARAMETER_OP(DivScalar, ONE_PARAMETER, a = b / p);
template <class T> template<class T>
void BaseMatrixT<T>::divScalar(BaseMatrixT& b, T p) { void BaseMatrixT<T>::divScalar(BaseMatrixT& b, T p) {
applyBinary(binary::DivScalar<T>(p), b); applyBinary(binary::DivScalar<T>(p), b);
} }
DEFINE_MATRIX_BINARY_PARAMETER_OP(ScalarDiv, ONE_PARAMETER, a = p / b); DEFINE_MATRIX_BINARY_PARAMETER_OP(ScalarDiv, ONE_PARAMETER, a = p / b);
template <class T> template<class T>
void BaseMatrixT<T>::scalarDiv(BaseMatrixT& b, T p) { void BaseMatrixT<T>::scalarDiv(BaseMatrixT& b, T p) {
applyBinary(binary::ScalarDiv<T>(p), b); applyBinary(binary::ScalarDiv<T>(p), b);
} }
...@@ -908,20 +817,20 @@ void BaseMatrixT<T>::scalarDiv(BaseMatrixT& b, T p) { ...@@ -908,20 +817,20 @@ void BaseMatrixT<T>::scalarDiv(BaseMatrixT& b, T p) {
DEFINE_MATRIX_TERNARY_OP(SoftCrossEntropy, DEFINE_MATRIX_TERNARY_OP(SoftCrossEntropy,
a = -c * log(b) - (1 - c) * log(1 - b)); a = -c * log(b) - (1 - c) * log(1 - b));
template <> template<>
void BaseMatrixT<real>::softCrossEntropy(BaseMatrixT& b, BaseMatrixT& c) { void BaseMatrixT<real>::softCrossEntropy(BaseMatrixT& b, BaseMatrixT& c) {
applyTernary(ternary::SoftCrossEntropy<real>(), b, c); applyTernary(ternary::SoftCrossEntropy<real>(), b, c);
} }
DEFINE_MATRIX_TERNARY_OP(SoftCrossEntropyBp, a += (b - c) / (b * (1 - b))); DEFINE_MATRIX_TERNARY_OP(SoftCrossEntropyBp, a += (b - c) / (b * (1 - b)));
template <class T> template<class T>
void BaseMatrixT<T>::softCrossEntropyBp(BaseMatrixT& b, BaseMatrixT& c) { void BaseMatrixT<T>::softCrossEntropyBp(BaseMatrixT& b, BaseMatrixT& c) {
applyTernary(ternary::SoftCrossEntropyBp<T>(), b, c); applyTernary(ternary::SoftCrossEntropyBp<T>(), b, c);
} }
DEFINE_MATRIX_TERNARY_OP(BinaryCrossEntropy, DEFINE_MATRIX_TERNARY_OP(BinaryCrossEntropy,
a = c > 0.5 ? -log(b) : -log(1.0 - b)); a = c > 0.5 ? -log(b) : -log(1.0 - b));
template <> template<>
void BaseMatrixT<real>::binaryLabelCrossEntropy(BaseMatrixT& b, void BaseMatrixT<real>::binaryLabelCrossEntropy(BaseMatrixT& b,
BaseMatrixT& c) { BaseMatrixT& c) {
if (useGpu_) { if (useGpu_) {
...@@ -949,54 +858,52 @@ void BaseMatrixT<real>::binaryLabelCrossEntropy(BaseMatrixT& b, ...@@ -949,54 +858,52 @@ void BaseMatrixT<real>::binaryLabelCrossEntropy(BaseMatrixT& b,
DEFINE_MATRIX_TERNARY_OP(BinaryCrossEntropyBp, DEFINE_MATRIX_TERNARY_OP(BinaryCrossEntropyBp,
a += c > 0.5 ? -1.0 / b : 1.0 / (1.0 - b)); a += c > 0.5 ? -1.0 / b : 1.0 / (1.0 - b));
template <class T> template<class T>
void BaseMatrixT<T>::binaryLabelCrossEntropyBp(BaseMatrixT& b, BaseMatrixT& c) { void BaseMatrixT<T>::binaryLabelCrossEntropyBp(BaseMatrixT& b, BaseMatrixT& c) {
applyTernary(ternary::BinaryCrossEntropyBp<T>(), b, c); applyTernary(ternary::BinaryCrossEntropyBp<T>(), b, c);
} }
DEFINE_MATRIX_TERNARY_OP(Add, a = b + c); DEFINE_MATRIX_TERNARY_OP(Add, a = b + c);
template <class T> template<class T>
void BaseMatrixT<T>::add(BaseMatrixT& b, BaseMatrixT& c) { void BaseMatrixT<T>::add(BaseMatrixT& b, BaseMatrixT& c) {
applyTernary(ternary::Add<T>(), b, c); applyTernary(ternary::Add<T>(), b, c);
} }
DEFINE_MATRIX_TERNARY_PARAMETER_OP(Add1, TWO_PARAMETER, a = p1 * b + p2 * c); DEFINE_MATRIX_TERNARY_PARAMETER_OP(Add1, TWO_PARAMETER, a = p1 * b + p2 * c);
template <class T> template<class T>
void BaseMatrixT<T>::add(BaseMatrixT& b, T p1, BaseMatrixT& c, T p2) { void BaseMatrixT<T>::add(BaseMatrixT& b, T p1, BaseMatrixT& c, T p2) {
applyTernary(ternary::Add1<T>(p1, p2), b, c); applyTernary(ternary::Add1<T>(p1, p2), b, c);
} }
DEFINE_MATRIX_TERNARY_OP(Sub, a = b - c); DEFINE_MATRIX_TERNARY_OP(Sub, a = b - c);
template <class T> template<class T>
void BaseMatrixT<T>::sub(BaseMatrixT& b, BaseMatrixT& c) { void BaseMatrixT<T>::sub(BaseMatrixT& b, BaseMatrixT& c) {
applyTernary(ternary::Sub<T>(), b, c); applyTernary(ternary::Sub<T>(), b, c);
} }
DEFINE_MATRIX_TERNARY_PARAMETER_OP(Sub1, TWO_PARAMETER, a = p1 * b - p2 * c); DEFINE_MATRIX_TERNARY_PARAMETER_OP(Sub1, TWO_PARAMETER, a = p1 * b - p2 * c);
template <class T> template<class T>
void BaseMatrixT<T>::sub(BaseMatrixT& b, T p1, BaseMatrixT& c, T p2) { void BaseMatrixT<T>::sub(BaseMatrixT& b, T p1, BaseMatrixT& c, T p2) {
applyTernary(ternary::Sub1<T>(p1, p2), b, c); applyTernary(ternary::Sub1<T>(p1, p2), b, c);
} }
DEFINE_MATRIX_TERNARY_OP(Add2, a = a + b + c); DEFINE_MATRIX_TERNARY_OP(Add2, a = a + b + c);
template <class T> template<class T>
void BaseMatrixT<T>::add2(BaseMatrixT& b, BaseMatrixT& c) { void BaseMatrixT<T>::add2(BaseMatrixT& b, BaseMatrixT& c) {
applyTernary(ternary::Add2<T>(), b, c); applyTernary(ternary::Add2<T>(), b, c);
} }
DEFINE_MATRIX_TERNARY_PARAMETER_OP(Add3, DEFINE_MATRIX_TERNARY_PARAMETER_OP(Add3, THREE_PARAMETER,
THREE_PARAMETER,
a = p1 * a + p2 * b + p3 * c); a = p1 * a + p2 * b + p3 * c);
template <class T> template<class T>
void BaseMatrixT<T>::add2(BaseMatrixT& b, BaseMatrixT& c, T p1, T p2, T p3) { void BaseMatrixT<T>::add2(BaseMatrixT& b, BaseMatrixT& c, T p1, T p2, T p3) {
applyTernary(ternary::Add3<T>(p1, p2, p3), b, c); applyTernary(ternary::Add3<T>(p1, p2, p3), b, c);
} }
DEFINE_MATRIX_TERNARY_PARAMETER_OP(SgdUpdate, DEFINE_MATRIX_TERNARY_PARAMETER_OP(SgdUpdate, THREE_PARAMETER,
THREE_PARAMETER,
c = p2 * c - p1 * (b + p3 * a); c = p2 * c - p1 * (b + p3 * a);
a = a + c); a = a + c);
template <class T> template<class T>
void BaseMatrixT<T>::sgdUpdate(BaseMatrixT& b, // grad void BaseMatrixT<T>::sgdUpdate(BaseMatrixT& b, // grad
BaseMatrixT& c, // mom BaseMatrixT& c, // mom
T p1, // learningRate, T p1, // learningRate,
...@@ -1005,11 +912,10 @@ void BaseMatrixT<T>::sgdUpdate(BaseMatrixT& b, // grad ...@@ -1005,11 +912,10 @@ void BaseMatrixT<T>::sgdUpdate(BaseMatrixT& b, // grad
applyTernary(ternary::SgdUpdate<T>(p1, p2, p3), b, c); applyTernary(ternary::SgdUpdate<T>(p1, p2, p3), b, c);
} }
DEFINE_MATRIX_QUATERNARY_PARAMETER_OP(SgdUpdate, DEFINE_MATRIX_QUATERNARY_PARAMETER_OP(SgdUpdate, THREE_PARAMETER,
THREE_PARAMETER,
c = p2 * c - p1 * d * (b + p3 * a); c = p2 * c - p1 * d * (b + p3 * a);
a += c); a += c);
template <class T> template<class T>
void BaseMatrixT<T>::sgdUpdate(BaseMatrixT& b, // grad, void BaseMatrixT<T>::sgdUpdate(BaseMatrixT& b, // grad,
BaseMatrixT& c, // mom, BaseMatrixT& c, // mom,
BaseMatrixT& d, // lr, BaseMatrixT& d, // lr,
...@@ -1023,22 +929,19 @@ DEFINE_MATRIX_BINARY_PARAMETER_OP(ApplyL1, ONE_PARAMETER, T lambda = p * b; ...@@ -1023,22 +929,19 @@ DEFINE_MATRIX_BINARY_PARAMETER_OP(ApplyL1, ONE_PARAMETER, T lambda = p * b;
a = (a > lambda) a = (a > lambda)
? (a - lambda) ? (a - lambda)
: (a < -lambda) ? (a + lambda) : 0); : (a < -lambda) ? (a + lambda) : 0);
template <class T> template<class T>
void BaseMatrixT<T>::applyL1(BaseMatrixT& lr, T learningRate, T decayRate) { void BaseMatrixT<T>::applyL1(BaseMatrixT& lr, T learningRate, T decayRate) {
applyBinary(binary::ApplyL1<T>(learningRate * decayRate), lr); applyBinary(binary::ApplyL1<T>(learningRate * decayRate), lr);
} }
template <> template<>
void BaseMatrixT<real>::applyL1(BaseMatrixT& lr, void BaseMatrixT<real>::applyL1(BaseMatrixT& lr,
real learningRate, real learningRate,
real decayRate) { real decayRate) {
if (useGpu_) { if (useGpu_) {
applyBinary(binary::ApplyL1<real>(learningRate * decayRate), lr); applyBinary(binary::ApplyL1<real>(learningRate * decayRate), lr);
} else { } else {
simd::decayL1(this->data_, simd::decayL1(this->data_, this->data_, lr.data_, learningRate * decayRate,
this->data_,
lr.data_,
learningRate * decayRate,
height_ * width_); height_ * width_);
} }
} }
...@@ -1047,25 +950,24 @@ DEFINE_MATRIX_UNARY_PARAMETER_OP(ApplyL1, ONE_PARAMETER, T lambda = p; ...@@ -1047,25 +950,24 @@ DEFINE_MATRIX_UNARY_PARAMETER_OP(ApplyL1, ONE_PARAMETER, T lambda = p;
a = (a > lambda) a = (a > lambda)
? (a - lambda) ? (a - lambda)
: (a < -lambda) ? (a + lambda) : 0); : (a < -lambda) ? (a + lambda) : 0);
template <class T> template<class T>
void BaseMatrixT<T>::applyL1(T learningRate, T decayRate) { void BaseMatrixT<T>::applyL1(T learningRate, T decayRate) {
applyUnary(unary::ApplyL1<T>(learningRate * decayRate)); applyUnary(unary::ApplyL1<T>(learningRate * decayRate));
} }
template <> template<>
void BaseMatrixT<real>::applyL1(real learningRate, real decayRate) { void BaseMatrixT<real>::applyL1(real learningRate, real decayRate) {
if (useGpu_) { if (useGpu_) {
applyUnary(unary::ApplyL1<real>(learningRate * decayRate)); applyUnary(unary::ApplyL1<real>(learningRate * decayRate));
} else { } else {
simd::decayL1( simd::decayL1(this->data_, this->data_, learningRate * decayRate,
this->data_, this->data_, learningRate * decayRate, height_ * width_); height_ * width_);
} }
} }
DEFINE_MATRIX_BINARY_PARAMETER_OP(ApplyL2, DEFINE_MATRIX_BINARY_PARAMETER_OP(ApplyL2, ONE_PARAMETER,
ONE_PARAMETER,
a *= (1.0f / (1.0f + p * b))); a *= (1.0f / (1.0f + p * b)));
template <class T> template<class T>
void BaseMatrixT<T>::applyL2(BaseMatrixT& lr, T learningRate, T decayRate) { void BaseMatrixT<T>::applyL2(BaseMatrixT& lr, T learningRate, T decayRate) {
if (useGpu_) { if (useGpu_) {
applyBinary(binary::ApplyL2<T>(learningRate * decayRate), lr); applyBinary(binary::ApplyL2<T>(learningRate * decayRate), lr);
...@@ -1078,33 +980,32 @@ void BaseMatrixT<T>::applyL2(BaseMatrixT& lr, T learningRate, T decayRate) { ...@@ -1078,33 +980,32 @@ void BaseMatrixT<T>::applyL2(BaseMatrixT& lr, T learningRate, T decayRate) {
} }
} }
template <class T> template<class T>
void BaseMatrixT<T>::applyL2(T learningRate, T decayRate) { void BaseMatrixT<T>::applyL2(T learningRate, T decayRate) {
BaseMatrixT<T>::mulScalar(1.0f / (1.0f + learningRate * decayRate)); BaseMatrixT<T>::mulScalar(1.0f / (1.0f + learningRate * decayRate));
} }
DEFINE_MATRIX_BINARY_OP(DotMul, a *= b); DEFINE_MATRIX_BINARY_OP(DotMul, a *= b);
template <class T> template<class T>
void BaseMatrixT<T>::dotMul(BaseMatrixT& b) { void BaseMatrixT<T>::dotMul(BaseMatrixT& b) {
applyBinary(binary::DotMul<T>(), b); applyBinary(binary::DotMul<T>(), b);
} }
DEFINE_MATRIX_TERNARY_OP(DotMul, a = b * c); DEFINE_MATRIX_TERNARY_OP(DotMul, a = b * c);
template <class T> template<class T>
void BaseMatrixT<T>::dotMul(BaseMatrixT& b, BaseMatrixT& c) { void BaseMatrixT<T>::dotMul(BaseMatrixT& b, BaseMatrixT& c) {
applyTernary(ternary::DotMul<T>(), b, c); applyTernary(ternary::DotMul<T>(), b, c);
} }
DEFINE_MATRIX_TERNARY_OP(DotDiv, a = (b == 0.0) ? 0.0 : b / c); DEFINE_MATRIX_TERNARY_OP(DotDiv, a = (b == 0.0) ? 0.0 : b / c);
template <class T> template<class T>
void BaseMatrixT<T>::dotDiv(BaseMatrixT& b, BaseMatrixT& c) { void BaseMatrixT<T>::dotDiv(BaseMatrixT& b, BaseMatrixT& c) {
applyTernary(ternary::DotDiv<T>(), b, c); applyTernary(ternary::DotDiv<T>(), b, c);
} }
DEFINE_MATRIX_TERNARY_PARAMETER_OP(DotDiv2P, DEFINE_MATRIX_TERNARY_PARAMETER_OP(DotDiv2P, TWO_PARAMETER,
TWO_PARAMETER,
a = (b + p1) / (c + p2)); a = (b + p1) / (c + p2));
template <class T> template<class T>
void BaseMatrixT<T>::dotDiv(BaseMatrixT& b, BaseMatrixT& c, T p1, T p2) { void BaseMatrixT<T>::dotDiv(BaseMatrixT& b, BaseMatrixT& c, T p1, T p2) {
applyTernary(ternary::DotDiv2P<T>(p1, p2), b, c); applyTernary(ternary::DotDiv2P<T>(p1, p2), b, c);
} }
...@@ -1114,7 +1015,7 @@ DEFINE_MATRIX_QUATERNARY_OP(RankLoss, const T THRESHOLD = 40.0; a = b - c; ...@@ -1114,7 +1015,7 @@ DEFINE_MATRIX_QUATERNARY_OP(RankLoss, const T THRESHOLD = 40.0; a = b - c;
? THRESHOLD ? THRESHOLD
: ((a < -THRESHOLD) ? (-THRESHOLD) : a); : ((a < -THRESHOLD) ? (-THRESHOLD) : a);
a = log(1 + exp(a)) - a * d); a = log(1 + exp(a)) - a * d);
template <> template<>
void BaseMatrixT<real>::rankLoss(BaseMatrixT& b, void BaseMatrixT<real>::rankLoss(BaseMatrixT& b,
BaseMatrixT& c, BaseMatrixT& c,
BaseMatrixT& d) { BaseMatrixT& d) {
...@@ -1125,9 +1026,8 @@ DEFINE_MATRIX_QUATERNARY_OP(RankLossBp, const T THRESHOLD = 40.0; a = b - c; ...@@ -1125,9 +1026,8 @@ DEFINE_MATRIX_QUATERNARY_OP(RankLossBp, const T THRESHOLD = 40.0; a = b - c;
a = (a > THRESHOLD) a = (a > THRESHOLD)
? THRESHOLD ? THRESHOLD
: ((a < -THRESHOLD) ? (-THRESHOLD) : a); : ((a < -THRESHOLD) ? (-THRESHOLD) : a);
a = exp(a); a = exp(a); a = (a / (1 + a) - d));
a = (a / (1 + a) - d)); template<>
template <>
void BaseMatrixT<real>::rankLossBp(BaseMatrixT& b, void BaseMatrixT<real>::rankLossBp(BaseMatrixT& b,
BaseMatrixT& c, BaseMatrixT& c,
BaseMatrixT& d) { BaseMatrixT& d) {
...@@ -1140,7 +1040,7 @@ DEFINE_MATRIX_TERNARY_OP(LogisticRegressionLoss, const T THRESHOLD = 40.0; ...@@ -1140,7 +1040,7 @@ DEFINE_MATRIX_TERNARY_OP(LogisticRegressionLoss, const T THRESHOLD = 40.0;
? -THRESHOLD ? -THRESHOLD
: b; : b;
a = log(1 + exp(x)) - c * x); a = log(1 + exp(x)) - c * x);
template <> template<>
void BaseMatrixT<real>::logisticRegressionLoss(BaseMatrixT& b, BaseMatrixT& c) { void BaseMatrixT<real>::logisticRegressionLoss(BaseMatrixT& b, BaseMatrixT& c) {
applyTernary(ternary::LogisticRegressionLoss<real>(), b, c); applyTernary(ternary::LogisticRegressionLoss<real>(), b, c);
} }
...@@ -1150,23 +1050,22 @@ DEFINE_MATRIX_TERNARY_OP(LogisticRegressionLossBp, const T THRESHOLD = 40.0; ...@@ -1150,23 +1050,22 @@ DEFINE_MATRIX_TERNARY_OP(LogisticRegressionLossBp, const T THRESHOLD = 40.0;
T x = (b > THRESHOLD) ? THRESHOLD : (b < -THRESHOLD) T x = (b > THRESHOLD) ? THRESHOLD : (b < -THRESHOLD)
? -THRESHOLD ? -THRESHOLD
: b; : b;
x = exp(x); x = exp(x); a = x / (1 + x) - c);
a = x / (1 + x) - c); template<>
template <>
void BaseMatrixT<real>::logisticRegressionLossBp(BaseMatrixT& b, void BaseMatrixT<real>::logisticRegressionLossBp(BaseMatrixT& b,
BaseMatrixT& c) { BaseMatrixT& c) {
applyTernary(ternary::LogisticRegressionLossBp<real>(), b, c); applyTernary(ternary::LogisticRegressionLossBp<real>(), b, c);
} }
DEFINE_MATRIX_TERNARY_OP(BiggerThan, a = (b > c) ? 1.0f : 0.0f); DEFINE_MATRIX_TERNARY_OP(BiggerThan, a = (b > c) ? 1.0f : 0.0f);
template <class T> template<class T>
void BaseMatrixT<T>::biggerThan(BaseMatrixT& b, BaseMatrixT& c) { void BaseMatrixT<T>::biggerThan(BaseMatrixT& b, BaseMatrixT& c) {
applyTernary(ternary::BiggerThan<T>(), b, c); applyTernary(ternary::BiggerThan<T>(), b, c);
} }
DEFINE_MATRIX_QUATERNARY_OP( DEFINE_MATRIX_QUATERNARY_OP(
BiggerThan, a = ((b > c && d > 0.5f) || (b < c && d < 0.5f)) ? 1.0f : 0.0f); BiggerThan, a = ((b > c && d > 0.5f) || (b < c && d < 0.5f)) ? 1.0f : 0.0f);
template <class T> template<class T>
void BaseMatrixT<T>::biggerThan(BaseMatrixT& b, void BaseMatrixT<T>::biggerThan(BaseMatrixT& b,
BaseMatrixT& c, BaseMatrixT& c,
BaseMatrixT& d) { BaseMatrixT& d) {
...@@ -1174,34 +1073,25 @@ void BaseMatrixT<T>::biggerThan(BaseMatrixT& b, ...@@ -1174,34 +1073,25 @@ void BaseMatrixT<T>::biggerThan(BaseMatrixT& b,
} }
DEFINE_MATRIX_TERNARY_OP(Max, a = (b > c) ? b : c); DEFINE_MATRIX_TERNARY_OP(Max, a = (b > c) ? b : c);
template <class T> template<class T>
void BaseMatrixT<T>::max2(BaseMatrixT& b, BaseMatrixT& c) { void BaseMatrixT<T>::max2(BaseMatrixT& b, BaseMatrixT& c) {
applyTernary(ternary::Max<T>(), b, c); applyTernary(ternary::Max<T>(), b, c);
} }
DEFINE_MATRIX_TERNARY_PARAMETER_OP(BinaryClassificationError, DEFINE_MATRIX_TERNARY_PARAMETER_OP(BinaryClassificationError, ONE_PARAMETER,
ONE_PARAMETER,
c += ((a > p) == (b > p)) ? 0.0f : 1.0f); c += ((a > p) == (b > p)) ? 0.0f : 1.0f);
template <class T> template<class T>
void BaseMatrixT<T>::binaryClassificationError2(size_t destCol, void BaseMatrixT<T>::binaryClassificationError2(size_t destCol, BaseMatrixT& b,
BaseMatrixT& b, BaseMatrixT& c, T p) {
BaseMatrixT& c,
T p) {
CHECK(!useGpu_) << "do not support gpu"; CHECK(!useGpu_) << "do not support gpu";
MatrixOffset offset(0, 0, 0, 0, destCol, 0); MatrixOffset offset(0, 0, 0, 0, destCol, 0);
int numRows = b.height_; int numRows = b.height_;
int numCols = b.width_; int numCols = b.width_;
b.applyTernary(ternary::BinaryClassificationError<T>(p), b.applyTernary(ternary::BinaryClassificationError<T>(p), c, *this, numRows,
c, numCols, offset, false_type(), true_type() /*cAsColVector*/);
*this,
numRows,
numCols,
offset,
false_type(),
true_type() /*cAsColVector*/);
} }
template <> template<>
void BaseMatrixT<real>::binaryClassificationError(size_t destCol, void BaseMatrixT<real>::binaryClassificationError(size_t destCol,
BaseMatrixT& b, BaseMatrixT& b,
BaseMatrixT& c, BaseMatrixT& c,
...@@ -1209,148 +1099,127 @@ void BaseMatrixT<real>::binaryClassificationError(size_t destCol, ...@@ -1209,148 +1099,127 @@ void BaseMatrixT<real>::binaryClassificationError(size_t destCol,
MatrixOffset offset(destCol, 0, 0, 0, 0, 0); MatrixOffset offset(destCol, 0, 0, 0, 0, 0);
int numRows = b.height_; int numRows = b.height_;
int numCols = b.width_; int numCols = b.width_;
aggregate(aggregate::sum(), aggregate(aggregate::sum(), base::binary::classificationError(p),
base::binary::classificationError(p), base::binary::add(), b, c, numRows, numCols, offset, false_type(),
base::binary::add(),
b,
c,
numRows,
numCols,
offset,
false_type(),
true_type() /*aAsColVector*/); true_type() /*aAsColVector*/);
} }
DEFINE_MATRIX_QUATERNARY_PARAMETER_OP(Add3, DEFINE_MATRIX_QUATERNARY_PARAMETER_OP(Add3, THREE_PARAMETER,
THREE_PARAMETER,
a = p1 * b + p2 * c + p3 * d); a = p1 * b + p2 * c + p3 * d);
template <class T> template<class T>
void BaseMatrixT<T>::add3( void BaseMatrixT<T>::add3(BaseMatrixT& b, BaseMatrixT& c, BaseMatrixT& d, T p1,
BaseMatrixT& b, BaseMatrixT& c, BaseMatrixT& d, T p1, T p2, T p3) { T p2, T p3) {
applyQuaternary(quaternary::Add3<T>(p1, p2, p3), b, c, d); applyQuaternary(quaternary::Add3<T>(p1, p2, p3), b, c, d);
} }
DEFINE_MATRIX_TERNARY_OP(DotMulSquare, a = b * c * c); DEFINE_MATRIX_TERNARY_OP(DotMulSquare, a = b * c * c);
template <class T> template<class T>
void BaseMatrixT<T>::dotMulSquare(BaseMatrixT& b, BaseMatrixT& c) { void BaseMatrixT<T>::dotMulSquare(BaseMatrixT& b, BaseMatrixT& c) {
applyTernary(ternary::DotMulSquare<T>(), b, c); applyTernary(ternary::DotMulSquare<T>(), b, c);
} }
DEFINE_MATRIX_TERNARY_OP(DotSquareSquare, a = b * b * c * c); DEFINE_MATRIX_TERNARY_OP(DotSquareSquare, a = b * b * c * c);
template <class T> template<class T>
void BaseMatrixT<T>::dotSquareSquare(BaseMatrixT& b, BaseMatrixT& c) { void BaseMatrixT<T>::dotSquareSquare(BaseMatrixT& b, BaseMatrixT& c) {
applyTernary(ternary::DotSquareSquare<T>(), b, c); applyTernary(ternary::DotSquareSquare<T>(), b, c);
} }
DEFINE_MATRIX_BINARY_OP(DotMulSquare, a *= b * b); DEFINE_MATRIX_BINARY_OP(DotMulSquare, a *= b * b);
template <class T> template<class T>
void BaseMatrixT<T>::dotMulSquare(BaseMatrixT& b) { void BaseMatrixT<T>::dotMulSquare(BaseMatrixT& b) {
applyBinary(binary::DotMulSquare<T>(), b); applyBinary(binary::DotMulSquare<T>(), b);
} }
DEFINE_MATRIX_BINARY_OP(DotSquareMul, a = a * a * b); DEFINE_MATRIX_BINARY_OP(DotSquareMul, a = a * a * b);
template <class T> template<class T>
void BaseMatrixT<T>::dotSquareMul(BaseMatrixT& b) { void BaseMatrixT<T>::dotSquareMul(BaseMatrixT& b) {
applyBinary(binary::DotSquareMul<T>(), b); applyBinary(binary::DotSquareMul<T>(), b);
} }
DEFINE_MATRIX_QUATERNARY_PARAMETER_OP(AddSquareSum, DEFINE_MATRIX_QUATERNARY_PARAMETER_OP(AddSquareSum, THREE_PARAMETER,
THREE_PARAMETER,
T tmp = p1 * b + p2 * c + p3 * d; T tmp = p1 * b + p2 * c + p3 * d;
a += tmp * tmp); a += tmp * tmp);
template <class T> template<class T>
void BaseMatrixT<T>::addSquareSum( void BaseMatrixT<T>::addSquareSum(BaseMatrixT& b, BaseMatrixT& c, BaseMatrixT d,
BaseMatrixT& b, BaseMatrixT& c, BaseMatrixT d, T p1, T p2, T p3) { T p1, T p2, T p3) {
applyQuaternary(quaternary::AddSquareSum<T>(p1, p2, p3), b, c, d); applyQuaternary(quaternary::AddSquareSum<T>(p1, p2, p3), b, c, d);
} }
DEFINE_MATRIX_BINARY_PARAMETER_OP(AddSquare, ONE_PARAMETER, a += p * b * b); DEFINE_MATRIX_BINARY_PARAMETER_OP(AddSquare, ONE_PARAMETER, a += p * b * b);
template <class T> template<class T>
void BaseMatrixT<T>::addSquare(BaseMatrixT& b, T p) { void BaseMatrixT<T>::addSquare(BaseMatrixT& b, T p) {
applyBinary(binary::AddSquare<T>(p), b); applyBinary(binary::AddSquare<T>(p), b);
} }
DEFINE_MATRIX_BINARY_PARAMETER_OP(DecayAddSquare, DEFINE_MATRIX_BINARY_PARAMETER_OP(DecayAddSquare, TWO_PARAMETER,
TWO_PARAMETER,
a = p1 * a + p2 * b * b); a = p1 * a + p2 * b * b);
template <class T> template<class T>
void BaseMatrixT<T>::decayAddSquare(BaseMatrixT& b, T p1, T p2) { void BaseMatrixT<T>::decayAddSquare(BaseMatrixT& b, T p1, T p2) {
applyBinary(binary::DecayAddSquare<T>(p1, p2), b); applyBinary(binary::DecayAddSquare<T>(p1, p2), b);
} }
DEFINE_MATRIX_TERNARY_PARAMETER_OP(DecayAddSquareMul, DEFINE_MATRIX_TERNARY_PARAMETER_OP(DecayAddSquareMul, TWO_PARAMETER,
TWO_PARAMETER,
a = p1 * a + p2 * b * b * c * c); a = p1 * a + p2 * b * b * c * c);
template <class T> template<class T>
void BaseMatrixT<T>::decayAddSquareMul(BaseMatrixT& b, void BaseMatrixT<T>::decayAddSquareMul(BaseMatrixT& b, BaseMatrixT& c, T p1,
BaseMatrixT& c,
T p1,
T p2) { T p2) {
applyTernary(ternary::DecayAddSquareMul<T>(p1, p2), b, c); applyTernary(ternary::DecayAddSquareMul<T>(p1, p2), b, c);
} }
DEFINE_MATRIX_TERNARY_PARAMETER_OP(ReciprocalSum, DEFINE_MATRIX_TERNARY_PARAMETER_OP(ReciprocalSum, THREE_PARAMETER,
THREE_PARAMETER,
a = 1 / (p1 * b + p2 * c + p3)); a = 1 / (p1 * b + p2 * c + p3));
template <class T> template<class T>
void BaseMatrixT<T>::reciprocalSum( void BaseMatrixT<T>::reciprocalSum(BaseMatrixT& b, BaseMatrixT& c, T p1, T p2,
BaseMatrixT& b, BaseMatrixT& c, T p1, T p2, T p3) { T p3) {
applyTernary(ternary::ReciprocalSum<T>(p1, p2, p3), b, c); applyTernary(ternary::ReciprocalSum<T>(p1, p2, p3), b, c);
} }
DEFINE_MATRIX_BINARY_PARAMETER_OP(Reciprocal2, DEFINE_MATRIX_BINARY_PARAMETER_OP(Reciprocal2, TWO_PARAMETER,
TWO_PARAMETER,
a = 1 / (p1 * b + p2)); a = 1 / (p1 * b + p2));
template <class T> template<class T>
void BaseMatrixT<T>::reciprocal2(BaseMatrixT& b, T p1, T p2) { void BaseMatrixT<T>::reciprocal2(BaseMatrixT& b, T p1, T p2) {
applyBinary(binary::Reciprocal2<T>(p1, p2), b); applyBinary(binary::Reciprocal2<T>(p1, p2), b);
} }
DEFINE_MATRIX_TERNARY_PARAMETER_OP(DotMulSquareSum, DEFINE_MATRIX_TERNARY_PARAMETER_OP(DotMulSquareSum, TWO_PARAMETER,
TWO_PARAMETER,
T tmp = p1 * b + p2 * c; T tmp = p1 * b + p2 * c;
a *= tmp * tmp); a *= tmp * tmp);
template <class T> template<class T>
void BaseMatrixT<T>::dotMulSquareSum(BaseMatrixT& b, void BaseMatrixT<T>::dotMulSquareSum(BaseMatrixT& b, BaseMatrixT& c, T p1,
BaseMatrixT& c,
T p1,
T p2) { T p2) {
applyTernary(ternary::DotMulSquareSum<T>(p1, p2), b, c); applyTernary(ternary::DotMulSquareSum<T>(p1, p2), b, c);
} }
DEFINE_MATRIX_TERNARY_PARAMETER_OP(DotSquareSum, DEFINE_MATRIX_TERNARY_PARAMETER_OP(DotSquareSum, TWO_PARAMETER,
TWO_PARAMETER,
T tmp = p1 * b + p2 * c; T tmp = p1 * b + p2 * c;
a = tmp * tmp); a = tmp * tmp);
template <class T> template<class T>
void BaseMatrixT<T>::dotSquareSum(BaseMatrixT& b, BaseMatrixT& c, T p1, T p2) { void BaseMatrixT<T>::dotSquareSum(BaseMatrixT& b, BaseMatrixT& c, T p1, T p2) {
applyTernary(ternary::DotSquareSum<T>(p1, p2), b, c); applyTernary(ternary::DotSquareSum<T>(p1, p2), b, c);
} }
DEFINE_MATRIX_TERNARY_PARAMETER_OP(DotMulSum, DEFINE_MATRIX_TERNARY_PARAMETER_OP(DotMulSum, TWO_PARAMETER,
TWO_PARAMETER,
a *= p1 * b + p2 * c); a *= p1 * b + p2 * c);
template <class T> template<class T>
void BaseMatrixT<T>::dotMulSum(BaseMatrixT& b, BaseMatrixT& c, T p1, T p2) { void BaseMatrixT<T>::dotMulSum(BaseMatrixT& b, BaseMatrixT& c, T p1, T p2) {
applyTernary(ternary::DotMulSum<T>(p1, p2), b, c); applyTernary(ternary::DotMulSum<T>(p1, p2), b, c);
} }
DEFINE_MATRIX_BINARY_OP(CopyAndClear, b = a; a = 0); DEFINE_MATRIX_BINARY_OP(CopyAndClear, b = a; a = 0);
template <class T> template<class T>
void BaseMatrixT<T>::copyAndClear(BaseMatrixT& b) { void BaseMatrixT<T>::copyAndClear(BaseMatrixT& b) {
applyBinary(binary::CopyAndClear<T>(), b); applyBinary(binary::CopyAndClear<T>(), b);
} }
DEFINE_MATRIX_TERNARY_PARAMETER_OP(AddDotMul, DEFINE_MATRIX_TERNARY_PARAMETER_OP(AddDotMul, TWO_PARAMETER,
TWO_PARAMETER,
a = p1 * a + p2 * b * c); a = p1 * a + p2 * b * c);
template <class T> template<class T>
void BaseMatrixT<T>::addDotMul(BaseMatrixT& b, BaseMatrixT& c, T p1, T p2) { void BaseMatrixT<T>::addDotMul(BaseMatrixT& b, BaseMatrixT& c, T p1, T p2) {
applyTernary(ternary::AddDotMul<T>(p1, p2), b, c); applyTernary(ternary::AddDotMul<T>(p1, p2), b, c);
} }
DEFINE_MATRIX_BINARY_OP(Assign, a = b;); DEFINE_MATRIX_BINARY_OP(Assign, a = b;);
template <class T> template<class T>
void BaseMatrixT<T>::assign(BaseMatrixT& b) { void BaseMatrixT<T>::assign(BaseMatrixT& b) {
if (useGpu_) { if (useGpu_) {
applyBinary(binary::Assign<T>(), b); applyBinary(binary::Assign<T>(), b);
...@@ -1361,7 +1230,7 @@ void BaseMatrixT<T>::assign(BaseMatrixT& b) { ...@@ -1361,7 +1230,7 @@ void BaseMatrixT<T>::assign(BaseMatrixT& b) {
} }
} }
template <class T> template<class T>
void BaseMatrixT<T>::assignAtOffset(BaseMatrixT& b, int64_t columnOffset) { void BaseMatrixT<T>::assignAtOffset(BaseMatrixT& b, int64_t columnOffset) {
if (columnOffset + b.width_ <= width_) { if (columnOffset + b.width_ <= width_) {
int numRows = height_; int numRows = height_;
...@@ -1381,31 +1250,24 @@ void BaseMatrixT<T>::assignAtOffset(BaseMatrixT& b, int64_t columnOffset) { ...@@ -1381,31 +1250,24 @@ void BaseMatrixT<T>::assignAtOffset(BaseMatrixT& b, int64_t columnOffset) {
} }
DEFINE_MATRIX_BINARY_OP(DeepSwap, T tmp = a; a = b; b = tmp); DEFINE_MATRIX_BINARY_OP(DeepSwap, T tmp = a; a = b; b = tmp);
template <class T> template<class T>
void BaseMatrixT<T>::deepSwap(BaseMatrixT& b) { void BaseMatrixT<T>::deepSwap(BaseMatrixT& b) {
applyBinary(binary::DeepSwap<T>(), b); applyBinary(binary::DeepSwap<T>(), b);
} }
template <> template<>
void BaseMatrixT<real>::rowDotMul(size_t destCol, void BaseMatrixT<real>::rowDotMul(size_t destCol,
BaseMatrixT& b, BaseMatrixT& b,
BaseMatrixT& c) { BaseMatrixT& c) {
int numRows = b.height_; int numRows = b.height_;
int numCols = b.width_; int numCols = b.width_;
MatrixOffset offset(destCol, 0, 0, 0, 0, 0); MatrixOffset offset(destCol, 0, 0, 0, 0, 0);
aggregate(aggregate::sum(), aggregate(aggregate::sum(), base::binary::mul(), base::binary::add(), b, c,
base::binary::mul(), numRows, numCols, offset, false_type(),
base::binary::add(),
b,
c,
numRows,
numCols,
offset,
false_type(),
true_type() /*aAsColVector*/); true_type() /*aAsColVector*/);
} }
template <class T> template<class T>
void BaseMatrixT<T>::rowDotMul2(size_t destCol, void BaseMatrixT<T>::rowDotMul2(size_t destCol,
BaseMatrixT& b, BaseMatrixT& b,
BaseMatrixT& c) { BaseMatrixT& c) {
...@@ -1428,24 +1290,17 @@ void BaseMatrixT<T>::rowDotMul2(size_t destCol, ...@@ -1428,24 +1290,17 @@ void BaseMatrixT<T>::rowDotMul2(size_t destCol,
} }
} }
template <> template<>
void BaseMatrixT<real>::addDotMulVMM(BaseMatrixT& b, BaseMatrixT& c) { void BaseMatrixT<real>::addDotMulVMM(BaseMatrixT& b, BaseMatrixT& c) {
MatrixOffset offset(0, 0, 0, 0, 0, 0); MatrixOffset offset(0, 0, 0, 0, 0, 0);
int numRows = b.height_; int numRows = b.height_;
int numCols = b.width_; int numCols = b.width_;
aggregate(aggregate::sum(), aggregate(aggregate::sum(), base::binary::mul(), base::binary::add(), b, c,
base::binary::mul(), numRows, numCols, offset, true_type() /*aAsRowVector*/,
base::binary::add(),
b,
c,
numRows,
numCols,
offset,
true_type() /*aAsRowVector*/,
false_type()); false_type());
} }
template <class T> template<class T>
void BaseMatrixT<T>::addDotMulVMM2(BaseMatrixT& b, BaseMatrixT& c) { void BaseMatrixT<T>::addDotMulVMM2(BaseMatrixT& b, BaseMatrixT& c) {
CHECK(!useGpu_) << "do not support gpu"; CHECK(!useGpu_) << "do not support gpu";
...@@ -1466,22 +1321,16 @@ void BaseMatrixT<T>::addDotMulVMM2(BaseMatrixT& b, BaseMatrixT& c) { ...@@ -1466,22 +1321,16 @@ void BaseMatrixT<T>::addDotMulVMM2(BaseMatrixT& b, BaseMatrixT& c) {
} }
DEFINE_MATRIX_TERNARY_OP(addDotMulMMV, a += b * c); DEFINE_MATRIX_TERNARY_OP(addDotMulMMV, a += b * c);
template <class T> template<class T>
void BaseMatrixT<T>::addDotMulMMV(BaseMatrixT& b, BaseMatrixT& c) { void BaseMatrixT<T>::addDotMulMMV(BaseMatrixT& b, BaseMatrixT& c) {
MatrixOffset offset(0, 0, 0, 0, 0, 0); MatrixOffset offset(0, 0, 0, 0, 0, 0);
int numRows = height_; int numRows = height_;
int numCols = width_; int numCols = width_;
applyTernary(ternary::addDotMulMMV<T>(), applyTernary(ternary::addDotMulMMV<T>(), b, c, numRows, numCols, offset,
b, true_type() /*cAsRowVector*/, false_type());
c,
numRows,
numCols,
offset,
true_type() /*cAsRowVector*/,
false_type());
} }
template <class T> template<class T>
void BaseMatrixT<T>::addDotMulMMV2(BaseMatrixT& b, BaseMatrixT& c) { void BaseMatrixT<T>::addDotMulMMV2(BaseMatrixT& b, BaseMatrixT& c) {
CHECK(!useGpu_) << "do not support gpu"; CHECK(!useGpu_) << "do not support gpu";
...@@ -1501,22 +1350,16 @@ void BaseMatrixT<T>::addDotMulMMV2(BaseMatrixT& b, BaseMatrixT& c) { ...@@ -1501,22 +1350,16 @@ void BaseMatrixT<T>::addDotMulMMV2(BaseMatrixT& b, BaseMatrixT& c) {
} }
} }
template <class T> template<class T>
void BaseMatrixT<T>::rowScale(size_t cCol, BaseMatrixT& b, BaseMatrixT& c) { void BaseMatrixT<T>::rowScale(size_t cCol, BaseMatrixT& b, BaseMatrixT& c) {
MatrixOffset offset(0, 0, 0, 0, cCol, 0); MatrixOffset offset(0, 0, 0, 0, cCol, 0);
int numRows = height_; int numRows = height_;
int numCols = width_; int numCols = width_;
applyTernary(ternary::DotMul<T>(), applyTernary(ternary::DotMul<T>(), b, c, numRows, numCols, offset,
b, false_type(), true_type() /*cAsColVector*/);
c,
numRows,
numCols,
offset,
false_type(),
true_type() /*cAsColVector*/);
} }
template <class T> template<class T>
void BaseMatrixT<T>::rowScale2(size_t cCol, BaseMatrixT& b, BaseMatrixT& c) { void BaseMatrixT<T>::rowScale2(size_t cCol, BaseMatrixT& b, BaseMatrixT& c) {
CHECK(!useGpu_) << "do not support gpu"; CHECK(!useGpu_) << "do not support gpu";
...@@ -1536,82 +1379,52 @@ void BaseMatrixT<T>::rowScale2(size_t cCol, BaseMatrixT& b, BaseMatrixT& c) { ...@@ -1536,82 +1379,52 @@ void BaseMatrixT<T>::rowScale2(size_t cCol, BaseMatrixT& b, BaseMatrixT& c) {
} }
} }
template <class T> template<class T>
void BaseMatrixT<T>::colScale(size_t cRow, BaseMatrixT& b, BaseMatrixT& c) { void BaseMatrixT<T>::colScale(size_t cRow, BaseMatrixT& b, BaseMatrixT& c) {
MatrixOffset offset(0, 0, 0, 0, 0, cRow); MatrixOffset offset(0, 0, 0, 0, 0, cRow);
int numRows = height_; int numRows = height_;
int numCols = width_; int numCols = width_;
applyTernary(ternary::DotMul<T>(), applyTernary(ternary::DotMul<T>(), b, c, numRows, numCols, offset,
b, true_type() /* cAsRowVector */, false_type() /* cAsColVector */);
c,
numRows,
numCols,
offset,
true_type() /* cAsRowVector */,
false_type() /* cAsColVector */);
} }
template <class T> template<class T>
void BaseMatrixT<T>::addColScale(size_t cRow, BaseMatrixT& b, BaseMatrixT& c) { void BaseMatrixT<T>::addColScale(size_t cRow, BaseMatrixT& b, BaseMatrixT& c) {
MatrixOffset offset(0, 0, 0, 0, 0, cRow); MatrixOffset offset(0, 0, 0, 0, 0, cRow);
int numRows = height_; int numRows = height_;
int numCols = width_; int numCols = width_;
applyTernary(ternary::addDotMulMMV<T>(), applyTernary(ternary::addDotMulMMV<T>(), b, c, numRows, numCols, offset,
b, true_type() /* cAsRowVector */, false_type() /* cAsColVector */);
c,
numRows,
numCols,
offset,
true_type() /* cAsRowVector */,
false_type() /* cAsColVector */);
} }
template <class T> template<class T>
void BaseMatrixT<T>::addRowScale(size_t cCol, BaseMatrixT& b, BaseMatrixT& c) { void BaseMatrixT<T>::addRowScale(size_t cCol, BaseMatrixT& b, BaseMatrixT& c) {
MatrixOffset offset(0, 0, 0, 0, cCol, 0); MatrixOffset offset(0, 0, 0, 0, cCol, 0);
int numRows = height_; int numRows = height_;
int numCols = width_; int numCols = width_;
applyTernary(ternary::addDotMulMMV<T>(), applyTernary(ternary::addDotMulMMV<T>(), b, c, numRows, numCols, offset,
b, false_type(), true_type() /*cAsColVector*/);
c,
numRows,
numCols,
offset,
false_type(),
true_type() /*cAsColVector*/);
} }
DEFINE_MATRIX_TERNARY_PARAMETER_OP(RowAdd, ONE_PARAMETER, a = b + p * c); DEFINE_MATRIX_TERNARY_PARAMETER_OP(RowAdd, ONE_PARAMETER, a = b + p * c);
template <class T> template<class T>
void BaseMatrixT<T>::rowAdd(size_t cCol, BaseMatrixT& b, BaseMatrixT& c, T p) { void BaseMatrixT<T>::rowAdd(size_t cCol, BaseMatrixT& b, BaseMatrixT& c, T p) {
MatrixOffset offset(0, 0, 0, 0, cCol, 0); MatrixOffset offset(0, 0, 0, 0, cCol, 0);
int numRows = height_; int numRows = height_;
int numCols = width_; int numCols = width_;
applyTernary(ternary::RowAdd<T>(p), applyTernary(ternary::RowAdd<T>(p), b, c, numRows, numCols, offset,
b, false_type(), true_type() /*cAsColVector*/);
c,
numRows,
numCols,
offset,
false_type(),
true_type() /*cAsColVector*/);
} }
DEFINE_MATRIX_TERNARY_OP(RowPow, a = pow(b, c)); DEFINE_MATRIX_TERNARY_OP(RowPow, a = pow(b, c));
template <> template<>
void BaseMatrixT<real>::rowPow(size_t cCol, BaseMatrixT& b, BaseMatrixT& c) { void BaseMatrixT<real>::rowPow(size_t cCol, BaseMatrixT& b, BaseMatrixT& c) {
if (useGpu_) { if (useGpu_) {
MatrixOffset offset(0, 0, 0, 0, cCol, 0); MatrixOffset offset(0, 0, 0, 0, cCol, 0);
int numRows = height_; int numRows = height_;
int numCols = width_; int numCols = width_;
applyTernary(ternary::RowPow<real>(), applyTernary(ternary::RowPow<real>(), b, c, numRows, numCols, offset,
b, false_type(), true_type() /*cAsColVector*/);
c,
numRows,
numCols,
offset,
false_type(),
true_type() /*cAsColVector*/);
} else { } else {
size_t height = this->height_; size_t height = this->height_;
size_t width = this->width_; size_t width = this->width_;
...@@ -1628,64 +1441,44 @@ void BaseMatrixT<real>::rowPow(size_t cCol, BaseMatrixT& b, BaseMatrixT& c) { ...@@ -1628,64 +1441,44 @@ void BaseMatrixT<real>::rowPow(size_t cCol, BaseMatrixT& b, BaseMatrixT& c) {
} }
} }
template <class T> template<class T>
void BaseMatrixT<T>::mulRowVector(BaseMatrixT& b) { void BaseMatrixT<T>::mulRowVector(BaseMatrixT& b) {
MatrixOffset offset(0, 0, 0, 0); MatrixOffset offset(0, 0, 0, 0);
int numRows = height_; int numRows = height_;
int numCols = width_; int numCols = width_;
applyBinary(binary::DotMul<T>(), applyBinary(binary::DotMul<T>(), b, numRows, numCols, offset,
b, true_type() /* bAsRowVector */, false_type());
numRows,
numCols,
offset,
true_type() /* bAsRowVector */,
false_type());
} }
DEFINE_MATRIX_BINARY_OP(DotDiv, a /= b); DEFINE_MATRIX_BINARY_OP(DotDiv, a /= b);
template <class T> template<class T>
void BaseMatrixT<T>::divRowVector(BaseMatrixT& b) { void BaseMatrixT<T>::divRowVector(BaseMatrixT& b) {
MatrixOffset offset(0, 0, 0, 0); MatrixOffset offset(0, 0, 0, 0);
int numRows = height_; int numRows = height_;
int numCols = width_; int numCols = width_;
applyBinary(binary::DotDiv<T>(), applyBinary(binary::DotDiv<T>(), b, numRows, numCols, offset,
b, true_type() /* bAsRowVector */, false_type());
numRows,
numCols,
offset,
true_type() /* bAsRowVector */,
false_type());
} }
template <class T> template<class T>
void BaseMatrixT<T>::mulColVector(BaseMatrixT& b) { void BaseMatrixT<T>::mulColVector(BaseMatrixT& b) {
MatrixOffset offset(0, 0, 0, 0); MatrixOffset offset(0, 0, 0, 0);
int numRows = height_; int numRows = height_;
int numCols = width_; int numCols = width_;
applyBinary(binary::DotMul<T>(), applyBinary(binary::DotMul<T>(), b, numRows, numCols, offset,
b, false_type(), true_type() /* bAsColVector */);
numRows,
numCols,
offset,
false_type(),
true_type() /* bAsColVector */);
} }
template <class T> template<class T>
void BaseMatrixT<T>::divColVector(BaseMatrixT& b) { void BaseMatrixT<T>::divColVector(BaseMatrixT& b) {
MatrixOffset offset(0, 0, 0, 0); MatrixOffset offset(0, 0, 0, 0);
int numRows = height_; int numRows = height_;
int numCols = width_; int numCols = width_;
applyBinary(binary::DotDiv<T>(), applyBinary(binary::DotDiv<T>(), b, numRows, numCols, offset,
b, false_type(), true_type() /* bAsColVector */);
numRows,
numCols,
offset,
false_type(),
true_type() /* bAsColVector */);
} }
template <> template<>
template <class Agg> template <class Agg>
int BaseMatrixT<real>::applyRow(Agg agg, BaseMatrixT& b) { int BaseMatrixT<real>::applyRow(Agg agg, BaseMatrixT& b) {
MatrixOffset offset(0, 0, 0, 0, 0, 0); MatrixOffset offset(0, 0, 0, 0, 0, 0);
...@@ -1693,20 +1486,13 @@ int BaseMatrixT<real>::applyRow(Agg agg, BaseMatrixT& b) { ...@@ -1693,20 +1486,13 @@ int BaseMatrixT<real>::applyRow(Agg agg, BaseMatrixT& b) {
size_t numCols = b.width_; size_t numCols = b.width_;
CHECK_EQ(height_, numRows); CHECK_EQ(height_, numRows);
CHECK_EQ(width_, 1UL); CHECK_EQ(width_, 1UL);
aggregate(agg, aggregate(agg, base::unary::identity(), base::binary::second(), b, numRows,
base::unary::identity(), numCols, offset, false_type(), true_type() /*aAsColVector*/);
base::binary::second(),
b,
numRows,
numCols,
offset,
false_type(),
true_type() /*aAsColVector*/);
return 0; return 0;
} }
template <> template<>
template <class Agg, class Saver> template <class Agg, class Saver>
int BaseMatrixT<real>::applyRow(Agg agg, Saver sv, BaseMatrixT& b) { int BaseMatrixT<real>::applyRow(Agg agg, Saver sv, BaseMatrixT& b) {
MatrixOffset offset(0, 0, 0, 0, 0, 0); MatrixOffset offset(0, 0, 0, 0, 0, 0);
...@@ -1714,25 +1500,16 @@ int BaseMatrixT<real>::applyRow(Agg agg, Saver sv, BaseMatrixT& b) { ...@@ -1714,25 +1500,16 @@ int BaseMatrixT<real>::applyRow(Agg agg, Saver sv, BaseMatrixT& b) {
size_t numCols = b.width_; size_t numCols = b.width_;
CHECK_EQ(height_, numRows); CHECK_EQ(height_, numRows);
CHECK_EQ(width_, 1UL); CHECK_EQ(width_, 1UL);
aggregate(agg, aggregate(agg, base::unary::identity(), sv, b, numRows, numCols, offset,
base::unary::identity(), false_type(), true_type() /*aAsColVector*/);
sv,
b,
numRows,
numCols,
offset,
false_type(),
true_type() /*aAsColVector*/);
return 0; return 0;
} }
template <> template<>
template <class Agg> template <class Agg>
int BaseMatrixT<real>::applyRow(Agg agg, int BaseMatrixT<real>::applyRow(
real scaleDest, Agg agg, real scaleDest, real scaleAgg, BaseMatrixT& b) {
real scaleAgg,
BaseMatrixT& b) {
if (scaleDest != 0) { if (scaleDest != 0) {
applyRow(agg, base::binary::add2(scaleDest, scaleAgg), b); applyRow(agg, base::binary::add2(scaleDest, scaleAgg), b);
} else { } else {
...@@ -1744,10 +1521,10 @@ int BaseMatrixT<real>::applyRow(Agg agg, ...@@ -1744,10 +1521,10 @@ int BaseMatrixT<real>::applyRow(Agg agg,
return 0; return 0;
} }
template <> template<>
template <class Agg, class Op, class Saver> template <class Agg, class Op, class Saver>
int BaseMatrixT<real>::applyRow( int BaseMatrixT<real>::applyRow(Agg agg, Op op, Saver sv,
Agg agg, Op op, Saver sv, BaseMatrixT& b, BaseMatrixT& c) { BaseMatrixT& b, BaseMatrixT& c) {
MatrixOffset offset(0, 0, 0, 0, 0, 0); MatrixOffset offset(0, 0, 0, 0, 0, 0);
size_t numRows = b.height_; size_t numRows = b.height_;
size_t numCols = b.width_; size_t numCols = b.width_;
...@@ -1755,27 +1532,16 @@ int BaseMatrixT<real>::applyRow( ...@@ -1755,27 +1532,16 @@ int BaseMatrixT<real>::applyRow(
CHECK_EQ(width_, 1UL); CHECK_EQ(width_, 1UL);
CHECK_EQ(c.height_, numRows); CHECK_EQ(c.height_, numRows);
CHECK_EQ(c.width_, numCols); CHECK_EQ(c.width_, numCols);
aggregate(agg, aggregate(agg, op, sv,
op, b, c, numRows, numCols, offset,
sv, false_type(), true_type() /*aAsColVector*/);
b,
c,
numRows,
numCols,
offset,
false_type(),
true_type() /*aAsColVector*/);
return 0; return 0;
} }
template <> template<>
template <class Agg, class Op> template <class Agg, class Op>
int BaseMatrixT<real>::applyRow(Agg agg, int BaseMatrixT<real>::applyRow(Agg agg, Op op, real scaleDest, real scaleAgg,
Op op, BaseMatrixT& b, BaseMatrixT& c) {
real scaleDest,
real scaleAgg,
BaseMatrixT& b,
BaseMatrixT& c) {
if (scaleDest != 0) { if (scaleDest != 0) {
applyRow(agg, op, base::binary::add2(scaleDest, scaleAgg), b, c); applyRow(agg, op, base::binary::add2(scaleDest, scaleAgg), b, c);
} else { } else {
...@@ -1787,7 +1553,7 @@ int BaseMatrixT<real>::applyRow(Agg agg, ...@@ -1787,7 +1553,7 @@ int BaseMatrixT<real>::applyRow(Agg agg,
return 0; return 0;
} }
template <> template<>
template <class Agg> template <class Agg>
int BaseMatrixT<real>::applyCol(Agg agg, BaseMatrixT& b) { int BaseMatrixT<real>::applyCol(Agg agg, BaseMatrixT& b) {
MatrixOffset offset(0, 0, 0, 0, 0, 0); MatrixOffset offset(0, 0, 0, 0, 0, 0);
...@@ -1795,20 +1561,13 @@ int BaseMatrixT<real>::applyCol(Agg agg, BaseMatrixT& b) { ...@@ -1795,20 +1561,13 @@ int BaseMatrixT<real>::applyCol(Agg agg, BaseMatrixT& b) {
size_t numCols = b.width_; size_t numCols = b.width_;
CHECK_EQ(width_, numCols); CHECK_EQ(width_, numCols);
CHECK_EQ(height_, 1UL); CHECK_EQ(height_, 1UL);
aggregate(agg, aggregate(agg, base::unary::identity(), base::binary::second(), b, numRows,
base::unary::identity(), numCols, offset, true_type() /*aAsRowVector*/, false_type());
base::binary::second(),
b,
numRows,
numCols,
offset,
true_type() /*aAsRowVector*/,
false_type());
return 0; return 0;
} }
template <> template<>
template <class Agg, class Saver> template <class Agg, class Saver>
int BaseMatrixT<real>::applyCol(Agg agg, Saver sv, BaseMatrixT& b) { int BaseMatrixT<real>::applyCol(Agg agg, Saver sv, BaseMatrixT& b) {
MatrixOffset offset(0, 0, 0, 0, 0, 0); MatrixOffset offset(0, 0, 0, 0, 0, 0);
...@@ -1816,25 +1575,16 @@ int BaseMatrixT<real>::applyCol(Agg agg, Saver sv, BaseMatrixT& b) { ...@@ -1816,25 +1575,16 @@ int BaseMatrixT<real>::applyCol(Agg agg, Saver sv, BaseMatrixT& b) {
size_t numCols = b.width_; size_t numCols = b.width_;
CHECK_EQ(width_, numCols); CHECK_EQ(width_, numCols);
CHECK_EQ(height_, 1UL); CHECK_EQ(height_, 1UL);
aggregate(agg, aggregate(agg, base::unary::identity(), sv, b, numRows, numCols, offset,
base::unary::identity(), true_type() /*aAsRowVector*/, false_type());
sv,
b,
numRows,
numCols,
offset,
true_type() /*aAsRowVector*/,
false_type());
return 0; return 0;
} }
template <> template<>
template <class Agg> template <class Agg>
int BaseMatrixT<real>::applyCol(Agg agg, int BaseMatrixT<real>::applyCol(
real scaleDest, Agg agg, real scaleDest, real scaleAgg, BaseMatrixT& b) {
real scaleAgg,
BaseMatrixT& b) {
if (scaleDest != 0) { if (scaleDest != 0) {
applyCol(agg, base::binary::add2(scaleDest, scaleAgg), b); applyCol(agg, base::binary::add2(scaleDest, scaleAgg), b);
} else { } else {
...@@ -1846,51 +1596,48 @@ int BaseMatrixT<real>::applyCol(Agg agg, ...@@ -1846,51 +1596,48 @@ int BaseMatrixT<real>::applyCol(Agg agg,
return 0; return 0;
} }
template <> template<>
void BaseMatrixT<real>::sumRows(BaseMatrixT& b, real scaleSum, real scaleDest) { void BaseMatrixT<real>::sumRows(BaseMatrixT& b, real scaleSum, real scaleDest) {
applyRow(aggregate::sum(), scaleDest, scaleSum, b); applyRow(aggregate::sum(), scaleDest, scaleSum, b);
} }
template <> template<>
void BaseMatrixT<real>::maxRows(BaseMatrixT& b) { void BaseMatrixT<real>::maxRows(BaseMatrixT& b) {
applyRow(aggregate::max(), b); applyRow(aggregate::max(), b);
} }
template <> template<>
void BaseMatrixT<real>::minRows(BaseMatrixT& b) { void BaseMatrixT<real>::minRows(BaseMatrixT& b) {
applyRow(aggregate::min(), b); applyRow(aggregate::min(), b);
} }
template <> template<>
void BaseMatrixT<real>::maxCols(BaseMatrixT& b) { void BaseMatrixT<real>::maxCols(BaseMatrixT& b) {
applyCol(aggregate::max(), b); applyCol(aggregate::max(), b);
} }
template <> template<>
void BaseMatrixT<real>::minCols(BaseMatrixT& b) { void BaseMatrixT<real>::minCols(BaseMatrixT& b) {
applyCol(aggregate::min(), b); applyCol(aggregate::min(), b);
} }
template <> template<>
void BaseMatrixT<real>::sumCols(BaseMatrixT& b, real scaleSum, real scaleDest) { void BaseMatrixT<real>::sumCols(BaseMatrixT& b, real scaleSum, real scaleDest) {
applyCol(aggregate::sum(), scaleDest, scaleSum, b); applyCol(aggregate::sum(), scaleDest, scaleSum, b);
} }
template <> template<>
void BaseMatrixT<real>::sumOfSquaredDiffs(BaseMatrixT& b, void BaseMatrixT<real>::sumOfSquaredDiffs(
BaseMatrixT& c, BaseMatrixT& b, BaseMatrixT& c, real scaleSum, real scaleDest) {
real scaleSum, applyRow(aggregate::sum(), base::binary::squaredDiff(),
real scaleDest) { scaleDest, scaleSum, b, c);
applyRow(
aggregate::sum(), base::binary::squaredDiff(), scaleDest, scaleSum, b, c);
} }
template <> template<>
void BaseMatrixT<real>::sumOfProducts(BaseMatrixT& b, void BaseMatrixT<real>::sumOfProducts(
BaseMatrixT& c, BaseMatrixT& b, BaseMatrixT& c, real scaleSum, real scaleDest) {
real scaleSum, applyRow(aggregate::sum(), base::binary::mul(),
real scaleDest) { scaleDest, scaleSum, b, c);
applyRow(aggregate::sum(), base::binary::mul(), scaleDest, scaleSum, b, c);
} }
template class BaseMatrixT<real>; template class BaseMatrixT<real>;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册