diff --git a/paddle/math/BaseMatrix.cu b/paddle/math/BaseMatrix.cu
index a723ef7bc8329329fa82113f8e96a1bdbe750277..351744f1ef0e99d4978a8286f00e110ac77a5d42 100644
--- a/paddle/math/BaseMatrix.cu
+++ b/paddle/math/BaseMatrix.cu
@@ -1584,11 +1584,6 @@ void BaseMatrixT<real>::minRows(BaseMatrixT& b) {
   applyRow(aggregate::min(), b);
 }
 
-template<>
-void BaseMatrixT<real>::sumCols(BaseMatrixT& b) {
-  applyCol(aggregate::sum(), b);
-}
-
 template<>
 void BaseMatrixT<real>::maxCols(BaseMatrixT& b) {
   applyCol(aggregate::max(), b);
diff --git a/paddle/math/BaseMatrix.h b/paddle/math/BaseMatrix.h
index ea58c861a3d6a03642291c172af76795e90fcb92..4cc4f4d29ddabfc05f5837633fb73a650920823e 100644
--- a/paddle/math/BaseMatrix.h
+++ b/paddle/math/BaseMatrix.h
@@ -1018,8 +1018,6 @@ public:
   /// calculate the minimum value of each row of the matrix b.
   void minRows(BaseMatrixT& b);
 
-  /// calculate the sum of each column of the matrix b.
-  void sumCols(BaseMatrixT& b);
   /// calculate the maximum value of each column of the matrix b.
   void maxCols(BaseMatrixT& b);
   /// calculate the minimum value of each column of the matrix b.
diff --git a/paddle/math/tests/CMakeLists.txt b/paddle/math/tests/CMakeLists.txt
index 33d4478b4d36d7be5da6fb43365acb95e5bc7d04..893597b158750876c307defc87d55c8fa1a10173 100644
--- a/paddle/math/tests/CMakeLists.txt
+++ b/paddle/math/tests/CMakeLists.txt
@@ -2,7 +2,7 @@
 add_simple_unittest(test_ExecViaCpu)
 add_simple_unittest(test_SIMDFunctions)
-add_simple_unittest(test_matrix)
+add_simple_unittest(test_SparseMatrix)
 
 # TODO(yuyang18): Refactor TestUtil.cpp. Remove this cross module reference.
 add_unittest(test_matrixCompare
@@ -14,4 +14,6 @@ add_simple_unittest(test_perturbation)
 add_simple_unittest(test_CpuGpuVector)
 add_simple_unittest(test_Allocator)
 add_simple_unittest(test_FPException)
-add_simple_unittest(test_GpuProfiler)
\ No newline at end of file
+add_simple_unittest(test_GpuProfiler)
+add_simple_unittest(test_BaseMatrix)
+add_simple_unittest(test_Matrix)
diff --git a/paddle/math/tests/TensorCheck.h b/paddle/math/tests/TensorCheck.h
new file mode 100644
index 0000000000000000000000000000000000000000..956bcf61a455dea6fdded823cd2fdd4801b0771a
--- /dev/null
+++ b/paddle/math/tests/TensorCheck.h
@@ -0,0 +1,216 @@
+/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+/**
+ * This file provides a TensorCheck template function, which can be used to
+ * compare CpuMatrix and GpuMatrix, CpuVector and GpuVector, and so on.
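+ *
+ * A minimal usage sketch (hypothetical shapes; TensorCheckEqual and
+ * TensorCheckErr are defined at the end of this file):
+ *
+ *   CpuMatrix cpuA(32, 32);
+ *   GpuMatrix gpuA(32, 32);
+ *   cpuA.randomizeUniform();
+ *   gpuA.copyFrom(cpuA);
+ *   autotest::TensorCheckEqual(cpuA, gpuA);  // exact element-wise equality
+ *   autotest::TensorCheckErr(cpuA, gpuA);    // comparison within a tolerance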
+ */
+
+#include <cmath>
+#include "paddle/math/Matrix.h"
+
+namespace autotest {
+
+using paddle::Matrix;
+using paddle::CpuMatrix;
+using paddle::GpuMatrix;
+using paddle::VectorT;
+using paddle::CpuVectorT;
+using paddle::GpuVectorT;
+
+class AssertEqual {
+public:
+  AssertEqual(real err = 0) : err_(err) {}
+
+  inline bool operator()(real a, real b) {
+    if (err_ == 0) {
+      if (a != b) {
+        return false;
+      }
+    } else {
+      if (std::fabs(a - b) > err_) {
+        if ((std::fabs(a - b) / std::fabs(a)) > (err_ / 10.0f)) {
+          return false;
+        }
+      }
+    }
+
+    return true;
+  }
+
+private:
+  real err_;
+};
+
+template <typename Tensor>
+class CopyToCpu;
+
+template <>
+class CopyToCpu<CpuMatrix> {
+public:
+  explicit CopyToCpu(const CpuMatrix& arg) : arg_(arg) {}
+  const CpuMatrix& copiedArg() const { return arg_; }
+
+private:
+  const CpuMatrix& arg_;
+};
+
+template <>
+class CopyToCpu<GpuMatrix> {
+public:
+  explicit CopyToCpu(const GpuMatrix& arg)
+      : arg_(arg.getHeight(), arg.getWidth()) {
+    arg_.copyFrom(arg);
+  }
+  CpuMatrix& copiedArg() { return arg_; }
+
+private:
+  CpuMatrix arg_;
+};
+
+template <>
+class CopyToCpu<Matrix> {
+public:
+  explicit CopyToCpu(const Matrix& arg)
+      : arg_(arg.getHeight(), arg.getWidth()) {
+    arg_.copyFrom(arg);
+  }
+  CpuMatrix& copiedArg() { return arg_; }
+
+private:
+  CpuMatrix arg_;
+};
+
+template <typename T>
+class CopyToCpu<CpuVectorT<T>> {
+public:
+  explicit CopyToCpu(const CpuVectorT<T>& arg) : arg_(arg) {}
+  const CpuVectorT<T>& copiedArg() const { return arg_; }
+
+private:
+  const CpuVectorT<T>& arg_;
+};
+
+template <typename T>
+class CopyToCpu<GpuVectorT<T>> {
+public:
+  explicit CopyToCpu(const GpuVectorT<T>& arg) : arg_(arg.getSize()) {
+    arg_.copyFrom(arg);
+  }
+  CpuVectorT<T>& copiedArg() { return arg_; }
+
+private:
+  CpuVectorT<T> arg_;
+};
+
+template <typename T>
+class CopyToCpu<VectorT<T>> {
+public:
+  explicit CopyToCpu(const VectorT<T>& arg) : arg_(arg.getSize()) {
+    arg_.copyFrom(arg);
+  }
+  CpuVectorT<T>& copiedArg() { return arg_; }
+
+private:
+  CpuVectorT<T> arg_;
+};
+
+template <typename AssertEq>
+void TensorCheck(AssertEq compare,
+                 const CpuMatrix& matrix1,
+                 const CpuMatrix& matrix2) {
+  CHECK(matrix1.getHeight() == matrix2.getHeight());
+  CHECK(matrix1.getWidth() == matrix2.getWidth());
+
+  int height = matrix1.getHeight();
+  int width = matrix1.getWidth();
+  const real* data1 = matrix1.getData();
+  const real* data2 = matrix2.getData();
+  int count = 0;
+  for (int i = 0; i < height; i++) {
+    for (int j = 0; j < width; j++) {
+      real a = data1[i * width + j];
+      real b = data2[i * width + j];
+      if (!compare(a, b)) {
+        count++;
+      }
+    }
+  }
+  EXPECT_EQ(count, 0) << "There are " << count << " different elements.";
+}
+
+template <typename AssertEq, class T>
+void TensorCheck(AssertEq compare,
+                 const CpuVectorT<T>& vector1,
+                 const CpuVectorT<T>& vector2) {
+  CHECK(vector1.getSize() == vector2.getSize());
+
+  const T* data1 = vector1.getData();
+  const T* data2 = vector2.getData();
+  size_t size = vector1.getSize();
+  int count = 0;
+  for (size_t i = 0; i < size; i++) {
+    real a = data1[i];
+    real b = data2[i];
+    if (!compare(a, b)) {
+      count++;
+    }
+  }
+  EXPECT_EQ(count, 0) << "There are " << count << " different elements.";
+}
+
+template <typename AssertEq, typename Tensor1, typename Tensor2>
+void TensorCheck(AssertEq compare,
+                 const Tensor1& tensor1,
+                 const Tensor2& tensor2) {
+  TensorCheck(compare,
+              CopyToCpu<Tensor1>(tensor1).copiedArg(),
+              CopyToCpu<Tensor2>(tensor2).copiedArg());
+}
+
+template <typename AssertEq>
+void TensorCheck(AssertEq compare, real args1, real args2) {
+  EXPECT_EQ(compare(args1, args2), true) << "[Test error] args1 = " << args1
+                                         << ", args2 = " << args2;
+}
+
+template <typename AssertEq>
+void TensorCheck(AssertEq compare, size_t args1, size_t args2) {
+  EXPECT_EQ(args1, args2) << "[Test error] args1 = " << args1
+                          << ", args2 = " << args2;
+}
+
+template <typename Tensor1, typename Tensor2>
+void TensorCheckEqual(const Tensor1& tensor1, const Tensor2& tensor2) {
+  AssertEqual compare(0);
+  TensorCheck(compare,
+              CopyToCpu<Tensor1>(tensor1).copiedArg(),
+              CopyToCpu<Tensor2>(tensor2).copiedArg());
+}
+
+template <typename Tensor1, typename Tensor2>
+void TensorCheckErr(const Tensor1& tensor1, const Tensor2& tensor2) {
+#ifndef PADDLE_TYPE_DOUBLE
+  AssertEqual compare(1e-3);
+#else
+  AssertEqual compare(1e-10);
+#endif
+  TensorCheck(compare,
+              CopyToCpu<Tensor1>(tensor1).copiedArg(),
+              CopyToCpu<Tensor2>(tensor2).copiedArg());
+}
+
+}  // namespace autotest
diff --git a/paddle/math/tests/TestUtils.h b/paddle/math/tests/TestUtils.h
new file mode 100644
index 0000000000000000000000000000000000000000..2edb07de0144ba194cd18e644fbc93efcbe4837a
--- /dev/null
+++ b/paddle/math/tests/TestUtils.h
@@ -0,0 +1,294 @@
+/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+/**
+ * This file provides an AutoCompare class to simplify the comparison
+ * of CPU and GPU member functions.
+ *
+ * This takes two steps:
+ * 1. Construct an AutoCompare object.
+ *    When constructing an AutoCompare object, you can set the err argument
+ *    to specify the maximum error allowed between the CPU and GPU functions.
+ *
+ * 2. Use the template functions cmpWithArg or cmpWithoutArg.
+ * A. [cmpWithArg] Requires the caller to construct the CPU arguments.
+ *
+ *    AutoCompare test(height, width);
+ *    Initialize arguments arg1, arg2, ...
+ *    test.cmpWithArg(function, arg1, arg2, ...)
+ *
+ * B. [cmpWithoutArg] The caller does not need to construct arguments.
+ *    Applicable when all matrices used as function arguments have the same
+ *    size, such as the element-wise functions and the aggregate functions
+ *    defined in BaseMatrix.cpp.
+ *
+ *    AutoCompare test(height, width);
+ *    test.cmpWithoutArg<0, 1>(function, height, width)
+ */
+
+#include <gtest/gtest.h>
+#include "paddle/math/Matrix.h"
+#include "paddle/math/SparseMatrix.h"
+#include "TensorCheck.h"
+
+namespace autotest {
+
+using paddle::BaseMatrix;
+using paddle::CpuMatrix;
+using paddle::GpuMatrix;
+using paddle::CpuIVector;
+using paddle::GpuIVector;
+using paddle::CpuSparseMatrix;
+using paddle::GpuSparseMatrix;
+
+template <typename T1, typename T2>
+class ReplaceType {
+public:
+  typedef T1 type;
+};
+
+template <>
+class ReplaceType<BaseMatrix, CpuMatrix> {
+public:
+  typedef CpuMatrix type;
+};
+
+template <>
+class ReplaceType<BaseMatrix, GpuMatrix> {
+public:
+  typedef GpuMatrix type;
+};
+
+template <>
+class ReplaceType<Matrix, CpuMatrix> {
+public:
+  typedef CpuMatrix type;
+};
+
+template <>
+class ReplaceType<Matrix, GpuMatrix> {
+public:
+  typedef GpuMatrix type;
+};
+
+// construct an argument
+template <typename T>
+T construct(int height, int width);
+
+template <>
+float construct(int height, int width) {
+  return 0.5;
+}
+
+template <>
+double construct(int height, int width) {
+  return 0.5;
+}
+
+template <>
+size_t construct(int height, int width) {
+  size_t offset = std::rand() % (height < width ? height : width);
+  return offset;
+}
+
+template <>
+CpuMatrix construct(int height, int width) {
+  CpuMatrix a(height, width);
+  return a;
+}
+
+template <>
+GpuMatrix construct(int height, int width) {
+  GpuMatrix a(height, width);
+  return a;
+}
+
+// init an argument
+template <typename T>
+void init(T& v) {
+  return;
+}
+
+template <>
+void init(CpuMatrix& v) {
+  v.randomizeUniform();
+}
+
+template <>
+void init(GpuMatrix& v) {
+  v.randomizeUniform();
+}
+
+// init a tuple which contains a set of arguments.
+template <std::size_t I = 0, typename... Args>
+inline typename std::enable_if<I == sizeof...(Args), void>::type initTuple(
+    std::tuple<Args...>& t) {}
+
+template <std::size_t I = 0, typename... Args>
+    inline typename std::enable_if <
+    I<sizeof...(Args), void>::type initTuple(std::tuple<Args...>& t) {
+  init(std::get<I>(t));
+  initTuple<I + 1>(t);
+}
+
+// copy an argument, copy src to dest
+template <typename T1, typename T2>
+void copy(T1& dest, T2& src) {
+  dest = src;
+}
+
+template <>
+void copy(GpuMatrix& dest, CpuMatrix& src) {
+  dest.copyFrom(src);
+}
+
+// copy a tuple, copy src to dest
+template <std::size_t I = 0, typename... Args1, typename... Args2>
+inline typename std::enable_if<I == sizeof...(Args1), void>::type copyTuple(
+    std::tuple<Args1...>& dest, std::tuple<Args2...>& src) {}
+
+template <std::size_t I = 0, typename... Args1, typename... Args2>
+    inline typename std::enable_if <
+    I<sizeof...(Args1), void>::type copyTuple(std::tuple<Args1...>& dest,
+                                              std::tuple<Args2...>& src) {
+  copy(std::get<I>(dest), std::get<I>(src));
+  copyTuple<I + 1>(dest, src);
+}
+
+// call member function
+template <typename C,
+          typename R,
+          typename FC,
+          typename... FArgs,
+          typename... Args>
+R call(C& obj, R (FC::*f)(FArgs...), Args&&... args) {
+  return (obj.*f)(args...);
+}
+
+template <typename T>
+class ReturnType {
+public:
+  typedef T type;
+};
+
+template <>
+class ReturnType<CpuMatrix> {
+public:
+  typedef GpuMatrix type;
+};
+
+template <>
+class ReturnType<CpuIVector> {
+public:
+  typedef GpuIVector type;
+};
+
+template <>
+class ReturnType<CpuSparseMatrix> {
+public:
+  typedef GpuSparseMatrix type;
+};
+
+template <typename T>
+typename ReturnType<T>::type autoArgs(T& v) {
+  return v;
+}
+
+template <>
+GpuMatrix autoArgs(CpuMatrix& v) {
+  GpuMatrix a(v.getHeight(), v.getWidth());
+  a.copyFrom(v);
+  return a;
+}
+
+template <>
+GpuIVector autoArgs(CpuIVector& v) {
+  GpuIVector a(v.getSize());
+  a.copyFrom(v);
+  return a;
+}
+
+template <>
+GpuSparseMatrix autoArgs(CpuSparseMatrix& v) {
+  GpuSparseMatrix a(v.getHeight(),
+                    v.getWidth(),
+                    v.getElementCnt(),
+                    v.getValueType(),
+                    v.getFormat());
+  a.copyFrom(v, HPPL_STREAM_DEFAULT);
+  hl_stream_synchronize(HPPL_STREAM_DEFAULT);
+  return a;
+}
+
+class AutoCompare {
+public:
+  /**
+   * err is the allowed calculation error.
+   * The smaller the value of err,
+   * the stricter the comparison between the CPU and GPU calculations.
+   */
+  AutoCompare(size_t height, size_t width, real err = 1e-3)
+      : cpu(height, width), gpu(height, width), compare(err) {
+    init(cpu);
+    copy(gpu, cpu);
+  }
+
+  template <typename R, typename C, typename... FArgs, typename... Args>
+  void cmpWithArg(R (C::*f)(FArgs...), Args&&... args) {
+    static_assert(sizeof...(FArgs) == sizeof...(Args),
+                  "size of parameter packs are not equal");
+    call(cpu, f, args...);
+    call(gpu, f, autoArgs(args)...);
+
+    TensorCheck(compare, cpu, gpu);
+  }
+
+  template <std::size_t... I, typename R, typename C, typename... Args>
+  void cmpWithoutArg(R (C::*f)(Args...), size_t height, size_t width) {
+    static_assert(sizeof...(I) == sizeof...(Args),
+                  "size of parameter packs are not equal");
+    (void)height;
+    (void)width;
+    auto tuple1 = std::make_tuple(
+        construct<typename ReplaceType<
+            typename std::decay<typename std::tuple_element<
+                I, std::tuple<Args...>>::type>::type,
+            CpuMatrix>::type>(height, width)...);
+
+    auto tuple2 = std::make_tuple(
+        construct<typename ReplaceType<
+            typename std::decay<typename std::tuple_element<
+                I, std::tuple<Args...>>::type>::type,
+            GpuMatrix>::type>(height, width)...);
+
+    initTuple(tuple1);
+    copyTuple(tuple2, tuple1);
+
+    call(cpu, f, std::get<I>(tuple1)...);
+    call(gpu, f, std::get<I>(tuple2)...);
+
+    TensorCheck(compare, cpu, gpu);
+  }
+
+protected:
+  CpuMatrix cpu;
+  GpuMatrix gpu;
+  AssertEqual compare;
+};
+
+}  // namespace autotest
diff --git a/paddle/math/tests/test_BaseMatrix.cpp b/paddle/math/tests/test_BaseMatrix.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..521ea8aeb09744a59e64d493062ce42748ee716b
--- /dev/null
+++ b/paddle/math/tests/test_BaseMatrix.cpp
@@ -0,0 +1,251 @@
+/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifndef PADDLE_ONLY_CPU
+/**
+ * This test file uses autotest::AutoCompare and cmpWithoutArg to compare
+ * the CPU and GPU implementations of the member functions in BaseMatrix.cpp
+ * and Matrix.cpp.
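+ *
+ * As a sketch of what each comparison below expands to:
+ *
+ *   AutoCompare test(height, width, 1e-5);
+ *   test.cmpWithoutArg(&BaseMatrix::neg, height, width);
+ *
+ * runs BaseMatrix::neg on a CpuMatrix and a GpuMatrix initialized with the
+ * same random data, then checks the two results element by element against
+ * the 1e-5 error bound.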
+ */
+
+#include <gtest/gtest.h>
+#include "paddle/math/BaseMatrix.h"
+#include "TestUtils.h"
+
+using paddle::BaseMatrix;
+using paddle::Matrix;
+using autotest::AutoCompare;
+
+// Test all void (BaseMatrix::*)() functions
+TEST(BaseMatrix, void) {
+  for (auto height : {1, 3, 11, 73, 128, 200, 330}) {
+    for (auto width : {1, 3, 32, 100, 512, 1000, 3210}) {
+      auto compare = [height, width](void (BaseMatrix::*f)()) {
+        AutoCompare test(height, width, 1e-5);
+        test.cmpWithoutArg(f, height, width);
+      };
+
+      compare(&BaseMatrix::neg);
+      compare(&BaseMatrix::exp);
+      compare(&BaseMatrix::log);
+      compare(&BaseMatrix::sqrt);
+      compare(&BaseMatrix::square);
+      compare(&BaseMatrix::reciprocal);
+      compare(&BaseMatrix::abs);
+      compare(&BaseMatrix::sign);
+      compare(&BaseMatrix::zero);
+      compare(&BaseMatrix::one);
+    }
+  }
+}
+
+// Test all void (BaseMatrix::*)(real) functions
+TEST(BaseMatrix, real) {
+  for (auto height : {1, 3, 11, 73, 128, 200, 330}) {
+    for (auto width : {1, 3, 32, 100, 512, 1000, 3210}) {
+      auto compare = [height, width](void (BaseMatrix::*f)(real)) {
+        AutoCompare test(height, width, 1e-5);
+        test.cmpWithoutArg<0>(f, height, width);
+      };
+
+      compare(&BaseMatrix::pow);
+      compare(&BaseMatrix::subScalar);
+      compare(&BaseMatrix::mulScalar);
+      compare(&BaseMatrix::divScalar);
+      compare(&BaseMatrix::assign);
+      compare(&BaseMatrix::add);
+      compare(&BaseMatrix::biggerThanScalar);
+      compare(&BaseMatrix::downClip);
+    }
+  }
+}
+
+// Test all void (BaseMatrix::*)(BaseMatrix&) functions
+TEST(BaseMatrix, BaseMatrix) {
+  for (auto height : {1, 3, 11, 73, 128, 200, 330}) {
+    for (auto width : {1, 3, 32, 100, 512, 1000, 3210}) {
+      auto compare = [height, width](void (BaseMatrix::*f)(BaseMatrix&)) {
+        AutoCompare test(height, width, 1e-5);
+        test.cmpWithoutArg<0>(f, height, width);
+      };
+
+      compare(&BaseMatrix::assign);
+      compare(&BaseMatrix::add);
+      compare(&BaseMatrix::relu);
+      compare(&BaseMatrix::reluDerivative);
+      compare(&BaseMatrix::softrelu);
+      compare(&BaseMatrix::softreluDerivative);
+      compare(&BaseMatrix::brelu);
+      compare(&BaseMatrix::breluDerivative);
+      compare(&BaseMatrix::square);
+      compare(&BaseMatrix::squareDerivative);
+      compare(&BaseMatrix::tanh);
+      compare(&BaseMatrix::tanhDerivative);
+      compare(&BaseMatrix::reciprocal);
+      compare(&BaseMatrix::reciprocalDerivative);
+      compare(&BaseMatrix::abs);
+      compare(&BaseMatrix::absDerivative);
+      compare(&BaseMatrix::sigmoid);
+      compare(&BaseMatrix::sigmoidDerivative);
+      compare(&BaseMatrix::expDerivative);
+      compare(&BaseMatrix::sign);
+      compare(&BaseMatrix::exp);
+      compare(&BaseMatrix::log);
+      compare(&BaseMatrix::sqrt);
+      compare(&BaseMatrix::dotMul);
+      compare(&BaseMatrix::dotMulSquare);
+      compare(&BaseMatrix::dotSquareMul);
+      compare(&BaseMatrix::addColVector);
+      compare(&BaseMatrix::addRowVector);
+      compare(&BaseMatrix::mulRowVector);
+      compare(&BaseMatrix::divRowVector);
+      compare(&BaseMatrix::addP2P);
+      compare(&BaseMatrix::invSqrt);
+    }
+  }
+}
+
+// Test all void (BaseMatrix::*)(real, real) functions
+TEST(BaseMatrix, real_real) {
+  for (auto height : {1, 3, 11, 73, 128, 200, 330}) {
+    for (auto width : {1, 3, 32, 100, 512, 1000, 3210}) {
+      auto compare = [height, width](void (BaseMatrix::*f)(real, real)) {
+        AutoCompare test(height, width, 1e-5);
+        test.cmpWithoutArg<0, 1>(f, height, width);
+      };
+
+      compare(&BaseMatrix::add);
+      compare(&BaseMatrix::clip);
+    }
+  }
+}
+
+// Test all void (BaseMatrix::*)(BaseMatrix&, real) functions
+TEST(BaseMatrix, BaseMatrix_real) {
+  for (auto height : {1, 3, 11, 73, 128, 200, 330}) {
+    for (auto width : {1, 3, 32, 100, 512, 1000, 3210}) {
+      auto compare = [height, width](void (BaseMatrix::*f)(BaseMatrix&, real)) {
+        AutoCompare test(height, width, 1e-5);
+        test.cmpWithoutArg<0, 1>(f, height, width);
+      };
+
+      compare(&BaseMatrix::addBias);
+      compare(&BaseMatrix::add);
+      compare(&BaseMatrix::sub);
+      compare(&BaseMatrix::pow);
+      compare(&BaseMatrix::addScalar);
+      compare(&BaseMatrix::subScalar);
+      compare(&BaseMatrix::mulScalar);
+      compare(&BaseMatrix::divScalar);
+      compare(&BaseMatrix::scalarDiv);
+      compare(&BaseMatrix::addSquare);
+      compare(&BaseMatrix::isEqualTo);
+    }
+  }
+}
+
+// Test all void (BaseMatrix::*)(BaseMatrix&, BaseMatrix&) functions
+TEST(BaseMatrix, BaseMatrix_BaseMatrix) {
+  for (auto height : {1, 3, 11, 73, 128, 200, 330}) {
+    for (auto width : {1, 3, 32, 100, 512, 1000, 3210}) {
+      auto compare = [height,
+                      width](void (BaseMatrix::*f)(BaseMatrix&, BaseMatrix&)) {
+        AutoCompare test(height, width, 1e-5);
+        test.cmpWithoutArg<0, 1>(f, height, width);
+      };
+
+      compare(&BaseMatrix::softCrossEntropy);
+      compare(&BaseMatrix::softCrossEntropyBp);
+      compare(&BaseMatrix::binaryLabelCrossEntropy);
+      compare(&BaseMatrix::binaryLabelCrossEntropyBp);
+      compare(&BaseMatrix::sub);
+      compare(&BaseMatrix::add2);
+      compare(&BaseMatrix::dotMul);
+      compare(&BaseMatrix::dotDiv);
+      compare(&BaseMatrix::logisticRegressionLoss);
+      compare(&BaseMatrix::logisticRegressionLossBp);
+      compare(&BaseMatrix::biggerThan);
+      compare(&BaseMatrix::max);
+      compare(&BaseMatrix::dotMulSquare);
+      compare(&BaseMatrix::dotSquareSquare);
+    }
+  }
+}
+
+void TestElementWise(size_t height, size_t width) {
+  AutoCompare rowScale(height, width);
+  rowScale.cmpWithoutArg<0, 1, 2>(&BaseMatrix::rowScale, height, width);
+
+  AutoCompare rowDotMul(height, width);
+  rowDotMul.cmpWithoutArg<0, 1, 2>(&BaseMatrix::rowDotMul, height, width);
+
+  AutoCompare binaryClassificationError(height, width);
+  binaryClassificationError.cmpWithoutArg<0, 1, 2, 3>(
+      &BaseMatrix::binaryClassificationError, height, width);
+
+  AutoCompare sumOfSquaresBp(height, width);
+  sumOfSquaresBp.cmpWithoutArg<0, 1>(&Matrix::sumOfSquaresBp, height, width);
+}
+
+void TestAggregateToRow(size_t height, size_t width) {
+  AutoCompare maxCols(1, width);
+  maxCols.cmpWithoutArg<0>(&BaseMatrix::maxCols, height, width);
+
+  AutoCompare minCols(1, width);
+  minCols.cmpWithoutArg<0>(&BaseMatrix::minCols, height, width);
+
+  AutoCompare addDotMulVMM(1, width);
+  addDotMulVMM.cmpWithoutArg<0, 1>(&BaseMatrix::addDotMulVMM, height, width);
+
+  AutoCompare sumCols(1, width);
+  sumCols.cmpWithoutArg<0, 1, 2>(&BaseMatrix::sumCols, height, width);
+
+  AutoCompare collectBias(1, width);
+  collectBias.cmpWithoutArg<0, 1>(
+      static_cast<void (Matrix::*)(Matrix&, real)>(&Matrix::collectBias),
+      height,
+      width);
+}
+
+void TestAggregateToCol(size_t height, size_t width) {
+  AutoCompare maxRows(height, 1);
+  maxRows.cmpWithoutArg<0>(&BaseMatrix::maxRows, height, width);
+
+  AutoCompare minRows(height, 1);
+  minRows.cmpWithoutArg<0>(&BaseMatrix::minRows, height, width);
+
+  AutoCompare sumRows(height, 1);
+  sumRows.cmpWithoutArg<0, 1, 2>(&BaseMatrix::sumRows, height, width);
+
+  AutoCompare sumOfSquares(height, 1);
+  sumOfSquares.cmpWithoutArg<0, 1>(&Matrix::sumOfSquares, height, width);
+}
+
+TEST(BaseMatrix, Other) {
+  for (auto height : {1, 3, 11, 73, 128, 200, 330}) {
+    for (auto width : {1, 3, 32, 100, 512, 1000, 3210}) {
+      TestElementWise(height, width);
+      TestAggregateToRow(height, width);
+      TestAggregateToCol(height, width);
+    }
+  }
+}
+
+int main(int argc, char** argv) {
+  testing::InitGoogleTest(&argc, argv);
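+  // initMain parses the PaddlePaddle command line flags and initializes the
+  // runtime (logging, devices) before the tests run.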
+  paddle::initMain(argc, argv);
+  return RUN_ALL_TESTS();
+}
+
+#endif
diff --git a/paddle/math/tests/test_Matrix.cpp b/paddle/math/tests/test_Matrix.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..edc9d74103240ff3790a4baf2ae796cab4aca55b
--- /dev/null
+++ b/paddle/math/tests/test_Matrix.cpp
@@ -0,0 +1,300 @@
+/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifndef PADDLE_ONLY_CPU
+/**
+ * This test file uses autotest::AutoCompare and cmpWithArg to compare the
+ * CPU and GPU implementations of the member functions in Matrix.cpp.
+ */
+
+#include <gtest/gtest.h>
+#include "TestUtils.h"
+
+using paddle::BaseMatrix;
+using paddle::Matrix;
+using paddle::CpuMatrix;
+using paddle::CpuIVector;
+using paddle::CpuSparseMatrix;
+using autotest::AutoCompare;
+
+void testBilinearFwdBwd(int numSamples,
+                        int imgSizeH,
+                        int imgSizeW,
+                        int channels) {
+  int inWidth = imgSizeH * imgSizeW * channels;
+  int outWidth = 2 * imgSizeH * 2 * imgSizeW * channels;
+  real ratioH = 0.5;
+  real ratioW = 0.5;
+
+  AutoCompare forward(numSamples, outWidth);
+  CpuMatrix arg1(numSamples, inWidth);
+  arg1.randomizeUniform();
+  forward.cmpWithArg(&Matrix::bilinearForward,
+                     arg1,
+                     imgSizeH,
+                     imgSizeW,
+                     2 * imgSizeH,
+                     2 * imgSizeW,
+                     channels,
+                     ratioH,
+                     ratioW);
+
+  AutoCompare backward(numSamples, inWidth);
+  CpuMatrix arg2(numSamples, outWidth);
+  arg2.randomizeUniform();
+  backward.cmpWithArg(&Matrix::bilinearBackward,
+                      arg2,
+                      2 * imgSizeH,
+                      2 * imgSizeW,
+                      imgSizeH,
+                      imgSizeW,
+                      channels,
+                      ratioH,
+                      ratioW);
+}
+
+TEST(Matrix, BilinearFwdBwd) {
+  for (auto numSamples : {5, 10}) {
+    for (auto channels : {8, 16}) {
+      for (auto imgSizeH : {14, 28}) {
+        for (auto imgSizeW : {16, 30}) {
+          VLOG(3) << " numSamples=" << numSamples << " channels=" << channels
+                  << " imgSizeH=" << imgSizeH << " imgSizeW=" << imgSizeW;
+          testBilinearFwdBwd(numSamples, imgSizeH, imgSizeW, channels);
+        }
+      }
+    }
+  }
+}
+
+void testMatrixAddBias(int height, int width, real scale) {
+  AutoCompare test(height, width);
+  CpuMatrix arg1(1, width);
+  arg1.randomizeUniform();
+  test.cmpWithArg(
+      static_cast<void (Matrix::*)(Matrix&, real)>(&Matrix::addBias),
+      arg1,
+      scale);
+}
+
+void testMatrixAddDotMulMMV(int height, int width) {
+  AutoCompare test(height, width);
+  CpuMatrix arg1(height, width);
+  CpuMatrix arg2(1, width);
+  arg1.randomizeUniform();
+  arg2.randomizeUniform();
+  test.cmpWithArg(&BaseMatrix::addDotMulMMV, arg1, arg2);
+}
+
+TEST(Matrix, unary) {
+  for (auto height : {1, 3, 11, 73, 128, 200, 330}) {
+    for (auto width : {1, 3, 32, 100, 512, 1000, 3210}) {
+      VLOG(3) << " height=" << height << " width=" << width;
+      testMatrixAddBias(height, width, 1.0);
+      testMatrixAddBias(height, width, 3.5);
+      testMatrixAddDotMulMMV(height, width);
+    }
+  }
+}
+
+void testMatrixAddAtOffset(int height, int width1, int width2, int offset) {
+  AutoCompare test(height, width2);
+  CpuMatrix arg1(height, width1);
+  arg1.randomizeUniform();
+  test.cmpWithArg(&Matrix::addAtOffset, arg1, offset);
+}
+
+void testMatrixAssignAtOffset(int height, int width1, int width2, int offset) {
+  AutoCompare test(height, width2);
+  CpuMatrix arg1(height, width1);
+  arg1.randomizeUniform();
+  test.cmpWithArg(&Matrix::assignAtOffset, arg1, offset);
+}
+
+TEST(Matrix, AtOffset) {
+  for (auto height : {1, 11, 73, 128, 200}) {
+    for (auto width1 : {1, 32, 100, 512, 1000}) {
+      for (auto width2 : {1, 32, 100, 512, 1000}) {
+        int columnOffset = 0;
+        int offset = std::abs(width1 - width2);
+        if (offset) {
+          columnOffset = std::rand() % offset;
+        }
+        VLOG(3) << " height=" << height << " width1=" << width1
+                << " width2=" << width2 << " columnOffset = " << columnOffset;
+        testMatrixAddAtOffset(height, width1, width2, columnOffset);
+        testMatrixAssignAtOffset(height, width1, width2, columnOffset);
+      }
+    }
+  }
+}
+
+void testMatrixSelectRows(int numSamples, int tableSize, int inputDim) {
+  AutoCompare test(numSamples, inputDim);
+  CpuMatrix arg1(tableSize, inputDim);
+  CpuIVector arg2(numSamples);
+  arg1.randomizeUniform();
+  arg2.rand(tableSize);
+  test.cmpWithArg(&Matrix::selectRows, arg1, arg2);
+}
+
+TEST(Matrix, tableProjection) {
+  for (auto numSamples : {10, 100, 1000, 10000, 80000}) {
+    for (auto tableSize : {10, 100}) {
+      for (auto inputDim : {20, 50}) {
+        VLOG(3) << " numSamples=" << numSamples << " tableSize=" << tableSize
+                << " inputDim=" << inputDim;
+        testMatrixSelectRows(numSamples, tableSize, inputDim);
+      }
+    }
+  }
+}
+
+void testMatrixCopyByRowIndex(int outHeight, int inHeight, int width) {
+  AutoCompare test(outHeight, width);
+  CpuMatrix arg1(inHeight, width);
+  CpuIVector arg2(outHeight);
+  arg1.randomizeUniform();
+  arg2.rand(inHeight);
+  test.cmpWithArg(&Matrix::copyByRowIndex, arg1, arg2);
+}
+
+TEST(Matrix, copyByRowIndex) {
+  for (auto outHeight : {31, 500, 1000}) {
+    for (auto inHeight : {17, 257, 500, 1200}) {
+      for (auto width : {512, 1024}) {
+        VLOG(3) << outHeight << " " << inHeight << " " << width;
+        testMatrixCopyByRowIndex(outHeight, inHeight, width);
+      }
+    }
+  }
+}
+
+void testCosSim(int heightX, int heightY, int width, real scale) {
+  AutoCompare test(heightX, 1);
+  CpuMatrix arg1(heightX, width);
+  CpuMatrix arg2(heightY, width);
+  arg1.randomizeUniform();
+  arg2.randomizeUniform();
+  arg2.add(-0.5);
+  test.cmpWithArg(&Matrix::cosSim, arg1, arg2, scale);
+}
+
+TEST(Matrix, cosSim) {
+  for (auto heightX : {10, 100, 1000}) {
+    for (auto heightY : {1, heightX}) {
+      for (auto width : {10, 100, 1000}) {
+        for (auto scale : {1.0, 2.0}) {
+          testCosSim(heightX, heightY, width, scale);
+        }
+      }
+    }
+  }
+}
+
+void testParamReluForward(int height, int width, int w_height, int w_width) {
+  AutoCompare test(height, width);
+  CpuMatrix arg1(height, width);
+  CpuMatrix arg2(w_height, w_width);
+  arg1.randomizeUniform();
+  arg2.randomizeUniform();
+  arg1.add(-0.5);
+  test.cmpWithArg(&Matrix::paramReluForward, arg1, arg2);
+}
+
+void testParamReluBackwardW(int height, int width, int w_height, int w_width) {
+  AutoCompare test(w_height, w_width);
+  CpuMatrix arg1(height, width);
+  CpuMatrix arg2(height, width);
+  arg1.randomizeUniform();
+  arg2.randomizeUniform();
+  arg2.add(-0.5);
+  test.cmpWithArg(&Matrix::paramReluBackwardW, arg1, arg2);
+}
+
+TEST(Matrix, paramRelu) {
+  for (auto height : {10, 100}) {
+    for (auto width : {10, 100}) {
+      for (auto w_height : {1, 2}) {
+        for (auto w_width : {1, 2}) {
+          testParamReluForward(height, width, w_height, w_width);
+          testParamReluBackwardW(height, width, w_height, w_width);
+        }
+      }
+    }
+  }
+}
+
+void testAddSharedBias(int numSamples, int dim, int channel) {
+  AutoCompare test(numSamples, dim);
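+  // The bias holds one value per channel; addSharedBias broadcasts each
+  // value across the dim / channel output columns of its channel.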
+  CpuMatrix arg1(1, channel);
+  arg1.randomizeUniform();
+  test.cmpWithArg(&Matrix::addSharedBias, arg1, 1.0);
+}
+
+void testCollectSharedBias(int numSamples, int dim, int channel) {
+  AutoCompare test(1, channel);
+  CpuMatrix arg1(numSamples, dim);
+  arg1.randomizeUniform();
+  test.cmpWithArg(&Matrix::collectSharedBias, arg1, 1.0);
+}
+
+TEST(Matrix, sharedBias) {
+  for (auto numSamples : {1, 100, 520}) {
+    for (auto dim : {100 * 16, 100 * 32}) {
+      for (auto channel : {8, 16}) {
+        VLOG(3) << " numSamples=" << numSamples << " dim=" << dim
+                << " channel=" << channel;
+        testAddSharedBias(numSamples, dim, channel);
+        testCollectSharedBias(numSamples, dim, channel);
+      }
+    }
+  }
+}
+
+void testMultiBinaryLabelCrossEntropy(int numSamples, int dim) {
+  AutoCompare forward(numSamples, 1);
+  CpuMatrix arg1(numSamples, dim);
+  CpuSparseMatrix arg2(
+      numSamples, dim, numSamples, paddle::NO_VALUE, paddle::SPARSE_CSR);
+
+  CpuMatrix output1(numSamples, dim);
+  output1.randomizeUniform();
+  output1.softmax(arg1);
+  for (int i = 0; i < numSamples; i++) {
+    const unsigned int id = std::rand() % dim;
+    arg2.setRow(i, 1, &id, nullptr);
+  }
+  forward.cmpWithArg(&Matrix::multiBinaryLabelCrossEntropy, arg1, arg2);
+
+  AutoCompare backward(numSamples, dim);
+  backward.cmpWithArg(&Matrix::multiBinaryLabelCrossEntropyBp, arg1, arg2);
+}
+
+TEST(Matrix, multiBinaryCrossEntropy) {
+  for (auto numSamples : {100, 1000, 10000}) {
+    for (auto dim : {100, 1000, 10000}) {
+      VLOG(3) << " numSamples=" << numSamples << " dim=" << dim;
+      testMultiBinaryLabelCrossEntropy(numSamples, dim);
+    }
+  }
+}
+
+int main(int argc, char** argv) {
+  testing::InitGoogleTest(&argc, argv);
+  paddle::initMain(argc, argv);
+  return RUN_ALL_TESTS();
+}
+
+#endif
diff --git a/paddle/math/tests/test_matrix.cpp b/paddle/math/tests/test_SparseMatrix.cpp
similarity index 100%
rename from paddle/math/tests/test_matrix.cpp
rename to paddle/math/tests/test_SparseMatrix.cpp
diff --git a/paddle/math/tests/test_matrixCompare.cpp b/paddle/math/tests/test_matrixCompare.cpp
index de540dad4c8eefe5084c7089d7960d8ca8cf9875..0883066947ae67cd55c2c505eef72168f3139b8d 100644
--- a/paddle/math/tests/test_matrixCompare.cpp
+++ b/paddle/math/tests/test_matrixCompare.cpp
@@ -22,163 +22,12 @@ limitations under the License.
*/ #include #include "paddle/gserver/tests/TestUtil.h" #include "paddle/utils/Stat.h" +#include "TensorCheck.h" using namespace paddle; // NOLINT using namespace std; // NOLINT - -template -void VectorCheckEqual(const VectorT& vector1, const VectorT& vector2) { - CHECK(vector1.getSize() == vector2.getSize()); - - const T* data1 = vector1.getData(); - const T* data2 = vector2.getData(); - size_t size = vector1.getSize(); - int count = 0; - for (size_t i = 0; i < size; i++) { - if (data1[i] != data2[i]) { - count++; - } - } - EXPECT_EQ(count, 0) << "There are " << count << " different element."; -} - -void MatrixCheckEqual(const Matrix& matrix1, const Matrix& matrix2) { - CHECK(matrix1.getHeight() == matrix2.getHeight()); - CHECK(matrix1.getWidth() == matrix2.getWidth()); - - int height = matrix1.getHeight(); - int width = matrix1.getWidth(); - const real* data1 = matrix1.getData(); - const real* data2 = matrix2.getData(); - int count = 0; - for (int i = 0; i < height; i++) { - for (int j = 0; j < width; j++) { - if (data1[i * width + j] != data2[i * width + j]) { - count++; - } - } - } - EXPECT_EQ(count, 0) << "There are " << count << " different element."; -} - -void MatrixCheckErr(const Matrix& matrix1, const Matrix& matrix2) { - CHECK(matrix1.getHeight() == matrix2.getHeight()); - CHECK(matrix1.getWidth() == matrix2.getWidth()); -#ifndef PADDLE_TYPE_DOUBLE - real err = 1e-3; -#else - real err = 1e-10; -#endif - - int height = matrix1.getHeight(); - int width = matrix1.getWidth(); - const real* data1 = matrix1.getData(); - const real* data2 = matrix2.getData(); - int count = 0; - for (int i = 0; i < height; i++) { - for (int j = 0; j < width; j++) { - real a = data1[i * width + j]; - real b = data2[i * width + j]; - if (fabs(a - b) > err) { - if ((fabsf(a - b) / fabsf(a)) > (err / 10.0f)) { - count++; - } - } - } - } - EXPECT_EQ(count, 0) << "There are " << count << " different element."; -} - -void testBilinearFwdBwd(int numSamples, - int imgSizeH, - int imgSizeW, - int channels) { - int inWidth = imgSizeH * imgSizeW * channels; - int outWidth = 2 * imgSizeH * 2 * imgSizeW * channels; - real ratioH = 0.5; - real ratioW = 0.5; - // forward - MatrixPtr input = CpuMatrix::create(numSamples, inWidth, false, false); - MatrixPtr inputGpu = GpuMatrix::create(numSamples, inWidth, false, true); - - MatrixPtr target = CpuMatrix::create(numSamples, outWidth, false, false); - MatrixPtr targetGpu = GpuMatrix::create(numSamples, outWidth, false, true); - MatrixPtr targetCheck = CpuMatrix::create(numSamples, outWidth, false, false); - - input->randomizeUniform(); - inputGpu->copyFrom(*input); - - target->bilinearForward(*input, - imgSizeH, - imgSizeW, - 2 * imgSizeH, - 2 * imgSizeW, - channels, - ratioH, - ratioW); - targetGpu->bilinearForward(*inputGpu, - imgSizeH, - imgSizeW, - 2 * imgSizeH, - 2 * imgSizeW, - channels, - ratioH, - ratioW); - - // check - targetCheck->copyFrom(*targetGpu); - MatrixCheckErr(*target, *targetCheck); - - // backward - MatrixPtr inputGrad = CpuMatrix::create(numSamples, inWidth, false, false); - MatrixPtr inputGpuGrad = GpuMatrix::create(numSamples, inWidth, false, true); - - MatrixPtr targetGrad = CpuMatrix::create(numSamples, outWidth, false, false); - MatrixPtr targetGpuGrad = - GpuMatrix::create(numSamples, outWidth, false, true); - MatrixPtr targetCheckGrad = - CpuMatrix::create(numSamples, inWidth, false, false); - - inputGrad->randomizeUniform(); - targetGrad->randomizeUniform(); - inputGpuGrad->copyFrom(*inputGrad); - targetGpuGrad->copyFrom(*targetGrad); - - 
inputGrad->bilinearBackward(*targetGrad, - 2 * imgSizeH, - 2 * imgSizeW, - imgSizeH, - imgSizeW, - channels, - ratioH, - ratioW); - inputGpuGrad->bilinearBackward(*targetGpuGrad, - 2 * imgSizeH, - 2 * imgSizeW, - imgSizeH, - imgSizeW, - channels, - ratioH, - ratioW); - - // check - targetCheckGrad->copyFrom(*inputGpuGrad); - MatrixCheckErr(*inputGrad, *targetCheckGrad); -} - -TEST(Matrix, BilinearFwdBwd) { - for (auto numSamples : {5, 10}) { - for (auto channels : {8, 16}) { - for (auto imgSizeH : {14, 28}) { - for (auto imgSizeW : {16, 30}) { - VLOG(3) << " numSamples=" << numSamples << " channels=" << channels - << " imgSizeH=" << imgSizeH << " imgSizeW=" << imgSizeW; - testBilinearFwdBwd(numSamples, imgSizeH, imgSizeW, channels); - } - } - } - } -} +using autotest::TensorCheckEqual; +using autotest::TensorCheckErr; void testMatrixProjectionForward(int contextStart, int contextLength, @@ -232,12 +81,7 @@ void testMatrixProjectionForward(int contextStart, beginPad, padding); - // check - MatrixPtr outputCheck = - std::make_shared(batchSize, inputDim * contextLength); - outputCheck->copyFrom(*gpuOutput); - - MatrixCheckEqual(*cpuOutput, *outputCheck); + TensorCheckEqual(*cpuOutput, *gpuOutput); } void testMatrixProjectionBackward(int contextStart, @@ -294,15 +138,9 @@ void testMatrixProjectionBackward(int contextStart, beginPad); } - // check - MatrixPtr inputGradCheck = std::make_shared(batchSize, inputDim); - inputGradCheck->copyFrom(*gpuInputGrad); - MatrixCheckErr(*cpuInputGrad, *inputGradCheck); - + TensorCheckErr(*cpuInputGrad, *gpuInputGrad); if (padding) { - MatrixPtr weightGradChcek = std::make_shared(pad, inputDim); - weightGradChcek->copyFrom(*gpuWeightGrad); - MatrixCheckErr(*cpuWeightGrad, *weightGradChcek); + TensorCheckErr(*cpuWeightGrad, *gpuWeightGrad); } } @@ -361,15 +199,8 @@ void testMatrixMaxSequence(int batchSize, int inputDim) { cpuOutput->maxSequenceForward(*cpuInput, *cpuSequence, *cpuIndex); gpuOutput->maxSequenceForward(*gpuInput, *gpuSequence, *gpuIndex); - // check - MatrixPtr outputCheck = std::make_shared(newBatchSize, inputDim); - outputCheck->copyFrom(*gpuOutput); - MatrixCheckEqual(*cpuOutput, *outputCheck); - - IVectorPtr indexCheck = nullptr; - IVector::resizeOrCreate(indexCheck, newBatchSize * inputDim, false); - indexCheck->copyFrom(*gpuIndex); - VectorCheckEqual(*cpuIndex, *indexCheck); + TensorCheckEqual(*cpuOutput, *gpuOutput); + TensorCheckEqual(*cpuIndex, *gpuIndex); // backward MatrixPtr cpuOutputGrad = std::make_shared(newBatchSize, inputDim); @@ -385,10 +216,7 @@ void testMatrixMaxSequence(int batchSize, int inputDim) { cpuInputGrad->maxSequenceBackward(*cpuOutputGrad, *cpuSequence, *cpuIndex); gpuInputGrad->maxSequenceBackward(*gpuOutputGrad, *gpuSequence, *gpuIndex); - // check - MatrixPtr inputGradCheck = std::make_shared(batchSize, inputDim); - inputGradCheck->copyFrom(*gpuInputGrad); - MatrixCheckEqual(*cpuInputGrad, *inputGradCheck); + TensorCheckEqual(*cpuInputGrad, *gpuInputGrad); } TEST(Matrix, maxSequence) { @@ -431,6 +259,8 @@ void testMatrixZeroAtOffset(int height, int width) { int columnOffset = rand() % width; // NOLINT we just use rand() for test. 
int numColumns = rand() % (width - columnOffset); // NOLINT + if (numColumns == 0) return; + cpuA->zeroAtOffset(columnOffset, numColumns); gpuA->zeroAtOffset(columnOffset, numColumns); @@ -442,10 +272,8 @@ void testMatrixZeroAtOffset(int height, int width) { } } - MatrixPtr outputCheck = std::make_shared(height, width); - outputCheck->copyFrom(*gpuA); - MatrixCheckEqual(*cpuA, *outputCheck); - MatrixCheckEqual(*cpuA, *cpuTest); + TensorCheckEqual(*cpuA, *gpuA); + TensorCheckEqual(*cpuA, *cpuTest); } void testMatrixDeepSwap(int height, int width) { @@ -462,303 +290,8 @@ void testMatrixDeepSwap(int height, int width) { // swap matrix cpuA and cpuB cpuA->deepSwap(*cpuB); - MatrixCheckEqual(*cpuA, *cpuCopyB); - MatrixCheckEqual(*cpuB, *cpuCopyA); -} - -void testMatrixBinaryAdd(int height, int width) { - MatrixPtr cpuA = std::make_shared(height, width); - MatrixPtr cpuB = std::make_shared(height, width); - MatrixPtr gpuA = std::make_shared(height, width); - MatrixPtr gpuB = std::make_shared(height, width); - - cpuA->randomizeUniform(); - cpuB->randomizeUniform(); - gpuA->copyFrom(*cpuA); - gpuB->copyFrom(*cpuB); - cpuA->add(*cpuB); - gpuA->add(*gpuB); - - MatrixPtr outputCheck = std::make_shared(height, width); - outputCheck->copyFrom(*gpuA); - MatrixCheckEqual(*cpuA, *outputCheck); -} - -void testMatrixAssign(int height, int width) { - MatrixPtr cpuA = std::make_shared(height, width); - MatrixPtr gpuA = std::make_shared(height, width); - - cpuA->randomizeUniform(); - gpuA->copyFrom(*cpuA); - cpuA->assign(2.5); - gpuA->assign(2.5); - - MatrixPtr outputCheck = std::make_shared(height, width); - outputCheck->copyFrom(*gpuA); - MatrixCheckEqual(*cpuA, *outputCheck); -} - - -void testMatrixAdd(int height, int width) { - MatrixPtr cpuA = std::make_shared(height, width); - MatrixPtr gpuA = std::make_shared(height, width); - - cpuA->randomizeUniform(); - gpuA->copyFrom(*cpuA); - cpuA->add(2.5); - gpuA->add(2.5); - - MatrixPtr outputCheck = std::make_shared(height, width); - outputCheck->copyFrom(*gpuA); - MatrixCheckEqual(*cpuA, *outputCheck); -} - -void testMatrixSqrt(int height, int width) { - MatrixPtr cpuA = std::make_shared(height, width); - MatrixPtr gpuA = std::make_shared(height, width); - - cpuA->randomizeUniform(); - gpuA->copyFrom(*cpuA); - cpuA->sqrt(); - gpuA->sqrt(); - - MatrixPtr outputCheck = std::make_shared(height, width); - outputCheck->copyFrom(*gpuA); - MatrixCheckErr(*cpuA, *outputCheck); -} - -void testMatrixTanhDerivative(int height, int width) { - MatrixPtr cpuA = std::make_shared(height, width); - MatrixPtr cpuB = std::make_shared(height, width); - MatrixPtr gpuA = std::make_shared(height, width); - MatrixPtr gpuB = std::make_shared(height, width); - - cpuA->randomizeUniform(); - cpuB->randomizeUniform(); - gpuA->copyFrom(*cpuA); - gpuB->copyFrom(*cpuB); - cpuA->tanhDerivative(*cpuB); - gpuA->tanhDerivative(*gpuB); - - MatrixPtr outputCheck = std::make_shared(height, width); - outputCheck->copyFrom(*gpuA); - MatrixCheckErr(*cpuA, *outputCheck); -} - -void testMatrixTanh(int height, int width) { - MatrixPtr cpuA = std::make_shared(height, width); - MatrixPtr cpuB = std::make_shared(height, width); - MatrixPtr gpuA = std::make_shared(height, width); - MatrixPtr gpuB = std::make_shared(height, width); - - cpuA->randomizeUniform(); - cpuB->randomizeUniform(); - gpuA->copyFrom(*cpuA); - gpuB->copyFrom(*cpuB); - cpuA->tanh(*cpuB); - gpuA->tanh(*gpuB); - - MatrixPtr outputCheck = std::make_shared(height, width); - outputCheck->copyFrom(*gpuA); - MatrixCheckErr(*cpuA, *outputCheck); 
-} - -void testMatrixTernarySub(int height, int width) { - MatrixPtr cpuA = std::make_shared(height, width); - MatrixPtr cpuB = std::make_shared(height, width); - MatrixPtr cpuC = std::make_shared(height, width); - MatrixPtr gpuA = std::make_shared(height, width); - MatrixPtr gpuB = std::make_shared(height, width); - MatrixPtr gpuC = std::make_shared(height, width); - - cpuA->randomizeUniform(); - cpuB->randomizeUniform(); - cpuC->randomizeUniform(); - gpuA->copyFrom(*cpuA); - gpuB->copyFrom(*cpuB); - gpuC->copyFrom(*cpuC); - - cpuA->sub(*cpuB, *cpuC); - gpuA->sub(*gpuB, *gpuC); - - MatrixPtr outputCheck = std::make_shared(height, width); - outputCheck->copyFrom(*gpuA); - MatrixCheckEqual(*cpuA, *outputCheck); -} - -void testMatrixSumOfSquaresBp(int height, int width) { - MatrixPtr cpuA = std::make_shared(height, width); - MatrixPtr cpuB = std::make_shared(height, width); - MatrixPtr cpuC = std::make_shared(height, width); - MatrixPtr gpuA = std::make_shared(height, width); - MatrixPtr gpuB = std::make_shared(height, width); - MatrixPtr gpuC = std::make_shared(height, width); - - cpuA->randomizeUniform(); - cpuB->randomizeUniform(); - cpuC->randomizeUniform(); - gpuA->copyFrom(*cpuA); - gpuB->copyFrom(*cpuB); - gpuC->copyFrom(*cpuC); - - cpuA->sumOfSquaresBp(*cpuB, *cpuC); - gpuA->sumOfSquaresBp(*gpuB, *gpuC); - - MatrixPtr outputCheck = std::make_shared(height, width); - outputCheck->copyFrom(*gpuA); - MatrixCheckErr(*cpuA, *outputCheck); -} - -void testMatrixBinaryRowScale(int height, int width) { - MatrixPtr cpuA = std::make_shared(height, width); - MatrixPtr cpuB = std::make_shared(height, 1); - MatrixPtr gpuA = std::make_shared(height, width); - MatrixPtr gpuB = std::make_shared(height, 1); - - MatrixPtr cpuA1 = std::make_shared(height, width); - MatrixPtr cpuB1 = std::make_shared(height, 1); - MatrixPtr gpuA1 = std::make_shared(height, width); - MatrixPtr gpuB1 = std::make_shared(height, 1); - - cpuA->randomizeUniform(); - cpuB->randomizeUniform(); - gpuA->copyFrom(*cpuA); - gpuB->copyFrom(*cpuB); - cpuA1->copyFrom(*cpuA); - cpuB1->copyFrom(*cpuB); - gpuA1->copyFrom(*cpuA); - gpuB1->copyFrom(*cpuB); - - cpuA->addColVector(*cpuB); - gpuA->addColVector(*gpuB); - cpuA1->addColumnVector(*cpuB1); - - MatrixPtr outputCheck = std::make_shared(height, width); - outputCheck->copyFrom(*gpuA); - MatrixCheckEqual(*cpuA, *outputCheck); - - MatrixCheckEqual(*cpuA, *cpuA1); -} - -void testMatrixAddBias(int height, int width, real scale) { - MatrixPtr cpuA = std::make_shared(height, width); - MatrixPtr cpuB = std::make_shared(1, width); - MatrixPtr gpuA = std::make_shared(height, width); - MatrixPtr gpuB = std::make_shared(1, width); - - cpuA->randomizeUniform(); - cpuB->randomizeUniform(); - gpuA->copyFrom(*cpuA); - gpuB->copyFrom(*cpuB); - - cpuA->addBias(*cpuB, scale); - gpuA->addBias(*gpuB, scale); - - MatrixPtr outputCheck = std::make_shared(height, width); - outputCheck->copyFrom(*gpuA); - MatrixCheckErr(*cpuA, *outputCheck); -} - -void testMatrixTernaryRowScale(int height, int width) { - MatrixPtr cpuA = std::make_shared(height, width); - MatrixPtr cpuB = std::make_shared(height, width); - MatrixPtr cpuC = std::make_shared(height, width); - MatrixPtr gpuA = std::make_shared(height, width); - MatrixPtr gpuB = std::make_shared(height, width); - MatrixPtr gpuC = std::make_shared(height, width); - - MatrixPtr cpuA1 = std::make_shared(height, width); - MatrixPtr cpuB1 = std::make_shared(height, width); - MatrixPtr cpuC1 = std::make_shared(height, width); - - cpuA->randomizeUniform(); - 
cpuB->randomizeUniform(); - cpuC->randomizeUniform(); - gpuA->copyFrom(*cpuA); - gpuB->copyFrom(*cpuB); - gpuC->copyFrom(*cpuC); - cpuA1->copyFrom(*cpuA); - cpuB1->copyFrom(*cpuB); - cpuC1->copyFrom(*cpuC); - - int columnOffset = rand() % width; // NOLINT - - cpuA->rowScale(columnOffset, *cpuB, *cpuC); - gpuA->rowScale(columnOffset, *gpuB, *gpuC); - cpuA1->rowScale2(columnOffset, *cpuB1, *cpuC1); - - MatrixPtr outputCheck = std::make_shared(height, width); - outputCheck->copyFrom(*gpuA); - MatrixCheckEqual(*cpuA, *outputCheck); - - MatrixCheckEqual(*cpuA, *cpuA1); -} - -void testMatrixTernaryRowDotMul(int height, int width) { - MatrixPtr cpuA = std::make_shared(height, width); - MatrixPtr cpuB = std::make_shared(height, width); - MatrixPtr cpuC = std::make_shared(height, width); - - MatrixPtr cpuA1 = std::make_shared(height, width); - MatrixPtr cpuB1 = std::make_shared(height, width); - MatrixPtr cpuC1 = std::make_shared(height, width); - - MatrixPtr gpuA = std::make_shared(height, width); - MatrixPtr gpuB = std::make_shared(height, width); - MatrixPtr gpuC = std::make_shared(height, width); - - cpuA->randomizeUniform(); - cpuB->randomizeUniform(); - cpuC->randomizeUniform(); - cpuA1->copyFrom(*cpuA); - cpuB1->copyFrom(*cpuB); - cpuC1->copyFrom(*cpuC); - gpuA->copyFrom(*cpuA); - gpuB->copyFrom(*cpuB); - gpuC->copyFrom(*cpuC); - - int columnOffset = rand() % width; // NOLINT - - cpuA->rowDotMul(columnOffset, *cpuB, *cpuC); - gpuA->rowDotMul(columnOffset, *gpuB, *gpuC); - cpuA1->rowDotMul2(columnOffset, *cpuB1, *cpuC1); - - MatrixPtr outputCheck = std::make_shared(height, width); - outputCheck->copyFrom(*gpuA); - MatrixCheckErr(*cpuA, *cpuA1); - MatrixCheckErr(*cpuA, *outputCheck); -} - -void testMatrixAddDotMulMMV(int height, int width) { - MatrixPtr cpuA = std::make_shared(height, width); - MatrixPtr cpuB = std::make_shared(height, width); - MatrixPtr cpuC = std::make_shared(1, width); - MatrixPtr gpuA = std::make_shared(height, width); - MatrixPtr gpuB = std::make_shared(height, width); - MatrixPtr gpuC = std::make_shared(1, width); - - MatrixPtr cpuA1 = std::make_shared(height, width); - MatrixPtr cpuB1 = std::make_shared(height, width); - MatrixPtr cpuC1 = std::make_shared(1, width); - - cpuA->randomizeUniform(); - cpuB->randomizeUniform(); - cpuC->randomizeUniform(); - gpuA->copyFrom(*cpuA); - gpuB->copyFrom(*cpuB); - gpuC->copyFrom(*cpuC); - cpuA1->copyFrom(*cpuA); - cpuB1->copyFrom(*cpuB); - cpuC1->copyFrom(*cpuC); - - cpuA->addDotMulMMV(*cpuB, *cpuC); - gpuA->addDotMulMMV(*gpuB, *gpuC); - cpuA1->addDotMulMMV2(*cpuB1, *cpuC1); - - MatrixPtr outputCheck = std::make_shared(height, width); - outputCheck->copyFrom(*gpuA); - MatrixCheckErr(*cpuA, *outputCheck); - MatrixCheckEqual(*cpuA, *cpuA1); + TensorCheckEqual(*cpuA, *cpuCopyB); + TensorCheckEqual(*cpuB, *cpuCopyA); } void testMatrixTranspose(int height, int width) { @@ -772,9 +305,7 @@ void testMatrixTranspose(int height, int width) { cpu->transpose(cpuT, false); gpu->transpose(gpuT, false); - MatrixPtr outputCheck = std::make_shared(width, height); - outputCheck->copyFrom(*gpuT); - MatrixCheckEqual(*cpuT, *outputCheck); + TensorCheckEqual(*cpuT, *gpuT); } void testMatrixInverse(int height) { @@ -795,530 +326,127 @@ void testMatrixInverse(int height) { cpu->inverse(cpuI, false); gpu->inverse(gpuI, false); - outputCheck->copyFrom(*gpuI); - MatrixCheckErr(*cpuI, *outputCheck); + TensorCheckErr(*cpuI, *gpuI); outputCheck->mul(cpu, cpuI); - cpu->setDiag(1.0); - MatrixCheckErr(*cpu, *outputCheck); -} - -TEST(Matrix, unary) { - for (auto 
height : {1, 3, 11, 73, 128, 200, 330}) { - for (auto width : {1, 3, 32, 100, 512, 1000, 3210}) { - VLOG(3) << " height=" << height << " width=" << width; - - // applyUnary - testMatrixAssign(height, width); - testMatrixAdd(height, width); - testMatrixSqrt(height, width); - - // applyBinary - testMatrixBinaryAdd(height, width); - testMatrixTanh(height, width); - testMatrixTanhDerivative(height, width); - testMatrixDeepSwap(height, width); - - // applyTernary - testMatrixTernarySub(height, width); - testMatrixSumOfSquaresBp(height, width); - - // asRowVector - testMatrixAddBias(height, width, 1.0); - testMatrixAddBias(height, width, 3.5); - testMatrixAddDotMulMMV(height, width); - - // asColVector - testMatrixTernaryRowScale(height, width); - testMatrixBinaryRowScale(height, width); - - // sum - testMatrixGetSum(height, width); - - // transpose - testMatrixTranspose(height, width); - } - // inverse - testMatrixInverse(height); - } -} - -void testMatrixSoftmax(int height, int width) { - MatrixPtr cpuInput = std::make_shared(height, width); - MatrixPtr cpuOutput = std::make_shared(height, width); - MatrixPtr gpuInput = std::make_shared(height, width); - MatrixPtr gpuOutput = std::make_shared(height, width); - - cpuInput->randomizeUniform(); - gpuInput->copyFrom(*cpuInput); - cpuOutput->zero(); - gpuOutput->zero(); - cpuInput->softmax(*cpuOutput); - gpuInput->softmax(*gpuOutput); - - MatrixPtr outputCheck = std::make_shared(height, width); - outputCheck->copyFrom(*gpuOutput); - MatrixCheckErr(*cpuOutput, *outputCheck); -} - -void testSequenceSoftmax(int batchSize) { - // forward - int inputDim = 1; - MatrixPtr cpuInput = std::make_shared(batchSize, inputDim); - MatrixPtr gpuInput = std::make_shared(batchSize, inputDim); - cpuInput->randomizeUniform(); - gpuInput->copyFrom(*cpuInput); - - IVectorPtr cpuSequence; - generateSequenceStartPositions(batchSize, cpuSequence); - IVectorPtr gpuSequence = IVector::create(cpuSequence->getSize(), true); - gpuSequence->copyFrom(*cpuSequence); - - cpuInput->sequenceSoftmax(*cpuInput, *cpuSequence); - gpuInput->sequenceSoftmax(*gpuInput, *gpuSequence); - - // check - MatrixPtr outputCheck = std::make_shared(batchSize, inputDim); - outputCheck->copyFrom(*gpuInput); - MatrixCheckErr(*cpuInput, *outputCheck); -} - -void testMatrixSoftmaxThreshold(int height, int width) { - MatrixPtr cpuInput = std::make_shared(height, width); - MatrixPtr cpuOutput = std::make_shared(height, width); - MatrixPtr gpuInput = std::make_shared(height, width); - MatrixPtr gpuOutput = std::make_shared(height, width); - - cpuInput->randomizeUniform(); - cpuInput->getData()[0] = 100.0; - gpuInput->copyFrom(*cpuInput); - cpuOutput->zero(); - gpuOutput->zero(); - cpuInput->softmax(*cpuOutput); - gpuInput->softmax(*gpuOutput); - - MatrixPtr outputCheck = std::make_shared(height, width); - outputCheck->copyFrom(*gpuOutput); - // check output zero - int cpuCount = 0; - int gpuCount = 0; - auto zeroNum = [](MatrixPtr out, int& count) { - for (size_t i = 0; i < out->getHeight(); i++) { - for (size_t j = 0; j < out->getWidth(); j++) { - if (out->getElement(i, j) == 0) count++; - } - } - }; - zeroNum(cpuOutput, cpuCount); - zeroNum(outputCheck, gpuCount); - EXPECT_EQ(cpuCount, 0) << "Cpu softmax output value 0"; - EXPECT_EQ(gpuCount, 0) << "Gpu softmax output value 0"; -} - -void testMatrixSoftmaxBp(int height, int width) { - MatrixPtr cpuInput = std::make_shared(height, width); - MatrixPtr cpuOutput = std::make_shared(height, width); - MatrixPtr gpuInput = std::make_shared(height, width); - 
MatrixPtr gpuOutput = std::make_shared(height, width); - - cpuInput->randomizeUniform(); - gpuInput->copyFrom(*cpuInput); - cpuOutput->randomizeUniform(); - gpuOutput->copyFrom(*cpuOutput); - gpuOutput->softmaxBackward(*gpuInput); - - MatrixPtr sftMaxSum = std::make_shared(height, 1); - MatrixPtr sftMaxDot = std::make_shared(height, width); - sftMaxDot->dotMul(*cpuOutput, *cpuInput); - sftMaxSum->colMerge(*sftMaxDot); - cpuOutput->softmaxDerivative(*cpuInput, *sftMaxSum); - - MatrixPtr outputCheck = std::make_shared(height, width); - outputCheck->copyFrom(*gpuOutput); - MatrixCheckErr(*cpuOutput, *outputCheck); -} - -TEST(Matrix, softmax) { - for (auto height : {1, 11, 73, 128, 200}) { - for (auto width : {1, 32, 100, 512, 1000}) { - VLOG(3) << " height=" << height << " width=" << width; - - testMatrixSoftmax(height, width); - testMatrixSoftmaxBp(height, width); - testMatrixSoftmaxThreshold(height, width); - } - testSequenceSoftmax(height); - } -} - -void testMatrixAddDotMulVMM(int height, int width, int endCol = 0) { - MatrixPtr cpuA = std::make_shared(1, width); - MatrixPtr cpuB = std::make_shared(height, width); - MatrixPtr cpuC = std::make_shared(height, width); - MatrixPtr gpuA = std::make_shared(1, width); - MatrixPtr gpuB = std::make_shared(height, width); - MatrixPtr gpuC = std::make_shared(height, width); - - MatrixPtr cpuA1 = std::make_shared(1, width); - MatrixPtr cpuB1 = std::make_shared(height, width); - MatrixPtr cpuC1 = std::make_shared(height, width); - - cpuA->randomizeUniform(); - cpuB->randomizeUniform(); - cpuC->randomizeUniform(); - gpuA->copyFrom(*cpuA); - gpuB->copyFrom(*cpuB); - gpuC->copyFrom(*cpuC); - cpuA1->copyFrom(*cpuA); - cpuB1->copyFrom(*cpuB); - cpuC1->copyFrom(*cpuC); - - if (!endCol) { - cpuA->addDotMulVMM(*cpuB, *cpuC); - gpuA->addDotMulVMM(*gpuB, *gpuC); - cpuA1->addDotMulVMM2(*cpuB1, *cpuC1); - - MatrixCheckErr(*cpuA, *cpuA1); - } else { - MatrixPtr subCpuA = cpuA->subColMatrix(0, endCol); - MatrixPtr subCpuB = cpuB->subColMatrix(0, endCol); - MatrixPtr subCpuC = cpuC->subColMatrix(0, endCol); - MatrixPtr subGpuA = gpuA->subColMatrix(0, endCol); - MatrixPtr subGpuB = gpuB->subColMatrix(0, endCol); - MatrixPtr subGpuC = gpuC->subColMatrix(0, endCol); - subCpuA->addDotMulVMM(*subCpuB, *subCpuC); - subGpuA->addDotMulVMM(*subGpuB, *subGpuC); - } - - MatrixPtr outputCheck = std::make_shared(1, width); - outputCheck->copyFrom(*gpuA); - MatrixCheckErr(*cpuA, *outputCheck); -} - -void testMatrixRowSum(int height, int width) { - MatrixPtr cpuA = std::make_shared(height, 1); - MatrixPtr cpuB = std::make_shared(height, width); - MatrixPtr gpuA = std::make_shared(height, 1); - MatrixPtr gpuB = std::make_shared(height, width); - - MatrixPtr cpuA1 = std::make_shared(height, 1); - MatrixPtr cpuB1 = std::make_shared(height, width); - MatrixPtr gpuA1 = std::make_shared(height, 1); - MatrixPtr gpuB1 = std::make_shared(height, width); - - cpuA->randomizeUniform(); - cpuB->randomizeUniform(); - gpuA->copyFrom(*cpuA); - gpuB->copyFrom(*cpuB); - cpuA1->copyFrom(*cpuA); - cpuB1->copyFrom(*cpuB); - gpuA1->copyFrom(*cpuA); - gpuB1->copyFrom(*cpuB); - - cpuA->colMerge(*cpuB); - gpuA->colMerge(*gpuB); - - cpuB1->rowSum(*cpuA1); - gpuB1->rowSum(*gpuA1); - - MatrixPtr outputCheck = std::make_shared(height, 1); - outputCheck->copyFrom(*gpuA); - MatrixCheckErr(*cpuA, *outputCheck); - outputCheck->copyFrom(*gpuA1); - MatrixCheckErr(*cpuA1, *outputCheck); -} - -void testMatrixRowMax(int height, int width, int endCol = 0) { - MatrixPtr cpuA = std::make_shared(height, 1); - MatrixPtr 
cpuB = std::make_shared(height, width); - MatrixPtr gpuA = std::make_shared(height, 1); - MatrixPtr gpuB = std::make_shared(height, width); - - cpuA->randomizeUniform(); - cpuB->randomizeUniform(); - gpuA->copyFrom(*cpuA); - gpuB->copyFrom(*cpuB); - - if (!endCol) { - cpuB->rowMax(*cpuA); - gpuB->rowMax(*gpuA); - } else { - MatrixPtr subCpuB = cpuB->subColMatrix(0, endCol); - MatrixPtr subGpuB = gpuB->subColMatrix(0, endCol); - subCpuB->rowMax(*cpuA); - subGpuB->rowMax(*gpuA); - } - - MatrixPtr outputCheck = std::make_shared(height, 1); - outputCheck->copyFrom(*gpuA); - MatrixCheckErr(*cpuA, *outputCheck); -} - -void testMatrixColSum(int height, int width, int endCol = 0) { - MatrixPtr cpuA = std::make_shared(1, width); - MatrixPtr cpuB = std::make_shared(height, width); - MatrixPtr gpuA = std::make_shared(1, width); - MatrixPtr gpuB = std::make_shared(height, width); - - cpuA->randomizeUniform(); - cpuB->randomizeUniform(); - gpuA->copyFrom(*cpuA); - gpuB->copyFrom(*cpuB); - - if (!endCol) { - cpuA->accumulateColSum(*cpuB); - gpuA->accumulateColSum(*gpuB); - } else { - MatrixPtr subCpuA = cpuA->subColMatrix(0, endCol); - MatrixPtr subGpuA = gpuA->subColMatrix(0, endCol); - MatrixPtr subCpuB = cpuB->subColMatrix(0, endCol); - MatrixPtr subGpuB = gpuB->subColMatrix(0, endCol); - subCpuA->accumulateColSum(*subCpuB); - subGpuA->accumulateColSum(*subGpuB); - } - - MatrixPtr outputCheck = std::make_shared(1, width); - outputCheck->copyFrom(*gpuA); - MatrixCheckErr(*cpuA, *outputCheck); -} - -void testMatrixColMax(int height, int width, int endCol = 0) { - MatrixPtr cpuA = std::make_shared(1, width); - MatrixPtr cpuB = std::make_shared(height, width); - MatrixPtr gpuA = std::make_shared(1, width); - MatrixPtr gpuB = std::make_shared(height, width); - - cpuA->randomizeUniform(); - cpuB->randomizeUniform(); - gpuA->copyFrom(*cpuA); - gpuB->copyFrom(*cpuB); - - if (!endCol) { - cpuB->colMax(*cpuA); - gpuB->colMax(*gpuA); - } else { - MatrixPtr subCpuA = cpuA->subColMatrix(0, endCol); - MatrixPtr subGpuA = gpuA->subColMatrix(0, endCol); - MatrixPtr subCpuB = cpuB->subColMatrix(0, endCol); - MatrixPtr subGpuB = gpuB->subColMatrix(0, endCol); - subCpuB->colMax(*subCpuA); - subGpuB->colMax(*subGpuA); - } - - MatrixPtr outputCheck = std::make_shared(1, width); - outputCheck->copyFrom(*gpuA); - MatrixCheckErr(*cpuA, *outputCheck); -} - -void testMatrixCollectBias(int height, int width) { - MatrixPtr cpuA = std::make_shared(1, width); - MatrixPtr cpuB = std::make_shared(height, width); - MatrixPtr gpuA = std::make_shared(1, width); - MatrixPtr gpuB = std::make_shared(height, width); - - cpuA->randomizeUniform(); - cpuB->randomizeUniform(); - gpuA->copyFrom(*cpuA); - gpuB->copyFrom(*cpuB); - - real scale = 1.0f / (rand() % 10); // NOLINT - - cpuA->collectBias(*cpuB, scale); - gpuA->collectBias(*gpuB, scale); - - MatrixPtr outputCheck = std::make_shared(1, width); - outputCheck->copyFrom(*gpuA); - MatrixCheckErr(*cpuA, *outputCheck); -} - -void testMatrixSumOfSquares(int height, int width, int endCol = 0) { - MatrixPtr cpuA = std::make_shared(height, 1); - MatrixPtr cpuB = std::make_shared(height, width); - MatrixPtr cpuC = std::make_shared(height, width); - MatrixPtr gpuA = std::make_shared(height, 1); - MatrixPtr gpuB = std::make_shared(height, width); - MatrixPtr gpuC = std::make_shared(height, width); - - cpuA->randomizeUniform(); - cpuB->randomizeUniform(); - cpuC->randomizeUniform(); - gpuA->copyFrom(*cpuA); - gpuB->copyFrom(*cpuB); - gpuC->copyFrom(*cpuC); - - if (!endCol) { - 
-    cpuA->sumOfSquares(*cpuB, *cpuC);
-    gpuA->sumOfSquares(*gpuB, *gpuC);
-  } else {
-    MatrixPtr subCpuB = cpuB->subColMatrix(0, endCol);
-    MatrixPtr subCpuC = cpuC->subColMatrix(0, endCol);
-    MatrixPtr subGpuB = gpuB->subColMatrix(0, endCol);
-    MatrixPtr subGpuC = gpuC->subColMatrix(0, endCol);
-    cpuA->sumOfSquares(*subCpuB, *subCpuC);
-    gpuA->sumOfSquares(*subGpuB, *subGpuC);
-  }
-
-  MatrixPtr outputCheck = std::make_shared<CpuMatrix>(height, 1);
-  outputCheck->copyFrom(*gpuA);
-  MatrixCheckErr(*cpuA, *outputCheck);
-}
-
-void testMatrixBinaryClassificationError(int height, int width) {
-  MatrixPtr cpuA = std::make_shared<CpuMatrix>(height, width);
-  MatrixPtr cpuB = std::make_shared<CpuMatrix>(height, width);
-  MatrixPtr cpuC = std::make_shared<CpuMatrix>(height, width);
-  MatrixPtr gpuA = std::make_shared<GpuMatrix>(height, width);
-  MatrixPtr gpuB = std::make_shared<GpuMatrix>(height, width);
-  MatrixPtr gpuC = std::make_shared<GpuMatrix>(height, width);
-
-  MatrixPtr cpuA2 = std::make_shared<CpuMatrix>(height, width);
-  MatrixPtr cpuB2 = std::make_shared<CpuMatrix>(height, width);
-  MatrixPtr cpuC2 = std::make_shared<CpuMatrix>(height, width);
-
-  cpuA->randomizeUniform();
-  cpuB->randomizeUniform();
-  cpuC->randomizeUniform();
-  gpuA->copyFrom(*cpuA);
-  gpuB->copyFrom(*cpuB);
-  gpuC->copyFrom(*cpuC);
-  cpuA2->copyFrom(*cpuA);
-  cpuB2->copyFrom(*cpuB);
-  cpuC2->copyFrom(*cpuC);
-
-  real scale = 0.5;
-  int columnOffset = rand() % width;  // NOLINT
-
-  cpuA->binaryClassificationError(columnOffset, *cpuB, *cpuC, scale);
-  gpuA->binaryClassificationError(columnOffset, *gpuB, *gpuC, scale);
-  cpuA2->binaryClassificationError2(columnOffset, *cpuB2, *cpuC2, scale);
-
-  MatrixPtr outputCheck = std::make_shared<CpuMatrix>(height, width);
-  outputCheck->copyFrom(*gpuA);
-  MatrixCheckErr(*cpuA, *outputCheck);
-  MatrixCheckErr(*cpuA, *cpuA2);
-}
-
-TEST(Matrix, aggregate) {
-  for (auto height : {1, 11, 16, 32, 64, 73, 128, 200, 1024, 2345}) {
-    for (auto width : {1, 9, 16, 32, 64, 100, 512, 1000, 1024, 2453}) {
-      VLOG(3) << " height=" << height << " width=" << width;
-      testMatrixRowSum(height, width);
-      testMatrixRowMax(height, width);
-      testMatrixColSum(height, width);
-      testMatrixColMax(height, width);
-      testMatrixCollectBias(height, width);
-      testMatrixTernaryRowDotMul(height, width);
-      testMatrixAddDotMulVMM(height, width);
-
-      testMatrixSumOfSquares(height, width);
-      testMatrixBinaryClassificationError(height, width);
-    }
-  }
+  cpu->setDiag(1.0);
+  TensorCheckErr(*cpu, *outputCheck);
 }
 
-TEST(Matrix, aggregate2) {
-  for (auto height : {16, 32, 128, 512, 1024}) {
-    for (auto width :
-         {16, 32, 64, 128, 256, 512, 768, 1024, 2048, 3072, 4096}) {
+TEST(Matrix, unary) {
+  for (auto height : {1, 3, 11, 73, 128, 200, 330}) {
+    for (auto width : {1, 3, 32, 100, 512, 1000, 3210}) {
       VLOG(3) << " height=" << height << " width=" << width;
-      int endCol = rand() % width;  // NOLINT
-      testMatrixRowMax(height, width, endCol);
-      testMatrixSumOfSquares(height, width, endCol);
-      testMatrixColSum(height, width, endCol);
-      testMatrixColMax(height, width, endCol);
-      testMatrixAddDotMulVMM(height, width, endCol);
+      testMatrixDeepSwap(height, width);
+      testMatrixZeroAtOffset(height, width);
+      testMatrixGetSum(height, width);
+      testMatrixTranspose(height, width);
     }
+    // inverse
+    testMatrixInverse(height);
   }
 }
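[Note on the hunk above: `testMatrixInverse(height)` is one of the helpers this patch introduces elsewhere; only its tail (`cpu->setDiag(1.0); TensorCheckErr(*cpu, *outputCheck);`) is visible here. A minimal sketch of what such a check plausibly does — the conditioning trick, the `inverse(MatrixPtr, bool memAlloc)` signature, and the assumption that `setDiag(v)` resets the matrix to `v * I` are all inferred, not taken from the patch:]

    // Sketch only: compare the GPU inverse against the CPU inverse, then
    // verify A * A^{-1} reproduces the identity.
    void testMatrixInverseSketch(int height) {
      MatrixPtr cpu = std::make_shared<CpuMatrix>(height, height);
      MatrixPtr gpu = std::make_shared<GpuMatrix>(height, height);
      MatrixPtr cpuInv = std::make_shared<CpuMatrix>(height, height);
      MatrixPtr gpuInv = std::make_shared<GpuMatrix>(height, height);

      // Well-conditioned input: A = I + R * R^T (symmetric positive definite).
      cpu->randomizeUniform();
      MatrixPtr outputCheck = std::make_shared<CpuMatrix>(height, height);
      outputCheck->mul(cpu, cpu->getTranspose());
      cpu->setDiag(1.0);  // assumed to reset the matrix to the identity
      cpu->add(*outputCheck);
      gpu->copyFrom(*cpu);

      cpu->inverse(cpuInv, false);  // assumed signature: (result, memAlloc)
      gpu->inverse(gpuInv, false);
      TensorCheckErr(*cpuInv, *gpuInv);

      // A * A^{-1} should equal the identity, i.e. setDiag(1.0) of cpu.
      outputCheck->mul(cpu, cpuInv);
      cpu->setDiag(1.0);
      TensorCheckErr(*cpu, *outputCheck);
    }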
 
-void testMatrixAddAtOffset(int height, int width1, int width2) {
-  MatrixPtr cpuInput = std::make_shared<CpuMatrix>(height, width1);
-  MatrixPtr cpuOutput = std::make_shared<CpuMatrix>(height, width2);
-  MatrixPtr gpuInput = std::make_shared<GpuMatrix>(height, width1);
-  MatrixPtr gpuOutput = std::make_shared<GpuMatrix>(height, width2);
+void testMatrixSoftmax(int height, int width) {
+  MatrixPtr cpuInput = std::make_shared<CpuMatrix>(height, width);
+  MatrixPtr cpuOutput = std::make_shared<CpuMatrix>(height, width);
+  MatrixPtr gpuInput = std::make_shared<GpuMatrix>(height, width);
+  MatrixPtr gpuOutput = std::make_shared<GpuMatrix>(height, width);
 
   cpuInput->randomizeUniform();
   gpuInput->copyFrom(*cpuInput);
-  cpuOutput->randomizeUniform();
-  gpuOutput->copyFrom(*cpuOutput);
-
-  int columnOffset = 0;
-  int offset = std::abs(width1 - width2);
-  if (offset) {
-    columnOffset = rand() % offset;  // NOLINT
-  }
-  cpuOutput->addAtOffset(*cpuInput, columnOffset);
-  gpuOutput->addAtOffset(*gpuInput, columnOffset);
+  cpuOutput->zero();
+  gpuOutput->zero();
+  cpuInput->softmax(*cpuOutput);
+  gpuInput->softmax(*gpuOutput);
 
-  MatrixPtr outputCheck = std::make_shared<CpuMatrix>(height, width2);
-  outputCheck->copyFrom(*gpuOutput);
-  MatrixCheckEqual(*cpuOutput, *outputCheck);
+  TensorCheckErr(*cpuOutput, *gpuOutput);
 }
 
-void testMatrixAssignAtOffset(int height, int width1, int width2) {
-  MatrixPtr cpuInput = std::make_shared<CpuMatrix>(height, width1);
-  MatrixPtr cpuOutput = std::make_shared<CpuMatrix>(height, width2);
-  MatrixPtr gpuInput = std::make_shared<GpuMatrix>(height, width1);
-  MatrixPtr gpuOutput = std::make_shared<GpuMatrix>(height, width2);
-
+void testSequenceSoftmax(int batchSize) {
+  // forward
+  int inputDim = 1;
+  MatrixPtr cpuInput = std::make_shared<CpuMatrix>(batchSize, inputDim);
+  MatrixPtr gpuInput = std::make_shared<GpuMatrix>(batchSize, inputDim);
   cpuInput->randomizeUniform();
   gpuInput->copyFrom(*cpuInput);
-  cpuOutput->randomizeUniform();
-  gpuOutput->copyFrom(*cpuOutput);
 
-  int columnOffset = 0;
-  int offset = std::abs(width1 - width2);
-  if (offset) {
-    columnOffset = rand() % offset;  // NOLINT
-  }
-  cpuOutput->assignAtOffset(*cpuInput, columnOffset);
-  gpuOutput->assignAtOffset(*gpuInput, columnOffset);
+  IVectorPtr cpuSequence;
+  generateSequenceStartPositions(batchSize, cpuSequence);
+  IVectorPtr gpuSequence = IVector::create(cpuSequence->getSize(), true);
+  gpuSequence->copyFrom(*cpuSequence);
 
-  MatrixPtr outputCheck = std::make_shared<CpuMatrix>(height, width2);
-  outputCheck->copyFrom(*gpuOutput);
-  MatrixCheckEqual(*cpuOutput, *outputCheck);
+  cpuInput->sequenceSoftmax(*cpuInput, *cpuSequence);
+  gpuInput->sequenceSoftmax(*gpuInput, *gpuSequence);
+
+  TensorCheckErr(*cpuInput, *gpuInput);
 }
 
-TEST(Matrix, AtOffset) {
-  for (auto height : {1, 11, 73, 128, 200}) {
-    for (auto width1 : {1, 32, 100, 512, 1000}) {
-      for (auto width2 : {1, 32, 100, 512, 1000}) {
-        VLOG(3) << " height=" << height << " width1=" << width1
-                << " width2=" << width2;
+void testMatrixSoftmaxThreshold(int height, int width) {
+  MatrixPtr cpuInput = std::make_shared<CpuMatrix>(height, width);
+  MatrixPtr cpuOutput = std::make_shared<CpuMatrix>(height, width);
+  MatrixPtr gpuInput = std::make_shared<GpuMatrix>(height, width);
+  MatrixPtr gpuOutput = std::make_shared<GpuMatrix>(height, width);
+
+  cpuInput->randomizeUniform();
+  cpuInput->getData()[0] = 100.0;
+  gpuInput->copyFrom(*cpuInput);
+  cpuOutput->zero();
+  gpuOutput->zero();
+  cpuInput->softmax(*cpuOutput);
+  gpuInput->softmax(*gpuOutput);
 
-        testMatrixAddAtOffset(height, width1, width2);
+  MatrixPtr outputCheck = std::make_shared<CpuMatrix>(height, width);
+  outputCheck->copyFrom(*gpuOutput);
 
-        testMatrixAssignAtOffset(height, width1, width2);
+  // check output zero
+  int cpuCount = 0;
+  int gpuCount = 0;
+  auto zeroNum = [](MatrixPtr out, int& count) {
+    for (size_t i = 0; i < out->getHeight(); i++) {
+      for (size_t j = 0; j < out->getWidth(); j++) {
+        if (out->getElement(i, j) == 0) count++;
       }
     }
-  }
+  };
+  zeroNum(cpuOutput, cpuCount);
+  zeroNum(outputCheck, gpuCount);
+  EXPECT_EQ(cpuCount, 0) << "Cpu softmax output value 0";
+  EXPECT_EQ(gpuCount, 0) << "Gpu softmax output value 0";
 }
 
-void testMatrixSelectRows(int numSamples, int tableSize, int inputDim) {
-  MatrixPtr cpuTable = std::make_shared<CpuMatrix>(tableSize, inputDim);
-  MatrixPtr gpuTable = std::make_shared<GpuMatrix>(tableSize, inputDim);
-  cpuTable->randomizeUniform();
-  gpuTable->copyFrom(*cpuTable);
-
-  IVectorPtr cpuIds;
-  IVectorPtr gpuIds;
-  cpuIds = VectorT<int>::create(numSamples, false);
-  gpuIds = VectorT<int>::create(numSamples, true);
-  cpuIds->rand(tableSize);
-  gpuIds->copyFrom(*cpuIds);
+void testMatrixSoftmaxBp(int height, int width) {
+  MatrixPtr cpuInput = std::make_shared<CpuMatrix>(height, width);
+  MatrixPtr cpuOutput = std::make_shared<CpuMatrix>(height, width);
+  MatrixPtr gpuInput = std::make_shared<GpuMatrix>(height, width);
+  MatrixPtr gpuOutput = std::make_shared<GpuMatrix>(height, width);
 
-  MatrixPtr cpuOutput = std::make_shared<CpuMatrix>(numSamples, inputDim);
-  MatrixPtr gpuOutput = std::make_shared<GpuMatrix>(numSamples, inputDim);
+  cpuInput->randomizeUniform();
+  gpuInput->copyFrom(*cpuInput);
   cpuOutput->randomizeUniform();
   gpuOutput->copyFrom(*cpuOutput);
+  gpuOutput->softmaxBackward(*gpuInput);
 
-  cpuOutput->selectRows(*cpuTable, *cpuIds);
-  gpuOutput->selectRows(*gpuTable, *gpuIds);
+  MatrixPtr sftMaxSum = std::make_shared<CpuMatrix>(height, 1);
+  MatrixPtr sftMaxDot = std::make_shared<CpuMatrix>(height, width);
+  sftMaxDot->dotMul(*cpuOutput, *cpuInput);
+  sftMaxSum->colMerge(*sftMaxDot);
+  cpuOutput->softmaxDerivative(*cpuInput, *sftMaxSum);
 
-  // check
-  MatrixPtr outputCheck = std::make_shared<CpuMatrix>(numSamples, inputDim);
-  outputCheck->copyFrom(*gpuOutput);
-  MatrixCheckEqual(*cpuOutput, *outputCheck);
+  TensorCheckErr(*cpuOutput, *gpuOutput);
+}
+
+TEST(Matrix, softmax) {
+  for (auto height : {1, 11, 73, 128, 200}) {
+    for (auto width : {1, 32, 100, 512, 1000}) {
+      VLOG(3) << " height=" << height << " width=" << width;
+
+      testMatrixSoftmax(height, width);
+      testMatrixSoftmaxBp(height, width);
+      testMatrixSoftmaxThreshold(height, width);
+    }
+    testSequenceSoftmax(height);
+  }
 }
 
 void testMatrixAddToRows(int numSamples, int tableSize, int inputDim) {
@@ -1342,10 +470,7 @@ void testMatrixAddToRows(int numSamples, int tableSize, int inputDim) {
   cpuOutput->addToRows(*cpuTable, *cpuIds);
   gpuOutput->addToRows(*gpuTable, *gpuIds);
 
-  // check
-  MatrixPtr outputCheck = std::make_shared<CpuMatrix>(tableSize, inputDim);
-  outputCheck->copyFrom(*gpuTable);
-  MatrixCheckErr(*cpuTable, *outputCheck);
+  TensorCheckErr(*cpuTable, *gpuTable);
 }
 
 TEST(Matrix, tableProjection) {
@@ -1354,7 +479,6 @@ TEST(Matrix, tableProjection) {
       for (auto inputDim : {20, 50}) {
        VLOG(3) << " numSamples=" << numSamples << " tableSize=" << tableSize
                << " inputDim=" << inputDim;
-      testMatrixSelectRows(numSamples, tableSize, inputDim);
       testMatrixAddToRows(numSamples, tableSize, inputDim);
     }
   }
@@ -1388,9 +512,7 @@ void testMatrixMul(bool transa, bool transb, int dimM, int dimN, int dimK) {
   cpuC->mul(cpuA, cpuB, alpha, beta);
   gpuC->mul(gpuA, gpuB, alpha, beta);
 
-  MatrixPtr outputCheck = std::make_shared<CpuMatrix>(heightC, widthC);
-  outputCheck->copyFrom(*gpuC);
-  MatrixCheckErr(*cpuC, *outputCheck);
+  TensorCheckErr(*cpuC, *gpuC);
 }
 
 void testSubMatrixMul(bool transa, bool transb, int dimM, int dimN, int dimK) {
@@ -1462,9 +584,7 @@ void testSubMatrixMul(bool transa, bool transb, int dimM, int dimN, int dimK) {
   subCpuC->mul(subCpuA, subCpuB, alpha, beta);
   subGpuC->mul(subGpuA, subGpuB, alpha, beta);
 
-  MatrixPtr outputCheck = std::make_shared<CpuMatrix>(heightC, widthC);
-  outputCheck->copyFrom(*gpuC);
-  MatrixCheckErr(*cpuC, *outputCheck);
+  TensorCheckErr(*cpuC, *gpuC);
 }
 
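[Note: the recurring rewrite in these hunks replaces a hand-written GPU-to-CPU copy plus `MatrixCheckErr`/`MatrixCheckEqual` with a single `TensorCheckErr`/`TensorCheckEqual` call. A plausible definition of those two helpers on top of `autotest::TensorCheck` — the tolerance values and the `PADDLE_TYPE_DOUBLE` switch are assumptions, not code shown in this patch:]

    namespace autotest {

    // Exact element-wise equality; mixed CPU/GPU arguments are staged on
    // the CPU by TensorCheck's CopyToCpu before comparing.
    template <typename Tensor1, typename Tensor2>
    void TensorCheckEqual(const Tensor1& tensor1, const Tensor2& tensor2) {
      AssertEqual compare(0);
      TensorCheck(compare, tensor1, tensor2);
    }

    // Approximate equality with a small precision-dependent tolerance.
    template <typename Tensor1, typename Tensor2>
    void TensorCheckErr(const Tensor1& tensor1, const Tensor2& tensor2) {
    #ifndef PADDLE_TYPE_DOUBLE
      real err = 1e-3;   // assumed single-precision tolerance
    #else
      real err = 1e-10;  // assumed double-precision tolerance
    #endif
      AssertEqual compare(err);
      TensorCheck(compare, tensor1, tensor2);
    }

    }  // namespace autotest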
 TEST(Matrix, mul) {
@@ -1518,9 +638,7 @@ void testVectorReset(int size) {
   cpu->reset(value);
   gpu->reset(value);
 
-  std::shared_ptr<CpuVectorT<T>> out = std::make_shared<CpuVectorT<T>>(size);
-  out->copyFrom(*gpu);
-  VectorCheckEqual(*cpu, *out);
+  TensorCheckEqual(*cpu, *gpu);
 }
 
 template <class T>
@@ -1546,9 +664,7 @@ void testVecortSelectFrom(int size) {
   cpuDst->selectFrom(*cpuSrc, *cpuIds);
   gpuDst->selectFrom(*gpuSrc, *gpuIds);
 
-  std::shared_ptr<CpuVectorT<T>> out = std::make_shared<CpuVectorT<T>>(size);
-  out->copyFrom(*gpuDst);
-  VectorCheckEqual(*cpuDst, *out);
+  TensorCheckEqual(*cpuDst, *gpuDst);
 }
 
 template <class T>
@@ -1559,9 +675,7 @@ void testVecotrZeroMem(int size) {
   cpu->zeroMem();
   gpu->zeroMem();
 
-  std::shared_ptr<CpuVectorT<T>> out = std::make_shared<CpuVectorT<T>>(size);
-  out->copyFrom(*gpu);
-  VectorCheckEqual(*cpu, *out);
+  TensorCheckEqual(*cpu, *gpu);
 }
 
 template <class T>
@@ -1582,9 +696,7 @@ void testVectorIsEqual(int size) {
   cpuA->isEqualTo(*cpuB, value);
   gpuA->isEqualTo(*gpuB, value);
 
-  std::shared_ptr<CpuVectorT<T>> out = std::make_shared<CpuVectorT<T>>(size);
-  out->copyFrom(*gpuA);
-  VectorCheckEqual(*cpuA, *out);
+  TensorCheckEqual(*cpuA, *gpuA);
 }
 
 TEST(Vector, Equal) {
@@ -1615,9 +727,7 @@ void testMatrixTopK(int samples, int dim, int beamSize) {
   cpuSrc->rowMax(*cpuIds, *cpuVal);
   gpuSrc->rowMax(*gpuIds, *gpuVal);
 
-  MatrixPtr outVal = std::make_shared<CpuMatrix>(samples, beamSize);
-  outVal->copyFrom(*gpuVal);
-  MatrixCheckEqual(*cpuVal, *outVal);
+  TensorCheckEqual(*cpuVal, *gpuVal);
 }
 
 TEST(Matrix, topK) {
@@ -1653,9 +763,7 @@ void testSMatrixTopK(int samples, int dim, int beamSize, real ratio) {
   cpuSrc->rowMax(*cpuIds, *cpuVal);
   gpuSrc->rowMax(*gpuIds, *gpuVal);
 
-  MatrixPtr outCheckMaxVal = std::make_shared<CpuMatrix>(samples, beamSize);
-  outCheckMaxVal->copyFrom(*gpuVal);
-  MatrixCheckEqual(*cpuVal, *outCheckMaxVal);
+  TensorCheckEqual(*cpuVal, *gpuVal);
 
   IVectorPtr outCheckIds = std::make_shared<CpuIVector>(samples * beamSize);
   outCheckIds->copyFrom(*gpuIds);
@@ -1685,42 +793,6 @@ TEST(SMatrix, topK) {
   }
 }
 
-void testMatrixCopyByRowIndex(int outHeight, int inHeight, int width) {
-  MatrixPtr cpuInput = std::make_shared<CpuMatrix>(inHeight, width);
-  MatrixPtr gpuInput = std::make_shared<GpuMatrix>(inHeight, width);
-  cpuInput->randomizeUniform();
-  gpuInput->copyFrom(*cpuInput);
-
-  MatrixPtr cpuOutput = std::make_shared<CpuMatrix>(outHeight, width);
-  MatrixPtr gpuOutput = std::make_shared<GpuMatrix>(outHeight, width);
-  cpuOutput->zero();
-  gpuOutput->zero();
-
-  IVectorPtr cpuRowIndex = IVector::create(outHeight, false);
-  IVectorPtr gpuRowIndex = IVector::create(outHeight, true);
-  cpuRowIndex->rand(inHeight);
-  gpuRowIndex->copyFrom(*cpuRowIndex);
-
-  cpuOutput->copyByRowIndex(*cpuInput, *cpuRowIndex);
-  gpuOutput->copyByRowIndex(*gpuInput, *gpuRowIndex);
-
-  // check
-  MatrixPtr outputCheck = std::make_shared<CpuMatrix>(outHeight, width);
-  outputCheck->copyFrom(*gpuOutput);
-  MatrixCheckEqual(*cpuOutput, *outputCheck);
-}
-
-TEST(Matrix, copyByRowIndex) {
-  for (auto outHeight : {31, 500, 1000}) {
-    for (auto inHeight : {17, 257, 500, 1200}) {
-      for (auto width : {512, 1024}) {
-        VLOG(3) << outHeight << " " << inHeight << " " << width;
-        testMatrixCopyByRowIndex(outHeight, inHeight, width);
-      }
-    }
-  }
-}
-
 void testMatrixSequenceAvgForward(int batchSize, int inputDim, int mode) {
   MatrixPtr cpuInput = std::make_shared<CpuMatrix>(batchSize, inputDim);
   MatrixPtr gpuInput = std::make_shared<GpuMatrix>(batchSize, inputDim);
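[For reference, the before/after shape of a typical assertion under this refactor — illustrative only, with `rowSum` borrowed from the tests above:]

    // Not part of the patch: a self-contained example of the rewrite.
    void checkRowSumExample(int height, int width) {
      MatrixPtr cpuB = std::make_shared<CpuMatrix>(height, width);
      MatrixPtr gpuB = std::make_shared<GpuMatrix>(height, width);
      MatrixPtr cpuA = std::make_shared<CpuMatrix>(height, 1);
      MatrixPtr gpuA = std::make_shared<GpuMatrix>(height, 1);
      cpuB->randomizeUniform();
      gpuB->copyFrom(*cpuB);

      cpuB->rowSum(*cpuA);
      gpuB->rowSum(*gpuA);

      // Old style, removed throughout this patch: stage the GPU result in
      // a CPU buffer, then compare two CPU matrices element by element.
      //   MatrixPtr outputCheck = std::make_shared<CpuMatrix>(height, 1);
      //   outputCheck->copyFrom(*gpuA);
      //   MatrixCheckErr(*cpuA, *outputCheck);

      // New style: pass the CPU and GPU tensors directly; CopyToCpu inside
      // TensorCheck performs the device-to-host copy.
      TensorCheckErr(*cpuA, *gpuA);
    }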
@@ -1741,10 +813,7 @@ void testMatrixSequenceAvgForward(int batchSize, int inputDim, int mode) {
   cpuOutput->sequenceAvgForward(*cpuInput, *cpuSequence, mode);
   gpuOutput->sequenceAvgForward(*gpuInput, *gpuSequence, mode);
 
-  // check
-  MatrixPtr outputCheck = std::make_shared<CpuMatrix>(newBatchSize, inputDim);
-  outputCheck->copyFrom(*gpuOutput);
-  MatrixCheckErr(*cpuOutput, *outputCheck);
+  TensorCheckErr(*cpuOutput, *gpuOutput);
 }
 
 TEST(Matrix, sequenceAvgForward) {
@@ -1759,45 +828,6 @@ TEST(Matrix, sequenceAvgForward) {
   }
 }
 
-void testCosSim(int heightX, int heightY, int width, real scale) {
-  MatrixPtr prevOutX = CpuMatrix::create(heightX, width, false, false);
-  MatrixPtr prevOutY = CpuMatrix::create(heightY, width, false, false);
-  MatrixPtr output = CpuMatrix::create(heightX, 1, false, false);
-
-  prevOutX->randomizeUniform();
-  prevOutY->randomizeUniform();
-  prevOutX->add(-0.5);
-  prevOutY->add(-0.5);
-  output->randomizeUniform();
-
-  MatrixPtr prevOutXGpu = GpuMatrix::create(heightX, width, false, true);
-  MatrixPtr prevOutYGpu = GpuMatrix::create(heightY, width, false, true);
-  MatrixPtr outputGpu = GpuMatrix::create(heightX, 1, false, true);
-
-  prevOutXGpu->copyFrom(*prevOutX);
-  prevOutYGpu->copyFrom(*prevOutY);
-  outputGpu->copyFrom(*output);
-
-  output->cosSim(*prevOutX, *prevOutY, scale);
-  outputGpu->cosSim(*prevOutXGpu, *prevOutYGpu, scale);
-
-  MatrixPtr outputCheck = CpuMatrix::create(heightX, 1, false, false);
-  outputCheck->copyFrom(*outputGpu);
-  MatrixCheckErr(*output, *outputCheck);
-}
-
-TEST(Matrix, cosSim) {
-  for (auto heightX : {10, 100, 1000}) {
-    for (auto heightY : {1, heightX}) {
-      for (auto width : {10, 100, 1000}) {
-        for (auto scale : {1.0, 2.0}) {
-          testCosSim(heightX, heightY, width, scale);
-        }
-      }
-    }
-  }
-}
-
 void testCosSimDerivate(int heightX, int heightY, int width, real scale) {
   MatrixPtr prevOutX = CpuMatrix::create(heightX, width, false, false);
   MatrixPtr prevOutY = CpuMatrix::create(heightY, width, false, false);
@@ -1837,12 +867,8 @@ void testCosSimDerivate(int heightX, int heightY, int width, real scale) {
                                 *prevGradYGpu,
                                 scale);
 
-  MatrixPtr prevGradXCheck = CpuMatrix::create(heightX, width, false, false);
-  MatrixPtr prevGradYCheck = CpuMatrix::create(heightY, width, false, false);
-  prevGradXCheck->copyFrom(*prevGradXGpu);
-  prevGradYCheck->copyFrom(*prevGradYGpu);
-  MatrixCheckErr(*prevGradX, *prevGradXCheck);
-  MatrixCheckErr(*prevGradY, *prevGradYCheck);
+  TensorCheckErr(*prevGradX, *prevGradXGpu);
+  TensorCheckErr(*prevGradY, *prevGradYGpu);
 }
 
 TEST(Matrix, cosSimDerivate) {
@@ -1857,80 +883,6 @@ TEST(Matrix, cosSimDerivate) {
   }
 }
 
-void testParamReluForward(int height, int width, int w_height, int w_width) {
-  MatrixPtr output = CpuMatrix::create(height, width, false, false);
-  MatrixPtr input = CpuMatrix::create(height, width, false, false);
-  MatrixPtr w = CpuMatrix::create(w_height, w_width, false, false);
-
-  output->randomizeUniform();
-  input->randomizeUniform();
-  w->randomizeUniform();
-  input->add(-0.5);
-
-  MatrixPtr outputGpu = GpuMatrix::create(height, width, false, true);
-  MatrixPtr inputGpu = GpuMatrix::create(height, width, false, true);
-  MatrixPtr wGpu = GpuMatrix::create(w_height, w_width, false, true);
-
-  inputGpu->copyFrom(*input);
-  wGpu->copyFrom(*w);
-
-  output->paramReluForward(*input, *w);
-  outputGpu->paramReluForward(*inputGpu, *wGpu);
-
-  MatrixPtr outputCheck = CpuMatrix::create(height, width, false, false);
-  outputCheck->copyFrom(*outputGpu);
-  MatrixCheckEqual(*output, *outputCheck);
-}
-
-TEST(Matrix, paramReluForward) {
-  for (auto height : {10, 100}) {
-    for (auto width : {10, 100}) {
-      for (auto w_height : {1, 2}) {
-        for (auto w_width : {1, 2}) {
-          testParamReluForward(height, width, w_height, w_width);
-        }
-      }
-    }
-  }
-}
-
-void testParamReluBackwardW(int height, int width, int w_height, int w_width) {
-  MatrixPtr oGrad = CpuMatrix::create(height, width, false, false);
-  MatrixPtr input = CpuMatrix::create(height, width, false, false);
-  MatrixPtr w = CpuMatrix::create(w_height, w_width, false, false);
-
-  oGrad->randomizeUniform();
-  input->randomizeUniform();
-  w->randomizeUniform();
-  input->add(-0.5);
-
-  MatrixPtr oGradGpu = GpuMatrix::create(height, width, false, true);
-  MatrixPtr inputGpu = GpuMatrix::create(height, width, false, true);
-  MatrixPtr wGpu = GpuMatrix::create(w_height, w_width, false, true);
-
-  oGradGpu->copyFrom(*oGrad);
-  inputGpu->copyFrom(*input);
-  wGpu->copyFrom(*w);
-
-  w->paramReluBackwardW(*oGrad, *input);
-  wGpu->paramReluBackwardW(*oGradGpu, *inputGpu);
-  MatrixPtr wCheck = CpuMatrix::create(w_height, w_width, false, false);
-  wCheck->copyFrom(*wGpu);
-  MatrixCheckErr(*w, *wCheck);
-}
-
-TEST(Matrix, paramReluBackwardW) {
-  for (auto height : {10, 100}) {
-    for (auto width : {10, 100}) {
-      for (auto w_height : {1, 2}) {
-        for (auto w_width : {1, 2}) {
-          testParamReluBackwardW(height, width, w_height, w_width);
-        }
-      }
-    }
-  }
-}
-
 void testParamReluBackwardDiff(int height,
                                int width,
                                int w_height,
@@ -1959,9 +911,7 @@ void testParamReluBackwardDiff(int height,
   diff->paramReluBackwardDiff(*oGrad, *input, *w);
   diffGpu->paramReluBackwardDiff(*oGradGpu, *inputGpu, *wGpu);
 
-  MatrixPtr diffCheck = CpuMatrix::create(height, width, false, false);
-  diffCheck->copyFrom(*diffGpu);
-  MatrixCheckErr(*diff, *diffCheck);
+  TensorCheckErr(*diff, *diffGpu);
 }
 
 TEST(Matrix, paramReluBackwardDiff) {
@@ -1992,9 +942,7 @@ void testClassificationError(int numSamples, int dim) {
   cpuError->classificationError(cpuOutput, cpuLabel);
   gpuError->classificationError(gpuOutput, gpuLabel);
 
-  MatrixPtr check = std::make_shared<CpuMatrix>(numSamples, 1);
-  check->copyFrom(*gpuError);
-  MatrixCheckEqual(*cpuError, *check);
+  TensorCheckEqual(*cpuError, *gpuError);
 }
 
 TEST(Matrix, classificationError) {
@@ -2159,9 +1107,8 @@ void testAvgPoolFwdBwd(int numSamples,
                  outW,
                  padH,
                  padW);
-  MatrixPtr targetCheck = CpuMatrix::create(numSamples, outWidth, false, false);
-  targetCheck->copyFrom(*targetGpu);
-  MatrixCheckErr(*target, *targetCheck);
+
+  TensorCheckErr(*target, *targetGpu);
 
   MatrixPtr inputGrad = CpuMatrix::create(numSamples, inWidth, false, false);
   MatrixPtr inputGpuGrad = GpuMatrix::create(numSamples, inWidth, false, true);
@@ -2200,10 +1147,8 @@ void testAvgPoolFwdBwd(int numSamples,
                          1.0,
                          padH,
                          padW);
-  MatrixPtr targetBwdCheck =
-      CpuMatrix::create(numSamples, inWidth, false, false);
-  targetBwdCheck->copyFrom(*inputGpuGrad);
-  MatrixCheckErr(*inputGrad, *targetBwdCheck);
+
+  TensorCheckErr(*inputGrad, *inputGpuGrad);
 }
 
 TEST(Matrix, PoolFwdBwd) {
@@ -2268,11 +1213,9 @@ void testMaxOutFwdBwd(
   MatrixPtr target = CpuMatrix::create(numSamples, outWidth, false, false);
   MatrixPtr targetGpu = GpuMatrix::create(numSamples, outWidth, false, true);
-  MatrixPtr targetCheck = CpuMatrix::create(numSamples, outWidth, false, false);
 
   IVectorPtr id = CpuIVector::create(numSamples * outWidth, false);
   IVectorPtr idGpu = GpuIVector::create(numSamples * outWidth, true);
-  IVectorPtr idCheck = CpuIVector::create(numSamples * outWidth, false);
 
   input->randomizeUniform();
   inputGpu->copyFrom(*input);
@@ -2280,11 +1223,8 @@ void testMaxOutFwdBwd(
   target->maxoutForward(*input, *id, outChannels, groups);
   targetGpu->maxoutForward(*inputGpu, *idGpu, outChannels, groups);
 
-  // check
-  targetCheck->copyFrom(*targetGpu);
-  MatrixCheckErr(*target, *targetCheck);
-  idCheck->copyFrom(*idGpu);
-  VectorCheckEqual(*id, *idCheck);
+  TensorCheckErr(*target, *targetGpu);
+  TensorCheckEqual(*id, *idGpu);
 
   // backward
   MatrixPtr inputGrad = CpuMatrix::create(numSamples, inWidth, false, false);
@@ -2293,8 +1233,6 @@ void testMaxOutFwdBwd(
   MatrixPtr targetGrad = CpuMatrix::create(numSamples, outWidth, false, false);
   MatrixPtr targetGpuGrad = GpuMatrix::create(numSamples, outWidth, false, true);
-  MatrixPtr targetCheckGrad =
-      CpuMatrix::create(numSamples, inWidth, false, false);
 
   inputGrad->randomizeUniform();
   targetGrad->randomizeUniform();
@@ -2304,9 +1242,7 @@ void testMaxOutFwdBwd(
   inputGrad->maxoutBackward(*targetGrad, *id, outChannels, groups);
   inputGpuGrad->maxoutBackward(*targetGpuGrad, *idGpu, outChannels, groups);
 
-  // check
-  targetCheckGrad->copyFrom(*inputGpuGrad);
-  MatrixCheckErr(*inputGrad, *targetCheckGrad);
+  TensorCheckErr(*inputGrad, *inputGpuGrad);
 }
 
 TEST(Matrix, MaxOutFwdBwd) {
@@ -2326,113 +1262,6 @@ TEST(Matrix, MaxOutFwdBwd) {
   }
 }
 
-void testAddSharedBias(int numSamples, int dim, int channel) {
-  MatrixPtr cpuData = std::make_shared<CpuMatrix>(numSamples, dim);
-  MatrixPtr gpuData = std::make_shared<GpuMatrix>(numSamples, dim);
-
-  MatrixPtr cpuBias = std::make_shared<CpuMatrix>(1, channel);
-  MatrixPtr gpuBias = std::make_shared<GpuMatrix>(1, channel);
-
-  cpuData->randomizeUniform();
-  gpuData->copyFrom(*cpuData);
-  cpuBias->randomizeUniform();
-  gpuBias->copyFrom(*cpuBias);
-
-  cpuData->addSharedBias(*cpuBias, 1.0);
-  gpuData->addSharedBias(*gpuBias, 1.0);
-
-  MatrixPtr check = std::make_shared<CpuMatrix>(numSamples, dim);
-  check->copyFrom(*gpuData);
-  MatrixCheckErr(*cpuData, *check);
-}
-
-void testCollectSharedBias(int numSamples, int dim, int channel) {
-  MatrixPtr cpuData = std::make_shared<CpuMatrix>(numSamples, dim);
-  MatrixPtr gpuData = std::make_shared<GpuMatrix>(numSamples, dim);
-
-  MatrixPtr cpuBias = std::make_shared<CpuMatrix>(1, channel);
-  MatrixPtr gpuBias = std::make_shared<GpuMatrix>(1, channel);
-
-  cpuData->randomizeUniform();
-  gpuData->copyFrom(*cpuData);
-  cpuBias->randomizeUniform();
-  gpuBias->copyFrom(*cpuBias);
-
-  cpuBias->collectSharedBias(*cpuData, 1.0);
-  gpuBias->collectSharedBias(*gpuData, 1.0);
-
-  MatrixPtr check = std::make_shared<CpuMatrix>(1, channel);
-  check->copyFrom(*gpuBias);
-  MatrixCheckErr(*cpuBias, *check);
-}
-
-TEST(Matrix, sharedBias) {
-  for (auto numSamples : {1, 100, 520}) {
-    for (auto dim : {100 * 16, 100 * 32}) {
-      for (auto channel : {8, 16}) {
-        VLOG(3) << " numSamples=" << numSamples << " dim=" << dim
-                << " channel=" << channel;
-        testAddSharedBias(numSamples, dim, channel);
-        testCollectSharedBias(numSamples, dim, channel);
-      }
-    }
-  }
-}
-
-void testMultiBinaryLabelCrossEntropy(int numSamples, int dim) {
-  MatrixPtr output = std::make_shared<CpuMatrix>(numSamples, dim);
-  MatrixPtr cpuOutput = std::make_shared<CpuMatrix>(numSamples, dim);
-  MatrixPtr gpuOutput = std::make_shared<GpuMatrix>(numSamples, dim);
-
-  MatrixPtr cpuEntropy = std::make_shared<CpuMatrix>(numSamples, 1);
-  MatrixPtr gpuEntropy = std::make_shared<GpuMatrix>(numSamples, 1);
-
-  MatrixPtr cpuGrad = std::make_shared<CpuMatrix>(numSamples, dim);
-  MatrixPtr gpuGrad = std::make_shared<GpuMatrix>(numSamples, dim);
-
-  MatrixPtr cpuLabel = std::make_shared<CpuSparseMatrix>(
-      numSamples, dim, numSamples, NO_VALUE, SPARSE_CSR, false);
-  MatrixPtr gpuLabel = std::make_shared<GpuSparseMatrix>(
-      numSamples, dim, numSamples, NO_VALUE, SPARSE_CSR, false);
-  for (int i = 0; i < numSamples; i++) {
-    const unsigned int id = rand() % dim;  // NOLINT
-    cpuLabel->setRow(i, 1, &id, nullptr);
-    gpuLabel->setRow(i, 1, &id, nullptr);
-  }
-
-  output->randomizeUniform();
-  cpuOutput->zeroMem();
-  output->softmax(*cpuOutput);
-  gpuOutput->copyFrom(*cpuOutput);
-
-  cpuEntropy->zeroMem();
-  gpuEntropy->zeroMem();
-  cpuEntropy->multiBinaryLabelCrossEntropy(*cpuOutput, *cpuLabel);
-  gpuEntropy->multiBinaryLabelCrossEntropy(*gpuOutput, *gpuLabel);
-
-  MatrixPtr check1 = std::make_shared<CpuMatrix>(numSamples, 1);
-  check1->copyFrom(*gpuEntropy);
-  MatrixCheckErr(*cpuEntropy, *check1);
-
-  cpuGrad->zeroMem();
-  gpuGrad->zeroMem();
-  cpuGrad->multiBinaryLabelCrossEntropyBp(*cpuOutput, *cpuLabel);
-  gpuGrad->multiBinaryLabelCrossEntropyBp(*gpuOutput, *gpuLabel);
-
-  MatrixPtr check2 = std::make_shared<CpuMatrix>(numSamples, dim);
-  check2->copyFrom(*gpuGrad);
-  MatrixCheckErr(*cpuGrad, *check2);
-}
-
-TEST(Matrix, multiBinaryCrossEntropy) {
-  for (auto numSamples : {100, 1000, 10000}) {
-    for (auto dim : {100, 1000, 10000}) {
-      VLOG(3) << " numSamples=" << numSamples << " dim=" << dim;
-      testMultiBinaryLabelCrossEntropy(numSamples, dim);
-    }
-  }
-}
-
 int main(int argc, char** argv) {
   testing::InitGoogleTest(&argc, argv);
   initMain(argc, argv);