diff --git a/paddle/math/tests/TensorCheck.h b/paddle/math/tests/TensorCheck.h
index beee9e7c0fbcad0784c21f463ad6d74f41dd4165..796f2fce6428edc55e745e5977df022973237a38 100644
--- a/paddle/math/tests/TensorCheck.h
+++ b/paddle/math/tests/TensorCheck.h
@@ -13,11 +13,14 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include
-#include
 #include "paddle/math/Matrix.h"
 
-using namespace paddle;  // NOLINT
-using namespace std;     // NOLINT
+using paddle::Matrix;
+using paddle::CpuMatrix;
+using paddle::GpuMatrix;
+using paddle::VectorT;
+using paddle::CpuVectorT;
+using paddle::GpuVectorT;
 
 namespace autotest {
 
@@ -71,6 +74,53 @@ private:
   CpuMatrix arg_;
 };
 
+template <>
+class CopyToCpu<Matrix> {
+public:
+  explicit CopyToCpu(const Matrix& arg)
+      : arg_(arg.getHeight(), arg.getWidth()) {
+    arg_.copyFrom(arg);
+  }
+  CpuMatrix& copiedArg() { return arg_; }
+
+private:
+  CpuMatrix arg_;
+};
+
+template <class T>
+class CopyToCpu<CpuVectorT<T>> {
+public:
+  explicit CopyToCpu(const CpuVectorT<T>& arg) : arg_(arg) {}
+  const CpuVectorT<T>& copiedArg() const { return arg_; }
+
+private:
+  const CpuVectorT<T>& arg_;
+};
+
+template <class T>
+class CopyToCpu<GpuVectorT<T>> {
+public:
+  explicit CopyToCpu(const GpuVectorT<T>& arg) : arg_(arg.getSize()) {
+    arg_.copyFrom(arg);
+  }
+  CpuVectorT<T>& copiedArg() { return arg_; }
+
+private:
+  CpuVectorT<T> arg_;
+};
+
+template <class T>
+class CopyToCpu<VectorT<T>> {
+public:
+  explicit CopyToCpu(const VectorT<T>& arg) : arg_(arg.getSize()) {
+    arg_.copyFrom(arg);
+  }
+  CpuVectorT<T>& copiedArg() { return arg_; }
+
+private:
+  CpuVectorT<T> arg_;
+};
+
 template <typename AssertEq>
 void TensorCheck(AssertEq compare,
                  const CpuMatrix& matrix1,
@@ -95,10 +145,30 @@ void TensorCheck(AssertEq compare,
   EXPECT_EQ(count, 0) << "There are " << count << " different element.";
 }
 
+template <typename AssertEq, class T>
+void TensorCheck(AssertEq compare,
+                 const CpuVectorT<T>& vector1,
+                 const CpuVectorT<T>& vector2) {
+  CHECK(vector1.getSize() == vector2.getSize());
+
+  const T* data1 = vector1.getData();
+  const T* data2 = vector2.getData();
+  size_t size = vector1.getSize();
+  int count = 0;
+  for (size_t i = 0; i < size; i++) {
+    real a = data1[i];
+    real b = data2[i];
+    if (!compare(a, b)) {
+      count++;
+    }
+  }
+  EXPECT_EQ(count, 0) << "There are " << count << " different element.";
+}
+
 template <typename AssertEq, typename Tensor1, typename Tensor2>
-extern void TensorCheck(AssertEq compare,
-                        const Tensor1& tensor1,
-                        const Tensor2& tensor2) {
+void TensorCheck(AssertEq compare,
+                 const Tensor1& tensor1,
+                 const Tensor2& tensor2) {
   TensorCheck(compare,
               CopyToCpu<Tensor1>(tensor1).copiedArg(),
               CopyToCpu<Tensor2>(tensor2).copiedArg());
@@ -116,4 +186,24 @@ void TensorCheck(AssertEq compare, size_t args1, size_t args2) {
   << ", args2 = " << args2;
 }
 
+template <typename Tensor1, typename Tensor2>
+void TensorCheckEqual(const Tensor1& tensor1, const Tensor2& tensor2) {
+  AssertEqual compare(0);
+  TensorCheck(compare,
+              CopyToCpu<Tensor1>(tensor1).copiedArg(),
+              CopyToCpu<Tensor2>(tensor2).copiedArg());
+}
+
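+// Unlike TensorCheckEqual, TensorCheckErr allows a small per-element
+// tolerance, since CPU and GPU floating-point results are not expected
+// to be bitwise identical.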
+template <typename Tensor1, typename Tensor2>
+void TensorCheckErr(const Tensor1& tensor1, const Tensor2& tensor2) {
+#ifndef PADDLE_TYPE_DOUBLE
+  AssertEqual compare(1e-3);
+#else
+  AssertEqual compare(1e-10);
+#endif
+  TensorCheck(compare,
+              CopyToCpu<Tensor1>(tensor1).copiedArg(),
+              CopyToCpu<Tensor2>(tensor2).copiedArg());
+}
+
 }  // namespace autotest
diff --git a/paddle/math/tests/TestUtils.h b/paddle/math/tests/TestUtils.h
index 324ecf801783491a60d8c7ed8c5c80ee17e726e7..fe78f7bf09b1949f1491719483e5238d5409903b 100644
--- a/paddle/math/tests/TestUtils.h
+++ b/paddle/math/tests/TestUtils.h
@@ -14,21 +14,19 @@ limitations under the License. */
 
 /**
  * TestUtils.h is used to automatically compare CPU and GPU code is consistent.
- *
- * Auto compare BaseMatrix member function:
- * Use case:
- * a. void BaseMatrix::tanh(BaseMatrixT<T>& b);
- * Compare method: BaseMatrixCompare<0>(&BaseMatrix::tanh);
- *
- * b.
- *
+ * Refer to test_Matrix.cpp and test_BaseMatrix.cpp for how to use autotest.
 */
 
 #include
 #include "paddle/math/Matrix.h"
+#include "paddle/math/SparseMatrix.h"
 #include "TensorCheck.h"
 
-using namespace paddle;  // NOLINT
+using paddle::BaseMatrix;
+using paddle::CpuIVector;
+using paddle::GpuIVector;
+using paddle::CpuSparseMatrix;
+using paddle::GpuSparseMatrix;
 
 namespace autotest {
 
@@ -196,9 +194,7 @@ template
-void BaseMatrixCompare(R (C::*f)(Args...),
-                       AssertEq compare,
-                       bool checkArgs = false) {
+void BaseMatrixCompare(R (C::*f)(Args...), AssertEq compare) {
   for (auto height : {1, 11, 73, 128, 200, 330}) {
     for (auto width : {1, 3, 32, 100, 512, 1000}) {
       CpuMatrix obj1(AsRowVector ? 1 : height, AsColVector ? 1 : width);
@@ -227,17 +223,91 @@ void BaseMatrixCompare(R (C::*f)(Args...),
       call(obj2, f, std::get<I>(tuple2)...);
 
       TensorCheck(compare, obj1, obj2);
-      if (checkArgs) {
-        checkTuple(tuple1, tuple2, compare);
-      }
     }
   }
 }
 
+template <typename T>
+class ReturnType {
+public:
+  typedef T type;
+};
+
+template <>
+class ReturnType<CpuMatrix> {
+public:
+  typedef GpuMatrix type;
+};
+
+template <>
+class ReturnType<CpuIVector> {
+public:
+  typedef GpuIVector type;
+};
+
+template <>
+class ReturnType<CpuSparseMatrix> {
+public:
+  typedef GpuSparseMatrix type;
+};
+
+template <typename T>
+typename ReturnType<T>::type autoArgs(T v) {
+  return v;
+}
+
+template <>
+GpuMatrix autoArgs(CpuMatrix v) {
+  GpuMatrix a(v.getHeight(), v.getWidth());
+  a.copyFrom(v);
+  return a;
+}
+
+template <>
+GpuIVector autoArgs(CpuIVector v) {
+  GpuIVector a(v.getSize());
+  a.copyFrom(v);
+  return a;
+}
+
+template <>
+GpuSparseMatrix autoArgs(CpuSparseMatrix v) {
+  GpuSparseMatrix a(v.getHeight(),
+                    v.getWidth(),
+                    v.getElementCnt(),
+                    v.getValueType(),
+                    v.getFormat());
+
+  a.copyFrom(v, HPPL_STREAM_DEFAULT);
+  hl_stream_synchronize(HPPL_STREAM_DEFAULT);
+  return a;
+}
+
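+// AutoCompare owns a CpuMatrix/GpuMatrix pair with identical contents.
+// operator() calls the same member function on both objects, converting
+// any CPU arguments to their GPU counterparts through autoArgs, and then
+// compares the two results with TensorCheckErr.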
+class AutoCompare {
+public:
+  AutoCompare(size_t height, size_t width)
+      : cpu(height, width), gpu(height, width) {
+    init(cpu);
+    copy(gpu, cpu);
+  }
+
+  template <typename C, typename R, typename... FArgs, typename... Args>
+  void operator()(R (C::*f)(FArgs...), Args&&... args) {
+    call(cpu, f, args...);
+    call(gpu, f, autoArgs(args)...);
+
+    TensorCheckErr(cpu, gpu);
+  }
+
+protected:
+  CpuMatrix cpu;
+  GpuMatrix gpu;
+};
+
 }  // namespace autotest
 
 template <std::size_t... I, typename C, typename R, typename... Args>
-void BaseMatrixCompare(R (C::*f)(Args...), bool checkArgs = false) {
+void BaseMatrixCompare(R (C::*f)(Args...)) {
   static_assert(sizeof...(I) == sizeof...(Args),
                 "size of parameter packs are not equal");
@@ -247,7 +317,7 @@ void BaseMatrixCompare(R (C::*f)(Args...), bool checkArgs = false) {
   autotest::AssertEqual compare(1e-10);
 #endif
 
-  autotest::BaseMatrixCompare<false, false, I...>(f, compare, checkArgs);
+  autotest::BaseMatrixCompare<false, false, I...>(f, compare);
 }
 
 template <std::size_t... I, typename C, typename R, typename... Args>
diff --git a/paddle/math/tests/test_BaseMatrix.cpp b/paddle/math/tests/test_BaseMatrix.cpp
index 1d334135a0cadf4438067cc29f8714e852202d62..c68080057c31cc099b3cad79198862c594deb64a 100644
--- a/paddle/math/tests/test_BaseMatrix.cpp
+++ b/paddle/math/tests/test_BaseMatrix.cpp
@@ -15,7 +15,7 @@ limitations under the License. */
 #ifndef PADDLE_ONLY_CPU
 /**
  * This test file compares the implementation of CPU and GPU function
- * in BaseMatrix.cpp.
+ * in BaseMatrix.cpp or Matrix.cpp.
 */
 
 #include
@@ -188,17 +188,22 @@ TEST(BaseMatrix, Other) {
   BaseMatrixCompare<0, 1, 2>(&BaseMatrix::rowScale);
   BaseMatrixCompare<0, 1, 2>(&BaseMatrix::rowDotMul);
   BaseMatrixCompare<0, 1, 2, 3>(&BaseMatrix::binaryClassificationError);
+
+  BaseMatrixCompare<0, 1>(&Matrix::sumOfSquaresBp);
 }
 
 TEST(BaseMatrix, Aggregate) {
   BaseMatrixAsColVector<0>(&BaseMatrix::maxRows);
   BaseMatrixAsColVector<0>(&BaseMatrix::minRows);
   BaseMatrixAsColVector<0, 1, 2>(&BaseMatrix::sumRows);
+  BaseMatrixAsColVector<0, 1>(&Matrix::sumOfSquares);
 
   BaseMatrixAsRowVector<0>(&BaseMatrix::maxCols);
   BaseMatrixAsRowVector<0>(&BaseMatrix::minCols);
   BaseMatrixAsRowVector<0, 1>(&BaseMatrix::addDotMulVMM);
   BaseMatrixAsRowVector<0, 1, 2>(&BaseMatrix::sumCols);
+  BaseMatrixAsRowVector<0, 1>(
+      static_cast<void (Matrix::*)(Matrix&, real)>(&Matrix::collectBias));
 }
 
 int main(int argc, char** argv) {
diff --git a/paddle/math/tests/test_Matrix.cpp b/paddle/math/tests/test_Matrix.cpp
index 485e702a66f05a472919297b01500c94ef9f36b5..b766e5ebe27f087108b65235999de78aa4539d5a 100644
--- a/paddle/math/tests/test_Matrix.cpp
+++ b/paddle/math/tests/test_Matrix.cpp
@@ -14,25 +14,295 @@ limitations under the License. */
 
 #ifndef PADDLE_ONLY_CPU
 /**
- * This test file compares the implementation of CPU and GPU function
- * in Matrix.cpp.
+ * This test file uses AutoCompare to compare the CPU and GPU
+ * implementations of the member functions in Matrix.cpp.
+ *
+ * 1. Construct an AutoCompare object; an AutoCompare object contains
+ *    a CpuMatrix and a GpuMatrix.
+ * 2. Initialize the parameters required by the member function.
+ *    Only the CPU parameters need to be initialized.
+ * 3. Run the test through the operator() template, which calls the
+ *    member function on both matrices and compares the results.
+ *
+ * Use case:
+ *   AutoCompare test(...);
+ *   Init Argument arg1, arg2 ...
+ *   test(function, arg1, arg2....)
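+ *
+ * For example, to compare Matrix::selectRows on CPU and GPU:
+ *   AutoCompare test(numSamples, inputDim);
+ *   CpuMatrix arg1(tableSize, inputDim);
+ *   CpuIVector arg2(numSamples);
+ *   arg1.randomizeUniform();
+ *   arg2.rand(tableSize);
+ *   test(&Matrix::selectRows, arg1, arg2);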
+ * */ #include #include "TestUtils.h" -using namespace paddle; // NOLINT +using paddle::CpuMatrix; +using paddle::SparseValueType; +using paddle::SparseFormat; +using paddle::NO_VALUE; +using paddle::SPARSE_CSR; +using paddle::initMain; +using autotest::TensorCheckEqual; +using autotest::TensorCheckErr; +using autotest::AutoCompare; -TEST(Matrix, Matrix) { - BaseMatrixCompare<0>(&Matrix::softmax, true); - BaseMatrixCompare<0, 1>(&Matrix::sumOfSquaresBp); +void testBilinearFwdBwd(int numSamples, + int imgSizeH, + int imgSizeW, + int channels) { + int inWidth = imgSizeH * imgSizeW * channels; + int outWidth = 2 * imgSizeH * 2 * imgSizeW * channels; + real ratioH = 0.5; + real ratioW = 0.5; + + AutoCompare forward(numSamples, outWidth); + CpuMatrix arg1(numSamples, inWidth); + arg1.randomizeUniform(); + forward(&Matrix::bilinearForward, + arg1, + imgSizeH, + imgSizeW, + 2 * imgSizeH, + 2 * imgSizeW, + channels, + ratioH, + ratioW); + + AutoCompare backward(numSamples, inWidth); + CpuMatrix arg2(numSamples, outWidth); + arg2.randomizeUniform(); + backward(&Matrix::bilinearBackward, + arg2, + 2 * imgSizeH, + 2 * imgSizeW, + imgSizeH, + imgSizeW, + channels, + ratioH, + ratioW); +} + +TEST(Matrix, BilinearFwdBwd) { + for (auto numSamples : {5, 10}) { + for (auto channels : {8, 16}) { + for (auto imgSizeH : {14, 28}) { + for (auto imgSizeW : {16, 30}) { + VLOG(3) << " numSamples=" << numSamples << " channels=" << channels + << " imgSizeH=" << imgSizeH << " imgSizeW=" << imgSizeW; + testBilinearFwdBwd(numSamples, imgSizeH, imgSizeW, channels); + } + } + } + } +} + +void testMatrixAddBias(int height, int width, real scale) { + AutoCompare test(height, width); + CpuMatrix arg1(1, width); + arg1.randomizeUniform(); + test(static_cast(&Matrix::addBias), + arg1, + scale); +} + +void testMatrixAddDotMulMMV(int height, int width) { + AutoCompare test(height, width); + CpuMatrix arg1(height, width); + CpuMatrix arg2(1, width); + arg1.randomizeUniform(); + arg2.randomizeUniform(); + test(&BaseMatrix::addDotMulMMV, arg1, arg2); +} + +TEST(Matrix, unary) { + for (auto height : {1, 3, 11, 73, 128, 200, 330}) { + for (auto width : {1, 3, 32, 100, 512, 1000, 3210}) { + VLOG(3) << " height=" << height << " width=" << width; + testMatrixAddBias(height, width, 1.0); + testMatrixAddBias(height, width, 3.5); + testMatrixAddDotMulMMV(height, width); + } + } +} + +void testMatrixAddAtOffset(int height, int width1, int width2, int offset) { + AutoCompare test(height, width2); + CpuMatrix arg1(height, width1); + arg1.randomizeUniform(); + test(&Matrix::addAtOffset, arg1, offset); +} + +void testMatrixAssignAtOffset(int height, int width1, int width2, int offset) { + AutoCompare test(height, width2); + CpuMatrix arg1(height, width1); + arg1.randomizeUniform(); + test(&Matrix::assignAtOffset, arg1, offset); +} + +TEST(Matrix, AtOffset) { + for (auto height : {1, 11, 73, 128, 200}) { + for (auto width1 : {1, 32, 100, 512, 1000}) { + for (auto width2 : {1, 32, 100, 512, 1000}) { + int columnOffset = 0; + int offset = std::abs(width1 - width2); + if (offset) { + columnOffset = std::rand() % offset; + } + VLOG(3) << " height=" << height << " width1=" << width1 + << " width2=" << width2 << " columnOffset = " << columnOffset; + testMatrixAddAtOffset(height, width1, width2, columnOffset); + testMatrixAssignAtOffset(height, width1, width2, columnOffset); + } + } + } +} + +void testMatrixSelectRows(int numSamples, int tableSize, int inputDim) { + AutoCompare test(numSamples, inputDim); + CpuMatrix arg1(tableSize, inputDim); + 
+void testMatrixSelectRows(int numSamples, int tableSize, int inputDim) {
+  AutoCompare test(numSamples, inputDim);
+  CpuMatrix arg1(tableSize, inputDim);
+  CpuIVector arg2(numSamples);
+  arg1.randomizeUniform();
+  arg2.rand(tableSize);
+  test(&Matrix::selectRows, arg1, arg2);
+}
+
+TEST(Matrix, tableProjection) {
+  for (auto numSamples : {10, 100, 1000, 10000, 80000}) {
+    for (auto tableSize : {10, 100}) {
+      for (auto inputDim : {20, 50}) {
+        VLOG(3) << " numSamples=" << numSamples << " tableSize=" << tableSize
+                << " inputDim=" << inputDim;
+        testMatrixSelectRows(numSamples, tableSize, inputDim);
+      }
+    }
+  }
+}
+
+void testMatrixCopyByRowIndex(int outHeight, int inHeight, int width) {
+  AutoCompare test(outHeight, width);
+  CpuMatrix arg1(inHeight, width);
+  CpuIVector arg2(outHeight);
+  arg1.randomizeUniform();
+  arg2.rand(inHeight);
+  test(&Matrix::copyByRowIndex, arg1, arg2);
 }
 
-TEST(Matrix, Aggregate) {
-  BaseMatrixAsRowVector<0, 1>(
-      static_cast<void (Matrix::*)(Matrix&, real)>(&Matrix::collectBias));
+TEST(Matrix, copyByRowIndex) {
+  for (auto outHeight : {31, 500, 1000}) {
+    for (auto inHeight : {17, 257, 500, 1200}) {
+      for (auto width : {512, 1024}) {
+        VLOG(3) << outHeight << " " << inHeight << " " << width;
+        testMatrixCopyByRowIndex(outHeight, inHeight, width);
+      }
+    }
+  }
+}
+
+void testCosSim(int heightX, int heightY, int width, real scale) {
+  AutoCompare test(heightX, 1);
+  CpuMatrix arg1(heightX, width);
+  CpuMatrix arg2(heightY, width);
+  arg1.randomizeUniform();
+  arg2.randomizeUniform();
+  arg2.add(-0.5);
+  test(&Matrix::cosSim, arg1, arg2, scale);
+}
+
+TEST(Matrix, cosSim) {
+  for (auto heightX : {10, 100, 1000}) {
+    for (auto heightY : {1, heightX}) {
+      for (auto width : {10, 100, 1000}) {
+        for (auto scale : {1.0, 2.0}) {
+          testCosSim(heightX, heightY, width, scale);
+        }
+      }
+    }
+  }
+}
+
+void testParamReluForward(int height, int width, int w_height, int w_width) {
+  AutoCompare test(height, width);
+  CpuMatrix arg1(height, width);
+  CpuMatrix arg2(w_height, w_width);
+  arg1.randomizeUniform();
+  arg2.randomizeUniform();
+  arg1.add(-0.5);
+  test(&Matrix::paramReluForward, arg1, arg2);
+}
+
+void testParamReluBackwardW(int height, int width, int w_height, int w_width) {
+  AutoCompare test(w_height, w_width);
+  CpuMatrix arg1(height, width);
+  CpuMatrix arg2(height, width);
+  arg1.randomizeUniform();
+  arg2.randomizeUniform();
+  arg2.add(-0.5);
+  test(&Matrix::paramReluBackwardW, arg1, arg2);
+}
+
+TEST(Matrix, paramRelu) {
+  for (auto height : {10, 100}) {
+    for (auto width : {10, 100}) {
+      for (auto w_height : {1, 2}) {
+        for (auto w_width : {1, 2}) {
+          testParamReluForward(height, width, w_height, w_width);
+          testParamReluBackwardW(height, width, w_height, w_width);
+        }
+      }
+    }
+  }
+}
+
+void testAddSharedBias(int numSamples, int dim, int channel) {
+  AutoCompare test(numSamples, dim);
+  CpuMatrix arg1(1, channel);
+  arg1.randomizeUniform();
+  test(&Matrix::addSharedBias, arg1, 1.0);
+}
+
+void testCollectSharedBias(int numSamples, int dim, int channel) {
+  AutoCompare test(1, channel);
+  CpuMatrix arg1(numSamples, dim);
+  arg1.randomizeUniform();
+  test(&Matrix::collectSharedBias, arg1, 1.0);
+}
+
+TEST(Matrix, sharedBias) {
+  for (auto numSamples : {1, 100, 520}) {
+    for (auto dim : {100 * 16, 100 * 32}) {
+      for (auto channel : {8, 16}) {
+        VLOG(3) << " numSamples=" << numSamples << " dim=" << dim
+                << " channel=" << channel;
+        testAddSharedBias(numSamples, dim, channel);
+        testCollectSharedBias(numSamples, dim, channel);
+      }
+    }
+  }
+}
+
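+// The CSR label matrix arg2 gets exactly one non-zero entry per row, at a
+// random column: every sample carries a single positive label.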
+void testMultiBinaryLabelCrossEntropy(int numSamples, int dim) {
+  AutoCompare forward(numSamples, 1);
+  CpuMatrix arg1(numSamples, dim);
+  CpuSparseMatrix arg2(numSamples, dim, numSamples, NO_VALUE, SPARSE_CSR);
+
+  CpuMatrix output1(numSamples, dim);
+  output1.randomizeUniform();
+  output1.softmax(arg1);
+  for (int i = 0; i < numSamples; i++) {
+    const unsigned int id = std::rand() % dim;
+    arg2.setRow(i, 1, &id, nullptr);
+  }
+  forward(&Matrix::multiBinaryLabelCrossEntropy, arg1, arg2);
+
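+  // Reuse the same softmax output and sparse labels to compare the
+  // gradients computed by multiBinaryLabelCrossEntropyBp.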
+  AutoCompare backward(numSamples, dim);
+  backward(&Matrix::multiBinaryLabelCrossEntropyBp, arg1, arg2);
+}
 
-  BaseMatrixAsColVector<0, 1>(&Matrix::sumOfSquares);
+TEST(Matrix, multiBinaryCrossEntropy) {
+  for (auto numSamples : {100, 1000, 10000}) {
+    for (auto dim : {100, 1000, 10000}) {
+      VLOG(3) << " numSamples=" << numSamples << " dim=" << dim;
+      testMultiBinaryLabelCrossEntropy(numSamples, dim);
+    }
+  }
 }
 
 int main(int argc, char** argv) {
diff --git a/paddle/math/tests/test_matrixCompare.cpp b/paddle/math/tests/test_matrixCompare.cpp
index 86a4a0e5ec5a046c7339644280d7f67400485569..4895583d32675214e5f14583a2c2b7a1ca075558 100644
--- a/paddle/math/tests/test_matrixCompare.cpp
+++ b/paddle/math/tests/test_matrixCompare.cpp
@@ -22,163 +22,12 @@ limitations under the License. */
 #include
 #include "paddle/gserver/tests/TestUtil.h"
 #include "paddle/utils/Stat.h"
+#include "TensorCheck.h"
 
 using namespace paddle;  // NOLINT
 using namespace std;     // NOLINT
-
-template <class T>
-void VectorCheckEqual(const VectorT<T>& vector1, const VectorT<T>& vector2) {
-  CHECK(vector1.getSize() == vector2.getSize());
-
-  const T* data1 = vector1.getData();
-  const T* data2 = vector2.getData();
-  size_t size = vector1.getSize();
-  int count = 0;
-  for (size_t i = 0; i < size; i++) {
-    if (data1[i] != data2[i]) {
-      count++;
-    }
-  }
-  EXPECT_EQ(count, 0) << "There are " << count << " different element.";
-}
-
-void MatrixCheckEqual(const Matrix& matrix1, const Matrix& matrix2) {
-  CHECK(matrix1.getHeight() == matrix2.getHeight());
-  CHECK(matrix1.getWidth() == matrix2.getWidth());
-
-  int height = matrix1.getHeight();
-  int width = matrix1.getWidth();
-  const real* data1 = matrix1.getData();
-  const real* data2 = matrix2.getData();
-  int count = 0;
-  for (int i = 0; i < height; i++) {
-    for (int j = 0; j < width; j++) {
-      if (data1[i * width + j] != data2[i * width + j]) {
-        count++;
-      }
-    }
-  }
-  EXPECT_EQ(count, 0) << "There are " << count << " different element.";
-}
-
-void MatrixCheckErr(const Matrix& matrix1, const Matrix& matrix2) {
-  CHECK(matrix1.getHeight() == matrix2.getHeight());
-  CHECK(matrix1.getWidth() == matrix2.getWidth());
-#ifndef PADDLE_TYPE_DOUBLE
-  real err = 1e-3;
-#else
-  real err = 1e-10;
-#endif
-
-  int height = matrix1.getHeight();
-  int width = matrix1.getWidth();
-  const real* data1 = matrix1.getData();
-  const real* data2 = matrix2.getData();
-  int count = 0;
-  for (int i = 0; i < height; i++) {
-    for (int j = 0; j < width; j++) {
-      real a = data1[i * width + j];
-      real b = data2[i * width + j];
-      if (fabs(a - b) > err) {
-        if ((fabsf(a - b) / fabsf(a)) > (err / 10.0f)) {
-          count++;
-        }
-      }
-    }
-  }
-  EXPECT_EQ(count, 0) << "There are " << count << " different element.";
-}
-
-void testBilinearFwdBwd(int numSamples,
-                        int imgSizeH,
-                        int imgSizeW,
-                        int channels) {
-  int inWidth = imgSizeH * imgSizeW * channels;
-  int outWidth = 2 * imgSizeH * 2 * imgSizeW * channels;
-  real ratioH = 0.5;
-  real ratioW = 0.5;
-  // forward
-  MatrixPtr input = CpuMatrix::create(numSamples, inWidth, false, false);
-  MatrixPtr inputGpu = GpuMatrix::create(numSamples, inWidth, false, true);
-
-  MatrixPtr target = CpuMatrix::create(numSamples, outWidth, false, false);
-  MatrixPtr targetGpu = GpuMatrix::create(numSamples, outWidth, false, true);
-  MatrixPtr targetCheck = CpuMatrix::create(numSamples, outWidth, false, false);
-
-  input->randomizeUniform();
-  inputGpu->copyFrom(*input);
-
-  target->bilinearForward(*input,
-                          imgSizeH,
-                          imgSizeW,
-                          2 * imgSizeH,
-                          2 * imgSizeW,
-                          channels,
-                          ratioH,
-                          ratioW);
-  targetGpu->bilinearForward(*inputGpu,
-                             imgSizeH,
-                             imgSizeW,
-                             2 * imgSizeH,
-                             2 * imgSizeW,
-                             channels,
-                             ratioH,
-                             ratioW);
-
-  // check
-  targetCheck->copyFrom(*targetGpu);
-  MatrixCheckErr(*target, *targetCheck);
-
-  // backward
-  MatrixPtr inputGrad = CpuMatrix::create(numSamples, inWidth, false, false);
-  MatrixPtr inputGpuGrad = GpuMatrix::create(numSamples, inWidth, false, true);
-
-  MatrixPtr targetGrad = CpuMatrix::create(numSamples, outWidth, false, false);
-  MatrixPtr targetGpuGrad =
-      GpuMatrix::create(numSamples, outWidth, false, true);
-  MatrixPtr targetCheckGrad =
-      CpuMatrix::create(numSamples, inWidth, false, false);
-
-  inputGrad->randomizeUniform();
-  targetGrad->randomizeUniform();
-  inputGpuGrad->copyFrom(*inputGrad);
-  targetGpuGrad->copyFrom(*targetGrad);
-
-  inputGrad->bilinearBackward(*targetGrad,
-                              2 * imgSizeH,
-                              2 * imgSizeW,
-                              imgSizeH,
-                              imgSizeW,
-                              channels,
-                              ratioH,
-                              ratioW);
-  inputGpuGrad->bilinearBackward(*targetGpuGrad,
-                                 2 * imgSizeH,
-                                 2 * imgSizeW,
-                                 imgSizeH,
-                                 imgSizeW,
-                                 channels,
-                                 ratioH,
-                                 ratioW);
-
-  // check
-  targetCheckGrad->copyFrom(*inputGpuGrad);
-  MatrixCheckErr(*inputGrad, *targetCheckGrad);
-}
-
-TEST(Matrix, BilinearFwdBwd) {
-  for (auto numSamples : {5, 10}) {
-    for (auto channels : {8, 16}) {
-      for (auto imgSizeH : {14, 28}) {
-        for (auto imgSizeW : {16, 30}) {
-          VLOG(3) << " numSamples=" << numSamples << " channels=" << channels
-                  << " imgSizeH=" << imgSizeH << " imgSizeW=" << imgSizeW;
-          testBilinearFwdBwd(numSamples, imgSizeH, imgSizeW, channels);
-        }
-      }
-    }
-  }
-}
+using autotest::TensorCheckEqual;
+using autotest::TensorCheckErr;
 
 void testMatrixProjectionForward(int contextStart,
                                  int contextLength,
@@ -232,12 +81,7 @@ void testMatrixProjectionForward(int contextStart,
                        beginPad,
                        padding);
 
-  // check
-  MatrixPtr outputCheck =
-      std::make_shared<CpuMatrix>(batchSize, inputDim * contextLength);
-  outputCheck->copyFrom(*gpuOutput);
-
-  MatrixCheckEqual(*cpuOutput, *outputCheck);
+  TensorCheckEqual(*cpuOutput, *gpuOutput);
 }
 
 void testMatrixProjectionBackward(int contextStart,
@@ -294,15 +138,9 @@ void testMatrixProjectionBackward(int contextStart,
                               beginPad);
   }
 
-  // check
-  MatrixPtr inputGradCheck = std::make_shared<CpuMatrix>(batchSize, inputDim);
-  inputGradCheck->copyFrom(*gpuInputGrad);
-  MatrixCheckErr(*cpuInputGrad, *inputGradCheck);
-
+  TensorCheckErr(*cpuInputGrad, *gpuInputGrad);
   if (padding) {
-    MatrixPtr weightGradChcek = std::make_shared<CpuMatrix>(pad, inputDim);
-    weightGradChcek->copyFrom(*gpuWeightGrad);
-    MatrixCheckErr(*cpuWeightGrad, *weightGradChcek);
+    TensorCheckErr(*cpuWeightGrad, *gpuWeightGrad);
   }
 }
 
@@ -361,15 +199,8 @@ void testMatrixMaxSequence(int batchSize, int inputDim) {
   cpuOutput->maxSequenceForward(*cpuInput, *cpuSequence, *cpuIndex);
   gpuOutput->maxSequenceForward(*gpuInput, *gpuSequence, *gpuIndex);
 
-  // check
-  MatrixPtr outputCheck = std::make_shared<CpuMatrix>(newBatchSize, inputDim);
-  outputCheck->copyFrom(*gpuOutput);
-  MatrixCheckEqual(*cpuOutput, *outputCheck);
-
-  IVectorPtr indexCheck = nullptr;
-  IVector::resizeOrCreate(indexCheck, newBatchSize * inputDim, false);
-  indexCheck->copyFrom(*gpuIndex);
-  VectorCheckEqual(*cpuIndex, *indexCheck);
+  TensorCheckEqual(*cpuOutput, *gpuOutput);
+  TensorCheckEqual(*cpuIndex, *gpuIndex);
 
   // backward
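+  // maxSequenceBackward scatters the output gradients back to the input
+  // rows recorded in cpuIndex/gpuIndex during the forward pass.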
   MatrixPtr cpuOutputGrad = std::make_shared<CpuMatrix>(newBatchSize, inputDim);
@@ -385,10 +216,7 @@ void testMatrixMaxSequence(int batchSize, int inputDim) {
   cpuInputGrad->maxSequenceBackward(*cpuOutputGrad, *cpuSequence, *cpuIndex);
   gpuInputGrad->maxSequenceBackward(*gpuOutputGrad, *gpuSequence, *gpuIndex);
 
-  // check
-  MatrixPtr inputGradCheck = std::make_shared<CpuMatrix>(batchSize, inputDim);
-  inputGradCheck->copyFrom(*gpuInputGrad);
-  MatrixCheckEqual(*cpuInputGrad, *inputGradCheck);
+  TensorCheckEqual(*cpuInputGrad, *gpuInputGrad);
 }
 
 TEST(Matrix, maxSequence) {
@@ -431,6 +259,8 @@ void testMatrixZeroAtOffset(int height, int width) {
   int columnOffset = rand() % width;  // NOLINT we just use rand() for test.
   int numColumns = rand() % (width - columnOffset);  // NOLINT
 
+  if (numColumns == 0) return;
+
   cpuA->zeroAtOffset(columnOffset, numColumns);
   gpuA->zeroAtOffset(columnOffset, numColumns);
 
@@ -442,61 +272,8 @@ void testMatrixZeroAtOffset(int height, int width) {
     }
   }
 
-  MatrixPtr outputCheck = std::make_shared<CpuMatrix>(height, width);
-  outputCheck->copyFrom(*gpuA);
-  MatrixCheckEqual(*cpuA, *outputCheck);
-  MatrixCheckEqual(*cpuA, *cpuTest);
-}
-
-void testMatrixAddBias(int height, int width, real scale) {
-  MatrixPtr cpuA = std::make_shared<CpuMatrix>(height, width);
-  MatrixPtr cpuB = std::make_shared<CpuMatrix>(1, width);
-  MatrixPtr gpuA = std::make_shared<GpuMatrix>(height, width);
-  MatrixPtr gpuB = std::make_shared<GpuMatrix>(1, width);
-
-  cpuA->randomizeUniform();
-  cpuB->randomizeUniform();
-  gpuA->copyFrom(*cpuA);
-  gpuB->copyFrom(*cpuB);
-
-  cpuA->addBias(*cpuB, scale);
-  gpuA->addBias(*gpuB, scale);
-
-  MatrixPtr outputCheck = std::make_shared<CpuMatrix>(height, width);
-  outputCheck->copyFrom(*gpuA);
-  MatrixCheckErr(*cpuA, *outputCheck);
-}
-
-void testMatrixAddDotMulMMV(int height, int width) {
-  MatrixPtr cpuA = std::make_shared<CpuMatrix>(height, width);
-  MatrixPtr cpuB = std::make_shared<CpuMatrix>(height, width);
-  MatrixPtr cpuC = std::make_shared<CpuMatrix>(1, width);
-  MatrixPtr gpuA = std::make_shared<GpuMatrix>(height, width);
-  MatrixPtr gpuB = std::make_shared<GpuMatrix>(height, width);
-  MatrixPtr gpuC = std::make_shared<GpuMatrix>(1, width);
-
-  MatrixPtr cpuA1 = std::make_shared<CpuMatrix>(height, width);
-  MatrixPtr cpuB1 = std::make_shared<CpuMatrix>(height, width);
-  MatrixPtr cpuC1 = std::make_shared<CpuMatrix>(1, width);
-
-  cpuA->randomizeUniform();
-  cpuB->randomizeUniform();
-  cpuC->randomizeUniform();
-  gpuA->copyFrom(*cpuA);
-  gpuB->copyFrom(*cpuB);
-  gpuC->copyFrom(*cpuC);
-  cpuA1->copyFrom(*cpuA);
-  cpuB1->copyFrom(*cpuB);
-  cpuC1->copyFrom(*cpuC);
-
-  cpuA->addDotMulMMV(*cpuB, *cpuC);
-  gpuA->addDotMulMMV(*gpuB, *gpuC);
-  cpuA1->addDotMulMMV2(*cpuB1, *cpuC1);
-
-  MatrixPtr outputCheck = std::make_shared<CpuMatrix>(height, width);
-  outputCheck->copyFrom(*gpuA);
-  MatrixCheckErr(*cpuA, *outputCheck);
-  MatrixCheckEqual(*cpuA, *cpuA1);
+  TensorCheckEqual(*cpuA, *gpuA);
+  TensorCheckEqual(*cpuA, *cpuTest);
 }
 
 void testMatrixTranspose(int height, int width) {
@@ -510,9 +287,7 @@ void testMatrixTranspose(int height, int width) {
   cpu->transpose(cpuT, false);
   gpu->transpose(gpuT, false);
 
-  MatrixPtr outputCheck = std::make_shared<CpuMatrix>(width, height);
-  outputCheck->copyFrom(*gpuT);
-  MatrixCheckEqual(*cpuT, *outputCheck);
+  TensorCheckEqual(*cpuT, *gpuT);
 }
 
 void testMatrixInverse(int height) {
@@ -533,12 +308,11 @@ void testMatrixInverse(int height) {
   cpu->inverse(cpuI, false);
   gpu->inverse(gpuI, false);
 
-  outputCheck->copyFrom(*gpuI);
-  MatrixCheckErr(*cpuI, *outputCheck);
+  TensorCheckErr(*cpuI, *gpuI);
 
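+  // cpu * cpuI should reproduce the identity, which is checked against a
+  // freshly set diagonal matrix below.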
   outputCheck->mul(cpu, cpuI);
   cpu->setDiag(1.0);
-  MatrixCheckErr(*cpu, *outputCheck);
+  TensorCheckErr(*cpu, *outputCheck);
 }
 
 TEST(Matrix, unary) {
@@ -546,15 +320,8 @@ {
     for (auto width : {1, 3, 32, 100, 512, 1000, 3210}) {
       VLOG(3) << " height=" << height << " width=" << width;
 
-      // asRowVector
-      testMatrixAddBias(height, width, 1.0);
-      testMatrixAddBias(height, width, 3.5);
-      testMatrixAddDotMulMMV(height, width);
-
-      // sum
+      testMatrixZeroAtOffset(height, width);
       testMatrixGetSum(height, width);
-
-      // transpose
       testMatrixTranspose(height, width);
     }
     // inverse
@@ -562,6 +329,22 @@
   }
 }
 
+void testMatrixSoftmax(int height, int width) {
+  MatrixPtr cpuInput = std::make_shared<CpuMatrix>(height, width);
+  MatrixPtr cpuOutput = std::make_shared<CpuMatrix>(height, width);
+  MatrixPtr gpuInput = std::make_shared<GpuMatrix>(height, width);
+  MatrixPtr gpuOutput = std::make_shared<GpuMatrix>(height, width);
+
+  cpuInput->randomizeUniform();
+  gpuInput->copyFrom(*cpuInput);
+  cpuOutput->zero();
+  gpuOutput->zero();
+  cpuInput->softmax(*cpuOutput);
+  gpuInput->softmax(*gpuOutput);
+
+  TensorCheckErr(*cpuOutput, *gpuOutput);
+}
+
 void testSequenceSoftmax(int batchSize) {
   // forward
   int inputDim = 1;
@@ -578,10 +361,7 @@ void testSequenceSoftmax(int batchSize) {
   cpuInput->sequenceSoftmax(*cpuInput, *cpuSequence);
   gpuInput->sequenceSoftmax(*gpuInput, *gpuSequence);
 
-  // check
-  MatrixPtr outputCheck = std::make_shared<CpuMatrix>(batchSize, inputDim);
-  outputCheck->copyFrom(*gpuInput);
-  MatrixCheckErr(*cpuInput, *outputCheck);
+  TensorCheckErr(*cpuInput, *gpuInput);
 }
 
 void testMatrixSoftmaxThreshold(int height, int width) {
@@ -634,9 +414,7 @@ void testMatrixSoftmaxBp(int height, int width) {
   sftMaxSum->colMerge(*sftMaxDot);
   cpuOutput->softmaxDerivative(*cpuInput, *sftMaxSum);
 
-  MatrixPtr outputCheck = std::make_shared<CpuMatrix>(height, width);
-  outputCheck->copyFrom(*gpuOutput);
-  MatrixCheckErr(*cpuOutput, *outputCheck);
+  TensorCheckErr(*cpuOutput, *gpuOutput);
 }
 
 TEST(Matrix, softmax) {
@@ -644,6 +422,7 @@ TEST(Matrix, softmax) {
     for (auto width : {1, 32, 100, 512, 1000}) {
       VLOG(3) << " height=" << height << " width=" << width;
 
+      testMatrixSoftmax(height, width);
       testMatrixSoftmaxBp(height, width);
       testMatrixSoftmaxThreshold(height, width);
     }
@@ -651,95 +430,6 @@ TEST(Matrix, softmax) {
   }
 }
 
-void testMatrixAddAtOffset(int height, int width1, int width2) {
-  MatrixPtr cpuInput = std::make_shared<CpuMatrix>(height, width1);
-  MatrixPtr cpuOutput = std::make_shared<CpuMatrix>(height, width2);
-  MatrixPtr gpuInput = std::make_shared<GpuMatrix>(height, width1);
-  MatrixPtr gpuOutput = std::make_shared<GpuMatrix>(height, width2);
-
-  cpuInput->randomizeUniform();
-  gpuInput->copyFrom(*cpuInput);
-  cpuOutput->randomizeUniform();
-  gpuOutput->copyFrom(*cpuOutput);
-
-  int columnOffset = 0;
-  int offset = std::abs(width1 - width2);
-  if (offset) {
-    columnOffset = rand() % offset;  // NOLINT
-  }
-  cpuOutput->addAtOffset(*cpuInput, columnOffset);
-  gpuOutput->addAtOffset(*gpuInput, columnOffset);
-
-  MatrixPtr outputCheck = std::make_shared<CpuMatrix>(height, width2);
-  outputCheck->copyFrom(*gpuOutput);
-  MatrixCheckEqual(*cpuOutput, *outputCheck);
-}
-
-void testMatrixAssignAtOffset(int height, int width1, int width2) {
-  MatrixPtr cpuInput = std::make_shared<CpuMatrix>(height, width1);
-  MatrixPtr cpuOutput = std::make_shared<CpuMatrix>(height, width2);
-  MatrixPtr gpuInput = std::make_shared<GpuMatrix>(height, width1);
-  MatrixPtr gpuOutput = std::make_shared<GpuMatrix>(height, width2);
-
-  cpuInput->randomizeUniform();
-  gpuInput->copyFrom(*cpuInput);
-  cpuOutput->randomizeUniform();
-  gpuOutput->copyFrom(*cpuOutput);
-
-  int columnOffset = 0;
-  int offset = std::abs(width1 - width2);
-  if (offset) {
-    columnOffset = rand() % offset;  // NOLINT
-  }
-  cpuOutput->assignAtOffset(*cpuInput, columnOffset);
-  gpuOutput->assignAtOffset(*gpuInput, columnOffset);
-
-  MatrixPtr outputCheck = std::make_shared<CpuMatrix>(height, width2);
-  outputCheck->copyFrom(*gpuOutput);
-  MatrixCheckEqual(*cpuOutput, *outputCheck);
-}
-
-TEST(Matrix, AtOffset) {
-  for (auto height : {1, 11, 73, 128, 200}) {
-    for (auto width1 : {1, 32, 100, 512, 1000}) {
-      for (auto width2 : {1, 32, 100, 512, 1000}) {
-        VLOG(3) << " height=" << height << " width1=" << width1
-                << " width2=" << width2;
-
-        testMatrixAddAtOffset(height, width1, width2);
-        testMatrixAssignAtOffset(height, width1, width2);
-      }
-    }
-  }
-}
-
-void testMatrixSelectRows(int numSamples, int tableSize, int inputDim) {
-  MatrixPtr cpuTable = std::make_shared<CpuMatrix>(tableSize, inputDim);
-  MatrixPtr gpuTable = std::make_shared<GpuMatrix>(tableSize, inputDim);
-  cpuTable->randomizeUniform();
-  gpuTable->copyFrom(*cpuTable);
-
-  IVectorPtr cpuIds;
-  IVectorPtr gpuIds;
-  cpuIds = VectorT<int>::create(numSamples, false);
-  gpuIds = VectorT<int>::create(numSamples, true);
-  cpuIds->rand(tableSize);
-  gpuIds->copyFrom(*cpuIds);
-
-  MatrixPtr cpuOutput = std::make_shared<CpuMatrix>(numSamples, inputDim);
-  MatrixPtr gpuOutput = std::make_shared<GpuMatrix>(numSamples, inputDim);
-  cpuOutput->randomizeUniform();
-  gpuOutput->copyFrom(*cpuOutput);
-
-  cpuOutput->selectRows(*cpuTable, *cpuIds);
-  gpuOutput->selectRows(*gpuTable, *gpuIds);
-
-  // check
-  MatrixPtr outputCheck = std::make_shared<CpuMatrix>(numSamples, inputDim);
-  outputCheck->copyFrom(*gpuOutput);
-  MatrixCheckEqual(*cpuOutput, *outputCheck);
-}
-
 void testMatrixAddToRows(int numSamples, int tableSize, int inputDim) {
   MatrixPtr cpuTable = std::make_shared<CpuMatrix>(tableSize, inputDim);
   MatrixPtr gpuTable = std::make_shared<GpuMatrix>(tableSize, inputDim);
@@ -761,10 +451,7 @@ void testMatrixAddToRows(int numSamples, int tableSize, int inputDim) {
   cpuOutput->addToRows(*cpuTable, *cpuIds);
   gpuOutput->addToRows(*gpuTable, *gpuIds);
 
-  // check
-  MatrixPtr outputCheck = std::make_shared<CpuMatrix>(tableSize, inputDim);
-  outputCheck->copyFrom(*gpuTable);
-  MatrixCheckErr(*cpuTable, *outputCheck);
+  TensorCheckErr(*cpuTable, *gpuTable);
 }
 
 TEST(Matrix, tableProjection) {
@@ -773,7 +460,6 @@ TEST(Matrix, tableProjection) {
       for (auto inputDim : {20, 50}) {
         VLOG(3) << " numSamples=" << numSamples << " tableSize=" << tableSize
                 << " inputDim=" << inputDim;
-        testMatrixSelectRows(numSamples, tableSize, inputDim);
         testMatrixAddToRows(numSamples, tableSize, inputDim);
       }
     }
@@ -807,9 +493,7 @@ void testMatrixMul(bool transa, bool transb, int dimM, int dimN, int dimK) {
   cpuC->mul(cpuA, cpuB, alpha, beta);
   gpuC->mul(gpuA, gpuB, alpha, beta);
 
-  MatrixPtr outputCheck = std::make_shared<CpuMatrix>(heightC, widthC);
-  outputCheck->copyFrom(*gpuC);
-  MatrixCheckErr(*cpuC, *outputCheck);
+  TensorCheckErr(*cpuC, *gpuC);
 }
 
 void testSubMatrixMul(bool transa, bool transb, int dimM, int dimN, int dimK) {
@@ -881,9 +565,7 @@ void testSubMatrixMul(bool transa, bool transb, int dimM, int dimN, int dimK) {
   subCpuC->mul(subCpuA, subCpuB, alpha, beta);
   subGpuC->mul(subGpuA, subGpuB, alpha, beta);
 
-  MatrixPtr outputCheck = std::make_shared<CpuMatrix>(heightC, widthC);
-  outputCheck->copyFrom(*gpuC);
-  MatrixCheckErr(*cpuC, *outputCheck);
+  TensorCheckErr(*cpuC, *gpuC);
 }
 
 TEST(Matrix, mul) {
@@ -937,9 +619,7 @@ void testVectorReset(int size) {
   cpu->reset(value);
   gpu->reset(value);
 
-  std::shared_ptr<CpuVectorT<T>> out = std::make_shared<CpuVectorT<T>>(size);
-  out->copyFrom(*gpu);
-  VectorCheckEqual(*cpu, *out);
+  TensorCheckEqual(*cpu, *gpu);
 }
 
 template <class T>
@@ -965,9 +645,7 @@ void testVecortSelectFrom(int size) {
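+  // selectFrom gathers src[ids[i]] into dst; the CPU and GPU gathers must
+  // produce identical elements.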
   cpuDst->selectFrom(*cpuSrc, *cpuIds);
   gpuDst->selectFrom(*gpuSrc, *gpuIds);
 
-  std::shared_ptr<CpuVectorT<T>> out = std::make_shared<CpuVectorT<T>>(size);
-  out->copyFrom(*gpuDst);
-  VectorCheckEqual(*cpuDst, *out);
+  TensorCheckEqual(*cpuDst, *gpuDst);
 }
 
 template <class T>
@@ -978,9 +656,7 @@ void testVecotrZeroMem(int size) {
   cpu->zeroMem();
   gpu->zeroMem();
 
-  std::shared_ptr<CpuVectorT<T>> out = std::make_shared<CpuVectorT<T>>(size);
-  out->copyFrom(*gpu);
-  VectorCheckEqual(*cpu, *out);
+  TensorCheckEqual(*cpu, *gpu);
 }
 
 template <class T>
@@ -1001,9 +677,7 @@ void testVectorIsEqual(int size) {
   cpuA->isEqualTo(*cpuB, value);
   gpuA->isEqualTo(*gpuB, value);
 
-  std::shared_ptr<CpuVectorT<T>> out = std::make_shared<CpuVectorT<T>>(size);
-  out->copyFrom(*gpuA);
-  VectorCheckEqual(*cpuA, *out);
+  TensorCheckEqual(*cpuA, *gpuA);
 }
 
 TEST(Vector, Equal) {
@@ -1034,9 +708,7 @@ void testMatrixTopK(int samples, int dim, int beamSize) {
   cpuSrc->rowMax(*cpuIds, *cpuVal);
   gpuSrc->rowMax(*gpuIds, *gpuVal);
 
-  MatrixPtr outVal = std::make_shared<CpuMatrix>(samples, beamSize);
-  outVal->copyFrom(*gpuVal);
-  MatrixCheckEqual(*cpuVal, *outVal);
+  TensorCheckEqual(*cpuVal, *gpuVal);
 }
 
 TEST(Matrix, topK) {
@@ -1072,9 +744,7 @@ void testSMatrixTopK(int samples, int dim, int beamSize, real ratio) {
   cpuSrc->rowMax(*cpuIds, *cpuVal);
   gpuSrc->rowMax(*gpuIds, *gpuVal);
 
-  MatrixPtr outCheckMaxVal = std::make_shared<CpuMatrix>(samples, beamSize);
-  outCheckMaxVal->copyFrom(*gpuVal);
-  MatrixCheckEqual(*cpuVal, *outCheckMaxVal);
+  TensorCheckEqual(*cpuVal, *gpuVal);
 
   IVectorPtr outCheckIds = std::make_shared<CpuIVector>(samples * beamSize);
   outCheckIds->copyFrom(*gpuIds);
@@ -1104,42 +774,6 @@ TEST(SMatrix, topK) {
   }
 }
 
-void testMatrixCopyByRowIndex(int outHeight, int inHeight, int width) {
-  MatrixPtr cpuInput = std::make_shared<CpuMatrix>(inHeight, width);
-  MatrixPtr gpuInput = std::make_shared<GpuMatrix>(inHeight, width);
-  cpuInput->randomizeUniform();
-  gpuInput->copyFrom(*cpuInput);
-
-  MatrixPtr cpuOutput = std::make_shared<CpuMatrix>(outHeight, width);
-  MatrixPtr gpuOutput = std::make_shared<GpuMatrix>(outHeight, width);
-  cpuOutput->zero();
-  gpuOutput->zero();
-
-  IVectorPtr cpuRowIndex = IVector::create(outHeight, false);
-  IVectorPtr gpuRowIndex = IVector::create(outHeight, true);
-  cpuRowIndex->rand(inHeight);
-  gpuRowIndex->copyFrom(*cpuRowIndex);
-
-  cpuOutput->copyByRowIndex(*cpuInput, *cpuRowIndex);
-  gpuOutput->copyByRowIndex(*gpuInput, *gpuRowIndex);
-
-  // check
-  MatrixPtr outputCheck = std::make_shared<CpuMatrix>(outHeight, width);
-  outputCheck->copyFrom(*gpuOutput);
-  MatrixCheckEqual(*cpuOutput, *outputCheck);
-}
-
-TEST(Matrix, copyByRowIndex) {
-  for (auto outHeight : {31, 500, 1000}) {
-    for (auto inHeight : {17, 257, 500, 1200}) {
-      for (auto width : {512, 1024}) {
-        VLOG(3) << outHeight << " " << inHeight << " " << width;
-        testMatrixCopyByRowIndex(outHeight, inHeight, width);
-      }
-    }
-  }
-}
-
 void testMatrixSequenceAvgForward(int batchSize, int inputDim, int mode) {
   MatrixPtr cpuInput = std::make_shared<CpuMatrix>(batchSize, inputDim);
   MatrixPtr gpuInput = std::make_shared<GpuMatrix>(batchSize, inputDim);
@@ -1160,10 +794,7 @@ void testMatrixSequenceAvgForward(int batchSize, int inputDim, int mode) {
   cpuOutput->sequenceAvgForward(*cpuInput, *cpuSequence, mode);
   gpuOutput->sequenceAvgForward(*gpuInput, *gpuSequence, mode);
 
-  // check
-  MatrixPtr outputCheck = std::make_shared<CpuMatrix>(newBatchSize, inputDim);
-  outputCheck->copyFrom(*gpuOutput);
-  MatrixCheckErr(*cpuOutput, *outputCheck);
+  TensorCheckErr(*cpuOutput, *gpuOutput);
 }
 
 TEST(Matrix, sequenceAvgForward) {
@@ -1178,45 +809,6 @@ TEST(Matrix, sequenceAvgForward) {
   }
 }
 
-void testCosSim(int heightX, int heightY, int width, real scale) {
-  MatrixPtr prevOutX = CpuMatrix::create(heightX, width, false, false);
-  MatrixPtr prevOutY = CpuMatrix::create(heightY, width, false, false);
-  MatrixPtr output = CpuMatrix::create(heightX, 1, false, false);
-
-  prevOutX->randomizeUniform();
-  prevOutY->randomizeUniform();
-  prevOutX->add(-0.5);
-  prevOutY->add(-0.5);
-  output->randomizeUniform();
-
-  MatrixPtr prevOutXGpu = GpuMatrix::create(heightX, width, false, true);
-  MatrixPtr prevOutYGpu = GpuMatrix::create(heightY, width, false, true);
-  MatrixPtr outputGpu = GpuMatrix::create(heightX, 1, false, true);
-
-  prevOutXGpu->copyFrom(*prevOutX);
-  prevOutYGpu->copyFrom(*prevOutY);
-  outputGpu->copyFrom(*output);
-
-  output->cosSim(*prevOutX, *prevOutY, scale);
-  outputGpu->cosSim(*prevOutXGpu, *prevOutYGpu, scale);
-
-  MatrixPtr outputCheck = CpuMatrix::create(heightX, 1, false, false);
-  outputCheck->copyFrom(*outputGpu);
-  MatrixCheckErr(*output, *outputCheck);
-}
-
-TEST(Matrix, cosSim) {
-  for (auto heightX : {10, 100, 1000}) {
-    for (auto heightY : {1, heightX}) {
-      for (auto width : {10, 100, 1000}) {
-        for (auto scale : {1.0, 2.0}) {
-          testCosSim(heightX, heightY, width, scale);
-        }
-      }
-    }
-  }
-}
-
 void testCosSimDerivate(int heightX, int heightY, int width, real scale) {
   MatrixPtr prevOutX = CpuMatrix::create(heightX, width, false, false);
   MatrixPtr prevOutY = CpuMatrix::create(heightY, width, false, false);
@@ -1256,12 +848,8 @@ void testCosSimDerivate(int heightX, int heightY, int width, real scale) {
                                 *prevGradYGpu,
                                 scale);
 
-  MatrixPtr prevGradXCheck = CpuMatrix::create(heightX, width, false, false);
-  MatrixPtr prevGradYCheck = CpuMatrix::create(heightY, width, false, false);
-  prevGradXCheck->copyFrom(*prevGradXGpu);
-  prevGradYCheck->copyFrom(*prevGradYGpu);
-  MatrixCheckErr(*prevGradX, *prevGradXCheck);
-  MatrixCheckErr(*prevGradY, *prevGradYCheck);
+  TensorCheckErr(*prevGradX, *prevGradXGpu);
+  TensorCheckErr(*prevGradY, *prevGradYGpu);
 }
 
 TEST(Matrix, cosSimDerivate) {
@@ -1276,80 +864,6 @@ TEST(Matrix, cosSimDerivate) {
   }
 }
 
-void testParamReluForward(int height, int width, int w_height, int w_width) {
-  MatrixPtr output = CpuMatrix::create(height, width, false, false);
-  MatrixPtr input = CpuMatrix::create(height, width, false, false);
-  MatrixPtr w = CpuMatrix::create(w_height, w_width, false, false);
-
-  output->randomizeUniform();
-  input->randomizeUniform();
-  w->randomizeUniform();
-  input->add(-0.5);
-
-  MatrixPtr outputGpu = GpuMatrix::create(height, width, false, true);
-  MatrixPtr inputGpu = GpuMatrix::create(height, width, false, true);
-  MatrixPtr wGpu = GpuMatrix::create(w_height, w_width, false, true);
-
-  inputGpu->copyFrom(*input);
-  wGpu->copyFrom(*w);
-
-  output->paramReluForward(*input, *w);
-  outputGpu->paramReluForward(*inputGpu, *wGpu);
-
-  MatrixPtr outputCheck = CpuMatrix::create(height, width, false, false);
-  outputCheck->copyFrom(*outputGpu);
-  MatrixCheckEqual(*output, *outputCheck);
-}
-
-TEST(Matrix, paramReluForward) {
-  for (auto height : {10, 100}) {
-    for (auto width : {10, 100}) {
-      for (auto w_height : {1, 2}) {
-        for (auto w_width : {1, 2}) {
-          testParamReluForward(height, width, w_height, w_width);
-        }
-      }
-    }
-  }
-}
-
-void testParamReluBackwardW(int height, int width, int w_height, int w_width) {
-  MatrixPtr oGrad = CpuMatrix::create(height, width, false, false);
-  MatrixPtr input = CpuMatrix::create(height, width, false, false);
-  MatrixPtr w = CpuMatrix::create(w_height, w_width, false, false);
-
-  oGrad->randomizeUniform();
-  input->randomizeUniform();
-  w->randomizeUniform();
-  input->add(-0.5);
-
-  MatrixPtr oGradGpu = GpuMatrix::create(height, width, false, true);
-  MatrixPtr inputGpu = GpuMatrix::create(height, width, false, true);
-  MatrixPtr wGpu = GpuMatrix::create(w_height, w_width, false, true);
-
-  oGradGpu->copyFrom(*oGrad);
-  inputGpu->copyFrom(*input);
-  wGpu->copyFrom(*w);
-
-  w->paramReluBackwardW(*oGrad, *input);
-  wGpu->paramReluBackwardW(*oGradGpu, *inputGpu);
-  MatrixPtr wCheck = CpuMatrix::create(w_height, w_width, false, false);
-  wCheck->copyFrom(*wGpu);
-  MatrixCheckErr(*w, *wCheck);
-}
-
-TEST(Matrix, paramReluBackwardW) {
-  for (auto height : {10, 100}) {
-    for (auto width : {10, 100}) {
-      for (auto w_height : {1, 2}) {
-        for (auto w_width : {1, 2}) {
-          testParamReluBackwardW(height, width, w_height, w_width);
-        }
-      }
-    }
-  }
-}
-
 void testParamReluBackwardDiff(int height,
                                int width,
                                int w_height,
@@ -1378,9 +892,7 @@ void testParamReluBackwardDiff(int height,
   diff->paramReluBackwardDiff(*oGrad, *input, *w);
   diffGpu->paramReluBackwardDiff(*oGradGpu, *inputGpu, *wGpu);
 
-  MatrixPtr diffCheck = CpuMatrix::create(height, width, false, false);
-  diffCheck->copyFrom(*diffGpu);
-  MatrixCheckErr(*diff, *diffCheck);
+  TensorCheckErr(*diff, *diffGpu);
 }
 
 TEST(Matrix, paramReluBackwardDiff) {
@@ -1411,9 +923,7 @@ void testClassificationError(int numSamples, int dim) {
   cpuError->classificationError(cpuOutput, cpuLabel);
   gpuError->classificationError(gpuOutput, gpuLabel);
 
-  MatrixPtr check = std::make_shared<CpuMatrix>(numSamples, 1);
-  check->copyFrom(*gpuError);
-  MatrixCheckEqual(*cpuError, *check);
+  TensorCheckEqual(*cpuError, *gpuError);
 }
 
 TEST(Matrix, classificationError) {
@@ -1578,9 +1088,8 @@ void testAvgPoolFwdBwd(int numSamples,
                          outW,
                          padH,
                          padW);
-  MatrixPtr targetCheck = CpuMatrix::create(numSamples, outWidth, false, false);
-  targetCheck->copyFrom(*targetGpu);
-  MatrixCheckErr(*target, *targetCheck);
+
+  TensorCheckErr(*target, *targetGpu);
 
   MatrixPtr inputGrad = CpuMatrix::create(numSamples, inWidth, false, false);
   MatrixPtr inputGpuGrad = GpuMatrix::create(numSamples, inWidth, false, true);
@@ -1619,10 +1128,8 @@ void testAvgPoolFwdBwd(int numSamples,
                              1.0,
                              padH,
                              padW);
-  MatrixPtr targetBwdCheck =
-      CpuMatrix::create(numSamples, inWidth, false, false);
-  targetBwdCheck->copyFrom(*inputGpuGrad);
-  MatrixCheckErr(*inputGrad, *targetBwdCheck);
+
+  TensorCheckErr(*inputGrad, *inputGpuGrad);
 }
 
 TEST(Matrix, PoolFwdBwd) {
@@ -1687,11 +1194,9 @@ void testMaxOutFwdBwd(
   MatrixPtr target = CpuMatrix::create(numSamples, outWidth, false, false);
   MatrixPtr targetGpu = GpuMatrix::create(numSamples, outWidth, false, true);
-  MatrixPtr targetCheck = CpuMatrix::create(numSamples, outWidth, false, false);
 
   IVectorPtr id = CpuIVector::create(numSamples * outWidth, false);
   IVectorPtr idGpu = GpuIVector::create(numSamples * outWidth, true);
-  IVectorPtr idCheck = CpuIVector::create(numSamples * outWidth, false);
 
   input->randomizeUniform();
   inputGpu->copyFrom(*input);
@@ -1699,11 +1204,8 @@ void testMaxOutFwdBwd(
   target->maxoutForward(*input, *id, outChannels, groups);
   targetGpu->maxoutForward(*inputGpu, *idGpu, outChannels, groups);
 
-  // check
-  targetCheck->copyFrom(*targetGpu);
-  MatrixCheckErr(*target, *targetCheck);
-  idCheck->copyFrom(*idGpu);
-  VectorCheckEqual(*id, *idCheck);
+  TensorCheckErr(*target, *targetGpu);
+  TensorCheckEqual(*id, *idGpu);
 
   // backward
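+  // maxoutBackward routes the gradients through the argmax ids recorded in
+  // the forward pass, so the CPU and GPU gradients must agree elementwise.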
   MatrixPtr inputGrad = CpuMatrix::create(numSamples, inWidth, false, false);
   MatrixPtr inputGpuGrad = GpuMatrix::create(numSamples, inWidth, false, true);
 
   MatrixPtr targetGrad = CpuMatrix::create(numSamples, outWidth, false, false);
   MatrixPtr targetGpuGrad =
       GpuMatrix::create(numSamples, outWidth, false, true);
-  MatrixPtr targetCheckGrad =
-      CpuMatrix::create(numSamples, inWidth, false, false);
 
   inputGrad->randomizeUniform();
   targetGrad->randomizeUniform();
@@ -1723,9 +1223,7 @@ void testMaxOutFwdBwd(
   inputGrad->maxoutBackward(*targetGrad, *id, outChannels, groups);
   inputGpuGrad->maxoutBackward(*targetGpuGrad, *idGpu, outChannels, groups);
 
-  // check
-  targetCheckGrad->copyFrom(*inputGpuGrad);
-  MatrixCheckErr(*inputGrad, *targetCheckGrad);
+  TensorCheckErr(*inputGrad, *inputGpuGrad);
 }
 
 TEST(Matrix, MaxOutFwdBwd) {
@@ -1745,113 +1243,6 @@ TEST(Matrix, MaxOutFwdBwd) {
   }
 }
 
-void testAddSharedBias(int numSamples, int dim, int channel) {
-  MatrixPtr cpuData = std::make_shared<CpuMatrix>(numSamples, dim);
-  MatrixPtr gpuData = std::make_shared<GpuMatrix>(numSamples, dim);
-
-  MatrixPtr cpuBias = std::make_shared<CpuMatrix>(1, channel);
-  MatrixPtr gpuBias = std::make_shared<GpuMatrix>(1, channel);
-
-  cpuData->randomizeUniform();
-  gpuData->copyFrom(*cpuData);
-  cpuBias->randomizeUniform();
-  gpuBias->copyFrom(*cpuBias);
-
-  cpuData->addSharedBias(*cpuBias, 1.0);
-  gpuData->addSharedBias(*gpuBias, 1.0);
-
-  MatrixPtr check = std::make_shared<CpuMatrix>(numSamples, dim);
-  check->copyFrom(*gpuData);
-  MatrixCheckErr(*cpuData, *check);
-}
-
-void testCollectSharedBias(int numSamples, int dim, int channel) {
-  MatrixPtr cpuData = std::make_shared<CpuMatrix>(numSamples, dim);
-  MatrixPtr gpuData = std::make_shared<GpuMatrix>(numSamples, dim);
-
-  MatrixPtr cpuBias = std::make_shared<CpuMatrix>(1, channel);
-  MatrixPtr gpuBias = std::make_shared<GpuMatrix>(1, channel);
-
-  cpuData->randomizeUniform();
-  gpuData->copyFrom(*cpuData);
-  cpuBias->randomizeUniform();
-  gpuBias->copyFrom(*cpuBias);
-
-  cpuBias->collectSharedBias(*cpuData, 1.0);
-  gpuBias->collectSharedBias(*gpuData, 1.0);
-
-  MatrixPtr check = std::make_shared<CpuMatrix>(1, channel);
-  check->copyFrom(*gpuBias);
-  MatrixCheckErr(*cpuBias, *check);
-}
-
-TEST(Matrix, sharedBias) {
-  for (auto numSamples : {1, 100, 520}) {
-    for (auto dim : {100 * 16, 100 * 32}) {
-      for (auto channel : {8, 16}) {
-        VLOG(3) << " numSamples=" << numSamples << " dim=" << dim
-                << " channel=" << channel;
-        testAddSharedBias(numSamples, dim, channel);
-        testCollectSharedBias(numSamples, dim, channel);
-      }
-    }
-  }
-}
-
-void testMultiBinaryLabelCrossEntropy(int numSamples, int dim) {
-  MatrixPtr output = std::make_shared<CpuMatrix>(numSamples, dim);
-  MatrixPtr cpuOutput = std::make_shared<CpuMatrix>(numSamples, dim);
-  MatrixPtr gpuOutput = std::make_shared<GpuMatrix>(numSamples, dim);
-
-  MatrixPtr cpuEntropy = std::make_shared<CpuMatrix>(numSamples, 1);
-  MatrixPtr gpuEntropy = std::make_shared<GpuMatrix>(numSamples, 1);
-
-  MatrixPtr cpuGrad = std::make_shared<CpuMatrix>(numSamples, dim);
-  MatrixPtr gpuGrad = std::make_shared<GpuMatrix>(numSamples, dim);
-
-  MatrixPtr cpuLabel = std::make_shared<CpuSparseMatrix>(
-      numSamples, dim, numSamples, NO_VALUE, SPARSE_CSR, false);
-  MatrixPtr gpuLabel = std::make_shared<GpuSparseMatrix>(
-      numSamples, dim, numSamples, NO_VALUE, SPARSE_CSR, false);
-  for (int i = 0; i < numSamples; i++) {
-    const unsigned int id = rand() % dim;  // NOLINT
-    cpuLabel->setRow(i, 1, &id, nullptr);
-    gpuLabel->setRow(i, 1, &id, nullptr);
-  }
-
-  output->randomizeUniform();
-  cpuOutput->zeroMem();
-  output->softmax(*cpuOutput);
-  gpuOutput->copyFrom(*cpuOutput);
-
-  cpuEntropy->zeroMem();
-  gpuEntropy->zeroMem();
-  cpuEntropy->multiBinaryLabelCrossEntropy(*cpuOutput, *cpuLabel);
-  gpuEntropy->multiBinaryLabelCrossEntropy(*gpuOutput, *gpuLabel);
-
-  MatrixPtr check1 = std::make_shared<CpuMatrix>(numSamples, 1);
-  check1->copyFrom(*gpuEntropy);
-  MatrixCheckErr(*cpuEntropy, *check1);
-
-  cpuGrad->zeroMem();
-  gpuGrad->zeroMem();
-  cpuGrad->multiBinaryLabelCrossEntropyBp(*cpuOutput, *cpuLabel);
-  gpuGrad->multiBinaryLabelCrossEntropyBp(*gpuOutput, *gpuLabel);
-
-  MatrixPtr check2 = std::make_shared<CpuMatrix>(numSamples, dim);
-  check2->copyFrom(*gpuGrad);
-  MatrixCheckErr(*cpuGrad, *check2);
-}
-
-TEST(Matrix, multiBinaryCrossEntropy) {
-  for (auto numSamples : {100, 1000, 10000}) {
-    for (auto dim : {100, 1000, 10000}) {
-      VLOG(3) << " numSamples=" << numSamples << " dim=" << dim;
-      testMultiBinaryLabelCrossEntropy(numSamples, dim);
-    }
-  }
-}
-
 int main(int argc, char** argv) {
   testing::InitGoogleTest(&argc, argv);
   initMain(argc, argv);