Commit 17f7125b authored by H hedaoyuan, committed by GitHub

Merge pull request #643 from hedaoyuan/auto_compare

Auto-compare CPU and GPU functions.
......@@ -1584,11 +1584,6 @@ void BaseMatrixT<real>::minRows(BaseMatrixT& b) {
applyRow(aggregate::min(), b);
}
template<>
void BaseMatrixT<real>::sumCols(BaseMatrixT& b) {
applyCol(aggregate::sum(), b);
}
template<>
void BaseMatrixT<real>::maxCols(BaseMatrixT& b) {
applyCol(aggregate::max(), b);
......
......@@ -1018,8 +1018,6 @@ public:
/// calculate the minimum value of each row of the matrix b.
void minRows(BaseMatrixT& b);
/// calculate the sum of each column of the matrix b.
void sumCols(BaseMatrixT& b);
/// calculate the maximum value of each column of the matrix b.
void maxCols(BaseMatrixT& b);
/// calculate the minimum value of each column of the matrix b.
......
......@@ -2,7 +2,7 @@
add_simple_unittest(test_ExecViaCpu)
add_simple_unittest(test_SIMDFunctions)
add_simple_unittest(test_matrix)
add_simple_unittest(test_SparseMatrix)
# TODO(yuyang18): Refactor TestUtil.cpp. Remove this cross module reference.
add_unittest(test_matrixCompare
......@@ -14,4 +14,6 @@ add_simple_unittest(test_perturbation)
add_simple_unittest(test_CpuGpuVector)
add_simple_unittest(test_Allocator)
add_simple_unittest(test_FPException)
add_simple_unittest(test_GpuProfiler)
\ No newline at end of file
add_simple_unittest(test_GpuProfiler)
add_simple_unittest(test_BaseMatrix)
add_simple_unittest(test_Matrix)
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
/**
* This file provides a TensorCheck template function, which can be used to
* compare CpuMatrix and GpuMatrix, CpuVector and GpuVector, and so on.
*/
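/**
 * A minimal usage sketch (illustrative only; the 16x16 shape and the call
 * to randomizeUniform are arbitrary choices, not requirements of this file):
 *
 *   paddle::CpuMatrix cpu(16, 16);
 *   paddle::GpuMatrix gpu(16, 16);
 *   cpu.randomizeUniform();   // fill the CPU matrix with random values
 *   gpu.copyFrom(cpu);        // mirror the same data onto the GPU
 *   // ... apply the same operation to both matrices ...
 *   autotest::TensorCheckErr(cpu, gpu);  // compare within a small tolerance
 */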
#include <cmath>
#include "paddle/math/Matrix.h"
namespace autotest {
using paddle::Matrix;
using paddle::CpuMatrix;
using paddle::GpuMatrix;
using paddle::VectorT;
using paddle::CpuVectorT;
using paddle::GpuVectorT;
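// AssertEqual is the element comparison rule used by TensorCheck below:
// with err == 0 it demands exact equality; otherwise two values only count
// as different when the absolute error exceeds err AND the relative error
// exceeds err / 10.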
class AssertEqual {
public:
AssertEqual(real err = 0) : err_(err) {}
inline bool operator()(real a, real b) {
if (err_ == 0) {
if (a != b) {
return false;
}
} else {
if (std::fabs(a - b) > err_) {
if ((std::fabs(a - b) / std::fabs(a)) > (err_ / 10.0f)) {
return false;
}
}
}
return true;
}
private:
real err_;
};
template <typename Tensor>
class CopyToCpu;
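// CopyToCpu normalizes an argument into CPU memory: CPU tensors are held by
// reference, while GPU (and dynamically typed) tensors are copied into a
// freshly allocated CPU buffer.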
template <>
class CopyToCpu<CpuMatrix> {
public:
explicit CopyToCpu(const CpuMatrix& arg) : arg_(arg) {}
const CpuMatrix& copiedArg() const { return arg_; }
private:
const CpuMatrix& arg_;
};
template <>
class CopyToCpu<GpuMatrix> {
public:
explicit CopyToCpu(const GpuMatrix& arg)
: arg_(arg.getHeight(), arg.getWidth()) {
arg_.copyFrom(arg);
}
CpuMatrix& copiedArg() { return arg_; }
private:
CpuMatrix arg_;
};
template <>
class CopyToCpu<Matrix> {
public:
explicit CopyToCpu(const Matrix& arg)
: arg_(arg.getHeight(), arg.getWidth()) {
arg_.copyFrom(arg);
}
CpuMatrix& copiedArg() { return arg_; }
private:
CpuMatrix arg_;
};
template <typename T>
class CopyToCpu<CpuVectorT<T>> {
public:
explicit CopyToCpu(const CpuVectorT<T>& arg) : arg_(arg) {}
const CpuVectorT<T>& copiedArg() const { return arg_; }
private:
const CpuVectorT<T>& arg_;
};
template <typename T>
class CopyToCpu<GpuVectorT<T>> {
public:
explicit CopyToCpu(const GpuVectorT<T>& arg) : arg_(arg.getSize()) {
arg_.copyFrom(arg);
}
CpuVectorT<T>& copiedArg() { return arg_; }
private:
CpuVectorT<T> arg_;
};
template <typename T>
class CopyToCpu<VectorT<T>> {
public:
explicit CopyToCpu(const VectorT<T>& arg) : arg_(arg.getSize()) {
arg_.copyFrom(arg);
}
CpuVectorT<T>& copiedArg() { return arg_; }
private:
CpuVectorT<T> arg_;
};
template <typename AssertEq>
void TensorCheck(AssertEq compare,
const CpuMatrix& matrix1,
const CpuMatrix& matrix2) {
CHECK(matrix1.getHeight() == matrix2.getHeight());
CHECK(matrix1.getWidth() == matrix2.getWidth());
int height = matrix1.getHeight();
int width = matrix1.getWidth();
const real* data1 = matrix1.getData();
const real* data2 = matrix2.getData();
int count = 0;
for (int i = 0; i < height; i++) {
for (int j = 0; j < width; j++) {
real a = data1[i * width + j];
real b = data2[i * width + j];
if (!compare(a, b)) {
count++;
}
}
}
EXPECT_EQ(count, 0) << "There are " << count << " different elements.";
}
template <typename AssertEq, class T>
void TensorCheck(AssertEq compare,
const CpuVectorT<T>& vector1,
const CpuVectorT<T>& vector2) {
CHECK(vector1.getSize() == vector2.getSize());
const T* data1 = vector1.getData();
const T* data2 = vector2.getData();
size_t size = vector1.getSize();
int count = 0;
for (size_t i = 0; i < size; i++) {
real a = data1[i];
real b = data2[i];
if (!compare(a, b)) {
count++;
}
}
EXPECT_EQ(count, 0) << "There are " << count << " different elements.";
}
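// Generic dispatch: copy both tensors to the CPU first (see CopyToCpu),
// then compare them with one of the CPU overloads above.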
template <typename AssertEq, typename Tensor1, typename Tensor2>
void TensorCheck(AssertEq compare,
const Tensor1& tensor1,
const Tensor2& tensor2) {
TensorCheck(compare,
CopyToCpu<Tensor1>(tensor1).copiedArg(),
CopyToCpu<Tensor2>(tensor2).copiedArg());
}
template <typename AssertEq>
void TensorCheck(AssertEq compare, real args1, real args2) {
EXPECT_EQ(compare(args1, args2), true) << "[Test error] args1 = " << args1
<< ", args2 = " << args2;
}
template <typename AssertEq>
void TensorCheck(AssertEq compare, size_t args1, size_t args2) {
EXPECT_EQ(args1, args2) << "[Test error] args1 = " << args1
<< ", args2 = " << args2;
}
template <typename Tensor1, typename Tensor2>
void TensorCheckEqual(const Tensor1& tensor1, const Tensor2& tensor2) {
AssertEqual compare(0);
TensorCheck(compare,
CopyToCpu<Tensor1>(tensor1).copiedArg(),
CopyToCpu<Tensor2>(tensor2).copiedArg());
}
template <typename Tensor1, typename Tensor2>
void TensorCheckErr(const Tensor1& tensor1, const Tensor2& tensor2) {
#ifndef PADDLE_TYPE_DOUBLE
AssertEqual compare(1e-3);
#else
AssertEqual compare(1e-10);
#endif
TensorCheck(compare,
CopyToCpu<Tensor1>(tensor1).copiedArg(),
CopyToCpu<Tensor2>(tensor2).copiedArg());
}
} // namespace autotest
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
/**
 * This file provides an AutoCompare class to simplify the comparison
 * of CPU and GPU member functions.
 *
 * This takes two steps:
 * 1. Construct an AutoCompare object.
 *    When constructing an AutoCompare object, you can set the err argument
 *    to specify the maximum allowed error between the CPU and GPU results.
 *
 * 2. Use the template functions cmpWithArg or cmpWithoutArg.
 *    A. [cmpWithArg] Requires the caller to construct the CPU arguments.
 *
 *       AutoCompare test;
 *       Init Argument arg1, arg2...
 *       test.cmpWithArg(function, arg1, arg2....)
 *
 *    B. [cmpWithoutArg] The caller does not need to construct arguments,
 *       as long as every matrix used as an argument of the function has
 *       the same size, such as the element-wise and aggregate functions
 *       defined in BaseMatrix.cpp.
 *
 *       AutoCompare test;
 *       test.cmpWithoutArg<I...>(function, height, width)
 */
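/*
 * A concrete sketch of the cmpWithoutArg style (hypothetical sizes; neg and
 * relu are existing BaseMatrix members, with relu assumed to take a single
 * BaseMatrix& argument, as in the tests added by this commit):
 *
 *   AutoCompare test(height, width, 1e-5);
 *   test.cmpWithoutArg(&BaseMatrix::neg, height, width);       // no arguments
 *
 *   AutoCompare test2(height, width, 1e-5);
 *   test2.cmpWithoutArg<0>(&BaseMatrix::relu, height, width);  // one BaseMatrix& argument
 */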
#include <gtest/gtest.h>
#include "paddle/math/Matrix.h"
#include "paddle/math/SparseMatrix.h"
#include "TensorCheck.h"
namespace autotest {
using paddle::BaseMatrix;
using paddle::CpuMatrix;
using paddle::GpuMatrix;
using paddle::CpuIVector;
using paddle::GpuIVector;
using paddle::CpuSparseMatrix;
using paddle::GpuSparseMatrix;
template <typename T1, typename T2>
class ReplaceType {
public:
typedef T1 type;
};
template <>
class ReplaceType<BaseMatrix, CpuMatrix> {
public:
typedef CpuMatrix type;
};
template <>
class ReplaceType<BaseMatrix, GpuMatrix> {
public:
typedef GpuMatrix type;
};
template <>
class ReplaceType<Matrix, CpuMatrix> {
public:
typedef CpuMatrix type;
};
template <>
class ReplaceType<Matrix, GpuMatrix> {
public:
typedef GpuMatrix type;
};
// construct an argument
template <typename T>
T construct(int height, int width);
template <>
float construct(int height, int width) {
return 0.5;
}
template <>
double construct(int height, int width) {
return 0.5;
}
template <>
size_t construct(int height, int width) {
size_t offset = std::rand() % (height < width ? height : width);
return offset;
}
template <>
CpuMatrix construct(int height, int width) {
CpuMatrix a(height, width);
return a;
}
template <>
GpuMatrix construct(int height, int width) {
GpuMatrix a(height, width);
return a;
}
// init an argument
template <typename T>
void init(T& v) {
return;
}
template <>
void init(CpuMatrix& v) {
v.randomizeUniform();
}
template <>
void init(GpuMatrix& v) {
v.randomizeUniform();
}
// init a tuple which contains a set of arguments.
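// The two overloads below unroll the tuple at compile time: the
// I == sizeof...(Args) overload terminates the recursion, and the
// I < sizeof...(Args) overload initializes element I, then recurses on I + 1.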
template <std::size_t I = 0, typename... Args>
inline typename std::enable_if<I == sizeof...(Args), void>::type initTuple(
std::tuple<Args...>& t) {}
template <std::size_t I = 0, typename... Args>
inline typename std::enable_if<(I < sizeof...(Args)), void>::type initTuple(
    std::tuple<Args...>& t) {
init(std::get<I>(t));
initTuple<I + 1>(t);
}
// copy an argument: copy src to dest
template <typename T1, typename T2>
void copy(T1& dest, T2& src) {
dest = src;
}
template <>
void copy(GpuMatrix& dest, CpuMatrix& src) {
dest.copyFrom(src);
}
// copy a tuple, copy src to dest
template <std::size_t I = 0, typename... Args1, typename... Args2>
inline typename std::enable_if<I == sizeof...(Args1), void>::type copyTuple(
std::tuple<Args1...>& dest, std::tuple<Args2...>& src) {}
template <std::size_t I = 0, typename... Args1, typename... Args2>
inline typename std::enable_if<(I < sizeof...(Args)), void>::type copyTuple(
    std::tuple<Args1...>& dest, std::tuple<Args2...>& src) {
copy(std::get<I>(dest), std::get<I>(src));
copyTuple<I + 1>(dest, src);
}
// call member function
template <typename C,
typename FC,
typename R,
typename... FArgs,
typename... Args>
R call(C& obj, R (FC::*f)(FArgs...), Args&&... args) {
return (obj.*f)(args...);
}
template <typename T>
class ReturnType {
public:
typedef T type;
};
template <>
class ReturnType<CpuMatrix> {
public:
typedef GpuMatrix type;
};
template <>
class ReturnType<CpuIVector> {
public:
typedef GpuIVector type;
};
template <>
class ReturnType<CpuSparseMatrix> {
public:
typedef GpuSparseMatrix type;
};
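// autoArgs maps a CPU argument to what the GPU call expects: scalars pass
// through unchanged, while CPU matrices and vectors are copied into newly
// allocated GPU counterparts (see the ReturnType specializations above).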
template <typename T>
typename ReturnType<T>::type autoArgs(T& v) {
return v;
}
template <>
GpuMatrix autoArgs(CpuMatrix& v) {
GpuMatrix a(v.getHeight(), v.getWidth());
a.copyFrom(v);
return a;
}
template <>
GpuIVector autoArgs(CpuIVector& v) {
GpuIVector a(v.getSize());
a.copyFrom(v);
return a;
}
template <>
GpuSparseMatrix autoArgs(CpuSparseMatrix& v) {
GpuSparseMatrix a(v.getHeight(),
v.getWidth(),
v.getElementCnt(),
v.getValueType(),
v.getFormat());
a.copyFrom(v, HPPL_STREAM_DEFAULT);
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
return a;
}
class AutoCompare {
public:
/**
* err is the allowed calculation error.
* The smaller the value of err,
* the stricter the comparison is between CPU and GPU calculations.
*/
AutoCompare(size_t height, size_t width, real err = 1e-3)
: cpu(height, width), gpu(height, width), compare(err) {
init(cpu);
copy(gpu, cpu);
}
template <typename C, typename R, typename... FArgs, typename... Args>
void cmpWithArg(R (C::*f)(FArgs...), Args&&... args) {
static_assert(sizeof...(FArgs) == sizeof...(Args),
"size of parameter packs are not equal");
call(cpu, f, args...);
call(gpu, f, autoArgs(args)...);
TensorCheck(compare, cpu, gpu);
}
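// cmpWithoutArg<I...> fabricates one argument per index in I...: it builds
// matching CPU and GPU tuples of height-by-width arguments via construct(),
// fills the CPU tuple with random data, mirrors it into the GPU tuple, runs
// f against both the cpu and gpu members, and compares the results.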
template <std::size_t... I, typename C, typename R, typename... Args>
void cmpWithoutArg(R (C::*f)(Args...), size_t height, size_t width) {
static_assert(sizeof...(I) == sizeof...(Args),
              "sizes of the parameter packs are not equal");
// height and width are only consumed when I... is non-empty; the casts
// silence unused-parameter warnings for the zero-index case.
(void)height;
(void)width;
auto tuple1 = std::make_tuple(
construct<typename ReplaceType<
typename std::decay<
typename std::tuple_element<I,
std::tuple<Args...>>::type>::type,
CpuMatrix>::type>(height, width)...);
auto tuple2 = std::make_tuple(
construct<typename ReplaceType<
typename std::decay<
typename std::tuple_element<I,
std::tuple<Args...>>::type>::type,
GpuMatrix>::type>(height, width)...);
initTuple(tuple1);
copyTuple(tuple2, tuple1);
call(cpu, f, std::get<I>(tuple1)...);
call(gpu, f, std::get<I>(tuple2)...);
TensorCheck(compare, cpu, gpu);
}
protected:
CpuMatrix cpu;
GpuMatrix gpu;
AssertEqual compare;
};
} // namespace autotest
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef PADDLE_ONLY_CPU
/**
 * This test file uses autotest::AutoCompare and cmpWithoutArg to compare the
 * CPU and GPU implementations of the member functions in
 * BaseMatrix.cpp and Matrix.cpp.
*/
#include <gtest/gtest.h>
#include "paddle/math/BaseMatrix.h"
#include "TestUtils.h"
using paddle::BaseMatrix;
using paddle::Matrix;
using autotest::AutoCompare;
// Test all void (BaseMatrix::*)() function
TEST(BaseMatrix, void) {
for (auto height : {1, 3, 11, 73, 128, 200, 330}) {
for (auto width : {1, 3, 32, 100, 512, 1000, 3210}) {
auto compare = [height, width](void (BaseMatrix::*f)()) {
AutoCompare test(height, width, 1e-5);
test.cmpWithoutArg(f, height, width);
};
compare(&BaseMatrix::neg);
compare(&BaseMatrix::exp);
compare(&BaseMatrix::log);
compare(&BaseMatrix::sqrt);
compare(&BaseMatrix::square);
compare(&BaseMatrix::reciprocal);
compare(&BaseMatrix::abs);
compare(&BaseMatrix::sign);
compare(&BaseMatrix::zero);
compare(&BaseMatrix::one);
}
}
}
// Test all void (BaseMatrix::*)(real) function
TEST(BaseMatrix, real) {
for (auto height : {1, 3, 11, 73, 128, 200, 330}) {
for (auto width : {1, 3, 32, 100, 512, 1000, 3210}) {
auto compare = [height, width](void (BaseMatrix::*f)(real)) {
AutoCompare test(height, width, 1e-5);
test.cmpWithoutArg<0>(f, height, width);
};
compare(&BaseMatrix::pow);
compare(&BaseMatrix::subScalar);
compare(&BaseMatrix::mulScalar);
compare(&BaseMatrix::divScalar);
compare(&BaseMatrix::assign);
compare(&BaseMatrix::add);
compare(&BaseMatrix::biggerThanScalar);
compare(&BaseMatrix::downClip);
}
}
}
// Test all void (BaseMatrix::*)(BaseMatrix&) function
TEST(BaseMatrix, BaseMatrix) {
for (auto height : {1, 3, 11, 73, 128, 200, 330}) {
for (auto width : {1, 3, 32, 100, 512, 1000, 3210}) {
auto compare = [height, width](void (BaseMatrix::*f)(BaseMatrix&)) {
AutoCompare test(height, width, 1e-5);
test.cmpWithoutArg<0>(f, height, width);
};
compare(&BaseMatrix::assign);
compare(&BaseMatrix::add);
compare(&BaseMatrix::relu);
compare(&BaseMatrix::reluDerivative);
compare(&BaseMatrix::softrelu);
compare(&BaseMatrix::softreluDerivative);
compare(&BaseMatrix::brelu);
compare(&BaseMatrix::breluDerivative);
compare(&BaseMatrix::square);
compare(&BaseMatrix::squareDerivative);
compare(&BaseMatrix::tanh);
compare(&BaseMatrix::tanhDerivative);
compare(&BaseMatrix::reciprocal);
compare(&BaseMatrix::reciprocalDerivative);
compare(&BaseMatrix::abs);
compare(&BaseMatrix::absDerivative);
compare(&BaseMatrix::sigmoid);
compare(&BaseMatrix::sigmoidDerivative);
compare(&BaseMatrix::expDerivative);
compare(&BaseMatrix::sign);
compare(&BaseMatrix::exp);
compare(&BaseMatrix::log);
compare(&BaseMatrix::sqrt);
compare(&BaseMatrix::dotMul);
compare(&BaseMatrix::dotMulSquare);
compare(&BaseMatrix::dotSquareMul);
compare(&BaseMatrix::addColVector);
compare(&BaseMatrix::addRowVector);
compare(&BaseMatrix::mulRowVector);
compare(&BaseMatrix::divRowVector);
compare(&BaseMatrix::addP2P);
compare(&BaseMatrix::invSqrt);
}
}
}
// Test all void (BaseMatrix::*)(real, real) function
TEST(BaseMatrix, real_real) {
for (auto height : {1, 3, 11, 73, 128, 200, 330}) {
for (auto width : {1, 3, 32, 100, 512, 1000, 3210}) {
auto compare = [height, width](void (BaseMatrix::*f)(real, real)) {
AutoCompare test(height, width, 1e-5);
test.cmpWithoutArg<0, 1>(f, height, width);
};
compare(&BaseMatrix::add);
compare(&BaseMatrix::clip);
}
}
}
// Test all void (BaseMatrix::*)(BaseMatrix&, real) function
TEST(BaseMatrix, BaseMatrix_real) {
for (auto height : {1, 3, 11, 73, 128, 200, 330}) {
for (auto width : {1, 3, 32, 100, 512, 1000, 3210}) {
auto compare = [height, width](void (BaseMatrix::*f)(BaseMatrix&, real)) {
AutoCompare test(height, width, 1e-5);
test.cmpWithoutArg<0, 1>(f, height, width);
};
compare(&BaseMatrix::addBias);
compare(&BaseMatrix::add);
compare(&BaseMatrix::sub);
compare(&BaseMatrix::pow);
compare(&BaseMatrix::addScalar);
compare(&BaseMatrix::subScalar);
compare(&BaseMatrix::mulScalar);
compare(&BaseMatrix::divScalar);
compare(&BaseMatrix::scalarDiv);
compare(&BaseMatrix::addSquare);
compare(&BaseMatrix::isEqualTo);
}
}
}
// Test all void (BaseMatrix::*)(BaseMatrix&, BaseMatrix&) function
TEST(BaseMatrix, BaseMatrix_BaseMatrix) {
for (auto height : {1, 3, 11, 73, 128, 200, 330}) {
for (auto width : {1, 3, 32, 100, 512, 1000, 3210}) {
auto compare = [height,
width](void (BaseMatrix::*f)(BaseMatrix&, BaseMatrix&)) {
AutoCompare test(height, width, 1e-5);
test.cmpWithoutArg<0, 1>(f, height, width);
};
compare(&BaseMatrix::softCrossEntropy);
compare(&BaseMatrix::softCrossEntropyBp);
compare(&BaseMatrix::binaryLabelCrossEntropy);
compare(&BaseMatrix::binaryLabelCrossEntropyBp);
compare(&BaseMatrix::sub);
compare(&BaseMatrix::add2);
compare(&BaseMatrix::dotMul);
compare(&BaseMatrix::dotDiv);
compare(&BaseMatrix::logisticRegressionLoss);
compare(&BaseMatrix::logisticRegressionLossBp);
compare(&BaseMatrix::biggerThan);
compare(&BaseMatrix::max);
compare(&BaseMatrix::dotMulSquare);
compare(&BaseMatrix::dotSquareSquare);
}
}
}
void TestElementWise(size_t height, size_t width) {
AutoCompare rowScale(height, width);
rowScale.cmpWithoutArg<0, 1, 2>(&BaseMatrix::rowScale, height, width);
AutoCompare rowDotMul(height, width);
rowDotMul.cmpWithoutArg<0, 1, 2>(&BaseMatrix::rowDotMul, height, width);
AutoCompare binaryClassificationError(height, width);
binaryClassificationError.cmpWithoutArg<0, 1, 2, 3>(
&BaseMatrix::binaryClassificationError, height, width);
AutoCompare sumOfSquaresBp(height, width);
sumOfSquaresBp.cmpWithoutArg<0, 1>(&Matrix::sumOfSquaresBp, height, width);
}
void TestAggregateToRow(size_t height, size_t width) {
AutoCompare maxCols(1, width);
maxCols.cmpWithoutArg<0>(&BaseMatrix::maxCols, height, width);
AutoCompare minCols(1, width);
minCols.cmpWithoutArg<0>(&BaseMatrix::minCols, height, width);
AutoCompare addDotMulVMM(1, width);
addDotMulVMM.cmpWithoutArg<0, 1>(&BaseMatrix::addDotMulVMM, height, width);
AutoCompare sumCols(1, width);
sumCols.cmpWithoutArg<0, 1, 2>(&BaseMatrix::sumCols, height, width);
AutoCompare collectBias(1, width);
collectBias.cmpWithoutArg<0, 1>(
static_cast<void (Matrix::*)(Matrix&, real)>(&Matrix::collectBias),
height,
width);
}
void TestAggregateToCol(size_t height, size_t width) {
AutoCompare maxRows(height, 1);
maxRows.cmpWithoutArg<0>(&BaseMatrix::maxRows, height, width);
AutoCompare minRows(height, 1);
minRows.cmpWithoutArg<0>(&BaseMatrix::minRows, height, width);
AutoCompare sumRows(height, 1);
sumRows.cmpWithoutArg<0, 1, 2>(&BaseMatrix::sumRows, height, width);
AutoCompare sumOfSquares(height, 1);
sumOfSquares.cmpWithoutArg<0, 1>(&Matrix::sumOfSquares, height, width);
}
TEST(BaseMatrix, Other) {
for (auto height : {1, 3, 11, 73, 128, 200, 330}) {
for (auto width : {1, 3, 32, 100, 512, 1000, 3210}) {
TestElementWise(height, width);
TestAggregateToRow(height, width);
TestAggregateToCol(height, width);
}
}
}
int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv);
paddle::initMain(argc, argv);
return RUN_ALL_TESTS();
}
#endif
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef PADDLE_ONLY_CPU
/**
 * This test file uses autotest::AutoCompare and cmpWithArg to compare the
 * CPU and GPU implementations of the member functions in Matrix.cpp.
*/
#include <gtest/gtest.h>
#include "TestUtils.h"
using paddle::BaseMatrix;
using paddle::Matrix;
using paddle::CpuMatrix;
using paddle::CpuIVector;
using paddle::CpuSparseMatrix;
using autotest::AutoCompare;
void testBilinearFwdBwd(int numSamples,
int imgSizeH,
int imgSizeW,
int channels) {
int inWidth = imgSizeH * imgSizeW * channels;
int outWidth = 2 * imgSizeH * 2 * imgSizeW * channels;
real ratioH = 0.5;
real ratioW = 0.5;
AutoCompare forward(numSamples, outWidth);
CpuMatrix arg1(numSamples, inWidth);
arg1.randomizeUniform();
forward.cmpWithArg(&Matrix::bilinearForward,
arg1,
imgSizeH,
imgSizeW,
2 * imgSizeH,
2 * imgSizeW,
channels,
ratioH,
ratioW);
AutoCompare backward(numSamples, inWidth);
CpuMatrix arg2(numSamples, outWidth);
arg2.randomizeUniform();
backward.cmpWithArg(&Matrix::bilinearBackward,
arg2,
2 * imgSizeH,
2 * imgSizeW,
imgSizeH,
imgSizeW,
channels,
ratioH,
ratioW);
}
TEST(Matrix, BilinearFwdBwd) {
for (auto numSamples : {5, 10}) {
for (auto channels : {8, 16}) {
for (auto imgSizeH : {14, 28}) {
for (auto imgSizeW : {16, 30}) {
VLOG(3) << " numSamples=" << numSamples << " channels=" << channels
<< " imgSizeH=" << imgSizeH << " imgSizeW=" << imgSizeW;
testBilinearFwdBwd(numSamples, imgSizeH, imgSizeW, channels);
}
}
}
}
}
void testMatrixAddBias(int height, int width, real scale) {
AutoCompare test(height, width);
CpuMatrix arg1(1, width);
arg1.randomizeUniform();
test.cmpWithArg(
static_cast<void (Matrix::*)(Matrix&, real)>(&Matrix::addBias),
arg1,
scale);
}
void testMatrixAddDotMulMMV(int height, int width) {
AutoCompare test(height, width);
CpuMatrix arg1(height, width);
CpuMatrix arg2(1, width);
arg1.randomizeUniform();
arg2.randomizeUniform();
test.cmpWithArg(&BaseMatrix::addDotMulMMV, arg1, arg2);
}
TEST(Matrix, unary) {
for (auto height : {1, 3, 11, 73, 128, 200, 330}) {
for (auto width : {1, 3, 32, 100, 512, 1000, 3210}) {
VLOG(3) << " height=" << height << " width=" << width;
testMatrixAddBias(height, width, 1.0);
testMatrixAddBias(height, width, 3.5);
testMatrixAddDotMulMMV(height, width);
}
}
}
void testMatrixAddAtOffset(int height, int width1, int width2, int offset) {
AutoCompare test(height, width2);
CpuMatrix arg1(height, width1);
arg1.randomizeUniform();
test.cmpWithArg(&Matrix::addAtOffset, arg1, offset);
}
void testMatrixAssignAtOffset(int height, int width1, int width2, int offset) {
AutoCompare test(height, width2);
CpuMatrix arg1(height, width1);
arg1.randomizeUniform();
test.cmpWithArg(&Matrix::assignAtOffset, arg1, offset);
}
TEST(Matrix, AtOffset) {
for (auto height : {1, 11, 73, 128, 200}) {
for (auto width1 : {1, 32, 100, 512, 1000}) {
for (auto width2 : {1, 32, 100, 512, 1000}) {
int columnOffset = 0;
int offset = std::abs(width1 - width2);
if (offset) {
columnOffset = std::rand() % offset;
}
VLOG(3) << " height=" << height << " width1=" << width1
<< " width2=" << width2 << " columnOffset = " << columnOffset;
testMatrixAddAtOffset(height, width1, width2, columnOffset);
testMatrixAssignAtOffset(height, width1, width2, columnOffset);
}
}
}
}
void testMatrixSelectRows(int numSamples, int tableSize, int inputDim) {
AutoCompare test(numSamples, inputDim);
CpuMatrix arg1(tableSize, inputDim);
CpuIVector arg2(numSamples);
arg1.randomizeUniform();
arg2.rand(tableSize);
test.cmpWithArg(&Matrix::selectRows, arg1, arg2);
}
TEST(Matrix, tableProjection) {
for (auto numSamples : {10, 100, 1000, 10000, 80000}) {
for (auto tableSize : {10, 100}) {
for (auto inputDim : {20, 50}) {
VLOG(3) << " numSamples=" << numSamples << " tableSize=" << tableSize
<< " inputDim=" << inputDim;
testMatrixSelectRows(numSamples, tableSize, inputDim);
}
}
}
}
void testMatrixCopyByRowIndex(int outHeight, int inHeight, int width) {
AutoCompare test(outHeight, width);
CpuMatrix arg1(inHeight, width);
CpuIVector arg2(outHeight);
arg1.randomizeUniform();
arg2.rand(inHeight);
test.cmpWithArg(&Matrix::copyByRowIndex, arg1, arg2);
}
TEST(Matrix, copyByRowIndex) {
for (auto outHeight : {31, 500, 1000}) {
for (auto inHeight : {17, 257, 500, 1200}) {
for (auto width : {512, 1024}) {
VLOG(3) << outHeight << " " << inHeight << " " << width;
testMatrixCopyByRowIndex(outHeight, inHeight, width);
}
}
}
}
void testCosSim(int heightX, int heightY, int width, real scale) {
AutoCompare test(heightX, 1);
CpuMatrix arg1(heightX, width);
CpuMatrix arg2(heightY, width);
arg1.randomizeUniform();
arg2.randomizeUniform();
arg2.add(-0.5);
test.cmpWithArg(&Matrix::cosSim, arg1, arg2, scale);
}
TEST(Matrix, cosSim) {
for (auto heightX : {10, 100, 1000}) {
for (auto heightY : {1, heightX}) {
for (auto width : {10, 100, 1000}) {
for (auto scale : {1.0, 2.0}) {
testCosSim(heightX, heightY, width, scale);
}
}
}
}
}
void testParamReluForward(int height, int width, int w_height, int w_width) {
AutoCompare test(height, width);
CpuMatrix arg1(height, width);
CpuMatrix arg2(w_height, w_width);
arg1.randomizeUniform();
arg2.randomizeUniform();
arg1.add(-0.5);
test.cmpWithArg(&Matrix::paramReluForward, arg1, arg2);
}
void testParamReluBackwardW(int height, int width, int w_height, int w_width) {
AutoCompare test(w_height, w_width);
CpuMatrix arg1(height, width);
CpuMatrix arg2(height, width);
arg1.randomizeUniform();
arg2.randomizeUniform();
arg2.add(-0.5);
test.cmpWithArg(&Matrix::paramReluBackwardW, arg1, arg2);
}
TEST(Matrix, paramRelu) {
for (auto height : {10, 100}) {
for (auto width : {10, 100}) {
for (auto w_height : {1, 2}) {
for (auto w_width : {1, 2}) {
testParamReluForward(height, width, w_height, w_width);
testParamReluBackwardW(height, width, w_height, w_width);
}
}
}
}
}
void testAddSharedBias(int numSamples, int dim, int channel) {
AutoCompare test(numSamples, dim);
CpuMatrix arg1(1, channel);
arg1.randomizeUniform();
test.cmpWithArg(&Matrix::addSharedBias, arg1, 1.0);
}
void testCollectSharedBias(int numSamples, int dim, int channel) {
AutoCompare test(1, channel);
CpuMatrix arg1(numSamples, dim);
arg1.randomizeUniform();
test.cmpWithArg(&Matrix::collectSharedBias, arg1, 1.0);
}
TEST(Matrix, sharedBias) {
for (auto numSamples : {1, 100, 520}) {
for (auto dim : {100 * 16, 100 * 32}) {
for (auto channel : {8, 16}) {
VLOG(3) << " numSamples=" << numSamples << " dim=" << dim
<< " channel=" << channel;
testAddSharedBias(numSamples, dim, channel);
testCollectSharedBias(numSamples, dim, channel);
}
}
}
}
void testMultiBinaryLabelCrossEntropy(int numSamples, int dim) {
AutoCompare forward(numSamples, 1);
CpuMatrix arg1(numSamples, dim);
CpuSparseMatrix arg2(
numSamples, dim, numSamples, paddle::NO_VALUE, paddle::SPARSE_CSR);
CpuMatrix output1(numSamples, dim);
output1.randomizeUniform();
output1.softmax(arg1);
for (int i = 0; i < numSamples; i++) {
const unsigned int id = std::rand() % dim;
arg2.setRow(i, 1, &id, nullptr);
}
forward.cmpWithArg(&Matrix::multiBinaryLabelCrossEntropy, arg1, arg2);
AutoCompare backward(numSamples, dim);
backward.cmpWithArg(&Matrix::multiBinaryLabelCrossEntropyBp, arg1, arg2);
}
TEST(Matrix, multiBinaryCrossEntropy) {
for (auto numSamples : {100, 1000, 10000}) {
for (auto dim : {100, 1000, 10000}) {
VLOG(3) << " numSamples=" << numSamples << " dim=" << dim;
testMultiBinaryLabelCrossEntropy(numSamples, dim);
}
}
}
int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv);
paddle::initMain(argc, argv);
return RUN_ALL_TESTS();
}
#endif
......@@ -22,163 +22,12 @@ limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/gserver/tests/TestUtil.h"
#include "paddle/utils/Stat.h"
#include "TensorCheck.h"
using namespace paddle; // NOLINT
using namespace std; // NOLINT
template <class T>
void VectorCheckEqual(const VectorT<T>& vector1, const VectorT<T>& vector2) {
CHECK(vector1.getSize() == vector2.getSize());
const T* data1 = vector1.getData();
const T* data2 = vector2.getData();
size_t size = vector1.getSize();
int count = 0;
for (size_t i = 0; i < size; i++) {
if (data1[i] != data2[i]) {
count++;
}
}
EXPECT_EQ(count, 0) << "There are " << count << " different elements.";
}
void MatrixCheckEqual(const Matrix& matrix1, const Matrix& matrix2) {
CHECK(matrix1.getHeight() == matrix2.getHeight());
CHECK(matrix1.getWidth() == matrix2.getWidth());
int height = matrix1.getHeight();
int width = matrix1.getWidth();
const real* data1 = matrix1.getData();
const real* data2 = matrix2.getData();
int count = 0;
for (int i = 0; i < height; i++) {
for (int j = 0; j < width; j++) {
if (data1[i * width + j] != data2[i * width + j]) {
count++;
}
}
}
EXPECT_EQ(count, 0) << "There are " << count << " different elements.";
}
void MatrixCheckErr(const Matrix& matrix1, const Matrix& matrix2) {
CHECK(matrix1.getHeight() == matrix2.getHeight());
CHECK(matrix1.getWidth() == matrix2.getWidth());
#ifndef PADDLE_TYPE_DOUBLE
real err = 1e-3;
#else
real err = 1e-10;
#endif
int height = matrix1.getHeight();
int width = matrix1.getWidth();
const real* data1 = matrix1.getData();
const real* data2 = matrix2.getData();
int count = 0;
for (int i = 0; i < height; i++) {
for (int j = 0; j < width; j++) {
real a = data1[i * width + j];
real b = data2[i * width + j];
if (fabs(a - b) > err) {
if ((fabsf(a - b) / fabsf(a)) > (err / 10.0f)) {
count++;
}
}
}
}
EXPECT_EQ(count, 0) << "There are " << count << " different elements.";
}
void testBilinearFwdBwd(int numSamples,
int imgSizeH,
int imgSizeW,
int channels) {
int inWidth = imgSizeH * imgSizeW * channels;
int outWidth = 2 * imgSizeH * 2 * imgSizeW * channels;
real ratioH = 0.5;
real ratioW = 0.5;
// forward
MatrixPtr input = CpuMatrix::create(numSamples, inWidth, false, false);
MatrixPtr inputGpu = GpuMatrix::create(numSamples, inWidth, false, true);
MatrixPtr target = CpuMatrix::create(numSamples, outWidth, false, false);
MatrixPtr targetGpu = GpuMatrix::create(numSamples, outWidth, false, true);
MatrixPtr targetCheck = CpuMatrix::create(numSamples, outWidth, false, false);
input->randomizeUniform();
inputGpu->copyFrom(*input);
target->bilinearForward(*input,
imgSizeH,
imgSizeW,
2 * imgSizeH,
2 * imgSizeW,
channels,
ratioH,
ratioW);
targetGpu->bilinearForward(*inputGpu,
imgSizeH,
imgSizeW,
2 * imgSizeH,
2 * imgSizeW,
channels,
ratioH,
ratioW);
// check
targetCheck->copyFrom(*targetGpu);
MatrixCheckErr(*target, *targetCheck);
// backward
MatrixPtr inputGrad = CpuMatrix::create(numSamples, inWidth, false, false);
MatrixPtr inputGpuGrad = GpuMatrix::create(numSamples, inWidth, false, true);
MatrixPtr targetGrad = CpuMatrix::create(numSamples, outWidth, false, false);
MatrixPtr targetGpuGrad =
GpuMatrix::create(numSamples, outWidth, false, true);
MatrixPtr targetCheckGrad =
CpuMatrix::create(numSamples, inWidth, false, false);
inputGrad->randomizeUniform();
targetGrad->randomizeUniform();
inputGpuGrad->copyFrom(*inputGrad);
targetGpuGrad->copyFrom(*targetGrad);
inputGrad->bilinearBackward(*targetGrad,
2 * imgSizeH,
2 * imgSizeW,
imgSizeH,
imgSizeW,
channels,
ratioH,
ratioW);
inputGpuGrad->bilinearBackward(*targetGpuGrad,
2 * imgSizeH,
2 * imgSizeW,
imgSizeH,
imgSizeW,
channels,
ratioH,
ratioW);
// check
targetCheckGrad->copyFrom(*inputGpuGrad);
MatrixCheckErr(*inputGrad, *targetCheckGrad);
}
TEST(Matrix, BilinearFwdBwd) {
for (auto numSamples : {5, 10}) {
for (auto channels : {8, 16}) {
for (auto imgSizeH : {14, 28}) {
for (auto imgSizeW : {16, 30}) {
VLOG(3) << " numSamples=" << numSamples << " channels=" << channels
<< " imgSizeH=" << imgSizeH << " imgSizeW=" << imgSizeW;
testBilinearFwdBwd(numSamples, imgSizeH, imgSizeW, channels);
}
}
}
}
}
using autotest::TensorCheckEqual;
using autotest::TensorCheckErr;
void testMatrixProjectionForward(int contextStart,
int contextLength,
......@@ -232,12 +81,7 @@ void testMatrixProjectionForward(int contextStart,
beginPad,
padding);
// check
MatrixPtr outputCheck =
std::make_shared<CpuMatrix>(batchSize, inputDim * contextLength);
outputCheck->copyFrom(*gpuOutput);
MatrixCheckEqual(*cpuOutput, *outputCheck);
TensorCheckEqual(*cpuOutput, *gpuOutput);
}
void testMatrixProjectionBackward(int contextStart,
......@@ -294,15 +138,9 @@ void testMatrixProjectionBackward(int contextStart,
beginPad);
}
// check
MatrixPtr inputGradCheck = std::make_shared<CpuMatrix>(batchSize, inputDim);
inputGradCheck->copyFrom(*gpuInputGrad);
MatrixCheckErr(*cpuInputGrad, *inputGradCheck);
TensorCheckErr(*cpuInputGrad, *gpuInputGrad);
if (padding) {
MatrixPtr weightGradCheck = std::make_shared<CpuMatrix>(pad, inputDim);
weightGradCheck->copyFrom(*gpuWeightGrad);
MatrixCheckErr(*cpuWeightGrad, *weightGradCheck);
TensorCheckErr(*cpuWeightGrad, *gpuWeightGrad);
}
}
......@@ -361,15 +199,8 @@ void testMatrixMaxSequence(int batchSize, int inputDim) {
cpuOutput->maxSequenceForward(*cpuInput, *cpuSequence, *cpuIndex);
gpuOutput->maxSequenceForward(*gpuInput, *gpuSequence, *gpuIndex);
// check
MatrixPtr outputCheck = std::make_shared<CpuMatrix>(newBatchSize, inputDim);
outputCheck->copyFrom(*gpuOutput);
MatrixCheckEqual(*cpuOutput, *outputCheck);
IVectorPtr indexCheck = nullptr;
IVector::resizeOrCreate(indexCheck, newBatchSize * inputDim, false);
indexCheck->copyFrom(*gpuIndex);
VectorCheckEqual(*cpuIndex, *indexCheck);
TensorCheckEqual(*cpuOutput, *gpuOutput);
TensorCheckEqual(*cpuIndex, *gpuIndex);
// backward
MatrixPtr cpuOutputGrad = std::make_shared<CpuMatrix>(newBatchSize, inputDim);
......@@ -385,10 +216,7 @@ void testMatrixMaxSequence(int batchSize, int inputDim) {
cpuInputGrad->maxSequenceBackward(*cpuOutputGrad, *cpuSequence, *cpuIndex);
gpuInputGrad->maxSequenceBackward(*gpuOutputGrad, *gpuSequence, *gpuIndex);
// check
MatrixPtr inputGradCheck = std::make_shared<CpuMatrix>(batchSize, inputDim);
inputGradCheck->copyFrom(*gpuInputGrad);
MatrixCheckEqual(*cpuInputGrad, *inputGradCheck);
TensorCheckEqual(*cpuInputGrad, *gpuInputGrad);
}
TEST(Matrix, maxSequence) {
......@@ -431,6 +259,8 @@ void testMatrixZeroAtOffset(int height, int width) {
int columnOffset = rand() % width; // NOLINT we just use rand() for test.
int numColumns = rand() % (width - columnOffset); // NOLINT
if (numColumns == 0) return;
cpuA->zeroAtOffset(columnOffset, numColumns);
gpuA->zeroAtOffset(columnOffset, numColumns);
......@@ -442,10 +272,8 @@ void testMatrixZeroAtOffset(int height, int width) {
}
}
MatrixPtr outputCheck = std::make_shared<CpuMatrix>(height, width);
outputCheck->copyFrom(*gpuA);
MatrixCheckEqual(*cpuA, *outputCheck);
MatrixCheckEqual(*cpuA, *cpuTest);
TensorCheckEqual(*cpuA, *gpuA);
TensorCheckEqual(*cpuA, *cpuTest);
}
void testMatrixDeepSwap(int height, int width) {
......@@ -462,303 +290,8 @@ void testMatrixDeepSwap(int height, int width) {
// swap matrix cpuA and cpuB
cpuA->deepSwap(*cpuB);
MatrixCheckEqual(*cpuA, *cpuCopyB);
MatrixCheckEqual(*cpuB, *cpuCopyA);
}
void testMatrixBinaryAdd(int height, int width) {
MatrixPtr cpuA = std::make_shared<CpuMatrix>(height, width);
MatrixPtr cpuB = std::make_shared<CpuMatrix>(height, width);
MatrixPtr gpuA = std::make_shared<GpuMatrix>(height, width);
MatrixPtr gpuB = std::make_shared<GpuMatrix>(height, width);
cpuA->randomizeUniform();
cpuB->randomizeUniform();
gpuA->copyFrom(*cpuA);
gpuB->copyFrom(*cpuB);
cpuA->add(*cpuB);
gpuA->add(*gpuB);
MatrixPtr outputCheck = std::make_shared<CpuMatrix>(height, width);
outputCheck->copyFrom(*gpuA);
MatrixCheckEqual(*cpuA, *outputCheck);
}
void testMatrixAssign(int height, int width) {
MatrixPtr cpuA = std::make_shared<CpuMatrix>(height, width);
MatrixPtr gpuA = std::make_shared<GpuMatrix>(height, width);
cpuA->randomizeUniform();
gpuA->copyFrom(*cpuA);
cpuA->assign(2.5);
gpuA->assign(2.5);
MatrixPtr outputCheck = std::make_shared<CpuMatrix>(height, width);
outputCheck->copyFrom(*gpuA);
MatrixCheckEqual(*cpuA, *outputCheck);
}
void testMatrixAdd(int height, int width) {
MatrixPtr cpuA = std::make_shared<CpuMatrix>(height, width);
MatrixPtr gpuA = std::make_shared<GpuMatrix>(height, width);
cpuA->randomizeUniform();
gpuA->copyFrom(*cpuA);
cpuA->add(2.5);
gpuA->add(2.5);
MatrixPtr outputCheck = std::make_shared<CpuMatrix>(height, width);
outputCheck->copyFrom(*gpuA);
MatrixCheckEqual(*cpuA, *outputCheck);
}
void testMatrixSqrt(int height, int width) {
MatrixPtr cpuA = std::make_shared<CpuMatrix>(height, width);
MatrixPtr gpuA = std::make_shared<GpuMatrix>(height, width);
cpuA->randomizeUniform();
gpuA->copyFrom(*cpuA);
cpuA->sqrt();
gpuA->sqrt();
MatrixPtr outputCheck = std::make_shared<CpuMatrix>(height, width);
outputCheck->copyFrom(*gpuA);
MatrixCheckErr(*cpuA, *outputCheck);
}
void testMatrixTanhDerivative(int height, int width) {
MatrixPtr cpuA = std::make_shared<CpuMatrix>(height, width);
MatrixPtr cpuB = std::make_shared<CpuMatrix>(height, width);
MatrixPtr gpuA = std::make_shared<GpuMatrix>(height, width);
MatrixPtr gpuB = std::make_shared<GpuMatrix>(height, width);
cpuA->randomizeUniform();
cpuB->randomizeUniform();
gpuA->copyFrom(*cpuA);
gpuB->copyFrom(*cpuB);
cpuA->tanhDerivative(*cpuB);
gpuA->tanhDerivative(*gpuB);
MatrixPtr outputCheck = std::make_shared<CpuMatrix>(height, width);
outputCheck->copyFrom(*gpuA);
MatrixCheckErr(*cpuA, *outputCheck);
}
void testMatrixTanh(int height, int width) {
MatrixPtr cpuA = std::make_shared<CpuMatrix>(height, width);
MatrixPtr cpuB = std::make_shared<CpuMatrix>(height, width);
MatrixPtr gpuA = std::make_shared<GpuMatrix>(height, width);
MatrixPtr gpuB = std::make_shared<GpuMatrix>(height, width);
cpuA->randomizeUniform();
cpuB->randomizeUniform();
gpuA->copyFrom(*cpuA);
gpuB->copyFrom(*cpuB);
cpuA->tanh(*cpuB);
gpuA->tanh(*gpuB);
MatrixPtr outputCheck = std::make_shared<CpuMatrix>(height, width);
outputCheck->copyFrom(*gpuA);
MatrixCheckErr(*cpuA, *outputCheck);
}
void testMatrixTernarySub(int height, int width) {
MatrixPtr cpuA = std::make_shared<CpuMatrix>(height, width);
MatrixPtr cpuB = std::make_shared<CpuMatrix>(height, width);
MatrixPtr cpuC = std::make_shared<CpuMatrix>(height, width);
MatrixPtr gpuA = std::make_shared<GpuMatrix>(height, width);
MatrixPtr gpuB = std::make_shared<GpuMatrix>(height, width);
MatrixPtr gpuC = std::make_shared<GpuMatrix>(height, width);
cpuA->randomizeUniform();
cpuB->randomizeUniform();
cpuC->randomizeUniform();
gpuA->copyFrom(*cpuA);
gpuB->copyFrom(*cpuB);
gpuC->copyFrom(*cpuC);
cpuA->sub(*cpuB, *cpuC);
gpuA->sub(*gpuB, *gpuC);
MatrixPtr outputCheck = std::make_shared<CpuMatrix>(height, width);
outputCheck->copyFrom(*gpuA);
MatrixCheckEqual(*cpuA, *outputCheck);
}
void testMatrixSumOfSquaresBp(int height, int width) {
MatrixPtr cpuA = std::make_shared<CpuMatrix>(height, width);
MatrixPtr cpuB = std::make_shared<CpuMatrix>(height, width);
MatrixPtr cpuC = std::make_shared<CpuMatrix>(height, width);
MatrixPtr gpuA = std::make_shared<GpuMatrix>(height, width);
MatrixPtr gpuB = std::make_shared<GpuMatrix>(height, width);
MatrixPtr gpuC = std::make_shared<GpuMatrix>(height, width);
cpuA->randomizeUniform();
cpuB->randomizeUniform();
cpuC->randomizeUniform();
gpuA->copyFrom(*cpuA);
gpuB->copyFrom(*cpuB);
gpuC->copyFrom(*cpuC);
cpuA->sumOfSquaresBp(*cpuB, *cpuC);
gpuA->sumOfSquaresBp(*gpuB, *gpuC);
MatrixPtr outputCheck = std::make_shared<CpuMatrix>(height, width);
outputCheck->copyFrom(*gpuA);
MatrixCheckErr(*cpuA, *outputCheck);
}
void testMatrixBinaryRowScale(int height, int width) {
MatrixPtr cpuA = std::make_shared<CpuMatrix>(height, width);
MatrixPtr cpuB = std::make_shared<CpuMatrix>(height, 1);
MatrixPtr gpuA = std::make_shared<GpuMatrix>(height, width);
MatrixPtr gpuB = std::make_shared<GpuMatrix>(height, 1);
MatrixPtr cpuA1 = std::make_shared<CpuMatrix>(height, width);
MatrixPtr cpuB1 = std::make_shared<CpuMatrix>(height, 1);
MatrixPtr gpuA1 = std::make_shared<GpuMatrix>(height, width);
MatrixPtr gpuB1 = std::make_shared<GpuMatrix>(height, 1);
cpuA->randomizeUniform();
cpuB->randomizeUniform();
gpuA->copyFrom(*cpuA);
gpuB->copyFrom(*cpuB);
cpuA1->copyFrom(*cpuA);
cpuB1->copyFrom(*cpuB);
gpuA1->copyFrom(*cpuA);
gpuB1->copyFrom(*cpuB);
cpuA->addColVector(*cpuB);
gpuA->addColVector(*gpuB);
cpuA1->addColumnVector(*cpuB1);
MatrixPtr outputCheck = std::make_shared<CpuMatrix>(height, width);
outputCheck->copyFrom(*gpuA);
MatrixCheckEqual(*cpuA, *outputCheck);
MatrixCheckEqual(*cpuA, *cpuA1);
}
void testMatrixAddBias(int height, int width, real scale) {
MatrixPtr cpuA = std::make_shared<CpuMatrix>(height, width);
MatrixPtr cpuB = std::make_shared<CpuMatrix>(1, width);
MatrixPtr gpuA = std::make_shared<GpuMatrix>(height, width);
MatrixPtr gpuB = std::make_shared<GpuMatrix>(1, width);
cpuA->randomizeUniform();
cpuB->randomizeUniform();
gpuA->copyFrom(*cpuA);
gpuB->copyFrom(*cpuB);
cpuA->addBias(*cpuB, scale);
gpuA->addBias(*gpuB, scale);
MatrixPtr outputCheck = std::make_shared<CpuMatrix>(height, width);
outputCheck->copyFrom(*gpuA);
MatrixCheckErr(*cpuA, *outputCheck);
}
void testMatrixTernaryRowScale(int height, int width) {
MatrixPtr cpuA = std::make_shared<CpuMatrix>(height, width);
MatrixPtr cpuB = std::make_shared<CpuMatrix>(height, width);
MatrixPtr cpuC = std::make_shared<CpuMatrix>(height, width);
MatrixPtr gpuA = std::make_shared<GpuMatrix>(height, width);
MatrixPtr gpuB = std::make_shared<GpuMatrix>(height, width);
MatrixPtr gpuC = std::make_shared<GpuMatrix>(height, width);
MatrixPtr cpuA1 = std::make_shared<CpuMatrix>(height, width);
MatrixPtr cpuB1 = std::make_shared<CpuMatrix>(height, width);
MatrixPtr cpuC1 = std::make_shared<CpuMatrix>(height, width);
cpuA->randomizeUniform();
cpuB->randomizeUniform();
cpuC->randomizeUniform();
gpuA->copyFrom(*cpuA);
gpuB->copyFrom(*cpuB);
gpuC->copyFrom(*cpuC);
cpuA1->copyFrom(*cpuA);
cpuB1->copyFrom(*cpuB);
cpuC1->copyFrom(*cpuC);
int columnOffset = rand() % width; // NOLINT
cpuA->rowScale(columnOffset, *cpuB, *cpuC);
gpuA->rowScale(columnOffset, *gpuB, *gpuC);
cpuA1->rowScale2(columnOffset, *cpuB1, *cpuC1);
MatrixPtr outputCheck = std::make_shared<CpuMatrix>(height, width);
outputCheck->copyFrom(*gpuA);
MatrixCheckEqual(*cpuA, *outputCheck);
MatrixCheckEqual(*cpuA, *cpuA1);
}
void testMatrixTernaryRowDotMul(int height, int width) {
MatrixPtr cpuA = std::make_shared<CpuMatrix>(height, width);
MatrixPtr cpuB = std::make_shared<CpuMatrix>(height, width);
MatrixPtr cpuC = std::make_shared<CpuMatrix>(height, width);
MatrixPtr cpuA1 = std::make_shared<CpuMatrix>(height, width);
MatrixPtr cpuB1 = std::make_shared<CpuMatrix>(height, width);
MatrixPtr cpuC1 = std::make_shared<CpuMatrix>(height, width);
MatrixPtr gpuA = std::make_shared<GpuMatrix>(height, width);
MatrixPtr gpuB = std::make_shared<GpuMatrix>(height, width);
MatrixPtr gpuC = std::make_shared<GpuMatrix>(height, width);
cpuA->randomizeUniform();
cpuB->randomizeUniform();
cpuC->randomizeUniform();
cpuA1->copyFrom(*cpuA);
cpuB1->copyFrom(*cpuB);
cpuC1->copyFrom(*cpuC);
gpuA->copyFrom(*cpuA);
gpuB->copyFrom(*cpuB);
gpuC->copyFrom(*cpuC);
int columnOffset = rand() % width; // NOLINT
cpuA->rowDotMul(columnOffset, *cpuB, *cpuC);
gpuA->rowDotMul(columnOffset, *gpuB, *gpuC);
cpuA1->rowDotMul2(columnOffset, *cpuB1, *cpuC1);
MatrixPtr outputCheck = std::make_shared<CpuMatrix>(height, width);
outputCheck->copyFrom(*gpuA);
MatrixCheckErr(*cpuA, *cpuA1);
MatrixCheckErr(*cpuA, *outputCheck);
}
void testMatrixAddDotMulMMV(int height, int width) {
MatrixPtr cpuA = std::make_shared<CpuMatrix>(height, width);
MatrixPtr cpuB = std::make_shared<CpuMatrix>(height, width);
MatrixPtr cpuC = std::make_shared<CpuMatrix>(1, width);
MatrixPtr gpuA = std::make_shared<GpuMatrix>(height, width);
MatrixPtr gpuB = std::make_shared<GpuMatrix>(height, width);
MatrixPtr gpuC = std::make_shared<GpuMatrix>(1, width);
MatrixPtr cpuA1 = std::make_shared<CpuMatrix>(height, width);
MatrixPtr cpuB1 = std::make_shared<CpuMatrix>(height, width);
MatrixPtr cpuC1 = std::make_shared<CpuMatrix>(1, width);
cpuA->randomizeUniform();
cpuB->randomizeUniform();
cpuC->randomizeUniform();
gpuA->copyFrom(*cpuA);
gpuB->copyFrom(*cpuB);
gpuC->copyFrom(*cpuC);
cpuA1->copyFrom(*cpuA);
cpuB1->copyFrom(*cpuB);
cpuC1->copyFrom(*cpuC);
cpuA->addDotMulMMV(*cpuB, *cpuC);
gpuA->addDotMulMMV(*gpuB, *gpuC);
cpuA1->addDotMulMMV2(*cpuB1, *cpuC1);
MatrixPtr outputCheck = std::make_shared<CpuMatrix>(height, width);
outputCheck->copyFrom(*gpuA);
MatrixCheckErr(*cpuA, *outputCheck);
MatrixCheckEqual(*cpuA, *cpuA1);
TensorCheckEqual(*cpuA, *cpuCopyB);
TensorCheckEqual(*cpuB, *cpuCopyA);
}
void testMatrixTranspose(int height, int width) {
......@@ -772,9 +305,7 @@ void testMatrixTranspose(int height, int width) {
cpu->transpose(cpuT, false);
gpu->transpose(gpuT, false);
MatrixPtr outputCheck = std::make_shared<CpuMatrix>(width, height);
outputCheck->copyFrom(*gpuT);
MatrixCheckEqual(*cpuT, *outputCheck);
TensorCheckEqual(*cpuT, *gpuT);
}
void testMatrixInverse(int height) {
......@@ -795,530 +326,127 @@ void testMatrixInverse(int height) {
cpu->inverse(cpuI, false);
gpu->inverse(gpuI, false);
outputCheck->copyFrom(*gpuI);
MatrixCheckErr(*cpuI, *outputCheck);
TensorCheckErr(*cpuI, *gpuI);
outputCheck->mul(cpu, cpuI);
cpu->setDiag(1.0);
MatrixCheckErr(*cpu, *outputCheck);
}
TEST(Matrix, unary) {
for (auto height : {1, 3, 11, 73, 128, 200, 330}) {
for (auto width : {1, 3, 32, 100, 512, 1000, 3210}) {
VLOG(3) << " height=" << height << " width=" << width;
// applyUnary
testMatrixAssign(height, width);
testMatrixAdd(height, width);
testMatrixSqrt(height, width);
// applyBinary
testMatrixBinaryAdd(height, width);
testMatrixTanh(height, width);
testMatrixTanhDerivative(height, width);
testMatrixDeepSwap(height, width);
// applyTernary
testMatrixTernarySub(height, width);
testMatrixSumOfSquaresBp(height, width);
// asRowVector
testMatrixAddBias(height, width, 1.0);
testMatrixAddBias(height, width, 3.5);
testMatrixAddDotMulMMV(height, width);
// asColVector
testMatrixTernaryRowScale(height, width);
testMatrixBinaryRowScale(height, width);
// sum
testMatrixGetSum(height, width);
// transpose
testMatrixTranspose(height, width);
}
// inverse
testMatrixInverse(height);
}
}
void testMatrixSoftmax(int height, int width) {
MatrixPtr cpuInput = std::make_shared<CpuMatrix>(height, width);
MatrixPtr cpuOutput = std::make_shared<CpuMatrix>(height, width);
MatrixPtr gpuInput = std::make_shared<GpuMatrix>(height, width);
MatrixPtr gpuOutput = std::make_shared<GpuMatrix>(height, width);
cpuInput->randomizeUniform();
gpuInput->copyFrom(*cpuInput);
cpuOutput->zero();
gpuOutput->zero();
cpuInput->softmax(*cpuOutput);
gpuInput->softmax(*gpuOutput);
MatrixPtr outputCheck = std::make_shared<CpuMatrix>(height, width);
outputCheck->copyFrom(*gpuOutput);
MatrixCheckErr(*cpuOutput, *outputCheck);
}
void testSequenceSoftmax(int batchSize) {
// forward
int inputDim = 1;
MatrixPtr cpuInput = std::make_shared<CpuMatrix>(batchSize, inputDim);
MatrixPtr gpuInput = std::make_shared<GpuMatrix>(batchSize, inputDim);
cpuInput->randomizeUniform();
gpuInput->copyFrom(*cpuInput);
IVectorPtr cpuSequence;
generateSequenceStartPositions(batchSize, cpuSequence);
IVectorPtr gpuSequence = IVector::create(cpuSequence->getSize(), true);
gpuSequence->copyFrom(*cpuSequence);
cpuInput->sequenceSoftmax(*cpuInput, *cpuSequence);
gpuInput->sequenceSoftmax(*gpuInput, *gpuSequence);
// check
MatrixPtr outputCheck = std::make_shared<CpuMatrix>(batchSize, inputDim);
outputCheck->copyFrom(*gpuInput);
MatrixCheckErr(*cpuInput, *outputCheck);
}
void testMatrixSoftmaxThreshold(int height, int width) {
MatrixPtr cpuInput = std::make_shared<CpuMatrix>(height, width);
MatrixPtr cpuOutput = std::make_shared<CpuMatrix>(height, width);
MatrixPtr gpuInput = std::make_shared<GpuMatrix>(height, width);
MatrixPtr gpuOutput = std::make_shared<GpuMatrix>(height, width);
cpuInput->randomizeUniform();
cpuInput->getData()[0] = 100.0;
gpuInput->copyFrom(*cpuInput);
cpuOutput->zero();
gpuOutput->zero();
cpuInput->softmax(*cpuOutput);
gpuInput->softmax(*gpuOutput);
MatrixPtr outputCheck = std::make_shared<CpuMatrix>(height, width);
outputCheck->copyFrom(*gpuOutput);
// check output zero
int cpuCount = 0;
int gpuCount = 0;
auto zeroNum = [](MatrixPtr out, int& count) {
for (size_t i = 0; i < out->getHeight(); i++) {
for (size_t j = 0; j < out->getWidth(); j++) {
if (out->getElement(i, j) == 0) count++;
}
}
};
zeroNum(cpuOutput, cpuCount);
zeroNum(outputCheck, gpuCount);
EXPECT_EQ(cpuCount, 0) << "Cpu softmax output value 0";
EXPECT_EQ(gpuCount, 0) << "Gpu softmax output value 0";
}
void testMatrixSoftmaxBp(int height, int width) {
MatrixPtr cpuInput = std::make_shared<CpuMatrix>(height, width);
MatrixPtr cpuOutput = std::make_shared<CpuMatrix>(height, width);
MatrixPtr gpuInput = std::make_shared<GpuMatrix>(height, width);
MatrixPtr gpuOutput = std::make_shared<GpuMatrix>(height, width);
cpuInput->randomizeUniform();
gpuInput->copyFrom(*cpuInput);
cpuOutput->randomizeUniform();
gpuOutput->copyFrom(*cpuOutput);
gpuOutput->softmaxBackward(*gpuInput);
MatrixPtr sftMaxSum = std::make_shared<CpuMatrix>(height, 1);
MatrixPtr sftMaxDot = std::make_shared<CpuMatrix>(height, width);
sftMaxDot->dotMul(*cpuOutput, *cpuInput);
sftMaxSum->colMerge(*sftMaxDot);
cpuOutput->softmaxDerivative(*cpuInput, *sftMaxSum);
MatrixPtr outputCheck = std::make_shared<CpuMatrix>(height, width);
outputCheck->copyFrom(*gpuOutput);
MatrixCheckErr(*cpuOutput, *outputCheck);
}
TEST(Matrix, softmax) {
for (auto height : {1, 11, 73, 128, 200}) {
for (auto width : {1, 32, 100, 512, 1000}) {
VLOG(3) << " height=" << height << " width=" << width;
testMatrixSoftmax(height, width);
testMatrixSoftmaxBp(height, width);
testMatrixSoftmaxThreshold(height, width);
}
testSequenceSoftmax(height);
}
}
void testMatrixAddDotMulVMM(int height, int width, int endCol = 0) {
MatrixPtr cpuA = std::make_shared<CpuMatrix>(1, width);
MatrixPtr cpuB = std::make_shared<CpuMatrix>(height, width);
MatrixPtr cpuC = std::make_shared<CpuMatrix>(height, width);
MatrixPtr gpuA = std::make_shared<GpuMatrix>(1, width);
MatrixPtr gpuB = std::make_shared<GpuMatrix>(height, width);
MatrixPtr gpuC = std::make_shared<GpuMatrix>(height, width);
MatrixPtr cpuA1 = std::make_shared<CpuMatrix>(1, width);
MatrixPtr cpuB1 = std::make_shared<CpuMatrix>(height, width);
MatrixPtr cpuC1 = std::make_shared<CpuMatrix>(height, width);
cpuA->randomizeUniform();
cpuB->randomizeUniform();
cpuC->randomizeUniform();
gpuA->copyFrom(*cpuA);
gpuB->copyFrom(*cpuB);
gpuC->copyFrom(*cpuC);
cpuA1->copyFrom(*cpuA);
cpuB1->copyFrom(*cpuB);
cpuC1->copyFrom(*cpuC);
if (!endCol) {
cpuA->addDotMulVMM(*cpuB, *cpuC);
gpuA->addDotMulVMM(*gpuB, *gpuC);
cpuA1->addDotMulVMM2(*cpuB1, *cpuC1);
MatrixCheckErr(*cpuA, *cpuA1);
} else {
MatrixPtr subCpuA = cpuA->subColMatrix(0, endCol);
MatrixPtr subCpuB = cpuB->subColMatrix(0, endCol);
MatrixPtr subCpuC = cpuC->subColMatrix(0, endCol);
MatrixPtr subGpuA = gpuA->subColMatrix(0, endCol);
MatrixPtr subGpuB = gpuB->subColMatrix(0, endCol);
MatrixPtr subGpuC = gpuC->subColMatrix(0, endCol);
subCpuA->addDotMulVMM(*subCpuB, *subCpuC);
subGpuA->addDotMulVMM(*subGpuB, *subGpuC);
}
MatrixPtr outputCheck = std::make_shared<CpuMatrix>(1, width);
outputCheck->copyFrom(*gpuA);
MatrixCheckErr(*cpuA, *outputCheck);
}
void testMatrixRowSum(int height, int width) {
MatrixPtr cpuA = std::make_shared<CpuMatrix>(height, 1);
MatrixPtr cpuB = std::make_shared<CpuMatrix>(height, width);
MatrixPtr gpuA = std::make_shared<GpuMatrix>(height, 1);
MatrixPtr gpuB = std::make_shared<GpuMatrix>(height, width);
MatrixPtr cpuA1 = std::make_shared<CpuMatrix>(height, 1);
MatrixPtr cpuB1 = std::make_shared<CpuMatrix>(height, width);
MatrixPtr gpuA1 = std::make_shared<GpuMatrix>(height, 1);
MatrixPtr gpuB1 = std::make_shared<GpuMatrix>(height, width);
cpuA->randomizeUniform();
cpuB->randomizeUniform();
gpuA->copyFrom(*cpuA);
gpuB->copyFrom(*cpuB);
cpuA1->copyFrom(*cpuA);
cpuB1->copyFrom(*cpuB);
gpuA1->copyFrom(*cpuA);
gpuB1->copyFrom(*cpuB);
cpuA->colMerge(*cpuB);
gpuA->colMerge(*gpuB);
cpuB1->rowSum(*cpuA1);
gpuB1->rowSum(*gpuA1);
MatrixPtr outputCheck = std::make_shared<CpuMatrix>(height, 1);
outputCheck->copyFrom(*gpuA);
MatrixCheckErr(*cpuA, *outputCheck);
outputCheck->copyFrom(*gpuA1);
MatrixCheckErr(*cpuA1, *outputCheck);
}
void testMatrixRowMax(int height, int width, int endCol = 0) {
MatrixPtr cpuA = std::make_shared<CpuMatrix>(height, 1);
MatrixPtr cpuB = std::make_shared<CpuMatrix>(height, width);
MatrixPtr gpuA = std::make_shared<GpuMatrix>(height, 1);
MatrixPtr gpuB = std::make_shared<GpuMatrix>(height, width);
cpuA->randomizeUniform();
cpuB->randomizeUniform();
gpuA->copyFrom(*cpuA);
gpuB->copyFrom(*cpuB);
if (!endCol) {
cpuB->rowMax(*cpuA);
gpuB->rowMax(*gpuA);
} else {
MatrixPtr subCpuB = cpuB->subColMatrix(0, endCol);
MatrixPtr subGpuB = gpuB->subColMatrix(0, endCol);
subCpuB->rowMax(*cpuA);
subGpuB->rowMax(*gpuA);
}
MatrixPtr outputCheck = std::make_shared<CpuMatrix>(height, 1);
outputCheck->copyFrom(*gpuA);
MatrixCheckErr(*cpuA, *outputCheck);
}
void testMatrixColSum(int height, int width, int endCol = 0) {
MatrixPtr cpuA = std::make_shared<CpuMatrix>(1, width);
MatrixPtr cpuB = std::make_shared<CpuMatrix>(height, width);
MatrixPtr gpuA = std::make_shared<GpuMatrix>(1, width);
MatrixPtr gpuB = std::make_shared<GpuMatrix>(height, width);
cpuA->randomizeUniform();
cpuB->randomizeUniform();
gpuA->copyFrom(*cpuA);
gpuB->copyFrom(*cpuB);
if (!endCol) {
cpuA->accumulateColSum(*cpuB);
gpuA->accumulateColSum(*gpuB);
} else {
MatrixPtr subCpuA = cpuA->subColMatrix(0, endCol);
MatrixPtr subGpuA = gpuA->subColMatrix(0, endCol);
MatrixPtr subCpuB = cpuB->subColMatrix(0, endCol);
MatrixPtr subGpuB = gpuB->subColMatrix(0, endCol);
subCpuA->accumulateColSum(*subCpuB);
subGpuA->accumulateColSum(*subGpuB);
}
MatrixPtr outputCheck = std::make_shared<CpuMatrix>(1, width);
outputCheck->copyFrom(*gpuA);
MatrixCheckErr(*cpuA, *outputCheck);
}
void testMatrixColMax(int height, int width, int endCol = 0) {
MatrixPtr cpuA = std::make_shared<CpuMatrix>(1, width);
MatrixPtr cpuB = std::make_shared<CpuMatrix>(height, width);
MatrixPtr gpuA = std::make_shared<GpuMatrix>(1, width);
MatrixPtr gpuB = std::make_shared<GpuMatrix>(height, width);
cpuA->randomizeUniform();
cpuB->randomizeUniform();
gpuA->copyFrom(*cpuA);
gpuB->copyFrom(*cpuB);
if (!endCol) {
cpuB->colMax(*cpuA);
gpuB->colMax(*gpuA);
} else {
MatrixPtr subCpuA = cpuA->subColMatrix(0, endCol);
MatrixPtr subGpuA = gpuA->subColMatrix(0, endCol);
MatrixPtr subCpuB = cpuB->subColMatrix(0, endCol);
MatrixPtr subGpuB = gpuB->subColMatrix(0, endCol);
subCpuB->colMax(*subCpuA);
subGpuB->colMax(*subGpuA);
}
MatrixPtr outputCheck = std::make_shared<CpuMatrix>(1, width);
outputCheck->copyFrom(*gpuA);
MatrixCheckErr(*cpuA, *outputCheck);
}
void testMatrixCollectBias(int height, int width) {
MatrixPtr cpuA = std::make_shared<CpuMatrix>(1, width);
MatrixPtr cpuB = std::make_shared<CpuMatrix>(height, width);
MatrixPtr gpuA = std::make_shared<GpuMatrix>(1, width);
MatrixPtr gpuB = std::make_shared<GpuMatrix>(height, width);
cpuA->randomizeUniform();
cpuB->randomizeUniform();
gpuA->copyFrom(*cpuA);
gpuB->copyFrom(*cpuB);
real scale = 1.0f / (rand() % 10); // NOLINT
cpuA->collectBias(*cpuB, scale);
gpuA->collectBias(*gpuB, scale);
MatrixPtr outputCheck = std::make_shared<CpuMatrix>(1, width);
outputCheck->copyFrom(*gpuA);
MatrixCheckErr(*cpuA, *outputCheck);
}
void testMatrixSumOfSquares(int height, int width, int endCol = 0) {
MatrixPtr cpuA = std::make_shared<CpuMatrix>(height, 1);
MatrixPtr cpuB = std::make_shared<CpuMatrix>(height, width);
MatrixPtr cpuC = std::make_shared<CpuMatrix>(height, width);
MatrixPtr gpuA = std::make_shared<GpuMatrix>(height, 1);
MatrixPtr gpuB = std::make_shared<GpuMatrix>(height, width);
MatrixPtr gpuC = std::make_shared<GpuMatrix>(height, width);
cpuA->randomizeUniform();
cpuB->randomizeUniform();
cpuC->randomizeUniform();
gpuA->copyFrom(*cpuA);
gpuB->copyFrom(*cpuB);
gpuC->copyFrom(*cpuC);
if (!endCol) {
cpuA->sumOfSquares(*cpuB, *cpuC);
gpuA->sumOfSquares(*gpuB, *gpuC);
} else {
MatrixPtr subCpuB = cpuB->subColMatrix(0, endCol);
MatrixPtr subCpuC = cpuC->subColMatrix(0, endCol);
MatrixPtr subGpuB = gpuB->subColMatrix(0, endCol);
MatrixPtr subGpuC = gpuC->subColMatrix(0, endCol);
cpuA->sumOfSquares(*subCpuB, *subCpuC);
gpuA->sumOfSquares(*subGpuB, *subGpuC);
}
MatrixPtr outputCheck = std::make_shared<CpuMatrix>(height, 1);
outputCheck->copyFrom(*gpuA);
MatrixCheckErr(*cpuA, *outputCheck);
}
void testMatrixBinaryClassificationError(int height, int width) {
MatrixPtr cpuA = std::make_shared<CpuMatrix>(height, width);
MatrixPtr cpuB = std::make_shared<CpuMatrix>(height, width);
MatrixPtr cpuC = std::make_shared<CpuMatrix>(height, width);
MatrixPtr gpuA = std::make_shared<GpuMatrix>(height, width);
MatrixPtr gpuB = std::make_shared<GpuMatrix>(height, width);
MatrixPtr gpuC = std::make_shared<GpuMatrix>(height, width);
MatrixPtr cpuA2 = std::make_shared<CpuMatrix>(height, width);
MatrixPtr cpuB2 = std::make_shared<CpuMatrix>(height, width);
MatrixPtr cpuC2 = std::make_shared<CpuMatrix>(height, width);
cpuA->randomizeUniform();
cpuB->randomizeUniform();
cpuC->randomizeUniform();
gpuA->copyFrom(*cpuA);
gpuB->copyFrom(*cpuB);
gpuC->copyFrom(*cpuC);
cpuA2->copyFrom(*cpuA);
cpuB2->copyFrom(*cpuB);
cpuC2->copyFrom(*cpuC);
real scale = 0.5;
int columnOffset = rand() % width; // NOLINT
cpuA->binaryClassificationError(columnOffset, *cpuB, *cpuC, scale);
gpuA->binaryClassificationError(columnOffset, *gpuB, *gpuC, scale);
cpuA2->binaryClassificationError2(columnOffset, *cpuB2, *cpuC2, scale);
MatrixPtr outputCheck = std::make_shared<CpuMatrix>(height, width);
outputCheck->copyFrom(*gpuA);
MatrixCheckErr(*cpuA, *outputCheck);
MatrixCheckErr(*cpuA, *cpuA2);
}
TEST(Matrix, aggregate) {
for (auto height : {1, 11, 16, 32, 64, 73, 128, 200, 1024, 2345}) {
for (auto width : {1, 9, 16, 32, 64, 100, 512, 1000, 1024, 2453}) {
VLOG(3) << " height=" << height << " width=" << width;
testMatrixRowSum(height, width);
testMatrixRowMax(height, width);
testMatrixColSum(height, width);
testMatrixColMax(height, width);
testMatrixCollectBias(height, width);
testMatrixTernaryRowDotMul(height, width);
testMatrixAddDotMulVMM(height, width);
testMatrixSumOfSquares(height, width);
testMatrixBinaryClassificationError(height, width);
}
}
}
TEST(Matrix, unary) {
for (auto height : {1, 3, 11, 73, 128, 200, 330}) {
for (auto width : {1, 3, 32, 100, 512, 1000, 3210}) {
VLOG(3) << " height=" << height << " width=" << width;
int endCol = rand() % width; // NOLINT
testMatrixRowMax(height, width, endCol);
testMatrixSumOfSquares(height, width, endCol);
testMatrixColSum(height, width, endCol);
testMatrixColMax(height, width, endCol);
testMatrixAddDotMulVMM(height, width, endCol);
testMatrixDeepSwap(height, width);
testMatrixZeroAtOffset(height, width);
testMatrixGetSum(height, width);
testMatrixTranspose(height, width);
}
// inverse
testMatrixInverse(height);
}
}
void testMatrixAddAtOffset(int height, int width1, int width2) {
  MatrixPtr cpuInput = std::make_shared<CpuMatrix>(height, width1);
  MatrixPtr cpuOutput = std::make_shared<CpuMatrix>(height, width2);
  MatrixPtr gpuInput = std::make_shared<GpuMatrix>(height, width1);
  MatrixPtr gpuOutput = std::make_shared<GpuMatrix>(height, width2);

  cpuInput->randomizeUniform();
  gpuInput->copyFrom(*cpuInput);
  cpuOutput->randomizeUniform();
  gpuOutput->copyFrom(*cpuOutput);

  int columnOffset = 0;
  int offset = std::abs(width1 - width2);
  if (offset) {
    columnOffset = rand() % offset;  // NOLINT
  }
  cpuOutput->addAtOffset(*cpuInput, columnOffset);
  gpuOutput->addAtOffset(*gpuInput, columnOffset);

  MatrixPtr outputCheck = std::make_shared<CpuMatrix>(height, width2);
  outputCheck->copyFrom(*gpuOutput);
  MatrixCheckEqual(*cpuOutput, *outputCheck);
}

void testMatrixSoftmax(int height, int width) {
  MatrixPtr cpuInput = std::make_shared<CpuMatrix>(height, width);
  MatrixPtr cpuOutput = std::make_shared<CpuMatrix>(height, width);
  MatrixPtr gpuInput = std::make_shared<GpuMatrix>(height, width);
  MatrixPtr gpuOutput = std::make_shared<GpuMatrix>(height, width);

  cpuInput->randomizeUniform();
  gpuInput->copyFrom(*cpuInput);
  cpuOutput->zero();
  gpuOutput->zero();
  cpuInput->softmax(*cpuOutput);
  gpuInput->softmax(*gpuOutput);

  TensorCheckErr(*cpuOutput, *gpuOutput);
}
void testMatrixAssignAtOffset(int height, int width1, int width2) {
  MatrixPtr cpuInput = std::make_shared<CpuMatrix>(height, width1);
  MatrixPtr cpuOutput = std::make_shared<CpuMatrix>(height, width2);
  MatrixPtr gpuInput = std::make_shared<GpuMatrix>(height, width1);
  MatrixPtr gpuOutput = std::make_shared<GpuMatrix>(height, width2);

  cpuInput->randomizeUniform();
  gpuInput->copyFrom(*cpuInput);
  cpuOutput->randomizeUniform();
  gpuOutput->copyFrom(*cpuOutput);

  int columnOffset = 0;
  int offset = std::abs(width1 - width2);
  if (offset) {
    columnOffset = rand() % offset;  // NOLINT
  }
  cpuOutput->assignAtOffset(*cpuInput, columnOffset);
  gpuOutput->assignAtOffset(*gpuInput, columnOffset);

  MatrixPtr outputCheck = std::make_shared<CpuMatrix>(height, width2);
  outputCheck->copyFrom(*gpuOutput);
  MatrixCheckEqual(*cpuOutput, *outputCheck);
}

void testSequenceSoftmax(int batchSize) {
  // forward
  int inputDim = 1;
  MatrixPtr cpuInput = std::make_shared<CpuMatrix>(batchSize, inputDim);
  MatrixPtr gpuInput = std::make_shared<GpuMatrix>(batchSize, inputDim);
  cpuInput->randomizeUniform();
  gpuInput->copyFrom(*cpuInput);

  IVectorPtr cpuSequence;
  generateSequenceStartPositions(batchSize, cpuSequence);
  IVectorPtr gpuSequence = IVector::create(cpuSequence->getSize(), true);
  gpuSequence->copyFrom(*cpuSequence);

  cpuInput->sequenceSoftmax(*cpuInput, *cpuSequence);
  gpuInput->sequenceSoftmax(*gpuInput, *gpuSequence);

  TensorCheckErr(*cpuInput, *gpuInput);
}
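sequenceSoftmax normalizes each subsequence independently. With start positions s_0 < s_1 < ... < s_K delimiting the batch (as produced by generateSequenceStartPositions above), the expected result for a position i inside sequence k is, as a sketch of the assumed semantics:

\[
y_i = \frac{e^{x_i}}{\sum_{j=s_k}^{s_{k+1}-1} e^{x_j}}, \qquad s_k \le i < s_{k+1}
\]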
TEST(Matrix, AtOffset) {
  for (auto height : {1, 11, 73, 128, 200}) {
    for (auto width1 : {1, 32, 100, 512, 1000}) {
      for (auto width2 : {1, 32, 100, 512, 1000}) {
        VLOG(3) << " height=" << height << " width1=" << width1
                << " width2=" << width2;

        testMatrixAddAtOffset(height, width1, width2);
        testMatrixAssignAtOffset(height, width1, width2);
      }
    }
  }
}

void testMatrixSoftmaxThreshold(int height, int width) {
  MatrixPtr cpuInput = std::make_shared<CpuMatrix>(height, width);
  MatrixPtr cpuOutput = std::make_shared<CpuMatrix>(height, width);
  MatrixPtr gpuInput = std::make_shared<GpuMatrix>(height, width);
  MatrixPtr gpuOutput = std::make_shared<GpuMatrix>(height, width);

  cpuInput->randomizeUniform();
  // A single large input value makes every other entry in its row tiny
  // after softmax; the test verifies that none of them underflows to 0.
  cpuInput->getData()[0] = 100.0;
  gpuInput->copyFrom(*cpuInput);
  cpuOutput->zero();
  gpuOutput->zero();
  cpuInput->softmax(*cpuOutput);
  gpuInput->softmax(*gpuOutput);

  MatrixPtr outputCheck = std::make_shared<CpuMatrix>(height, width);
  outputCheck->copyFrom(*gpuOutput);
  // check that no output value is exactly zero
  int cpuCount = 0;
  int gpuCount = 0;
  auto zeroNum = [](MatrixPtr out, int& count) {
    for (size_t i = 0; i < out->getHeight(); i++) {
      for (size_t j = 0; j < out->getWidth(); j++) {
        if (out->getElement(i, j) == 0) count++;
      }
    }
  };
  zeroNum(cpuOutput, cpuCount);
  zeroNum(outputCheck, gpuCount);
  EXPECT_EQ(cpuCount, 0) << "Cpu softmax output value 0";
  EXPECT_EQ(gpuCount, 0) << "Gpu softmax output value 0";
}
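The threshold test above only passes if softmax is computed in a numerically stable form. A minimal sketch of that form, assuming the implementation subtracts the per-row maximum before exponentiating (`stableSoftmax` is an illustrative name, not a library function):

#include <algorithm>
#include <cmath>
#include <vector>

// Stable softmax: shifting by the row max keeps every exponent <= 0,
// so exp() cannot overflow and small entries stay finite.
std::vector<float> stableSoftmax(const std::vector<float>& x) {
  float maxV = *std::max_element(x.begin(), x.end());
  float sum = 0;
  std::vector<float> y(x.size());
  for (size_t i = 0; i < x.size(); ++i) {
    y[i] = std::exp(x[i] - maxV);
    sum += y[i];
  }
  for (auto& v : y) v /= sum;
  return y;
}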
void testMatrixSelectRows(int numSamples, int tableSize, int inputDim) {
  MatrixPtr cpuTable = std::make_shared<CpuMatrix>(tableSize, inputDim);
  MatrixPtr gpuTable = std::make_shared<GpuMatrix>(tableSize, inputDim);
  cpuTable->randomizeUniform();
  gpuTable->copyFrom(*cpuTable);

  IVectorPtr cpuIds;
  IVectorPtr gpuIds;
  cpuIds = VectorT<int>::create(numSamples, false);
  gpuIds = VectorT<int>::create(numSamples, true);
  cpuIds->rand(tableSize);
  gpuIds->copyFrom(*cpuIds);

  MatrixPtr cpuOutput = std::make_shared<CpuMatrix>(numSamples, inputDim);
  MatrixPtr gpuOutput = std::make_shared<GpuMatrix>(numSamples, inputDim);
  cpuOutput->randomizeUniform();
  gpuOutput->copyFrom(*cpuOutput);

  cpuOutput->selectRows(*cpuTable, *cpuIds);
  gpuOutput->selectRows(*gpuTable, *gpuIds);

  // check
  MatrixPtr outputCheck = std::make_shared<CpuMatrix>(numSamples, inputDim);
  outputCheck->copyFrom(*gpuOutput);
  MatrixCheckEqual(*cpuOutput, *outputCheck);
}

void testMatrixSoftmaxBp(int height, int width) {
  MatrixPtr cpuInput = std::make_shared<CpuMatrix>(height, width);
  MatrixPtr cpuOutput = std::make_shared<CpuMatrix>(height, width);
  MatrixPtr gpuInput = std::make_shared<GpuMatrix>(height, width);
  MatrixPtr gpuOutput = std::make_shared<GpuMatrix>(height, width);

  cpuInput->randomizeUniform();
  gpuInput->copyFrom(*cpuInput);
  cpuOutput->randomizeUniform();
  gpuOutput->copyFrom(*cpuOutput);

  gpuOutput->softmaxBackward(*gpuInput);

  MatrixPtr sftMaxSum = std::make_shared<CpuMatrix>(height, 1);
  MatrixPtr sftMaxDot = std::make_shared<CpuMatrix>(height, width);
  sftMaxDot->dotMul(*cpuOutput, *cpuInput);
  sftMaxSum->colMerge(*sftMaxDot);
  cpuOutput->softmaxDerivative(*cpuInput, *sftMaxSum);

  TensorCheckErr(*cpuOutput, *gpuOutput);
}
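The CPU reference above (dotMul, then colMerge, then softmaxDerivative) implements the usual softmax backward rule. With y the softmax output and dL/dy the incoming gradient, each row is transformed as:

\[
\frac{\partial L}{\partial x_i}
  = y_i \left( \frac{\partial L}{\partial y_i}
        - \sum_j y_j \frac{\partial L}{\partial y_j} \right)
\]

where the inner sum is exactly the per-row quantity accumulated into sftMaxSum.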
TEST(Matrix, softmax) {
for (auto height : {1, 11, 73, 128, 200}) {
for (auto width : {1, 32, 100, 512, 1000}) {
VLOG(3) << " height=" << height << " width=" << width;
testMatrixSoftmax(height, width);
testMatrixSoftmaxBp(height, width);
testMatrixSoftmaxThreshold(height, width);
}
testSequenceSoftmax(height);
}
}
void testMatrixAddToRows(int numSamples, int tableSize, int inputDim) {
......@@ -1342,10 +470,7 @@ void testMatrixAddToRows(int numSamples, int tableSize, int inputDim) {
cpuOutput->addToRows(*cpuTable, *cpuIds);
gpuOutput->addToRows(*gpuTable, *gpuIds);
// check
TensorCheckErr(*cpuTable, *gpuTable);
}
TEST(Matrix, tableProjection) {
......@@ -1354,7 +479,6 @@ TEST(Matrix, tableProjection) {
for (auto inputDim : {20, 50}) {
VLOG(3) << " numSamples=" << numSamples << " tableSize=" << tableSize
<< " inputDim=" << inputDim;
testMatrixSelectRows(numSamples, tableSize, inputDim);
testMatrixAddToRows(numSamples, tableSize, inputDim);
}
}
......@@ -1388,9 +512,7 @@ void testMatrixMul(bool transa, bool transb, int dimM, int dimN, int dimK) {
cpuC->mul(cpuA, cpuB, alpha, beta);
gpuC->mul(gpuA, gpuB, alpha, beta);
TensorCheckErr(*cpuC, *gpuC);
}
void testSubMatrixMul(bool transa, bool transb, int dimM, int dimN, int dimK) {
......@@ -1462,9 +584,7 @@ void testSubMatrixMul(bool transa, bool transb, int dimM, int dimN, int dimK) {
subCpuC->mul(subCpuA, subCpuB, alpha, beta);
subGpuC->mul(subGpuA, subGpuB, alpha, beta);
TensorCheckErr(*cpuC, *gpuC);
}
TEST(Matrix, mul) {
......@@ -1518,9 +638,7 @@ void testVectorReset(int size) {
cpu->reset(value);
gpu->reset(value);
TensorCheckEqual(*cpu, *gpu);
}
template <class T>
......@@ -1546,9 +664,7 @@ void testVecortSelectFrom(int size) {
cpuDst->selectFrom(*cpuSrc, *cpuIds);
gpuDst->selectFrom(*gpuSrc, *gpuIds);
TensorCheckEqual(*cpuDst, *gpuDst);
}
template <class T>
......@@ -1559,9 +675,7 @@ void testVecotrZeroMem(int size) {
cpu->zeroMem();
gpu->zeroMem();
TensorCheckEqual(*cpu, *gpu);
}
template <class T>
......@@ -1582,9 +696,7 @@ void testVectorIsEqual(int size) {
cpuA->isEqualTo(*cpuB, value);
gpuA->isEqualTo(*gpuB, value);
TensorCheckEqual(*cpuA, *gpuA);
}
TEST(Vector, Equal) {
......@@ -1615,9 +727,7 @@ void testMatrixTopK(int samples, int dim, int beamSize) {
cpuSrc->rowMax(*cpuIds, *cpuVal);
gpuSrc->rowMax(*gpuIds, *gpuVal);
TensorCheckEqual(*cpuVal, *gpuVal);
}
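For reference, a naive per-row top-k that mirrors what rowMax(ids, vals) is expected to return: the beamSize largest entries of each row in descending order. The helper below is illustrative only (`naiveRowTopK` is not part of the test file; `real` is paddle's floating-point typedef):

#include <algorithm>
#include <numeric>
#include <vector>

// Naive top-k for one row: partial_sort the indices by descending value,
// then gather the corresponding values. Assumes k <= row.size().
void naiveRowTopK(const std::vector<real>& row, int k,
                  std::vector<int>& ids, std::vector<real>& vals) {
  ids.resize(row.size());
  std::iota(ids.begin(), ids.end(), 0);
  std::partial_sort(ids.begin(), ids.begin() + k, ids.end(),
                    [&](int a, int b) { return row[a] > row[b]; });
  ids.resize(k);
  vals.resize(k);
  for (int i = 0; i < k; ++i) vals[i] = row[ids[i]];
}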
TEST(Matrix, topK) {
......@@ -1653,9 +763,7 @@ void testSMatrixTopK(int samples, int dim, int beamSize, real ratio) {
cpuSrc->rowMax(*cpuIds, *cpuVal);
gpuSrc->rowMax(*gpuIds, *gpuVal);
TensorCheckEqual(*cpuVal, *gpuVal);
IVectorPtr outCheckIds = std::make_shared<CpuIVector>(samples * beamSize);
outCheckIds->copyFrom(*gpuIds);
......@@ -1685,42 +793,6 @@ TEST(SMatrix, topK) {
}
}
void testMatrixCopyByRowIndex(int outHeight, int inHeight, int width) {
MatrixPtr cpuInput = std::make_shared<CpuMatrix>(inHeight, width);
MatrixPtr gpuInput = std::make_shared<GpuMatrix>(inHeight, width);
cpuInput->randomizeUniform();
gpuInput->copyFrom(*cpuInput);
MatrixPtr cpuOutput = std::make_shared<CpuMatrix>(outHeight, width);
MatrixPtr gpuOutput = std::make_shared<GpuMatrix>(outHeight, width);
cpuOutput->zero();
gpuOutput->zero();
IVectorPtr cpuRowIndex = IVector::create(outHeight, false);
IVectorPtr gpuRowIndex = IVector::create(outHeight, true);
cpuRowIndex->rand(inHeight);
gpuRowIndex->copyFrom(*cpuRowIndex);
cpuOutput->copyByRowIndex(*cpuInput, *cpuRowIndex);
gpuOutput->copyByRowIndex(*gpuInput, *gpuRowIndex);
// check
MatrixPtr outputCheck = std::make_shared<CpuMatrix>(outHeight, width);
outputCheck->copyFrom(*gpuOutput);
MatrixCheckEqual(*cpuOutput, *outputCheck);
}
TEST(Matrix, copyByRowIndex) {
for (auto outHeight : {31, 500, 1000}) {
for (auto inHeight : {17, 257, 500, 1200}) {
for (auto width : {512, 1024}) {
VLOG(3) << outHeight << " " << inHeight << " " << width;
testMatrixCopyByRowIndex(outHeight, inHeight, width);
}
}
}
}
void testMatrixSequenceAvgForward(int batchSize, int inputDim, int mode) {
MatrixPtr cpuInput = std::make_shared<CpuMatrix>(batchSize, inputDim);
MatrixPtr gpuInput = std::make_shared<GpuMatrix>(batchSize, inputDim);
......@@ -1741,10 +813,7 @@ void testMatrixSequenceAvgForward(int batchSize, int inputDim, int mode) {
cpuOutput->sequenceAvgForward(*cpuInput, *cpuSequence, mode);
gpuOutput->sequenceAvgForward(*gpuInput, *gpuSequence, mode);
// check
TensorCheckErr(*cpuOutput, *gpuOutput);
}
TEST(Matrix, sequenceAvgForward) {
......@@ -1759,45 +828,6 @@ TEST(Matrix, sequenceAvgForward) {
}
}
void testCosSim(int heightX, int heightY, int width, real scale) {
MatrixPtr prevOutX = CpuMatrix::create(heightX, width, false, false);
MatrixPtr prevOutY = CpuMatrix::create(heightY, width, false, false);
MatrixPtr output = CpuMatrix::create(heightX, 1, false, false);
prevOutX->randomizeUniform();
prevOutY->randomizeUniform();
prevOutX->add(-0.5);
prevOutY->add(-0.5);
output->randomizeUniform();
MatrixPtr prevOutXGpu = GpuMatrix::create(heightX, width, false, true);
MatrixPtr prevOutYGpu = GpuMatrix::create(heightY, width, false, true);
MatrixPtr outputGpu = GpuMatrix::create(heightX, 1, false, true);
prevOutXGpu->copyFrom(*prevOutX);
prevOutYGpu->copyFrom(*prevOutY);
outputGpu->copyFrom(*output);
output->cosSim(*prevOutX, *prevOutY, scale);
outputGpu->cosSim(*prevOutXGpu, *prevOutYGpu, scale);
MatrixPtr outputCheck = CpuMatrix::create(heightX, 1, false, false);
outputCheck->copyFrom(*outputGpu);
MatrixCheckErr(*output, *outputCheck);
}
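cosSim computes, per row x_i of prevOutX paired with row y_i of prevOutY (or with the single row of prevOutY when heightY == 1), a scaled cosine similarity. As a sketch of the assumed semantics:

\[
\mathrm{out}_i = \text{scale} \cdot
  \frac{\sum_j x_{ij}\, y_{ij}}
       {\sqrt{\sum_j x_{ij}^2}\;\sqrt{\sum_j y_{ij}^2}}
\]

Shifting the inputs by -0.5 above ensures both signs occur, which exercises the full range of the similarity.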
TEST(Matrix, cosSim) {
for (auto heightX : {10, 100, 1000}) {
for (auto heightY : {1, heightX}) {
for (auto width : {10, 100, 1000}) {
for (auto scale : {1.0, 2.0}) {
testCosSim(heightX, heightY, width, scale);
}
}
}
}
}
void testCosSimDerivate(int heightX, int heightY, int width, real scale) {
MatrixPtr prevOutX = CpuMatrix::create(heightX, width, false, false);
MatrixPtr prevOutY = CpuMatrix::create(heightY, width, false, false);
......@@ -1837,12 +867,8 @@ void testCosSimDerivate(int heightX, int heightY, int width, real scale) {
*prevGradYGpu,
scale);
TensorCheckErr(*prevGradX, *prevGradXGpu);
TensorCheckErr(*prevGradY, *prevGradYGpu);
}
TEST(Matrix, cosSimDerivate) {
......@@ -1857,80 +883,6 @@ TEST(Matrix, cosSimDerivate) {
}
}
void testParamReluForward(int height, int width, int w_height, int w_width) {
MatrixPtr output = CpuMatrix::create(height, width, false, false);
MatrixPtr input = CpuMatrix::create(height, width, false, false);
MatrixPtr w = CpuMatrix::create(w_height, w_width, false, false);
output->randomizeUniform();
input->randomizeUniform();
w->randomizeUniform();
input->add(-0.5);
MatrixPtr outputGpu = GpuMatrix::create(height, width, false, true);
MatrixPtr inputGpu = GpuMatrix::create(height, width, false, true);
MatrixPtr wGpu = GpuMatrix::create(w_height, w_width, false, true);
inputGpu->copyFrom(*input);
wGpu->copyFrom(*w);
output->paramReluForward(*input, *w);
outputGpu->paramReluForward(*inputGpu, *wGpu);
MatrixPtr outputCheck = CpuMatrix::create(height, width, false, false);
outputCheck->copyFrom(*outputGpu);
MatrixCheckEqual(*output, *outputCheck);
}
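paramReluForward applies the parametric ReLU rule elementwise, with each entry of w shared across a block of the input. A sketch of the assumed elementwise rule (the partitioning of the height x width input into w_height x w_width groups is an inference from the shapes, not stated in the test):

// PReLU: identity for positive inputs, learned slope w for negative ones.
inline real prelu(real x, real w) { return x > 0 ? x : w * x; }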
TEST(Matrix, paramReluForward) {
for (auto height : {10, 100}) {
for (auto width : {10, 100}) {
for (auto w_height : {1, 2}) {
for (auto w_width : {1, 2}) {
testParamReluForward(height, width, w_height, w_width);
}
}
}
}
}
void testParamReluBackwardW(int height, int width, int w_height, int w_width) {
MatrixPtr oGrad = CpuMatrix::create(height, width, false, false);
MatrixPtr input = CpuMatrix::create(height, width, false, false);
MatrixPtr w = CpuMatrix::create(w_height, w_width, false, false);
oGrad->randomizeUniform();
input->randomizeUniform();
w->randomizeUniform();
input->add(-0.5);
MatrixPtr oGradGpu = GpuMatrix::create(height, width, false, true);
MatrixPtr inputGpu = GpuMatrix::create(height, width, false, true);
MatrixPtr wGpu = GpuMatrix::create(w_height, w_width, false, true);
oGradGpu->copyFrom(*oGrad);
inputGpu->copyFrom(*input);
wGpu->copyFrom(*w);
w->paramReluBackwardW(*oGrad, *input);
wGpu->paramReluBackwardW(*oGradGpu, *inputGpu);
MatrixPtr wCheck = CpuMatrix::create(w_height, w_width, false, false);
wCheck->copyFrom(*wGpu);
MatrixCheckErr(*w, *wCheck);
}
TEST(Matrix, paramReluBackwardW) {
for (auto height : {10, 100}) {
for (auto width : {10, 100}) {
for (auto w_height : {1, 2}) {
for (auto w_width : {1, 2}) {
testParamReluBackwardW(height, width, w_height, w_width);
}
}
}
}
}
void testParamReluBackwardDiff(int height,
int width,
int w_height,
......@@ -1959,9 +911,7 @@ void testParamReluBackwardDiff(int height,
diff->paramReluBackwardDiff(*oGrad, *input, *w);
diffGpu->paramReluBackwardDiff(*oGradGpu, *inputGpu, *wGpu);
TensorCheckErr(*diff, *diffGpu);
}
TEST(Matrix, paramReluBackwardDiff) {
......@@ -1992,9 +942,7 @@ void testClassificationError(int numSamples, int dim) {
cpuError->classificationError(cpuOutput, cpuLabel);
gpuError->classificationError(gpuOutput, gpuLabel);
TensorCheckEqual(*cpuError, *gpuError);
}
TEST(Matrix, classificationError) {
......@@ -2159,9 +1107,8 @@ void testAvgPoolFwdBwd(int numSamples,
outW,
padH,
padW);
TensorCheckErr(*target, *targetGpu);
MatrixPtr inputGrad = CpuMatrix::create(numSamples, inWidth, false, false);
MatrixPtr inputGpuGrad = GpuMatrix::create(numSamples, inWidth, false, true);
......@@ -2200,10 +1147,8 @@ void testAvgPoolFwdBwd(int numSamples,
1.0,
padH,
padW);
TensorCheckErr(*inputGrad, *inputGpuGrad);
}
TEST(Matrix, PoolFwdBwd) {
......@@ -2268,11 +1213,9 @@ void testMaxOutFwdBwd(
MatrixPtr target = CpuMatrix::create(numSamples, outWidth, false, false);
MatrixPtr targetGpu = GpuMatrix::create(numSamples, outWidth, false, true);
IVectorPtr id = CpuIVector::create(numSamples * outWidth, false);
IVectorPtr idGpu = GpuIVector::create(numSamples * outWidth, true);
input->randomizeUniform();
inputGpu->copyFrom(*input);
......@@ -2280,11 +1223,8 @@ void testMaxOutFwdBwd(
target->maxoutForward(*input, *id, outChannels, groups);
targetGpu->maxoutForward(*inputGpu, *idGpu, outChannels, groups);
// check
TensorCheckErr(*target, *targetGpu);
TensorCheckEqual(*id, *idGpu);
// backward
MatrixPtr inputGrad = CpuMatrix::create(numSamples, inWidth, false, false);
......@@ -2293,8 +1233,6 @@ void testMaxOutFwdBwd(
MatrixPtr targetGrad = CpuMatrix::create(numSamples, outWidth, false, false);
MatrixPtr targetGpuGrad =
GpuMatrix::create(numSamples, outWidth, false, true);
inputGrad->randomizeUniform();
targetGrad->randomizeUniform();
......@@ -2304,9 +1242,7 @@ void testMaxOutFwdBwd(
inputGrad->maxoutBackward(*targetGrad, *id, outChannels, groups);
inputGpuGrad->maxoutBackward(*targetGpuGrad, *idGpu, outChannels, groups);
// check
TensorCheckErr(*inputGrad, *inputGpuGrad);
}
TEST(Matrix, MaxOutFwdBwd) {
......@@ -2326,113 +1262,6 @@ TEST(Matrix, MaxOutFwdBwd) {
}
}
void testAddSharedBias(int numSamples, int dim, int channel) {
MatrixPtr cpuData = std::make_shared<CpuMatrix>(numSamples, dim);
MatrixPtr gpuData = std::make_shared<GpuMatrix>(numSamples, dim);
MatrixPtr cpuBias = std::make_shared<CpuMatrix>(1, channel);
MatrixPtr gpuBias = std::make_shared<GpuMatrix>(1, channel);
cpuData->randomizeUniform();
gpuData->copyFrom(*cpuData);
cpuBias->randomizeUniform();
gpuBias->copyFrom(*cpuBias);
cpuData->addSharedBias(*cpuBias, 1.0);
gpuData->addSharedBias(*gpuBias, 1.0);
MatrixPtr check = std::make_shared<CpuMatrix>(numSamples, dim);
check->copyFrom(*gpuData);
MatrixCheckErr(*cpuData, *check);
}
void testCollectSharedBias(int numSamples, int dim, int channel) {
MatrixPtr cpuData = std::make_shared<CpuMatrix>(numSamples, dim);
MatrixPtr gpuData = std::make_shared<GpuMatrix>(numSamples, dim);
MatrixPtr cpuBias = std::make_shared<CpuMatrix>(1, channel);
MatrixPtr gpuBias = std::make_shared<GpuMatrix>(1, channel);
cpuData->randomizeUniform();
gpuData->copyFrom(*cpuData);
cpuBias->randomizeUniform();
gpuBias->copyFrom(*cpuBias);
cpuBias->collectSharedBias(*cpuData, 1.0);
gpuBias->collectSharedBias(*gpuData, 1.0);
MatrixPtr check = std::make_shared<CpuMatrix>(1, channel);
check->copyFrom(*gpuBias);
MatrixCheckErr(*cpuBias, *check);
}
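addSharedBias and collectSharedBias pair a bias of `channel` values with a data matrix whose `dim` columns split into `channel` equal blocks (note that every dim in the test below is a multiple of channel). A naive sketch of the assumed forward accumulation; the contiguous per-channel column layout is an assumption of this note, and `naiveAddSharedBias` is an illustrative name:

// Sketch: each bias value covers dim / channel consecutive columns.
void naiveAddSharedBias(CpuMatrix& data, CpuMatrix& bias, real scale) {
  size_t block = data.getWidth() / bias.getWidth();
  for (size_t i = 0; i < data.getHeight(); ++i) {
    for (size_t j = 0; j < data.getWidth(); ++j) {
      data.getData()[i * data.getWidth() + j] +=
          scale * bias.getData()[j / block];
    }
  }
}

collectSharedBias is then the transposed accumulation: each bias value gathers the scaled sum over its column block across all samples.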
TEST(Matrix, sharedBias) {
for (auto numSamples : {1, 100, 520}) {
for (auto dim : {100 * 16, 100 * 32}) {
for (auto channel : {8, 16}) {
VLOG(3) << " numSamples=" << numSamples << " dim=" << dim
<< " channel=" << channel;
testAddSharedBias(numSamples, dim, channel);
testCollectSharedBias(numSamples, dim, channel);
}
}
}
}
void testMultiBinaryLabelCrossEntropy(int numSamples, int dim) {
MatrixPtr output = std::make_shared<CpuMatrix>(numSamples, dim);
MatrixPtr cpuOutput = std::make_shared<CpuMatrix>(numSamples, dim);
MatrixPtr gpuOutput = std::make_shared<GpuMatrix>(numSamples, dim);
MatrixPtr cpuEntropy = std::make_shared<CpuMatrix>(numSamples, 1);
MatrixPtr gpuEntropy = std::make_shared<GpuMatrix>(numSamples, 1);
MatrixPtr cpuGrad = std::make_shared<CpuMatrix>(numSamples, dim);
MatrixPtr gpuGrad = std::make_shared<GpuMatrix>(numSamples, dim);
MatrixPtr cpuLabel = std::make_shared<CpuSparseMatrix>(
numSamples, dim, numSamples, NO_VALUE, SPARSE_CSR, false);
MatrixPtr gpuLabel = std::make_shared<GpuSparseMatrix>(
numSamples, dim, numSamples, NO_VALUE, SPARSE_CSR, false);
for (int i = 0; i < numSamples; i++) {
const unsigned int id = rand() % dim; // NOLINT
cpuLabel->setRow(i, 1, &id, nullptr);
gpuLabel->setRow(i, 1, &id, nullptr);
}
output->randomizeUniform();
cpuOutput->zeroMem();
output->softmax(*cpuOutput);
gpuOutput->copyFrom(*cpuOutput);
cpuEntropy->zeroMem();
gpuEntropy->zeroMem();
cpuEntropy->multiBinaryLabelCrossEntropy(*cpuOutput, *cpuLabel);
gpuEntropy->multiBinaryLabelCrossEntropy(*gpuOutput, *gpuLabel);
MatrixPtr check1 = std::make_shared<CpuMatrix>(numSamples, 1);
check1->copyFrom(*gpuEntropy);
MatrixCheckErr(*cpuEntropy, *check1);
cpuGrad->zeroMem();
gpuGrad->zeroMem();
cpuGrad->multiBinaryLabelCrossEntropyBp(*cpuOutput, *cpuLabel);
gpuGrad->multiBinaryLabelCrossEntropyBp(*gpuOutput, *gpuLabel);
MatrixPtr check2 = std::make_shared<CpuMatrix>(numSamples, dim);
check2->copyFrom(*gpuGrad);
MatrixCheckErr(*cpuGrad, *check2);
}
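multiBinaryLabelCrossEntropy treats each of the dim outputs as an independent binary label. With y the (softmaxed) output row and t the sparse 0/1 label row built via setRow above, the per-sample entropy being compared is, under the assumed semantics:

\[
E = -\sum_{j=1}^{\text{dim}} \bigl[ t_j \log y_j + (1 - t_j) \log (1 - y_j) \bigr]
\]

and multiBinaryLabelCrossEntropyBp computes its gradient with respect to y.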
TEST(Matrix, multiBinaryCrossEntropy) {
for (auto numSamples : {100, 1000, 10000}) {
for (auto dim : {100, 1000, 10000}) {
VLOG(3) << " numSamples=" << numSamples << " dim=" << dim;
testMultiBinaryLabelCrossEntropy(numSamples, dim);
}
}
}
int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv);
initMain(argc, argv);
......