Commit 1df826e7 authored by hedaoyuan

Add an AutoCompare and move some tests from test_matrixCompare.cpp to test_Matrix.cpp

Parent f70fc4a4
...@@ -13,11 +13,14 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include <cmath>
#include <gtest/gtest.h>
#include "paddle/math/Matrix.h" #include "paddle/math/Matrix.h"
using namespace paddle; // NOLINT using paddle::Matrix;
using namespace std; // NOLINT using paddle::CpuMatrix;
using paddle::GpuMatrix;
using paddle::VectorT;
using paddle::CpuVectorT;
using paddle::GpuVectorT;
namespace autotest {
...@@ -71,6 +74,53 @@ private:
CpuMatrix arg_;
};
template <>
class CopyToCpu<Matrix> {
public:
explicit CopyToCpu(const Matrix& arg)
: arg_(arg.getHeight(), arg.getWidth()) {
arg_.copyFrom(arg);
}
CpuMatrix& copiedArg() { return arg_; }
private:
CpuMatrix arg_;
};
template <typename T>
class CopyToCpu<CpuVectorT<T>> {
public:
explicit CopyToCpu(const CpuVectorT<T>& arg) : arg_(arg) {}
const CpuVectorT<T>& copiedArg() const { return arg_; }
private:
const CpuVectorT<T>& arg_;
};
template <typename T>
class CopyToCpu<GpuVectorT<T>> {
public:
explicit CopyToCpu(const GpuVectorT<T>& arg) : arg_(arg.getSize()) {
arg_.copyFrom(arg);
}
CpuVectorT<T>& copiedArg() { return arg_; }
private:
CpuVectorT<T> arg_;
};
template <typename T>
class CopyToCpu<VectorT<T>> {
public:
explicit CopyToCpu(const VectorT<T>& arg) : arg_(arg.getSize()) {
arg_.copyFrom(arg);
}
CpuVectorT<T>& copiedArg() { return arg_; }
private:
CpuVectorT<T> arg_;
};
template <typename AssertEq>
void TensorCheck(AssertEq compare,
const CpuMatrix& matrix1,
...@@ -95,10 +145,30 @@ void TensorCheck(AssertEq compare,
EXPECT_EQ(count, 0) << "There are " << count << " different element.";
}
template <typename AssertEq, class T>
void TensorCheck(AssertEq compare,
const CpuVectorT<T>& vector1,
const CpuVectorT<T>& vector2) {
CHECK(vector1.getSize() == vector2.getSize());
const T* data1 = vector1.getData();
const T* data2 = vector2.getData();
size_t size = vector1.getSize();
int count = 0;
for (size_t i = 0; i < size; i++) {
real a = data1[i];
real b = data2[i];
if (!compare(a, b)) {
count++;
}
}
EXPECT_EQ(count, 0) << "There are " << count << " different element.";
}
template <typename AssertEq, typename Tensor1, typename Tensor2>
void TensorCheck(AssertEq compare,
const Tensor1& tensor1,
const Tensor2& tensor2) {
TensorCheck(compare,
CopyToCpu<Tensor1>(tensor1).copiedArg(),
CopyToCpu<Tensor2>(tensor2).copiedArg());
...@@ -116,4 +186,24 @@ void TensorCheck(AssertEq compare, size_t args1, size_t args2) {
<< ", args2 = " << args2;
}
template <typename Tensor1, typename Tensor2>
void TensorCheckEqual(const Tensor1& tensor1, const Tensor2& tensor2) {
AssertEqual compare(0);
TensorCheck(compare,
CopyToCpu<Tensor1>(tensor1).copiedArg(),
CopyToCpu<Tensor2>(tensor2).copiedArg());
}
template <typename Tensor1, typename Tensor2>
void TensorCheckErr(const Tensor1& tensor1, const Tensor2& tensor2) {
#ifndef PADDLE_TYPE_DOUBLE
AssertEqual compare(1e-3);
#else
AssertEqual compare(1e-10);
#endif
TensorCheck(compare,
CopyToCpu<Tensor1>(tensor1).copiedArg(),
CopyToCpu<Tensor2>(tensor2).copiedArg());
}
} // namespace autotest
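A minimal usage sketch of the checkers above (it mirrors the testMatrixSoftmax
test added in test_matrixCompare.cpp further down, and assumes the paddle
matrix API already used in this header):

    CpuMatrix cpuIn(32, 100), cpuOut(32, 100);
    GpuMatrix gpuIn(32, 100), gpuOut(32, 100);
    cpuIn.randomizeUniform();
    gpuIn.copyFrom(cpuIn);
    cpuIn.softmax(cpuOut);  // run the same member function on both sides
    gpuIn.softmax(gpuOut);
    // CopyToCpu copies the GPU result back before the comparison; the
    // tolerance is 1e-3 in float builds and 1e-10 in double builds.
    autotest::TensorCheckErr(cpuOut, gpuOut);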
...@@ -14,21 +14,19 @@ limitations under the License. */
/**
* TestUtils.h is used to automatically check that the CPU and GPU
* implementations are consistent.
* Refer to test_Matrix.cpp and test_BaseMatrix.cpp for how to use autotest.
*/
#include <gtest/gtest.h>
#include "paddle/math/Matrix.h"
#include "paddle/math/SparseMatrix.h"
#include "TensorCheck.h" #include "TensorCheck.h"
using namespace paddle; // NOLINT using paddle::BaseMatrix;
using paddle::CpuIVector;
using paddle::GpuIVector;
using paddle::CpuSparseMatrix;
using paddle::GpuSparseMatrix;
namespace autotest {
...@@ -196,9 +194,7 @@ template <bool AsRowVector,
typename R,
typename... Args,
typename AssertEq>
void BaseMatrixCompare(R (C::*f)(Args...), AssertEq compare) {
for (auto height : {1, 11, 73, 128, 200, 330}) {
for (auto width : {1, 3, 32, 100, 512, 1000}) {
CpuMatrix obj1(AsRowVector ? 1 : height, AsColVector ? 1 : width);
...@@ -227,17 +223,91 @@ void BaseMatrixCompare(R (C::*f)(Args...),
call(obj2, f, std::get<I>(tuple2)...);
TensorCheck(compare, obj1, obj2);
}
}
}
template <typename T>
class ReturnType {
public:
typedef T type;
};
template <>
class ReturnType<CpuMatrix> {
public:
typedef GpuMatrix type;
};
template <>
class ReturnType<CpuIVector> {
public:
typedef GpuIVector type;
};
template <>
class ReturnType<CpuSparseMatrix> {
public:
typedef GpuSparseMatrix type;
};
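// autoArgs (below) mirrors a CPU-side argument on the GPU, using ReturnType
// above to map each CPU type to its GPU counterpart, so the same member
// function can be invoked on both objects with equivalent inputs. The sparse
// overload copies on HPPL_STREAM_DEFAULT and synchronizes the stream before
// returning.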
template <typename T>
typename ReturnType<T>::type autoArgs(T v) {
return v;
}
template <>
GpuMatrix autoArgs(CpuMatrix v) {
GpuMatrix a(v.getHeight(), v.getWidth());
a.copyFrom(v);
return a;
}
template <>
GpuIVector autoArgs(CpuIVector v) {
GpuIVector a(v.getSize());
a.copyFrom(v);
return a;
}
template <>
GpuSparseMatrix autoArgs(CpuSparseMatrix v) {
GpuSparseMatrix a(v.getHeight(),
v.getWidth(),
v.getElementCnt(),
v.getValueType(),
v.getFormat());
a.copyFrom(v, HPPL_STREAM_DEFAULT);
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
return a;
}
class AutoCompare {
public:
AutoCompare(size_t height, size_t width)
: cpu(height, width), gpu(height, width) {
init(cpu);
copy(gpu, cpu);
}
template <typename C, typename R, typename... FArgs, typename... Args>
void operator()(R (C::*f)(FArgs...), Args&&... args) {
call(cpu, f, args...);
call(gpu, f, autoArgs(args)...);
TensorCheckErr(cpu, gpu);
}
protected:
CpuMatrix cpu;
GpuMatrix gpu;
};
} // namespace autotest
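A minimal sketch of the AutoCompare flow defined above, mirroring the
testMatrixAddBias test in test_Matrix.cpp (addBias stands for any member
function under test; the static_cast disambiguates its overloads):

    autotest::AutoCompare test(128, 512);  // step 1: paired CpuMatrix/GpuMatrix
    CpuMatrix bias(1, 512);                // step 2: init CPU-side arguments only
    bias.randomizeUniform();
    // step 3: operator() calls addBias on both matrices (autoArgs mirrors
    // `bias` onto the GPU) and compares the results with TensorCheckErr.
    test(static_cast<void (Matrix::*)(Matrix&, real)>(&Matrix::addBias),
         bias,
         1.0);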
template <std::size_t... I, typename C, typename R, typename... Args>
void BaseMatrixCompare(R (C::*f)(Args...)) {
static_assert(sizeof...(I) == sizeof...(Args),
"size of parameter packs are not equal");
...@@ -247,7 +317,7 @@ void BaseMatrixCompare(R (C::*f)(Args...), bool checkArgs = false) {
autotest::AssertEqual compare(1e-10);
#endif
autotest::BaseMatrixCompare<false, false, I...>(f, compare);
}
template <std::size_t... I, typename C, typename R, typename... Args>
......
...@@ -15,7 +15,7 @@ limitations under the License. */
#ifndef PADDLE_ONLY_CPU
/**
* This test file compares the CPU and GPU implementations of functions
* in BaseMatrix.cpp or Matrix.cpp.
*/
#include <gtest/gtest.h>
...@@ -188,17 +188,22 @@ TEST(BaseMatrix, Other) {
BaseMatrixCompare<0, 1, 2>(&BaseMatrix::rowScale);
BaseMatrixCompare<0, 1, 2>(&BaseMatrix::rowDotMul);
BaseMatrixCompare<0, 1, 2, 3>(&BaseMatrix::binaryClassificationError);
BaseMatrixCompare<0, 1>(&Matrix::sumOfSquaresBp);
}

TEST(BaseMatrix, Aggregate) {
BaseMatrixAsColVector<0>(&BaseMatrix::maxRows);
BaseMatrixAsColVector<0>(&BaseMatrix::minRows);
BaseMatrixAsColVector<0, 1, 2>(&BaseMatrix::sumRows);
BaseMatrixAsColVector<0, 1>(&Matrix::sumOfSquares);
BaseMatrixAsRowVector<0>(&BaseMatrix::maxCols);
BaseMatrixAsRowVector<0>(&BaseMatrix::minCols);
BaseMatrixAsRowVector<0, 1>(&BaseMatrix::addDotMulVMM);
BaseMatrixAsRowVector<0, 1, 2>(&BaseMatrix::sumCols);
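// collectBias is overloaded, so the member-function pointer passed to the
// comparator below has to be disambiguated with a static_cast: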
BaseMatrixAsRowVector<0, 1>(
static_cast<void (Matrix::*)(Matrix&, real)>(&Matrix::collectBias));
}

int main(int argc, char** argv) {
......
...@@ -14,25 +14,295 @@ limitations under the License. */
#ifndef PADDLE_ONLY_CPU
/**
* This test file uses AutoCompare to compare the CPU and GPU
* implementations of member functions in Matrix.cpp.
*
* 1. Construct an AutoCompare object; an AutoCompare object contains
* a CpuMatrix and a GpuMatrix.
* 2. Initialize the parameters required by the member function.
* Only the CPU-side parameters need to be initialized.
* 3. Call the operator() template to run the test; operator() invokes the
* member function on both matrices and compares the results.
*
* Use case:
* AutoCompare test(...);
* Init arguments arg1, arg2, ...
* test(function, arg1, arg2, ...);
*/
#include <gtest/gtest.h>
#include "TestUtils.h"

using paddle::CpuMatrix;
using paddle::SparseValueType;
using paddle::SparseFormat;
using paddle::NO_VALUE;
using paddle::SPARSE_CSR;
using paddle::initMain;
using autotest::TensorCheckEqual;
using autotest::TensorCheckErr;
using autotest::AutoCompare;
void testBilinearFwdBwd(int numSamples,
int imgSizeH,
int imgSizeW,
int channels) {
int inWidth = imgSizeH * imgSizeW * channels;
int outWidth = 2 * imgSizeH * 2 * imgSizeW * channels;
real ratioH = 0.5;
real ratioW = 0.5;
AutoCompare forward(numSamples, outWidth);
CpuMatrix arg1(numSamples, inWidth);
arg1.randomizeUniform();
forward(&Matrix::bilinearForward,
arg1,
imgSizeH,
imgSizeW,
2 * imgSizeH,
2 * imgSizeW,
channels,
ratioH,
ratioW);
AutoCompare backward(numSamples, inWidth);
CpuMatrix arg2(numSamples, outWidth);
arg2.randomizeUniform();
backward(&Matrix::bilinearBackward,
arg2,
2 * imgSizeH,
2 * imgSizeW,
imgSizeH,
imgSizeW,
channels,
ratioH,
ratioW);
}
TEST(Matrix, BilinearFwdBwd) {
for (auto numSamples : {5, 10}) {
for (auto channels : {8, 16}) {
for (auto imgSizeH : {14, 28}) {
for (auto imgSizeW : {16, 30}) {
VLOG(3) << " numSamples=" << numSamples << " channels=" << channels
<< " imgSizeH=" << imgSizeH << " imgSizeW=" << imgSizeW;
testBilinearFwdBwd(numSamples, imgSizeH, imgSizeW, channels);
}
}
}
}
}
void testMatrixAddBias(int height, int width, real scale) {
AutoCompare test(height, width);
CpuMatrix arg1(1, width);
arg1.randomizeUniform();
test(static_cast<void (Matrix::*)(Matrix&, real)>(&Matrix::addBias),
arg1,
scale);
}
void testMatrixAddDotMulMMV(int height, int width) {
AutoCompare test(height, width);
CpuMatrix arg1(height, width);
CpuMatrix arg2(1, width);
arg1.randomizeUniform();
arg2.randomizeUniform();
test(&BaseMatrix::addDotMulMMV, arg1, arg2);
}
TEST(Matrix, unary) {
for (auto height : {1, 3, 11, 73, 128, 200, 330}) {
for (auto width : {1, 3, 32, 100, 512, 1000, 3210}) {
VLOG(3) << " height=" << height << " width=" << width;
testMatrixAddBias(height, width, 1.0);
testMatrixAddBias(height, width, 3.5);
testMatrixAddDotMulMMV(height, width);
}
}
}
void testMatrixAddAtOffset(int height, int width1, int width2, int offset) {
AutoCompare test(height, width2);
CpuMatrix arg1(height, width1);
arg1.randomizeUniform();
test(&Matrix::addAtOffset, arg1, offset);
}
void testMatrixAssignAtOffset(int height, int width1, int width2, int offset) {
AutoCompare test(height, width2);
CpuMatrix arg1(height, width1);
arg1.randomizeUniform();
test(&Matrix::assignAtOffset, arg1, offset);
}
TEST(Matrix, AtOffset) {
for (auto height : {1, 11, 73, 128, 200}) {
for (auto width1 : {1, 32, 100, 512, 1000}) {
for (auto width2 : {1, 32, 100, 512, 1000}) {
int columnOffset = 0;
int offset = std::abs(width1 - width2);
if (offset) {
columnOffset = std::rand() % offset;
}
VLOG(3) << " height=" << height << " width1=" << width1
<< " width2=" << width2 << " columnOffset = " << columnOffset;
testMatrixAddAtOffset(height, width1, width2, columnOffset);
testMatrixAssignAtOffset(height, width1, width2, columnOffset);
}
}
}
}
void testMatrixSelectRows(int numSamples, int tableSize, int inputDim) {
AutoCompare test(numSamples, inputDim);
CpuMatrix arg1(tableSize, inputDim);
CpuIVector arg2(numSamples);
arg1.randomizeUniform();
arg2.rand(tableSize);
test(&Matrix::selectRows, arg1, arg2);
}
TEST(Matrix, tableProjection) {
for (auto numSamples : {10, 100, 1000, 10000, 80000}) {
for (auto tableSize : {10, 100}) {
for (auto inputDim : {20, 50}) {
VLOG(3) << " numSamples=" << numSamples << " tableSize=" << tableSize
<< " inputDim=" << inputDim;
testMatrixSelectRows(numSamples, tableSize, inputDim);
}
}
}
}
void testMatrixCopyByRowIndex(int outHeight, int inHeight, int width) {
AutoCompare test(outHeight, width);
CpuMatrix arg1(inHeight, width);
CpuIVector arg2(outHeight);
arg1.randomizeUniform();
arg2.rand(inHeight);
test(&Matrix::copyByRowIndex, arg1, arg2);
}

TEST(Matrix, copyByRowIndex) {
for (auto outHeight : {31, 500, 1000}) {
for (auto inHeight : {17, 257, 500, 1200}) {
for (auto width : {512, 1024}) {
VLOG(3) << outHeight << " " << inHeight << " " << width;
testMatrixCopyByRowIndex(outHeight, inHeight, width);
}
}
}
}
void testCosSim(int heightX, int heightY, int width, real scale) {
AutoCompare test(heightX, 1);
CpuMatrix arg1(heightX, width);
CpuMatrix arg2(heightY, width);
arg1.randomizeUniform();
arg2.randomizeUniform();
arg2.add(-0.5);
test(&Matrix::cosSim, arg1, arg2, scale);
}
TEST(Matrix, cosSim) {
for (auto heightX : {10, 100, 1000}) {
for (auto heightY : {1, heightX}) {
for (auto width : {10, 100, 1000}) {
for (auto scale : {1.0, 2.0}) {
testCosSim(heightX, heightY, width, scale);
}
}
}
}
}
void testParamReluForward(int height, int width, int w_height, int w_width) {
AutoCompare test(height, width);
CpuMatrix arg1(height, width);
CpuMatrix arg2(w_height, w_width);
arg1.randomizeUniform();
arg2.randomizeUniform();
arg1.add(-0.5);
test(&Matrix::paramReluForward, arg1, arg2);
}
void testParamReluBackwardW(int height, int width, int w_height, int w_width) {
AutoCompare test(w_height, w_width);
CpuMatrix arg1(height, width);
CpuMatrix arg2(height, width);
arg1.randomizeUniform();
arg2.randomizeUniform();
arg2.add(-0.5);
test(&Matrix::paramReluBackwardW, arg1, arg2);
}
TEST(Matrix, paramRelu) {
for (auto height : {10, 100}) {
for (auto width : {10, 100}) {
for (auto w_height : {1, 2}) {
for (auto w_width : {1, 2}) {
testParamReluForward(height, width, w_height, w_width);
testParamReluBackwardW(height, width, w_height, w_width);
}
}
}
}
}
void testAddSharedBias(int numSamples, int dim, int channel) {
AutoCompare test(numSamples, dim);
CpuMatrix arg1(1, channel);
arg1.randomizeUniform();
test(&Matrix::addSharedBias, arg1, 1.0);
}
void testCollectSharedBias(int numSamples, int dim, int channel) {
AutoCompare test(1, channel);
CpuMatrix arg1(numSamples, dim);
arg1.randomizeUniform();
test(&Matrix::collectSharedBias, arg1, 1.0);
}
TEST(Matrix, sharedBias) {
for (auto numSamples : {1, 100, 520}) {
for (auto dim : {100 * 16, 100 * 32}) {
for (auto channel : {8, 16}) {
VLOG(3) << " numSamples=" << numSamples << " dim=" << dim
<< " channel=" << channel;
testAddSharedBias(numSamples, dim, channel);
testCollectSharedBias(numSamples, dim, channel);
}
}
}
}
void testMultiBinaryLabelCrossEntropy(int numSamples, int dim) {
AutoCompare forward(numSamples, 1);
CpuMatrix arg1(numSamples, dim);
CpuSparseMatrix arg2(numSamples, dim, numSamples, NO_VALUE, SPARSE_CSR);
CpuMatrix output1(numSamples, dim);
output1.randomizeUniform();
output1.softmax(arg1);
for (int i = 0; i < numSamples; i++) {
const unsigned int id = std::rand() % dim;
arg2.setRow(i, 1, &id, nullptr);
}
forward(&Matrix::multiBinaryLabelCrossEntropy, arg1, arg2);
AutoCompare backward(numSamples, dim);
backward(&Matrix::multiBinaryLabelCrossEntropyBp, arg1, arg2);
}
TEST(Matrix, multiBinaryCrossEntropy) {
for (auto numSamples : {100, 1000, 10000}) {
for (auto dim : {100, 1000, 10000}) {
VLOG(3) << " numSamples=" << numSamples << " dim=" << dim;
testMultiBinaryLabelCrossEntropy(numSamples, dim);
}
}
}

int main(int argc, char** argv) {
......
...@@ -22,163 +22,12 @@ limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/gserver/tests/TestUtil.h"
#include "paddle/utils/Stat.h"
#include "TensorCheck.h"
using namespace paddle; // NOLINT
using namespace std; // NOLINT
using autotest::TensorCheckEqual;
using autotest::TensorCheckErr;
void testMatrixProjectionForward(int contextStart,
int contextLength,
...@@ -232,12 +81,7 @@ void testMatrixProjectionForward(int contextStart,
beginPad,
padding);

TensorCheckEqual(*cpuOutput, *gpuOutput);
}
void testMatrixProjectionBackward(int contextStart,
...@@ -294,15 +138,9 @@ void testMatrixProjectionBackward(int contextStart,
beginPad);
}

TensorCheckErr(*cpuInputGrad, *gpuInputGrad);
if (padding) {
TensorCheckErr(*cpuWeightGrad, *gpuWeightGrad);
}
}
...@@ -361,15 +199,8 @@ void testMatrixMaxSequence(int batchSize, int inputDim) {
cpuOutput->maxSequenceForward(*cpuInput, *cpuSequence, *cpuIndex);
gpuOutput->maxSequenceForward(*gpuInput, *gpuSequence, *gpuIndex);

TensorCheckEqual(*cpuOutput, *gpuOutput);
TensorCheckEqual(*cpuIndex, *gpuIndex);

// backward
MatrixPtr cpuOutputGrad = std::make_shared<CpuMatrix>(newBatchSize, inputDim);
...@@ -385,10 +216,7 @@ void testMatrixMaxSequence(int batchSize, int inputDim) {
cpuInputGrad->maxSequenceBackward(*cpuOutputGrad, *cpuSequence, *cpuIndex);
gpuInputGrad->maxSequenceBackward(*gpuOutputGrad, *gpuSequence, *gpuIndex);

TensorCheckEqual(*cpuInputGrad, *gpuInputGrad);
}

TEST(Matrix, maxSequence) {
...@@ -431,6 +259,8 @@ void testMatrixZeroAtOffset(int height, int width) {
int columnOffset = rand() % width; // NOLINT we just use rand() for test.
int numColumns = rand() % (width - columnOffset); // NOLINT
if (numColumns == 0) return;
cpuA->zeroAtOffset(columnOffset, numColumns);
gpuA->zeroAtOffset(columnOffset, numColumns);
...@@ -442,61 +272,8 @@
}
}

TensorCheckEqual(*cpuA, *gpuA);
TensorCheckEqual(*cpuA, *cpuTest);
}
void testMatrixTranspose(int height, int width) {
...@@ -510,9 +287,7 @@ void testMatrixTranspose(int height, int width) {
cpu->transpose(cpuT, false);
gpu->transpose(gpuT, false);

TensorCheckEqual(*cpuT, *gpuT);
}
void testMatrixInverse(int height) {
...@@ -533,12 +308,11 @@ void testMatrixInverse(int height) {
cpu->inverse(cpuI, false);
gpu->inverse(gpuI, false);

TensorCheckErr(*cpuI, *gpuI);

outputCheck->mul(cpu, cpuI);
cpu->setDiag(1.0);
TensorCheckErr(*cpu, *outputCheck);
}
TEST(Matrix, unary) {
...@@ -546,15 +320,8 @@ TEST(Matrix, unary) {
for (auto width : {1, 3, 32, 100, 512, 1000, 3210}) {
VLOG(3) << " height=" << height << " width=" << width;

testMatrixZeroAtOffset(height, width);
testMatrixGetSum(height, width);
testMatrixTranspose(height, width);
}
// inverse
...@@ -562,6 +329,22 @@ TEST(Matrix, unary) {
}
}
void testMatrixSoftmax(int height, int width) {
MatrixPtr cpuInput = std::make_shared<CpuMatrix>(height, width);
MatrixPtr cpuOutput = std::make_shared<CpuMatrix>(height, width);
MatrixPtr gpuInput = std::make_shared<GpuMatrix>(height, width);
MatrixPtr gpuOutput = std::make_shared<GpuMatrix>(height, width);
cpuInput->randomizeUniform();
gpuInput->copyFrom(*cpuInput);
cpuOutput->zero();
gpuOutput->zero();
cpuInput->softmax(*cpuOutput);
gpuInput->softmax(*gpuOutput);
TensorCheckErr(*cpuOutput, *gpuOutput);
}
void testSequenceSoftmax(int batchSize) {
// forward
int inputDim = 1;
...@@ -578,10 +361,7 @@ void testSequenceSoftmax(int batchSize) {
cpuInput->sequenceSoftmax(*cpuInput, *cpuSequence);
gpuInput->sequenceSoftmax(*gpuInput, *gpuSequence);

TensorCheckErr(*cpuInput, *gpuInput);
}
void testMatrixSoftmaxThreshold(int height, int width) {
...@@ -634,9 +414,7 @@ void testMatrixSoftmaxBp(int height, int width) {
sftMaxSum->colMerge(*sftMaxDot);
cpuOutput->softmaxDerivative(*cpuInput, *sftMaxSum);

TensorCheckErr(*cpuOutput, *gpuOutput);
}
TEST(Matrix, softmax) {
...@@ -644,6 +422,7 @@ TEST(Matrix, softmax) {
for (auto width : {1, 32, 100, 512, 1000}) {
VLOG(3) << " height=" << height << " width=" << width;
testMatrixSoftmax(height, width);
testMatrixSoftmaxBp(height, width);
testMatrixSoftmaxThreshold(height, width);
}
...@@ -651,95 +430,6 @@ TEST(Matrix, softmax) {
}
}
void testMatrixAddToRows(int numSamples, int tableSize, int inputDim) {
MatrixPtr cpuTable = std::make_shared<CpuMatrix>(tableSize, inputDim);
MatrixPtr gpuTable = std::make_shared<GpuMatrix>(tableSize, inputDim);
...@@ -761,10 +451,7 @@ void testMatrixAddToRows(int numSamples, int tableSize, int inputDim) {
cpuOutput->addToRows(*cpuTable, *cpuIds);
gpuOutput->addToRows(*gpuTable, *gpuIds);

TensorCheckErr(*cpuTable, *gpuTable);
}
TEST(Matrix, tableProjection) {
...@@ -773,7 +460,6 @@ TEST(Matrix, tableProjection) {
for (auto inputDim : {20, 50}) {
VLOG(3) << " numSamples=" << numSamples << " tableSize=" << tableSize
<< " inputDim=" << inputDim;
testMatrixAddToRows(numSamples, tableSize, inputDim);
}
}
...@@ -807,9 +493,7 @@ void testMatrixMul(bool transa, bool transb, int dimM, int dimN, int dimK) {
cpuC->mul(cpuA, cpuB, alpha, beta);
gpuC->mul(gpuA, gpuB, alpha, beta);

TensorCheckErr(*cpuC, *gpuC);
}
void testSubMatrixMul(bool transa, bool transb, int dimM, int dimN, int dimK) {
...@@ -881,9 +565,7 @@ void testSubMatrixMul(bool transa, bool transb, int dimM, int dimN, int dimK) {
subCpuC->mul(subCpuA, subCpuB, alpha, beta);
subGpuC->mul(subGpuA, subGpuB, alpha, beta);

TensorCheckErr(*cpuC, *gpuC);
}
TEST(Matrix, mul) {
...@@ -937,9 +619,7 @@ void testVectorReset(int size) {
cpu->reset(value);
gpu->reset(value);

TensorCheckEqual(*cpu, *gpu);
}
template <class T>
...@@ -965,9 +645,7 @@ void testVecortSelectFrom(int size) {
cpuDst->selectFrom(*cpuSrc, *cpuIds);
gpuDst->selectFrom(*gpuSrc, *gpuIds);

TensorCheckEqual(*cpuDst, *gpuDst);
}
template <class T>
...@@ -978,9 +656,7 @@ void testVecotrZeroMem(int size) {
cpu->zeroMem();
gpu->zeroMem();

TensorCheckEqual(*cpu, *gpu);
}
template <class T>
...@@ -1001,9 +677,7 @@ void testVectorIsEqual(int size) {
cpuA->isEqualTo(*cpuB, value);
gpuA->isEqualTo(*gpuB, value);

TensorCheckEqual(*cpuA, *gpuA);
}
TEST(Vector, Equal) {
...@@ -1034,9 +708,7 @@ void testMatrixTopK(int samples, int dim, int beamSize) {
cpuSrc->rowMax(*cpuIds, *cpuVal);
gpuSrc->rowMax(*gpuIds, *gpuVal);

TensorCheckEqual(*cpuVal, *gpuVal);
}
TEST(Matrix, topK) {
...@@ -1072,9 +744,7 @@ void testSMatrixTopK(int samples, int dim, int beamSize, real ratio) {
cpuSrc->rowMax(*cpuIds, *cpuVal);
gpuSrc->rowMax(*gpuIds, *gpuVal);

TensorCheckEqual(*cpuVal, *gpuVal);

IVectorPtr outCheckIds = std::make_shared<CpuIVector>(samples * beamSize);
outCheckIds->copyFrom(*gpuIds);
...@@ -1104,42 +774,6 @@ TEST(SMatrix, topK) {
}
}
void testMatrixSequenceAvgForward(int batchSize, int inputDim, int mode) {
MatrixPtr cpuInput = std::make_shared<CpuMatrix>(batchSize, inputDim);
MatrixPtr gpuInput = std::make_shared<GpuMatrix>(batchSize, inputDim);
...@@ -1160,10 +794,7 @@ void testMatrixSequenceAvgForward(int batchSize, int inputDim, int mode) {
cpuOutput->sequenceAvgForward(*cpuInput, *cpuSequence, mode);
gpuOutput->sequenceAvgForward(*gpuInput, *gpuSequence, mode);

TensorCheckErr(*cpuOutput, *gpuOutput);
}
TEST(Matrix, sequenceAvgForward) {
...@@ -1178,45 +809,6 @@ TEST(Matrix, sequenceAvgForward) {
}
}
void testCosSimDerivate(int heightX, int heightY, int width, real scale) {
MatrixPtr prevOutX = CpuMatrix::create(heightX, width, false, false);
MatrixPtr prevOutY = CpuMatrix::create(heightY, width, false, false);
...@@ -1256,12 +848,8 @@ void testCosSimDerivate(int heightX, int heightY, int width, real scale) {
*prevGradYGpu,
scale);

TensorCheckErr(*prevGradX, *prevGradXGpu);
TensorCheckErr(*prevGradY, *prevGradYGpu);
}
TEST(Matrix, cosSimDerivate) {
...@@ -1276,80 +864,6 @@ TEST(Matrix, cosSimDerivate) {
}
}
void testParamReluBackwardDiff(int height,
int width,
int w_height,
...@@ -1378,9 +892,7 @@ void testParamReluBackwardDiff(int height,
diff->paramReluBackwardDiff(*oGrad, *input, *w);
diffGpu->paramReluBackwardDiff(*oGradGpu, *inputGpu, *wGpu);

TensorCheckErr(*diff, *diffGpu);
}
TEST(Matrix, paramReluBackwardDiff) {
...@@ -1411,9 +923,7 @@ void testClassificationError(int numSamples, int dim) {
cpuError->classificationError(cpuOutput, cpuLabel);
gpuError->classificationError(gpuOutput, gpuLabel);

TensorCheckEqual(*cpuError, *gpuError);
}
TEST(Matrix, classificationError) {
...@@ -1578,9 +1088,8 @@ void testAvgPoolFwdBwd(int numSamples,
outW,
padH,
padW);

TensorCheckErr(*target, *targetGpu);

MatrixPtr inputGrad = CpuMatrix::create(numSamples, inWidth, false, false);
MatrixPtr inputGpuGrad = GpuMatrix::create(numSamples, inWidth, false, true);
...@@ -1619,10 +1128,8 @@ void testAvgPoolFwdBwd(int numSamples,
1.0,
padH,
padW);

TensorCheckErr(*inputGrad, *inputGpuGrad);
}
TEST(Matrix, PoolFwdBwd) {
...@@ -1687,11 +1194,9 @@ void testMaxOutFwdBwd(
MatrixPtr target = CpuMatrix::create(numSamples, outWidth, false, false);
MatrixPtr targetGpu = GpuMatrix::create(numSamples, outWidth, false, true);

IVectorPtr id = CpuIVector::create(numSamples * outWidth, false);
IVectorPtr idGpu = GpuIVector::create(numSamples * outWidth, true);

input->randomizeUniform();
inputGpu->copyFrom(*input);
...@@ -1699,11 +1204,8 @@ void testMaxOutFwdBwd(
target->maxoutForward(*input, *id, outChannels, groups);
targetGpu->maxoutForward(*inputGpu, *idGpu, outChannels, groups);

TensorCheckErr(*target, *targetGpu);
TensorCheckEqual(*id, *idGpu);
// backward
MatrixPtr inputGrad = CpuMatrix::create(numSamples, inWidth, false, false);
...@@ -1712,8 +1214,6 @@ void testMaxOutFwdBwd(
MatrixPtr targetGrad = CpuMatrix::create(numSamples, outWidth, false, false);
MatrixPtr targetGpuGrad =
GpuMatrix::create(numSamples, outWidth, false, true);

inputGrad->randomizeUniform();
targetGrad->randomizeUniform();
...@@ -1723,9 +1223,7 @@ void testMaxOutFwdBwd(
inputGrad->maxoutBackward(*targetGrad, *id, outChannels, groups);
inputGpuGrad->maxoutBackward(*targetGpuGrad, *idGpu, outChannels, groups);

TensorCheckErr(*inputGrad, *inputGpuGrad);
}
TEST(Matrix, MaxOutFwdBwd) {
...@@ -1745,113 +1243,6 @@ TEST(Matrix, MaxOutFwdBwd) {
}
}
int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv);
initMain(argc, argv);
......