Commit 4751cc8f authored by xutianbing

Pass unit tests for SparseCpuMatrix::mul(CpuMatrix, CpuMatrix),

SparseGpuMatrix::mul(GpuMatrix, GpuMatrix),
CpuMatrix::mul(CpuSparseMatrix, CpuMatrix),
and GpuMatrix::mul(GpuSparseMatrix, GpuMatrix)
Parent 1ca2846e
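All four variants go through the Function objects registered as "MulOp-CPU" and "MulOp-GPU". A minimal usage sketch, assembled from the tests in this commit, for the dense = sparse * dense case; the shapes, nnz, and scale values are arbitrary illustrative choices, not taken from the diff:

// Sketch only: (10x3 sparse) * (3x8 dense) -> (10x8 dense); values illustrative.
const auto func = FunctionBase::funcRegistrar_.createByType("MulOp-CPU");
func->init(FuncConfig().set("scaleAB", 1.0).set("scaleT", 1.0));
CpuSparseMatrix a(10, 3, /*nnz=*/5, FLOAT_VALUE, SPARSE_CSR, /*trans=*/false);
auto b = Matrix::create(3, 8, /*trans=*/false, /*useGpu=*/false);
auto c = Matrix::create(10, 8, false, false);
a.randomizeUniform();
b->randomizeUniform();
c->randomizeUniform();
BufferArgs inputs;
BufferArgs outputs;
inputs.addArg(a);            // sparse input is wrapped as a SparseMatrixArg
inputs.addArg(*b);
outputs.addArg(*c, ADD_TO);  // accumulate into c, scaled per scaleAB/scaleT
func->calc(inputs, outputs); // routed to the sparse * dense branch in the first hunk below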
......@@ -498,15 +498,10 @@ public:
CHECK_EQ(outputs[0].shape().ndims(), (size_t)2);
CHECK_EQ(outputs[0].getArgType(), ADD_TO);
/// todo(tianbing), support SparseMatrixArg for out_mat
auto out_mat = outputs[0].matrix<Device>();
- LOG(INFO) << "out_mat:";
- out_mat.print(std::cout);
- if (!inputs[0].isSparseArg() && !inputs[1].isSparseArg()) {
- LOG(INFO) << "in1_mat:";
- inputs[0].matrix<Device>().print(std::cout);
- LOG(INFO) << "in2_mat:";
- inputs[1].matrix<Device>().print(std::cout);
/// matrix = matrix * matrix
+ if (!inputs[0].isSparseArg() && !inputs[1].isSparseArg() &&
+ !outputs[0].isSparseArg()) {
MulOp<Device>(out_mat,
inputs[0].matrix<Device>(),
inputs[1].matrix<Device>(),
......@@ -515,11 +510,9 @@ public:
return;
}
- if (!inputs[0].isSparseArg() && inputs[1].isSparseArg()) {
- LOG(INFO) << "in1_mat:";
- inputs[0].matrix<Device>().print(std::cout);
- LOG(INFO) << "in2_mat:";
- inputs[1].sparse().SparseMatrix<Device>().print(std::cout);
/// matrix = matrix * sparse matrix
+ if (!inputs[0].isSparseArg() && inputs[1].isSparseArg() &&
+ !outputs[0].isSparseArg()) {
MulOp<Device>(out_mat,
inputs[0].matrix<Device>(),
inputs[1].sparse().SparseMatrix<Device>(),
......@@ -528,11 +521,9 @@ public:
return;
}
- if (inputs[0].isSparseArg() && !inputs[1].isSparseArg()) {
- LOG(INFO) << "in1_mat:";
- inputs[0].sparse().SparseMatrix<Device>().print(std::cout);
- LOG(INFO) << "in2_mat:";
- inputs[1].matrix<Device>().print(std::cout);
/// matrix = sparse matrix * matrix
+ if (inputs[0].isSparseArg() && !inputs[1].isSparseArg() &&
+ !outputs[0].isSparseArg()) {
MulOp<Device>(out_mat,
inputs[0].sparse().SparseMatrix<Device>(),
inputs[1].matrix<Device>(),
......@@ -540,6 +531,18 @@ public:
beta_);
return;
}
+ /// sparse matrix = matrix * matrix
+ auto out_sparse_mat = outputs[0].sparse().SparseMatrix<Device>();
+ if (!inputs[0].isSparseArg() && !inputs[1].isSparseArg() &&
+ outputs[0].isSparseArg()) {
+ MulOp<Device>(out_sparse_mat,
+ inputs[0].matrix<Device>(),
+ inputs[1].matrix<Device>(),
+ alpha_,
+ beta_);
+ return;
+ }
}
private:
......
......@@ -176,7 +176,36 @@ void MulOp<DEVICE_TYPE_GPU>(GpuSparseMatrix& out,
const GpuMatrix& b,
real scale_ab,
real scale_t) {
- /// todo(tianbing), implement it
+ /// todo(tianbing), clean the code
+ CHECK(a.useGpu_ && b.useGpu_) << "type not match";
+ CHECK(!out.trans_) << "trans not supported";
+ real* a_data = const_cast<real*>(a.getData());
+ real* b_data = const_cast<real*>(b.getData());
+ hl_sparse_matrix_s out_data = out.sMatrix_.get();
+ hl_trans_op_t a_trans = a.trans_ ? HPPL_OP_T : HPPL_OP_N;
+ hl_trans_op_t b_trans = b.trans_ ? HPPL_OP_T : HPPL_OP_N;
+ if (!a.trans_ && !b.trans_) {
+ CHECK(out.height_ == a.getHeight());
+ CHECK(out.width_ == b.getWidth());
+ CHECK(a.getWidth() == b.getHeight());
+ } else if (a.trans_ && !b.trans_) {
+ CHECK(out.height_ == a.getWidth());
+ CHECK(out.width_ == b.getWidth());
+ CHECK(a.getHeight() == b.getHeight());
+ } else if (!a.trans_ && b.trans_) {
+ CHECK(out.height_ == a.getHeight());
+ CHECK(out.width_ == b.getHeight());
+ CHECK(a.getWidth() == b.getWidth());
+ } else {
LOG(INFO) << "Not support";
+ }
+ int dim_m = out.height_;
+ int dim_n = out.width_;
+ int dim_k = !b.trans_ ? b.getHeight() : b.getWidth();
+ hl_sparse_matrix_mul(
+ a_data, a_trans, b_data, b_trans, out_data,
+ dim_m, dim_n, dim_k, scale_ab, scale_t);
}
} // namespace paddle
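As a reading aid (not part of the commit): the CHECK chains above encode the usual GEMM shape rule for C(m x n) = op(A)(m x k) * op(B)(k x n). A worked instance of one branch, with illustrative numbers:

// Illustrative numbers for the (a.trans_ && !b.trans_) branch:
//   a is 5x3 with a.trans_ == true, b is 5x7 with b.trans_ == false,
//   so C = a^T * b must be 3x7, matching the CHECKs:
//     out.height_   == a.getWidth()   -> 3
//     out.width_    == b.getWidth()   -> 7
//     a.getHeight() == b.getHeight()  -> 5 (shared inner dimension)
//   and hl_sparse_matrix_mul is then called with
//     dim_m = 3, dim_n = 7, dim_k = b.getHeight() = 5.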
......@@ -24,9 +24,10 @@ limitations under the License. */
using namespace paddle; // NOLINT
/**
- * C = alpha * C + beta * (A * B)
+ * C = alpha * C + beta * (A * B), A, B, C dense matrix
+ * dense = dense * dense
*/
- void testMatrixMul(bool transa, bool transb, int dimM, int dimN, int dimK) {
+ void testDDDMatrix(bool transa, bool transb, int dimM, int dimN, int dimK) {
real alpha = 1.5;
real beta = 2.0;
......@@ -73,7 +74,7 @@ void testMatrixMul(bool transa, bool transb, int dimM, int dimN, int dimK) {
autotest::TensorCheckErr(*cpuC, *gpuC);
}
- TEST(Matrix, mul) {
+ TEST(Matrix, DDDMul) {
LOG(INFO) << "test for dense = dense * dense matrix";
for (auto transa : {false, true}) {
for (auto transb : {false, true}) {
......@@ -89,7 +90,7 @@ TEST(Matrix, mul) {
<< " dimN=" << std::setw(5) << dimN
<< " dimK=" << std::setw(5) << dimK;
- testMatrixMul(transa, transb, dimM, dimN, dimK);
+ testDDDMatrix(transa, transb, dimM, dimN, dimK);
}
}
}
......@@ -97,19 +98,100 @@ TEST(Matrix, mul) {
}
}
struct MatrixPara {
size_t height;
size_t width;
bool trans;
bool sparse;
size_t nnz;
SparseFormat format;
};
+ /**
+ * C += A * B, B, C dense, A sparse
+ * dense = sparse * dense
+ */
+ void testDSparseDMatrix(
+ size_t dimM, size_t dimN, size_t dimK, size_t nnz, SparseFormat FORMAT) {
+ real alpha = 1.0;
+ real beta = 1.0;
+ const auto cpuFunc = FunctionBase::funcRegistrar_.createByType("MulOp-CPU");
+ cpuFunc->init(FuncConfig().set("scaleAB", alpha).set("scaleT", beta));
+ const auto gpuFunc = FunctionBase::funcRegistrar_.createByType("MulOp-GPU");
+ gpuFunc->init(FuncConfig().set("scaleAB", alpha).set("scaleT", beta));
+ CpuSparseMatrix cpuMatrixA(dimM, dimK, nnz, FLOAT_VALUE, FORMAT, false);
+ GpuSparseMatrix gpuMatrixA(dimM, dimK, nnz, FLOAT_VALUE, FORMAT, false);
+ CpuMatrix cpuDenseA(dimM, dimK, false);
+ auto cpuMatrixB = Matrix::create(dimK, dimN, false, false);
+ auto gpuMatrixB = Matrix::create(dimK, dimN, false, true);
+ auto cpuDenseB = Matrix::create(dimK, dimN, false, false);
+ auto cpuMatrixC = Matrix::create(dimM, dimN, false, false);
+ auto gpuMatrixC = Matrix::create(dimM, dimN, false, true);
+ auto cpuDenseC = Matrix::create(dimM, dimN, false, false);
+ /*matrix init*/
+ hl_stream_t stream(HPPL_STREAM_1);
+ cpuMatrixA.randomizeUniform();
+ cpuMatrixB->randomizeUniform();
+ cpuMatrixC->randomizeUniform();
+ gpuMatrixA.copyFrom(cpuMatrixA, stream);
+ gpuMatrixB->copyFrom(*cpuMatrixB, stream);
+ gpuMatrixC->copyFrom(*cpuMatrixC, stream);
+ cpuDenseA.copyFrom(cpuMatrixA);
+ cpuDenseB->copyFrom(*cpuMatrixB);
+ cpuDenseC->copyFrom(*cpuMatrixC);
+ hl_stream_synchronize(stream);
+ /*matrix mul*/
+ BufferArgs cpuInputs;
+ BufferArgs cpuOutputs;
+ cpuInputs.addArg(cpuMatrixA);
+ cpuInputs.addArg(*cpuMatrixB);
+ cpuOutputs.addArg(*cpuMatrixC, ADD_TO);
+ cpuFunc->calc(cpuInputs, cpuOutputs);
+ BufferArgs gpuInputs;
+ BufferArgs gpuOutputs;
+ gpuInputs.addArg(gpuMatrixA);
+ gpuInputs.addArg(*gpuMatrixB);
+ gpuOutputs.addArg(*gpuMatrixC, ADD_TO);
+ gpuFunc->calc(gpuInputs, gpuOutputs);
+ BufferArgs denseInputs;
+ BufferArgs denseOutputs;
+ denseInputs.addArg(cpuDenseA);
+ denseInputs.addArg(*cpuDenseB);
+ denseOutputs.addArg(*cpuDenseC, ADD_TO);
+ cpuFunc->calc(denseInputs, denseOutputs);
+ /*check result*/
+ autotest::TensorCheckErr(*cpuMatrixC, *cpuDenseC);
+ autotest::TensorCheckErr(*cpuMatrixC, *gpuMatrixC);
+ }
+ TEST(Matrix, DSparseDMul) {
+ LOG(INFO) << "test for dense = sparse * dense matrix";
+ for (const auto dimM : {10, 100, 1000}) {
+ for (const auto dimN : {10, 100}) {
+ for (const auto dimK : {3, 10}) {
+ for (const auto nnz : {3, 10}) {
+ for (const auto FORMAT : {SPARSE_CSR}) {
+ VLOG(3) << setiosflags(std::ios::left) << std::setfill(' ')
+ << " dimM=" << std::setw(5) << dimM
+ << " dimN=" << std::setw(5) << dimN
+ << " dimK=" << std::setw(5) << dimK
+ << " nnz=" << std::setw(5) << nnz
+ << " format=" << std::setw(5) << FORMAT;
+ testDSparseDMatrix(dimM, dimN, dimK, nnz, FORMAT);
+ }
+ }
+ }
+ }
+ }
+ }
/**
* C += A * B, A, C dense, B sparse
* dense = dense * sparse
*/
- void testDSparseDMatrix() {
+ void testDDSparseMatrix(
+ size_t dimM, size_t dimN, size_t dimK, size_t nnz, SparseFormat FORMAT) {
real alpha = 1.0;
real beta = 1.0;
const auto cpuFunc = FunctionBase::funcRegistrar_.createByType("MulOp-CPU");
......@@ -117,46 +199,19 @@ void testDSparseDMatrix() {
const auto gpuFunc = FunctionBase::funcRegistrar_.createByType("MulOp-GPU");
gpuFunc->init(FuncConfig().set("scaleAB", alpha).set("scaleT", beta));
- constexpr size_t dimM = 2;
- constexpr size_t dimN = 2;
- constexpr size_t dimK = 3;
- constexpr size_t NNZ = 3;
- constexpr SparseFormat FORMAT = SPARSE_CSC;
- MatrixPara paraA{dimM, dimK, /*trans*/ false, /*sparse*/ false, NNZ, FORMAT};
- MatrixPara paraB{dimK, dimN, /*trans*/ false, /*sparse*/ true, NNZ, FORMAT};
- MatrixPara paraC{dimM, dimN, /*trans*/ false, /*sparse*/ false, NNZ, FORMAT};
- auto cpuMatrixA =
- Matrix::create(paraA.height, paraA.width, paraA.trans, false);
- auto gpuMatrixA =
- Matrix::create(paraA.height, paraA.width, paraA.trans, true);
- auto cpuDenseA =
- Matrix::create(paraA.height, paraA.width, paraA.trans, false);
- CpuSparseMatrix cpuMatrixB(paraB.height,
- paraB.width,
- paraB.nnz,
- FLOAT_VALUE,
- paraB.format,
- paraB.trans);
- GpuSparseMatrix gpuMatrixB(paraB.height,
- paraB.width,
- paraB.nnz,
- FLOAT_VALUE,
- paraB.format,
- paraB.trans);
- auto cpuDenseB =
- Matrix::create(paraB.height, paraB.width, paraB.trans, false);
- auto cpuMatrixC =
- Matrix::create(paraC.height, paraC.width, paraC.trans, false);
- auto gpuMatrixC =
- Matrix::create(paraC.height, paraC.width, paraC.trans, true);
- auto cpuDenseC =
- Matrix::create(paraC.height, paraC.width, paraC.trans, false);
- auto gpuMatrixC_d2h =
- Matrix::create(paraC.height, paraC.width, paraC.trans, false);
+ auto cpuMatrixA = Matrix::create(dimM, dimK, false, false);
+ auto gpuMatrixA = Matrix::create(dimM, dimK, false, true);
+ auto cpuDenseA = Matrix::create(dimM, dimK, false, false);
+ CpuSparseMatrix cpuMatrixB(dimK, dimN, nnz, FLOAT_VALUE, FORMAT, false);
+ GpuSparseMatrix gpuMatrixB(dimK, dimN, nnz, FLOAT_VALUE, FORMAT, false);
+ auto cpuDenseB = Matrix::create(dimK, dimN, false, false);
+ auto cpuMatrixC = Matrix::create(dimM, dimN, false, false);
+ auto gpuMatrixC = Matrix::create(dimM, dimN, false, true);
+ auto cpuDenseC = Matrix::create(dimM, dimN, false, false);
/*matrix init*/
hl_stream_t stream(HPPL_STREAM_1);
cpuMatrixA->randomizeUniform();
......@@ -172,27 +227,6 @@ void testDSparseDMatrix() {
cpuDenseC->copyFrom(*cpuMatrixC);
hl_stream_synchronize(stream);
LOG(INFO) << "cpuMatrixA: ";
cpuMatrixA->print(std::cout);
LOG(INFO) << "cpuMatrixB: ";
(&cpuMatrixB)->print(std::cout);
LOG(INFO) << "cpuMatrixC: ";
cpuMatrixC->print(std::cout);
LOG(INFO) << "cpuDenseA: ";
cpuDenseA->print(std::cout);
LOG(INFO) << "cpuDenseB: ";
cpuDenseB->print(std::cout);
LOG(INFO) << "cpuDenseC: ";
cpuDenseC->print(std::cout);
LOG(INFO) << "gpuMatrixA: ";
gpuMatrixA->print(std::cout);
LOG(INFO) << "gpuMatrixB: ";
(&gpuMatrixB)->print(std::cout);
LOG(INFO) << "gpuMatrixC: ";
gpuMatrixC->print(std::cout);
/*matrix mul*/
BufferArgs cpuInputs;
BufferArgs cpuOutputs;
......@@ -215,15 +249,120 @@ void testDSparseDMatrix() {
denseOutputs.addArg(*cpuDenseC, ADD_TO);
cpuFunc->calc(denseInputs, denseOutputs);
- gpuMatrixC_d2h->copyFrom(*gpuMatrixC, stream);
- hl_stream_synchronize(stream);
/*check result*/
- // autotest::TensorCheckErr(*cpuMatrixC, *gpuMatrixC);
- checkMatrixEqual(cpuMatrixC, cpuDenseC);
- checkMatrixEqual(cpuMatrixC, gpuMatrixC_d2h);
+ autotest::TensorCheckErr(*cpuMatrixC, *cpuDenseC);
+ autotest::TensorCheckErr(*cpuMatrixC, *gpuMatrixC);
}
- TEST(Matrix, SparseMatrixMul) {
+ TEST(Matrix, DDSparseMul) {
LOG(INFO) << "test for dense = dense * sparse matrix";
- testDSparseDMatrix();
+ for (const auto dimM : {10, 100, 1000}) {
+ for (const auto dimN : {10, 100}) {
+ for (const auto dimK : {3, 10}) {
+ for (const auto nnz : {3, 10}) {
+ for (const auto FORMAT : {SPARSE_CSR, SPARSE_CSC}) {
+ VLOG(3) << setiosflags(std::ios::left) << std::setfill(' ')
+ << " dimM=" << std::setw(5) << dimM
+ << " dimN=" << std::setw(5) << dimN
+ << " dimK=" << std::setw(5) << dimK
+ << " nnz=" << std::setw(5) << nnz
+ << " format=" << std::setw(5) << FORMAT;
+ testDDSparseMatrix(dimM, dimN, dimK, nnz, FORMAT);
+ }
+ }
+ }
+ }
+ }
+ }
+ /**
+ * C += A * B, A sparse, B, C dense
+ * sparse = dense * dense
+ */
+ void testSparseDDMatrix(
+ size_t dimM, size_t dimN, size_t dimK, size_t nnz, SparseFormat FORMAT) {
+ real alpha = 1.0;
+ real beta = 1.0;
+ const auto cpuFunc = FunctionBase::funcRegistrar_.createByType("MulOp-CPU");
+ cpuFunc->init(FuncConfig().set("scaleAB", alpha).set("scaleT", beta));
+ const auto gpuFunc = FunctionBase::funcRegistrar_.createByType("MulOp-GPU");
+ gpuFunc->init(FuncConfig().set("scaleAB", alpha).set("scaleT", beta));
+ auto cpuMatrixA = Matrix::create(dimM, dimK, false, false);
+ auto gpuMatrixA = Matrix::create(dimM, dimK, false, true);
+ auto cpuDenseA = Matrix::create(dimM, dimK, false, false);
+ auto cpuMatrixB = Matrix::create(dimK, dimN, false, false);
+ auto gpuMatrixB = Matrix::create(dimK, dimN, false, true);
+ auto cpuDenseB = Matrix::create(dimK, dimN, false, false);
+ CpuSparseMatrix cpuMatrixC(dimM, dimN, nnz, FLOAT_VALUE, FORMAT, false);
+ CpuSparseMatrix gpuMatrixC_d2h(dimM, dimN, nnz, FLOAT_VALUE, FORMAT, false);
+ GpuSparseMatrix gpuMatrixC(dimM, dimN, nnz, FLOAT_VALUE, FORMAT, false);
+ CpuMatrix cpuDenseC(dimM, dimN, false);
+ /*matrix init*/
+ hl_stream_t stream(HPPL_STREAM_1);
+ cpuMatrixA->randomizeUniform();
+ cpuMatrixB->randomizeUniform();
+ cpuMatrixC.randomizeUniform();
+ gpuMatrixA->copyFrom(*cpuMatrixA, stream);
+ gpuMatrixB->copyFrom(*cpuMatrixB, stream);
+ gpuMatrixC.copyFrom(cpuMatrixC, stream);
+ cpuDenseA->copyFrom(*cpuMatrixA);
+ cpuDenseB->copyFrom(*cpuMatrixB);
+ cpuDenseC.copyFrom(cpuMatrixC);
+ hl_stream_synchronize(stream);
+ /*matrix mul*/
+ BufferArgs cpuInputs;
+ BufferArgs cpuOutputs;
+ cpuInputs.addArg(*cpuMatrixA);
+ cpuInputs.addArg(*cpuMatrixB);
+ cpuOutputs.addArg(cpuMatrixC, ADD_TO);
+ cpuFunc->calc(cpuInputs, cpuOutputs);
+ BufferArgs gpuInputs;
+ BufferArgs gpuOutputs;
+ gpuInputs.addArg(*gpuMatrixA);
+ gpuInputs.addArg(*gpuMatrixB);
+ gpuOutputs.addArg(gpuMatrixC, ADD_TO);
+ gpuFunc->calc(gpuInputs, gpuOutputs);
+ BufferArgs denseInputs;
+ BufferArgs denseOutputs;
+ denseInputs.addArg(*cpuDenseA);
+ denseInputs.addArg(*cpuDenseB);
+ denseOutputs.addArg(cpuDenseC, ADD_TO);
+ cpuFunc->calc(denseInputs, denseOutputs);
+ gpuMatrixC_d2h.copyFrom(gpuMatrixC, stream);
+ hl_stream_synchronize(stream);
+ /*check result*/
+ checkSMatrixEqual(cpuMatrixC, gpuMatrixC_d2h);
+ checkSMatrixEqual2Dense(cpuMatrixC, cpuDenseC);
+ }
+ TEST(Matrix, SparseDDMul) {
+ LOG(INFO) << "test for sparse = dense * dense matrix";
+ for (const auto dimM : {10, 100, 1000}) {
+ for (const auto dimN : {10, 100}) {
+ for (const auto dimK : {3, 10}) {
+ for (const auto nnz : {3, 10}) {
+ for (const auto FORMAT : {SPARSE_CSC, SPARSE_CSR}) {
+ VLOG(3) << setiosflags(std::ios::left) << std::setfill(' ')
+ << " dimM=" << std::setw(5) << dimM
+ << " dimN=" << std::setw(5) << dimN
+ << " dimK=" << std::setw(5) << dimK
+ << " nnz=" << std::setw(5) << nnz
+ << " format=" << std::setw(5) << FORMAT;
+ testSparseDDMatrix(dimM, dimN, dimK, nnz, FORMAT);
+ }
+ }
+ }
+ }
+ }
}
......@@ -177,7 +177,6 @@ GpuSparseMatrix::GpuSparseMatrix(real* value,
hl_sparse_matrix_s_ptr tmp2(tmp, hl_destruct_sparse_matrix);
sMatrix_ = tmp2;
}
LOG(INFO) << "weight to matrix ";
}
}
......
......@@ -30,6 +30,17 @@ void checkMatrixEqual(const MatrixPtr& a, const MatrixPtr& b) {
}
}
+ void checkSMatrixEqual(const CpuSparseMatrix& a, const CpuSparseMatrix& b) {
+ ASSERT_EQ(a.getWidth(), b.getWidth());
+ ASSERT_EQ(a.getHeight(), b.getHeight());
+ ASSERT_EQ(a.isTransposed(), b.isTransposed());
+ ASSERT_EQ(a.getFormat(), b.getFormat());
+ ASSERT_EQ(a.getElementCnt(), b.getElementCnt());
+ for (size_t r = 0; r < a.getElementCnt(); ++r) {
+ ASSERT_FLOAT_EQ(a.getValue()[r], b.getValue()[r]);
+ }
+ }
void checkSMatrixEqual(const CpuSparseMatrixPtr& a,
const CpuSparseMatrixPtr& b) {
ASSERT_EQ(a->getWidth(), b->getWidth());
......@@ -73,6 +84,36 @@ void checkSMatrixEqual2(const CpuSparseMatrixPtr& a,
}
}
+ void checkSMatrixEqual2Dense(const CpuSparseMatrix& a, const CpuMatrix& b) {
+ ASSERT_EQ(a.getWidth(), b.getWidth());
+ ASSERT_EQ(a.getHeight(), b.getHeight());
+ ASSERT_EQ(a.isTransposed(), b.isTransposed());
+ if (a.getFormat() == SPARSE_CSC) {
+ int* rows = a.getRows();
+ for (size_t i = 0; i < a.getWidth(); i++) {
+ for (size_t j = a.getColStartIdx(i); j < a.getColStartIdx(i + 1); j++) {
+ if (a.getValueType() == FLOAT_VALUE) {
+ ASSERT_FLOAT_EQ(a.getValue()[j], b.getElement(rows[j], i));
+ } else {
+ ASSERT_FLOAT_EQ(1.0, b.getElement(rows[j], i));
+ }
+ }
+ }
+ } else {
+ int* cols = a.getCols();
+ for (size_t i = 0; i < a.getHeight(); i++) {
+ for (size_t j = a.getRowStartIdx(i); j < a.getRowStartIdx(i + 1); j++) {
+ if (a.getValueType() == FLOAT_VALUE) {
+ ASSERT_FLOAT_EQ(a.getValue()[j], b.getElement(i, cols[j]));
+ } else {
+ ASSERT_FLOAT_EQ(1.0, b.getElement(i, cols[j]));
+ }
+ }
+ }
+ }
+ }
void checkSMatrixEqual2Dense(const CpuSparseMatrixPtr& a,
const CpuMatrixPtr& b) {
ASSERT_EQ(a->getWidth(), b->getWidth());
......
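For reference when reading checkSMatrixEqual2Dense above: row i of a CSR matrix occupies the half-open range [getRowStartIdx(i), getRowStartIdx(i + 1)) of the value and column-index arrays. A tiny worked example with illustrative values:

// The 2x3 matrix
//   [ 1 0 2 ]
//   [ 0 3 0 ]
// in CSR form:
//   rowStart = {0, 2, 3}   // row i spans [rowStart[i], rowStart[i+1])
//   cols     = {0, 2, 1}   // column index of each stored entry
//   value    = {1, 2, 3}
// so value[j] pairs with getElement(i, cols[j]), which is exactly what the
// FLOAT_VALUE branch asserts; the non-FLOAT_VALUE branch treats every stored
// entry as 1.0. The CSC case is symmetric, using getColStartIdx() and the
// rows array.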