From 1ca2846ef688bfc532918e9e977eb4b519f3c4e8 Mon Sep 17 00:00:00 2001
From: xutianbing
Date: Tue, 17 Jan 2017 21:50:57 -0800
Subject: [PATCH] Pass unit test for CpuMatrix::mul(CpuMatrix, CpuSparseMatrix)
 and GpuMatrix::mul(CpuMatrix, GpuSparseMatrix)

---
 paddle/function/BufferArg.cpp |  12 ++-
 paddle/function/BufferArg.h   |  27 +++----
 paddle/function/MulOp.cpp     |  50 +++++++++++--
 paddle/function/MulOp.h       |   2 +
 paddle/function/MulOpTest.cpp | 134 ++++++++++++++++++++++++++++++++++
 5 files changed, 198 insertions(+), 27 deletions(-)

diff --git a/paddle/function/BufferArg.cpp b/paddle/function/BufferArg.cpp
index 5d595deb12c..39773c76280 100644
--- a/paddle/function/BufferArg.cpp
+++ b/paddle/function/BufferArg.cpp
@@ -32,14 +32,22 @@ const SparseMatrixArg& BufferArg::sparse() const {
 SparseMatrixArg::SparseMatrixArg(const CpuSparseMatrix& sparse, ArgType argType)
     : BufferArg(sparse, argType),
       row_(reinterpret_cast<void*>(sparse.getRows()), VALUE_TYPE_INT32),
-      col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32) {
+      col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32),
+      /// todo(tianbing), make sure how to get NNZ
+      nnz_(sparse.getElementCnt()),
+      format_(sparse.getFormat()),
+      type_(sparse.getValueType()) {
   bufferType_ = TENSOR_SPARSE;
 }
 
 SparseMatrixArg::SparseMatrixArg(const GpuSparseMatrix& sparse, ArgType argType)
     : BufferArg(sparse, argType),
       row_(reinterpret_cast<void*>(sparse.getRows()), VALUE_TYPE_INT32),
-      col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32) {
+      col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32),
+      /// todo(tianbing), make sure how to get NNZ
+      nnz_(sparse.getElementCnt()),
+      format_(sparse.getFormat()),
+      type_(sparse.getValueType()) {
   bufferType_ = TENSOR_SPARSE;
 }
diff --git a/paddle/function/BufferArg.h b/paddle/function/BufferArg.h
index 2da1115ec96..eac3fe44203 100644
--- a/paddle/function/BufferArg.h
+++ b/paddle/function/BufferArg.h
@@ -30,13 +30,6 @@ enum BufferType {
   TENSOR_SPARSE = 4
 };
 
-enum SparseDataType {
-  SPARSE_NO_VALUE = 0,  // do not need value pointer, all values are 1
-  SPARSE_FLOAT_VALUE = 1
-};
-
-enum SparseDataFormat { SPARSE_CSR_FORMAT = 0, SPARSE_CSC_FORMAT = 1 };
-
 class BufferArg;
 class SequenceArg;
 class SparseMatrixArg;
@@ -272,8 +265,8 @@ public:
                   const BufferArg& row,
                   const BufferArg& col,
                   size_t nnz,
-                  SparseDataFormat format,
-                  SparseDataType type,
+                  SparseFormat format,
+                  SparseValueType type,
                   ArgType argType = UNSPECIFIED)
       : BufferArg(buf, valueType, shape, argType),
         row_(row),
@@ -286,9 +279,9 @@ public:
     CHECK_EQ(shape_.ndims(), (size_t)2);
     CHECK_EQ(row_.shape().ndims(), (size_t)1);
     CHECK_EQ(col_.shape().ndims(), (size_t)1);
-    if (format == SPARSE_CSR_FORMAT) {
+    if (format == SPARSE_CSR) {
       CHECK_EQ(nnz, col.shape()[0]);
-    } else if (format == SPARSE_CSC_FORMAT) {
+    } else if (format == SPARSE_CSC) {
       CHECK_EQ(nnz, row.shape()[0]);
     }
   }
@@ -310,8 +303,8 @@ public:
               shape_[0],
               shape_[1],
               nnz_,
-              static_cast<SparseValueType>(type_),
-              static_cast<SparseFormat>(format_),
+              type_,
+              format_,
               trans_);
   }
@@ -323,16 +316,16 @@ public:
   size_t nnz() const { return nnz_; }
 
-  SparseDataFormat dataFormat() const { return format_; }
+  SparseFormat dataFormat() const { return format_; }
 
-  SparseDataType dataType() const { return type_; }
+  SparseValueType dataType() const { return type_; }
 
 private:
   BufferArg row_;
   BufferArg col_;
   size_t nnz_;
-  SparseDataFormat format_;
-  SparseDataType type_;
+  SparseFormat format_;
+  SparseValueType type_;
 };
 
 } // namespace paddle
diff --git a/paddle/function/MulOp.cpp b/paddle/function/MulOp.cpp
index 1c593bb083e..85f7f535dcf 100644
--- a/paddle/function/MulOp.cpp
+++ b/paddle/function/MulOp.cpp
@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "MulOp.h"
+/// todo(tianbing), delete it
+#include <iostream>
 #include "paddle/math/MathFunctions.h"
 #include "paddle/math/SIMDFunctions.h"
 #include "paddle/utils/ThreadLocal.h"
@@ -496,16 +498,48 @@ public:
     CHECK_EQ(outputs[0].shape().ndims(), (size_t)2);
     CHECK_EQ(outputs[0].getArgType(), ADD_TO);
 
-    auto in1_mat = inputs[0].matrix<Device>();
-    if (inputs[0].isSparseArg()) {
-      in1_mat = inputs[0].sparse().SparseMatrix<Device>();
+    /// todo(tianbing), support SparseMatrixArg for out_mat
+    auto out_mat = outputs[0].matrix<Device>();
+    LOG(INFO) << "out_mat:";
+    out_mat.print(std::cout);
+    if (!inputs[0].isSparseArg() && !inputs[1].isSparseArg()) {
+      LOG(INFO) << "in1_mat:";
+      inputs[0].matrix<Device>().print(std::cout);
+      LOG(INFO) << "in2_mat:";
+      inputs[1].matrix<Device>().print(std::cout);
+      MulOp<Device>(out_mat,
+                    inputs[0].matrix<Device>(),
+                    inputs[1].matrix<Device>(),
+                    alpha_,
+                    beta_);
+      return;
     }
-    auto in2_mat = inputs[1].matrix<Device>();
-    if (inputs[1].isSparseArg()) {
-      in2_mat = inputs[1].sparse().SparseMatrix<Device>();
+
+    if (!inputs[0].isSparseArg() && inputs[1].isSparseArg()) {
+      LOG(INFO) << "in1_mat:";
+      inputs[0].matrix<Device>().print(std::cout);
+      LOG(INFO) << "in2_mat:";
+      inputs[1].sparse().SparseMatrix<Device>().print(std::cout);
+      MulOp<Device>(out_mat,
+                    inputs[0].matrix<Device>(),
+                    inputs[1].sparse().SparseMatrix<Device>(),
+                    alpha_,
+                    beta_);
+      return;
+    }
+
+    if (inputs[0].isSparseArg() && !inputs[1].isSparseArg()) {
+      LOG(INFO) << "in1_mat:";
+      inputs[0].sparse().SparseMatrix<Device>().print(std::cout);
+      LOG(INFO) << "in2_mat:";
+      inputs[1].matrix<Device>().print(std::cout);
+      MulOp<Device>(out_mat,
+                    inputs[0].sparse().SparseMatrix<Device>(),
+                    inputs[1].matrix<Device>(),
+                    alpha_,
+                    beta_);
+      return;
     }
-    auto out_mat = outputs[0].matrix<Device>();
-    MulOp<Device>(out_mat, in1_mat, in2_mat, alpha_, beta_);
   }
 
 private:
diff --git a/paddle/function/MulOp.h b/paddle/function/MulOp.h
index b7b1f56af10..23bfd0fa932 100644
--- a/paddle/function/MulOp.h
+++ b/paddle/function/MulOp.h
@@ -15,6 +15,8 @@ limitations under the License. */
 #pragma once
 
 #include "Function.h"
+/// todo(tianbing), delete
+#include <iostream>
 #include "paddle/math/Matrix.h"
 #include "paddle/math/SparseMatrix.h"
diff --git a/paddle/function/MulOpTest.cpp b/paddle/function/MulOpTest.cpp
index 3229193660e..fd02504678e 100644
--- a/paddle/function/MulOpTest.cpp
+++ b/paddle/function/MulOpTest.cpp
@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include <gtest/gtest.h>
+/// todo(tianbing), delete
+#include <iostream>
 #include "FunctionTest.h"
 #include "paddle/math/Matrix.h"
 #include "paddle/math/SparseMatrix.h"
@@ -72,6 +74,7 @@ void testMatrixMul(bool transa, bool transb, int dimM, int dimN, int dimK) {
 }
 
 TEST(Matrix, mul) {
+  LOG(INFO) << "test for dense = dense * dense matrix";
   for (auto transa : {false, true}) {
     for (auto transb : {false, true}) {
       for (auto dimM : {1, 10, 100}) {
@@ -93,3 +96,134 @@ TEST(Matrix, mul) {
     }
   }
 }
+
+struct MatrixPara {
+  size_t height;
+  size_t width;
+  bool trans;
+  bool sparse;
+  size_t nnz;
+  SparseFormat format;
+};
+
+/**
+ * C += A * B, A, C dense, B sparse
+ */
+void testDSparseDMatrix() {
+  real alpha = 1.0;
+  real beta = 1.0;
+  const auto cpuFunc = FunctionBase::funcRegistrar_.createByType("MulOp-CPU");
+  cpuFunc->init(FuncConfig().set("scaleAB", alpha).set("scaleT", beta));
+  const auto gpuFunc = FunctionBase::funcRegistrar_.createByType("MulOp-GPU");
+  gpuFunc->init(FuncConfig().set("scaleAB", alpha).set("scaleT", beta));
+
+  constexpr size_t dimM = 2;
+  constexpr size_t dimN = 2;
+  constexpr size_t dimK = 3;
+  constexpr size_t NNZ = 3;
+  constexpr SparseFormat FORMAT = SPARSE_CSC;
+
+  MatrixPara paraA{dimM, dimK, /*trans*/ false, /*sparse*/ false, NNZ, FORMAT};
+  MatrixPara paraB{dimK, dimN, /*trans*/ false, /*sparse*/ true, NNZ, FORMAT};
+  MatrixPara paraC{dimM, dimN, /*trans*/ false, /*sparse*/ false, NNZ, FORMAT};
+
+  auto cpuMatrixA =
+      Matrix::create(paraA.height, paraA.width, paraA.trans, false);
+  auto gpuMatrixA =
+      Matrix::create(paraA.height, paraA.width, paraA.trans, true);
+  auto cpuDenseA =
+      Matrix::create(paraA.height, paraA.width, paraA.trans, false);
+
+  CpuSparseMatrix cpuMatrixB(paraB.height,
+                             paraB.width,
+                             paraB.nnz,
+                             FLOAT_VALUE,
+                             paraB.format,
+                             paraB.trans);
+
+  GpuSparseMatrix gpuMatrixB(paraB.height,
+                             paraB.width,
+                             paraB.nnz,
+                             FLOAT_VALUE,
+                             paraB.format,
+                             paraB.trans);
+
+  auto cpuDenseB =
+      Matrix::create(paraB.height, paraB.width, paraB.trans, false);
+  auto cpuMatrixC =
+      Matrix::create(paraC.height, paraC.width, paraC.trans, false);
+  auto gpuMatrixC =
+      Matrix::create(paraC.height, paraC.width, paraC.trans, true);
+  auto cpuDenseC =
+      Matrix::create(paraC.height, paraC.width, paraC.trans, false);
+  auto gpuMatrixC_d2h =
+      Matrix::create(paraC.height, paraC.width, paraC.trans, false);
+
+  /*matrix init*/
+  hl_stream_t stream(HPPL_STREAM_1);
+  cpuMatrixA->randomizeUniform();
+  cpuMatrixB.randomizeUniform();
+  cpuMatrixC->randomizeUniform();
+
+  gpuMatrixA->copyFrom(*cpuMatrixA, stream);
+  gpuMatrixB.copyFrom(cpuMatrixB, stream);
+  gpuMatrixC->copyFrom(*cpuMatrixC, stream);
+
+  cpuDenseA->copyFrom(*cpuMatrixA);
+  cpuDenseB->copyFrom(cpuMatrixB);
+  cpuDenseC->copyFrom(*cpuMatrixC);
+  hl_stream_synchronize(stream);
+
+  LOG(INFO) << "cpuMatrixA: ";
+  cpuMatrixA->print(std::cout);
+  LOG(INFO) << "cpuMatrixB: ";
+  (&cpuMatrixB)->print(std::cout);
+  LOG(INFO) << "cpuMatrixC: ";
+  cpuMatrixC->print(std::cout);
+
+  LOG(INFO) << "cpuDenseA: ";
+  cpuDenseA->print(std::cout);
+  LOG(INFO) << "cpuDenseB: ";
+  cpuDenseB->print(std::cout);
+  LOG(INFO) << "cpuDenseC: ";
+  cpuDenseC->print(std::cout);
+
+  LOG(INFO) << "gpuMatrixA: ";
+  gpuMatrixA->print(std::cout);
+  LOG(INFO) << "gpuMatrixB: ";
+  (&gpuMatrixB)->print(std::cout);
+  LOG(INFO) << "gpuMatrixC: ";
+  gpuMatrixC->print(std::cout);
+
+  /*matrix mul*/
+  BufferArgs cpuInputs;
+  BufferArgs cpuOutputs;
+  cpuInputs.addArg(*cpuMatrixA);
+  cpuInputs.addArg(cpuMatrixB);
+  cpuOutputs.addArg(*cpuMatrixC, ADD_TO);
+  cpuFunc->calc(cpuInputs, cpuOutputs);
+
+  BufferArgs gpuInputs;
+  BufferArgs gpuOutputs;
+  gpuInputs.addArg(*gpuMatrixA);
+  gpuInputs.addArg(gpuMatrixB);
+  gpuOutputs.addArg(*gpuMatrixC, ADD_TO);
+  gpuFunc->calc(gpuInputs, gpuOutputs);
+
+  BufferArgs denseInputs;
+  BufferArgs denseOutputs;
+  denseInputs.addArg(*cpuDenseA);
+  denseInputs.addArg(*cpuDenseB);
+  denseOutputs.addArg(*cpuDenseC, ADD_TO);
+  cpuFunc->calc(denseInputs, denseOutputs);
+
+  gpuMatrixC_d2h->copyFrom(*gpuMatrixC, stream);
+  hl_stream_synchronize(stream);
+  /*check result*/
+  // autotest::TensorCheckErr(*cpuMatrixC, *gpuMatrixC);
+  checkMatrixEqual(cpuMatrixC, cpuDenseC);
+  checkMatrixEqual(cpuMatrixC, gpuMatrixC_d2h);
+}
+
+TEST(Matrix, SparseMatrixMul) {
+  LOG(INFO) << "test for dense = dense * sparse matrix";
+  testDSparseDMatrix();
+}
--
GitLab
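
Reviewer note (not part of the patch): testDSparseDMatrix() above exercises C += alpha * A * B with the B operand stored in CSC format. The standalone sketch below only illustrates what that dense-times-CSC accumulation computes, outside the Paddle Function framework; the function name cscMulAdd and the raw CSC arrays are illustrative assumptions, not APIs introduced by this change.

// Illustration only: C += alpha * A * B, with A and C dense (row-major)
// and B sparse in CSC format. In CSC, colOffsets[j]..colOffsets[j+1]
// index the nonzeros of column j, and rows[p] is the row of values[p].
#include <cstdio>
#include <vector>

void cscMulAdd(std::vector<float>& C,             // dimM x dimN, row-major
               const std::vector<float>& A,       // dimM x dimK, row-major
               const std::vector<float>& values,  // nnz values of B (dimK x dimN)
               const std::vector<int>& rows,      // row index of each nonzero
               const std::vector<int>& colOffsets,  // dimN + 1 column offsets
               int dimM, int dimN, int dimK, float alpha) {
  for (int j = 0; j < dimN; ++j) {
    for (int p = colOffsets[j]; p < colOffsets[j + 1]; ++p) {
      int k = rows[p];  // B(k, j) = values[p]
      for (int m = 0; m < dimM; ++m) {
        C[m * dimN + j] += alpha * A[m * dimK + k] * values[p];
      }
    }
  }
}

int main() {
  // A is 2x3; B is 3x2 with 3 nonzeros: B(0,0)=1, B(2,0)=2, B(1,1)=3.
  std::vector<float> A = {1, 2, 3, 4, 5, 6};
  std::vector<float> values = {1, 2, 3};
  std::vector<int> rows = {0, 2, 1};
  std::vector<int> colOffsets = {0, 2, 3};
  std::vector<float> C(4, 0.0f);  // 2x2, zero-initialized so C = A * B
  cscMulAdd(C, A, values, rows, colOffsets, 2, 2, 3, 1.0f);
  for (int m = 0; m < 2; ++m) {
    std::printf("%g %g\n", C[m * 2], C[m * 2 + 1]);  // expected: 7 6 / 16 15
  }
  return 0;
}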