From bc5d7bb6d221d7f921db70a8d1c7757bbff8ac15 Mon Sep 17 00:00:00 2001 From: xutianbing Date: Sun, 22 Jan 2017 14:46:40 -0800 Subject: [PATCH] Add Sparse = dense * dense unit test with Daoyuan's Function test --- paddle/function/BufferArg.h | 3 ++ paddle/function/FunctionTest.h | 40 +++++++++++++--- paddle/function/MulOp.cpp | 7 +++ paddle/function/MulOpTest.cpp | 86 +++++++++------------------------- 4 files changed, 67 insertions(+), 69 deletions(-) diff --git a/paddle/function/BufferArg.h b/paddle/function/BufferArg.h index 17a4e4a6b0e..7565047a570 100644 --- a/paddle/function/BufferArg.h +++ b/paddle/function/BufferArg.h @@ -172,6 +172,7 @@ public: bool isTransposed() const { return trans_; } bool isSparseArg() const { return TENSOR_SPARSE == bufferType_; } bool isSequenceArg() const { return TENSOR_SEQUENCE_DATA == bufferType_; } + virtual size_t numElements() const { return shape_.getElements(); } const SequenceArg& sequence() const; const SparseMatrixArg& sparse() const; @@ -353,6 +354,8 @@ public: size_t nnz() const { return nnz_; } + size_t numElements() const override { return nnz_; } + SparseFormat dataFormat() const { return format_; } SparseValueType dataType() const { return type_; } diff --git a/paddle/function/FunctionTest.h b/paddle/function/FunctionTest.h index 9d38671bc3c..6515cba1629 100644 --- a/paddle/function/FunctionTest.h +++ b/paddle/function/FunctionTest.h @@ -101,6 +101,34 @@ public: output.isTransposed())); } + /// add and init output sparse matrix + void addOutputs(const SparseMatrixArg& output, ArgType argType = ASSIGN_TO) { + cpuSparse_ = std::make_shared<CpuSparseMatrix>(output.shape()[0], + output.shape()[1], + output.nnz(), + output.dataType(), + output.dataFormat(), + output.isTransposed()); + + gpuSparse_ = std::make_shared<GpuSparseMatrix>(output.shape()[0], + output.shape()[1], + output.nnz(), + output.dataType(), + output.dataFormat(), + output.isTransposed()); + + /// init sparse matrix + hl_stream_t stream(HPPL_STREAM_1); + cpuSparse_->randomizeUniform();
gpuSparse_->copyFrom(*cpuSparse_, stream); + hl_stream_synchronize(stream); + + cpuOutputs_.emplace_back( + std::make_shared<SparseMatrixArg>(*cpuSparse_, argType)); + gpuOutputs_.emplace_back( + std::make_shared<SparseMatrixArg>(*gpuSparse_, argType)); + } + void addInputs(const SequenceArg& input) { size_t batchSize = input.shape()[0]; size_t numSeqs = batchSize / 10 + 1; @@ -199,8 +227,7 @@ protected: void initOutputs() { for (size_t i = 0; i < cpuOutputs_.size(); i++) { if (cpuOutputs_[i]->isSparseArg()) { - LOG(INFO) << "output sparse matrix already init"; - continue; + continue; /// sparse matrix already init } initArg(*cpuOutputs_[i]); @@ -218,10 +245,11 @@ protected: void compareOutputs() { for (size_t i = 0; i < cpuOutputs_.size(); i++) { // TODO, Need a BufferCheck used to compare the two buffers. - auto cpu = cpuOutputs_[i]; - auto gpu = gpuOutputs_[i]; - CpuVector cpuVector(cpu->shape().getElements(), (real*)cpu->data()); - GpuVector gpuVector(cpu->shape().getElements(), (real*)gpu->data()); + const auto cpu = cpuOutputs_[i]; + const auto gpu = gpuOutputs_[i]; + CHECK_EQ(cpu->numElements(), gpu->numElements()); + CpuVector cpuVector(cpu->numElements(), (real*)cpu->data()); + GpuVector gpuVector(gpu->numElements(), (real*)gpu->data()); autotest::TensorCheckErr(cpuVector, gpuVector); } } diff --git a/paddle/function/MulOp.cpp b/paddle/function/MulOp.cpp index bd3bc5c087d..4d7f1a7fa92 100644 --- a/paddle/function/MulOp.cpp +++ b/paddle/function/MulOp.cpp @@ -319,6 +319,13 @@ public: auto outSparseMat = outputs[0].sparse().SparseMatrix(); if (!inputs[0].isSparseArg() && !inputs[1].isSparseArg() && outputs[0].isSparseArg()) { + /* + LOG(INFO) << "input0"; + inputs[0].matrix().print(std::cout); + LOG(INFO) << "input1"; + inputs[1].matrix().print(std::cout); + LOG(INFO) << "output sparse matrix"; + outSparseMat.print(std::cout); */ MulOp(outSparseMat, inputs[0].matrix(), inputs[1].matrix(), diff --git a/paddle/function/MulOpTest.cpp b/paddle/function/MulOpTest.cpp index
7300a2014bd..05460c80970 100644 --- a/paddle/function/MulOpTest.cpp +++ b/paddle/function/MulOpTest.cpp @@ -183,75 +183,35 @@ TEST(MulOp, DDSparseMul) { * C += A * B, A sparse, B, C dense * sparse = dense * dense */ -void testSparseDDMatrix( +void testFuncSparseDDMatrix( size_t dimM, size_t dimN, size_t dimK, size_t nnz, SparseFormat FORMAT) { real alpha = 1.0; real beta = 1.0; - const auto cpuFunc = FunctionBase::funcRegistrar_.createByType("MulOp-CPU"); - cpuFunc->init(FuncConfig().set("scaleAB", alpha).set("scaleT", beta)); - const auto gpuFunc = FunctionBase::funcRegistrar_.createByType("MulOp-GPU"); - gpuFunc->init(FuncConfig().set("scaleAB", alpha).set("scaleT", beta)); - - auto cpuMatrixA = Matrix::create(dimM, dimK, false, false); - auto gpuMatrixA = Matrix::create(dimM, dimK, false, true); - auto cpuDenseA = Matrix::create(dimM, dimK, false, false); - - auto cpuMatrixB = Matrix::create(dimK, dimN, false, false); - auto gpuMatrixB = Matrix::create(dimK, dimN, false, true); - auto cpuDenseB = Matrix::create(dimK, dimN, false, false); - - CpuSparseMatrix cpuMatrixC(dimM, dimN, nnz, FLOAT_VALUE, FORMAT, false); - CpuSparseMatrix gpuMatrixC_d2h(dimM, dimN, nnz, FLOAT_VALUE, FORMAT, false); - GpuSparseMatrix gpuMatrixC(dimM, dimN, nnz, FLOAT_VALUE, FORMAT, false); - CpuMatrix cpuDenseC(dimM, dimN, false); - - /*matrix init*/ - hl_stream_t stream(HPPL_STREAM_1); - cpuMatrixA->randomizeUniform(); - cpuMatrixB->randomizeUniform(); - cpuMatrixC.randomizeUniform(); - - gpuMatrixA->copyFrom(*cpuMatrixA, stream); - gpuMatrixB->copyFrom(*cpuMatrixB, stream); - gpuMatrixC.copyFrom(cpuMatrixC, stream); - - cpuDenseA->copyFrom(*cpuMatrixA); - cpuDenseB->copyFrom(*cpuMatrixB); - cpuDenseC.copyFrom(cpuMatrixC); - hl_stream_synchronize(stream); - - /*matrix mul*/ - BufferArgs cpuInputs; - BufferArgs cpuOutputs; - cpuInputs.addArg(*cpuMatrixA); - cpuInputs.addArg(*cpuMatrixB); - cpuOutputs.addArg(cpuMatrixC, ADD_TO); - cpuFunc->calc(cpuInputs, cpuOutputs); - - BufferArgs 
gpuInputs; - BufferArgs gpuOutputs; - gpuInputs.addArg(*gpuMatrixA); - gpuInputs.addArg(*gpuMatrixB); - gpuOutputs.addArg(gpuMatrixC, ADD_TO); - gpuFunc->calc(gpuInputs, gpuOutputs); - - BufferArgs denseInputs; - BufferArgs denseOutputs; - denseInputs.addArg(*cpuDenseA); - denseInputs.addArg(*cpuDenseB); - denseOutputs.addArg(cpuDenseC, ADD_TO); - cpuFunc->calc(denseInputs, denseOutputs); + // init Test object + FunctionCompare test("MulOp", + FuncConfig().set("scaleAB", alpha).set("scaleT", beta)); + // prepare input arguments + /// matrix A : M * K + test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{dimM, dimK})); - gpuMatrixC_d2h.copyFrom(gpuMatrixC, stream); - hl_stream_synchronize(stream); + /// matrix B: K * N + test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{dimK, dimN})); - /*check result*/ - checkSMatrixEqual(cpuMatrixC, gpuMatrixC_d2h); - checkSMatrixEqual2Dense(cpuMatrixC, cpuDenseC); + /// output sparse matrix C: M * N + test.addOutputs(SparseMatrixArg(VALUE_TYPE_FLOAT, + TensorShape{dimM, dimN}, + nnz, + FORMAT, + FLOAT_VALUE, + UNSPECIFIED, + false), + ADD_TO); + // run Function + test.run(); } -TEST(Matrix, SparseDDMul) { - LOG(INFO) << "test for sparse = dense * dense matrix"; +TEST(MulOp, SparseDDMul) { + LOG(INFO) << "function test for sparse = dense * dense matrix"; for (const auto dimM : {10, 100, 1000}) { for (const auto dimN : {10, 100}) { for (const auto dimK : {3, 10}) { @@ -263,7 +223,7 @@ TEST(Matrix, SparseDDMul) { << " dimK=" << std::setw(5) << dimK << " nnz=" << std::setw(5) << nnz << " format=" << std::setw(5) << FORMAT; - testSparseDDMatrix(dimM, dimN, dimK, nnz, FORMAT); + testFuncSparseDDMatrix(dimM, dimN, dimK, nnz, FORMAT); } } } -- GitLab