Commit 077f936a
Authored Jan 21, 2017 by xutianbing
Support SparseMatrixArg unit test using Daoyuan's new Function Test.
Parent: 316bf75a
Showing 5 changed files with 203 additions and 197 deletions (+203 −197)
paddle/function/BufferArg.cpp    +0   −2
paddle/function/BufferArg.h      +43  −6
paddle/function/FunctionTest.h   +81  −17
paddle/function/MulOp.h          +2   −0
paddle/function/MulOpTest.cpp    +77  −172
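The reworked MulOp tests below drive the CPU and GPU kernels through the FunctionCompare helper instead of registering and calling each function by hand. A minimal sketch of that flow, assuming the FunctionCompare, BufferArg, and TensorShape APIs as they appear in this diff (the dimensions are illustrative, not taken from the commit):

  // Sketch only: mirrors testFuncDDDMatrix in this commit, with made-up sizes.
  FunctionCompare test("MulOp",
                       FuncConfig().set("scaleAB", (real)1.0).set("scaleT", (real)1.0));
  test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{16, 32}));          // A : 16 x 32
  test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{32, 8}));           // B : 32 x 8
  test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{16, 8}), ADD_TO);  // C += A * B
  test.run();  // runs MulOp-CPU and MulOp-GPU and compares the outputs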
paddle/function/BufferArg.cpp
@@ -33,7 +33,6 @@ SparseMatrixArg::SparseMatrixArg(const CpuSparseMatrix& sparse, ArgType argType)
     : BufferArg(sparse, argType),
       row_(reinterpret_cast<void*>(sparse.getRows()), VALUE_TYPE_INT32),
       col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32),
-      /// todo(tianbing), make sure how to get NNZ
       nnz_(sparse.getElementCnt()),
       format_(sparse.getFormat()),
       type_(sparse.getValueType()) {
@@ -44,7 +43,6 @@ SparseMatrixArg::SparseMatrixArg(const GpuSparseMatrix& sparse, ArgType argType)
     : BufferArg(sparse, argType),
       row_(reinterpret_cast<void*>(sparse.getRows()), VALUE_TYPE_INT32),
       col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32),
-      /// todo(tianbing), make sure how to get NNZ
       nnz_(sparse.getElementCnt()),
       format_(sparse.getFormat()),
       type_(sparse.getValueType()) {
paddle/function/BufferArg.h
@@ -71,17 +71,24 @@ public:
 public:
   BufferArg(ValueType valueType,
             const TensorShape& shape,
-            ArgType argType = UNSPECIFIED)
+            ArgType argType = UNSPECIFIED,
+            bool trans = false)
       : buf_(nullptr),
         valueType_(valueType),
         shape_(shape),
-        argType_(argType) {}
+        argType_(argType),
+        trans_(trans) {}

   BufferArg(void* buf,
             ValueType valueType,
             const TensorShape& shape,
-            ArgType argType = UNSPECIFIED)
-      : buf_(buf), valueType_(valueType), shape_(shape), argType_(argType) {}
+            ArgType argType = UNSPECIFIED,
+            bool trans = false)
+      : buf_(buf),
+        valueType_(valueType),
+        shape_(shape),
+        argType_(argType),
+        trans_(trans) {}

   BufferArg(void* buf, ValueType valueType)
       : buf_(buf), valueType_(valueType) {}
@@ -162,6 +169,7 @@ public:
   ValueType valueType() const { return valueType_; }
   BufferType bufferType() const { return bufferType_; }
   const TensorShape& shape() const { return shape_; }
+  bool isTransposed() const { return trans_; }
   bool isSparseArg() const { return TENSOR_SPARSE == bufferType_; }
   bool isSequenceArg() const { return TENSOR_SEQUENCE_DATA == bufferType_; }
@@ -175,6 +183,7 @@ protected:
   BufferType bufferType_{TENSOR_UNKNOWN};
   ArgType argType_{UNSPECIFIED};
+  bool trans_{false};
   // todo(tianbing), add deviceType_
   // leading dimensions. The size is dims_.size()
   // Dims lds_;
 };
@@ -267,8 +276,9 @@ public:
                   size_t nnz,
                   SparseFormat format,
                   SparseValueType type,
-                  ArgType argType = UNSPECIFIED)
-      : BufferArg(buf, valueType, shape, argType),
+                  ArgType argType = UNSPECIFIED,
+                  bool trans = false)
+      : BufferArg(buf, valueType, shape, argType, trans),
         row_(row),
         col_(col),
         nnz_(nnz),
@@ -286,6 +296,33 @@ public:
     }
   }

+  SparseMatrixArg(ValueType valueType,
+                  const TensorShape& shape,
+                  size_t nnz,
+                  SparseFormat format,
+                  SparseValueType type,
+                  ArgType argType = UNSPECIFIED,
+                  bool trans = false)
+      : BufferArg(valueType, shape, argType, trans),
+        /// len of row_ : height + 1 (CSR), buf_ == nullptr
+        row_(format == SPARSE_CSR
+                 ? BufferArg(VALUE_TYPE_INT32, TensorShape{shape[0] + 1})
+                 : BufferArg(VALUE_TYPE_INT32, TensorShape{nnz})),
+        /// len of col_ : width + 1 (CSC), buf_ == nullptr
+        col_(format == SPARSE_CSR
+                 ? BufferArg(VALUE_TYPE_INT32, TensorShape{nnz})
+                 : BufferArg(VALUE_TYPE_INT32, TensorShape{shape[1] + 1})),
+        nnz_(nnz),
+        format_(format),
+        type_(type) {
+    bufferType_ = TENSOR_SPARSE;
+    /// todo(tianbing)
+    /// valueType and shape_.ndims() == 2 need to check before
+    /// this constructor to make sure row_ and col_ are right
+    CHECK((valueType == VALUE_TYPE_FLOAT) || (valueType == VALUE_TYPE_DOUBLE));
+    CHECK_EQ(shape_.ndims(), (size_t)2);
+  }
+
   SparseMatrixArg(const CpuSparseMatrix& sparse, ArgType argType = UNSPECIFIED);

   SparseMatrixArg(const GpuSparseMatrix& sparse, ArgType argType = UNSPECIFIED);
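The buffer-less constructor added above lets a test describe a sparse argument by shape, nnz, and format alone; the row/col index buffers only carry their CSR/CSC lengths and stay unallocated (buf_ == nullptr). A minimal usage sketch, assuming the types in this header (the 100 x 200 CSR shape and nnz of 50 are illustrative, not from the commit):

  // Hypothetical sketch: describe a 100 x 200 CSR matrix with 50 non-zeros, no data attached;
  // row_ gets length 101 (height + 1) and col_ gets length 50 (nnz).
  SparseMatrixArg sparseA(VALUE_TYPE_FLOAT,
                          TensorShape{100, 200},
                          /*nnz=*/50,
                          SPARSE_CSR,
                          FLOAT_VALUE);
  CHECK(sparseA.isSparseArg());    // bufferType_ was set to TENSOR_SPARSE
  CHECK(!sparseA.isTransposed());  // trans defaults to false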
paddle/function/FunctionTest.h
@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #include "Function.h"
 #include "paddle/math/Matrix.h"
+#include "paddle/math/SparseMatrix.h"
+#include "paddle/math/Vector.h"
 #include "paddle/math/tests/TensorCheck.h"
 #include "paddle/testing/TestUtil.h"
@@ -62,29 +64,41 @@ public:
     cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(size));
     gpuMemory_.emplace_back(std::make_shared<GpuMemoryHandle>(size));

-    cpuInputs_.emplace_back(std::make_shared<BufferArg>(
-        cpuMemory_.back()->getBuf(), input.valueType(), input.shape()));
-    gpuInputs_.emplace_back(std::make_shared<BufferArg>(
-        gpuMemory_.back()->getBuf(), input.valueType(), input.shape()));
+    cpuInputs_.emplace_back(std::make_shared<BufferArg>(cpuMemory_.back()->getBuf(),
+                                                        input.valueType(),
+                                                        input.shape(),
+                                                        UNSPECIFIED,
+                                                        input.isTransposed()));
+    gpuInputs_.emplace_back(std::make_shared<BufferArg>(gpuMemory_.back()->getBuf(),
+                                                        input.valueType(),
+                                                        input.shape(),
+                                                        UNSPECIFIED,
+                                                        input.isTransposed()));
   }

   // output need only contains shape, do not contains data.
-  void addOutputs(const BufferArg& output) {
+  void addOutputs(const BufferArg& output, ArgType argType = ASSIGN_TO) {
     size_t size =
         output.shape().getElements() * sizeOfValuType(output.valueType());
     cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(size));
     gpuMemory_.emplace_back(std::make_shared<GpuMemoryHandle>(size));

     cpuOutputs_.emplace_back(
         std::make_shared<BufferArg>(cpuMemory_.back()->getBuf(),
                                     output.valueType(),
                                     output.shape(),
-                                    ASSIGN_TO));
+                                    // todo(tianbing), argType = output.getArgType(), but default ASSIGN_TO
+                                    argType,
+                                    output.isTransposed()));
     gpuOutputs_.emplace_back(
         std::make_shared<BufferArg>(gpuMemory_.back()->getBuf(),
                                     output.valueType(),
                                     output.shape(),
-                                    ASSIGN_TO));
+                                    // todo(tianbing), argType = output.getArgType(), but default ASSIGN_TO
+                                    argType,
+                                    output.isTransposed()));
   }

   void addInputs(const SequenceArg& input) {
@@ -107,10 +121,36 @@ public:
     // TODO: need be implemented.
   }

+  void addInputs(const SparseMatrixArg& input) {
+    cpuSparse_ = std::make_shared<CpuSparseMatrix>(input.shape()[0],
+                                                   input.shape()[1],
+                                                   input.nnz(),
+                                                   input.dataType(),
+                                                   input.dataFormat(),
+                                                   input.isTransposed());
+
+    gpuSparse_ = std::make_shared<GpuSparseMatrix>(input.shape()[0],
+                                                   input.shape()[1],
+                                                   input.nnz(),
+                                                   input.dataType(),
+                                                   input.dataFormat(),
+                                                   input.isTransposed());
+
+    /// init sparse matrix
+    hl_stream_t stream(HPPL_STREAM_1);
+    cpuSparse_->randomizeUniform();
+    gpuSparse_->copyFrom(*cpuSparse_, stream);
+    hl_stream_synchronize(stream);
+
+    cpuInputs_.emplace_back(std::make_shared<SparseMatrixArg>(*cpuSparse_));
+    gpuInputs_.emplace_back(std::make_shared<SparseMatrixArg>(*gpuSparse_));
+  }
+
   void run() {
     // prepare cpu/gpu arguments
     initInputs();
+    initOutputs();

     // function calculate
     auto callFunction = [](FunctionBase* function,
                            std::vector<BufferArgPtr>& inputs,
@@ -129,7 +169,7 @@ public:
     callFunction(cpuFunc_.get(), cpuInputs_, cpuOutputs_);
     callFunction(gpuFunc_.get(), gpuInputs_, gpuOutputs_);

-    // check outputs and inouts
+    // check outputs
     compareOutputs();
   }
@@ -140,6 +180,10 @@ public:
 protected:
   void initInputs() {
     for (size_t i = 0; i < cpuInputs_.size(); i++) {
+      if (cpuInputs_[i]->isSparseArg()) {
+        continue;  /// sparse matrix already init
+      }
+
       initArg(*cpuInputs_[i]);

       // TODO: Need a BufferCopy used to copy from one BufferArg to another.
@@ -152,6 +196,25 @@ protected:
     }
   }

+  void initOutputs() {
+    for (size_t i = 0; i < cpuOutputs_.size(); i++) {
+      if (cpuOutputs_[i]->isSparseArg()) {
+        LOG(INFO) << "output sparse matrix already init";
+        continue;
+      }
+
+      initArg(*cpuOutputs_[i]);
+
+      // TODO: Need a BufferCopy used to copy from one BufferArg to another.
+      CpuVector cpuVector(cpuOutputs_[i]->shape().getElements(),
+                          (real*)cpuOutputs_[i]->data());
+      GpuVector gpuVector(gpuOutputs_[i]->shape().getElements(),
+                          (real*)gpuOutputs_[i]->data());
+
+      gpuVector.copyFrom(cpuVector);
+    }
+  }
+
   void compareOutputs() {
     for (size_t i = 0; i < cpuOutputs_.size(); i++) {
       // TODO, Need a BufferCheck used to compare the two buffers.
@@ -159,7 +222,6 @@ protected:
       auto gpu = gpuOutputs_[i];
       CpuVector cpuVector(cpu->shape().getElements(), (real*)cpu->data());
       GpuVector gpuVector(cpu->shape().getElements(), (real*)gpu->data());
-
       autotest::TensorCheckErr(cpuVector, gpuVector);
     }
   }
@@ -195,6 +257,8 @@ protected:
   std::vector<BufferArgPtr> cpuOutputs_;
   std::vector<BufferArgPtr> gpuInputs_;
   std::vector<BufferArgPtr> gpuOutputs_;
+  std::shared_ptr<CpuSparseMatrix> cpuSparse_;
+  std::shared_ptr<GpuSparseMatrix> gpuSparse_;
 };

 }  // namespace paddle
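With addInputs(const SparseMatrixArg&) in place, a test case can hand FunctionCompare a shape-only sparse descriptor and let the helper allocate, randomize, and mirror the CPU/GPU copies before running both functions; initInputs/initOutputs then skip the sparse argument because it is already initialized. A minimal sketch, assuming the API exactly as shown in this diff (the 100 x 50 CSR shape and nnz of 10 are illustrative):

  // Sketch only: dense = sparse * dense, mirroring testFuncDSparseDMatrix below.
  FunctionCompare test("MulOp",
                       FuncConfig().set("scaleAB", (real)1.0).set("scaleT", (real)1.0));
  test.addInputs(SparseMatrixArg(
      VALUE_TYPE_FLOAT, TensorShape{100, 50}, /*nnz=*/10, SPARSE_CSR, FLOAT_VALUE));
  test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{50, 20}));
  test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{100, 20}), ADD_TO);
  test.run();  // runs MulOp-CPU and MulOp-GPU and compares the dense outputs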
paddle/function/MulOp.h
@@ -15,6 +15,8 @@ limitations under the License. */
 #pragma once

 #include "Function.h"
+/// todo(tianbing), delete it
+#include <iostream>
 #include "paddle/math/Matrix.h"
 #include "paddle/math/SparseMatrix.h"
paddle/function/MulOpTest.cpp
@@ -24,58 +24,39 @@ limitations under the License. */
 using namespace paddle;  // NOLINT

 /**
- * C = alpha * C + beta * (A * B), A, B, C dense matrix
+ * C += A * B, A, B, C dense matrix
  * dense = dense * dense
  */
-void testDDDMatrix(bool transa, bool transb, int dimM, int dimN, int dimK) {
-  real alpha = 1.5;
-  real beta = 2.0;
-
-  const auto cpuFunc = FunctionBase::funcRegistrar_.createByType("MulOp-CPU");
-  cpuFunc->init(FuncConfig().set("scaleAB", alpha).set("scaleT", beta));
-  const auto gpuFunc = FunctionBase::funcRegistrar_.createByType("MulOp-GPU");
-  gpuFunc->init(FuncConfig().set("scaleAB", alpha).set("scaleT", beta));
-
-  int heightA = (transa == false) ? dimM : dimK;
-  int widthA = (transa == false) ? dimK : dimM;
-  int heightB = (transb == false) ? dimK : dimN;
-  int widthB = (transb == false) ? dimN : dimK;
-  int heightC = dimM;
-  int widthC = dimN;
-
-  auto cpuA = std::make_shared<CpuMatrix>(heightA, widthA, transa);
-  auto cpuB = std::make_shared<CpuMatrix>(heightB, widthB, transb);
-  auto cpuC = std::make_shared<CpuMatrix>(heightC, widthC);
-  auto gpuA = std::make_shared<GpuMatrix>(heightA, widthA, transa);
-  auto gpuB = std::make_shared<GpuMatrix>(heightB, widthB, transb);
-  auto gpuC = std::make_shared<GpuMatrix>(heightC, widthC);
-
-  cpuA->randomizeUniform();
-  cpuB->randomizeUniform();
-  cpuC->randomizeUniform();
-  gpuA->copyFrom(*cpuA);
-  gpuB->copyFrom(*cpuB);
-  gpuC->copyFrom(*cpuC);
-
-  BufferArgs cpuInputs;
-  BufferArgs cpuOutputs;
-  cpuInputs.addArg(*cpuA);
-  cpuInputs.addArg(*cpuB);
-  cpuOutputs.addArg(*cpuC, ADD_TO);
-  cpuFunc->calc(cpuInputs, cpuOutputs);
-
-  BufferArgs gpuInputs;
-  BufferArgs gpuOutputs;
-  gpuInputs.addArg(*gpuA);
-  gpuInputs.addArg(*gpuB);
-  gpuOutputs.addArg(*gpuC, ADD_TO);
-  gpuFunc->calc(gpuInputs, gpuOutputs);
-
-  autotest::TensorCheckErr(*cpuC, *gpuC);
+void testFuncDDDMatrix(
+    bool transa, bool transb, size_t dimM, size_t dimN, size_t dimK) {
+  real alpha = 1.0;
+  real beta = 1.0;
+  size_t heightA = (transa == false) ? dimM : dimK;
+  size_t widthA = (transa == false) ? dimK : dimM;
+  size_t heightB = (transb == false) ? dimK : dimN;
+  size_t widthB = (transb == false) ? dimN : dimK;
+  size_t heightC = dimM;
+  size_t widthC = dimN;
+  // init Test object
+  FunctionCompare test("MulOp",
+                       FuncConfig().set("scaleAB", alpha).set("scaleT", beta));
+  // prepare input arguments
+  /// matrix A : HA * WA
+  test.addInputs(BufferArg(
+      VALUE_TYPE_FLOAT, TensorShape{heightA, widthA}, UNSPECIFIED, transa));
+  /// matrix B: HB * WB
+  test.addInputs(BufferArg(
+      VALUE_TYPE_FLOAT, TensorShape{heightB, widthB}, UNSPECIFIED, transb));
+  /// output matrix C: HC * WC
+  test.addOutputs(
+      BufferArg(VALUE_TYPE_FLOAT, TensorShape{heightC, widthC}), ADD_TO);
+  // run Function
+  test.run();
 }

-TEST(Matrix, DDDMul) {
-  LOG(INFO) << "test for dense = dense * dense matrix";
+TEST(MulOp, DDDMatrixMul) {
+  LOG(INFO) << "function test for dense = dense * dense matrix";
   for (const auto transa : {false, true}) {
     for (const auto transb : {false, true}) {
       for (const auto dimM : {1, 10, 100}) {
@@ -89,7 +70,7 @@ TEST(Matrix, DDDMul) {
                     << " dimM=" << std::setw(5) << dimM
                     << " dimN=" << std::setw(5) << dimN
                     << " dimK=" << std::setw(5) << dimK;
-          testDDDMatrix(transa, transb, dimM, dimN, dimK);
+          testFuncDDDMatrix(transa, transb, dimM, dimN, dimK);
         }
       }
     }
@@ -101,71 +82,33 @@ TEST(Matrix, DDDMul) {
 /**
  * C += A * B, B, C dense, A sparse
  * dense = sparse * dense
  */
-void testDSparseDMatrix(
+void testFuncDSparseDMatrix(
     size_t dimM, size_t dimN, size_t dimK, size_t nnz, SparseFormat FORMAT) {
   real alpha = 1.0;
   real beta = 1.0;
-  const auto cpuFunc = FunctionBase::funcRegistrar_.createByType("MulOp-CPU");
-  cpuFunc->init(FuncConfig().set("scaleAB", alpha).set("scaleT", beta));
-  const auto gpuFunc = FunctionBase::funcRegistrar_.createByType("MulOp-GPU");
-  gpuFunc->init(FuncConfig().set("scaleAB", alpha).set("scaleT", beta));
-
-  CpuSparseMatrix cpuMatrixA(dimM, dimK, nnz, FLOAT_VALUE, FORMAT, false);
-  GpuSparseMatrix gpuMatrixA(dimM, dimK, nnz, FLOAT_VALUE, FORMAT, false);
-  CpuMatrix cpuDenseA(dimM, dimK, false);
-
-  auto cpuMatrixB = Matrix::create(dimK, dimN, false, false);
-  auto gpuMatrixB = Matrix::create(dimK, dimN, false, true);
-  auto cpuDenseB = Matrix::create(dimK, dimN, false, false);
-
-  auto cpuMatrixC = Matrix::create(dimM, dimN, false, false);
-  auto gpuMatrixC = Matrix::create(dimM, dimN, false, true);
-  auto cpuDenseC = Matrix::create(dimM, dimN, false, false);
-
-  /*matrix init*/
-  hl_stream_t stream(HPPL_STREAM_1);
-  cpuMatrixA.randomizeUniform();
-  cpuMatrixB->randomizeUniform();
-  cpuMatrixC->randomizeUniform();
-
-  gpuMatrixA.copyFrom(cpuMatrixA, stream);
-  gpuMatrixB->copyFrom(*cpuMatrixB, stream);
-  gpuMatrixC->copyFrom(*cpuMatrixC, stream);
-
-  cpuDenseA.copyFrom(cpuMatrixA);
-  cpuDenseB->copyFrom(*cpuMatrixB);
-  cpuDenseC->copyFrom(*cpuMatrixC);
-  hl_stream_synchronize(stream);
-
-  /*matrix mul*/
-  BufferArgs cpuInputs;
-  BufferArgs cpuOutputs;
-  cpuInputs.addArg(cpuMatrixA);
-  cpuInputs.addArg(*cpuMatrixB);
-  cpuOutputs.addArg(*cpuMatrixC, ADD_TO);
-  cpuFunc->calc(cpuInputs, cpuOutputs);
-
-  BufferArgs gpuInputs;
-  BufferArgs gpuOutputs;
-  gpuInputs.addArg(gpuMatrixA);
-  gpuInputs.addArg(*gpuMatrixB);
-  gpuOutputs.addArg(*gpuMatrixC, ADD_TO);
-  gpuFunc->calc(gpuInputs, gpuOutputs);
-
-  BufferArgs denseInputs;
-  BufferArgs denseOutputs;
-  denseInputs.addArg(cpuDenseA);
-  denseInputs.addArg(*cpuDenseB);
-  denseOutputs.addArg(*cpuDenseC, ADD_TO);
-  cpuFunc->calc(denseInputs, denseOutputs);
-
-  /*check result*/
-  autotest::TensorCheckErr(*cpuMatrixC, *cpuDenseC);
-  autotest::TensorCheckErr(*cpuMatrixC, *gpuMatrixC);
+  // init Test object
+  FunctionCompare test("MulOp",
+                       FuncConfig().set("scaleAB", alpha).set("scaleT", beta));
+  // prepare input arguments
+  /// sparse matrix A : M * K
+  test.addInputs(SparseMatrixArg(VALUE_TYPE_FLOAT,
+                                 TensorShape{dimM, dimK},
+                                 nnz,
+                                 FORMAT,
+                                 FLOAT_VALUE,
+                                 UNSPECIFIED,
+                                 false));
+  /// matrix B: K * N
+  test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{dimK, dimN}));
+  /// output matrix C: M * N
+  test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{dimM, dimN}), ADD_TO);
+  // run Function
+  test.run();
 }

-TEST(Matrix, DSparseDMul) {
-  LOG(INFO) << "test for dense = sparse * dense matrix";
+TEST(MuLOp, DSparseDMul) {
+  LOG(INFO) << "function test for dense = sparse * dense matrix";
   for (const auto dimM : {10, 100, 1000}) {
     for (const auto dimN : {10, 100}) {
       for (const auto dimK : {3, 10}) {
@@ -177,7 +120,7 @@ TEST(Matrix, DSparseDMul) {
                   << " dimK=" << std::setw(5) << dimK
                   << " nnz=" << std::setw(5) << nnz
                   << " format=" << std::setw(5) << FORMAT;
-        testDSparseDMatrix(dimM, dimN, dimK, nnz, FORMAT);
+        testFuncDSparseDMatrix(dimM, dimN, dimK, nnz, FORMAT);
       }
     }
   }
@@ -189,72 +132,34 @@ TEST(Matrix, DSparseDMul) {
 /**
  * C += A * B, A, C dense, B sparse
  * dense = dense * sparse
  */
-void testDDSparseMatrix(
+void testFuncDDSparseMatrix(
     size_t dimM, size_t dimN, size_t dimK, size_t nnz, SparseFormat FORMAT) {
   real alpha = 1.0;
   real beta = 1.0;
-  const auto cpuFunc = FunctionBase::funcRegistrar_.createByType("MulOp-CPU");
-  cpuFunc->init(FuncConfig().set("scaleAB", alpha).set("scaleT", beta));
-  const auto gpuFunc = FunctionBase::funcRegistrar_.createByType("MulOp-GPU");
-  gpuFunc->init(FuncConfig().set("scaleAB", alpha).set("scaleT", beta));
-
-  auto cpuMatrixA = Matrix::create(dimM, dimK, false, false);
-  auto gpuMatrixA = Matrix::create(dimM, dimK, false, true);
-  auto cpuDenseA = Matrix::create(dimM, dimK, false, false);
-
-  CpuSparseMatrix cpuMatrixB(dimK, dimN, nnz, FLOAT_VALUE, FORMAT, false);
-  GpuSparseMatrix gpuMatrixB(dimK, dimN, nnz, FLOAT_VALUE, FORMAT, false);
-  auto cpuDenseB = Matrix::create(dimK, dimN, false, false);
-
-  auto cpuMatrixC = Matrix::create(dimM, dimN, false, false);
-  auto gpuMatrixC = Matrix::create(dimM, dimN, false, true);
-  auto cpuDenseC = Matrix::create(dimM, dimN, false, false);
-
-  /*matrix init*/
-  hl_stream_t stream(HPPL_STREAM_1);
-  cpuMatrixA->randomizeUniform();
-  cpuMatrixB.randomizeUniform();
-  cpuMatrixC->randomizeUniform();
-
-  gpuMatrixA->copyFrom(*cpuMatrixA, stream);
-  gpuMatrixB.copyFrom(cpuMatrixB, stream);
-  gpuMatrixC->copyFrom(*cpuMatrixC, stream);
-
-  cpuDenseA->copyFrom(*cpuMatrixA);
-  cpuDenseB->copyFrom(cpuMatrixB);
-  cpuDenseC->copyFrom(*cpuMatrixC);
-  hl_stream_synchronize(stream);
-
-  /*matrix mul*/
-  BufferArgs cpuInputs;
-  BufferArgs cpuOutputs;
-  cpuInputs.addArg(*cpuMatrixA);
-  cpuInputs.addArg(cpuMatrixB);
-  cpuOutputs.addArg(*cpuMatrixC, ADD_TO);
-  cpuFunc->calc(cpuInputs, cpuOutputs);
-
-  BufferArgs gpuInputs;
-  BufferArgs gpuOutputs;
-  gpuInputs.addArg(*gpuMatrixA);
-  gpuInputs.addArg(gpuMatrixB);
-  gpuOutputs.addArg(*gpuMatrixC, ADD_TO);
-  gpuFunc->calc(gpuInputs, gpuOutputs);
-
-  BufferArgs denseInputs;
-  BufferArgs denseOutputs;
-  denseInputs.addArg(*cpuDenseA);
-  denseInputs.addArg(*cpuDenseB);
-  denseOutputs.addArg(*cpuDenseC, ADD_TO);
-  cpuFunc->calc(denseInputs, denseOutputs);
-
-  /*check result*/
-  autotest::TensorCheckErr(*cpuMatrixC, *cpuDenseC);
-  autotest::TensorCheckErr(*cpuMatrixC, *gpuMatrixC);
+  // init Test object
+  FunctionCompare test("MulOp",
+                       FuncConfig().set("scaleAB", alpha).set("scaleT", beta));
+  // prepare input arguments
+  /// matrix A : M * K
+  test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{dimM, dimK}));
+  /// matrix B: K * N
+  test.addInputs(SparseMatrixArg(VALUE_TYPE_FLOAT,
+                                 TensorShape{dimK, dimN},
+                                 nnz,
+                                 FORMAT,
+                                 FLOAT_VALUE,
+                                 UNSPECIFIED,
+                                 false));
+  /// output matrix C: M * N
+  test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{dimM, dimN}), ADD_TO);
+  // run Function
+  test.run();
 }

-TEST(Matrix, DDSparseMul) {
-  LOG(INFO) << "test for dense = dense * sparse matrix";
+TEST(MulOp, DDSparseMul) {
+  LOG(INFO) << "function test for dense = dense * sparse matrix";
   for (const auto dimM : {10, 100, 1000}) {
     for (const auto dimN : {10, 100}) {
       for (const auto dimK : {3, 10}) {
@@ -266,7 +171,7 @@ TEST(Matrix, DDSparseMul) {
                   << " dimK=" << std::setw(5) << dimK
                   << " nnz=" << std::setw(5) << nnz
                   << " format=" << std::setw(5) << FORMAT;
-        testDDSparseMatrix(dimM, dimN, dimK, nnz, FORMAT);
+        testFuncDDSparseMatrix(dimM, dimN, dimK, nnz, FORMAT);
       }
     }
   }