From 936301f157f8c557eea80eda4c0b9d2525dbab52 Mon Sep 17 00:00:00 2001
From: xutianbing
Date: Fri, 13 Jan 2017 14:55:43 -0800
Subject: [PATCH] Use and test Daoyuan's SparseMatrixArg.

---
 paddle/function/BufferArg.cpp  |  6 ++--
 paddle/function/BufferArg.h    | 28 ++++++++++++++--
 paddle/function/CMakeLists.txt |  1 +
 paddle/function/MulOp.cpp      | 60 ++++++++++++++++++++++++++++++++++
 paddle/function/MulOp.h        | 30 +++++++++++++++++
 paddle/function/MulOpGpu.cu    | 57 ++++++++++++++++++++++++++++++++
 paddle/function/MulOpTest.cpp  | 56 +++++++++++++++++++++++++++++++
 paddle/function/TensorType.h   | 26 ++++++++++++++-
 8 files changed, 259 insertions(+), 5 deletions(-)
 create mode 100644 paddle/function/MulOp.cpp
 create mode 100644 paddle/function/MulOp.h
 create mode 100644 paddle/function/MulOpGpu.cu
 create mode 100644 paddle/function/MulOpTest.cpp

diff --git a/paddle/function/BufferArg.cpp b/paddle/function/BufferArg.cpp
index 5d595deb12c..4064daf4159 100644
--- a/paddle/function/BufferArg.cpp
+++ b/paddle/function/BufferArg.cpp
@@ -32,14 +32,16 @@ const SparseMatrixArg& BufferArg::sparse() const {
 SparseMatrixArg::SparseMatrixArg(const CpuSparseMatrix& sparse, ArgType argType)
     : BufferArg(sparse, argType),
       row_(reinterpret_cast<void*>(sparse.getRows()), VALUE_TYPE_INT32),
-      col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32) {
+      col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32),
+      trans_(const_cast<CpuSparseMatrix&>(sparse).getTranspose()) {
   bufferType_ = TENSOR_SPARSE;
 }
 
 SparseMatrixArg::SparseMatrixArg(const GpuSparseMatrix& sparse, ArgType argType)
     : BufferArg(sparse, argType),
       row_(reinterpret_cast<void*>(sparse.getRows()), VALUE_TYPE_INT32),
-      col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32) {
+      col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32),
+      trans_(const_cast<GpuSparseMatrix&>(sparse).getTranspose()) {
   bufferType_ = TENSOR_SPARSE;
 }
 
diff --git a/paddle/function/BufferArg.h b/paddle/function/BufferArg.h
index 84209265ce7..3489510b25f 100644
--- a/paddle/function/BufferArg.h
+++ b/paddle/function/BufferArg.h
@@ -167,7 +167,7 @@ public:
   ValueType valueType() const { return valueType_; }
   BufferType bufferType() const { return bufferType_; }
   const TensorShape& shape() const { return shape_; }
-  bool isSparse() const { return (TENSOR_SPARSE == bufferType_); }
+  bool isSparse() const { return TENSOR_SPARSE == bufferType_; }
   bool isSequenceArg() const { return TENSOR_SEQUENCE_DATA == bufferType_; }
 
   const SequenceArg& sequence() const;
@@ -271,13 +271,15 @@ public:
                   size_t nnz,
                   SparseDataFormat format,
                   SparseDataType type,
+                  bool trans = false,
                   ArgType argType = UNSPECIFIED)
       : BufferArg(buf, valueType, shape, argType),
         row_(row),
         col_(col),
         nnz_(nnz),
         format_(format),
-        type_(type) {
+        type_(type),
+        trans_(trans) {
     bufferType_ = TENSOR_SPARSE;
     CHECK((valueType == VALUE_TYPE_FLOAT) || (valueType == VALUE_TYPE_DOUBLE));
     CHECK_EQ(shape_.ndims(), (size_t)2);
@@ -294,6 +296,24 @@ public:
   SparseMatrixArg(const GpuSparseMatrix& sparse, ArgType argType = UNSPECIFIED);
 
+  template <DeviceType DType>
+  typename Tensor<real, DType>::SparseMatrix SparseMatrix() const {
+    CHECK(buf_);
+    CHECK(valueType_ == DataType<real>::value);
+    // CHECK(deviceType_ == DType);
+    CHECK_EQ(2, shape_.ndims());
+    return typename Tensor<real, DType>::SparseMatrix(
+        reinterpret_cast<real*>(buf_),
+        reinterpret_cast<int*>(row_.data()),
+        reinterpret_cast<int*>(col_.data()),
+        shape_[0],
+        shape_[1],
+        nnz_,
+        static_cast<SparseValueType>(type_),
+        static_cast<SparseFormat>(format_),
+        trans_);
+  }
+
   ~SparseMatrixArg() {}
 
   void* getRowBuf() const { return row_.data(); }
@@ -302,6 +322,8 @@ public:
 
   size_t nnz() const { return nnz_; }
 
+  bool isTranspose() const { return trans_; }
+
   SparseDataFormat dataFormat() const { return format_; }
 
   SparseDataType dataType() const { return type_; }
@@ -312,6 +334,8 @@ private:
   size_t nnz_;
   SparseDataFormat format_;
   SparseDataType type_;
+  /// todo(tianbing), move trans_ up to BufferArg
+  bool trans_;
 };
 
 } // namespace paddle
diff --git a/paddle/function/CMakeLists.txt b/paddle/function/CMakeLists.txt
index 6d20868072c..fae3b7b20a7 100644
--- a/paddle/function/CMakeLists.txt
+++ b/paddle/function/CMakeLists.txt
@@ -26,6 +26,7 @@ if(WITH_TESTING)
     add_simple_unittest(FunctionTest)
     add_simple_unittest(ContextProjectionOpTest)
     add_simple_unittest(PadOpTest)
+    add_simple_unittest(MulOpTest)
 endif()
 endif()
diff --git a/paddle/function/MulOp.cpp b/paddle/function/MulOp.cpp
new file mode 100644
index 00000000000..1fa29fae8d4
--- /dev/null
+++ b/paddle/function/MulOp.cpp
@@ -0,0 +1,60 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "MulOp.h"
+
+namespace paddle {
+
+/**
+ * mul operator
+ * out = scaleT * out + scaleAB * (in1 * in2)
+ *
+ * \param outputs[0]  output matrix, N * M
+ * \param inputs[0]   first input (sparse) matrix, N * K
+ * \param inputs[1]   second input matrix, K * M (non-transpose)
+ */
+template <DeviceType Device>
+class MulFunc : public FunctionBase {
+public:
+  void init(const FuncConfig& config) override {
+    scaleAB_ = config.get<real>("scaleAB");
+    scaleT_ = config.get<real>("scaleT");
+  }
+
+  void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
+    /// todo(tianbing), add more checks
+    CHECK_EQ((size_t)2, inputs.size());
+    CHECK_EQ((size_t)1, outputs.size());
+    CHECK(inputs[0].data() && inputs[1].data() && outputs[0].data());
+    CHECK_EQ(inputs[0].shape().ndims(), (size_t)2);
+    CHECK_EQ(inputs[1].shape().ndims(), (size_t)2);
+    CHECK_EQ(outputs[0].shape().ndims(), (size_t)2);
+    CHECK_EQ(outputs[0].getArgType(), ASSIGN_TO);
+
+    CHECK(inputs[0].isSparse()) << "SparseMatrix required here";
+    const auto in1_mat = inputs[0].sparse().SparseMatrix<Device>();
+    auto out_mat = outputs[0].matrix<Device>();
+    const auto in2_mat = inputs[1].matrix<Device>();
+    MulOp<Device>(out_mat, in1_mat, in2_mat, scaleAB_, scaleT_);
+  }
+
+private:
+  real scaleAB_;
+  real scaleT_;
+};
+
+#ifndef PADDLE_ONLY_CPU
+REGISTER_TYPED_FUNC(MulOp, GPU, MulFunc);
+#endif
+} // namespace paddle
diff --git a/paddle/function/MulOp.h b/paddle/function/MulOp.h
new file mode 100644
index 00000000000..bcea1864026
--- /dev/null
+++ b/paddle/function/MulOp.h
@@ -0,0 +1,30 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "Function.h"
+#include "paddle/math/Matrix.h"
+#include "paddle/math/SparseMatrix.h"
+
+namespace paddle {
+
+template <DeviceType DType>
+void MulOp(GpuMatrix& out,
+           const GpuSparseMatrix& a,
+           const GpuMatrix& b,
+           real scaleAB,
+           real scaleT);
+
+} // namespace paddle
diff --git a/paddle/function/MulOpGpu.cu b/paddle/function/MulOpGpu.cu
new file mode 100644
index 00000000000..db716c1e46b
--- /dev/null
+++ b/paddle/function/MulOpGpu.cu
@@ -0,0 +1,57 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "hl_base.h"
+#include "MulOp.h"
+#include "paddle/math/Matrix.h"
+#include "paddle/math/SparseMatrix.h"
+
+namespace paddle {
+/**
+ * out = scale_t * out + scale_ab * (a * b)
+ */
+template <>
+void MulOp<DEVICE_TYPE_GPU>(GpuMatrix& out,
+                            const GpuSparseMatrix& a,
+                            const GpuMatrix& b,
+                            real scale_ab,
+                            real scale_t) {
+  CHECK(out.isContiguous());
+  CHECK(b.isContiguous());
+  CHECK(b.useGpu_ == true) << "Matrix type are not equal";
+  CHECK(!out.trans_ && !b.trans_) << "not supported";
+  if (!a.trans_) {
+    CHECK(out.width_ == b.width_ && out.height_ == a.height_ &&
+          a.width_ == b.height_)
+        << "Matrix dimensions are not equal";
+  } else {
+    CHECK(out.width_ == b.width_ && out.height_ == a.width_ &&
+          a.height_ == b.height_)
+        << "Matrix dimensions are not equal";
+  }
+  hl_trans_op_t a_trans = a.trans_ ? HPPL_OP_T : HPPL_OP_N;
+  hl_sparse_matrix_s a_data = a.sMatrix_.get();
+  real* b_data = b.data_;
+  real* out_data = out.data_;
+  hl_matrix_csr_mul_dense(a_data,
+                          a_trans,
+                          b_data,
+                          HPPL_OP_N,
+                          out_data,
+                          out.height_,
+                          out.width_,
+                          b.height_,
+                          scale_ab,
+                          scale_t);
+}
+
+} // namespace paddle
diff --git a/paddle/function/MulOpTest.cpp b/paddle/function/MulOpTest.cpp
new file mode 100644
index 00000000000..bc1fa9f607a
--- /dev/null
+++ b/paddle/function/MulOpTest.cpp
@@ -0,0 +1,56 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <gtest/gtest.h>
+#include "FunctionTest.h"
+#include "paddle/math/Matrix.h"
+#include "paddle/math/SparseMatrix.h"
+#include "paddle/testing/TestUtil.h"
+
+using namespace paddle;  // NOLINT
+
+void testSpMatrixMul(int M, int N, int K, real rate, real scale1, real scale2) {
+  /// todo(tianbing) check CPU/GPU
+  const auto gpuFunc = FunctionBase::funcRegistrar_.createByType("MulOp-GPU");
+  gpuFunc->init(FuncConfig().set("scaleAB", scale1).set("scaleT", scale2));
+
+  int nnz = M * K * rate;
+  auto gpuA = std::make_shared<GpuSparseMatrix>(M, K, nnz);
+  const auto gpuB = std::make_shared<GpuMatrix>(K, N);
+  const auto gpuOut = std::make_shared<GpuMatrix>(M, N);
+
+  gpuA->randomizeUniform();
+  gpuB->randomizeUniform();
+  gpuOut->randomizeUniform();
+
+  BufferArgs inputs;
+  BufferArgs outputs;
+  inputs.addArg(*gpuA);
+  inputs.addArg(*gpuB);
+  outputs.addArg(*gpuOut);
+
+  gpuFunc->calc(inputs, outputs);
+}
+
+TEST(SMatrix, sMatrixMul) {
+  for (auto M : {1, 40, 128, 200}) {
+    for (auto N : {100, 2000, 20480}) {
+      for (auto K : {100, 512, 1024}) {
+        /// todo(tianbing), add scaleAB and scaleT
+        VLOG(3) << " M=" << M << " N=" << N << " K=" << K;
+        testSpMatrixMul(M, N, K, 0.05, 1, 1);
+      }
+    }
+  }
+}
diff --git a/paddle/function/TensorType.h b/paddle/function/TensorType.h
index 98942cff9e2..73ac1cc70d2 100644
--- a/paddle/function/TensorType.h
+++ b/paddle/function/TensorType.h
@@ -87,6 +87,29 @@ struct MatrixT<int, DEVICE_TYPE_GPU> {
   using type = void;  // Not implemented
 };
 
+template <typename VType, DeviceType Device>
+struct SparseMatrixT;
+
+template <>
+struct SparseMatrixT<real, DEVICE_TYPE_CPU> {
+  using type = CpuSparseMatrix;
+};
+
+template <>
+struct SparseMatrixT<real, DEVICE_TYPE_GPU> {
+  using type = GpuSparseMatrix;
+};
+
+template <>
+struct SparseMatrixT<int, DEVICE_TYPE_CPU> {
+  using type = void;  // Not implemented
+};
+
+template <>
+struct SparseMatrixT<int, DEVICE_TYPE_GPU> {
+  using type = void;  // Not implemented
+};
+
 template <typename VType, DeviceType Device>
 struct VectorT;
 
@@ -114,8 +137,9 @@ struct VectorT<int, DEVICE_TYPE_GPU> {
 
 template <typename VType, DeviceType Device>
 struct Tensor {
-  typedef typename detail::MatrixT<VType, Device>::type Matrix;
   typedef typename detail::VectorT<VType, Device>::type Vector;
+  typedef typename detail::MatrixT<VType, Device>::type Matrix;
+  typedef typename detail::SparseMatrixT<VType, Device>::type SparseMatrix;
 };
 
 } // namespace paddle
--
GitLab
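
Note (not part of the patch): a minimal usage sketch of the new sparse * dense multiply, calling the MulOp<DEVICE_TYPE_GPU> specialization declared in MulOp.h directly rather than going through the function registry as MulOpTest.cpp does. The matrix sizes, the ~10% density, and the helper name sparseDenseMulExample are illustrative assumptions; the constructors and randomizeUniform() calls mirror the ones used in MulOpTest.cpp.

// Usage sketch under the assumptions stated above; not part of the patch.
#include "paddle/function/MulOp.h"
#include "paddle/math/Matrix.h"
#include "paddle/math/SparseMatrix.h"

using namespace paddle;  // NOLINT

void sparseDenseMulExample() {
  const size_t M = 128, K = 512, N = 256;  // assumed shapes: A is M x K, B is K x N
  const size_t nnz = M * K / 10;           // assumed ~10% non-zero entries

  // A must be CSR here: the GPU kernel wraps hl_matrix_csr_mul_dense.
  GpuSparseMatrix a(M, K, nnz);
  GpuMatrix b(K, N);
  GpuMatrix out(M, N);

  a.randomizeUniform();
  b.randomizeUniform();
  out.randomizeUniform();

  // out = scale_t * out + scale_ab * (a * b), with scaleAB = scaleT = 1
  // as exercised by MulOpTest.
  MulOp<DEVICE_TYPE_GPU>(out, a, b, 1.0, 1.0);
}

The registry path used by the test (createByType("MulOp-GPU"), then init() with scaleAB/scaleT and calc() on BufferArgs) is the intended public entry point; the direct call above only illustrates the kernel wrapper's contract.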