Commit 936301f1 by author xutianbing

Use and test Daoyuan's SparseMatrixArg.

Parent commit: b1f09f27
......@@ -32,14 +32,16 @@ const SparseMatrixArg& BufferArg::sparse() const {
/// Construct a SparseMatrixArg from a CpuSparseMatrix.
/// Wraps non-owning views of the row/col index buffers (INT32) and records
/// the transpose flag of the source matrix.
SparseMatrixArg::SparseMatrixArg(const CpuSparseMatrix& sparse, ArgType argType)
    : BufferArg(sparse, argType),
      row_(reinterpret_cast<void*>(sparse.getRows()), VALUE_TYPE_INT32),
      col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32),
      // const_cast is needed because getTranspose() is not const-qualified.
      trans_(const_cast<CpuSparseMatrix&>(sparse).getTranspose()) {
  bufferType_ = TENSOR_SPARSE;
}
/// Construct a SparseMatrixArg from a GpuSparseMatrix.
/// Same contract as the CPU overload: non-owning index views plus the
/// transpose flag of the source matrix.
SparseMatrixArg::SparseMatrixArg(const GpuSparseMatrix& sparse, ArgType argType)
    : BufferArg(sparse, argType),
      row_(reinterpret_cast<void*>(sparse.getRows()), VALUE_TYPE_INT32),
      col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32),
      // const_cast is needed because getTranspose() is not const-qualified.
      trans_(const_cast<GpuSparseMatrix&>(sparse).getTranspose()) {
  bufferType_ = TENSOR_SPARSE;
}
......
......@@ -167,7 +167,7 @@ public:
ValueType valueType() const { return valueType_; }
BufferType bufferType() const { return bufferType_; }
const TensorShape& shape() const { return shape_; }
// True when this argument wraps a sparse matrix (set by SparseMatrixArg).
bool isSparse() const { return TENSOR_SPARSE == bufferType_; }
bool isSequenceArg() const { return TENSOR_SEQUENCE_DATA == bufferType_; }
const SequenceArg& sequence() const;
......@@ -271,13 +271,15 @@ public:
size_t nnz,
SparseDataFormat format,
SparseDataType type,
bool trans = false,
ArgType argType = UNSPECIFIED)
: BufferArg(buf, valueType, shape, argType),
row_(row),
col_(col),
nnz_(nnz),
format_(format),
type_(type) {
type_(type),
trans_(trans) {
bufferType_ = TENSOR_SPARSE;
CHECK((valueType == VALUE_TYPE_FLOAT) || (valueType == VALUE_TYPE_DOUBLE));
CHECK_EQ(shape_.ndims(), (size_t)2);
......@@ -294,6 +296,24 @@ public:
SparseMatrixArg(const GpuSparseMatrix& sparse, ArgType argType = UNSPECIFIED);
/// Build a typed sparse-matrix view (CpuSparseMatrix or GpuSparseMatrix,
/// selected by DType via Tensor<real, DType>::SparseMatrix) over this
/// argument's buffers. No data is copied; the returned matrix aliases
/// buf_/row_/col_.
template <DeviceType DType>
typename Tensor<real, DType>::SparseMatrix SparseMatrix() const {
  CHECK(buf_);
  CHECK(valueType_ == DataType<real>::value);
  // CHECK(deviceType_ == DType);
  // Use (size_t)2 so both CHECK_EQ operands share a type, matching the
  // constructor's shape check and avoiding a signed/unsigned comparison.
  CHECK_EQ(shape_.ndims(), (size_t)2);
  return typename Tensor<real, DType>::SparseMatrix(
      reinterpret_cast<real*>(buf_),
      reinterpret_cast<int*>(row_.data()),
      reinterpret_cast<int*>(col_.data()),
      shape_[0],   // height
      shape_[1],   // width
      nnz_,
      static_cast<SparseValueType>(type_),
      static_cast<SparseFormat>(format_),
      trans_);
}
// No extra cleanup beyond the base class; index views are non-owning.
~SparseMatrixArg() {}

// Raw pointer to the row-index buffer.
void* getRowBuf() const { return row_.data(); }
......@@ -302,6 +322,8 @@ public:
// Number of non-zero elements.
size_t nnz() const { return nnz_; }

// Whether the wrapped matrix is transposed.
bool isTranspose() const { return trans_; }

// Storage format (e.g. CSR/CSC) of the wrapped matrix.
SparseDataFormat dataFormat() const { return format_; }

// Value type of the non-zero entries.
SparseDataType dataType() const { return type_; }
......@@ -312,6 +334,8 @@ private:
size_t nnz_;
SparseDataFormat format_;
SparseDataType type_;
/// todo(tianbing), move trans_ up to BufferArg
bool trans_;
};
} // namespace paddle
......@@ -26,6 +26,7 @@ if(WITH_TESTING)
add_simple_unittest(FunctionTest)
add_simple_unittest(ContextProjectionOpTest)
add_simple_unittest(PadOpTest)
add_simple_unittest(MulOpTest)
endif()
endif()
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "MulOp.h"
namespace paddle {
/**
 * mul operator
 * out = scaleT * out + scaleAB * (in1 * in2)
 *
 * \param outputs[0] output matrix, N * M
 * \param inputs[0]  first input (sparse) matrix, N * K
 * \param inputs[1]  second input matrix, K * M (non-transpose)
 */
template <DeviceType Device>
class MulFunc : public FunctionBase {
public:
  void init(const FuncConfig& config) override {
    scaleAB_ = config.get<real>("scaleAB");
    scaleT_ = config.get<real>("scaleT");
  }

  void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
    /// todo(tianbing), add more checks
    // Two inputs (sparse A, dense B) and one output, per the class comment.
    // The original checks were swapped (1 input / 2 outputs), which
    // contradicts the inputs[1] access below.
    CHECK_EQ((size_t)2, inputs.size());
    CHECK_EQ((size_t)1, outputs.size());

    CHECK(inputs[0].data() && inputs[1].data() && outputs[0].data());
    CHECK_EQ(inputs[0].shape().ndims(), (size_t)2);
    CHECK_EQ(inputs[1].shape().ndims(), (size_t)2);
    CHECK_EQ(outputs[0].shape().ndims(), (size_t)2);
    CHECK_EQ(outputs[0].getArgType(), ASSIGN_TO);
    CHECK(inputs[0].isSparse()) << "SparseMatrix required here";

    const auto in1_mat = inputs[0].sparse().SparseMatrix<Device>();
    auto out_mat = outputs[0].matrix<Device>();
    const auto in2_mat = inputs[1].matrix<Device>();

    MulOp<Device>(out_mat, in1_mat, in2_mat, scaleAB_, scaleT_);
  }

private:
  real scaleAB_;
  real scaleT_;
};
#ifndef PADDLE_ONLY_CPU
REGISTER_TYPED_FUNC(MulOp, GPU, MulFunc);
#endif
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "Function.h"
#include "paddle/math/Matrix.h"
#include "paddle/math/SparseMatrix.h"
namespace paddle {
/// Sparse-dense matrix multiply: out = scaleT * out + scaleAB * (a * b).
/// Specialized per device; only the GPU specialization is defined in this
/// change (see MulOpGpu.cu).
/// \param out     dense result matrix (accumulated into)
/// \param a       sparse left operand
/// \param b       dense right operand
/// \param scaleAB scale applied to the product a * b
/// \param scaleT  scale applied to the existing contents of out
template <DeviceType DType>
void MulOp(GpuMatrix& out,
const GpuSparseMatrix& a,
const GpuMatrix& b,
real scaleAB,
real scaleT);
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "hl_base.h"
#include "MulOp.h"
#include "paddle/math/Matrix.h"
#include "paddle/math/SparseMatrix.h"
namespace paddle {
/**
 * GPU specialization of the sparse-dense multiply:
 *   out = scale_t * out + scale_ab * (a * b)
 *
 * NOTE(review): this always calls hl_matrix_csr_mul_dense, so `a` is
 * presumably expected to be in CSR format — the format is not checked
 * here; confirm and consider adding a CHECK on the sparse format.
 */
template <>
void MulOp<DEVICE_TYPE_GPU>(GpuMatrix& out,
const GpuSparseMatrix& a,
const GpuMatrix& b,
real scale_ab,
real scale_t) {
// Dense operands must be contiguous and resident on the GPU.
CHECK(out.isContiguous());
CHECK(b.isContiguous());
CHECK(b.useGpu_ == true) << "Matrix type are not equal";
// Only `a` may be transposed; out/b transposition is unsupported.
CHECK(!out.trans_ && !b.trans_) << "not supported";
// Dimension compatibility depends on whether `a` is transposed:
//   !a.trans_: out(H,W) = a(H,K) * b(K,W)
//    a.trans_: out(H,W) = a'(H,K) * b(K,W), i.e. a is (K,H)
if (!a.trans_) {
CHECK(out.width_ == b.width_ && out.height_ == a.height_
&& a.width_ == b.height_) << "Matrix dimensions are not equal";
} else {
CHECK(out.width_ == b.width_ && out.height_ == a.width_
&& a.height_ == b.height_) << "Matrix dimensions are not equal";
}

// Map the transpose flag to the HPPL enum and dispatch to the
// hl_matrix_csr_mul_dense kernel with raw device pointers.
hl_trans_op_t a_trans = a.trans_ ? HPPL_OP_T : HPPL_OP_N;
hl_sparse_matrix_s a_data = a.sMatrix_.get();
real* b_data = b.data_;
real* out_data = out.data_;
hl_matrix_csr_mul_dense(a_data,
a_trans,
b_data,
HPPL_OP_N,
out_data,
out.height_,
out.width_,
b.height_,
scale_ab,
scale_t);
}
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "FunctionTest.h"
#include "paddle/math/Matrix.h"
#include "paddle/math/SparseMatrix.h"
#include "paddle/testing/TestUtil.h"
using namespace paddle; // NOLINT
void testSpMatrixMul(int M, int N, int K, real rate, real scale1, real scale2) {
/// todo(tianbing) check CPU/GPU
const auto gpuFunc = FunctionBase::funcRegistrar_.createByType("MulOP-GPU");
gpuFunc->init(FuncConfig().set("scaleAB", scale1).set("scaleT", scale2));
int nnz = M * K * rate;
auto gpuA = std::make_shared<GpuSparseMatrix>(M, K, nnz);
const auto gpuB = std::make_shared<GpuMatrix>(K, N);
const auto gpuOut = std::make_shared<GpuMatrix>(M, N);
gpuA->randomizeUniform();
gpuB->randomizeUniform();
gpuOut->randomizeUniform();
BufferArgs inputs;
BufferArgs outputs;
inputs.addArg(*gpuA);
inputs.addArg(*gpuB);
outputs.addArg(*gpuOut);
gpuFunc->calc(inputs, outputs);
}
// Smoke test: sweep a grid of (M, N, K) shapes at 5% sparsity with
// scaleAB = scaleT = 1. Only checks that calc() runs without tripping
// any CHECK; result values are not validated yet.
TEST(SMatrix, sMatrixMul) {
for (auto M : {1, 40, 128, 200}) {
for (auto N : {100, 2000, 20480}) {
for (auto K : {100, 512, 1024}) {
/// todo(tianbing), add scaleAB and scaleT
VLOG(3) << " M=" << M << " N=" << N << " K=" << K;
testSpMatrixMul(M, N, K, 0.05, 1, 1);
}
}
}
}
......@@ -87,6 +87,29 @@ struct MatrixT<int, DEVICE_TYPE_GPU> {
using type = void; // Not implemented
};
// Trait mapping (value type, device) -> concrete sparse-matrix class,
// mirroring MatrixT above. Only real-valued sparse matrices exist;
// int specializations map to void.
template <typename VType, DeviceType Device>
struct SparseMatrixT;

template <>
struct SparseMatrixT<real, DEVICE_TYPE_CPU> {
using type = CpuSparseMatrix;
};

template <>
struct SparseMatrixT<real, DEVICE_TYPE_GPU> {
using type = GpuSparseMatrix;
};

template <>
struct SparseMatrixT<int, DEVICE_TYPE_CPU> {
using type = void; // Not implemented
};

template <>
struct SparseMatrixT<int, DEVICE_TYPE_GPU> {
using type = void; // Not implemented
};
template <typename VType, DeviceType Device>
struct VectorT;
......@@ -114,8 +137,9 @@ struct VectorT<int, DEVICE_TYPE_GPU> {
/// Bundles the device/value-type-specific Matrix, Vector and SparseMatrix
/// class aliases selected via the detail::*T traits.
/// (The fused diff left the Matrix typedef duplicated; one copy kept.)
template <typename VType, DeviceType DType>
struct Tensor {
  typedef typename detail::MatrixT<VType, DType>::type Matrix;
  typedef typename detail::VectorT<VType, DType>::type Vector;
  typedef typename detail::SparseMatrixT<VType, DType>::type SparseMatrix;
};
} // namespace paddle
Markdown is supported.
0% loaded.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register to comment.