Commit 1ca2846e authored by xutianbing

Pass unit test for CpuMatrix::mul(CpuMatrix, CpuSparseMatrix) and GpuMatrix::mul(CpuMatrix, GpuSparseMatrix)
Parent 2df8eec5
......@@ -32,14 +32,22 @@ const SparseMatrixArg& BufferArg::sparse() const {
SparseMatrixArg::SparseMatrixArg(const CpuSparseMatrix& sparse, ArgType argType)
: BufferArg(sparse, argType),
row_(reinterpret_cast<void*>(sparse.getRows()), VALUE_TYPE_INT32),
col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32) {
col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32),
/// todo(tianbing), confirm how to get NNZ
nnz_(sparse.getElementCnt()),
format_(sparse.getFormat()),
type_(sparse.getValueType()) {
bufferType_ = TENSOR_SPARSE;
}
SparseMatrixArg::SparseMatrixArg(const GpuSparseMatrix& sparse, ArgType argType)
: BufferArg(sparse, argType),
row_(reinterpret_cast<void*>(sparse.getRows()), VALUE_TYPE_INT32),
col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32) {
col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32),
/// todo(tianbing), confirm how to get NNZ
nnz_(sparse.getElementCnt()),
format_(sparse.getFormat()),
type_(sparse.getValueType()) {
bufferType_ = TENSOR_SPARSE;
}
......
......@@ -30,13 +30,6 @@ enum BufferType {
TENSOR_SPARSE = 4
};
enum SparseDataType {
SPARSE_NO_VALUE = 0, // do not need value pointer, all values are 1
SPARSE_FLOAT_VALUE = 1
};
enum SparseDataFormat { SPARSE_CSR_FORMAT = 0, SPARSE_CSC_FORMAT = 1 };
class BufferArg;
class SequenceArg;
class SparseMatrixArg;
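The enums removed above are superseded by the SparseValueType and SparseFormat types from paddle/math, which the remaining hunks switch to. A hedged summary of the correspondence (FLOAT_VALUE, SPARSE_CSR, and SPARSE_CSC appear verbatim later in this diff; the NO_VALUE name is an assumption based on the paddle/math headers):
// Removed local enum   -> paddle/math replacement
// SPARSE_NO_VALUE      -> NO_VALUE    (no value array; all values treated as 1)
// SPARSE_FLOAT_VALUE   -> FLOAT_VALUE
// SPARSE_CSR_FORMAT    -> SPARSE_CSR
// SPARSE_CSC_FORMAT    -> SPARSE_CSC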
......@@ -272,8 +265,8 @@ public:
const BufferArg& row,
const BufferArg& col,
size_t nnz,
SparseDataFormat format,
SparseDataType type,
SparseFormat format,
SparseValueType type,
ArgType argType = UNSPECIFIED)
: BufferArg(buf, valueType, shape, argType),
row_(row),
......@@ -286,9 +279,9 @@ public:
CHECK_EQ(shape_.ndims(), (size_t)2);
CHECK_EQ(row_.shape().ndims(), (size_t)1);
CHECK_EQ(col_.shape().ndims(), (size_t)1);
if (format == SPARSE_CSR_FORMAT) {
if (format == SPARSE_CSR) {
CHECK_EQ(nnz, col.shape()[0]);
} else if (format == SPARSE_CSC_FORMAT) {
} else if (format == SPARSE_CSC) {
CHECK_EQ(nnz, row.shape()[0]);
}
}
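To see why the two CHECK_EQs above hold, here is a minimal illustrative sketch of the standard CSR/CSC layouts (generic convention, not PaddlePaddle-specific code): in CSR the column-index buffer holds exactly one entry per nonzero, while in CSC that role moves to the row-index buffer.
// CSR layout of the 2x3 matrix [[1 0 2], [0 3 0]], nnz = 3:
int rowOffsets[] = {0, 2, 3};  // height + 1 entries; row i spans [rowOffsets[i], rowOffsets[i+1])
int colIndices[] = {0, 2, 1};  // exactly nnz entries -> CHECK_EQ(nnz, col.shape()[0]) for SPARSE_CSR
float values[]   = {1, 2, 3};
// CSC swaps the roles: colOffsets has width + 1 entries and rowIndices has
// exactly nnz entries -> CHECK_EQ(nnz, row.shape()[0]) for SPARSE_CSC.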
......@@ -310,8 +303,8 @@ public:
shape_[0],
shape_[1],
nnz_,
static_cast<SparseValueType>(type_),
static_cast<SparseFormat>(format_),
type_,
format_,
trans_);
}
......@@ -323,16 +316,16 @@ public:
size_t nnz() const { return nnz_; }
SparseDataFormat dataFormat() const { return format_; }
SparseFormat dataFormat() const { return format_; }
SparseDataType dataType() const { return type_; }
SparseValueType dataType() const { return type_; }
private:
BufferArg row_;
BufferArg col_;
size_t nnz_;
SparseDataFormat format_;
SparseDataType type_;
SparseFormat format_;
SparseValueType type_;
};
} // namespace paddle
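With the constructor changes above, a SparseMatrixArg built from a CpuSparseMatrix or GpuSparseMatrix now carries nnz, format, and value type alongside the row/col buffers. A hedged usage sketch using only names that appear in this commit (the CpuSparseMatrix constructor arguments mirror the test code below; height, width, and nnz are placeholders):
CpuSparseMatrix sparse(height, width, nnz, FLOAT_VALUE, SPARSE_CSR, /*trans*/ false);
SparseMatrixArg arg(sparse, UNSPECIFIED);
CHECK_EQ(arg.nnz(), sparse.getElementCnt());  // nnz_ currently comes from getElementCnt(), per the TODO
CHECK(arg.dataFormat() == SPARSE_CSR);
CHECK(arg.dataType() == FLOAT_VALUE);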
......@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "MulOp.h"
/// todo(tianbing), delete it
#include <iostream>
#include "paddle/math/MathFunctions.h"
#include "paddle/math/SIMDFunctions.h"
#include "paddle/utils/ThreadLocal.h"
......@@ -496,16 +498,48 @@ public:
CHECK_EQ(outputs[0].shape().ndims(), (size_t)2);
CHECK_EQ(outputs[0].getArgType(), ADD_TO);
auto in1_mat = inputs[0].matrix<Device>();
if (inputs[0].isSparseArg()) {
in1_mat = inputs[0].sparse().SparseMatrix<Device>();
/// todo(tianbing), support SparseMatrixArg for out_mat
auto out_mat = outputs[0].matrix<Device>();
LOG(INFO) << "out_mat:";
out_mat.print(std::cout);
if (!inputs[0].isSparseArg() && !inputs[1].isSparseArg()) {
LOG(INFO) << "in1_mat:";
inputs[0].matrix<Device>().print(std::cout);
LOG(INFO) << "in2_mat:";
inputs[1].matrix<Device>().print(std::cout);
MulOp<Device>(out_mat,
inputs[0].matrix<Device>(),
inputs[1].matrix<Device>(),
alpha_,
beta_);
return;
}
auto in2_mat = inputs[1].matrix<Device>();
if (inputs[1].isSparseArg()) {
in2_mat = inputs[1].sparse().SparseMatrix<Device>();
if (!inputs[0].isSparseArg() && inputs[1].isSparseArg()) {
LOG(INFO) << "in1_mat:";
inputs[0].matrix<Device>().print(std::cout);
LOG(INFO) << "in2_mat:";
inputs[1].sparse().SparseMatrix<Device>().print(std::cout);
MulOp<Device>(out_mat,
inputs[0].matrix<Device>(),
inputs[1].sparse().SparseMatrix<Device>(),
alpha_,
beta_);
return;
}
if (inputs[0].isSparseArg() && !inputs[1].isSparseArg()) {
LOG(INFO) << "in1_mat:";
inputs[0].sparse().SparseMatrix<Device>().print(std::cout);
LOG(INFO) << "in2_mat:";
inputs[1].matrix<Device>().print(std::cout);
MulOp<Device>(out_mat,
inputs[0].sparse().SparseMatrix<Device>(),
inputs[1].matrix<Device>(),
alpha_,
beta_);
return;
}
auto out_mat = outputs[0].matrix<Device>();
MulOp<Device>(out_mat, in1_mat, in2_mat, alpha_, beta_);
}
private:
......
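The reworked calc() above dispatches on the sparsity of the two inputs. A compact summary of the mapping as read from the diff (the both-sparse case is not matched by any branch in this commit, and a sparse out_mat remains a TODO per the comment in the code):
// inputs[0]  inputs[1]  -> MulOp overload invoked
// dense      dense      -> MulOp(out, matrix, matrix, alpha, beta)
// dense      sparse     -> MulOp(out, matrix, sparse().SparseMatrix(), alpha, beta)
// sparse     dense      -> MulOp(out, sparse().SparseMatrix(), matrix, alpha, beta)
// sparse     sparse     -> no branch matches; calc() appears to return without computing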
......@@ -15,6 +15,8 @@ limitations under the License. */
#pragma once
#include "Function.h"
/// todo(tianbing), delete
#include <iostream>
#include "paddle/math/Matrix.h"
#include "paddle/math/SparseMatrix.h"
......
......@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
/// todo(tianbing), delete
#include <iostream>
#include "FunctionTest.h"
#include "paddle/math/Matrix.h"
#include "paddle/math/SparseMatrix.h"
......@@ -72,6 +74,7 @@ void testMatrixMul(bool transa, bool transb, int dimM, int dimN, int dimK) {
}
TEST(Matrix, mul) {
LOG(INFO) << "test for dense = dense * dense matrix";
for (auto transa : {false, true}) {
for (auto transb : {false, true}) {
for (auto dimM : {1, 10, 100}) {
......@@ -93,3 +96,134 @@ TEST(Matrix, mul) {
}
}
}
struct MatrixPara {
size_t height;
size_t width;
bool trans;
bool sparse;
size_t nnz;
SparseFormat format;
};
/**
 * C += A * B, where A and C are dense and B is sparse.
 */
void testDSparseDMatrix() {
real alpha = 1.0;
real beta = 1.0;
const auto cpuFunc = FunctionBase::funcRegistrar_.createByType("MulOp-CPU");
cpuFunc->init(FuncConfig().set("scaleAB", alpha).set("scaleT", beta));
const auto gpuFunc = FunctionBase::funcRegistrar_.createByType("MulOp-GPU");
gpuFunc->init(FuncConfig().set("scaleAB", alpha).set("scaleT", beta));
constexpr size_t dimM = 2;
constexpr size_t dimN = 2;
constexpr size_t dimK = 3;
constexpr size_t NNZ = 3;
constexpr SparseFormat FORMAT = SPARSE_CSC;
MatrixPara paraA{dimM, dimK, /*trans*/ false, /*sparse*/ false, NNZ, FORMAT};
MatrixPara paraB{dimK, dimN, /*trans*/ false, /*sparse*/ true, NNZ, FORMAT};
MatrixPara paraC{dimM, dimN, /*trans*/ false, /*sparse*/ false, NNZ, FORMAT};
auto cpuMatrixA =
Matrix::create(paraA.height, paraA.width, paraA.trans, false);
auto gpuMatrixA =
Matrix::create(paraA.height, paraA.width, paraA.trans, true);
auto cpuDenseA =
Matrix::create(paraA.height, paraA.width, paraA.trans, false);
CpuSparseMatrix cpuMatrixB(paraB.height,
paraB.width,
paraB.nnz,
FLOAT_VALUE,
paraB.format,
paraB.trans);
GpuSparseMatrix gpuMatrixB(paraB.height,
paraB.width,
paraB.nnz,
FLOAT_VALUE,
paraB.format,
paraB.trans);
auto cpuDenseB =
Matrix::create(paraB.height, paraB.width, paraB.trans, false);
auto cpuMatrixC =
Matrix::create(paraC.height, paraC.width, paraC.trans, false);
auto gpuMatrixC =
Matrix::create(paraC.height, paraC.width, paraC.trans, true);
auto cpuDenseC =
Matrix::create(paraC.height, paraC.width, paraC.trans, false);
auto gpuMatrixC_d2h =
Matrix::create(paraC.height, paraC.width, paraC.trans, false);
/*matrix init*/
hl_stream_t stream(HPPL_STREAM_1);
cpuMatrixA->randomizeUniform();
cpuMatrixB.randomizeUniform();
cpuMatrixC->randomizeUniform();
gpuMatrixA->copyFrom(*cpuMatrixA, stream);
gpuMatrixB.copyFrom(cpuMatrixB, stream);
gpuMatrixC->copyFrom(*cpuMatrixC, stream);
cpuDenseA->copyFrom(*cpuMatrixA);
cpuDenseB->copyFrom(cpuMatrixB);
cpuDenseC->copyFrom(*cpuMatrixC);
hl_stream_synchronize(stream);
LOG(INFO) << "cpuMatrixA: ";
cpuMatrixA->print(std::cout);
LOG(INFO) << "cpuMatrixB: ";
(&cpuMatrixB)->print(std::cout);
LOG(INFO) << "cpuMatrixC: ";
cpuMatrixC->print(std::cout);
LOG(INFO) << "cpuDenseA: ";
cpuDenseA->print(std::cout);
LOG(INFO) << "cpuDenseB: ";
cpuDenseB->print(std::cout);
LOG(INFO) << "cpuDenseC: ";
cpuDenseC->print(std::cout);
LOG(INFO) << "gpuMatrixA: ";
gpuMatrixA->print(std::cout);
LOG(INFO) << "gpuMatrixB: ";
(&gpuMatrixB)->print(std::cout);
LOG(INFO) << "gpuMatrixC: ";
gpuMatrixC->print(std::cout);
/*matrix mul*/
BufferArgs cpuInputs;
BufferArgs cpuOutputs;
cpuInputs.addArg(*cpuMatrixA);
cpuInputs.addArg(cpuMatrixB);
cpuOutputs.addArg(*cpuMatrixC, ADD_TO);
cpuFunc->calc(cpuInputs, cpuOutputs);
BufferArgs gpuInputs;
BufferArgs gpuOutputs;
gpuInputs.addArg(*gpuMatrixA);
gpuInputs.addArg(gpuMatrixB);
gpuOutputs.addArg(*gpuMatrixC, ADD_TO);
gpuFunc->calc(gpuInputs, gpuOutputs);
BufferArgs denseInputs;
BufferArgs denseOutputs;
denseInputs.addArg(*cpuDenseA);
denseInputs.addArg(*cpuDenseB);
denseOutputs.addArg(*cpuDenseC, ADD_TO);
cpuFunc->calc(denseInputs, denseOutputs);
gpuMatrixC_d2h->copyFrom(*gpuMatrixC, stream);
hl_stream_synchronize(stream);
/*check result*/
// autotest::TensorCheckErr(*cpuMatrixC, *gpuMatrixC);
checkMatrixEqual(cpuMatrixC, cpuDenseC);
checkMatrixEqual(cpuMatrixC, gpuMatrixC_d2h);
}
TEST(Matrix, SparseMatrixMul) {
LOG(INFO) << "test for dense = dense * sparse matrix";
testDSparseDMatrix();
}
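As a concrete instance of the C += A * B check with the dimensions used above (dimM = 2, dimK = 3, dimN = 2, NNZ = 3, CSC format), worked with illustrative values rather than the randomized ones in the test:
// A (2x3, dense):    B (3x2, sparse CSC, nnz = 3):
//   [1 2 0]            [1 0]
//   [0 1 3]            [0 2]
//                      [4 0]
// A * B:
//   row 0: [1*1 + 2*0 + 0*4, 1*0 + 2*2 + 0*0] = [ 1  4]
//   row 1: [0*1 + 1*0 + 3*4, 0*0 + 1*2 + 3*0] = [12  2]
// With scaleT (beta) = 1.0 and ADD_TO, this product is accumulated into the
// existing contents of C, which is why the dense reference path starts from
// the same randomized C (cpuDenseC->copyFrom(*cpuMatrixC)).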