提交 2df8eec5 编写于 作者: X xutianbing

Pass Unit test for GpuMatrix::mul(GpuMatrix, GpuMatrix) and CpuMatrix::mul(CpuMatrix, CpuMatrix)

上级 1f0cbcf3
...@@ -32,16 +32,14 @@ const SparseMatrixArg& BufferArg::sparse() const { ...@@ -32,16 +32,14 @@ const SparseMatrixArg& BufferArg::sparse() const {
SparseMatrixArg::SparseMatrixArg(const CpuSparseMatrix& sparse, ArgType argType) SparseMatrixArg::SparseMatrixArg(const CpuSparseMatrix& sparse, ArgType argType)
: BufferArg(sparse, argType), : BufferArg(sparse, argType),
row_(reinterpret_cast<void*>(sparse.getRows()), VALUE_TYPE_INT32), row_(reinterpret_cast<void*>(sparse.getRows()), VALUE_TYPE_INT32),
col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32), col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32) {
trans_(const_cast<CpuSparseMatrix&>(sparse).getTranspose()) {
bufferType_ = TENSOR_SPARSE; bufferType_ = TENSOR_SPARSE;
} }
SparseMatrixArg::SparseMatrixArg(const GpuSparseMatrix& sparse, ArgType argType) SparseMatrixArg::SparseMatrixArg(const GpuSparseMatrix& sparse, ArgType argType)
: BufferArg(sparse, argType), : BufferArg(sparse, argType),
row_(reinterpret_cast<void*>(sparse.getRows()), VALUE_TYPE_INT32), row_(reinterpret_cast<void*>(sparse.getRows()), VALUE_TYPE_INT32),
col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32), col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32) {
trans_(const_cast<GpuSparseMatrix&>(sparse).getTranspose()) {
bufferType_ = TENSOR_SPARSE; bufferType_ = TENSOR_SPARSE;
} }
......
...@@ -98,7 +98,8 @@ public: ...@@ -98,7 +98,8 @@ public:
const_cast<void*>(reinterpret_cast<const void*>(matrix.getData()))), const_cast<void*>(reinterpret_cast<const void*>(matrix.getData()))),
valueType_(DataType<real>::value), valueType_(DataType<real>::value),
shape_(2), shape_(2),
argType_(argType) { argType_(argType),
trans_(matrix.isTransposed()) {
bufferType_ = TENSOR_NORMAL; bufferType_ = TENSOR_NORMAL;
shape_.setDim(0, matrix.getHeight()); shape_.setDim(0, matrix.getHeight());
shape_.setDim(1, matrix.getWidth()); shape_.setDim(1, matrix.getWidth());
...@@ -111,7 +112,8 @@ public: ...@@ -111,7 +112,8 @@ public:
const_cast<void*>(reinterpret_cast<const void*>(matrix.getData()))), const_cast<void*>(reinterpret_cast<const void*>(matrix.getData()))),
valueType_(DataType<real>::value), valueType_(DataType<real>::value),
shape_(shape), shape_(shape),
argType_(argType) { argType_(argType),
trans_(matrix.isTransposed()) {
bufferType_ = TENSOR_NORMAL; bufferType_ = TENSOR_NORMAL;
CHECK_EQ(matrix.getElementCnt(), shape.getElements()); CHECK_EQ(matrix.getElementCnt(), shape.getElements());
} }
...@@ -143,7 +145,7 @@ public: ...@@ -143,7 +145,7 @@ public:
// CHECK(deviceType_ == DType); // CHECK(deviceType_ == DType);
CHECK_EQ((size_t)2, shape_.ndims()); CHECK_EQ((size_t)2, shape_.ndims());
return typename Tensor<real, DType>::Matrix( return typename Tensor<real, DType>::Matrix(
reinterpret_cast<real*>(buf_), shape_[0], shape_[1]); reinterpret_cast<real*>(buf_), shape_[0], shape_[1], trans_);
} }
template <typename VType, DeviceType DType> template <typename VType, DeviceType DType>
...@@ -179,6 +181,7 @@ protected: ...@@ -179,6 +181,7 @@ protected:
TensorShape shape_; TensorShape shape_;
BufferType bufferType_{TENSOR_UNKNOWN}; BufferType bufferType_{TENSOR_UNKNOWN};
ArgType argType_{UNSPECIFIED}; ArgType argType_{UNSPECIFIED};
bool trans_{false};
// leading dimensions. The size is dims_.size() // leading dimensions. The size is dims_.size()
// Dims lds_; // Dims lds_;
}; };
...@@ -271,15 +274,13 @@ public: ...@@ -271,15 +274,13 @@ public:
size_t nnz, size_t nnz,
SparseDataFormat format, SparseDataFormat format,
SparseDataType type, SparseDataType type,
bool trans = false,
ArgType argType = UNSPECIFIED) ArgType argType = UNSPECIFIED)
: BufferArg(buf, valueType, shape, argType), : BufferArg(buf, valueType, shape, argType),
row_(row), row_(row),
col_(col), col_(col),
nnz_(nnz), nnz_(nnz),
format_(format), format_(format),
type_(type), type_(type) {
trans_(trans) {
bufferType_ = TENSOR_SPARSE; bufferType_ = TENSOR_SPARSE;
CHECK((valueType == VALUE_TYPE_FLOAT) || (valueType == VALUE_TYPE_DOUBLE)); CHECK((valueType == VALUE_TYPE_FLOAT) || (valueType == VALUE_TYPE_DOUBLE));
CHECK_EQ(shape_.ndims(), (size_t)2); CHECK_EQ(shape_.ndims(), (size_t)2);
...@@ -322,8 +323,6 @@ public: ...@@ -322,8 +323,6 @@ public:
size_t nnz() const { return nnz_; } size_t nnz() const { return nnz_; }
bool isTranspose() const { return trans_; }
SparseDataFormat dataFormat() const { return format_; } SparseDataFormat dataFormat() const { return format_; }
SparseDataType dataType() const { return type_; } SparseDataType dataType() const { return type_; }
...@@ -334,8 +333,6 @@ private: ...@@ -334,8 +333,6 @@ private:
size_t nnz_; size_t nnz_;
SparseDataFormat format_; SparseDataFormat format_;
SparseDataType type_; SparseDataType type_;
/// todo(tianbing), move trans_ up to BufferArg
bool trans_;
}; };
} // namespace paddle } // namespace paddle
...@@ -483,8 +483,8 @@ template <DeviceType Device> ...@@ -483,8 +483,8 @@ template <DeviceType Device>
class MulFunc : public FunctionBase { class MulFunc : public FunctionBase {
public: public:
void init(const FuncConfig& config) override { void init(const FuncConfig& config) override {
scaleAB_ = config.get<real>("scaleAB"); alpha_ = config.get<real>("scaleAB");
scaleT_ = config.get<real>("scaleT"); beta_ = config.get<real>("scaleT");
} }
void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
...@@ -494,7 +494,7 @@ public: ...@@ -494,7 +494,7 @@ public:
CHECK_EQ(inputs[0].shape().ndims(), (size_t)2); CHECK_EQ(inputs[0].shape().ndims(), (size_t)2);
CHECK_EQ(inputs[1].shape().ndims(), (size_t)2); CHECK_EQ(inputs[1].shape().ndims(), (size_t)2);
CHECK_EQ(outputs[0].shape().ndims(), (size_t)2); CHECK_EQ(outputs[0].shape().ndims(), (size_t)2);
CHECK_EQ(outputs[0].getArgType(), ASSIGN_TO); CHECK_EQ(outputs[0].getArgType(), ADD_TO);
auto in1_mat = inputs[0].matrix<Device>(); auto in1_mat = inputs[0].matrix<Device>();
if (inputs[0].isSparseArg()) { if (inputs[0].isSparseArg()) {
...@@ -505,12 +505,12 @@ public: ...@@ -505,12 +505,12 @@ public:
in2_mat = inputs[1].sparse().SparseMatrix<Device>(); in2_mat = inputs[1].sparse().SparseMatrix<Device>();
} }
auto out_mat = outputs[0].matrix<Device>(); auto out_mat = outputs[0].matrix<Device>();
MulOp<Device>(out_mat, in1_mat, in2_mat, scaleAB_, scaleT_); MulOp<Device>(out_mat, in1_mat, in2_mat, alpha_, beta_);
} }
private: private:
real scaleAB_; real alpha_;
real scaleT_; real beta_;
}; };
REGISTER_TYPED_FUNC(MulOp, CPU, MulFunc); REGISTER_TYPED_FUNC(MulOp, CPU, MulFunc);
......
...@@ -68,4 +68,11 @@ void MulOp(GpuMatrix& out, ...@@ -68,4 +68,11 @@ void MulOp(GpuMatrix& out,
real scaleAB, real scaleAB,
real scaleT); real scaleT);
/// Declaration of the MulOp overload whose output is a GpuSparseMatrix and
/// whose inputs `a` and `b` are dense GpuMatrix operands.
/// NOTE(review): presumably computes out = scaleAB * (a * b) + scaleT * out,
/// as the parameter names suggest -- TODO confirm against the implementation.
template <DeviceType DType>
void MulOp(GpuSparseMatrix& out,
const GpuMatrix& a,
const GpuMatrix& b,
real scaleAB,
real scaleT);
} // namespace paddle } // namespace paddle
...@@ -170,4 +170,13 @@ void MulOp<DEVICE_TYPE_GPU>(GpuMatrix& out, ...@@ -170,4 +170,13 @@ void MulOp<DEVICE_TYPE_GPU>(GpuMatrix& out,
} }
} }
/// Explicit DEVICE_TYPE_GPU specialization of MulOp for a GpuSparseMatrix
/// output with dense GpuMatrix inputs `a` and `b`.
/// NOTE(review): the body is empty, so calling this is a silent no-op: `out`
/// is left untouched and `a`, `b`, `scale_ab` and `scale_t` are all ignored.
/// NOTE(review): parameter names differ from the declaration
/// (scaleAB/scaleT vs. scale_ab/scale_t); presumably the same meaning --
/// TODO confirm.
template <>
void MulOp<DEVICE_TYPE_GPU>(GpuSparseMatrix& out,
const GpuMatrix& a,
const GpuMatrix& b,
real scale_ab,
real scale_t) {
/// TODO(tianbing): implement it
}
} // namespace paddle } // namespace paddle
...@@ -16,50 +16,79 @@ limitations under the License. */ ...@@ -16,50 +16,79 @@ limitations under the License. */
#include "FunctionTest.h" #include "FunctionTest.h"
#include "paddle/math/Matrix.h" #include "paddle/math/Matrix.h"
#include "paddle/math/SparseMatrix.h" #include "paddle/math/SparseMatrix.h"
#include "paddle/math/tests/test_matrixUtil.h"
#include "paddle/testing/TestUtil.h" #include "paddle/testing/TestUtil.h"
using namespace paddle; // NOLINT using namespace paddle; // NOLINT
void testSpMatrixMul(int M, int N, int K, real rate, real scale1, real scale2) { /**
/// todo(tianbing) check CPU/GPU * C = alpha * (A * B) + beta * C
*/
void testMatrixMul(bool transa, bool transb, int dimM, int dimN, int dimK) {
real alpha = 1.5;
real beta = 2.0;
const auto cpuFunc = FunctionBase::funcRegistrar_.createByType("MulOp-CPU");
cpuFunc->init(FuncConfig().set("scaleAB", alpha).set("scaleT", beta));
const auto gpuFunc = FunctionBase::funcRegistrar_.createByType("MulOp-GPU"); const auto gpuFunc = FunctionBase::funcRegistrar_.createByType("MulOp-GPU");
gpuFunc->init(FuncConfig().set("scaleAB", scale1).set("scaleT", scale2)); gpuFunc->init(FuncConfig().set("scaleAB", alpha).set("scaleT", beta));
int nnz = M * N * rate; int heightA = (transa == false) ? dimM : dimK;
MatrixPtr cpuA = std::make_shared<CpuMatrix>(M, K); int widthA = (transa == false) ? dimK : dimM;
MatrixPtr cpuB = std::make_shared<CpuMatrix>(N, K); int heightB = (transb == false) ? dimK : dimN;
MatrixPtr cpuC(new CpuSparseMatrix(M, N, nnz)); int widthB = (transb == false) ? dimN : dimK;
int heightC = dimM;
int widthC = dimN;
MatrixPtr gpuA = std::make_shared<GpuMatrix>(M, K); auto cpuA = std::make_shared<CpuMatrix>(heightA, widthA, transa);
MatrixPtr gpuB = std::make_shared<GpuMatrix>(N, K); auto cpuB = std::make_shared<CpuMatrix>(heightB, widthB, transb);
MatrixPtr gpuC(new GpuSparseMatrix(M, N, nnz)); auto cpuC = std::make_shared<CpuMatrix>(heightC, widthC);
auto gpuA = std::make_shared<GpuMatrix>(heightA, widthA, transa);
auto gpuB = std::make_shared<GpuMatrix>(heightB, widthB, transb);
auto gpuC = std::make_shared<GpuMatrix>(heightC, widthC);
cpuA->randomizeUniform(); cpuA->randomizeUniform();
cpuB->randomizeUniform(); cpuB->randomizeUniform();
cpuC->randomizeUniform(); cpuC->randomizeUniform();
gpuA->copyFrom(*cpuA);
gpuB->copyFrom(*cpuB);
gpuC->copyFrom(*cpuC);
hl_stream_t stream(HPPL_STREAM_3); BufferArgs cpuInputs;
gpuA->copyFrom(*cpuA, stream); BufferArgs cpuOutputs;
gpuB->copyFrom(*cpuB, stream); cpuInputs.addArg(*cpuA);
gpuC->copyFrom(*cpuC, stream); cpuInputs.addArg(*cpuB);
hl_stream_synchronize(stream); cpuOutputs.addArg(*cpuC, ADD_TO);
cpuFunc->calc(cpuInputs, cpuOutputs);
BufferArgs inputs; BufferArgs gpuInputs;
BufferArgs outputs; BufferArgs gpuOutputs;
inputs.addArg(*gpuA->getTranspose()); gpuInputs.addArg(*gpuA);
inputs.addArg(*gpuB->getTranspose()); gpuInputs.addArg(*gpuB);
outputs.addArg(*gpuC, ASSIGN_TO); gpuOutputs.addArg(*gpuC, ADD_TO);
gpuFunc->calc(gpuInputs, gpuOutputs);
gpuFunc->calc(inputs, outputs); autotest::TensorCheckErr(*cpuC, *gpuC);
} }
TEST(SMatrix, sMatrixMul) { TEST(Matrix, mul) {
for (auto M : {1, 40, 128, 200}) { for (auto transa : {false, true}) {
for (auto N : {100}) { for (auto transb : {false, true}) {
for (auto K : {100}) { for (auto dimM : {1, 10, 100}) {
/// todo(tianbing), add scaleAB and scaleT for (auto dimN : {1, 10}) {
VLOG(3) << " M=" << M << " N=" << N << " K=" << K; for (auto dimK : {8}) {
testSpMatrixMul(M, N, K, 0.05, 1, 1); if (true == transa && true == transb) {
continue;
}
VLOG(3) << setiosflags(std::ios::left) << std::setfill(' ')
<< " transa=" << transa << " transb=" << transb
<< " dimM=" << std::setw(5) << dimM
<< " dimN=" << std::setw(5) << dimN
<< " dimK=" << std::setw(5) << dimK;
testMatrixMul(transa, transb, dimM, dimN, dimK);
}
}
} }
} }
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册