提交 2df8eec5 编写于 作者: X xutianbing

Pass Unit test for GpuMatrix::mul(GpuMatrix, GpuMatrix) and CpuMatrix::mul(CpuMatrix, CpuMatrix)

上级 1f0cbcf3
......@@ -32,16 +32,14 @@ const SparseMatrixArg& BufferArg::sparse() const {
SparseMatrixArg::SparseMatrixArg(const CpuSparseMatrix& sparse, ArgType argType)
: BufferArg(sparse, argType),
row_(reinterpret_cast<void*>(sparse.getRows()), VALUE_TYPE_INT32),
col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32),
trans_(const_cast<CpuSparseMatrix&>(sparse).getTranspose()) {
col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32) {
bufferType_ = TENSOR_SPARSE;
}
SparseMatrixArg::SparseMatrixArg(const GpuSparseMatrix& sparse, ArgType argType)
: BufferArg(sparse, argType),
row_(reinterpret_cast<void*>(sparse.getRows()), VALUE_TYPE_INT32),
col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32),
trans_(const_cast<GpuSparseMatrix&>(sparse).getTranspose()) {
col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32) {
bufferType_ = TENSOR_SPARSE;
}
......
......@@ -98,7 +98,8 @@ public:
const_cast<void*>(reinterpret_cast<const void*>(matrix.getData()))),
valueType_(DataType<real>::value),
shape_(2),
argType_(argType) {
argType_(argType),
trans_(matrix.isTransposed()) {
bufferType_ = TENSOR_NORMAL;
shape_.setDim(0, matrix.getHeight());
shape_.setDim(1, matrix.getWidth());
......@@ -111,7 +112,8 @@ public:
const_cast<void*>(reinterpret_cast<const void*>(matrix.getData()))),
valueType_(DataType<real>::value),
shape_(shape),
argType_(argType) {
argType_(argType),
trans_(matrix.isTransposed()) {
bufferType_ = TENSOR_NORMAL;
CHECK_EQ(matrix.getElementCnt(), shape.getElements());
}
......@@ -143,7 +145,7 @@ public:
// CHECK(deviceType_ == DType);
CHECK_EQ((size_t)2, shape_.ndims());
return typename Tensor<real, DType>::Matrix(
reinterpret_cast<real*>(buf_), shape_[0], shape_[1]);
reinterpret_cast<real*>(buf_), shape_[0], shape_[1], trans_);
}
template <typename VType, DeviceType DType>
......@@ -179,6 +181,7 @@ protected:
TensorShape shape_;
BufferType bufferType_{TENSOR_UNKNOWN};
ArgType argType_{UNSPECIFIED};
bool trans_{false};
// leading dimensions. The size is dims_.size()
// Dims lds_;
};
......@@ -271,15 +274,13 @@ public:
size_t nnz,
SparseDataFormat format,
SparseDataType type,
bool trans = false,
ArgType argType = UNSPECIFIED)
: BufferArg(buf, valueType, shape, argType),
row_(row),
col_(col),
nnz_(nnz),
format_(format),
type_(type),
trans_(trans) {
type_(type) {
bufferType_ = TENSOR_SPARSE;
CHECK((valueType == VALUE_TYPE_FLOAT) || (valueType == VALUE_TYPE_DOUBLE));
CHECK_EQ(shape_.ndims(), (size_t)2);
......@@ -322,8 +323,6 @@ public:
size_t nnz() const { return nnz_; }
bool isTranspose() const { return trans_; }
SparseDataFormat dataFormat() const { return format_; }
SparseDataType dataType() const { return type_; }
......@@ -334,8 +333,6 @@ private:
size_t nnz_;
SparseDataFormat format_;
SparseDataType type_;
/// todo(tianbing), move trans_ up to BufferArg
bool trans_;
};
} // namespace paddle
......@@ -483,8 +483,8 @@ template <DeviceType Device>
class MulFunc : public FunctionBase {
public:
void init(const FuncConfig& config) override {
scaleAB_ = config.get<real>("scaleAB");
scaleT_ = config.get<real>("scaleT");
alpha_ = config.get<real>("scaleAB");
beta_ = config.get<real>("scaleT");
}
void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
......@@ -494,7 +494,7 @@ public:
CHECK_EQ(inputs[0].shape().ndims(), (size_t)2);
CHECK_EQ(inputs[1].shape().ndims(), (size_t)2);
CHECK_EQ(outputs[0].shape().ndims(), (size_t)2);
CHECK_EQ(outputs[0].getArgType(), ASSIGN_TO);
CHECK_EQ(outputs[0].getArgType(), ADD_TO);
auto in1_mat = inputs[0].matrix<Device>();
if (inputs[0].isSparseArg()) {
......@@ -505,12 +505,12 @@ public:
in2_mat = inputs[1].sparse().SparseMatrix<Device>();
}
auto out_mat = outputs[0].matrix<Device>();
MulOp<Device>(out_mat, in1_mat, in2_mat, scaleAB_, scaleT_);
MulOp<Device>(out_mat, in1_mat, in2_mat, alpha_, beta_);
}
private:
real scaleAB_;
real scaleT_;
real alpha_;
real beta_;
};
REGISTER_TYPED_FUNC(MulOp, CPU, MulFunc);
......
......@@ -68,4 +68,11 @@ void MulOp(GpuMatrix& out,
real scaleAB,
real scaleT);
template <DeviceType DType>
void MulOp(GpuSparseMatrix& out,
const GpuMatrix& a,
const GpuMatrix& b,
real scaleAB,
real scaleT);
} // namespace paddle
......@@ -170,4 +170,13 @@ void MulOp<DEVICE_TYPE_GPU>(GpuMatrix& out,
}
}
/// GPU specialization of MulOp for a sparse output matrix.
/// Presumably intended to compute
///   out = scale_t * out + scale_ab * (a * b)
/// by analogy with the dense GpuMatrix overload's parameter naming
/// (scale_ab scales the product, scale_t scales the existing output) —
/// TODO confirm against the dense implementation once this is written.
///
/// @param out      sparse result matrix on the GPU (written in place)
/// @param a        dense left operand (GPU)
/// @param b        dense right operand (GPU)
/// @param scale_ab scale factor applied to the product a * b
/// @param scale_t  scale factor applied to the prior contents of out
///
/// NOTE(review): currently a stub — the body is intentionally empty, so
/// calling this silently leaves `out` unchanged.
template <>
void MulOp<DEVICE_TYPE_GPU>(GpuSparseMatrix& out,
const GpuMatrix& a,
const GpuMatrix& b,
real scale_ab,
real scale_t) {
/// TODO(tianbing): implement sparse-output GPU matrix multiplication
/// (e.g. via cuSPARSE SpMM / hl_sparse helpers).
}
} // namespace paddle
......@@ -16,50 +16,79 @@ limitations under the License. */
#include "FunctionTest.h"
#include "paddle/math/Matrix.h"
#include "paddle/math/SparseMatrix.h"
#include "paddle/math/tests/test_matrixUtil.h"
#include "paddle/testing/TestUtil.h"
using namespace paddle; // NOLINT
void testSpMatrixMul(int M, int N, int K, real rate, real scale1, real scale2) {
/// todo(tianbing) check CPU/GPU
/**
* C = alpha * C + beta * (A * B)
*/
void testMatrixMul(bool transa, bool transb, int dimM, int dimN, int dimK) {
real alpha = 1.5;
real beta = 2.0;
const auto cpuFunc = FunctionBase::funcRegistrar_.createByType("MulOp-CPU");
cpuFunc->init(FuncConfig().set("scaleAB", alpha).set("scaleT", beta));
const auto gpuFunc = FunctionBase::funcRegistrar_.createByType("MulOp-GPU");
gpuFunc->init(FuncConfig().set("scaleAB", scale1).set("scaleT", scale2));
gpuFunc->init(FuncConfig().set("scaleAB", alpha).set("scaleT", beta));
int nnz = M * N * rate;
MatrixPtr cpuA = std::make_shared<CpuMatrix>(M, K);
MatrixPtr cpuB = std::make_shared<CpuMatrix>(N, K);
MatrixPtr cpuC(new CpuSparseMatrix(M, N, nnz));
int heightA = (transa == false) ? dimM : dimK;
int widthA = (transa == false) ? dimK : dimM;
int heightB = (transb == false) ? dimK : dimN;
int widthB = (transb == false) ? dimN : dimK;
int heightC = dimM;
int widthC = dimN;
MatrixPtr gpuA = std::make_shared<GpuMatrix>(M, K);
MatrixPtr gpuB = std::make_shared<GpuMatrix>(N, K);
MatrixPtr gpuC(new GpuSparseMatrix(M, N, nnz));
auto cpuA = std::make_shared<CpuMatrix>(heightA, widthA, transa);
auto cpuB = std::make_shared<CpuMatrix>(heightB, widthB, transb);
auto cpuC = std::make_shared<CpuMatrix>(heightC, widthC);
auto gpuA = std::make_shared<GpuMatrix>(heightA, widthA, transa);
auto gpuB = std::make_shared<GpuMatrix>(heightB, widthB, transb);
auto gpuC = std::make_shared<GpuMatrix>(heightC, widthC);
cpuA->randomizeUniform();
cpuB->randomizeUniform();
cpuC->randomizeUniform();
gpuA->copyFrom(*cpuA);
gpuB->copyFrom(*cpuB);
gpuC->copyFrom(*cpuC);
hl_stream_t stream(HPPL_STREAM_3);
gpuA->copyFrom(*cpuA, stream);
gpuB->copyFrom(*cpuB, stream);
gpuC->copyFrom(*cpuC, stream);
hl_stream_synchronize(stream);
BufferArgs cpuInputs;
BufferArgs cpuOutputs;
cpuInputs.addArg(*cpuA);
cpuInputs.addArg(*cpuB);
cpuOutputs.addArg(*cpuC, ADD_TO);
cpuFunc->calc(cpuInputs, cpuOutputs);
BufferArgs inputs;
BufferArgs outputs;
inputs.addArg(*gpuA->getTranspose());
inputs.addArg(*gpuB->getTranspose());
outputs.addArg(*gpuC, ASSIGN_TO);
BufferArgs gpuInputs;
BufferArgs gpuOutputs;
gpuInputs.addArg(*gpuA);
gpuInputs.addArg(*gpuB);
gpuOutputs.addArg(*gpuC, ADD_TO);
gpuFunc->calc(gpuInputs, gpuOutputs);
gpuFunc->calc(inputs, outputs);
autotest::TensorCheckErr(*cpuC, *gpuC);
}
TEST(SMatrix, sMatrixMul) {
for (auto M : {1, 40, 128, 200}) {
for (auto N : {100}) {
for (auto K : {100}) {
/// todo(tianbing), add scaleAB and scaleT
VLOG(3) << " M=" << M << " N=" << N << " K=" << K;
testSpMatrixMul(M, N, K, 0.05, 1, 1);
TEST(Matrix, mul) {
for (auto transa : {false, true}) {
for (auto transb : {false, true}) {
for (auto dimM : {1, 10, 100}) {
for (auto dimN : {1, 10}) {
for (auto dimK : {8}) {
if (true == transa && true == transb) {
continue;
}
VLOG(3) << setiosflags(std::ios::left) << std::setfill(' ')
<< " transa=" << transa << " transb=" << transb
<< " dimM=" << std::setw(5) << dimM
<< " dimN=" << std::setw(5) << dimN
<< " dimK=" << std::setw(5) << dimK;
testMatrixMul(transa, transb, dimM, dimN, dimK);
}
}
}
}
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册