提交 077f936a 编写于 作者: X xutianbing

Support SparseMatrixArg unit test using Daoyuan's new Function Test.

上级 316bf75a
......@@ -33,7 +33,6 @@ SparseMatrixArg::SparseMatrixArg(const CpuSparseMatrix& sparse, ArgType argType)
: BufferArg(sparse, argType),
row_(reinterpret_cast<void*>(sparse.getRows()), VALUE_TYPE_INT32),
col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32),
/// todo(tianbing), make sure how to get NNZ
nnz_(sparse.getElementCnt()),
format_(sparse.getFormat()),
type_(sparse.getValueType()) {
......@@ -44,7 +43,6 @@ SparseMatrixArg::SparseMatrixArg(const GpuSparseMatrix& sparse, ArgType argType)
: BufferArg(sparse, argType),
row_(reinterpret_cast<void*>(sparse.getRows()), VALUE_TYPE_INT32),
col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32),
/// todo(tianbing), make sure how to get NNZ
nnz_(sparse.getElementCnt()),
format_(sparse.getFormat()),
type_(sparse.getValueType()) {
......
......@@ -71,17 +71,24 @@ public:
public:
BufferArg(ValueType valueType,
const TensorShape& shape,
ArgType argType = UNSPECIFIED)
ArgType argType = UNSPECIFIED,
bool trans = false)
: buf_(nullptr),
valueType_(valueType),
shape_(shape),
argType_(argType) {}
argType_(argType),
trans_(trans) {}
BufferArg(void* buf,
ValueType valueType,
const TensorShape& shape,
ArgType argType = UNSPECIFIED)
: buf_(buf), valueType_(valueType), shape_(shape), argType_(argType) {}
ArgType argType = UNSPECIFIED,
bool trans = false)
: buf_(buf),
valueType_(valueType),
shape_(shape),
argType_(argType),
trans_(trans) {}
BufferArg(void* buf, ValueType valueType)
: buf_(buf), valueType_(valueType) {}
......@@ -162,6 +169,7 @@ public:
ValueType valueType() const { return valueType_; }
BufferType bufferType() const { return bufferType_; }
const TensorShape& shape() const { return shape_; }
bool isTransposed() const { return trans_; }
bool isSparseArg() const { return TENSOR_SPARSE == bufferType_; }
bool isSequenceArg() const { return TENSOR_SEQUENCE_DATA == bufferType_; }
......@@ -175,6 +183,7 @@ protected:
BufferType bufferType_{TENSOR_UNKNOWN};
ArgType argType_{UNSPECIFIED};
bool trans_{false};
// todo(tianbing), add deviceType_
// leading dimensions. The size is dims_.size()
// Dims lds_;
};
......@@ -267,8 +276,9 @@ public:
size_t nnz,
SparseFormat format,
SparseValueType type,
ArgType argType = UNSPECIFIED)
: BufferArg(buf, valueType, shape, argType),
ArgType argType = UNSPECIFIED,
bool trans = false)
: BufferArg(buf, valueType, shape, argType, trans),
row_(row),
col_(col),
nnz_(nnz),
......@@ -286,6 +296,33 @@ public:
}
}
/// Construct a SparseMatrixArg from metadata only (no data buffer yet):
/// the value buffer, row buffer and col buffer are all created with
/// buf_ == nullptr and must be filled in later.
///
/// @param valueType element type of the non-zero values
///                  (only VALUE_TYPE_FLOAT / VALUE_TYPE_DOUBLE are accepted)
/// @param shape     2-D shape {height, width} of the sparse matrix
/// @param nnz       number of non-zero elements
/// @param format    sparse storage format, SPARSE_CSR or SPARSE_CSC
/// @param type      sparse value type (e.g. FLOAT_VALUE)
/// @param argType   argument role (input / output assign / add-to)
/// @param trans     whether the matrix is logically transposed
SparseMatrixArg(ValueType valueType,
const TensorShape& shape,
size_t nnz,
SparseFormat format,
SparseValueType type,
ArgType argType = UNSPECIFIED,
bool trans = false)
: BufferArg(valueType, shape, argType, trans),
/// len of row_ : height + 1 (CSR), buf_ == nullptr
/// (CSR stores one row-offset per row plus a final end offset;
///  CSC instead stores one row index per non-zero element)
row_(format == SPARSE_CSR
? BufferArg(VALUE_TYPE_INT32, TensorShape{shape[0] + 1})
: BufferArg(VALUE_TYPE_INT32, TensorShape{nnz})),
/// len of col_ : width + 1 (CSC), buf_ == nullptr
/// (CSC stores one column-offset per column plus a final end offset;
///  CSR instead stores one column index per non-zero element)
col_(format == SPARSE_CSR
? BufferArg(VALUE_TYPE_INT32, TensorShape{nnz})
: BufferArg(VALUE_TYPE_INT32, TensorShape{shape[1] + 1})),
nnz_(nnz),
format_(format),
type_(type) {
bufferType_ = TENSOR_SPARSE;
/// todo(tianbing)
/// valueType and shape_.ndims() == 2 need to check before
/// this constructor to make sure row_ and col_ are right
CHECK((valueType == VALUE_TYPE_FLOAT) || (valueType == VALUE_TYPE_DOUBLE));
CHECK_EQ(shape_.ndims(), (size_t)2);
}
SparseMatrixArg(const CpuSparseMatrix& sparse, ArgType argType = UNSPECIFIED);
SparseMatrixArg(const GpuSparseMatrix& sparse, ArgType argType = UNSPECIFIED);
......
......@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "Function.h"
#include "paddle/math/Matrix.h"
#include "paddle/math/SparseMatrix.h"
#include "paddle/math/Vector.h"
#include "paddle/math/tests/TensorCheck.h"
#include "paddle/testing/TestUtil.h"
......@@ -62,29 +64,41 @@ public:
cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(size));
gpuMemory_.emplace_back(std::make_shared<GpuMemoryHandle>(size));
cpuInputs_.emplace_back(std::make_shared<BufferArg>(
cpuMemory_.back()->getBuf(), input.valueType(), input.shape()));
gpuInputs_.emplace_back(std::make_shared<BufferArg>(
gpuMemory_.back()->getBuf(), input.valueType(), input.shape()));
cpuInputs_.emplace_back(
std::make_shared<BufferArg>(cpuMemory_.back()->getBuf(),
input.valueType(),
input.shape(),
UNSPECIFIED,
input.isTransposed()));
gpuInputs_.emplace_back(
std::make_shared<BufferArg>(gpuMemory_.back()->getBuf(),
input.valueType(),
input.shape(),
UNSPECIFIED,
input.isTransposed()));
}
// output need only contains shape, do not contains data.
void addOutputs(const BufferArg& output) {
void addOutputs(const BufferArg& output, ArgType argType = ASSIGN_TO) {
size_t size =
output.shape().getElements() * sizeOfValuType(output.valueType());
cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(size));
gpuMemory_.emplace_back(std::make_shared<GpuMemoryHandle>(size));
cpuOutputs_.emplace_back(
std::make_shared<BufferArg>(cpuMemory_.back()->getBuf(),
output.valueType(),
output.shape(),
ASSIGN_TO));
gpuOutputs_.emplace_back(
std::make_shared<BufferArg>(gpuMemory_.back()->getBuf(),
output.valueType(),
output.shape(),
ASSIGN_TO));
cpuOutputs_.emplace_back(std::make_shared<BufferArg>(
cpuMemory_.back()->getBuf(),
output.valueType(),
output.shape(),
// todo(tianbing), argType = output.getArgType(), but default ASSIGN_TO
argType,
output.isTransposed()));
gpuOutputs_.emplace_back(std::make_shared<BufferArg>(
gpuMemory_.back()->getBuf(),
output.valueType(),
output.shape(),
// todo(tianbing), argType = output.getArgType(), but default ASSIGN_TO
argType,
output.isTransposed()));
}
void addInputs(const SequenceArg& input) {
......@@ -107,10 +121,36 @@ public:
// TODO: need be implemented.
}
/// Register a sparse input argument for the CPU/GPU comparison run.
/// A CpuSparseMatrix and a GpuSparseMatrix are created with the same
/// height/width/nnz/value-type/format/transpose metadata as `input`,
/// the CPU matrix is filled with random data, and the same contents are
/// copied to the GPU matrix so both devices see identical inputs.
void addInputs(const SparseMatrixArg& input) {
cpuSparse_ = std::make_shared<CpuSparseMatrix>(input.shape()[0],
input.shape()[1],
input.nnz(),
input.dataType(),
input.dataFormat(),
input.isTransposed());
gpuSparse_ = std::make_shared<GpuSparseMatrix>(input.shape()[0],
input.shape()[1],
input.nnz(),
input.dataType(),
input.dataFormat(),
input.isTransposed());
/// init sparse matrix
hl_stream_t stream(HPPL_STREAM_1);
cpuSparse_->randomizeUniform();
// copyFrom on the stream is asynchronous; synchronize before the data
// is used so the GPU matrix is fully populated.
gpuSparse_->copyFrom(*cpuSparse_, stream);
hl_stream_synchronize(stream);
// Both inputs are kept alive by cpuSparse_/gpuSparse_ members; the args
// registered here only wrap them.
cpuInputs_.emplace_back(std::make_shared<SparseMatrixArg>(*cpuSparse_));
gpuInputs_.emplace_back(std::make_shared<SparseMatrixArg>(*gpuSparse_));
}
void run() {
// prepare cpu/gpu arguments
initInputs();
initOutputs();
// function calculate
auto callFunction = [](FunctionBase* function,
std::vector<BufferArgPtr>& inputs,
......@@ -129,7 +169,7 @@ public:
callFunction(cpuFunc_.get(), cpuInputs_, cpuOutputs_);
callFunction(gpuFunc_.get(), gpuInputs_, gpuOutputs_);
// check outputs and inouts
// check outputs
compareOutputs();
}
......@@ -140,6 +180,10 @@ public:
protected:
void initInputs() {
for (size_t i = 0; i < cpuInputs_.size(); i++) {
if (cpuInputs_[i]->isSparseArg()) {
continue; /// sparse matrix already init
}
initArg(*cpuInputs_[i]);
// TODO: Need a BufferCopy used to copy from one BufferArg to another.
......@@ -152,6 +196,25 @@ protected:
}
}
void initOutputs() {
for (size_t i = 0; i < cpuOutputs_.size(); i++) {
if (cpuOutputs_[i]->isSparseArg()) {
LOG(INFO) << "output sparse matrix already init";
continue;
}
initArg(*cpuOutputs_[i]);
// TODO: Need a BufferCopy used to copy from one BufferArg to another.
CpuVector cpuVector(cpuOutputs_[i]->shape().getElements(),
(real*)cpuOutputs_[i]->data());
GpuVector gpuVector(gpuOutputs_[i]->shape().getElements(),
(real*)gpuOutputs_[i]->data());
gpuVector.copyFrom(cpuVector);
}
}
void compareOutputs() {
for (size_t i = 0; i < cpuOutputs_.size(); i++) {
// TODO, Need a BufferCheck used to compare the two buffers.
......@@ -159,7 +222,6 @@ protected:
auto gpu = gpuOutputs_[i];
CpuVector cpuVector(cpu->shape().getElements(), (real*)cpu->data());
GpuVector gpuVector(cpu->shape().getElements(), (real*)gpu->data());
autotest::TensorCheckErr(cpuVector, gpuVector);
}
}
......@@ -195,6 +257,8 @@ protected:
std::vector<BufferArgPtr> cpuOutputs_;
std::vector<BufferArgPtr> gpuInputs_;
std::vector<BufferArgPtr> gpuOutputs_;
std::shared_ptr<CpuSparseMatrix> cpuSparse_;
std::shared_ptr<GpuSparseMatrix> gpuSparse_;
};
} // namespace paddle
......@@ -15,6 +15,8 @@ limitations under the License. */
#pragma once
#include "Function.h"
/// todo(tianbing), delete it
#include <iostream>
#include "paddle/math/Matrix.h"
#include "paddle/math/SparseMatrix.h"
......
......@@ -24,58 +24,39 @@ limitations under the License. */
using namespace paddle; // NOLINT
/**
* C = alpha * C + beta * (A * B), A, B, C dense matrix
* C += A * B, A, B, C dense matrix
* dense = dense * dense
*/
void testDDDMatrix(bool transa, bool transb, int dimM, int dimN, int dimK) {
real alpha = 1.5;
real beta = 2.0;
const auto cpuFunc = FunctionBase::funcRegistrar_.createByType("MulOp-CPU");
cpuFunc->init(FuncConfig().set("scaleAB", alpha).set("scaleT", beta));
const auto gpuFunc = FunctionBase::funcRegistrar_.createByType("MulOp-GPU");
gpuFunc->init(FuncConfig().set("scaleAB", alpha).set("scaleT", beta));
int heightA = (transa == false) ? dimM : dimK;
int widthA = (transa == false) ? dimK : dimM;
int heightB = (transb == false) ? dimK : dimN;
int widthB = (transb == false) ? dimN : dimK;
int heightC = dimM;
int widthC = dimN;
auto cpuA = std::make_shared<CpuMatrix>(heightA, widthA, transa);
auto cpuB = std::make_shared<CpuMatrix>(heightB, widthB, transb);
auto cpuC = std::make_shared<CpuMatrix>(heightC, widthC);
auto gpuA = std::make_shared<GpuMatrix>(heightA, widthA, transa);
auto gpuB = std::make_shared<GpuMatrix>(heightB, widthB, transb);
auto gpuC = std::make_shared<GpuMatrix>(heightC, widthC);
cpuA->randomizeUniform();
cpuB->randomizeUniform();
cpuC->randomizeUniform();
gpuA->copyFrom(*cpuA);
gpuB->copyFrom(*cpuB);
gpuC->copyFrom(*cpuC);
BufferArgs cpuInputs;
BufferArgs cpuOutputs;
cpuInputs.addArg(*cpuA);
cpuInputs.addArg(*cpuB);
cpuOutputs.addArg(*cpuC, ADD_TO);
cpuFunc->calc(cpuInputs, cpuOutputs);
BufferArgs gpuInputs;
BufferArgs gpuOutputs;
gpuInputs.addArg(*gpuA);
gpuInputs.addArg(*gpuB);
gpuOutputs.addArg(*gpuC, ADD_TO);
gpuFunc->calc(gpuInputs, gpuOutputs);
autotest::TensorCheckErr(*cpuC, *gpuC);
void testFuncDDDMatrix(
bool transa, bool transb, size_t dimM, size_t dimN, size_t dimK) {
real alpha = 1.0;
real beta = 1.0;
size_t heightA = (transa == false) ? dimM : dimK;
size_t widthA = (transa == false) ? dimK : dimM;
size_t heightB = (transb == false) ? dimK : dimN;
size_t widthB = (transb == false) ? dimN : dimK;
size_t heightC = dimM;
size_t widthC = dimN;
// init Test object
FunctionCompare test("MulOp",
FuncConfig().set("scaleAB", alpha).set("scaleT", beta));
// prepare input arguments
/// matrix A : HA * WA
test.addInputs(BufferArg(
VALUE_TYPE_FLOAT, TensorShape{heightA, widthA}, UNSPECIFIED, transa));
/// matrix B: HB * WB
test.addInputs(BufferArg(
VALUE_TYPE_FLOAT, TensorShape{heightB, widthB}, UNSPECIFIED, transb));
/// output matrix C: HC * WC
test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{heightC, widthC}),
ADD_TO);
// run Function
test.run();
}
TEST(Matrix, DDDMul) {
LOG(INFO) << "test for dense = dense * dense matrix";
TEST(MulOp, DDDMatrixMul) {
LOG(INFO) << "function test for dense = dense * dense matrix";
for (const auto transa : {false, true}) {
for (const auto transb : {false, true}) {
for (const auto dimM : {1, 10, 100}) {
......@@ -89,7 +70,7 @@ TEST(Matrix, DDDMul) {
<< " dimM=" << std::setw(5) << dimM
<< " dimN=" << std::setw(5) << dimN
<< " dimK=" << std::setw(5) << dimK;
testDDDMatrix(transa, transb, dimM, dimN, dimK);
testFuncDDDMatrix(transa, transb, dimM, dimN, dimK);
}
}
}
......@@ -101,71 +82,33 @@ TEST(Matrix, DDDMul) {
* C += A * B, B, C dense, A sparse
* dense = sparse * dense
*/
void testDSparseDMatrix(
void testFuncDSparseDMatrix(
size_t dimM, size_t dimN, size_t dimK, size_t nnz, SparseFormat FORMAT) {
real alpha = 1.0;
real beta = 1.0;
const auto cpuFunc = FunctionBase::funcRegistrar_.createByType("MulOp-CPU");
cpuFunc->init(FuncConfig().set("scaleAB", alpha).set("scaleT", beta));
const auto gpuFunc = FunctionBase::funcRegistrar_.createByType("MulOp-GPU");
gpuFunc->init(FuncConfig().set("scaleAB", alpha).set("scaleT", beta));
CpuSparseMatrix cpuMatrixA(dimM, dimK, nnz, FLOAT_VALUE, FORMAT, false);
GpuSparseMatrix gpuMatrixA(dimM, dimK, nnz, FLOAT_VALUE, FORMAT, false);
CpuMatrix cpuDenseA(dimM, dimK, false);
auto cpuMatrixB = Matrix::create(dimK, dimN, false, false);
auto gpuMatrixB = Matrix::create(dimK, dimN, false, true);
auto cpuDenseB = Matrix::create(dimK, dimN, false, false);
auto cpuMatrixC = Matrix::create(dimM, dimN, false, false);
auto gpuMatrixC = Matrix::create(dimM, dimN, false, true);
auto cpuDenseC = Matrix::create(dimM, dimN, false, false);
/*matrix init*/
hl_stream_t stream(HPPL_STREAM_1);
cpuMatrixA.randomizeUniform();
cpuMatrixB->randomizeUniform();
cpuMatrixC->randomizeUniform();
gpuMatrixA.copyFrom(cpuMatrixA, stream);
gpuMatrixB->copyFrom(*cpuMatrixB, stream);
gpuMatrixC->copyFrom(*cpuMatrixC, stream);
cpuDenseA.copyFrom(cpuMatrixA);
cpuDenseB->copyFrom(*cpuMatrixB);
cpuDenseC->copyFrom(*cpuMatrixC);
hl_stream_synchronize(stream);
/*matrix mul*/
BufferArgs cpuInputs;
BufferArgs cpuOutputs;
cpuInputs.addArg(cpuMatrixA);
cpuInputs.addArg(*cpuMatrixB);
cpuOutputs.addArg(*cpuMatrixC, ADD_TO);
cpuFunc->calc(cpuInputs, cpuOutputs);
BufferArgs gpuInputs;
BufferArgs gpuOutputs;
gpuInputs.addArg(gpuMatrixA);
gpuInputs.addArg(*gpuMatrixB);
gpuOutputs.addArg(*gpuMatrixC, ADD_TO);
gpuFunc->calc(gpuInputs, gpuOutputs);
BufferArgs denseInputs;
BufferArgs denseOutputs;
denseInputs.addArg(cpuDenseA);
denseInputs.addArg(*cpuDenseB);
denseOutputs.addArg(*cpuDenseC, ADD_TO);
cpuFunc->calc(denseInputs, denseOutputs);
/*check result*/
autotest::TensorCheckErr(*cpuMatrixC, *cpuDenseC);
autotest::TensorCheckErr(*cpuMatrixC, *gpuMatrixC);
// init Test object
FunctionCompare test("MulOp",
FuncConfig().set("scaleAB", alpha).set("scaleT", beta));
// prepare input arguments
/// sparse matrix A : M * K
test.addInputs(SparseMatrixArg(VALUE_TYPE_FLOAT,
TensorShape{dimM, dimK},
nnz,
FORMAT,
FLOAT_VALUE,
UNSPECIFIED,
false));
/// matrix B: K * N
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{dimK, dimN}));
/// output matrix C: M * N
test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{dimM, dimN}), ADD_TO);
// run Function
test.run();
}
TEST(Matrix, DSparseDMul) {
LOG(INFO) << "test for dense = sparse * dense matrix";
TEST(MuLOp, DSparseDMul) {
LOG(INFO) << "function test for dense = sparse * dense matrix";
for (const auto dimM : {10, 100, 1000}) {
for (const auto dimN : {10, 100}) {
for (const auto dimK : {3, 10}) {
......@@ -177,7 +120,7 @@ TEST(Matrix, DSparseDMul) {
<< " dimK=" << std::setw(5) << dimK
<< " nnz=" << std::setw(5) << nnz
<< " format=" << std::setw(5) << FORMAT;
testDSparseDMatrix(dimM, dimN, dimK, nnz, FORMAT);
testFuncDSparseDMatrix(dimM, dimN, dimK, nnz, FORMAT);
}
}
}
......@@ -189,72 +132,34 @@ TEST(Matrix, DSparseDMul) {
* C += A * B, A, C dense, B sparse
* dense = dense * sparse
*/
void testDDSparseMatrix(
void testFuncDDSparseMatrix(
size_t dimM, size_t dimN, size_t dimK, size_t nnz, SparseFormat FORMAT) {
real alpha = 1.0;
real beta = 1.0;
const auto cpuFunc = FunctionBase::funcRegistrar_.createByType("MulOp-CPU");
cpuFunc->init(FuncConfig().set("scaleAB", alpha).set("scaleT", beta));
const auto gpuFunc = FunctionBase::funcRegistrar_.createByType("MulOp-GPU");
gpuFunc->init(FuncConfig().set("scaleAB", alpha).set("scaleT", beta));
auto cpuMatrixA = Matrix::create(dimM, dimK, false, false);
auto gpuMatrixA = Matrix::create(dimM, dimK, false, true);
auto cpuDenseA = Matrix::create(dimM, dimK, false, false);
CpuSparseMatrix cpuMatrixB(dimK, dimN, nnz, FLOAT_VALUE, FORMAT, false);
GpuSparseMatrix gpuMatrixB(dimK, dimN, nnz, FLOAT_VALUE, FORMAT, false);
auto cpuDenseB = Matrix::create(dimK, dimN, false, false);
auto cpuMatrixC = Matrix::create(dimM, dimN, false, false);
auto gpuMatrixC = Matrix::create(dimM, dimN, false, true);
auto cpuDenseC = Matrix::create(dimM, dimN, false, false);
/*matrix init*/
hl_stream_t stream(HPPL_STREAM_1);
cpuMatrixA->randomizeUniform();
cpuMatrixB.randomizeUniform();
cpuMatrixC->randomizeUniform();
gpuMatrixA->copyFrom(*cpuMatrixA, stream);
gpuMatrixB.copyFrom(cpuMatrixB, stream);
gpuMatrixC->copyFrom(*cpuMatrixC, stream);
cpuDenseA->copyFrom(*cpuMatrixA);
cpuDenseB->copyFrom(cpuMatrixB);
cpuDenseC->copyFrom(*cpuMatrixC);
hl_stream_synchronize(stream);
/*matrix mul*/
BufferArgs cpuInputs;
BufferArgs cpuOutputs;
cpuInputs.addArg(*cpuMatrixA);
cpuInputs.addArg(cpuMatrixB);
cpuOutputs.addArg(*cpuMatrixC, ADD_TO);
cpuFunc->calc(cpuInputs, cpuOutputs);
BufferArgs gpuInputs;
BufferArgs gpuOutputs;
gpuInputs.addArg(*gpuMatrixA);
gpuInputs.addArg(gpuMatrixB);
gpuOutputs.addArg(*gpuMatrixC, ADD_TO);
gpuFunc->calc(gpuInputs, gpuOutputs);
BufferArgs denseInputs;
BufferArgs denseOutputs;
denseInputs.addArg(*cpuDenseA);
denseInputs.addArg(*cpuDenseB);
denseOutputs.addArg(*cpuDenseC, ADD_TO);
cpuFunc->calc(denseInputs, denseOutputs);
/*check result*/
autotest::TensorCheckErr(*cpuMatrixC, *cpuDenseC);
autotest::TensorCheckErr(*cpuMatrixC, *gpuMatrixC);
// init Test object
FunctionCompare test("MulOp",
FuncConfig().set("scaleAB", alpha).set("scaleT", beta));
// prepare input arguments
/// matrix A : M * K
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{dimM, dimK}));
/// matrix B: K * N
test.addInputs(SparseMatrixArg(VALUE_TYPE_FLOAT,
TensorShape{dimK, dimN},
nnz,
FORMAT,
FLOAT_VALUE,
UNSPECIFIED,
false));
/// output matrix C: M * N
test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{dimM, dimN}), ADD_TO);
// run Function
test.run();
}
TEST(Matrix, DDSparseMul) {
LOG(INFO) << "test for dense = dense * sparse matrix";
TEST(MulOp, DDSparseMul) {
LOG(INFO) << "function test for dense = dense * sparse matrix";
for (const auto dimM : {10, 100, 1000}) {
for (const auto dimN : {10, 100}) {
for (const auto dimK : {3, 10}) {
......@@ -266,7 +171,7 @@ TEST(Matrix, DDSparseMul) {
<< " dimK=" << std::setw(5) << dimK
<< " nnz=" << std::setw(5) << nnz
<< " format=" << std::setw(5) << FORMAT;
testDDSparseMatrix(dimM, dimN, dimK, nnz, FORMAT);
testFuncDDSparseMatrix(dimM, dimN, dimK, nnz, FORMAT);
}
}
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册