提交 077f936a 编写于 作者: X xutianbing

Support SparseMatrixArg unit test using Daoyuan's new Function Test.

上级 316bf75a
...@@ -33,7 +33,6 @@ SparseMatrixArg::SparseMatrixArg(const CpuSparseMatrix& sparse, ArgType argType) ...@@ -33,7 +33,6 @@ SparseMatrixArg::SparseMatrixArg(const CpuSparseMatrix& sparse, ArgType argType)
: BufferArg(sparse, argType), : BufferArg(sparse, argType),
row_(reinterpret_cast<void*>(sparse.getRows()), VALUE_TYPE_INT32), row_(reinterpret_cast<void*>(sparse.getRows()), VALUE_TYPE_INT32),
col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32), col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32),
/// todo(tianbing), make sure how to get NNZ
nnz_(sparse.getElementCnt()), nnz_(sparse.getElementCnt()),
format_(sparse.getFormat()), format_(sparse.getFormat()),
type_(sparse.getValueType()) { type_(sparse.getValueType()) {
...@@ -44,7 +43,6 @@ SparseMatrixArg::SparseMatrixArg(const GpuSparseMatrix& sparse, ArgType argType) ...@@ -44,7 +43,6 @@ SparseMatrixArg::SparseMatrixArg(const GpuSparseMatrix& sparse, ArgType argType)
: BufferArg(sparse, argType), : BufferArg(sparse, argType),
row_(reinterpret_cast<void*>(sparse.getRows()), VALUE_TYPE_INT32), row_(reinterpret_cast<void*>(sparse.getRows()), VALUE_TYPE_INT32),
col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32), col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32),
/// todo(tianbing), make sure how to get NNZ
nnz_(sparse.getElementCnt()), nnz_(sparse.getElementCnt()),
format_(sparse.getFormat()), format_(sparse.getFormat()),
type_(sparse.getValueType()) { type_(sparse.getValueType()) {
......
...@@ -71,17 +71,24 @@ public: ...@@ -71,17 +71,24 @@ public:
public: public:
BufferArg(ValueType valueType, BufferArg(ValueType valueType,
const TensorShape& shape, const TensorShape& shape,
ArgType argType = UNSPECIFIED) ArgType argType = UNSPECIFIED,
bool trans = false)
: buf_(nullptr), : buf_(nullptr),
valueType_(valueType), valueType_(valueType),
shape_(shape), shape_(shape),
argType_(argType) {} argType_(argType),
trans_(trans) {}
BufferArg(void* buf, BufferArg(void* buf,
ValueType valueType, ValueType valueType,
const TensorShape& shape, const TensorShape& shape,
ArgType argType = UNSPECIFIED) ArgType argType = UNSPECIFIED,
: buf_(buf), valueType_(valueType), shape_(shape), argType_(argType) {} bool trans = false)
: buf_(buf),
valueType_(valueType),
shape_(shape),
argType_(argType),
trans_(trans) {}
BufferArg(void* buf, ValueType valueType) BufferArg(void* buf, ValueType valueType)
: buf_(buf), valueType_(valueType) {} : buf_(buf), valueType_(valueType) {}
...@@ -162,6 +169,7 @@ public: ...@@ -162,6 +169,7 @@ public:
ValueType valueType() const { return valueType_; } ValueType valueType() const { return valueType_; }
BufferType bufferType() const { return bufferType_; } BufferType bufferType() const { return bufferType_; }
const TensorShape& shape() const { return shape_; } const TensorShape& shape() const { return shape_; }
bool isTransposed() const { return trans_; }
bool isSparseArg() const { return TENSOR_SPARSE == bufferType_; } bool isSparseArg() const { return TENSOR_SPARSE == bufferType_; }
bool isSequenceArg() const { return TENSOR_SEQUENCE_DATA == bufferType_; } bool isSequenceArg() const { return TENSOR_SEQUENCE_DATA == bufferType_; }
...@@ -175,6 +183,7 @@ protected: ...@@ -175,6 +183,7 @@ protected:
BufferType bufferType_{TENSOR_UNKNOWN}; BufferType bufferType_{TENSOR_UNKNOWN};
ArgType argType_{UNSPECIFIED}; ArgType argType_{UNSPECIFIED};
bool trans_{false}; bool trans_{false};
// todo(tianbing), add deviceType_
// leading dimensions. The size is dims_.size() // leading dimensions. The size is dims_.size()
// Dims lds_; // Dims lds_;
}; };
...@@ -267,8 +276,9 @@ public: ...@@ -267,8 +276,9 @@ public:
size_t nnz, size_t nnz,
SparseFormat format, SparseFormat format,
SparseValueType type, SparseValueType type,
ArgType argType = UNSPECIFIED) ArgType argType = UNSPECIFIED,
: BufferArg(buf, valueType, shape, argType), bool trans = false)
: BufferArg(buf, valueType, shape, argType, trans),
row_(row), row_(row),
col_(col), col_(col),
nnz_(nnz), nnz_(nnz),
...@@ -286,6 +296,33 @@ public: ...@@ -286,6 +296,33 @@ public:
} }
} }
/// Builds a sparse-matrix argument from metadata only (no data pointers).
///
/// The base BufferArg is constructed through the overload that takes no
/// buffer, and row_ / col_ are built from shape-only BufferArgs of type
/// VALUE_TYPE_INT32.
///
/// @param valueType  element type; must be VALUE_TYPE_FLOAT or
///                   VALUE_TYPE_DOUBLE (enforced by CHECK in the body).
/// @param shape      matrix shape; must have exactly two dimensions
///                   (enforced by CHECK_EQ in the body).
/// @param nnz        number of non-zero elements.
/// @param format     sparse storage format; SPARSE_CSR selects the
///                   "shape[0] + 1 rows / nnz cols" layout, any other value
///                   selects "nnz rows / shape[1] + 1 cols".
/// @param type       sparse value type, stored in type_.
/// @param argType    forwarded to BufferArg.
/// @param trans      transpose flag, forwarded to BufferArg.
SparseMatrixArg(ValueType valueType,
                const TensorShape& shape,
                size_t nnz,
                SparseFormat format,
                SparseValueType type,
                ArgType argType = UNSPECIFIED,
                bool trans = false)
    : BufferArg(valueType, shape, argType, trans),
      /// row_ holds shape[0] + 1 entries for CSR, otherwise nnz entries.
      row_(BufferArg(VALUE_TYPE_INT32,
                     TensorShape{format == SPARSE_CSR ? shape[0] + 1 : nnz})),
      /// col_ holds nnz entries for CSR, otherwise shape[1] + 1 entries.
      col_(BufferArg(VALUE_TYPE_INT32,
                     TensorShape{format == SPARSE_CSR ? nnz : shape[1] + 1})),
      nnz_(nnz),
      format_(format),
      type_(type) {
  bufferType_ = TENSOR_SPARSE;
  /// todo(tianbing)
  /// These validations run after row_ and col_ were already built from
  /// `shape`; ideally they would happen before the member initializers.
  CHECK((valueType == VALUE_TYPE_FLOAT) || (valueType == VALUE_TYPE_DOUBLE));
  CHECK_EQ(shape_.ndims(), (size_t)2);
}
SparseMatrixArg(const CpuSparseMatrix& sparse, ArgType argType = UNSPECIFIED); SparseMatrixArg(const CpuSparseMatrix& sparse, ArgType argType = UNSPECIFIED);
SparseMatrixArg(const GpuSparseMatrix& sparse, ArgType argType = UNSPECIFIED); SparseMatrixArg(const GpuSparseMatrix& sparse, ArgType argType = UNSPECIFIED);
......
...@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and ...@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "Function.h" #include "Function.h"
#include "paddle/math/Matrix.h"
#include "paddle/math/SparseMatrix.h"
#include "paddle/math/Vector.h" #include "paddle/math/Vector.h"
#include "paddle/math/tests/TensorCheck.h" #include "paddle/math/tests/TensorCheck.h"
#include "paddle/testing/TestUtil.h" #include "paddle/testing/TestUtil.h"
...@@ -62,29 +64,41 @@ public: ...@@ -62,29 +64,41 @@ public:
cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(size)); cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(size));
gpuMemory_.emplace_back(std::make_shared<GpuMemoryHandle>(size)); gpuMemory_.emplace_back(std::make_shared<GpuMemoryHandle>(size));
cpuInputs_.emplace_back(std::make_shared<BufferArg>( cpuInputs_.emplace_back(
cpuMemory_.back()->getBuf(), input.valueType(), input.shape())); std::make_shared<BufferArg>(cpuMemory_.back()->getBuf(),
gpuInputs_.emplace_back(std::make_shared<BufferArg>( input.valueType(),
gpuMemory_.back()->getBuf(), input.valueType(), input.shape())); input.shape(),
UNSPECIFIED,
input.isTransposed()));
gpuInputs_.emplace_back(
std::make_shared<BufferArg>(gpuMemory_.back()->getBuf(),
input.valueType(),
input.shape(),
UNSPECIFIED,
input.isTransposed()));
} }
// The output argument only needs to specify a shape; it does not need to contain data. // The output argument only needs to specify a shape; it does not need to contain data.
void addOutputs(const BufferArg& output) { void addOutputs(const BufferArg& output, ArgType argType = ASSIGN_TO) {
size_t size = size_t size =
output.shape().getElements() * sizeOfValuType(output.valueType()); output.shape().getElements() * sizeOfValuType(output.valueType());
cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(size)); cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(size));
gpuMemory_.emplace_back(std::make_shared<GpuMemoryHandle>(size)); gpuMemory_.emplace_back(std::make_shared<GpuMemoryHandle>(size));
cpuOutputs_.emplace_back( cpuOutputs_.emplace_back(std::make_shared<BufferArg>(
std::make_shared<BufferArg>(cpuMemory_.back()->getBuf(), cpuMemory_.back()->getBuf(),
output.valueType(), output.valueType(),
output.shape(), output.shape(),
ASSIGN_TO)); // todo(tianbing), argType = output.getArgType(), but default ASSIGN_TO
gpuOutputs_.emplace_back( argType,
std::make_shared<BufferArg>(gpuMemory_.back()->getBuf(), output.isTransposed()));
output.valueType(), gpuOutputs_.emplace_back(std::make_shared<BufferArg>(
output.shape(), gpuMemory_.back()->getBuf(),
ASSIGN_TO)); output.valueType(),
output.shape(),
// todo(tianbing), argType = output.getArgType(), but default ASSIGN_TO
argType,
output.isTransposed()));
} }
void addInputs(const SequenceArg& input) { void addInputs(const SequenceArg& input) {
...@@ -107,10 +121,36 @@ public: ...@@ -107,10 +121,36 @@ public:
// TODO: need be implemented. // TODO: need be implemented.
} }
void addInputs(const SparseMatrixArg& input) {
cpuSparse_ = std::make_shared<CpuSparseMatrix>(input.shape()[0],
input.shape()[1],
input.nnz(),
input.dataType(),
input.dataFormat(),
input.isTransposed());
gpuSparse_ = std::make_shared<GpuSparseMatrix>(input.shape()[0],
input.shape()[1],
input.nnz(),
input.dataType(),
input.dataFormat(),
input.isTransposed());
/// init sparse matrix
hl_stream_t stream(HPPL_STREAM_1);
cpuSparse_->randomizeUniform();
gpuSparse_->copyFrom(*cpuSparse_, stream);
hl_stream_synchronize(stream);
cpuInputs_.emplace_back(std::make_shared<SparseMatrixArg>(*cpuSparse_));
gpuInputs_.emplace_back(std::make_shared<SparseMatrixArg>(*gpuSparse_));
}
void run() { void run() {
// prepare cpu/gpu arguments // prepare cpu/gpu arguments
initInputs(); initInputs();
initOutputs();
// function calculate // function calculate
auto callFunction = [](FunctionBase* function, auto callFunction = [](FunctionBase* function,
std::vector<BufferArgPtr>& inputs, std::vector<BufferArgPtr>& inputs,
...@@ -129,7 +169,7 @@ public: ...@@ -129,7 +169,7 @@ public:
callFunction(cpuFunc_.get(), cpuInputs_, cpuOutputs_); callFunction(cpuFunc_.get(), cpuInputs_, cpuOutputs_);
callFunction(gpuFunc_.get(), gpuInputs_, gpuOutputs_); callFunction(gpuFunc_.get(), gpuInputs_, gpuOutputs_);
// check outputs and inouts // check outputs
compareOutputs(); compareOutputs();
} }
...@@ -140,6 +180,10 @@ public: ...@@ -140,6 +180,10 @@ public:
protected: protected:
void initInputs() { void initInputs() {
for (size_t i = 0; i < cpuInputs_.size(); i++) { for (size_t i = 0; i < cpuInputs_.size(); i++) {
if (cpuInputs_[i]->isSparseArg()) {
continue; /// sparse matrix already init
}
initArg(*cpuInputs_[i]); initArg(*cpuInputs_[i]);
// TODO: Need a BufferCopy used to copy from one BufferArg to another. // TODO: Need a BufferCopy used to copy from one BufferArg to another.
...@@ -152,6 +196,25 @@ protected: ...@@ -152,6 +196,25 @@ protected:
} }
} }
void initOutputs() {
for (size_t i = 0; i < cpuOutputs_.size(); i++) {
if (cpuOutputs_[i]->isSparseArg()) {
LOG(INFO) << "output sparse matrix already init";
continue;
}
initArg(*cpuOutputs_[i]);
// TODO: Need a BufferCopy used to copy from one BufferArg to another.
CpuVector cpuVector(cpuOutputs_[i]->shape().getElements(),
(real*)cpuOutputs_[i]->data());
GpuVector gpuVector(gpuOutputs_[i]->shape().getElements(),
(real*)gpuOutputs_[i]->data());
gpuVector.copyFrom(cpuVector);
}
}
void compareOutputs() { void compareOutputs() {
for (size_t i = 0; i < cpuOutputs_.size(); i++) { for (size_t i = 0; i < cpuOutputs_.size(); i++) {
// TODO, Need a BufferCheck used to compare the two buffers. // TODO, Need a BufferCheck used to compare the two buffers.
...@@ -159,7 +222,6 @@ protected: ...@@ -159,7 +222,6 @@ protected:
auto gpu = gpuOutputs_[i]; auto gpu = gpuOutputs_[i];
CpuVector cpuVector(cpu->shape().getElements(), (real*)cpu->data()); CpuVector cpuVector(cpu->shape().getElements(), (real*)cpu->data());
GpuVector gpuVector(cpu->shape().getElements(), (real*)gpu->data()); GpuVector gpuVector(cpu->shape().getElements(), (real*)gpu->data());
autotest::TensorCheckErr(cpuVector, gpuVector); autotest::TensorCheckErr(cpuVector, gpuVector);
} }
} }
...@@ -195,6 +257,8 @@ protected: ...@@ -195,6 +257,8 @@ protected:
std::vector<BufferArgPtr> cpuOutputs_; std::vector<BufferArgPtr> cpuOutputs_;
std::vector<BufferArgPtr> gpuInputs_; std::vector<BufferArgPtr> gpuInputs_;
std::vector<BufferArgPtr> gpuOutputs_; std::vector<BufferArgPtr> gpuOutputs_;
std::shared_ptr<CpuSparseMatrix> cpuSparse_;
std::shared_ptr<GpuSparseMatrix> gpuSparse_;
}; };
} // namespace paddle } // namespace paddle
...@@ -15,6 +15,8 @@ limitations under the License. */ ...@@ -15,6 +15,8 @@ limitations under the License. */
#pragma once #pragma once
#include "Function.h" #include "Function.h"
/// todo(tianbing), delete it
#include <iostream>
#include "paddle/math/Matrix.h" #include "paddle/math/Matrix.h"
#include "paddle/math/SparseMatrix.h" #include "paddle/math/SparseMatrix.h"
......
...@@ -24,58 +24,39 @@ limitations under the License. */ ...@@ -24,58 +24,39 @@ limitations under the License. */
using namespace paddle; // NOLINT using namespace paddle; // NOLINT
/** /**
* C = alpha * C + beta * (A * B), A, B, C dense matrix * C += A * B, A, B, C dense matrix
* dense = dense * dense * dense = dense * dense
*/ */
void testDDDMatrix(bool transa, bool transb, int dimM, int dimN, int dimK) { void testFuncDDDMatrix(
real alpha = 1.5; bool transa, bool transb, size_t dimM, size_t dimN, size_t dimK) {
real beta = 2.0; real alpha = 1.0;
real beta = 1.0;
const auto cpuFunc = FunctionBase::funcRegistrar_.createByType("MulOp-CPU"); size_t heightA = (transa == false) ? dimM : dimK;
cpuFunc->init(FuncConfig().set("scaleAB", alpha).set("scaleT", beta)); size_t widthA = (transa == false) ? dimK : dimM;
const auto gpuFunc = FunctionBase::funcRegistrar_.createByType("MulOp-GPU"); size_t heightB = (transb == false) ? dimK : dimN;
gpuFunc->init(FuncConfig().set("scaleAB", alpha).set("scaleT", beta)); size_t widthB = (transb == false) ? dimN : dimK;
size_t heightC = dimM;
int heightA = (transa == false) ? dimM : dimK; size_t widthC = dimN;
int widthA = (transa == false) ? dimK : dimM; // init Test object
int heightB = (transb == false) ? dimK : dimN; FunctionCompare test("MulOp",
int widthB = (transb == false) ? dimN : dimK; FuncConfig().set("scaleAB", alpha).set("scaleT", beta));
int heightC = dimM; // prepare input arguments
int widthC = dimN; /// matrix A : HA * WA
test.addInputs(BufferArg(
auto cpuA = std::make_shared<CpuMatrix>(heightA, widthA, transa); VALUE_TYPE_FLOAT, TensorShape{heightA, widthA}, UNSPECIFIED, transa));
auto cpuB = std::make_shared<CpuMatrix>(heightB, widthB, transb); /// matrix B: HB * WB
auto cpuC = std::make_shared<CpuMatrix>(heightC, widthC); test.addInputs(BufferArg(
auto gpuA = std::make_shared<GpuMatrix>(heightA, widthA, transa); VALUE_TYPE_FLOAT, TensorShape{heightB, widthB}, UNSPECIFIED, transb));
auto gpuB = std::make_shared<GpuMatrix>(heightB, widthB, transb);
auto gpuC = std::make_shared<GpuMatrix>(heightC, widthC); /// output matrix C: HC * WC
test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{heightC, widthC}),
cpuA->randomizeUniform(); ADD_TO);
cpuB->randomizeUniform(); // run Function
cpuC->randomizeUniform(); test.run();
gpuA->copyFrom(*cpuA);
gpuB->copyFrom(*cpuB);
gpuC->copyFrom(*cpuC);
BufferArgs cpuInputs;
BufferArgs cpuOutputs;
cpuInputs.addArg(*cpuA);
cpuInputs.addArg(*cpuB);
cpuOutputs.addArg(*cpuC, ADD_TO);
cpuFunc->calc(cpuInputs, cpuOutputs);
BufferArgs gpuInputs;
BufferArgs gpuOutputs;
gpuInputs.addArg(*gpuA);
gpuInputs.addArg(*gpuB);
gpuOutputs.addArg(*gpuC, ADD_TO);
gpuFunc->calc(gpuInputs, gpuOutputs);
autotest::TensorCheckErr(*cpuC, *gpuC);
} }
TEST(Matrix, DDDMul) { TEST(MulOp, DDDMatrixMul) {
LOG(INFO) << "test for dense = dense * dense matrix"; LOG(INFO) << "function test for dense = dense * dense matrix";
for (const auto transa : {false, true}) { for (const auto transa : {false, true}) {
for (const auto transb : {false, true}) { for (const auto transb : {false, true}) {
for (const auto dimM : {1, 10, 100}) { for (const auto dimM : {1, 10, 100}) {
...@@ -89,7 +70,7 @@ TEST(Matrix, DDDMul) { ...@@ -89,7 +70,7 @@ TEST(Matrix, DDDMul) {
<< " dimM=" << std::setw(5) << dimM << " dimM=" << std::setw(5) << dimM
<< " dimN=" << std::setw(5) << dimN << " dimN=" << std::setw(5) << dimN
<< " dimK=" << std::setw(5) << dimK; << " dimK=" << std::setw(5) << dimK;
testDDDMatrix(transa, transb, dimM, dimN, dimK); testFuncDDDMatrix(transa, transb, dimM, dimN, dimK);
} }
} }
} }
...@@ -101,71 +82,33 @@ TEST(Matrix, DDDMul) { ...@@ -101,71 +82,33 @@ TEST(Matrix, DDDMul) {
* C += A * B, B, C dense, A sparse * C += A * B, B, C dense, A sparse
* dense = sparse * dense * dense = sparse * dense
*/ */
void testDSparseDMatrix( void testFuncDSparseDMatrix(
size_t dimM, size_t dimN, size_t dimK, size_t nnz, SparseFormat FORMAT) { size_t dimM, size_t dimN, size_t dimK, size_t nnz, SparseFormat FORMAT) {
real alpha = 1.0; real alpha = 1.0;
real beta = 1.0; real beta = 1.0;
const auto cpuFunc = FunctionBase::funcRegistrar_.createByType("MulOp-CPU"); // init Test object
cpuFunc->init(FuncConfig().set("scaleAB", alpha).set("scaleT", beta)); FunctionCompare test("MulOp",
const auto gpuFunc = FunctionBase::funcRegistrar_.createByType("MulOp-GPU"); FuncConfig().set("scaleAB", alpha).set("scaleT", beta));
gpuFunc->init(FuncConfig().set("scaleAB", alpha).set("scaleT", beta)); // prepare input arguments
/// sparse matrix A : M * K
CpuSparseMatrix cpuMatrixA(dimM, dimK, nnz, FLOAT_VALUE, FORMAT, false); test.addInputs(SparseMatrixArg(VALUE_TYPE_FLOAT,
GpuSparseMatrix gpuMatrixA(dimM, dimK, nnz, FLOAT_VALUE, FORMAT, false); TensorShape{dimM, dimK},
CpuMatrix cpuDenseA(dimM, dimK, false); nnz,
FORMAT,
auto cpuMatrixB = Matrix::create(dimK, dimN, false, false); FLOAT_VALUE,
auto gpuMatrixB = Matrix::create(dimK, dimN, false, true); UNSPECIFIED,
auto cpuDenseB = Matrix::create(dimK, dimN, false, false); false));
/// matrix B: K * N
auto cpuMatrixC = Matrix::create(dimM, dimN, false, false); test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{dimK, dimN}));
auto gpuMatrixC = Matrix::create(dimM, dimN, false, true);
auto cpuDenseC = Matrix::create(dimM, dimN, false, false); /// output matrix C: M * N
test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{dimM, dimN}), ADD_TO);
/*matrix init*/ // run Function
hl_stream_t stream(HPPL_STREAM_1); test.run();
cpuMatrixA.randomizeUniform();
cpuMatrixB->randomizeUniform();
cpuMatrixC->randomizeUniform();
gpuMatrixA.copyFrom(cpuMatrixA, stream);
gpuMatrixB->copyFrom(*cpuMatrixB, stream);
gpuMatrixC->copyFrom(*cpuMatrixC, stream);
cpuDenseA.copyFrom(cpuMatrixA);
cpuDenseB->copyFrom(*cpuMatrixB);
cpuDenseC->copyFrom(*cpuMatrixC);
hl_stream_synchronize(stream);
/*matrix mul*/
BufferArgs cpuInputs;
BufferArgs cpuOutputs;
cpuInputs.addArg(cpuMatrixA);
cpuInputs.addArg(*cpuMatrixB);
cpuOutputs.addArg(*cpuMatrixC, ADD_TO);
cpuFunc->calc(cpuInputs, cpuOutputs);
BufferArgs gpuInputs;
BufferArgs gpuOutputs;
gpuInputs.addArg(gpuMatrixA);
gpuInputs.addArg(*gpuMatrixB);
gpuOutputs.addArg(*gpuMatrixC, ADD_TO);
gpuFunc->calc(gpuInputs, gpuOutputs);
BufferArgs denseInputs;
BufferArgs denseOutputs;
denseInputs.addArg(cpuDenseA);
denseInputs.addArg(*cpuDenseB);
denseOutputs.addArg(*cpuDenseC, ADD_TO);
cpuFunc->calc(denseInputs, denseOutputs);
/*check result*/
autotest::TensorCheckErr(*cpuMatrixC, *cpuDenseC);
autotest::TensorCheckErr(*cpuMatrixC, *gpuMatrixC);
} }
TEST(Matrix, DSparseDMul) { TEST(MuLOp, DSparseDMul) {
LOG(INFO) << "test for dense = sparse * dense matrix"; LOG(INFO) << "function test for dense = sparse * dense matrix";
for (const auto dimM : {10, 100, 1000}) { for (const auto dimM : {10, 100, 1000}) {
for (const auto dimN : {10, 100}) { for (const auto dimN : {10, 100}) {
for (const auto dimK : {3, 10}) { for (const auto dimK : {3, 10}) {
...@@ -177,7 +120,7 @@ TEST(Matrix, DSparseDMul) { ...@@ -177,7 +120,7 @@ TEST(Matrix, DSparseDMul) {
<< " dimK=" << std::setw(5) << dimK << " dimK=" << std::setw(5) << dimK
<< " nnz=" << std::setw(5) << nnz << " nnz=" << std::setw(5) << nnz
<< " format=" << std::setw(5) << FORMAT; << " format=" << std::setw(5) << FORMAT;
testDSparseDMatrix(dimM, dimN, dimK, nnz, FORMAT); testFuncDSparseDMatrix(dimM, dimN, dimK, nnz, FORMAT);
} }
} }
} }
...@@ -189,72 +132,34 @@ TEST(Matrix, DSparseDMul) { ...@@ -189,72 +132,34 @@ TEST(Matrix, DSparseDMul) {
* C += A * B, A, C dense, B sparse * C += A * B, A, C dense, B sparse
* dense = dense * sparse * dense = dense * sparse
*/ */
void testDDSparseMatrix( void testFuncDDSparseMatrix(
size_t dimM, size_t dimN, size_t dimK, size_t nnz, SparseFormat FORMAT) { size_t dimM, size_t dimN, size_t dimK, size_t nnz, SparseFormat FORMAT) {
real alpha = 1.0; real alpha = 1.0;
real beta = 1.0; real beta = 1.0;
const auto cpuFunc = FunctionBase::funcRegistrar_.createByType("MulOp-CPU"); // init Test object
cpuFunc->init(FuncConfig().set("scaleAB", alpha).set("scaleT", beta)); FunctionCompare test("MulOp",
const auto gpuFunc = FunctionBase::funcRegistrar_.createByType("MulOp-GPU"); FuncConfig().set("scaleAB", alpha).set("scaleT", beta));
gpuFunc->init(FuncConfig().set("scaleAB", alpha).set("scaleT", beta)); // prepare input arguments
/// matrix A : M * K
auto cpuMatrixA = Matrix::create(dimM, dimK, false, false); test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{dimM, dimK}));
auto gpuMatrixA = Matrix::create(dimM, dimK, false, true);
auto cpuDenseA = Matrix::create(dimM, dimK, false, false); /// matrix B: K * N
test.addInputs(SparseMatrixArg(VALUE_TYPE_FLOAT,
CpuSparseMatrix cpuMatrixB(dimK, dimN, nnz, FLOAT_VALUE, FORMAT, false); TensorShape{dimK, dimN},
nnz,
GpuSparseMatrix gpuMatrixB(dimK, dimN, nnz, FLOAT_VALUE, FORMAT, false); FORMAT,
FLOAT_VALUE,
auto cpuDenseB = Matrix::create(dimK, dimN, false, false); UNSPECIFIED,
auto cpuMatrixC = Matrix::create(dimM, dimN, false, false); false));
auto gpuMatrixC = Matrix::create(dimM, dimN, false, true);
auto cpuDenseC = Matrix::create(dimM, dimN, false, false); /// output matrix C: M * N
test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{dimM, dimN}), ADD_TO);
/*matrix init*/ // run Function
hl_stream_t stream(HPPL_STREAM_1); test.run();
cpuMatrixA->randomizeUniform();
cpuMatrixB.randomizeUniform();
cpuMatrixC->randomizeUniform();
gpuMatrixA->copyFrom(*cpuMatrixA, stream);
gpuMatrixB.copyFrom(cpuMatrixB, stream);
gpuMatrixC->copyFrom(*cpuMatrixC, stream);
cpuDenseA->copyFrom(*cpuMatrixA);
cpuDenseB->copyFrom(cpuMatrixB);
cpuDenseC->copyFrom(*cpuMatrixC);
hl_stream_synchronize(stream);
/*matrix mul*/
BufferArgs cpuInputs;
BufferArgs cpuOutputs;
cpuInputs.addArg(*cpuMatrixA);
cpuInputs.addArg(cpuMatrixB);
cpuOutputs.addArg(*cpuMatrixC, ADD_TO);
cpuFunc->calc(cpuInputs, cpuOutputs);
BufferArgs gpuInputs;
BufferArgs gpuOutputs;
gpuInputs.addArg(*gpuMatrixA);
gpuInputs.addArg(gpuMatrixB);
gpuOutputs.addArg(*gpuMatrixC, ADD_TO);
gpuFunc->calc(gpuInputs, gpuOutputs);
BufferArgs denseInputs;
BufferArgs denseOutputs;
denseInputs.addArg(*cpuDenseA);
denseInputs.addArg(*cpuDenseB);
denseOutputs.addArg(*cpuDenseC, ADD_TO);
cpuFunc->calc(denseInputs, denseOutputs);
/*check result*/
autotest::TensorCheckErr(*cpuMatrixC, *cpuDenseC);
autotest::TensorCheckErr(*cpuMatrixC, *gpuMatrixC);
} }
TEST(Matrix, DDSparseMul) { TEST(MulOp, DDSparseMul) {
LOG(INFO) << "test for dense = dense * sparse matrix"; LOG(INFO) << "function test for dense = dense * sparse matrix";
for (const auto dimM : {10, 100, 1000}) { for (const auto dimM : {10, 100, 1000}) {
for (const auto dimN : {10, 100}) { for (const auto dimN : {10, 100}) {
for (const auto dimK : {3, 10}) { for (const auto dimK : {3, 10}) {
...@@ -266,7 +171,7 @@ TEST(Matrix, DDSparseMul) { ...@@ -266,7 +171,7 @@ TEST(Matrix, DDSparseMul) {
<< " dimK=" << std::setw(5) << dimK << " dimK=" << std::setw(5) << dimK
<< " nnz=" << std::setw(5) << nnz << " nnz=" << std::setw(5) << nnz
<< " format=" << std::setw(5) << FORMAT; << " format=" << std::setw(5) << FORMAT;
testDDSparseMatrix(dimM, dimN, dimK, nnz, FORMAT); testFuncDDSparseMatrix(dimM, dimN, dimK, nnz, FORMAT);
} }
} }
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册