Commit bc5d7bb6 authored by xutianbing

Add Sparse = dense * dense unit test with Daoyuan's Function test

Parent 077f936a
...@@ -172,6 +172,7 @@ public: ...@@ -172,6 +172,7 @@ public:
bool isTransposed() const { return trans_; } bool isTransposed() const { return trans_; }
bool isSparseArg() const { return TENSOR_SPARSE == bufferType_; } bool isSparseArg() const { return TENSOR_SPARSE == bufferType_; }
bool isSequenceArg() const { return TENSOR_SEQUENCE_DATA == bufferType_; } bool isSequenceArg() const { return TENSOR_SEQUENCE_DATA == bufferType_; }
/// Number of value elements in this argument's buffer. The default
/// (dense) implementation is the full element count of the tensor shape;
/// sparse arguments override this to count only stored values.
virtual size_t numElements() const { return shape_.getElements(); }
const SequenceArg& sequence() const; const SequenceArg& sequence() const;
const SparseMatrixArg& sparse() const; const SparseMatrixArg& sparse() const;
...@@ -353,6 +354,8 @@ public: ...@@ -353,6 +354,8 @@ public:
size_t nnz() const { return nnz_; } size_t nnz() const { return nnz_; }
/// A sparse matrix stores only its non-zero values, so the comparable
/// buffer length is nnz_, not the dense shape element count.
size_t numElements() const override { return nnz_; }
SparseFormat dataFormat() const { return format_; } SparseFormat dataFormat() const { return format_; }
SparseValueType dataType() const { return type_; } SparseValueType dataType() const { return type_; }
......
...@@ -101,6 +101,34 @@ public: ...@@ -101,6 +101,34 @@ public:
output.isTransposed())); output.isTransposed()));
} }
/// Add and initialize an output sparse matrix on both CPU and GPU.
/// @param output  template SparseMatrixArg giving shape, nnz, value type,
///                format and transpose flag of the result to allocate.
/// @param argType how the Function writes the output (ASSIGN_TO / ADD_TO).
/// Sparse outputs are initialized here rather than in initOutputs(),
/// which skips sparse args; the CPU matrix is randomized and mirrored to
/// the GPU so both devices start from identical data (presumably so an
/// ADD_TO output accumulates onto the same base values — TODO confirm).
void addOutputs(const SparseMatrixArg& output, ArgType argType = ASSIGN_TO) {
// CPU-side result matrix with the same sparse structure as `output`
cpuSparse_ = std::make_shared<CpuSparseMatrix>(output.shape()[0],
output.shape()[1],
output.nnz(),
output.dataType(),
output.dataFormat(),
output.isTransposed());
// GPU-side counterpart, identical structure
gpuSparse_ = std::make_shared<GpuSparseMatrix>(output.shape()[0],
output.shape()[1],
output.nnz(),
output.dataType(),
output.dataFormat(),
output.isTransposed());
/// randomize CPU values, then copy CPU -> GPU asynchronously on a
/// dedicated stream and block until the transfer completes
hl_stream_t stream(HPPL_STREAM_1);
cpuSparse_->randomizeUniform();
gpuSparse_->copyFrom(*cpuSparse_, stream);
hl_stream_synchronize(stream);
// register both matrices as Function outputs for the later comparison
cpuOutputs_.emplace_back(
std::make_shared<SparseMatrixArg>(*cpuSparse_, argType));
gpuOutputs_.emplace_back(
std::make_shared<SparseMatrixArg>(*gpuSparse_, argType));
}
void addInputs(const SequenceArg& input) { void addInputs(const SequenceArg& input) {
size_t batchSize = input.shape()[0]; size_t batchSize = input.shape()[0];
size_t numSeqs = batchSize / 10 + 1; size_t numSeqs = batchSize / 10 + 1;
...@@ -199,8 +227,7 @@ protected: ...@@ -199,8 +227,7 @@ protected:
void initOutputs() { void initOutputs() {
for (size_t i = 0; i < cpuOutputs_.size(); i++) { for (size_t i = 0; i < cpuOutputs_.size(); i++) {
if (cpuOutputs_[i]->isSparseArg()) { if (cpuOutputs_[i]->isSparseArg()) {
LOG(INFO) << "output sparse matrix already init"; continue; /// sparse matrix already init
continue;
} }
initArg(*cpuOutputs_[i]); initArg(*cpuOutputs_[i]);
...@@ -218,10 +245,11 @@ protected: ...@@ -218,10 +245,11 @@ protected:
void compareOutputs() { void compareOutputs() {
for (size_t i = 0; i < cpuOutputs_.size(); i++) { for (size_t i = 0; i < cpuOutputs_.size(); i++) {
// TODO, Need a BufferCheck used to compare the two buffers. // TODO, Need a BufferCheck used to compare the two buffers.
auto cpu = cpuOutputs_[i]; const auto cpu = cpuOutputs_[i];
auto gpu = gpuOutputs_[i]; const auto gpu = gpuOutputs_[i];
CpuVector cpuVector(cpu->shape().getElements(), (real*)cpu->data()); CHECK_EQ(cpu->numElements(), gpu->numElements());
GpuVector gpuVector(cpu->shape().getElements(), (real*)gpu->data()); CpuVector cpuVector(cpu->numElements(), (real*)cpu->data());
GpuVector gpuVector(gpu->numElements(), (real*)gpu->data());
autotest::TensorCheckErr(cpuVector, gpuVector); autotest::TensorCheckErr(cpuVector, gpuVector);
} }
} }
......
...@@ -319,6 +319,13 @@ public: ...@@ -319,6 +319,13 @@ public:
auto outSparseMat = outputs[0].sparse().SparseMatrix<Device>(); auto outSparseMat = outputs[0].sparse().SparseMatrix<Device>();
if (!inputs[0].isSparseArg() && !inputs[1].isSparseArg() && if (!inputs[0].isSparseArg() && !inputs[1].isSparseArg() &&
outputs[0].isSparseArg()) { outputs[0].isSparseArg()) {
/*
LOG(INFO) << "input0";
inputs[0].matrix<Device>().print(std::cout);
LOG(INFO) << "input1";
inputs[1].matrix<Device>().print(std::cout);
LOG(INFO) << "output sparse matrix";
outSparseMat.print(std::cout); */
MulOp<Device>(outSparseMat, MulOp<Device>(outSparseMat,
inputs[0].matrix<Device>(), inputs[0].matrix<Device>(),
inputs[1].matrix<Device>(), inputs[1].matrix<Device>(),
......
...@@ -183,75 +183,35 @@ TEST(MulOp, DDSparseMul) { ...@@ -183,75 +183,35 @@ TEST(MulOp, DDSparseMul) {
* C += A * B, A sparse, B, C dense * C += A * B, A sparse, B, C dense
* sparse = dense * dense * sparse = dense * dense
*/ */
void testSparseDDMatrix( void testFuncSparseDDMatrix(
size_t dimM, size_t dimN, size_t dimK, size_t nnz, SparseFormat FORMAT) { size_t dimM, size_t dimN, size_t dimK, size_t nnz, SparseFormat FORMAT) {
real alpha = 1.0; real alpha = 1.0;
real beta = 1.0; real beta = 1.0;
const auto cpuFunc = FunctionBase::funcRegistrar_.createByType("MulOp-CPU"); // init Test object
cpuFunc->init(FuncConfig().set("scaleAB", alpha).set("scaleT", beta)); FunctionCompare test("MulOp",
const auto gpuFunc = FunctionBase::funcRegistrar_.createByType("MulOp-GPU"); FuncConfig().set("scaleAB", alpha).set("scaleT", beta));
gpuFunc->init(FuncConfig().set("scaleAB", alpha).set("scaleT", beta)); // prepare input arguments
/// matrix A : M * K
auto cpuMatrixA = Matrix::create(dimM, dimK, false, false); test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{dimM, dimK}));
auto gpuMatrixA = Matrix::create(dimM, dimK, false, true);
auto cpuDenseA = Matrix::create(dimM, dimK, false, false);
auto cpuMatrixB = Matrix::create(dimK, dimN, false, false);
auto gpuMatrixB = Matrix::create(dimK, dimN, false, true);
auto cpuDenseB = Matrix::create(dimK, dimN, false, false);
CpuSparseMatrix cpuMatrixC(dimM, dimN, nnz, FLOAT_VALUE, FORMAT, false);
CpuSparseMatrix gpuMatrixC_d2h(dimM, dimN, nnz, FLOAT_VALUE, FORMAT, false);
GpuSparseMatrix gpuMatrixC(dimM, dimN, nnz, FLOAT_VALUE, FORMAT, false);
CpuMatrix cpuDenseC(dimM, dimN, false);
/*matrix init*/
hl_stream_t stream(HPPL_STREAM_1);
cpuMatrixA->randomizeUniform();
cpuMatrixB->randomizeUniform();
cpuMatrixC.randomizeUniform();
gpuMatrixA->copyFrom(*cpuMatrixA, stream);
gpuMatrixB->copyFrom(*cpuMatrixB, stream);
gpuMatrixC.copyFrom(cpuMatrixC, stream);
cpuDenseA->copyFrom(*cpuMatrixA);
cpuDenseB->copyFrom(*cpuMatrixB);
cpuDenseC.copyFrom(cpuMatrixC);
hl_stream_synchronize(stream);
/*matrix mul*/
BufferArgs cpuInputs;
BufferArgs cpuOutputs;
cpuInputs.addArg(*cpuMatrixA);
cpuInputs.addArg(*cpuMatrixB);
cpuOutputs.addArg(cpuMatrixC, ADD_TO);
cpuFunc->calc(cpuInputs, cpuOutputs);
BufferArgs gpuInputs;
BufferArgs gpuOutputs;
gpuInputs.addArg(*gpuMatrixA);
gpuInputs.addArg(*gpuMatrixB);
gpuOutputs.addArg(gpuMatrixC, ADD_TO);
gpuFunc->calc(gpuInputs, gpuOutputs);
BufferArgs denseInputs;
BufferArgs denseOutputs;
denseInputs.addArg(*cpuDenseA);
denseInputs.addArg(*cpuDenseB);
denseOutputs.addArg(cpuDenseC, ADD_TO);
cpuFunc->calc(denseInputs, denseOutputs);
gpuMatrixC_d2h.copyFrom(gpuMatrixC, stream); /// matrix B: K * N
hl_stream_synchronize(stream); test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{dimK, dimN}));
/*check result*/ /// output sparse matrix C: M * N
checkSMatrixEqual(cpuMatrixC, gpuMatrixC_d2h); test.addOutputs(SparseMatrixArg(VALUE_TYPE_FLOAT,
checkSMatrixEqual2Dense(cpuMatrixC, cpuDenseC); TensorShape{dimM, dimN},
nnz,
FORMAT,
FLOAT_VALUE,
UNSPECIFIED,
false),
ADD_TO);
// run Function
test.run();
} }
TEST(Matrix, SparseDDMul) { TEST(MulOp, SparseDDMul) {
LOG(INFO) << "test for sparse = dense * dense matrix"; LOG(INFO) << "function test for sparse = dense * dense matrix";
for (const auto dimM : {10, 100, 1000}) { for (const auto dimM : {10, 100, 1000}) {
for (const auto dimN : {10, 100}) { for (const auto dimN : {10, 100}) {
for (const auto dimK : {3, 10}) { for (const auto dimK : {3, 10}) {
...@@ -263,7 +223,7 @@ TEST(Matrix, SparseDDMul) { ...@@ -263,7 +223,7 @@ TEST(Matrix, SparseDDMul) {
<< " dimK=" << std::setw(5) << dimK << " dimK=" << std::setw(5) << dimK
<< " nnz=" << std::setw(5) << nnz << " nnz=" << std::setw(5) << nnz
<< " format=" << std::setw(5) << FORMAT; << " format=" << std::setw(5) << FORMAT;
testSparseDDMatrix(dimM, dimN, dimK, nnz, FORMAT); testFuncSparseDDMatrix(dimM, dimN, dimK, nnz, FORMAT);
} }
} }
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册