提交 bc5d7bb6 编写于 作者: X xutianbing

Add Sparse = dense * dense unit test with Daoyuan's Function test

上级 077f936a
......@@ -172,6 +172,7 @@ public:
/// Returns the stored transpose flag (trans_).
bool isTransposed() const { return trans_; }
/// Reports whether this argument's buffer type is TENSOR_SPARSE.
bool isSparseArg() const {
  return bufferType_ == TENSOR_SPARSE;
}
/// Reports whether this argument's buffer type is TENSOR_SEQUENCE_DATA.
bool isSequenceArg() const {
  return bufferType_ == TENSOR_SEQUENCE_DATA;
}
/// Number of elements in this argument, taken from shape_.getElements().
/// Declared virtual so derived argument types can report a different count.
virtual size_t numElements() const { return shape_.getElements(); }
const SequenceArg& sequence() const;
const SparseMatrixArg& sparse() const;
......@@ -353,6 +354,8 @@ public:
/// Returns nnz_ (presumably the number of stored non-zero entries -- confirm
/// against the constructor, which is not visible here).
size_t nnz() const { return nnz_; }
/// Overrides numElements() so that the element count reported is nnz_.
size_t numElements() const override { return nnz_; }
/// Returns format_, the SparseFormat held by this argument.
SparseFormat dataFormat() const { return format_; }
/// Returns type_, the SparseValueType held by this argument.
SparseValueType dataType() const { return type_; }
......
......@@ -101,6 +101,34 @@ public:
output.isTransposed()));
}
/// add and init output sparse matrix
void addOutputs(const SparseMatrixArg& output, ArgType argType = ASSIGN_TO) {
cpuSparse_ = std::make_shared<CpuSparseMatrix>(output.shape()[0],
output.shape()[1],
output.nnz(),
output.dataType(),
output.dataFormat(),
output.isTransposed());
gpuSparse_ = std::make_shared<GpuSparseMatrix>(output.shape()[0],
output.shape()[1],
output.nnz(),
output.dataType(),
output.dataFormat(),
output.isTransposed());
/// init sparse matrix
hl_stream_t stream(HPPL_STREAM_1);
cpuSparse_->randomizeUniform();
gpuSparse_->copyFrom(*cpuSparse_, stream);
hl_stream_synchronize(stream);
cpuOutputs_.emplace_back(
std::make_shared<SparseMatrixArg>(*cpuSparse_, argType));
gpuOutputs_.emplace_back(
std::make_shared<SparseMatrixArg>(*gpuSparse_, argType));
}
void addInputs(const SequenceArg& input) {
size_t batchSize = input.shape()[0];
size_t numSeqs = batchSize / 10 + 1;
......@@ -199,8 +227,7 @@ protected:
void initOutputs() {
for (size_t i = 0; i < cpuOutputs_.size(); i++) {
if (cpuOutputs_[i]->isSparseArg()) {
LOG(INFO) << "output sparse matrix already init";
continue;
continue; /// sparse matrix already init
}
initArg(*cpuOutputs_[i]);
......@@ -218,10 +245,11 @@ protected:
void compareOutputs() {
for (size_t i = 0; i < cpuOutputs_.size(); i++) {
// TODO, Need a BufferCheck used to compare the two buffers.
auto cpu = cpuOutputs_[i];
auto gpu = gpuOutputs_[i];
CpuVector cpuVector(cpu->shape().getElements(), (real*)cpu->data());
GpuVector gpuVector(cpu->shape().getElements(), (real*)gpu->data());
const auto cpu = cpuOutputs_[i];
const auto gpu = gpuOutputs_[i];
CHECK_EQ(cpu->numElements(), gpu->numElements());
CpuVector cpuVector(cpu->numElements(), (real*)cpu->data());
GpuVector gpuVector(gpu->numElements(), (real*)gpu->data());
autotest::TensorCheckErr(cpuVector, gpuVector);
}
}
......
......@@ -319,6 +319,13 @@ public:
auto outSparseMat = outputs[0].sparse().SparseMatrix<Device>();
if (!inputs[0].isSparseArg() && !inputs[1].isSparseArg() &&
outputs[0].isSparseArg()) {
/*
LOG(INFO) << "input0";
inputs[0].matrix<Device>().print(std::cout);
LOG(INFO) << "input1";
inputs[1].matrix<Device>().print(std::cout);
LOG(INFO) << "output sparse matrix";
outSparseMat.print(std::cout); */
MulOp<Device>(outSparseMat,
inputs[0].matrix<Device>(),
inputs[1].matrix<Device>(),
......
......@@ -183,75 +183,35 @@ TEST(MulOp, DDSparseMul) {
* C += A * B, A, B dense, C sparse
* sparse = dense * dense
*/
void testSparseDDMatrix(
void testFuncSparseDDMatrix(
size_t dimM, size_t dimN, size_t dimK, size_t nnz, SparseFormat FORMAT) {
real alpha = 1.0;
real beta = 1.0;
const auto cpuFunc = FunctionBase::funcRegistrar_.createByType("MulOp-CPU");
cpuFunc->init(FuncConfig().set("scaleAB", alpha).set("scaleT", beta));
const auto gpuFunc = FunctionBase::funcRegistrar_.createByType("MulOp-GPU");
gpuFunc->init(FuncConfig().set("scaleAB", alpha).set("scaleT", beta));
auto cpuMatrixA = Matrix::create(dimM, dimK, false, false);
auto gpuMatrixA = Matrix::create(dimM, dimK, false, true);
auto cpuDenseA = Matrix::create(dimM, dimK, false, false);
auto cpuMatrixB = Matrix::create(dimK, dimN, false, false);
auto gpuMatrixB = Matrix::create(dimK, dimN, false, true);
auto cpuDenseB = Matrix::create(dimK, dimN, false, false);
CpuSparseMatrix cpuMatrixC(dimM, dimN, nnz, FLOAT_VALUE, FORMAT, false);
CpuSparseMatrix gpuMatrixC_d2h(dimM, dimN, nnz, FLOAT_VALUE, FORMAT, false);
GpuSparseMatrix gpuMatrixC(dimM, dimN, nnz, FLOAT_VALUE, FORMAT, false);
CpuMatrix cpuDenseC(dimM, dimN, false);
/*matrix init*/
hl_stream_t stream(HPPL_STREAM_1);
cpuMatrixA->randomizeUniform();
cpuMatrixB->randomizeUniform();
cpuMatrixC.randomizeUniform();
gpuMatrixA->copyFrom(*cpuMatrixA, stream);
gpuMatrixB->copyFrom(*cpuMatrixB, stream);
gpuMatrixC.copyFrom(cpuMatrixC, stream);
cpuDenseA->copyFrom(*cpuMatrixA);
cpuDenseB->copyFrom(*cpuMatrixB);
cpuDenseC.copyFrom(cpuMatrixC);
hl_stream_synchronize(stream);
/*matrix mul*/
BufferArgs cpuInputs;
BufferArgs cpuOutputs;
cpuInputs.addArg(*cpuMatrixA);
cpuInputs.addArg(*cpuMatrixB);
cpuOutputs.addArg(cpuMatrixC, ADD_TO);
cpuFunc->calc(cpuInputs, cpuOutputs);
BufferArgs gpuInputs;
BufferArgs gpuOutputs;
gpuInputs.addArg(*gpuMatrixA);
gpuInputs.addArg(*gpuMatrixB);
gpuOutputs.addArg(gpuMatrixC, ADD_TO);
gpuFunc->calc(gpuInputs, gpuOutputs);
BufferArgs denseInputs;
BufferArgs denseOutputs;
denseInputs.addArg(*cpuDenseA);
denseInputs.addArg(*cpuDenseB);
denseOutputs.addArg(cpuDenseC, ADD_TO);
cpuFunc->calc(denseInputs, denseOutputs);
// init Test object
FunctionCompare test("MulOp",
FuncConfig().set("scaleAB", alpha).set("scaleT", beta));
// prepare input arguments
/// matrix A : M * K
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{dimM, dimK}));
gpuMatrixC_d2h.copyFrom(gpuMatrixC, stream);
hl_stream_synchronize(stream);
/// matrix B: K * N
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{dimK, dimN}));
/*check result*/
checkSMatrixEqual(cpuMatrixC, gpuMatrixC_d2h);
checkSMatrixEqual2Dense(cpuMatrixC, cpuDenseC);
/// output sparse matrix C: M * N
test.addOutputs(SparseMatrixArg(VALUE_TYPE_FLOAT,
TensorShape{dimM, dimN},
nnz,
FORMAT,
FLOAT_VALUE,
UNSPECIFIED,
false),
ADD_TO);
// run Function
test.run();
}
TEST(Matrix, SparseDDMul) {
LOG(INFO) << "test for sparse = dense * dense matrix";
TEST(MulOp, SparseDDMul) {
LOG(INFO) << "function test for sparse = dense * dense matrix";
for (const auto dimM : {10, 100, 1000}) {
for (const auto dimN : {10, 100}) {
for (const auto dimK : {3, 10}) {
......@@ -263,7 +223,7 @@ TEST(Matrix, SparseDDMul) {
<< " dimK=" << std::setw(5) << dimK
<< " nnz=" << std::setw(5) << nnz
<< " format=" << std::setw(5) << FORMAT;
testSparseDDMatrix(dimM, dimN, dimK, nnz, FORMAT);
testFuncSparseDDMatrix(dimM, dimN, dimK, nnz, FORMAT);
}
}
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册