Commit 077f936a
Authored Jan 21, 2017 by xutianbing
Support SparseMatrixArg unit test using Daoyuan's new Function Test.
Parent: 316bf75a
Showing 5 changed files with 203 additions and 197 deletions (+203 −197)
paddle/function/BufferArg.cpp    +0   −2
paddle/function/BufferArg.h      +43  −6
paddle/function/FunctionTest.h   +81  −17
paddle/function/MulOp.h          +2   −0
paddle/function/MulOpTest.cpp    +77  −172
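The reworked MulOp tests below drive the CPU and GPU kernels through the FunctionCompare helper instead of registering and calling each function by hand. A minimal sketch of that flow, assuming the FunctionCompare, BufferArg, and TensorShape APIs as they appear in this diff (the dimensions are illustrative, not taken from the commit):

  // Sketch only: mirrors testFuncDDDMatrix in this commit, with made-up sizes.
  FunctionCompare test("MulOp",
                       FuncConfig().set("scaleAB", (real)1.0).set("scaleT", (real)1.0));
  test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{16, 32}));          // A : 16 x 32
  test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{32, 8}));           // B : 32 x 8
  test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{16, 8}), ADD_TO);  // C += A * B
  test.run();  // runs MulOp-CPU and MulOp-GPU and compares the outputs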
paddle/function/BufferArg.cpp
@@ -33,7 +33,6 @@ SparseMatrixArg::SparseMatrixArg(const CpuSparseMatrix& sparse, ArgType argType)
     : BufferArg(sparse, argType),
       row_(reinterpret_cast<void*>(sparse.getRows()), VALUE_TYPE_INT32),
       col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32),
-      /// todo(tianbing), make sure how to get NNZ
       nnz_(sparse.getElementCnt()),
       format_(sparse.getFormat()),
       type_(sparse.getValueType()) {
@@ -44,7 +43,6 @@ SparseMatrixArg::SparseMatrixArg(const GpuSparseMatrix& sparse, ArgType argType)
     : BufferArg(sparse, argType),
       row_(reinterpret_cast<void*>(sparse.getRows()), VALUE_TYPE_INT32),
       col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32),
-      /// todo(tianbing), make sure how to get NNZ
       nnz_(sparse.getElementCnt()),
       format_(sparse.getFormat()),
       type_(sparse.getValueType()) {
paddle/function/BufferArg.h
@@ -71,17 +71,24 @@ public:
 public:
   BufferArg(ValueType valueType,
             const TensorShape& shape,
-            ArgType argType = UNSPECIFIED)
+            ArgType argType = UNSPECIFIED,
+            bool trans = false)
       : buf_(nullptr),
         valueType_(valueType),
         shape_(shape),
-        argType_(argType) {}
+        argType_(argType),
+        trans_(trans) {}

   BufferArg(void* buf,
             ValueType valueType,
             const TensorShape& shape,
-            ArgType argType = UNSPECIFIED)
-      : buf_(buf), valueType_(valueType), shape_(shape), argType_(argType) {}
+            ArgType argType = UNSPECIFIED,
+            bool trans = false)
+      : buf_(buf),
+        valueType_(valueType),
+        shape_(shape),
+        argType_(argType),
+        trans_(trans) {}

   BufferArg(void* buf, ValueType valueType)
       : buf_(buf), valueType_(valueType) {}
@@ -162,6 +169,7 @@ public:
   ValueType valueType() const { return valueType_; }
   BufferType bufferType() const { return bufferType_; }
   const TensorShape& shape() const { return shape_; }
+  bool isTransposed() const { return trans_; }
   bool isSparseArg() const { return TENSOR_SPARSE == bufferType_; }
   bool isSequenceArg() const { return TENSOR_SEQUENCE_DATA == bufferType_; }
@@ -175,6 +183,7 @@ protected:
   BufferType bufferType_{TENSOR_UNKNOWN};
   ArgType argType_{UNSPECIFIED};
+  bool trans_{false};
   // todo(tianbing), add deviceType_
   // leading dimensions. The size is dims_.size()
   // Dims lds_;
 };
@@ -267,8 +276,9 @@ public:
                   size_t nnz,
                   SparseFormat format,
                   SparseValueType type,
-                  ArgType argType = UNSPECIFIED)
-      : BufferArg(buf, valueType, shape, argType),
+                  ArgType argType = UNSPECIFIED,
+                  bool trans = false)
+      : BufferArg(buf, valueType, shape, argType, trans),
         row_(row),
         col_(col),
         nnz_(nnz),
@@ -286,6 +296,33 @@ public:
     }
   }

+  SparseMatrixArg(ValueType valueType,
+                  const TensorShape& shape,
+                  size_t nnz,
+                  SparseFormat format,
+                  SparseValueType type,
+                  ArgType argType = UNSPECIFIED,
+                  bool trans = false)
+      : BufferArg(valueType, shape, argType, trans),
+        /// len of row_ : height + 1 (CSR), buf_ == nullptr
+        row_(format == SPARSE_CSR
+                 ? BufferArg(VALUE_TYPE_INT32, TensorShape{shape[0] + 1})
+                 : BufferArg(VALUE_TYPE_INT32, TensorShape{nnz})),
+        /// len of col_ : width + 1 (CSC), buf_ == nullptr
+        col_(format == SPARSE_CSR
+                 ? BufferArg(VALUE_TYPE_INT32, TensorShape{nnz})
+                 : BufferArg(VALUE_TYPE_INT32, TensorShape{shape[1] + 1})),
+        nnz_(nnz),
+        format_(format),
+        type_(type) {
+    bufferType_ = TENSOR_SPARSE;
+    /// todo(tianbing)
+    /// valueType and shape_.ndims() == 2 need to check before
+    /// this constructor to make sure row_ and col_ are right
+    CHECK((valueType == VALUE_TYPE_FLOAT) || (valueType == VALUE_TYPE_DOUBLE));
+    CHECK_EQ(shape_.ndims(), (size_t)2);
+  }
+
   SparseMatrixArg(const CpuSparseMatrix& sparse, ArgType argType = UNSPECIFIED);

   SparseMatrixArg(const GpuSparseMatrix& sparse, ArgType argType = UNSPECIFIED);
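The buffer-less constructor added above lets a test describe a sparse argument by shape, nnz, and format alone; the row/col index buffers only carry their CSR/CSC lengths and stay unallocated (buf_ == nullptr). A minimal usage sketch, assuming the types in this header (the 100 x 200 CSR shape and nnz of 50 are illustrative, not from the commit):

  // Hypothetical sketch: describe a 100 x 200 CSR matrix with 50 non-zeros, no data attached;
  // row_ gets length 101 (height + 1) and col_ gets length 50 (nnz).
  SparseMatrixArg sparseA(VALUE_TYPE_FLOAT,
                          TensorShape{100, 200},
                          /*nnz=*/50,
                          SPARSE_CSR,
                          FLOAT_VALUE);
  CHECK(sparseA.isSparseArg());    // bufferType_ was set to TENSOR_SPARSE
  CHECK(!sparseA.isTransposed());  // trans defaults to false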
paddle/function/FunctionTest.h
@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #include "Function.h"
 #include "paddle/math/Matrix.h"
+#include "paddle/math/SparseMatrix.h"
+#include "paddle/math/Vector.h"
 #include "paddle/math/tests/TensorCheck.h"
 #include "paddle/testing/TestUtil.h"
@@ -62,29 +64,41 @@ public:
     cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(size));
     gpuMemory_.emplace_back(std::make_shared<GpuMemoryHandle>(size));

-    cpuInputs_.emplace_back(std::make_shared<BufferArg>(
-        cpuMemory_.back()->getBuf(), input.valueType(), input.shape()));
-    gpuInputs_.emplace_back(std::make_shared<BufferArg>(
-        gpuMemory_.back()->getBuf(), input.valueType(), input.shape()));
+    cpuInputs_.emplace_back(std::make_shared<BufferArg>(cpuMemory_.back()->getBuf(),
+                                                        input.valueType(),
+                                                        input.shape(),
+                                                        UNSPECIFIED,
+                                                        input.isTransposed()));
+    gpuInputs_.emplace_back(std::make_shared<BufferArg>(gpuMemory_.back()->getBuf(),
+                                                        input.valueType(),
+                                                        input.shape(),
+                                                        UNSPECIFIED,
+                                                        input.isTransposed()));
   }

   // output need only contains shape, do not contains data.
-  void addOutputs(const BufferArg& output) {
+  void addOutputs(const BufferArg& output, ArgType argType = ASSIGN_TO) {
     size_t size =
         output.shape().getElements() * sizeOfValuType(output.valueType());
     cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(size));
     gpuMemory_.emplace_back(std::make_shared<GpuMemoryHandle>(size));

     cpuOutputs_.emplace_back(
         std::make_shared<BufferArg>(cpuMemory_.back()->getBuf(),
                                     output.valueType(),
                                     output.shape(),
-                                    ASSIGN_TO));
+                                    // todo(tianbing), argType = output.getArgType(), but default ASSIGN_TO
+                                    argType,
+                                    output.isTransposed()));
     gpuOutputs_.emplace_back(
         std::make_shared<BufferArg>(gpuMemory_.back()->getBuf(),
                                     output.valueType(),
                                     output.shape(),
-                                    ASSIGN_TO));
+                                    // todo(tianbing), argType = output.getArgType(), but default ASSIGN_TO
+                                    argType,
+                                    output.isTransposed()));
   }

   void addInputs(const SequenceArg& input) {
@@ -107,10 +121,36 @@ public:
     // TODO: need be implemented.
   }

+  void addInputs(const SparseMatrixArg& input) {
+    cpuSparse_ = std::make_shared<CpuSparseMatrix>(input.shape()[0],
+                                                   input.shape()[1],
+                                                   input.nnz(),
+                                                   input.dataType(),
+                                                   input.dataFormat(),
+                                                   input.isTransposed());
+
+    gpuSparse_ = std::make_shared<GpuSparseMatrix>(input.shape()[0],
+                                                   input.shape()[1],
+                                                   input.nnz(),
+                                                   input.dataType(),
+                                                   input.dataFormat(),
+                                                   input.isTransposed());
+
+    /// init sparse matrix
+    hl_stream_t stream(HPPL_STREAM_1);
+    cpuSparse_->randomizeUniform();
+    gpuSparse_->copyFrom(*cpuSparse_, stream);
+    hl_stream_synchronize(stream);
+
+    cpuInputs_.emplace_back(std::make_shared<SparseMatrixArg>(*cpuSparse_));
+    gpuInputs_.emplace_back(std::make_shared<SparseMatrixArg>(*gpuSparse_));
+  }
+
   void run() {
     // prepare cpu/gpu arguments
     initInputs();
+    initOutputs();

     // function calculate
     auto callFunction = [](FunctionBase* function,
                            std::vector<BufferArgPtr>& inputs,
@@ -129,7 +169,7 @@ public:
     callFunction(cpuFunc_.get(), cpuInputs_, cpuOutputs_);
     callFunction(gpuFunc_.get(), gpuInputs_, gpuOutputs_);

-    // check outputs and inouts
+    // check outputs
     compareOutputs();
   }
@@ -140,6 +180,10 @@ public:
 protected:
   void initInputs() {
     for (size_t i = 0; i < cpuInputs_.size(); i++) {
+      if (cpuInputs_[i]->isSparseArg()) {
+        continue;  /// sparse matrix already init
+      }
+
       initArg(*cpuInputs_[i]);

       // TODO: Need a BufferCopy used to copy from one BufferArg to another.
@@ -152,6 +196,25 @@ protected:
     }
   }

+  void initOutputs() {
+    for (size_t i = 0; i < cpuOutputs_.size(); i++) {
+      if (cpuOutputs_[i]->isSparseArg()) {
+        LOG(INFO) << "output sparse matrix already init";
+        continue;
+      }
+
+      initArg(*cpuOutputs_[i]);
+
+      // TODO: Need a BufferCopy used to copy from one BufferArg to another.
+      CpuVector cpuVector(cpuOutputs_[i]->shape().getElements(),
+                          (real*)cpuOutputs_[i]->data());
+      GpuVector gpuVector(gpuOutputs_[i]->shape().getElements(),
+                          (real*)gpuOutputs_[i]->data());
+
+      gpuVector.copyFrom(cpuVector);
+    }
+  }
+
   void compareOutputs() {
     for (size_t i = 0; i < cpuOutputs_.size(); i++) {
       // TODO, Need a BufferCheck used to compare the two buffers.
@@ -159,7 +222,6 @@ protected:
       auto gpu = gpuOutputs_[i];
       CpuVector cpuVector(cpu->shape().getElements(), (real*)cpu->data());
       GpuVector gpuVector(cpu->shape().getElements(), (real*)gpu->data());
-
       autotest::TensorCheckErr(cpuVector, gpuVector);
     }
   }
@@ -195,6 +257,8 @@ protected:
   std::vector<BufferArgPtr> cpuOutputs_;
   std::vector<BufferArgPtr> gpuInputs_;
   std::vector<BufferArgPtr> gpuOutputs_;
+  std::shared_ptr<CpuSparseMatrix> cpuSparse_;
+  std::shared_ptr<GpuSparseMatrix> gpuSparse_;
 };

 }  // namespace paddle
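With addInputs(const SparseMatrixArg&) in place, a test case can hand FunctionCompare a shape-only sparse descriptor and let the helper allocate, randomize, and mirror the CPU/GPU copies before running both functions; initInputs/initOutputs then skip the sparse argument because it is already initialized. A minimal sketch, assuming the API exactly as shown in this diff (the 100 x 50 CSR shape and nnz of 10 are illustrative):

  // Sketch only: dense = sparse * dense, mirroring testFuncDSparseDMatrix below.
  FunctionCompare test("MulOp",
                       FuncConfig().set("scaleAB", (real)1.0).set("scaleT", (real)1.0));
  test.addInputs(SparseMatrixArg(
      VALUE_TYPE_FLOAT, TensorShape{100, 50}, /*nnz=*/10, SPARSE_CSR, FLOAT_VALUE));
  test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{50, 20}));
  test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{100, 20}), ADD_TO);
  test.run();  // runs MulOp-CPU and MulOp-GPU and compares the dense outputs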
paddle/function/MulOp.h
@@ -15,6 +15,8 @@ limitations under the License. */
 #pragma once

 #include "Function.h"
+/// todo(tianbing), delete it
+#include <iostream>
 #include "paddle/math/Matrix.h"
 #include "paddle/math/SparseMatrix.h"
paddle/function/MulOpTest.cpp
@@ -24,58 +24,39 @@ limitations under the License. */
 using namespace paddle;  // NOLINT

 /**
- * C = alpha * C + beta * (A * B), A, B, C dense matrix
+ * C += A * B, A, B, C dense matrix
  * dense = dense * dense
  */
-void testDDDMatrix(bool transa, bool transb, int dimM, int dimN, int dimK) {
-  real alpha = 1.5;
-  real beta = 2.0;
-
-  const auto cpuFunc = FunctionBase::funcRegistrar_.createByType("MulOp-CPU");
-  cpuFunc->init(FuncConfig().set("scaleAB", alpha).set("scaleT", beta));
-  const auto gpuFunc = FunctionBase::funcRegistrar_.createByType("MulOp-GPU");
-  gpuFunc->init(FuncConfig().set("scaleAB", alpha).set("scaleT", beta));
-
-  int heightA = (transa == false) ? dimM : dimK;
-  int widthA = (transa == false) ? dimK : dimM;
-  int heightB = (transb == false) ? dimK : dimN;
-  int widthB = (transb == false) ? dimN : dimK;
-  int heightC = dimM;
-  int widthC = dimN;
-
-  auto cpuA = std::make_shared<CpuMatrix>(heightA, widthA, transa);
-  auto cpuB = std::make_shared<CpuMatrix>(heightB, widthB, transb);
-  auto cpuC = std::make_shared<CpuMatrix>(heightC, widthC);
-  auto gpuA = std::make_shared<GpuMatrix>(heightA, widthA, transa);
-  auto gpuB = std::make_shared<GpuMatrix>(heightB, widthB, transb);
-  auto gpuC = std::make_shared<GpuMatrix>(heightC, widthC);
-
-  cpuA->randomizeUniform();
-  cpuB->randomizeUniform();
-  cpuC->randomizeUniform();
-  gpuA->copyFrom(*cpuA);
-  gpuB->copyFrom(*cpuB);
-  gpuC->copyFrom(*cpuC);
-
-  BufferArgs cpuInputs;
-  BufferArgs cpuOutputs;
-  cpuInputs.addArg(*cpuA);
-  cpuInputs.addArg(*cpuB);
-  cpuOutputs.addArg(*cpuC, ADD_TO);
-  cpuFunc->calc(cpuInputs, cpuOutputs);
-
-  BufferArgs gpuInputs;
-  BufferArgs gpuOutputs;
-  gpuInputs.addArg(*gpuA);
-  gpuInputs.addArg(*gpuB);
-  gpuOutputs.addArg(*gpuC, ADD_TO);
-  gpuFunc->calc(gpuInputs, gpuOutputs);
-
-  autotest::TensorCheckErr(*cpuC, *gpuC);
+void testFuncDDDMatrix(
+    bool transa, bool transb, size_t dimM, size_t dimN, size_t dimK) {
+  real alpha = 1.0;
+  real beta = 1.0;
+  size_t heightA = (transa == false) ? dimM : dimK;
+  size_t widthA = (transa == false) ? dimK : dimM;
+  size_t heightB = (transb == false) ? dimK : dimN;
+  size_t widthB = (transb == false) ? dimN : dimK;
+  size_t heightC = dimM;
+  size_t widthC = dimN;
+  // init Test object
+  FunctionCompare test("MulOp",
+                       FuncConfig().set("scaleAB", alpha).set("scaleT", beta));
+  // prepare input arguments
+  /// matrix A : HA * WA
+  test.addInputs(BufferArg(
+      VALUE_TYPE_FLOAT, TensorShape{heightA, widthA}, UNSPECIFIED, transa));
+  /// matrix B: HB * WB
+  test.addInputs(BufferArg(
+      VALUE_TYPE_FLOAT, TensorShape{heightB, widthB}, UNSPECIFIED, transb));
+  /// output matrix C: HC * WC
+  test.addOutputs(
+      BufferArg(VALUE_TYPE_FLOAT, TensorShape{heightC, widthC}), ADD_TO);
+  // run Function
+  test.run();
 }

-TEST(Matrix, DDDMul) {
-  LOG(INFO) << "test for dense = dense * dense matrix";
+TEST(MulOp, DDDMatrixMul) {
+  LOG(INFO) << "function test for dense = dense * dense matrix";
   for (const auto transa : {false, true}) {
     for (const auto transb : {false, true}) {
       for (const auto dimM : {1, 10, 100}) {
@@ -89,7 +70,7 @@ TEST(Matrix, DDDMul) {
                     << " dimM=" << std::setw(5) << dimM
                     << " dimN=" << std::setw(5) << dimN
                     << " dimK=" << std::setw(5) << dimK;
-          testDDDMatrix(transa, transb, dimM, dimN, dimK);
+          testFuncDDDMatrix(transa, transb, dimM, dimN, dimK);
         }
       }
     }
@@ -101,71 +82,33 @@ TEST(Matrix, DDDMul) {
 /**
  * C += A * B, B, C dense, A sparse
  * dense = sparse * dense
  */
-void testDSparseDMatrix(
+void testFuncDSparseDMatrix(
     size_t dimM, size_t dimN, size_t dimK, size_t nnz, SparseFormat FORMAT) {
   real alpha = 1.0;
   real beta = 1.0;
-  const auto cpuFunc = FunctionBase::funcRegistrar_.createByType("MulOp-CPU");
-  cpuFunc->init(FuncConfig().set("scaleAB", alpha).set("scaleT", beta));
-  const auto gpuFunc = FunctionBase::funcRegistrar_.createByType("MulOp-GPU");
-  gpuFunc->init(FuncConfig().set("scaleAB", alpha).set("scaleT", beta));
-
-  CpuSparseMatrix cpuMatrixA(dimM, dimK, nnz, FLOAT_VALUE, FORMAT, false);
-  GpuSparseMatrix gpuMatrixA(dimM, dimK, nnz, FLOAT_VALUE, FORMAT, false);
-  CpuMatrix cpuDenseA(dimM, dimK, false);
-
-  auto cpuMatrixB = Matrix::create(dimK, dimN, false, false);
-  auto gpuMatrixB = Matrix::create(dimK, dimN, false, true);
-  auto cpuDenseB = Matrix::create(dimK, dimN, false, false);
-
-  auto cpuMatrixC = Matrix::create(dimM, dimN, false, false);
-  auto gpuMatrixC = Matrix::create(dimM, dimN, false, true);
-  auto cpuDenseC = Matrix::create(dimM, dimN, false, false);
-
-  /*matrix init*/
-  hl_stream_t stream(HPPL_STREAM_1);
-  cpuMatrixA.randomizeUniform();
-  cpuMatrixB->randomizeUniform();
-  cpuMatrixC->randomizeUniform();
-
-  gpuMatrixA.copyFrom(cpuMatrixA, stream);
-  gpuMatrixB->copyFrom(*cpuMatrixB, stream);
-  gpuMatrixC->copyFrom(*cpuMatrixC, stream);
-
-  cpuDenseA.copyFrom(cpuMatrixA);
-  cpuDenseB->copyFrom(*cpuMatrixB);
-  cpuDenseC->copyFrom(*cpuMatrixC);
-  hl_stream_synchronize(stream);
-
-  /*matrix mul*/
-  BufferArgs cpuInputs;
-  BufferArgs cpuOutputs;
-  cpuInputs.addArg(cpuMatrixA);
-  cpuInputs.addArg(*cpuMatrixB);
-  cpuOutputs.addArg(*cpuMatrixC, ADD_TO);
-  cpuFunc->calc(cpuInputs, cpuOutputs);
-
-  BufferArgs gpuInputs;
-  BufferArgs gpuOutputs;
-  gpuInputs.addArg(gpuMatrixA);
-  gpuInputs.addArg(*gpuMatrixB);
-  gpuOutputs.addArg(*gpuMatrixC, ADD_TO);
-  gpuFunc->calc(gpuInputs, gpuOutputs);
-
-  BufferArgs denseInputs;
-  BufferArgs denseOutputs;
-  denseInputs.addArg(cpuDenseA);
-  denseInputs.addArg(*cpuDenseB);
-  denseOutputs.addArg(*cpuDenseC, ADD_TO);
-  cpuFunc->calc(denseInputs, denseOutputs);
-
-  /*check result*/
-  autotest::TensorCheckErr(*cpuMatrixC, *cpuDenseC);
-  autotest::TensorCheckErr(*cpuMatrixC, *gpuMatrixC);
+  // init Test object
+  FunctionCompare test("MulOp",
+                       FuncConfig().set("scaleAB", alpha).set("scaleT", beta));
+  // prepare input arguments
+  /// sparse matrix A : M * K
+  test.addInputs(SparseMatrixArg(VALUE_TYPE_FLOAT,
+                                 TensorShape{dimM, dimK},
+                                 nnz,
+                                 FORMAT,
+                                 FLOAT_VALUE,
+                                 UNSPECIFIED,
+                                 false));
+  /// matrix B: K * N
+  test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{dimK, dimN}));
+  /// output matrix C: M * N
+  test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{dimM, dimN}), ADD_TO);
+  // run Function
+  test.run();
 }

-TEST(Matrix, DSparseDMul) {
-  LOG(INFO) << "test for dense = sparse * dense matrix";
+TEST(MuLOp, DSparseDMul) {
+  LOG(INFO) << "function test for dense = sparse * dense matrix";
   for (const auto dimM : {10, 100, 1000}) {
     for (const auto dimN : {10, 100}) {
       for (const auto dimK : {3, 10}) {
@@ -177,7 +120,7 @@ TEST(Matrix, DSparseDMul) {
                   << " dimK=" << std::setw(5) << dimK
                   << " nnz=" << std::setw(5) << nnz
                   << " format=" << std::setw(5) << FORMAT;
-        testDSparseDMatrix(dimM, dimN, dimK, nnz, FORMAT);
+        testFuncDSparseDMatrix(dimM, dimN, dimK, nnz, FORMAT);
       }
     }
   }
@@ -189,72 +132,34 @@ TEST(Matrix, DSparseDMul) {
 /**
  * C += A * B, A, C dense, B sparse
  * dense = dense * sparse
  */
-void testDDSparseMatrix(
+void testFuncDDSparseMatrix(
     size_t dimM, size_t dimN, size_t dimK, size_t nnz, SparseFormat FORMAT) {
   real alpha = 1.0;
   real beta = 1.0;
-  const auto cpuFunc = FunctionBase::funcRegistrar_.createByType("MulOp-CPU");
-  cpuFunc->init(FuncConfig().set("scaleAB", alpha).set("scaleT", beta));
-  const auto gpuFunc = FunctionBase::funcRegistrar_.createByType("MulOp-GPU");
-  gpuFunc->init(FuncConfig().set("scaleAB", alpha).set("scaleT", beta));
-
-  auto cpuMatrixA = Matrix::create(dimM, dimK, false, false);
-  auto gpuMatrixA = Matrix::create(dimM, dimK, false, true);
-  auto cpuDenseA = Matrix::create(dimM, dimK, false, false);
-
-  CpuSparseMatrix cpuMatrixB(dimK, dimN, nnz, FLOAT_VALUE, FORMAT, false);
-  GpuSparseMatrix gpuMatrixB(dimK, dimN, nnz, FLOAT_VALUE, FORMAT, false);
-  auto cpuDenseB = Matrix::create(dimK, dimN, false, false);
-
-  auto cpuMatrixC = Matrix::create(dimM, dimN, false, false);
-  auto gpuMatrixC = Matrix::create(dimM, dimN, false, true);
-  auto cpuDenseC = Matrix::create(dimM, dimN, false, false);
-
-  /*matrix init*/
-  hl_stream_t stream(HPPL_STREAM_1);
-  cpuMatrixA->randomizeUniform();
-  cpuMatrixB.randomizeUniform();
-  cpuMatrixC->randomizeUniform();
-
-  gpuMatrixA->copyFrom(*cpuMatrixA, stream);
-  gpuMatrixB.copyFrom(cpuMatrixB, stream);
-  gpuMatrixC->copyFrom(*cpuMatrixC, stream);
-
-  cpuDenseA->copyFrom(*cpuMatrixA);
-  cpuDenseB->copyFrom(cpuMatrixB);
-  cpuDenseC->copyFrom(*cpuMatrixC);
-  hl_stream_synchronize(stream);
-
-  /*matrix mul*/
-  BufferArgs cpuInputs;
-  BufferArgs cpuOutputs;
-  cpuInputs.addArg(*cpuMatrixA);
-  cpuInputs.addArg(cpuMatrixB);
-  cpuOutputs.addArg(*cpuMatrixC, ADD_TO);
-  cpuFunc->calc(cpuInputs, cpuOutputs);
-
-  BufferArgs gpuInputs;
-  BufferArgs gpuOutputs;
-  gpuInputs.addArg(*gpuMatrixA);
-  gpuInputs.addArg(gpuMatrixB);
-  gpuOutputs.addArg(*gpuMatrixC, ADD_TO);
-  gpuFunc->calc(gpuInputs, gpuOutputs);
-
-  BufferArgs denseInputs;
-  BufferArgs denseOutputs;
-  denseInputs.addArg(*cpuDenseA);
-  denseInputs.addArg(*cpuDenseB);
-  denseOutputs.addArg(*cpuDenseC, ADD_TO);
-  cpuFunc->calc(denseInputs, denseOutputs);
-
-  /*check result*/
-  autotest::TensorCheckErr(*cpuMatrixC, *cpuDenseC);
-  autotest::TensorCheckErr(*cpuMatrixC, *gpuMatrixC);
+  // init Test object
+  FunctionCompare test("MulOp",
+                       FuncConfig().set("scaleAB", alpha).set("scaleT", beta));
+  // prepare input arguments
+  /// matrix A : M * K
+  test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{dimM, dimK}));
+  /// matrix B: K * N
+  test.addInputs(SparseMatrixArg(VALUE_TYPE_FLOAT,
+                                 TensorShape{dimK, dimN},
+                                 nnz,
+                                 FORMAT,
+                                 FLOAT_VALUE,
+                                 UNSPECIFIED,
+                                 false));
+  /// output matrix C: M * N
+  test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{dimM, dimN}), ADD_TO);
+  // run Function
+  test.run();
 }

-TEST(Matrix, DDSparseMul) {
-  LOG(INFO) << "test for dense = dense * sparse matrix";
+TEST(MulOp, DDSparseMul) {
+  LOG(INFO) << "function test for dense = dense * sparse matrix";
   for (const auto dimM : {10, 100, 1000}) {
     for (const auto dimN : {10, 100}) {
       for (const auto dimK : {3, 10}) {
@@ -266,7 +171,7 @@ TEST(Matrix, DDSparseMul) {
                   << " dimK=" << std::setw(5) << dimK
                   << " nnz=" << std::setw(5) << nnz
                   << " format=" << std::setw(5) << FORMAT;
-        testDDSparseMatrix(dimM, dimN, dimK, nnz, FORMAT);
+        testFuncDDSparseMatrix(dimM, dimN, dimK, nnz, FORMAT);
       }
     }
   }