Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleDetection
提交
1879332a
P
PaddleDetection
项目概览
PaddlePaddle
/
PaddleDetection
大约 2 年 前同步成功
通知
708
Star
11112
Fork
2696
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
184
列表
看板
标记
里程碑
合并请求
40
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
184
Issue
184
列表
看板
标记
里程碑
合并请求
40
合并请求
40
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
1879332a
编写于
5月 26, 2017
作者:
H
hedaoyuan
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Modify FunctionCompare to Compare2Function to support comparison of two CPU functions.
上级
1846d9e1
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
187 addition
and
135 deletion
+187
-135
paddle/function/ContextProjectionOpTest.cpp
paddle/function/ContextProjectionOpTest.cpp
+2
-2
paddle/function/CosSimOpTest.cpp
paddle/function/CosSimOpTest.cpp
+2
-2
paddle/function/CrossMapNormalOpTest.cpp
paddle/function/CrossMapNormalOpTest.cpp
+10
-10
paddle/function/FunctionTest.h
paddle/function/FunctionTest.h
+165
-113
paddle/function/MulOpTest.cpp
paddle/function/MulOpTest.cpp
+7
-7
paddle/function/PadOpTest.cpp
paddle/function/PadOpTest.cpp
+1
-1
未找到文件。
paddle/function/ContextProjectionOpTest.cpp
浏览文件 @
1879332a
...
...
@@ -28,7 +28,7 @@ void testMatrixProjectionForward(int context_start,
std
::
max
(
0
,
(
int
)(
context_start
+
context_length
-
1
));
if
(
pad
==
0
)
is_padding
=
false
;
Function
Compare
test
(
CpuGpuFunc
Compare
test
(
"ContextProjectionForward"
,
FuncConfig
()
.
set
(
"context_length"
,
context_length
)
...
...
@@ -60,7 +60,7 @@ void testMatrixProjectionBackward(int context_start,
std
::
max
(
0
,
(
int
)(
context_start
+
context_length
-
1
));
if
(
pad
==
0
)
is_padding
=
false
;
Function
Compare
test
(
CpuGpuFunc
Compare
test
(
"ContextProjectionBackward"
,
FuncConfig
()
.
set
(
"context_length"
,
context_length
)
...
...
paddle/function/CosSimOpTest.cpp
浏览文件 @
1879332a
...
...
@@ -22,7 +22,7 @@ void testCosSimForward(size_t height_x,
size_t
height_y
,
size_t
width
,
real
scale
)
{
Function
Compare
test
(
"CosSimForward"
,
FuncConfig
().
set
(
"scale"
,
scale
));
CpuGpuFunc
Compare
test
(
"CosSimForward"
,
FuncConfig
().
set
(
"scale"
,
scale
));
// prepare input arguments
test
.
addInputs
(
BufferArg
(
VALUE_TYPE_FLOAT
,
TensorShape
{
height_x
,
width
}));
test
.
addInputs
(
BufferArg
(
VALUE_TYPE_FLOAT
,
TensorShape
{
height_y
,
width
}));
...
...
@@ -36,7 +36,7 @@ void testCosSimBackward(size_t height_x,
size_t
height_y
,
size_t
width
,
real
scale
)
{
Function
Compare
test
(
"CosSimBackward"
,
FuncConfig
().
set
(
"scale"
,
scale
));
CpuGpuFunc
Compare
test
(
"CosSimBackward"
,
FuncConfig
().
set
(
"scale"
,
scale
));
// prepare input arguments
test
.
addInputs
(
BufferArg
(
VALUE_TYPE_FLOAT
,
TensorShape
{
height_x
,
1
}));
test
.
addInputs
(
BufferArg
(
VALUE_TYPE_FLOAT
,
TensorShape
{
height_x
,
1
}));
...
...
paddle/function/CrossMapNormalOpTest.cpp
浏览文件 @
1879332a
...
...
@@ -28,11 +28,11 @@ TEST(CrossMapNormal, real) {
<<
" size="
<<
size
;
// init Test object
Function
Compare
test
(
"CrossMapNormal"
,
FuncConfig
()
.
set
(
"size"
,
size
)
.
set
(
"scale"
,
(
real
)
1.5
)
.
set
(
"pow"
,
(
real
)
0.5
));
CpuGpuFunc
Compare
test
(
"CrossMapNormal"
,
FuncConfig
()
.
set
(
"size"
,
size
)
.
set
(
"scale"
,
(
real
)
1.5
)
.
set
(
"pow"
,
(
real
)
0.5
));
// prepare input arguments
TensorShape
shape
{
numSamples
,
channels
,
imgSizeH
,
imgSizeW
};
test
.
addInputs
(
BufferArg
(
VALUE_TYPE_FLOAT
,
shape
));
...
...
@@ -57,11 +57,11 @@ TEST(CrossMapNormalGrad, real) {
<<
" imgSizeH="
<<
imgSizeH
<<
" imgSizeW="
<<
imgSizeW
<<
" size="
<<
size
;
Function
Compare
test
(
"CrossMapNormalGrad"
,
FuncConfig
()
.
set
(
"size"
,
size
)
.
set
(
"scale"
,
(
real
)
1.5
)
.
set
(
"pow"
,
(
real
)
0.5
));
CpuGpuFunc
Compare
test
(
"CrossMapNormalGrad"
,
FuncConfig
()
.
set
(
"size"
,
size
)
.
set
(
"scale"
,
(
real
)
1.5
)
.
set
(
"pow"
,
(
real
)
0.5
));
TensorShape
shape
{
numSamples
,
channels
,
imgSizeH
,
imgSizeW
};
test
.
addInputs
(
BufferArg
(
VALUE_TYPE_FLOAT
,
shape
));
test
.
addInputs
(
BufferArg
(
VALUE_TYPE_FLOAT
,
shape
));
...
...
paddle/function/FunctionTest.h
浏览文件 @
1879332a
...
...
@@ -22,14 +22,62 @@ namespace paddle {
typedef
std
::
shared_ptr
<
BufferArg
>
BufferArgPtr
;
namespace
test
{
template
<
DeviceType
DType
>
struct
Allocator
;
template
<
>
struct
Allocator
<
DEVICE_TYPE_CPU
>
{
using
type
=
CpuMemoryHandle
;
};
template
<
>
struct
Allocator
<
DEVICE_TYPE_GPU
>
{
using
type
=
GpuMemoryHandle
;
};
// Copy argument1 to argument2
template
<
DeviceType
DType1
,
DeviceType
DType2
>
class
CopyArgument
{
public:
void
operator
()(
const
BufferArg
&
arg1
,
BufferArg
&
arg2
)
{
CHECK_EQ
(
arg1
.
valueType
(),
arg2
.
valueType
());
CHECK_LE
(
arg1
.
shape
().
getElements
(),
arg2
.
shape
().
getElements
());
if
(
arg1
.
valueType
()
==
VALUE_TYPE_INT32
)
{
IVectorPtr
vector1
=
IVector
::
create
((
int
*
)
arg1
.
data
(),
arg1
.
shape
().
getElements
(),
DType1
==
DEVICE_TYPE_CPU
?
false
:
true
);
IVectorPtr
vector2
=
IVector
::
create
((
int
*
)
arg2
.
data
(),
arg2
.
shape
().
getElements
(),
DType2
==
DEVICE_TYPE_CPU
?
false
:
true
);
vector2
->
copyFrom
(
*
vector1
);
}
else
{
VectorPtr
vector1
=
Vector
::
create
((
real
*
)
arg1
.
data
(),
arg1
.
shape
().
getElements
(),
DType1
==
DEVICE_TYPE_CPU
?
false
:
true
);
VectorPtr
vector2
=
Vector
::
create
((
real
*
)
arg2
.
data
(),
arg2
.
shape
().
getElements
(),
DType2
==
DEVICE_TYPE_CPU
?
false
:
true
);
vector2
->
copyFrom
(
*
vector1
);
}
}
};
}
// namespace test
/**
* \brief A class for comparing CPU and GPU implementations of Function.
*
* \brief A class for comparing two Functions of different implementations.
* For example, can be used to compare the CPU and GPU implementation
* of the function is consistent.
*
* Use case:
* // Initializes a test object, the corresponding cpu and gpu Function
* // are constructed according to FunctionName and FuncConfig.
*
Function
Compare test(FunctionName, FuncConfig);
*
CpuGpuFunc
Compare test(FunctionName, FuncConfig);
* // Prepare inputs and outputs arguments.
* // Here the input and output can not contain real data,
* // only contains the argument type and shape.
...
...
@@ -45,28 +93,38 @@ typedef std::shared_ptr<BufferArg> BufferArgPtr;
* // Compares CPU and GPU calculation results for consistency.
* test.run();
*/
class
FunctionCompare
{
template
<
DeviceType
DType1
,
DeviceType
DType2
>
class
Compare2Function
{
public:
FunctionCompare
(
const
std
::
string
&
name
,
const
FuncConfig
&
config
)
:
cpuFunc_
(
FunctionBase
::
funcRegistrar_
.
createByType
(
name
+
"-CPU"
)),
gpuFunc_
(
FunctionBase
::
funcRegistrar_
.
createByType
(
name
+
"-GPU"
))
{
cpuFunc_
->
init
(
config
);
gpuFunc_
->
init
(
config
);
typedef
typename
test
::
Allocator
<
DType1
>::
type
Allocator1
;
typedef
typename
test
::
Allocator
<
DType2
>::
type
Allocator2
;
typedef
typename
Tensor
<
real
,
DType1
>::
Vector
Vector1
;
typedef
typename
Tensor
<
real
,
DType2
>::
Vector
Vector2
;
typedef
typename
Tensor
<
real
,
DType1
>::
SparseMatrix
SparseMatrix1
;
typedef
typename
Tensor
<
real
,
DType2
>::
SparseMatrix
SparseMatrix2
;
Compare2Function
(
const
std
::
string
&
name1
,
const
std
::
string
&
name2
,
const
FuncConfig
&
config
)
:
function1_
(
FunctionBase
::
funcRegistrar_
.
createByType
(
name1
)),
function2_
(
FunctionBase
::
funcRegistrar_
.
createByType
(
name2
))
{
function1_
->
init
(
config
);
function2_
->
init
(
config
);
}
~
FunctionCompare
()
{}
~
Compare2Function
()
{}
// input need only contains shape, do not contains data.
void
addInputs
(
const
BufferArg
&
input
)
{
size_t
size
=
input
.
shape
().
getElements
()
*
sizeOfValuType
(
input
.
valueType
());
cpuMemory_
.
emplace_back
(
std
::
make_shared
<
CpuMemoryHandle
>
(
size
));
gpuMemory_
.
emplace_back
(
std
::
make_shared
<
GpuMemoryHandle
>
(
size
));
func1Memory_
.
emplace_back
(
std
::
make_shared
<
Allocator1
>
(
size
));
func2Memory_
.
emplace_back
(
std
::
make_shared
<
Allocator2
>
(
size
));
cpu
Inputs_
.
emplace_back
(
std
::
make_shared
<
BufferArg
>
(
cpu
Memory_
.
back
()
->
getBuf
(),
input
.
valueType
(),
input
.
shape
()));
gpu
Inputs_
.
emplace_back
(
std
::
make_shared
<
BufferArg
>
(
gpu
Memory_
.
back
()
->
getBuf
(),
input
.
valueType
(),
input
.
shape
()));
func1
Inputs_
.
emplace_back
(
std
::
make_shared
<
BufferArg
>
(
func1
Memory_
.
back
()
->
getBuf
(),
input
.
valueType
(),
input
.
shape
()));
func2
Inputs_
.
emplace_back
(
std
::
make_shared
<
BufferArg
>
(
func2
Memory_
.
back
()
->
getBuf
(),
input
.
valueType
(),
input
.
shape
()));
}
// assume one copy of sequence is shared by different SequenceArgs
...
...
@@ -75,62 +133,57 @@ public:
size_t
batchSize
=
input
.
shape
()[
0
];
size_t
numSeqs
=
batchSize
/
10
+
1
;
size_t
sizeId
=
(
numSeqs
+
1
)
*
sizeOfValuType
(
VALUE_TYPE_INT32
);
cpuMemory_
.
emplace_back
(
std
::
make_shared
<
CpuMemoryHandle
>
(
sizeId
));
gpuMemory_
.
emplace_back
(
std
::
make_shared
<
GpuMemoryHandle
>
(
sizeId
));
cpuSeq_
=
std
::
make_shared
<
SequenceIdArg
>
(
cpu
Memory_
.
back
()
->
getBuf
(),
TensorShape
{
numSeqs
+
1
});
gpuSeq_
=
std
::
make_shared
<
SequenceIdArg
>
(
gpu
Memory_
.
back
()
->
getBuf
(),
TensorShape
{
numSeqs
+
1
});
func1Memory_
.
emplace_back
(
std
::
make_shared
<
Allocator1
>
(
sizeId
));
func2Memory_
.
emplace_back
(
std
::
make_shared
<
Allocator2
>
(
sizeId
));
seq1_
=
std
::
make_shared
<
SequenceIdArg
>
(
func1
Memory_
.
back
()
->
getBuf
(),
TensorShape
{
numSeqs
+
1
});
seq2_
=
std
::
make_shared
<
SequenceIdArg
>
(
func2
Memory_
.
back
()
->
getBuf
(),
TensorShape
{
numSeqs
+
1
});
/// init sequence Id
initArg
(
*
cpuSeq
_
,
batchSize
);
initArg
(
*
seq1
_
,
batchSize
);
// todo(tianbing), delete it
CHECK_EQ
(
cpuSeq_
->
shape
().
getElements
(),
cpuSeq_
->
numSeqs
()
+
1
);
CpuIVector
cpuSeq
(
cpuSeq_
->
shape
().
getElements
(),
(
int
*
)
cpuSeq_
->
data
());
GpuIVector
gpuSeq
(
gpuSeq_
->
shape
().
getElements
(),
(
int
*
)
gpuSeq_
->
data
());
gpuSeq
.
copyFrom
(
cpuSeq
);
copyArg_
(
*
seq1_
,
*
seq2_
);
}
void
addInputs
(
const
SequenceArg
&
input
)
{
CHECK_EQ
(
input
.
shape
().
ndims
(),
2UL
);
size_t
batchSize
=
input
.
shape
()[
0
];
if
(
!
cpuSeq_
||
!
gpuSeq
_
)
{
// sequence not exist
if
(
!
seq1_
||
!
seq2
_
)
{
// sequence not exist
addSequence
(
SequenceIdArg
(
TensorShape
{
batchSize
}));
}
size_t
size
=
input
.
shape
().
getElements
()
*
sizeOfValuType
(
input
.
valueType
());
cpuMemory_
.
emplace_back
(
std
::
make_shared
<
CpuMemoryHandle
>
(
size
));
gpuMemory_
.
emplace_back
(
std
::
make_shared
<
GpuMemoryHandle
>
(
size
));
func1Memory_
.
emplace_back
(
std
::
make_shared
<
Allocator1
>
(
size
));
func2Memory_
.
emplace_back
(
std
::
make_shared
<
Allocator2
>
(
size
));
/// SequenceArg
cpu
Inputs_
.
emplace_back
(
std
::
make_shared
<
SequenceArg
>
(
cpu
Memory_
.
back
()
->
getBuf
(),
func1
Inputs_
.
emplace_back
(
std
::
make_shared
<
SequenceArg
>
(
func1
Memory_
.
back
()
->
getBuf
(),
input
.
valueType
(),
input
.
shape
(),
*
cpuSeq
_
));
gpu
Inputs_
.
emplace_back
(
std
::
make_shared
<
SequenceArg
>
(
gpu
Memory_
.
back
()
->
getBuf
(),
*
seq1
_
));
func2
Inputs_
.
emplace_back
(
std
::
make_shared
<
SequenceArg
>
(
func2
Memory_
.
back
()
->
getBuf
(),
input
.
valueType
(),
input
.
shape
(),
*
gpuSeq
_
));
*
seq2
_
));
}
// output need only contains shape, do not contains data.
void
addOutputs
(
const
BufferArg
&
output
,
ArgType
argType
=
ASSIGN_TO
)
{
size_t
size
=
output
.
shape
().
getElements
()
*
sizeOfValuType
(
output
.
valueType
());
cpuMemory_
.
emplace_back
(
std
::
make_shared
<
CpuMemoryHandle
>
(
size
));
gpuMemory_
.
emplace_back
(
std
::
make_shared
<
GpuMemoryHandle
>
(
size
));
func1Memory_
.
emplace_back
(
std
::
make_shared
<
Allocator1
>
(
size
));
func2Memory_
.
emplace_back
(
std
::
make_shared
<
Allocator2
>
(
size
));
cpu
Outputs_
.
emplace_back
(
std
::
make_shared
<
BufferArg
>
(
cpu
Memory_
.
back
()
->
getBuf
(),
func1
Outputs_
.
emplace_back
(
std
::
make_shared
<
BufferArg
>
(
func1
Memory_
.
back
()
->
getBuf
(),
output
.
valueType
(),
output
.
shape
(),
argType
));
gpu
Outputs_
.
emplace_back
(
std
::
make_shared
<
BufferArg
>
(
gpu
Memory_
.
back
()
->
getBuf
(),
func2
Outputs_
.
emplace_back
(
std
::
make_shared
<
BufferArg
>
(
func2
Memory_
.
back
()
->
getBuf
(),
output
.
valueType
(),
output
.
shape
(),
argType
));
...
...
@@ -138,14 +191,14 @@ public:
/// add and init output sparse matrix
void
addOutputs
(
const
SparseMatrixArg
&
output
,
ArgType
argType
=
ASSIGN_TO
)
{
cpuSparse_
=
std
::
make_shared
<
CpuSparseMatrix
>
(
sparse1_
=
std
::
make_shared
<
SparseMatrix1
>
(
output
.
shape
()[
0
],
output
.
shape
()[
1
],
output
.
nnz
(),
static_cast
<
SparseValueType
>
(
output
.
dataType
()),
static_cast
<
SparseFormat
>
(
output
.
dataFormat
()));
gpuSparse_
=
std
::
make_shared
<
GpuSparseMatrix
>
(
sparse2_
=
std
::
make_shared
<
SparseMatrix2
>
(
output
.
shape
()[
0
],
output
.
shape
()[
1
],
output
.
nnz
(),
...
...
@@ -154,52 +207,52 @@ public:
/// init sparse matrix
hl_stream_t
stream
(
HPPL_STREAM_1
);
cpuSparse
_
->
randomizeUniform
();
gpuSparse_
->
copyFrom
(
*
cpuSparse
_
,
stream
);
sparse1
_
->
randomizeUniform
();
sparse2_
->
copyFrom
(
*
sparse1
_
,
stream
);
hl_stream_synchronize
(
stream
);
cpu
Outputs_
.
emplace_back
(
std
::
make_shared
<
SparseMatrixArg
>
(
*
cpuSparse
_
,
argType
));
gpu
Outputs_
.
emplace_back
(
std
::
make_shared
<
SparseMatrixArg
>
(
*
gpuSparse
_
,
argType
));
func1
Outputs_
.
emplace_back
(
std
::
make_shared
<
SparseMatrixArg
>
(
*
sparse1
_
,
argType
));
func2
Outputs_
.
emplace_back
(
std
::
make_shared
<
SparseMatrixArg
>
(
*
sparse2
_
,
argType
));
}
void
addOutputs
(
const
SequenceArg
&
output
,
ArgType
argType
=
ASSIGN_TO
)
{
CHECK_EQ
(
output
.
shape
().
ndims
(),
2UL
);
size_t
batchSize
=
output
.
shape
()[
0
];
if
(
!
cpuSeq_
||
!
gpuSeq
_
)
{
// sequence not exist
if
(
!
seq1_
||
!
seq2
_
)
{
// sequence not exist
addSequence
(
SequenceIdArg
(
TensorShape
{
batchSize
}));
}
size_t
size
=
output
.
shape
().
getElements
()
*
sizeOfValuType
(
output
.
valueType
());
cpuMemory_
.
emplace_back
(
std
::
make_shared
<
CpuMemoryHandle
>
(
size
));
gpuMemory_
.
emplace_back
(
std
::
make_shared
<
GpuMemoryHandle
>
(
size
));
func1Memory_
.
emplace_back
(
std
::
make_shared
<
Allocator1
>
(
size
));
func2Memory_
.
emplace_back
(
std
::
make_shared
<
Allocator2
>
(
size
));
/// SequenceArg
cpu
Outputs_
.
emplace_back
(
std
::
make_shared
<
SequenceArg
>
(
cpu
Memory_
.
back
()
->
getBuf
(),
func1
Outputs_
.
emplace_back
(
std
::
make_shared
<
SequenceArg
>
(
func1
Memory_
.
back
()
->
getBuf
(),
output
.
valueType
(),
output
.
shape
(),
*
cpuSeq
_
,
*
seq1
_
,
argType
));
gpu
Outputs_
.
emplace_back
(
std
::
make_shared
<
SequenceArg
>
(
gpu
Memory_
.
back
()
->
getBuf
(),
func2
Outputs_
.
emplace_back
(
std
::
make_shared
<
SequenceArg
>
(
func2
Memory_
.
back
()
->
getBuf
(),
output
.
valueType
(),
output
.
shape
(),
*
gpuSeq
_
,
*
seq2
_
,
argType
));
}
void
addInputs
(
const
SparseMatrixArg
&
input
)
{
cpuSparse_
=
std
::
make_shared
<
CpuSparseMatrix
>
(
sparse1_
=
std
::
make_shared
<
SparseMatrix1
>
(
input
.
shape
()[
0
],
input
.
shape
()[
1
],
input
.
nnz
(),
static_cast
<
SparseValueType
>
(
input
.
dataType
()),
static_cast
<
SparseFormat
>
(
input
.
dataFormat
()));
gpuSparse_
=
std
::
make_shared
<
GpuSparseMatrix
>
(
sparse2_
=
std
::
make_shared
<
SparseMatrix2
>
(
input
.
shape
()[
0
],
input
.
shape
()[
1
],
input
.
nnz
(),
...
...
@@ -208,12 +261,12 @@ public:
/// init sparse matrix
hl_stream_t
stream
(
HPPL_STREAM_1
);
cpuSparse
_
->
randomizeUniform
();
gpuSparse_
->
copyFrom
(
*
cpuSparse
_
,
stream
);
sparse1
_
->
randomizeUniform
();
sparse2_
->
copyFrom
(
*
sparse1
_
,
stream
);
hl_stream_synchronize
(
stream
);
cpuInputs_
.
emplace_back
(
std
::
make_shared
<
SparseMatrixArg
>
(
*
cpuSparse
_
));
gpuInputs_
.
emplace_back
(
std
::
make_shared
<
SparseMatrixArg
>
(
*
gpuSparse
_
));
func1Inputs_
.
emplace_back
(
std
::
make_shared
<
SparseMatrixArg
>
(
*
sparse1
_
));
func2Inputs_
.
emplace_back
(
std
::
make_shared
<
SparseMatrixArg
>
(
*
sparse2
_
));
}
void
run
()
{
...
...
@@ -236,27 +289,27 @@ public:
function
->
calc
(
inArgs
,
outArgs
);
};
callFunction
(
cpuFunc_
.
get
(),
cpuInputs_
,
cpu
Outputs_
);
callFunction
(
gpuFunc_
.
get
(),
gpuInputs_
,
gpu
Outputs_
);
callFunction
(
function1_
.
get
(),
func1Inputs_
,
func1
Outputs_
);
callFunction
(
function2_
.
get
(),
func2Inputs_
,
func2
Outputs_
);
// check outputs
compareOutputs
();
}
std
::
shared_ptr
<
FunctionBase
>
getCpuFunction
()
const
{
return
cpuFunc
_
;
}
std
::
shared_ptr
<
FunctionBase
>
getCpuFunction
()
const
{
return
function1
_
;
}
std
::
shared_ptr
<
FunctionBase
>
getGpuFunction
()
const
{
return
gpuFunc
_
;
}
std
::
shared_ptr
<
FunctionBase
>
getGpuFunction
()
const
{
return
function2
_
;
}
protected:
// only init cpu argument, gpu argument copy from cpu argument.
void
initArg
(
BufferArg
&
arg
)
{
CpuVector
vector
(
arg
.
shape
().
getElements
(),
(
real
*
)
arg
.
data
());
Vector1
vector
(
arg
.
shape
().
getElements
(),
(
real
*
)
arg
.
data
());
vector
.
uniform
(
0.001
,
1
);
}
void
initArg
(
SequenceArg
&
arg
)
{
/// init only matrix
CpuVector
vector
(
arg
.
shape
().
getElements
(),
(
real
*
)
arg
.
data
());
Vector1
vector
(
arg
.
shape
().
getElements
(),
(
real
*
)
arg
.
data
());
vector
.
uniform
(
0.001
,
1
);
}
...
...
@@ -276,73 +329,72 @@ protected:
}
void
initInputs
()
{
for
(
size_t
i
=
0
;
i
<
cpu
Inputs_
.
size
();
i
++
)
{
if
(
cpu
Inputs_
[
i
]
->
isSparseArg
())
{
for
(
size_t
i
=
0
;
i
<
func1
Inputs_
.
size
();
i
++
)
{
if
(
func1
Inputs_
[
i
]
->
isSparseArg
())
{
continue
;
/// sparse matrix already init
}
if
(
cpu
Inputs_
[
i
]
->
isSequenceArg
())
{
initArg
(
dynamic_cast
<
SequenceArg
&>
(
*
cpu
Inputs_
[
i
]));
if
(
func1
Inputs_
[
i
]
->
isSequenceArg
())
{
initArg
(
dynamic_cast
<
SequenceArg
&>
(
*
func1
Inputs_
[
i
]));
}
else
{
initArg
(
*
cpu
Inputs_
[
i
]);
initArg
(
*
func1
Inputs_
[
i
]);
}
// TODO: Need a BufferCopy used to copy from one BufferArg to another.
CpuVector
cpuVector
(
cpuInputs_
[
i
]
->
shape
().
getElements
(),
(
real
*
)
cpuInputs_
[
i
]
->
data
());
GpuVector
gpuVector
(
gpuInputs_
[
i
]
->
shape
().
getElements
(),
(
real
*
)
gpuInputs_
[
i
]
->
data
());
gpuVector
.
copyFrom
(
cpuVector
);
copyArg_
(
*
func1Inputs_
[
i
],
*
func2Inputs_
[
i
]
);
}
}
void
initOutputs
()
{
for
(
size_t
i
=
0
;
i
<
cpu
Outputs_
.
size
();
i
++
)
{
if
(
cpu
Outputs_
[
i
]
->
isSparseArg
())
{
for
(
size_t
i
=
0
;
i
<
func1
Outputs_
.
size
();
i
++
)
{
if
(
func1
Outputs_
[
i
]
->
isSparseArg
())
{
continue
;
/// sparse matrix already init
}
if
(
cpu
Outputs_
[
i
]
->
isSequenceArg
())
{
initArg
(
dynamic_cast
<
SequenceArg
&>
(
*
cpu
Outputs_
[
i
]));
if
(
func1
Outputs_
[
i
]
->
isSequenceArg
())
{
initArg
(
dynamic_cast
<
SequenceArg
&>
(
*
func1
Outputs_
[
i
]));
}
else
{
initArg
(
*
cpu
Outputs_
[
i
]);
initArg
(
*
func1
Outputs_
[
i
]);
}
// TODO: Need a BufferCopy used to copy from one BufferArg to another.
CpuVector
cpuVector
(
cpuOutputs_
[
i
]
->
shape
().
getElements
(),
(
real
*
)
cpuOutputs_
[
i
]
->
data
());
GpuVector
gpuVector
(
gpuOutputs_
[
i
]
->
shape
().
getElements
(),
(
real
*
)
gpuOutputs_
[
i
]
->
data
());
gpuVector
.
copyFrom
(
cpuVector
);
copyArg_
(
*
func1Outputs_
[
i
],
*
func2Outputs_
[
i
]);
}
}
void
compareOutputs
()
{
for
(
size_t
i
=
0
;
i
<
cpu
Outputs_
.
size
();
i
++
)
{
for
(
size_t
i
=
0
;
i
<
func1
Outputs_
.
size
();
i
++
)
{
// TODO, Need a BufferCheck used to compare the two buffers.
const
auto
cpu
=
cpu
Outputs_
[
i
];
const
auto
gpu
=
gpu
Outputs_
[
i
];
const
auto
cpu
=
func1
Outputs_
[
i
];
const
auto
gpu
=
func2
Outputs_
[
i
];
CHECK_EQ
(
cpu
->
numElements
(),
gpu
->
numElements
());
CpuVector
cpuVector
(
cpu
->
numElements
(),
(
real
*
)
cpu
->
data
());
GpuVector
gpuVector
(
gpu
->
numElements
(),
(
real
*
)
gpu
->
data
());
Vector1
cpuVector
(
cpu
->
numElements
(),
(
real
*
)
cpu
->
data
());
Vector2
gpuVector
(
gpu
->
numElements
(),
(
real
*
)
gpu
->
data
());
autotest
::
TensorCheckErr
(
cpuVector
,
gpuVector
);
}
}
protected:
std
::
shared_ptr
<
FunctionBase
>
cpuFunc_
;
std
::
shared_ptr
<
FunctionBase
>
gpuFunc_
;
std
::
vector
<
CpuMemHandlePtr
>
cpuMemory_
;
std
::
vector
<
GpuMemHandlePtr
>
gpuMemory_
;
std
::
vector
<
BufferArgPtr
>
cpuInputs_
;
std
::
vector
<
BufferArgPtr
>
cpuOutputs_
;
std
::
vector
<
BufferArgPtr
>
gpuInputs_
;
std
::
vector
<
BufferArgPtr
>
gpuOutputs_
;
std
::
shared_ptr
<
CpuSparseMatrix
>
cpuSparse_
;
std
::
shared_ptr
<
GpuSparseMatrix
>
gpuSparse_
;
std
::
shared_ptr
<
SequenceIdArg
>
cpuSeq_
;
std
::
shared_ptr
<
SequenceIdArg
>
gpuSeq_
;
std
::
shared_ptr
<
FunctionBase
>
function1_
;
std
::
shared_ptr
<
FunctionBase
>
function2_
;
std
::
vector
<
std
::
shared_ptr
<
Allocator1
>>
func1Memory_
;
std
::
vector
<
std
::
shared_ptr
<
Allocator2
>>
func2Memory_
;
std
::
vector
<
BufferArgPtr
>
func1Inputs_
;
std
::
vector
<
BufferArgPtr
>
func1Outputs_
;
std
::
vector
<
BufferArgPtr
>
func2Inputs_
;
std
::
vector
<
BufferArgPtr
>
func2Outputs_
;
std
::
shared_ptr
<
SparseMatrix1
>
sparse1_
;
std
::
shared_ptr
<
SparseMatrix2
>
sparse2_
;
std
::
shared_ptr
<
SequenceIdArg
>
seq1_
;
std
::
shared_ptr
<
SequenceIdArg
>
seq2_
;
test
::
CopyArgument
<
DType1
,
DType2
>
copyArg_
;
};
class
CpuGpuFuncCompare
:
public
Compare2Function
<
DEVICE_TYPE_CPU
,
DEVICE_TYPE_GPU
>
{
public:
CpuGpuFuncCompare
(
const
std
::
string
&
name
,
const
FuncConfig
&
config
)
:
Compare2Function
(
name
+
"-CPU"
,
name
+
"-GPU"
,
config
)
{}
~
CpuGpuFuncCompare
()
{}
};
}
// namespace paddle
paddle/function/MulOpTest.cpp
浏览文件 @
1879332a
...
...
@@ -35,7 +35,7 @@ void testFuncDDDMatrix(
size_t
heightC
=
dimM
;
size_t
widthC
=
dimN
;
// init Test object
Function
Compare
test
(
CpuGpuFunc
Compare
test
(
"MulOp"
,
FuncConfig
().
set
(
"aTrans"
,
transa
).
set
(
"bTrans"
,
transb
));
// prepare input arguments
/// matrix A : HA * WA
...
...
@@ -81,8 +81,8 @@ void testFuncDSparseDMatrix(
size_t
dimM
,
size_t
dimN
,
size_t
dimK
,
size_t
nnz
,
SparseFormat
FORMAT
)
{
real
scaleT
=
1.0
;
// init Test object
FunctionCompare
test
(
"MulOp"
,
FuncConfig
().
set
(
"aTrans"
,
false
).
set
(
"bTrans"
,
false
));
CpuGpuFuncCompare
test
(
"MulOp"
,
FuncConfig
().
set
(
"aTrans"
,
false
).
set
(
"bTrans"
,
false
));
// prepare input arguments
/// sparse matrix A : M * K
test
.
addInputs
(
SparseMatrixArg
(
...
...
@@ -126,8 +126,8 @@ void testFuncDDSparseMatrix(
size_t
dimM
,
size_t
dimN
,
size_t
dimK
,
size_t
nnz
,
SparseFormat
FORMAT
)
{
real
scaleT
=
1.0
;
// init Test object
FunctionCompare
test
(
"MulOp"
,
FuncConfig
().
set
(
"aTrans"
,
false
).
set
(
"bTrans"
,
false
));
CpuGpuFuncCompare
test
(
"MulOp"
,
FuncConfig
().
set
(
"aTrans"
,
false
).
set
(
"bTrans"
,
false
));
// prepare input arguments
/// matrix A : M * K
test
.
addInputs
(
BufferArg
(
VALUE_TYPE_FLOAT
,
TensorShape
{
dimM
,
dimK
}));
...
...
@@ -172,8 +172,8 @@ void testFuncSparseDDMatrix(
size_t
dimM
,
size_t
dimN
,
size_t
dimK
,
size_t
nnz
,
SparseFormat
FORMAT
)
{
real
scaleT
=
1.0
;
// init Test object
FunctionCompare
test
(
"MulOp"
,
FuncConfig
().
set
(
"aTrans"
,
false
).
set
(
"bTrans"
,
false
));
CpuGpuFuncCompare
test
(
"MulOp"
,
FuncConfig
().
set
(
"aTrans"
,
false
).
set
(
"bTrans"
,
false
));
// prepare input arguments
/// matrix A : M * K
test
.
addInputs
(
BufferArg
(
VALUE_TYPE_FLOAT
,
TensorShape
{
dimM
,
dimK
}));
...
...
paddle/function/PadOpTest.cpp
浏览文件 @
1879332a
...
...
@@ -25,7 +25,7 @@ TEST(Pad, real) {
VLOG
(
3
)
<<
" numSamples="
<<
numSamples
<<
" channels="
<<
channels
<<
" imgSizeH="
<<
imgSizeH
<<
" imgSizeW="
<<
imgSizeW
;
for
(
bool
test_grad
:
{
false
,
true
})
{
Function
Compare
compare
(
CpuGpuFunc
Compare
compare
(
test_grad
?
"PadGrad"
:
"Pad"
,
FuncConfig
()
.
set
<
std
::
vector
<
uint32_t
>>
(
"channel"
,
{
2
,
3
})
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录