Commit 1879332a
Authored May 26, 2017 by hedaoyuan
Modify FunctionCompare to Compare2Function to support comparison of two CPU functions.
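The point of the rename is that the comparison harness is no longer hard-wired to a CPU/GPU pair: Compare2Function takes the two device types as template parameters and the two registered function type names as constructor arguments. A minimal sketch of what the two-CPU case could look like (the alias name CpuCpuFuncCompare and the registered type names are hypothetical, not part of this commit):

// Sketch only: pair two CPU implementations of the same Function.
// Both names must match types registered with FunctionBase::funcRegistrar_;
// "MulOp-CPU" / "MulOp-naive-CPU" below are illustrative placeholders.
class CpuCpuFuncCompare
    : public Compare2Function<DEVICE_TYPE_CPU, DEVICE_TYPE_CPU> {
public:
  CpuCpuFuncCompare(const std::string& name1,
                    const std::string& name2,
                    const FuncConfig& config)
      : Compare2Function(name1, name2, config) {}
};

// Usage (hypothetical names): CpuCpuFuncCompare test("MulOp-CPU", "MulOp-naive-CPU", FuncConfig());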
Parent: 1846d9e1
Showing 6 changed files with 187 additions and 135 deletions (+187 −135)
paddle/function/ContextProjectionOpTest.cpp  +2 −2
paddle/function/CosSimOpTest.cpp             +2 −2
paddle/function/CrossMapNormalOpTest.cpp     +10 −10
paddle/function/FunctionTest.h               +165 −113
paddle/function/MulOpTest.cpp                +7 −7
paddle/function/PadOpTest.cpp                +1 −1
paddle/function/ContextProjectionOpTest.cpp

@@ -28,7 +28,7 @@ void testMatrixProjectionForward(int context_start,
       std::max(0, (int)(context_start + context_length - 1));
   if (pad == 0) is_padding = false;
 
-  FunctionCompare test(
+  CpuGpuFuncCompare test(
       "ContextProjectionForward",
       FuncConfig()
           .set("context_length", context_length)
@@ -60,7 +60,7 @@ void testMatrixProjectionBackward(int context_start,
       std::max(0, (int)(context_start + context_length - 1));
   if (pad == 0) is_padding = false;
 
-  FunctionCompare test(
+  CpuGpuFuncCompare test(
       "ContextProjectionBackward",
       FuncConfig()
           .set("context_length", context_length)
paddle/function/CosSimOpTest.cpp

@@ -22,7 +22,7 @@ void testCosSimForward(size_t height_x,
                        size_t height_y,
                        size_t width,
                        real scale) {
-  FunctionCompare test("CosSimForward", FuncConfig().set("scale", scale));
+  CpuGpuFuncCompare test("CosSimForward", FuncConfig().set("scale", scale));
   // prepare input arguments
   test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{height_x, width}));
   test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{height_y, width}));
@@ -36,7 +36,7 @@ void testCosSimBackward(size_t height_x,
                         size_t height_y,
                         size_t width,
                         real scale) {
-  FunctionCompare test("CosSimBackward", FuncConfig().set("scale", scale));
+  CpuGpuFuncCompare test("CosSimBackward", FuncConfig().set("scale", scale));
   // prepare input arguments
   test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{height_x, 1}));
   test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{height_x, 1}));
paddle/function/CrossMapNormalOpTest.cpp

@@ -28,11 +28,11 @@ TEST(CrossMapNormal, real) {
                     << " size=" << size;
 
             // init Test object
-            FunctionCompare test("CrossMapNormal",
-                                 FuncConfig()
-                                     .set("size", size)
-                                     .set("scale", (real)1.5)
-                                     .set("pow", (real)0.5));
+            CpuGpuFuncCompare test("CrossMapNormal",
+                                   FuncConfig()
+                                       .set("size", size)
+                                       .set("scale", (real)1.5)
+                                       .set("pow", (real)0.5));
             // prepare input arguments
             TensorShape shape{numSamples, channels, imgSizeH, imgSizeW};
             test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape));
@@ -57,11 +57,11 @@ TEST(CrossMapNormalGrad, real) {
                     << " imgSizeH=" << imgSizeH << " imgSizeW=" << imgSizeW
                     << " size=" << size;
 
-            FunctionCompare test("CrossMapNormalGrad",
-                                 FuncConfig()
-                                     .set("size", size)
-                                     .set("scale", (real)1.5)
-                                     .set("pow", (real)0.5));
+            CpuGpuFuncCompare test("CrossMapNormalGrad",
+                                   FuncConfig()
+                                       .set("size", size)
+                                       .set("scale", (real)1.5)
+                                       .set("pow", (real)0.5));
             TensorShape shape{numSamples, channels, imgSizeH, imgSizeW};
             test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape));
             test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape));
paddle/function/FunctionTest.h

@@ -22,14 +22,62 @@ namespace paddle {
 typedef std::shared_ptr<BufferArg> BufferArgPtr;
 
+namespace test {
+template <DeviceType DType>
+struct Allocator;
+
+template <>
+struct Allocator<DEVICE_TYPE_CPU> {
+  using type = CpuMemoryHandle;
+};
+
+template <>
+struct Allocator<DEVICE_TYPE_GPU> {
+  using type = GpuMemoryHandle;
+};
+
+// Copy argument1 to argument2
+template <DeviceType DType1, DeviceType DType2>
+class CopyArgument {
+public:
+  void operator()(const BufferArg& arg1, BufferArg& arg2) {
+    CHECK_EQ(arg1.valueType(), arg2.valueType());
+    CHECK_LE(arg1.shape().getElements(), arg2.shape().getElements());
+
+    if (arg1.valueType() == VALUE_TYPE_INT32) {
+      IVectorPtr vector1 =
+          IVector::create((int*)arg1.data(),
+                          arg1.shape().getElements(),
+                          DType1 == DEVICE_TYPE_CPU ? false : true);
+      IVectorPtr vector2 =
+          IVector::create((int*)arg2.data(),
+                          arg2.shape().getElements(),
+                          DType2 == DEVICE_TYPE_CPU ? false : true);
+      vector2->copyFrom(*vector1);
+    } else {
+      VectorPtr vector1 =
+          Vector::create((real*)arg1.data(),
+                         arg1.shape().getElements(),
+                         DType1 == DEVICE_TYPE_CPU ? false : true);
+      VectorPtr vector2 =
+          Vector::create((real*)arg2.data(),
+                         arg2.shape().getElements(),
+                         DType2 == DEVICE_TYPE_CPU ? false : true);
+      vector2->copyFrom(*vector1);
+    }
+  }
+};
+}  // namespace test
+
 /**
- * \brief A class for comparing CPU and GPU implementations of Function.
- *
+ * \brief A class for comparing two Functions of different implementations.
+ *        For example, can be used to compare the CPU and GPU implementation
+ *        of the function is consistent.
  *
  * Use case:
  *  // Initializes a test object, the corresponding cpu and gpu Function
 *  // are constructed according to FunctionName and FuncConfig.
- *  FunctionCompare test(FunctionName, FuncConfig);
+ *  CpuGpuFuncCompare test(FunctionName, FuncConfig);
 *  // Prepare inputs and outputs arguments.
 *  // Here the input and output can not contain real data,
 *  // only contains the argument type and shape.
@@ -45,28 +93,38 @@ typedef std::shared_ptr<BufferArg> BufferArgPtr;
 *  // Compares CPU and GPU calculation results for consistency.
 *  test.run();
 */
-class FunctionCompare {
+template <DeviceType DType1, DeviceType DType2>
+class Compare2Function {
 public:
-  FunctionCompare(const std::string& name, const FuncConfig& config)
-      : cpuFunc_(FunctionBase::funcRegistrar_.createByType(name + "-CPU")),
-        gpuFunc_(FunctionBase::funcRegistrar_.createByType(name + "-GPU")) {
-    cpuFunc_->init(config);
-    gpuFunc_->init(config);
+  typedef typename test::Allocator<DType1>::type Allocator1;
+  typedef typename test::Allocator<DType2>::type Allocator2;
+  typedef typename Tensor<real, DType1>::Vector Vector1;
+  typedef typename Tensor<real, DType2>::Vector Vector2;
+  typedef typename Tensor<real, DType1>::SparseMatrix SparseMatrix1;
+  typedef typename Tensor<real, DType2>::SparseMatrix SparseMatrix2;
+
+  Compare2Function(const std::string& name1,
+                   const std::string& name2,
+                   const FuncConfig& config)
+      : function1_(FunctionBase::funcRegistrar_.createByType(name1)),
+        function2_(FunctionBase::funcRegistrar_.createByType(name2)) {
+    function1_->init(config);
+    function2_->init(config);
   }
 
-  ~FunctionCompare() {}
+  ~Compare2Function() {}
 
   // input need only contains shape, do not contains data.
   void addInputs(const BufferArg& input) {
     size_t size =
         input.shape().getElements() * sizeOfValuType(input.valueType());
-    cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(size));
-    gpuMemory_.emplace_back(std::make_shared<GpuMemoryHandle>(size));
+    func1Memory_.emplace_back(std::make_shared<Allocator1>(size));
+    func2Memory_.emplace_back(std::make_shared<Allocator2>(size));
 
-    cpuInputs_.emplace_back(std::make_shared<BufferArg>(
-        cpuMemory_.back()->getBuf(), input.valueType(), input.shape()));
-    gpuInputs_.emplace_back(std::make_shared<BufferArg>(
-        gpuMemory_.back()->getBuf(), input.valueType(), input.shape()));
+    func1Inputs_.emplace_back(std::make_shared<BufferArg>(
+        func1Memory_.back()->getBuf(), input.valueType(), input.shape()));
+    func2Inputs_.emplace_back(std::make_shared<BufferArg>(
+        func2Memory_.back()->getBuf(), input.valueType(), input.shape()));
   }
 
   // assume one copy of sequence is shared by different SequenceArgs
@@ -75,62 +133,57 @@ public:
     size_t batchSize = input.shape()[0];
     size_t numSeqs = batchSize / 10 + 1;
     size_t sizeId = (numSeqs + 1) * sizeOfValuType(VALUE_TYPE_INT32);
-    cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(sizeId));
-    gpuMemory_.emplace_back(std::make_shared<GpuMemoryHandle>(sizeId));
-    cpuSeq_ = std::make_shared<SequenceIdArg>(cpuMemory_.back()->getBuf(),
-                                              TensorShape{numSeqs + 1});
-    gpuSeq_ = std::make_shared<SequenceIdArg>(gpuMemory_.back()->getBuf(),
-                                              TensorShape{numSeqs + 1});
+    func1Memory_.emplace_back(std::make_shared<Allocator1>(sizeId));
+    func2Memory_.emplace_back(std::make_shared<Allocator2>(sizeId));
+    seq1_ = std::make_shared<SequenceIdArg>(func1Memory_.back()->getBuf(),
+                                            TensorShape{numSeqs + 1});
+    seq2_ = std::make_shared<SequenceIdArg>(func2Memory_.back()->getBuf(),
+                                            TensorShape{numSeqs + 1});
     /// init sequence Id
-    initArg(*cpuSeq_, batchSize);
-
-    // todo(tianbing), delete it
-    CHECK_EQ(cpuSeq_->shape().getElements(), cpuSeq_->numSeqs() + 1);
-
-    CpuIVector cpuSeq(cpuSeq_->shape().getElements(), (int*)cpuSeq_->data());
-    GpuIVector gpuSeq(gpuSeq_->shape().getElements(), (int*)gpuSeq_->data());
-    gpuSeq.copyFrom(cpuSeq);
+    initArg(*seq1_, batchSize);
+
+    copyArg_(*seq1_, *seq2_);
   }
 
   void addInputs(const SequenceArg& input) {
     CHECK_EQ(input.shape().ndims(), 2UL);
     size_t batchSize = input.shape()[0];
-    if (!cpuSeq_ || !gpuSeq_) {  // sequence not exist
+    if (!seq1_ || !seq2_) {  // sequence not exist
       addSequence(SequenceIdArg(TensorShape{batchSize}));
     }
 
     size_t size =
         input.shape().getElements() * sizeOfValuType(input.valueType());
-    cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(size));
-    gpuMemory_.emplace_back(std::make_shared<GpuMemoryHandle>(size));
+    func1Memory_.emplace_back(std::make_shared<Allocator1>(size));
+    func2Memory_.emplace_back(std::make_shared<Allocator2>(size));
 
     /// SequenceArg
-    cpuInputs_.emplace_back(
-        std::make_shared<SequenceArg>(cpuMemory_.back()->getBuf(),
-                                      input.valueType(),
-                                      input.shape(),
-                                      *cpuSeq_));
-    gpuInputs_.emplace_back(
-        std::make_shared<SequenceArg>(gpuMemory_.back()->getBuf(),
-                                      input.valueType(),
-                                      input.shape(),
-                                      *gpuSeq_));
+    func1Inputs_.emplace_back(
+        std::make_shared<SequenceArg>(func1Memory_.back()->getBuf(),
+                                      input.valueType(),
+                                      input.shape(),
+                                      *seq1_));
+    func2Inputs_.emplace_back(
+        std::make_shared<SequenceArg>(func2Memory_.back()->getBuf(),
+                                      input.valueType(),
+                                      input.shape(),
+                                      *seq2_));
   }
 
   // output need only contains shape, do not contains data.
   void addOutputs(const BufferArg& output, ArgType argType = ASSIGN_TO) {
     size_t size =
         output.shape().getElements() * sizeOfValuType(output.valueType());
-    cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(size));
-    gpuMemory_.emplace_back(std::make_shared<GpuMemoryHandle>(size));
+    func1Memory_.emplace_back(std::make_shared<Allocator1>(size));
+    func2Memory_.emplace_back(std::make_shared<Allocator2>(size));
 
-    cpuOutputs_.emplace_back(
-        std::make_shared<BufferArg>(cpuMemory_.back()->getBuf(),
-                                    output.valueType(),
-                                    output.shape(),
-                                    argType));
-    gpuOutputs_.emplace_back(
-        std::make_shared<BufferArg>(gpuMemory_.back()->getBuf(),
-                                    output.valueType(),
-                                    output.shape(),
-                                    argType));
+    func1Outputs_.emplace_back(
+        std::make_shared<BufferArg>(func1Memory_.back()->getBuf(),
+                                    output.valueType(),
+                                    output.shape(),
+                                    argType));
+    func2Outputs_.emplace_back(
+        std::make_shared<BufferArg>(func2Memory_.back()->getBuf(),
+                                    output.valueType(),
+                                    output.shape(),
+                                    argType));
@@ -138,14 +191,14 @@ public:
   /// add and init output sparse matrix
   void addOutputs(const SparseMatrixArg& output, ArgType argType = ASSIGN_TO) {
-    cpuSparse_ = std::make_shared<CpuSparseMatrix>(
+    sparse1_ = std::make_shared<SparseMatrix1>(
         output.shape()[0],
         output.shape()[1],
         output.nnz(),
         static_cast<SparseValueType>(output.dataType()),
         static_cast<SparseFormat>(output.dataFormat()));
 
-    gpuSparse_ = std::make_shared<GpuSparseMatrix>(
+    sparse2_ = std::make_shared<SparseMatrix2>(
         output.shape()[0],
         output.shape()[1],
         output.nnz(),
@@ -154,52 +207,52 @@ public:
     /// init sparse matrix
     hl_stream_t stream(HPPL_STREAM_1);
-    cpuSparse_->randomizeUniform();
-    gpuSparse_->copyFrom(*cpuSparse_, stream);
+    sparse1_->randomizeUniform();
+    sparse2_->copyFrom(*sparse1_, stream);
     hl_stream_synchronize(stream);
 
-    cpuOutputs_.emplace_back(
-        std::make_shared<SparseMatrixArg>(*cpuSparse_, argType));
-    gpuOutputs_.emplace_back(
-        std::make_shared<SparseMatrixArg>(*gpuSparse_, argType));
+    func1Outputs_.emplace_back(
+        std::make_shared<SparseMatrixArg>(*sparse1_, argType));
+    func2Outputs_.emplace_back(
+        std::make_shared<SparseMatrixArg>(*sparse2_, argType));
   }
 
   void addOutputs(const SequenceArg& output, ArgType argType = ASSIGN_TO) {
     CHECK_EQ(output.shape().ndims(), 2UL);
     size_t batchSize = output.shape()[0];
 
-    if (!cpuSeq_ || !gpuSeq_) {  // sequence not exist
+    if (!seq1_ || !seq2_) {  // sequence not exist
       addSequence(SequenceIdArg(TensorShape{batchSize}));
     }
     size_t size =
         output.shape().getElements() * sizeOfValuType(output.valueType());
-    cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(size));
-    gpuMemory_.emplace_back(std::make_shared<GpuMemoryHandle>(size));
+    func1Memory_.emplace_back(std::make_shared<Allocator1>(size));
+    func2Memory_.emplace_back(std::make_shared<Allocator2>(size));
 
     /// SequenceArg
-    cpuOutputs_.emplace_back(
-        std::make_shared<SequenceArg>(cpuMemory_.back()->getBuf(),
-                                      output.valueType(),
-                                      output.shape(),
-                                      *cpuSeq_,
-                                      argType));
-    gpuOutputs_.emplace_back(
-        std::make_shared<SequenceArg>(gpuMemory_.back()->getBuf(),
-                                      output.valueType(),
-                                      output.shape(),
-                                      *gpuSeq_,
-                                      argType));
+    func1Outputs_.emplace_back(
+        std::make_shared<SequenceArg>(func1Memory_.back()->getBuf(),
+                                      output.valueType(),
+                                      output.shape(),
+                                      *seq1_,
+                                      argType));
+    func2Outputs_.emplace_back(
+        std::make_shared<SequenceArg>(func2Memory_.back()->getBuf(),
+                                      output.valueType(),
+                                      output.shape(),
+                                      *seq2_,
+                                      argType));
   }
 
   void addInputs(const SparseMatrixArg& input) {
-    cpuSparse_ = std::make_shared<CpuSparseMatrix>(
+    sparse1_ = std::make_shared<SparseMatrix1>(
         input.shape()[0],
         input.shape()[1],
         input.nnz(),
         static_cast<SparseValueType>(input.dataType()),
         static_cast<SparseFormat>(input.dataFormat()));
 
-    gpuSparse_ = std::make_shared<GpuSparseMatrix>(
+    sparse2_ = std::make_shared<SparseMatrix2>(
         input.shape()[0],
         input.shape()[1],
         input.nnz(),
@@ -208,12 +261,12 @@ public:
     /// init sparse matrix
     hl_stream_t stream(HPPL_STREAM_1);
-    cpuSparse_->randomizeUniform();
-    gpuSparse_->copyFrom(*cpuSparse_, stream);
+    sparse1_->randomizeUniform();
+    sparse2_->copyFrom(*sparse1_, stream);
     hl_stream_synchronize(stream);
 
-    cpuInputs_.emplace_back(std::make_shared<SparseMatrixArg>(*cpuSparse_));
-    gpuInputs_.emplace_back(std::make_shared<SparseMatrixArg>(*gpuSparse_));
+    func1Inputs_.emplace_back(std::make_shared<SparseMatrixArg>(*sparse1_));
+    func2Inputs_.emplace_back(std::make_shared<SparseMatrixArg>(*sparse2_));
   }
 
   void run() {
@@ -236,27 +289,27 @@ public:
       function->calc(inArgs, outArgs);
     };
 
-    callFunction(cpuFunc_.get(), cpuInputs_, cpuOutputs_);
-    callFunction(gpuFunc_.get(), gpuInputs_, gpuOutputs_);
+    callFunction(function1_.get(), func1Inputs_, func1Outputs_);
+    callFunction(function2_.get(), func2Inputs_, func2Outputs_);
 
     // check outputs
     compareOutputs();
   }
 
-  std::shared_ptr<FunctionBase> getCpuFunction() const { return cpuFunc_; }
+  std::shared_ptr<FunctionBase> getCpuFunction() const { return function1_; }
 
-  std::shared_ptr<FunctionBase> getGpuFunction() const { return gpuFunc_; }
+  std::shared_ptr<FunctionBase> getGpuFunction() const { return function2_; }
 
 protected:
   // only init cpu argument, gpu argument copy from cpu argument.
   void initArg(BufferArg& arg) {
-    CpuVector vector(arg.shape().getElements(), (real*)arg.data());
+    Vector1 vector(arg.shape().getElements(), (real*)arg.data());
     vector.uniform(0.001, 1);
   }
 
   void initArg(SequenceArg& arg) {
     /// init only matrix
-    CpuVector vector(arg.shape().getElements(), (real*)arg.data());
+    Vector1 vector(arg.shape().getElements(), (real*)arg.data());
     vector.uniform(0.001, 1);
   }
@@ -276,73 +329,72 @@ protected:
   }
 
   void initInputs() {
-    for (size_t i = 0; i < cpuInputs_.size(); i++) {
-      if (cpuInputs_[i]->isSparseArg()) {
+    for (size_t i = 0; i < func1Inputs_.size(); i++) {
+      if (func1Inputs_[i]->isSparseArg()) {
         continue;  /// sparse matrix already init
       }
 
-      if (cpuInputs_[i]->isSequenceArg()) {
-        initArg(dynamic_cast<SequenceArg&>(*cpuInputs_[i]));
+      if (func1Inputs_[i]->isSequenceArg()) {
+        initArg(dynamic_cast<SequenceArg&>(*func1Inputs_[i]));
       } else {
-        initArg(*cpuInputs_[i]);
+        initArg(*func1Inputs_[i]);
       }
-      // TODO: Need a BufferCopy used to copy from one BufferArg to another.
-      CpuVector cpuVector(cpuInputs_[i]->shape().getElements(),
-                          (real*)cpuInputs_[i]->data());
-      GpuVector gpuVector(gpuInputs_[i]->shape().getElements(),
-                          (real*)gpuInputs_[i]->data());
-      gpuVector.copyFrom(cpuVector);
+      copyArg_(*func1Inputs_[i], *func2Inputs_[i]);
     }
   }
 
   void initOutputs() {
-    for (size_t i = 0; i < cpuOutputs_.size(); i++) {
-      if (cpuOutputs_[i]->isSparseArg()) {
+    for (size_t i = 0; i < func1Outputs_.size(); i++) {
+      if (func1Outputs_[i]->isSparseArg()) {
         continue;  /// sparse matrix already init
       }
 
-      if (cpuOutputs_[i]->isSequenceArg()) {
-        initArg(dynamic_cast<SequenceArg&>(*cpuOutputs_[i]));
+      if (func1Outputs_[i]->isSequenceArg()) {
+        initArg(dynamic_cast<SequenceArg&>(*func1Outputs_[i]));
       } else {
-        initArg(*cpuOutputs_[i]);
+        initArg(*func1Outputs_[i]);
       }
 
-      // TODO: Need a BufferCopy used to copy from one BufferArg to another.
-      CpuVector cpuVector(cpuOutputs_[i]->shape().getElements(),
-                          (real*)cpuOutputs_[i]->data());
-      GpuVector gpuVector(gpuOutputs_[i]->shape().getElements(),
-                          (real*)gpuOutputs_[i]->data());
-      gpuVector.copyFrom(cpuVector);
+      copyArg_(*func1Outputs_[i], *func2Outputs_[i]);
     }
   }
 
   void compareOutputs() {
-    for (size_t i = 0; i < cpuOutputs_.size(); i++) {
+    for (size_t i = 0; i < func1Outputs_.size(); i++) {
       // TODO, Need a BufferCheck used to compare the two buffers.
-      const auto cpu = cpuOutputs_[i];
-      const auto gpu = gpuOutputs_[i];
+      const auto cpu = func1Outputs_[i];
+      const auto gpu = func2Outputs_[i];
       CHECK_EQ(cpu->numElements(), gpu->numElements());
-      CpuVector cpuVector(cpu->numElements(), (real*)cpu->data());
-      GpuVector gpuVector(gpu->numElements(), (real*)gpu->data());
+      Vector1 cpuVector(cpu->numElements(), (real*)cpu->data());
+      Vector2 gpuVector(gpu->numElements(), (real*)gpu->data());
       autotest::TensorCheckErr(cpuVector, gpuVector);
     }
   }
 
 protected:
-  std::shared_ptr<FunctionBase> cpuFunc_;
-  std::shared_ptr<FunctionBase> gpuFunc_;
-  std::vector<CpuMemHandlePtr> cpuMemory_;
-  std::vector<GpuMemHandlePtr> gpuMemory_;
-  std::vector<BufferArgPtr> cpuInputs_;
-  std::vector<BufferArgPtr> cpuOutputs_;
-  std::vector<BufferArgPtr> gpuInputs_;
-  std::vector<BufferArgPtr> gpuOutputs_;
-  std::shared_ptr<CpuSparseMatrix> cpuSparse_;
-  std::shared_ptr<GpuSparseMatrix> gpuSparse_;
-  std::shared_ptr<SequenceIdArg> cpuSeq_;
-  std::shared_ptr<SequenceIdArg> gpuSeq_;
+  std::shared_ptr<FunctionBase> function1_;
+  std::shared_ptr<FunctionBase> function2_;
+  std::vector<std::shared_ptr<Allocator1>> func1Memory_;
+  std::vector<std::shared_ptr<Allocator2>> func2Memory_;
+  std::vector<BufferArgPtr> func1Inputs_;
+  std::vector<BufferArgPtr> func1Outputs_;
+  std::vector<BufferArgPtr> func2Inputs_;
+  std::vector<BufferArgPtr> func2Outputs_;
+  std::shared_ptr<SparseMatrix1> sparse1_;
+  std::shared_ptr<SparseMatrix2> sparse2_;
+  std::shared_ptr<SequenceIdArg> seq1_;
+  std::shared_ptr<SequenceIdArg> seq2_;
+  test::CopyArgument<DType1, DType2> copyArg_;
 };
 
+class CpuGpuFuncCompare
+    : public Compare2Function<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> {
+public:
+  CpuGpuFuncCompare(const std::string& name, const FuncConfig& config)
+      : Compare2Function(name + "-CPU", name + "-GPU", config) {}
+
+  ~CpuGpuFuncCompare() {}
+};
+
 }  // namespace paddle
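The CpuGpuFuncCompare subclass added above keeps the old single-name behaviour (it appends "-CPU" and "-GPU" itself), which is why the remaining test diffs only rename the type they construct. For the two-CPU comparison that motivates this commit, the template can be used directly; a hedged sketch, where the two registered type names and the shapes are placeholders rather than names introduced by this commit:

// Sketch only: both type names must already be registered with
// FunctionBase::funcRegistrar_; "SomeFunc-CPU" / "SomeFuncV2-CPU" are
// illustrative placeholders.
Compare2Function<DEVICE_TYPE_CPU, DEVICE_TYPE_CPU> test(
    "SomeFunc-CPU", "SomeFuncV2-CPU", FuncConfig());
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{16, 32}));
test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{16, 32}));
// run() executes both functions on the same data and compares the
// outputs with autotest::TensorCheckErr.
test.run();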
paddle/function/MulOpTest.cpp

@@ -35,7 +35,7 @@ void testFuncDDDMatrix(
   size_t heightC = dimM;
   size_t widthC = dimN;
   // init Test object
-  FunctionCompare test(
+  CpuGpuFuncCompare test(
       "MulOp", FuncConfig().set("aTrans", transa).set("bTrans", transb));
   // prepare input arguments
   /// matrix A : HA * WA
@@ -81,8 +81,8 @@ void testFuncDSparseDMatrix(
     size_t dimM, size_t dimN, size_t dimK, size_t nnz, SparseFormat FORMAT) {
   real scaleT = 1.0;
   // init Test object
-  FunctionCompare test(
-      "MulOp", FuncConfig().set("aTrans", false).set("bTrans", false));
+  CpuGpuFuncCompare test(
+      "MulOp", FuncConfig().set("aTrans", false).set("bTrans", false));
   // prepare input arguments
   /// sparse matrix A : M * K
   test.addInputs(SparseMatrixArg(
@@ -126,8 +126,8 @@ void testFuncDDSparseMatrix(
     size_t dimM, size_t dimN, size_t dimK, size_t nnz, SparseFormat FORMAT) {
   real scaleT = 1.0;
   // init Test object
-  FunctionCompare test(
-      "MulOp", FuncConfig().set("aTrans", false).set("bTrans", false));
+  CpuGpuFuncCompare test(
+      "MulOp", FuncConfig().set("aTrans", false).set("bTrans", false));
   // prepare input arguments
   /// matrix A : M * K
   test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{dimM, dimK}));
@@ -172,8 +172,8 @@ void testFuncSparseDDMatrix(
     size_t dimM, size_t dimN, size_t dimK, size_t nnz, SparseFormat FORMAT) {
   real scaleT = 1.0;
   // init Test object
-  FunctionCompare test(
-      "MulOp", FuncConfig().set("aTrans", false).set("bTrans", false));
+  CpuGpuFuncCompare test(
+      "MulOp", FuncConfig().set("aTrans", false).set("bTrans", false));
   // prepare input arguments
   /// matrix A : M * K
   test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{dimM, dimK}));
paddle/function/PadOpTest.cpp

@@ -25,7 +25,7 @@ TEST(Pad, real) {
           VLOG(3) << " numSamples=" << numSamples << " channels=" << channels
                   << " imgSizeH=" << imgSizeH << " imgSizeW=" << imgSizeW;
           for (bool test_grad : {false, true}) {
-            FunctionCompare compare(
+            CpuGpuFuncCompare compare(
                 test_grad ? "PadGrad" : "Pad",
                 FuncConfig()
                     .set<std::vector<uint32_t>>("channel", {2, 3})