Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
67817433
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
67817433
编写于
8年前
作者:
H
hedaoyuan
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Implement the FunctionTest
上级
039c0bf2
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
112 addition
and
126 deletion
+112
-126
paddle/function/Function.h
paddle/function/Function.h
+11
-2
paddle/function/FunctionTest.h
paddle/function/FunctionTest.h
+101
-124
未找到文件。
paddle/function/Function.h
浏览文件 @
67817433
...
...
@@ -75,8 +75,17 @@ public:
// Tensor can be Matrix, Vector, IVector.
// For inputs, do not need argType.
// For outputs, the argType needs to be specified as ASSIGN_TO or ADD_TO.
template
<
typename
Tensor
>
void
addArg
(
const
Tensor
&
arg
,
ArgType
argType
=
UNSPECIFIED
)
{
void
addArg
(
const
Matrix
&
arg
,
ArgType
argType
=
UNSPECIFIED
)
{
_args_
.
push_back
(
new
BufferArg
(
arg
,
argType
));
addArg
(
*
_args_
.
back
());
}
void
addArg
(
const
Vector
&
arg
,
ArgType
argType
=
UNSPECIFIED
)
{
_args_
.
push_back
(
new
BufferArg
(
arg
,
argType
));
addArg
(
*
_args_
.
back
());
}
void
addArg
(
const
IVector
&
arg
,
ArgType
argType
=
UNSPECIFIED
)
{
_args_
.
push_back
(
new
BufferArg
(
arg
,
argType
));
addArg
(
*
_args_
.
back
());
}
...
...
This diff is collapsed.
Click to expand it.
paddle/function/FunctionTest.h
浏览文件 @
67817433
...
...
@@ -19,6 +19,8 @@ limitations under the License. */
namespace
paddle
{
typedef
std
::
shared_ptr
<
BufferArg
>
BufferArgPtr
;
/**
* \brief A class for comparing CPU and GPU implementations of Function.
*
...
...
@@ -45,143 +47,121 @@ namespace paddle {
class
FunctionCompare
{
public:
FunctionCompare
(
const
std
::
string
&
name
,
const
FuncConfig
&
config
)
:
cpu
(
FunctionBase
::
funcRegistrar_
.
createByType
(
name
+
"-CPU"
)),
gpu
(
FunctionBase
::
funcRegistrar_
.
createByType
(
name
+
"-GPU"
))
{
cpu
->
init
(
config
);
gpu
->
init
(
config
);
:
cpuFunc_
(
FunctionBase
::
funcRegistrar_
.
createByType
(
name
+
"-CPU"
)),
gpuFunc_
(
FunctionBase
::
funcRegistrar_
.
createByType
(
name
+
"-GPU"
))
{
cpuFunc_
->
init
(
config
);
gpuFunc_
->
init
(
config
);
}
~
FunctionCompare
()
{}
// input need only contains shape, do not contains data.
void
addInputs
(
const
BufferArg
&
input
)
{
size_t
size
=
input
.
shape
().
getElements
()
*
sizeOfValuType
(
input
.
valueType
());
cpuMemory_
.
emplace_back
(
std
::
make_shared
<
CpuMemoryHandle
>
(
size
));
gpuMemory_
.
emplace_back
(
std
::
make_shared
<
GpuMemoryHandle
>
(
size
));
cpuInputs_
.
emplace_back
(
std
::
make_shared
<
BufferArg
>
(
cpuMemory_
.
back
()
->
getBuf
(),
input
.
valueType
(),
input
.
shape
()));
gpuInputs_
.
emplace_back
(
std
::
make_shared
<
BufferArg
>
(
gpuMemory_
.
back
()
->
getBuf
(),
input
.
valueType
(),
input
.
shape
()));
}
// output need only contains shape, do not contains data.
void
addOutputs
(
const
BufferArg
&
output
)
{
size_t
size
=
output
.
shape
().
getElements
()
*
sizeOfValuType
(
output
.
valueType
());
cpuMemory_
.
emplace_back
(
std
::
make_shared
<
CpuMemoryHandle
>
(
size
));
gpuMemory_
.
emplace_back
(
std
::
make_shared
<
GpuMemoryHandle
>
(
size
));
cpuOutputs_
.
emplace_back
(
std
::
make_shared
<
BufferArg
>
(
cpuMemory_
.
back
()
->
getBuf
(),
output
.
valueType
(),
output
.
shape
(),
ASSIGN_TO
));
gpuOutputs_
.
emplace_back
(
std
::
make_shared
<
BufferArg
>
(
gpuMemory_
.
back
()
->
getBuf
(),
output
.
valueType
(),
output
.
shape
(),
ASSIGN_TO
));
}
void
addInputs
(
const
BufferArg
&
input
)
{
inputs
.
push_back
(
input
);
}
void
addInputs
(
const
SequenceArg
&
input
)
{
size_t
batchSize
=
input
.
shape
()[
0
];
size_t
numSeqs
=
batchSize
/
10
+
1
;
size_t
sizeId
=
(
numSeqs
+
1
)
*
sizeOfValuType
(
VALUE_TYPE_INT32
);
cpuMemory_
.
emplace_back
(
std
::
make_shared
<
CpuMemoryHandle
>
(
sizeId
));
gpuMemory_
.
emplace_back
(
std
::
make_shared
<
GpuMemoryHandle
>
(
sizeId
));
void
addOutputs
(
const
BufferArg
&
output
)
{
outputs
.
push_back
(
output
);
}
TensorShape
seqsId
({
numSeqs
+
1
});
// void* cpuBuffer = cpuMemory_.back()->getBuf();
// void* gpuBuffer = gpuMemory_.back()->getBuf();
size_t
size
=
input
.
shape
().
getElements
()
*
sizeOfValuType
(
input
.
valueType
());
cpuMemory_
.
emplace_back
(
std
::
make_shared
<
CpuMemoryHandle
>
(
size
));
gpuMemory_
.
emplace_back
(
std
::
make_shared
<
GpuMemoryHandle
>
(
size
));
// TODO: need be implemented.
}
void
run
()
{
// prepare cpu/gpu arguments
prepareArg
s
();
initInput
s
();
// function calculate
cpu
->
calc
(
cpuInputs
,
cpuOutputs
);
gpu
->
calc
(
gpuInputs
,
gpuOutputs
);
// check outputs and inouts
auto
checkArgs
=
[
=
](
const
BufferArgs
&
cpuArgs
,
const
BufferArgs
&
gpuArgs
)
{
for
(
size_t
i
=
0
;
i
<
cpuArgs
.
size
();
i
++
)
{
auto
cpu
=
cpuArgs
[
i
];
auto
gpu
=
gpuArgs
[
i
];
CpuVector
cpuVector
(
cpu
.
shape
().
getElements
(),
(
real
*
)
cpu
.
getData
());
GpuVector
gpuVector
(
cpu
.
shape
().
getElements
(),
(
real
*
)
gpu
.
getData
());
autotest
::
TensorCheckErr
(
cpuVector
,
gpuVector
);
auto
callFunction
=
[](
FunctionBase
*
function
,
std
::
vector
<
BufferArgPtr
>&
inputs
,
std
::
vector
<
BufferArgPtr
>&
outputs
)
{
BufferArgs
inArgs
;
BufferArgs
outArgs
;
for
(
auto
arg
:
inputs
)
{
inArgs
.
addArg
(
*
arg
);
}
};
checkArgs
(
cpuOutputs
,
gpuOutputs
);
}
#if 0
void cmpWithArg(const Arguments& inputs,
const Arguments& outputs,
const Arguments& inouts) {
// init cpu and gpu arguments
auto initArgs = [=](
Arguments& cpuArgs, Arguments& gpuArgs, const Arguments& inArgs) {
for (const auto arg : inArgs) {
size_t size = sizeof(real);
for (const auto dim : arg.dims_) {
size *= dim;
}
if (arg.getData()) {
// todo(tianbing), waste unnecessary mem here
cpuMemory.emplace_back(std::make_shared<CpuMemoryHandle>(size));
gpuMemory.emplace_back(std::make_shared<GpuMemoryHandle>(size));
cpuArgs.emplace_back(Tensor((real*)arg.getData(), arg.dims_));
gpuArgs.emplace_back(Tensor((real*)arg.getData(), arg.dims_));
// already init outside
} else {
cpuMemory.emplace_back(std::make_shared<CpuMemoryHandle>(size));
gpuMemory.emplace_back(std::make_shared<GpuMemoryHandle>(size));
cpuArgs.emplace_back(
Tensor((real*)cpuMemory.back()->getBuf(), arg.dims_));
gpuArgs.emplace_back(
Tensor((real*)gpuMemory.back()->getBuf(), arg.dims_));
// will use an api to refactor this code.
CpuVector cpuVector(size / sizeof(real),
(real*)cpuArgs.back().getData());
GpuVector gpuVector(size / sizeof(real),
(real*)gpuArgs.back().getData());
cpuVector.uniform(0.001, 1);
gpuVector.copyFrom(cpuVector);
}
for
(
auto
arg
:
outputs
)
{
outArgs
.
addArg
(
*
arg
);
}
function
->
calc
(
inArgs
,
outArgs
);
};
initArgs(cpuInputs, gpuInputs, inputs);
initArgs(cpuOutputs, gpuOutputs, outputs);
// function calculate
cpu->calc(cpuInputs, cpuOutputs);
gpu->calc(gpuInputs, gpuOutputs);
callFunction
(
cpuFunc_
.
get
(),
cpuInputs_
,
cpuOutputs_
);
callFunction
(
gpuFunc_
.
get
(),
gpuInputs_
,
gpuOutputs_
);
// check outputs and inouts
auto checkArgs = [=](const Arguments& cpuArgs, const Arguments& gpuArgs) {
for (size_t i = 0; i < cpuArgs.size(); i++) {
auto cpu = cpuArgs[i];
auto gpu = gpuArgs[i];
size_t size = 1;
for (auto dim : cpu.dims_) {
size *= dim;
}
CpuVector cpuVector(size, (real*)cpu.getData());
GpuVector gpuVector(size, (real*)gpu.getData());
autotest::TensorCheckErr(cpuVector, gpuVector);
}
};
checkArgs(cpuOutputs, gpuOutputs);
compareOutputs
();
}
#endif
std
::
shared_ptr
<
FunctionBase
>
getCpuFunction
()
const
{
return
cpu
;
}
std
::
shared_ptr
<
FunctionBase
>
getCpuFunction
()
const
{
return
cpu
Func_
;
}
std
::
shared_ptr
<
FunctionBase
>
getGpuFunction
()
const
{
return
gpu
;
}
std
::
shared_ptr
<
FunctionBase
>
getGpuFunction
()
const
{
return
gpu
Func_
;
}
protected:
void
prepareArg
s
()
{
// TODO, if inputs has data
}
void
initInput
s
()
{
for
(
size_t
i
=
0
;
i
<
cpuInputs_
.
size
();
i
++
)
{
initArg
(
*
cpuInputs_
[
i
]);
void
createArg
(
BufferArgs
&
cpuArgs
,
BufferArgs
&
gpuArgs
,
BufferArg
&
arg
)
{
size_t
size
=
arg
.
shape
().
getElements
()
*
sizeOfValuType
(
arg
.
valueType
());
cpuMemory_
.
emplace_back
(
std
::
make_shared
<
CpuMemoryHandle
>
(
size
));
gpuMemory_
.
emplace_back
(
std
::
make_shared
<
GpuMemoryHandle
>
(
size
));
// TODO: Need a BufferCopy used to copy from one BufferArg to another.
CpuVector
cpuVector
(
cpuInputs_
[
i
]
->
shape
().
getElements
(),
(
real
*
)
cpuInputs_
[
i
]
->
data
());
GpuVector
gpuVector
(
gpuInputs_
[
i
]
->
shape
().
getElements
(),
(
real
*
)
gpuInputs_
[
i
]
->
data
());
cpuArgs
.
emplace_back
(
BufferArg
(
cpuMemory_
.
back
()
->
getBuf
()),
arg
.
valueType
(),
arg
.
shape
());
gpuArgs
.
emplace_back
(
BufferArg
(
gpuMemory_
.
back
()
->
getBuf
()),
arg
.
valueType
(),
arg
.
shape
());
gpuVector
.
copyFrom
(
cpuVector
);
}
}
void
createArg
(
BufferArgs
&
cpuArgs
,
BufferArgs
&
gpuArgs
,
SequenceArg
&
arg
)
{
size_t
batchSize
=
arg
.
shape
()[
0
];
size_t
numSeqs
=
batchSize
/
10
+
1
;
void
compareOutputs
()
{
for
(
size_t
i
=
0
;
i
<
cpuOutputs_
.
size
();
i
++
)
{
// TODO, Need a BufferCheck used to compare the two buffers.
auto
cpu
=
cpuOutputs_
[
i
];
auto
gpu
=
gpuOutputs_
[
i
];
CpuVector
cpuVector
(
cpu
->
shape
().
getElements
(),
(
real
*
)
cpu
->
data
());
GpuVector
gpuVector
(
cpu
->
shape
().
getElements
(),
(
real
*
)
gpu
->
data
());
size_t
sizeId
=
(
numSeqs
+
1
)
*
sizeOfValuType
(
VALUE_TYPE_INT32
);
cpuMemory_
.
emplace_back
(
std
::
make_shared
<
CpuMemoryHandle
>
(
size
));
gpuMemory_
.
emplace_back
(
std
::
make_shared
<
GpuMemoryHandle
>
(
size
));
TensorShape
seqsId
({
numSeqs
+
1
});
void
*
cpuBuffer
=
cpuMemory_
.
back
()
->
getBuf
();
void
*
gpuBuffer
=
gpuMemory_
.
back
()
->
getBuf
();
size_t
size
=
arg
.
shape
().
getElements
()
*
sizeOfValuType
(
arg
.
valueType
());
cpuMemory_
.
emplace_back
(
std
::
make_shared
<
CpuMemoryHandle
>
(
size
));
gpuMemory_
.
emplace_back
(
std
::
make_shared
<
GpuMemoryHandle
>
(
size
));
cpuArgs
.
emplace_back
(
SequenceArg
(
cpuMemory_
.
back
()
->
getBuf
(),
arg
.
valueType
(),
arg
.
shape
(),
SequenceIdArg
(
cpuBuffer
,
seqsId
)));
gpuArgs
.
emplace_back
(
SequenceArg
(
gpuMemory_
.
back
()
->
getBuf
(),
arg
.
valueType
(),
arg
.
shape
(),
SequenceIdArg
(
gpuBuffer
,
seqsId
)));
autotest
::
TensorCheckErr
(
cpuVector
,
gpuVector
);
}
}
// only init cpu argument, gpu argument copy from cpu argument.
...
...
@@ -192,10 +172,10 @@ protected:
void
initArg
(
SequenceIdArg
&
arg
,
size_t
batchSize
)
{
size_t
numSeqs
=
arg
.
numSeqs
();
int
*
buf
=
arg
.
data
();
int
*
buf
=
(
int
*
)
arg
.
data
();
int
pos
=
0
;
size_t
maxLen
=
2
*
batchSize
/
numSeqs
;
for
(
int
i
=
0
;
i
<
numSeqs
;
++
i
)
{
for
(
int
i
=
0
;
i
<
(
int
)
numSeqs
;
++
i
)
{
int
len
=
uniformRandom
(
std
::
min
<
int64_t
>
(
maxLen
,
batchSize
-
pos
-
numSeqs
+
i
))
+
1
;
...
...
@@ -207,17 +187,14 @@ protected:
}
protected:
std
::
shared_ptr
<
FunctionBase
>
cpu
;
std
::
shared_ptr
<
FunctionBase
>
gpu
;
std
::
shared_ptr
<
FunctionBase
>
cpu
Func_
;
std
::
shared_ptr
<
FunctionBase
>
gpu
Func_
;
std
::
vector
<
CpuMemHandlePtr
>
cpuMemory_
;
std
::
vector
<
GpuMemHandlePtr
>
gpuMemory_
;
// inputs and outputs
BufferArgs
inputs
;
BufferArgs
outputs
;
BufferArgs
cpuInputs_
;
BufferArgs
cpuOutputs_
;
BufferArgs
gpuInputs_
;
BufferArgs
gpuOutputs_
;
std
::
vector
<
BufferArgPtr
>
cpuInputs_
;
std
::
vector
<
BufferArgPtr
>
cpuOutputs_
;
std
::
vector
<
BufferArgPtr
>
gpuInputs_
;
std
::
vector
<
BufferArgPtr
>
gpuOutputs_
;
};
}
// namespace paddle
This diff is collapsed.
Click to expand it.
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录
新手
引导
客服
返回
顶部