Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
s920243400
PaddleDetection
提交
1251501c
P
PaddleDetection
项目概览
s920243400
/
PaddleDetection
与 Fork 源项目一致
Fork自
PaddlePaddle / PaddleDetection
通知
2
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
1251501c
编写于
1月 24, 2017
作者:
X
xutianbing
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
rewrite unit test using new Function Test.
上级
c785975b
变更
5
隐藏空白更改
内联
并排
Showing
5 changed file
with
185 addition
and
174 deletion
+185
-174
paddle/function/BufferArg.h
paddle/function/BufferArg.h
+2
-1
paddle/function/ContextProjectionOp.cpp
paddle/function/ContextProjectionOp.cpp
+36
-34
paddle/function/ContextProjectionOp.h
paddle/function/ContextProjectionOp.h
+0
-1
paddle/function/ContextProjectionOpTest.cpp
paddle/function/ContextProjectionOpTest.cpp
+39
-100
paddle/function/FunctionTest.h
paddle/function/FunctionTest.h
+108
-38
未找到文件。
paddle/function/BufferArg.h
浏览文件 @
1251501c
...
...
@@ -226,7 +226,8 @@ public:
SequenceArg
(
ValueType
valueType
,
const
TensorShape
&
shape
,
ArgType
argType
=
UNSPECIFIED
)
:
BufferArg
(
valueType
,
shape
,
argType
),
startPositions_
(
TensorShape
())
{
:
BufferArg
(
valueType
,
shape
,
argType
),
startPositions_
(
TensorShape
({
shape
[
0
]}))
{
bufferType_
=
TENSOR_SEQUENCE_DATA
;
}
...
...
paddle/function/ContextProjectionOp.cpp
浏览文件 @
1251501c
...
...
@@ -108,26 +108,23 @@ public:
}
void
calc
(
const
BufferArgs
&
inputs
,
const
BufferArgs
&
outputs
)
override
{
CHECK
(
1
==
inputs
.
size
()
||
2
==
inputs
.
size
());
CHECK_EQ
(
(
size_t
)
1
,
outputs
.
size
());
CHECK
(
1
UL
==
inputs
.
size
()
||
2UL
==
inputs
.
size
());
CHECK_EQ
(
1UL
,
outputs
.
size
());
CHECK
(
inputs
[
0
].
isSequenceArg
()
&&
outputs
[
0
].
isSequenceArg
())
<<
"SequenceArg required here"
;
const
auto
val_seqs
=
dynamic_cast
<
const
SequenceArg
&>
(
inputs
[
0
]);
auto
out_seq
=
dynamic_cast
<
const
SequenceArg
&>
(
outputs
[
0
]);
CHECK
(
out_seq
.
data
()
&&
val_seqs
.
data
()
&&
val_seqs
.
getSequenceId
().
data
());
CHECK_EQ
(
out_seq
.
shape
().
ndims
(),
(
size_t
)
2
);
CHECK_EQ
(
val_seqs
.
shape
().
ndims
(),
(
size_t
)
2
);
CHECK_EQ
(
val_seqs
.
getSequenceId
().
shape
().
ndims
(),
(
size_t
)
1
);
if
(
2
==
inputs
.
size
())
{
CHECK_EQ
(
inputs
[
1
].
shape
().
ndims
(),
(
size_t
)
2
);
}
CHECK_EQ
(
out_seq
.
shape
().
ndims
(),
2UL
);
CHECK_EQ
(
val_seqs
.
shape
().
ndims
(),
2UL
);
/// dim of output = dim of input * context_length
CHECK_EQ
(
out_seq
.
shape
()[
1
],
val_seqs
.
shape
()[
1
]
*
context_length_
);
/// input and output has the same batch_size
CHECK_EQ
(
val_seqs
.
shape
()[
0
],
out_seq
.
shape
()[
0
]);
/// dim of input == dim of weight
if
(
2
==
inputs
.
size
())
{
if
(
2UL
==
inputs
.
size
())
{
CHECK_EQ
(
inputs
[
1
].
shape
().
ndims
(),
2UL
);
/// dim of input == dim of weight
CHECK_EQ
(
val_seqs
.
shape
()[
1
],
inputs
[
1
].
shape
()[
1
]);
}
...
...
@@ -135,10 +132,11 @@ public:
auto
out_mat
=
out_seq
.
matrix
<
Device
>
();
const
auto
in_mat
=
val_seqs
.
matrix
<
Device
>
();
const
auto
w_mat
=
(
2
==
inputs
.
size
())
(
2
UL
==
inputs
.
size
())
?
inputs
[
1
].
matrix
<
Device
>
()
:
typename
Tensor
<
real
,
Device
>::
Matrix
(
nullptr
,
0
,
0
);
const
auto
seq_vec
=
val_seqs
.
getSequenceId
().
vector
<
int
,
Device
>
();
ContextProjectionForward
<
Device
>
(
out_mat
,
in_mat
,
w_mat
,
...
...
@@ -235,36 +233,40 @@ public:
}
void
calc
(
const
BufferArgs
&
inputs
,
const
BufferArgs
&
outputs
)
override
{
CHECK_EQ
(
(
size_t
)
1
,
inputs
.
size
());
CHECK
_EQ
((
size_t
)
2
,
outputs
.
size
());
CHECK_EQ
(
1UL
,
inputs
.
size
());
CHECK
(
1UL
==
outputs
.
size
()
||
2UL
==
outputs
.
size
());
CHECK
(
inputs
[
0
].
isSequenceArg
()
&&
outputs
[
0
].
isSequenceArg
())
<<
"SequenceArg required here"
;
const
auto
in_seq
=
dynamic_cast
<
const
SequenceArg
&>
(
inputs
[
0
]);
auto
out_seq
=
dynamic_cast
<
const
SequenceArg
&>
(
outputs
[
0
]);
CHECK
(
in_seq
.
data
()
&&
in_seq
.
getSequenceId
().
data
());
CHECK_EQ
(
in_seq
.
shape
().
ndims
(),
(
size_t
)
2
);
CHECK_EQ
(
in_seq
.
getSequenceId
().
shape
().
ndims
(),
(
size_t
)
1
);
CHECK_EQ
(
out_seq
.
shape
().
ndims
(),
(
size_t
)
2
);
CHECK_EQ
(
out_seq
.
getSequenceId
().
shape
().
ndims
(),
(
size_t
)
1
);
CHECK_EQ
(
outputs
[
1
].
shape
().
ndims
(),
(
size_t
)
2
);
CHECK_EQ
(
in_seq
.
shape
().
ndims
(),
2UL
);
CHECK_EQ
(
out_seq
.
shape
().
ndims
(),
2UL
);
CHECK_EQ
(
out_seq
.
getSequenceId
().
shape
().
ndims
(),
1UL
);
/// dim of input grad == dim of weight
CHECK_EQ
(
out_seq
.
shape
()[
1
],
outputs
[
1
].
shape
()[
1
]);
/// input and output grad has the same batch_size
CHECK_EQ
(
out_seq
.
shape
()[
0
],
in_seq
.
shape
()[
0
]);
/// dim of output grad = dim of input grad * context_length
CHECK_EQ
(
in_seq
.
shape
()[
1
],
out_seq
.
shape
()[
1
]
*
context_length_
);
CHECK_EQ
(
out_seq
.
getArgType
(),
ADD_TO
);
CHECK_EQ
(
outputs
[
1
].
getArgType
(),
ADD_TO
);
if
(
2UL
==
outputs
.
size
())
{
CHECK_EQ
(
outputs
[
1
].
shape
().
ndims
(),
2UL
);
/// dim of input grad == dim of weight
CHECK_EQ
(
out_seq
.
shape
()[
1
],
outputs
[
1
].
shape
()[
1
]);
CHECK_EQ
(
outputs
[
1
].
getArgType
(),
ADD_TO
);
}
const
auto
seq_vec
=
in_seq
.
getSequenceId
().
vector
<
int
,
Device
>
();
const
auto
out_grad_mat
=
in_seq
.
matrix
<
Device
>
();
auto
in_grad_mat
=
!
out_seq
.
data
()
?
typename
Tensor
<
real
,
Device
>::
Matrix
(
nullptr
,
0
,
0
)
:
out_seq
.
matrix
<
Device
>
();
auto
w_grad_mat
=
!
outputs
[
1
].
data
()
?
typename
Tensor
<
real
,
Device
>::
Matrix
(
nullptr
,
0
,
0
)
:
outputs
[
1
].
matrix
<
Device
>
();
auto
w_grad_mat
=
(
2UL
==
outputs
.
size
())
?
outputs
[
1
].
matrix
<
Device
>
()
:
typename
Tensor
<
real
,
Device
>::
Matrix
(
nullptr
,
0
,
0
);
ContextProjectionBackward
<
Device
>
(
out_grad_mat
,
in_grad_mat
,
w_grad_mat
,
...
...
@@ -304,17 +306,17 @@ public:
}
void
calc
(
const
BufferArgs
&
inputs
,
const
BufferArgs
&
outputs
)
override
{
CHECK_EQ
(
1
,
static_cast
<
int
>
(
inputs
.
size
()
));
CHECK_EQ
(
1
,
static_cast
<
int
>
(
outputs
.
size
()
));
CHECK_EQ
(
1
UL
,
inputs
.
size
(
));
CHECK_EQ
(
1
UL
,
outputs
.
size
(
));
CHECK
(
inputs
[
0
].
isSequenceArg
()
&&
outputs
[
0
].
isSequenceArg
())
<<
"SequenceArg required here"
;
const
auto
in_seq
=
dynamic_cast
<
const
SequenceArg
&>
(
inputs
[
0
]);
const
auto
out_seq
=
dynamic_cast
<
const
SequenceArg
&>
(
outputs
[
0
]);
CHECK
(
in_seq
.
data
()
&&
out_seq
.
data
()
&&
in_seq
.
getSequenceId
().
data
());
CHECK_EQ
(
static_cast
<
int
>
(
out_seq
.
shape
().
ndims
()),
2
);
CHECK_EQ
(
static_cast
<
int
>
(
in_seq
.
shape
().
ndims
()),
2
);
CHECK_EQ
(
static_cast
<
int
>
(
in_seq
.
getSequenceId
().
shape
().
ndims
()),
1
);
CHECK_EQ
(
out_seq
.
shape
().
ndims
(),
2UL
);
CHECK_EQ
(
in_seq
.
shape
().
ndims
(),
2UL
);
CHECK_EQ
(
in_seq
.
getSequenceId
().
shape
().
ndims
(),
1UL
);
/// output layer grad dim == input layer grad dim * context_length_
CHECK_EQ
(
in_seq
.
shape
().
ndims
(),
out_seq
.
shape
().
ndims
()
*
context_length_
);
/// input and output has the same batch_size
...
...
@@ -355,14 +357,14 @@ public:
}
void
calc
(
const
BufferArgs
&
inputs
,
const
BufferArgs
&
outputs
)
override
{
CHECK_EQ
(
1
,
static_cast
<
int
>
(
inputs
.
size
()
));
CHECK_EQ
(
1
,
static_cast
<
int
>
(
outputs
.
size
()
));
CHECK_EQ
(
1
UL
,
inputs
.
size
(
));
CHECK_EQ
(
1
UL
,
outputs
.
size
(
));
CHECK
(
inputs
[
0
].
isSequenceArg
())
<<
"SequenceArg required here"
;
const
auto
in_seq
=
dynamic_cast
<
const
SequenceArg
&>
(
inputs
[
0
]);
CHECK
(
in_seq
.
data
()
&&
in_seq
.
getSequenceId
().
data
()
&&
outputs
[
0
].
data
());
CHECK_EQ
(
static_cast
<
int
>
(
outputs
[
0
].
shape
().
ndims
()),
2
);
CHECK_EQ
(
static_cast
<
int
>
(
in_seq
.
shape
().
ndims
()),
2
);
CHECK_EQ
(
static_cast
<
int
>
(
in_seq
.
getSequenceId
().
shape
().
ndims
()),
1
);
CHECK_EQ
(
outputs
[
0
].
shape
().
ndims
(),
2UL
);
CHECK_EQ
(
in_seq
.
shape
().
ndims
(),
2UL
);
CHECK_EQ
(
in_seq
.
getSequenceId
().
shape
().
ndims
(),
1UL
);
CHECK_EQ
(
in_seq
.
shape
()[
0
],
outputs
[
0
].
shape
()[
0
]);
/// output layer grad dim == weight dim * context_length_
CHECK_EQ
(
in_seq
.
shape
()[
1
],
outputs
[
0
].
shape
()[
1
]
*
context_length_
);
...
...
paddle/function/ContextProjectionOp.h
浏览文件 @
1251501c
...
...
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "Function.h"
namespace
paddle
{
...
...
paddle/function/ContextProjectionOpTest.cpp
浏览文件 @
1251501c
...
...
@@ -28,55 +28,26 @@ void testMatrixProjectionForward(int context_start,
std
::
max
(
0
,
(
int
)(
context_start
+
context_length
-
1
));
if
(
pad
==
0
)
is_padding
=
false
;
FunctionCompare
compare
(
"ContextProjectionForward"
,
FuncConfig
()
.
set
(
"context_length"
,
context_length
)
.
set
(
"context_start"
,
context_start
)
.
set
(
"begin_pad"
,
std
::
max
(
0
,
-
context_start
)));
CpuMatrix
cpu_in
(
batch_size
,
input_dim
);
cpu_in
.
randomizeUniform
();
GpuMatrix
gpu_in
(
batch_size
,
input_dim
);
gpu_in
.
copyFrom
(
cpu_in
);
auto
cpu_weight
=
is_padding
?
std
::
make_shared
<
CpuMatrix
>
(
pad
,
input_dim
)
:
nullptr
;
auto
gpu_weight
=
is_padding
?
std
::
make_shared
<
GpuMatrix
>
(
pad
,
input_dim
)
:
nullptr
;
if
(
is_padding
)
{
cpu_weight
->
randomizeUniform
();
gpu_weight
->
copyFrom
(
*
cpu_weight
);
FunctionCompare
test
(
"ContextProjectionForward"
,
FuncConfig
()
.
set
(
"context_length"
,
context_length
)
.
set
(
"context_start"
,
context_start
)
.
set
(
"begin_pad"
,
std
::
max
(
0
,
-
context_start
)));
// prepare input arguments
test
.
addSequence
(
SequenceIdArg
(
TensorShape
{
batch_size
}));
test
.
addInputs
(
SequenceArg
(
VALUE_TYPE_FLOAT
,
TensorShape
{
batch_size
,
input_dim
}));
if
(
is_padding
)
{
// weight
test
.
addInputs
(
SequenceArg
(
VALUE_TYPE_FLOAT
,
TensorShape
{
pad
,
input_dim
}));
}
IVectorPtr
cpu_seq
;
generateSequenceStartPositions
(
batch_size
,
cpu_seq
);
IVectorPtr
gpu_seq
=
IVector
::
create
(
cpu_seq
->
getSize
(),
true
);
gpu_seq
->
copyFrom
(
*
cpu_seq
);
CpuMatrix
cpu_out
(
batch_size
,
input_dim
*
context_length
);
GpuMatrix
gpu_out
(
batch_size
,
input_dim
*
context_length
);
cpu_out
.
randomizeUniform
();
gpu_out
.
copyFrom
(
cpu_out
);
BufferArgs
cpu_inputs
;
BufferArgs
cpu_outputs
;
cpu_inputs
.
addArg
(
cpu_in
,
*
cpu_seq
);
if
(
cpu_weight
)
{
cpu_inputs
.
addArg
(
*
cpu_weight
,
*
cpu_seq
);
}
cpu_outputs
.
addArg
(
cpu_out
,
*
cpu_seq
,
ADD_TO
);
compare
.
getCpuFunction
()
->
calc
(
cpu_inputs
,
cpu_outputs
);
test
.
addOutputs
(
SequenceArg
(
VALUE_TYPE_FLOAT
,
TensorShape
{
batch_size
,
input_dim
*
context_length
}),
ADD_TO
);
BufferArgs
gpu_inputs
;
BufferArgs
gpu_outputs
;
gpu_inputs
.
addArg
(
gpu_in
,
*
gpu_seq
);
if
(
gpu_weight
)
{
gpu_inputs
.
addArg
(
*
gpu_weight
,
*
gpu_seq
);
}
gpu_outputs
.
addArg
(
gpu_out
,
*
gpu_seq
,
ADD_TO
);
compare
.
getGpuFunction
()
->
calc
(
gpu_inputs
,
gpu_outputs
);
autotest
::
TensorCheckEqual
(
cpu_out
,
gpu_out
);
// run Function
test
.
run
();
}
void
testMatrixProjectionBackward
(
int
context_start
,
...
...
@@ -88,63 +59,31 @@ void testMatrixProjectionBackward(int context_start,
std
::
max
(
0
,
(
int
)(
context_start
+
context_length
-
1
));
if
(
pad
==
0
)
is_padding
=
false
;
FunctionCompare
compare
(
"ContextProjectionBackward"
,
FuncConfig
()
.
set
(
"context_length"
,
context_length
)
.
set
(
"context_start"
,
context_start
)
.
set
(
"begin_pad"
,
std
::
max
(
0
,
-
context_start
))
.
set
(
"is_padding"
,
is_padding
)
.
set
(
"total_pad"
,
pad
));
CpuMatrix
cpu_in_grad
(
batch_size
,
input_dim
);
cpu_in_grad
.
randomizeUniform
();
GpuMatrix
gpu_in_grad
(
batch_size
,
input_dim
);
gpu_in_grad
.
copyFrom
(
cpu_in_grad
);
CpuMatrix
cpu_out_grad
(
batch_size
,
input_dim
*
context_length
);
cpu_out_grad
.
randomizeUniform
();
GpuMatrix
gpu_out_grad
(
batch_size
,
input_dim
*
context_length
);
gpu_out_grad
.
copyFrom
(
cpu_out_grad
);
IVectorPtr
cpu_seq
;
generateSequenceStartPositions
(
batch_size
,
cpu_seq
);
IVectorPtr
gpu_seq
=
IVector
::
create
(
cpu_seq
->
getSize
(),
true
);
gpu_seq
->
copyFrom
(
*
cpu_seq
);
auto
cpu_w_grad
=
is_padding
?
std
::
make_shared
<
CpuMatrix
>
(
pad
,
input_dim
)
:
nullptr
;
auto
gpu_w_grad
=
is_padding
?
std
::
make_shared
<
GpuMatrix
>
(
pad
,
input_dim
)
:
nullptr
;
if
(
is_padding
)
{
cpu_w_grad
->
randomizeUniform
();
gpu_w_grad
->
copyFrom
(
*
cpu_w_grad
);
FunctionCompare
test
(
"ContextProjectionBackward"
,
FuncConfig
()
.
set
(
"context_length"
,
context_length
)
.
set
(
"context_start"
,
context_start
)
.
set
(
"begin_pad"
,
std
::
max
(
0
,
-
context_start
))
.
set
(
"is_padding"
,
is_padding
)
.
set
(
"total_pad"
,
pad
));
// prepare input arguments
test
.
addSequence
(
SequenceIdArg
(
TensorShape
{
batch_size
}));
test
.
addInputs
(
SequenceArg
(
VALUE_TYPE_FLOAT
,
TensorShape
{
batch_size
,
input_dim
*
context_length
}));
test
.
addOutputs
(
SequenceArg
(
VALUE_TYPE_FLOAT
,
TensorShape
{
batch_size
,
input_dim
}),
ADD_TO
);
if
(
is_padding
)
{
// weight
test
.
addOutputs
(
BufferArg
(
VALUE_TYPE_FLOAT
,
TensorShape
{
pad
,
input_dim
}),
ADD_TO
);
}
BufferArgs
cpu_inputs
;
BufferArgs
cpu_outputs
;
cpu_inputs
.
addArg
(
cpu_out_grad
,
*
cpu_seq
);
cpu_outputs
.
addArg
(
cpu_in_grad
,
*
cpu_seq
,
ADD_TO
);
cpu_outputs
.
addArg
(
cpu_w_grad
?
*
cpu_w_grad
:
CpuMatrix
(
nullptr
,
0
,
input_dim
),
ADD_TO
);
compare
.
getCpuFunction
()
->
calc
(
cpu_inputs
,
cpu_outputs
);
BufferArgs
gpu_inputs
;
BufferArgs
gpu_outputs
;
gpu_inputs
.
addArg
(
gpu_out_grad
,
*
gpu_seq
);
gpu_outputs
.
addArg
(
gpu_in_grad
,
*
gpu_seq
,
ADD_TO
);
gpu_outputs
.
addArg
(
gpu_w_grad
?
*
gpu_w_grad
:
GpuMatrix
(
nullptr
,
0
,
input_dim
),
ADD_TO
);
compare
.
getGpuFunction
()
->
calc
(
gpu_inputs
,
gpu_outputs
);
autotest
::
TensorCheckErr
(
cpu_in_grad
,
gpu_in_grad
);
if
(
is_padding
)
{
autotest
::
TensorCheckErr
(
*
cpu_w_grad
,
*
gpu_w_grad
);
}
// run Function
test
.
run
();
}
TEST
(
ContextProjection
,
p
rojection
)
{
TEST
(
ContextProjection
,
P
rojection
)
{
for
(
auto
context_start
:
{
-
5
,
-
3
,
-
1
,
0
,
3
})
{
for
(
auto
context_length
:
{
1
,
2
,
5
,
7
})
{
for
(
auto
trainable_padding
:
{
false
,
true
})
{
...
...
paddle/function/FunctionTest.h
浏览文件 @
1251501c
...
...
@@ -69,6 +69,54 @@ public:
gpuMemory_
.
back
()
->
getBuf
(),
input
.
valueType
(),
input
.
shape
()));
}
// assume one copy of sequence is shared by different SequenceArgs
void
addSequence
(
const
SequenceIdArg
&
input
)
{
CHECK_EQ
(
input
.
shape
().
ndims
(),
1UL
);
size_t
batchSize
=
input
.
shape
()[
0
];
size_t
numSeqs
=
batchSize
/
10
+
1
;
size_t
sizeId
=
(
numSeqs
+
1
)
*
sizeOfValuType
(
VALUE_TYPE_INT32
);
cpuMemory_
.
emplace_back
(
std
::
make_shared
<
CpuMemoryHandle
>
(
sizeId
));
gpuMemory_
.
emplace_back
(
std
::
make_shared
<
GpuMemoryHandle
>
(
sizeId
));
cpuSeq_
=
std
::
make_shared
<
SequenceIdArg
>
(
cpuMemory_
.
back
()
->
getBuf
(),
TensorShape
{
numSeqs
+
1
});
gpuSeq_
=
std
::
make_shared
<
SequenceIdArg
>
(
gpuMemory_
.
back
()
->
getBuf
(),
TensorShape
{
numSeqs
+
1
});
/// init sequence Id
initArg
(
*
cpuSeq_
,
batchSize
);
// todo(tianbing), delete it
CHECK_EQ
(
cpuSeq_
->
shape
().
getElements
(),
cpuSeq_
->
numSeqs
()
+
1
);
CpuIVector
cpuSeq
(
cpuSeq_
->
shape
().
getElements
(),
(
int
*
)
cpuSeq_
->
data
());
GpuIVector
gpuSeq
(
gpuSeq_
->
shape
().
getElements
(),
(
int
*
)
gpuSeq_
->
data
());
gpuSeq
.
copyFrom
(
cpuSeq
);
}
void
addInputs
(
const
SequenceArg
&
input
)
{
CHECK_EQ
(
input
.
shape
().
ndims
(),
2UL
);
size_t
batchSize
=
input
.
shape
()[
0
];
if
(
!
cpuSeq_
||
!
gpuSeq_
)
{
// sequence not exist
addSequence
(
SequenceIdArg
(
TensorShape
{
batchSize
}));
}
size_t
size
=
input
.
shape
().
getElements
()
*
sizeOfValuType
(
input
.
valueType
());
cpuMemory_
.
emplace_back
(
std
::
make_shared
<
CpuMemoryHandle
>
(
size
));
gpuMemory_
.
emplace_back
(
std
::
make_shared
<
GpuMemoryHandle
>
(
size
));
/// SequenceArg
cpuInputs_
.
emplace_back
(
std
::
make_shared
<
SequenceArg
>
(
cpuMemory_
.
back
()
->
getBuf
(),
input
.
valueType
(),
input
.
shape
(),
*
cpuSeq_
));
gpuInputs_
.
emplace_back
(
std
::
make_shared
<
SequenceArg
>
(
gpuMemory_
.
back
()
->
getBuf
(),
input
.
valueType
(),
input
.
shape
(),
*
gpuSeq_
));
}
// output need only contains shape, do not contains data.
void
addOutputs
(
const
BufferArg
&
output
,
ArgType
argType
=
ASSIGN_TO
)
{
size_t
size
=
...
...
@@ -86,6 +134,7 @@ public:
output
.
valueType
(),
output
.
shape
(),
argType
));
<<<<<<<
HEAD
}
/// add and init output sparse matrix
...
...
@@ -116,24 +165,31 @@ public:
std
::
make_shared
<
SparseMatrixArg
>
(
*
gpuSparse_
,
argType
));
}
void
addInputs
(
const
SequenceArg
&
input
)
{
size_t
batchSize
=
input
.
shape
()[
0
];
size_t
numSeqs
=
batchSize
/
10
+
1
;
size_t
sizeId
=
(
numSeqs
+
1
)
*
sizeOfValuType
(
VALUE_TYPE_INT32
);
cpuMemory_
.
emplace_back
(
std
::
make_shared
<
CpuMemoryHandle
>
(
sizeId
));
gpuMemory_
.
emplace_back
(
std
::
make_shared
<
GpuMemoryHandle
>
(
sizeId
));
TensorShape
seqsId
({
numSeqs
+
1
});
// void* cpuBuffer = cpuMemory_.back()->getBuf();
// void* gpuBuffer = gpuMemory_.back()->getBuf();
void
addOutputs
(
const
SequenceArg
&
output
,
ArgType
argType
=
ASSIGN_TO
)
{
CHECK_EQ
(
output
.
shape
().
ndims
(),
2UL
);
size_t
batchSize
=
output
.
shape
()[
0
];
if
(
!
cpuSeq_
||
!
gpuSeq_
)
{
// sequence not exist
addSequence
(
SequenceIdArg
(
TensorShape
{
batchSize
}));
}
size_t
size
=
input
.
shape
().
getElements
()
*
sizeOfValuType
(
in
put
.
valueType
());
output
.
shape
().
getElements
()
*
sizeOfValuType
(
out
put
.
valueType
());
cpuMemory_
.
emplace_back
(
std
::
make_shared
<
CpuMemoryHandle
>
(
size
));
gpuMemory_
.
emplace_back
(
std
::
make_shared
<
GpuMemoryHandle
>
(
size
));
// TODO: need be implemented.
/// SequenceArg
cpuOutputs_
.
emplace_back
(
std
::
make_shared
<
SequenceArg
>
(
cpuMemory_
.
back
()
->
getBuf
(),
output
.
valueType
(),
output
.
shape
(),
*
cpuSeq_
,
argType
));
gpuOutputs_
.
emplace_back
(
std
::
make_shared
<
SequenceArg
>
(
gpuMemory_
.
back
()
->
getBuf
(),
output
.
valueType
(),
output
.
shape
(),
*
gpuSeq_
,
argType
));
}
void
addInputs
(
const
SparseMatrixArg
&
input
)
{
...
...
@@ -193,14 +249,44 @@ public:
std
::
shared_ptr
<
FunctionBase
>
getGpuFunction
()
const
{
return
gpuFunc_
;
}
protected:
// only init cpu argument, gpu argument copy from cpu argument.
void
initArg
(
BufferArg
&
arg
)
{
CpuVector
vector
(
arg
.
shape
().
getElements
(),
(
real
*
)
arg
.
data
());
vector
.
uniform
(
0.001
,
1
);
}
void
initArg
(
SequenceArg
&
arg
)
{
/// init only matrix
CpuVector
vector
(
arg
.
shape
().
getElements
(),
(
real
*
)
arg
.
data
());
vector
.
uniform
(
0.001
,
1
);
}
void
initArg
(
SequenceIdArg
&
arg
,
size_t
batchSize
)
{
size_t
numSeqs
=
arg
.
numSeqs
();
int
*
buf
=
reinterpret_cast
<
int
*>
(
arg
.
data
());
int
pos
=
0
;
size_t
maxLen
=
2
*
batchSize
/
numSeqs
;
for
(
int
i
=
0
;
i
<
(
int
)
numSeqs
;
++
i
)
{
int
len
=
1
+
uniformRandom
(
std
::
min
<
int64_t
>
(
maxLen
,
batchSize
-
pos
-
numSeqs
+
i
));
buf
[
i
]
=
pos
;
pos
+=
len
;
VLOG
(
1
)
<<
" len="
<<
len
;
}
buf
[
numSeqs
]
=
batchSize
;
}
void
initInputs
()
{
for
(
size_t
i
=
0
;
i
<
cpuInputs_
.
size
();
i
++
)
{
if
(
cpuInputs_
[
i
]
->
isSparseArg
())
{
continue
;
/// sparse matrix already init
}
initArg
(
*
cpuInputs_
[
i
]);
if
(
cpuInputs_
[
i
]
->
isSequenceArg
())
{
initArg
(
dynamic_cast
<
SequenceArg
&>
(
*
cpuInputs_
[
i
]));
}
else
{
initArg
(
*
cpuInputs_
[
i
]);
}
// TODO: Need a BufferCopy used to copy from one BufferArg to another.
CpuVector
cpuVector
(
cpuInputs_
[
i
]
->
shape
().
getElements
(),
(
real
*
)
cpuInputs_
[
i
]
->
data
());
...
...
@@ -217,7 +303,11 @@ protected:
continue
;
/// sparse matrix already init
}
initArg
(
*
cpuOutputs_
[
i
]);
if
(
cpuOutputs_
[
i
]
->
isSequenceArg
())
{
initArg
(
dynamic_cast
<
SequenceArg
&>
(
*
cpuOutputs_
[
i
]));
}
else
{
initArg
(
*
cpuOutputs_
[
i
]);
}
// TODO: Need a BufferCopy used to copy from one BufferArg to another.
CpuVector
cpuVector
(
cpuOutputs_
[
i
]
->
shape
().
getElements
(),
...
...
@@ -241,28 +331,6 @@ protected:
}
}
// only init cpu argument, gpu argument copy from cpu argument.
void
initArg
(
BufferArg
&
arg
)
{
CpuVector
vector
(
arg
.
shape
().
getElements
(),
(
real
*
)
arg
.
data
());
vector
.
uniform
(
0.001
,
1
);
}
void
initArg
(
SequenceIdArg
&
arg
,
size_t
batchSize
)
{
size_t
numSeqs
=
arg
.
numSeqs
();
int
*
buf
=
reinterpret_cast
<
int
*>
(
arg
.
data
());
int
pos
=
0
;
size_t
maxLen
=
2
*
batchSize
/
numSeqs
;
for
(
int
i
=
0
;
i
<
(
int
)
numSeqs
;
++
i
)
{
int
len
=
uniformRandom
(
std
::
min
<
int64_t
>
(
maxLen
,
batchSize
-
pos
-
numSeqs
+
i
))
+
1
;
buf
[
i
]
=
pos
;
pos
+=
len
;
VLOG
(
1
)
<<
" len="
<<
len
;
}
buf
[
numSeqs
]
=
batchSize
;
}
protected:
std
::
shared_ptr
<
FunctionBase
>
cpuFunc_
;
std
::
shared_ptr
<
FunctionBase
>
gpuFunc_
;
...
...
@@ -274,6 +342,8 @@ protected:
std
::
vector
<
BufferArgPtr
>
gpuOutputs_
;
std
::
shared_ptr
<
CpuSparseMatrix
>
cpuSparse_
;
std
::
shared_ptr
<
GpuSparseMatrix
>
gpuSparse_
;
std
::
shared_ptr
<
SequenceIdArg
>
cpuSeq_
;
std
::
shared_ptr
<
SequenceIdArg
>
gpuSeq_
;
};
}
// namespace paddle
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录