Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
479e4a50
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 2 年 前同步成功
通知
2325
Star
20933
Fork
5424
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
479e4a50
编写于
10月 02, 2017
作者:
Y
Yu Yang
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'feature/simplify_sum_op' into feature/grad_reg_mechanism_cont2
上级
ff8766e9
297ba1a3
变更
18
显示空白变更内容
内联
并排
Showing
18 changed file
with
656 addition
and
161 deletion
+656
-161
paddle/framework/CMakeLists.txt
paddle/framework/CMakeLists.txt
+3
-0
paddle/framework/block_desc.h
paddle/framework/block_desc.h
+3
-3
paddle/framework/op_info.h
paddle/framework/op_info.h
+3
-5
paddle/framework/program_desc.h
paddle/framework/program_desc.h
+3
-3
paddle/framework/scope.h
paddle/framework/scope.h
+3
-5
paddle/framework/tensor_array.cc
paddle/framework/tensor_array.cc
+283
-0
paddle/framework/tensor_array.h
paddle/framework/tensor_array.h
+110
-0
paddle/framework/tensor_array_test.cc
paddle/framework/tensor_array_test.cc
+130
-0
paddle/operators/CMakeLists.txt
paddle/operators/CMakeLists.txt
+5
-1
paddle/operators/recurrent_op.cc
paddle/operators/recurrent_op.cc
+35
-39
paddle/operators/recurrent_op.h
paddle/operators/recurrent_op.h
+4
-6
paddle/operators/rnn/recurrent_op_utils.cc
paddle/operators/rnn/recurrent_op_utils.cc
+31
-40
paddle/operators/rnn/recurrent_op_utils.h
paddle/operators/rnn/recurrent_op_utils.h
+3
-3
paddle/operators/sum_op.cc
paddle/operators/sum_op.cc
+18
-16
paddle/operators/sum_op.cu
paddle/operators/sum_op.cu
+1
-3
paddle/operators/sum_op.h
paddle/operators/sum_op.h
+0
-19
paddle/platform/macros.h
paddle/platform/macros.h
+6
-4
python/paddle/v2/framework/tests/test_recurrent_op.py
python/paddle/v2/framework/tests/test_recurrent_op.py
+15
-14
未找到文件。
paddle/framework/CMakeLists.txt
浏览文件 @
479e4a50
...
...
@@ -41,3 +41,6 @@ add_custom_command(TARGET framework_py_proto POST_BUILD
cc_library
(
backward SRCS backward.cc DEPS net_op
)
cc_test
(
backward_test SRCS backward_test.cc DEPS backward recurrent_op device_context
)
cc_library
(
tensor_array SRCS tensor_array.cc DEPS lod_tensor
)
cc_test
(
tensor_array_test SRCS tensor_array_test.cc DEPS tensor_array place
)
paddle/framework/block_desc.h
浏览文件 @
479e4a50
...
...
@@ -19,6 +19,7 @@ limitations under the License. */
#include <vector>
#include "paddle/framework/op_desc.h"
#include "paddle/framework/var_desc.h"
#include "paddle/platform/macros.h"
namespace
paddle
{
namespace
framework
{
...
...
@@ -34,9 +35,6 @@ class BlockDescBind {
BlockDescBind
(
ProgramDescBind
*
prog
,
BlockDesc
*
desc
)
:
prog_
(
prog
),
desc_
(
desc
),
need_update_
(
false
)
{}
BlockDescBind
(
const
BlockDescBind
&
o
)
=
delete
;
BlockDescBind
&
operator
=
(
const
BlockDescBind
&
o
)
=
delete
;
int32_t
ID
()
const
{
return
desc_
->
idx
();
}
int32_t
Parent
()
const
{
return
desc_
->
parent_idx
();
}
...
...
@@ -66,6 +64,8 @@ class BlockDescBind {
std
::
deque
<
std
::
unique_ptr
<
OpDescBind
>>
ops_
;
std
::
unordered_map
<
std
::
string
,
std
::
unique_ptr
<
VarDescBind
>>
vars_
;
DISABLE_COPY_AND_ASSIGN
(
BlockDescBind
);
};
}
// namespace framework
}
// namespace paddle
paddle/framework/op_info.h
浏览文件 @
479e4a50
...
...
@@ -20,6 +20,7 @@
#include "paddle/framework/attribute.h"
#include "paddle/framework/op_desc.h"
#include "paddle/framework/type_defs.h"
#include "paddle/platform/macros.h"
namespace
paddle
{
namespace
framework
{
...
...
@@ -53,11 +54,6 @@ class OpInfoMap {
public:
static
OpInfoMap
&
Instance
();
OpInfoMap
(
const
OpInfoMap
&
o
)
=
delete
;
OpInfoMap
(
OpInfoMap
&&
o
)
=
delete
;
OpInfoMap
&
operator
=
(
const
OpInfoMap
&
o
)
=
delete
;
OpInfoMap
&
operator
=
(
OpInfoMap
&&
o
)
=
delete
;
bool
Has
(
const
std
::
string
&
op_type
)
const
{
return
map_
.
find
(
op_type
)
!=
map_
.
end
();
}
...
...
@@ -93,6 +89,8 @@ class OpInfoMap {
private:
OpInfoMap
()
=
default
;
std
::
unordered_map
<
std
::
string
,
const
OpInfo
>
map_
;
DISABLE_COPY_AND_ASSIGN
(
OpInfoMap
);
};
}
// namespace framework
...
...
paddle/framework/program_desc.h
浏览文件 @
479e4a50
...
...
@@ -16,6 +16,7 @@ limitations under the License. */
#include <vector>
#include "paddle/framework/framework.pb.h"
#include "paddle/platform/macros.h"
namespace
paddle
{
namespace
framework
{
...
...
@@ -26,9 +27,6 @@ class ProgramDescBind {
public:
static
ProgramDescBind
&
Instance
(
ProgramDesc
*
prog
);
ProgramDescBind
(
const
ProgramDescBind
&
o
)
=
delete
;
ProgramDescBind
&
operator
=
(
const
ProgramDescBind
&
o
)
=
delete
;
BlockDescBind
*
AppendBlock
(
const
BlockDescBind
&
parent
);
BlockDescBind
*
Block
(
size_t
idx
)
{
return
blocks_
[
idx
].
get
();
}
...
...
@@ -46,6 +44,8 @@ class ProgramDescBind {
ProgramDesc
*
prog_
;
std
::
vector
<
std
::
unique_ptr
<
BlockDescBind
>>
blocks_
;
DISABLE_COPY_AND_ASSIGN
(
ProgramDescBind
);
};
}
// namespace framework
}
// namespace paddle
paddle/framework/scope.h
浏览文件 @
479e4a50
...
...
@@ -19,6 +19,7 @@ limitations under the License. */
#include <unordered_map>
#include "paddle/framework/variable.h"
#include "paddle/platform/macros.h"
namespace
paddle
{
namespace
framework
{
...
...
@@ -38,11 +39,6 @@ class Scope {
Scope
()
{}
~
Scope
();
// Disable Copy, Assign, Move.
Scope
(
const
Scope
&
other
)
=
delete
;
Scope
&
operator
=
(
const
Scope
&
other
)
=
delete
;
Scope
(
Scope
&&
other
)
=
delete
;
/// Create a sub-scope. Returns a reference other than a pointer so
/// to prevent from manual deletion.
/// Mark it to const because that new kid scope cannot change parent scope.
...
...
@@ -73,6 +69,8 @@ class Scope {
std
::
unordered_map
<
std
::
string
,
Variable
*>
vars_
;
mutable
std
::
list
<
Scope
*>
kids_
;
Scope
const
*
parent_
{
nullptr
};
DISABLE_COPY_AND_ASSIGN
(
Scope
);
};
}
// namespace framework
...
...
paddle/framework/tensor_array.cc
0 → 100644
浏览文件 @
479e4a50
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/framework/tensor_array.h"
#include <glog/logging.h>
#include <algorithm>
#include <limits>
namespace
paddle
{
namespace
framework
{
namespace
detail
{
/*
 * Iterates over the top level of a lod-tensor after sorting its sequences by
 * length. The top level of a lod-tensor holds a batch of sequences, and each
 * top-level sequence may contain several lower-level elements. Sorting the
 * top-level sequences by their number of lower-level elements in descending
 * order means that, while an RNN runs, the batch size only ever shrinks: the
 * short sequences finish at the tail of each batch.
 *
 * A small lod-tensor as an example
 *
 *   |(0)    |(1)        top-level has two instances
 *   |||     |||||       lower-level
 *
 * sorted by lower-level length
 *
 *   |(1)    |(0)
 *   |||||   |||
 *
 * When the RNN runs it gets 5 batches (the number of elements in the longest
 * sequence)
 *
 *   |||||
 *   |||
 *
 * The first three batches hold two elements each, the last two batches hold a
 * single element each.
 */
struct DynamicBatchUnpacker {
  using value_type = float;

  // Remembers `source` by address (it is not copied) and immediately builds
  // the length-sorted `meta` for the requested lod `level`.
  DynamicBatchUnpacker(const LoDTensor& source, size_t level,
                       bool descend = true)
      : source{&source}, level{level} {
    BuildLengthSortedMeta(descend);
  }

  // Builds the batch for time step `index`.
  LoDTensor GetBatch(size_t index);

  // One entry per sequence of `level`, ordered by sequence length.
  std::vector<DySeqMeta> meta;

  // The tensor being unpacked; only its address is stored here.
  LoDTensor const* source;

  // The lod level whose sequences are unpacked.
  size_t level;

 protected:
  // Fills `meta` from the lod of `source` and sorts it by sequence length.
  void BuildLengthSortedMeta(bool descend);
};
// Forward declaration; the definition is at the bottom of this file.
// Rebuilds a single LoDTensor from the per-step batches in `source`, using
// `meta` to locate each sequence and `lod`/`level` to size and label the
// result.
LoDTensor PackDynamicBatch(const std::vector<LoDTensor>& source,
                           const std::vector<DySeqMeta>& meta, const LoD& lod,
                           size_t level);
}
// namespace detail
// Returns the tensor stored at `index`.
//
// `index` must be strictly smaller than MAX_SIZE: a valid array of at most
// MAX_SIZE values has indices 0 .. MAX_SIZE - 1. If `index` is beyond the
// current size, the array is first grown with default-constructed tensors so
// that `index` becomes valid (values_ is mutable, hence the const qualifier).
const LoDTensor& TensorArray::Read(size_t index) const {
  // Strict comparison: index == MAX_SIZE would grow the array to
  // MAX_SIZE + 1 values.
  PADDLE_ENFORCE_LT(index, MAX_SIZE, "index[%d] too large", index);
  if (index >= size()) {
    values_.resize(index + 1);
  }
  return values_[index];
}
void
TensorArray
::
Write
(
size_t
index
,
const
LoDTensor
&
value
)
{
PADDLE_ENFORCE_LE
(
index
,
MAX_SIZE
,
"index[%d] too large"
,
index
);
if
(
index
>=
size
())
{
values_
.
resize
(
index
+
1
);
}
values_
[
index
].
Resize
(
value
.
dims
());
values_
[
index
].
mutable_data
<
value_type
>
(
platform
::
CPUPlace
());
values_
[
index
].
CopyFrom
<
value_type
>
(
value
,
platform
::
CPUPlace
());
}
void
TensorArray
::
WriteShared
(
size_t
index
,
const
LoDTensor
&
value
)
{
PADDLE_ENFORCE_LE
(
index
,
MAX_SIZE
,
"index[%d] too large"
,
index
);
if
(
index
>=
size
())
{
values_
.
resize
(
index
+
1
);
}
values_
[
index
].
ShareDataWith
<
value_type
>
(
value
);
}
// Rebuilds one LoDTensor from the stored per-step batches by delegating to
// detail::PackDynamicBatch. `meta` is the vector returned by Unpack(), and
// `lod` is attached to the packed result.
LoDTensor TensorArray::Pack(size_t level, const std::vector<DySeqMeta>& meta,
                            const LoD& lod) const {
  return detail::PackDynamicBatch(values_, meta, lod, level);
}
// Splits `source` at lod `level` into per-step batches and writes batch `t`
// to slot `t` of this array. Returns the length-sorted sequence meta, which
// Pack() needs to restore the original layout.
std::vector<DySeqMeta> TensorArray::Unpack(const LoDTensor& source, int level,
                                           bool length_desend) {
  detail::DynamicBatchUnpacker unpacker(source, level,
                                        length_desend /*descend*/);

  // The longest sequence decides how many batches are produced.
  size_t longest = 0;
  for (const auto& seq : unpacker.meta) {
    const size_t seq_length = seq.end - seq.begin;
    if (seq_length > longest) {
      longest = seq_length;
    }
  }

  // Emit one batch per time step.
  size_t step = 0;
  while (step < longest) {
    Write(step, unpacker.GetBatch(step));
    ++step;
  }

  return unpacker.meta;
}
// Stacks all stored tensors along a new leading dimension and returns the
// result. An empty array yields a default-constructed LoDTensor. Every stored
// tensor must have the same shape as the first one.
LoDTensor TensorArray::Stack() const {
  LoDTensor stacked;
  const size_t count = size();
  if (count == 0) {
    return stacked;
  }

  const auto& first_dims = values_.front().dims();
  // Every value must match the shape of the first one.
  // TODO(superjom) check the same dtypes
  for (auto it = values_.begin() + 1; it != values_.end(); ++it) {
    const auto& value_dims = it->dims();
    PADDLE_ENFORCE_EQ(first_dims, value_dims);
  }

  // Result shape is [count, first_dims...].
  auto stacked_dims = vectorize(first_dims);
  stacked_dims.insert(stacked_dims.begin(), count);
  stacked.Resize(make_ddim(stacked_dims));
  stacked.mutable_data<value_type>(platform::CPUPlace());

  // Copy value `idx` into row `idx` of the result.
  for (size_t idx = 0; idx < count; ++idx) {
    auto row = stacked.Slice<value_type>(idx, idx + 1);
    row.CopyFrom<value_type>(Read(idx), platform::CPUPlace());
  }
  return stacked;
}
// Splits `source` along its first dimension into this array, copying the data
// of every slice (forwards to the two-argument overload with
// data_shared = false).
void TensorArray::Unstack(const LoDTensor& source) const {
  Unstack(source, false /*data_shared*/);
}
// Splits `source` along its first dimension into this array, sharing the data
// of `source` instead of copying it (forwards to the two-argument Unstack
// overload with data_shared = true).
void TensorArray::UnstackShared(const LoDTensor& source) const {
  Unstack(source, true /*data_shared*/);
}
// Replaces the content of this array with the slices of `source` taken along
// its first dimension: slot `i` receives source[i, i + 1). With `data_shared`
// the slots share the data of `source`; otherwise each slice is copied on the
// CPU. `source` must have a positive first dimension.
void TensorArray::Unstack(const LoDTensor& source, bool data_shared) const {
  size_t first_dim = source.dims()[0];
  DDim value_dims = slice_ddim(source.dims(), 1, source.dims().size());
  PADDLE_ENFORCE_GT(first_dim, 0,
                    "source should have some data to be unstacked");

  // One slot per slice of the leading dimension.
  values_.resize(first_dim);

  size_t row = 0;
  for (auto& slot : values_) {
    if (data_shared) {
      // share memory
      slot.ShareDataWith<value_type>(source.Slice<value_type>(row, row + 1));
    } else {
      // copy
      slot.Resize(value_dims);
      slot.CopyFrom<value_type>(source.Slice<value_type>(row, row + 1),
                                platform::CPUPlace());
    }
    ++row;
  }
}
// Returns the number of tensors currently held by the array.
size_t TensorArray::size() const { return values_.size(); }
namespace
detail
{
// Fills `meta` with one DySeqMeta per sequence of lod level `level` of
// `source` and sorts the entries by sequence length, longest first when
// `descend` is true and shortest first otherwise.
//
// Must be called only once per unpacker (enforced), and the level must
// contain at least one sequence (enforced).
void DynamicBatchUnpacker::BuildLengthSortedMeta(bool descend) {
  PADDLE_ENFORCE(meta.empty(), "duplicate build meta");
  // collect meta for each sequence in some level
  auto lod = SliceLevels(source->lod(), level, level + 1)[0];
  // `seq_id + 1 < lod.size()` instead of `seq_id < lod.size() - 1`: the latter
  // wraps around to a huge bound when the level holds no offsets at all.
  for (size_t seq_id = 0; seq_id + 1 < lod.size(); seq_id++) {
    meta.push_back(DySeqMeta{lod[seq_id], lod[seq_id + 1], seq_id});
  }

  PADDLE_ENFORCE_GT(meta.size(), 0, "meta is empty");

  // sort by length
  // The comparator must be a strict weak ordering: negating `>` gives `<=`,
  // which reports equal-length sequences as ordered both ways and makes
  // std::sort undefined. Use `<` for the ascending case instead.
  std::sort(meta.begin(), meta.end(),
            [descend](const DySeqMeta& a, const DySeqMeta& b) {
              const size_t len_a = a.end - a.begin;
              const size_t len_b = b.end - b.begin;
              return descend ? len_a > len_b : len_a < len_b;
            });
}
// Builds the batch for time step `index`: row `i` of the result is element
// number `index` of the i-th sequence in `meta`. Sequences shorter than
// `index + 1` contribute nothing.
//
// The early `break` relies on `meta` being sorted longest-first: once one
// sequence is too short, all the following ones are too.
//
// Fails (enforced) if `meta` has not been built or if no sequence is long
// enough to reach step `index`.
LoDTensor DynamicBatchUnpacker::GetBatch(size_t index) {
  PADDLE_ENFORCE(!meta.empty(), "should build meta first");
  LoDTensor result;

  // collect indice need to copy to the batch
  std::vector<size_t> indice;
  for (const auto& seq_meta : meta) {
    // `index` is relative to the start of the sequence while `end` is an
    // absolute row offset, so compare the absolute position, not `index`.
    const size_t id = seq_meta.begin + index;
    if (id >= seq_meta.end) break;
    indice.push_back(id);
  }
  PADDLE_ENFORCE(!indice.empty(), "invalid batch at %d", index);

  // copy the indice of records in LoDTensor
  auto record_dims = slice_ddim(source->dims(), 1, source->dims().size());
  auto record_dims_vec = vectorize(record_dims);
  record_dims_vec.insert(record_dims_vec.begin(), indice.size());
  result.Resize(make_ddim(record_dims_vec));
  result.mutable_data<value_type>(platform::CPUPlace());

  // Copy every collected row, including the last one.
  for (size_t i = 0; i < indice.size(); i++) {
    const size_t row = indice[i];
    auto target = result.Slice<value_type>(i, i + 1);
    auto source_ = source->Slice<value_type>(row, row + 1);
    target.CopyFrom<value_type>(source_, platform::CPUPlace());
  }

  return result;
}
// Rebuilds a single LoDTensor from per-step batches.
//
//   source  batches indexed by time step; row `seq_id` of batch `batch_id`
//           is element `batch_id` of sequence meta[seq_id].
//   meta    sequence positions in the order used to build the batches.
//   lod     lod attached to the result; lod[level].back() gives the number of
//           rows of the result.
//   level   the lod level the batches were unpacked at.
//
// All of `source`, `meta` and `lod` must be non-empty, `level` must index an
// existing, non-empty level of `lod` (all enforced).
LoDTensor PackDynamicBatch(const std::vector<LoDTensor>& source,
                           const std::vector<DySeqMeta>& meta, const LoD& lod,
                           size_t level) {
  PADDLE_ENFORCE(!source.empty());
  PADDLE_ENFORCE(!meta.empty());
  PADDLE_ENFORCE(!lod.empty());
  // Guard the `lod[level].back()` access below against a bad `level`.
  PADDLE_ENFORCE_LT(level, lod.size(), "level[%d] is out of range of lod",
                    level);
  PADDLE_ENFORCE(!lod[level].empty(), "lod level[%d] is empty", level);

  LoDTensor result;

  // init result space
  auto record_dims = slice_ddim(source[0].dims(), 1, source[0].dims().size());
  auto record_dims_vec = vectorize(record_dims);
  auto height = lod[level].back();
  record_dims_vec.insert(record_dims_vec.begin(), height);
  result.Resize(make_ddim(record_dims_vec));
  result.mutable_data<float>(platform::CPUPlace());

  for (size_t batch_id = 0; batch_id < source.size(); batch_id++) {
    for (size_t seq_id = 0; seq_id < meta.size(); seq_id++) {
      const auto& seq_meta = meta[seq_id];
      // source is source[batch_id][seq_id]
      // target is result[index]
      auto index = seq_meta.begin + batch_id;
      // Stops at the first sequence that is too short for this step; the
      // sequences after it are skipped as well.
      if (index >= seq_meta.end) break;
      auto source_ = source[batch_id].Slice<float>(seq_id, seq_id + 1);
      auto target = result.Slice<float>(index, index + 1);
      target.CopyFrom<float>(source_, platform::CPUPlace());
    }
  }

  result.set_lod(lod);

  return result;
}
}
// namespace detail
}
// namespace framework
}
// namespace paddle
paddle/framework/tensor_array.h
0 → 100644
浏览文件 @
479e4a50
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <vector>
#include "paddle/framework/lod_tensor.h"
namespace
paddle
{
namespace
framework
{
/*
 * DySeqMeta stores the position of one sequence inside a lod-tensor. It is
 * kept after the lod-tensor has been re-assembled into batches, so that the
 * original order of the lod-tensor can be recovered.
 */
struct DySeqMeta {
  // Offset of the first element of the sequence.
  size_t begin;
  size_t end;  // not included
  // Index of the sequence before the entries were sorted by length.
  size_t ori_idx;
};
/*
 * TensorArray is a C-array-like array of tensors. It is meant to be used with
 * dynamic iteration primitives such as while_loop, where it segments inputs
 * and stores the states of all time steps.
 *
 * By providing methods similar to those of a C++ array, the definition of
 * state-based dynamic models such as RNNs can be more natural and highly
 * flexible.
 */
class TensorArray {
 public:
  // Element type used when allocating, copying and slicing the stored tensors.
  using value_type = float;

  // max number of values allowed to store.
  const size_t MAX_SIZE{100000};

  /*
   * Read the value at location `index` in the `TensorArray`. If `index` is
   * beyond the current size, the array is grown so that `index` is valid.
   */
  const LoDTensor &Read(size_t index) const;

  /*
   * Write a copy of `value` into location `index` of the TensorArray.
   */
  void Write(size_t index, const LoDTensor &value);

  /*
   * Write `value` into location `index` of the TensorArray, with memory
   * shared.
   */
  void WriteShared(size_t index, const LoDTensor &value);

  /*
   * Recover the original LoD-arranged LoDTensor from the stored values, using
   * `level`, `meta` (as returned by `Unpack`) and `lod`.
   */
  LoDTensor Pack(size_t level, const std::vector<DySeqMeta> &meta,
                 const LoD &lod) const;

  /*
   * Split a LoDTensor at some `level` and write the generated batches to the
   * stored values. If `length_desend` is set, sequences are sorted by length
   * in descending order, otherwise in ascending order.
   */
  std::vector<DySeqMeta> Unpack(const LoDTensor &source, int level,
                                bool length_desend);

  /*
   * Pack the values into a tensor with rank one higher than each tensor in
   * values.
   */
  LoDTensor Stack() const;

  /*
   * Unpacks the given division of a rank-`R` tensor into rank-`(R-1)` tensors.
   */
  void Unstack(const LoDTensor &source) const;

  /*
   * Unpacks the given division of a rank-`R` tensor into rank-`(R-1)` tensors,
   * with memory of tensors shared.
   */
  void UnstackShared(const LoDTensor &source) const;

  /*
   * Return the number of values.
   */
  size_t size() const;

 protected:
  // Shared implementation of Unstack/UnstackShared; `data_shared` selects
  // sharing the memory of `source` instead of copying it.
  void Unstack(const LoDTensor &source, bool data_shared) const;

 private:
  // Mutable so that the const methods above (Read, Unstack, UnstackShared)
  // can resize or refill the storage.
  mutable std::vector<LoDTensor> values_;
};  // class TensorArray
}
// namespace framework
}
// namespace paddle
paddle/framework/tensor_array_test.cc
0 → 100644
浏览文件 @
479e4a50
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/framework/tensor_array.h"
#include <gtest/gtest.h>
namespace
paddle
{
namespace
framework
{
// Fixture that unstacks a [batch_size, dim] tensor filled with 0, 1, 2, ...
// into `ta`, so that every test starts with `batch_size` stored values.
class TensorArrayTester : public ::testing::Test {
 protected:
  void SetUp() override {
    LoDTensor source;
    source.Resize(make_ddim({batch_size, dim}));
    int* data = source.mutable_data<int>(platform::CPUPlace());
    // Derive the element count from the shape instead of repeating the
    // literal sizes, so the loop cannot drift from the allocated buffer.
    for (int i = 0; i < batch_size * dim; i++) {
      data[i] = i;
    }
    ta.Unstack(source);
  }

  TensorArray ta;
  const int batch_size = 16;
  const int dim = 32;
};
// Every value produced by the fixture's Unstack has shape [1, dim].
TEST_F(TensorArrayTester, Read) {
  for (int i = 0; i < batch_size; i++) {
    const auto& tensor = ta.Read(i);
    ASSERT_EQ(tensor.dims()[0], 1);
    ASSERT_EQ(tensor.dims()[1], dim);
  }
}
// A value written with Write() reads back with the same content.
TEST_F(TensorArrayTester, Write) {
  LoDTensor source;
  source.Resize(make_ddim({1, dim}));
  // Fill the row with 0, 1, ..., dim - 1.
  int* payload = source.mutable_data<int>(platform::CPUPlace());
  for (int i = 0; i < dim; i++) {
    payload[i] = i;
  }

  ta.Write(2, source);

  const auto& tensor = ta.Read(2);
  for (int i = 0; i < dim; i++) {
    EXPECT_EQ(*(tensor.data<int>() + i), *(source.data<int>() + i));
  }
}
// A value written with WriteShared() reads back with the same content and
// points at the very same buffer as the source tensor.
TEST_F(TensorArrayTester, WriteShared) {
  LoDTensor source;
  source.Resize(make_ddim({1, dim}));
  // Fill the row with 0, 1, ..., dim - 1.
  int* payload = source.mutable_data<int>(platform::CPUPlace());
  for (int i = 0; i < dim; i++) {
    payload[i] = i;
  }

  ta.WriteShared(2, source);

  const auto& tensor = ta.Read(2);
  for (int i = 0; i < dim; i++) {
    EXPECT_EQ(*(tensor.data<int>() + i), *(source.data<int>() + i));
  }

  // Shared write: no copy was made.
  EXPECT_EQ(source.data<int>(), tensor.data<int>());
}
// Fixture for Pack/Unpack: builds a [13, 128] tensor holding three sequences
// (rows 0-2, 2-9 and 9-13) and unpacks it into `ta` in descending length
// order, keeping the returned meta for the Pack test.
class TensorArrayPackTester : public ::testing::Test {
 protected:
  void SetUp() override {
    lod.push_back(std::vector<size_t>{0, 2, 9, 13});

    source.set_lod(lod);
    source.Resize(make_ddim({13, 128}));
    source.mutable_data<int>(platform::CPUPlace());

    // content of each sentence: 0 1 2 3 4
    // (every row is filled with its position inside its own sequence)
    const auto& level = lod.front();
    for (size_t i = 0; i < level.size() - 1; i++) {
      size_t begin = level[i];
      size_t end = level[i + 1];
      for (size_t j = begin; j < end; j++) {
        auto record = source.Slice<int>(j, j + 1);
        int* row = record.mutable_data<int>(platform::CPUPlace());
        for (int dim = 0; dim < 128; dim++) {
          row[dim] = j - begin;
        }
      }
    }

    // unpack
    meta = ta.Unpack(source, 0, true);
  }

  LoD lod;
  TensorArray ta;
  LoDTensor source;
  std::vector<DySeqMeta> meta;
};
// The longest sequence of the fixture has 7 rows, so unpacking yields 7
// batches; the first element of batch `t` is the value `t`.
TEST_F(TensorArrayPackTester, Unpack) {
  ASSERT_EQ(ta.size(), 7UL);

  const auto& t0 = ta.Read(0);
  const auto& t1 = ta.Read(1);

  ASSERT_EQ(t0.data<int>()[0], int(0));
  ASSERT_EQ(t1.data<int>()[0], int(1));
}
// Packing the unpacked batches must give back a tensor with the shape of the
// original source.
TEST_F(TensorArrayPackTester, Pack) {
  LoDTensor packed = ta.Pack(0, meta, lod);

  // The test used to only check that Pack() does not crash; pin the shape of
  // the result as well.
  ASSERT_EQ(packed.dims().size(), source.dims().size());
  ASSERT_EQ(packed.dims()[0], source.dims()[0]);
  ASSERT_EQ(packed.dims()[1], source.dims()[1]);
}
// After the fixture's Unstack the array holds one value per row of the source.
TEST_F(TensorArrayTester, size) {
  ASSERT_EQ(ta.size(), static_cast<size_t>(batch_size));
}
}
// namespace framework
}
// namespace paddle
paddle/operators/CMakeLists.txt
浏览文件 @
479e4a50
...
...
@@ -103,12 +103,16 @@ set(DEPS_OPS
recurrent_op
cond_op
cross_entropy_op
softmax_with_cross_entropy_op
)
softmax_with_cross_entropy_op
sum_op
)
op_library
(
recurrent_op SRCS recurrent_op.cc rnn/recurrent_op_utils.cc
DEPS framework_proto tensor net_op
)
op_library
(
cond_op SRCS cond_op.cc DEPS framework_proto tensor operator net_op
)
op_library
(
cross_entropy_op DEPS cross_entropy
)
op_library
(
softmax_with_cross_entropy_op DEPS cross_entropy softmax
)
op_library
(
sum_op DEPS net_op
)
list
(
REMOVE_ITEM GENERAL_OPS
${
DEPS_OPS
}
)
foreach
(
src
${
GENERAL_OPS
}
)
...
...
paddle/operators/recurrent_op.cc
浏览文件 @
479e4a50
...
...
@@ -30,36 +30,39 @@ using LoDTensor = framework::LoDTensor;
void
RecurrentAlgorithm
::
Run
(
const
Scope
&
scope
,
const
platform
::
DeviceContext
&
dev_ctx
)
const
{
auto
step_scopes
=
GetStepScopes
(
scope
);
rnn
::
SegmentInputs
(
step_scopes
,
arg_
->
inlinks
,
seq_len_
,
false
/*infer_shape_mode*/
)
;
InitMemories
(
step_scopes
[
0
],
false
/*infer_shape_mode*/
);
auto
*
input0
=
scope
.
FindVar
(
arg_
->
inlinks
[
0
]
);
PADDLE_ENFORCE_NOT_NULL
(
input0
);
size_t
seq_len
=
input0
->
GetMutable
<
LoDTensor
>
()
->
dims
()[
0
]
;
PADDLE_ENFORCE_GT
(
seq_len
,
0
);
for
(
size_t
step_id
=
0
;
step_id
<
seq_len_
;
step_id
++
)
{
// create output alias variables
CreateScopes
(
scope
,
seq_len
);
auto
&
step_scopes
=
GetStepScopes
(
scope
);
rnn
::
SegmentInputs
(
step_scopes
,
arg_
->
inlinks
,
seq_len
);
InitMemories
(
step_scopes
[
0
]);
for
(
size_t
step_id
=
0
;
step_id
<
seq_len
;
step_id
++
)
{
if
(
step_id
>
0
)
{
rnn
::
LinkMemories
(
step_scopes
,
arg_
->
memories
,
step_id
,
-
1
,
false
/*infer_shape_mode*/
);
rnn
::
LinkMemories
(
step_scopes
,
arg_
->
memories
,
step_id
,
-
1
);
}
(
*
stepnet_
)
->
Run
(
*
step_scopes
[
step_id
],
dev_ctx
);
}
rnn
::
ConcatOutputs
(
step_scopes
,
arg_
->
outlinks
,
seq_len_
,
false
/*infer_shape_mode*/
);
rnn
::
ConcatOutputs
(
step_scopes
,
arg_
->
outlinks
,
seq_len
);
}
void
RecurrentAlgorithm
::
CreateScopes
(
const
Scope
&
scope
)
const
{
void
RecurrentAlgorithm
::
CreateScopes
(
const
Scope
&
scope
,
size_t
seq_len
)
const
{
// TODO(superjom) Only two scopes are needed for inference, this case will be
// supported later.
auto
step_scopes_var
=
scope
.
FindVar
(
arg_
->
step_scopes
);
auto
*
step_scopes_var
=
scope
.
FindVar
(
arg_
->
step_scopes
);
PADDLE_ENFORCE
(
step_scopes_var
!=
nullptr
,
""
);
auto
step_scopes
=
step_scopes_var
->
GetMutable
<
std
::
vector
<
Scope
*>>
();
auto
*
step_scopes
=
step_scopes_var
->
GetMutable
<
std
::
vector
<
Scope
*>>
();
// Now all variables in scope must be created outside of op.
PADDLE_ENFORCE_NOT_NULL
(
stepnet_
);
PADDLE_ENFORCE
(
!
(
*
stepnet_
)
->
Outputs
().
empty
(),
"stepnet_ op has no outputs"
);
if
(
seq_len
_
>
step_scopes
->
size
())
{
for
(
size_t
i
=
step_scopes
->
size
();
i
<
seq_len
_
;
++
i
)
{
if
(
seq_len
>
step_scopes
->
size
())
{
for
(
size_t
i
=
step_scopes
->
size
();
i
<
seq_len
;
++
i
)
{
auto
&
step_scope
=
scope
.
NewScope
();
// create step net's temp inputs
...
...
@@ -82,8 +85,7 @@ void RecurrentAlgorithm::CreateScopes(const Scope& scope) const {
}
}
void
RecurrentAlgorithm
::
InitMemories
(
Scope
*
step_scope
,
bool
infer_shape_mode
)
const
{
void
RecurrentAlgorithm
::
InitMemories
(
Scope
*
step_scope
)
const
{
for
(
auto
&
attr
:
arg_
->
memories
)
{
auto
*
pre_mem
=
step_scope
->
NewVar
(
attr
.
pre_var
)
->
GetMutable
<
LoDTensor
>
();
PADDLE_ENFORCE
(
step_scope
->
FindVar
(
attr
.
boot_var
)
!=
nullptr
,
...
...
@@ -91,13 +93,10 @@ void RecurrentAlgorithm::InitMemories(Scope* step_scope,
attr
.
boot_var
);
auto
*
boot_mem
=
step_scope
->
FindVar
(
attr
.
boot_var
)
->
GetMutable
<
LoDTensor
>
();
if
(
infer_shape_mode
)
{
pre_mem
->
Resize
(
boot_mem
->
dims
());
PADDLE_ENFORCE_EQ
(
pre_mem
->
dims
().
size
(),
2
);
}
else
{
pre_mem
->
ShareDataWith
<
float
>
(
*
boot_mem
);
}
}
}
const
rnn
::
ArgumentName
RecurrentOp
::
kArgName
{
...
...
@@ -146,23 +145,23 @@ class RecurrentAlgorithmProtoAndCheckerMaker
void
RecurrentGradientAlgorithm
::
Run
(
const
Scope
&
scope
,
const
platform
::
DeviceContext
&
dev_ctx
)
const
{
auto
step_scopes
=
GetStepScopes
(
scope
);
rnn
::
SegmentInputs
(
step_scopes
,
arg_
->
inlinks
,
seq_len_
,
false
/*infer_shape_mode*/
);
for
(
int
step_id
=
seq_len_
-
1
;
step_id
>=
0
;
--
step_id
)
{
if
(
static_cast
<
size_t
>
(
step_id
)
!=
seq_len_
-
1
)
{
rnn
::
LinkMemories
(
step_scopes
,
arg_
->
memories
,
step_id
,
1
,
false
/*infer_shape_mode*/
);
auto
*
input0
=
scope
.
FindVar
(
arg_
->
inlinks
[
0
]);
PADDLE_ENFORCE_NOT_NULL
(
input0
);
size_t
seq_len
=
input0
->
GetMutable
<
LoDTensor
>
()
->
dims
()[
0
];
auto
&
step_scopes
=
GetStepScopes
(
scope
);
rnn
::
SegmentInputs
(
step_scopes
,
arg_
->
inlinks
,
seq_len
);
for
(
int
step_id
=
seq_len
-
1
;
step_id
>=
0
;
--
step_id
)
{
if
(
step_id
!=
seq_len
-
1
)
{
rnn
::
LinkMemories
(
step_scopes
,
arg_
->
memories
,
step_id
,
1
);
}
(
*
stepnet_
)
->
Run
(
*
step_scopes
[
step_id
],
dev_ctx
);
}
LinkBootMemoryGradients
(
step_scopes
[
0
],
false
);
rnn
::
ConcatOutputs
(
step_scopes
,
arg_
->
outlinks
,
seq_len_
,
false
/*infer_shape_mode*/
);
rnn
::
ConcatOutputs
(
step_scopes
,
arg_
->
outlinks
,
seq_len
);
LinkBootMemoryGradients
(
step_scopes
[
0
]);
}
void
RecurrentGradientAlgorithm
::
LinkBootMemoryGradients
(
Scope
*
step_scope
,
bool
infer_shape_mode
)
const
{
Scope
*
step_scope
)
const
{
for
(
auto
&
attr
:
arg_
->
memories
)
{
PADDLE_ENFORCE
(
step_scope
->
FindVar
(
attr
.
var
)
!=
nullptr
,
"memory variable [%s] does not exists"
,
attr
.
var
);
...
...
@@ -171,12 +170,9 @@ void RecurrentGradientAlgorithm::LinkBootMemoryGradients(
auto
*
mem_grad
=
step_scope
->
NewVar
(
attr
.
var
)
->
GetMutable
<
LoDTensor
>
();
auto
*
boot_mem_grad
=
step_scope
->
NewVar
(
attr
.
boot_var
)
->
GetMutable
<
LoDTensor
>
();
if
(
infer_shape_mode
)
{
boot_mem_grad
->
Resize
(
mem_grad
->
dims
());
}
else
{
boot_mem_grad
->
ShareDataWith
<
float
>
(
*
mem_grad
);
}
}
}
RecurrentGradientOp
::
RecurrentGradientOp
(
...
...
paddle/operators/recurrent_op.h
浏览文件 @
479e4a50
...
...
@@ -48,7 +48,7 @@ class RecurrentAlgorithm {
* NOTE the scopes are reused in both the forward and backward, so just
* create once and expand its size if more steps need.
*/
void
CreateScopes
(
const
framework
::
Scope
&
scope
)
const
;
void
CreateScopes
(
const
framework
::
Scope
&
scope
,
size_t
seq_len
)
const
;
const
std
::
vector
<
framework
::
Scope
*>&
GetStepScopes
(
const
framework
::
Scope
&
scope
)
const
{
...
...
@@ -56,12 +56,11 @@ class RecurrentAlgorithm {
->
GetMutable
<
std
::
vector
<
framework
::
Scope
*>>
();
}
void
InitMemories
(
framework
::
Scope
*
step_scopes
,
bool
infer_shape_mode
)
const
;
void
InitMemories
(
framework
::
Scope
*
step_scopes
)
const
;
private:
std
::
unique_ptr
<
framework
::
OperatorBase
>*
stepnet_
;
rnn
::
Argument
*
arg_
;
mutable
size_t
seq_len_
;
};
class
RecurrentGradientAlgorithm
{
...
...
@@ -86,8 +85,7 @@ class RecurrentGradientAlgorithm {
void
Run
(
const
framework
::
Scope
&
scope
,
const
platform
::
DeviceContext
&
dev_ctx
)
const
;
void
LinkBootMemoryGradients
(
framework
::
Scope
*
step_scopes
,
bool
infer_shape_mode
)
const
;
void
LinkBootMemoryGradients
(
framework
::
Scope
*
step_scopes
)
const
;
protected:
inline
const
std
::
vector
<
framework
::
Scope
*>&
GetStepScopes
(
...
...
@@ -98,7 +96,6 @@ class RecurrentGradientAlgorithm {
private:
rnn
::
Argument
*
arg_
;
mutable
size_t
seq_len_
;
std
::
unique_ptr
<
framework
::
OperatorBase
>*
stepnet_
;
};
...
...
@@ -123,6 +120,7 @@ class RecurrentOp : public framework::OperatorBase {
void
set_stepnet
(
std
::
unique_ptr
<
OperatorBase
>
net
)
{
stepnet_
=
std
::
move
(
net
);
}
const
OperatorBase
&
stepnet
()
const
{
return
*
stepnet_
;
}
static
const
rnn
::
ArgumentName
kArgName
;
...
...
paddle/operators/rnn/recurrent_op_utils.cc
浏览文件 @
479e4a50
...
...
@@ -25,7 +25,7 @@ using LoDTensor = framework::LoDTensor;
void
SegmentInputs
(
const
std
::
vector
<
Scope
*>&
step_scopes
,
const
std
::
vector
<
std
::
string
>&
inlinks
,
const
size_t
seq_len
,
bool
infer_shape_mode
)
{
const
size_t
seq_len
)
{
PADDLE_ENFORCE
(
!
inlinks
.
empty
(),
"no in links are provided."
);
for
(
size_t
i
=
0
;
i
<
inlinks
.
size
();
++
i
)
{
// global inputs
...
...
@@ -41,11 +41,9 @@ void SegmentInputs(const std::vector<Scope*>& step_scopes,
for
(
size_t
j
=
0
;
j
<
seq_len
;
j
++
)
{
Tensor
*
step_input
=
step_scopes
[
j
]
->
NewVar
(
inlinks
[
i
])
->
GetMutable
<
Tensor
>
();
if
(
!
infer_shape_mode
)
{
// The input of operators of each step is Tensor here.
// Maybe need to modify Slice function.
*
step_input
=
input
->
Slice
<
float
>
(
j
,
j
+
1
);
}
step_input
->
Resize
(
step_dims
);
}
}
...
...
@@ -53,22 +51,20 @@ void SegmentInputs(const std::vector<Scope*>& step_scopes,
void
ConcatOutputs
(
const
std
::
vector
<
Scope
*>&
step_scopes
,
const
std
::
vector
<
std
::
string
>&
outlinks
,
const
size_t
seq_len
,
bool
infer_shape_mode
)
{
const
size_t
seq_len
)
{
for
(
size_t
i
=
0
;
i
<
outlinks
.
size
();
i
++
)
{
auto
output_var
=
step_scopes
[
0
]
->
parent
().
FindVar
(
outlinks
[
i
]);
auto
*
output_var
=
step_scopes
[
0
]
->
parent
().
FindVar
(
outlinks
[
i
]);
PADDLE_ENFORCE_NOT_NULL
(
output_var
,
"output link [%s] is not in scope."
,
outlinks
[
i
]);
LoDTensor
*
output
=
output_var
->
GetMutable
<
LoDTensor
>
();
if
(
infer_shape_mode
)
{
auto
step_scope_var
=
step_scopes
[
0
]
->
FindVar
(
outlinks
[
i
]);
auto
*
step_scope_var
=
step_scopes
[
0
]
->
FindVar
(
outlinks
[
i
]);
PADDLE_ENFORCE_NOT_NULL
(
step_scope_var
,
"%s not in scope"
,
outlinks
[
i
]);
f
::
DDim
step_dims
=
step_scope_var
->
template
GetMutable
<
LoDTensor
>()
->
dims
();
std
::
vector
<
int64_t
>
dims_vec
=
vectorize
(
step_dims
);
dims_vec
.
insert
(
dims_vec
.
begin
(),
seq_len
);
output
->
Resize
(
f
::
make_ddim
(
dims_vec
));
}
else
{
output
->
mutable_data
<
float
>
(
platform
::
CPUPlace
());
for
(
size_t
j
=
0
;
j
<
seq_len
;
j
++
)
{
LoDTensor
*
step_output
=
...
...
@@ -79,13 +75,11 @@ void ConcatOutputs(const std::vector<Scope*>& step_scopes,
.
CopyFrom
<
float
>
(
*
step_output
,
platform
::
CPUPlace
());
}
}
}
}
void
LinkMemories
(
const
std
::
vector
<
Scope
*>&
scopes
,
const
std
::
vector
<
rnn
::
MemoryAttr
>&
memories
,
const
size_t
step_id
,
const
int
offset
,
bool
infer_shape_mode
)
{
const
size_t
step_id
,
const
int
offset
)
{
PADDLE_ENFORCE_LT
(
step_id
,
scopes
.
size
(),
"step [%d] is out of range of step scopes' size [%d]"
,
step_id
,
scopes
.
size
());
...
...
@@ -95,17 +89,14 @@ void LinkMemories(const std::vector<Scope*>& scopes,
step_id
+
offset
,
scopes
.
size
(),
"offset [%d] is out of range, it must be less than (%d - %d)"
,
offset
,
scopes
.
size
(),
step_id
);
auto
scope
=
scopes
[
step_id
];
auto
linked_scope
=
scopes
[
step_id
+
offset
];
auto
*
scope
=
scopes
[
step_id
];
auto
*
linked_scope
=
scopes
[
step_id
+
offset
];
for
(
auto
&
attr
:
memories
)
{
auto
mem
=
scope
->
FindVar
(
attr
.
pre_var
)
->
GetMutable
<
LoDTensor
>
();
auto
linked_mem
=
linked_scope
->
FindVar
(
attr
.
var
)
->
GetMutable
<
LoDTensor
>
();
if
(
infer_shape_mode
)
{
auto
*
mem
=
scope
->
FindVar
(
attr
.
pre_var
)
->
GetMutable
<
LoDTensor
>
();
auto
*
linked_mem
=
linked_scope
->
FindVar
(
attr
.
var
)
->
GetMutable
<
LoDTensor
>
();
mem
->
Resize
(
linked_mem
->
dims
());
}
else
{
mem
->
ShareDataWith
<
float
>
(
*
linked_mem
);
}
}
}
void
InitArgument
(
const
ArgumentName
&
name
,
Argument
*
arg
,
...
...
@@ -115,11 +106,11 @@ void InitArgument(const ArgumentName& name, Argument* arg,
arg
->
inlinks
=
op
.
Inputs
(
name
.
inlinks
);
arg
->
outlinks
=
op
.
Outputs
(
name
.
outlinks
);
auto
boot_memories
=
auto
&
boot_memories
=
is_grad
?
op
.
Outputs
(
name
.
boot_memories
)
:
op
.
Inputs
(
name
.
boot_memories
);
// attributes
auto
memories
=
op
.
Attr
<
std
::
vector
<
std
::
string
>>
(
name
.
memories
);
auto
pre_memories
=
op
.
Attr
<
std
::
vector
<
std
::
string
>>
(
name
.
pre_memories
);
auto
&
memories
=
op
.
Attr
<
std
::
vector
<
std
::
string
>>
(
name
.
memories
);
auto
&
pre_memories
=
op
.
Attr
<
std
::
vector
<
std
::
string
>>
(
name
.
pre_memories
);
PADDLE_ENFORCE
(
memories
.
size
()
==
boot_memories
.
size
(),
"the size of memories, boot_memories don't match:%d,%d"
,
...
...
paddle/operators/rnn/recurrent_op_utils.h
浏览文件 @
479e4a50
...
...
@@ -64,18 +64,18 @@ struct ArgumentName {
*/
void
SegmentInputs
(
const
std
::
vector
<
Scope
*>&
step_scopes
,
const
std
::
vector
<
std
::
string
>&
inlinks
,
const
size_t
seq_len
,
bool
infer_shape_mode
);
const
size_t
seq_len
);
/**
* Process outputs of step nets and merge to variables.
*/
void
ConcatOutputs
(
const
std
::
vector
<
Scope
*>&
step_scopes
,
const
std
::
vector
<
std
::
string
>&
outlinks
,
const
size_t
seq_len
,
bool
infer_shape_mode
);
const
size_t
seq_len
);
void
LinkMemories
(
const
std
::
vector
<
Scope
*>&
step_scopes
,
const
std
::
vector
<
MemoryAttr
>&
memories
,
const
size_t
step_id
,
const
int
offset
,
bool
infer_shape_mode
);
const
int
offset
);
void
InitArgument
(
const
ArgumentName
&
name
,
Argument
*
arg
,
const
framework
::
OperatorBase
&
op
,
bool
is_grad
=
false
);
...
...
paddle/operators/sum_op.cc
浏览文件 @
479e4a50
...
...
@@ -11,6 +11,7 @@ limitations under the License. */
#include "paddle/operators/sum_op.h"
#include <vector>
#include "paddle/operators/net_op.h"
namespace
paddle
{
namespace
operators
{
...
...
@@ -22,14 +23,15 @@ class SumOp : public framework::OperatorWithKernel {
protected:
void
InferShape
(
framework
::
InferShapeContextBase
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInputs
(
"X"
),
"Inputs(X) should not be null"
);
auto
x_dims
=
ctx
->
GetInputsDim
(
"X"
);
PADDLE_ENFORCE
(
!
x_dims
.
empty
(),
"Input(X) of SumOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Out"
),
"Output(Out) of SumOp should not be null."
);
auto
in_dim
=
x_dims
[
0
];
size_t
N
=
x_dims
.
size
();
PADDLE_ENFORCE_GT
(
N
,
1
,
"Input tensors count should > 1."
);
auto
in_dim
=
x_dims
[
0
];
for
(
size_t
i
=
1
;
i
<
N
;
i
++
)
{
auto
dim
=
x_dims
[
i
];
PADDLE_ENFORCE
(
in_dim
==
dim
,
"Input tensors must have same shape"
);
...
...
@@ -56,21 +58,23 @@ or not. But the output only shares the LoD with the first input.
}
};
class
SumGradOp
:
public
framework
::
OperatorWithKernel
{
class
SumGradOp
:
public
NetOp
{
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
SumGradOp
(
const
std
::
string
&
type
,
const
framework
::
VariableNameMap
&
inputs
,
const
framework
::
VariableNameMap
&
outputs
,
const
framework
::
AttributeMap
&
attrs
)
:
NetOp
(
type
,
inputs
,
outputs
,
attrs
)
{
auto
&
x_grad_names
=
Outputs
(
framework
::
GradVarName
(
"X"
));
auto
out_grad_name
=
this
->
Input
(
framework
::
GradVarName
(
"Out"
));
protected:
void
InferShape
(
framework
::
InferShapeContextBase
*
ctx
)
const
override
{
auto
out_grad_dims
=
ctx
->
GetInputDim
(
framework
::
GradVarName
(
"Out"
));
auto
x_grad_names
=
ctx
->
Outputs
(
framework
::
GradVarName
(
"X"
));
size_t
x_length
=
x_grad_names
.
size
();
std
::
vector
<
framework
::
DDim
>
x_grad_dims
;
x_grad_dims
.
reserve
(
x_length
);
for
(
size_t
i
=
0
;
i
<
x_length
;
++
i
)
{
x_grad_dims
.
push_back
(
out_grad_dims
);
framework
::
AttributeMap
grad_attrs
;
grad_attrs
[
"scale"
]
=
1.0
f
;
for
(
auto
&
x_grad_name
:
x_grad_names
)
{
AppendOp
(
framework
::
OpRegistry
::
CreateOp
(
"scale"
,
{{
"X"
,
{
out_grad_name
}}},
{{
"Out"
,
{
x_grad_name
}}},
grad_attrs
));
}
ctx
->
SetOutputsDim
(
framework
::
GradVarName
(
"X"
),
x_grad_dims
);
CompleteAddOp
(
false
);
}
};
...
...
@@ -80,5 +84,3 @@ class SumGradOp : public framework::OperatorWithKernel {
namespace
ops
=
paddle
::
operators
;
REGISTER_OP
(
sum
,
ops
::
SumOp
,
ops
::
SumOpMaker
,
sum_grad
,
ops
::
SumGradOp
);
REGISTER_OP_CPU_KERNEL
(
sum
,
ops
::
SumKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
REGISTER_OP_CPU_KERNEL
(
sum_grad
,
ops
::
SumGradKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
paddle/operators/sum_op.cu
浏览文件 @
479e4a50
...
...
@@ -14,5 +14,3 @@ limitations under the License. */
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_GPU_KERNEL
(
sum
,
ops
::
SumKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
);
\ No newline at end of file
REGISTER_OP_GPU_KERNEL
(
sum_grad
,
ops
::
SumGradKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
);
paddle/operators/sum_op.h
浏览文件 @
479e4a50
...
...
@@ -42,24 +42,5 @@ class SumKernel : public framework::OpKernel<T> {
}
};
template
<
typename
Place
,
typename
T
>
class
SumGradKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
auto
*
input
=
context
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
outs
=
context
.
MultiOutput
<
Tensor
>
(
framework
::
GradVarName
(
"X"
));
for
(
auto
out
:
outs
)
{
out
->
mutable_data
<
T
>
(
context
.
GetPlace
());
}
auto
place
=
context
.
GetEigenDevice
<
Place
>
();
auto
in
=
EigenVector
<
T
>::
Flatten
(
*
input
);
for
(
auto
out
:
outs
)
{
auto
result
=
EigenVector
<
T
>::
Flatten
(
*
out
);
result
.
device
(
place
)
=
in
;
}
}
};
}
// namespace operators
}
// namespace paddle
paddle/platform/macros.h
浏览文件 @
479e4a50
...
...
@@ -19,5 +19,7 @@ limitations under the License. */
#define DISABLE_COPY_AND_ASSIGN(classname) \
private: \
classname(const classname&) = delete; \
classname& operator=(const classname&) = delete
classname(const classname&&) = delete; \
classname& operator=(const classname&) = delete; \
classname& operator=(const classname&&) = delete
#endif
python/paddle/v2/framework/tests/test_recurrent_op.py
浏览文件 @
479e4a50
...
...
@@ -16,14 +16,17 @@ class PySimpleRNN(object):
'''
def
__init__
(
self
,
input_dim
=
30
,
batch_size
=
50
,
weight_dim
=
15
,
sent_len
=
11
):
self
.
x
=
np
.
random
.
normal
(
size
=
(
sent_len
,
batch_size
,
input_dim
))
self
.
W
=
np
.
random
.
normal
(
size
=
(
input_dim
,
input_dim
))
self
.
U
=
np
.
random
.
normal
(
size
=
(
input_dim
,
input_dim
))
self
.
h_boot
=
np
.
random
.
normal
(
size
=
(
batch_size
,
input_dim
))
self
.
x
=
np
.
random
.
normal
(
size
=
(
sent_len
,
batch_size
,
input_dim
)).
astype
(
"float32"
)
self
.
W
=
np
.
random
.
normal
(
size
=
(
input_dim
,
input_dim
)).
astype
(
"float32"
)
self
.
U
=
np
.
random
.
normal
(
size
=
(
input_dim
,
input_dim
)).
astype
(
"float32"
)
self
.
h_boot
=
np
.
random
.
normal
(
size
=
(
batch_size
,
input_dim
)).
astype
(
"float32"
)
# memories
self
.
mems
=
[
np
.
zeros
(
shape
=
(
batch_size
,
input_dim
))
for
i
in
range
(
sent_len
)
np
.
zeros
(
shape
=
(
batch_size
,
input_dim
)).
astype
(
"float32"
)
for
i
in
range
(
sent_len
)
]
def
forward
(
self
):
...
...
@@ -36,7 +39,7 @@ class PySimpleRNN(object):
return
[
self
.
x
[
i
]
for
i
in
range
(
self
.
x
.
shape
[
0
])]
def
concat_outputs
(
self
):
return
np
.
array
(
self
.
mems
)
return
np
.
array
(
self
.
mems
)
.
astype
(
"float32"
)
def
step
(
self
,
step_id
,
x
):
'''
...
...
@@ -47,8 +50,8 @@ class PySimpleRNN(object):
pre_mem
=
self
.
mems
[
step_id
-
1
]
else
:
pre_mem
=
self
.
h_boot
xW
=
np
.
matmul
(
x
,
self
.
W
)
hU
=
np
.
matmul
(
pre_mem
,
self
.
U
)
xW
=
np
.
matmul
(
x
,
self
.
W
)
.
astype
(
"float32"
)
hU
=
np
.
matmul
(
pre_mem
,
self
.
U
)
.
astype
(
"float32"
)
sum
=
xW
+
hU
self
.
mems
[
step_id
]
=
py_sigmoid
(
sum
)
...
...
@@ -102,7 +105,8 @@ class RecurrentOpTest(unittest.TestCase):
self
.
create_step_net
()
ctx
=
core
.
DeviceContext
.
create
(
core
.
CPUPlace
())
self
.
rnnop
.
run
(
self
.
scope
,
ctx
)
return
np
.
array
(
self
.
scope
.
find_var
(
"h@mem"
).
get_tensor
())
return
np
.
array
(
self
.
scope
.
find_var
(
"h@mem"
).
get_tensor
()).
astype
(
"float32"
)
def
create_global_variables
(
self
):
# create inlink
...
...
@@ -142,7 +146,7 @@ class RecurrentOpTest(unittest.TestCase):
stepnet
=
core
.
Net
.
create
()
x_fc_op
=
Operator
(
"mul"
,
X
=
"x"
,
Y
=
"W"
,
Out
=
"Wx"
)
h_fc_op
=
Operator
(
"mul"
,
X
=
"h@pre"
,
Y
=
"U"
,
Out
=
"Uh"
)
sum_op
=
Operator
(
"
add"
,
X
=
"Wx"
,
Y
=
"Uh"
,
Out
=
"sum"
)
sum_op
=
Operator
(
"
sum"
,
X
=
[
"Wx"
,
"Uh"
]
,
Out
=
"sum"
)
sig_op
=
Operator
(
"sigmoid"
,
X
=
"sum"
,
Y
=
"h@mem"
)
for
op
in
[
x_fc_op
,
h_fc_op
,
sum_op
,
sig_op
]:
...
...
@@ -179,7 +183,7 @@ class RecurrentGradientOpTest(unittest.TestCase):
stepnet
=
core
.
Net
.
create
()
x_fc_op
=
Operator
(
"mul"
,
X
=
"x@alias"
,
Y
=
"W"
,
Out
=
"Wx"
)
h_fc_op
=
Operator
(
"mul"
,
X
=
"h@pre"
,
Y
=
"U"
,
Out
=
"Uh"
)
sum_op
=
Operator
(
"
add"
,
X
=
"Wx"
,
Y
=
"Uh"
,
Out
=
"sum"
)
sum_op
=
Operator
(
"
sum"
,
X
=
[
"Wx"
,
"Uh"
]
,
Out
=
"sum"
)
sig_op
=
Operator
(
"sigmoid"
,
X
=
"sum"
,
Y
=
"h@alias"
)
for
op
in
[
x_fc_op
,
h_fc_op
,
sum_op
,
sig_op
]:
...
...
@@ -197,7 +201,4 @@ class RecurrentGradientOpTest(unittest.TestCase):
if
__name__
==
'__main__'
:
exit
(
0
)
# FIXME(yuyang18): InferShape has been removed, this unittest may error
unittest
.
main
()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录