Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
c1738e29
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2302
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
c1738e29
编写于
4月 11, 2017
作者:
L
Luo Tao
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' into stride
上级
dd613047
b22cd96a
变更
21
隐藏空白更改
内联
并排
Showing
21 changed file
with
463 addition
and
287 deletion
+463
-287
CMakeLists.txt
CMakeLists.txt
+1
-0
cmake/external/any.cmake
cmake/external/any.cmake
+20
-0
demo/seqToseq/api_train_v2.py
demo/seqToseq/api_train_v2.py
+56
-25
paddle/function/Function.cpp
paddle/function/Function.cpp
+0
-60
paddle/function/Function.h
paddle/function/Function.h
+28
-11
paddle/function/PadOp.cpp
paddle/function/PadOp.cpp
+14
-22
paddle/function/PadOp.h
paddle/function/PadOp.h
+6
-12
paddle/gserver/layers/PadLayer.cpp
paddle/gserver/layers/PadLayer.cpp
+9
-18
paddle/gserver/layers/PadLayer.h
paddle/gserver/layers/PadLayer.h
+3
-3
paddle/py_paddle/dataprovider_converter.py
paddle/py_paddle/dataprovider_converter.py
+11
-2
paddle/utils/Any.h
paddle/utils/Any.h
+35
-0
python/paddle/trainer_config_helpers/layers.py
python/paddle/trainer_config_helpers/layers.py
+18
-6
python/paddle/v2/config_base.py
python/paddle/v2/config_base.py
+23
-8
python/paddle/v2/dataset/common.py
python/paddle/v2/dataset/common.py
+0
-7
python/paddle/v2/dataset/imdb.py
python/paddle/v2/dataset/imdb.py
+3
-2
python/paddle/v2/dataset/imikolov.py
python/paddle/v2/dataset/imikolov.py
+6
-6
python/paddle/v2/layer.py
python/paddle/v2/layer.py
+184
-77
python/paddle/v2/parameters.py
python/paddle/v2/parameters.py
+2
-1
python/paddle/v2/tests/test_layer.py
python/paddle/v2/tests/test_layer.py
+18
-14
python/paddle/v2/topology.py
python/paddle/v2/topology.py
+21
-11
python/paddle/v2/trainer.py
python/paddle/v2/trainer.py
+5
-2
未找到文件。
CMakeLists.txt
浏览文件 @
c1738e29
...
...
@@ -64,6 +64,7 @@ include(external/python) # download, build, install python
include
(
external/openblas
)
# download, build, install openblas
include
(
external/swig
)
# download, build, install swig
include
(
external/warpctc
)
# download, build, install warpctc
include
(
external/any
)
# download libn::any
include
(
package
)
# set paddle packages
include
(
cpplint
)
# set paddle c++ style
...
...
cmake/external/any.cmake
0 → 100644
浏览文件 @
c1738e29
INCLUDE
(
ExternalProject
)
SET
(
ANY_SOURCE_DIR
${
THIRD_PARTY_PATH
}
/any
)
INCLUDE_DIRECTORIES
(
${
ANY_SOURCE_DIR
}
/src/linb_any
)
ExternalProject_Add
(
linb_any
${
EXTERNAL_PROJECT_LOG_ARGS
}
GIT_REPOSITORY
"https://github.com/thelink2012/any.git"
GIT_TAG
"8fef1e93710a0edf8d7658999e284a1142c4c020"
PREFIX
${
ANY_SOURCE_DIR
}
UPDATE_COMMAND
""
CONFIGURE_COMMAND
""
BUILD_COMMAND
""
INSTALL_COMMAND
""
TEST_COMMAND
""
)
add_definitions
(
-DANY_IMPL_ANY_CAST_MOVEABLE
)
demo/seqToseq/api_train_v2.py
浏览文件 @
c1738e29
import
sys
import
paddle.v2
as
paddle
def
seqToseq_net
(
source_dict_dim
,
target_dict_dim
):
def
seqToseq_net
(
source_dict_dim
,
target_dict_dim
,
is_generating
=
False
):
### Network Architecture
word_vector_dim
=
512
# dimension of word vector
decoder_size
=
512
# dimension of hidden unit in GRU Decoder network
encoder_size
=
512
# dimension of hidden unit in GRU Encoder network
beam_size
=
3
max_length
=
250
#### Encoder
src_word_id
=
paddle
.
layer
.
data
(
name
=
'source_language_word'
,
...
...
@@ -67,30 +71,57 @@ def seqToseq_net(source_dict_dim, target_dict_dim):
group_input2
=
paddle
.
layer
.
StaticInputV2
(
input
=
encoded_proj
,
is_seq
=
True
)
group_inputs
=
[
group_input1
,
group_input2
]
trg_embedding
=
paddle
.
layer
.
embedding
(
input
=
paddle
.
layer
.
data
(
name
=
'target_language_word'
,
type
=
paddle
.
data_type
.
integer_value_sequence
(
target_dict_dim
)),
size
=
word_vector_dim
,
param_attr
=
paddle
.
attr
.
ParamAttr
(
name
=
'_target_language_embedding'
))
group_inputs
.
append
(
trg_embedding
)
# For decoder equipped with attention mechanism, in training,
# target embeding (the groudtruth) is the data input,
# while encoded source sequence is accessed to as an unbounded memory.
# Here, the StaticInput defines a read-only memory
# for the recurrent_group.
decoder
=
paddle
.
layer
.
recurrent_group
(
name
=
decoder_group_name
,
step
=
gru_decoder_with_attention
,
input
=
group_inputs
)
lbl
=
paddle
.
layer
.
data
(
name
=
'target_language_next_word'
,
type
=
paddle
.
data_type
.
integer_value_sequence
(
target_dict_dim
))
cost
=
paddle
.
layer
.
classification_cost
(
input
=
decoder
,
label
=
lbl
)
return
cost
if
not
is_generating
:
trg_embedding
=
paddle
.
layer
.
embedding
(
input
=
paddle
.
layer
.
data
(
name
=
'target_language_word'
,
type
=
paddle
.
data_type
.
integer_value_sequence
(
target_dict_dim
)),
size
=
word_vector_dim
,
param_attr
=
paddle
.
attr
.
ParamAttr
(
name
=
'_target_language_embedding'
))
group_inputs
.
append
(
trg_embedding
)
# For decoder equipped with attention mechanism, in training,
# target embeding (the groudtruth) is the data input,
# while encoded source sequence is accessed to as an unbounded memory.
# Here, the StaticInput defines a read-only memory
# for the recurrent_group.
decoder
=
paddle
.
layer
.
recurrent_group
(
name
=
decoder_group_name
,
step
=
gru_decoder_with_attention
,
input
=
group_inputs
)
lbl
=
paddle
.
layer
.
data
(
name
=
'target_language_next_word'
,
type
=
paddle
.
data_type
.
integer_value_sequence
(
target_dict_dim
))
cost
=
paddle
.
layer
.
classification_cost
(
input
=
decoder
,
label
=
lbl
)
return
cost
else
:
# In generation, the decoder predicts a next target word based on
# the encoded source sequence and the last generated target word.
# The encoded source sequence (encoder's output) must be specified by
# StaticInput, which is a read-only memory.
# Embedding of the last generated word is automatically gotten by
# GeneratedInputs, which is initialized by a start mark, such as <s>,
# and must be included in generation.
trg_embedding
=
paddle
.
layer
.
GeneratedInputV2
(
size
=
target_dict_dim
,
embedding_name
=
'_target_language_embedding'
,
embedding_size
=
word_vector_dim
)
group_inputs
.
append
(
trg_embedding
)
beam_gen
=
paddle
.
layer
.
beam_search
(
name
=
decoder_group_name
,
step
=
gru_decoder_with_attention
,
input
=
group_inputs
,
bos_id
=
0
,
eos_id
=
1
,
beam_size
=
beam_size
,
max_length
=
max_length
)
return
beam_gen
def
main
():
...
...
paddle/function/Function.cpp
浏览文件 @
c1738e29
...
...
@@ -16,66 +16,6 @@ limitations under the License. */
namespace
paddle
{
template
<
>
size_t
FuncConfig
::
get
<
size_t
>
(
const
std
::
string
&
key
)
const
{
auto
it
=
valueMap_
.
find
(
key
);
CHECK
(
it
!=
valueMap_
.
end
())
<<
"Cannot find value: '"
<<
key
<<
"'"
;
return
it
->
second
.
s
;
}
template
<
>
real
FuncConfig
::
get
<
real
>
(
const
std
::
string
&
key
)
const
{
auto
it
=
valueMap_
.
find
(
key
);
CHECK
(
it
!=
valueMap_
.
end
())
<<
"Cannot find value: '"
<<
key
<<
"'"
;
return
it
->
second
.
r
;
}
template
<
>
int
FuncConfig
::
get
<
int
>
(
const
std
::
string
&
key
)
const
{
auto
it
=
valueMap_
.
find
(
key
);
CHECK
(
it
!=
valueMap_
.
end
())
<<
"Cannot find value: '"
<<
key
<<
"'"
;
return
it
->
second
.
i
;
}
template
<
>
bool
FuncConfig
::
get
<
bool
>
(
const
std
::
string
&
key
)
const
{
auto
it
=
valueMap_
.
find
(
key
);
CHECK
(
it
!=
valueMap_
.
end
())
<<
"Cannot find value: '"
<<
key
<<
"'"
;
return
it
->
second
.
b
;
}
template
<
>
FuncConfig
&
FuncConfig
::
set
<
size_t
>
(
const
std
::
string
&
key
,
size_t
v
)
{
CHECK_EQ
(
static_cast
<
int
>
(
valueMap_
.
count
(
key
)),
0
)
<<
"Duplicated value: "
<<
key
;
valueMap_
[
key
].
s
=
v
;
return
*
this
;
}
template
<
>
FuncConfig
&
FuncConfig
::
set
<
real
>
(
const
std
::
string
&
key
,
real
v
)
{
CHECK_EQ
(
static_cast
<
int
>
(
valueMap_
.
count
(
key
)),
0
)
<<
"Duplicated value: "
<<
key
;
valueMap_
[
key
].
r
=
v
;
return
*
this
;
}
template
<
>
FuncConfig
&
FuncConfig
::
set
<
int
>
(
const
std
::
string
&
key
,
int
v
)
{
CHECK_EQ
(
static_cast
<
int
>
(
valueMap_
.
count
(
key
)),
0
)
<<
"Duplicated value: "
<<
key
;
valueMap_
[
key
].
i
=
v
;
return
*
this
;
}
template
<
>
FuncConfig
&
FuncConfig
::
set
<
bool
>
(
const
std
::
string
&
key
,
bool
v
)
{
CHECK_EQ
(
static_cast
<
int
>
(
valueMap_
.
count
(
key
)),
0
)
<<
"Duplicated value: "
<<
key
;
valueMap_
[
key
].
b
=
v
;
return
*
this
;
}
void
BufferArgs
::
addArg
(
const
Matrix
&
arg
,
const
TensorShape
&
shape
,
ArgType
argType
)
{
...
...
paddle/function/Function.h
浏览文件 @
c1738e29
...
...
@@ -18,32 +18,49 @@ limitations under the License. */
#include <vector>
#include "BufferArg.h"
#include "paddle/math/Matrix.h"
#include "paddle/utils/Any.h"
#include "paddle/utils/ClassRegistrar.h"
#include "paddle/utils/Error.h"
namespace
paddle
{
/**
* Function Configuration.
* The argument type of Function::init.
* Follow-up will consider moving this data structure to Proto inside.
*/
class
FuncConfig
{
public:
union
value
{
size_t
s
;
real
r
;
int
i
;
bool
b
;
};
template
<
typename
T
>
T
get
(
const
std
::
string
&
key
)
const
;
T
get
(
const
std
::
string
&
key
,
Error
*
err
=
nullptr
)
const
{
try
{
return
any_cast
<
T
>
(
valueMap_
.
at
(
key
));
}
catch
(
std
::
exception
&
e
)
{
// could be cast or out of range exception.
if
(
err
)
{
*
err
=
Error
(
e
.
what
());
}
else
{
LOG
(
FATAL
)
<<
"Cannot get key "
<<
key
<<
"with error "
<<
e
.
what
();
}
return
T
();
}
}
template
<
typename
T
>
FuncConfig
&
set
(
const
std
::
string
&
key
,
T
v
);
FuncConfig
&
set
(
const
std
::
string
&
key
,
T
v
,
Error
*
err
=
nullptr
)
{
auto
it
=
valueMap_
.
find
(
key
);
if
(
it
!=
valueMap_
.
end
())
{
// already contains key.
if
(
err
)
{
*
err
=
Error
(
"Key %s is already set in FuncConfig"
,
key
.
c_str
());
}
else
{
LOG
(
FATAL
)
<<
"Key "
<<
key
<<
" is already set in FuncConfig."
;
}
return
*
this
;
}
valueMap_
[
key
]
=
any
(
v
);
return
*
this
;
}
protected:
std
::
map
<
std
::
string
,
value
>
valueMap_
;
mutable
std
::
unordered_map
<
std
::
string
,
any
>
valueMap_
;
};
/**
...
...
paddle/function/PadOp.cpp
浏览文件 @
c1738e29
...
...
@@ -25,9 +25,9 @@ void Pad<DEVICE_TYPE_CPU>(real* outputs,
const
int
inH
,
const
int
inW
,
const
PadConf
&
pad
)
{
int
cstart
=
pad
.
channel
Start
,
cend
=
pad
.
channelEnd
;
int
hstart
=
pad
.
height
Start
,
hend
=
pad
.
heightEnd
;
int
wstart
=
pad
.
width
Start
,
wend
=
pad
.
widthEnd
;
int
cstart
=
pad
.
channel
[
0
],
cend
=
pad
.
channel
[
1
]
;
int
hstart
=
pad
.
height
[
0
],
hend
=
pad
.
height
[
1
]
;
int
wstart
=
pad
.
width
[
0
],
wend
=
pad
.
width
[
1
]
;
int
outC
=
inC
+
cstart
+
cend
;
int
outH
=
inH
+
hstart
+
hend
;
int
outW
=
inW
+
wstart
+
wend
;
...
...
@@ -51,9 +51,9 @@ void PadGrad<DEVICE_TYPE_CPU>(real* inGrad,
const
int
inH
,
const
int
inW
,
const
PadConf
&
pad
)
{
int
cstart
=
pad
.
channel
Start
,
cend
=
pad
.
channelEnd
;
int
hstart
=
pad
.
height
Start
,
hend
=
pad
.
heightEnd
;
int
wstart
=
pad
.
width
Start
,
wend
=
pad
.
widthEnd
;
int
cstart
=
pad
.
channel
[
0
],
cend
=
pad
.
channel
[
1
]
;
int
hstart
=
pad
.
height
[
0
],
hend
=
pad
.
height
[
1
]
;
int
wstart
=
pad
.
width
[
0
],
wend
=
pad
.
width
[
1
]
;
int
outC
=
inC
+
cstart
+
cend
;
int
outH
=
inH
+
hstart
+
hend
;
int
outW
=
inW
+
wstart
+
wend
;
...
...
@@ -71,6 +71,12 @@ void PadGrad<DEVICE_TYPE_CPU>(real* inGrad,
}
}
static
inline
PadConf
castToPadConf
(
const
FuncConfig
&
conf
)
{
return
{
conf
.
get
<
std
::
vector
<
uint32_t
>>
(
"channel"
),
conf
.
get
<
std
::
vector
<
uint32_t
>>
(
"height"
),
conf
.
get
<
std
::
vector
<
uint32_t
>>
(
"width"
)};
}
/**
* \brief Padding zeros to input according to the specify dimension.
* The struct pad_ contains the padding size in each dimension.
...
...
@@ -127,14 +133,7 @@ void PadGrad<DEVICE_TYPE_CPU>(real* inGrad,
template
<
DeviceType
Device
>
class
PadFunc
:
public
FunctionBase
{
public:
void
init
(
const
FuncConfig
&
config
)
override
{
pad_
.
channelStart
=
config
.
get
<
int
>
(
"cstart"
);
pad_
.
channelEnd
=
config
.
get
<
int
>
(
"cend"
);
pad_
.
heightStart
=
config
.
get
<
int
>
(
"hstart"
);
pad_
.
heightEnd
=
config
.
get
<
int
>
(
"hend"
);
pad_
.
widthStart
=
config
.
get
<
int
>
(
"wstart"
);
pad_
.
widthEnd
=
config
.
get
<
int
>
(
"wend"
);
}
void
init
(
const
FuncConfig
&
config
)
override
{
pad_
=
castToPadConf
(
config
);
}
void
calc
(
const
BufferArgs
&
inputs
,
const
BufferArgs
&
outputs
)
override
{
CHECK_EQ
(
1UL
,
inputs
.
size
());
...
...
@@ -175,14 +174,7 @@ private:
template
<
DeviceType
Device
>
class
PadGradFunc
:
public
FunctionBase
{
public:
void
init
(
const
FuncConfig
&
config
)
override
{
pad_
.
channelStart
=
config
.
get
<
int
>
(
"cstart"
);
pad_
.
channelEnd
=
config
.
get
<
int
>
(
"cend"
);
pad_
.
heightStart
=
config
.
get
<
int
>
(
"hstart"
);
pad_
.
heightEnd
=
config
.
get
<
int
>
(
"hend"
);
pad_
.
widthStart
=
config
.
get
<
int
>
(
"wstart"
);
pad_
.
widthEnd
=
config
.
get
<
int
>
(
"wend"
);
}
void
init
(
const
FuncConfig
&
config
)
override
{
pad_
=
castToPadConf
(
config
);
}
void
calc
(
const
BufferArgs
&
inputs
,
const
BufferArgs
&
outputs
)
override
{
CHECK_EQ
(
1UL
,
inputs
.
size
());
...
...
paddle/function/PadOp.h
浏览文件 @
c1738e29
...
...
@@ -19,18 +19,12 @@ limitations under the License. */
namespace
paddle
{
struct
PadConf
{
/// how many values to add before the data along channel dimension.
int
channelStart
;
/// how many values to add after the data along channel dimension.
int
channelEnd
;
/// how many values to add before the data along height dimension.
int
heightStart
;
/// how many values to add after the data along height dimension.
int
heightEnd
;
/// how many values to add before the data along width dimension.
int
widthStart
;
/// how many values to add after the data along width dimension.
int
widthEnd
;
/// how many values to add before/after the data along channel dimension.
std
::
vector
<
uint32_t
>
channel
;
/// how many values to add before/after the data along height dimension.
std
::
vector
<
uint32_t
>
height
;
/// how many values to add before/after the data along width dimension.
std
::
vector
<
uint32_t
>
width
;
};
/**
...
...
paddle/gserver/layers/PadLayer.cpp
浏览文件 @
c1738e29
...
...
@@ -36,12 +36,9 @@ bool PadLayer::init(const LayerMap& layerMap,
CHECK_EQ
(
2
,
pad_conf
.
pad_c_size
());
CHECK_EQ
(
2
,
pad_conf
.
pad_h_size
());
CHECK_EQ
(
2
,
pad_conf
.
pad_w_size
());
padc_
.
push_back
(
pad_conf
.
pad_c
(
0
));
padc_
.
push_back
(
pad_conf
.
pad_c
(
1
));
padh_
.
push_back
(
pad_conf
.
pad_h
(
0
));
padh_
.
push_back
(
pad_conf
.
pad_h
(
1
));
padw_
.
push_back
(
pad_conf
.
pad_w
(
0
));
padw_
.
push_back
(
pad_conf
.
pad_w
(
1
));
padc_
=
{
pad_conf
.
pad_c
(
0
),
pad_conf
.
pad_c
(
1
)};
padh_
=
{
pad_conf
.
pad_h
(
0
),
pad_conf
.
pad_h
(
1
)};
padw_
=
{
pad_conf
.
pad_w
(
0
),
pad_conf
.
pad_w
(
1
)};
outDims_
=
TensorShape
(
4
);
setOutDims
(
0
);
...
...
@@ -49,21 +46,15 @@ bool PadLayer::init(const LayerMap& layerMap,
createFunction
(
forward_
,
"Pad"
,
FuncConfig
()
.
set
(
"cstart"
,
padc_
[
0
])
.
set
(
"cend"
,
padc_
[
1
])
.
set
(
"hstart"
,
padh_
[
0
])
.
set
(
"hend"
,
padh_
[
1
])
.
set
(
"wstart"
,
padw_
[
0
])
.
set
(
"wend"
,
padw_
[
1
]));
.
set
(
"channel"
,
padc_
)
.
set
(
"height"
,
padh_
)
.
set
(
"width"
,
padw_
));
createFunction
(
backward_
,
"PadGrad"
,
FuncConfig
()
.
set
(
"cstart"
,
padc_
[
0
])
.
set
(
"cend"
,
padc_
[
1
])
.
set
(
"hstart"
,
padh_
[
0
])
.
set
(
"hend"
,
padh_
[
1
])
.
set
(
"wstart"
,
padw_
[
0
])
.
set
(
"wend"
,
padw_
[
1
]));
.
set
(
"channel"
,
padc_
)
.
set
(
"height"
,
padh_
)
.
set
(
"width"
,
padw_
));
return
true
;
}
...
...
paddle/gserver/layers/PadLayer.h
浏览文件 @
c1738e29
...
...
@@ -38,9 +38,9 @@ protected:
void
setOutDims
(
const
size_t
batchSize
);
void
setTensorDim
(
const
size_t
batchSize
);
std
::
vector
<
in
t
>
padc_
;
std
::
vector
<
in
t
>
padh_
;
std
::
vector
<
in
t
>
padw_
;
std
::
vector
<
uint32_
t
>
padc_
;
std
::
vector
<
uint32_
t
>
padh_
;
std
::
vector
<
uint32_
t
>
padw_
;
TensorShape
inDims_
;
TensorShape
outDims_
;
};
...
...
paddle/py_paddle/dataprovider_converter.py
浏览文件 @
c1738e29
...
...
@@ -160,10 +160,19 @@ class SparseFloatScanner(SparseBinaryScanner):
class
IndexScanner
(
IScanner
):
def
__init__
(
self
,
input_type
,
pos
):
IScanner
.
__init__
(
self
,
input_type
,
pos
)
self
.
__ids__
=
[]
self
.
__ids__
=
None
self
.
__idx__
=
0
def
pre_scan
(
self
,
dat
):
self
.
__idx__
+=
1
def
finish_pre_scan
(
self
,
argument
):
self
.
__ids__
=
[
0
]
*
self
.
__idx__
self
.
__idx__
=
0
def
scan
(
self
,
dat
):
self
.
__ids__
.
append
(
dat
)
self
.
__ids__
[
self
.
__idx__
]
=
dat
self
.
__idx__
+=
1
def
finish_scan
(
self
,
argument
):
ids
=
swig_paddle
.
IVector
.
create
(
self
.
__ids__
,
self
.
data_in_gpu
)
...
...
paddle/utils/Any.h
0 → 100644
浏览文件 @
c1738e29
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#if __cplusplus > 201402L
#include <any>
namespace
paddle
{
// using std::any for C++ 17
using
std
::
any
;
using
std
::
any_cast
;
using
std
::
bad_any_cast
;
}
// namespace paddle
#else
#include <any.hpp>
namespace
paddle
{
// use linb::any for C++ 11
using
linb
::
any
;
using
linb
::
any_cast
;
using
linb
::
bad_any_cast
;
}
// namespace paddle
#endif
python/paddle/trainer_config_helpers/layers.py
浏览文件 @
c1738e29
...
...
@@ -18,7 +18,7 @@ import inspect
from
paddle.trainer.config_parser
import
*
from
.activations
import
LinearActivation
,
SigmoidActivation
,
TanhActivation
,
\
ReluActivation
,
IdentityActivation
,
SoftmaxActivation
ReluActivation
,
IdentityActivation
,
SoftmaxActivation
,
BaseActivation
from
.evaluators
import
*
from
.poolings
import
MaxPooling
,
AvgPooling
,
BasePoolingType
from
.attrs
import
*
...
...
@@ -2277,8 +2277,9 @@ def img_pool_layer(input,
pool_type
.
name
=
'avg'
type_name
=
pool_type
.
name
+
'-projection'
\
if
(
isinstance
(
pool_type
,
AvgPooling
)
or
isinstance
(
pool_type
,
MaxPooling
))
\
else
pool_type
.
name
if
(
isinstance
(
pool_type
,
AvgPooling
)
or
isinstance
(
pool_type
,
MaxPooling
))
\
else
pool_type
.
name
pool_size_y
=
pool_size
if
pool_size_y
is
None
else
pool_size_y
stride_y
=
stride
if
stride_y
is
None
else
stride_y
...
...
@@ -3318,8 +3319,8 @@ def recurrent_group(step,
assert
(
targetInlink
==
None
or
targetInlink_in_inlinks
())
targetInlinkName
=
None
if
targetInlink
==
None
\
else
targetInlink
.
name
if
isinstance
(
targetInlink
,
LayerOutput
)
\
else
targetInlink
.
input
.
name
else
targetInlink
.
name
if
isinstance
(
targetInlink
,
LayerOutput
)
\
else
targetInlink
.
input
.
name
contains_sub_seq
=
[
False
]
...
...
@@ -4831,12 +4832,14 @@ def crf_decoding_layer(input,
return
LayerOutput
(
name
,
LayerType
.
CRF_DECODING_LAYER
,
parents
,
size
=
1
)
@
wrap_act_default
(
act
=
SigmoidActivation
())
@
wrap_bias_attr_default
(
has_bias
=
True
)
@
wrap_name_default
()
@
layer_support
()
def
nce_layer
(
input
,
label
,
num_classes
,
act
=
None
,
weight
=
None
,
num_neg_samples
=
10
,
neg_distribution
=
None
,
...
...
@@ -4865,6 +4868,8 @@ def nce_layer(input,
:type weight: LayerOutput
:param num_classes: number of classes.
:type num_classes: int
:param act: Activation, default is Sigmoid.
:type act: BaseActivation
:param num_neg_samples: number of negative samples. Default is 10.
:type num_neg_samples: int
:param neg_distribution: The distribution for generating the random negative labels.
...
...
@@ -4887,6 +4892,8 @@ def nce_layer(input,
assert
isinstance
(
neg_distribution
,
collections
.
Sequence
)
assert
len
(
neg_distribution
)
==
num_classes
assert
sum
(
neg_distribution
)
==
1
if
not
isinstance
(
act
,
BaseActivation
):
raise
TypeError
()
ipts_for_layer
=
[]
parents
=
[]
...
...
@@ -4908,12 +4915,17 @@ def nce_layer(input,
type
=
LayerType
.
NCE_LAYER
,
num_classes
=
num_classes
,
neg_sampling_dist
=
neg_distribution
,
active_type
=
act
.
name
,
num_neg_samples
=
num_neg_samples
,
inputs
=
ipts_for_layer
,
bias
=
ParamAttr
.
to_bias
(
bias_attr
),
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
))
return
LayerOutput
(
name
,
LayerType
.
NCE_LAYER
,
parents
=
parents
,
size
=
l
.
config
.
size
)
name
,
LayerType
.
NCE_LAYER
,
parents
=
parents
,
size
=
l
.
config
.
size
,
activation
=
act
)
"""
...
...
python/paddle/v2/config_base.py
浏览文件 @
c1738e29
...
...
@@ -67,7 +67,16 @@ class Layer(object):
self
.
name
=
name
self
.
__context__
=
{}
self
.
__parent_layers__
=
parent_layers
self
.
__children_layers__
=
[]
# used for evaluator.
# some layer may have some extra parent layer
self
.
__extra_parent__
=
[]
# used for evaluator.
self
.
__children_layers__
=
[]
def
extra_parent
(
self
):
return
self
.
__extra_parent__
def
append_extra_parent
(
self
,
parent
):
self
.
__extra_parent__
.
append
(
parent
)
def
append_child
(
self
,
layer
,
parent_names
):
self
.
__children_layers__
.
append
((
layer
,
parent_names
))
...
...
@@ -78,14 +87,20 @@ class Layer(object):
"""
self
.
__context__
=
context
#
short cut if myself
is parsed before.
#
STEP: short cut if this layer
is parsed before.
if
self
.
context_name
()
in
context
:
if
self
.
use_context_name
():
return
context
[
self
.
context_name
()]
else
:
return
context
[
self
.
name
]
# parse parent before myself
# STEP: parse extra_parent that is not used by this layer but must
# be parsed before this layer.
for
p
in
self
.
__extra_parent__
:
p
.
to_proto
(
context
=
context
)
# STEP: parse parent that is used by this layer, get the result and
# insert into kwargs of the next layer's to_proto_impl method.
kwargs
=
dict
()
for
layer_name
in
self
.
__parent_layers__
:
if
not
isinstance
(
self
.
__parent_layers__
[
layer_name
],
...
...
@@ -97,14 +112,13 @@ class Layer(object):
self
.
__parent_layers__
[
layer_name
])
kwargs
[
layer_name
]
=
v1_layer
#
parse myself
.
#
STEP: parse myself and add myself into context
.
ret_val
=
self
.
to_proto_impl
(
**
kwargs
)
if
self
.
context_name
()
is
not
None
and
\
self
.
context_name
()
not
in
context
:
if
self
.
context_name
()
is
not
None
\
and
self
.
context_name
()
not
in
context
:
context
[
self
.
context_name
()]
=
ret_val
#
parse children
.
#
STEP: parse children that should be pased after this layer
.
for
layer
,
pnames
in
self
.
__children_layers__
:
drop
=
False
...
...
@@ -117,6 +131,7 @@ class Layer(object):
continue
layer
.
to_proto
(
context
=
context
)
# STEP: return v1 layer result
if
self
.
context_name
()
is
None
:
return
ret_val
elif
self
.
use_context_name
():
...
...
python/paddle/v2/dataset/common.py
浏览文件 @
c1738e29
...
...
@@ -66,13 +66,6 @@ def download(url, module_name, md5sum):
return
filename
def
dict_add
(
a_dict
,
ele
):
if
ele
in
a_dict
:
a_dict
[
ele
]
+=
1
else
:
a_dict
[
ele
]
=
1
def
fetch_all
():
for
module_name
in
filter
(
lambda
x
:
not
x
.
startswith
(
"__"
),
dir
(
paddle
.
v2
.
dataset
)):
...
...
python/paddle/v2/dataset/imdb.py
浏览文件 @
c1738e29
...
...
@@ -18,6 +18,7 @@ TODO(yuyang18): Complete comments.
"""
import
paddle.v2.dataset.common
import
collections
import
tarfile
import
Queue
import
re
...
...
@@ -48,10 +49,10 @@ def tokenize(pattern):
def
build_dict
(
pattern
,
cutoff
):
word_freq
=
{}
word_freq
=
collections
.
defaultdict
(
int
)
for
doc
in
tokenize
(
pattern
):
for
word
in
doc
:
paddle
.
v2
.
dataset
.
common
.
dict_add
(
word_freq
,
word
)
word_freq
[
word
]
+=
1
# Not sure if we should prune less-frequent words here.
word_freq
=
filter
(
lambda
x
:
x
[
1
]
>
cutoff
,
word_freq
.
items
())
...
...
python/paddle/v2/dataset/imikolov.py
浏览文件 @
c1738e29
...
...
@@ -17,6 +17,7 @@ imikolov's simple dataset: http://www.fit.vutbr.cz/~imikolov/rnnlm/
Complete comments.
"""
import
paddle.v2.dataset.common
import
collections
import
tarfile
__all__
=
[
'train'
,
'test'
,
'build_dict'
]
...
...
@@ -26,15 +27,14 @@ MD5 = '30177ea32e27c525793142b6bf2c8e2d'
def
word_count
(
f
,
word_freq
=
None
):
add
=
paddle
.
v2
.
dataset
.
common
.
dict_add
if
word_freq
==
None
:
word_freq
=
{}
if
word_freq
is
None
:
word_freq
=
collections
.
defaultdict
(
int
)
for
l
in
f
:
for
w
in
l
.
strip
().
split
():
add
(
word_freq
,
w
)
add
(
word_freq
,
'<s>'
)
add
(
word_freq
,
'<e>'
)
word_freq
[
w
]
+=
1
word_freq
[
'<s>'
]
+=
1
word_freq
[
'<e>'
]
+=
1
return
word_freq
...
...
python/paddle/v2/layer.py
浏览文件 @
c1738e29
...
...
@@ -33,40 +33,52 @@ The primary usage shows below.
import
collections
import
inspect
from
config_base
import
Layer
,
__convert_to_v2__
import
re
import
paddle.trainer_config_helpers
as
conf_helps
from
paddle.trainer.config_parser
import
\
RecurrentLayerGroupWithoutOutLinksBegin
,
RecurrentLayerGroupSetOutLink
,
\
RecurrentLayerGroupEnd
,
model_type
from
paddle.trainer_config_helpers.config_parser_utils
import
\
parse_network_config
as
__parse__
from
paddle.trainer_config_helpers.default_decorators
import
wrap_act_default
from
paddle.trainer_config_helpers.default_decorators
import
\
wrap_bias_attr_default
from
paddle.trainer_config_helpers.default_decorators
import
wrap_name_default
from
paddle.trainer_config_helpers.layers
import
RecurrentLayerGroupSetGenerator
,
Generator
from
paddle.trainer_config_helpers.layers
import
layer_support
from
paddle.trainer.config_parser
import
\
RecurrentLayerGroupWithoutOutLinksBegin
,
RecurrentLayerGroupSetOutLink
,
\
RecurrentLayerGroupEnd
,
model_type
import
activation
import
re
import
attr
import
data_type
from
config_base
import
Layer
,
__convert_to_v2__
__all__
=
[
'parse_network'
,
'data'
]
def
parse_network
(
*
outputs
):
def
parse_network
(
output_layers
,
extra_layers
=
None
):
"""
Parse all output layers and then generate a ModelConfig object.
Parse all layers in the neural network graph and
then generate a ModelConfig object.
.. note::
This function is used internally in paddle.v2 module. User should never
invoke this method.
:param outputs: Output layers.
:type outputs: Layer
:param output_layers: Output layers.
:type output_layers: Layer
:param extra_layers: Some layers in the neural network graph are not in the
path of output_layers.
:type extra_layers: Layer
:return: A ModelConfig object instance.
:rtype: ModelConfig
"""
if
not
isinstance
(
output_layers
,
collections
.
Sequence
):
output_layers
=
[
output_layers
]
if
extra_layers
is
not
None
and
not
isinstance
(
extra_layers
,
collections
.
Sequence
):
extra_layers
=
[
extra_layers
]
def
__real_func__
():
"""
...
...
@@ -74,7 +86,11 @@ def parse_network(*outputs):
the plain old paddle configuration function.
"""
context
=
dict
()
real_output
=
[
each
.
to_proto
(
context
=
context
)
for
each
in
outputs
]
real_output
=
[
each
.
to_proto
(
context
=
context
)
for
each
in
output_layers
]
if
extra_layers
is
not
None
:
extra_output
=
[
each
.
to_proto
(
context
=
context
)
for
each
in
extra_layers
]
conf_helps
.
outputs
(
real_output
)
return
__parse__
(
__real_func__
)
...
...
@@ -119,54 +135,23 @@ class DataLayerV2(Layer):
return
doc
class
WithExtraParent
(
Layer
):
def
extra_parent
(
self
):
return
self
.
__extra_parent__
def
__init__
(
self
,
name
=
None
,
parent_layers
=
None
):
self
.
__extra_parent__
=
[]
super
(
WithExtraParent
,
self
).
__init__
(
name
=
name
,
parent_layers
=
parent_layers
)
def
append_extra_parent
(
self
,
parent
):
self
.
__extra_parent__
.
append
(
parent
)
def
to_proto
(
self
,
context
):
class
MemoryV2
(
Layer
):
def
__init__
(
self
,
name
,
extra_input
=
None
,
**
kwargs
):
"""
function to set proto attribute
Init memory object, if memory is inited inside recurrent_group step
function, it may depend on a boot_layer that should be initialized
outside recurrent_group, so we:
1. add RecurrentLayerInput to extra_parent of self.
2. add boot_layer to the extra_parent of RecurrentLayerInput.
:param extra_input: list of RecurrentLayerInput
:type extra_input: [RecurrentLayerInput]
"""
kwargs
=
dict
()
for
p
in
self
.
__extra_parent__
:
p
.
to_proto
(
context
=
context
)
for
layer_name
in
self
.
__parent_layers__
:
if
not
isinstance
(
self
.
__parent_layers__
[
layer_name
],
collections
.
Sequence
):
v1_layer
=
self
.
__parent_layers__
[
layer_name
].
to_proto
(
context
=
context
)
else
:
v1_layer
=
map
(
lambda
x
:
x
.
to_proto
(
context
=
context
),
self
.
__parent_layers__
[
layer_name
])
kwargs
[
layer_name
]
=
v1_layer
if
self
.
context_name
()
is
None
:
return
self
.
to_proto_impl
(
context
=
context
,
**
kwargs
)
elif
self
.
context_name
()
not
in
context
:
context
[
self
.
context_name
()]
=
self
.
to_proto_impl
(
context
=
context
,
**
kwargs
)
if
self
.
use_context_name
():
return
context
[
self
.
context_name
()]
else
:
return
context
[
self
.
name
]
class
MemoryV2
(
WithExtraParent
):
def
__init__
(
self
,
name
,
**
kwargs
):
self
.
name
=
name
super
(
MemoryV2
,
self
).
__init__
(
name
=
name
,
parent_layers
=
dict
())
self
.
__kwargs__
=
kwargs
self
.
__boot_layer_name__
=
None
if
'boot_layer'
in
kwargs
:
begin_of_current_rnn
=
[]
# TODO(yuyang18): Fix inspect, it could be wrong when user invoke a
...
...
@@ -189,11 +174,10 @@ class MemoryV2(WithExtraParent):
assert
begin_of_current_rnn
is
not
None
for
extra
in
begin_of_current_rnn
:
self
.
append_extra_parent
(
extra
)
assert
isinstance
(
extra
,
WithExtraParent
)
extra
.
append_extra_parent
(
kwargs
[
'boot_layer'
])
self
.
__boot_layer_name__
=
kwargs
[
'boot_layer'
].
name
def
to_proto_impl
(
self
,
context
,
**
kwargs
):
def
to_proto_impl
(
self
,
**
kwargs
):
args
=
dict
()
for
each
in
kwargs
:
args
[
each
]
=
kwargs
[
each
]
...
...
@@ -201,7 +185,7 @@ class MemoryV2(WithExtraParent):
args
[
each
]
=
self
.
__kwargs__
[
each
]
if
self
.
__boot_layer_name__
is
not
None
:
args
[
'boot_layer'
]
=
context
[
self
.
__boot_layer_name__
]
args
[
'boot_layer'
]
=
self
.
__context__
[
self
.
__boot_layer_name__
]
size
=
args
.
get
(
'size'
,
None
)
if
size
is
not
None
:
...
...
@@ -223,22 +207,6 @@ class MemoryV2(WithExtraParent):
return
True
class
LayerOutputV2
(
Layer
):
"""
LayerOutputV2 is used to store the result of LayerOutput in v1 api.
It will not store it's parents because layer_output has been parsed already.
"""
def
__init__
(
self
,
layer_output
):
assert
isinstance
(
layer_output
,
conf_helps
.
LayerOutput
)
self
.
layer_output
=
layer_output
super
(
LayerOutputV2
,
self
).
__init__
(
name
=
layer_output
.
name
,
parent_layers
=
dict
())
def
to_proto_impl
(
self
):
return
self
.
layer_output
class
StaticInputV2
(
object
):
def
__init__
(
self
,
input
,
is_seq
=
False
,
size
=
None
):
assert
isinstance
(
input
,
LayerV2
)
...
...
@@ -250,6 +218,66 @@ class StaticInputV2(object):
# assert input.size is not None or size is not None
class
BaseGeneratedInputV2
(
object
):
def
__init__
(
self
):
self
.
bos_id
=
None
self
.
eos_id
=
None
def
before_real_step
(
self
):
raise
NotImplementedError
()
def
after_real_step
(
self
,
*
args
):
raise
NotImplementedError
()
class
GeneratedInputV2
(
BaseGeneratedInputV2
):
def
__init__
(
self
,
size
,
embedding_name
,
embedding_size
):
super
(
GeneratedInputV2
,
self
).
__init__
()
self
.
size
=
size
self
.
embedding_name
=
embedding_name
self
.
embedding_size
=
embedding_size
def
after_real_step
(
self
,
input
):
return
max_id
(
input
=
input
,
name
=
'__beam_search_predict__'
)
def
before_real_step
(
self
):
predict_id
=
memory
(
name
=
'__beam_search_predict__'
,
size
=
self
.
size
,
boot_with_const_id
=
self
.
bos_id
)
trg_emb
=
embedding
(
input
=
predict_id
,
size
=
self
.
embedding_size
,
param_attr
=
attr
.
ParamAttr
(
name
=
self
.
embedding_name
))
return
trg_emb
class
RecurrentLayerGroupSetGeneratorV2
(
Layer
):
def
__init__
(
self
,
eos_name
,
max_length
,
beam_size
,
num_results_per_sample
):
self
.
eos_name
=
eos_name
self
.
max_length
=
max_length
self
.
beam_size
=
beam_size
self
.
num_results_per_sample
=
num_results_per_sample
super
(
RecurrentLayerGroupSetGeneratorV2
,
self
).
__init__
(
name
=
eos_name
,
parent_layers
=
{})
def
to_proto_impl
(
self
,
**
kwargs
):
RecurrentLayerGroupSetGenerator
(
Generator
(
eos_layer_name
=
self
.
eos_name
,
max_num_frames
=
self
.
max_length
,
beam_size
=
self
.
beam_size
,
num_results_per_sample
=
self
.
num_results_per_sample
))
return
self
def
context_name
(
self
):
return
self
.
eos_name
+
".fake"
def
use_context_name
(
self
):
return
True
class
MixedLayerV2
(
Layer
):
"""
This class is use to support `with` grammar. If not, the following code
...
...
@@ -328,18 +356,24 @@ def mixed(size=0,
return
MixedLayerV2
(
size
,
input
,
name
,
act
,
bias_attr
,
layer_attr
)
class
RecurrentLayerInput
(
WithExtraParent
):
class
RecurrentLayerInput
(
Layer
):
def
__init__
(
self
,
recurrent_name
,
index
,
parent_layers
):
assert
len
(
parent_layers
)
==
1
self
.
__parents__
=
parent_layers
.
values
()[
0
]
super
(
RecurrentLayerInput
,
self
).
__init__
(
name
=
self
.
__parents__
[
index
].
name
,
parent_layers
=
parent_layers
)
parents_len
=
len
(
parent_layers
)
assert
parents_len
<=
1
if
parents_len
==
0
:
self
.
__parents__
=
[]
else
:
self
.
__parents__
=
parent_layers
.
values
()[
0
]
self
.
__recurrent_name__
=
recurrent_name
name
=
self
.
__parents__
[
index
].
name
if
index
>=
0
else
self
.
context_name
()
super
(
RecurrentLayerInput
,
self
).
__init__
(
name
=
name
,
parent_layers
=
parent_layers
)
def
context_name
(
self
):
return
self
.
__recurrent_name__
+
".begin"
def
to_proto_impl
(
self
,
context
,
**
kwargs
):
def
to_proto_impl
(
self
,
**
kwargs
):
model_type
(
'recurrent_nn'
)
RecurrentLayerGroupWithoutOutLinksBegin
(
name
=
self
.
__recurrent_name__
,
...
...
@@ -436,6 +470,11 @@ def recurrent_group(step, input, name=None):
for
i
in
xrange
(
len
(
non_static_inputs
))
]
extra_input
=
None
if
len
(
non_static_inputs
)
==
0
:
extra_input
=
RecurrentLayerInput
(
recurrent_name
=
name
,
index
=-
1
,
parent_layers
=
{})
def
__real_step__
(
*
args
):
rnn_input
=
list
(
args
)
static_inputs
=
filter
(
lambda
x
:
isinstance
(
x
,
StaticInputV2
),
input
)
...
...
@@ -443,6 +482,7 @@ def recurrent_group(step, input, name=None):
mem_name
=
"__%s_memory__"
%
static_input
.
input
.
name
mem
=
memory
(
name
=
mem_name
,
extra_input
=
extra_input
,
is_seq
=
static_input
.
is_seq
,
size
=
static_input
.
input
.
calculate_size
,
boot_layer
=
static_input
.
input
)
...
...
@@ -472,6 +512,73 @@ def recurrent_group(step, input, name=None):
return
retv
@wrap_name_default()
def beam_search(step,
                input,
                bos_id,
                eos_id,
                beam_size,
                max_length=500,
                name=None,
                num_results_per_sample=None):
    """Build a generating recurrent group driven by beam search.

    :param step: callable invoked with the step arguments; the layer returned
        by the generated input's ``before_real_step`` is inserted at the
        position the generated input had in ``input``.
    :param input: a single ``StaticInputV2`` / ``BaseGeneratedInputV2`` or a
        list of them. Exactly one ``BaseGeneratedInputV2`` is required.
    :param bos_id: stored on the generated input as ``bos_id``.
    :param eos_id: stored on the generated input as ``eos_id`` and passed to
        the ``eos`` layer.
    :param beam_size: forwarded to the generator layer.
    :param max_length: forwarded to the generator layer.
    :param name: name of the recurrent group; also used to derive the
        end-of-sequence layer name.
    :param num_results_per_sample: defaults to ``beam_size`` when ``None``;
        must not exceed ``beam_size``.
    :return: the value returned by ``recurrent_group``.
    """
    if num_results_per_sample is None:
        num_results_per_sample = beam_size
    assert num_results_per_sample <= beam_size, (
        "num_results_per_sample should not be greater than beam_size")

    accepted_types = (StaticInputV2, BaseGeneratedInputV2)
    if isinstance(input, accepted_types):
        input = [input]

    # Locate the single generated input; every other entry is handed to
    # recurrent_group as a real input.
    generated_input_index = -1
    real_input = []
    for i, each_input in enumerate(input):
        assert isinstance(each_input, accepted_types)
        if isinstance(each_input, BaseGeneratedInputV2):
            # Only one generated input is allowed.
            assert generated_input_index == -1
            generated_input_index = i
        else:
            real_input.append(each_input)

    assert generated_input_index != -1

    gipt = input[generated_input_index]
    gipt.bos_id = bos_id
    gipt.eos_id = eos_id

    def __real_step__(*args):
        eos_name = "__%s_eos_layer__" % name
        generator = RecurrentLayerGroupSetGeneratorV2(
            eos_name, max_length, beam_size, num_results_per_sample)

        args = list(args)
        before_step_layer = gipt.before_real_step()
        # Attach the generator as a child so it is reachable from the graph.
        before_step_layer.append_child(
            layer=generator, parent_names=[before_step_layer.name])
        args.insert(generated_input_index, before_step_layer)

        predict = gipt.after_real_step(step(*args))

        eos_layer = eos(input=predict, eos_id=eos_id, name=eos_name)
        predict.append_child(layer=eos_layer, parent_names=[predict.name])

        return predict

    return recurrent_group(step=__real_step__, input=real_input, name=name)
# Names exported by conf_helps that end with '_projection'.
__projection_names__ = [
    helper_name for helper_name in dir(conf_helps)
    if helper_name.endswith('_projection')
]
...
...
python/paddle/v2/parameters.py
浏览文件 @
c1738e29
...
...
@@ -159,7 +159,8 @@ class Parameters(object):
if
not
self
.
has_key
(
key
):
raise
ValueError
(
"No such parameter %s"
%
key
)
conf
=
self
.
__param_conf__
[
key
]
return
tuple
(
map
(
int
,
conf
.
dims
))
dims
=
conf
.
dims
if
conf
.
dims
else
(
1
,
conf
.
size
)
return
tuple
(
map
(
int
,
dims
))
def
__setitem__
(
self
,
key
,
value
):
"""
...
...
python/paddle/v2/tests/test_layer.py
浏览文件 @
c1738e29
...
...
@@ -59,13 +59,13 @@ class ImageLayerTest(unittest.TestCase):
num_channels
=
16
,
pool_type
=
pooling
.
Max
())
maxout
=
layer
.
maxout
(
input
=
conv
,
num_channels
=
16
,
groups
=
4
)
print
layer
.
parse_network
(
maxpool
,
spp
,
maxout
)
print
layer
.
parse_network
(
[
maxpool
,
spp
,
maxout
]
)
def
test_norm_layer
(
self
):
norm1
=
layer
.
img_cmrnorm
(
input
=
conv
,
size
=
5
)
norm2
=
layer
.
batch_norm
(
input
=
conv
)
norm3
=
layer
.
sum_to_one_norm
(
input
=
conv
)
print
layer
.
parse_network
(
norm1
,
norm2
,
norm3
)
print
layer
.
parse_network
(
[
norm1
,
norm2
,
norm3
]
)
class
AggregateLayerTest
(
unittest
.
TestCase
):
...
...
@@ -78,7 +78,8 @@ class AggregateLayerTest(unittest.TestCase):
first_seq
=
layer
.
first_seq
(
input
=
pixel
)
concat
=
layer
.
concat
(
input
=
[
last_seq
,
first_seq
])
seq_concat
=
layer
.
seq_concat
(
a
=
last_seq
,
b
=
first_seq
)
print
layer
.
parse_network
(
pool
,
last_seq
,
first_seq
,
concat
,
seq_concat
)
print
layer
.
parse_network
(
[
pool
,
last_seq
,
first_seq
,
concat
,
seq_concat
])
class
MathLayerTest
(
unittest
.
TestCase
):
...
...
@@ -95,8 +96,10 @@ class MathLayerTest(unittest.TestCase):
tensor
=
layer
.
tensor
(
a
=
pixel
,
b
=
pixel
,
size
=
1000
)
cos_sim
=
layer
.
cos_sim
(
a
=
pixel
,
b
=
pixel
)
trans
=
layer
.
trans
(
input
=
tensor
)
print
layer
.
parse_network
(
addto
,
linear_comb
,
interpolation
,
power
,
scaling
,
slope
,
tensor
,
cos_sim
,
trans
)
print
layer
.
parse_network
([
addto
,
linear_comb
,
interpolation
,
power
,
scaling
,
slope
,
tensor
,
cos_sim
,
trans
])
class
ReshapeLayerTest
(
unittest
.
TestCase
):
...
...
@@ -110,7 +113,8 @@ class ReshapeLayerTest(unittest.TestCase):
repeat
=
layer
.
repeat
(
input
=
pixel
,
num_repeats
=
4
)
reshape
=
layer
.
seq_reshape
(
input
=
pixel
,
reshape_size
=
4
)
rotate
=
layer
.
rotate
(
input
=
pixel
,
height
=
16
,
width
=
49
)
print
layer
.
parse_network
(
block_expand
,
expand
,
repeat
,
reshape
,
rotate
)
print
layer
.
parse_network
(
[
block_expand
,
expand
,
repeat
,
reshape
,
rotate
])
class
RecurrentLayerTest
(
unittest
.
TestCase
):
...
...
@@ -119,7 +123,7 @@ class RecurrentLayerTest(unittest.TestCase):
recurrent
=
layer
.
recurrent
(
input
=
word
)
lstm
=
layer
.
lstmemory
(
input
=
word
)
gru
=
layer
.
grumemory
(
input
=
word
)
print
layer
.
parse_network
(
recurrent
,
lstm
,
gru
)
print
layer
.
parse_network
(
[
recurrent
,
lstm
,
gru
]
)
class
CostLayerTest
(
unittest
.
TestCase
):
...
...
@@ -139,10 +143,10 @@ class CostLayerTest(unittest.TestCase):
cost10
=
layer
.
sum_cost
(
input
=
inference
)
cost11
=
layer
.
huber_cost
(
input
=
score
,
label
=
label
)
print
layer
.
parse_network
(
cost1
,
cost2
)
print
layer
.
parse_network
(
cost3
,
cost4
)
print
layer
.
parse_network
(
cost5
,
cost6
)
print
layer
.
parse_network
(
cost7
,
cost8
,
cost9
,
cost10
,
cost11
)
print
layer
.
parse_network
(
[
cost1
,
cost2
]
)
print
layer
.
parse_network
(
[
cost3
,
cost4
]
)
print
layer
.
parse_network
(
[
cost5
,
cost6
]
)
print
layer
.
parse_network
(
[
cost7
,
cost8
,
cost9
,
cost10
,
cost11
]
)
crf
=
layer
.
crf
(
input
=
inference
,
label
=
label
)
crf_decoding
=
layer
.
crf_decoding
(
input
=
inference
,
size
=
3
)
...
...
@@ -151,8 +155,8 @@ class CostLayerTest(unittest.TestCase):
nce
=
layer
.
nce
(
input
=
inference
,
label
=
label
,
num_classes
=
3
)
hsigmoid
=
layer
.
hsigmoid
(
input
=
inference
,
label
=
label
,
num_classes
=
3
)
print
layer
.
parse_network
(
crf
,
crf_decoding
,
ctc
,
warp_ctc
,
nce
,
hsigmoid
)
print
layer
.
parse_network
(
[
crf
,
crf_decoding
,
ctc
,
warp_ctc
,
nce
,
hsigmoid
]
)
class
OtherLayerTest
(
unittest
.
TestCase
):
...
...
@@ -160,7 +164,7 @@ class OtherLayerTest(unittest.TestCase):
maxid
=
layer
.
max_id
(
input
=
inference
)
sampling_id
=
layer
.
sampling_id
(
input
=
inference
)
eos
=
layer
.
eos
(
input
=
maxid
,
eos_id
=
5
)
print
layer
.
parse_network
(
maxid
,
sampling_id
,
eos
)
print
layer
.
parse_network
(
[
maxid
,
sampling_id
,
eos
]
)
def
test_slicing_joining_layer
(
self
):
pad
=
layer
.
pad
(
input
=
conv
,
pad_c
=
[
2
,
3
],
pad_h
=
[
1
,
2
],
pad_w
=
[
3
,
1
])
...
...
python/paddle/v2/topology.py
浏览文件 @
c1738e29
...
...
@@ -17,7 +17,6 @@ import collections
from
paddle.proto.ModelConfig_pb2
import
ModelConfig
import
layer
as
v2_layer
from
layer
import
WithExtraParent
__all__
=
[
'Topology'
]
...
...
@@ -41,9 +40,8 @@ def __bfs_travel__(callback, *layers):
__break__
=
callback
(
each_layer
)
if
__break__
:
return
__layers__
=
each_layer
.
__parent_layers__
.
values
()
if
isinstance
(
each_layer
,
WithExtraParent
):
__layers__
=
__layers__
+
each_layer
.
extra_parent
()
__layers__
=
each_layer
.
__parent_layers__
.
values
()
+
\
each_layer
.
extra_parent
()
__bfs_travel__
(
callback
,
*
__layers__
)
...
...
@@ -53,14 +51,26 @@ class Topology(object):
and network configs.
"""
def
__init__
(
self
,
layers
):
if
not
isinstance
(
layers
,
collections
.
Sequence
):
__check_layer_type__
(
layers
)
layers
=
[
layers
]
for
layer
in
layers
:
__check_layer_type__
(
layer
)
def
__init__
(
self
,
layers
,
extra_layers
=
None
):
def
__check__
(
layers
):
if
not
isinstance
(
layers
,
collections
.
Sequence
):
__check_layer_type__
(
layers
)
layers
=
[
layers
]
for
layer
in
layers
:
__check_layer_type__
(
layer
)
return
layers
layers
=
__check__
(
layers
)
self
.
layers
=
layers
self
.
__model_config__
=
v2_layer
.
parse_network
(
*
layers
)
if
extra_layers
is
not
None
:
extra_layers
=
__check__
(
extra_layers
)
self
.
__model_config__
=
v2_layer
.
parse_network
(
layers
,
extra_layers
=
extra_layers
)
if
extra_layers
is
not
None
:
self
.
layers
.
extend
(
extra_layers
)
assert
isinstance
(
self
.
__model_config__
,
ModelConfig
)
def
proto
(
self
):
...
...
python/paddle/v2/trainer.py
浏览文件 @
c1738e29
...
...
@@ -37,9 +37,12 @@ class SGD(object):
:type cost: paddle.v2.config_base.Layer
:param parameters: The parameters dictionary.
:type parameters: paddle.v2.parameters.Parameters
:param extra_layers: Some layers in the neural network graph are not
in the path of cost layer.
:type extra_layers: paddle.v2.config_base.Layer
"""
def
__init__
(
self
,
cost
,
parameters
,
update_equation
):
def
__init__
(
self
,
cost
,
parameters
,
update_equation
,
extra_layers
=
None
):
if
not
isinstance
(
parameters
,
v2_parameters
.
Parameters
):
raise
TypeError
(
'parameters should be parameters'
)
...
...
@@ -47,7 +50,7 @@ class SGD(object):
if
not
isinstance
(
update_equation
,
v2_optimizer
.
Optimizer
):
raise
TypeError
(
"update equation parameter must be "
"paddle.v2.optimizer.Optimizer"
)
topology
=
Topology
(
cost
)
topology
=
Topology
(
cost
,
extra_layers
=
extra_layers
)
self
.
__optimizer__
=
update_equation
self
.
__topology__
=
topology
self
.
__parameters__
=
parameters
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录