23e47bb6
编写于
9月 28, 2016
作者:
L
liaogang
浏览文件
操作
浏览文件
下载
差异文件
Merge remote-tracking branch 'upstream/master'
上级
a8df4111
d130d181
变更
44
隐藏空白更改
内联
并排
Showing
44 changed file
with
1215 addition
and
444 deletion
+1215
-444
doc_cn/demo/quick_start/index.md  +1  -1
paddle/cuda/src/hl_cuda_cublas.cc  +2  -2
paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp  +25  -16
paddle/gserver/layers/AgentLayer.cpp  +13  -13
paddle/gserver/layers/CRFLayer.h  +1  -1
paddle/gserver/layers/LinearChainCRF.h  +24  -24
paddle/gserver/tests/CMakeLists.txt  +0  -1
paddle/gserver/tests/sequence_nest_rnn_multi_input.conf  +77  -0
paddle/gserver/tests/sequence_rnn_multi_input.conf  +58  -0
paddle/gserver/tests/test_RecurrentGradientMachine.cpp  +22  -7
paddle/parameter/Argument.cpp  +9  -4
paddle/parameter/Argument.h  +2  -2
paddle/trainer/ThreadParameterUpdater.cpp  +1  -1
paddle/trainer/TrainerInternal.cpp  +1  -0
paddle/trainer/tests/sample_trainer_config_parallel.conf  +44  -107
python/paddle/trainer/config_parser.py  +7  -5
python/paddle/trainer_config_helpers/activations.py  +7  -2
python/paddle/trainer_config_helpers/attrs.py  +56  -9
python/paddle/trainer_config_helpers/layers.py  +412  -241
python/paddle/trainer_config_helpers/networks.py  +6  -3
python/paddle/trainer_config_helpers/poolings.py  +6  -1
python/paddle/trainer_config_helpers/tests/CMakeLists.txt  +5  -0
python/paddle/trainer_config_helpers/tests/configs/.gitignore  +1  -0
python/paddle/trainer_config_helpers/tests/configs/check.md5  +17  -0
python/paddle/trainer_config_helpers/tests/configs/generate_protostr.sh  +18  -0
python/paddle/trainer_config_helpers/tests/configs/img_layers.py  +20  -0
python/paddle/trainer_config_helpers/tests/configs/last_first_seq.py  +26  -0
python/paddle/trainer_config_helpers/tests/configs/layer_activations.py  +21  -0
python/paddle/trainer_config_helpers/tests/configs/projections.py  +47  -0
python/paddle/trainer_config_helpers/tests/configs/run_tests.sh  +5  -0
python/paddle/trainer_config_helpers/tests/configs/simple_rnn_layers.py  +36  -0
python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.py  +26  -0
python/paddle/trainer_config_helpers/tests/configs/test_expand_layer.py  +14  -0
python/paddle/trainer_config_helpers/tests/configs/test_fc.py  +20  -0
python/paddle/trainer_config_helpers/tests/configs/test_grumemory_layer.py  +11  -0
python/paddle/trainer_config_helpers/tests/configs/test_hsigmoid.py  +11  -0
python/paddle/trainer_config_helpers/tests/configs/test_lstmemory_layer.py  +11  -0
python/paddle/trainer_config_helpers/tests/configs/test_ntm_layers.py  +23  -0
python/paddle/trainer_config_helpers/tests/configs/test_print_layer.py  +12  -0
python/paddle/trainer_config_helpers/tests/configs/test_rnn_group.py  +35  -0
python/paddle/trainer_config_helpers/tests/configs/test_sequence_pooling.py  +30  -0
python/paddle/trainer_config_helpers/tests/configs/unused_layers.py  +14  -0
python/paddle/trainer_config_helpers/tests/configs/util_layers.py  +15  -0
python/paddle/trainer_config_helpers/tests/layers_test_config.py  +23  -4
doc_cn/demo/quick_start/index.md
@@ -4,7 +4,7 @@
 ## 安装(Install)
 
-首先请参考<a href="../../build_and_install/install/index.html">安装教程</a>安装PaddlePaddle。
+首先请参考<a href="../../build_and_install/index.html">安装教程</a>安装PaddlePaddle。
 
 ## 使用概述(Overview)
paddle/cuda/src/hl_cuda_cublas.cc
@@ -217,7 +217,7 @@ void hl_matrix_mul(real *A_d, hl_trans_op_t transa,
   } else {
     LOG(FATAL) << "parameter transa error!";
   }
-  CHECK_EQ(stat, CUBLAS_STATUS_SUCCESS);
+  CHECK_EQ(stat, CUBLAS_STATUS_SUCCESS) << hl_cublas_get_error_string(stat);
   CHECK_SYNC("hl_matrix_mul failed");
 }
@@ -266,7 +266,7 @@ void hl_matrix_mul_vector(real *A_d, hl_trans_op_t trans,
     LOG(FATAL) << "parameter transa error!";
   }
-  CHECK_EQ(stat, CUBLAS_STATUS_SUCCESS);
+  CHECK_EQ(stat, CUBLAS_STATUS_SUCCESS) << hl_cublas_get_error_string(stat);
   CHECK_SYNC("hl_matrix_mul_vector");
 }
paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp
@@ -497,20 +497,21 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
   int idSize = 0;
   // connect in_links
   for (size_t j = 0; j < inFrameLines_.size(); ++j) {
+    Info& info = info_[shareInlinkInfo ? 0 : j];
     // idSize denotes the sum number of tokens in each length i
-    idSize = info_[j].idIndex[i + 1] - info_[j].idIndex[i];
+    idSize = info.idIndex[i + 1] - info.idIndex[i];
     InFrameLine inFrameLine = inFrameLines_[j];
     auto scatterAgent =
         dynamic_cast<ScatterAgentLayer*>(inFrameLine.agents[i].get());
     scatterAgent->setRealLayerAndOutput(inFrameLine.inLayer,
-                                        inFrameLine.outArg, info_[j].allIds,
-                                        info_[j].idIndex[i], idSize);
+                                        inFrameLine.outArg, info.allIds,
+                                        info.idIndex[i], idSize);
     if (hasSubseq) {
       // size: the length of subsequence
-      int size =
-          info_[j].seqStartPosIndex[i + 1] - info_[j].seqStartPosIndex[i];
-      scatterAgent->setSequenceStartPositions(info_[j].sequenceStartPositions,
-                                              info_[j].seqStartPosIndex[i],
+      int size =
+          info.seqStartPosIndex[i + 1] - info.seqStartPosIndex[i];
+      scatterAgent->setSequenceStartPositions(info.sequenceStartPositions,
+                                              info.seqStartPosIndex[i],
                                               size);
     }
   }
@@ -744,16 +745,24 @@ void RecurrentGradientMachine::selectRowsOneTime(LayerPtr layer,
                                                  const IVectorPtr& allIds,
                                                  Argument* arg,
                                                  PassType passType) {
-  const MatrixPtr& realV = layer->getOutputValue();
-  int height = realV->getHeight();
-  int width = realV->getWidth();
-  Matrix::resizeOrCreate(arg->value, height, width, /* trans */ false, useGpu_);
-  arg->value->zeroMem();
-  arg->value->selectRows(*realV, *allIds);
-  if (passType != PASS_TEST) {
-    Matrix::resizeOrCreate(arg->grad, height, width, /* trans */ false,
-                           useGpu_);
-    arg->grad->zeroMem();
+  Argument& src = layer->getOutput();
+  if (src.value) {
+    const MatrixPtr& realV = src.value;
+    int height = realV->getHeight();
+    int width = realV->getWidth();
+    Matrix::resizeOrCreate(
+        arg->value, height, width, /* trans */ false, useGpu_);
+    arg->value->zeroMem();
+    arg->value->selectRows(*realV, *allIds);
+    if (passType != PASS_TEST) {
+      Matrix::resizeOrCreate(
+          arg->grad, height, width, /* trans */ false, useGpu_);
+      arg->grad->zeroMem();
+    }
+  }
+  if (src.ids) {
+    IVector::resizeOrCreate(arg->ids, src.ids->getSize(), useGpu_);
+    arg->ids->selectFrom(*src.ids, *allIds);
   }
 }
paddle/gserver/layers/AgentLayer.cpp
@@ -139,15 +139,16 @@ void ScatterAgentLayer::forward(PassType passType) {
   Layer::forward(passType);
   CHECK_EQ(realLayer_->getDeviceId(), this->getDeviceId());
 
-  if (realLayer_->getOutput().ids) {  // ids scatter
-    IVector::resizeOrCreate(output_.ids, ids_->getSize(), useGpu_);
-    output_.ids->selectFrom(*realLayer_->getOutput().ids, *ids_);
-  } else {  // value scatter
-    int width = this->getSize();
-    if (realOutArg_.value) {
-      output_.subArgFrom(realOutArg_, /* offset */ idIndex_ * width, idSize_,
-                         width, useGpu_);
-    } else {  // used in generation
+  int width = this->getSize();
+  if (realOutArg_.value || realOutArg_.ids) {
+    output_.subArgFrom(realOutArg_, /* offset */ idIndex_, idSize_,
+                       width, useGpu_);
+  } else {  // used in generation
+    if (realLayer_->getOutput().ids) {
+      IVector::resizeOrCreate(output_.ids, ids_->getSize(), useGpu_);
+      output_.ids->selectFrom(*realLayer_->getOutput().ids, *ids_);
+    }
+    if (realLayer_->getOutput().value) {
       int height = ids_->getSize();
       resetOutput(height, width);
@@ -213,18 +214,17 @@ void SequenceGatherAgentLayer::forward(PassType passType) {
 void SequenceScatterAgentLayer::forward(PassType passType) {
   Layer::forward(passType);
   CHECK_EQ(realLayer_->getDeviceId(), this->getDeviceId());
-  CHECK(!realLayer_->getOutput().ids) << "Not supported";
 
   const Argument& input = realLayer_->getOutput();
-  CHECK_EQ(input.value->getWidth(), this->getSize());
+  CHECK_EQ(realLayer_->getSize(), this->getSize());
   int width = this->getSize();
 
   AsyncGpuBlock asyncGpuBlock;
   REGISTER_TIMER_INFO("SequenceAgentLayerForward", getName().c_str());
 
-  if (realOutArg_.value) {
+  if (realOutArg_.value || realOutArg_.ids) {
     CHECK(realOutArg_.sequenceStartPositions);
-    output_.subArgFrom(realOutArg_, /* offset */ idIndex_ * width, idSize_,
+    output_.subArgFrom(realOutArg_, /* offset */ idIndex_, idSize_,
                        width, useGpu_, /* trans */ false, /* seqFlag */ true,
                        /* seqStart */ seqStartPosIndex_,
                        /* seqSize */ numSequences_);
paddle/gserver/layers/CRFLayer.h
@@ -25,7 +25,7 @@ namespace paddle {
 /**
  * A layer for calculating the cost of sequential conditional random field
  * model.
- * See LinearChainCRF.h for the detail of the CRF formulation.
+ * See class LinearChainCRF for the detail of the CRF formulation.
  */
 class CRFLayer : public Layer {
 public:
paddle/gserver/layers/LinearChainCRF.h
@@ -21,39 +21,39 @@ namespace paddle {
 class LinearChainCRF {
 public:
   /*
-   * The size of para and grad must be (numClasses + 2) * numClasses.
-   * The first numClasses values of para are for starting weights (a).
-   * The next numClasses values of para are for ending weights (b),
-   * The remaning values are for transition weights (w).
+   * The size of para and grad must be \f$(numClasses + 2) * numClasses\f$.
+   * The first numClasses values of para are for starting weights (\f$a\f$).
+   * The next numClasses values of para are for ending weights (\f$b\f$),
+   * The remaning values are for transition weights (\f$w\f$).
    *
-   * The probability of a state sequence s of length L is defined as:
-   * P(s) = (1/Z) exp(a_{s_1} + b_{s_L}
-   *                  + \sum_{l=1}^L x_{s_l}
-   *                  + \sum_{l=2}^L w_{s_{l-1},s_l})
-   * where Z is a normalization value so that the sum of P(s) over all possible
-   * sequences is 1, and x is the input feature to the CRF.
+   * The probability of a state sequence s of length \f$L\f$ is defined as:
+   * \f$P(s) = (1/Z) exp(a_{s_1} + b_{s_L}
+   *                  + \sum_{l=1}^L x_{s_l}
+   *                  + \sum_{l=2}^L w_{s_{l-1},s_l})\f$
+   * where \f$Z\f$ is a normalization value so that the sum of \f$P(s)\f$ over all possible
+   * sequences is \f$1\f$, and \f$x\f$ is the input feature to the CRF.
    */
   LinearChainCRF(int numClasses, real* para, real* grad);
 
   /*
    * Calculate the negative log likelihood of s given x.
    * The size of x must be length * numClasses. Each consecutive numClasses
    * values are the features for one time step.
    */
   real forward(real* x, int* s, int length);
 
   /*
    * Calculate the gradient with respect to x, a, b, and w.
    * The gradient of x will be stored in dx.
    * backward() can only be called after a corresponding call to forward() with
   * the same x, s and length.
-   * NOTE: The gradient is added to dx and grad (provided at constructor).
+   * @note The gradient is added to dx and grad (provided at constructor).
   */
   void backward(real* x, real* dx, int* s, int length);
 
   /*
    * Find the most probable sequence given x. The result will be stored in s.
   */
   void decode(real* x, int* s, int length);
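For readability, the sequence probability that the comment above describes can be written as a single display equation, using the same symbols (a for start weights, b for end weights, w for transition weights, x for the input features, Z for the normalizer):

P(s) = \frac{1}{Z}\,\exp\!\Bigl(a_{s_1} + b_{s_L}
        + \sum_{l=1}^{L} x_{s_l}
        + \sum_{l=2}^{L} w_{s_{l-1},\,s_l}\Bigr)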
paddle/gserver/tests/CMakeLists.txt
@@ -56,7 +56,6 @@ add_test(NAME test_RecurrentGradientMachine
     COMMAND .set_python_path.sh -d
             ${PROJ_ROOT}/python:${PROJ_ROOT}/paddle/gserver/tests
             ${CMAKE_CURRENT_BINARY_DIR}/test_RecurrentGradientMachine
-            --use_gpu=false
     WORKING_DIRECTORY ${PROJ_ROOT}/paddle)
 
 add_unittest_without_exec(test_NetworkCompare
paddle/gserver/tests/sequence_nest_rnn_multi_input.conf (new file, mode 100644)
#edit-mode: -*- python -*-
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from paddle.trainer_config_helpers import *

######################## data source ################################
define_py_data_sources2(train_list='gserver/tests/Sequence/dummy.list',
                        test_list=None,
                        module='rnn_data_provider',
                        obj='process_subseq')

settings(batch_size=2, learning_rate=0.01)

######################## network configure ################################
dict_dim = 10
word_dim = 8
hidden_dim = 8
label_dim = 3

data = data_layer(name="word", size=dict_dim)

emb = embedding_layer(input=data, size=word_dim)

# This hierachical RNN is designed to be equivalent to the simple RNN in
# sequence_rnn.conf

def outer_step(wid, x):
    outer_mem = memory(name="outer_rnn_state", size=hidden_dim)
    def inner_step(y, wid):
        z = embedding_layer(input=wid, size=word_dim)
        inner_mem = memory(name="inner_rnn_state",
                           size=hidden_dim,
                           boot_layer=outer_mem)
        out = fc_layer(input=[y, z, inner_mem],
                       size=hidden_dim,
                       act=TanhActivation(),
                       bias_attr=True,
                       name="inner_rnn_state")
        return out

    inner_rnn_output = recurrent_group(
        step=inner_step,
        name="inner",
        input=[x, wid])
    last = last_seq(input=inner_rnn_output, name="outer_rnn_state")

    # "return last" should also work. But currently RecurrentGradientMachine
    # does not handle it correctly. Current implementation requires that
    # all the out links are from sequences. However, it does not report error
    # when the out links are not sequences.
    return inner_rnn_output

out = recurrent_group(name="outer",
                      step=outer_step,
                      input=[SubsequenceInput(data), SubsequenceInput(emb)])

rep = last_seq(input=out)
prob = fc_layer(size=label_dim,
                input=rep,
                act=SoftmaxActivation(),
                bias_attr=True)

outputs(classification_cost(input=prob,
                            label=data_layer(name="label", size=label_dim)))
paddle/gserver/tests/sequence_rnn_multi_input.conf (new file, mode 100644)
#edit-mode: -*- python -*-
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from paddle.trainer_config_helpers import *

######################## data source ################################
define_py_data_sources2(train_list='gserver/tests/Sequence/dummy.list',
                        test_list=None,
                        module='rnn_data_provider',
                        obj='process_seq')

settings(batch_size=2, learning_rate=0.01)

######################## network configure ################################
dict_dim = 10
word_dim = 8
hidden_dim = 8
label_dim = 3

data = data_layer(name="word", size=dict_dim)

emb = embedding_layer(input=data, size=word_dim)

def step(y, wid):
    z = embedding_layer(input=wid, size=word_dim)
    mem = memory(name="rnn_state", size=hidden_dim)
    out = fc_layer(input=[y, z, mem],
                   size=hidden_dim,
                   act=TanhActivation(),
                   bias_attr=True,
                   name="rnn_state")
    return out

out = recurrent_group(name="rnn",
                      step=step,
                      input=[emb, data])

rep = last_seq(input=out)
prob = fc_layer(size=label_dim,
                input=rep,
                act=SoftmaxActivation(),
                bias_attr=True)

outputs(classification_cost(input=prob,
                            label=data_layer(name="label", size=label_dim)))
paddle/gserver/tests/test_RecurrentGradientMachine.cpp
@@ -92,7 +92,11 @@ void CalCost(const string& conf, const string& dir, real* cost,
   rmDir(dir.c_str());
 }
 
-void test(const string& conf1, const string& conf2, double eps) {
+void test(const string& conf1, const string& conf2, double eps, bool useGpu) {
+  if (!paddle::version::isWithGpu() && useGpu) {
+    return;
+  }
+  FLAGS_use_gpu = useGpu;
   int num_passes = 5;
   real* cost1 = new real[num_passes];
   const string dir1 = "gserver/tests/t1";
@@ -113,17 +117,28 @@ void test(const string& conf1, const string& conf2, double eps) {
 }
 
 TEST(RecurrentGradientMachine, HasSubSequence) {
-  test("gserver/tests/sequence_layer_group.conf",
-       "gserver/tests/sequence_nest_layer_group.conf",
-       1e-5);
+  for (bool useGpu : {false, true}) {
+    test("gserver/tests/sequence_layer_group.conf",
+         "gserver/tests/sequence_nest_layer_group.conf",
+         1e-5, useGpu);
+  }
 }
 
 TEST(RecurrentGradientMachine, rnn) {
-  test("gserver/tests/sequence_rnn.conf",
-       "gserver/tests/sequence_nest_rnn.conf",
-       0);
+  for (bool useGpu : {false, true}) {
+    test("gserver/tests/sequence_rnn.conf",
+         "gserver/tests/sequence_nest_rnn.conf",
+         1e-6, useGpu);
+  }
+}
+
+TEST(RecurrentGradientMachine, rnn_multi_input) {
+  for (bool useGpu : {false, true}) {
+    test("gserver/tests/sequence_rnn_multi_input.conf",
+         "gserver/tests/sequence_nest_rnn_multi_input.conf",
+         1e-6, useGpu);
+  }
 }
 
 int main(int argc, char** argv) {
   if (paddle::version::isWithPyDataProvider()) {
paddle/parameter/Argument.cpp
@@ -554,11 +554,16 @@ void Argument::degradeSequence(const Argument& input, bool useGpu) {
 void Argument::subArgFrom(const Argument& input, size_t offset, size_t height,
                           size_t width, bool useGpu, bool trans, bool seqFlag,
                           size_t seqStart, size_t seqSize) {
-  value = Matrix::create(input.value->getData() + offset, height, width, trans,
-                         useGpu);
+  if (input.value) {
+    value = Matrix::create(input.value->getData() + offset * width,
+                           height, width, trans, useGpu);
+  }
+  if (input.ids) {
+    ids = IVector::create(input.ids->getData() + offset, height, useGpu);
+  }
   if (input.grad) {
-    grad = Matrix::create(input.grad->getData() + offset, height, width, trans,
-                          useGpu);
+    grad = Matrix::create(input.grad->getData() + offset * width,
+                          height, width, trans, useGpu);
   }
   if (seqFlag) {
     sequenceStartPositions = std::make_shared<ICpuGpuVector>(
paddle/parameter/Argument.h
@@ -177,11 +177,11 @@ struct Argument {
   }
 
   /**
-   * @brief (value, grad, sequenceStartPositions) of output are subset of
+   * @brief (value, ids, grad, sequenceStartPositions) of output are subset of
    *        input. Note that, output share the same memory of input.
    *
    * @param input[in]  input
-   * @param offset[in] offset of input.value
+   * @param offset[in] offset in terms of rows
    * @param height[in] height of output.value
    * @param width[in]  width of output.value
    * @param useGpu[in]
paddle/trainer/ThreadParameterUpdater.cpp
@@ -141,7 +141,7 @@ void SgdThreadUpdater::traverse(GetTraverseCallback getTraverseCallback) {
   } else if (hasCpuPara) {
     getGlobalSyncThreadPool()->exec(cpuTraverse);
   } else if (hasGpuPara) {
-    cpuTraverse(0, 0);
+    gpuTraverse(0, 0);
   }
 }
paddle/trainer/TrainerInternal.cpp
@@ -101,6 +101,7 @@ void TrainerInternal::trainOneBatch(int64_t batchId,
       //       it
       //! to ParameterHook.
       auto& grad = para->getBuf(PARAMETER_GRADIENT);
+      SetDevice device(para->getDeviceId());
       paraStats[para->getID()].avgAbsGrad = grad->getAbsSum() / para->getSize();
       paraStats[para->getID()].maxAbsGrad = grad->getAbsMax();
     }
paddle/trainer/tests/sample_trainer_config_parallel.conf
@@ -13,137 +13,74 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-#Todo(luotao02) This config is only used for unitest. It is out of date now, and will be updated later.
+from paddle.trainer_config_helpers import *
 
 TrainData(
     SimpleData(
         files = "trainer/tests/sample_filelist.txt",
         feat_dim = 3,
         context_len = 0,
-        buffer_capacity = 1000000))
+        buffer_capacity = 1000000,
+    )
+)
 
 TestData(
     SimpleData(
         files = "trainer/tests/sample_filelist.txt",
         feat_dim = 3,
         context_len = 0,
-        buffer_capacity = 1000000))
+        buffer_capacity = 1000000,
+    )
+)
 
-Settings(
-    algorithm = "sgd",
-    num_batches_per_send_parameter = 1,
-    num_batches_per_get_parameter = 1,
-    batch_size = 100,
-    learning_rate = 0.001,
-    learning_rate_decay_a = 1e-5,
-    learning_rate_decay_b = 0.5,
-)
-
-default_initial_std(0.2)
+settings(batch_size=100)
 
 # Output layer, label layer, cost layer, preferably set to the same environment.
 output_device = 0
 
-model_type("nn")
-
 # Input Layer does not need to specify the device number.
-Layer(
-    name = "input",
-    type = "data",
-    size = 3,
-)
+data = data_layer(name='input', size=3)
 
 # Calculate in the CPU.
-Layer(
-    name = "layer1_1",
-    type = "fc",
-    size = 5,
-    active_type = "sigmoid",
-    device = -1,
-    inputs = "input",
-)
+fc1 = fc_layer(input=data, size=5,
+               bias_attr=True,
+               layer_attr=ExtraAttr(device=-1),
+               act=SigmoidActivation())
 
 # Calculate in the GPU 0.
-Layer(
-    name = "layer2_1",
-    type = "fc",
-    size = 10,
-    active_type = "sigmoid",
-    device = 0,
-    inputs = "layer1_1",
-)
+fc2 = fc_layer(input=fc1, size=10,
+               bias_attr=True,
+               layer_attr=ExtraAttr(device=0),
+               act=SigmoidActivation())
 
 # Calculate in the GPU 1.
-Layer(
-    name = "layer2_2",
-    type = "fc",
-    size = 10,
-    active_type = "sigmoid",
-    device = 1,
-    inputs = "layer1_1",
-)
+fc3 = fc_layer(input=fc1, size=10,
+               bias_attr=True,
+               layer_attr=ExtraAttr(device=1),
+               act=SigmoidActivation())
 
 # Calculate in the GPU 0.
-Layer(
-    name = "layer3_1",
-    type = "fc",
-    size = 10,
-    device = 0,
-    active_type = "sigmoid",
-    inputs = ["layer2_1", "layer2_2"],
-)
+fc4 = fc_layer(input=[fc2, fc3], size=10,
+               bias_attr=True,
+               layer_attr=ExtraAttr(device=0),
+               act=SigmoidActivation())
 
 # Calculate in the GPU 1.
-Layer(
-    name = "layer3_2",
-    type = "fc",
-    size = 10,
-    device = 1,
-    active_type = "sigmoid",
-    inputs = ["layer2_1", "layer2_2"],
-)
+fc5 = fc_layer(input=[fc2, fc3], size=10,
+               bias_attr=True,
+               layer_attr=ExtraAttr(device=1),
+               act=SigmoidActivation())
 
-Layer(
-    name = "output",
-    type = "fc",
-    size = 10,
-    device = output_device,
-    active_type = "sigmoid",
-    inputs = ["layer3_1", "layer3_2"],
-)
+output = fc_layer(input=[fc4, fc5], size=10,
+                  bias_attr=True,
+                  layer_attr=ExtraAttr(device=output_device),
+                  act=SoftmaxActivation())
 
 if get_config_arg('with_cost', bool, True):
     # This is for training the neural network.
     # We need to have another data layer for label
     # and a layer for calculating cost
-    Layer(
-        name = "label",
-        type = "data",
-        device = output_device,
-        size = 1,
-    )
-    Layer(
-        name = "cost",
-        type = "multi-class-cross-entropy",
-        device = output_device,
-        inputs = ["output", "label"],
-    )
-    Evaluator(
-        name = "error",
-        type = "classification_error",
-        inputs = ["output", "label"])
-    Inputs("input", "label")
-    Outputs("cost")
+    lbl = data_layer(name='label', size=1,
+                     layer_attr=ExtraAttr(device=output_device))
+
+    outputs(classification_cost(input=output,
+                                label=lbl,
+                                layer_attr=ExtraAttr(device=output_device)))
 else:
     # This is for prediction where we don't have label
     # and don't need to calculate cost
-    Inputs("input")
-    Outputs("output")
+    outputs(output)
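Viewed on its own, the device-placement idiom the rewritten config now relies on is roughly the following. This is a minimal sketch using the same helper calls as the config; the two-layer network and the note about the trainer's parallel_nn mode are illustrative assumptions, not part of this commit.

from paddle.trainer_config_helpers import *

data = data_layer(name='input', size=3)

# ExtraAttr(device=...) pins a layer to a device: -1 keeps it on the CPU,
# 0/1 pick a GPU when the trainer runs with layer-wise parallelism
# (assumption; see the config above for the full multi-device example).
fc_cpu = fc_layer(input=data, size=5,
                  act=SigmoidActivation(),
                  bias_attr=True,
                  layer_attr=ExtraAttr(device=-1))
fc_gpu = fc_layer(input=fc_cpu, size=10,
                  act=SigmoidActivation(),
                  bias_attr=True,
                  layer_attr=ExtraAttr(device=0))

outputs(fc_gpu)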
python/paddle/trainer/config_parser.py
@@ -1279,7 +1279,7 @@ class LayerBase(object):
                  size,
                  dims=None,
                  sparse=None,
-                 format="csr"):
+                 format=None):
         if dims is None:
             # TODO(yuyang18): print warning and callstack here!
             dims = list()
@@ -2074,7 +2074,7 @@ class MaxLayer(LayerBase):
                  active_type='linear',
                  device=None,
                  bias=False,
-                 output_max_index=False):
+                 output_max_index=None):
         super(MaxLayer, self).__init__(name, 'max', 0, inputs=inputs, device=device)
         config_assert(len(self.inputs) == 1, 'MaxLayer must have 1 input')
         self.config.trans_type = trans_type
@@ -2083,7 +2083,8 @@ class MaxLayer(LayerBase):
             input_layer = self.get_input_layer(input_index)
             self.set_layer_size(input_layer.size)
         self.create_bias_parameter(bias, self.config.size)
-        self.config.output_max_index = output_max_index
+        if output_max_index is not None:
+            self.config.output_max_index = output_max_index
 
 
 @config_layer('maxid')
@@ -2440,7 +2441,7 @@ class MixedLayer(LayerBase):
                  inputs,
                  size=0,
                  bias=True,
-                 error_clipping_threshold=0.0,
+                 error_clipping_threshold=None,
                  **xargs):
         config_assert(inputs, 'inputs cannot be empty')
         super(MixedLayer, self).__init__(
@@ -2510,7 +2511,8 @@ class MixedLayer(LayerBase):
         self.create_bias_parameter(bias, self.config.size)
-        self.config.error_clipping_threshold = error_clipping_threshold
+        if error_clipping_threshold is not None:
+            self.config.error_clipping_threshold = error_clipping_threshold
 
 # like MixedLayer, but no bias parameter
 @config_func
python/paddle/trainer_config_helpers/activations.py
@@ -15,8 +15,10 @@
 
 __all__ = ["TanhActivation", "SigmoidActivation",
            "SoftmaxActivation", "IdentityActivation", "LinearActivation",
            'SequenceSoftmaxActivation', 'ExpActivation',
-           "ReluActivation", "BReluActivation", "SoftReluActivation", "STanhActivation",
-           "AbsActivation", "SquareActivation", "BaseActivation"]
+           "ReluActivation", "BReluActivation", "SoftReluActivation",
+           "STanhActivation", "AbsActivation", "SquareActivation",
+           "BaseActivation"]
 
 
 class BaseActivation(object):
@@ -36,6 +38,9 @@ class BaseActivation(object):
         self.name = name
         self.support_hppl = support_hppl
 
+    def __repr__(self):
+        return self.name
+
 
 class TanhActivation(BaseActivation):
     """
python/paddle/trainer_config_helpers/attrs.py
@@ -17,6 +17,42 @@ __all__ = ['ParamAttr', 'ExtraAttr', 'ParameterAttribute',
            'ExtraLayerAttribute']
 
 
+def convert_and_compare(x, Type):
+    """
+    Convert x to be the same type as Type and then convert back to
+    check whether there is a loss of information
+    :param x: object to be checked
+    :param Type: target type to check x over
+    """
+    return type(x)(Type(x)) == x
+
+
+def is_compatible_with(x, Type):
+    """
+    Check if x has a type compatible with Type
+    :param x: object to be checked
+    :param Type: target type to check x over
+    """
+    if type(x) == Type:
+        return True
+    try:
+        if float == Type or int == Type:
+            # avoid those types that can be converted to float/int but not very
+            # meaningful and could potentially lead to error
+            # i.e., str and bool typed value should not be used for initializing float/int variable
+            if not isinstance(x, str) and not isinstance(x, bool):
+                return convert_and_compare(x, Type)
+        elif bool == Type:
+            # should not use string type to initialize bool variable
+            if not isinstance(x, str):
+                return convert_and_compare(x, Type)
+        else:
+            return False
+    except:
+        return False
+
+
 class ParameterAttribute(object):
     """
     Parameter Attributes object. To fine-tuning network training process, user
@@ -65,14 +101,18 @@ class ParameterAttribute(object):
         elif initial_std is None and initial_mean is None and initial_max \
                 is None and initial_min is None:
             self.attr = {'initial_smart': True}
-        elif isinstance(initial_std, float) or isinstance(initial_mean, float):
+        elif is_compatible_with(initial_std, float) or \
+                is_compatible_with(initial_mean, float):
             self.attr = dict()
             if initial_std is not None:
                 self.attr['initial_std'] = initial_std
             if initial_mean is not None:
                 self.attr['initial_mean'] = initial_mean
             self.attr['initial_strategy'] = 0  # Gauss Random
-        elif isinstance(initial_max, float) and isinstance(initial_min, float):
+        elif is_compatible_with(initial_max, float) and \
+                is_compatible_with(initial_min, float):
+            initial_max = initial_max
+            initial_min = initial_min
             assert initial_min < initial_max
             initial_mean = (initial_max + initial_min) / 2
             initial_std = initial_mean - initial_min
@@ -83,16 +123,16 @@ class ParameterAttribute(object):
         else:
             raise RuntimeError("Unexpected branch.")
 
-        if not is_static and isinstance(l1_rate, float):
+        if not is_static and is_compatible_with(l1_rate, float):
            self.attr['decay_rate_l1'] = l1_rate
 
-        if not is_static and isinstance(l2_rate, float):
+        if not is_static and is_compatible_with(l2_rate, float):
            self.attr['decay_rate'] = l2_rate
 
-        if not is_static and isinstance(learning_rate, float):
+        if not is_static and is_compatible_with(learning_rate, float):
            self.attr['learning_rate'] = learning_rate
 
-        if not is_static and isinstance(momentum, float):
+        if not is_static and is_compatible_with(momentum, float):
            self.attr['momentum'] = momentum
 
         if name is not None:
@@ -134,12 +174,16 @@ class ExtraLayerAttribute(object):
                       The dropout rate is the zero rate of this mask. The
                       details of what dropout is please refer to `here
                       <https://www.cs.toronto.edu/~hinton/absps/JMLRdropout.pdf>`_.
     :type drop_rate: float
+    :param device: device ID of layer. device=-1, use CPU. device>0, use GPU.
+                   The details allocation in parallel_nn please refer to `here
+                   <http://www.paddlepaddle.org/doc/ui/cmd_argument/
+                   use_case.html#case-2-specify-layers-in-different-devices>`_.
+    :type device: int
     """
 
-    def __init__(self, error_clipping_threshold=None, drop_rate=None):
+    def __init__(self, error_clipping_threshold=None, drop_rate=None, device=None):
         self.attr = dict()
         if isinstance(error_clipping_threshold, float):
             assert error_clipping_threshold > 0
@@ -149,6 +193,9 @@ class ExtraLayerAttribute(object):
             assert drop_rate > 0
             self.attr["drop_rate"] = drop_rate
 
+        if isinstance(device, int):
+            self.attr["device"] = device
+
     def check(self, layer_name):
         for key in self.attr:
             if not hasattr(self, 'can_%s' % key) or \
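A minimal standalone sketch of what the new compatibility helpers do, mirroring the hunk above so it can run outside Paddle; the assertions at the end are illustrative and are not part of the commit.

# Sketch of the type-compatibility helpers added above: numeric values pass if
# the round-trip conversion is lossless; str/bool are rejected for numeric slots.
def convert_and_compare(x, Type):
    # Convert x to Type and back; equality means no information was lost.
    return type(x)(Type(x)) == x

def is_compatible_with(x, Type):
    if type(x) == Type:
        return True
    try:
        if float == Type or int == Type:
            # str and bool should not silently initialize float/int attributes.
            if not isinstance(x, str) and not isinstance(x, bool):
                return convert_and_compare(x, Type)
        elif bool == Type:
            # a string should not initialize a bool attribute
            if not isinstance(x, str):
                return convert_and_compare(x, Type)
        else:
            return False
    except:
        return False

# Illustrative checks (assumed usage, not from the diff):
assert is_compatible_with(1, float)          # 1 -> 1.0 -> 1, lossless
assert is_compatible_with(0.5, float)        # exact type match
assert not is_compatible_with(0.5, int)      # 0.5 -> 0 loses information
assert not is_compatible_with("0.1", float)  # strings are rejected outright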
python/paddle/trainer_config_helpers/layers.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 import functools
+import collections
 
 from paddle.trainer.config_parser import *
 from .activations import LinearActivation, SigmoidActivation, TanhActivation, \
@@ -21,6 +22,7 @@ from .evaluators import *
 from .poolings import MaxPooling, AvgPooling, BasePoolingType
 from .attrs import *
 from .default_decorators import *
 try:
     import cPickle as pickle
 except ImportError:
@@ -51,7 +53,8 @@ __all__ = ["full_matrix_projection", "AggregateLevel", "ExpandLevel",
            'cross_entropy_with_selfnorm', 'cross_entropy',
            'multi_binary_label_cross_entropy',
            'rank_cost', 'lambda_cost', 'huber_cost',
-           'block_expand_layer', 'out_prod_layer', 'print_layer'
+           # 'block_expand_layer',  # TODO(yuyang18): this layer is not correct
+           'out_prod_layer', 'print_layer'
            ]
@@ -165,11 +168,12 @@ class LayerOutput(object):
     :param activation: Layer Activation.
     :type activation: BaseActivation.
     :param parents: Layer's parents.
-    :type parents: list|tuple
+    :type parents: list|tuple|collection.Sequence
     """
 
     def __init__(self, name, layer_type, parents=None, activation=None,
-                 num_filters=None, img_norm_type=None, size=None, outputs=None):
+                 num_filters=None, img_norm_type=None, size=None, outputs=None,
+                 reverse=None):
         assert isinstance(name, basestring)
         assert isinstance(layer_type, basestring)
         assert LayerType.is_layer_type(layer_type)
@@ -185,6 +189,7 @@ class LayerOutput(object):
         if outputs is None:
             outputs = ['default']
         self.outputs = outputs
+        self.reverse = reverse
 
     def __repr__(self):
         """
@@ -201,32 +206,16 @@ class LayerOutput(object):
 ERROR_CLIPPING = 'error_clipping_threshold'
 DROPOUT = 'drop_rate'
+DEVICE = 'device'
 
-
-def check_input(input):
-    """
-    Check input is a LayerOutput or list of LayerOutput or tuple of LayerOutput
-    if is a LayerOutput,
-
-    :param input: The input layer. Could be a list/tuple of input layer.
-    :type input: LayerOutput|list|tuple
-    :return: list of LayerOutput
-    :rtype: list of LayerOutput
-    """
-    if isinstance(input, LayerOutput):
-        return [LayerOutput]
-    assert isinstance(input, list)
-    for inp in input:
-        assert isinstance(inp, LayerOutput)
-    return list(input)
 
 
 def layer_support(*attrs):
+    attrs_list = list(attrs)
+    attrs_list.append(DEVICE)
     def decorator(method):
         @functools.wraps(method)
         def wrapper(*args, **kwargs):
-            for attr in attrs:
+            for attr in attrs_list:
                 for each in args:
                     if isinstance(each, ExtraLayerAttribute):
                         setattr(each, '_'.join(['can', attr]), True)
@@ -289,6 +278,43 @@ def full_matrix_projection(input, size=0, param_attr=None):
     return proj
 
 
+@wrap_param_attr_default()
+def trans_full_matrix_projection(input, size=0, param_attr=None):
+    """
+    Different from full_matrix_projection, this projection performs matrix
+    multiplication, using transpose of weight.
+
+    ..  math::
+        out.row[i] += in.row[i] * w^\mathrm{T}
+
+    :math:`w^\mathrm{T}` means transpose of weight.
+    The simply usage is:
+
+    .. code-block:: python
+
+       proj = trans_full_matrix_projection(input=layer,
+                                           size=100,
+                                           param_attr=ParamAttr(
+                                                name='_proj',
+                                                initial_mean=0.0,
+                                                initial_std=0.01))
+
+    :param input: input layer
+    :type input: LayerOutput
+    :param size: The parameter size. Means the width of parameter.
+    :type size: int
+    :param param_attr: Parameter config, None if use default.
+    :type param_attr: ParameterAttribute
+    :return: A TransposedFullMatrixProjection Object.
+    :rtype: TransposedFullMatrixProjection
+    """
+    proj = TransposedFullMatrixProjection(input_layer_name=input.name,
+                                          size=size,
+                                          **param_attr.attr)
+    proj.origin = input
+    return proj
+
+
 @wrap_param_attr_default()
 def table_projection(input, size=0, param_attr=None):
     """
@@ -366,7 +392,7 @@ def identity_projection(input, offset=None):
     Note that both of two projections should not have any parameter.
 
     :param input: Input Layer.
-    :type input: LayerOutput.
+    :type input: LayerOutput
     :param offset: Offset, None if use default.
     :type offset: int
     :return: A IdentityProjection or IdentityOffsetProjection Object
@@ -409,10 +435,11 @@ def dotmul_projection(input, param_attr=None):
     proj = DotMulProjection(input_layer_name=input.name,
                             size=input.size,
                             **param_attr.attr)
     proj.origin = input
     return proj
 
-def dotmul_operator(x, y, scale=1):
+
+def dotmul_operator(a=None, b=None, scale=1, **kwargs):
     """
     DotMulOperator takes two inputs and performs element-wise multiplication:
@@ -428,22 +455,31 @@ def dotmul_operator(x, y, scale=1):
        op = dotmul_operator(x=layer1, y=layer2, scale=0.5)
 
-    :param x: Input layer1
-    :type x: LayerOutput
-    :param y: Input layer2
-    :type y: LayerOutput
+    :param a: Input layer1
+    :type a: LayerOutput
+    :param b: Input layer2
+    :type b: LayerOutput
     :param scale: config scalar, default value is one.
     :type scale: float
     :return: A DotMulOperator Object.
     :rtype: DotMulOperator
     """
-    assert isinstance(x, LayerOutput)
-    assert isinstance(y, LayerOutput)
-    op = DotMulOperator(input_layer_names=[x.name, y.name],
-                        scale=scale)
-    op.origin = [x, y]
+    if 'x' in kwargs or 'y' in kwargs:
+        logger.warning('x and y arguments for dotmul_operator is deprecated. '
+                       'Please use a and b as parameter.')
+    a = kwargs.get('x', a)  # For Backward capacity.
+    b = kwargs.get('y', b)
+    assert isinstance(a, LayerOutput)
+    assert isinstance(b, LayerOutput)
+    if a.size is not None and b.size is not None:
+        assert a.size == b.size
+
+    op = DotMulOperator(input_layer_names=[a.name, b.name],
+                        scale=scale)
+    op.origin = [a, b]
     return op
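As a usage illustration of the backward-compatibility shim above (layer1 and layer2 are the placeholder names already used in the docstring example, not identifiers defined in this diff):

op_new = dotmul_operator(a=layer1, b=layer2, scale=0.5)  # preferred spelling
op_old = dotmul_operator(x=layer1, y=layer2, scale=0.5)  # still accepted, but
                                                         # logs a deprecation warning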
@
wrap_bias_attr_default
([
'padding_attr'
])
@
wrap_bias_attr_default
([
'padding_attr'
])
def
context_projection
(
input
,
context_len
,
context_start
=
None
,
def
context_projection
(
input
,
context_len
,
context_start
=
None
,
padding_attr
=
False
):
padding_attr
=
False
):
...
@@ -612,7 +648,7 @@ def mixed_layer(size=0, input=None, name=None, act=None, bias_attr=False,
...
@@ -612,7 +648,7 @@ def mixed_layer(size=0, input=None, name=None, act=None, bias_attr=False,
else
:
else
:
with
mixed_layer
(
name
=
name
,
size
=
size
,
act
=
act
,
bias_attr
=
bias_attr
,
with
mixed_layer
(
name
=
name
,
size
=
size
,
act
=
act
,
bias_attr
=
bias_attr
,
layer_attr
=
layer_attr
)
as
m
:
layer_attr
=
layer_attr
)
as
m
:
if
isinstance
(
input
,
list
)
or
isinstance
(
input
,
tupl
e
):
if
isinstance
(
input
,
collections
.
Sequenc
e
):
for
each
in
input
:
for
each
in
input
:
m
+=
each
m
+=
each
else
:
else
:
...
@@ -722,23 +758,19 @@ def fc_layer(input, size, act=None, name=None,
...
@@ -722,23 +758,19 @@ def fc_layer(input, size, act=None, name=None,
"""
"""
if
isinstance
(
input
,
LayerOutput
):
if
isinstance
(
input
,
LayerOutput
):
input
=
[
input
]
input
=
[
input
]
assert
not
isinstance
(
param_attr
,
list
)
assert
not
isinstance
(
param_attr
,
collections
.
Sequence
)
param_attr
=
[
param_attr
]
param_attr
=
[
param_attr
]
else
:
else
:
if
isinstance
(
param_attr
,
list
)
or
isinstance
(
param_attr
,
tupl
e
):
if
isinstance
(
param_attr
,
collections
.
Sequenc
e
):
assert
len
(
input
)
==
len
(
param_attr
)
assert
len
(
input
)
==
len
(
param_attr
)
else
:
else
:
param_attr
=
[
copy
.
deepcopy
(
param_attr
)
for
_
in
range
(
len
(
input
))]
param_attr
=
[
copy
.
deepcopy
(
param_attr
)
for
_
in
range
(
len
(
input
))]
assert
isinstance
(
input
,
list
)
assert
isinstance
(
input
,
collections
.
Sequence
)
def
__idx_to_input__
(
i
):
attr
=
param_attr
[
i
]
assert
isinstance
(
attr
,
ParameterAttribute
)
return
Input
(
input
[
i
].
name
,
**
attr
.
attr
)
Layer
(
Layer
(
inputs
=
map
(
__idx_to_input__
,
range
(
len
(
input
))),
inputs
=
[
Input
(
ipt
.
name
,
**
attr
.
attr
)
for
ipt
,
attr
in
zip
(
input
,
param_attr
)],
name
=
name
,
name
=
name
,
type
=
LayerType
.
FC_LAYER
,
type
=
LayerType
.
FC_LAYER
,
size
=
size
,
size
=
size
,
...
@@ -759,16 +791,20 @@ def print_layer(input, name=None):
...
@@ -759,16 +791,20 @@ def print_layer(input, name=None):
:type name: basestring
:type name: basestring
:param input: The input layer. Could be a list/tuple of input layer.
:param input: The input layer. Could be a list/tuple of input layer.
:type input: LayerOutput|list|tuple
:type input: LayerOutput|list|tuple
:return:
No return
:return:
LayerOutput
"""
"""
check_input
(
input
)
if
isinstance
(
input
,
LayerOutput
):
input
=
[
input
]
assert
isinstance
(
input
,
collections
.
Sequence
)
# list or tuple
for
each
in
input
:
assert
isinstance
(
each
,
LayerOutput
)
Layer
(
Layer
(
name
=
name
,
name
=
name
,
type
=
LayerType
.
PRINT_LAYER
,
type
=
LayerType
.
PRINT_LAYER
,
inputs
=
[
l
.
name
for
l
in
input
],
inputs
=
[
l
.
name
for
l
in
input
],
)
)
LayerOutput
(
name
,
LayerType
.
PRINT_LAYER
,
input
)
# this layer don't return anything, can not be input of other layer.
@
wrap_name_default
(
"seq_pooling"
)
@
wrap_name_default
(
"seq_pooling"
)
...
@@ -807,8 +843,13 @@ def pooling_layer(input, pooling_type=None, name=None, bias_attr=None,
...
@@ -807,8 +843,13 @@ def pooling_layer(input, pooling_type=None, name=None, bias_attr=None,
:rtype: LayerType
:rtype: LayerType
"""
"""
extra_dict
=
dict
()
extra_dict
=
dict
()
# noinspection PyUnresolvedReferences
if
isinstance
(
pooling_type
,
AvgPooling
):
if
isinstance
(
pooling_type
,
AvgPooling
):
extra_dict
[
'average_strategy'
]
=
pooling_type
.
strategy
extra_dict
[
'average_strategy'
]
=
pooling_type
.
strategy
elif
isinstance
(
pooling_type
,
MaxPooling
)
and
\
pooling_type
.
output_max_index
is
not
None
:
assert
isinstance
(
pooling_type
.
output_max_index
,
bool
)
extra_dict
[
'output_max_index'
]
=
pooling_type
.
output_max_index
extra_dict
.
update
(
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
))
extra_dict
.
update
(
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
))
Layer
(
Layer
(
...
@@ -832,7 +873,7 @@ def pooling_layer(input, pooling_type=None, name=None, bias_attr=None,
...
@@ -832,7 +873,7 @@ def pooling_layer(input, pooling_type=None, name=None, bias_attr=None,
@
wrap_name_default
(
"lstmemory"
)
@
wrap_name_default
(
"lstmemory"
)
@
layer_support
(
DROPOUT
)
@
layer_support
(
DROPOUT
)
def
lstmemory
(
input
,
name
=
None
,
reverse
=
False
,
act
=
None
,
def
lstmemory
(
input
,
name
=
None
,
reverse
=
False
,
act
=
None
,
gate_act
=
None
,
gate_act
=
None
,
size
=
None
,
state_act
=
None
,
bias_attr
=
None
,
param_attr
=
None
,
state_act
=
None
,
bias_attr
=
None
,
param_attr
=
None
,
layer_attr
=
None
):
layer_attr
=
None
):
"""
"""
...
@@ -897,6 +938,16 @@ def lstmemory(input, name=None, reverse=False, act=None,
     assert gate_act.support_hppl
     assert state_act.support_hppl
     assert act.support_hppl
+    assert input.size is not None and input.size % 4 == 0
+    if size is not None:
+        if input.size / 4 == size:
+            plog = logger.warning
+        else:
+            plog = logger.fatal
+
+        plog("NOTE: The lstmemory layer[%s]'s size is set by previous input "
+             "layer. The lstm size should be equal with input layer size/4. The"
+             " size which is set explicitly will be ignored." % name)

     Layer(name=name,
           type=LayerType.LSTMEMORY,
...
@@ -908,8 +959,9 @@ def lstmemory(input, name=None, reverse=False, act=None,
           inputs=[Input(input.name, **param_attr.attr)],
           **ExtraLayerAttribute.to_kwargs(layer_attr))

     return LayerOutput(name, LayerType.LSTMEMORY, [input],
-                       size=input.size / 4,
+                       size=input.size / 4 if input.size is not None else None,
                        reverse=reverse)


 @wrap_bias_attr_default()
 @wrap_param_attr_default()
...
@@ -919,7 +971,7 @@ def lstmemory(input, name=None, reverse=False, act=None,
 @wrap_name_default("gru")
 @layer_support(DROPOUT)
 def grumemory(input, name=None, reverse=False, act=None,
-              gate_act=None,
+              gate_act=None, size=None,
               bias_attr=None, param_attr=None,
               layer_attr=None):
     """
...
@@ -977,7 +1029,7 @@ def grumemory(input, name=None, reverse=False, act=None,
     :type name: None|basestring
     :param input: input layer.
     :type input: LayerOutput.
-    :param reverse: Wether sequence process is reversed or not.
+    :param reverse: Whether sequence process is reversed or not.
     :type reverse: bool
     :param act: activation type, TanhActivation by default. This activation
                 affects the :math:`{\\tilde{h_t}}`.
...
@@ -993,12 +1045,23 @@ def grumemory(input, name=None, reverse=False, act=None,
     :type param_attr: ParameterAttribute|None|False
     :param layer_attr: Extra Layer attribute
     :type layer_attr: ExtraLayerAttribute|None
+    :param size: Stub parameter of size, but actually not used. If set this size
+                 will get a warning.
+    :type size: None
     :return: LayerOutput object.
     :rtype: LayerOutput
     """
     assert act.support_hppl
     assert gate_act.support_hppl
+    assert input.size is not None and input.size % 3 == 0
+    if size is not None:
+        if input.size / 3 == size:
+            plog = logger.warning
+        else:
+            plog = logger.fatal
+        plog("NOTE: the gru memory layer's size is set by previous input layer,"
+             " and should be input size / 3. Set size explicitly will be "
+             "ignored.")

     Layer(name=name,
           type=LayerType.GRUMEMORY,
...
@@ -1010,8 +1073,9 @@ def grumemory(input, name=None, reverse=False, act=None,
           **ExtraLayerAttribute.to_kwargs(layer_attr)
           )
     return LayerOutput(name, LayerType.GRUMEMORY, [input],
-                       size=input.size / 3,
+                       size=input.size / 3 if input.size is not None else None,
                        reverse=reverse)


 @wrap_name_default()
 @layer_support()
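With the hunks above, lstmemory and grumemory only accept size as a stub and derive the real hidden size from the projected input (input.size / 4 for LSTM, input.size / 3 for GRU). A minimal, hypothetical config sketch of that pattern (layer names and sizes are made up):

# Sketch only; assumes the usual trainer-config environment.
from paddle.trainer_config_helpers import *

data = data_layer(name='features', size=128)
# The projection feeding the memory must be 4 * hidden for LSTM, 3 * hidden for GRU.
lstm_in = fc_layer(input=data, size=4 * 256, act=LinearActivation())
lstm = lstmemory(input=lstm_in)        # hidden size inferred as 256
gru_in = fc_layer(input=data, size=3 * 256, act=LinearActivation())
gru = grumemory(input=gru_in)          # hidden size inferred as 256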
...
@@ -1030,6 +1094,12 @@ def last_seq(input, name=None, agg_level=AggregateLevel.EACH_TIMESTEP,
     :return: LayerOutput object.
     :rtype: LayerOutput
     """
+    if input.reverse is not None and input.reverse:
+        logger.warning("You are getting the last instance of a sequence that"
+                       " is a output of a REVERSED layer. There is no time"
+                       " series information at all. Maybe you want to use"
+                       " first_seq instead.")
+
     Layer(
         name=name,
         type=LayerType.SEQUENCE_LAST_INSTANCE,
...
@@ -1058,6 +1128,13 @@ def first_seq(input, name=None, agg_level=AggregateLevel.EACH_TIMESTEP,
     :return: LayerOutput object.
     :rtype: LayerOutput
     """
+    if input.reverse is not None and not input.reverse:
+        logger.warning('You are getting the first instance for a time series,'
+                       ' and it is a normal recurrent layer output. There is no'
+                       ' time series information at all. Maybe you want to use'
+                       ' last_seq instead.')
+
     Layer(
         name=name,
         type=LayerType.SEQUENCE_FIRST_INSTANCE,
...
@@ -1073,6 +1150,7 @@ class ExpandLevel(object):
     FROM_TIMESTEP = AggregateLevel.EACH_TIMESTEP
     FROM_SEQUENCE = AggregateLevel.EACH_SEQUENCE
+

 @wrap_name_default()
 @layer_support()
 def expand_layer(input, expand_as,
...
@@ -1123,7 +1201,6 @@ def expand_layer(input, expand_as,
                      parents=[input, expand_as])

-
 @wrap_name_default()
 @layer_support()
 def interpolation_layer(input, weight, name=None, layer_attr=None):
...
@@ -1155,10 +1232,15 @@ def interpolation_layer(input, weight, name=None, layer_attr=None):
     :return: LayerOutput object.
     :rtype: LayerOutput
     """
-    assert isinstance(input, list) or isinstance(input, tuple)
+    assert isinstance(input, collections.Sequence)
     assert len(input) == 2
-    assert input[0].size == input[1].size
-    assert weight.size == 1
+    assert isinstance(input[0], LayerOutput) and isinstance(input[1],
+                                                            LayerOutput)
+    if input[0].size is not None and input[1].size is not None:
+        assert input[0].size == input[1].size
+    assert isinstance(weight, LayerOutput)
+    if weight.size is not None:
+        assert weight.size == 1
     Layer(
         name=name,
         type=LayerType.INTERPOLATION_LAYER,
...
@@ -1200,11 +1282,13 @@ def power_layer(input, weight, name=None, layer_attr=None):
     :return: LayerOutput object.
     :rtype: LayerOutput
     """
-    assert weight.size == 1
+    assert isinstance(input, LayerOutput) and isinstance(weight, LayerOutput)
+    if weight.size is not None:
+        assert weight.size == 1
     Layer(
         name=name,
         type=LayerType.POWER_LAYER,
-        inputs=[input.name, weight.name],
+        inputs=[weight.name, input.name],
         **ExtraAttr.to_kwargs(layer_attr)
     )
     return LayerOutput(name, LayerType.POWER_LAYER,
...
@@ -1243,7 +1327,9 @@ def scaling_layer(input, weight, name=None, layer_attr=None):
     :return: LayerOutput object.
     :rtype: LayerOutput
     """
-    assert weight.size == 1
+    assert isinstance(weight, LayerOutput) and isinstance(input, LayerOutput)
+    if weight.size is not None:
+        assert weight.size == 1
     Layer(
         name=name,
         type=LayerType.SCALING_LAYER,
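The three element-wise layers above now check their inputs lazily: the weight input must be a LayerOutput, and its size must be 1 whenever the size is already known. A hypothetical config fragment exercising them (names and sizes are made up):

# Sketch only.
from paddle.trainer_config_helpers import *

x = data_layer(name='x', size=100)
y = data_layer(name='y', size=100)
w = data_layer(name='w', size=1)      # per-sample scalar weight

mix = interpolation_layer(input=[x, y], weight=w)   # w*x + (1-w)*y
powered = power_layer(input=x, weight=w)            # element-wise x ** w
scaled = scaling_layer(input=x, weight=w)           # element-wise w * x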
...
@@ -1322,6 +1408,7 @@ def cos_sim(a, b, scale=5, size=1, name=None, layer_attr=None):
     :return: LayerOutput object.
     :rtype: LayerOutput
     """
+    assert isinstance(a, LayerOutput) and isinstance(b, LayerOutput)
     if size == 1:
         Layer(
             name=name,
...
@@ -1331,6 +1418,8 @@ def cos_sim(a, b, scale=5, size=1, name=None, layer_attr=None):
             **ExtraLayerAttribute.to_kwargs(layer_attr)
         )
     else:
+        if a.size is not None and b.size is not None:
+            assert size == b.size / a.size
         Layer(
             name=name,
             type=LayerType.COSINE_SIM_VEC,
...
@@ -1341,11 +1430,13 @@ def cos_sim(a, b, scale=5, size=1, name=None, layer_attr=None):
         )
     return LayerOutput(name, LayerType.COSINE_SIM, parents=[a, b])


 @wrap_name_default()
 @wrap_bias_attr_default(has_bias=True)
+@wrap_param_attr_default()
 @layer_support()
 def hsigmoid(input, label, num_classes, name=None, bias_attr=None,
-             layer_attr=None):
+             param_attr=None, layer_attr=None):
    """
    Organize the classes into a binary tree. At each node, a sigmoid function
    is used to calculate the probability of belonging to the right branch.
...
@@ -1379,15 +1470,23 @@ def hsigmoid(input, label, num_classes, name=None, bias_attr=None,
     """
     if isinstance(input, LayerOutput):
         input = [input]
-    assert isinstance(input, list) or isinstance(input, tuple)
+        if not isinstance(param_attr, collections.Sequence):
+            param_attr = [param_attr]
+    else:
+        if not isinstance(param_attr, collections.Sequence):
+            param_attr = [param_attr] * len(input)
+        else:
+            assert len(param_attr) == len(input)
+
+    assert isinstance(input, collections.Sequence)
     assert isinstance(label, LayerOutput)
     assert label.layer_type == LayerType.DATA

     ipts_for_layer = []
     parents = []
-    for each_input in input:
+    for each_input, each_param_attr in zip(input, param_attr):
         assert isinstance(each_input, LayerOutput)
-        ipts_for_layer.append(each_input.name)
+        ipts_for_layer.append(Input(each_input.name, **each_param_attr.attr))
         parents.append(each_input)
     ipts_for_layer.append(label.name)
     parents.append(label)
...
@@ -1402,6 +1501,7 @@ def hsigmoid(input, label, num_classes, name=None, bias_attr=None,
     )
     return LayerOutput(name, LayerType.HSIGMOID, parents=parents)

+
 @wrap_name_default("conv")
 @wrap_param_attr_default()
 @wrap_bias_attr_default()
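hsigmoid now accepts a param_attr, either one attribute shared by every input or one per input, mirroring how the other multi-input layers handle parameters. A hypothetical fragment (names and sizes are made up):

# Sketch only.
from paddle.trainer_config_helpers import *

feat_a = data_layer(name='feat_a', size=64)
feat_b = data_layer(name='feat_b', size=32)
lbl = data_layer(name='label', size=1)

cost = hsigmoid(input=[feat_a, feat_b],
                label=lbl,
                num_classes=1000,
                param_attr=[ParamAttr(name='_hsig_a'),
                            ParamAttr(name='_hsig_b')])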
...
@@ -1435,23 +1535,26 @@ def img_conv_layer(input, filter_size, num_filters,
     :type name: basestring
     :param input: Layer Input.
     :type input: LayerOutput
-    :param filter_size: The x dimension of a filter kernel.
-    :type filter_size: int
+    :param filter_size: The x dimension of a filter kernel. Or input a tuple for
+                        two image dimension.
+    :type filter_size: int|tuple|list
     :param filter_size_y: The y dimension of a filter kernel. Since PaddlePaddle
                           currently supports rectangular filters, the filter's
                           shape will be (filter_size, filter_size_y).
-    :type filter_size_y: int
+    :type filter_size_y: int|None
     :param num_filters: Each filter group's number of filter
     :param act: Activation type. Default is tanh
     :type act: BaseActivation
     :param groups: Group size of filters.
     :type groups: int
-    :param stride: The x dimension of the stride.
-    :type stride: int
+    :param stride: The x dimension of the stride. Or input a tuple for two image
+                   dimension.
+    :type stride: int|tuple|list
     :param stride_y: The y dimension of the stride.
     :type stride_y: int
-    :param padding: The x dimension of the padding.
-    :type padding: int
+    :param padding: The x dimension of the padding. Or input a tuple for two
+                    image dimension
+    :type padding: int|tuple|list
     :param padding_y: The y dimension of the padding.
     :type padding_y: int
     :param bias_attr: Convolution bias attribute. None means default bias.
...
@@ -1472,13 +1575,30 @@ def img_conv_layer(input, filter_size, num_filters,
     if num_channels is None:
         assert input.num_filters is not None
         num_channels = input.num_filters
     if filter_size_y is None:
-        filter_size_y = filter_size
+        if isinstance(filter_size, collections.Sequence):
+            assert len(filter_size) == 2
+            filter_size, filter_size_y = filter_size
+        else:
+            filter_size_y = filter_size
+
     if stride_y is None:
-        stride_y = stride
+        if isinstance(stride, collections.Sequence):
+            assert len(stride) == 2
+            stride, stride_y = stride
+        else:
+            stride_y = stride
+
     if padding_y is None:
-        padding_y = padding
-    if param_attr.attr.get('initial_smart') == True:
+        if isinstance(padding, collections.Sequence):
+            assert len(padding) == 2
+            padding, padding_y = padding
+        else:
+            padding_y = padding
+
+    if param_attr.attr.get('initial_smart'):
         # special initial for conv layers.
         init_w = (2.0 / (filter_size ** 2 * num_channels)) ** 0.5
         param_attr.attr["initial_mean"] = 0.0
         param_attr.attr["initial_std"] = init_w
...
@@ -1489,8 +1609,9 @@ def img_conv_layer(input, filter_size, num_filters,
         inputs=Input(input.name, conv=Conv(
             filter_size=filter_size, padding=padding, stride=stride,
             channels=num_channels, groups=groups,
-            filter_size_y=filter_size_y, padding_y=padding_y, stride_y=stride_y),
+            filter_size_y=filter_size_y, padding_y=padding_y,
+            stride_y=stride_y),
             **param_attr.attr),
         active_type=act.name,
         num_filters=num_filters,
         bias=ParamAttr.to_bias(bias_attr),
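With the change above, filter_size, stride and padding each accept either a single int or an (x, y) pair for rectangular kernels. A hypothetical fragment (names and sizes are made up):

# Sketch only.
from paddle.trainer_config_helpers import *

img = data_layer(name='image', size=3 * 32 * 32)
conv = img_conv_layer(input=img,
                      num_channels=3,
                      filter_size=(3, 5),    # rectangular kernel
                      stride=(1, 1),
                      padding=(1, 2),
                      num_filters=64,
                      act=ReluActivation())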
...
@@ -1550,7 +1671,7 @@ def img_pool_layer(input, pool_size, name=None,
         type=LayerType.POOL_LAYER,
         inputs=[Input(input.name,
                       pool=Pool(
-                          pool_type=pool_type.name + '-projection',
+                          pool_type=''.join([pool_type.name, '-projection']),
                           channels=num_channels,
                           size_x=pool_size,
                           start=start,
...
@@ -1604,7 +1725,6 @@ def img_cmrnorm_layer(input, size, scale=0.0128, power=0.75,
     :type power: float
     :param num_channels: input layer's filers number or channels. If
                          num_channels is None, it will be set automatically.
-    :param blocked: namely normalize in number of blocked feature maps.
     :param layer_attr: Extra Layer Attribute.
     :type layer_attr: ExtraLayerAttribute
     :return: LayerOutput object.
...
@@ -1657,7 +1777,7 @@ def batch_norm_layer(input, act=None, name=None, num_channels=None,
                             batch_norm for CPU. Otherwise, select batch norm
                             type based on the specified type. If you use cudnn_batch_norm,
                             we suggested you use latest version, such as v5.1.
-    :type type: None|string, None or "batch_norm" or "cudnn_batch_norm"
+    :type batch_norm_type: None|string, None or "batch_norm" or "cudnn_batch_norm"
     :param act: Activation Type. Better be relu. Because batch
                 normalization will normalize input near zero.
     :type act: BaseActivation
...
@@ -1818,7 +1938,7 @@ def addto_layer(input, act=None, name=None, bias_attr=None,
     if isinstance(input, LayerOutput):
         input = [input]

-    assert isinstance(input, list) or isinstance(input, tuple)
+    assert isinstance(input, collections.Sequence)
     ipts_for_layer = []
     for each_input in input:
         assert isinstance(each_input, LayerOutput)
...
@@ -1832,7 +1952,7 @@ def addto_layer(input, act=None, name=None, bias_attr=None,
         active_type=act.name,
         **ExtraLayerAttribute.to_kwargs(layer_attr)
     )
-    assert isinstance(input, list) or isinstance(input, tuple)
+
     return LayerOutput(name, LayerType.ADDTO_LAYER, parents=input,
                        activation=act, num_filters=num_filters)
...
@@ -1848,7 +1968,7 @@ def concat_layer(input, act=None, name=None, layer_attr=None):
     :param name: Layer name.
     :type name: basestring
     :param input: input layers or projections
-    :type input: list|tuple
+    :type input: list|tuple|collection.Sequence
     :param act: Activation type.
     :type act: BaseActivation
     :param layer_attr: Extra Layer Attribute.
...
@@ -1862,10 +1982,10 @@ def concat_layer(input, act=None, name=None, layer_attr=None):
     elif isinstance(input, Projection):
         input = [input]
     else:
-        assert isinstance(input, list) or isinstance(input, tuple)
+        assert isinstance(input, collections.Sequence)

     def __is_type__(o, tp):
-        if not isinstance(o, list) and not isinstance(o, tuple):
+        if not isinstance(o, collections.Sequence):
             if o == tp:
                 return True
             elif len(o.__bases__) == 0:
...
@@ -2147,28 +2267,51 @@ def get_output_layer(input, arg_name, name=None, layer_attr=None):
 @wrap_param_attr_default()
 @layer_support()
 def recurrent_layer(input, act=None, bias_attr=None,
-                    param_attr=None, name=None, layer_attr=None):
+                    param_attr=None, name=None, reverse=False, layer_attr=None):
     """
-    TODO(yuyang18): Add docs
-
-    :param input:
-    :param size:
-    :param act:
-    :param bias_attr:
-    :param param_attr:
-    :param name:
-    :param layer_attr:
+    Simple recurrent unit layer. It is just a fully connect layer through both
+    time and neural network.
+
+    For each sequence [start, end] it performs the following computation\:
+
+    ..  math::
+
+        out_{i} = act(in_{i})     \\      \\text{for}    \\    i = start \\\\
+        out_{i} = act(in_{i} + out_{i-1} * W) \\ \\text{for} \\ start < i <= end
+
+    If reversed is true, the order is reversed\:
+
+    ..  math::
+
+        out_{i} = act(in_{i})     \\    \\text{for}    \\    i = end  \\\\
+        out_{i} = act(in_{i} + out_{i+1} * W) \\ \\text{for} \\ start <= i < end
+
+    :param input: Input Layer
+    :type input: LayerOutput
+    :param act: activation.
+    :type act: BaseActivation
+    :param bias_attr: bias attribute.
+    :type bias_attr: ParameterAttribute
+    :param param_attr: parameter attribute.
+    :type param_attr: ParameterAttribute
+    :param name: name of the layer
+    :type name: basestring
+    :param layer_attr: Layer Attribute.
+    :type layer_attr: ExtraLayerAttribute
     :return: LayerOutput object.
+    :rtype: LayerOutput
     """
     Layer(name=name,
           type=LayerType.RECURRENT_LAYER,
           inputs=Input(input.name, **param_attr.attr),
           active_type=act.name,
+          size=input.size,
           bias=ParamAttr.to_bias(bias_attr),
+          reversed=reverse,
           **ExtraAttr.to_kwargs(layer_attr))
     return LayerOutput(name=name, layer_type=LayerType.RECURRENT_LAYER,
-                       parents=[input], size=input.size, activation=act)
+                       parents=[input], size=input.size, activation=act,
+                       reverse=reverse)


 class StaticInput(object):
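recurrent_layer gains a reverse flag, matching the forward/backward recurrences now spelled out in its docstring. A hypothetical forward/backward pair (names and sizes are made up):

# Sketch only.
from paddle.trainer_config_helpers import *

emb = data_layer(name='embedding_seq', size=256)
fwd = recurrent_layer(input=emb, act=TanhActivation())
bwd = recurrent_layer(input=emb, act=TanhActivation(), reverse=True)
context = concat_layer(input=[fwd, bwd])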
...
@@ -2176,6 +2319,7 @@ class StaticInput(object):
     StaticInput is only used in recurrent_group which defines a read-only memory
     that can be a sequence or non-sequence.
     """
+
     def __init__(self, input, is_seq=False, size=None):
         assert isinstance(input, LayerOutput)
         self.input = input
...
@@ -2195,6 +2339,7 @@ class SubsequenceInput(object):
        input = SubsequenceInput(layer)
     """
+
     def __init__(self, input):
         assert isinstance(input, LayerOutput)
         assert input.size is not None
...
@@ -2267,7 +2412,7 @@ def recurrent_group(step, input, reverse=False, name=None):
     if is_single_input(input):
         input = [input]
-    assert isinstance(input, list) or isinstance(input, tuple)
+    assert isinstance(input, collections.Sequence)

     def is_in_links(x):
         return isinstance(x, LayerOutput) or isinstance(x, SubsequenceInput)
...
@@ -2311,6 +2456,7 @@ def recurrent_group(step, input, reverse=False, name=None):
         for ot in layer_outs:
             assert isinstance(ot, LayerOutput)
+            ot.reverse = reverse
             if contains_sub_seq[0]:
                 RecurrentLayerGroupSetOutLink(Link(ot.name, has_subseq=True))
             else:
...
@@ -2323,6 +2469,7 @@ def recurrent_group(step, input, reverse=False, name=None):
     else:
         return layer_outs

+
 class BaseGeneratedInput(object):
     def __init__(self):
         self.bos_id = None
...
@@ -2351,6 +2498,7 @@ class GeneratedInput(BaseGeneratedInput):
         return trg_emb

     def __init__(self, size, embedding_name, embedding_size):
+        super(GeneratedInput, self).__init__()
         self.size = size
         self.embedding_name = embedding_name
         self.embedding_size = embedding_size
...
@@ -2387,6 +2535,7 @@ def maxid_layer(input, name=None, layer_attr=None):
                       layer_type=LayerType.MAXID_LAYER,
                       parents=[input])

+
 @wrap_name_default()
 def out_prod_layer(input1, input2, name=None, layer_attr=None):
     """
...
@@ -2419,7 +2568,8 @@ def out_prod_layer(input1, input2, name=None, layer_attr=None):
           **ExtraLayerAttribute.to_kwargs(layer_attr))
     return LayerOutput(name=name,
                        layer_type=LayerType.OUT_PROD_LAYER,
                        parents=[input1, input2])

+
 @wrap_name_default()
 def eos_layer(input, eos_id, name=None, layer_attr=None):
...
@@ -2472,14 +2622,14 @@ def beam_search(step, input, bos_id, eos_id, beam_size,
        def rnn_step(input):
            last_time_step_output = memory(name='rnn', size=512)
-           with mixed_layer(size=512) as simple_rnn:
+           with mixed_layer(size=512, name='rnn') as simple_rnn:
                simple_rnn += full_matrix_projection(input)
                simple_rnn += last_time_step_output
            return simple_rnn

        beam_gen = beam_search(name="decoder",
                               step=rnn_step,
-                              input=[StaticInput("encoder_last")],
+                              input=[StaticInput(encoder_last)],
                               bos_id=0,
                               eos_id=1,
                               beam_size=5,
...
@@ -2493,18 +2643,18 @@ def beam_search(step, input, bos_id, eos_id, beam_size,
    :param name: Name of the recurrent unit that generates sequences.
    :type name: base string
    :param step: A callable function that defines the calculation in a time
-                 step, and it is appled to sequences with arbitrary length by
+                 step, and it is applied to sequences with arbitrary length by
                  sharing a same set of weights.

                 You can refer to the first parameter of recurrent_group, or
                 demo/seqToseq/seqToseq_net.py for more details.
    :type step: callable
    :param input: Input data for the recurrent unit
-    :type input: StaticInput|GeneratedInput
+    :type input: list
    :param bos_id: Index of the start symbol in the dictionary. The start symbol
                   is a special token for NLP task, which indicates the
                   beginning of a sequence. In the generation task, the start
-                  symbol is ensential, since it is used to initialize the RNN
+                  symbol is essential, since it is used to initialize the RNN
                   internal state.
    :type bos_id: int
    :param eos_id: Index of the end symbol in the dictionary. The end symbol is
...
@@ -2513,6 +2663,8 @@ def beam_search(step, input, bos_id, eos_id, beam_size,
                   symbol is generated, or a pre-defined max iteration number
                   is exceeded.
    :type eos_id: int
+    :param max_length: Max generated sequence length.
+    :type max_length: int
    :param beam_size: Beam search for sequence generation is an iterative search
                      algorithm. To maintain tractability, every iteration only
                      only stores a predetermined number, called the beam_size,
...
@@ -2553,8 +2705,8 @@ def beam_search(step, input, bos_id, eos_id, beam_size,
     real_input = []
     for i, each_input in enumerate(input):
         # print type(each_input)
-        assert isinstance(each_input, StaticInput) or isinstance(each_input,
-                                                                 BaseGeneratedInput)
+        assert isinstance(each_input, StaticInput) or isinstance(
+            each_input, BaseGeneratedInput)
         if isinstance(each_input, BaseGeneratedInput):
             assert generated_input_index == -1
             generated_input_index = i
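The corrected docstring example above ties the memory to its source layer by name: the step function only recurs correctly when the layer written each step carries the same name the memory reads. A condensed sketch of just that coupling (sizes arbitrary, projections are one reasonable choice):

# Sketch only; 'rnn' is the shared name linking memory and output.
from paddle.trainer_config_helpers import *

def rnn_step(input):
    last = memory(name='rnn', size=512)
    with mixed_layer(size=512, name='rnn') as out:
        out += full_matrix_projection(input=input)
        out += full_matrix_projection(input=last)
    return out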
...
@@ -2625,9 +2777,11 @@ def regression_cost(input, label, cost='square_error', name=None):
 @wrap_name_default("cost")
+@layer_support()
 def classification_cost(input, label, name=None,
                         cost="multi-class-cross-entropy",
-                        evaluator=classification_error_evaluator):
+                        evaluator=classification_error_evaluator,
+                        layer_attr=None):
     """
     classification cost Layer.
...
@@ -2640,13 +2794,16 @@ def classification_cost(input, label, name=None,
     :param cost: cost method.
     :type cost: basestring
     :param evaluator: Evaluator method.
+    :param layer_attr: layer's extra attribute.
+    :type layer_attr: ExtraLayerAttribute
     :return: LayerOutput object.
     :rtype: LayerOutput
     """
     assert input.layer_type != LayerType.DATA
     assert isinstance(input.activation, SoftmaxActivation)
     assert label.layer_type == LayerType.DATA
-    Layer(name=name, type=cost, inputs=[Input(input.name), Input(label.name)])
+    Layer(name=name, type=cost, inputs=[Input(input.name), Input(label.name)],
+          **ExtraLayerAttribute.to_kwargs(layer_attr))

     def __add_evaluator__(e):
         assert callable(e)
...
@@ -2659,7 +2816,7 @@ def classification_cost(input, label, name=None,
             e(name=e.__name__, input=input, label=label)

-    if not isinstance(evaluator, list) and not isinstance(evaluator, tuple):
+    if not isinstance(evaluator, collections.Sequence):
         evaluator = [evaluator]

     for each_evaluator in evaluator:
...
@@ -2667,8 +2824,9 @@ def classification_cost(input, label, name=None,
     return LayerOutput(name, LayerType.COST, parents=[input, label])

+
 def conv_operator(img, filter, filter_size, num_filters,
-                  num_channel=None, stride=1, padding=0, groups=1,
+                  num_channel=None, stride=1, padding=0,
                   filter_size_y=None, stride_y=None, padding_y=None):
     """
     Different from img_conv_layer, conv_op is an Operator, which can be used
...
@@ -2682,7 +2840,7 @@ def conv_operator(img, filter, filter_size, num_filters,
        op = conv_operator(img=input1,
                           filter=input2,
-                          filter_size=3.0,
+                          filter_size=3,
                           num_filters=64,
                           num_channels=64)
...
@@ -2696,8 +2854,8 @@ def conv_operator(img, filter, filter_size, num_filters,
                           PaddlePaddle now supports rectangular filters,
                           the filter's shape can be (filter_size, filter_size_y).
    :type filter_size_y: int
-    :param num_filter: channel of output data.
-    :type num_filter: int
+    :param num_filters: channel of output data.
+    :type num_filters: int
    :param num_channel: channel of input data.
    :type num_channel: int
    :param stride: The x dimension of the stride.
...
@@ -2717,8 +2875,16 @@ def conv_operator(img, filter, filter_size, num_filters,
         stride_y = stride
     if padding_y is None:
         padding_y = padding
+
+    if num_channel is None:
+        num_channel = img.num_filters
+
+    assert isinstance(filter, LayerOutput)
+    if filter.size is not None:
+        filter.size = filter_size * filter_size_y * num_filters * num_channel
+
     op = ConvOperator(input_layer_names=[img.name, filter.name],
-                      num_filters=num_filter,
+                      num_filters=num_filters,
                       conv_conf=Conv(filter_size=filter_size,
                                      padding=padding,
                                      stride=stride,
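conv_operator now derives num_channel from the image input when it is omitted and fixes up the filter layer's size; groups is no longer a parameter (it is pinned to 1 in the generated config, as the next hunk shows). A hypothetical fragment constructing the operator (names and sizes are made up):

# Sketch only; the operator is later consumed by a mixed_layer.
from paddle.trainer_config_helpers import *

img = data_layer(name='image', size=64 * 32 * 32)
flt = data_layer(name='dynamic_filter', size=3 * 3 * 64 * 64)

op = conv_operator(img=img, filter=flt,
                   filter_size=3, num_filters=64, num_channel=64)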
...
@@ -2726,13 +2892,13 @@ def conv_operator(img, filter, filter_size, num_filters,
                                      filter_size_y=filter_size_y,
                                      padding_y=padding_y,
                                      stride_y=stride_y,
-                                     groups=groups))
+                                     groups=1))
     op.origin = [img, filter]
     return op


 @wrap_name_default()
-def conv_shift_layer(input, name=None):
+def conv_shift_layer(a, b, name=None):
     """
     This layer performs cyclic convolution for two input. For example:
       - a[in]: contains M elements.
...
@@ -2744,68 +2910,77 @@ def conv_shift_layer(input, name=None):
        c[i] = \sum_{j=-(N-1)/2}^{(N-1)/2}a_{i+j} * b_{j}

     In this formular:
-     - a's index is computed modulo M.
-     - b's index is computed modulo N.
+     - a's index is computed modulo M. When it is negative, then get item from
+       the right side (which is the end of array) to the left.
+     - b's index is computed modulo N. When it is negative, then get item from
+       the right size (which is the end of array) to the left.

    The example usage is:

    .. code-block:: python

-       conv_shift = conv_shif_layer(input=[layer1, layer2])
+       conv_shift = conv_shift_layer(input=[layer1, layer2])

    :param name: layer name
    :type name: basestring
-    :param input: Input layer.
-    :type input: LayerOutput|list|tuple.
+    :param a: Input layer a.
+    :type a: LayerOutput
+    :param b: input layer b
+    :type b: LayerOutput
    :return: LayerOutput object.
    :rtype: LayerOutput
    """
-    assert isinstance(input, list) or isinstance(input, tuple)
+    assert isinstance(a, LayerOutput) and isinstance(b, LayerOutput)
+    assert b.size is None or b.size % 2 == 1  # size of b must be odd.
    Layer(
        name=name,
        type=LayerType.CONV_SHIFT_LAYER,
-        inputs=[x.name for x in input],
+        inputs=[a.name, b.name],
    )

-    return LayerOutput(name, LayerType.CONV_SHIFT_LAYER, parents=input)
+    return LayerOutput(name, LayerType.CONV_SHIFT_LAYER, parents=[a, b],
+                       size=a.size)


 @wrap_name_default()
 @wrap_param_attr_default()
 @wrap_bias_attr_default()
+@wrap_act_default(act=LinearActivation())
 @layer_support(ERROR_CLIPPING, DROPOUT)
-def tensor_layer(input, size, act=None, name=None,
+def tensor_layer(a, b, size, act=None, name=None,
                  param_attr=None, bias_attr=None, layer_attr=None):
    """
    This layer performs tensor operation for two input.
    For example, each sample:

    .. math::
-       y_{i} = x_{1} * W_{i} * {x_{2}^\mathrm{T}}, i=0,1,...,K-1
+       y_{i} = a * W_{i} * {b^\mathrm{T}}, i=0,1,...,K-1

    In this formular:
-    - :math:`x_{1}`: the first input contains M elements.
-    - :math:`x_{2}`: the second input contains N elements.
+    - :math:`a`: the first input contains M elements.
+    - :math:`b`: the second input contains N elements.
    - :math:`y_{i}`: the i-th element of y.
    - :math:`W_{i}`: the i-th learned weight, shape if [M, N]
-    - :math:`{x_{2}}^\mathrm{T}`: the transpose of :math:`x_{2}`.
+    - :math:`b^\mathrm{T}`: the transpose of :math:`b_{2}`.

    The simple usage is:

    .. code-block:: python

-       tensor = tensor_layer(input=[layer1, layer2])
+       tensor = tensor_layer(a=layer1, b=layer2, size=1000)

    :param name: layer name
    :type name: basestring
-    :param input: Input layer.
-    :type input: LayerOutput|list|tuple.
+    :param a: Input layer a.
+    :type a: LayerOutput
+    :param b: input layer b.
+    :type b: LayerOutput
    :param size: the layer dimension.
    :type size: int.
    :param act: Activation Type. Default is tanh.
    :type act: BaseActivation
    :param param_attr: The Parameter Attribute.
-    :type param_attr: ParameterAttribute|list
+    :type param_attr: ParameterAttribute
    :param bias_attr: The Bias Attribute. If no bias, then pass False or
                      something not type of ParameterAttribute. None will get a
                      default Bias.
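conv_shift_layer now names its two operands explicitly and checks that the width of b is odd when it is known. A hypothetical fragment (names and sizes are made up):

# Sketch only.
from paddle.trainer_config_helpers import *

seq = data_layer(name='a', size=100)
kernel = data_layer(name='kernel', size=5)      # width must be odd
shifted = conv_shift_layer(a=seq, b=kernel)     # output size follows a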
...
@@ -2815,65 +2990,26 @@ def tensor_layer(input, size, act=None, name=None,
     :return: LayerOutput object.
     :rtype: LayerOutput
     """
-    assert isinstance(input, list) or isinstance(input, tuple)
-    assert len(input) == 2
+    assert isinstance(a, LayerOutput) and isinstance(b, LayerOutput)
     Layer(
         name=name,
         size=size,
         type=LayerType.TENSOR_LAYER,
         active_type=act.name,
         bias=ParamAttr.to_bias(bias_attr),
-        inputs=[Input(input[0].name, **param_attr.attr),
-                Input(input[1].name)],
+        inputs=[Input(a.name, **param_attr.attr),
+                Input(b.name)],
         **ExtraLayerAttribute.to_kwargs(layer_attr)
     )
-    return LayerOutput(name, LayerType.TENSOR_LAYER, parents=input,
-                       activation=act, size=size)
-
-
-@wrap_param_attr_default()
-def trans_full_matrix_projection(input, size=0, param_attr=None):
-    """
-    Different from full_matrix_projection, this projection performs matrix
-    multiplication, using transpose of weight.
-
-    .. math::
-        out.row[i] += in.row[i] * w^\mathrm{T}
-
-    :math:`w^\mathrm{T}` means transpose of weight.
-    The simply usage is:
-
-    .. code-block:: python
-
-       proj = trans_full_matrix_projection(input=layer,
-                                           size=100,
-                                           param_attr=ParamAttr(
-                                                name='_proj',
-                                                initial_mean=0.0,
-                                                initial_std=0.01))
-
-    :param input: input layer
-    :type input: LayerOutput
-    :param size: The parameter size. Means the width of parameter.
-    :type size: int
-    :param param_attr: Parameter config, None if use default.
-    :type param_attr: ParameterAttribute
-    :return: A TransposedFullMatrixProjection Object.
-    :rtype: TransposedFullMatrixProjection
-    """
-    proj = TransposedFullMatrixProjection(input_layer_name=input.name,
-                                          size=size,
-                                          **param_attr.attr)
-    proj.origin = input
-    proj.origin.projection = "trans_matrix"
-    return proj
+    return LayerOutput(name, LayerType.TENSOR_LAYER, parents=[a, b],
+                       activation=act, size=size)


 @wrap_name_default()
 @wrap_param_attr_default()
 @wrap_bias_attr_default()
 @wrap_act_default()
-def selective_fc_layer(input, size, act=None, name=None,
+def selective_fc_layer(input, select, size, act=None, name=None,
                        pass_generation=False,
                        has_selected_colums=True,
                        mul_ratio=0.02,
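tensor_layer follows the same explicit a/b convention as conv_shift_layer. A hypothetical fragment (names and sizes are made up):

# Sketch only.
from paddle.trainer_config_helpers import *

left = data_layer(name='a', size=100)
right = data_layer(name='b', size=200)
t = tensor_layer(a=left, b=right, size=1000, act=TanhActivation())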
...
@@ -2888,12 +3024,15 @@ def selective_fc_layer(input, size, act=None, name=None,
    .. code-block:: python

-       sel_fc = selective_fc_layer(input=input, 128, act=TanhActivation())
+       sel_fc = selective_fc_layer(input=input, size=128, act=TanhActivation())

    :param name: The Layer Name.
    :type name: basestring
    :param input: The input layer.
    :type input: LayerOutput|list|tuple
+    :param select: The select layer. The output of select layer should be a
+                   sparse binary matrix, and treat as the mask of selective fc.
+    :type select: LayerOutput
    :param size: The layer dimension.
    :type size: int
    :param act: Activation Type. Default is tanh.
...
@@ -2911,33 +3050,33 @@ def selective_fc_layer(input, size, act=None, name=None,
     """
     if isinstance(input, LayerOutput):
         input = [input]
-        assert not isinstance(param_attr, list)
+        assert not isinstance(param_attr, collections.Sequence)
         param_attr = [param_attr]
     else:
-        if isinstance(param_attr, list) or isinstance(param_attr, tuple):
+        if isinstance(param_attr, collections.Sequence):
             assert len(input) == len(param_attr)
         else:
             param_attr = [copy.deepcopy(param_attr) for _ in range(len(input))]

-    assert isinstance(input, list)
-
-    def __idx_to_input__(i):
-        attr = param_attr[i]
-        assert isinstance(attr, ParameterAttribute)
-        return Input(input[i].name, **attr.attr)
+    assert isinstance(input, collections.Sequence)
+    assert isinstance(select, LayerOutput)
+    if select.size is not None:
+        assert select.size == size
     Layer(
-        inputs=map(__idx_to_input__, range(len(input))),
+        inputs=[Input(ipt.name, **attr.attr) for ipt, attr in zip(
+            input, param_attr)] + [select.name],
         name=name,
         type=LayerType.SEL_FC_LAYER,
         size=size,
+        bias=ParameterAttribute.to_bias(bias_attr),
         active_type=act.name,
         selective_fc_pass_generation=pass_generation,
         has_selected_colums=has_selected_colums,
         selective_fc_full_mul_ratio=mul_ratio,
         **ExtraLayerAttribute.to_kwargs(layer_attr)
     )
-    return LayerOutput(name, LayerType.SEL_FC_LAYER, input, activation=act,
-                       size=size)
+    return LayerOutput(name, LayerType.SEL_FC_LAYER, list(input) + [select],
+                       activation=act, size=size)
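selective_fc_layer now requires an explicit select input: a sparse binary matrix whose width matches the layer size and which masks the columns actually computed. A hypothetical fragment (names and sizes are made up):

# Sketch only.
from paddle.trainer_config_helpers import *

feats = data_layer(name='features', size=512)
mask = data_layer(name='candidate_mask', size=10000)   # sparse binary selection
sel = selective_fc_layer(input=feats, select=mask, size=10000,
                         act=SigmoidActivation())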
...
@@ -3005,7 +3144,7 @@ def slope_intercept_layer(input, name=None, slope=1.0, intercept=0.0):
 @wrap_name_default()
-def linear_comb_layer(weights, vectors, size, name=None):
+def linear_comb_layer(weights, vectors, size=None, name=None):
     """
     A layer for weighted sum of vectors takes two inputs.
       - Input: size of weights is M
...
@@ -3035,11 +3174,13 @@ def linear_comb_layer(weights, vectors, size, name=None):
    .. code-block:: python

-       linear_comb = linear_comb_layer(weighs=weight, vectors=vectors,
+       linear_comb = linear_comb_layer(weights=weight, vectors=vectors,
                                        size=elem_dim)

-    :param input: The input layers.
-    :type input: LayerOutput
+    :param weights: The weight layer.
+    :type weights: LayerOutput
+    :param vectors: The vector layer.
+    :type vectors: LayerOutput
    :param size: the dimension of this layer.
    :type size: int
    :param name: The Layer Name.
...
@@ -3047,7 +3188,13 @@ def linear_comb_layer(weights, vectors, size, name=None):
     :return: LayerOutput object.
     :rtype: LayerOutput
     """
+    assert isinstance(weights, LayerOutput) and isinstance(vectors, LayerOutput)
+    if vectors.size is not None and weights.size is not None:
+        assert vectors.size % weights.size == 0
+        if size is None:
+            size = vectors.size / weights.size
+        else:
+            assert size == vectors.size / weights.size
     Layer(
         name=name,
         type=LayerType.LINEAR_COMBINATION_LAYER,
...
@@ -3057,8 +3204,10 @@ def linear_comb_layer(weights, vectors, size, name=None):
     return LayerOutput(name, LayerType.LINEAR_COMBINATION_LAYER,
                        [weights, vectors], size=size)

+
 convex_comb_layer = linear_comb_layer

+
 @wrap_name_default()
 def block_expand_layer(input,
                        channel=0,
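linear_comb_layer can now infer its size as vectors.size / weights.size when both input sizes are known, so size becomes optional. A hypothetical fragment (names and sizes are made up):

# Sketch only.
from paddle.trainer_config_helpers import *

weights = data_layer(name='weights', size=10)          # M weights
vectors = data_layer(name='vectors', size=10 * 50)     # M vectors of dim 50
comb = linear_comb_layer(weights=weights, vectors=vectors)   # size inferred as 50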
...
@@ -3120,22 +3269,22 @@ def block_expand_layer(input,
     """
     Layer(name=name,
           input=Input(input.name,
-                      block_expand=BlockExpand(channel=channel,
+                      block_expand=BlockExpand(channels=channel,
                                                block_x=block_x,
                                                block_y=block_y,
                                                stride_x=stride_x,
                                                stride_y=stride_y,
                                                padding_x=padding_x,
                                                padding_y=padding_y)
                      ),
           type=LayerType.BLOCK_EXPAND,
          )

-    return LayerOutput(name, LayerType.BLOCK_EXPAND, parents=[input])
+    return LayerOutput(name, LayerType.BLOCK_EXPAND, parents=[input], size=size)


 @wrap_name_default()
-def ctc_layer(input, label, size, name=None, norm_by_times=False):
+def ctc_layer(input, label, size=None, name=None, norm_by_times=False):
     """
     Connectionist Temporal Classification (CTC) is designed for temporal
     classication task. That is, for sequence labeling problems where the
...
@@ -3143,7 +3292,8 @@ def ctc_layer(input, label, size, name=None, norm_by_times=False):
     More details can be found by referring to `Connectionist Temporal
     Classification: Labelling Unsegmented Sequence Data with Recurrent
-    Neural Networks <http://machinelearning.wustl.edu/mlpapers/paper_files/icml2006_GravesFGS06.pdf>`_
+    Neural Networks <http://machinelearning.wustl.edu/mlpapers/paper_files/
+    icml2006_GravesFGS06.pdf>`_

     Note:
         Considering the 'blank' label needed by CTC, you need to use
...
@@ -3161,14 +3311,14 @@ def ctc_layer(input, label, size, name=None, norm_by_times=False):
                      size=9055,
                      norm_by_times=True)

-    :param input: The input layers.
+    :param input: The input layer.
    :type input: LayerOutput
    :param label: The data layer of label with variable length.
    :type label: LayerOutput
    :param size: category numbers + 1.
    :type size: int
-    :param name: The name of this layer, which can not specify.
-    :type name: string|None
+    :param name: The name of this layer
+    :type name: basestring|None
    :param norm_by_times: Whether to normalization by times. False by default.
    :type norm_by_times: bool
    :return: LayerOutput object.
...
@@ -3176,18 +3326,24 @@ def ctc_layer(input, label, size, name=None, norm_by_times=False):
     """
     assert isinstance(input, LayerOutput)
     assert isinstance(label, LayerOutput)
+    if label.size is not None:
+        if size is not None:
+            assert size == label.size + 1
+        else:
+            size = label.size + 1
     Layer(
         name=name,
         type=LayerType.CTC_LAYER,
         size=size,
         norm_by_times=norm_by_times,
         inputs=[input.name, label.name]
     )
     return LayerOutput(name, LayerType.CTC_LAYER, [input, label], size=size)


 @wrap_name_default()
 @wrap_param_attr_default()
-def crf_layer(input, label, size, weight=None, param_attr=None, name=None):
+def crf_layer(input, label, size=None, weight=None, param_attr=None, name=None):
     """
     A layer for calculating the cost of sequential conditional random
     field model.
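ctc_layer can likewise derive its size from the label layer (label.size + 1, accounting for the blank symbol) when size is left unset. A hypothetical fragment (names and sizes are made up):

# Sketch only.
from paddle.trainer_config_helpers import *

labels = data_layer(name='label', size=9054)
feat = fc_layer(input=data_layer(name='acoustic', size=120),
                size=9055, act=SoftmaxActivation())
cost = ctc_layer(input=feat, label=labels)     # size inferred as 9055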
...
@@ -3203,7 +3359,7 @@ def crf_layer(input, label, size, weight=None, param_attr=None, name=None):
     :param input: The first input layer is the feature.
     :type input: LayerOutput
     :param label: The second input layer is label.
-    :type input: LayerOutput
+    :type label: LayerOutput
     :param size: The category number.
     :type size: int
     :param weight: The third layer is "weight" of each sample, which is an
...
@@ -3219,6 +3375,12 @@ def crf_layer(input, label, size, weight=None, param_attr=None, name=None):
     assert isinstance(input, LayerOutput)
     assert isinstance(label, LayerOutput)
     assert weight is None or isinstance(weight, LayerOutput)
+    if input.size is not None and label.size is not None:
+        assert input.size == label.size
+        if size is None:
+            size = input.size
+        else:
+            assert size == input.size

     ipts = [Input(input.name, **param_attr.attr),
             Input(label.name)]
...
@@ -3226,16 +3388,17 @@ def crf_layer(input, label, size, weight=None, param_attr=None, name=None):
         ipts.append(Input(weight.name))

     Layer(
         name=name,
         type=LayerType.CRF_LAYER,
         size=size,
         inputs=ipts,
     )
     parents = [input, label]
     if weight is not None:
         parents.append(weight)
+
     return LayerOutput(name, LayerType.CRF_LAYER, parents, size=size)


 @wrap_name_default()
 @wrap_param_attr_default()
 def crf_decoding_layer(input, size, label=None, param_attr=None, name=None):
...
@@ -3268,24 +3431,28 @@ def crf_decoding_layer(input, size, label=None, param_attr=None, name=None):
...
@@ -3268,24 +3431,28 @@ def crf_decoding_layer(input, size, label=None, param_attr=None, name=None):
ipts
.
append
(
Input
(
label
.
name
))
ipts
.
append
(
Input
(
label
.
name
))
Layer
(
Layer
(
name
=
name
,
name
=
name
,
type
=
LayerType
.
CRF_DECODING_LAYER
,
type
=
LayerType
.
CRF_DECODING_LAYER
,
size
=
size
,
size
=
size
,
inputs
=
ipts
,
inputs
=
ipts
,
)
)
parents
=
[
input
]
parents
=
[
input
]
if
label
is
not
None
:
if
label
is
not
None
:
parents
.
append
(
label
)
parents
.
append
(
label
)
return
LayerOutput
(
name
,
LayerType
.
CRF_DECODING_LAYER
,
parents
,
size
=
size
)
return
LayerOutput
(
name
,
LayerType
.
CRF_DECODING_LAYER
,
parents
,
size
=
size
)
"""
"""
following are cost Layers.
following are cost Layers.
"""
"""
@wrap_name_default()
def rank_cost(left, right, label, weight=None, name=None, coeff=1.0):
    """
    A cost Layer for learning to rank using gradient descent. Details can refer
    to `papers <http://research.microsoft.com/en-us/um/people/cburges/papers/ICML_ranking.pdf>`_.
    This layer contains at least three inputs. The weight is an optional
    argument, which affects the cost.
    ...
@@ -3342,12 +3509,13 @@ def rank_cost(left, right, lable, weight=None, name=None, coeff=1.0):
          type=LayerType.RANK_COST,
          inputs=ipts,
          coeff=coeff,
          )

    return LayerOutput(name, LayerType.RANK_COST, parents=parents)
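With the argument renamed from lable to label, a minimal ranking cost config might read as in the test_cost_layers.py entry added by this commit (a sketch):

from paddle.trainer_config_helpers import *

settings(learning_rate=1e-4, batch_size=1000)
left = data_layer(name='left', size=1)    # score of document A
right = data_layer(name='right', size=1)  # score of document B
label = data_layer(name='label', size=1)  # 1 if A should rank above B
outputs(rank_cost(left=left, right=right, label=label))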
@wrap_name_default()
def lambda_cost(input, score, NDCG_num=5, max_sort_size=-1, coeff=1.0):
    """
    lambdaCost for lambdaRank LTR approach.
    ...
@@ -3360,9 +3528,7 @@ def lambda_cost(input, score, NDCG_num=5, max_sort_size=-1, coeff=1.0):
                       NDCG_num=8,
                       max_sort_size=-1)

    :param input: Samples of the same query should be loaded as sequence.
    :type input: LayerOutput
    :param score: The 2nd input. Score of each sample.
    :type input: LayerOutput
    ...
@@ -3380,21 +3546,22 @@ def lambda_cost(input, score, NDCG_num=5, max_sort_size=-1, coeff=1.0):
    :type max_sort_size: int
    :param name: The name of this layers. It is not necessary.
    :type name: None|basestring
    :param coeff: The coefficient affects the gradient in the backward.
    :type coeff: float
    :return: LayerOutput object.
    :rtype: LayerOutput
    """
    assert isinstance(input, LayerOutput) and isinstance(score, LayerOutput)
    if score.size is not None:
        assert score.size == 1
    Layer(name=name,
          type=LayerType.LAMBDA_COST,
          inputs=[input.name, score.name],
          NDCG_num=NDCG_num,
          max_sort_size=max_sort_size,
          coeff=coeff,
          )

    return LayerOutput(name, LayerType.LAMBDA_COST, parents=[input, score])
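A minimal lambda_cost config, mirroring test_cost_layers.py; the score layer must have size 1, which the new assert above enforces (a sketch, names illustrative):

from paddle.trainer_config_helpers import *

settings(learning_rate=1e-4, batch_size=1000)
# One query per sequence: list_feature holds per-document features,
# list_scores holds one relevance score per document (hence size=1).
feature = data_layer(name='list_feature', size=100)
score = data_layer(name='list_scores', size=1)
outputs(lambda_cost(input=feature, score=score, NDCG_num=5))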
@wrap_name_default()
def cross_entropy(input, label, name=None, coeff=1.0):
    """
    ...
@@ -3422,9 +3589,10 @@ def cross_entropy(input, label, name=None, coeff=1.0):
          type=LayerType.CROSS_ENTROPY,
          inputs=[input.name, label.name],
          coeff=coeff,
          )

    return LayerOutput(name, LayerType.CROSS_ENTROPY, parents=[input, label])
@wrap_name_default()
def cross_entropy_with_selfnorm(input, label, name=None, coeff=1.0,
                                softmax_selfnorm_alpha=0.1):
    ...
@@ -3455,12 +3623,13 @@ def cross_entropy_with_selfnorm(input, label, name=None, coeff=1.0,
          inputs=[input.name, label.name],
          coeff=coeff,
          softmax_selfnorm_alpha=softmax_selfnorm_alpha,
          )

    return LayerOutput(name,
                       LayerType.CROSS_ENTROPY_WITH_SELFNORM,
                       parents=[input, label])
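Both cross-entropy variants take the same two inputs; a short sketch following test_cost_layers.py (the alpha shown is just the default):

from paddle.trainer_config_helpers import *

settings(learning_rate=1e-4, batch_size=1000)
probs = data_layer(name='probs', size=10)       # predicted distribution
xe_label = data_layer(name='xe-label', size=10)
outputs(cross_entropy(input=probs, label=xe_label),
        cross_entropy_with_selfnorm(input=probs, label=xe_label,
                                    softmax_selfnorm_alpha=0.1))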
@wrap_name_default()
def huber_cost(input, label, name=None, coeff=1.0):
    """
    ...
@@ -3474,8 +3643,6 @@ def huber_cost(input, label, name=None, coeff=1.0):
    :type input: LayerOutput.
    :param label: The input label.
    :type input: LayerOutput.
    :param name: The name of this layers. It is not necessary.
    :type name: None|basestring.
    :param coeff: The coefficient affects the gradient in the backward.
    ...
@@ -3483,14 +3650,17 @@ def huber_cost(input, label, name=None, coeff=1.0):
    :return: LayerOutput object.
    :rtype: LayerOutput.
    """
    assert isinstance(input, LayerOutput)
    if input.size is not None:
        assert input.size == 1
    Layer(name=name,
          type=LayerType.HUBER,
          inputs=[input.name, label.name],
          coeff=coeff,
          )

    return LayerOutput(name, LayerType.HUBER, parents=[input, label])
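Because the new assert restricts huber_cost to size-1 inputs, a config feeds scalar layers, as in this sketch taken from the pattern in test_cost_layers.py:

from paddle.trainer_config_helpers import *

settings(learning_rate=1e-4, batch_size=1000)
# huber_cost expects scalar inputs, hence size=1 on both layers.
huber_probs = data_layer(name='huber_probs', size=1)
huber_label = data_layer(name='huber_label', size=1)
outputs(huber_cost(input=huber_probs, label=huber_label))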
@wrap_name_default()
def multi_binary_label_cross_entropy(input, label, name=None, coeff=1.0):
    """
    ...
@@ -3514,15 +3684,16 @@ def multi_binary_label_cross_entropy(input, label, name=None, coeff=1.0):
    :rtype: LayerOutput
    """

    if input.activation is None or \
            not isinstance(input.activation, SigmoidActivation):
        logger.log(logging.WARN,
                   "%s is not recommend for batch normalization's activation, "
                   "maybe the relu is better" % repr(input.activation))

    Layer(name=name,
          type=LayerType.MULTI_BIN_LABEL_CROSS_ENTROPY,
          inputs=[input.name, label.name],
          coeff=coeff,
          )
    return LayerOutput(name, LayerType.MULTI_BIN_LABEL_CROSS_ENTROPY,
                       parents=[input, label])
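A sketch of a multi-label setup, mirroring test_cost_layers.py; the activation check above logs a warning unless the input layer's recorded activation is a sigmoid:

from paddle.trainer_config_helpers import *

settings(learning_rate=1e-4, batch_size=1000)
# Each of the 10 positions is an independent binary label.
probs = data_layer(name='probs', size=10)
xe_label = data_layer(name='xe-label', size=10)
outputs(multi_binary_label_cross_entropy(input=probs, label=xe_label))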
python/paddle/trainer_config_helpers/networks.py
...
@@ -616,7 +616,7 @@ def lstmemory_group(input, size=None, name=None,
    cell states, or hidden states in every time step are accessible to for the
    user. This is especially useful in attention model. If you do not need to
    access to the internal states of the lstm, but merely use its outputs,
    it is recommended to use the lstmemory, which is relatively faster than
    lstmemory_group.

    NOTE: In PaddlePaddle's implementation, the following input-to-hidden
    ...
@@ -1052,7 +1052,7 @@ def dropout_layer(input, dropout_rate, name=None):
                       layer_attr=ExtraAttr(drop_rate=dropout_rate))


def outputs(layers, *args):
    """
    Declare the end of network. Currently it will only calculate the
    input/output order of network. It will calculate the predict network or
    ...
@@ -1089,9 +1089,12 @@ def outputs(layers):
    if isinstance(layers, LayerOutput):
        layers = [layers]

    if len(args) != 0:
        layers.extend(args)

    assert len(layers) > 0
    if len(layers) != 1:
        logger.warning("`outputs` routine try to calculate network's"
                       " inputs and outputs order. It might not work well."
                       "Please see follow log carefully.")

    inputs = []
    ...
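The *args change lets several outputs be passed positionally instead of as one list, as the new test configs do. A minimal sketch:

from paddle.trainer_config_helpers import *

settings(learning_rate=1e-4, batch_size=1000)
din = data_layer(name='data', size=30)
a = fc_layer(input=din, size=10)
b = fc_layer(input=din, size=10)
# Previously only a single list was accepted; with *args this also works
# and is equivalent to outputs([a, b]).
outputs(a, b)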
python/paddle/trainer_config_helpers/poolings.py
...
@@ -47,9 +47,14 @@ class MaxPooling(BasePoolingType):
    ..  math::

        max(samples\\_of\\_a\\_sequence)

    :param output_max_index: True if output sequence max index instead of max
                             value. None means use default value in proto.
    :type output_max_index: bool|None
    """

    def __init__(self, output_max_index=None):
        BasePoolingType.__init__(self, "max")
        self.output_max_index = output_max_index


class AvgPooling(BasePoolingType):
    ...
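The new output_max_index flag is exercised in test_sequence_pooling.py later in this commit; a minimal sketch of the call:

from paddle.trainer_config_helpers import *

settings(learning_rate=1e-4, batch_size=1000)
din = data_layer(name='dat_in', size=100)
# Emit the index of the max element per sequence instead of its value.
idx = pooling_layer(input=din, pooling_type=MaxPooling(output_max_index=True))
outputs(idx)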
python/paddle/trainer_config_helpers/tests/CMakeLists.txt
...
@@ -3,3 +3,8 @@ add_test(NAME layers_test
    COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/
        python ${PROJ_ROOT}/python/paddle/trainer_config_helpers/tests/layers_test.py
    WORKING_DIRECTORY ${PROJ_ROOT}/python/paddle)

add_test(NAME test_layerHelpers
  COMMAND
  ${PROJ_ROOT}/python/paddle/trainer_config_helpers/tests/configs/run_tests.sh
)
python/paddle/trainer_config_helpers/tests/configs/.gitignore (new file)

*protostr
python/paddle/trainer_config_helpers/tests/configs/check.md5 (new file)
7e6919d17562516e9a1d9a88de1fb3b9 img_layers.protostr
a5d9259ff1fd7ca23d0ef090052cb1f2 last_first_seq.protostr
9c038249ec8ff719753a746cdb04c026 layer_activations.protostr
5913f87b39cee3b2701fa158270aca26 projections.protostr
6b39e34beea8dfb782bee9bd3dea9eb5 simple_rnn_layers.protostr
0fc1409600f1a3301da994ab9d28b0bf test_cost_layers.protostr
144bc6d3a509de74115fa623741797ed test_expand_layer.protostr
2378518bdb71e8c6e888b1842923df58 test_fc.protostr
8bb44e1e5072d0c261572307e7672bda test_grumemory_layer.protostr
1f3510672dce7a9ed25317fc58579ac7 test_hsigmoid.protostr
d350bd91a0dc13e854b1364c3d9339c6 test_lstmemory_layer.protostr
251a948ba41c1071afcd3d9cf9c233f7 test_ntm_layers.protostr
e6ff04e70aea27c7b06d808cc49c9497 test_print_layer.protostr
2a75dd33b640c49a8821c2da6e574577 test_rnn_group.protostr
67d6fde3afb54f389d0ce4ff14726fe1 test_sequence_pooling.protostr
f586a548ef4350ba1ed47a81859a64cb unused_layers.protostr
8122477f4f65244580cec09edc590041 util_layers.protostr
python/paddle/trainer_config_helpers/tests/configs/generate_protostr.sh (new file)

#!/bin/bash
set -e
cd `dirname $0`
export PYTHONPATH=$PWD/../../../../

configs=(test_fc layer_activations projections test_print_layer
test_sequence_pooling test_lstmemory_layer test_grumemory_layer
last_first_seq test_expand_layer test_ntm_layers test_hsigmoid
img_layers util_layers simple_rnn_layers unused_layers test_cost_layers
test_rnn_group)

for conf in ${configs[*]}
do
    echo "Generating " $conf
    python -m paddle.utils.dump_config $conf.py > $conf.protostr
done
python/paddle/trainer_config_helpers/tests/configs/img_layers.py (new file)

from paddle.trainer_config_helpers import *

settings(learning_rate=1e-3, batch_size=1000)

img = data_layer(name='image', size=256*256)

img_conv = img_conv_layer(input=img, num_channels=1, num_filters=64,
                          filter_size=(32, 64), padding=(1, 0), stride=(1, 1),
                          act=LinearActivation())
img_bn = batch_norm_layer(input=img_conv, act=ReluActivation())

img_norm = img_cmrnorm_layer(input=img_bn, size=32)

img_pool = img_pool_layer(input=img_conv, pool_size=32, pool_type=MaxPooling())

outputs(img_pool, img_norm)
python/paddle/trainer_config_helpers/tests/configs/last_first_seq.py (new file)

from paddle.trainer_config_helpers import *

settings(batch_size=1000, learning_rate=1e-5)

din = data_layer(name='data', size=30)

seq_op = [first_seq, last_seq]

agg_level = [AggregateLevel.EACH_SEQUENCE, AggregateLevel.EACH_TIMESTEP]

opts = []

for op in seq_op:
    for al in agg_level:
        opts.append(op(input=din, agg_level=al))

outputs(opts)
python/paddle/trainer_config_helpers/tests/configs/layer_activations.py (new file)

'''
Test all activations.
'''

from paddle.trainer_config_helpers import *

settings(learning_rate=1e-4, batch_size=1000)

din = data_layer(name='input', size=100)

acts = [
    TanhActivation, SigmoidActivation, SoftmaxActivation, IdentityActivation,
    LinearActivation, ExpActivation, ReluActivation, BReluActivation,
    SoftReluActivation, STanhActivation, AbsActivation, SquareActivation]

outputs([fc_layer(input=din, size=100, act=act(), name="layer_%d" % i)
         for i, act in enumerate(acts)])
python/paddle/trainer_config_helpers/tests/configs/projections.py (new file)

'''
Test mixed layer, projections and operators.
'''
from paddle.trainer_config_helpers import *

settings(batch_size=1000, learning_rate=1e-4)

din = data_layer(name='test', size=100)

din = embedding_layer(input=din, size=256)

with mixed_layer(size=100) as m1:
    m1 += full_matrix_projection(input=din)

with mixed_layer(size=100) as m2:
    m2 += table_projection(input=m1)

with mixed_layer(size=100) as m3:
    m3 += identity_projection(input=m2)

with mixed_layer(size=100) as m4:
    m4 += dotmul_projection(input=m3)

with mixed_layer() as m5:
    m5 += context_projection(input=m4, context_len=3)

with mixed_layer() as m6:
    m6 += dotmul_operator(a=m3, b=m4)

img = data_layer(name='img', size=32*32)
flt = data_layer(name='filter', size=3*3*1*64)

with mixed_layer() as m7:
    m7 += conv_operator(img=img, filter=flt, num_filters=64,
                        num_channel=1, filter_size=3)

end = mixed_layer(input=[full_matrix_projection(input=m5),
                         trans_full_matrix_projection(input=m6),
                         full_matrix_projection(input=m7)],
                  size=100,
                  layer_attr=ExtraAttr(drop_rate=0.5,
                                       error_clipping_threshold=40))

outputs(end)
python/paddle/trainer_config_helpers/tests/configs/run_tests.sh (new file)

#!/bin/bash
cd `dirname $0`
set -e
./generate_protostr.sh
md5sum -c check.md5
python/paddle/trainer_config_helpers/tests/configs/simple_rnn_layers.py (new file)

from paddle.trainer_config_helpers import *

settings(batch_size=1000, learning_rate=1e-4)

din = data_layer(name='data', size=200)

hidden = fc_layer(input=din, size=200, act=SigmoidActivation())

rnn = recurrent_layer(input=hidden, act=SigmoidActivation())

rnn2 = recurrent_layer(input=hidden, act=SigmoidActivation(), reverse=True)

lstm1_param = fc_layer(input=hidden, size=200*4, act=LinearActivation(),
                       bias_attr=False)

lstm1 = lstmemory(input=lstm1_param, act=SigmoidActivation())

lstm2_param = fc_layer(input=hidden, size=200*4, act=LinearActivation(),
                       bias_attr=False)

lstm2 = lstmemory(input=lstm2_param, act=SigmoidActivation(), reverse=True)

gru1_param = fc_layer(input=hidden, size=200*3, act=LinearActivation(),
                      bias_attr=False)
gru1 = grumemory(input=gru1_param, act=SigmoidActivation())

gru2_param = fc_layer(input=hidden, size=200*3, act=LinearActivation(),
                      bias_attr=False)
gru2 = grumemory(input=gru2_param, act=SigmoidActivation(), reverse=True)

outputs(last_seq(input=rnn), first_seq(input=rnn2),
        last_seq(input=lstm1), first_seq(input=lstm2),
        last_seq(input=gru1), first_seq(gru2))
python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.py (new file)

from paddle.trainer_config_helpers import *

settings(learning_rate=1e-4, batch_size=1000)

seq_in = data_layer(name='input', size=200)
labels = data_layer(name='labels', size=5000)

probs = data_layer(name='probs', size=10)
xe_label = data_layer(name='xe-label', size=10)

outputs(ctc_layer(input=seq_in, label=labels),
        crf_layer(input=fc_layer(input=seq_in, size=4),
                  label=data_layer(name='crf_label', size=4)),
        rank_cost(left=data_layer(name='left', size=1),
                  right=data_layer(name='right', size=1),
                  label=data_layer(name='label', size=1)),
        lambda_cost(input=data_layer(name='list_feature', size=100),
                    score=data_layer(name='list_scores', size=1)),
        cross_entropy(input=probs, label=xe_label),
        cross_entropy_with_selfnorm(input=probs, label=xe_label),
        huber_cost(input=data_layer(name='huber_probs', size=1),
                   label=data_layer(name='huber_label', size=1)),
        multi_binary_label_cross_entropy(input=probs, label=xe_label))
python/paddle/trainer_config_helpers/tests/configs/test_expand_layer.py (new file)

from paddle.trainer_config_helpers import *

settings(batch_size=1000, learning_rate=1e-5)

din = data_layer(name='data', size=30)
data_seq = data_layer(name='data_seq', size=30)

outputs(expand_layer(input=din, expand_as=data_seq,
                     expand_level=ExpandLevel.FROM_SEQUENCE),
        expand_layer(input=din, expand_as=data_seq,
                     expand_level=ExpandLevel.FROM_TIMESTEP))
python/paddle/trainer_config_helpers/tests/configs/test_fc.py (new file)

from paddle.trainer_config_helpers import *

settings(batch_size=1000, learning_rate=1e-5)

din = data_layer(name='data', size=100)

trans = trans_layer(input=din)

hidden = fc_layer(input=trans, size=100, bias_attr=False)

mask = data_layer(name='mask', size=100)

hidden_sel = selective_fc_layer(input=din, select=mask, size=100,
                                act=SigmoidActivation())

outputs(hidden, hidden_sel)
python/paddle/trainer_config_helpers/tests/configs/test_grumemory_layer.py (new file)

from paddle.trainer_config_helpers import *

settings(batch_size=1000, learning_rate=1e-4)

din = data_layer(name='data', size=120)

outputs(grumemory(input=din, size=40, reverse=True, gate_act=TanhActivation(),
                  act=SigmoidActivation()))
python/paddle/trainer_config_helpers/tests/configs/test_hsigmoid.py (new file)

from paddle.trainer_config_helpers import *

settings(learning_rate=1e-4, batch_size=1000)

din = data_layer(name='data', size=100)
label = data_layer(name='label', size=10)

outputs(hsigmoid(input=din, label=label, num_classes=10))
python/paddle/trainer_config_helpers/tests/configs/test_lstmemory_layer.py (new file)

from paddle.trainer_config_helpers import *

settings(batch_size=1000, learning_rate=1e-5)

din = data_layer(name='data', size=128)

outputs(lstmemory(input=din, reverse=True, gate_act=TanhActivation(),
                  act=TanhActivation(), size=32))
python/paddle/trainer_config_helpers/tests/configs/test_ntm_layers.py (new file)

from paddle.trainer_config_helpers import *

settings(batch_size=1000, learning_rate=1e-5)

weight = data_layer(name='w', size=1)
a = data_layer(name='a', size=100)
b = data_layer(name='b', size=100)
c = data_layer(name='c', size=200)
d = data_layer(name='d', size=31)

outputs(interpolation_layer(input=[a, b], weight=weight),
        power_layer(input=a, weight=weight),
        scaling_layer(input=a, weight=weight),
        cos_sim(a=a, b=b),
        cos_sim(a=a, b=c, size=2),
        sum_to_one_norm_layer(input=a),
        conv_shift_layer(a=a, b=d),
        tensor_layer(a=a, b=b, size=1000),
        slope_intercept_layer(input=a, slope=0.7, intercept=0.9),
        linear_comb_layer(weights=b, vectors=c))
python/paddle/trainer_config_helpers/tests/configs/test_print_layer.py (new file)

from paddle.trainer_config_helpers import *

settings(learning_rate=1e-4, batch_size=1000)

din = data_layer(name='input', size=100)

print_layer(input=din)

outputs(din)
python/paddle/trainer_config_helpers/tests/configs/test_rnn_group.py (new file)

from paddle.trainer_config_helpers import *

settings(learning_rate=1e-4, batch_size=1000)

seq = data_layer(name='seq_input', size=100)
sub_seq = data_layer(name='sub_seq_input', size=100)
lbl = data_layer(name='label', size=1)


def generate_rnn_simple(name):
    def rnn_simple(s):
        m = memory(name=name, size=200)
        fc = fc_layer(input=[s, m], size=200, name=name)
        return fc
    return rnn_simple


with mixed_layer() as lstm_param:
    # test lstm unit, rnn group
    lstm_param += full_matrix_projection(input=seq, size=100 * 4)

with mixed_layer() as gru_param:
    gru_param += full_matrix_projection(input=seq, size=100 * 3)

outputs(last_seq(input=recurrent_group(
            step=generate_rnn_simple('rnn_forward'), input=seq)),
        first_seq(input=recurrent_group(
            step=generate_rnn_simple('rnn_back'), input=seq, reverse=True)),
        last_seq(input=recurrent_group(
            step=generate_rnn_simple('rnn_subseq_forward'),
            input=SubsequenceInput(input=sub_seq))),
        last_seq(input=lstmemory_group(input=lstm_param, size=100)),
        last_seq(input=gru_group(input=gru_param, size=100)))
python/paddle/trainer_config_helpers/tests/configs/test_sequence_pooling.py (new file)

from paddle.trainer_config_helpers import *

settings(learning_rate=1e-4, batch_size=1000)

din = data_layer(name='dat_in', size=100)

POOL_TYPE = [MaxPooling, AvgPooling, SumPooling]

AGG_LEVEL = [AggregateLevel.EACH_SEQUENCE, AggregateLevel.EACH_TIMESTEP]

opts = []

for pt in POOL_TYPE:
    for al in AGG_LEVEL:
        opts.append(pooling_layer(input=din, agg_level=al, pooling_type=pt()))

opts.append(pooling_layer(input=din,
                          pooling_type=MaxPooling(output_max_index=True)))

outputs(opts)
python/paddle/trainer_config_helpers/tests/configs/unused_layers.py (new file)

from paddle.trainer_config_helpers import *
settings(batch_size=1000, learning_rate=1e-4)

probs = data_layer(name='probs', size=100)

outputs(
    sampling_id_layer(input=probs),  # It seems not support training

    # It seems this layer is not correct, and should be rewrite.
    # block_expand_layer(input=probs, channel=1, block_x=1, block_y=3),
)
python/paddle/trainer_config_helpers/tests/configs/util_layers.py (new file)

from paddle.trainer_config_helpers import *

settings(learning_rate=1e-4, batch_size=1000)

a = data_layer(name='a', size=10)
b = data_layer(name='b', size=10)

result = addto_layer(input=[a, b])
concat1 = concat_layer(input=[a, b])
concat2 = concat_layer(input=[
    identity_projection(input=a),
    identity_projection(input=b)
])

outputs(result, concat1, concat2)
python/paddle/trainer_config_helpers/tests/layers_test_config.py
...
@@ -23,6 +23,15 @@ z = out_prod_layer(input1=x, input2=y)
x1 = fc_layer(input=x, size=5)
y1 = fc_layer(input=y, size=5)
z1 = mixed_layer(act=LinearActivation(),
                 input=[conv_operator(img=x1,
                                      filter=y1,
                                      filter_size=1,
                                      num_filters=5,
                                      num_channel=5,
                                      stride=1)])

y2 = fc_layer(input=y, size=15)

cos1 = cos_sim(a=x1, b=y1)
...
@@ -30,7 +39,7 @@ cos3 = cos_sim(a=x1, b=y2, size=3)

linear_comb = linear_comb_layer(weights=x1, vectors=y2, size=3)

out = fc_layer(input=[cos1, cos3, linear_comb, z, z1],
               size=num_classes,
               act=SoftmaxActivation())
...
@@ -38,11 +47,21 @@ print_layer(input=[out])

outputs(classification_cost(out, data_layer(name="label", size=num_classes)))

dotmul = mixed_layer(input=[dotmul_operator(a=x1, b=x1),
                            dotmul_projection(input=y1)])

proj_with_attr_init = mixed_layer(
    input=full_matrix_projection(input=y1,
                                 param_attr=ParamAttr(learning_rate=0,
                                                      initial_mean=0,
                                                      initial_std=0)),
    bias_attr=ParamAttr(initial_mean=0, initial_std=0, learning_rate=0),
    act=LinearActivation(),
    size=5,
    name='proj_with_attr_init')

# for ctc
tmp = fc_layer(input=[x1, dotmul, proj_with_attr_init],
               size=num_classes + 1,
               act=SoftmaxActivation())
ctc = ctc_layer(input=tmp,
...