Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
23e47bb6
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2298
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
23e47bb6
编写于
9月 28, 2016
作者:
L
liaogang
浏览文件
操作
浏览文件
下载
差异文件
Merge remote-tracking branch 'upstream/master'
上级
a8df4111
d130d181
变更
44
展开全部
显示空白变更内容
内联
并排
Showing
44 changed file
with
1215 addition
and
444 deletion
+1215
-444
doc_cn/demo/quick_start/index.md
doc_cn/demo/quick_start/index.md
+1
-1
paddle/cuda/src/hl_cuda_cublas.cc
paddle/cuda/src/hl_cuda_cublas.cc
+2
-2
paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp
paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp
+25
-16
paddle/gserver/layers/AgentLayer.cpp
paddle/gserver/layers/AgentLayer.cpp
+13
-13
paddle/gserver/layers/CRFLayer.h
paddle/gserver/layers/CRFLayer.h
+1
-1
paddle/gserver/layers/LinearChainCRF.h
paddle/gserver/layers/LinearChainCRF.h
+24
-24
paddle/gserver/tests/CMakeLists.txt
paddle/gserver/tests/CMakeLists.txt
+0
-1
paddle/gserver/tests/sequence_nest_rnn_multi_input.conf
paddle/gserver/tests/sequence_nest_rnn_multi_input.conf
+77
-0
paddle/gserver/tests/sequence_rnn_multi_input.conf
paddle/gserver/tests/sequence_rnn_multi_input.conf
+58
-0
paddle/gserver/tests/test_RecurrentGradientMachine.cpp
paddle/gserver/tests/test_RecurrentGradientMachine.cpp
+22
-7
paddle/parameter/Argument.cpp
paddle/parameter/Argument.cpp
+9
-4
paddle/parameter/Argument.h
paddle/parameter/Argument.h
+2
-2
paddle/trainer/ThreadParameterUpdater.cpp
paddle/trainer/ThreadParameterUpdater.cpp
+1
-1
paddle/trainer/TrainerInternal.cpp
paddle/trainer/TrainerInternal.cpp
+1
-0
paddle/trainer/tests/sample_trainer_config_parallel.conf
paddle/trainer/tests/sample_trainer_config_parallel.conf
+44
-107
python/paddle/trainer/config_parser.py
python/paddle/trainer/config_parser.py
+7
-5
python/paddle/trainer_config_helpers/activations.py
python/paddle/trainer_config_helpers/activations.py
+7
-2
python/paddle/trainer_config_helpers/attrs.py
python/paddle/trainer_config_helpers/attrs.py
+56
-9
python/paddle/trainer_config_helpers/layers.py
python/paddle/trainer_config_helpers/layers.py
+412
-241
python/paddle/trainer_config_helpers/networks.py
python/paddle/trainer_config_helpers/networks.py
+6
-3
python/paddle/trainer_config_helpers/poolings.py
python/paddle/trainer_config_helpers/poolings.py
+6
-1
python/paddle/trainer_config_helpers/tests/CMakeLists.txt
python/paddle/trainer_config_helpers/tests/CMakeLists.txt
+5
-0
python/paddle/trainer_config_helpers/tests/configs/.gitignore
...on/paddle/trainer_config_helpers/tests/configs/.gitignore
+1
-0
python/paddle/trainer_config_helpers/tests/configs/check.md5
python/paddle/trainer_config_helpers/tests/configs/check.md5
+17
-0
python/paddle/trainer_config_helpers/tests/configs/generate_protostr.sh
...trainer_config_helpers/tests/configs/generate_protostr.sh
+18
-0
python/paddle/trainer_config_helpers/tests/configs/img_layers.py
...paddle/trainer_config_helpers/tests/configs/img_layers.py
+20
-0
python/paddle/trainer_config_helpers/tests/configs/last_first_seq.py
...le/trainer_config_helpers/tests/configs/last_first_seq.py
+26
-0
python/paddle/trainer_config_helpers/tests/configs/layer_activations.py
...trainer_config_helpers/tests/configs/layer_activations.py
+21
-0
python/paddle/trainer_config_helpers/tests/configs/projections.py
...addle/trainer_config_helpers/tests/configs/projections.py
+47
-0
python/paddle/trainer_config_helpers/tests/configs/run_tests.sh
.../paddle/trainer_config_helpers/tests/configs/run_tests.sh
+5
-0
python/paddle/trainer_config_helpers/tests/configs/simple_rnn_layers.py
...trainer_config_helpers/tests/configs/simple_rnn_layers.py
+36
-0
python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.py
.../trainer_config_helpers/tests/configs/test_cost_layers.py
+26
-0
python/paddle/trainer_config_helpers/tests/configs/test_expand_layer.py
...trainer_config_helpers/tests/configs/test_expand_layer.py
+14
-0
python/paddle/trainer_config_helpers/tests/configs/test_fc.py
...on/paddle/trainer_config_helpers/tests/configs/test_fc.py
+20
-0
python/paddle/trainer_config_helpers/tests/configs/test_grumemory_layer.py
...iner_config_helpers/tests/configs/test_grumemory_layer.py
+11
-0
python/paddle/trainer_config_helpers/tests/configs/test_hsigmoid.py
...dle/trainer_config_helpers/tests/configs/test_hsigmoid.py
+11
-0
python/paddle/trainer_config_helpers/tests/configs/test_lstmemory_layer.py
...iner_config_helpers/tests/configs/test_lstmemory_layer.py
+11
-0
python/paddle/trainer_config_helpers/tests/configs/test_ntm_layers.py
...e/trainer_config_helpers/tests/configs/test_ntm_layers.py
+23
-0
python/paddle/trainer_config_helpers/tests/configs/test_print_layer.py
.../trainer_config_helpers/tests/configs/test_print_layer.py
+12
-0
python/paddle/trainer_config_helpers/tests/configs/test_rnn_group.py
...le/trainer_config_helpers/tests/configs/test_rnn_group.py
+35
-0
python/paddle/trainer_config_helpers/tests/configs/test_sequence_pooling.py
...ner_config_helpers/tests/configs/test_sequence_pooling.py
+30
-0
python/paddle/trainer_config_helpers/tests/configs/unused_layers.py
...dle/trainer_config_helpers/tests/configs/unused_layers.py
+14
-0
python/paddle/trainer_config_helpers/tests/configs/util_layers.py
...addle/trainer_config_helpers/tests/configs/util_layers.py
+15
-0
python/paddle/trainer_config_helpers/tests/layers_test_config.py
...paddle/trainer_config_helpers/tests/layers_test_config.py
+23
-4
未找到文件。
doc_cn/demo/quick_start/index.md
浏览文件 @
23e47bb6
...
...
@@ -4,7 +4,7 @@
## 安装(Install)
首先请参考
<a
href =
"../../build_and_install/in
stall/in
dex.html"
>
安装教程
</a>
安装PaddlePaddle。
首先请参考
<a
href =
"../../build_and_install/index.html"
>
安装教程
</a>
安装PaddlePaddle。
## 使用概述(Overview)
...
...
paddle/cuda/src/hl_cuda_cublas.cc
浏览文件 @
23e47bb6
...
...
@@ -217,7 +217,7 @@ void hl_matrix_mul(real *A_d, hl_trans_op_t transa,
}
else
{
LOG
(
FATAL
)
<<
"parameter transa error!"
;
}
CHECK_EQ
(
stat
,
CUBLAS_STATUS_SUCCESS
);
CHECK_EQ
(
stat
,
CUBLAS_STATUS_SUCCESS
)
<<
hl_cublas_get_error_string
(
stat
)
;
CHECK_SYNC
(
"hl_matrix_mul failed"
);
}
...
...
@@ -266,7 +266,7 @@ void hl_matrix_mul_vector(real *A_d, hl_trans_op_t trans,
LOG
(
FATAL
)
<<
"parameter transa error!"
;
}
CHECK_EQ
(
stat
,
CUBLAS_STATUS_SUCCESS
);
CHECK_EQ
(
stat
,
CUBLAS_STATUS_SUCCESS
)
<<
hl_cublas_get_error_string
(
stat
)
;
CHECK_SYNC
(
"hl_matrix_mul_vector"
);
}
...
...
paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp
浏览文件 @
23e47bb6
...
...
@@ -497,20 +497,21 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
int
idSize
=
0
;
// connect in_links
for
(
size_t
j
=
0
;
j
<
inFrameLines_
.
size
();
++
j
)
{
Info
&
info
=
info_
[
shareInlinkInfo
?
0
:
j
];
// idSize denotes the sum number of tokens in each length i
idSize
=
info
_
[
j
].
idIndex
[
i
+
1
]
-
info_
[
j
]
.
idIndex
[
i
];
idSize
=
info
.
idIndex
[
i
+
1
]
-
info
.
idIndex
[
i
];
InFrameLine
inFrameLine
=
inFrameLines_
[
j
];
auto
scatterAgent
=
dynamic_cast
<
ScatterAgentLayer
*>
(
inFrameLine
.
agents
[
i
].
get
());
scatterAgent
->
setRealLayerAndOutput
(
inFrameLine
.
inLayer
,
inFrameLine
.
outArg
,
info
_
[
j
]
.
allIds
,
info
_
[
j
]
.
idIndex
[
i
],
idSize
);
inFrameLine
.
outArg
,
info
.
allIds
,
info
.
idIndex
[
i
],
idSize
);
if
(
hasSubseq
)
{
// size: the length of subsequence
int
size
=
info
_
[
j
].
seqStartPosIndex
[
i
+
1
]
-
info_
[
j
]
.
seqStartPosIndex
[
i
];
scatterAgent
->
setSequenceStartPositions
(
info
_
[
j
]
.
sequenceStartPositions
,
info
_
[
j
]
.
seqStartPosIndex
[
i
],
info
.
seqStartPosIndex
[
i
+
1
]
-
info
.
seqStartPosIndex
[
i
];
scatterAgent
->
setSequenceStartPositions
(
info
.
sequenceStartPositions
,
info
.
seqStartPosIndex
[
i
],
size
);
}
}
...
...
@@ -744,10 +745,13 @@ void RecurrentGradientMachine::selectRowsOneTime(LayerPtr layer,
const
IVectorPtr
&
allIds
,
Argument
*
arg
,
PassType
passType
)
{
const
MatrixPtr
&
realV
=
layer
->
getOutputValue
();
Argument
&
src
=
layer
->
getOutput
();
if
(
src
.
value
)
{
const
MatrixPtr
&
realV
=
src
.
value
;
int
height
=
realV
->
getHeight
();
int
width
=
realV
->
getWidth
();
Matrix
::
resizeOrCreate
(
arg
->
value
,
height
,
width
,
/* trans */
false
,
useGpu_
);
Matrix
::
resizeOrCreate
(
arg
->
value
,
height
,
width
,
/* trans */
false
,
useGpu_
);
arg
->
value
->
zeroMem
();
arg
->
value
->
selectRows
(
*
realV
,
*
allIds
);
if
(
passType
!=
PASS_TEST
)
{
...
...
@@ -755,6 +759,11 @@ void RecurrentGradientMachine::selectRowsOneTime(LayerPtr layer,
useGpu_
);
arg
->
grad
->
zeroMem
();
}
}
if
(
src
.
ids
)
{
IVector
::
resizeOrCreate
(
arg
->
ids
,
src
.
ids
->
getSize
(),
useGpu_
);
arg
->
ids
->
selectFrom
(
*
src
.
ids
,
*
allIds
);
}
}
void
RecurrentGradientMachine
::
createSeqPos
(
...
...
paddle/gserver/layers/AgentLayer.cpp
浏览文件 @
23e47bb6
...
...
@@ -139,15 +139,16 @@ void ScatterAgentLayer::forward(PassType passType) {
Layer
::
forward
(
passType
);
CHECK_EQ
(
realLayer_
->
getDeviceId
(),
this
->
getDeviceId
());
if
(
realLayer_
->
getOutput
().
ids
)
{
// ids scatter
IVector
::
resizeOrCreate
(
output_
.
ids
,
ids_
->
getSize
(),
useGpu_
);
output_
.
ids
->
selectFrom
(
*
realLayer_
->
getOutput
().
ids
,
*
ids_
);
}
else
{
// value scatter
int
width
=
this
->
getSize
();
if
(
realOutArg_
.
value
)
{
output_
.
subArgFrom
(
realOutArg_
,
/* offset */
idIndex_
*
width
,
idSize_
,
if
(
realOutArg_
.
value
||
realOutArg_
.
ids
)
{
output_
.
subArgFrom
(
realOutArg_
,
/* offset */
idIndex_
,
idSize_
,
width
,
useGpu_
);
}
else
{
// used in generation
if
(
realLayer_
->
getOutput
().
ids
)
{
IVector
::
resizeOrCreate
(
output_
.
ids
,
ids_
->
getSize
(),
useGpu_
);
output_
.
ids
->
selectFrom
(
*
realLayer_
->
getOutput
().
ids
,
*
ids_
);
}
if
(
realLayer_
->
getOutput
().
value
)
{
int
height
=
ids_
->
getSize
();
resetOutput
(
height
,
width
);
...
...
@@ -213,18 +214,17 @@ void SequenceGatherAgentLayer::forward(PassType passType) {
void
SequenceScatterAgentLayer
::
forward
(
PassType
passType
)
{
Layer
::
forward
(
passType
);
CHECK_EQ
(
realLayer_
->
getDeviceId
(),
this
->
getDeviceId
());
CHECK
(
!
realLayer_
->
getOutput
().
ids
)
<<
"Not supported"
;
const
Argument
&
input
=
realLayer_
->
getOutput
();
CHECK_EQ
(
input
.
value
->
getWidth
(),
this
->
getSize
());
CHECK_EQ
(
realLayer_
->
getSize
(),
this
->
getSize
());
int
width
=
this
->
getSize
();
AsyncGpuBlock
asyncGpuBlock
;
REGISTER_TIMER_INFO
(
"SequenceAgentLayerForward"
,
getName
().
c_str
());
if
(
realOutArg_
.
value
)
{
if
(
realOutArg_
.
value
||
realOutArg_
.
ids
)
{
CHECK
(
realOutArg_
.
sequenceStartPositions
);
output_
.
subArgFrom
(
realOutArg_
,
/* offset */
idIndex_
*
width
,
idSize_
,
output_
.
subArgFrom
(
realOutArg_
,
/* offset */
idIndex_
,
idSize_
,
width
,
useGpu_
,
/* trans */
false
,
/* seqFlag */
true
,
/* seqStart */
seqStartPosIndex_
,
/* seqSize */
numSequences_
);
...
...
paddle/gserver/layers/CRFLayer.h
浏览文件 @
23e47bb6
...
...
@@ -25,7 +25,7 @@ namespace paddle {
/**
* A layer for calculating the cost of sequential conditional random field
* model.
* See
LinearChainCRF.h
for the detail of the CRF formulation.
* See
class LinearChainCRF
for the detail of the CRF formulation.
*/
class
CRFLayer
:
public
Layer
{
public:
...
...
paddle/gserver/layers/LinearChainCRF.h
浏览文件 @
23e47bb6
...
...
@@ -21,39 +21,39 @@ namespace paddle {
class
LinearChainCRF
{
public:
/*
The size of para and grad must be (numClasses + 2) * numClasses
.
The first numClasses values of para are for starting weights (a
).
The next numClasses values of para are for ending weights (b
),
The remaning values are for transition weights (w
).
The probability of a state sequence s of length L
is defined as:
P(s) = (1/Z) exp(a_{s_1} + b_{s_L}
+ \sum_{l=1}^L x_{s_l}
+ \sum_{l=2}^L w_{s_{l-1},s_l})
where Z is a normalization value so that the sum of P(s)
over all possible
sequences is 1, and x
is the input feature to the CRF.
/*
*
* The size of para and grad must be \f$(numClasses + 2) * numClasses\f$
.
* The first numClasses values of para are for starting weights (\f$a\f$
).
* The next numClasses values of para are for ending weights (\f$b\f$
),
* The remaning values are for transition weights (\f$w\f$
).
*
* The probability of a state sequence s of length \f$L\f$
is defined as:
* \f$
P(s) = (1/Z) exp(a_{s_1} + b_{s_L}
*
+ \sum_{l=1}^L x_{s_l}
* + \sum_{l=2}^L w_{s_{l-1},s_l})\f$
* where \f$Z\f$ is a normalization value so that the sum of \f$P(s)\f$
over all possible
* sequences is \f$1\f$, and \f$x\f$
is the input feature to the CRF.
*/
LinearChainCRF
(
int
numClasses
,
real
*
para
,
real
*
grad
);
/*
Calculate the negative log likelihood of s given x.
The size of x must be length * numClasses. Each consecutive numClasses
values are the features for one time step.
/*
*
*
Calculate the negative log likelihood of s given x.
*
The size of x must be length * numClasses. Each consecutive numClasses
*
values are the features for one time step.
*/
real
forward
(
real
*
x
,
int
*
s
,
int
length
);
/*
Calculate the gradient with respect to x, a, b, and w.
The gradient of x will be stored in dx.
backward() can only be called after a corresponding call to forward() with
the same x, s and length.
NOTE:
The gradient is added to dx and grad (provided at constructor).
/*
*
*
Calculate the gradient with respect to x, a, b, and w.
*
The gradient of x will be stored in dx.
*
backward() can only be called after a corresponding call to forward() with
*
the same x, s and length.
* @note
The gradient is added to dx and grad (provided at constructor).
*/
void
backward
(
real
*
x
,
real
*
dx
,
int
*
s
,
int
length
);
/*
Find the most probable sequence given x. The result will be stored in s.
/*
*
*
Find the most probable sequence given x. The result will be stored in s.
*/
void
decode
(
real
*
x
,
int
*
s
,
int
length
);
...
...
paddle/gserver/tests/CMakeLists.txt
浏览文件 @
23e47bb6
...
...
@@ -56,7 +56,6 @@ add_test(NAME test_RecurrentGradientMachine
COMMAND .set_python_path.sh -d
${
PROJ_ROOT
}
/python:
${
PROJ_ROOT
}
/paddle/gserver/tests
${
CMAKE_CURRENT_BINARY_DIR
}
/test_RecurrentGradientMachine
--use_gpu=false
WORKING_DIRECTORY
${
PROJ_ROOT
}
/paddle
)
add_unittest_without_exec
(
test_NetworkCompare
...
...
paddle/gserver/tests/sequence_nest_rnn_multi_input.conf
0 → 100644
浏览文件 @
23e47bb6
#edit-mode: -*- python -*-
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
paddle
.
trainer_config_helpers
import
*
######################## data source ################################
define_py_data_sources2
(
train_list
=
'gserver/tests/Sequence/dummy.list'
,
test_list
=
None
,
module
=
'rnn_data_provider'
,
obj
=
'process_subseq'
)
settings
(
batch_size
=
2
,
learning_rate
=
0
.
01
)
######################## network configure ################################
dict_dim
=
10
word_dim
=
8
hidden_dim
=
8
label_dim
=
3
data
=
data_layer
(
name
=
"word"
,
size
=
dict_dim
)
emb
=
embedding_layer
(
input
=
data
,
size
=
word_dim
)
# This hierachical RNN is designed to be equivalent to the simple RNN in
# sequence_rnn.conf
def
outer_step
(
wid
,
x
):
outer_mem
=
memory
(
name
=
"outer_rnn_state"
,
size
=
hidden_dim
)
def
inner_step
(
y
,
wid
):
z
=
embedding_layer
(
input
=
wid
,
size
=
word_dim
)
inner_mem
=
memory
(
name
=
"inner_rnn_state"
,
size
=
hidden_dim
,
boot_layer
=
outer_mem
)
out
=
fc_layer
(
input
=[
y
,
z
,
inner_mem
],
size
=
hidden_dim
,
act
=
TanhActivation
(),
bias_attr
=
True
,
name
=
"inner_rnn_state"
)
return
out
inner_rnn_output
=
recurrent_group
(
step
=
inner_step
,
name
=
"inner"
,
input
=[
x
,
wid
])
last
=
last_seq
(
input
=
inner_rnn_output
,
name
=
"outer_rnn_state"
)
# "return last" should also work. But currently RecurrentGradientMachine
# does not handle it correctly. Current implementation requires that
# all the out links are from sequences. However, it does not report error
# when the out links are not sequences.
return
inner_rnn_output
out
=
recurrent_group
(
name
=
"outer"
,
step
=
outer_step
,
input
=[
SubsequenceInput
(
data
),
SubsequenceInput
(
emb
)])
rep
=
last_seq
(
input
=
out
)
prob
=
fc_layer
(
size
=
label_dim
,
input
=
rep
,
act
=
SoftmaxActivation
(),
bias_attr
=
True
)
outputs
(
classification_cost
(
input
=
prob
,
label
=
data_layer
(
name
=
"label"
,
size
=
label_dim
)))
paddle/gserver/tests/sequence_rnn_multi_input.conf
0 → 100644
浏览文件 @
23e47bb6
#edit-mode: -*- python -*-
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
paddle
.
trainer_config_helpers
import
*
######################## data source ################################
define_py_data_sources2
(
train_list
=
'gserver/tests/Sequence/dummy.list'
,
test_list
=
None
,
module
=
'rnn_data_provider'
,
obj
=
'process_seq'
)
settings
(
batch_size
=
2
,
learning_rate
=
0
.
01
)
######################## network configure ################################
dict_dim
=
10
word_dim
=
8
hidden_dim
=
8
label_dim
=
3
data
=
data_layer
(
name
=
"word"
,
size
=
dict_dim
)
emb
=
embedding_layer
(
input
=
data
,
size
=
word_dim
)
def
step
(
y
,
wid
):
z
=
embedding_layer
(
input
=
wid
,
size
=
word_dim
)
mem
=
memory
(
name
=
"rnn_state"
,
size
=
hidden_dim
)
out
=
fc_layer
(
input
=[
y
,
z
,
mem
],
size
=
hidden_dim
,
act
=
TanhActivation
(),
bias_attr
=
True
,
name
=
"rnn_state"
)
return
out
out
=
recurrent_group
(
name
=
"rnn"
,
step
=
step
,
input
=[
emb
,
data
])
rep
=
last_seq
(
input
=
out
)
prob
=
fc_layer
(
size
=
label_dim
,
input
=
rep
,
act
=
SoftmaxActivation
(),
bias_attr
=
True
)
outputs
(
classification_cost
(
input
=
prob
,
label
=
data_layer
(
name
=
"label"
,
size
=
label_dim
)))
paddle/gserver/tests/test_RecurrentGradientMachine.cpp
浏览文件 @
23e47bb6
...
...
@@ -92,7 +92,11 @@ void CalCost(const string& conf, const string& dir, real* cost,
rmDir
(
dir
.
c_str
());
}
void
test
(
const
string
&
conf1
,
const
string
&
conf2
,
double
eps
)
{
void
test
(
const
string
&
conf1
,
const
string
&
conf2
,
double
eps
,
bool
useGpu
)
{
if
(
!
paddle
::
version
::
isWithGpu
()
&&
useGpu
)
{
return
;
}
FLAGS_use_gpu
=
useGpu
;
int
num_passes
=
5
;
real
*
cost1
=
new
real
[
num_passes
];
const
string
dir1
=
"gserver/tests/t1"
;
...
...
@@ -113,17 +117,28 @@ void test(const string& conf1, const string& conf2, double eps) {
}
TEST
(
RecurrentGradientMachine
,
HasSubSequence
)
{
for
(
bool
useGpu
:
{
false
,
true
})
{
test
(
"gserver/tests/sequence_layer_group.conf"
,
"gserver/tests/sequence_nest_layer_group.conf"
,
1e-5
);
1e-5
,
useGpu
);
}
}
TEST
(
RecurrentGradientMachine
,
rnn
)
{
for
(
bool
useGpu
:
{
false
,
true
})
{
test
(
"gserver/tests/sequence_rnn.conf"
,
"gserver/tests/sequence_nest_rnn.conf"
,
0
);
1e-6
,
useGpu
);
}
}
TEST
(
RecurrentGradientMachine
,
rnn_multi_input
)
{
for
(
bool
useGpu
:
{
false
,
true
})
{
test
(
"gserver/tests/sequence_rnn_multi_input.conf"
,
"gserver/tests/sequence_nest_rnn_multi_input.conf"
,
1e-6
,
useGpu
);
}
}
int
main
(
int
argc
,
char
**
argv
)
{
if
(
paddle
::
version
::
isWithPyDataProvider
())
{
...
...
paddle/parameter/Argument.cpp
浏览文件 @
23e47bb6
...
...
@@ -554,11 +554,16 @@ void Argument::degradeSequence(const Argument& input, bool useGpu) {
void
Argument
::
subArgFrom
(
const
Argument
&
input
,
size_t
offset
,
size_t
height
,
size_t
width
,
bool
useGpu
,
bool
trans
,
bool
seqFlag
,
size_t
seqStart
,
size_t
seqSize
)
{
value
=
Matrix
::
create
(
input
.
value
->
getData
()
+
offset
,
height
,
width
,
trans
,
useGpu
);
if
(
input
.
value
)
{
value
=
Matrix
::
create
(
input
.
value
->
getData
()
+
offset
*
width
,
height
,
width
,
trans
,
useGpu
);
}
if
(
input
.
ids
)
{
ids
=
IVector
::
create
(
input
.
ids
->
getData
()
+
offset
,
height
,
useGpu
);
}
if
(
input
.
grad
)
{
grad
=
Matrix
::
create
(
input
.
grad
->
getData
()
+
offset
,
height
,
width
,
trans
,
useGpu
);
grad
=
Matrix
::
create
(
input
.
grad
->
getData
()
+
offset
*
width
,
height
,
width
,
trans
,
useGpu
);
}
if
(
seqFlag
)
{
sequenceStartPositions
=
std
::
make_shared
<
ICpuGpuVector
>
(
...
...
paddle/parameter/Argument.h
浏览文件 @
23e47bb6
...
...
@@ -177,11 +177,11 @@ struct Argument {
}
/**
* @brief (value, grad, sequenceStartPositions) of output are subset of
* @brief (value,
ids,
grad, sequenceStartPositions) of output are subset of
* input. Note that, output share the same memory of input.
*
* @param input[in] input
* @param offset[in] offset
of input.value
* @param offset[in] offset
in terms of rows
* @param height[in] height of output.value
* @param width[in] width of output.value
* @param useGpu[in]
...
...
paddle/trainer/ThreadParameterUpdater.cpp
浏览文件 @
23e47bb6
...
...
@@ -141,7 +141,7 @@ void SgdThreadUpdater::traverse(GetTraverseCallback getTraverseCallback) {
}
else
if
(
hasCpuPara
)
{
getGlobalSyncThreadPool
()
->
exec
(
cpuTraverse
);
}
else
if
(
hasGpuPara
)
{
c
puTraverse
(
0
,
0
);
g
puTraverse
(
0
,
0
);
}
}
...
...
paddle/trainer/TrainerInternal.cpp
浏览文件 @
23e47bb6
...
...
@@ -101,6 +101,7 @@ void TrainerInternal::trainOneBatch(int64_t batchId,
// it
//! to ParameterHook.
auto
&
grad
=
para
->
getBuf
(
PARAMETER_GRADIENT
);
SetDevice
device
(
para
->
getDeviceId
());
paraStats
[
para
->
getID
()].
avgAbsGrad
=
grad
->
getAbsSum
()
/
para
->
getSize
();
paraStats
[
para
->
getID
()].
maxAbsGrad
=
grad
->
getAbsMax
();
}
...
...
paddle/trainer/tests/sample_trainer_config_parallel.conf
浏览文件 @
23e47bb6
...
...
@@ -13,137 +13,74 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#Todo(luotao02) This config is only used for unitest. It is out of date now, and will be updated later.
from
paddle
.
trainer_config_helpers
import
*
TrainData
(
SimpleData
(
TrainData
(
SimpleData
(
files
=
"trainer/tests/sample_filelist.txt"
,
feat_dim
=
3
,
context_len
=
0
,
buffer_capacity
=
1000000
,
)
)
buffer_capacity
=
1000000
))
TestData
(
SimpleData
(
TestData
(
SimpleData
(
files
=
"trainer/tests/sample_filelist.txt"
,
feat_dim
=
3
,
context_len
=
0
,
buffer_capacity
=
1000000
,
)
)
buffer_capacity
=
1000000
))
Settings
(
algorithm
=
"sgd"
,
num_batches_per_send_parameter
=
1
,
num_batches_per_get_parameter
=
1
,
batch_size
=
100
,
learning_rate
=
0
.
001
,
learning_rate_decay_a
=
1
e
-
5
,
learning_rate_decay_b
=
0
.
5
,
)
settings
(
batch_size
=
100
)
default_initial_std
(
0
.
2
)
# Output layer, label layer, cost layer, preferably set to the same environment.
output_device
=
0
model_type
(
"nn"
)
# Input Layer does not need to specify the device number.
Layer
(
name
=
"input"
,
type
=
"data"
,
size
=
3
,
)
data
=
data_layer
(
name
=
'input'
,
size
=
3
)
# Calculate in the CPU.
Layer
(
name
=
"layer1_1"
,
type
=
"fc"
,
size
=
5
,
active_type
=
"sigmoid"
,
device
= -
1
,
inputs
=
"input"
,
)
fc1
=
fc_layer
(
input
=
data
,
size
=
5
,
bias_attr
=
True
,
layer_attr
=
ExtraAttr
(
device
=-
1
),
act
=
SigmoidActivation
())
# Calculate in the GPU 0.
Layer
(
name
=
"layer2_1"
,
type
=
"fc"
,
size
=
10
,
active_type
=
"sigmoid"
,
device
=
0
,
inputs
=
"layer1_1"
,
)
fc2
=
fc_layer
(
input
=
fc1
,
size
=
10
,
bias_attr
=
True
,
layer_attr
=
ExtraAttr
(
device
=
0
),
act
=
SigmoidActivation
())
# Calculate in the GPU 1.
Layer
(
name
=
"layer2_2"
,
type
=
"fc"
,
size
=
10
,
active_type
=
"sigmoid"
,
device
=
1
,
inputs
=
"layer1_1"
,
)
fc3
=
fc_layer
(
input
=
fc1
,
size
=
10
,
bias_attr
=
True
,
layer_attr
=
ExtraAttr
(
device
=
1
),
act
=
SigmoidActivation
())
# Calculate in the GPU 0.
Layer
(
name
=
"layer3_1"
,
type
=
"fc"
,
size
=
10
,
device
=
0
,
active_type
=
"sigmoid"
,
inputs
= [
"layer2_1"
,
"layer2_2"
],
)
fc4
=
fc_layer
(
input
=[
fc2
,
fc3
],
size
=
10
,
bias_attr
=
True
,
layer_attr
=
ExtraAttr
(
device
=
0
),
act
=
SigmoidActivation
())
# Calculate in the GPU 1.
Layer
(
name
=
"layer3_2"
,
type
=
"fc"
,
size
=
10
,
device
=
1
,
active_type
=
"sigmoid"
,
inputs
= [
"layer2_1"
,
"layer2_2"
],
)
fc5
=
fc_layer
(
input
=[
fc2
,
fc3
],
size
=
10
,
bias_attr
=
True
,
layer_attr
=
ExtraAttr
(
device
=
1
),
act
=
SigmoidActivation
())
Layer
(
name
=
"output"
,
type
=
"fc"
,
size
=
10
,
device
=
output_device
,
active_type
=
"sigmoid"
,
inputs
= [
"layer3_1"
,
"layer3_2"
],
)
output
=
fc_layer
(
input
=[
fc4
,
fc5
],
size
=
10
,
bias_attr
=
True
,
layer_attr
=
ExtraAttr
(
device
=
output_device
),
act
=
SoftmaxActivation
())
if
get_config_arg
(
'with_cost'
,
bool
,
True
):
# This is for training the neural network.
# We need to have another data layer for label
# and a layer for calculating cost
Layer
(
name
=
"label"
,
type
=
"data"
,
device
=
output_device
,
size
=
1
,
)
Layer
(
name
=
"cost"
,
type
=
"multi-class-cross-entropy"
,
device
=
output_device
,
inputs
= [
"output"
,
"label"
],
)
Evaluator
(
name
=
"error"
,
type
=
"classification_error"
,
inputs
= [
"output"
,
"label"
])
Inputs
(
"input"
,
"label"
)
Outputs
(
"cost"
)
lbl
=
data_layer
(
name
=
'label'
,
size
=
1
,
layer_attr
=
ExtraAttr
(
device
=
output_device
))
outputs
(
classification_cost
(
input
=
output
,
label
=
lbl
,
layer_attr
=
ExtraAttr
(
device
=
output_device
)))
else
:
# This is for prediction where we don't have label
# and don't need to calculate cost
Inputs
(
"input"
)
Outputs
(
"output"
)
outputs
(
output
)
python/paddle/trainer/config_parser.py
浏览文件 @
23e47bb6
...
...
@@ -1279,7 +1279,7 @@ class LayerBase(object):
size
,
dims
=
None
,
sparse
=
None
,
format
=
"csr"
):
format
=
None
):
if
dims
is
None
:
# TODO(yuyang18): print warning and callstack here!
dims
=
list
()
...
...
@@ -2074,7 +2074,7 @@ class MaxLayer(LayerBase):
active_type
=
'linear'
,
device
=
None
,
bias
=
False
,
output_max_index
=
Fals
e
):
output_max_index
=
Non
e
):
super
(
MaxLayer
,
self
).
__init__
(
name
,
'max'
,
0
,
inputs
=
inputs
,
device
=
device
)
config_assert
(
len
(
self
.
inputs
)
==
1
,
'MaxLayer must have 1 input'
)
self
.
config
.
trans_type
=
trans_type
...
...
@@ -2083,7 +2083,8 @@ class MaxLayer(LayerBase):
input_layer
=
self
.
get_input_layer
(
input_index
)
self
.
set_layer_size
(
input_layer
.
size
)
self
.
create_bias_parameter
(
bias
,
self
.
config
.
size
)
self
.
config
.
output_max_index
=
output_max_index
if
output_max_index
is
not
None
:
self
.
config
.
output_max_index
=
output_max_index
@
config_layer
(
'maxid'
)
...
...
@@ -2440,7 +2441,7 @@ class MixedLayer(LayerBase):
inputs
,
size
=
0
,
bias
=
True
,
error_clipping_threshold
=
0.0
,
error_clipping_threshold
=
None
,
**
xargs
):
config_assert
(
inputs
,
'inputs cannot be empty'
)
super
(
MixedLayer
,
self
).
__init__
(
...
...
@@ -2510,6 +2511,7 @@ class MixedLayer(LayerBase):
self
.
create_bias_parameter
(
bias
,
self
.
config
.
size
)
if
error_clipping_threshold
is
not
None
:
self
.
config
.
error_clipping_threshold
=
error_clipping_threshold
# like MixedLayer, but no bias parameter
...
...
python/paddle/trainer_config_helpers/activations.py
浏览文件 @
23e47bb6
...
...
@@ -15,8 +15,10 @@
__all__
=
[
"TanhActivation"
,
"SigmoidActivation"
,
"SoftmaxActivation"
,
"IdentityActivation"
,
"LinearActivation"
,
'SequenceSoftmaxActivation'
,
'ExpActivation'
,
"ReluActivation"
,
"BReluActivation"
,
"SoftReluActivation"
,
"STanhActivation"
,
"AbsActivation"
,
"SquareActivation"
,
"BaseActivation"
]
"ReluActivation"
,
"BReluActivation"
,
"SoftReluActivation"
,
"STanhActivation"
,
"AbsActivation"
,
"SquareActivation"
,
"BaseActivation"
]
class
BaseActivation
(
object
):
...
...
@@ -36,6 +38,9 @@ class BaseActivation(object):
self
.
name
=
name
self
.
support_hppl
=
support_hppl
def
__repr__
(
self
):
return
self
.
name
class
TanhActivation
(
BaseActivation
):
"""
...
...
python/paddle/trainer_config_helpers/attrs.py
浏览文件 @
23e47bb6
...
...
@@ -17,6 +17,42 @@ __all__ = ['ParamAttr', 'ExtraAttr', 'ParameterAttribute',
'ExtraLayerAttribute'
]
def
convert_and_compare
(
x
,
Type
):
"""
Convert x to be the same type as Type and then convert back to
check whether there is a loss of information
:param x: object to be checked
:param Type: target type to check x over
"""
return
type
(
x
)(
Type
(
x
))
==
x
def
is_compatible_with
(
x
,
Type
):
"""
Check if x has a type compatible with Type
:param x: object to be checked
:param Type: target type to check x over
"""
if
type
(
x
)
==
Type
:
return
True
try
:
if
float
==
Type
or
int
==
Type
:
# avoid those types that can be converted to float/int but not very
# meaningful and could potentially lead to error
# i.e., str and bool typed value should not be used for initializing float/int variable
if
not
isinstance
(
x
,
str
)
and
not
isinstance
(
x
,
bool
):
return
convert_and_compare
(
x
,
Type
)
elif
bool
==
Type
:
# should not use string type to initialize bool variable
if
not
isinstance
(
x
,
str
):
return
convert_and_compare
(
x
,
Type
)
else
:
return
False
except
:
return
False
class
ParameterAttribute
(
object
):
"""
Parameter Attributes object. To fine-tuning network training process, user
...
...
@@ -65,14 +101,18 @@ class ParameterAttribute(object):
elif
initial_std
is
None
and
initial_mean
is
None
and
initial_max
\
is
None
and
initial_min
is
None
:
self
.
attr
=
{
'initial_smart'
:
True
}
elif
isinstance
(
initial_std
,
float
)
or
isinstance
(
initial_mean
,
float
):
elif
is_compatible_with
(
initial_std
,
float
)
or
\
is_compatible_with
(
initial_mean
,
float
):
self
.
attr
=
dict
()
if
initial_std
is
not
None
:
self
.
attr
[
'initial_std'
]
=
initial_std
if
initial_mean
is
not
None
:
self
.
attr
[
'initial_mean'
]
=
initial_mean
self
.
attr
[
'initial_strategy'
]
=
0
# Gauss Random
elif
isinstance
(
initial_max
,
float
)
and
isinstance
(
initial_min
,
float
):
elif
is_compatible_with
(
initial_max
,
float
)
and
\
is_compatible_with
(
initial_min
,
float
):
initial_max
=
initial_max
initial_min
=
initial_min
assert
initial_min
<
initial_max
initial_mean
=
(
initial_max
+
initial_min
)
/
2
initial_std
=
initial_mean
-
initial_min
...
...
@@ -83,16 +123,16 @@ class ParameterAttribute(object):
else
:
raise
RuntimeError
(
"Unexpected branch."
)
if
not
is_static
and
is
instance
(
l1_rate
,
float
):
if
not
is_static
and
is
_compatible_with
(
l1_rate
,
float
):
self
.
attr
[
'decay_rate_l1'
]
=
l1_rate
if
not
is_static
and
is
instance
(
l2_rate
,
float
):
if
not
is_static
and
is
_compatible_with
(
l2_rate
,
float
):
self
.
attr
[
'decay_rate'
]
=
l2_rate
if
not
is_static
and
is
instance
(
learning_rate
,
float
):
if
not
is_static
and
is
_compatible_with
(
learning_rate
,
float
):
self
.
attr
[
'learning_rate'
]
=
learning_rate
if
not
is_static
and
is
instance
(
momentum
,
float
):
if
not
is_static
and
is
_compatible_with
(
momentum
,
float
):
self
.
attr
[
'momentum'
]
=
momentum
if
name
is
not
None
:
...
...
@@ -134,12 +174,16 @@ class ExtraLayerAttribute(object):
The dropout rate is the zero rate of this mask. The
details of what dropout is please refer to `here
<https://www.cs.toronto.edu/~hinton/absps/
JMLRdropout.pdf>`_
JMLRdropout.pdf>`_
.
:type drop_rate: float
:param device: device ID of layer. device=-1, use CPU. device>0, use GPU.
The details allocation in parallel_nn please refer to `here
<http://www.paddlepaddle.org/doc/ui/cmd_argument/
use_case.html#case-2-specify-layers-in-different-devices>`_.
:type device: int
"""
def
__init__
(
self
,
error_clipping_threshold
=
None
,
drop_rate
=
None
):
def
__init__
(
self
,
error_clipping_threshold
=
None
,
drop_rate
=
None
,
device
=
None
):
self
.
attr
=
dict
()
if
isinstance
(
error_clipping_threshold
,
float
):
assert
error_clipping_threshold
>
0
...
...
@@ -149,6 +193,9 @@ class ExtraLayerAttribute(object):
assert
drop_rate
>
0
self
.
attr
[
"drop_rate"
]
=
drop_rate
if
isinstance
(
device
,
int
):
self
.
attr
[
"device"
]
=
device
def
check
(
self
,
layer_name
):
for
key
in
self
.
attr
:
if
not
hasattr
(
self
,
'can_%s'
%
key
)
or
\
...
...
python/paddle/trainer_config_helpers/layers.py
浏览文件 @
23e47bb6
此差异已折叠。
点击以展开。
python/paddle/trainer_config_helpers/networks.py
浏览文件 @
23e47bb6
...
...
@@ -616,7 +616,7 @@ def lstmemory_group(input, size=None, name=None,
cell states, or hidden states in every time step are accessible to for the
user. This is especially useful in attention model. If you do not need to
access to the internal states of the lstm, but merely use its outputs,
it is recomm
a
nded to use the lstmemory, which is relatively faster than
it is recomm
e
nded to use the lstmemory, which is relatively faster than
lstmemory_group.
NOTE: In PaddlePaddle's implementation, the following input-to-hidden
...
...
@@ -1052,7 +1052,7 @@ def dropout_layer(input, dropout_rate, name=None):
layer_attr
=
ExtraAttr
(
drop_rate
=
dropout_rate
))
def
outputs
(
layers
):
def
outputs
(
layers
,
*
args
):
"""
Declare the end of network. Currently it will only calculate the
input/output order of network. It will calculate the predict network or
...
...
@@ -1089,9 +1089,12 @@ def outputs(layers):
if
isinstance
(
layers
,
LayerOutput
):
layers
=
[
layers
]
if
len
(
args
)
!=
0
:
layers
.
extend
(
args
)
assert
len
(
layers
)
>
0
if
len
(
layers
)
!=
1
:
logger
.
warning
(
"
EndOfNetwork
routine try to calculate network's"
logger
.
warning
(
"
`outputs`
routine try to calculate network's"
" inputs and outputs order. It might not work well."
"Please see follow log carefully."
)
inputs
=
[]
...
...
python/paddle/trainer_config_helpers/poolings.py
浏览文件 @
23e47bb6
...
...
@@ -47,9 +47,14 @@ class MaxPooling(BasePoolingType):
.. math::
max(samples
\\
_of
\\
_a
\\
_sequence)
:param output_max_index: True if output sequence max index instead of max
value. None means use default value in proto.
:type output_max_index: bool|None
"""
def
__init__
(
self
):
def
__init__
(
self
,
output_max_index
=
None
):
BasePoolingType
.
__init__
(
self
,
"max"
)
self
.
output_max_index
=
output_max_index
class
AvgPooling
(
BasePoolingType
):
...
...
python/paddle/trainer_config_helpers/tests/CMakeLists.txt
浏览文件 @
23e47bb6
...
...
@@ -3,3 +3,8 @@ add_test(NAME layers_test
COMMAND
${
PROJ_ROOT
}
/paddle/.set_python_path.sh -d
${
PROJ_ROOT
}
/python/
python
${
PROJ_ROOT
}
/python/paddle/trainer_config_helpers/tests/layers_test.py
WORKING_DIRECTORY
${
PROJ_ROOT
}
/python/paddle
)
# Register the trainer_config_helpers config check: the test command is the
# run_tests.sh script in the tests/configs directory under PROJ_ROOT.
add_test(NAME test_layerHelpers
  COMMAND
  ${PROJ_ROOT}/python/paddle/trainer_config_helpers/tests/configs/run_tests.sh
)
python/paddle/trainer_config_helpers/tests/configs/.gitignore
0 → 100644
浏览文件 @
23e47bb6
*protostr
python/paddle/trainer_config_helpers/tests/configs/check.md5
0 → 100644
浏览文件 @
23e47bb6
7e6919d17562516e9a1d9a88de1fb3b9 img_layers.protostr
a5d9259ff1fd7ca23d0ef090052cb1f2 last_first_seq.protostr
9c038249ec8ff719753a746cdb04c026 layer_activations.protostr
5913f87b39cee3b2701fa158270aca26 projections.protostr
6b39e34beea8dfb782bee9bd3dea9eb5 simple_rnn_layers.protostr
0fc1409600f1a3301da994ab9d28b0bf test_cost_layers.protostr
144bc6d3a509de74115fa623741797ed test_expand_layer.protostr
2378518bdb71e8c6e888b1842923df58 test_fc.protostr
8bb44e1e5072d0c261572307e7672bda test_grumemory_layer.protostr
1f3510672dce7a9ed25317fc58579ac7 test_hsigmoid.protostr
d350bd91a0dc13e854b1364c3d9339c6 test_lstmemory_layer.protostr
251a948ba41c1071afcd3d9cf9c233f7 test_ntm_layers.protostr
e6ff04e70aea27c7b06d808cc49c9497 test_print_layer.protostr
2a75dd33b640c49a8821c2da6e574577 test_rnn_group.protostr
67d6fde3afb54f389d0ce4ff14726fe1 test_sequence_pooling.protostr
f586a548ef4350ba1ed47a81859a64cb unused_layers.protostr
8122477f4f65244580cec09edc590041 util_layers.protostr
python/paddle/trainer_config_helpers/tests/configs/generate_protostr.sh
0 → 100755
浏览文件 @
23e47bb6
#!/bin/bash
# Regenerate the .protostr dump of every trainer-config test case in this
# directory: each <name>.py listed in `configs` is passed to
# `python -m paddle.utils.dump_config` and the output is written to
# <name>.protostr next to it.
set -e

# Work from the script's own directory. The expansion is quoted so a checkout
# path containing spaces or glob characters is not word-split.
cd "$(dirname "$0")"

# Make the directory four levels above this one importable by Python.
export PYTHONPATH="$PWD/../../../../"

configs=(test_fc layer_activations projections test_print_layer
test_sequence_pooling test_lstmemory_layer test_grumemory_layer
last_first_seq test_expand_layer test_ntm_layers test_hsigmoid
img_layers util_layers simple_rnn_layers unused_layers test_cost_layers
test_rnn_group)

# "${configs[@]}" yields exactly one word per array element.
for conf in "${configs[@]}"
do
    # Two arguments on purpose: keeps the original log text unchanged.
    echo "Generating " "$conf"
    python -m paddle.utils.dump_config "$conf.py" > "$conf.protostr"
done
python/paddle/trainer_config_helpers/tests/configs/img_layers.py
0 → 100644
浏览文件 @
23e47bb6
# Trainer-config test case: builds a convolution, batch-norm, cmrnorm and
# pooling layer on top of one image data layer and declares two outputs.
from paddle.trainer_config_helpers import *

settings(
    learning_rate=1e-3,
    batch_size=1000
)

img = data_layer(name='image', size=256 * 256)

# filter_size, padding and stride are each given as a two-element tuple.
img_conv = img_conv_layer(input=img, num_channels=1, num_filters=64,
                          filter_size=(32, 64), padding=(1, 0), stride=(1, 1),
                          act=LinearActivation())
img_bn = batch_norm_layer(input=img_conv, act=ReluActivation())

img_norm = img_cmrnorm_layer(input=img_bn, size=32)

# NOTE(review): the pooling layer reads img_conv directly rather than img_bn
# or img_norm -- presumably intentional so both branches are outputs; confirm.
img_pool = img_pool_layer(input=img_conv, pool_size=32, pool_type=MaxPooling())

outputs(img_pool, img_norm)
\ No newline at end of file
python/paddle/trainer_config_helpers/tests/configs/last_first_seq.py
0 → 100644
浏览文件 @
23e47bb6
# Trainer-config test case: applies first_seq and last_seq to one data layer
# at every aggregate level and declares all four results as outputs.
from paddle.trainer_config_helpers import *

settings(batch_size=1000, learning_rate=1e-5)

din = data_layer(name='data', size=30)

seq_ops = (first_seq, last_seq)
agg_levels = (AggregateLevel.EACH_SEQUENCE, AggregateLevel.EACH_TIMESTEP)

# Outer loop over the operation, inner loop over the level, so layers are
# created in the order: first/SEQUENCE, first/TIMESTEP, last/SEQUENCE, ...
opts = [seq_op(input=din, agg_level=level)
        for seq_op in seq_ops
        for level in agg_levels]

outputs(opts)
\ No newline at end of file
python/paddle/trainer_config_helpers/tests/configs/layer_activations.py
0 → 100644
浏览文件 @
23e47bb6
'''
Test all activations: one fc_layer per activation class, all fed by the
same data layer.
'''

from paddle.trainer_config_helpers import *

settings(
    learning_rate=1e-4,
    batch_size=1000
)

din = data_layer(name='input', size=100)

activation_classes = [
    TanhActivation,
    SigmoidActivation,
    SoftmaxActivation,
    IdentityActivation,
    LinearActivation,
    ExpActivation,
    ReluActivation,
    BReluActivation,
    SoftReluActivation,
    STanhActivation,
    AbsActivation,
    SquareActivation,
]

# Each layer is named after the position of its activation in the list.
layers = []
for index, activation_cls in enumerate(activation_classes):
    layers.append(fc_layer(input=din, size=100, act=activation_cls(),
                           name="layer_%d" % index))

outputs(layers)
python/paddle/trainer_config_helpers/tests/configs/projections.py
0 → 100644
浏览文件 @
23e47bb6
'''
Test mixed layer, projections and operators.
'''
from paddle.trainer_config_helpers import *

settings(
    batch_size=1000,
    learning_rate=1e-4
)

din = data_layer(name='test', size=100)

# `din` is rebound: from here on it refers to the embedding layer.
din = embedding_layer(input=din, size=256)

# m1..m4 form a chain; each mixed layer takes one projection of the previous.
with mixed_layer(size=100) as m1:
    m1 += full_matrix_projection(input=din)

with mixed_layer(size=100) as m2:
    m2 += table_projection(input=m1)

with mixed_layer(size=100) as m3:
    m3 += identity_projection(input=m2)

with mixed_layer(size=100) as m4:
    m4 += dotmul_projection(input=m3)

# From m5 on, mixed_layer() is created without an explicit size.
with mixed_layer() as m5:
    m5 += context_projection(input=m4, context_len=3)

with mixed_layer() as m6:
    m6 += dotmul_operator(a=m3, b=m4)

# Separate data layers feeding the convolution operator below.
img = data_layer(name='img', size=32 * 32)
flt = data_layer(name='filter', size=3 * 3 * 1 * 64)

with mixed_layer() as m7:
    m7 += conv_operator(img=img, filter=flt, num_filters=64,
                        num_channel=1, filter_size=3)

# NOTE(review): error_clipping_threshold is the int 40. The
# ExtraLayerAttribute constructor (presumably what ExtraAttr aliases) only
# handles this option inside an isinstance(..., float) check -- confirm
# whether 40.0 was intended. Changing it may alter the generated protostr
# whose checksum is recorded in check.md5.
end = mixed_layer(input=[full_matrix_projection(input=m5),
                         trans_full_matrix_projection(input=m6),
                         full_matrix_projection(input=m7)],
                  size=100,
                  layer_attr=ExtraAttr(drop_rate=0.5,
                                       error_clipping_threshold=40))

outputs(end)
python/paddle/trainer_config_helpers/tests/configs/run_tests.sh
0 → 100755
浏览文件 @
23e47bb6
#!/bin/bash
# Regenerate every .protostr in this directory, then verify each one against
# the checksums recorded in check.md5. Exits non-zero on the first failure.

# Fail-fast is enabled before the first command so that a failed `cd` aborts
# the run instead of letting the remaining steps execute from the caller's
# working directory.
set -e

# Quoted so a checkout path containing spaces is not word-split.
cd "$(dirname "$0")"

./generate_protostr.sh
md5sum -c check.md5
python/paddle/trainer_config_helpers/tests/configs/simple_rnn_layers.py
0 → 100644
浏览文件 @
23e47bb6
# Trainer-config test case: builds recurrent_layer, lstmemory and grumemory
# layers, each once with default direction and once with reverse=True, on top
# of a shared fc_layer.
from paddle.trainer_config_helpers import *

settings(
    batch_size=1000,
    learning_rate=1e-4
)

din = data_layer(name='data', size=200)

hidden = fc_layer(input=din, size=200, act=SigmoidActivation())

rnn = recurrent_layer(input=hidden, act=SigmoidActivation())

rnn2 = recurrent_layer(input=hidden, act=SigmoidActivation(), reverse=True)

# Each lstmemory is fed by its own bias-free linear fc_layer of width 200 * 4.
lstm1_param = fc_layer(input=hidden, size=200 * 4, act=LinearActivation(),
                       bias_attr=False)

lstm1 = lstmemory(input=lstm1_param, act=SigmoidActivation())

lstm2_param = fc_layer(input=hidden, size=200 * 4, act=LinearActivation(),
                       bias_attr=False)

lstm2 = lstmemory(input=lstm2_param, act=SigmoidActivation(), reverse=True)

# Each grumemory is fed by its own bias-free linear fc_layer of width 200 * 3.
gru1_param = fc_layer(input=hidden, size=200 * 3, act=LinearActivation(),
                      bias_attr=False)
gru1 = grumemory(input=gru1_param, act=SigmoidActivation())

gru2_param = fc_layer(input=hidden, size=200 * 3, act=LinearActivation(),
                      bias_attr=False)
gru2 = grumemory(input=gru2_param, act=SigmoidActivation(), reverse=True)

# last_seq is applied to the default-direction layers and first_seq to the
# reverse=True ones. Note the final first_seq call passes gru2 positionally,
# unlike the keyword form used by the other calls.
outputs(last_seq(input=rnn), first_seq(input=rnn2),
        last_seq(input=lstm1), first_seq(input=lstm2),
        last_seq(input=gru1), first_seq(gru2))
python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.py
0 → 100644
浏览文件 @
23e47bb6
# Trainer-config test case: declares one instance of each cost layer listed
# in the outputs() call below.
from paddle.trainer_config_helpers import *

settings(
    learning_rate=1e-4,
    batch_size=1000
)

# Inputs for ctc_layer (seq_in is also reused by the crf_layer branch).
seq_in = data_layer(name='input', size=200)
labels = data_layer(name='labels', size=5000)

# Shared by cross_entropy, cross_entropy_with_selfnorm and
# multi_binary_label_cross_entropy.
probs = data_layer(name='probs', size=10)
xe_label = data_layer(name='xe-label', size=10)

# The remaining cost layers get dedicated data layers created inline.
outputs(ctc_layer(input=seq_in, label=labels),
        crf_layer(input=fc_layer(input=seq_in, size=4),
                  label=data_layer(name='crf_label', size=4)),
        rank_cost(left=data_layer(name='left', size=1),
                  right=data_layer(name='right', size=1),
                  label=data_layer(name='label', size=1)),
        lambda_cost(input=data_layer(name='list_feature', size=100),
                    score=data_layer(name='list_scores', size=1)),
        cross_entropy(input=probs, label=xe_label),
        cross_entropy_with_selfnorm(input=probs, label=xe_label),
        huber_cost(input=data_layer(name='huber_probs', size=1),
                   label=data_layer(name='huber_label', size=1)),
        multi_binary_label_cross_entropy(input=probs, label=xe_label))
python/paddle/trainer_config_helpers/tests/configs/test_expand_layer.py
0 → 100644
浏览文件 @
23e47bb6
# Trainer-config test case: expand_layer with both ExpandLevel values.
from paddle.trainer_config_helpers import *

settings(
    batch_size=1000,
    learning_rate=1e-5
)

din = data_layer(name='data', size=30)
# Passed as `expand_as` to both expand_layer calls.
data_seq = data_layer(name='data_seq', size=30)

outputs(expand_layer(input=din, expand_as=data_seq,
                     expand_level=ExpandLevel.FROM_SEQUENCE),
        expand_layer(input=din, expand_as=data_seq,
                     expand_level=ExpandLevel.FROM_TIMESTEP))
python/paddle/trainer_config_helpers/tests/configs/test_fc.py
0 → 100644
浏览文件 @
23e47bb6
# Trainer-config test case: a bias-free fc_layer on a trans_layer, and a
# selective_fc_layer on the raw data layer.
from paddle.trainer_config_helpers import *

settings(
    batch_size=1000,
    learning_rate=1e-5
)

din = data_layer(name='data', size=100)

trans = trans_layer(input=din)

hidden = fc_layer(input=trans, size=100,
                  bias_attr=False)

# Passed as the `select` argument of selective_fc_layer.
mask = data_layer(name='mask', size=100)

hidden_sel = selective_fc_layer(input=din, select=mask, size=100,
                                act=SigmoidActivation())

outputs(hidden, hidden_sel)
python/paddle/trainer_config_helpers/tests/configs/test_grumemory_layer.py
0 → 100644
浏览文件 @
23e47bb6
# Trainer-config test case: one reversed grumemory layer with explicit gate
# and output activations.
from paddle.trainer_config_helpers import *

settings(
    batch_size=1000,
    learning_rate=1e-4
)

# NOTE(review): the input width 120 is 3x the grumemory size 40 --
# presumably a requirement of grumemory; confirm against its definition.
din = data_layer(name='data', size=120)

outputs(grumemory(input=din, size=40, reverse=True, gate_act=TanhActivation(),
                  act=SigmoidActivation()))
python/paddle/trainer_config_helpers/tests/configs/test_hsigmoid.py
0 → 100644
浏览文件 @
23e47bb6
# Trainer-config test case: a single hsigmoid layer with 10 classes.
from paddle.trainer_config_helpers import *

settings(
    learning_rate=1e-4,
    batch_size=1000
)

din = data_layer(name='data', size=100)
# The label layer size matches num_classes below.
label = data_layer(name='label', size=10)

outputs(hsigmoid(input=din, label=label, num_classes=10))
\ No newline at end of file
python/paddle/trainer_config_helpers/tests/configs/test_lstmemory_layer.py
0 → 100644
浏览文件 @
23e47bb6
# Trainer-config test case: one reversed lstmemory layer with explicit gate
# and output activations.
from paddle.trainer_config_helpers import *

settings(
    batch_size=1000,
    learning_rate=1e-5
)

# NOTE(review): the input width 128 is 4x the lstmemory size 32 --
# presumably a requirement of lstmemory; confirm against its definition.
din = data_layer(name='data', size=128)

outputs(lstmemory(input=din, reverse=True, gate_act=TanhActivation(),
                  act=TanhActivation(), size=32))
python/paddle/trainer_config_helpers/tests/configs/test_ntm_layers.py
0 → 100644
浏览文件 @
23e47bb6
# Trainer-config test case: instantiates the layers listed in the outputs()
# call below, all fed directly by data layers.
from paddle.trainer_config_helpers import *

settings(
    batch_size=1000,
    learning_rate=1e-5
)

# Size-1 layer used as the `weight` argument of the first three layers.
weight = data_layer(name='w', size=1)
a = data_layer(name='a', size=100)
b = data_layer(name='b', size=100)
c = data_layer(name='c', size=200)
d = data_layer(name='d', size=31)

outputs(interpolation_layer(input=[a, b], weight=weight),
        power_layer(input=a, weight=weight),
        scaling_layer(input=a, weight=weight),
        cos_sim(a=a, b=b),
        # Second cos_sim variant: b is twice as wide as a, with size=2.
        cos_sim(a=a, b=c, size=2),
        sum_to_one_norm_layer(input=a),
        conv_shift_layer(a=a, b=d),
        tensor_layer(a=a, b=b, size=1000),
        slope_intercept_layer(input=a, slope=0.7, intercept=0.9),
        linear_comb_layer(weights=b, vectors=c))
python/paddle/trainer_config_helpers/tests/configs/test_print_layer.py
0 → 100644
浏览文件 @
23e47bb6
# Trainer-config test case: attaches a print_layer to a data layer.
from paddle.trainer_config_helpers import *

settings(
    learning_rate=1e-4,
    batch_size=1000
)

din = data_layer(name='input', size=100)

# The return value of print_layer is not used; only `din` is an output.
print_layer(input=din)

outputs(din)
python/paddle/trainer_config_helpers/tests/configs/test_rnn_group.py
0 → 100644
浏览文件 @
23e47bb6
# Trainer-config test case for recurrent groups: settings and the data
# layers used by the rest of this file.
from paddle.trainer_config_helpers import *

settings(
    learning_rate=1e-4,
    batch_size=1000
)

seq = data_layer(name='seq_input', size=100)
sub_seq = data_layer(name='sub_seq_input', size=100)
# NOTE(review): `lbl` is not referenced again in this file; it still
# declares a data layer named 'label'.
lbl = data_layer(name='label', size=1)
def generate_rnn_simple(name):
    """Build a step function for a recurrent group.

    The returned callable takes the step input ``s``, creates a memory
    called ``name`` (size 200), and returns an fc_layer of size 200 that is
    also called ``name`` and reads both the step input and that memory.
    """
    def rnn_simple(s):
        mem = memory(name=name, size=200)
        return fc_layer(input=[s, mem], size=200, name=name)

    return rnn_simple
# Projection feeding lstmemory_group: width is 100 * 4 for a group size of 100.
with mixed_layer() as lstm_param:  # test lstm unit, rnn group
    lstm_param += full_matrix_projection(input=seq, size=100 * 4)

# Projection feeding gru_group: width is 100 * 3 for a group size of 100.
with mixed_layer() as gru_param:
    gru_param += full_matrix_projection(input=seq, size=100 * 3)

# Five outputs: three recurrent_group variants built from
# generate_rnn_simple (default direction, reverse=True, and a
# SubsequenceInput), then lstmemory_group and gru_group.
outputs(last_seq(input=recurrent_group(step=generate_rnn_simple('rnn_forward'),
                                       input=seq)),
        first_seq(input=recurrent_group(step=generate_rnn_simple('rnn_back'),
                                        input=seq,
                                        reverse=True)),
        last_seq(input=recurrent_group(
            step=generate_rnn_simple('rnn_subseq_forward'),
            input=SubsequenceInput(input=sub_seq))),
        last_seq(input=lstmemory_group(input=lstm_param, size=100)),
        last_seq(input=gru_group(input=gru_param, size=100)))
python/paddle/trainer_config_helpers/tests/configs/test_sequence_pooling.py
0 → 100644
浏览文件 @
23e47bb6
# Trainer-config test case: pooling_layer for every pooling type at every
# aggregate level, plus one MaxPooling layer with output_max_index=True.
from paddle.trainer_config_helpers import *

settings(
    learning_rate=1e-4,
    batch_size=1000
)

din = data_layer(name='dat_in', size=100)

POOL_TYPE = [MaxPooling, AvgPooling, SumPooling]

AGG_LEVEL = [AggregateLevel.EACH_SEQUENCE, AggregateLevel.EACH_TIMESTEP]

# Pooling type varies in the outer loop and aggregate level in the inner one.
opts = [pooling_layer(input=din, agg_level=level, pooling_type=pool_cls())
        for pool_cls in POOL_TYPE
        for level in AGG_LEVEL]

# Extra case: max pooling with output_max_index enabled and no agg_level.
max_index_pool = pooling_layer(input=din,
                               pooling_type=MaxPooling(output_max_index=True))
opts.append(max_index_pool)

outputs(opts)
python/paddle/trainer_config_helpers/tests/configs/unused_layers.py
0 → 100644
浏览文件 @
23e47bb6
# Trainer-config test case for layers not covered by the other config files.
from paddle.trainer_config_helpers import *

settings(
    batch_size=1000,
    learning_rate=1e-4
)

probs = data_layer(name='probs', size=100)

outputs(
    sampling_id_layer(input=probs),  # This layer does not seem to support training.

    # block_expand_layer does not seem to be correct and should be rewritten,
    # so it is left disabled here:
    # block_expand_layer(input=probs, channel=1, block_x=1, block_y=3),
)
\ No newline at end of file
python/paddle/trainer_config_helpers/tests/configs/util_layers.py
0 → 100644
浏览文件 @
23e47bb6
# Trainer-config test case: addto_layer and two forms of concat_layer over
# the same pair of data layers.
from paddle.trainer_config_helpers import *

settings(learning_rate=1e-4, batch_size=1000)

a = data_layer(name='a', size=10)
b = data_layer(name='b', size=10)

result = addto_layer(input=[a, b])
# concat_layer fed with the layers directly ...
concat1 = concat_layer(input=[a, b])
# ... and fed with identity projections of the same layers.
concat2 = concat_layer(input=[
    identity_projection(input=a),
    identity_projection(input=b)
])

outputs(result, concat1, concat2)
\ No newline at end of file
python/paddle/trainer_config_helpers/tests/layers_test_config.py
浏览文件 @
23e47bb6
...
...
@@ -23,6 +23,15 @@ z = out_prod_layer(input1=x, input2=y)
x1
=
fc_layer
(
input
=
x
,
size
=
5
)
y1
=
fc_layer
(
input
=
y
,
size
=
5
)
z1
=
mixed_layer
(
act
=
LinearActivation
(),
input
=
[
conv_operator
(
img
=
x1
,
filter
=
y1
,
filter_size
=
1
,
num_filters
=
5
,
num_channel
=
5
,
stride
=
1
)])
y2
=
fc_layer
(
input
=
y
,
size
=
15
)
cos1
=
cos_sim
(
a
=
x1
,
b
=
y1
)
...
...
@@ -30,7 +39,7 @@ cos3 = cos_sim(a=x1, b=y2, size=3)
linear_comb
=
linear_comb_layer
(
weights
=
x1
,
vectors
=
y2
,
size
=
3
)
out
=
fc_layer
(
input
=
[
cos1
,
cos3
,
linear_comb
,
z
],
out
=
fc_layer
(
input
=
[
cos1
,
cos3
,
linear_comb
,
z
,
z1
],
size
=
num_classes
,
act
=
SoftmaxActivation
())
...
...
@@ -38,11 +47,21 @@ print_layer(input=[out])
outputs
(
classification_cost
(
out
,
data_layer
(
name
=
"label"
,
size
=
num_classes
)))
dotmul
=
mixed_layer
(
input
=
[
dotmul_operator
(
x
=
x1
,
y
=
y
1
),
dotmul
=
mixed_layer
(
input
=
[
dotmul_operator
(
a
=
x1
,
b
=
x
1
),
dotmul_projection
(
input
=
y1
)])
proj_with_attr_init
=
mixed_layer
(
input
=
full_matrix_projection
(
input
=
y1
,
param_attr
=
ParamAttr
(
learning_rate
=
0
,
initial_mean
=
0
,
initial_std
=
0
)),
bias_attr
=
ParamAttr
(
initial_mean
=
0
,
initial_std
=
0
,
learning_rate
=
0
),
act
=
LinearActivation
(),
size
=
5
,
name
=
'proj_with_attr_init'
)
# for ctc
tmp
=
fc_layer
(
input
=
[
x1
,
dotmul
],
tmp
=
fc_layer
(
input
=
[
x1
,
dotmul
,
proj_with_attr_init
],
size
=
num_classes
+
1
,
act
=
SoftmaxActivation
())
ctc
=
ctc_layer
(
input
=
tmp
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录