Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
58e1b3b3
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
58e1b3b3
编写于
11月 11, 2016
作者:
Y
Yu Yang
提交者:
GitHub
11月 11, 2016
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #446 from QiJune/format_py_code_2nd
format python code in python directory
上级
ef5e483c
a1ba3f44
变更
54
显示空白变更内容
内联
并排
Showing
54 changed file
with
3498 addition
and
2926 deletion
+3498
-2926
python/paddle/__init__.py
python/paddle/__init__.py
+0
-1
python/paddle/trainer/PyDataProvider2.py
python/paddle/trainer/PyDataProvider2.py
+19
-16
python/paddle/trainer/PyDataProviderWrapper.py
python/paddle/trainer/PyDataProviderWrapper.py
+22
-13
python/paddle/trainer/__init__.py
python/paddle/trainer/__init__.py
+0
-1
python/paddle/trainer/config_parser.py
python/paddle/trainer/config_parser.py
+979
-1029
python/paddle/trainer/config_parser_extension.py
python/paddle/trainer/config_parser_extension.py
+5
-5
python/paddle/trainer/recurrent_units.py
python/paddle/trainer/recurrent_units.py
+259
-230
python/paddle/trainer_config_helpers/activations.py
python/paddle/trainer_config_helpers/activations.py
+33
-18
python/paddle/trainer_config_helpers/attrs.py
python/paddle/trainer_config_helpers/attrs.py
+25
-12
python/paddle/trainer_config_helpers/data_sources.py
python/paddle/trainer_config_helpers/data_sources.py
+34
-21
python/paddle/trainer_config_helpers/default_decorators.py
python/paddle/trainer_config_helpers/default_decorators.py
+11
-8
python/paddle/trainer_config_helpers/evaluators.py
python/paddle/trainer_config_helpers/evaluators.py
+118
-123
python/paddle/trainer_config_helpers/layers.py
python/paddle/trainer_config_helpers/layers.py
+864
-569
python/paddle/trainer_config_helpers/math.py
python/paddle/trainer_config_helpers/math.py
+20
-7
python/paddle/trainer_config_helpers/networks.py
python/paddle/trainer_config_helpers/networks.py
+477
-295
python/paddle/trainer_config_helpers/optimizers.py
python/paddle/trainer_config_helpers/optimizers.py
+23
-28
python/paddle/trainer_config_helpers/poolings.py
python/paddle/trainer_config_helpers/poolings.py
+13
-10
python/paddle/trainer_config_helpers/tests/configs/img_layers.py
...paddle/trainer_config_helpers/tests/configs/img_layers.py
+10
-9
python/paddle/trainer_config_helpers/tests/configs/img_trans_layers.py
.../trainer_config_helpers/tests/configs/img_trans_layers.py
+11
-9
python/paddle/trainer_config_helpers/tests/configs/last_first_seq.py
...le/trainer_config_helpers/tests/configs/last_first_seq.py
+4
-13
python/paddle/trainer_config_helpers/tests/configs/layer_activations.py
...trainer_config_helpers/tests/configs/layer_activations.py
+8
-8
python/paddle/trainer_config_helpers/tests/configs/math_ops.py
...n/paddle/trainer_config_helpers/tests/configs/math_ops.py
+2
-6
python/paddle/trainer_config_helpers/tests/configs/projections.py
...addle/trainer_config_helpers/tests/configs/projections.py
+14
-15
python/paddle/trainer_config_helpers/tests/configs/shared_fc.py
.../paddle/trainer_config_helpers/tests/configs/shared_fc.py
+17
-10
python/paddle/trainer_config_helpers/tests/configs/shared_lstm.py
...addle/trainer_config_helpers/tests/configs/shared_lstm.py
+20
-8
python/paddle/trainer_config_helpers/tests/configs/simple_rnn_layers.py
...trainer_config_helpers/tests/configs/simple_rnn_layers.py
+16
-15
python/paddle/trainer_config_helpers/tests/configs/test_bi_grumemory.py
...trainer_config_helpers/tests/configs/test_bi_grumemory.py
+1
-4
python/paddle/trainer_config_helpers/tests/configs/test_bilinear_interp.py
...iner_config_helpers/tests/configs/test_bilinear_interp.py
+17
-20
python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.py
.../trainer_config_helpers/tests/configs/test_cost_layers.py
+31
-18
python/paddle/trainer_config_helpers/tests/configs/test_cost_layers_with_weight.py
...fig_helpers/tests/configs/test_cost_layers_with_weight.py
+6
-6
python/paddle/trainer_config_helpers/tests/configs/test_expand_layer.py
...trainer_config_helpers/tests/configs/test_expand_layer.py
+6
-8
python/paddle/trainer_config_helpers/tests/configs/test_fc.py
...on/paddle/trainer_config_helpers/tests/configs/test_fc.py
+4
-8
python/paddle/trainer_config_helpers/tests/configs/test_grumemory_layer.py
...iner_config_helpers/tests/configs/test_grumemory_layer.py
+8
-6
python/paddle/trainer_config_helpers/tests/configs/test_hsigmoid.py
...dle/trainer_config_helpers/tests/configs/test_hsigmoid.py
+2
-5
python/paddle/trainer_config_helpers/tests/configs/test_lstmemory_layer.py
...iner_config_helpers/tests/configs/test_lstmemory_layer.py
+8
-6
python/paddle/trainer_config_helpers/tests/configs/test_maxout.py
...addle/trainer_config_helpers/tests/configs/test_maxout.py
+28
-40
python/paddle/trainer_config_helpers/tests/configs/test_ntm_layers.py
...e/trainer_config_helpers/tests/configs/test_ntm_layers.py
+21
-14
python/paddle/trainer_config_helpers/tests/configs/test_print_layer.py
.../trainer_config_helpers/tests/configs/test_print_layer.py
+1
-4
python/paddle/trainer_config_helpers/tests/configs/test_rnn_group.py
...le/trainer_config_helpers/tests/configs/test_rnn_group.py
+13
-12
python/paddle/trainer_config_helpers/tests/configs/test_sequence_pooling.py
...ner_config_helpers/tests/configs/test_sequence_pooling.py
+6
-15
python/paddle/trainer_config_helpers/tests/configs/test_split_datasource.py
...ner_config_helpers/tests/configs/test_split_datasource.py
+6
-8
python/paddle/trainer_config_helpers/tests/configs/test_spp_layer.py
...le/trainer_config_helpers/tests/configs/test_spp_layer.py
+7
-9
python/paddle/trainer_config_helpers/tests/configs/unused_layers.py
...dle/trainer_config_helpers/tests/configs/unused_layers.py
+2
-5
python/paddle/trainer_config_helpers/tests/configs/util_layers.py
...addle/trainer_config_helpers/tests/configs/util_layers.py
+3
-5
python/paddle/trainer_config_helpers/tests/layers_test_config.py
...paddle/trainer_config_helpers/tests/layers_test_config.py
+35
-30
python/paddle/trainer_config_helpers/utils.py
python/paddle/trainer_config_helpers/utils.py
+2
-2
python/paddle/utils/image_util.py
python/paddle/utils/image_util.py
+45
-31
python/paddle/utils/make_model_diagram.py
python/paddle/utils/make_model_diagram.py
+8
-9
python/paddle/utils/plotcurve.py
python/paddle/utils/plotcurve.py
+26
-14
python/paddle/utils/predefined_net.py
python/paddle/utils/predefined_net.py
+128
-112
python/paddle/utils/preprocess_img.py
python/paddle/utils/preprocess_img.py
+20
-17
python/paddle/utils/preprocess_util.py
python/paddle/utils/preprocess_util.py
+40
-25
python/paddle/utils/show_pb.py
python/paddle/utils/show_pb.py
+3
-6
python/paddle/utils/torch2paddle.py
python/paddle/utils/torch2paddle.py
+17
-8
未找到文件。
python/paddle/__init__.py
浏览文件 @
58e1b3b3
...
...
@@ -11,4 +11,3 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
python/paddle/trainer/PyDataProvider2.py
浏览文件 @
58e1b3b3
...
...
@@ -18,8 +18,7 @@ import collections
import
functools
import
itertools
logging
.
basicConfig
(
format
=
"[%(levelname)s %(asctime)s %(filename)s:%(lineno)s]"
logging
.
basicConfig
(
format
=
"[%(levelname)s %(asctime)s %(filename)s:%(lineno)s]"
" %(message)s"
)
...
...
@@ -132,8 +131,10 @@ class InputOrderWrapper(object):
def
__call__
(
self
,
obj
,
filename
):
for
item
in
self
.
generator
(
obj
,
filename
):
if
isinstance
(
item
,
dict
):
yield
[
item
.
get
(
input_name
,
None
)
for
input_name
in
self
.
input_order
]
yield
[
item
.
get
(
input_name
,
None
)
for
input_name
in
self
.
input_order
]
else
:
yield
item
...
...
@@ -162,8 +163,8 @@ class CheckWrapper(object):
yield
items
except
AssertionError
as
e
:
self
.
logger
.
warning
(
"Item (%s) is not fit the input type with error %s"
%
(
repr
(
item
),
repr
(
e
)))
"Item (%s) is not fit the input type with error %s"
%
(
repr
(
item
),
repr
(
e
)))
if
self
.
check_fail_continue
:
continue
...
...
@@ -202,13 +203,17 @@ class CheckWrapper(object):
callback
(
each
)
def
provider
(
input_types
=
None
,
should_shuffle
=
None
,
pool_size
=-
1
,
def
provider
(
input_types
=
None
,
should_shuffle
=
None
,
pool_size
=-
1
,
min_pool_size
=-
1
,
can_over_batch_size
=
True
,
calc_batch_size
=
None
,
cache
=
CacheType
.
NO_CACHE
,
check
=
False
,
check_fail_continue
=
False
,
init_hook
=
None
,
**
kwargs
):
check
=
False
,
check_fail_continue
=
False
,
init_hook
=
None
,
**
kwargs
):
"""
Provider decorator. Use it to make a function into PyDataProvider2 object.
In this function, user only need to get each sample for some train/test
...
...
@@ -318,8 +323,8 @@ def provider(input_types=None, should_shuffle=None, pool_size=-1,
"Could not recognize should_shuffle (%s), "
"just use default value of should_shuffle."
" Please set should_shuffle to bool value or "
"something in %s"
%
(
repr
(
self
.
should_shuffle
),
"something in %s"
%
(
repr
(
self
.
should_shuffle
),
repr
(
true_table
+
false_table
)))
self
.
should_shuffle
=
None
...
...
@@ -351,8 +356,7 @@ def provider(input_types=None, should_shuffle=None, pool_size=-1,
self
.
generator
=
InputOrderWrapper
(
self
.
generator
,
self
.
input_order
)
if
self
.
check
:
self
.
generator
=
CheckWrapper
(
self
.
generator
,
self
.
slots
,
self
.
generator
=
CheckWrapper
(
self
.
generator
,
self
.
slots
,
check_fail_continue
,
self
.
logger
)
...
...
@@ -368,4 +372,3 @@ def deserialize_args(args):
:return:
"""
return
cPickle
.
loads
(
args
)
python/paddle/trainer/PyDataProviderWrapper.py
浏览文件 @
58e1b3b3
...
...
@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This module provide a wrapper(decorator) to wrap a data process method into a
PyDataProvider. Some examples are shown `here <data_provider/python_case.html>`_.
...
...
@@ -47,6 +46,7 @@ except ImportError:
import
io
class
SlotType
(
object
):
# Just a hint for user.
pass
...
...
@@ -83,6 +83,7 @@ class SparseNonValueSlot(SlotType):
- **SubSeq**: [[[int, int, ...], [int, ....], ...] ,
\
[[int, int, ...], [int, ....], ...] , ...]
"""
def
__init__
(
self
,
dim
):
"""
:param dim: slot dimension
...
...
@@ -294,8 +295,9 @@ class GeneralPyDataProvider:
fn
=
"%s_%d"
%
(
self
.
profile_filename
,
self
.
profile_count
)
sortby
=
"cumulative"
with
open
(
fn
,
"w"
)
as
f
:
pstats
.
Stats
(
self
.
profiler
,
stream
=
f
).
sort_stats
(
sortby
).
print_stats
()
pstats
.
Stats
(
self
.
profiler
,
stream
=
f
).
sort_stats
(
sortby
).
print_stats
()
self
.
logger
.
info
(
"saving profile to file %s"
%
fn
)
self
.
profile_count
+=
1
self
.
logger
.
info
(
"resetting profile"
)
...
...
@@ -453,9 +455,10 @@ class GeneralPyDataProvider:
seq_stream
.
flush
()
subseq_stream
.
flush
()
return
""
.
join
([
self
.
int_packer
.
pack
(
current_batch_size
),
data_bytes
.
getvalue
(),
seq_bytes
.
getvalue
(),
subseq_bytes
.
getvalue
()])
return
""
.
join
([
self
.
int_packer
.
pack
(
current_batch_size
),
data_bytes
.
getvalue
(),
seq_bytes
.
getvalue
(),
subseq_bytes
.
getvalue
()
])
finally
:
data_stream
.
close
()
...
...
@@ -516,7 +519,7 @@ class GeneralPyDataProvider:
self
.
data_pool
[
idx
])
idx
-=
1
ret_list
+=
self
.
data_pool
[
self
.
data_pool_idx
:
idx
+
1
]
ret_list
+=
self
.
data_pool
[
self
.
data_pool_idx
:
idx
+
1
]
# for speed reason, just shift left index, not delete data actually.
self
.
data_pool_idx
=
idx
+
1
...
...
@@ -537,8 +540,8 @@ class GeneralPyDataProvider:
if
self
.
max_pool_size
==
0
:
for
i
in
xrange
(
min
(
self
.
file_count
,
len
(
self
.
generators
))):
self
.
data_pool
+=
list
(
self
.
generators
[
i
])
self
.
generators
=
self
.
generators
[
min
(
self
.
file_count
,
len
(
self
.
generators
)):]
self
.
generators
=
self
.
generators
[
min
(
self
.
file_count
,
len
(
self
.
generators
)):]
self
.
max_pool_size
=
len
(
self
.
data_pool
)
else
:
while
len
(
self
.
data_pool
)
<
self
.
max_pool_size
and
len
(
...
...
@@ -562,9 +565,15 @@ def default_init_hook(cls, *args, **kwargs):
del
cls
,
args
,
kwargs
def
provider
(
slots
=
None
,
use_seq
=
False
,
should_shuffle
=
True
,
pool_size
=
1
,
can_over_batch_size
=
True
,
calc_batch_size
=
lambda
data
:
1
,
debug
=
False
,
init_hook
=
default_init_hook
,
profile_filename
=
None
):
def
provider
(
slots
=
None
,
use_seq
=
False
,
should_shuffle
=
True
,
pool_size
=
1
,
can_over_batch_size
=
True
,
calc_batch_size
=
lambda
data
:
1
,
debug
=
False
,
init_hook
=
default_init_hook
,
profile_filename
=
None
):
"""
The decorator for PyDataProvider. User should use this to create Provider class.
User should only concern how to read sample from file.
...
...
python/paddle/trainer/__init__.py
浏览文件 @
58e1b3b3
...
...
@@ -11,4 +11,3 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
python/paddle/trainer/config_parser.py
浏览文件 @
58e1b3b3
...
...
@@ -13,7 +13,6 @@
# limitations under the License.
from
__future__
import
print_function
'''
The following functions are available in the config file:
...
...
@@ -101,50 +100,45 @@ except Exception as e:
raise
logging
.
basicConfig
(
format
=
'[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s'
,
)
format
=
'[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s'
,
)
logger
=
logging
.
getLogger
(
'paddle'
)
logger
.
setLevel
(
logging
.
INFO
)
__real_print__
=
print
print
=
logger
.
info
print
=
logger
.
info
# from layer type name to layer class
g_layer_type_map
=
{}
# Initialize global variables. We use this function so that we can
# call parse_config() multiple times
def
init_config_environment
(
g_default_momentum
=
None
,
g_default_decay_rate
=
None
,
g_default_initial_mean
=
0.
,
g_default_initial_std
=
0.01
,
g_default_num_batches_regularization
=
None
,
g_default_initial_strategy
=
0
,
g_default_initial_smart
=
False
,
g_default_gradient_clipping_threshold
=
None
,
g_default_device
=
None
,
g_default_update_hooks
=
None
,
g_default_compact_func
=
None
,
g_config
=
TrainerConfig
(),
g_layer_map
=
{},
g_parameter_map
=
{},
g_extended_config_funcs
=
{},
g_default_momentum
=
None
,
g_default_decay_rate
=
None
,
g_default_initial_mean
=
0.
,
g_default_initial_std
=
0.01
,
g_default_num_batches_regularization
=
None
,
g_default_initial_strategy
=
0
,
g_default_initial_smart
=
False
,
g_default_gradient_clipping_threshold
=
None
,
g_default_device
=
None
,
g_default_update_hooks
=
None
,
g_default_compact_func
=
None
,
g_config
=
TrainerConfig
(),
g_layer_map
=
{},
g_parameter_map
=
{},
g_extended_config_funcs
=
{},
# store command args of paddle_trainer
g_command_config_args
=
{},
g_command_config_args
=
{},
# Used for PyDataProvider to avoid duplicate module name
g_py_module_name_list
=
[],
g_current_submodel
=
None
,
g_root_submodel
=
None
,
g_submodel_map
=
{},
g_submodel_stack
=
[],
g_add_submodel_suffix
=
False
,
):
g_py_module_name_list
=
[],
g_current_submodel
=
None
,
g_root_submodel
=
None
,
g_submodel_map
=
{},
g_submodel_stack
=
[],
g_add_submodel_suffix
=
False
,
):
for
k
,
v
in
locals
().
iteritems
():
globals
()[
k
]
=
copy
.
deepcopy
(
v
)
...
...
@@ -161,43 +155,54 @@ def config_assert(b, msg):
if
not
b
:
logger
.
fatal
(
msg
)
g_config_funcs
=
{}
# decorator for indicating a function which can be used in config file
def
config_func
(
func
):
g_config_funcs
[
func
.
func_name
]
=
func
return
func
# decorator for indicating a class which can be used in config file
def
config_class
(
cls
):
g_config_funcs
[
cls
.
__name__
]
=
cls
return
cls
# decorator for indicating a class for a layer type
def
config_layer
(
layer_type
):
def
wrap
(
cls
):
g_config_funcs
[
cls
.
__name__
]
=
cls
g_layer_type_map
[
layer_type
]
=
cls
return
cls
return
wrap
def
gen_parameter_name
(
layer_name
,
input_index
):
return
'_%s.w%d'
%
(
layer_name
,
input_index
)
def
gen_bias_parameter_name
(
layer_name
):
return
'_%s.wbias'
%
layer_name
def
default
(
x
,
default_value
):
return
default_value
if
x
is
None
else
x
class
Cfg
(
object
):
def
add_keys
(
self
,
locals
):
for
k
,
v
in
locals
.
iteritems
():
if
not
k
.
startswith
(
'_'
):
self
.
__setattr__
(
k
,
v
)
# functions available in config file
# Define the name of the input layers of the NeuralNetwork.
# The type of these layers must be "data".
# These layers will be provided with the DataBatch obtained
...
...
@@ -216,6 +221,7 @@ def Inputs(*args):
if
g_current_submodel
is
g_root_submodel
:
g_config
.
model_config
.
input_layer_names
.
append
(
name
)
@
config_func
def
HasInputsSet
():
return
len
(
g_current_submodel
.
input_layer_names
)
!=
0
...
...
@@ -254,36 +260,42 @@ def SubModelBegin(name):
g_submodel_map
[
name
]
=
sub_model
g_current_submodel
=
sub_model
@
config_func
def
SubModelEnd
(
name
=
None
):
def
SubModelEnd
(
name
=
None
):
global
g_current_submodel
,
g_root_submodel
,
g_submodel_stack
config_assert
(
g_current_submodel
is
not
g_root_submodel
,
"submodel not begin"
)
config_assert
(
g_current_submodel
is
not
g_root_submodel
,
"submodel not begin"
)
if
name
is
not
None
:
config_assert
(
g_current_submodel
.
name
==
MakeLayerNameInParentSubmodel
(
name
),
config_assert
(
g_current_submodel
.
name
==
MakeLayerNameInParentSubmodel
(
name
),
"submodel name error"
)
g_current_submodel
=
g_submodel_stack
.
pop
()
def
MakeLayerNameInParentSubmodel
(
name
):
suffix
=
""
if
len
(
g_submodel_stack
)
>
1
:
suffix
=
"@"
+
g_submodel_stack
[
-
1
].
name
return
name
+
suffix
def
GetLayerBaseName
(
name
):
return
name
.
split
(
'@'
)[
0
]
def
MakeLayerNameInSubmodel
(
name
,
submodel_name
=
None
):
def
MakeLayerNameInSubmodel
(
name
,
submodel_name
=
None
):
global
g_current_submodel
global
g_add_submodel_suffix
if
(
submodel_name
is
None
and
not
g_add_submodel_suffix
and
not
g_current_submodel
.
is_recurrent_layer_group
):
if
(
submodel_name
is
None
and
not
g_add_submodel_suffix
and
not
g_current_submodel
.
is_recurrent_layer_group
):
return
name
if
submodel_name
is
None
:
submodel_name
=
g_current_submodel
.
name
return
name
+
"@"
+
submodel_name
# Define a recurrent layer group begin with RecurrentLayerGroupBegin
# and end with RecurrentLayerGroupEnd.
# A recurrent layer group forward/backward one frame after previous frame
...
...
@@ -332,8 +344,10 @@ def RecurrentLayerGroupWithoutOutLinksBegin(name,
if
in_links_count
==
0
:
in_links_has_subseq
=
has_subseq
else
:
config_assert
(
in_links_has_subseq
==
has_subseq
,
"The sequence type of in_links should be the same in RecurrentLayerGroup"
)
config_assert
(
in_links_has_subseq
==
has_subseq
,
"The sequence type of in_links should be the same in RecurrentLayerGroup"
)
in_links_count
+=
1
layer_name
=
MakeLayerNameInParentSubmodel
(
name
)
layer
=
g_layer_map
[
layer_name
]
...
...
@@ -347,6 +361,7 @@ def RecurrentLayerGroupWithoutOutLinksBegin(name,
pair
.
link_name
=
MakeLayerNameInSubmodel
(
name
)
pair
.
has_subseq
=
has_subseq
@
config_func
def
RecurrentLayerGroupSetOutLink
(
link
):
if
isinstance
(
link
,
basestring
):
...
...
@@ -363,8 +378,7 @@ def RecurrentLayerGroupSetOutLink(link):
def
RecurrentLayerGroupSetGenerator
(
generator
=
None
):
generator
.
eos_layer_name
=
MakeLayerNameInSubmodel
(
generator
.
eos_layer_name
)
generator
.
eos_layer_name
=
MakeLayerNameInSubmodel
(
generator
.
eos_layer_name
)
g_current_submodel
.
generator
.
CopyFrom
(
generator
)
...
...
@@ -375,23 +389,20 @@ def RecurrentLayerGroupBegin(name,
generator
=
None
,
target_inlinkname
=
""
,
seq_reversed
=
False
):
RecurrentLayerGroupWithoutOutLinksBegin
(
name
,
in_links
,
seq_reversed
,
RecurrentLayerGroupWithoutOutLinksBegin
(
name
,
in_links
,
seq_reversed
,
target_inlinkname
)
for
link
in
out_links
:
RecurrentLayerGroupSetOutLink
(
link
)
if
generator
is
not
None
:
RecurrentLayerGroupSetGenerator
(
generator
)
config_assert
(
len
(
in_links
)
==
0
,
"no in_links should be passed to generator"
)
config_assert
(
len
(
out_links
)
>=
1
,
config_assert
(
len
(
in_links
)
==
0
,
"no in_links should be passed to generator"
)
config_assert
(
len
(
out_links
)
>=
1
,
"one or more than one out_links should be passed to generator"
)
@
config_func
def
RecurrentLayerGroupEnd
(
name
):
global
g_current_submodel
...
...
@@ -399,7 +410,8 @@ def RecurrentLayerGroupEnd(name):
"RecurrentLayerGroup not begin"
)
for
pair
in
g_current_submodel
.
memories
:
#check exist
layer
=
g_layer_map
[
pair
.
layer_name
]
config_assert
(
layer
is
not
None
,
"memory declare wrong name:%s"
%
pair
.
layer_name
)
config_assert
(
layer
is
not
None
,
"memory declare wrong name:%s"
%
pair
.
layer_name
)
memory_link
=
g_layer_map
[
pair
.
link_name
]
config_assert
(
layer
.
size
==
memory_link
.
size
,
"memory declare wrong size:%d"
%
memory_link
.
size
)
...
...
@@ -418,12 +430,14 @@ def RecurrentLayerGroupEnd(name):
else
:
GatherAgentLayer
(
name
=
agent_name
,
size
=
layer
.
size
)
# Define the model type
# currently, the paddle supports "nn", "recurrent_nn", "recursive_nn" and "multi_nn"
@
config_func
def
model_type
(
name
):
g_config
.
model_config
.
type
=
name
@
config_class
class
Bias
(
Cfg
):
def
__init__
(
...
...
@@ -441,10 +455,10 @@ class Bias(Cfg):
sparse_remote_update
=
None
,
gradient_clipping_threshold
=
None
,
is_static
=
None
,
is_shared
=
None
,
):
is_shared
=
None
,
):
self
.
add_keys
(
locals
())
# Define one input for a layer
@
config_class
class
Input
(
Cfg
):
...
...
@@ -477,19 +491,20 @@ class Input(Cfg):
is_static
=
None
,
is_shared
=
None
,
update_hooks
=
None
,
input_layer_argument
=
None
,
):
input_layer_argument
=
None
,
):
self
.
add_keys
(
locals
())
self
.
input_layer_name
=
MakeLayerNameInSubmodel
(
input_layer_name
)
# Define a projection for iexed layer
@
config_class
class
Projection
(
Input
):
type
=
None
# subclass should set it correctly
def
__init__
(
self
,
input_layer_name
,
size
=
0
,
# projection output size
size
=
0
,
# projection output size
parameter_name
=
None
,
learning_rate
=
None
,
momentum
=
None
,
...
...
@@ -509,8 +524,7 @@ class Projection(Input):
is_static
=
None
,
is_shared
=
None
,
update_hooks
=
None
,
input_layer_argument
=
None
,
):
input_layer_argument
=
None
,
):
self
.
add_keys
(
locals
())
self
.
input_layer_name
=
MakeLayerNameInSubmodel
(
input_layer_name
)
...
...
@@ -524,8 +538,10 @@ class Projection(Input):
# to indicate using the size from Layer config
def
calc_output_size
(
self
,
input_layer_config
):
return
self
.
size
def
calc_parameter_size
(
self
,
input_size
,
output_size
):
raise
NotimplementedError
def
calc_parameter_dims
(
self
,
input_size
,
output_size
):
raise
NotimplementedError
...
...
@@ -536,31 +552,32 @@ class IdentityProjection(Projection):
def
calc_output_size
(
self
,
input_layer_config
):
return
input_layer_config
.
size
def
calc_parameter_size
(
self
,
input_size
,
output_size
):
return
0
def
calc_parameter_dims
(
self
,
input_size
,
output_size
):
return
[]
# Like IdentityProjection, but layer size may smaller than input size,
# the projection select dimesions [offset, offset+layer_size) from input
@
config_class
class
IdentityOffsetProjection
(
Projection
):
type
=
'identity_offset'
def
__init__
(
self
,
input_layer_name
,
offset
,
**
xargs
):
super
(
IdentityOffsetProjection
,
self
).
__init__
(
input_layer_name
,
**
xargs
)
def
__init__
(
self
,
input_layer_name
,
offset
,
**
xargs
):
super
(
IdentityOffsetProjection
,
self
).
__init__
(
input_layer_name
,
**
xargs
)
self
.
proj_conf
.
offset
=
offset
def
calc_parameter_size
(
self
,
input_size
,
output_size
):
return
0
def
calc_parameter_dims
(
self
,
input_size
,
output_size
):
return
[]
# DotMulProjection performs element-wise multiplication with weight
@
config_class
class
DotMulProjection
(
Projection
):
...
...
@@ -568,49 +585,53 @@ class DotMulProjection(Projection):
def
calc_output_size
(
self
,
input_layer_config
):
return
input_layer_config
.
size
def
calc_parameter_size
(
self
,
input_size
,
output_size
):
return
output_size
def
calc_parameter_dims
(
self
,
input_size
,
output_size
):
return
[
1
,
output_size
]
@
config_class
class
TableProjection
(
Projection
):
type
=
'table'
def
calc_parameter_size
(
self
,
input_size
,
output_size
):
return
input_size
*
output_size
def
calc_parameter_dims
(
self
,
input_size
,
output_size
):
return
[
input_size
,
output_size
]
@
config_class
class
FullMatrixProjection
(
Projection
):
type
=
'fc'
def
calc_parameter_size
(
self
,
input_size
,
output_size
):
return
input_size
*
output_size
def
calc_parameter_dims
(
self
,
input_size
,
output_size
):
return
[
input_size
,
output_size
]
@
config_class
class
TransposedFullMatrixProjection
(
Projection
):
type
=
'trans_fc'
def
calc_parameter_size
(
self
,
input_size
,
output_size
):
return
input_size
*
output_size
def
calc_parameter_dims
(
self
,
input_size
,
output_size
):
return
[
output_size
,
input_size
]
@
config_class
class
ContextProjection
(
Projection
):
type
=
'context'
def
__init__
(
self
,
input_layer_name
,
context_start
,
context_length
,
trainable_padding
,
**
xargs
):
def
__init__
(
self
,
input_layer_name
,
context_start
,
context_length
,
trainable_padding
,
**
xargs
):
super
(
ContextProjection
,
self
).
__init__
(
input_layer_name
,
**
xargs
)
self
.
proj_conf
.
context_start
=
context_start
self
.
proj_conf
.
context_length
=
context_length
...
...
@@ -638,8 +659,7 @@ class ContextProjection(Projection):
class
ConvProjection
(
Projection
):
type
=
'conv'
def
__init__
(
self
,
def
__init__
(
self
,
input_layer_name
,
num_filters
=
None
,
conv_conf
=
None
,
...
...
@@ -649,12 +669,11 @@ class ConvProjection(Projection):
if
num_filters
is
not
None
:
self
.
proj_conf
.
num_filters
=
num_filters
parse_conv
(
conv_conf
,
input_layer_name
,
self
.
proj_conf
.
conv_conf
,
parse_conv
(
conv_conf
,
input_layer_name
,
self
.
proj_conf
.
conv_conf
,
num_filters
)
# TODO: support rectangle input
self
.
proj_conf
.
output_size
=
(
self
.
proj_conf
.
conv_conf
.
output_x
**
2
)
*
num_filters
self
.
proj_conf
.
output_size
=
(
self
.
proj_conf
.
conv_conf
.
output_x
**
2
)
*
num_filters
def
calc_output_size
(
self
,
input_layer_config
):
return
self
.
proj_conf
.
output_size
...
...
@@ -672,14 +691,15 @@ class ConvProjection(Projection):
def
calc_parameter_dims
(
self
,
input_size
,
output_size
):
return
None
# Define a operator for mixed layer
@
config_class
class
Operator
(
Cfg
):
type
=
None
# subclass should set it correctly
def
__init__
(
self
,
input_layer_names
,
):
input_layer_names
,
):
self
.
add_keys
(
locals
())
self
.
operator_conf
=
OperatorConfig
()
self
.
operator_conf
.
type
=
self
.
type
...
...
@@ -690,16 +710,13 @@ class Operator(Cfg):
def
calc_output_size
(
self
,
input_sizes
):
return
0
@
config_class
class
DotMulOperator
(
Operator
):
type
=
'dot_mul'
def
__init__
(
self
,
input_layer_names
,
scale
=
None
,
**
xargs
):
super
(
DotMulOperator
,
self
).
__init__
(
input_layer_names
,
**
xargs
)
def
__init__
(
self
,
input_layer_names
,
scale
=
None
,
**
xargs
):
super
(
DotMulOperator
,
self
).
__init__
(
input_layer_names
,
**
xargs
)
if
scale
is
not
None
:
self
.
operator_conf
.
dotmul_scale
=
scale
...
...
@@ -715,26 +732,24 @@ class DotMulOperator(Operator):
return
input_sizes
[
0
]
@
config_class
class
ConvOperator
(
Operator
):
type
=
'conv'
def
__init__
(
self
,
def
__init__
(
self
,
input_layer_names
,
num_filters
=
None
,
conv_conf
=
None
,
**
xargs
):
super
(
ConvOperator
,
self
).
__init__
(
input_layer_names
,
**
xargs
)
super
(
ConvOperator
,
self
).
__init__
(
input_layer_names
,
**
xargs
)
if
num_filters
is
not
None
:
self
.
operator_conf
.
num_filters
=
num_filters
parse_conv
(
conv_conf
,
MakeLayerNameInSubmodel
(
input_layer_names
[
0
]),
self
.
operator_conf
.
conv_conf
,
num_filters
)
self
.
operator_conf
.
output_size
=
(
self
.
operator_conf
.
conv_conf
.
output_x
**
2
)
*
num_filters
self
.
operator_conf
.
conv_conf
,
num_filters
)
self
.
operator_conf
.
output_size
=
(
self
.
operator_conf
.
conv_conf
.
output_x
**
2
)
*
num_filters
config_assert
(
len
(
input_layer_names
)
==
2
,
"Conv is binary operator"
)
...
...
@@ -745,20 +760,19 @@ class ConvOperator(Operator):
# please refer to the comments in proto/ModelConfig.proto
@
config_class
class
Conv
(
Cfg
):
def
__init__
(
self
,
def
__init__
(
self
,
filter_size
,
channels
,
padding
=
None
,
stride
=
None
,
groups
=
None
,
filter_channels
=
None
,
output_x
=
None
,
img_size
=
None
,
caffe_mode
=
True
,
filter_size_y
=
None
,
padding_y
=
None
,
stride_y
=
None
):
padding
=
None
,
stride
=
None
,
groups
=
None
,
filter_channels
=
None
,
output_x
=
None
,
img_size
=
None
,
caffe_mode
=
True
,
filter_size_y
=
None
,
padding_y
=
None
,
stride_y
=
None
):
self
.
add_keys
(
locals
())
if
filter_size_y
is
None
:
self
.
filter_size_y
=
filter_size
...
...
@@ -769,95 +783,83 @@ class Conv(Cfg):
if
output_x
is
not
None
:
config_assert
(
output_x
<=
0
)
# please refer to the comments in proto/ModelConfig.proto
@
config_class
class
BilinearInterp
(
Cfg
):
def
__init__
(
self
,
out_size_x
=
None
,
out_size_y
=
None
,
num_channels
=
None
):
def
__init__
(
self
,
out_size_x
=
None
,
out_size_y
=
None
,
num_channels
=
None
):
self
.
add_keys
(
locals
())
# please refer to the comments in proto/ModelConfig.proto
@
config_class
class
Pool
(
Cfg
):
def
__init__
(
self
,
def
__init__
(
self
,
pool_type
,
channels
,
size_x
,
size_y
=
None
,
img_width
=
None
,
start
=
None
,
stride
=
None
,
stride_y
=
None
,
padding
=
None
,
padding_y
=
None
):
size_y
=
None
,
img_width
=
None
,
start
=
None
,
stride
=
None
,
stride_y
=
None
,
padding
=
None
,
padding_y
=
None
):
self
.
add_keys
(
locals
())
# please refer to the comments in proto/ModelConfig.proto
@
config_class
class
SpatialPyramidPool
(
Cfg
):
def
__init__
(
self
,
pool_type
,
pyramid_height
,
channels
,
img_width
=
None
):
def
__init__
(
self
,
pool_type
,
pyramid_height
,
channels
,
img_width
=
None
):
self
.
add_keys
(
locals
())
# please refer to the comments in proto/ModelConfig.proto
@
config_class
class
Norm
(
Cfg
):
def
__init__
(
self
,
def
__init__
(
self
,
norm_type
,
channels
,
size
,
scale
,
pow
,
output_x
=
None
,
img_size
=
None
,
blocked
=
None
):
output_x
=
None
,
img_size
=
None
,
blocked
=
None
):
self
.
add_keys
(
locals
())
# please refer to the comments in proto/ModelConfig.proto
@
config_class
class
Image
(
Cfg
):
def
__init__
(
self
,
channels
,
img_size
=
None
):
def
__init__
(
self
,
channels
,
img_size
=
None
):
self
.
add_keys
(
locals
())
@
config_class
class
BlockExpand
(
Cfg
):
def
__init__
(
self
,
def
__init__
(
self
,
channels
,
padding_x
=
0
,
padding_y
=
0
,
stride_x
=
0
,
stride_y
=
0
,
block_x
=
0
,
block_y
=
0
,
img_size_x
=
0
,
img_size_y
=
0
,
output_x
=
0
,
output_y
=
0
):
padding_x
=
0
,
padding_y
=
0
,
stride_x
=
0
,
stride_y
=
0
,
block_x
=
0
,
block_y
=
0
,
img_size_x
=
0
,
img_size_y
=
0
,
output_x
=
0
,
output_y
=
0
):
self
.
add_keys
(
locals
())
@
config_class
class
MaxOut
(
Cfg
):
def
__init__
(
self
,
channels
,
groups
,
img_size_x
=
0
,
img_size_y
=
0
):
def
__init__
(
self
,
channels
,
groups
,
img_size_x
=
0
,
img_size_y
=
0
):
self
.
add_keys
(
locals
())
def
DataBase
(
async_load_data
=
False
,
constant_slots
=
None
,
data_ratio
=
1
,
...
...
@@ -871,19 +873,19 @@ def DataBase(async_load_data=False,
if
constant_slots
:
data_config
.
constant_slots
.
extend
(
constant_slots
)
data_config
.
data_ratio
=
data_ratio
data_config
.
is_main_data
=
is_main_data
data_config
.
data_ratio
=
data_ratio
data_config
.
is_main_data
=
is_main_data
usage_ratio
=
default
(
usage_ratio
,
settings_deprecated
[
"usage_ratio"
])
usage_ratio
=
default
(
usage_ratio
,
settings_deprecated
[
"usage_ratio"
])
config_assert
(
usage_ratio
>=
0
and
usage_ratio
<=
1
,
"The range of usage_ratio is [0, 1]"
)
data_config
.
usage_ratio
=
usage_ratio
return
data_config
@
config_func
def
SimpleData
(
files
=
None
,
def
SimpleData
(
files
=
None
,
feat_dim
=
None
,
context_len
=
None
,
buffer_capacity
=
None
,
...
...
@@ -898,9 +900,9 @@ def SimpleData(
data_config
.
buffer_capacity
=
buffer_capacity
return
data_config
@
config_func
def
PyData
(
files
=
None
,
def
PyData
(
files
=
None
,
type
=
None
,
file_group_queue_capacity
=
None
,
load_data_module
=
None
,
...
...
@@ -913,16 +915,21 @@ def PyData(
data_config
=
DataBase
(
**
xargs
)
data_config
.
type
=
'py'
if
load_data_module
in
g_py_module_name_list
:
def
get_path
(
module
):
m
=
__import__
(
load_data_module
)
return
os
.
path
.
split
(
os
.
path
.
realpath
(
m
.
__file__
))[
0
]
# python C-api is not thread safe, one module can only be import once,
# so here we nedd to copy the module with different names if it has to be
# imported several times.
module_new_name
=
"%s_copy_%d"
%
(
load_data_module
,
len
(
g_py_module_name_list
))
module_new_name
=
"%s_copy_%d"
%
(
load_data_module
,
len
(
g_py_module_name_list
))
g_py_module_name_list
.
append
(
module_new_name
)
module_path
=
"%s/%s.py"
%
(
get_path
(
load_data_module
),
load_data_module
)
new_module_path
=
"%s/%s.py"
%
(
get_path
(
load_data_module
),
module_new_name
)
module_path
=
"%s/%s.py"
%
(
get_path
(
load_data_module
),
load_data_module
)
new_module_path
=
"%s/%s.py"
%
(
get_path
(
load_data_module
),
module_new_name
)
if
os
.
path
.
isfile
(
module_path
)
==
False
:
raise
Exception
(
"File %s is not exist."
%
module_path
)
shutil
.
copy2
(
module_path
,
new_module_path
)
...
...
@@ -947,9 +954,9 @@ def PyData(
data_config
.
constant_slots
.
extend
(
constant_slots
)
return
data_config
@
config_func
def
ProtoData
(
files
=
None
,
def
ProtoData
(
files
=
None
,
type
=
None
,
file_group_queue_capacity
=
None
,
load_file_count
=
None
,
...
...
@@ -976,19 +983,18 @@ def ProtoData(
data_config
.
constant_slots
.
extend
(
constant_slots
)
return
data_config
#real data for training is actually provided by "sub_data" data providers.
@
config_func
def
MultiData
(
sub_data
=
[]
):
def
MultiData
(
sub_data
=
[]):
data_config
=
DataConfig
()
data_config
.
type
=
'multi'
data_config
.
sub_data_configs
.
extend
(
sub_data
)
return
data_config
@
config_func
def
Data
(
type
,
def
Data
(
type
,
files
=
None
,
feat_dim
=
None
,
slot_dims
=
None
,
...
...
@@ -1030,15 +1036,19 @@ def TestData(data_config, async_load_data=None):
" Data definition"
)
g_config
.
test_data_config
.
async_load_data
=
async_load_data
def
parse_bilinear
(
bilinear
,
input_layer_name
,
bilinear_conf
):
bilinear_conf
.
out_size_x
=
bilinear
.
out_size_x
;
bilinear_conf
.
out_size_y
=
bilinear
.
out_size_y
;
bilinear_conf
.
num_channels
=
bilinear
.
num_channels
;
bilinear_conf
.
out_size_x
=
bilinear
.
out_size_x
bilinear_conf
.
out_size_y
=
bilinear
.
out_size_y
bilinear_conf
.
num_channels
=
bilinear
.
num_channels
'''
caffe_mode: compute the output size using floor instead of ceil,
which is consistent of caffe and CuDNN's convention.
'''
def
cnn_output_size
(
img_size
,
filter_size
,
padding
,
stride
,
caffe_mode
):
output
=
(
2
*
padding
+
img_size
-
filter_size
)
/
float
(
stride
)
if
caffe_mode
:
...
...
@@ -1046,10 +1056,13 @@ def cnn_output_size(img_size, filter_size, padding, stride, caffe_mode):
else
:
return
1
+
int
(
math
.
ceil
(
output
))
'''
calcualte image_size based on output_size for convolution.
It is the reverse function of cnn_output_size
'''
def
cnn_image_size
(
output_size
,
filter_size
,
padding
,
stride
,
caffe_mode
):
if
caffe_mode
:
img_size
=
(
output_size
-
1
)
*
stride
+
filter_size
-
2
*
padding
...
...
@@ -1057,70 +1070,75 @@ def cnn_image_size(output_size, filter_size, padding, stride, caffe_mode):
img_size
=
(
output_size
-
2
)
*
stride
+
filter_size
-
2
*
padding
+
1
return
img_size
def
parse_pool
(
pool
,
input_layer_name
,
pool_conf
):
pool_conf
.
pool_type
=
pool
.
pool_type
config_assert
(
pool
.
pool_type
in
[
'max-projection'
,
'avg-projection'
,
'cudnn-max-pool'
,
'cudnn-avg-pool'
],
"pool-type %s is not in "
config_assert
(
pool
.
pool_type
in
[
'max-projection'
,
'avg-projection'
,
'cudnn-max-pool'
,
'cudnn-avg-pool'
],
"pool-type %s is not in "
"['max-projection', 'avg-projection', "
"'cudnn-max-pool', 'cudnn-avg-pool']"
%
pool
.
pool_type
)
"'cudnn-max-pool', 'cudnn-avg-pool']"
%
pool
.
pool_type
)
pool_conf
.
channels
=
pool
.
channels
pool_conf
.
size_x
=
pool
.
size_x
pool_conf
.
stride
=
pool
.
stride
pool_conf
.
size_y
=
default
(
pool
.
size_y
,
pool_conf
.
size_x
)
pool_conf
.
stride_y
=
default
(
pool
.
stride_y
,
pool_conf
.
stride
)
;
pool_conf
.
stride_y
=
default
(
pool
.
stride_y
,
pool_conf
.
stride
)
img_pixels
=
g_layer_map
[
input_layer_name
].
size
/
pool
.
channels
# the img_width may be removed,
# and it can be calculated automatically later.
pool_conf
.
img_size
=
default
(
pool
.
img_width
,
int
(
img_pixels
**
0.5
))
pool_conf
.
img_size
=
default
(
pool
.
img_width
,
int
(
img_pixels
**
0.5
))
pool_conf
.
img_size_y
=
img_pixels
/
pool_conf
.
img_size
config_assert
(
pool_conf
.
img_size
*
pool_conf
.
img_size_y
==
img_pixels
,
"Incorrect input image size %d for input image pixels %d"
%
(
pool_conf
.
img_size
,
img_pixels
))
"Incorrect input image size %d for input image pixels %d"
%
(
pool_conf
.
img_size
,
img_pixels
))
config_assert
(
not
pool
.
start
,
"start is deprecated in pooling."
)
if
pool
.
padding
is
not
None
:
pool_conf
.
padding
=
pool
.
padding
pool_conf
.
padding_y
=
default
(
pool
.
padding_y
,
pool_conf
.
padding
)
pool_conf
.
output_x
=
cnn_output_size
(
pool_conf
.
img_size
,
pool_conf
.
size_x
,
pool_conf
.
padding
,
pool_conf
.
stride
,
False
)
pool_conf
.
output_y
=
cnn_output_size
(
pool_conf
.
img_size_y
,
pool_conf
.
size_y
,
pool_conf
.
padding_y
,
pool_conf
.
stride_y
,
False
)
pool_conf
.
output_x
=
cnn_output_size
(
pool_conf
.
img_size
,
pool_conf
.
size_x
,
pool_conf
.
padding
,
pool_conf
.
stride
,
False
)
pool_conf
.
output_y
=
cnn_output_size
(
pool_conf
.
img_size_y
,
pool_conf
.
size_y
,
pool_conf
.
padding_y
,
pool_conf
.
stride_y
,
False
)
def
parse_spp
(
spp
,
input_layer_name
,
spp_conf
):
spp_conf
.
pool_type
=
spp
.
pool_type
config_assert
(
spp
.
pool_type
in
[
'max-projection'
,
'avg-projection'
],
"pool-type %s is not in "
"['max-projection', 'avg-projection']"
%
spp
.
pool_type
)
"pool-type %s is not in "
"['max-projection', 'avg-projection']"
%
spp
.
pool_type
)
spp_conf
.
pyramid_height
=
spp
.
pyramid_height
spp_conf
.
channels
=
spp
.
channels
img_pixels
=
g_layer_map
[
input_layer_name
].
size
/
spp_conf
.
channels
spp_conf
.
img_size
=
default
(
spp
.
img_width
,
int
(
img_pixels
**
0.5
))
spp_conf
.
img_size
=
default
(
spp
.
img_width
,
int
(
img_pixels
**
0.5
))
spp_conf
.
img_size_y
=
img_pixels
/
spp_conf
.
img_size
config_assert
(
spp_conf
.
img_size
*
spp_conf
.
img_size_y
==
img_pixels
,
"Incorrect input image size %d for input image pixels %d"
%
(
spp_conf
.
img_size
,
img_pixels
))
"Incorrect input image size %d for input image pixels %d"
%
(
spp_conf
.
img_size
,
img_pixels
))
def
parse_image
(
image
,
input_layer_name
,
image_conf
):
image_conf
.
channels
=
image
.
channels
image_pixels
=
g_layer_map
[
input_layer_name
].
size
/
image_conf
.
channels
image_conf
.
img_size
=
int
(
image_pixels
**
0.5
)
config_assert
((
image_conf
.
img_size
**
2
)
==
image_pixels
,
"Incorrect input image size %d for input image pixels %d"
%
(
image_conf
.
img_size
,
image_pixels
))
image_conf
.
img_size
=
int
(
image_pixels
**
0.5
)
config_assert
((
image_conf
.
img_size
**
2
)
==
image_pixels
,
"Incorrect input image size %d for input image pixels %d"
%
(
image_conf
.
img_size
,
image_pixels
))
def
parse_norm
(
norm
,
input_layer_name
,
norm_conf
):
norm_conf
.
norm_type
=
norm
.
norm_type
config_assert
(
norm
.
norm_type
in
[
'rnorm'
,
'cmrnorm-projection'
],
"norm-type %s is not in [rnorm, 'cmrnorm-projection']"
%
norm
.
norm_type
)
"norm-type %s is not in [rnorm, 'cmrnorm-projection']"
%
norm
.
norm_type
)
norm_conf
.
channels
=
norm
.
channels
norm_conf
.
size
=
norm
.
size
norm_conf
.
scale
=
norm
.
scale
...
...
@@ -1128,20 +1146,23 @@ def parse_norm(norm, input_layer_name, norm_conf):
norm_conf
.
blocked
=
norm
.
blocked
img_pixels
=
g_layer_map
[
input_layer_name
].
size
/
norm
.
channels
norm_conf
.
img_size
=
int
(
img_pixels
**
0.5
)
config_assert
((
norm_conf
.
img_size
**
2
)
==
img_pixels
,
"Incorrect input image size %d for input image pixels %d"
%
(
norm_conf
.
img_size
,
img_pixels
))
norm_conf
.
img_size
=
int
(
img_pixels
**
0.5
)
config_assert
((
norm_conf
.
img_size
**
2
)
==
img_pixels
,
"Incorrect input image size %d for input image pixels %d"
%
(
norm_conf
.
img_size
,
img_pixels
))
norm_conf
.
output_x
=
norm_conf
.
img_size
if
norm
.
norm_type
in
[
'cmrnorm-projection'
]:
norm_conf
.
scale
/=
norm
.
size
else
:
norm_conf
.
scale
/=
norm
.
size
**
2
norm_conf
.
scale
/=
norm
.
size
**
2
'''
caffe_mode: compute the output size using floor instead of ceil,
which is consistent of caffe and CuDNN's convention.
'''
def
parse_conv
(
conv
,
input_layer_name
,
conv_conf
,
num_filters
,
trans
=
False
):
conv_conf
.
filter_size
=
conv
.
filter_size
conv_conf
.
filter_size_y
=
conv
.
filter_size_y
...
...
@@ -1157,31 +1178,32 @@ def parse_conv(conv, input_layer_name, conv_conf, num_filters, trans=False):
conv_conf
.
filter_channels
=
conv
.
channels
/
conv
.
groups
img_pixels
=
g_layer_map
[
input_layer_name
].
size
/
conv
.
channels
print
(
'channels=%d size=%d'
%
(
conv
.
channels
,
print
(
'channels=%d size=%d'
%
(
conv
.
channels
,
g_layer_map
[
input_layer_name
].
size
))
conv_conf
.
img_size
=
int
(
img_pixels
**
0.5
)
config_assert
((
conv_conf
.
img_size
**
2
)
==
img_pixels
,
(
"Input layer %s: Incorrect input image size %d for input "
+
"image pixels %d"
)
%
(
input_layer_name
,
conv_conf
.
img_size
,
img_pixels
))
conv_conf
.
img_size
=
int
(
img_pixels
**
0.5
)
config_assert
((
conv_conf
.
img_size
**
2
)
==
img_pixels
,
(
"Input layer %s: Incorrect input image size %d for input "
+
"image pixels %d"
)
%
(
input_layer_name
,
conv_conf
.
img_size
,
img_pixels
))
conv_conf
.
output_x
=
cnn_output_size
(
conv_conf
.
img_size
,
conv_conf
.
filter_size
,
conv_conf
.
padding
,
conv_conf
.
stride
,
conv_conf
.
caffe_mode
)
conv_conf
.
img_size
,
conv_conf
.
filter_size
,
conv_conf
.
padding
,
conv_conf
.
stride
,
conv_conf
.
caffe_mode
)
else
:
conv_conf
.
filter_channels
=
num_filters
/
conv
.
groups
outputSize
=
g_layer_map
[
input_layer_name
].
size
/
conv
.
channels
print
(
'channels=%d size=%d'
%
(
conv
.
channels
,
print
(
'channels=%d size=%d'
%
(
conv
.
channels
,
g_layer_map
[
input_layer_name
].
size
))
conv_conf
.
output_x
=
int
(
outputSize
**
0.5
)
config_assert
((
conv_conf
.
output_x
**
2
)
==
outputSize
,
(
"Input layer %s: Incorrect input image size %d for input "
+
"image pixels %d"
)
%
(
input_layer_name
,
conv_conf
.
output_x
,
outputSize
))
conv_conf
.
output_x
=
int
(
outputSize
**
0.5
)
config_assert
((
conv_conf
.
output_x
**
2
)
==
outputSize
,
(
"Input layer %s: Incorrect input image size %d for input "
+
"image pixels %d"
)
%
(
input_layer_name
,
conv_conf
.
output_x
,
outputSize
))
conv_conf
.
img_size
=
cnn_image_size
(
conv_conf
.
output_x
,
conv_conf
.
filter_size
,
conv_conf
.
padding
,
conv_conf
.
stride
,
conv_conf
.
caffe_mode
)
conv_conf
.
output_x
,
conv_conf
.
filter_size
,
conv_conf
.
padding
,
conv_conf
.
stride
,
conv_conf
.
caffe_mode
)
def
parse_block_expand
(
block_expand
,
input_layer_name
,
block_expand_conf
):
block_expand_conf
.
channels
=
block_expand
.
channels
...
...
@@ -1207,27 +1229,28 @@ def parse_block_expand(block_expand, input_layer_name, block_expand_conf):
block_expand
.
img_size_y
,
block_expand
.
block_y
,
block_expand
.
padding_y
,
block_expand
.
stride_y
,
False
)
def
parse_maxout
(
maxout
,
input_layer_name
,
maxout_conf
):
maxout_conf
.
channels
=
maxout
.
channels
maxout_conf
.
groups
=
maxout
.
groups
maxout_conf
.
img_size_x
=
maxout
.
img_size_x
maxout_conf
.
img_size_y
=
maxout
.
img_size_y
# Define an evaluator
@
config_func
def
Evaluator
(
name
,
type
,
inputs
,
chunk_scheme
=
None
,
num_chunk_types
=
None
,
classification_threshold
=
None
,
positive_label
=
None
,
dict_file
=
None
,
result_file
=
None
,
num_results
=
None
,
delimited
=
None
,
):
chunk_scheme
=
None
,
num_chunk_types
=
None
,
classification_threshold
=
None
,
positive_label
=
None
,
dict_file
=
None
,
result_file
=
None
,
num_results
=
None
,
delimited
=
None
,
):
evaluator
=
g_config
.
model_config
.
evaluators
.
add
()
evaluator
.
type
=
type
evaluator
.
name
=
MakeLayerNameInSubmodel
(
name
)
...
...
@@ -1256,6 +1279,7 @@ def Evaluator(
if
delimited
is
not
None
:
evaluator
.
delimited
=
delimited
class
LayerBase
(
object
):
def
__init__
(
self
,
...
...
@@ -1307,8 +1331,8 @@ class LayerBase(object):
if
type_of
(
input
)
==
str
:
input_layer_name
=
input
input_config
=
Input
(
input_layer_name
=
input
,
parameter_name
=
gen_parameter_name
(
name
,
input_index
))
input_layer_name
=
input
,
parameter_name
=
gen_parameter_name
(
name
,
input_index
))
input_layer_name
=
input_config
.
input_layer_name
elif
isinstance
(
input
,
Input
):
input_layer_name
=
input
.
input_layer_name
...
...
@@ -1317,16 +1341,15 @@ class LayerBase(object):
input_config
.
parameter_name
=
\
gen_parameter_name
(
name
,
input_index
)
elif
isinstance
(
input
,
Operator
):
self
.
operators
.
append
(
input
)
;
self
.
operators
.
append
(
input
)
input
.
operator_conf
.
input_indices
.
append
(
input_index
)
input_config
=
Input
(
input
.
input_layer_names
[
0
])
input_layer_name
=
input_config
.
input_layer_name
else
:
raise
ValueError
(
'Wrong type for inputs: %s'
%
type_of
(
input
))
raise
ValueError
(
'Wrong type for inputs: %s'
%
type_of
(
input
))
config_assert
(
input_layer_name
in
g_layer_map
,
"Unknown input layer '%s' for layer %s"
%
(
input_layer_name
,
name
))
"Unknown input layer '%s' for layer %s"
%
(
input_layer_name
,
name
))
self
.
inputs
[
input_index
]
=
input_config
layer_input
=
self
.
config
.
inputs
.
add
()
layer_input
.
input_layer_name
=
input_config
.
input_layer_name
...
...
@@ -1338,7 +1361,6 @@ class LayerBase(object):
g_current_submodel
.
layer_names
.
append
(
self
.
config
.
name
)
def
get_input_layer
(
self
,
input_index
):
return
g_layer_map
[
self
.
config
.
inputs
[
input_index
].
input_layer_name
]
...
...
@@ -1347,8 +1369,8 @@ class LayerBase(object):
self
,
bias
,
# True/False or BiasCfg
size
,
dims
=
None
,
for_self
=
True
,
# whether create bias for layer self
dims
=
None
,
for_self
=
True
,
# whether create bias for layer self
):
if
size
==
0
:
...
...
@@ -1356,7 +1378,8 @@ class LayerBase(object):
if
dims
is
None
:
dims
=
[
1
,
size
]
config_assert
(
type_of
(
bias
)
==
bool
or
type_of
(
bias
)
==
Bias
,
config_assert
(
type_of
(
bias
)
==
bool
or
type_of
(
bias
)
==
Bias
,
'Incorrect type for bias: %s'
%
type_of
(
bias
))
if
type_of
(
bias
)
==
bool
:
...
...
@@ -1372,7 +1395,8 @@ class LayerBase(object):
Parameter
(
bias
.
parameter_name
,
size
,
self
.
config
.
device
if
self
.
config
.
HasField
(
'device'
)
else
None
,
self
.
config
.
device
if
self
.
config
.
HasField
(
'device'
)
else
None
,
dims
,
bias
.
learning_rate
,
bias
.
momentum
,
...
...
@@ -1384,22 +1408,21 @@ class LayerBase(object):
initial_smart
=
bias
.
initial_smart
,
num_batches_regularization
=
bias
.
num_batches_regularization
,
sparse_remote_update
=
bias
.
sparse_remote_update
,
gradient_clipping_threshold
=
bias
.
gradient_clipping_threshold
,
gradient_clipping_threshold
=
bias
.
gradient_clipping_threshold
,
is_static
=
bias
.
is_static
,
is_shared
=
bias
.
is_shared
,
)
is_shared
=
bias
.
is_shared
,
)
if
for_self
:
self
.
config
.
bias_parameter_name
=
bias
.
parameter_name
else
:
return
bias
.
parameter_name
def
create_input_parameter
(
self
,
def
create_input_parameter
(
self
,
input_index
,
size
,
dims
=
None
,
sparse
=
None
,
format
=
None
):
sparse
=
None
,
format
=
None
):
if
dims
is
None
:
# TODO(yuyang18): print warning and callstack here!
dims
=
list
()
...
...
@@ -1414,12 +1437,12 @@ class LayerBase(object):
if
input_config
.
parameter_name
in
g_parameter_map
:
para
=
g_parameter_map
[
input_config
.
parameter_name
]
config_assert
(
size
==
para
.
size
,
(
'Shared parameter "%s" does not '
+
'have same size: %s vs. %s'
)
config_assert
(
size
==
para
.
size
,
(
'Shared parameter "%s" does not '
+
'have same size: %s vs. %s'
)
%
(
input_config
.
parameter_name
,
para
.
size
,
size
))
config_assert
(
dims
==
para
.
dims
,
(
'Shared parameter "%s" does not '
+
'have same dims: %s vs. %s'
)
config_assert
(
dims
==
para
.
dims
,
(
'Shared parameter "%s" does not '
+
'have same dims: %s vs. %s'
)
%
(
input_config
.
parameter_name
,
para
.
dims
,
dims
))
return
...
...
@@ -1439,13 +1462,13 @@ class LayerBase(object):
num_batches_regularization
=
input_config
.
num_batches_regularization
,
sparse_remote_update
=
input_config
.
sparse_remote_update
,
sparse_update
=
input_config
.
sparse_update
,
gradient_clipping_threshold
=
input_config
.
gradient_clipping_threshold
,
gradient_clipping_threshold
=
input_config
.
gradient_clipping_threshold
,
sparse
=
sparse
,
format
=
format
,
is_static
=
input_config
.
is_static
,
is_shared
=
input_config
.
is_shared
,
update_hooks
=
input_config
.
update_hooks
)
update_hooks
=
input_config
.
update_hooks
)
def
set_layer_size
(
self
,
size
):
if
self
.
config
.
size
==
0
:
...
...
@@ -1455,27 +1478,18 @@ class LayerBase(object):
'Different inputs result in'
+
'different layer size at layer %s'
%
self
.
config
.
name
)
@
config_layer
(
'multi_class_cross_entropy_with_selfnorm'
)
class
MultiClassCrossEntropySelfNormCostLayer
(
LayerBase
):
def
__init__
(
self
,
name
,
inputs
,
softmax_selfnorm_alpha
=
0.1
,
**
xargs
):
super
(
MultiClassCrossEntropySelfNormCostLayer
,
self
).
__init__
(
name
,
'multi_class_cross_entropy_with_selfnorm'
,
0
,
inputs
,
**
xargs
)
def
__init__
(
self
,
name
,
inputs
,
softmax_selfnorm_alpha
=
0.1
,
**
xargs
):
super
(
MultiClassCrossEntropySelfNormCostLayer
,
self
).
__init__
(
name
,
'multi_class_cross_entropy_with_selfnorm'
,
0
,
inputs
,
**
xargs
)
self
.
config
.
softmax_selfnorm_alpha
=
softmax_selfnorm_alpha
@
config_layer
(
'fc'
)
class
FCLayer
(
LayerBase
):
def
__init__
(
self
,
name
,
size
,
inputs
,
bias
=
True
,
**
xargs
):
def
__init__
(
self
,
name
,
size
,
inputs
,
bias
=
True
,
**
xargs
):
super
(
FCLayer
,
self
).
__init__
(
name
,
'fc'
,
size
,
inputs
=
inputs
,
**
xargs
)
for
input_index
in
xrange
(
len
(
self
.
inputs
)):
input_layer
=
self
.
get_input_layer
(
input_index
)
...
...
@@ -1489,13 +1503,14 @@ class FCLayer(LayerBase):
else
:
sparse
=
None
self
.
create_input_parameter
(
input_index
,
psize
,
dims
,
sparse
,
format
)
self
.
create_input_parameter
(
input_index
,
psize
,
dims
,
sparse
,
format
)
self
.
create_bias_parameter
(
bias
,
self
.
config
.
size
)
@
config_layer
(
'selective_fc'
)
class
SelectiveFCLayer
(
LayerBase
):
def
__init__
(
self
,
def
__init__
(
self
,
name
,
size
,
inputs
,
...
...
@@ -1539,26 +1554,23 @@ class SelectiveFCLayer(LayerBase):
if
sparse
:
psize
=
self
.
inputs
[
input_index
].
nnz
self
.
create_input_parameter
(
input_index
,
psize
,
dims
,
sparse
,
format
)
self
.
create_input_parameter
(
input_index
,
psize
,
dims
,
sparse
,
format
)
self
.
create_bias_parameter
(
bias
,
self
.
config
.
size
)
@
config_layer
(
'print'
)
class
PrintLayer
(
LayerBase
):
def
__init__
(
self
,
name
,
inputs
):
def
__init__
(
self
,
name
,
inputs
):
super
(
PrintLayer
,
self
).
__init__
(
name
,
'print'
,
0
,
inputs
)
@
config_layer
(
'data'
)
class
DataLayer
(
LayerBase
):
def
__init__
(
self
,
name
,
size
,
device
=
None
):
super
(
DataLayer
,
self
).
__init__
(
name
,
'data'
,
size
,
inputs
=
[],
device
=
device
)
def
__init__
(
self
,
name
,
size
,
device
=
None
):
super
(
DataLayer
,
self
).
__init__
(
name
,
'data'
,
size
,
inputs
=
[],
device
=
device
)
'''
DataNormLayer: A layer for data normalization
...
...
@@ -1586,14 +1598,11 @@ Note:
min-max: y = (x-min)/(max-min)
decimal-scaling: y = x/10^j, where j is the smallest integer such that max(|y|)<1
'''
@
config_layer
(
'data_norm'
)
class
DataNormLayer
(
LayerBase
):
def
__init__
(
self
,
name
,
inputs
,
data_norm_strategy
=
"z-score"
,
device
=
None
):
def
__init__
(
self
,
name
,
inputs
,
data_norm_strategy
=
"z-score"
,
device
=
None
):
super
(
DataNormLayer
,
self
).
__init__
(
name
,
'data_norm'
,
0
,
inputs
=
inputs
,
device
=
device
)
self
.
config
.
data_norm_strategy
=
data_norm_strategy
...
...
@@ -1605,15 +1614,12 @@ class DataNormLayer(LayerBase):
self
.
inputs
[
0
].
is_static
=
True
self
.
create_input_parameter
(
0
,
para_size
,
para_dims
)
@
config_layer
(
'prelu'
)
class
ParameterReluLayer
(
LayerBase
):
layer_type
=
'prelu'
def
__init__
(
self
,
name
,
inputs
,
partial_sum
=
1
,
**
args
):
def
__init__
(
self
,
name
,
inputs
,
partial_sum
=
1
,
**
args
):
super
(
ParameterReluLayer
,
self
).
__init__
(
name
,
self
.
layer_type
,
0
,
inputs
=
inputs
,
**
args
)
config_assert
(
len
(
self
.
inputs
)
==
1
)
...
...
@@ -1622,11 +1628,12 @@ class ParameterReluLayer(LayerBase):
self
.
set_layer_size
(
input_layer
.
size
)
self
.
create_input_parameter
(
0
,
input_layer
.
size
/
partial_sum
)
@
config_layer
(
'conv'
)
class
ConvLayerBase
(
LayerBase
):
layer_type
=
'conv'
def
__init__
(
self
,
def
__init__
(
self
,
name
,
inputs
=
[],
bias
=
True
,
...
...
@@ -1661,17 +1668,14 @@ class ConvLayerBase(LayerBase):
for
input_index
in
xrange
(
len
(
self
.
inputs
)):
input_layer
=
self
.
get_input_layer
(
input_index
)
parse_conv
(
self
.
inputs
[
input_index
].
conv
,
input_layer
.
name
,
self
.
config
.
inputs
[
input_index
].
conv_conf
,
num_filters
)
parse_conv
(
self
.
inputs
[
input_index
].
conv
,
input_layer
.
name
,
self
.
config
.
inputs
[
input_index
].
conv_conf
,
num_filters
)
conv_conf
=
self
.
config
.
inputs
[
input_index
].
conv_conf
psize
=
self
.
calc_parameter_size
(
conv_conf
)
print
(
"output size for %s is %d "
%
(
name
,
conv_conf
.
output_x
))
self
.
create_input_parameter
(
input_index
,
psize
)
self
.
set_layer_size
(
(
conv_conf
.
output_x
**
2
)
*
self
.
config
.
num_filters
)
(
conv_conf
.
output_x
**
2
)
*
self
.
config
.
num_filters
)
psize
=
self
.
config
.
size
if
shared_biases
:
...
...
@@ -1682,10 +1686,12 @@ class ConvLayerBase(LayerBase):
return
self
.
config
.
num_filters
*
conv_conf
.
filter_channels
\
*
(
conv_conf
.
filter_size
*
conv_conf
.
filter_size_y
)
@
config_layer
(
'exconv'
)
class
ConvLayer
(
ConvLayerBase
):
layer_type
=
'exconv'
@
config_layer
(
'cudnn_conv'
)
class
ConvLayer
(
ConvLayerBase
):
layer_type
=
'cudnn_conv'
...
...
@@ -1694,8 +1700,8 @@ class ConvLayer(ConvLayerBase):
@
config_layer
(
'convt'
)
class
ConvTransLayerBase
(
LayerBase
):
layer_type
=
'convt'
def
__init__
(
self
,
def
__init__
(
self
,
name
,
inputs
=
[],
bias
=
True
,
...
...
@@ -1732,7 +1738,7 @@ class ConvTransLayerBase(LayerBase):
print
(
"output size for %s is %d "
%
(
name
,
conv_conf
.
output_x
))
self
.
create_input_parameter
(
input_index
,
psize
)
self
.
set_layer_size
(
(
conv_conf
.
img_size
**
2
)
*
self
.
config
.
num_filters
)
(
conv_conf
.
img_size
**
2
)
*
self
.
config
.
num_filters
)
psize
=
self
.
config
.
size
if
shared_biases
:
...
...
@@ -1743,70 +1749,61 @@ class ConvTransLayerBase(LayerBase):
return
conv_conf
.
channels
*
conv_conf
.
filter_channels
\
*
(
conv_conf
.
filter_size
*
conv_conf
.
filter_size_y
)
@
config_layer
(
'exconvt'
)
class
ConvTransLayer
(
ConvTransLayerBase
):
layer_type
=
'exconvt'
@
config_layer
(
'norm'
)
class
NormLayer
(
LayerBase
):
def
__init__
(
self
,
name
,
inputs
,
device
=
None
):
super
(
NormLayer
,
self
).
__init__
(
name
,
'norm'
,
0
,
inputs
=
inputs
,
device
=
device
)
def
__init__
(
self
,
name
,
inputs
,
device
=
None
):
super
(
NormLayer
,
self
).
__init__
(
name
,
'norm'
,
0
,
inputs
=
inputs
,
device
=
device
)
for
input_index
in
xrange
(
len
(
self
.
inputs
)):
input_layer
=
self
.
get_input_layer
(
input_index
)
parse_norm
(
self
.
inputs
[
input_index
].
norm
,
input_layer
.
name
,
parse_norm
(
self
.
inputs
[
input_index
].
norm
,
input_layer
.
name
,
self
.
config
.
inputs
[
input_index
].
norm_conf
)
norm_conf
=
self
.
config
.
inputs
[
input_index
].
norm_conf
self
.
set_layer_size
((
norm_conf
.
output_x
**
2
)
*
norm_conf
.
channels
)
self
.
set_layer_size
((
norm_conf
.
output_x
**
2
)
*
norm_conf
.
channels
)
@
config_layer
(
'pool'
)
class
PoolLayer
(
LayerBase
):
def
__init__
(
self
,
name
,
inputs
,
device
=
None
):
super
(
PoolLayer
,
self
).
__init__
(
name
,
'pool'
,
0
,
inputs
=
inputs
,
device
=
device
)
def
__init__
(
self
,
name
,
inputs
,
device
=
None
):
super
(
PoolLayer
,
self
).
__init__
(
name
,
'pool'
,
0
,
inputs
=
inputs
,
device
=
device
)
for
input_index
in
xrange
(
len
(
self
.
inputs
)):
input_layer
=
self
.
get_input_layer
(
input_index
)
parse_pool
(
self
.
inputs
[
input_index
].
pool
,
input_layer
.
name
,
parse_pool
(
self
.
inputs
[
input_index
].
pool
,
input_layer
.
name
,
self
.
config
.
inputs
[
input_index
].
pool_conf
)
pool_conf
=
self
.
config
.
inputs
[
input_index
].
pool_conf
print
(
"output size for %s is %d*%d "
%
(
name
,
pool_conf
.
output_y
,
pool_conf
.
output_x
))
self
.
set_layer_size
((
pool_conf
.
output_x
*
pool_conf
.
output_y
)
*
pool_conf
.
channels
)
print
(
"output size for %s is %d*%d "
%
(
name
,
pool_conf
.
output_y
,
pool_conf
.
output_x
))
self
.
set_layer_size
(
(
pool_conf
.
output_x
*
pool_conf
.
output_y
)
*
pool_conf
.
channels
)
@
config_layer
(
'spp'
)
class
SpatialPyramidPoolLayer
(
LayerBase
):
def
__init__
(
self
,
name
,
inputs
,
device
=
None
):
super
(
SpatialPyramidPoolLayer
,
self
).
__init__
(
name
,
'spp'
,
0
,
inputs
=
inputs
,
device
=
device
)
def
__init__
(
self
,
name
,
inputs
,
device
=
None
):
super
(
SpatialPyramidPoolLayer
,
self
).
__init__
(
name
,
'spp'
,
0
,
inputs
=
inputs
,
device
=
device
)
for
input_index
in
xrange
(
len
(
self
.
inputs
)):
input_layer
=
self
.
get_input_layer
(
input_index
)
parse_spp
(
self
.
inputs
[
input_index
].
spp
,
input_layer
.
name
,
parse_spp
(
self
.
inputs
[
input_index
].
spp
,
input_layer
.
name
,
self
.
config
.
inputs
[
input_index
].
spp_conf
)
spp_conf
=
self
.
config
.
inputs
[
input_index
].
spp_conf
output_size
=
(
pow
(
4
,
spp_conf
.
pyramid_height
)
-
1
)
/
(
4
-
1
)
print
(
"output size for %s is %d "
%
(
name
,
output_size
))
self
.
set_layer_size
(
output_size
*
spp_conf
.
channels
)
@
config_layer
(
'batch_norm'
)
class
BatchNormLayer
(
LayerBase
):
layer_type
=
'batch_norm'
def
__init__
(
self
,
def
__init__
(
self
,
name
,
inputs
,
active_type
=
"linear"
,
...
...
@@ -1820,8 +1817,8 @@ class BatchNormLayer(LayerBase):
inputs
=
[]
elif
not
isinstance
(
inputs
,
list
):
inputs
=
[
inputs
]
config_assert
(
len
(
inputs
)
==
1
,
"BatchNormLayer must have one and only one input"
)
config_assert
(
len
(
inputs
)
==
1
,
"BatchNormLayer must have one and only one input"
)
# Create Input for moving mean and std,
# in batch normalization layer.
# These paras no need to update, so set is_static is true.
...
...
@@ -1830,12 +1827,13 @@ class BatchNormLayer(LayerBase):
use_gpu
=
bool
(
int
(
g_command_config_args
.
get
(
"use_gpu"
,
0
)))
is_shared
=
True
if
not
use_gpu
else
False
for
i
in
xrange
(
2
):
inputs
.
append
(
Input
(
inputs
[
0
].
input_layer_name
,
inputs
.
append
(
Input
(
inputs
[
0
].
input_layer_name
,
initial_std
=
0.0
,
initial_mean
=
0.0
,
is_static
=
True
,
is_shared
=
is_shared
,
))
is_shared
=
is_shared
,
))
parallel_nn
=
bool
(
int
(
g_command_config_args
.
get
(
"parallel_nn"
,
0
)))
cudnn_version
=
int
(
g_command_config_args
.
get
(
"cudnn_version"
,
0
))
...
...
@@ -1845,21 +1843,25 @@ class BatchNormLayer(LayerBase):
((
not
parallel_nn
)
or
self
.
config
.
device
>
-
1
)
and
\
cudnn_version
>=
4007
self
.
layer_type
=
"cudnn_batch_norm"
if
use_cudnn
else
"batch_norm"
super
(
BatchNormLayer
,
self
).
__init__
(
name
,
self
.
layer_type
,
0
,
super
(
BatchNormLayer
,
self
).
__init__
(
name
,
self
.
layer_type
,
0
,
active_type
=
active_type
,
inputs
=
inputs
,
device
=
device
,
**
xargs
)
inputs
=
inputs
,
device
=
device
,
**
xargs
)
if
use_global_stats
is
not
None
:
self
.
config
.
use_global_stats
=
use_global_stats
if
moving_average_fraction
is
not
None
:
self
.
config
.
moving_average_fraction
=
moving_average_fraction
input_layer
=
self
.
get_input_layer
(
0
)
parse_image
(
self
.
inputs
[
0
].
image
,
input_layer
.
name
,
input_layer
=
self
.
get_input_layer
(
0
)
parse_image
(
self
.
inputs
[
0
].
image
,
input_layer
.
name
,
self
.
config
.
inputs
[
0
].
image_conf
)
image_conf
=
self
.
config
.
inputs
[
0
].
image_conf
self
.
set_layer_size
((
image_conf
.
img_size
**
2
)
*
image_conf
.
channels
)
self
.
set_layer_size
((
image_conf
.
img_size
**
2
)
*
image_conf
.
channels
)
psize
=
self
.
calc_parameter_size
(
image_conf
)
dims
=
[
1
,
psize
]
...
...
@@ -1872,75 +1874,74 @@ class BatchNormLayer(LayerBase):
def
calc_parameter_size
(
self
,
image_conf
):
return
image_conf
.
channels
@
config_layer
(
'trans'
)
class
TransLayer
(
LayerBase
):
def
__init__
(
self
,
name
,
inputs
,
device
=
None
):
super
(
TransLayer
,
self
).
__init__
(
name
,
'trans'
,
0
,
inputs
=
inputs
,
device
=
device
)
config_assert
(
len
(
self
.
inputs
)
==
1
,
def
__init__
(
self
,
name
,
inputs
,
device
=
None
):
super
(
TransLayer
,
self
).
__init__
(
name
,
'trans'
,
0
,
inputs
=
inputs
,
device
=
device
)
config_assert
(
len
(
self
.
inputs
)
==
1
,
'TransLayer must have one and only one input'
)
self
.
set_layer_size
(
self
.
get_input_layer
(
0
).
size
)
@
config_layer
(
'resize'
)
class
ResizeLayer
(
LayerBase
):
def
__init__
(
self
,
name
,
size
,
inputs
,
device
=
None
):
super
(
ResizeLayer
,
self
).
__init__
(
name
,
'resize'
,
size
=
size
,
inputs
=
inputs
,
device
=
device
)
config_assert
(
len
(
self
.
inputs
)
==
1
,
def
__init__
(
self
,
name
,
size
,
inputs
,
device
=
None
):
super
(
ResizeLayer
,
self
).
__init__
(
name
,
'resize'
,
size
=
size
,
inputs
=
inputs
,
device
=
device
)
config_assert
(
len
(
self
.
inputs
)
==
1
,
'ResizeLayer must have one and only one input'
)
@
config_layer
(
'blockexpand'
)
class
BlockExpandLayer
(
LayerBase
):
def
__init__
(
self
,
name
,
inputs
,
device
=
None
):
super
(
BlockExpandLayer
,
self
).
__init__
(
name
,
'blockexpand'
,
0
,
inputs
=
inputs
,
device
=
device
)
def
__init__
(
self
,
name
,
inputs
,
device
=
None
):
super
(
BlockExpandLayer
,
self
).
__init__
(
name
,
'blockexpand'
,
0
,
inputs
=
inputs
,
device
=
device
)
for
input_index
in
xrange
(
len
(
self
.
inputs
)):
input_layer
=
self
.
get_input_layer
(
input_index
)
parse_block_expand
(
self
.
inputs
[
input_index
].
block_expand
,
input_layer
.
name
,
parse_block_expand
(
self
.
inputs
[
input_index
].
block_expand
,
input_layer
.
name
,
self
.
config
.
inputs
[
input_index
].
block_expand_conf
)
block_expand_conf
=
self
.
config
.
inputs
[
input_index
].
block_expand_conf
self
.
set_layer_size
(
block_expand_conf
.
block_x
*
block_expand_conf
.
block_y
*
block_expand_conf
.
channels
)
block_expand_conf
=
self
.
config
.
inputs
[
input_index
].
block_expand_conf
self
.
set_layer_size
(
block_expand_conf
.
block_x
*
block_expand_conf
.
block_y
*
block_expand_conf
.
channels
)
@
config_layer
(
'maxout'
)
class
MaxOutLayer
(
LayerBase
):
def
__init__
(
self
,
name
,
inputs
,
**
xargs
):
super
(
MaxOutLayer
,
self
).
__init__
(
name
,
'maxout'
,
0
,
inputs
=
inputs
,
**
xargs
)
def
__init__
(
self
,
name
,
inputs
,
**
xargs
):
super
(
MaxOutLayer
,
self
).
__init__
(
name
,
'maxout'
,
0
,
inputs
=
inputs
,
**
xargs
)
input_layer
=
self
.
get_input_layer
(
0
)
parse_maxout
(
self
.
inputs
[
0
].
maxout
,
input_layer
.
name
,
parse_maxout
(
self
.
inputs
[
0
].
maxout
,
input_layer
.
name
,
self
.
config
.
inputs
[
0
].
maxout_conf
)
maxout_conf
=
self
.
config
.
inputs
[
0
].
maxout_conf
self
.
set_layer_size
(
g_layer_map
[
input_layer
.
name
].
size
/
maxout_conf
.
groups
)
self
.
set_layer_size
(
g_layer_map
[
input_layer
.
name
].
size
/
maxout_conf
.
groups
)
# key: cost type
# value: cost class
g_cost_map
=
{}
# define a cost layer without any parameters
def
define_cost
(
class_name
,
cost_type
):
def
init
(
cls
,
name
,
inputs
,
device
=
None
,
coeff
=
1.
):
super
(
type
(
cls
),
cls
).
__init__
(
name
,
cost_type
,
1
,
inputs
,
device
=
device
,
coeff
=
coeff
)
super
(
type
(
cls
),
cls
).
__init__
(
name
,
cost_type
,
1
,
inputs
,
device
=
device
,
coeff
=
coeff
)
cls
=
type
(
class_name
,
(
LayerBase
,),
dict
(
__init__
=
init
))
cls
=
type
(
class_name
,
(
LayerBase
,
),
dict
(
__init__
=
init
))
global
g_cost_map
g_cost_map
[
cost_type
]
=
cls
define_cost
(
'MultiClassCrossEntropy'
,
'multi-class-cross-entropy'
)
define_cost
(
'RankingCost'
,
'rank-cost'
)
define_cost
(
'AucValidation'
,
'auc-validation'
)
...
...
@@ -1951,18 +1952,14 @@ define_cost('SoftBinaryClassCrossEntropy', 'soft_binary_class_cross_entropy')
define_cost
(
'HuberTwoClass'
,
'huber'
)
define_cost
(
'SumCost'
,
'sum_cost'
)
@
config_layer
(
'hsigmoid'
)
class
HierarchicalSigmoidLayer
(
LayerBase
):
def
__init__
(
self
,
name
,
num_classes
,
inputs
,
device
=
None
,
bias
=
True
):
def
__init__
(
self
,
name
,
num_classes
,
inputs
,
device
=
None
,
bias
=
True
):
super
(
HierarchicalSigmoidLayer
,
self
).
__init__
(
name
,
'hsigmoid'
,
1
,
inputs
=
inputs
,
device
=
device
)
config_assert
(
len
(
self
.
inputs
)
>=
2
,
config_assert
(
len
(
self
.
inputs
)
>=
2
,
'HierarchicalSigmoidLayer must have at least 2 inputs'
)
self
.
config
.
num_classes
=
num_classes
for
input_index
in
xrange
(
len
(
self
.
inputs
)
-
1
):
...
...
@@ -1972,6 +1969,7 @@ class HierarchicalSigmoidLayer(LayerBase):
self
.
create_input_parameter
(
input_index
,
psize
,
dims
)
self
.
create_bias_parameter
(
bias
,
num_classes
-
1
)
'''
lambdaCost for lambdaRank LTR approach
...
...
@@ -1996,29 +1994,25 @@ Usage:
max_sort_size can be greater than the size of a list, in which
case the algorithm will sort the entire list to get gradient.
'''
@
config_layer
(
'lambda_cost'
)
class
LambdaCost
(
LayerBase
):
def
__init__
(
self
,
name
,
inputs
,
NDCG_num
=
5
,
max_sort_size
=
-
1
,
device
=
None
):
def
__init__
(
self
,
name
,
inputs
,
NDCG_num
=
5
,
max_sort_size
=-
1
,
device
=
None
):
super
(
LambdaCost
,
self
).
__init__
(
name
,
'lambda_cost'
,
1
,
inputs
=
inputs
,
device
=
device
)
config_assert
(
len
(
self
.
inputs
)
==
2
,
'lambdaCost must have 2 inputs'
)
config_assert
(
len
(
self
.
inputs
)
==
2
,
'lambdaCost must have 2 inputs'
)
self
.
config
.
NDCG_num
=
NDCG_num
if
max_sort_size
!=
-
1
:
config_assert
(
NDCG_num
<=
max_sort_size
,
config_assert
(
NDCG_num
<=
max_sort_size
,
'NDCG_num must be less than or equal to max_sort_size'
)
self
.
config
.
max_sort_size
=
max_sort_size
@
config_layer
(
'nce'
)
class
NCELayer
(
LayerBase
):
def
__init__
(
self
,
def
__init__
(
self
,
name
,
num_classes
,
inputs
,
...
...
@@ -2027,15 +2021,17 @@ class NCELayer(LayerBase):
bias
=
True
,
**
xargs
):
super
(
NCELayer
,
self
).
__init__
(
name
,
'nce'
,
1
,
inputs
=
inputs
,
**
xargs
)
config_assert
(
len
(
self
.
inputs
)
>=
2
,
'NCELayer must have at least 2 inputs'
)
config_assert
(
len
(
self
.
inputs
)
>=
2
,
'NCELayer must have at least 2 inputs'
)
self
.
config
.
num_classes
=
num_classes
if
neg_sampling_dist
is
not
None
:
config_assert
(
len
(
neg_sampling_dist
)
==
num_classes
,
'len(neg_sampling_dist)(%s) is not same as num_classes (%s)'
%
(
len
(
neg_sampling_dist
),
num_classes
))
config_assert
(
len
(
neg_sampling_dist
)
==
num_classes
,
'len(neg_sampling_dist)(%s) is not same as num_classes (%s)'
%
(
len
(
neg_sampling_dist
),
num_classes
))
s
=
sum
(
neg_sampling_dist
)
config_assert
(
abs
(
s
-
1
)
<
1e-5
,
config_assert
(
abs
(
s
-
1
)
<
1e-5
,
'The sum of neg_sampling_dist (%s) is not 1'
%
s
)
self
.
config
.
neg_sampling_dist
.
extend
(
neg_sampling_dist
)
...
...
@@ -2047,8 +2043,8 @@ class NCELayer(LayerBase):
'Expecting the last input layer of an nce layer to be '
'a data layer'
)
if
(
num_real_inputs
>
1
and
input_layer
.
size
==
1
and
self
.
get_input_layer
(
num_real_inputs
-
1
).
type
==
'data'
):
if
(
num_real_inputs
>
1
and
input_layer
.
size
==
1
and
self
.
get_input_layer
(
num_real_inputs
-
1
).
type
==
'data'
):
# This input layer is assumed to be a sample weight layer
num_real_inputs
-=
1
...
...
@@ -2062,105 +2058,82 @@ class NCELayer(LayerBase):
@
config_layer
(
'addto'
)
class
AddToLayer
(
LayerBase
):
def
__init__
(
self
,
name
,
inputs
,
bias
=
True
,
**
xargs
):
def
__init__
(
self
,
name
,
inputs
,
bias
=
True
,
**
xargs
):
super
(
AddToLayer
,
self
).
__init__
(
name
,
'addto'
,
0
,
inputs
=
inputs
,
**
xargs
)
config_assert
(
len
(
inputs
)
>
0
,
'inputs cannot be empty for AddToLayer'
)
config_assert
(
len
(
inputs
)
>
0
,
'inputs cannot be empty for AddToLayer'
)
for
input_index
in
xrange
(
len
(
self
.
inputs
)):
input_layer
=
self
.
get_input_layer
(
input_index
)
self
.
set_layer_size
(
input_layer
.
size
)
self
.
create_bias_parameter
(
bias
,
self
.
config
.
size
)
@
config_layer
(
'agent'
)
class
AgentLayer
(
LayerBase
):
def
__init__
(
self
,
name
,
size
,
device
=
None
):
super
(
AgentLayer
,
self
).
__init__
(
name
,
'agent'
,
size
,
inputs
=
[],
device
=
device
)
def
__init__
(
self
,
name
,
size
,
device
=
None
):
super
(
AgentLayer
,
self
).
__init__
(
name
,
'agent'
,
size
,
inputs
=
[],
device
=
device
)
@
config_layer
(
'sequence_agent'
)
class
SequenceAgentLayer
(
LayerBase
):
def
__init__
(
self
,
name
,
size
,
device
=
None
):
def
__init__
(
self
,
name
,
size
,
device
=
None
):
super
(
SequenceAgentLayer
,
self
).
__init__
(
name
,
'sequence_agent'
,
size
,
inputs
=
[],
device
=
device
)
@
config_layer
(
'gather_agent'
)
class
GatherAgentLayer
(
LayerBase
):
def
__init__
(
self
,
name
,
size
,
device
=
None
):
def
__init__
(
self
,
name
,
size
,
device
=
None
):
super
(
GatherAgentLayer
,
self
).
__init__
(
name
,
'gather_agent'
,
size
,
inputs
=
[],
device
=
device
)
@
config_layer
(
'scatter_agent'
)
class
ScatterAgentLayer
(
LayerBase
):
def
__init__
(
self
,
name
,
size
,
device
=
None
):
def
__init__
(
self
,
name
,
size
,
device
=
None
):
super
(
ScatterAgentLayer
,
self
).
__init__
(
name
,
'scatter_agent'
,
size
,
inputs
=
[],
device
=
device
)
@
config_layer
(
'sequence_gather_agent'
)
class
SequenceGatherAgentLayer
(
LayerBase
):
def
__init__
(
self
,
name
,
size
,
device
=
None
):
def
__init__
(
self
,
name
,
size
,
device
=
None
):
super
(
SequenceGatherAgentLayer
,
self
).
__init__
(
name
,
'sequence_gather_agent'
,
size
,
inputs
=
[],
device
=
device
)
@
config_layer
(
'sequence_scatter_agent'
)
class
SequenceScatterAgentLayer
(
LayerBase
):
def
__init__
(
self
,
name
,
size
,
device
=
None
):
def
__init__
(
self
,
name
,
size
,
device
=
None
):
super
(
SequenceScatterAgentLayer
,
self
).
__init__
(
name
,
'sequence_scatter_agent'
,
size
,
inputs
=
[],
device
=
device
)
@
config_layer
(
'multiplex'
)
class
MultiplexLayer
(
LayerBase
):
def
__init__
(
self
,
name
,
inputs
,
size
,
device
=
None
):
super
(
MultiplexLayer
,
self
).
__init__
(
name
,
'multiplex'
,
size
,
inputs
=
inputs
,
device
=
device
)
config_assert
(
len
(
inputs
)
>
2
,
'MultiplexLayer should have more than 2 inputs.'
)
def
__init__
(
self
,
name
,
inputs
,
size
,
device
=
None
):
super
(
MultiplexLayer
,
self
).
__init__
(
name
,
'multiplex'
,
size
,
inputs
=
inputs
,
device
=
device
)
config_assert
(
len
(
inputs
)
>
2
,
'MultiplexLayer should have more than 2 inputs.'
)
for
i
in
range
(
1
,
len
(
inputs
)):
config_assert
(
self
.
get_input_layer
(
i
).
size
==
size
,
config_assert
(
self
.
get_input_layer
(
i
).
size
==
size
,
"All the input layers except the first one should"
"have the same size as the MultiplexLayer."
)
@
config_func
def
Link
(
name
,
has_subseq
=
Fals
e
,
):
def
Link
(
nam
e
,
has_subseq
=
False
,
):
link_config
=
LinkConfig
()
link_config
.
link_name
=
name
link_config
.
has_subseq
=
has_subseq
return
link_config
# memory for recurrent layer group.
# *name* and *size* are actual layer's name and size.
# will return name of the memory,
...
...
@@ -2175,14 +2148,14 @@ def Link(name,
# can only be initailized by a *boot_layer* which is a sequence.
#
@
config_func
def
Memory
(
name
,
def
Memory
(
name
,
size
,
is_sequence
=
False
,
boot_layer
=
None
,
boot_bias
=
False
,
boot_bias_active_type
=
""
,
boot_with_const_id
=
None
,
):
boot_with_const_id
=
None
,
):
agent_name
=
name
+
"+delay1"
if
is_sequence
:
agent_layer
=
SequenceAgentLayer
(
agent_name
,
size
)
...
...
@@ -2194,24 +2167,27 @@ def Memory(name,
memory
.
layer_name
=
MakeLayerNameInSubmodel
(
name
)
memory
.
link_name
=
MakeLayerNameInSubmodel
(
agent_name
)
memory
.
is_sequence
=
is_sequence
options
=
sum
((
boot_layer
is
not
None
,
bool
(
boot_bias
),
options
=
sum
((
boot_layer
is
not
None
,
bool
(
boot_bias
),
boot_with_const_id
is
not
None
))
config_assert
(
options
<=
1
,
'take one option at most from boot_layer, boot_bias, or boot_with_const_id'
)
config_assert
(
options
<=
1
,
'take one option at most from boot_layer, boot_bias, or boot_with_const_id'
)
if
boot_layer
is
not
None
:
boot_layer
=
MakeLayerNameInParentSubmodel
(
boot_layer
)
config_assert
(
boot_layer
in
g_layer_map
,
'boot_layer "%s" does not correspond to a layer name'
%
boot_layer
)
'boot_layer "%s" does not correspond to a layer name'
%
boot_layer
)
memory
.
boot_layer_name
=
boot_layer
elif
boot_bias
:
memory
.
boot_bias_parameter_name
=
agent_layer
.
create_bias_parameter
(
boot_bias
,
size
,
for_self
=
False
)
boot_bias
,
size
,
for_self
=
False
)
memory
.
boot_bias_active_type
=
boot_bias_active_type
elif
boot_with_const_id
is
not
None
:
memory
.
boot_with_const_id
=
boot_with_const_id
return
agent_name
# Generator for recurrent layer group, to use it:
# 1. define a id layer as output of layer group
# 2. define a memory of this id layer, and assign a boot id(begin of sequence)
...
...
@@ -2223,11 +2199,10 @@ def Memory(name,
@
config_func
def
Generator
(
max_num_frames
,
eos_layer_name
=
"eos_check"
,
num_results_per_sample
=
1
,
beam_size
=
1
,
log_prob
=
None
,
):
eos_layer_name
=
"eos_check"
,
num_results_per_sample
=
1
,
beam_size
=
1
,
log_prob
=
None
,
):
generator_config
=
GeneratorConfig
()
generator_config
.
max_num_frames
=
max_num_frames
generator_config
.
eos_layer_name
=
eos_layer_name
...
...
@@ -2237,10 +2212,10 @@ def Generator(
generator_config
.
log_prob
=
log_prob
return
generator_config
@
config_layer
(
'expand'
)
class
ExpandLayer
(
LayerBase
):
def
__init__
(
self
,
def
__init__
(
self
,
name
,
inputs
,
trans_type
=
'non-seq'
,
...
...
@@ -2248,27 +2223,22 @@ class ExpandLayer(LayerBase):
bias
=
False
):
super
(
ExpandLayer
,
self
).
__init__
(
name
,
'expand'
,
0
,
inputs
=
inputs
,
device
=
device
)
config_assert
(
len
(
self
.
inputs
)
==
2
,
'ExpandLayer takes 2 and only 2 inputs'
)
config_assert
(
len
(
self
.
inputs
)
==
2
,
'ExpandLayer takes 2 and only 2 inputs'
)
self
.
config
.
trans_type
=
trans_type
for
input_index
in
xrange
(
len
(
self
.
inputs
)):
input_layer
=
self
.
get_input_layer
(
input_index
)
self
.
set_layer_size
(
self
.
get_input_layer
(
0
).
size
)
self
.
create_bias_parameter
(
bias
,
self
.
config
.
size
)
@
config_layer
(
'featmap_expand'
)
class
FeatMapExpandLayer
(
LayerBase
):
def
__init__
(
self
,
name
,
inputs
,
device
=
None
,
num_filters
=
None
,
bias
=
False
):
def
__init__
(
self
,
name
,
inputs
,
device
=
None
,
num_filters
=
None
,
bias
=
False
):
super
(
FeatMapExpandLayer
,
self
).
__init__
(
name
,
'featmap_expand'
,
0
,
inputs
=
inputs
,
device
=
device
)
config_assert
(
len
(
self
.
inputs
)
==
1
,
'ExpandLayer takes 1 and only 1 inputs'
)
config_assert
(
len
(
self
.
inputs
)
==
1
,
'ExpandLayer takes 1 and only 1 inputs'
)
if
num_filters
is
not
None
:
self
.
config
.
num_filters
=
num_filters
else
:
...
...
@@ -2278,8 +2248,7 @@ class FeatMapExpandLayer(LayerBase):
@
config_layer
(
'max'
)
class
MaxLayer
(
LayerBase
):
def
__init__
(
self
,
def
__init__
(
self
,
name
,
inputs
,
trans_type
=
'non-seq'
,
...
...
@@ -2287,7 +2256,8 @@ class MaxLayer(LayerBase):
device
=
None
,
bias
=
False
,
output_max_index
=
None
):
super
(
MaxLayer
,
self
).
__init__
(
name
,
'max'
,
0
,
inputs
=
inputs
,
device
=
device
)
super
(
MaxLayer
,
self
).
__init__
(
name
,
'max'
,
0
,
inputs
=
inputs
,
device
=
device
)
config_assert
(
len
(
self
.
inputs
)
==
1
,
'MaxLayer must have 1 input'
)
self
.
config
.
trans_type
=
trans_type
self
.
config
.
active_type
=
active_type
...
...
@@ -2301,12 +2271,7 @@ class MaxLayer(LayerBase):
@
config_layer
(
'maxid'
)
class
MaxIdLayer
(
LayerBase
):
def
__init__
(
self
,
name
,
inputs
,
beam_size
=
None
,
device
=
None
):
def
__init__
(
self
,
name
,
inputs
,
beam_size
=
None
,
device
=
None
):
super
(
MaxIdLayer
,
self
).
__init__
(
name
,
'maxid'
,
0
,
inputs
=
inputs
,
device
=
device
)
config_assert
(
len
(
self
.
inputs
)
==
1
,
'MaxIdLayer must have 1 input'
)
...
...
@@ -2324,37 +2289,39 @@ class MaxIdLayer(LayerBase):
@
config_layer
(
'eos_id'
)
class
EosIdLayer
(
LayerBase
):
def
__init__
(
self
,
name
,
inputs
,
eos_id
,
device
=
None
):
def
__init__
(
self
,
name
,
inputs
,
eos_id
,
device
=
None
):
super
(
EosIdLayer
,
self
).
__init__
(
name
,
'eos_id'
,
0
,
inputs
=
inputs
,
device
=
device
)
config_assert
(
len
(
self
.
inputs
)
==
1
,
'EosIdLayer must have 1 input'
)
self
.
set_layer_size
(
2
)
# boolean output
self
.
config
.
eos_id
=
eos_id
@
config_layer
(
'seqlastins'
)
class
SequenceLastInstanceLayer
(
LayerBase
):
def
__init__
(
self
,
def
__init__
(
self
,
name
,
inputs
,
active_type
=
'linear'
,
trans_type
=
'non-seq'
,
device
=
None
,
bias
=
False
):
super
(
SequenceLastInstanceLayer
,
self
).
__init__
(
name
,
'seqlastins'
,
0
,
inputs
=
inputs
,
device
=
device
,
active_type
=
active_type
)
config_assert
(
len
(
inputs
)
==
1
,
'SequenceLastInstanceLayer must have 1 input'
)
super
(
SequenceLastInstanceLayer
,
self
).
__init__
(
name
,
'seqlastins'
,
0
,
inputs
=
inputs
,
device
=
device
,
active_type
=
active_type
)
config_assert
(
len
(
inputs
)
==
1
,
'SequenceLastInstanceLayer must have 1 input'
)
self
.
config
.
trans_type
=
trans_type
for
input_index
in
xrange
(
len
(
self
.
inputs
)):
input_layer
=
self
.
get_input_layer
(
input_index
)
self
.
set_layer_size
(
input_layer
.
size
)
self
.
create_bias_parameter
(
bias
,
self
.
config
.
size
)
@
config_layer
(
'seqfirstins'
)
class
SequenceFirstInstanceLayer
(
SequenceLastInstanceLayer
):
def
__init__
(
...
...
@@ -2364,167 +2331,163 @@ class SequenceFirstInstanceLayer(SequenceLastInstanceLayer):
active_type
=
'linear'
,
trans_type
=
'non-seq'
,
device
=
None
,
bias
=
False
,
):
super
(
SequenceFirstInstanceLayer
,
self
).
__init__
(
name
,
inputs
=
inputs
,
active_type
=
active_type
,
device
=
device
,
bias
=
bias
)
bias
=
False
,
):
super
(
SequenceFirstInstanceLayer
,
self
).
__init__
(
name
,
inputs
=
inputs
,
active_type
=
active_type
,
device
=
device
,
bias
=
bias
)
self
.
config
.
trans_type
=
trans_type
self
.
config
.
select_first
=
True
@
config_layer
(
'seqconcat'
)
class
SequenceConcatLayer
(
LayerBase
):
def
__init__
(
self
,
def
__init__
(
self
,
name
,
inputs
,
active_type
=
'linear'
,
device
=
None
,
bias
=
False
):
super
(
SequenceConcatLayer
,
self
).
__init__
(
name
,
'seqconcat'
,
0
,
inputs
=
inputs
,
device
=
device
,
active_type
=
active_type
)
config_assert
(
len
(
inputs
)
==
2
,
'SequenceConcatLayer must have 2 inputs'
)
super
(
SequenceConcatLayer
,
self
).
__init__
(
name
,
'seqconcat'
,
0
,
inputs
=
inputs
,
device
=
device
,
active_type
=
active_type
)
config_assert
(
len
(
inputs
)
==
2
,
'SequenceConcatLayer must have 2 inputs'
)
for
input_index
in
xrange
(
len
(
self
.
inputs
)):
input_layer
=
self
.
get_input_layer
(
input_index
)
self
.
set_layer_size
(
input_layer
.
size
)
self
.
create_bias_parameter
(
bias
,
self
.
config
.
size
)
@
config_layer
(
'seqreshape'
)
class
SequenceReshapeLayer
(
LayerBase
):
def
__init__
(
self
,
def
__init__
(
self
,
name
,
size
,
inputs
,
active_type
=
'linear'
,
device
=
None
,
bias
=
False
):
super
(
SequenceReshapeLayer
,
self
).
__init__
(
name
,
'seqreshape'
,
size
,
inputs
=
inputs
,
device
=
device
,
active_type
=
active_type
)
config_assert
(
len
(
inputs
)
==
1
,
'SequenceReshapeLayer must have 1 inputs'
)
super
(
SequenceReshapeLayer
,
self
).
__init__
(
name
,
'seqreshape'
,
size
,
inputs
=
inputs
,
device
=
device
,
active_type
=
active_type
)
config_assert
(
len
(
inputs
)
==
1
,
'SequenceReshapeLayer must have 1 inputs'
)
self
.
set_layer_size
(
size
)
self
.
create_bias_parameter
(
bias
,
size
)
@
config_layer
(
'subseq'
)
class
SubSequenceLayer
(
LayerBase
):
def
__init__
(
self
,
def
__init__
(
self
,
name
,
inputs
,
active_type
=
'linear'
,
device
=
None
,
bias
=
False
):
super
(
SubSequenceLayer
,
self
).
__init__
(
name
,
'subseq'
,
0
,
inputs
=
inputs
,
device
=
device
,
active_type
=
active_type
)
super
(
SubSequenceLayer
,
self
).
__init__
(
name
,
'subseq'
,
0
,
inputs
=
inputs
,
device
=
device
,
active_type
=
active_type
)
config_assert
(
len
(
inputs
)
==
3
,
'SubSequenceLayer must have 3 inputs'
)
input_layer0
=
self
.
get_input_layer
(
0
)
size
=
input_layer0
.
size
self
.
set_layer_size
(
size
)
self
.
create_bias_parameter
(
bias
,
size
)
@
config_layer
(
'out_prod'
)
class
OuterProdLayer
(
LayerBase
):
def
__init__
(
self
,
name
,
inputs
,
device
=
None
):
super
(
OuterProdLayer
,
self
).
__init__
(
name
,
'out_prod'
,
0
,
inputs
=
inputs
,
device
=
device
)
def
__init__
(
self
,
name
,
inputs
,
device
=
None
):
super
(
OuterProdLayer
,
self
).
__init__
(
name
,
'out_prod'
,
0
,
inputs
=
inputs
,
device
=
device
)
config_assert
(
len
(
inputs
)
==
2
,
'OuterProdLayer must have 2 inputs'
)
input_layer0
=
self
.
get_input_layer
(
0
)
input_layer1
=
self
.
get_input_layer
(
1
)
self
.
set_layer_size
(
input_layer0
.
size
*
input_layer1
.
size
)
@
config_layer
(
'power'
)
class
PowerLayer
(
LayerBase
):
def
__init__
(
self
,
name
,
inputs
,
device
=
None
):
super
(
PowerLayer
,
self
).
__init__
(
name
,
'power'
,
0
,
inputs
=
inputs
,
device
=
device
)
def
__init__
(
self
,
name
,
inputs
,
device
=
None
):
super
(
PowerLayer
,
self
).
__init__
(
name
,
'power'
,
0
,
inputs
=
inputs
,
device
=
device
)
config_assert
(
len
(
inputs
)
==
2
,
'PowerLayer must have 2 inputs'
)
input_layer1
=
self
.
get_input_layer
(
1
)
self
.
set_layer_size
(
input_layer1
.
size
)
input_layer0
=
self
.
get_input_layer
(
0
)
config_assert
(
1
==
input_layer0
.
size
,
config_assert
(
1
==
input_layer0
.
size
,
'The left input is the exponent and should be of size 1'
)
@
config_layer
(
'slope_intercept'
)
class
SlopeInterceptLayer
(
LayerBase
):
def
__init__
(
self
,
name
,
inputs
,
slope
=
1.0
,
intercept
=
0.0
,
device
=
None
):
super
(
SlopeInterceptLayer
,
self
).
__init__
(
name
,
'slope_intercept'
,
0
,
inputs
=
inputs
,
device
=
device
)
def
__init__
(
self
,
name
,
inputs
,
slope
=
1.0
,
intercept
=
0.0
,
device
=
None
):
super
(
SlopeInterceptLayer
,
self
).
__init__
(
name
,
'slope_intercept'
,
0
,
inputs
=
inputs
,
device
=
device
)
self
.
config
.
slope
=
slope
self
.
config
.
intercept
=
intercept
config_assert
(
len
(
inputs
)
==
1
,
'SlopeInterceptLayer must have 1 input'
)
input_layer0
=
self
.
get_input_layer
(
0
)
self
.
set_layer_size
(
input_layer0
.
size
)
@
config_layer
(
'scaling'
)
class
ScalingLayer
(
LayerBase
):
def
__init__
(
self
,
name
,
inputs
,
device
=
None
):
super
(
ScalingLayer
,
self
).
__init__
(
name
,
'scaling'
,
0
,
inputs
=
inputs
,
device
=
device
)
def
__init__
(
self
,
name
,
inputs
,
device
=
None
):
super
(
ScalingLayer
,
self
).
__init__
(
name
,
'scaling'
,
0
,
inputs
=
inputs
,
device
=
device
)
config_assert
(
len
(
inputs
)
==
2
,
'ScalingLayer must have 2 inputs'
)
input_layer1
=
self
.
get_input_layer
(
1
)
self
.
set_layer_size
(
input_layer1
.
size
)
input_layer0
=
self
.
get_input_layer
(
0
)
config_assert
(
1
==
input_layer0
.
size
,
config_assert
(
1
==
input_layer0
.
size
,
'The left input should be of size 1'
)
@
config_layer
(
'conv_shift'
)
class
ConvShiftLayer
(
LayerBase
):
def
__init__
(
self
,
name
,
inputs
,
device
=
None
):
super
(
ConvShiftLayer
,
self
).
__init__
(
name
,
'conv_shift'
,
0
,
inputs
=
inputs
,
device
=
device
)
def
__init__
(
self
,
name
,
inputs
,
device
=
None
):
super
(
ConvShiftLayer
,
self
).
__init__
(
name
,
'conv_shift'
,
0
,
inputs
=
inputs
,
device
=
device
)
config_assert
(
len
(
inputs
)
==
2
,
'ConvShiftLayer must have 2 inputs'
)
input_layer0
=
self
.
get_input_layer
(
0
)
self
.
set_layer_size
(
input_layer0
.
size
)
@
config_layer
(
'convex_comb'
)
class
ConvexCombinationLayer
(
LayerBase
):
def
__init__
(
self
,
name
,
size
,
inputs
,
device
=
None
):
def
__init__
(
self
,
name
,
size
,
inputs
,
device
=
None
):
super
(
ConvexCombinationLayer
,
self
).
__init__
(
name
,
'convex_comb'
,
size
,
inputs
=
inputs
,
device
=
device
)
config_assert
(
len
(
self
.
inputs
)
==
2
,
'ConvexCombinationLayer must have 2 inputs'
)
config_assert
(
len
(
self
.
inputs
)
==
2
,
'ConvexCombinationLayer must have 2 inputs'
)
config_assert
(
size
*
self
.
get_input_layer
(
0
).
size
==
self
.
get_input_layer
(
1
).
size
,
'Wrong input size for ConvexCombinationLayer'
)
self
.
set_layer_size
(
size
)
@
config_layer
(
'interpolation'
)
class
InterpolationLayer
(
LayerBase
):
def
__init__
(
self
,
name
,
inputs
,
device
=
None
):
def
__init__
(
self
,
name
,
inputs
,
device
=
None
):
super
(
InterpolationLayer
,
self
).
__init__
(
name
,
'interpolation'
,
0
,
inputs
=
inputs
,
device
=
device
)
config_assert
(
len
(
self
.
inputs
)
==
3
,
'InterpolationLayer must have 3 inputs'
)
config_assert
(
len
(
self
.
inputs
)
==
3
,
'InterpolationLayer must have 3 inputs'
)
input_layer0
=
self
.
get_input_layer
(
0
)
input_layer1
=
self
.
get_input_layer
(
1
)
input_layer2
=
self
.
get_input_layer
(
2
)
...
...
@@ -2533,64 +2496,51 @@ class InterpolationLayer(LayerBase):
config_assert
(
input_layer1
.
size
==
input_layer2
.
size
,
'the two vector inputs should be of the same size'
)
@
config_layer
(
'bilinear_interp'
)
class
BilinearInterpLayer
(
LayerBase
):
def
__init__
(
self
,
name
,
inputs
,
**
xargs
):
def
__init__
(
self
,
name
,
inputs
,
**
xargs
):
super
(
BilinearInterpLayer
,
self
).
__init__
(
name
,
'bilinear_interp'
,
0
,
inputs
=
inputs
,
**
xargs
)
input_layer
=
self
.
get_input_layer
(
0
)
parse_bilinear
(
self
.
inputs
[
0
].
bilinear_interp
,
input_layer
.
name
,
self
.
config
.
inputs
[
0
].
bilinear_interp_conf
);
parse_bilinear
(
self
.
inputs
[
0
].
bilinear_interp
,
input_layer
.
name
,
self
.
config
.
inputs
[
0
].
bilinear_interp_conf
)
conf
=
self
.
inputs
[
0
].
bilinear_interp
self
.
set_layer_size
(
conf
.
out_size_x
*
conf
.
out_size_y
*
conf
.
num_channels
)
self
.
set_layer_size
(
conf
.
out_size_x
*
conf
.
out_size_y
*
conf
.
num_channels
)
@
config_layer
(
'sum_to_one_norm'
)
class
SumToOneNormLayer
(
LayerBase
):
def
__init__
(
self
,
name
,
inputs
,
device
=
None
):
def
__init__
(
self
,
name
,
inputs
,
device
=
None
):
super
(
SumToOneNormLayer
,
self
).
__init__
(
name
,
'sum_to_one_norm'
,
0
,
inputs
=
inputs
,
device
=
device
)
config_assert
(
len
(
self
.
inputs
)
==
1
,
'SumToOneNormLayer must have 1 input'
)
config_assert
(
len
(
self
.
inputs
)
==
1
,
'SumToOneNormLayer must have 1 input'
)
input_layer0
=
self
.
get_input_layer
(
0
)
self
.
set_layer_size
(
input_layer0
.
size
)
@
config_layer
(
'cos_vm'
)
class
CosSimVecMatLayer
(
LayerBase
):
def
__init__
(
self
,
name
,
size
,
inputs
,
cos_scale
=
1.0
,
device
=
None
):
def
__init__
(
self
,
name
,
size
,
inputs
,
cos_scale
=
1.0
,
device
=
None
):
super
(
CosSimVecMatLayer
,
self
).
__init__
(
name
,
'cos_vm'
,
size
,
inputs
=
inputs
,
device
=
device
)
self
.
config
.
cos_scale
=
cos_scale
config_assert
(
len
(
self
.
inputs
)
==
2
,
'CosSimVecMatLayer must have 2 inputs'
)
config_assert
(
len
(
self
.
inputs
)
==
2
,
'CosSimVecMatLayer must have 2 inputs'
)
config_assert
(
size
*
self
.
get_input_layer
(
0
).
size
==
self
.
get_input_layer
(
1
).
size
,
'Wrong input size for CosSimVecMatLayer'
)
@
config_layer
(
'sampling_id'
)
class
SamplingIdLayer
(
LayerBase
):
def
__init__
(
self
,
name
,
inputs
,
device
=
None
):
def
__init__
(
self
,
name
,
inputs
,
device
=
None
):
super
(
SamplingIdLayer
,
self
).
__init__
(
name
,
'sampling_id'
,
0
,
inputs
=
inputs
,
device
=
device
)
config_assert
(
len
(
self
.
inputs
)
==
1
,
'SamplingIdLayer must have 1 input'
)
config_assert
(
len
(
self
.
inputs
)
==
1
,
'SamplingIdLayer must have 1 input'
)
for
input_index
in
xrange
(
len
(
self
.
inputs
)):
input_layer
=
self
.
get_input_layer
(
input_index
)
self
.
set_layer_size
(
input_layer
.
size
)
...
...
@@ -2603,8 +2553,7 @@ class SamplingIdLayer(LayerBase):
# 'squarerootn': sum each sample, but divide by sqrt(sample_num).
@
config_layer
(
'average'
)
class
AverageLayer
(
LayerBase
):
def
__init__
(
self
,
def
__init__
(
self
,
name
,
inputs
,
average_strategy
=
'average'
,
...
...
@@ -2612,8 +2561,13 @@ class AverageLayer(LayerBase):
active_type
=
'linear'
,
device
=
None
,
bias
=
False
):
super
(
AverageLayer
,
self
).
__init__
(
name
,
'average'
,
0
,
inputs
=
inputs
,
device
=
device
,
active_type
=
active_type
)
super
(
AverageLayer
,
self
).
__init__
(
name
,
'average'
,
0
,
inputs
=
inputs
,
device
=
device
,
active_type
=
active_type
)
self
.
config
.
average_strategy
=
average_strategy
self
.
config
.
trans_type
=
trans_type
config_assert
(
len
(
inputs
)
==
1
,
'AverageLayer must have 1 input'
)
...
...
@@ -2622,14 +2576,10 @@ class AverageLayer(LayerBase):
self
.
set_layer_size
(
input_layer
.
size
)
self
.
create_bias_parameter
(
bias
,
self
.
config
.
size
)
@
config_layer
(
'cos'
)
class
CosSimLayer
(
LayerBase
):
def
__init__
(
self
,
name
,
inputs
,
cos_scale
=
5
,
device
=
None
):
def
__init__
(
self
,
name
,
inputs
,
cos_scale
=
5
,
device
=
None
):
super
(
CosSimLayer
,
self
).
__init__
(
name
,
'cos'
,
1
,
inputs
=
inputs
,
device
=
device
)
config_assert
(
len
(
self
.
inputs
)
==
2
,
'CosSimLayer must have 2 inputs'
)
...
...
@@ -2641,18 +2591,13 @@ class CosSimLayer(LayerBase):
@
config_layer
(
'tensor'
)
class
TensorLayer
(
LayerBase
):
def
__init__
(
self
,
name
,
size
,
inputs
,
device
=
None
,
bias
=
True
,
**
xargs
):
super
(
TensorLayer
,
self
).
__init__
(
name
,
'tensor'
,
size
,
inputs
=
inputs
,
device
=
device
,
**
xargs
)
def
__init__
(
self
,
name
,
size
,
inputs
,
device
=
None
,
bias
=
True
,
**
xargs
):
super
(
TensorLayer
,
self
).
__init__
(
name
,
'tensor'
,
size
,
inputs
=
inputs
,
device
=
device
,
**
xargs
)
config_assert
(
len
(
self
.
inputs
)
==
2
,
'TensorLayer must have 2 inputs'
)
config_assert
(
size
>
0
,
'size must be positive'
)
config_assert
(
inputs
[
1
].
parameter_name
==
None
,
'second parameter should be None.'
)
config_assert
(
inputs
[
1
].
parameter_name
==
None
,
'second parameter should be None.'
)
input_layer0
=
self
.
get_input_layer
(
0
)
input_layer1
=
self
.
get_input_layer
(
1
)
psize
=
size
*
input_layer0
.
size
*
input_layer1
.
size
...
...
@@ -2663,8 +2608,7 @@ class TensorLayer(LayerBase):
@
config_layer
(
'mixed'
)
class
MixedLayer
(
LayerBase
):
def
__init__
(
self
,
def
__init__
(
self
,
name
,
inputs
,
size
=
0
,
...
...
@@ -2695,14 +2639,17 @@ class MixedLayer(LayerBase):
else
:
sz
=
operator
.
calc_output_size
(
operator_conf
.
input_sizes
)
if
sz
!=
0
:
config_assert
(
sz
==
self
.
config
.
size
,
config_assert
(
sz
==
self
.
config
.
size
,
"different inputs have different size: %s vs. %s"
%
(
sz
,
self
.
config
.
size
))
for
input_index
in
xrange
(
len
(
self
.
inputs
)):
input_layer
=
self
.
get_input_layer
(
input_index
)
input
=
self
.
inputs
[
input_index
]
if
input_index
not
in
operator_input_index
:
config_assert
(
isinstance
(
input
,
Projection
),
"input should be projection or operation"
)
config_assert
(
isinstance
(
input
,
Projection
),
"input should be projection or operation"
)
if
self
.
config
.
size
==
0
and
isinstance
(
input
,
Projection
):
size
=
input
.
calc_output_size
(
input_layer
)
if
size
!=
0
:
...
...
@@ -2710,7 +2657,8 @@ class MixedLayer(LayerBase):
elif
isinstance
(
input
,
Projection
):
sz
=
input
.
calc_output_size
(
input_layer
)
if
sz
!=
0
:
config_assert
(
sz
==
self
.
config
.
size
,
config_assert
(
sz
==
self
.
config
.
size
,
"different inputs have different size: %s vs. %s"
%
(
sz
,
self
.
config
.
size
))
config_assert
(
size
!=
0
,
"size is not set"
)
...
...
@@ -2724,7 +2672,8 @@ class MixedLayer(LayerBase):
input_config
=
self
.
config
.
inputs
[
input_index
]
input_config
.
proj_conf
.
CopyFrom
(
input
.
proj_conf
)
input_config
.
proj_conf
.
name
=
gen_parameter_name
(
name
,
input_index
)
input_config
.
proj_conf
.
name
=
gen_parameter_name
(
name
,
input_index
)
psize
=
input
.
calc_parameter_size
(
input_layer
.
size
,
size
)
dims
=
input
.
calc_parameter_dims
(
input_layer
.
size
,
size
)
self
.
create_input_parameter
(
input_index
,
psize
,
dims
)
...
...
@@ -2750,21 +2699,16 @@ class MixedLayer(LayerBase):
if
error_clipping_threshold
is
not
None
:
self
.
config
.
error_clipping_threshold
=
error_clipping_threshold
# like MixedLayer, but no bias parameter
@
config_func
def
ExpressionLayer
(
name
,
inputs
,
**
xargs
):
def
ExpressionLayer
(
name
,
inputs
,
**
xargs
):
MixedLayer
(
name
,
inputs
,
bias
=
False
,
**
xargs
)
@
config_layer
(
'concat'
)
class
ConcatenateLayer
(
LayerBase
):
def
__init__
(
self
,
name
,
inputs
,
bias
=
False
,
**
xargs
):
def
__init__
(
self
,
name
,
inputs
,
bias
=
False
,
**
xargs
):
config_assert
(
inputs
,
'inputs cannot be empty'
)
config_assert
(
not
bias
,
'ConcatenateLayer cannot support bias.'
)
super
(
ConcatenateLayer
,
self
).
__init__
(
...
...
@@ -2778,15 +2722,11 @@ class ConcatenateLayer(LayerBase):
self
.
set_layer_size
(
size
)
# like concat layer, but each input layer was processed by a Projection.
@
config_layer
(
'concat2'
)
class
ConcatenateLayer2
(
LayerBase
):
def
__init__
(
self
,
name
,
inputs
,
bias
=
False
,
**
xargs
):
def
__init__
(
self
,
name
,
inputs
,
bias
=
False
,
**
xargs
):
config_assert
(
inputs
,
'inputs cannot be empty'
)
super
(
ConcatenateLayer2
,
self
).
__init__
(
name
,
'concat2'
,
0
,
inputs
=
inputs
,
**
xargs
)
...
...
@@ -2794,7 +2734,8 @@ class ConcatenateLayer2(LayerBase):
if
isinstance
(
self
.
inputs
[
0
],
ConvProjection
):
for
input_index
in
xrange
(
len
(
self
.
inputs
)
-
1
):
input
=
self
.
inputs
[
input_index
+
1
]
config_assert
(
isinstance
(
input
,
ConvProjection
),
config_assert
(
isinstance
(
input
,
ConvProjection
),
"The first input of ConcatenateLayer2 is ConvProjection, "
"the other inputs should also be ConvProjection."
)
...
...
@@ -2834,16 +2775,12 @@ class ConcatenateLayer2(LayerBase):
self
.
config
.
bias_size
=
psize
self
.
create_bias_parameter
(
bias
,
psize
)
@
config_layer
(
'recurrent'
)
class
RecurrentLayer
(
LayerBase
):
def
__init__
(
self
,
name
,
inputs
,
reversed
=
False
,
bias
=
True
,
**
xargs
):
super
(
RecurrentLayer
,
self
).
__init__
(
name
,
'recurrent'
,
0
,
inputs
,
**
xargs
)
def
__init__
(
self
,
name
,
inputs
,
reversed
=
False
,
bias
=
True
,
**
xargs
):
super
(
RecurrentLayer
,
self
).
__init__
(
name
,
'recurrent'
,
0
,
inputs
,
**
xargs
)
config_assert
(
len
(
self
.
inputs
)
==
1
,
'RecurrentLayer must have 1 input'
)
input_layer
=
self
.
get_input_layer
(
0
)
size
=
input_layer
.
size
...
...
@@ -2853,10 +2790,10 @@ class RecurrentLayer(LayerBase):
self
.
create_input_parameter
(
0
,
size
*
size
,
dims
)
self
.
create_bias_parameter
(
bias
,
self
.
config
.
size
)
@
config_layer
(
'lstmemory'
)
class
LstmLayer
(
LayerBase
):
def
__init__
(
self
,
def
__init__
(
self
,
name
,
inputs
,
reversed
=
False
,
...
...
@@ -2878,10 +2815,10 @@ class LstmLayer(LayerBase):
#bias includes 3 kinds of peephole, 4 + 3 = 7
self
.
create_bias_parameter
(
bias
,
size
*
7
)
@
config_layer
(
'lstm_step'
)
class
LstmStepLayer
(
LayerBase
):
def
__init__
(
self
,
def
__init__
(
self
,
name
,
size
,
inputs
,
...
...
@@ -2889,35 +2826,35 @@ class LstmStepLayer(LayerBase):
active_state_type
=
"sigmoid"
,
bias
=
True
,
**
xargs
):
super
(
LstmStepLayer
,
self
).
__init__
(
name
,
'lstm_step'
,
size
,
inputs
,
**
xargs
)
super
(
LstmStepLayer
,
self
).
__init__
(
name
,
'lstm_step'
,
size
,
inputs
,
**
xargs
)
config_assert
(
len
(
inputs
)
==
2
,
'LstmStepLayer must have 2 inputs'
)
input_layer0
=
self
.
get_input_layer
(
0
)
input_layer1
=
self
.
get_input_layer
(
1
)
config_assert
(
input_layer0
.
size
==
4
*
size
,
'input_layer0.size != 4 * layer.size'
)
config_assert
(
input_layer1
.
size
==
size
,
'input_layer1.size != layer.size'
)
config_assert
(
input_layer0
.
size
==
4
*
size
,
'input_layer0.size != 4 * layer.size'
)
config_assert
(
input_layer1
.
size
==
size
,
'input_layer1.size != layer.size'
)
self
.
config
.
active_gate_type
=
active_gate_type
self
.
config
.
active_state_type
=
active_state_type
self
.
create_bias_parameter
(
bias
,
size
*
3
)
# get the specific output from the input layer.
@
config_layer
(
'get_output'
)
class
GetOutputLayer
(
LayerBase
):
def
__init__
(
self
,
name
,
size
,
inputs
):
super
(
GetOutputLayer
,
self
).
__init__
(
name
,
'get_output'
,
size
,
inputs
)
config_assert
(
len
(
self
.
inputs
)
==
1
,
'GetOutputLayer must have 1 inputs'
)
def
__init__
(
self
,
name
,
size
,
inputs
):
super
(
GetOutputLayer
,
self
).
__init__
(
name
,
'get_output'
,
size
,
inputs
)
config_assert
(
len
(
self
.
inputs
)
==
1
,
'GetOutputLayer must have 1 inputs'
)
inputs
=
self
.
inputs
[
0
]
config_assert
(
inputs
.
input_layer_argument
,
'input_layer_argument cannot be empty'
)
@
config_layer
(
'mdlstmemory'
)
class
MDLstmLayer
(
LayerBase
):
def
__init__
(
self
,
def
__init__
(
self
,
name
,
inputs
,
directions
=
True
,
...
...
@@ -2925,34 +2862,39 @@ class MDLstmLayer(LayerBase):
active_state_type
=
"sigmoid"
,
bias
=
True
,
**
xargs
):
super
(
MDLstmLayer
,
self
).
__init__
(
name
,
'mdlstmemory'
,
0
,
inputs
,
**
xargs
)
super
(
MDLstmLayer
,
self
).
__init__
(
name
,
'mdlstmemory'
,
0
,
inputs
,
**
xargs
)
config_assert
(
len
(
self
.
inputs
)
==
1
,
'MDLstmLayer must have 1 input'
)
input_layer
=
self
.
get_input_layer
(
0
)
dim_num
=
len
(
directions
)
#check input_layer.size is divided by (3+dim_num)
config_assert
(
input_layer
.
size
%
(
3
+
dim_num
)
==
0
,
"size % (dim_num) should be 0!"
)
size
=
input_layer
.
size
/
(
3
+
dim_num
)
config_assert
(
input_layer
.
size
%
(
3
+
dim_num
)
==
0
,
"size % (dim_num) should be 0!"
)
size
=
input_layer
.
size
/
(
3
+
dim_num
)
self
.
set_layer_size
(
size
)
self
.
config
.
active_gate_type
=
active_gate_type
self
.
config
.
active_state_type
=
active_state_type
for
i
in
xrange
(
len
(
directions
)):
self
.
config
.
directions
.
append
(
int
(
directions
[
i
]))
self
.
create_input_parameter
(
0
,
size
*
size
*
(
3
+
dim_num
),
[
size
,
size
,
3
+
dim_num
])
self
.
create_input_parameter
(
0
,
size
*
size
*
(
3
+
dim_num
),
[
size
,
size
,
3
+
dim_num
])
#bias includes 3 kinds of peephole, 3+dim_num+2+dim_num
self
.
create_bias_parameter
(
bias
,
size
*
(
5
+
2
*
dim_num
))
self
.
create_bias_parameter
(
bias
,
size
*
(
5
+
2
*
dim_num
))
@
config_layer
(
'gated_recurrent'
)
class
GatedRecurrentLayer
(
LayerBase
):
def
__init__
(
self
,
def
__init__
(
self
,
name
,
inputs
,
reversed
=
False
,
active_gate_type
=
"sigmoid"
,
bias
=
True
,
**
xargs
):
super
(
GatedRecurrentLayer
,
self
).
__init__
(
name
,
'gated_recurrent'
,
0
,
inputs
,
**
xargs
)
config_assert
(
len
(
self
.
inputs
)
==
1
,
'GatedRecurrentLayer must have 1 input'
)
super
(
GatedRecurrentLayer
,
self
).
__init__
(
name
,
'gated_recurrent'
,
0
,
inputs
,
**
xargs
)
config_assert
(
len
(
self
.
inputs
)
==
1
,
'GatedRecurrentLayer must have 1 input'
)
input_layer
=
self
.
get_input_layer
(
0
)
#check input_layer.size is divided by 3
config_assert
(
input_layer
.
size
%
3
==
0
,
"size % 3 should be 0!"
)
...
...
@@ -2963,26 +2905,30 @@ class GatedRecurrentLayer(LayerBase):
self
.
create_input_parameter
(
0
,
size
*
size
*
3
,
[
size
,
size
*
3
])
self
.
create_bias_parameter
(
bias
,
size
*
3
)
@
config_layer
(
'gru_step'
)
class
GruStepLayer
(
LayerBase
):
def
__init__
(
self
,
def
__init__
(
self
,
name
,
size
,
inputs
,
active_gate_type
=
"sigmoid"
,
bias
=
True
,
**
xargs
):
super
(
GruStepLayer
,
self
).
__init__
(
name
,
'gru_step'
,
size
,
inputs
,
**
xargs
)
super
(
GruStepLayer
,
self
).
__init__
(
name
,
'gru_step'
,
size
,
inputs
,
**
xargs
)
config_assert
(
len
(
self
.
inputs
)
==
2
,
'GruStepLayer must have 2 input'
)
input_layer0
=
self
.
get_input_layer
(
0
)
input_layer1
=
self
.
get_input_layer
(
1
)
config_assert
(
input_layer0
.
size
==
3
*
size
,
'input_layer0.size != 3 * layer.size'
)
config_assert
(
input_layer1
.
size
==
size
,
'input_layer1.size != layer.size'
)
config_assert
(
input_layer0
.
size
==
3
*
size
,
'input_layer0.size != 3 * layer.size'
)
config_assert
(
input_layer1
.
size
==
size
,
'input_layer1.size != layer.size'
)
self
.
config
.
active_gate_type
=
active_gate_type
self
.
create_input_parameter
(
0
,
size
*
size
*
3
,
[
size
,
size
*
3
])
self
.
create_bias_parameter
(
bias
,
size
*
3
)
'''
A layer for calculating the cost of sequential conditional random field model.
Example: CRFLayer(name="crf_cost", size=label_num,
...
...
@@ -2990,20 +2936,18 @@ class GruStepLayer(LayerBase):
where "weight" is optional, one weight for each sequence
@param coeff: weight of the layer
'''
@
config_layer
(
'crf'
)
class
CRFLayer
(
LayerBase
):
def
__init__
(
self
,
name
,
size
,
inputs
,
coeff
=
1.0
,
device
=
None
):
def
__init__
(
self
,
name
,
size
,
inputs
,
coeff
=
1.0
,
device
=
None
):
super
(
CRFLayer
,
self
).
__init__
(
name
,
'crf'
,
size
,
inputs
,
device
=
device
)
config_assert
(
2
<=
len
(
self
.
inputs
)
<=
3
,
'CRFLayer must have 2 or 3 inputs'
)
config_assert
(
2
<=
len
(
self
.
inputs
)
<=
3
,
'CRFLayer must have 2 or 3 inputs'
)
self
.
create_input_parameter
(
0
,
size
*
(
size
+
2
),
[
size
,
size
+
2
])
self
.
config
.
coeff
=
coeff
'''
A layer for calculating the decoding sequence of sequential conditional
random field model.
...
...
@@ -3012,14 +2956,11 @@ class CRFLayer(LayerBase):
this layer will also calculate error, output_.value[i] is 1 for incorrect
decoding or 0 for correct decoding
'''
@
config_layer
(
'crf_decoding'
)
class
CRFDecodingLayer
(
LayerBase
):
def
__init__
(
self
,
name
,
size
,
inputs
,
device
=
None
):
def
__init__
(
self
,
name
,
size
,
inputs
,
device
=
None
):
super
(
CRFDecodingLayer
,
self
).
__init__
(
name
,
'crf_decoding'
,
size
,
inputs
,
device
=
device
)
config_assert
(
...
...
@@ -3027,47 +2968,35 @@ class CRFDecodingLayer(LayerBase):
'CRFDecodingLayer cannot have more than 2 inputs'
)
self
.
create_input_parameter
(
0
,
size
*
(
size
+
2
),
[
size
,
size
+
2
])
@
config_layer
(
'ctc'
)
class
CTCLayer
(
LayerBase
):
def
__init__
(
self
,
name
,
size
,
inputs
,
norm_by_times
=
False
,
device
=
None
):
def
__init__
(
self
,
name
,
size
,
inputs
,
norm_by_times
=
False
,
device
=
None
):
super
(
CTCLayer
,
self
).
__init__
(
name
,
'ctc'
,
size
,
inputs
,
device
=
device
)
self
.
config
.
norm_by_times
=
norm_by_times
config_assert
(
len
(
self
.
inputs
)
==
2
,
'CTCLayer must have 2 inputs'
)
@
config_layer
(
'recurrent_layer_group'
)
class
RecurrentLayerGroup
(
LayerBase
):
def
__init__
(
self
,
name
,
device
=
None
):
def
__init__
(
self
,
name
,
device
=
None
):
super
(
RecurrentLayerGroup
,
self
).
__init__
(
name
,
'recurrent_layer_group'
,
0
,
inputs
=
[],
device
=
device
)
# Deprecated, use a new layer specific class instead
@
config_func
def
Layer
(
name
,
type
,
**
xargs
):
def
Layer
(
name
,
type
,
**
xargs
):
layers
=
{}
layers
.
update
(
g_cost_map
)
layers
.
update
(
g_layer_type_map
)
layer_func
=
layers
.
get
(
type
)
config_assert
(
layer_func
,
"layer type '%s' not supported."
%
type
)
config_assert
(
layer_func
,
"layer type '%s' not supported."
%
type
)
return
layer_func
(
name
,
**
xargs
)
@
config_func
def
ParameterHook
(
type
,
**
kwargs
):
def
ParameterHook
(
type
,
**
kwargs
):
if
type
==
'pruning'
:
mask_filename
=
kwargs
.
get
(
'mask_filename'
,
None
)
assert
mask_filename
is
not
None
...
...
@@ -3080,8 +3009,7 @@ def ParameterHook(
@
config_func
def
Parameter
(
name
,
def
Parameter
(
name
,
size
,
device
,
dims
,
...
...
@@ -3102,8 +3030,7 @@ def Parameter(
need_compact
=
None
,
is_static
=
None
,
is_shared
=
None
,
update_hooks
=
None
):
update_hooks
=
None
):
config_assert
(
name
not
in
g_parameter_map
,
'Duplicated parameter name: '
+
name
)
...
...
@@ -3134,8 +3061,8 @@ def Parameter(
para
.
initial_std
=
default
(
initial_std
,
g_default_initial_std
)
para
.
initial_mean
=
default
(
initial_mean
,
g_default_initial_mean
)
num_batches_regularization
=
default
(
num_batches_regularization
,
g_default_num_batches_regularization
)
num_batches_regularization
=
default
(
num_batches_regularization
,
g_default_num_batches_regularization
)
if
num_batches_regularization
is
not
None
:
para
.
num_batches_regularization
=
int
(
num_batches_regularization
)
...
...
@@ -3145,18 +3072,21 @@ def Parameter(
g_config
.
opt_config
.
use_sparse_remote_updater
=
True
if
sparse_update
is
not
None
:
para
.
sparse_update
=
sparse_update
gradient_clipping_threshold
=
default
(
gradient_clipping_threshold
,
g_default_gradient_clipping_threshold
)
gradient_clipping_threshold
=
default
(
gradient_clipping_threshold
,
g_default_gradient_clipping_threshold
)
if
gradient_clipping_threshold
is
not
None
:
para
.
gradient_clipping_threshold
=
gradient_clipping_threshold
para
.
initial_strategy
=
default
(
initial_strategy
,
g_default_initial_strategy
)
para
.
initial_strategy
=
default
(
initial_strategy
,
g_default_initial_strategy
)
para
.
initial_smart
=
default
(
initial_smart
,
g_default_initial_smart
)
if
para
.
initial_smart
:
para
.
initial_mean
=
0.
if
len
(
para
.
dims
)
!=
0
:
para
.
initial_std
=
1.
/
math
.
sqrt
(
para
.
dims
[
0
])
else
:
print
(
"Use initial_smart, but dims not set. Initial_smart may not be used in this layer"
)
print
(
"Use initial_smart, but dims not set. Initial_smart may not be used in this layer"
)
traceback
.
print_exc
()
para
.
initial_std
=
1.
/
math
.
sqrt
(
para
.
size
)
if
g_default_compact_func
is
not
None
:
...
...
@@ -3195,64 +3125,78 @@ def default_initial_std(val):
global
g_default_initial_std
g_default_initial_std
=
val
@
config_func
def
default_initial_mean
(
val
):
global
g_default_initial_mean
g_default_initial_mean
=
val
@
config_func
def
default_initial_strategy
(
val
):
global
g_default_initial_strategy
g_default_initial_strategy
=
val
@
config_func
def
default_initial_smart
(
val
):
global
g_default_initial_smart
g_default_initial_smart
=
val
@
config_func
def
default_momentum
(
val
):
global
g_default_momentum
g_default_momentum
=
val
@
config_func
def
default_decay_rate
(
val
):
global
g_default_decay_rate
g_default_decay_rate
=
val
@
config_func
def
default_num_batches_regularization
(
val
):
global
g_default_num_batches_regularization
g_default_num_batches_regularization
=
val
@
config_func
def
default_gradient_clipping_threshold
(
val
):
global
g_default_gradient_clipping_threshold
g_default_gradient_clipping_threshold
=
val
@
config_func
def
default_device
(
val
):
global
g_default_device
g_default_device
=
val
@
config_func
def
default_update_hooks
(
val
):
global
g_default_update_hooks
g_default_update_hooks
=
val
@
config_func
def
default_compact_func
(
val
):
global
g_default_compact_func
g_default_compact_func
=
val
def
make_importer
(
config_dir
,
config_args
):
def
Import
(
config_file
,
local_args
=
{}):
if
not
config_file
.
startswith
(
'/'
):
config_file
=
config_dir
+
'/'
+
config_file
g_config
.
config_files
.
append
(
config_file
)
execfile
(
config_file
,
make_config_environment
(
config_file
,
config_args
),
local_args
)
execfile
(
config_file
,
make_config_environment
(
config_file
,
config_args
),
local_args
)
return
Import
settings
=
dict
(
batch_size
=
None
,
mini_batch_size
=
None
,
...
...
@@ -3281,26 +3225,24 @@ settings = dict(
ada_rou
=
0.95
,
delta_add_rate
=
1.0
,
shrink_parameter_value
=
0
,
adam_beta1
=
0.9
,
adam_beta2
=
0.999
,
adam_epsilon
=
1e-8
,
)
adam_beta1
=
0.9
,
adam_beta2
=
0.999
,
adam_epsilon
=
1e-8
,
)
settings_deprecated
=
dict
(
usage_ratio
=
1.
,
)
settings_deprecated
=
dict
(
usage_ratio
=
1.
,
)
trainer_settings
=
dict
(
save_dir
=
"./output/model"
,
init_model_path
=
None
,
start_pass
=
0
,
)
start_pass
=
0
,
)
@
config_func
def
Settings
(
**
args
):
for
k
,
v
in
args
.
iteritems
():
if
k
==
"usage_ratio"
:
logger
.
warning
(
"Deprecated: define usage_ratio in DataConfig instead"
)
logger
.
warning
(
"Deprecated: define usage_ratio in DataConfig instead"
)
if
g_config
.
HasField
(
"data_config"
):
g_config
.
data_config
.
__setattr__
(
k
,
v
)
settings_deprecated
[
k
]
=
v
...
...
@@ -3312,10 +3254,12 @@ def Settings(**args):
else
:
logger
.
fatal
(
'Unkown setting: %s'
%
k
)
@
config_func
def
cluster_config
(
**
args
):
pass
@
config_func
def
EnableSubmodelSuffix
(
flag
=
True
):
"""
...
...
@@ -3325,10 +3269,12 @@ def EnableSubmodelSuffix(flag=True):
global
g_add_submodel_suffix
g_add_submodel_suffix
=
flag
def
make_config_environment
(
config_file
,
config_args
):
def
make_setter
(
k
):
def
setter
(
v
):
logger
.
fatal
(
"Obsolete: use Settings(%s=%s, ...) instead"
%
(
k
,
v
))
return
setter
funcs
=
{}
...
...
@@ -3344,13 +3290,13 @@ def make_config_environment(config_file, config_args):
funcs
.
update
(
Import
=
make_importer
(
config_dir
,
config_args
),
get_config_arg
=
make_get_config_arg
(
config_args
),
)
get_config_arg
=
make_get_config_arg
(
config_args
),
)
funcs
.
update
(
g_extended_config_funcs
)
return
funcs
def
make_get_config_arg
(
config_args
):
def
get_config_arg
(
name
,
type
,
default
=
None
):
if
type
==
bool
:
...
...
@@ -3367,6 +3313,7 @@ def make_get_config_arg(config_args):
return
get_config_arg
def
importlib
(
name
):
__import__
(
name
)
return
sys
.
modules
[
name
]
...
...
@@ -3379,10 +3326,12 @@ def find_caller():
return
s
[
0
],
s
[
1
],
s
[
2
]
return
"(unknown file)"
,
0
,
"(unknown function)"
def
my_fatal
(
s
):
logger
.
critical
(
s
)
raise
Exception
()
def
parse_config
(
config_file
,
config_arg_str
):
'''
@param config_arg_str: a string of the form var1=val1,var2=val2. It will be
...
...
@@ -3420,7 +3369,7 @@ def parse_config(config_file, config_arg_str):
for
k
,
v
in
settings
.
iteritems
():
if
v
is
None
:
continue
g_config
.
opt_config
.
__setattr__
(
k
,
v
)
;
g_config
.
opt_config
.
__setattr__
(
k
,
v
)
for
k
,
v
in
trainer_settings
.
iteritems
():
if
v
is
None
:
...
...
@@ -3447,6 +3396,7 @@ def parse_config_and_serialize(config_file, config_arg_str):
traceback
.
print_exc
()
raise
if
__name__
==
'__main__'
:
try
:
config
=
parse_config
(
sys
.
argv
[
1
],
''
)
...
...
python/paddle/trainer/config_parser_extension.py
浏览文件 @
58e1b3b3
...
...
@@ -17,8 +17,7 @@ from paddle.proto.DataConfig_pb2 import DataConfig
g_config
=
None
def
SimpleData
(
files
=
None
,
def
SimpleData
(
files
=
None
,
feat_dim
=
None
,
context_len
=
None
,
buffer_capacity
=
None
):
...
...
@@ -33,6 +32,7 @@ def SimpleData(
data_config
.
buffer_capacity
=
buffer_capacity
return
data_config
def
get_config_funcs
(
trainer_config
):
global
g_config
g_config
=
trainer_config
...
...
python/paddle/trainer/recurrent_units.py
浏览文件 @
58e1b3b3
...
...
@@ -22,161 +22,175 @@
from
paddle.trainer.config_parser
import
*
# long short term memory, can be used in recurrent machine
# *inputs* must be a list of Projections, for example:
# inputs = [FullMatrixProjection("input_layer_name")],
# *para_prefix* defines parameter names, if the *para_prefix* of
# two LstmRecurrentUnit is same, they share same parameters
# *out_memory* can be defined outside if it's used outside
def
LstmRecurrentUnit
(
name
,
size
,
active_type
,
state_active_type
,
gate_active_type
,
inputs
,
para_prefix
=
None
,
error_clipping_threshold
=
0
,
out_memory
=
None
):
def
LstmRecurrentUnit
(
name
,
size
,
active_type
,
state_active_type
,
gate_active_type
,
inputs
,
para_prefix
=
None
,
error_clipping_threshold
=
0
,
out_memory
=
None
):
if
para_prefix
is
None
:
para_prefix
=
name
if
out_memory
is
None
:
out_memory
=
Memory
(
name
=
name
,
size
=
size
)
out_memory
=
Memory
(
name
=
name
,
size
=
size
)
state_memory
=
Memory
(
name
=
name
+
"_"
+
"state"
,
size
=
size
)
state_memory
=
Memory
(
name
=
name
+
"_"
+
"state"
,
size
=
size
)
Layer
(
name
=
name
+
"_"
+
"input_recurrent"
,
type
=
"mixed"
,
size
=
size
*
4
,
#(input_s, input_gate, forget_gate, output_gate)
error_clipping_threshold
=
error_clipping_threshold
,
bias
=
Bias
(
initial_std
=
0
,
parameter_name
=
para_prefix
+
"_input_recurrent.b"
),
inputs
=
inputs
+
[
FullMatrixProjection
(
out_memory
,
parameter_name
=
para_prefix
+
"_input_recurrent.w"
),
],
)
name
=
name
+
"_"
+
"input_recurrent"
,
type
=
"mixed"
,
size
=
size
*
4
,
#(input_s, input_gate, forget_gate, output_gate)
error_clipping_threshold
=
error_clipping_threshold
,
bias
=
Bias
(
initial_std
=
0
,
parameter_name
=
para_prefix
+
"_input_recurrent.b"
),
inputs
=
inputs
+
[
FullMatrixProjection
(
out_memory
,
parameter_name
=
para_prefix
+
"_input_recurrent.w"
),
],
)
LstmStepLayer
(
name
=
name
,
size
=
size
,
bias
=
Bias
(
parameter_name
=
para_prefix
+
"_check.b"
),
inputs
=
[
name
+
"_"
+
"input_recurrent"
,
state_memory
],
active_type
=
active_type
,
active_gate_type
=
gate_active_type
,
active_state_type
=
state_active_type
,
)
name
=
name
,
size
=
size
,
bias
=
Bias
(
parameter_name
=
para_prefix
+
"_check.b"
),
inputs
=
[
name
+
"_"
+
"input_recurrent"
,
state_memory
],
active_type
=
active_type
,
active_gate_type
=
gate_active_type
,
active_state_type
=
state_active_type
,
)
GetOutputLayer
(
name
=
name
+
"_"
+
"state"
,
size
=
size
,
inputs
=
Input
(
name
,
input_layer_argument
=
"state"
),
)
name
=
name
+
"_"
+
"state"
,
size
=
size
,
inputs
=
Input
(
name
,
input_layer_argument
=
"state"
),
)
def
LstmRecurrentUnitNaive
(
name
,
size
,
active_type
,
state_active_type
,
gate_active_type
,
inputs
,
para_prefix
=
None
,
error_clipping_threshold
=
0
,
out_memory
=
None
):
def
LstmRecurrentUnitNaive
(
name
,
size
,
active_type
,
state_active_type
,
gate_active_type
,
inputs
,
para_prefix
=
None
,
error_clipping_threshold
=
0
,
out_memory
=
None
):
if
para_prefix
is
None
:
para_prefix
=
name
if
out_memory
is
None
:
out_memory
=
Memory
(
name
=
name
,
size
=
size
)
out_memory
=
Memory
(
name
=
name
,
size
=
size
)
state_memory
=
Memory
(
name
=
name
+
"_"
+
"state"
,
size
=
size
)
state_memory
=
Memory
(
name
=
name
+
"_"
+
"state"
,
size
=
size
)
Layer
(
name
=
name
+
"_"
+
"input_recurrent"
,
type
=
"mixed"
,
size
=
size
*
4
,
#(input_s, input_gate, forget_gate, output_gate)
error_clipping_threshold
=
error_clipping_threshold
,
bias
=
Bias
(
initial_std
=
0
,
parameter_name
=
para_prefix
+
"_input_recurrent.b"
),
inputs
=
inputs
+
[
FullMatrixProjection
(
out_memory
,
parameter_name
=
para_prefix
+
"_input_recurrent.w"
),
],
)
name
=
name
+
"_"
+
"input_recurrent"
,
type
=
"mixed"
,
size
=
size
*
4
,
#(input_s, input_gate, forget_gate, output_gate)
error_clipping_threshold
=
error_clipping_threshold
,
bias
=
Bias
(
initial_std
=
0
,
parameter_name
=
para_prefix
+
"_input_recurrent.b"
),
inputs
=
inputs
+
[
FullMatrixProjection
(
out_memory
,
parameter_name
=
para_prefix
+
"_input_recurrent.w"
),
],
)
ExpressionLayer
(
name
=
name
+
"_"
+
"input_s"
,
size
=
size
,
active_type
=
active_type
,
inputs
=
[
IdentityOffsetProjection
(
name
+
"_"
+
"input_recurrent"
,
offset
=
0
)],
)
name
=
name
+
"_"
+
"input_s"
,
size
=
size
,
active_type
=
active_type
,
inputs
=
[
IdentityOffsetProjection
(
name
+
"_"
+
"input_recurrent"
,
offset
=
0
)
],
)
ExpressionLayer
(
name
=
name
+
"_"
+
"input_gate"
,
active_type
=
gate_active_type
,
inputs
=
[
IdentityOffsetProjection
(
name
+
"_"
+
"input_recurrent"
,
offset
=
size
),
DotMulProjection
(
state_memory
,
parameter_name
=
para_prefix
+
"_input_check.w"
)],
)
name
=
name
+
"_"
+
"input_gate"
,
active_type
=
gate_active_type
,
inputs
=
[
IdentityOffsetProjection
(
name
+
"_"
+
"input_recurrent"
,
offset
=
size
),
DotMulProjection
(
state_memory
,
parameter_name
=
para_prefix
+
"_input_check.w"
)
],
)
ExpressionLayer
(
name
=
name
+
"_"
+
"forget_gate"
,
active_type
=
gate_active_type
,
inputs
=
[
IdentityOffsetProjection
(
name
+
"_"
+
"input_recurrent"
,
offset
=
size
*
2
),
DotMulProjection
(
state_memory
,
parameter_name
=
para_prefix
+
"_forget_check.w"
)],
)
name
=
name
+
"_"
+
"forget_gate"
,
active_type
=
gate_active_type
,
inputs
=
[
IdentityOffsetProjection
(
name
+
"_"
+
"input_recurrent"
,
offset
=
size
*
2
),
DotMulProjection
(
state_memory
,
parameter_name
=
para_prefix
+
"_forget_check.w"
)
],
)
ExpressionLayer
(
name
=
name
+
"_"
+
"state"
,
inputs
=
[
DotMulOperator
([
name
+
"_"
+
"input_s"
,
name
+
"_"
+
"input_gate"
]),
DotMulOperator
([
state_memory
,
name
+
"_"
+
"forget_gate"
]),
],
)
name
=
name
+
"_"
+
"state"
,
inputs
=
[
DotMulOperator
([
name
+
"_"
+
"input_s"
,
name
+
"_"
+
"input_gate"
]),
DotMulOperator
([
state_memory
,
name
+
"_"
+
"forget_gate"
]),
],
)
ExpressionLayer
(
name
=
name
+
"_"
+
"output_gate"
,
active_type
=
gate_active_type
,
inputs
=
[
IdentityOffsetProjection
(
name
+
"_"
+
"input_recurrent"
,
offset
=
size
*
3
),
DotMulProjection
(
name
+
"_"
+
"state"
,
parameter_name
=
para_prefix
+
"_output_check.w"
)],
)
name
=
name
+
"_"
+
"output_gate"
,
active_type
=
gate_active_type
,
inputs
=
[
IdentityOffsetProjection
(
name
+
"_"
+
"input_recurrent"
,
offset
=
size
*
3
),
DotMulProjection
(
name
+
"_"
+
"state"
,
parameter_name
=
para_prefix
+
"_output_check.w"
)
],
)
ExpressionLayer
(
name
=
name
+
"_"
+
"state_atv"
,
active_type
=
state_active_type
,
inputs
=
IdentityProjection
(
name
+
"_"
+
"state"
),
)
name
=
name
+
"_"
+
"state_atv"
,
active_type
=
state_active_type
,
inputs
=
IdentityProjection
(
name
+
"_"
+
"state"
),
)
ExpressionLayer
(
name
=
name
,
inputs
=
DotMulOperator
([
name
+
"_"
+
"state_atv"
,
name
+
"_"
+
"output_gate"
]),
)
name
=
name
,
inputs
=
DotMulOperator
(
[
name
+
"_"
+
"state_atv"
,
name
+
"_"
+
"output_gate"
]),
)
# like LstmRecurrentUnit, but it's a layer group.
# it is equivalent to LstmLayer
def
LstmRecurrentLayerGroup
(
name
,
size
,
active_type
,
state_active_type
,
gate_active_type
,
inputs
,
para_prefix
=
None
,
error_clipping_threshold
=
0
,
seq_reversed
=
False
):
def
LstmRecurrentLayerGroup
(
name
,
size
,
active_type
,
state_active_type
,
gate_active_type
,
inputs
,
para_prefix
=
None
,
error_clipping_threshold
=
0
,
seq_reversed
=
False
):
input_layer_name
=
name
+
"_"
+
"transform_input"
Layer
(
name
=
input_layer_name
,
type
=
"mixed"
,
size
=
size
*
4
,
active_type
=
""
,
bias
=
False
,
inputs
=
inputs
,
)
RecurrentLayerGroupBegin
(
name
+
"_layer_group"
,
in_links
=
[
input_layer_name
],
out_links
=
[
name
],
seq_reversed
=
seq_reversed
)
name
=
input_layer_name
,
type
=
"mixed"
,
size
=
size
*
4
,
active_type
=
""
,
bias
=
False
,
inputs
=
inputs
,
)
RecurrentLayerGroupBegin
(
name
+
"_layer_group"
,
in_links
=
[
input_layer_name
],
out_links
=
[
name
],
seq_reversed
=
seq_reversed
)
LstmRecurrentUnit
(
name
=
name
,
size
=
size
,
active_type
=
active_type
,
state_active_type
=
state_active_type
,
gate_active_type
=
gate_active_type
,
inputs
=
[
IdentityProjection
(
input_layer_name
)],
para_prefix
=
para_prefix
,
error_clipping_threshold
=
error_clipping_threshold
,
)
name
=
name
,
size
=
size
,
active_type
=
active_type
,
state_active_type
=
state_active_type
,
gate_active_type
=
gate_active_type
,
inputs
=
[
IdentityProjection
(
input_layer_name
)],
para_prefix
=
para_prefix
,
error_clipping_threshold
=
error_clipping_threshold
,
)
RecurrentLayerGroupEnd
(
name
+
"_layer_group"
)
# gated recurrent unit, can be used in recurrent machine
# *inputs* should be a list of Projections, for example:
# inputs = [FullMatrixProjection("input_layer_name")],
...
...
@@ -184,142 +198,157 @@ def LstmRecurrentLayerGroup(name, size,
# two GatedRecurrentUnit is same, they share same parameters
# *out_memory* can be defined outside if it's used outside
def
GatedRecurrentUnit
(
name
,
size
,
active_type
,
gate_active_type
,
inputs
,
para_prefix
=
None
,
error_clipping_threshold
=
0
,
out_memory
=
None
):
def
GatedRecurrentUnit
(
name
,
size
,
active_type
,
gate_active_type
,
inputs
,
para_prefix
=
None
,
error_clipping_threshold
=
0
,
out_memory
=
None
):
if
type_of
(
inputs
)
==
str
:
#only used by GatedRecurrentLayerGroup
input_layer_name
=
inputs
else
:
input_layer_name
=
name
+
"_"
+
"transform_input"
Layer
(
name
=
input_layer_name
,
type
=
"mixed"
,
size
=
size
*
3
,
active_type
=
""
,
bias
=
False
,
inputs
=
inputs
,
)
name
=
input_layer_name
,
type
=
"mixed"
,
size
=
size
*
3
,
active_type
=
""
,
bias
=
False
,
inputs
=
inputs
,
)
if
para_prefix
is
None
:
para_prefix
=
name
if
out_memory
is
None
:
out_memory
=
Memory
(
name
=
name
,
size
=
size
)
out_memory
=
Memory
(
name
=
name
,
size
=
size
)
GruStepLayer
(
name
=
name
,
size
=
size
,
bias
=
Bias
(
parameter_name
=
para_prefix
+
"_gate.b"
),
inputs
=
[
input_layer_name
,
Input
(
out_memory
,
parameter_name
=
para_prefix
+
"_gate.w"
)],
active_type
=
active_type
,
active_gate_type
=
gate_active_type
,
)
def
GatedRecurrentUnitNaive
(
name
,
size
,
active_type
,
gate_active_type
,
inputs
,
para_prefix
=
None
,
error_clipping_threshold
=
0
,
out_memory
=
None
):
name
=
name
,
size
=
size
,
bias
=
Bias
(
parameter_name
=
para_prefix
+
"_gate.b"
),
inputs
=
[
input_layer_name
,
Input
(
out_memory
,
parameter_name
=
para_prefix
+
"_gate.w"
)
],
active_type
=
active_type
,
active_gate_type
=
gate_active_type
,
)
def
GatedRecurrentUnitNaive
(
name
,
size
,
active_type
,
gate_active_type
,
inputs
,
para_prefix
=
None
,
error_clipping_threshold
=
0
,
out_memory
=
None
):
if
type_of
(
inputs
)
==
str
:
#only used by GatedRecurrentLayerGroup
input_layer_name
=
inputs
else
:
input_layer_name
=
name
+
"_"
+
"transform_input"
Layer
(
name
=
input_layer_name
,
type
=
"mixed"
,
size
=
size
*
3
,
active_type
=
""
,
bias
=
False
,
inputs
=
inputs
,
)
name
=
input_layer_name
,
type
=
"mixed"
,
size
=
size
*
3
,
active_type
=
""
,
bias
=
False
,
inputs
=
inputs
,
)
if
para_prefix
is
None
:
para_prefix
=
name
if
out_memory
is
None
:
out_memory
=
Memory
(
name
=
name
,
size
=
size
)
out_memory
=
Memory
(
name
=
name
,
size
=
size
)
Layer
(
name
=
name
+
"_"
+
"update_gate"
,
type
=
"mixed"
,
size
=
size
,
active_type
=
gate_active_type
,
error_clipping_threshold
=
error_clipping_threshold
,
bias
=
Bias
(
initial_std
=
0
,
parameter_name
=
para_prefix
+
"_update_gate.b"
),
inputs
=
[
IdentityOffsetProjection
(
input_layer_name
,
offset
=
0
),
FullMatrixProjection
(
out_memory
,
parameter_name
=
para_prefix
+
"_update_gate.w"
)],
)
name
=
name
+
"_"
+
"update_gate"
,
type
=
"mixed"
,
size
=
size
,
active_type
=
gate_active_type
,
error_clipping_threshold
=
error_clipping_threshold
,
bias
=
Bias
(
initial_std
=
0
,
parameter_name
=
para_prefix
+
"_update_gate.b"
),
inputs
=
[
IdentityOffsetProjection
(
input_layer_name
,
offset
=
0
),
FullMatrixProjection
(
out_memory
,
parameter_name
=
para_prefix
+
"_update_gate.w"
)
],
)
Layer
(
name
=
name
+
"_"
+
"reset_gate"
,
type
=
"mixed"
,
size
=
size
,
active_type
=
gate_active_type
,
error_clipping_threshold
=
error_clipping_threshold
,
bias
=
Bias
(
initial_std
=
0
,
parameter_name
=
para_prefix
+
"_reset_gate.b"
),
inputs
=
[
IdentityOffsetProjection
(
input_layer_name
,
offset
=
size
),
FullMatrixProjection
(
out_memory
,
parameter_name
=
para_prefix
+
"_reset_gate.w"
)],
)
name
=
name
+
"_"
+
"reset_gate"
,
type
=
"mixed"
,
size
=
size
,
active_type
=
gate_active_type
,
error_clipping_threshold
=
error_clipping_threshold
,
bias
=
Bias
(
initial_std
=
0
,
parameter_name
=
para_prefix
+
"_reset_gate.b"
),
inputs
=
[
IdentityOffsetProjection
(
input_layer_name
,
offset
=
size
),
FullMatrixProjection
(
out_memory
,
parameter_name
=
para_prefix
+
"_reset_gate.w"
)
],
)
ExpressionLayer
(
name
=
name
+
"_"
+
"reset_output"
,
inputs
=
DotMulOperator
([
out_memory
,
name
+
"_"
+
"reset_gate"
]),
)
name
=
name
+
"_"
+
"reset_output"
,
inputs
=
DotMulOperator
([
out_memory
,
name
+
"_"
+
"reset_gate"
]),
)
Layer
(
name
=
name
+
"_"
+
"output_candidate"
,
type
=
"mixed"
,
size
=
size
,
active_type
=
active_type
,
error_clipping_threshold
=
error_clipping_threshold
,
bias
=
Bias
(
initial_std
=
0
,
parameter_name
=
para_prefix
+
"_output_candidate.b"
),
inputs
=
[
IdentityOffsetProjection
(
input_layer_name
,
offset
=
size
*
2
),
FullMatrixProjection
(
name
+
"_"
+
"reset_output"
,
parameter_name
=
para_prefix
+
"_output_candidate.w"
)],
)
name
=
name
+
"_"
+
"output_candidate"
,
type
=
"mixed"
,
size
=
size
,
active_type
=
active_type
,
error_clipping_threshold
=
error_clipping_threshold
,
bias
=
Bias
(
initial_std
=
0
,
parameter_name
=
para_prefix
+
"_output_candidate.b"
),
inputs
=
[
IdentityOffsetProjection
(
input_layer_name
,
offset
=
size
*
2
),
FullMatrixProjection
(
name
+
"_"
+
"reset_output"
,
parameter_name
=
para_prefix
+
"_output_candidate.w"
)
],
)
ExpressionLayer
(
#element-wise interpolation
name
=
name
,
inputs
=
[
IdentityProjection
(
out_memory
),
DotMulOperator
([
out_memory
,
name
+
"_"
+
"update_gate"
],
scale
=-
1.0
),
DotMulOperator
([
name
+
"_"
+
"output_candidate"
,
name
+
"_"
+
"update_gate"
]),
],
)
name
=
name
,
inputs
=
[
IdentityProjection
(
out_memory
),
DotMulOperator
(
[
out_memory
,
name
+
"_"
+
"update_gate"
],
scale
=-
1.0
),
DotMulOperator
(
[
name
+
"_"
+
"output_candidate"
,
name
+
"_"
+
"update_gate"
]),
],
)
# like GatedRecurrentUnit, but it's a layer group.
# it is equivalent to GatedRecurrentLayer.
def
GatedRecurrentLayerGroup
(
name
,
size
,
active_type
,
gate_active_type
,
inputs
,
para_prefix
=
None
,
error_clipping_threshold
=
0
,
seq_reversed
=
False
):
def
GatedRecurrentLayerGroup
(
name
,
size
,
active_type
,
gate_active_type
,
inputs
,
para_prefix
=
None
,
error_clipping_threshold
=
0
,
seq_reversed
=
False
):
input_layer_name
=
name
+
"_"
+
"transform_input"
Layer
(
name
=
input_layer_name
,
type
=
"mixed"
,
size
=
size
*
3
,
active_type
=
""
,
bias
=
False
,
inputs
=
inputs
,
)
RecurrentLayerGroupBegin
(
name
+
"_layer_group"
,
in_links
=
[
input_layer_name
],
out_links
=
[
name
],
seq_reversed
=
seq_reversed
)
name
=
input_layer_name
,
type
=
"mixed"
,
size
=
size
*
3
,
active_type
=
""
,
bias
=
False
,
inputs
=
inputs
,
)
RecurrentLayerGroupBegin
(
name
+
"_layer_group"
,
in_links
=
[
input_layer_name
],
out_links
=
[
name
],
seq_reversed
=
seq_reversed
)
GatedRecurrentUnit
(
name
=
name
,
size
=
size
,
active_type
=
active_type
,
gate_active_type
=
gate_active_type
,
inputs
=
input_layer_name
,
#transform outside
para_prefix
=
para_prefix
,
error_clipping_threshold
=
error_clipping_threshold
,
)
name
=
name
,
size
=
size
,
active_type
=
active_type
,
gate_active_type
=
gate_active_type
,
inputs
=
input_layer_name
,
#transform outside
para_prefix
=
para_prefix
,
error_clipping_threshold
=
error_clipping_threshold
,
)
RecurrentLayerGroupEnd
(
name
+
"_layer_group"
)
python/paddle/trainer_config_helpers/activations.py
浏览文件 @
58e1b3b3
...
...
@@ -12,13 +12,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.
__all__
=
[
"TanhActivation"
,
"SigmoidActivation"
,
"SoftmaxActivation"
,
"IdentityActivation"
,
"LinearActivation"
,
'SequenceSoftmaxActivation'
,
'ExpActivation'
,
"ReluActivation"
,
"BReluActivation"
,
"SoftReluActivation"
,
"STanhActivation"
,
"AbsActivation"
,
"SquareActivation"
,
"BaseActivation"
]
__all__
=
[
"TanhActivation"
,
"SigmoidActivation"
,
"SoftmaxActivation"
,
"IdentityActivation"
,
"LinearActivation"
,
'SequenceSoftmaxActivation'
,
'ExpActivation'
,
"ReluActivation"
,
"BReluActivation"
,
"SoftReluActivation"
,
"STanhActivation"
,
"AbsActivation"
,
"SquareActivation"
,
"BaseActivation"
]
class
BaseActivation
(
object
):
...
...
@@ -51,7 +50,8 @@ class TanhActivation(BaseActivation):
f(z)=tanh(z)=
\\
frac{e^z-e^{-z}}{e^z+e^{-z}}
"""
def
__init__
(
self
):
BaseActivation
.
__init__
(
self
,
'tanh'
,
True
)
def
__init__
(
self
):
BaseActivation
.
__init__
(
self
,
'tanh'
,
True
)
class
SigmoidActivation
(
BaseActivation
):
...
...
@@ -63,7 +63,8 @@ class SigmoidActivation(BaseActivation):
f(z) =
\\
frac{1}{1+exp(-z)}
"""
def
__init__
(
self
):
BaseActivation
.
__init__
(
self
,
'sigmoid'
,
True
)
def
__init__
(
self
):
BaseActivation
.
__init__
(
self
,
'sigmoid'
,
True
)
class
SoftmaxActivation
(
BaseActivation
):
...
...
@@ -104,7 +105,8 @@ class IdentityActivation(BaseActivation):
Just do nothing for output both forward/backward.
"""
def
__init__
(
self
):
BaseActivation
.
__init__
(
self
,
''
,
False
)
def
__init__
(
self
):
BaseActivation
.
__init__
(
self
,
''
,
False
)
LinearActivation
=
IdentityActivation
...
...
@@ -124,7 +126,8 @@ class ReluActivation(BaseActivation):
0 &
\\
quad
\\
mathrm{otherwize}
"""
def
__init__
(
self
):
BaseActivation
.
__init__
(
self
,
'relu'
,
True
)
def
__init__
(
self
):
BaseActivation
.
__init__
(
self
,
'relu'
,
True
)
class
BReluActivation
(
BaseActivation
):
...
...
@@ -141,7 +144,8 @@ class BReluActivation(BaseActivation):
0 &
\\
quad
\\
mathrm{otherwise}
"""
def
__init__
(
self
):
BaseActivation
.
__init__
(
self
,
'brelu'
,
False
)
def
__init__
(
self
):
BaseActivation
.
__init__
(
self
,
'brelu'
,
False
)
class
SoftReluActivation
(
BaseActivation
):
...
...
@@ -149,7 +153,9 @@ class SoftReluActivation(BaseActivation):
SoftRelu Activation.
"""
def
__init__
(
self
):
BaseActivation
.
__init__
(
self
,
'softrelu'
,
False
)
def
__init__
(
self
):
BaseActivation
.
__init__
(
self
,
'softrelu'
,
False
)
class
STanhActivation
(
BaseActivation
):
"""
...
...
@@ -160,7 +166,8 @@ class STanhActivation(BaseActivation):
f(z) = 1.7159 * tanh(2/3*z)
"""
def
__init__
(
self
):
BaseActivation
.
__init__
(
self
,
'stanh'
,
False
)
def
__init__
(
self
):
BaseActivation
.
__init__
(
self
,
'stanh'
,
False
)
class
AbsActivation
(
BaseActivation
):
...
...
@@ -178,7 +185,8 @@ class AbsActivation(BaseActivation):
0 &
\\
quad if
\\
quad z = 0
"""
def
__init__
(
self
):
BaseActivation
.
__init__
(
self
,
'abs'
,
False
)
def
__init__
(
self
):
BaseActivation
.
__init__
(
self
,
'abs'
,
False
)
class
SquareActivation
(
BaseActivation
):
...
...
@@ -189,7 +197,9 @@ class SquareActivation(BaseActivation):
f(z) = z^2.
"""
def
__init__
(
self
):
BaseActivation
.
__init__
(
self
,
'square'
,
False
)
def
__init__
(
self
):
BaseActivation
.
__init__
(
self
,
'square'
,
False
)
class
ExpActivation
(
BaseActivation
):
"""
...
...
@@ -198,7 +208,10 @@ class ExpActivation(BaseActivation):
.. math::
f(z) = e^z.
"""
def
__init__
(
self
):
BaseActivation
.
__init__
(
self
,
'exponential'
,
False
)
def
__init__
(
self
):
BaseActivation
.
__init__
(
self
,
'exponential'
,
False
)
class
LogActivation
(
BaseActivation
):
"""
...
...
@@ -207,4 +220,6 @@ class LogActivation(BaseActivation):
.. math::
f(z) = log(z)
"""
def
__init__
(
self
):
BaseActivation
.
__init__
(
self
,
'log'
,
False
)
def
__init__
(
self
):
BaseActivation
.
__init__
(
self
,
'log'
,
False
)
python/paddle/trainer_config_helpers/attrs.py
浏览文件 @
58e1b3b3
...
...
@@ -13,8 +13,9 @@
# limitations under the License.
from
paddle.trainer.config_parser
import
*
__all__
=
[
'ParamAttr'
,
'ExtraAttr'
,
'ParameterAttribute'
,
'ExtraLayerAttribute'
]
__all__
=
[
'ParamAttr'
,
'ExtraAttr'
,
'ParameterAttribute'
,
'ExtraLayerAttribute'
]
def
convert_and_compare
(
x
,
Type
):
...
...
@@ -25,7 +26,8 @@ def convert_and_compare(x, Type):
:param Type: target type to check x over
"""
return
type
(
x
)(
Type
(
x
))
==
x
return
type
(
x
)(
Type
(
x
))
==
x
def
is_compatible_with
(
x
,
Type
):
"""
...
...
@@ -91,9 +93,17 @@ class ParameterAttribute(object):
:type sparse_update: bool
"""
def
__init__
(
self
,
name
=
None
,
is_static
=
False
,
initial_std
=
None
,
initial_mean
=
None
,
initial_max
=
None
,
initial_min
=
None
,
l1_rate
=
None
,
l2_rate
=
None
,
learning_rate
=
None
,
momentum
=
None
,
def
__init__
(
self
,
name
=
None
,
is_static
=
False
,
initial_std
=
None
,
initial_mean
=
None
,
initial_max
=
None
,
initial_min
=
None
,
l1_rate
=
None
,
l2_rate
=
None
,
learning_rate
=
None
,
momentum
=
None
,
sparse_update
=
False
):
# initialize strategy.
if
is_static
:
...
...
@@ -183,7 +193,10 @@ class ExtraLayerAttribute(object):
:type device: int
"""
def
__init__
(
self
,
error_clipping_threshold
=
None
,
drop_rate
=
None
,
device
=
None
):
def
__init__
(
self
,
error_clipping_threshold
=
None
,
drop_rate
=
None
,
device
=
None
):
self
.
attr
=
dict
()
if
isinstance
(
error_clipping_threshold
,
float
):
assert
error_clipping_threshold
>
0
...
...
@@ -200,8 +213,8 @@ class ExtraLayerAttribute(object):
for
key
in
self
.
attr
:
if
not
hasattr
(
self
,
'can_%s'
%
key
)
or
\
not
getattr
(
self
,
'can_%s'
%
key
):
raise
NotImplementedError
(
"Layer %s cannot support %s"
%
(
layer_name
,
key
))
raise
NotImplementedError
(
"Layer %s cannot support %s"
%
(
layer_name
,
key
))
@
staticmethod
def
to_kwargs
(
attr
):
...
...
python/paddle/trainer_config_helpers/data_sources.py
浏览文件 @
58e1b3b3
...
...
@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Data Sources are helpers to define paddle training data or testing data.
"""
...
...
@@ -26,8 +25,12 @@ except ImportError:
__all__
=
[
'define_py_data_sources2'
]
def
define_py_data_source
(
file_list
,
cls
,
module
,
obj
,
args
=
None
,
async
=
False
,
def
define_py_data_source
(
file_list
,
cls
,
module
,
obj
,
args
=
None
,
async
=
False
,
data_cls
=
PyData
):
"""
Define a python data source.
...
...
@@ -76,6 +79,7 @@ def define_py_data_source(file_list, cls, module,
args
=
pickle
.
dumps
(
args
,
0
)
if
data_cls
is
None
:
def
py_data2
(
files
,
load_data_module
,
load_data_object
,
load_data_args
,
**
kwargs
):
data
=
DataBase
()
...
...
@@ -86,17 +90,25 @@ def define_py_data_source(file_list, cls, module,
data
.
load_data_args
=
load_data_args
data
.
async_load_data
=
True
return
data
data_cls
=
py_data2
cls
(
data_cls
(
files
=
file_list
,
cls
(
data_cls
(
files
=
file_list
,
load_data_module
=
module
,
load_data_object
=
obj
,
load_data_args
=
args
,
async_load_data
=
async
))
def
define_py_data_sources
(
train_list
,
test_list
,
module
,
obj
,
args
=
None
,
train_async
=
False
,
data_cls
=
PyData
):
def
define_py_data_sources
(
train_list
,
test_list
,
module
,
obj
,
args
=
None
,
train_async
=
False
,
data_cls
=
PyData
):
"""
The annotation is almost the same as define_py_data_sources2, except that
it can specific train_async and data_cls.
...
...
@@ -125,8 +137,8 @@ def define_py_data_sources(train_list, test_list, module, obj, args=None,
"""
def
__is_splitable__
(
o
):
return
(
isinstance
(
o
,
list
)
or
isinstance
(
o
,
tuple
)
)
and
hasattr
(
o
,
'__len__'
)
and
len
(
o
)
==
2
return
(
isinstance
(
o
,
list
)
or
isinstance
(
o
,
tuple
)
)
and
hasattr
(
o
,
'__len__'
)
and
len
(
o
)
==
2
assert
train_list
is
not
None
or
test_list
is
not
None
assert
module
is
not
None
and
obj
is
not
None
...
...
@@ -196,7 +208,8 @@ def define_py_data_sources2(train_list, test_list, module, obj, args=None):
:return: None
:rtype: None
"""
define_py_data_sources
(
train_list
=
train_list
,
define_py_data_sources
(
train_list
=
train_list
,
test_list
=
test_list
,
module
=
module
,
obj
=
obj
,
...
...
python/paddle/trainer_config_helpers/default_decorators.py
浏览文件 @
58e1b3b3
...
...
@@ -18,16 +18,18 @@ from .attrs import ParamAttr
from
.activations
import
TanhActivation
from
paddle.trainer.config_parser
import
*
__all__
=
[
'wrap_name_default'
,
'wrap_param_attr_default'
,
'wrap_bias_attr_default'
,
'wrap_act_default'
,
'wrap_param_default'
]
__all__
=
[
'wrap_name_default'
,
'wrap_param_attr_default'
,
'wrap_bias_attr_default'
,
'wrap_act_default'
,
'wrap_param_default'
]
def
__default_not_set_callback__
(
kwargs
,
name
):
return
name
not
in
kwargs
or
kwargs
[
name
]
is
None
def
wrap_param_default
(
param_names
=
None
,
default_factory
=
None
,
def
wrap_param_default
(
param_names
=
None
,
default_factory
=
None
,
not_set_callback
=
__default_not_set_callback__
):
assert
param_names
is
not
None
assert
isinstance
(
param_names
,
list
)
or
isinstance
(
param_names
,
tuple
)
...
...
@@ -43,7 +45,8 @@ def wrap_param_default(param_names=None, default_factory=None,
if
argspec
.
defaults
:
num_positional
-=
len
(
argspec
.
defaults
)
if
not
argspec
.
varargs
and
len
(
args
)
>
num_positional
:
logger
.
fatal
(
"Must use keyword arguments for non-positional args"
)
logger
.
fatal
(
"Must use keyword arguments for non-positional args"
)
for
name
in
param_names
:
if
not_set_callback
(
kwargs
,
name
):
# Not set
kwargs
[
name
]
=
default_factory
(
func
)
...
...
@@ -112,13 +115,13 @@ def wrap_param_attr_default(param_names=None, default_factory=None):
return
wrap_param_default
(
param_names
,
default_factory
)
def
wrap_bias_attr_default
(
param_names
=
None
,
default_factory
=
None
,
def
wrap_bias_attr_default
(
param_names
=
None
,
default_factory
=
None
,
has_bias
=
True
):
if
param_names
is
None
:
param_names
=
[
'bias_attr'
]
if
default_factory
is
None
:
default_factory
=
lambda
_
:
ParamAttr
(
initial_std
=
0.
,
initial_mean
=
0.
)
default_factory
=
lambda
_
:
ParamAttr
(
initial_std
=
0.
,
initial_mean
=
0.
)
def
__bias_attr_not_set__
(
kwargs
,
name
):
if
has_bias
:
...
...
python/paddle/trainer_config_helpers/evaluators.py
浏览文件 @
58e1b3b3
...
...
@@ -15,13 +15,14 @@
from
paddle.trainer.config_parser
import
*
from
default_decorators
import
*
__all__
=
[
"evaluator_base"
,
"classification_error_evaluator"
,
"auc_evaluator"
,
"pnpair_evaluator"
,
"precision_recall_evaluator"
,
"ctc_error_evaluator"
,
"chunk_evaluator"
,
"sum_evaluator"
,
"column_sum_evaluator"
,
"value_printer_evaluator"
,
"gradient_printer_evaluator"
,
"maxid_printer_evaluator"
,
"maxframe_printer_evaluator"
,
"seqtext_printer_evaluator"
,
"classification_error_printer_evaluator"
]
__all__
=
[
"evaluator_base"
,
"classification_error_evaluator"
,
"auc_evaluator"
,
"pnpair_evaluator"
,
"precision_recall_evaluator"
,
"ctc_error_evaluator"
,
"chunk_evaluator"
,
"sum_evaluator"
,
"column_sum_evaluator"
,
"value_printer_evaluator"
,
"gradient_printer_evaluator"
,
"maxid_printer_evaluator"
,
"maxframe_printer_evaluator"
,
"seqtext_printer_evaluator"
,
"classification_error_printer_evaluator"
]
class
EvaluatorAttribute
(
object
):
...
...
@@ -32,10 +33,7 @@ class EvaluatorAttribute(object):
FOR_UTILS
=
1
<<
4
KEYS
=
[
"for_classification"
,
"for_regression"
,
"for_rank"
,
"for_print"
,
"for_classification"
,
"for_regression"
,
"for_rank"
,
"for_print"
,
"for_utils"
]
...
...
@@ -55,10 +53,11 @@ def evaluator(*attrs):
setattr
(
method
,
EvaluatorAttribute
.
to_key
(
attr
),
True
)
method
.
is_evaluator
=
True
return
method
return
impl
def
evaluator_base
(
input
,
def
evaluator_base
(
input
,
type
,
label
=
None
,
weight
=
None
,
...
...
@@ -130,10 +129,10 @@ def evaluator_base(
result_file
=
result_file
,
delimited
=
delimited
)
@
evaluator
(
EvaluatorAttribute
.
FOR_CLASSIFICATION
)
@
wrap_name_default
()
def
classification_error_evaluator
(
input
,
def
classification_error_evaluator
(
input
,
label
,
name
=
None
,
weight
=
None
,
...
...
@@ -170,13 +169,14 @@ def classification_error_evaluator(
:return: None.
"""
evaluator_base
(
name
=
name
,
evaluator_base
(
name
=
name
,
type
=
"classification_error"
,
input
=
input
,
label
=
label
,
weight
=
weight
,
classification_threshold
=
threshold
,
)
classification_threshold
=
threshold
,
)
@
evaluator
(
EvaluatorAttribute
.
FOR_CLASSIFICATION
)
@
wrap_name_default
()
...
...
@@ -184,8 +184,7 @@ def auc_evaluator(
input
,
label
,
name
=
None
,
weight
=
None
,
):
weight
=
None
,
):
"""
Auc Evaluator which adapts to binary classification.
...
...
@@ -205,12 +204,14 @@ def auc_evaluator(
[sample_num, 1].
:type weight: LayerOutput
"""
evaluator_base
(
name
=
name
,
evaluator_base
(
name
=
name
,
type
=
"last-column-auc"
,
input
=
input
,
label
=
label
,
weight
=
weight
)
@
evaluator
(
EvaluatorAttribute
.
FOR_RANK
)
@
wrap_name_default
()
def
pnpair_evaluator
(
...
...
@@ -218,8 +219,7 @@ def pnpair_evaluator(
label
,
info
,
name
=
None
,
weight
=
None
,
):
weight
=
None
,
):
"""
Positive-negative pair rate Evaluator which adapts to rank task like
learning to rank. This evaluator must contain at least three layers.
...
...
@@ -242,13 +242,15 @@ def pnpair_evaluator(
[sample_num, 1]. (TODO, explaination)
:type weight: LayerOutput
"""
evaluator_base
(
name
=
name
,
evaluator_base
(
name
=
name
,
type
=
"pnpair"
,
input
=
input
,
label
=
label
,
info
=
info
,
weight
=
weight
)
@
evaluator
(
EvaluatorAttribute
.
FOR_CLASSIFICATION
)
@
wrap_name_default
()
def
precision_recall_evaluator
(
...
...
@@ -256,8 +258,7 @@ def precision_recall_evaluator(
label
,
positive_label
=
None
,
weight
=
None
,
name
=
None
,
):
name
=
None
,
):
"""
An Evaluator to calculate precision and recall, F1-score.
It is adapt to the task with multiple labels.
...
...
@@ -286,20 +287,21 @@ def precision_recall_evaluator(
[sample_num, 1]. (TODO, explaination)
:type weight: LayerOutput
"""
evaluator_base
(
name
=
name
,
evaluator_base
(
name
=
name
,
type
=
"precision_recall"
,
input
=
input
,
label
=
label
,
positive_label
=
positive_label
,
weight
=
weight
)
@
evaluator
(
EvaluatorAttribute
.
FOR_CLASSIFICATION
)
@
wrap_name_default
()
def
ctc_error_evaluator
(
input
,
label
,
name
=
None
,
):
name
=
None
,
):
"""
This evaluator is to calculate sequence-to-sequence edit distance.
...
...
@@ -317,10 +319,9 @@ def ctc_error_evaluator(
label for ctc_layer
:type label: LayerOutput
"""
evaluator_base
(
name
=
name
,
type
=
"ctc_edit_distance"
,
input
=
input
,
label
=
label
)
evaluator_base
(
name
=
name
,
type
=
"ctc_edit_distance"
,
input
=
input
,
label
=
label
)
@
evaluator
(
EvaluatorAttribute
.
FOR_CLASSIFICATION
)
@
wrap_name_default
()
...
...
@@ -328,8 +329,7 @@ def chunk_evaluator(
input
,
name
=
None
,
chunk_scheme
=
None
,
num_chunk_types
=
None
,
):
num_chunk_types
=
None
,
):
"""
Chunk evaluator is used to evaluate segment labelling accuracy for a
sequence. It calculates the chunk detection F1 score.
...
...
@@ -375,19 +375,20 @@ def chunk_evaluator(
:type chunk_scheme: basestring
:param num_chunk_types: number of chunk types other than "other"
"""
evaluator_base
(
name
=
name
,
evaluator_base
(
name
=
name
,
type
=
"chunk"
,
input
=
input
,
chunk_scheme
=
chunk_scheme
,
num_chunk_types
=
num_chunk_types
)
@
evaluator
(
EvaluatorAttribute
.
FOR_UTILS
)
@
wrap_name_default
()
def
sum_evaluator
(
input
,
name
=
None
,
weight
=
None
,
):
weight
=
None
,
):
"""
An Evaluator to sum the result of input.
...
...
@@ -405,18 +406,15 @@ def sum_evaluator(
[sample_num, 1]. (TODO, explaination)
:type weight: LayerOutput
"""
evaluator_base
(
name
=
name
,
type
=
"sum"
,
input
=
input
,
weight
=
weight
)
evaluator_base
(
name
=
name
,
type
=
"sum"
,
input
=
input
,
weight
=
weight
)
@
evaluator
(
EvaluatorAttribute
.
FOR_UTILS
)
@
wrap_name_default
()
def
column_sum_evaluator
(
input
,
name
=
None
,
weight
=
None
,
):
weight
=
None
,
):
"""
This Evaluator is used to sum the last column of input.
...
...
@@ -431,22 +429,22 @@ def column_sum_evaluator(
:param input: Input Layer name.
:type input: LayerOutput
"""
evaluator_base
(
name
=
name
,
type
=
"last-column-sum"
,
input
=
input
,
weight
=
weight
)
evaluator_base
(
name
=
name
,
type
=
"last-column-sum"
,
input
=
input
,
weight
=
weight
)
"""
The following are printer Evaluators which are usually used to
print the result, like value or gradient of input layers, the
results generated in machine translation, the classification error etc.
"""
@
evaluator
(
EvaluatorAttribute
.
FOR_PRINT
)
@
wrap_name_default
()
def
value_printer_evaluator
(
input
,
name
=
None
,
):
name
=
None
,
):
"""
This Evaluator is used to print the values of input layers. It contains
one or more input layers.
...
...
@@ -462,16 +460,14 @@ def value_printer_evaluator(
:param name: Evaluator name.
:type name: None|basestring
"""
evaluator_base
(
name
=
name
,
type
=
"value_printer"
,
input
=
input
)
evaluator_base
(
name
=
name
,
type
=
"value_printer"
,
input
=
input
)
@
evaluator
(
EvaluatorAttribute
.
FOR_PRINT
)
@
wrap_name_default
()
def
gradient_printer_evaluator
(
input
,
name
=
None
,
):
name
=
None
,
):
"""
This Evaluator is used to print the gradient of input layers. It contains
one or more input layers.
...
...
@@ -487,17 +483,15 @@ def gradient_printer_evaluator(
:param name: Evaluator name.
:type name: None|basestring
"""
evaluator_base
(
name
=
name
,
type
=
"gradient_printer"
,
input
=
input
)
evaluator_base
(
name
=
name
,
type
=
"gradient_printer"
,
input
=
input
)
@
evaluator
(
EvaluatorAttribute
.
FOR_PRINT
)
@
wrap_name_default
()
def
maxid_printer_evaluator
(
input
,
num_results
=
None
,
name
=
None
,
):
name
=
None
,
):
"""
This Evaluator is used to print maximum top k values and their indexes
of each row of input layers. It contains one or more input layers.
...
...
@@ -517,18 +511,16 @@ def maxid_printer_evaluator(
:param name: Evaluator name.
:type name: None|basestring
"""
evaluator_base
(
name
=
name
,
type
=
"max_id_printer"
,
input
=
input
,
num_results
=
num_results
)
evaluator_base
(
name
=
name
,
type
=
"max_id_printer"
,
input
=
input
,
num_results
=
num_results
)
@
evaluator
(
EvaluatorAttribute
.
FOR_PRINT
)
@
wrap_name_default
()
def
maxframe_printer_evaluator
(
input
,
num_results
=
None
,
name
=
None
,
):
name
=
None
,
):
"""
This Evaluator is used to print the top k frames of each input layers.
The input layers should contain sequences info or sequences type.
...
...
@@ -549,11 +541,13 @@ def maxframe_printer_evaluator(
:param name: Evaluator name.
:type name: None|basestring
"""
evaluator_base
(
name
=
name
,
evaluator_base
(
name
=
name
,
type
=
"max_frame_printer"
,
input
=
input
,
num_results
=
num_results
)
@
evaluator
(
EvaluatorAttribute
.
FOR_PRINT
)
@
wrap_name_default
()
def
seqtext_printer_evaluator
(
...
...
@@ -562,8 +556,7 @@ def seqtext_printer_evaluator(
id_input
=
None
,
dict_file
=
None
,
delimited
=
None
,
name
=
None
,
):
name
=
None
,
):
"""
Sequence text printer will print text according to index matrix and a
dictionary. There can be multiple input to this layer:
...
...
@@ -636,21 +629,22 @@ def seqtext_printer_evaluator(
inputs
=
[
id_input
,
input
]
input
.
parents
.
append
(
id_input
)
evaluator_base
(
name
=
name
,
evaluator_base
(
name
=
name
,
type
=
"seq_text_printer"
,
input
=
inputs
,
dict_file
=
dict_file
,
result_file
=
result_file
,
delimited
=
delimited
)
@
evaluator
(
EvaluatorAttribute
.
FOR_PRINT
)
@
wrap_name_default
()
def
classification_error_printer_evaluator
(
input
,
label
,
threshold
=
0.5
,
name
=
None
,
):
name
=
None
,
):
"""
This Evaluator is used to print the classification error of each sample.
...
...
@@ -667,7 +661,8 @@ def classification_error_printer_evaluator(
:param name: Evaluator name.
:type name: None|basestring
"""
evaluator_base
(
name
=
name
,
evaluator_base
(
name
=
name
,
type
=
"classification_error_printer"
,
input
=
input
,
label
=
label
,
...
...
python/paddle/trainer_config_helpers/layers.py
浏览文件 @
58e1b3b3
...
...
@@ -29,36 +29,83 @@ except ImportError:
import
pickle
import
copy
__all__
=
[
"full_matrix_projection"
,
"AggregateLevel"
,
"ExpandLevel"
,
"identity_projection"
,
"dotmul_projection"
,
"dotmul_operator"
,
__all__
=
[
"full_matrix_projection"
,
"AggregateLevel"
,
"ExpandLevel"
,
"identity_projection"
,
"dotmul_projection"
,
"dotmul_operator"
,
"repeat_layer"
,
"table_projection"
,
"mixed_layer"
,
"data_layer"
,
"embedding_layer"
,
"fc_layer"
,
"grumemory"
,
"pooling_layer"
,
"lstmemory"
,
"last_seq"
,
"first_seq"
,
"cos_sim"
,
"hsigmoid"
,
"conv_projection"
,
"regression_cost"
,
'classification_cost'
,
"LayerOutput"
,
'img_conv_layer'
,
'img_pool_layer'
,
'batch_norm_layer'
,
'img_cmrnorm_layer'
,
'addto_layer'
,
'concat_layer'
,
'lstm_step_layer'
,
'recurrent_group'
,
'memory'
,
'StaticInput'
,
'expand_layer'
,
'scaling_layer'
,
'power_layer'
,
'interpolation_layer'
,
'bilinear_interp_layer'
,
'trans_layer'
,
'sum_to_one_norm_layer'
,
'get_output_layer'
,
'LayerType'
,
'context_projection'
,
'beam_search'
,
'maxid_layer'
,
'GeneratedInput'
,
'SubsequenceInput'
,
'gru_step_layer'
,
'recurrent_layer'
,
'BaseGeneratedInput'
,
'conv_operator'
,
'conv_shift_layer'
,
'tensor_layer'
,
'selective_fc_layer'
,
'sampling_id_layer'
,
'slope_intercept_layer'
,
'trans_full_matrix_projection'
,
"table_projection"
,
"mixed_layer"
,
"data_layer"
,
"embedding_layer"
,
"fc_layer"
,
"grumemory"
,
"pooling_layer"
,
"lstmemory"
,
"last_seq"
,
"first_seq"
,
"cos_sim"
,
"hsigmoid"
,
"conv_projection"
,
"regression_cost"
,
'classification_cost'
,
"LayerOutput"
,
'img_conv_layer'
,
'img_pool_layer'
,
'batch_norm_layer'
,
'img_cmrnorm_layer'
,
'addto_layer'
,
'concat_layer'
,
'lstm_step_layer'
,
'recurrent_group'
,
'memory'
,
'StaticInput'
,
'expand_layer'
,
'scaling_layer'
,
'power_layer'
,
'interpolation_layer'
,
'bilinear_interp_layer'
,
'trans_layer'
,
'sum_to_one_norm_layer'
,
'get_output_layer'
,
'LayerType'
,
'context_projection'
,
'beam_search'
,
'maxid_layer'
,
'GeneratedInput'
,
'SubsequenceInput'
,
'gru_step_layer'
,
'recurrent_layer'
,
'BaseGeneratedInput'
,
'conv_operator'
,
'conv_shift_layer'
,
'tensor_layer'
,
'selective_fc_layer'
,
'sampling_id_layer'
,
'slope_intercept_layer'
,
'trans_full_matrix_projection'
,
'linear_comb_layer'
,
'convex_comb_layer'
,
'ctc_layer'
,
'crf_layer'
,
'crf_decoding_layer'
,
'convex_comb_layer'
,
'ctc_layer'
,
'crf_layer'
,
'crf_decoding_layer'
,
'nce_layer'
,
'cross_entropy_with_selfnorm'
,
'cross_entropy'
,
'multi_binary_label_cross_entropy'
,
'sum_cost'
,
'rank_cost'
,
'lambda_cost'
,
'huber_cost'
,
'cross_entropy_with_selfnorm'
,
'cross_entropy'
,
'multi_binary_label_cross_entropy'
,
'sum_cost'
,
'rank_cost'
,
'lambda_cost'
,
'huber_cost'
,
'block_expand_layer'
,
'maxout_layer'
,
'out_prod_layer'
,
'print_layer'
,
'maxout_layer'
,
'out_prod_layer'
,
'print_layer'
,
'spp_layer'
,
]
]
class
LayerType
(
object
):
...
...
@@ -181,8 +228,15 @@ class LayerOutput(object):
:type parents: list|tuple|collections.Sequence
"""
def
__init__
(
self
,
name
,
layer_type
,
parents
=
None
,
activation
=
None
,
num_filters
=
None
,
img_norm_type
=
None
,
size
=
None
,
outputs
=
None
,
def
__init__
(
self
,
name
,
layer_type
,
parents
=
None
,
activation
=
None
,
num_filters
=
None
,
img_norm_type
=
None
,
size
=
None
,
outputs
=
None
,
reverse
=
None
):
assert
isinstance
(
name
,
basestring
)
assert
isinstance
(
layer_type
,
basestring
)
...
...
@@ -223,6 +277,7 @@ DEVICE = 'device'
def
layer_support
(
*
attrs
):
attrs_list
=
list
(
attrs
)
attrs_list
.
append
(
DEVICE
)
def
decorator
(
method
):
@
functools
.
wraps
(
method
)
def
wrapper
(
*
args
,
**
kwargs
):
...
...
@@ -282,9 +337,8 @@ def full_matrix_projection(input, size=0, param_attr=None):
:return: A FullMatrixProjection Object.
:rtype: FullMatrixProjection
"""
proj
=
FullMatrixProjection
(
input_layer_name
=
input
.
name
,
size
=
size
,
**
param_attr
.
attr
)
proj
=
FullMatrixProjection
(
input_layer_name
=
input
.
name
,
size
=
size
,
**
param_attr
.
attr
)
proj
.
origin
=
input
return
proj
...
...
@@ -319,9 +373,8 @@ def trans_full_matrix_projection(input, size=0, param_attr=None):
:return: A TransposedFullMatrixProjection Object.
:rtype: TransposedFullMatrixProjection
"""
proj
=
TransposedFullMatrixProjection
(
input_layer_name
=
input
.
name
,
size
=
size
,
**
param_attr
.
attr
)
proj
=
TransposedFullMatrixProjection
(
input_layer_name
=
input
.
name
,
size
=
size
,
**
param_attr
.
attr
)
proj
.
origin
=
input
return
proj
...
...
@@ -365,9 +418,8 @@ def table_projection(input, size=0, param_attr=None):
:return: A TableProjection Object.
:rtype: TableProjection
"""
proj
=
TableProjection
(
input_layer_name
=
input
.
name
,
size
=
size
,
**
param_attr
.
attr
)
proj
=
TableProjection
(
input_layer_name
=
input
.
name
,
size
=
size
,
**
param_attr
.
attr
)
proj
.
origin
=
input
return
proj
...
...
@@ -413,8 +465,8 @@ def identity_projection(input, offset=None):
proj
=
IdentityProjection
(
input_layer_name
=
input
.
name
)
proj
.
origin
=
input
else
:
proj
=
IdentityOffsetProjection
(
input_layer_name
=
input
.
name
,
offset
=
offset
)
proj
=
IdentityOffsetProjection
(
input_layer_name
=
input
.
name
,
offset
=
offset
)
proj
.
origin
=
input
return
proj
...
...
@@ -443,9 +495,8 @@ def dotmul_projection(input, param_attr=None):
:return: A DotMulProjection Object.
:rtype: DotMulProjection
"""
proj
=
DotMulProjection
(
input_layer_name
=
input
.
name
,
size
=
input
.
size
,
**
param_attr
.
attr
)
proj
=
DotMulProjection
(
input_layer_name
=
input
.
name
,
size
=
input
.
size
,
**
param_attr
.
attr
)
proj
.
origin
=
input
return
proj
...
...
@@ -485,14 +536,15 @@ def dotmul_operator(a=None, b=None, scale=1, **kwargs):
if
a
.
size
is
not
None
and
b
.
size
is
not
None
:
assert
a
.
size
==
b
.
size
op
=
DotMulOperator
(
input_layer_names
=
[
a
.
name
,
b
.
name
],
scale
=
scale
)
op
=
DotMulOperator
(
input_layer_names
=
[
a
.
name
,
b
.
name
],
scale
=
scale
)
op
.
origin
=
[
a
,
b
]
return
op
@
wrap_bias_attr_default
([
'padding_attr'
])
def
context_projection
(
input
,
context_len
,
context_start
=
None
,
def
context_projection
(
input
,
context_len
,
context_start
=
None
,
padding_attr
=
False
):
"""
Context Projection.
...
...
@@ -529,7 +581,8 @@ def context_projection(input, context_len, context_start=None,
if
trainable
:
extra_dict
=
padding_attr
.
attr
proj
=
ContextProjection
(
input_layer_name
=
input
.
name
,
proj
=
ContextProjection
(
input_layer_name
=
input
.
name
,
context_length
=
context_len
,
context_start
=
context_start
,
trainable_padding
=
trainable
,
...
...
@@ -547,8 +600,7 @@ class MixedLayerType(LayerOutput):
def
__init__
(
self
):
Exception
.
__init__
(
self
)
def
__init__
(
self
,
name
,
size
,
act
,
bias_attr
,
layer_attr
,
parents
=
None
):
def
__init__
(
self
,
name
,
size
,
act
,
bias_attr
,
layer_attr
,
parents
=
None
):
"""
Ctor.
:param name: layer name.
...
...
@@ -565,8 +617,13 @@ class MixedLayerType(LayerOutput):
:param layer_attr: Extra Layer Attribute.
:type layer_attr: ExtraLayerAttribute or None
"""
LayerOutput
.
__init__
(
self
,
name
,
LayerType
.
MIXED_LAYER
,
parents
,
size
=
size
,
activation
=
act
)
LayerOutput
.
__init__
(
self
,
name
,
LayerType
.
MIXED_LAYER
,
parents
,
size
=
size
,
activation
=
act
)
self
.
bias_attr
=
bias_attr
self
.
layer_attr
=
layer_attr
self
.
inputs
=
[]
...
...
@@ -604,8 +661,7 @@ class MixedLayerType(LayerOutput):
active_type
=
self
.
activation
.
name
,
bias
=
ParamAttr
.
to_bias
(
self
.
bias_attr
),
inputs
=
self
.
inputs
,
**
ExtraLayerAttribute
.
to_kwargs
(
self
.
layer_attr
)
)
**
ExtraLayerAttribute
.
to_kwargs
(
self
.
layer_attr
))
# update the size which might be computed inside MixedLayer
# according to the operator's output size
self
.
size
=
ml
.
config
.
size
...
...
@@ -615,7 +671,11 @@ class MixedLayerType(LayerOutput):
@
wrap_act_default
(
act
=
LinearActivation
())
@
wrap_bias_attr_default
(
has_bias
=
False
)
@
layer_support
(
ERROR_CLIPPING
,
DROPOUT
)
def
mixed_layer
(
size
=
0
,
input
=
None
,
name
=
None
,
act
=
None
,
bias_attr
=
False
,
def
mixed_layer
(
size
=
0
,
input
=
None
,
name
=
None
,
act
=
None
,
bias_attr
=
False
,
layer_attr
=
None
):
"""
Mixed Layer. A mixed layer will add all inputs together, then activate.
...
...
@@ -660,7 +720,11 @@ def mixed_layer(size=0, input=None, name=None, act=None, bias_attr=False,
if
input
is
None
:
return
MixedLayerType
(
name
,
size
,
act
,
bias_attr
,
layer_attr
)
else
:
with
mixed_layer
(
name
=
name
,
size
=
size
,
act
=
act
,
bias_attr
=
bias_attr
,
with
mixed_layer
(
name
=
name
,
size
=
size
,
act
=
act
,
bias_attr
=
bias_attr
,
layer_attr
=
layer_attr
)
as
m
:
if
isinstance
(
input
,
collections
.
Sequence
):
for
each
in
input
:
...
...
@@ -691,7 +755,10 @@ def data_layer(name, size, layer_attr=None):
:return: LayerOutput object.
:rtype: LayerOutput
"""
Layer
(
type
=
LayerType
.
DATA
,
name
=
name
,
size
=
size
,
Layer
(
type
=
LayerType
.
DATA
,
name
=
name
,
size
=
size
,
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
))
return
LayerOutput
(
name
,
LayerType
.
DATA
,
size
=
size
)
...
...
@@ -718,7 +785,10 @@ def embedding_layer(input, size, name=None, param_attr=None, layer_attr=None):
:return: LayerOutput object.
:rtype: LayerOutput
"""
with
mixed_layer
(
name
=
name
,
size
=
size
,
act
=
LinearActivation
(),
with
mixed_layer
(
name
=
name
,
size
=
size
,
act
=
LinearActivation
(),
bias_attr
=
False
,
layer_attr
=
layer_attr
)
as
mix
:
mix
+=
table_projection
(
input
=
input
,
size
=
size
,
param_attr
=
param_attr
)
...
...
@@ -730,8 +800,13 @@ def embedding_layer(input, size, name=None, param_attr=None, layer_attr=None):
@
wrap_bias_attr_default
()
@
wrap_act_default
()
@
layer_support
(
ERROR_CLIPPING
,
DROPOUT
)
def
fc_layer
(
input
,
size
,
act
=
None
,
name
=
None
,
param_attr
=
None
,
bias_attr
=
None
,
layer_attr
=
None
):
def
fc_layer
(
input
,
size
,
act
=
None
,
name
=
None
,
param_attr
=
None
,
bias_attr
=
None
,
layer_attr
=
None
):
"""
Helper for declare fully connected layer.
...
...
@@ -783,17 +858,17 @@ def fc_layer(input, size, act=None, name=None,
assert
isinstance
(
input
,
collections
.
Sequence
)
Layer
(
inputs
=
[
Input
(
ipt
.
name
,
**
attr
.
attr
)
for
ipt
,
attr
in
zip
(
input
,
param_attr
)],
inputs
=
[
Input
(
ipt
.
name
,
**
attr
.
attr
)
for
ipt
,
attr
in
zip
(
input
,
param_attr
)
],
name
=
name
,
type
=
LayerType
.
FC_LAYER
,
size
=
size
,
bias
=
ParamAttr
.
to_bias
(
bias_attr
),
active_type
=
act
.
name
,
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
)
)
return
LayerOutput
(
name
,
LayerType
.
FC_LAYER
,
input
,
activation
=
act
,
size
=
size
)
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
))
return
LayerOutput
(
name
,
LayerType
.
FC_LAYER
,
input
,
activation
=
act
,
size
=
size
)
@
wrap_name_default
(
"print"
)
...
...
@@ -816,8 +891,7 @@ def print_layer(input, name=None):
Layer
(
name
=
name
,
type
=
LayerType
.
PRINT_LAYER
,
inputs
=
[
l
.
name
for
l
in
input
],
)
inputs
=
[
l
.
name
for
l
in
input
],
)
# this layer don't return anything, can not be input of other layer.
...
...
@@ -825,7 +899,10 @@ def print_layer(input, name=None):
@
wrap_bias_attr_default
(
has_bias
=
False
)
@
wrap_param_default
([
'pooling_type'
],
default_factory
=
lambda
_
:
MaxPooling
())
@
layer_support
()
def
pooling_layer
(
input
,
pooling_type
=
None
,
name
=
None
,
bias_attr
=
None
,
def
pooling_layer
(
input
,
pooling_type
=
None
,
name
=
None
,
bias_attr
=
None
,
agg_level
=
AggregateLevel
.
EACH_TIMESTEP
,
layer_attr
=
None
):
"""
...
...
@@ -872,24 +949,27 @@ def pooling_layer(input, pooling_type=None, name=None, bias_attr=None,
inputs
=
[
Input
(
input
.
name
)],
bias
=
ParamAttr
.
to_bias
(
bias_attr
),
trans_type
=
agg_level
,
**
extra_dict
)
return
LayerOutput
(
name
,
pooling_type
.
name
,
parents
=
[
input
],
size
=
input
.
size
)
**
extra_dict
)
return
LayerOutput
(
name
,
pooling_type
.
name
,
parents
=
[
input
],
size
=
input
.
size
)
@
wrap_bias_attr_default
()
@
wrap_param_attr_default
()
@
wrap_act_default
(
param_names
=
[
'gate_act'
],
act
=
SigmoidActivation
())
@
wrap_act_default
(
param_names
=
[
'gate_act'
],
act
=
SigmoidActivation
())
@
wrap_act_default
(
param_names
=
[
"act"
,
'state_act'
],
act
=
TanhActivation
())
@
wrap_name_default
(
"lstmemory"
)
@
layer_support
(
DROPOUT
)
def
lstmemory
(
input
,
name
=
None
,
reverse
=
False
,
act
=
None
,
gate_act
=
None
,
size
=
None
,
state_act
=
None
,
bias_attr
=
None
,
param_attr
=
None
,
def
lstmemory
(
input
,
name
=
None
,
reverse
=
False
,
act
=
None
,
gate_act
=
None
,
size
=
None
,
state_act
=
None
,
bias_attr
=
None
,
param_attr
=
None
,
layer_attr
=
None
):
"""
Long Short-term Memory Cell.
...
...
@@ -964,7 +1044,8 @@ def lstmemory(input, name=None, reverse=False, act=None,
"layer. The lstm size should be equal with input layer size/4. The"
" size which is set explicitly will be ignored."
%
name
)
Layer
(
name
=
name
,
Layer
(
name
=
name
,
type
=
LayerType
.
LSTMEMORY
,
active_type
=
act
.
name
,
active_state_type
=
state_act
.
name
,
...
...
@@ -974,20 +1055,27 @@ def lstmemory(input, name=None, reverse=False, act=None,
inputs
=
[
Input
(
input
.
name
,
**
param_attr
.
attr
)],
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
))
return
LayerOutput
(
name
,
LayerType
.
LSTMEMORY
,
[
input
],
size
=
input
.
size
/
4
,
return
LayerOutput
(
name
,
LayerType
.
LSTMEMORY
,
[
input
],
size
=
input
.
size
/
4
,
reverse
=
reverse
)
@
wrap_bias_attr_default
()
@
wrap_param_attr_default
()
@
wrap_act_default
(
param_names
=
[
'gate_act'
],
act
=
SigmoidActivation
())
@
wrap_act_default
(
param_names
=
[
'gate_act'
],
act
=
SigmoidActivation
())
@
wrap_act_default
(
param_names
=
[
"act"
],
act
=
TanhActivation
())
@
wrap_name_default
(
"gru"
)
@
layer_support
(
DROPOUT
)
def
grumemory
(
input
,
name
=
None
,
reverse
=
False
,
act
=
None
,
gate_act
=
None
,
size
=
None
,
bias_attr
=
None
,
param_attr
=
None
,
def
grumemory
(
input
,
name
=
None
,
reverse
=
False
,
act
=
None
,
gate_act
=
None
,
size
=
None
,
bias_attr
=
None
,
param_attr
=
None
,
layer_attr
=
None
):
"""
Gate Recurrent Unit Layer.
...
...
@@ -1078,23 +1166,28 @@ def grumemory(input, name=None, reverse=False, act=None,
" and should be input size / 3. Set size explicitly will be "
"ignored."
)
Layer
(
name
=
name
,
Layer
(
name
=
name
,
type
=
LayerType
.
GRUMEMORY
,
active_type
=
act
.
name
,
active_gate_type
=
gate_act
.
name
,
reversed
=
reverse
,
bias
=
ParamAttr
.
to_bias
(
bias_attr
),
inputs
=
[
Input
(
input
.
name
,
**
param_attr
.
attr
)],
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
)
)
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
))
return
LayerOutput
(
name
,
LayerType
.
GRUMEMORY
,
[
input
],
size
=
input
.
size
/
3
,
return
LayerOutput
(
name
,
LayerType
.
GRUMEMORY
,
[
input
],
size
=
input
.
size
/
3
,
reverse
=
reverse
)
@
wrap_name_default
()
@
layer_support
()
def
last_seq
(
input
,
name
=
None
,
agg_level
=
AggregateLevel
.
EACH_TIMESTEP
,
def
last_seq
(
input
,
name
=
None
,
agg_level
=
AggregateLevel
.
EACH_TIMESTEP
,
layer_attr
=
None
):
"""
Get Last Timestamp Activation of a sequence.
...
...
@@ -1120,15 +1213,19 @@ def last_seq(input, name=None, agg_level=AggregateLevel.EACH_TIMESTEP,
type
=
LayerType
.
SEQUENCE_LAST_INSTANCE
,
inputs
=
[
input
.
name
],
trans_type
=
agg_level
,
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
)
)
return
LayerOutput
(
name
,
LayerType
.
SEQUENCE_LAST_INSTANCE
,
parents
=
[
input
],
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
))
return
LayerOutput
(
name
,
LayerType
.
SEQUENCE_LAST_INSTANCE
,
parents
=
[
input
],
size
=
input
.
size
)
@
wrap_name_default
()
@
layer_support
()
def
first_seq
(
input
,
name
=
None
,
agg_level
=
AggregateLevel
.
EACH_TIMESTEP
,
def
first_seq
(
input
,
name
=
None
,
agg_level
=
AggregateLevel
.
EACH_TIMESTEP
,
layer_attr
=
None
):
"""
Get First Timestamp Activation of a sequence.
...
...
@@ -1155,10 +1252,12 @@ def first_seq(input, name=None, agg_level=AggregateLevel.EACH_TIMESTEP,
type
=
LayerType
.
SEQUENCE_FIRST_INSTANCE
,
inputs
=
[
input
.
name
],
trans_type
=
agg_level
,
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
)
)
return
LayerOutput
(
name
,
LayerType
.
SEQUENCE_FIRST_INSTANCE
,
parents
=
[
input
],
size
=
input
.
size
)
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
))
return
LayerOutput
(
name
,
LayerType
.
SEQUENCE_FIRST_INSTANCE
,
parents
=
[
input
],
size
=
input
.
size
)
class
ExpandLevel
(
object
):
...
...
@@ -1168,7 +1267,8 @@ class ExpandLevel(object):
@
wrap_name_default
()
@
layer_support
()
def
expand_layer
(
input
,
expand_as
,
def
expand_layer
(
input
,
expand_as
,
name
=
None
,
bias_attr
=
False
,
expand_level
=
ExpandLevel
.
FROM_TIMESTEP
,
...
...
@@ -1208,9 +1308,9 @@ def expand_layer(input, expand_as,
bias
=
ParamAttr
.
to_bias
(
bias_attr
=
bias_attr
),
type
=
LayerType
.
EXPAND_LAYER
,
trans_type
=
expand_level
,
**
ExtraAttr
.
to_kwargs
(
layer_attr
)
)
return
LayerOutput
(
name
=
name
,
**
ExtraAttr
.
to_kwargs
(
layer_attr
)
)
return
LayerOutput
(
name
=
name
,
size
=
input
.
size
,
layer_type
=
LayerType
.
EXPAND_LAYER
,
parents
=
[
input
,
expand_as
])
...
...
@@ -1218,9 +1318,7 @@ def expand_layer(input, expand_as,
@
wrap_name_default
()
@
layer_support
()
def
repeat_layer
(
input
,
num_repeats
,
name
=
None
,
layer_attr
=
None
):
def
repeat_layer
(
input
,
num_repeats
,
name
=
None
,
layer_attr
=
None
):
"""
A layer for repeating the input for num_repeats times. This is equivalent
to apply concat_layer() with num_repeats same input.
...
...
@@ -1251,13 +1349,14 @@ def repeat_layer(input, num_repeats,
name
=
name
,
num_filters
=
num_repeats
,
type
=
LayerType
.
FEATURE_MAP_EXPAND_LAYER
,
**
ExtraAttr
.
to_kwargs
(
layer_attr
)
)
return
LayerOutput
(
name
=
name
,
**
ExtraAttr
.
to_kwargs
(
layer_attr
)
)
return
LayerOutput
(
name
=
name
,
size
=
l
.
config
.
size
,
layer_type
=
LayerType
.
FEATURE_MAP_EXPAND_LAYER
,
parents
=
[
input
])
@
wrap_name_default
()
@
layer_support
()
def
interpolation_layer
(
input
,
weight
,
name
=
None
,
layer_attr
=
None
):
...
...
@@ -1302,9 +1401,10 @@ def interpolation_layer(input, weight, name=None, layer_attr=None):
name
=
name
,
type
=
LayerType
.
INTERPOLATION_LAYER
,
inputs
=
[
weight
.
name
,
input
[
0
].
name
,
input
[
1
].
name
],
**
ExtraAttr
.
to_kwargs
(
layer_attr
)
)
return
LayerOutput
(
name
,
LayerType
.
INTERPOLATION_LAYER
,
**
ExtraAttr
.
to_kwargs
(
layer_attr
))
return
LayerOutput
(
name
,
LayerType
.
INTERPOLATION_LAYER
,
parents
=
[
weight
,
input
[
0
],
input
[
1
]],
size
=
input
[
0
].
size
)
...
...
@@ -1345,15 +1445,23 @@ def bilinear_interp_layer(input,
assert
out_size_x
>
0
and
out_size_y
>
0
assert
input
.
num_filters
is
not
None
num_channels
=
input
.
num_filters
l
=
Layer
(
name
=
name
,
inputs
=
Input
(
input
.
name
,
bilinear_interp
=
BilinearInterp
(
out_size_x
=
out_size_x
,
l
=
Layer
(
name
=
name
,
inputs
=
Input
(
input
.
name
,
bilinear_interp
=
BilinearInterp
(
out_size_x
=
out_size_x
,
out_size_y
=
out_size_y
,
num_channels
=
num_channels
)),
type
=
LayerType
.
BILINEAR_INTERP_LAYER
,
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
))
return
LayerOutput
(
name
,
LayerType
.
BILINEAR_INTERP_LAYER
,
parents
=
[
input
],
num_filters
=
num_channels
,
size
=
l
.
config
.
size
)
return
LayerOutput
(
name
,
LayerType
.
BILINEAR_INTERP_LAYER
,
parents
=
[
input
],
num_filters
=
num_channels
,
size
=
l
.
config
.
size
)
@
wrap_name_default
()
@
layer_support
()
...
...
@@ -1392,10 +1500,9 @@ def power_layer(input, weight, name=None, layer_attr=None):
name
=
name
,
type
=
LayerType
.
POWER_LAYER
,
inputs
=
[
weight
.
name
,
input
.
name
],
**
ExtraAttr
.
to_kwargs
(
layer_attr
)
)
return
LayerOutput
(
name
,
LayerType
.
POWER_LAYER
,
parents
=
[
input
,
weight
],
size
=
input
.
size
)
**
ExtraAttr
.
to_kwargs
(
layer_attr
))
return
LayerOutput
(
name
,
LayerType
.
POWER_LAYER
,
parents
=
[
input
,
weight
],
size
=
input
.
size
)
@
wrap_name_default
()
...
...
@@ -1437,10 +1544,9 @@ def scaling_layer(input, weight, name=None, layer_attr=None):
name
=
name
,
type
=
LayerType
.
SCALING_LAYER
,
inputs
=
[
weight
.
name
,
input
.
name
],
**
ExtraAttr
.
to_kwargs
(
layer_attr
)
)
return
LayerOutput
(
name
,
LayerType
.
SCALING_LAYER
,
parents
=
[
weight
,
input
],
size
=
input
.
size
)
**
ExtraAttr
.
to_kwargs
(
layer_attr
))
return
LayerOutput
(
name
,
LayerType
.
SCALING_LAYER
,
parents
=
[
weight
,
input
],
size
=
input
.
size
)
@
wrap_name_default
()
...
...
@@ -1473,10 +1579,9 @@ def trans_layer(input, name=None, layer_attr=None):
name
=
name
,
type
=
LayerType
.
TRANS_LAYER
,
inputs
=
[
input
.
name
],
**
ExtraAttr
.
to_kwargs
(
layer_attr
)
)
return
LayerOutput
(
name
,
LayerType
.
TRANS_LAYER
,
parents
=
[
input
],
size
=
input
.
size
)
**
ExtraAttr
.
to_kwargs
(
layer_attr
))
return
LayerOutput
(
name
,
LayerType
.
TRANS_LAYER
,
parents
=
[
input
],
size
=
input
.
size
)
@
wrap_name_default
()
...
...
@@ -1518,8 +1623,7 @@ def cos_sim(a, b, scale=5, size=1, name=None, layer_attr=None):
type
=
LayerType
.
COSINE_SIM
,
cos_scale
=
scale
,
inputs
=
[
a
.
name
,
b
.
name
],
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
)
)
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
))
else
:
if
a
.
size
is
not
None
and
b
.
size
is
not
None
:
assert
size
==
b
.
size
/
a
.
size
...
...
@@ -1529,8 +1633,7 @@ def cos_sim(a, b, scale=5, size=1, name=None, layer_attr=None):
size
=
size
,
cos_scale
=
scale
,
inputs
=
[
a
.
name
,
b
.
name
],
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
)
)
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
))
return
LayerOutput
(
name
,
LayerType
.
COSINE_SIM
,
parents
=
[
a
,
b
],
size
=
size
)
...
...
@@ -1538,8 +1641,13 @@ def cos_sim(a, b, scale=5, size=1, name=None, layer_attr=None):
@
wrap_bias_attr_default
(
has_bias
=
True
)
@
wrap_param_attr_default
()
@
layer_support
()
def
hsigmoid
(
input
,
label
,
num_classes
,
name
=
None
,
bias_attr
=
None
,
param_attr
=
None
,
layer_attr
=
None
):
def
hsigmoid
(
input
,
label
,
num_classes
,
name
=
None
,
bias_attr
=
None
,
param_attr
=
None
,
layer_attr
=
None
):
"""
Organize the classes into a binary tree. At each node, a sigmoid function
is used to calculate the probability of belonging to the right branch.
...
...
@@ -1600,10 +1708,9 @@ def hsigmoid(input, label, num_classes, name=None, bias_attr=None,
num_classes
=
num_classes
,
bias
=
ParamAttr
.
to_bias
(
bias_attr
),
inputs
=
ipts_for_layer
,
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
)
)
return
LayerOutput
(
name
,
LayerType
.
HSIGMOID
,
parents
=
parents
,
size
=
l
.
config
.
size
)
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
))
return
LayerOutput
(
name
,
LayerType
.
HSIGMOID
,
parents
=
parents
,
size
=
l
.
config
.
size
)
@
wrap_name_default
(
"conv"
)
...
...
@@ -1611,11 +1718,22 @@ def hsigmoid(input, label, num_classes, name=None, bias_attr=None,
@
wrap_bias_attr_default
()
@
wrap_act_default
(
act
=
ReluActivation
())
@
layer_support
(
DROPOUT
)
def
img_conv_layer
(
input
,
filter_size
,
num_filters
,
name
=
None
,
num_channels
=
None
,
act
=
None
,
groups
=
1
,
stride
=
1
,
padding
=
0
,
bias_attr
=
None
,
param_attr
=
None
,
shared_biases
=
True
,
layer_attr
=
None
,
filter_size_y
=
None
,
stride_y
=
None
,
padding_y
=
None
,
def
img_conv_layer
(
input
,
filter_size
,
num_filters
,
name
=
None
,
num_channels
=
None
,
act
=
None
,
groups
=
1
,
stride
=
1
,
padding
=
0
,
bias_attr
=
None
,
param_attr
=
None
,
shared_biases
=
True
,
layer_attr
=
None
,
filter_size_y
=
None
,
stride_y
=
None
,
padding_y
=
None
,
trans
=
False
):
"""
Convolution layer for image. Paddle only support square input currently and
...
...
@@ -1713,7 +1831,7 @@ def img_conv_layer(input, filter_size, num_filters,
if
param_attr
.
attr
.
get
(
'initial_smart'
):
# special initial for conv layers.
init_w
=
(
2.0
/
(
filter_size
**
2
*
num_channels
))
**
0.5
init_w
=
(
2.0
/
(
filter_size
**
2
*
num_channels
))
**
0.5
param_attr
.
attr
[
"initial_mean"
]
=
0.0
param_attr
.
attr
[
"initial_std"
]
=
init_w
param_attr
.
attr
[
"initial_strategy"
]
=
0
...
...
@@ -1723,10 +1841,16 @@ def img_conv_layer(input, filter_size, num_filters,
l
=
Layer
(
name
=
name
,
inputs
=
Input
(
input
.
name
,
conv
=
Conv
(
filter_size
=
filter_size
,
padding
=
padding
,
stride
=
stride
,
channels
=
num_channels
,
groups
=
groups
,
filter_size_y
=
filter_size_y
,
padding_y
=
padding_y
,
inputs
=
Input
(
input
.
name
,
conv
=
Conv
(
filter_size
=
filter_size
,
padding
=
padding
,
stride
=
stride
,
channels
=
num_channels
,
groups
=
groups
,
filter_size_y
=
filter_size_y
,
padding_y
=
padding_y
,
stride_y
=
stride_y
),
**
param_attr
.
attr
),
active_type
=
act
.
name
,
...
...
@@ -1734,19 +1858,29 @@ def img_conv_layer(input, filter_size, num_filters,
bias
=
ParamAttr
.
to_bias
(
bias_attr
),
shared_biases
=
shared_biases
,
type
=
lt
,
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
)
)
return
LayerOutput
(
name
,
lt
,
parents
=
[
input
],
activation
=
act
,
num_filters
=
num_filters
,
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
))
return
LayerOutput
(
name
,
lt
,
parents
=
[
input
],
activation
=
act
,
num_filters
=
num_filters
,
size
=
l
.
config
.
size
)
@
wrap_name_default
(
"pool"
)
@
layer_support
()
def
img_pool_layer
(
input
,
pool_size
,
name
=
None
,
num_channels
=
None
,
pool_type
=
None
,
stride
=
1
,
padding
=
0
,
layer_attr
=
None
,
pool_size_y
=
None
,
stride_y
=
None
,
padding_y
=
None
,
def
img_pool_layer
(
input
,
pool_size
,
name
=
None
,
num_channels
=
None
,
pool_type
=
None
,
stride
=
1
,
padding
=
0
,
layer_attr
=
None
,
pool_size_y
=
None
,
stride_y
=
None
,
padding_y
=
None
,
img_width
=
None
):
"""
Image pooling Layer.
...
...
@@ -1804,7 +1938,9 @@ def img_pool_layer(input, pool_size, name=None,
l
=
Layer
(
name
=
name
,
type
=
LayerType
.
POOL_LAYER
,
inputs
=
[
Input
(
input
.
name
,
inputs
=
[
Input
(
input
.
name
,
pool
=
Pool
(
pool_type
=
type_name
,
channels
=
num_channels
,
...
...
@@ -1815,18 +1951,26 @@ def img_pool_layer(input, pool_size, name=None,
size_y
=
pool_size_y
,
stride_y
=
stride_y
,
padding_y
=
padding_y
,
img_width
=
img_width
))],
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
)
)
return
LayerOutput
(
name
,
LayerType
.
POOL_LAYER
,
parents
=
[
input
],
num_filters
=
num_channels
,
size
=
l
.
config
.
size
)
img_width
=
img_width
))
],
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
))
return
LayerOutput
(
name
,
LayerType
.
POOL_LAYER
,
parents
=
[
input
],
num_filters
=
num_channels
,
size
=
l
.
config
.
size
)
@
wrap_name_default
(
"spp"
)
@
layer_support
()
def
spp_layer
(
input
,
name
=
None
,
num_channels
=
None
,
pool_type
=
None
,
pyramid_height
=
None
,
img_width
=
None
,
layer_attr
=
None
):
def
spp_layer
(
input
,
name
=
None
,
num_channels
=
None
,
pool_type
=
None
,
pyramid_height
=
None
,
img_width
=
None
,
layer_attr
=
None
):
"""
Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition.
The details please refer to
...
...
@@ -1866,42 +2010,58 @@ def spp_layer(input, name=None, num_channels=None, pool_type=None,
l
=
Layer
(
name
=
name
,
type
=
LayerType
.
SPP_LAYER
,
inputs
=
Input
(
input
.
name
,
spp
=
SpatialPyramidPool
(
pool_type
=
type_name
,
inputs
=
Input
(
input
.
name
,
spp
=
SpatialPyramidPool
(
pool_type
=
type_name
,
channels
=
num_channels
,
pyramid_height
=
pyramid_height
,
img_width
=
img_width
)
),
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
)
)
return
LayerOutput
(
name
,
layer_type
=
LayerType
.
SPP_LAYER
,
parents
=
[
input
],
num_filters
=
num_channels
,
size
=
l
.
config
.
size
)
img_width
=
img_width
)),
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
))
return
LayerOutput
(
name
,
layer_type
=
LayerType
.
SPP_LAYER
,
parents
=
[
input
],
num_filters
=
num_channels
,
size
=
l
.
config
.
size
)
def
__img_norm_layer__
(
name
,
input
,
size
,
norm_type
,
scale
,
power
,
num_channels
,
blocked
,
layer_attr
):
def
__img_norm_layer__
(
name
,
input
,
size
,
norm_type
,
scale
,
power
,
num_channels
,
blocked
,
layer_attr
):
if
num_channels
is
None
:
assert
input
.
num_filters
is
not
None
num_channels
=
input
.
num_filters
l
=
Layer
(
name
=
name
,
type
=
LayerType
.
NORM_LAYER
,
inputs
=
Input
(
input
.
name
,
norm
=
Norm
(
norm_type
=
norm_type
,
channels
=
num_channels
,
size
=
size
,
name
=
name
,
type
=
LayerType
.
NORM_LAYER
,
inputs
=
Input
(
input
.
name
,
norm
=
Norm
(
norm_type
=
norm_type
,
channels
=
num_channels
,
size
=
size
,
scale
=
scale
,
pow
=
power
,
blocked
=
blocked
)
),
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
)
)
return
LayerOutput
(
name
,
layer_type
=
LayerType
.
NORM_LAYER
,
parents
=
[
input
],
num_filters
=
num_channels
,
img_norm_type
=
norm_type
,
pow
=
power
,
blocked
=
blocked
)),
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
))
return
LayerOutput
(
name
,
layer_type
=
LayerType
.
NORM_LAYER
,
parents
=
[
input
],
num_filters
=
num_channels
,
img_norm_type
=
norm_type
,
size
=
l
.
config
.
size
)
@
wrap_name_default
(
"crmnorm"
)
@
layer_support
()
def
img_cmrnorm_layer
(
input
,
size
,
scale
=
0.0128
,
power
=
0.75
,
name
=
None
,
num_channels
=
None
,
def
img_cmrnorm_layer
(
input
,
size
,
scale
=
0.0128
,
power
=
0.75
,
name
=
None
,
num_channels
=
None
,
layer_attr
=
None
):
"""
Response normalization across feature maps.
...
...
@@ -1935,8 +2095,13 @@ def img_cmrnorm_layer(input, size, scale=0.0128, power=0.75,
@
wrap_act_default
(
act
=
ReluActivation
())
@
wrap_name_default
(
"batch_norm"
)
@
layer_support
(
DROPOUT
)
def
batch_norm_layer
(
input
,
act
=
None
,
name
=
None
,
num_channels
=
None
,
bias_attr
=
None
,
param_attr
=
None
,
layer_attr
=
None
,
def
batch_norm_layer
(
input
,
act
=
None
,
name
=
None
,
num_channels
=
None
,
bias_attr
=
None
,
param_attr
=
None
,
layer_attr
=
None
,
batch_norm_type
=
None
,
moving_average_fraction
=
0.9
,
use_global_stats
=
None
):
...
...
@@ -2022,20 +2187,21 @@ def batch_norm_layer(input, act=None, name=None, num_channels=None,
(
batch_norm_type
==
"cudnn_batch_norm"
)
l
=
Layer
(
name
=
name
,
inputs
=
Input
(
input
.
name
,
image
=
Image
(
channels
=
num_channels
),
**
param_attr
.
attr
),
inputs
=
Input
(
input
.
name
,
image
=
Image
(
channels
=
num_channels
),
**
param_attr
.
attr
),
active_type
=
act
.
name
,
type
=
LayerType
.
BATCH_NORM_LAYER
,
batch_norm_type
=
batch_norm_type
,
bias
=
ParamAttr
.
to_bias
(
bias_attr
),
moving_average_fraction
=
moving_average_fraction
,
use_global_stats
=
use_global_stats
,
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
)
)
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
))
return
LayerOutput
(
name
=
name
,
layer_type
=
LayerType
.
BATCH_NORM_LAYER
,
parents
=
[
input
],
activation
=
act
,
return
LayerOutput
(
name
=
name
,
layer_type
=
LayerType
.
BATCH_NORM_LAYER
,
parents
=
[
input
],
activation
=
act
,
num_filters
=
num_channels
,
size
=
l
.
config
.
size
)
...
...
@@ -2072,18 +2238,16 @@ def sum_to_one_norm_layer(input, name=None, layer_attr=None):
name
=
name
,
type
=
LayerType
.
SUM_TO_ONE_NORM_LAYER
,
inputs
=
[
input
.
name
],
**
ExtraAttr
.
to_kwargs
(
layer_attr
)
)
return
LayerOutput
(
name
,
LayerType
.
SUM_TO_ONE_NORM_LAYER
,
parents
=
[
input
],
size
=
input
.
size
)
**
ExtraAttr
.
to_kwargs
(
layer_attr
))
return
LayerOutput
(
name
,
LayerType
.
SUM_TO_ONE_NORM_LAYER
,
parents
=
[
input
],
size
=
input
.
size
)
@
wrap_name_default
(
"addto"
)
@
wrap_act_default
(
act
=
LinearActivation
())
@
wrap_bias_attr_default
(
has_bias
=
False
)
@
layer_support
(
DROPOUT
)
def
addto_layer
(
input
,
act
=
None
,
name
=
None
,
bias_attr
=
None
,
layer_attr
=
None
):
def
addto_layer
(
input
,
act
=
None
,
name
=
None
,
bias_attr
=
None
,
layer_attr
=
None
):
"""
AddtoLayer.
...
...
@@ -2143,14 +2307,19 @@ def addto_layer(input, act=None, name=None, bias_attr=None,
num_filters
=
each_input
.
num_filters
l
=
Layer
(
name
=
name
,
type
=
LayerType
.
ADDTO_LAYER
,
inputs
=
ipts_for_layer
,
name
=
name
,
type
=
LayerType
.
ADDTO_LAYER
,
inputs
=
ipts_for_layer
,
bias
=
ParamAttr
.
to_bias
(
bias_attr
),
active_type
=
act
.
name
,
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
)
)
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
))
return
LayerOutput
(
name
,
LayerType
.
ADDTO_LAYER
,
parents
=
input
,
activation
=
act
,
num_filters
=
num_filters
,
return
LayerOutput
(
name
,
LayerType
.
ADDTO_LAYER
,
parents
=
input
,
activation
=
act
,
num_filters
=
num_filters
,
size
=
l
.
config
.
size
)
...
...
@@ -2210,22 +2379,22 @@ def concat_layer(input, act=None, name=None, layer_attr=None, bias_attr=None):
LayerOutput
)
return
a
is_concat_layer
=
__is_type__
(
reduce
(
__reduce_concat_type__
,
map
(
type
,
input
)),
LayerOutput
)
is_concat_layer
=
__is_type__
(
reduce
(
__reduce_concat_type__
,
map
(
type
,
input
)),
LayerOutput
)
layer_type
=
(
LayerType
.
CONCAT_LAYER
if
is_concat_layer
else
LayerType
.
CONCAT_PROJ_LAYER
)
layer_type
=
(
LayerType
.
CONCAT_LAYER
if
is_concat_layer
else
LayerType
.
CONCAT_PROJ_LAYER
)
if
layer_type
==
LayerType
.
CONCAT_LAYER
:
assert
not
bias_attr
Layer
(
name
=
name
,
type
=
layer_type
,
name
=
name
,
type
=
layer_type
,
inputs
=
[
x
.
name
for
x
in
input
]
if
is_concat_layer
else
input
,
active_type
=
act
.
name
,
bias
=
ParamAttr
.
to_bias
(
bias_attr
),
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
)
)
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
))
sz
=
0
for
each_input
in
input
:
...
...
@@ -2235,14 +2404,20 @@ def concat_layer(input, act=None, name=None, layer_attr=None, bias_attr=None):
sz
=
None
break
return
LayerOutput
(
name
,
layer_type
=
layer_type
,
parents
=
input
if
is_concat_layer
else
[
x
.
origin
for
x
in
input
],
activation
=
act
,
size
=
sz
)
return
LayerOutput
(
name
,
layer_type
=
layer_type
,
parents
=
input
if
is_concat_layer
else
[
x
.
origin
for
x
in
input
],
activation
=
act
,
size
=
sz
)
def
memory
(
name
,
size
,
is_seq
=
False
,
boot_layer
=
None
,
boot_bias
=
None
,
boot_bias_active_type
=
None
,
def
memory
(
name
,
size
,
is_seq
=
False
,
boot_layer
=
None
,
boot_bias
=
None
,
boot_bias_active_type
=
None
,
boot_with_const_id
=
None
):
"""
The memory layers is a layer cross each time step. Reference this output
...
...
@@ -2290,30 +2465,33 @@ def memory(name, size, is_seq=False, boot_layer=None,
assert
boot_layer
is
None
or
isinstance
(
boot_layer
,
LayerOutput
)
agent_name
=
Memory
(
name
,
size
,
is_seq
,
boot_layer
.
name
if
boot_layer
is
not
None
else
None
,
boot_bias
,
boot_bias_active_type
.
name
,
boot_with_const_id
)
agent_name
=
Memory
(
name
,
size
,
is_seq
,
boot_layer
.
name
if
boot_layer
is
not
None
else
None
,
boot_bias
,
boot_bias_active_type
.
name
,
boot_with_const_id
)
lout
=
LayerOutput
(
name
=
agent_name
,
size
=
size
,
lout
=
LayerOutput
(
name
=
agent_name
,
size
=
size
,
layer_type
=
LayerType
.
MEMORY
,
parents
=
[
boot_layer
]
if
boot_layer
is
not
None
else
None
)
parents
=
[
boot_layer
]
if
boot_layer
is
not
None
else
None
)
return
lout
@
wrap_bias_attr_default
()
@
wrap_act_default
(
param_names
=
[
'gate_act'
,
'state_act'
],
act
=
SigmoidActivation
())
@
wrap_act_default
(
param_names
=
[
'gate_act'
,
'state_act'
],
act
=
SigmoidActivation
())
@
wrap_act_default
(
act
=
TanhActivation
())
@
wrap_name_default
(
'lstm_step'
)
@
layer_support
()
def
lstm_step_layer
(
input
,
state
,
size
,
act
=
None
,
name
=
None
,
gate_act
=
None
,
state_act
=
None
,
bias_attr
=
None
,
layer_attr
=
None
):
def
lstm_step_layer
(
input
,
state
,
size
,
act
=
None
,
name
=
None
,
gate_act
=
None
,
state_act
=
None
,
bias_attr
=
None
,
layer_attr
=
None
):
"""
LSTM Step Layer. It used in recurrent_group. The lstm equations are shown
as follow.
...
...
@@ -2380,24 +2558,32 @@ def lstm_step_layer(input, state, size, act=None,
active_gate_type
=
gate_act
.
name
,
active_state_type
=
state_act
.
name
,
bias
=
ParamAttr
.
to_bias
(
bias_attr
),
size
=
size
,
inputs
=
[
input
.
name
,
state
.
name
],
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
)
)
size
=
size
,
inputs
=
[
input
.
name
,
state
.
name
],
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
)
)
return
LayerOutput
(
name
=
name
,
layer_type
=
LayerType
.
LSTM_STEP_LAYER
,
parents
=
[
input
,
state
],
activation
=
act
,
size
=
size
,
outputs
=
[
'default'
,
'state'
])
return
LayerOutput
(
name
=
name
,
layer_type
=
LayerType
.
LSTM_STEP_LAYER
,
parents
=
[
input
,
state
],
activation
=
act
,
size
=
size
,
outputs
=
[
'default'
,
'state'
])
@
wrap_bias_attr_default
()
@
wrap_act_default
(
param_names
=
[
'gate_act'
],
act
=
SigmoidActivation
())
@
wrap_act_default
(
param_names
=
[
'gate_act'
],
act
=
SigmoidActivation
())
@
wrap_act_default
(
act
=
TanhActivation
())
@
wrap_name_default
(
'gru_step'
)
@
layer_support
()
def
gru_step_layer
(
input
,
output_mem
,
size
=
None
,
act
=
None
,
name
=
None
,
gate_act
=
None
,
bias_attr
=
None
,
layer_attr
=
None
):
def
gru_step_layer
(
input
,
output_mem
,
size
=
None
,
act
=
None
,
name
=
None
,
gate_act
=
None
,
bias_attr
=
None
,
layer_attr
=
None
):
"""
:param input:
...
...
@@ -2418,20 +2604,18 @@ def gru_step_layer(input, output_mem, size=None, act=None,
Layer
(
name
=
name
,
type
=
LayerType
.
GRU_STEP_LAYER
,
inputs
=
[
input
.
name
,
output_mem
.
name
],
inputs
=
[
input
.
name
,
output_mem
.
name
],
bias
=
ParamAttr
.
to_bias
(
bias_attr
),
size
=
size
,
active_type
=
act
.
name
,
active_gate_type
=
gate_act
.
name
,
**
ExtraAttr
.
to_kwargs
(
layer_attr
)
)
**
ExtraAttr
.
to_kwargs
(
layer_attr
))
return
LayerOutput
(
name
=
name
,
layer_type
=
LayerType
.
GRU_STEP_LAYER
,
name
=
name
,
layer_type
=
LayerType
.
GRU_STEP_LAYER
,
parents
=
[
input
,
output_mem
],
size
=
size
,
activation
=
act
)
size
=
size
,
activation
=
act
)
@
wrap_name_default
()
...
...
@@ -2459,13 +2643,19 @@ def get_output_layer(input, arg_name, name=None, layer_attr=None):
' The get output name is %s, which not'
\
' in %s'
%
(
arg_name
,
","
.
join
(
input
.
outputs
))
Layer
(
name
=
name
,
type
=
LayerType
.
GET_OUTPUT_LAYER
,
inputs
=
[
Input
(
input
.
name
,
input_layer_argument
=
arg_name
)],
Layer
(
name
=
name
,
type
=
LayerType
.
GET_OUTPUT_LAYER
,
inputs
=
[
Input
(
input
.
name
,
input_layer_argument
=
arg_name
)],
size
=
input
.
size
,
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
))
return
LayerOutput
(
name
=
name
,
layer_type
=
LayerType
.
GET_OUTPUT_LAYER
,
parents
=
[
input
],
size
=
input
.
size
)
return
LayerOutput
(
name
=
name
,
layer_type
=
LayerType
.
GET_OUTPUT_LAYER
,
parents
=
[
input
],
size
=
input
.
size
)
@
wrap_name_default
()
...
...
@@ -2473,8 +2663,13 @@ def get_output_layer(input, arg_name, name=None, layer_attr=None):
@
wrap_bias_attr_default
()
@
wrap_param_attr_default
()
@
layer_support
()
def
recurrent_layer
(
input
,
act
=
None
,
bias_attr
=
None
,
param_attr
=
None
,
name
=
None
,
reverse
=
False
,
layer_attr
=
None
):
def
recurrent_layer
(
input
,
act
=
None
,
bias_attr
=
None
,
param_attr
=
None
,
name
=
None
,
reverse
=
False
,
layer_attr
=
None
):
"""
Simple recurrent unit layer. It is just a fully connect layer through both
time and neural network.
...
...
@@ -2509,15 +2704,20 @@ def recurrent_layer(input, act=None, bias_attr=None,
:return: LayerOutput object.
:rtype: LayerOutput
"""
Layer
(
name
=
name
,
Layer
(
name
=
name
,
type
=
LayerType
.
RECURRENT_LAYER
,
inputs
=
Input
(
input
.
name
,
**
param_attr
.
attr
),
active_type
=
act
.
name
,
bias
=
ParamAttr
.
to_bias
(
bias_attr
),
reversed
=
reverse
,
**
ExtraAttr
.
to_kwargs
(
layer_attr
))
return
LayerOutput
(
name
=
name
,
layer_type
=
LayerType
.
RECURRENT_LAYER
,
parents
=
[
input
],
size
=
input
.
size
,
activation
=
act
,
return
LayerOutput
(
name
=
name
,
layer_type
=
LayerType
.
RECURRENT_LAYER
,
parents
=
[
input
],
size
=
input
.
size
,
activation
=
act
,
reverse
=
reverse
)
...
...
@@ -2646,7 +2846,7 @@ def recurrent_group(step, input, reverse=False, name=None, targetInlink=None):
return
True
return
False
assert
(
targetInlink
==
None
or
targetInlink_in_inlinks
())
assert
(
targetInlink
==
None
or
targetInlink_in_inlinks
())
targetInlinkName
=
None
if
targetInlink
==
None
\
else
targetInlink
.
name
if
isinstance
(
targetInlink
,
LayerOutput
)
\
else
targetInlink
.
input
.
name
...
...
@@ -2661,7 +2861,8 @@ def recurrent_group(step, input, reverse=False, name=None, targetInlink=None):
return
x
.
name
RecurrentLayerGroupWithoutOutLinksBegin
(
name
=
name
,
in_links
=
map
(
map_in_links
,
in_links
),
name
=
name
,
in_links
=
map
(
map_in_links
,
in_links
),
seq_reversed
=
reverse
,
target_inlinkname
=
targetInlinkName
)
in_args
=
[]
...
...
@@ -2673,11 +2874,14 @@ def recurrent_group(step, input, reverse=False, name=None, targetInlink=None):
in_args
.
append
(
each_input
.
input
)
else
:
mem_name
=
"__%s_memory__"
%
each_input
.
input
.
name
mem
=
memory
(
name
=
mem_name
,
mem
=
memory
(
name
=
mem_name
,
is_seq
=
each_input
.
is_seq
,
size
=
each_input
.
input
.
size
,
boot_layer
=
each_input
.
input
)
with
mixed_layer
(
name
=
mem_name
,
size
=
each_input
.
input
.
size
,
with
mixed_layer
(
name
=
mem_name
,
size
=
each_input
.
input
.
size
,
act
=
IdentityActivation
())
as
mix
:
mix
+=
identity_projection
(
mem
)
in_args
.
append
(
mem
)
...
...
@@ -2720,14 +2924,15 @@ class GeneratedInput(BaseGeneratedInput):
return
maxid_layer
(
input
=
input
,
name
=
'__beam_search_predict__'
)
def
before_real_step
(
self
):
predict_id
=
memory
(
name
=
'__beam_search_predict__'
,
predict_id
=
memory
(
name
=
'__beam_search_predict__'
,
size
=
self
.
size
,
boot_with_const_id
=
self
.
bos_id
)
trg_emb
=
embedding_layer
(
input
=
predict_id
,
trg_emb
=
embedding_layer
(
input
=
predict_id
,
size
=
self
.
embedding_size
,
param_attr
=
ParamAttr
(
name
=
self
.
embedding_name
))
param_attr
=
ParamAttr
(
name
=
self
.
embedding_name
))
return
trg_emb
def
__init__
(
self
,
size
,
embedding_name
,
embedding_size
):
...
...
@@ -2760,11 +2965,13 @@ def maxid_layer(input, name=None, layer_attr=None):
"""
assert
isinstance
(
input
,
LayerOutput
)
l
=
Layer
(
name
=
name
,
l
=
Layer
(
name
=
name
,
type
=
'maxid'
,
inputs
=
[
input
.
name
],
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
))
return
LayerOutput
(
name
=
name
,
return
LayerOutput
(
name
=
name
,
layer_type
=
LayerType
.
MAXID_LAYER
,
parents
=
[
input
],
size
=
l
.
config
.
size
)
...
...
@@ -2796,11 +3003,13 @@ def out_prod_layer(input1, input2, name=None, layer_attr=None):
assert
isinstance
(
input1
,
LayerOutput
)
assert
isinstance
(
input2
,
LayerOutput
)
l
=
Layer
(
name
=
name
,
l
=
Layer
(
name
=
name
,
type
=
LayerType
.
OUT_PROD_LAYER
,
inputs
=
[
input1
.
name
,
input2
.
name
],
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
))
return
LayerOutput
(
name
=
name
,
return
LayerOutput
(
name
=
name
,
layer_type
=
LayerType
.
OUT_PROD_LAYER
,
parents
=
[
input1
,
input2
],
size
=
l
.
config
.
size
)
...
...
@@ -2832,19 +3041,27 @@ def eos_layer(input, eos_id, name=None, layer_attr=None):
:return: LayerOutput object.
:rtype: LayerOutput
"""
l
=
Layer
(
name
=
name
,
l
=
Layer
(
name
=
name
,
type
=
LayerType
.
EOSID_LAYER
,
eos_id
=
eos_id
,
inputs
=
[
input
.
name
],
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
))
return
LayerOutput
(
name
=
name
,
layer_type
=
LayerType
.
EOSID_LAYER
,
return
LayerOutput
(
name
=
name
,
layer_type
=
LayerType
.
EOSID_LAYER
,
parents
=
[
input
],
size
=
l
.
config
.
size
)
@
wrap_name_default
()
def
beam_search
(
step
,
input
,
bos_id
,
eos_id
,
beam_size
,
max_length
=
500
,
name
=
None
,
def
beam_search
(
step
,
input
,
bos_id
,
eos_id
,
beam_size
,
max_length
=
500
,
name
=
None
,
num_results_per_sample
=
None
):
"""
Beam search is a heuristic search algorithm used in sequence generation.
...
...
@@ -2918,8 +3135,7 @@ def beam_search(step, input, bos_id, eos_id, beam_size,
if
num_results_per_sample
>
beam_size
:
logger
.
warning
(
"num_results_per_sample should be less than beam_size"
)
if
isinstance
(
input
,
StaticInput
)
or
isinstance
(
input
,
BaseGeneratedInput
):
if
isinstance
(
input
,
StaticInput
)
or
isinstance
(
input
,
BaseGeneratedInput
):
input
=
[
input
]
generated_input_index
=
-
1
...
...
@@ -2944,7 +3160,8 @@ def beam_search(step, input, bos_id, eos_id, beam_size,
def
__real_step__
(
*
args
):
eos_name
=
"__%s_eos_layer__"
%
name
RecurrentLayerGroupSetGenerator
(
Generator
(
RecurrentLayerGroupSetGenerator
(
Generator
(
eos_layer_name
=
eos_name
,
max_num_frames
=
max_length
,
beam_size
=
beam_size
,
...
...
@@ -2959,11 +3176,12 @@ def beam_search(step, input, bos_id, eos_id, beam_size,
return
predict
tmp
=
recurrent_group
(
step
=
__real_step__
,
input
=
real_input
,
reverse
=
False
,
name
=
name
)
tmp
=
recurrent_group
(
step
=
__real_step__
,
input
=
real_input
,
reverse
=
False
,
name
=
name
)
return
tmp
def
__cost_input__
(
input
,
label
,
weight
=
None
):
"""
inputs and parents for cost layers.
...
...
@@ -2979,8 +3197,7 @@ def __cost_input__(input, label, weight=None):
@
wrap_name_default
()
@
layer_support
()
def
regression_cost
(
input
,
label
,
weight
=
None
,
name
=
None
,
layer_attr
=
None
):
def
regression_cost
(
input
,
label
,
weight
=
None
,
name
=
None
,
layer_attr
=
None
):
"""
Regression Layer.
...
...
@@ -3002,14 +3219,20 @@ def regression_cost(input, label, weight=None, name=None,
"""
ipts
,
parents
=
__cost_input__
(
input
,
label
,
weight
)
Layer
(
inputs
=
ipts
,
type
=
"square_error"
,
name
=
name
,
Layer
(
inputs
=
ipts
,
type
=
"square_error"
,
name
=
name
,
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
))
return
LayerOutput
(
name
,
LayerType
.
COST
,
parents
=
parents
,
size
=
1
)
@
wrap_name_default
(
"cost"
)
@
layer_support
()
def
classification_cost
(
input
,
label
,
weight
=
None
,
name
=
None
,
def
classification_cost
(
input
,
label
,
weight
=
None
,
name
=
None
,
evaluator
=
classification_error_evaluator
,
layer_attr
=
None
):
"""
...
...
@@ -3036,7 +3259,10 @@ def classification_cost(input, label, weight=None, name=None,
ipts
,
parents
=
__cost_input__
(
input
,
label
,
weight
)
Layer
(
name
=
name
,
type
=
"multi-class-cross-entropy"
,
inputs
=
ipts
,
Layer
(
name
=
name
,
type
=
"multi-class-cross-entropy"
,
inputs
=
ipts
,
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
))
def
__add_evaluator__
(
e
):
...
...
@@ -3059,9 +3285,16 @@ def classification_cost(input, label, weight=None, name=None,
return
LayerOutput
(
name
,
LayerType
.
COST
,
parents
=
parents
,
size
=
1
)
def
conv_operator
(
img
,
filter
,
filter_size
,
num_filters
,
num_channels
=
None
,
stride
=
1
,
padding
=
0
,
filter_size_y
=
None
,
stride_y
=
None
,
padding_y
=
None
):
def
conv_operator
(
img
,
filter
,
filter_size
,
num_filters
,
num_channels
=
None
,
stride
=
1
,
padding
=
0
,
filter_size_y
=
None
,
stride_y
=
None
,
padding_y
=
None
):
"""
Different from img_conv_layer, conv_op is an Operator, which can be used
in mixed_layer. And conv_op takes two inputs to perform convolution.
...
...
@@ -3117,9 +3350,11 @@ def conv_operator(img, filter, filter_size, num_filters,
if
filter
.
size
is
not
None
:
filter
.
size
=
filter_size
*
filter_size_y
*
num_filters
*
num_channels
op
=
ConvOperator
(
input_layer_names
=
[
img
.
name
,
filter
.
name
],
op
=
ConvOperator
(
input_layer_names
=
[
img
.
name
,
filter
.
name
],
num_filters
=
num_filters
,
conv_conf
=
Conv
(
filter_size
=
filter_size
,
conv_conf
=
Conv
(
filter_size
=
filter_size
,
padding
=
padding
,
stride
=
stride
,
channels
=
num_channels
,
...
...
@@ -3130,11 +3365,19 @@ def conv_operator(img, filter, filter_size, num_filters,
op
.
origin
=
[
img
,
filter
]
return
op
@
wrap_param_attr_default
()
def
conv_projection
(
input
,
filter_size
,
num_filters
,
num_channels
=
None
,
stride
=
1
,
padding
=
0
,
filter_size_y
=
None
,
stride_y
=
None
,
padding_y
=
None
,
groups
=
1
,
param_attr
=
None
):
def
conv_projection
(
input
,
filter_size
,
num_filters
,
num_channels
=
None
,
stride
=
1
,
padding
=
0
,
filter_size_y
=
None
,
stride_y
=
None
,
padding_y
=
None
,
groups
=
1
,
param_attr
=
None
):
"""
ConvProjection with a layer as input.
It performs element-wise multiplication with weight.
...
...
@@ -3206,15 +3449,17 @@ def conv_projection(input, filter_size, num_filters,
if
param_attr
.
attr
.
get
(
'initial_smart'
):
# special initial for conv layers.
init_w
=
(
2.0
/
(
filter_size
**
2
*
num_channels
))
**
0.5
init_w
=
(
2.0
/
(
filter_size
**
2
*
num_channels
))
**
0.5
param_attr
.
attr
[
"initial_mean"
]
=
0.0
param_attr
.
attr
[
"initial_std"
]
=
init_w
param_attr
.
attr
[
"initial_strategy"
]
=
0
param_attr
.
attr
[
"initial_smart"
]
=
False
proj
=
ConvProjection
(
input_layer_name
=
input
.
name
,
proj
=
ConvProjection
(
input_layer_name
=
input
.
name
,
num_filters
=
num_filters
,
conv_conf
=
Conv
(
filter_size
=
filter_size
,
conv_conf
=
Conv
(
filter_size
=
filter_size
,
padding
=
padding
,
stride
=
stride
,
channels
=
num_channels
,
...
...
@@ -3270,11 +3515,10 @@ def conv_shift_layer(a, b, name=None, layer_attr=None):
name
=
name
,
type
=
LayerType
.
CONV_SHIFT_LAYER
,
inputs
=
[
a
.
name
,
b
.
name
],
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
)
)
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
))
return
LayerOutput
(
name
,
LayerType
.
CONV_SHIFT_LAYER
,
parents
=
[
a
,
b
],
size
=
a
.
size
)
return
LayerOutput
(
name
,
LayerType
.
CONV_SHIFT_LAYER
,
parents
=
[
a
,
b
],
size
=
a
.
size
)
@
wrap_name_default
()
...
...
@@ -3282,8 +3526,14 @@ def conv_shift_layer(a, b, name=None, layer_attr=None):
@
wrap_bias_attr_default
()
@
wrap_act_default
(
act
=
LinearActivation
())
@
layer_support
(
ERROR_CLIPPING
,
DROPOUT
)
def
tensor_layer
(
a
,
b
,
size
,
act
=
None
,
name
=
None
,
param_attr
=
None
,
bias_attr
=
None
,
layer_attr
=
None
):
def
tensor_layer
(
a
,
b
,
size
,
act
=
None
,
name
=
None
,
param_attr
=
None
,
bias_attr
=
None
,
layer_attr
=
None
):
"""
This layer performs tensor operation for two input.
For example, each sample:
...
...
@@ -3332,12 +3582,10 @@ def tensor_layer(a, b, size, act=None, name=None,
type
=
LayerType
.
TENSOR_LAYER
,
active_type
=
act
.
name
,
bias
=
ParamAttr
.
to_bias
(
bias_attr
),
inputs
=
[
Input
(
a
.
name
,
**
param_attr
.
attr
),
Input
(
b
.
name
)],
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
)
)
return
LayerOutput
(
name
,
LayerType
.
TENSOR_LAYER
,
parents
=
[
a
,
b
],
activation
=
act
,
size
=
size
)
inputs
=
[
Input
(
a
.
name
,
**
param_attr
.
attr
),
Input
(
b
.
name
)],
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
))
return
LayerOutput
(
name
,
LayerType
.
TENSOR_LAYER
,
parents
=
[
a
,
b
],
activation
=
act
,
size
=
size
)
@
wrap_name_default
()
...
...
@@ -3345,11 +3593,17 @@ def tensor_layer(a, b, size, act=None, name=None,
@
wrap_bias_attr_default
()
@
wrap_act_default
()
@
layer_support
()
def
selective_fc_layer
(
input
,
select
,
size
,
act
=
None
,
name
=
None
,
def
selective_fc_layer
(
input
,
select
,
size
,
act
=
None
,
name
=
None
,
pass_generation
=
False
,
has_selected_colums
=
True
,
mul_ratio
=
0.02
,
param_attr
=
None
,
bias_attr
=
None
,
layer_attr
=
None
):
param_attr
=
None
,
bias_attr
=
None
,
layer_attr
=
None
):
"""
Selectived fully connected layer. Different from fc_layer, the output
of this layer maybe sparse. It requires an additional input to indicate
...
...
@@ -3399,8 +3653,9 @@ def selective_fc_layer(input, select, size, act=None, name=None,
if
select
.
size
is
not
None
:
assert
select
.
size
==
size
Layer
(
inputs
=
[
Input
(
ipt
.
name
,
**
attr
.
attr
)
for
ipt
,
attr
in
zip
(
input
,
param_attr
)]
+
[
select
.
name
],
inputs
=
[
Input
(
ipt
.
name
,
**
attr
.
attr
)
for
ipt
,
attr
in
zip
(
input
,
param_attr
)
]
+
[
select
.
name
],
name
=
name
,
type
=
LayerType
.
SEL_FC_LAYER
,
size
=
size
,
...
...
@@ -3409,9 +3664,11 @@ def selective_fc_layer(input, select, size, act=None, name=None,
selective_fc_pass_generation
=
pass_generation
,
has_selected_colums
=
has_selected_colums
,
selective_fc_full_mul_ratio
=
mul_ratio
,
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
)
)
return
LayerOutput
(
name
,
LayerType
.
SEL_FC_LAYER
,
list
(
input
)
+
[
select
],
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
))
return
LayerOutput
(
name
,
LayerType
.
SEL_FC_LAYER
,
list
(
input
)
+
[
select
],
activation
=
act
,
size
=
size
)
...
...
@@ -3442,15 +3699,17 @@ def sampling_id_layer(input, name=None, layer_attr=None):
name
=
name
,
type
=
LayerType
.
SAMPLING_ID_LAYER
,
inputs
=
[
Input
(
input
.
name
)],
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
)
)
return
LayerOutput
(
name
,
LayerType
.
SAMPLING_ID_LAYER
,
input
,
size
=
l
.
config
.
size
)
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
))
return
LayerOutput
(
name
,
LayerType
.
SAMPLING_ID_LAYER
,
input
,
size
=
l
.
config
.
size
)
@
wrap_name_default
()
@
layer_support
()
def
slope_intercept_layer
(
input
,
name
=
None
,
slope
=
1.0
,
intercept
=
0.0
,
def
slope_intercept_layer
(
input
,
name
=
None
,
slope
=
1.0
,
intercept
=
0.0
,
layer_attr
=
None
):
"""
This layer for applying a slope and an intercept to the input
...
...
@@ -3484,16 +3743,14 @@ def slope_intercept_layer(input, name=None, slope=1.0, intercept=0.0,
slope
=
slope
,
intercept
=
intercept
,
inputs
=
[
Input
(
input
.
name
)],
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
)
)
return
LayerOutput
(
name
,
LayerType
.
SLOPE_INTERCEPT_LAYER
,
input
,
size
=
input
.
size
)
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
))
return
LayerOutput
(
name
,
LayerType
.
SLOPE_INTERCEPT_LAYER
,
input
,
size
=
input
.
size
)
@
wrap_name_default
()
@
layer_support
()
def
linear_comb_layer
(
weights
,
vectors
,
size
=
None
,
name
=
None
,
layer_attr
=
None
):
def
linear_comb_layer
(
weights
,
vectors
,
size
=
None
,
name
=
None
,
layer_attr
=
None
):
"""
A layer for weighted sum of vectors takes two inputs.
- Input: size of weights is M
...
...
@@ -3551,10 +3808,9 @@ def linear_comb_layer(weights, vectors, size=None, name=None,
type
=
LayerType
.
LINEAR_COMBINATION_LAYER
,
size
=
size
,
inputs
=
[
Input
(
weights
.
name
),
Input
(
vectors
.
name
)],
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
)
)
return
LayerOutput
(
name
,
LayerType
.
LINEAR_COMBINATION_LAYER
,
[
weights
,
vectors
],
size
=
size
)
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
))
return
LayerOutput
(
name
,
LayerType
.
LINEAR_COMBINATION_LAYER
,
[
weights
,
vectors
],
size
=
size
)
convex_comb_layer
=
linear_comb_layer
...
...
@@ -3626,9 +3882,12 @@ def block_expand_layer(input,
if
num_channels
is
None
:
assert
input
.
num_filters
is
not
None
num_channels
=
input
.
num_filters
l
=
Layer
(
name
=
name
,
inputs
=
Input
(
input
.
name
,
block_expand
=
BlockExpand
(
channels
=
num_channels
,
l
=
Layer
(
name
=
name
,
inputs
=
Input
(
input
.
name
,
block_expand
=
BlockExpand
(
channels
=
num_channels
,
block_x
=
block_x
,
block_y
=
block_y
,
stride_x
=
stride_x
,
...
...
@@ -3636,11 +3895,10 @@ def block_expand_layer(input,
padding_x
=
padding_x
,
padding_y
=
padding_y
)),
type
=
LayerType
.
BLOCK_EXPAND
,
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
)
)
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
))
return
LayerOutput
(
name
,
LayerType
.
BLOCK_EXPAND
,
parents
=
[
input
],
size
=
l
.
config
.
size
)
return
LayerOutput
(
name
,
LayerType
.
BLOCK_EXPAND
,
parents
=
[
input
],
size
=
l
.
config
.
size
)
@
wrap_name_default
()
...
...
@@ -3701,19 +3959,24 @@ def maxout_layer(input,
assert
input
.
num_filters
is
not
None
num_channels
=
input
.
num_filters
assert
num_channels
%
groups
==
0
l
=
Layer
(
name
=
name
,
inputs
=
Input
(
input
.
name
,
maxout
=
MaxOut
(
channels
=
num_channels
,
groups
=
groups
)),
l
=
Layer
(
name
=
name
,
inputs
=
Input
(
input
.
name
,
maxout
=
MaxOut
(
channels
=
num_channels
,
groups
=
groups
)),
type
=
LayerType
.
MAXOUT
,
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
))
return
LayerOutput
(
name
,
LayerType
.
MAXOUT
,
parents
=
[
input
],
size
=
l
.
config
.
size
)
return
LayerOutput
(
name
,
LayerType
.
MAXOUT
,
parents
=
[
input
],
size
=
l
.
config
.
size
)
@
wrap_name_default
()
@
layer_support
()
def
ctc_layer
(
input
,
label
,
size
=
None
,
name
=
None
,
norm_by_times
=
False
,
def
ctc_layer
(
input
,
label
,
size
=
None
,
name
=
None
,
norm_by_times
=
False
,
layer_attr
=
None
):
"""
Connectionist Temporal Classification (CTC) is designed for temporal
...
...
@@ -3769,15 +4032,19 @@ def ctc_layer(input, label, size=None, name=None, norm_by_times=False,
size
=
size
,
norm_by_times
=
norm_by_times
,
inputs
=
[
input
.
name
,
label
.
name
],
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
)
)
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
))
return
LayerOutput
(
name
,
LayerType
.
CTC_LAYER
,
[
input
,
label
],
size
=
size
)
@
wrap_name_default
()
@
wrap_param_attr_default
()
@
layer_support
()
def
crf_layer
(
input
,
label
,
size
=
None
,
weight
=
None
,
param_attr
=
None
,
name
=
None
,
def
crf_layer
(
input
,
label
,
size
=
None
,
weight
=
None
,
param_attr
=
None
,
name
=
None
,
layer_attr
=
None
):
"""
A layer for calculating the cost of sequential conditional random
...
...
@@ -3819,8 +4086,7 @@ def crf_layer(input, label, size=None, weight=None, param_attr=None, name=None,
else
:
assert
size
==
input
.
size
ipts
=
[
Input
(
input
.
name
,
**
param_attr
.
attr
),
Input
(
label
.
name
)]
ipts
=
[
Input
(
input
.
name
,
**
param_attr
.
attr
),
Input
(
label
.
name
)]
if
weight
is
not
None
:
ipts
.
append
(
Input
(
weight
.
name
))
...
...
@@ -3829,8 +4095,7 @@ def crf_layer(input, label, size=None, weight=None, param_attr=None, name=None,
type
=
LayerType
.
CRF_LAYER
,
size
=
size
,
inputs
=
ipts
,
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
)
)
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
))
parents
=
[
input
,
label
]
if
weight
is
not
None
:
parents
.
append
(
weight
)
...
...
@@ -3843,7 +4108,11 @@ def crf_layer(input, label, size=None, weight=None, param_attr=None, name=None,
@
wrap_name_default
()
@
wrap_param_attr_default
()
@
layer_support
()
def
crf_decoding_layer
(
input
,
size
,
label
=
None
,
param_attr
=
None
,
name
=
None
,
def
crf_decoding_layer
(
input
,
size
,
label
=
None
,
param_attr
=
None
,
name
=
None
,
layer_attr
=
None
):
"""
A layer for calculating the decoding sequence of sequential conditional
...
...
@@ -3880,8 +4149,7 @@ def crf_decoding_layer(input, size, label=None, param_attr=None, name=None,
type
=
LayerType
.
CRF_DECODING_LAYER
,
size
=
size
,
inputs
=
ipts
,
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
)
)
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
))
parents
=
[
input
]
if
label
is
not
None
:
parents
.
append
(
label
)
...
...
@@ -3890,12 +4158,19 @@ def crf_decoding_layer(input, size, label=None, param_attr=None, name=None,
# classes.
return
LayerOutput
(
name
,
LayerType
.
CRF_DECODING_LAYER
,
parents
,
size
=
1
)
@
wrap_bias_attr_default
(
has_bias
=
True
)
@
wrap_name_default
()
@
layer_support
()
def
nce_layer
(
input
,
label
,
num_classes
,
weight
=
None
,
num_neg_samples
=
10
,
neg_distribution
=
None
,
name
=
None
,
bias_attr
=
None
,
layer_attr
=
None
):
def
nce_layer
(
input
,
label
,
num_classes
,
weight
=
None
,
num_neg_samples
=
10
,
neg_distribution
=
None
,
name
=
None
,
bias_attr
=
None
,
layer_attr
=
None
):
"""
Noise-contrastive estimation.
Implements the method in the following paper:
...
...
@@ -3964,10 +4239,10 @@ def nce_layer(input, label, num_classes, weight=None,
num_neg_samples
=
num_neg_samples
,
inputs
=
ipts_for_layer
,
bias
=
ParamAttr
.
to_bias
(
bias_attr
),
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
)
)
return
LayerOutput
(
name
,
LayerType
.
NCE_LAYER
,
parents
=
parents
,
size
=
l
.
config
.
size
)
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
)
)
return
LayerOutput
(
name
,
LayerType
.
NCE_LAYER
,
parents
=
parents
,
size
=
l
.
config
.
size
)
"""
following are cost Layers.
...
...
@@ -3976,7 +4251,13 @@ following are cost Layers.
@
wrap_name_default
()
@
layer_support
()
def
rank_cost
(
left
,
right
,
label
,
weight
=
None
,
name
=
None
,
coeff
=
1.0
,
layer_attr
=
None
):
def
rank_cost
(
left
,
right
,
label
,
weight
=
None
,
name
=
None
,
coeff
=
1.0
,
layer_attr
=
None
):
"""
A cost Layer for learning to rank using gradient descent. Details can refer
to `papers <http://research.microsoft.com/en-us/um/people/cburges/papers/
...
...
@@ -4035,19 +4316,24 @@ def rank_cost(left, right, label, weight=None, name=None, coeff=1.0, layer_attr=
ipts
.
append
(
weight
.
name
)
parents
.
append
(
weight
)
Layer
(
name
=
name
,
Layer
(
name
=
name
,
type
=
LayerType
.
RANK_COST
,
inputs
=
ipts
,
coeff
=
coeff
,
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
)
)
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
))
return
LayerOutput
(
name
,
LayerType
.
RANK_COST
,
parents
=
parents
,
size
=
1
)
@
wrap_name_default
()
@
layer_support
()
def
lambda_cost
(
input
,
score
,
name
,
NDCG_num
=
5
,
max_sort_size
=-
1
,
layer_attr
=
None
):
def
lambda_cost
(
input
,
score
,
name
,
NDCG_num
=
5
,
max_sort_size
=-
1
,
layer_attr
=
None
):
"""
lambdaCost for lambdaRank LTR approach.
...
...
@@ -4086,16 +4372,16 @@ def lambda_cost(input, score, name, NDCG_num=5, max_sort_size=-1, layer_attr=Non
assert
isinstance
(
input
,
LayerOutput
)
and
isinstance
(
score
,
LayerOutput
)
if
score
.
size
is
not
None
:
assert
score
.
size
==
1
Layer
(
name
=
name
,
Layer
(
name
=
name
,
type
=
LayerType
.
LAMBDA_COST
,
inputs
=
[
input
.
name
,
score
.
name
],
NDCG_num
=
NDCG_num
,
max_sort_size
=
max_sort_size
,
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
)
)
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
))
return
LayerOutput
(
name
,
LayerType
.
LAMBDA_COST
,
parents
=
[
input
,
score
],
size
=
1
)
return
LayerOutput
(
name
,
LayerType
.
LAMBDA_COST
,
parents
=
[
input
,
score
],
size
=
1
)
@
wrap_name_default
()
...
...
@@ -4123,19 +4409,22 @@ def cross_entropy(input, label, name=None, coeff=1.0, layer_attr=None):
:rtype: LayerOutput.
"""
Layer
(
name
=
name
,
Layer
(
name
=
name
,
type
=
LayerType
.
CROSS_ENTROPY
,
inputs
=
[
input
.
name
,
label
.
name
],
coeff
=
coeff
,
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
)
)
return
LayerOutput
(
name
,
LayerType
.
CROSS_ENTROPY
,
parents
=
[
input
,
label
],
size
=
1
)
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
))
return
LayerOutput
(
name
,
LayerType
.
CROSS_ENTROPY
,
parents
=
[
input
,
label
],
size
=
1
)
@
wrap_name_default
()
@
layer_support
()
def
cross_entropy_with_selfnorm
(
input
,
label
,
name
=
None
,
coeff
=
1.0
,
def
cross_entropy_with_selfnorm
(
input
,
label
,
name
=
None
,
coeff
=
1.0
,
softmax_selfnorm_alpha
=
0.1
,
layer_attr
=
None
):
"""
...
...
@@ -4161,17 +4450,19 @@ def cross_entropy_with_selfnorm(input, label, name=None, coeff=1.0,
:return: LayerOutput object.
:rtype: LayerOutput.
"""
Layer
(
name
=
name
,
Layer
(
name
=
name
,
type
=
LayerType
.
CROSS_ENTROPY_WITH_SELFNORM
,
inputs
=
[
input
.
name
,
label
.
name
],
coeff
=
coeff
,
softmax_selfnorm_alpha
=
softmax_selfnorm_alpha
,
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
)
)
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
))
return
LayerOutput
(
name
,
return
LayerOutput
(
name
,
LayerType
.
CROSS_ENTROPY_WITH_SELFNORM
,
parents
=
[
input
,
label
],
size
=
1
)
parents
=
[
input
,
label
],
size
=
1
)
@
wrap_name_default
()
...
...
@@ -4194,16 +4485,13 @@ def sum_cost(input, name=None, layer_attr=None):
:rtype: LayerOutput.
"""
assert
isinstance
(
input
,
LayerOutput
)
Layer
(
name
=
name
,
Layer
(
name
=
name
,
type
=
LayerType
.
SUM_COST
,
inputs
=
[
input
.
name
],
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
)
)
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
))
return
LayerOutput
(
name
,
LayerType
.
SUM_COST
,
parents
=
[
input
],
size
=
1
)
return
LayerOutput
(
name
,
LayerType
.
SUM_COST
,
parents
=
[
input
],
size
=
1
)
@
wrap_name_default
()
...
...
@@ -4233,18 +4521,21 @@ def huber_cost(input, label, name=None, coeff=1.0, layer_attr=None):
assert
isinstance
(
input
,
LayerOutput
)
if
input
.
size
is
not
None
:
assert
input
.
size
==
1
Layer
(
name
=
name
,
Layer
(
name
=
name
,
type
=
LayerType
.
HUBER
,
inputs
=
[
input
.
name
,
label
.
name
],
coeff
=
coeff
,
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
)
)
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
))
return
LayerOutput
(
name
,
LayerType
.
HUBER
,
parents
=
[
input
,
label
],
size
=
1
)
@
wrap_name_default
()
@
layer_support
()
def
multi_binary_label_cross_entropy
(
input
,
label
,
name
=
None
,
coeff
=
1.0
,
def
multi_binary_label_cross_entropy
(
input
,
label
,
name
=
None
,
coeff
=
1.0
,
layer_attr
=
None
):
"""
A loss layer for multi binary label cross entropy.
...
...
@@ -4272,15 +4563,19 @@ def multi_binary_label_cross_entropy(input, label, name=None, coeff=1.0,
if
input
.
activation
is
None
or
\
not
isinstance
(
input
.
activation
,
SigmoidActivation
):
logger
.
log
(
logging
.
WARN
,
logger
.
log
(
logging
.
WARN
,
"%s is not recommend for multi_binary_label_cross_entropy's activation, "
"maybe the sigmoid is better"
%
repr
(
input
.
activation
))
Layer
(
name
=
name
,
Layer
(
name
=
name
,
type
=
LayerType
.
MULTI_BIN_LABEL_CROSS_ENTROPY
,
inputs
=
[
input
.
name
,
label
.
name
],
coeff
=
coeff
,
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
)
)
return
LayerOutput
(
name
,
LayerType
.
MULTI_BIN_LABEL_CROSS_ENTROPY
,
parents
=
[
input
,
label
],
size
=
1
)
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
))
return
LayerOutput
(
name
,
LayerType
.
MULTI_BIN_LABEL_CROSS_ENTROPY
,
parents
=
[
input
,
label
],
size
=
1
)
python/paddle/trainer_config_helpers/math.py
浏览文件 @
58e1b3b3
...
...
@@ -21,16 +21,18 @@ from paddle.trainer.config_parser import logger
__all__
=
[]
def
register_unary_math_op
(
op_name
,
act
):
def
op
(
input
,
name
=
None
):
return
mixed_layer
(
input
=
[
identity_projection
(
input
=
input
)],
name
=
name
,
act
=
act
)
return
mixed_layer
(
input
=
[
identity_projection
(
input
=
input
)],
name
=
name
,
act
=
act
)
op
=
wrap_name_default
(
op_name
)(
op
)
op
.
__doc__
=
type
(
act
).
__doc__
globals
()[
op_name
]
=
op
__all__
.
append
(
op_name
)
register_unary_math_op
(
'exp'
,
act
.
ExpActivation
())
register_unary_math_op
(
'log'
,
act
.
LogActivation
())
register_unary_math_op
(
'abs'
,
act
.
AbsActivation
())
...
...
@@ -38,6 +40,7 @@ register_unary_math_op('sigmoid', act.SigmoidActivation())
register_unary_math_op
(
'tanh'
,
act
.
TanhActivation
())
register_unary_math_op
(
'square'
,
act
.
SquareActivation
())
def
add
(
layeroutput
,
other
):
if
is_compatible_with
(
other
,
float
):
return
slope_intercept_layer
(
input
=
layeroutput
,
intercept
=
other
)
...
...
@@ -45,8 +48,10 @@ def add(layeroutput, other):
logger
.
fatal
(
"LayerOutput can only be added with"
" another LayerOutput or a number"
)
if
layeroutput
.
size
==
other
.
size
:
return
mixed_layer
(
input
=
[
identity_projection
(
input
=
layeroutput
),
identity_projection
(
input
=
other
)])
return
mixed_layer
(
input
=
[
identity_projection
(
input
=
layeroutput
),
identity_projection
(
input
=
other
)
])
if
other
.
size
!=
1
and
layeroutput
.
size
!=
1
:
logger
.
fatal
(
"Two LayerOutput can be added only if they have equal size"
" or one of their sizes is 1. sizes are %s and %s"
%
...
...
@@ -56,12 +61,15 @@ def add(layeroutput, other):
layeroutput
=
other
other
=
tmp
other
=
repeat_layer
(
other
,
layeroutput
.
size
)
return
mixed_layer
(
input
=
[
identity_projection
(
input
=
layeroutput
),
identity_projection
(
input
=
other
)])
return
mixed_layer
(
input
=
[
identity_projection
(
input
=
layeroutput
),
identity_projection
(
input
=
other
)
])
LayerOutput
.
__radd__
=
add
LayerOutput
.
__add__
=
add
def
sub
(
layeroutput
,
other
):
if
is_compatible_with
(
other
,
float
):
return
slope_intercept_layer
(
input
=
layeroutput
,
intercept
=
other
)
...
...
@@ -71,14 +79,18 @@ def sub(layeroutput, other):
neg
=
slope_intercept_layer
(
input
=
other
,
slope
=-
1.0
)
return
add
(
layeroutput
,
neg
)
LayerOutput
.
__sub__
=
sub
def
rsub
(
layeroutput
,
other
):
neg
=
slope_intercept_layer
(
input
=
layeroutput
,
slope
=-
1.0
)
return
add
(
neg
,
other
)
LayerOutput
.
__rsub__
=
rsub
def
mul
(
layeroutput
,
other
):
if
is_compatible_with
(
other
,
float
):
return
slope_intercept_layer
(
input
=
layeroutput
,
slope
=
other
)
...
...
@@ -93,5 +105,6 @@ def mul(layeroutput, other):
logger
.
fatal
(
"At least one of the operand of '*' must be a number"
" or a LayerOutput with size=1"
)
LayerOutput
.
__mul__
=
mul
LayerOutput
.
__rmul__
=
mul
python/paddle/trainer_config_helpers/networks.py
浏览文件 @
58e1b3b3
...
...
@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
"""
...
...
@@ -25,28 +24,32 @@ from layers import * # There are too many layers used in network, so import *
from
poolings
import
MaxPooling
,
SumPooling
from
paddle.trainer.config_parser
import
*
__all__
=
[
'sequence_conv_pool'
,
'simple_lstm'
,
"simple_img_conv_pool"
,
"img_conv_bn_pool"
,
'dropout_layer'
,
'lstmemory_group'
,
'lstmemory_unit'
,
'small_vgg'
,
'img_conv_group'
,
'vgg_16_network
'
,
'gru_unit'
,
'gru_group'
,
'simple_gru'
,
'simple_attention
'
,
'simple_gru2'
,
'bidirectional_gru'
,
'text_conv_pool
'
,
'bidirectional_lstm'
,
'inputs'
,
'outputs'
]
__all__
=
[
'sequence_conv_pool'
,
'simple_lstm'
,
"simple_img_conv_pool"
,
"img_conv_bn_pool"
,
'dropout_layer'
,
'lstmemory_group'
,
'lstmemory_unit
'
,
'small_vgg'
,
'img_conv_group'
,
'vgg_16_network'
,
'gru_unit'
,
'gru_group
'
,
'simple_gru'
,
'simple_attention'
,
'simple_gru2'
,
'bidirectional_gru
'
,
'text_conv_pool'
,
'bidirectional_lstm'
,
'inputs'
,
'outputs'
]
######################################################
# Text CNN #
######################################################
@
wrap_name_default
(
"sequence_conv_pooling"
)
def
sequence_conv_pool
(
input
,
context_len
,
hidden_size
,
context_len
,
hidden_size
,
name
=
None
,
context_start
=
None
,
pool_type
=
None
,
context_proj_layer_name
=
None
,
pool_type
=
None
,
context_proj_layer_name
=
None
,
context_proj_param_attr
=
False
,
fc_layer_name
=
None
,
fc_param_attr
=
None
,
fc_bias_attr
=
None
,
fc_act
=
None
,
fc_bias_attr
=
None
,
fc_act
=
None
,
pool_bias_attr
=
None
,
fc_attr
=
None
,
context_attr
=
None
,
...
...
@@ -101,21 +104,31 @@ def sequence_conv_pool(input,
context_proj_layer_name
=
"%s_conv_proj"
%
name
\
if
context_proj_layer_name
is
None
else
context_proj_layer_name
with
mixed_layer
(
name
=
context_proj_layer_name
,
with
mixed_layer
(
name
=
context_proj_layer_name
,
size
=
input
.
size
*
context_len
,
act
=
LinearActivation
(),
layer_attr
=
context_attr
)
as
m
:
m
+=
context_projection
(
input
,
context_len
=
context_len
,
m
+=
context_projection
(
input
,
context_len
=
context_len
,
context_start
=
context_start
,
padding_attr
=
context_proj_param_attr
)
fc_layer_name
=
"%s_conv_fc"
%
name
\
if
fc_layer_name
is
None
else
fc_layer_name
fl
=
fc_layer
(
name
=
fc_layer_name
,
input
=
m
,
size
=
hidden_size
,
act
=
fc_act
,
layer_attr
=
fc_attr
,
param_attr
=
fc_param_attr
,
bias_attr
=
fc_bias_attr
)
fl
=
fc_layer
(
name
=
fc_layer_name
,
input
=
m
,
size
=
hidden_size
,
act
=
fc_act
,
layer_attr
=
fc_attr
,
param_attr
=
fc_param_attr
,
bias_attr
=
fc_bias_attr
)
return
pooling_layer
(
name
=
name
,
input
=
fl
,
return
pooling_layer
(
name
=
name
,
input
=
fl
,
pooling_type
=
pool_type
,
bias_attr
=
pool_bias_attr
,
layer_attr
=
pool_attr
)
...
...
@@ -123,18 +136,30 @@ def sequence_conv_pool(input,
text_conv_pool
=
sequence_conv_pool
############################################################################
# Images #
############################################################################
@
wrap_name_default
(
"conv_pool"
)
def
simple_img_conv_pool
(
input
,
filter_size
,
num_filters
,
pool_size
,
name
=
None
,
pool_type
=
None
,
act
=
None
,
groups
=
1
,
conv_stride
=
1
,
conv_padding
=
0
,
bias_attr
=
None
,
num_channel
=
None
,
param_attr
=
None
,
shared_bias
=
True
,
conv_layer_attr
=
None
,
pool_stride
=
1
,
pool_padding
=
0
,
pool_layer_attr
=
None
):
def
simple_img_conv_pool
(
input
,
filter_size
,
num_filters
,
pool_size
,
name
=
None
,
pool_type
=
None
,
act
=
None
,
groups
=
1
,
conv_stride
=
1
,
conv_padding
=
0
,
bias_attr
=
None
,
num_channel
=
None
,
param_attr
=
None
,
shared_bias
=
True
,
conv_layer_attr
=
None
,
pool_stride
=
1
,
pool_padding
=
0
,
pool_layer_attr
=
None
):
"""
Simple image convolution and pooling group.
...
...
@@ -179,29 +204,52 @@ def simple_img_conv_pool(input, filter_size, num_filters, pool_size, name=None,
:return: Layer's output
:rtype: LayerOutput
"""
_conv_
=
img_conv_layer
(
name
=
"%s_conv"
%
name
,
input
=
input
,
_conv_
=
img_conv_layer
(
name
=
"%s_conv"
%
name
,
input
=
input
,
filter_size
=
filter_size
,
num_filters
=
num_filters
,
num_channels
=
num_channel
,
act
=
act
,
groups
=
groups
,
num_filters
=
num_filters
,
num_channels
=
num_channel
,
act
=
act
,
groups
=
groups
,
stride
=
conv_stride
,
padding
=
conv_padding
,
bias_attr
=
bias_attr
,
param_attr
=
param_attr
,
shared_biases
=
shared_bias
,
padding
=
conv_padding
,
bias_attr
=
bias_attr
,
param_attr
=
param_attr
,
shared_biases
=
shared_bias
,
layer_attr
=
conv_layer_attr
)
return
img_pool_layer
(
name
=
"%s_pool"
%
name
,
input
=
_conv_
,
return
img_pool_layer
(
name
=
"%s_pool"
%
name
,
input
=
_conv_
,
pool_size
=
pool_size
,
pool_type
=
pool_type
,
stride
=
pool_stride
,
pool_type
=
pool_type
,
stride
=
pool_stride
,
padding
=
pool_padding
,
layer_attr
=
pool_layer_attr
)
@
wrap_name_default
(
"conv_bn_pool"
)
def
img_conv_bn_pool
(
input
,
filter_size
,
num_filters
,
pool_size
,
name
=
None
,
pool_type
=
None
,
act
=
None
,
groups
=
1
,
conv_stride
=
1
,
conv_padding
=
0
,
conv_bias_attr
=
None
,
num_channel
=
None
,
conv_param_attr
=
None
,
shared_bias
=
True
,
conv_layer_attr
=
None
,
bn_param_attr
=
None
,
bn_bias_attr
=
None
,
bn_layer_attr
=
None
,
pool_stride
=
1
,
pool_padding
=
0
,
pool_layer_attr
=
None
):
def
img_conv_bn_pool
(
input
,
filter_size
,
num_filters
,
pool_size
,
name
=
None
,
pool_type
=
None
,
act
=
None
,
groups
=
1
,
conv_stride
=
1
,
conv_padding
=
0
,
conv_bias_attr
=
None
,
num_channel
=
None
,
conv_param_attr
=
None
,
shared_bias
=
True
,
conv_layer_attr
=
None
,
bn_param_attr
=
None
,
bn_bias_attr
=
None
,
bn_layer_attr
=
None
,
pool_stride
=
1
,
pool_padding
=
0
,
pool_layer_attr
=
None
):
"""
Convolution, batch normalization, pooling group.
...
...
@@ -248,31 +296,42 @@ def img_conv_bn_pool(input, filter_size, num_filters, pool_size, name=None,
:return: Layer groups output
:rtype: LayerOutput
"""
__conv__
=
img_conv_layer
(
name
=
"%s_conv"
%
name
,
input
=
input
,
filter_size
=
filter_size
,
num_filters
=
num_filters
,
num_channels
=
num_channel
,
act
=
LinearActivation
(),
groups
=
groups
,
stride
=
conv_stride
,
padding
=
conv_padding
,
__conv__
=
img_conv_layer
(
name
=
"%s_conv"
%
name
,
input
=
input
,
filter_size
=
filter_size
,
num_filters
=
num_filters
,
num_channels
=
num_channel
,
act
=
LinearActivation
(),
groups
=
groups
,
stride
=
conv_stride
,
padding
=
conv_padding
,
bias_attr
=
conv_bias_attr
,
param_attr
=
conv_param_attr
,
shared_biases
=
shared_bias
,
layer_attr
=
conv_layer_attr
)
__bn__
=
batch_norm_layer
(
name
=
"%s_bn"
%
name
,
input
=
__conv__
,
act
=
act
,
bias_attr
=
bn_bias_attr
,
param_attr
=
bn_param_attr
,
__bn__
=
batch_norm_layer
(
name
=
"%s_bn"
%
name
,
input
=
__conv__
,
act
=
act
,
bias_attr
=
bn_bias_attr
,
param_attr
=
bn_param_attr
,
layer_attr
=
bn_layer_attr
)
return
img_pool_layer
(
name
=
"%s_pool"
%
name
,
input
=
__bn__
,
pool_type
=
pool_type
,
pool_size
=
pool_size
,
stride
=
pool_stride
,
return
img_pool_layer
(
name
=
"%s_pool"
%
name
,
input
=
__bn__
,
pool_type
=
pool_type
,
pool_size
=
pool_size
,
stride
=
pool_stride
,
padding
=
pool_padding
,
layer_attr
=
pool_layer_attr
)
@
wrap_act_default
(
param_names
=
[
'conv_act'
],
act
=
ReluActivation
())
@
wrap_param_default
(
param_names
=
[
'pool_type'
],
default_factory
=
lambda
_
:
MaxPooling
())
def
img_conv_group
(
input
,
conv_num_filter
,
@
wrap_act_default
(
param_names
=
[
'conv_act'
],
act
=
ReluActivation
())
@
wrap_param_default
(
param_names
=
[
'pool_type'
],
default_factory
=
lambda
_
:
MaxPooling
())
def
img_conv_group
(
input
,
conv_num_filter
,
pool_size
,
num_channels
=
None
,
conv_padding
=
1
,
...
...
@@ -333,7 +392,9 @@ def img_conv_group(input, conv_num_filter,
else
:
extra_kwargs
[
'act'
]
=
conv_act
[
i
]
tmp
=
img_conv_layer
(
input
=
tmp
,
padding
=
conv_padding
[
i
],
tmp
=
img_conv_layer
(
input
=
tmp
,
padding
=
conv_padding
[
i
],
filter_size
=
conv_filter_size
[
i
],
num_filters
=
conv_num_filter
[
i
],
**
extra_kwargs
)
...
...
@@ -345,16 +406,20 @@ def img_conv_group(input, conv_num_filter,
if
dropout
==
0
or
abs
(
dropout
)
<
1e-5
:
# dropout not set
tmp
=
batch_norm_layer
(
input
=
tmp
,
act
=
conv_act
[
i
])
else
:
tmp
=
batch_norm_layer
(
input
=
tmp
,
act
=
conv_act
[
i
],
tmp
=
batch_norm_layer
(
input
=
tmp
,
act
=
conv_act
[
i
],
layer_attr
=
ExtraAttr
(
drop_rate
=
dropout
))
return
img_pool_layer
(
input
=
tmp
,
stride
=
pool_stride
,
pool_size
=
pool_size
,
pool_type
=
pool_type
)
return
img_pool_layer
(
input
=
tmp
,
stride
=
pool_stride
,
pool_size
=
pool_size
,
pool_type
=
pool_type
)
def
small_vgg
(
input_image
,
num_channels
,
num_classes
):
def
__vgg__
(
ipt
,
num_filter
,
times
,
dropouts
,
num_channels_
=
None
):
return
img_conv_group
(
input
=
ipt
,
num_channels
=
num_channels_
,
return
img_conv_group
(
input
=
ipt
,
num_channels
=
num_channels_
,
pool_size
=
2
,
pool_stride
=
2
,
conv_num_filter
=
[
num_filter
]
*
times
,
...
...
@@ -368,10 +433,13 @@ def small_vgg(input_image, num_channels, num_classes):
tmp
=
__vgg__
(
tmp
,
128
,
2
,
[
0.4
,
0
])
tmp
=
__vgg__
(
tmp
,
256
,
3
,
[
0.4
,
0.4
,
0
])
tmp
=
__vgg__
(
tmp
,
512
,
3
,
[
0.4
,
0.4
,
0
])
tmp
=
img_pool_layer
(
input
=
tmp
,
stride
=
2
,
pool_size
=
2
,
pool_type
=
MaxPooling
())
tmp
=
img_pool_layer
(
input
=
tmp
,
stride
=
2
,
pool_size
=
2
,
pool_type
=
MaxPooling
())
tmp
=
dropout_layer
(
input
=
tmp
,
dropout_rate
=
0.5
)
tmp
=
fc_layer
(
input
=
tmp
,
size
=
512
,
layer_attr
=
ExtraAttr
(
drop_rate
=
0.5
),
tmp
=
fc_layer
(
input
=
tmp
,
size
=
512
,
layer_attr
=
ExtraAttr
(
drop_rate
=
0.5
),
act
=
LinearActivation
())
tmp
=
batch_norm_layer
(
input
=
tmp
,
act
=
ReluActivation
())
return
fc_layer
(
input
=
tmp
,
size
=
num_classes
,
act
=
SoftmaxActivation
())
...
...
@@ -389,36 +457,66 @@ def vgg_16_network(input_image, num_channels, num_classes=1000):
:return:
"""
tmp
=
img_conv_group
(
input
=
input_image
,
num_channels
=
num_channels
,
conv_padding
=
1
,
conv_num_filter
=
[
64
,
64
],
tmp
=
img_conv_group
(
input
=
input_image
,
num_channels
=
num_channels
,
conv_padding
=
1
,
conv_num_filter
=
[
64
,
64
],
conv_filter_size
=
3
,
conv_act
=
ReluActivation
(),
pool_size
=
2
,
conv_act
=
ReluActivation
(),
pool_size
=
2
,
pool_stride
=
2
,
pool_type
=
MaxPooling
())
tmp
=
img_conv_group
(
input
=
tmp
,
conv_num_filter
=
[
128
,
128
],
conv_padding
=
1
,
conv_filter_size
=
3
,
conv_act
=
ReluActivation
(),
pool_stride
=
2
,
pool_type
=
MaxPooling
(),
tmp
=
img_conv_group
(
input
=
tmp
,
conv_num_filter
=
[
128
,
128
],
conv_padding
=
1
,
conv_filter_size
=
3
,
conv_act
=
ReluActivation
(),
pool_stride
=
2
,
pool_type
=
MaxPooling
(),
pool_size
=
2
)
tmp
=
img_conv_group
(
input
=
tmp
,
conv_num_filter
=
[
256
,
256
,
256
],
tmp
=
img_conv_group
(
input
=
tmp
,
conv_num_filter
=
[
256
,
256
,
256
],
conv_padding
=
1
,
conv_filter_size
=
3
,
conv_act
=
ReluActivation
(),
pool_stride
=
2
,
pool_type
=
MaxPooling
(),
pool_size
=
2
)
conv_filter_size
=
3
,
conv_act
=
ReluActivation
(),
pool_stride
=
2
,
pool_type
=
MaxPooling
(),
pool_size
=
2
)
tmp
=
img_conv_group
(
input
=
tmp
,
conv_num_filter
=
[
512
,
512
,
512
],
tmp
=
img_conv_group
(
input
=
tmp
,
conv_num_filter
=
[
512
,
512
,
512
],
conv_padding
=
1
,
conv_filter_size
=
3
,
conv_act
=
ReluActivation
(),
pool_stride
=
2
,
pool_type
=
MaxPooling
(),
pool_size
=
2
)
tmp
=
img_conv_group
(
input
=
tmp
,
conv_num_filter
=
[
512
,
512
,
512
],
conv_filter_size
=
3
,
conv_act
=
ReluActivation
(),
pool_stride
=
2
,
pool_type
=
MaxPooling
(),
pool_size
=
2
)
tmp
=
img_conv_group
(
input
=
tmp
,
conv_num_filter
=
[
512
,
512
,
512
],
conv_padding
=
1
,
conv_filter_size
=
3
,
conv_act
=
ReluActivation
(),
pool_stride
=
2
,
pool_type
=
MaxPooling
(),
pool_size
=
2
)
conv_filter_size
=
3
,
conv_act
=
ReluActivation
(),
pool_stride
=
2
,
pool_type
=
MaxPooling
(),
pool_size
=
2
)
tmp
=
fc_layer
(
input
=
tmp
,
size
=
4096
,
act
=
ReluActivation
(),
tmp
=
fc_layer
(
input
=
tmp
,
size
=
4096
,
act
=
ReluActivation
(),
layer_attr
=
ExtraAttr
(
drop_rate
=
0.5
))
tmp
=
fc_layer
(
input
=
tmp
,
size
=
4096
,
act
=
ReluActivation
(),
tmp
=
fc_layer
(
input
=
tmp
,
size
=
4096
,
act
=
ReluActivation
(),
layer_attr
=
ExtraAttr
(
drop_rate
=
0.5
))
return
fc_layer
(
input
=
tmp
,
size
=
num_classes
,
act
=
SoftmaxActivation
())
...
...
@@ -428,10 +526,19 @@ def vgg_16_network(input_image, num_channels, num_classes=1000):
# Recurrent #
############################################################################
@
wrap_name_default
(
"lstm"
)
def
simple_lstm
(
input
,
size
,
name
=
None
,
reverse
=
False
,
mat_param_attr
=
None
,
bias_param_attr
=
None
,
inner_param_attr
=
None
,
act
=
None
,
gate_act
=
None
,
state_act
=
None
,
mixed_layer_attr
=
None
,
def
simple_lstm
(
input
,
size
,
name
=
None
,
reverse
=
False
,
mat_param_attr
=
None
,
bias_param_attr
=
None
,
inner_param_attr
=
None
,
act
=
None
,
gate_act
=
None
,
state_act
=
None
,
mixed_layer_attr
=
None
,
lstm_cell_attr
=
None
):
"""
Simple LSTM Cell.
...
...
@@ -485,23 +592,38 @@ def simple_lstm(input, size, name=None, reverse=False, mat_param_attr=None,
:rtype: LayerOutput
"""
fc_name
=
'lstm_transform_%s'
%
name
with
mixed_layer
(
name
=
fc_name
,
size
=
size
*
4
,
with
mixed_layer
(
name
=
fc_name
,
size
=
size
*
4
,
act
=
IdentityActivation
(),
layer_attr
=
mixed_layer_attr
,
bias_attr
=
False
)
as
m
:
layer_attr
=
mixed_layer_attr
,
bias_attr
=
False
)
as
m
:
m
+=
full_matrix_projection
(
input
,
param_attr
=
mat_param_attr
)
return
lstmemory
(
name
=
name
,
input
=
m
,
reverse
=
reverse
,
return
lstmemory
(
name
=
name
,
input
=
m
,
reverse
=
reverse
,
bias_attr
=
bias_param_attr
,
param_attr
=
inner_param_attr
,
act
=
act
,
gate_act
=
gate_act
,
state_act
=
state_act
,
param_attr
=
inner_param_attr
,
act
=
act
,
gate_act
=
gate_act
,
state_act
=
state_act
,
layer_attr
=
lstm_cell_attr
)
@
wrap_name_default
(
'lstm_unit'
)
def
lstmemory_unit
(
input
,
name
=
None
,
size
=
None
,
param_attr
=
None
,
act
=
None
,
gate_act
=
None
,
state_act
=
None
,
mixed_bias_attr
=
None
,
lstm_bias_attr
=
None
,
mixed_layer_attr
=
None
,
lstm_layer_attr
=
None
,
def
lstmemory_unit
(
input
,
name
=
None
,
size
=
None
,
param_attr
=
None
,
act
=
None
,
gate_act
=
None
,
state_act
=
None
,
mixed_bias_attr
=
None
,
lstm_bias_attr
=
None
,
mixed_layer_attr
=
None
,
lstm_layer_attr
=
None
,
get_output_layer_attr
=
None
):
"""
Define calculations that a LSTM unit performs in a single time step.
...
...
@@ -572,8 +694,10 @@ def lstmemory_unit(input, name=None, size=None, param_attr=None,
out_mem
=
memory
(
name
=
name
,
size
=
size
)
state_mem
=
memory
(
name
=
"%s_state"
%
name
,
size
=
size
)
with
mixed_layer
(
name
=
"%s_input_recurrent"
%
name
,
size
=
size
*
4
,
bias_attr
=
mixed_bias_attr
,
with
mixed_layer
(
name
=
"%s_input_recurrent"
%
name
,
size
=
size
*
4
,
bias_attr
=
mixed_bias_attr
,
layer_attr
=
mixed_layer_attr
,
act
=
IdentityActivation
())
as
m
:
m
+=
identity_projection
(
input
=
input
)
...
...
@@ -588,9 +712,9 @@ def lstmemory_unit(input, name=None, size=None, param_attr=None,
act
=
act
,
gate_act
=
gate_act
,
state_act
=
state_act
,
layer_attr
=
lstm_layer_attr
)
get_output_layer
(
name
=
'%s_state'
%
name
,
layer_attr
=
lstm_layer_attr
)
get_output_layer
(
name
=
'%s_state'
%
name
,
input
=
lstm_out
,
arg_name
=
'state'
,
layer_attr
=
get_output_layer_attr
)
...
...
@@ -599,11 +723,18 @@ def lstmemory_unit(input, name=None, size=None, param_attr=None,
@
wrap_name_default
(
'lstm_group'
)
def
lstmemory_group
(
input
,
size
=
None
,
name
=
None
,
reverse
=
False
,
param_attr
=
None
,
act
=
None
,
gate_act
=
None
,
state_act
=
None
,
mixed_bias_attr
=
None
,
lstm_bias_attr
=
None
,
mixed_layer_attr
=
None
,
lstm_layer_attr
=
None
,
def
lstmemory_group
(
input
,
size
=
None
,
name
=
None
,
reverse
=
False
,
param_attr
=
None
,
act
=
None
,
gate_act
=
None
,
state_act
=
None
,
mixed_bias_attr
=
None
,
lstm_bias_attr
=
None
,
mixed_layer_attr
=
None
,
lstm_layer_attr
=
None
,
get_output_layer_attr
=
None
):
"""
lstm_group is a recurrent layer group version Long Short Term Memory. It
...
...
@@ -665,17 +796,22 @@ def lstmemory_group(input, size=None, name=None,
"""
def
__lstm_step__
(
ipt
):
return
lstmemory_unit
(
input
=
ipt
,
name
=
name
,
size
=
size
,
mixed_bias_attr
=
mixed_bias_attr
,
return
lstmemory_unit
(
input
=
ipt
,
name
=
name
,
size
=
size
,
mixed_bias_attr
=
mixed_bias_attr
,
mixed_layer_attr
=
mixed_layer_attr
,
param_attr
=
param_attr
,
lstm_bias_attr
=
lstm_bias_attr
,
act
=
act
,
gate_act
=
gate_act
,
act
=
act
,
gate_act
=
gate_act
,
state_act
=
state_act
,
lstm_layer_attr
=
lstm_layer_attr
,
get_output_layer_attr
=
get_output_layer_attr
)
return
recurrent_group
(
name
=
'%s_recurrent_group'
%
name
,
return
recurrent_group
(
name
=
'%s_recurrent_group'
%
name
,
step
=
__lstm_step__
,
reverse
=
reverse
,
input
=
input
)
...
...
@@ -728,8 +864,7 @@ def gru_unit(input,
bias_attr
=
gru_bias_attr
,
act
=
act
,
gate_act
=
gate_act
,
layer_attr
=
gru_layer_attr
)
layer_attr
=
gru_layer_attr
)
return
gru_out
...
...
@@ -739,7 +874,8 @@ def gru_group(input,
name
=
None
,
reverse
=
False
,
gru_bias_attr
=
None
,
act
=
None
,
gate_act
=
None
,
act
=
None
,
gate_act
=
None
,
gru_layer_attr
=
None
):
"""
gru_group is a recurrent layer group version Gated Recurrent Unit. It
...
...
@@ -788,10 +924,10 @@ def gru_group(input,
gru_bias_attr
=
gru_bias_attr
,
act
=
act
,
gate_act
=
gate_act
,
gru_layer_attr
=
gru_layer_attr
)
gru_layer_attr
=
gru_layer_attr
)
return
recurrent_group
(
name
=
'%s_recurrent_group'
%
name
,
return
recurrent_group
(
name
=
'%s_recurrent_group'
%
name
,
step
=
__gru_step__
,
reverse
=
reverse
,
input
=
input
)
...
...
@@ -808,8 +944,7 @@ def simple_gru(input,
gru_bias_attr
=
None
,
act
=
None
,
gate_act
=
None
,
gru_layer_attr
=
None
):
gru_layer_attr
=
None
):
"""
You maybe see gru_step_layer, grumemory in layers.py, gru_unit, gru_group,
simple_gru in network.py. The reason why there are so many interfaces is
...
...
@@ -862,13 +997,15 @@ def simple_gru(input,
:return: the gru group.
:rtype: LayerOutput
"""
with
mixed_layer
(
name
=
'%s_transform'
%
name
,
with
mixed_layer
(
name
=
'%s_transform'
%
name
,
size
=
size
*
3
,
bias_attr
=
mixed_bias_param_attr
,
layer_attr
=
mixed_layer_attr
)
as
m
:
m
+=
full_matrix_projection
(
input
=
input
,
param_attr
=
mixed_param_attr
)
return
gru_group
(
name
=
name
,
return
gru_group
(
name
=
name
,
size
=
size
,
input
=
m
,
reverse
=
reverse
,
...
...
@@ -890,8 +1027,7 @@ def simple_gru2(input,
act
=
None
,
gate_act
=
None
,
mixed_layer_attr
=
None
,
gru_cell_attr
=
None
):
gru_cell_attr
=
None
):
"""
simple_gru2 is the same with simple_gru, but using grumemory instead
Please see grumemory in layers.py for more detail about the maths.
...
...
@@ -922,13 +1058,15 @@ def simple_gru2(input,
:return: the gru group.
:rtype: LayerOutput
"""
with
mixed_layer
(
name
=
'%s_transform'
%
name
,
with
mixed_layer
(
name
=
'%s_transform'
%
name
,
size
=
size
*
3
,
bias_attr
=
mixed_bias_attr
,
layer_attr
=
mixed_layer_attr
)
as
m
:
m
+=
full_matrix_projection
(
input
=
input
,
param_attr
=
mixed_param_attr
)
return
grumemory
(
name
=
name
,
return
grumemory
(
name
=
name
,
size
=
size
,
input
=
m
,
reverse
=
reverse
,
...
...
@@ -940,19 +1078,30 @@ def simple_gru2(input,
@
wrap_name_default
(
"bidirectional_gru"
)
def
bidirectional_gru
(
input
,
size
,
name
=
None
,
return_seq
=
False
,
fwd_mixed_param_attr
=
None
,
fwd_mixed_bias_attr
=
None
,
fwd_gru_param_attr
=
None
,
fwd_gru_bias_attr
=
None
,
fwd_act
=
None
,
fwd_gate_act
=
None
,
fwd_mixed_layer_attr
=
None
,
fwd_gru_cell_attr
=
None
,
bwd_mixed_param_attr
=
None
,
bwd_mixed_bias_attr
=
None
,
bwd_gru_param_attr
=
None
,
bwd_gru_bias_attr
=
None
,
bwd_act
=
None
,
bwd_gate_act
=
None
,
bwd_mixed_layer_attr
=
None
,
bwd_gru_cell_attr
=
None
,
last_seq_attr
=
None
,
first_seq_attr
=
None
,
concat_attr
=
None
,
concat_act
=
None
):
def
bidirectional_gru
(
input
,
size
,
name
=
None
,
return_seq
=
False
,
fwd_mixed_param_attr
=
None
,
fwd_mixed_bias_attr
=
None
,
fwd_gru_param_attr
=
None
,
fwd_gru_bias_attr
=
None
,
fwd_act
=
None
,
fwd_gate_act
=
None
,
fwd_mixed_layer_attr
=
None
,
fwd_gru_cell_attr
=
None
,
bwd_mixed_param_attr
=
None
,
bwd_mixed_bias_attr
=
None
,
bwd_gru_param_attr
=
None
,
bwd_gru_bias_attr
=
None
,
bwd_act
=
None
,
bwd_gate_act
=
None
,
bwd_mixed_layer_attr
=
None
,
bwd_gru_cell_attr
=
None
,
last_seq_attr
=
None
,
first_seq_attr
=
None
,
concat_attr
=
None
,
concat_act
=
None
):
"""
A bidirectional_gru is a recurrent unit that iterates over the input
sequence both in forward and bardward orders, and then concatenate two
...
...
@@ -983,41 +1132,61 @@ def bidirectional_gru(input, size, name=None, return_seq=False,
"""
args
=
locals
()
fw
=
simple_gru2
(
name
=
'%s_fw'
%
name
,
input
=
input
,
size
=
size
,
fw
=
simple_gru2
(
name
=
'%s_fw'
%
name
,
input
=
input
,
size
=
size
,
**
dict
((
k
[
len
(
'fwd_'
):],
v
)
for
k
,
v
in
args
.
iteritems
()
if
k
.
startswith
(
'fwd_'
)))
bw
=
simple_gru2
(
name
=
"%s_bw"
%
name
,
input
=
input
,
size
=
size
,
bw
=
simple_gru2
(
name
=
"%s_bw"
%
name
,
input
=
input
,
size
=
size
,
reverse
=
True
,
**
dict
((
k
[
len
(
'bwd_'
):],
v
)
for
k
,
v
in
args
.
iteritems
()
if
k
.
startswith
(
'bwd_'
)))
if
return_seq
:
return
concat_layer
(
name
=
name
,
input
=
[
fw
,
bw
],
layer_attr
=
concat_attr
,
act
=
concat_act
)
return
concat_layer
(
name
=
name
,
input
=
[
fw
,
bw
],
layer_attr
=
concat_attr
,
act
=
concat_act
)
else
:
fw_seq
=
last_seq
(
name
=
"%s_fw_last"
%
name
,
input
=
fw
,
layer_attr
=
last_seq_attr
)
bw_seq
=
first_seq
(
name
=
"%s_bw_last"
%
name
,
input
=
bw
,
layer_attr
=
first_seq_attr
)
return
concat_layer
(
name
=
name
,
input
=
[
fw_seq
,
bw_seq
],
layer_attr
=
concat_attr
,
act
=
concat_act
)
fw_seq
=
last_seq
(
name
=
"%s_fw_last"
%
name
,
input
=
fw
,
layer_attr
=
last_seq_attr
)
bw_seq
=
first_seq
(
name
=
"%s_bw_last"
%
name
,
input
=
bw
,
layer_attr
=
first_seq_attr
)
return
concat_layer
(
name
=
name
,
input
=
[
fw_seq
,
bw_seq
],
layer_attr
=
concat_attr
,
act
=
concat_act
)
@
wrap_name_default
(
"bidirectional_lstm"
)
def
bidirectional_lstm
(
input
,
size
,
name
=
None
,
return_seq
=
False
,
fwd_mat_param_attr
=
None
,
fwd_bias_param_attr
=
None
,
fwd_inner_param_attr
=
None
,
fwd_act
=
None
,
fwd_gate_act
=
None
,
fwd_state_act
=
None
,
fwd_mixed_layer_attr
=
None
,
fwd_lstm_cell_attr
=
None
,
bwd_mat_param_attr
=
None
,
bwd_bias_param_attr
=
None
,
bwd_inner_param_attr
=
None
,
bwd_act
=
None
,
bwd_gate_act
=
None
,
bwd_state_act
=
None
,
bwd_mixed_layer_attr
=
None
,
bwd_lstm_cell_attr
=
None
,
last_seq_attr
=
None
,
first_seq_attr
=
None
,
concat_attr
=
None
,
concat_act
=
None
):
def
bidirectional_lstm
(
input
,
size
,
name
=
None
,
return_seq
=
False
,
fwd_mat_param_attr
=
None
,
fwd_bias_param_attr
=
None
,
fwd_inner_param_attr
=
None
,
fwd_act
=
None
,
fwd_gate_act
=
None
,
fwd_state_act
=
None
,
fwd_mixed_layer_attr
=
None
,
fwd_lstm_cell_attr
=
None
,
bwd_mat_param_attr
=
None
,
bwd_bias_param_attr
=
None
,
bwd_inner_param_attr
=
None
,
bwd_act
=
None
,
bwd_gate_act
=
None
,
bwd_state_act
=
None
,
bwd_mixed_layer_attr
=
None
,
bwd_lstm_cell_attr
=
None
,
last_seq_attr
=
None
,
first_seq_attr
=
None
,
concat_attr
=
None
,
concat_act
=
None
):
"""
A bidirectional_lstm is a recurrent unit that iterates over the input
sequence both in forward and bardward orders, and then concatenate two
...
...
@@ -1053,25 +1222,34 @@ def bidirectional_lstm(input, size, name=None, return_seq=False,
"""
args
=
locals
()
fw
=
simple_lstm
(
name
=
'%s_fw'
%
name
,
input
=
input
,
size
=
size
,
fw
=
simple_lstm
(
name
=
'%s_fw'
%
name
,
input
=
input
,
size
=
size
,
**
dict
((
k
[
len
(
'fwd_'
):],
v
)
for
k
,
v
in
args
.
iteritems
()
if
k
.
startswith
(
'fwd_'
)))
bw
=
simple_lstm
(
name
=
"%s_bw"
%
name
,
input
=
input
,
size
=
size
,
bw
=
simple_lstm
(
name
=
"%s_bw"
%
name
,
input
=
input
,
size
=
size
,
reverse
=
True
,
**
dict
((
k
[
len
(
'bwd_'
):],
v
)
for
k
,
v
in
args
.
iteritems
()
if
k
.
startswith
(
'bwd_'
)))
if
return_seq
:
return
concat_layer
(
name
=
name
,
input
=
[
fw
,
bw
],
layer_attr
=
concat_attr
,
act
=
concat_act
)
return
concat_layer
(
name
=
name
,
input
=
[
fw
,
bw
],
layer_attr
=
concat_attr
,
act
=
concat_act
)
else
:
fw_seq
=
last_seq
(
name
=
"%s_fw_last"
%
name
,
input
=
fw
,
layer_attr
=
last_seq_attr
)
bw_seq
=
first_seq
(
name
=
"%s_bw_last"
%
name
,
input
=
bw
,
layer_attr
=
first_seq_attr
)
return
concat_layer
(
name
=
name
,
input
=
[
fw_seq
,
bw_seq
],
layer_attr
=
concat_attr
,
act
=
concat_act
)
fw_seq
=
last_seq
(
name
=
"%s_fw_last"
%
name
,
input
=
fw
,
layer_attr
=
last_seq_attr
)
bw_seq
=
first_seq
(
name
=
"%s_bw_last"
%
name
,
input
=
bw
,
layer_attr
=
first_seq_attr
)
return
concat_layer
(
name
=
name
,
input
=
[
fw_seq
,
bw_seq
],
layer_attr
=
concat_attr
,
act
=
concat_act
)
@
wrap_name_default
()
...
...
@@ -1142,37 +1320,41 @@ def simple_attention(encoded_sequence,
proj_size
=
encoded_proj
.
size
with
mixed_layer
(
size
=
proj_size
,
name
=
"%s_transform"
%
name
)
as
m
:
m
+=
full_matrix_projection
(
decoder_state
,
param_attr
=
transform_param_attr
)
m
+=
full_matrix_projection
(
decoder_state
,
param_attr
=
transform_param_attr
)
expanded
=
expand_layer
(
input
=
m
,
expand_as
=
encoded_sequence
,
name
=
'%s_expand'
%
name
)
expanded
=
expand_layer
(
input
=
m
,
expand_as
=
encoded_sequence
,
name
=
'%s_expand'
%
name
)
with
mixed_layer
(
size
=
proj_size
,
act
=
weight_act
,
name
=
"%s_combine"
%
name
)
as
m
:
with
mixed_layer
(
size
=
proj_size
,
act
=
weight_act
,
name
=
"%s_combine"
%
name
)
as
m
:
m
+=
identity_projection
(
expanded
)
m
+=
identity_projection
(
encoded_proj
)
# sequence softmax is used to normalize similarities between decoder state
# and encoder outputs into a distribution
attention_weight
=
fc_layer
(
input
=
m
,
attention_weight
=
fc_layer
(
input
=
m
,
size
=
1
,
act
=
SequenceSoftmaxActivation
(),
param_attr
=
softmax_param_attr
,
name
=
"%s_softmax"
%
name
,
bias_attr
=
False
)
scaled
=
scaling_layer
(
weight
=
attention_weight
,
input
=
encoded_sequence
,
scaled
=
scaling_layer
(
weight
=
attention_weight
,
input
=
encoded_sequence
,
name
=
'%s_scaling'
%
name
)
return
pooling_layer
(
input
=
scaled
,
pooling_type
=
SumPooling
(),
name
=
"%s_pooling"
%
name
)
return
pooling_layer
(
input
=
scaled
,
pooling_type
=
SumPooling
(),
name
=
"%s_pooling"
%
name
)
############################################################################
# Miscs #
############################################################################
@
wrap_name_default
(
"dropout"
)
def
dropout_layer
(
input
,
dropout_rate
,
name
=
None
):
"""
...
...
@@ -1183,7 +1365,10 @@ def dropout_layer(input, dropout_rate, name=None):
:param dropout_rate:
:return:
"""
return
addto_layer
(
name
=
name
,
input
=
input
,
act
=
LinearActivation
(),
return
addto_layer
(
name
=
name
,
input
=
input
,
act
=
LinearActivation
(),
bias_attr
=
False
,
layer_attr
=
ExtraAttr
(
drop_rate
=
dropout_rate
))
...
...
@@ -1218,7 +1403,6 @@ def outputs(layers, *args):
def
__dfs_travel__
(
layer
,
predicate
=
lambda
x
:
x
.
layer_type
==
LayerType
.
DATA
):
"""
DFS LRV Travel for output layer.
...
...
@@ -1259,8 +1443,9 @@ def outputs(layers, *args):
for
each_layer
in
layers
:
assert
isinstance
(
each_layer
,
LayerOutput
)
inputs
.
extend
(
__dfs_travel__
(
each_layer
))
outputs_
.
extend
(
__dfs_travel__
(
each_layer
,
lambda
x
:
x
.
layer_type
==
LayerType
.
COST
))
outputs_
.
extend
(
__dfs_travel__
(
each_layer
,
lambda
x
:
x
.
layer_type
==
LayerType
.
COST
))
# Currently, we got each leaf node's inputs order, output order.
# We merge them together.
...
...
@@ -1278,16 +1463,13 @@ def outputs(layers, *args):
if
each_output
.
name
not
in
final_outputs
:
final_outputs
.
append
(
each_output
.
name
)
logger
.
info
(
""
.
join
([
"The input order is ["
,
", "
.
join
(
final_inputs
),
"]"
])
)
logger
.
info
(
""
.
join
([
"The input order is ["
,
", "
.
join
(
final_inputs
),
"]"
]))
if
len
(
final_outputs
)
==
0
:
final_outputs
=
map
(
lambda
x
:
x
.
name
,
layers
)
logger
.
info
(
""
.
join
([
"The output order is ["
,
", "
.
join
(
final_outputs
),
"]"
]))
logger
.
info
(
""
.
join
(
[
"The output order is ["
,
", "
.
join
(
final_outputs
),
"]"
]))
Inputs
(
*
final_inputs
)
Outputs
(
*
final_outputs
)
python/paddle/trainer_config_helpers/optimizers.py
浏览文件 @
58e1b3b3
...
...
@@ -17,11 +17,12 @@ from paddle.trainer.config_parser import Settings, default_decay_rate, \
from
.default_decorators
import
wrap_param_default
__all__
=
[
'Optimizer'
,
'BaseSGDOptimizer'
,
'MomentumOptimizer'
,
'AdamaxOptimizer'
,
'AdamOptimizer'
,
'AdaGradOptimizer'
,
'RMSPropOptimizer'
,
'DecayedAdaGradOptimizer'
,
'AdaDeltaOptimizer'
,
'BaseRegularization'
,
'L2Regularization'
,
'settings'
,
'ModelAverage'
]
__all__
=
[
'Optimizer'
,
'BaseSGDOptimizer'
,
'MomentumOptimizer'
,
'AdamaxOptimizer'
,
'AdamOptimizer'
,
'AdaGradOptimizer'
,
'RMSPropOptimizer'
,
'DecayedAdaGradOptimizer'
,
'AdaDeltaOptimizer'
,
'BaseRegularization'
,
'L2Regularization'
,
'settings'
,
'ModelAverage'
]
class
Optimizer
(
object
):
...
...
@@ -90,18 +91,15 @@ class MomentumOptimizer(BaseSGDOptimizer):
:param sparse: with sparse support or not.
:type sparse: bool
"""
def
extra_settings
(
self
):
default_momentum
(
self
.
momentum
)
def
to_setting_kwargs
(
self
):
if
self
.
sparse
:
return
{
'learning_method'
:
'sparse_momentum'
}
return
{
'learning_method'
:
'sparse_momentum'
}
else
:
return
{
'learning_method'
:
'momentum'
}
return
{
'learning_method'
:
'momentum'
}
def
__init__
(
self
,
momentum
=
None
,
sparse
=
False
):
self
.
momentum
=
momentum
...
...
@@ -197,9 +195,7 @@ class AdaGradOptimizer(BaseSGDOptimizer):
"""
def
to_setting_kwargs
(
self
):
return
{
'learning_method'
:
'adagrad'
}
return
{
'learning_method'
:
'adagrad'
}
def
__init__
(
self
):
pass
...
...
@@ -311,9 +307,7 @@ class L2Regularization(BaseRegularization):
def
to_setting_kwargs
(
self
):
if
self
.
algorithm
==
'owlqn'
:
return
{
'l2weight'
:
self
.
decay_rate
}
return
{
'l2weight'
:
self
.
decay_rate
}
else
:
return
dict
()
...
...
@@ -330,7 +324,8 @@ class ModelAverage(Optimizer):
'do_average_in_cpu'
:
self
.
do_average_in_cpu
}
def
__init__
(
self
,
average_window
,
def
__init__
(
self
,
average_window
,
max_average_window
=
None
,
do_average_in_cpu
=
False
):
self
.
average_window
=
average_window
...
...
@@ -356,10 +351,10 @@ def __extends__(dict1, dict2):
return
dict1
@
wrap_param_default
(
[
'learning_method'
],
default_factory
=
lambda
_
:
MomentumOptimizer
())
@
wrap_param_default
(
[
'regularization'
],
default_factory
=
lambda
_
:
BaseRegularization
())
@
wrap_param_default
(
[
'learning_method'
],
default_factory
=
lambda
_
:
MomentumOptimizer
())
@
wrap_param_default
(
[
'regularization'
],
default_factory
=
lambda
_
:
BaseRegularization
())
def
settings
(
batch_size
,
learning_rate
=
1e-3
,
learning_rate_decay_a
=
0.
,
...
...
@@ -373,8 +368,7 @@ def settings(batch_size,
regularization
=
None
,
is_async
=
False
,
model_average
=
None
,
gradient_clipping_threshold
=
None
):
gradient_clipping_threshold
=
None
):
"""
Set the optimization method, learning rate, batch size, and other training
settings. The currently supported algorithms are SGD and Async-SGD.
...
...
@@ -415,10 +409,11 @@ def settings(batch_size,
else
:
algorithm
=
'owlqn'
args
=
[
'batch_size'
,
'learning_rate'
,
'learning_rate_decay_a'
,
'learning_rate_decay_b'
,
'learning_rate_schedule'
,
'learning_rate_args'
,
'average_window'
,
'do_average_in_cpu'
,
'max_average_window'
]
args
=
[
'batch_size'
,
'learning_rate'
,
'learning_rate_decay_a'
,
'learning_rate_decay_b'
,
'learning_rate_schedule'
,
'learning_rate_args'
,
'average_window'
,
'do_average_in_cpu'
,
'max_average_window'
]
kwargs
=
dict
()
kwargs
[
'algorithm'
]
=
algorithm
for
arg
in
args
:
...
...
python/paddle/trainer_config_helpers/poolings.py
浏览文件 @
58e1b3b3
...
...
@@ -11,18 +11,12 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
"""
__all__
=
[
"BasePoolingType"
,
"MaxPooling"
,
"AvgPooling"
,
"CudnnMaxPooling"
,
"CudnnAvgPooling"
,
"SumPooling"
,
"SquareRootNPooling"
"BasePoolingType"
,
"MaxPooling"
,
"AvgPooling"
,
"CudnnMaxPooling"
,
"CudnnAvgPooling"
,
"SumPooling"
,
"SquareRootNPooling"
]
...
...
@@ -36,6 +30,7 @@ class BasePoolingType(object):
:type name: basestring
"""
def
__init__
(
self
,
name
):
self
.
name
=
name
...
...
@@ -54,6 +49,7 @@ class MaxPooling(BasePoolingType):
value. None means use default value in proto.
:type output_max_index: bool|None
"""
def
__init__
(
self
,
output_max_index
=
None
):
BasePoolingType
.
__init__
(
self
,
"max"
)
self
.
output_max_index
=
output_max_index
...
...
@@ -64,6 +60,7 @@ class CudnnMaxPooling(BasePoolingType):
Cudnn max pooling only support GPU. Return the maxinum value in the
pooling window.
"""
def
__init__
(
self
):
BasePoolingType
.
__init__
(
self
,
"cudnn-max-pool"
)
...
...
@@ -73,9 +70,11 @@ class CudnnAvgPooling(BasePoolingType):
Cudnn average pooling only support GPU. Return the average value in the
pooling window.
"""
def
__init__
(
self
):
BasePoolingType
.
__init__
(
self
,
"cudnn-avg-pool"
)
class
AvgPooling
(
BasePoolingType
):
"""
Average pooling.
...
...
@@ -105,7 +104,9 @@ class SumPooling(AvgPooling):
sum(samples
\\
_of
\\
_a
\\
_sequence)
"""
def
__init__
(
self
):
AvgPooling
.
__init__
(
self
,
AvgPooling
.
STRATEGY_SUM
)
def
__init__
(
self
):
AvgPooling
.
__init__
(
self
,
AvgPooling
.
STRATEGY_SUM
)
class
SquareRootNPooling
(
AvgPooling
):
...
...
@@ -118,4 +119,6 @@ class SquareRootNPooling(AvgPooling):
sum(samples
\\
_of
\\
_a
\\
_sequence)/sqrt(sample
\\
_num)
"""
def
__init__
(
self
):
AvgPooling
.
__init__
(
self
,
AvgPooling
.
STRATEGY_SQROOTN
)
def
__init__
(
self
):
AvgPooling
.
__init__
(
self
,
AvgPooling
.
STRATEGY_SQROOTN
)
python/paddle/trainer_config_helpers/tests/configs/img_layers.py
浏览文件 @
58e1b3b3
from
paddle.trainer_config_helpers
import
*
settings
(
learning_rate
=
1e-3
,
batch_size
=
1000
)
settings
(
learning_rate
=
1e-3
,
batch_size
=
1000
)
img
=
data_layer
(
name
=
'image'
,
size
=
256
*
256
)
img
=
data_layer
(
name
=
'image'
,
size
=
256
*
256
)
# the parse_conv in config_parse.py is not strictly accurate when filter_size
# is not square. So here set square filter_size.
img_conv
=
img_conv_layer
(
input
=
img
,
num_channels
=
1
,
num_filters
=
64
,
filter_size
=
(
32
,
32
),
padding
=
(
1
,
1
),
stride
=
(
1
,
1
),
img_conv
=
img_conv_layer
(
input
=
img
,
num_channels
=
1
,
num_filters
=
64
,
filter_size
=
(
32
,
32
),
padding
=
(
1
,
1
),
stride
=
(
1
,
1
),
act
=
LinearActivation
())
img_bn
=
batch_norm_layer
(
input
=
img_conv
,
act
=
ReluActivation
())
...
...
@@ -18,5 +20,4 @@ img_norm = img_cmrnorm_layer(input=img_bn, size=32)
img_pool
=
img_pool_layer
(
input
=
img_conv
,
pool_size
=
32
,
pool_type
=
MaxPooling
())
outputs
(
img_pool
,
img_norm
)
python/paddle/trainer_config_helpers/tests/configs/img_trans_layers.py
浏览文件 @
58e1b3b3
from
paddle.trainer_config_helpers
import
*
settings
(
learning_rate
=
1e-3
,
batch_size
=
1000
)
settings
(
learning_rate
=
1e-3
,
batch_size
=
1000
)
img
=
data_layer
(
name
=
'image'
,
size
=
227
*
227
)
img
=
data_layer
(
name
=
'image'
,
size
=
227
*
227
)
# the parse_conv in config_parse.py is not strictly accurate when filter_size
# is not square. So here set square filter_size.
img_conv
=
img_conv_layer
(
input
=
img
,
num_channels
=
1
,
num_filters
=
64
,
filter_size
=
(
32
,
32
),
padding
=
(
1
,
1
),
stride
=
(
1
,
1
),
act
=
LinearActivation
(),
trans
=
True
)
img_conv
=
img_conv_layer
(
input
=
img
,
num_channels
=
1
,
num_filters
=
64
,
filter_size
=
(
32
,
32
),
padding
=
(
1
,
1
),
stride
=
(
1
,
1
),
act
=
LinearActivation
(),
trans
=
True
)
img_bn
=
batch_norm_layer
(
input
=
img_conv
,
act
=
ReluActivation
())
img_norm
=
img_cmrnorm_layer
(
input
=
img_bn
,
size
=
32
)
img_pool
=
img_pool_layer
(
input
=
img_conv
,
pool_size
=
32
,
pool_type
=
MaxPooling
())
outputs
(
img_pool
,
img_norm
)
python/paddle/trainer_config_helpers/tests/configs/last_first_seq.py
浏览文件 @
58e1b3b3
from
paddle.trainer_config_helpers
import
*
settings
(
batch_size
=
1000
,
learning_rate
=
1e-5
)
settings
(
batch_size
=
1000
,
learning_rate
=
1e-5
)
din
=
data_layer
(
name
=
'data'
,
size
=
30
)
seq_op
=
[
first_seq
,
last_seq
]
seq_op
=
[
first_seq
,
last_seq
]
agg_level
=
[
AggregateLevel
.
EACH_SEQUENCE
,
AggregateLevel
.
EACH_TIMESTEP
]
agg_level
=
[
AggregateLevel
.
EACH_SEQUENCE
,
AggregateLevel
.
EACH_TIMESTEP
]
opts
=
[]
...
...
python/paddle/trainer_config_helpers/tests/configs/layer_activations.py
浏览文件 @
58e1b3b3
...
...
@@ -4,18 +4,18 @@ Test all activations.
from
paddle.trainer_config_helpers
import
*
settings
(
learning_rate
=
1e-4
,
batch_size
=
1000
)
settings
(
learning_rate
=
1e-4
,
batch_size
=
1000
)
din
=
data_layer
(
name
=
'input'
,
size
=
100
)
acts
=
[
TanhActivation
,
SigmoidActivation
,
SoftmaxActivation
,
IdentityActivation
,
LinearActivation
,
ExpActivation
,
ReluActivation
,
BReluActivation
,
SoftReluActivation
,
STanhActivation
,
AbsActivation
,
SquareActivation
]
SoftReluActivation
,
STanhActivation
,
AbsActivation
,
SquareActivation
]
outputs
(
[
fc_layer
(
input
=
din
,
size
=
100
,
act
=
act
(),
name
=
"layer_%d"
%
i
)
for
i
,
act
in
enumerate
(
acts
)])
outputs
([
fc_layer
(
input
=
din
,
size
=
100
,
act
=
act
(),
name
=
"layer_%d"
%
i
)
for
i
,
act
in
enumerate
(
acts
)
])
python/paddle/trainer_config_helpers/tests/configs/math_ops.py
浏览文件 @
58e1b3b3
from
paddle.trainer_config_helpers
import
*
from
paddle.trainer_config_helpers
import
math
settings
(
batch_size
=
1000
,
learning_rate
=
1e-5
)
settings
(
batch_size
=
1000
,
learning_rate
=
1e-5
)
x
=
data_layer
(
name
=
'data'
,
size
=
100
)
x
=
math
.
exp
(
x
)
...
...
@@ -21,10 +18,9 @@ y = y - 2
y
=
2
-
y
y
=
2
*
y
y
=
y
*
3
z
=
data_layer
(
name
=
'data_2'
,
size
=
1
)
z
=
data_layer
(
name
=
'data_2'
,
size
=
1
)
y
=
y
*
z
y
=
z
*
y
y
=
y
+
z
y
=
z
+
y
outputs
(
y
)
python/paddle/trainer_config_helpers/tests/configs/projections.py
浏览文件 @
58e1b3b3
...
...
@@ -3,10 +3,7 @@ Test mixed layer, projections and operators.
'''
from
paddle.trainer_config_helpers
import
*
settings
(
batch_size
=
1000
,
learning_rate
=
1e-4
)
settings
(
batch_size
=
1000
,
learning_rate
=
1e-4
)
din
=
data_layer
(
name
=
'test'
,
size
=
100
)
...
...
@@ -30,18 +27,20 @@ with mixed_layer() as m5:
with
mixed_layer
()
as
m6
:
m6
+=
dotmul_operator
(
a
=
m3
,
b
=
m4
)
img
=
data_layer
(
name
=
'img'
,
size
=
32
*
32
)
flt
=
data_layer
(
name
=
'filter'
,
size
=
3
*
3
*
1
*
64
)
img
=
data_layer
(
name
=
'img'
,
size
=
32
*
32
)
flt
=
data_layer
(
name
=
'filter'
,
size
=
3
*
3
*
1
*
64
)
with
mixed_layer
()
as
m7
:
m7
+=
conv_operator
(
img
=
img
,
filter
=
flt
,
num_filters
=
64
,
num_channels
=
1
,
filter_size
=
3
)
end
=
mixed_layer
(
input
=
[
full_matrix_projection
(
input
=
m5
),
trans_full_matrix_projection
(
input
=
m6
),
full_matrix_projection
(
input
=
m7
)],
m7
+=
conv_operator
(
img
=
img
,
filter
=
flt
,
num_filters
=
64
,
num_channels
=
1
,
filter_size
=
3
)
end
=
mixed_layer
(
input
=
[
full_matrix_projection
(
input
=
m5
),
trans_full_matrix_projection
(
input
=
m6
),
full_matrix_projection
(
input
=
m7
)
],
size
=
100
,
layer_attr
=
ExtraAttr
(
drop_rate
=
0.5
,
error_clipping_threshold
=
40
))
layer_attr
=
ExtraAttr
(
drop_rate
=
0.5
,
error_clipping_threshold
=
40
))
outputs
(
end
)
python/paddle/trainer_config_helpers/tests/configs/shared_fc.py
浏览文件 @
58e1b3b3
from
paddle.trainer_config_helpers
import
*
settings
(
learning_rate
=
1e-4
,
batch_size
=
1000
)
settings
(
learning_rate
=
1e-4
,
batch_size
=
1000
)
a
=
data_layer
(
name
=
'feature_a'
,
size
=
200
)
b
=
data_layer
(
name
=
'feature_b'
,
size
=
200
)
...
...
@@ -11,12 +8,22 @@ b = data_layer(name='feature_b', size=200)
fc_param
=
ParamAttr
(
name
=
'fc_param'
,
initial_max
=
1.0
,
initial_min
=-
1.0
)
bias_param
=
ParamAttr
(
name
=
'bias_param'
,
initial_mean
=
0.0
,
initial_std
=
0.0
)
softmax_param
=
ParamAttr
(
name
=
'softmax_param'
,
initial_max
=
1.0
,
initial_min
=-
1.0
)
softmax_param
=
ParamAttr
(
name
=
'softmax_param'
,
initial_max
=
1.0
,
initial_min
=-
1.0
)
hidden_a
=
fc_layer
(
input
=
a
,
size
=
200
,
param_attr
=
fc_param
,
bias_attr
=
bias_param
)
hidden_b
=
fc_layer
(
input
=
b
,
size
=
200
,
param_attr
=
fc_param
,
bias_attr
=
bias_param
)
hidden_a
=
fc_layer
(
input
=
a
,
size
=
200
,
param_attr
=
fc_param
,
bias_attr
=
bias_param
)
hidden_b
=
fc_layer
(
input
=
b
,
size
=
200
,
param_attr
=
fc_param
,
bias_attr
=
bias_param
)
predict
=
fc_layer
(
input
=
[
hidden_a
,
hidden_b
],
param_attr
=
[
softmax_param
,
softmax_param
],
bias_attr
=
False
,
size
=
10
,
act
=
SoftmaxActivation
())
predict
=
fc_layer
(
input
=
[
hidden_a
,
hidden_b
],
param_attr
=
[
softmax_param
,
softmax_param
],
bias_attr
=
False
,
size
=
10
,
act
=
SoftmaxActivation
())
outputs
(
classification_cost
(
input
=
predict
,
label
=
data_layer
(
name
=
'label'
,
size
=
10
)))
outputs
(
classification_cost
(
input
=
predict
,
label
=
data_layer
(
name
=
'label'
,
size
=
10
)))
python/paddle/trainer_config_helpers/tests/configs/shared_lstm.py
浏览文件 @
58e1b3b3
...
...
@@ -16,14 +16,26 @@ with mixed_layer(size=400, bias_attr=False) as m2:
lstm_param
=
ParamAttr
(
name
=
'lstm_param'
)
lstm_bias
=
ParamAttr
(
name
=
'lstm_bias'
,
initial_mean
=
0.
,
initial_std
=
0.
)
lstm1
=
lstmemory_group
(
input
=
m1
,
param_attr
=
lstm_param
,
lstm_bias_attr
=
lstm_bias
,
mixed_bias_attr
=
False
)
lstm2
=
lstmemory_group
(
input
=
m2
,
param_attr
=
lstm_param
,
lstm_bias_attr
=
lstm_bias
,
mixed_bias_attr
=
False
)
lstm1
=
lstmemory_group
(
input
=
m1
,
param_attr
=
lstm_param
,
lstm_bias_attr
=
lstm_bias
,
mixed_bias_attr
=
False
)
lstm2
=
lstmemory_group
(
input
=
m2
,
param_attr
=
lstm_param
,
lstm_bias_attr
=
lstm_bias
,
mixed_bias_attr
=
False
)
softmax_param
=
ParamAttr
(
name
=
'softmax_param'
)
predict
=
fc_layer
(
input
=
[
last_seq
(
input
=
lstm1
),
last_seq
(
input
=
lstm2
)],
predict
=
fc_layer
(
input
=
[
last_seq
(
input
=
lstm1
),
last_seq
(
input
=
lstm2
)],
size
=
10
,
param_attr
=
[
softmax_param
,
softmax_param
],
bias_attr
=
False
,
act
=
SoftmaxActivation
())
outputs
(
classification_cost
(
input
=
predict
,
label
=
data_layer
(
name
=
'label'
,
size
=
10
)))
outputs
(
classification_cost
(
input
=
predict
,
label
=
data_layer
(
name
=
'label'
,
size
=
10
)))
python/paddle/trainer_config_helpers/tests/configs/simple_rnn_layers.py
浏览文件 @
58e1b3b3
from
paddle.trainer_config_helpers
import
*
settings
(
batch_size
=
1000
,
learning_rate
=
1e-4
)
settings
(
batch_size
=
1000
,
learning_rate
=
1e-4
)
din
=
data_layer
(
name
=
'data'
,
size
=
200
)
...
...
@@ -13,24 +10,28 @@ rnn = recurrent_layer(input=hidden, act=SigmoidActivation())
rnn2
=
recurrent_layer
(
input
=
hidden
,
act
=
SigmoidActivation
(),
reverse
=
True
)
lstm1_param
=
fc_layer
(
input
=
hidden
,
size
=
200
*
4
,
act
=
LinearActivation
(),
bias_attr
=
False
)
lstm1_param
=
fc_layer
(
input
=
hidden
,
size
=
200
*
4
,
act
=
LinearActivation
(),
bias_attr
=
False
)
lstm1
=
lstmemory
(
input
=
lstm1_param
,
act
=
SigmoidActivation
())
lstm2_param
=
fc_layer
(
input
=
hidden
,
size
=
200
*
4
,
act
=
LinearActivation
(),
bias_attr
=
False
)
lstm2_param
=
fc_layer
(
input
=
hidden
,
size
=
200
*
4
,
act
=
LinearActivation
(),
bias_attr
=
False
)
lstm2
=
lstmemory
(
input
=
lstm2_param
,
act
=
SigmoidActivation
(),
reverse
=
True
)
gru1_param
=
fc_layer
(
input
=
hidden
,
size
=
200
*
3
,
act
=
LinearActivation
(),
bias_attr
=
False
)
gru1_param
=
fc_layer
(
input
=
hidden
,
size
=
200
*
3
,
act
=
LinearActivation
(),
bias_attr
=
False
)
gru1
=
grumemory
(
input
=
gru1_param
,
act
=
SigmoidActivation
())
gru2_param
=
fc_layer
(
input
=
hidden
,
size
=
200
*
3
,
act
=
LinearActivation
(),
bias_attr
=
False
)
gru2_param
=
fc_layer
(
input
=
hidden
,
size
=
200
*
3
,
act
=
LinearActivation
(),
bias_attr
=
False
)
gru2
=
grumemory
(
input
=
gru2_param
,
act
=
SigmoidActivation
(),
reverse
=
True
)
outputs
(
last_seq
(
input
=
rnn
),
first_seq
(
input
=
rnn2
),
last_seq
(
input
=
lstm1
),
first_seq
(
input
=
lstm2
),
last_seq
(
input
=
gru1
),
first_seq
(
gru2
))
outputs
(
last_seq
(
input
=
rnn
),
first_seq
(
input
=
rnn2
),
last_seq
(
input
=
lstm1
),
first_seq
(
input
=
lstm2
),
last_seq
(
input
=
gru1
),
first_seq
(
gru2
))
python/paddle/trainer_config_helpers/tests/configs/test_bi_grumemory.py
浏览文件 @
58e1b3b3
from
paddle.trainer_config_helpers
import
*
settings
(
batch_size
=
1000
,
learning_rate
=
1e-4
)
settings
(
batch_size
=
1000
,
learning_rate
=
1e-4
)
din
=
data_layer
(
name
=
'data'
,
size
=
120
)
...
...
python/paddle/trainer_config_helpers/tests/configs/test_bilinear_interp.py
浏览文件 @
58e1b3b3
from
paddle.trainer_config_helpers
import
*
settings
(
batch_size
=
1000
,
learning_rate
=
1e-5
)
settings
(
batch_size
=
1000
,
learning_rate
=
1e-5
)
data
=
data_layer
(
name
=
'data'
,
size
=
2304
)
conv
=
img_conv_layer
(
input
=
data
,
filter_size
=
3
,
conv
=
img_conv_layer
(
input
=
data
,
filter_size
=
3
,
num_channels
=
1
,
num_filters
=
16
,
padding
=
1
,
act
=
LinearActivation
(),
bias_attr
=
True
)
bilinear
=
bilinear_interp_layer
(
input
=
conv
,
out_size_x
=
64
,
out_size_y
=
64
)
bilinear
=
bilinear_interp_layer
(
input
=
conv
,
out_size_x
=
64
,
out_size_y
=
64
)
pool
=
img_pool_layer
(
input
=
bilinear
,
pool
=
img_pool_layer
(
input
=
bilinear
,
num_channels
=
4
,
pool_size
=
2
,
stride
=
2
,
...
...
python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.py
浏览文件 @
58e1b3b3
from
paddle.trainer_config_helpers
import
*
settings
(
learning_rate
=
1e-4
,
batch_size
=
1000
)
settings
(
learning_rate
=
1e-4
,
batch_size
=
1000
)
seq_in
=
data_layer
(
name
=
'input'
,
size
=
200
)
labels
=
data_layer
(
name
=
'labels'
,
size
=
5000
)
...
...
@@ -12,17 +9,33 @@ probs = data_layer(name='probs', size=10)
xe_label
=
data_layer
(
name
=
'xe-label'
,
size
=
10
)
hidden
=
fc_layer
(
input
=
seq_in
,
size
=
4
)
outputs
(
ctc_layer
(
input
=
seq_in
,
label
=
labels
),
crf_layer
(
input
=
hidden
,
label
=
data_layer
(
name
=
'crf_label'
,
size
=
4
)),
rank_cost
(
left
=
data_layer
(
name
=
'left'
,
size
=
1
),
right
=
data_layer
(
name
=
'right'
,
size
=
1
),
label
=
data_layer
(
name
=
'label'
,
size
=
1
)),
lambda_cost
(
input
=
data_layer
(
name
=
'list_feature'
,
size
=
100
),
score
=
data_layer
(
name
=
'list_scores'
,
size
=
1
)),
cross_entropy
(
input
=
probs
,
label
=
xe_label
),
cross_entropy_with_selfnorm
(
input
=
probs
,
label
=
xe_label
),
huber_cost
(
input
=
data_layer
(
name
=
'huber_probs'
,
size
=
1
),
label
=
data_layer
(
name
=
'huber_label'
,
size
=
1
)),
multi_binary_label_cross_entropy
(
input
=
probs
,
label
=
xe_label
),
outputs
(
ctc_layer
(
input
=
seq_in
,
label
=
labels
),
crf_layer
(
input
=
hidden
,
label
=
data_layer
(
name
=
'crf_label'
,
size
=
4
)),
rank_cost
(
left
=
data_layer
(
name
=
'left'
,
size
=
1
),
right
=
data_layer
(
name
=
'right'
,
size
=
1
),
label
=
data_layer
(
name
=
'label'
,
size
=
1
)),
lambda_cost
(
input
=
data_layer
(
name
=
'list_feature'
,
size
=
100
),
score
=
data_layer
(
name
=
'list_scores'
,
size
=
1
)),
cross_entropy
(
input
=
probs
,
label
=
xe_label
),
cross_entropy_with_selfnorm
(
input
=
probs
,
label
=
xe_label
),
huber_cost
(
input
=
data_layer
(
name
=
'huber_probs'
,
size
=
1
),
label
=
data_layer
(
name
=
'huber_label'
,
size
=
1
)),
multi_binary_label_cross_entropy
(
input
=
probs
,
label
=
xe_label
),
sum_cost
(
input
=
hidden
))
python/paddle/trainer_config_helpers/tests/configs/test_cost_layers_with_weight.py
浏览文件 @
58e1b3b3
from
paddle.trainer_config_helpers
import
*
settings
(
learning_rate
=
1e-4
,
batch_size
=
1000
)
settings
(
learning_rate
=
1e-4
,
batch_size
=
1000
)
data
=
data_layer
(
name
=
'input'
,
size
=
300
)
lbl
=
data_layer
(
name
=
'label'
,
size
=
1
)
wt
=
data_layer
(
name
=
'weight'
,
size
=
1
)
fc
=
fc_layer
(
input
=
data
,
size
=
10
,
act
=
SoftmaxActivation
())
outputs
(
classification_cost
(
input
=
fc
,
label
=
lbl
,
weight
=
wt
),
regression_cost
(
input
=
fc
,
label
=
lbl
,
weight
=
wt
))
outputs
(
classification_cost
(
input
=
fc
,
label
=
lbl
,
weight
=
wt
),
regression_cost
(
input
=
fc
,
label
=
lbl
,
weight
=
wt
))
python/paddle/trainer_config_helpers/tests/configs/test_expand_layer.py
浏览文件 @
58e1b3b3
from
paddle.trainer_config_helpers
import
*
settings
(
batch_size
=
1000
,
learning_rate
=
1e-5
)
settings
(
batch_size
=
1000
,
learning_rate
=
1e-5
)
din
=
data_layer
(
name
=
'data'
,
size
=
30
)
data_seq
=
data_layer
(
name
=
'data_seq'
,
size
=
30
)
outputs
(
expand_layer
(
input
=
din
,
expand_as
=
data_seq
,
expand_level
=
ExpandLevel
.
FROM_SEQUENCE
),
expand_layer
(
input
=
din
,
expand_as
=
data_seq
,
expand_level
=
ExpandLevel
.
FROM_TIMESTEP
))
outputs
(
expand_layer
(
input
=
din
,
expand_as
=
data_seq
,
expand_level
=
ExpandLevel
.
FROM_SEQUENCE
),
expand_layer
(
input
=
din
,
expand_as
=
data_seq
,
expand_level
=
ExpandLevel
.
FROM_TIMESTEP
))
python/paddle/trainer_config_helpers/tests/configs/test_fc.py
浏览文件 @
58e1b3b3
from
paddle.trainer_config_helpers
import
*
settings
(
batch_size
=
1000
,
learning_rate
=
1e-5
)
settings
(
batch_size
=
1000
,
learning_rate
=
1e-5
)
din
=
data_layer
(
name
=
'data'
,
size
=
100
)
trans
=
trans_layer
(
input
=
din
)
hidden
=
fc_layer
(
input
=
trans
,
size
=
100
,
bias_attr
=
False
)
hidden
=
fc_layer
(
input
=
trans
,
size
=
100
,
bias_attr
=
False
)
mask
=
data_layer
(
name
=
'mask'
,
size
=
100
)
hidden_sel
=
selective_fc_layer
(
input
=
din
,
select
=
mask
,
size
=
100
,
act
=
SigmoidActivation
())
hidden_sel
=
selective_fc_layer
(
input
=
din
,
select
=
mask
,
size
=
100
,
act
=
SigmoidActivation
())
outputs
(
hidden
,
hidden_sel
)
python/paddle/trainer_config_helpers/tests/configs/test_grumemory_layer.py
浏览文件 @
58e1b3b3
from
paddle.trainer_config_helpers
import
*
settings
(
batch_size
=
1000
,
learning_rate
=
1e-4
)
settings
(
batch_size
=
1000
,
learning_rate
=
1e-4
)
din
=
data_layer
(
name
=
'data'
,
size
=
120
)
outputs
(
grumemory
(
input
=
din
,
size
=
40
,
reverse
=
True
,
gate_act
=
TanhActivation
(),
outputs
(
grumemory
(
input
=
din
,
size
=
40
,
reverse
=
True
,
gate_act
=
TanhActivation
(),
act
=
SigmoidActivation
()))
python/paddle/trainer_config_helpers/tests/configs/test_hsigmoid.py
浏览文件 @
58e1b3b3
from
paddle.trainer_config_helpers
import
*
settings
(
learning_rate
=
1e-4
,
batch_size
=
1000
)
settings
(
learning_rate
=
1e-4
,
batch_size
=
1000
)
din
=
data_layer
(
name
=
'data'
,
size
=
100
)
label
=
data_layer
(
name
=
'label'
,
size
=
10
)
...
...
python/paddle/trainer_config_helpers/tests/configs/test_lstmemory_layer.py
浏览文件 @
58e1b3b3
from
paddle.trainer_config_helpers
import
*
settings
(
batch_size
=
1000
,
learning_rate
=
1e-5
)
settings
(
batch_size
=
1000
,
learning_rate
=
1e-5
)
din
=
data_layer
(
name
=
'data'
,
size
=
128
)
outputs
(
lstmemory
(
input
=
din
,
reverse
=
True
,
gate_act
=
TanhActivation
(),
act
=
TanhActivation
(),
size
=
32
))
outputs
(
lstmemory
(
input
=
din
,
reverse
=
True
,
gate_act
=
TanhActivation
(),
act
=
TanhActivation
(),
size
=
32
))
python/paddle/trainer_config_helpers/tests/configs/test_maxout.py
浏览文件 @
58e1b3b3
from
paddle.trainer_config_helpers
import
*
settings
(
batch_size
=
1000
,
learning_rate
=
1e-5
)
settings
(
batch_size
=
1000
,
learning_rate
=
1e-5
)
data
=
data_layer
(
name
=
'data'
,
size
=
2304
)
conv
=
img_conv_layer
(
input
=
data
,
filter_size
=
3
,
conv
=
img_conv_layer
(
input
=
data
,
filter_size
=
3
,
num_channels
=
1
,
num_filters
=
16
,
padding
=
1
,
act
=
LinearActivation
(),
bias_attr
=
True
)
maxout
=
maxout_layer
(
input
=
conv
,
num_channels
=
16
,
groups
=
2
)
maxout
=
maxout_layer
(
input
=
conv
,
num_channels
=
16
,
groups
=
2
)
pool
=
img_pool_layer
(
input
=
maxout
,
num_channels
=
8
,
pool_size
=
2
,
stride
=
2
,
pool_type
=
MaxPooling
())
pool
=
img_pool_layer
(
input
=
maxout
,
num_channels
=
8
,
pool_size
=
2
,
stride
=
2
,
pool_type
=
MaxPooling
())
conv2
=
img_conv_layer
(
input
=
pool
,
filter_size
=
3
,
conv2
=
img_conv_layer
(
input
=
pool
,
filter_size
=
3
,
num_channels
=
32
,
num_filters
=
128
,
padding
=
1
,
act
=
LinearActivation
(),
bias_attr
=
True
)
maxout2
=
maxout_layer
(
input
=
conv
,
num_channels
=
128
,
groups
=
4
)
maxout2
=
maxout_layer
(
input
=
conv
,
num_channels
=
128
,
groups
=
4
)
block
=
block_expand_layer
(
input
=
maxout
,
num_channels
=
32
,
stride_x
=
1
,
stride_y
=
1
,
block_x
=
1
,
block_y
=
6
)
block
=
block_expand_layer
(
input
=
maxout
,
num_channels
=
32
,
stride_x
=
1
,
stride_y
=
1
,
block_x
=
1
,
block_y
=
6
)
fc
=
fc_layer
(
input
=
block
,
size
=
384
,
bias_attr
=
False
)
...
...
python/paddle/trainer_config_helpers/tests/configs/test_ntm_layers.py
浏览文件 @
58e1b3b3
from
paddle.trainer_config_helpers
import
*
settings
(
batch_size
=
1000
,
learning_rate
=
1e-5
)
settings
(
batch_size
=
1000
,
learning_rate
=
1e-5
)
weight
=
data_layer
(
name
=
'w'
,
size
=
1
)
a
=
data_layer
(
name
=
'a'
,
size
=
100
)
...
...
@@ -11,13 +8,23 @@ b = data_layer(name='b', size=100)
c
=
data_layer
(
name
=
'c'
,
size
=
200
)
d
=
data_layer
(
name
=
'd'
,
size
=
31
)
outputs
(
interpolation_layer
(
input
=
[
a
,
b
],
weight
=
weight
),
power_layer
(
input
=
a
,
weight
=
weight
),
scaling_layer
(
input
=
a
,
weight
=
weight
),
cos_sim
(
a
=
a
,
b
=
b
),
cos_sim
(
a
=
a
,
b
=
c
,
size
=
2
),
outputs
(
interpolation_layer
(
input
=
[
a
,
b
],
weight
=
weight
),
power_layer
(
input
=
a
,
weight
=
weight
),
scaling_layer
(
input
=
a
,
weight
=
weight
),
cos_sim
(
a
=
a
,
b
=
b
),
cos_sim
(
a
=
a
,
b
=
c
,
size
=
2
),
sum_to_one_norm_layer
(
input
=
a
),
conv_shift_layer
(
a
=
a
,
b
=
d
),
tensor_layer
(
a
=
a
,
b
=
b
,
size
=
1000
),
slope_intercept_layer
(
input
=
a
,
slope
=
0.7
,
intercept
=
0.9
),
linear_comb_layer
(
weights
=
b
,
vectors
=
c
))
conv_shift_layer
(
a
=
a
,
b
=
d
),
tensor_layer
(
a
=
a
,
b
=
b
,
size
=
1000
),
slope_intercept_layer
(
input
=
a
,
slope
=
0.7
,
intercept
=
0.9
),
linear_comb_layer
(
weights
=
b
,
vectors
=
c
))
python/paddle/trainer_config_helpers/tests/configs/test_print_layer.py
浏览文件 @
58e1b3b3
from
paddle.trainer_config_helpers
import
*
settings
(
learning_rate
=
1e-4
,
batch_size
=
1000
)
settings
(
learning_rate
=
1e-4
,
batch_size
=
1000
)
din
=
data_layer
(
name
=
'input'
,
size
=
100
)
...
...
python/paddle/trainer_config_helpers/tests/configs/test_rnn_group.py
浏览文件 @
58e1b3b3
from
paddle.trainer_config_helpers
import
*
settings
(
learning_rate
=
1e-4
,
batch_size
=
1000
)
settings
(
learning_rate
=
1e-4
,
batch_size
=
1000
)
seq
=
data_layer
(
name
=
'seq_input'
,
size
=
100
)
sub_seq
=
data_layer
(
name
=
'sub_seq_input'
,
size
=
100
)
...
...
@@ -25,11 +22,15 @@ with mixed_layer() as lstm_param: # test lstm unit, rnn group
with
mixed_layer
()
as
gru_param
:
gru_param
+=
full_matrix_projection
(
input
=
seq
,
size
=
100
*
3
)
outputs
(
last_seq
(
input
=
recurrent_group
(
step
=
generate_rnn_simple
(
'rnn_forward'
),
input
=
seq
)),
first_seq
(
input
=
recurrent_group
(
step
=
generate_rnn_simple
(
'rnn_back'
),
input
=
seq
,
reverse
=
True
)),
last_seq
(
input
=
recurrent_group
(
step
=
generate_rnn_simple
(
'rnn_subseq_forward'
),
input
=
SubsequenceInput
(
input
=
sub_seq
))),
last_seq
(
input
=
lstmemory_group
(
input
=
lstm_param
,
size
=
100
)),
last_seq
(
input
=
gru_group
(
input
=
gru_param
,
size
=
100
)))
outputs
(
last_seq
(
input
=
recurrent_group
(
step
=
generate_rnn_simple
(
'rnn_forward'
),
input
=
seq
)),
first_seq
(
input
=
recurrent_group
(
step
=
generate_rnn_simple
(
'rnn_back'
),
input
=
seq
,
reverse
=
True
)),
last_seq
(
input
=
recurrent_group
(
step
=
generate_rnn_simple
(
'rnn_subseq_forward'
),
input
=
SubsequenceInput
(
input
=
sub_seq
))),
last_seq
(
input
=
lstmemory_group
(
input
=
lstm_param
,
size
=
100
)),
last_seq
(
input
=
gru_group
(
input
=
gru_param
,
size
=
100
)))
python/paddle/trainer_config_helpers/tests/configs/test_sequence_pooling.py
浏览文件 @
58e1b3b3
from
paddle.trainer_config_helpers
import
*
settings
(
learning_rate
=
1e-4
,
batch_size
=
1000
)
settings
(
learning_rate
=
1e-4
,
batch_size
=
1000
)
din
=
data_layer
(
name
=
'dat_in'
,
size
=
100
)
POOL_TYPE
=
[
MaxPooling
,
AvgPooling
,
SumPooling
]
POOL_TYPE
=
[
MaxPooling
,
AvgPooling
,
SumPooling
]
AGG_LEVEL
=
[
AggregateLevel
.
EACH_SEQUENCE
,
AggregateLevel
.
EACH_TIMESTEP
]
AGG_LEVEL
=
[
AggregateLevel
.
EACH_SEQUENCE
,
AggregateLevel
.
EACH_TIMESTEP
]
opts
=
[]
...
...
@@ -24,7 +14,8 @@ for pt in POOL_TYPE:
for
al
in
AGG_LEVEL
:
opts
.
append
(
pooling_layer
(
input
=
din
,
agg_level
=
al
,
pooling_type
=
pt
()))
opts
.
append
(
pooling_layer
(
input
=
din
,
pooling_type
=
MaxPooling
(
output_max_index
=
True
)))
opts
.
append
(
pooling_layer
(
input
=
din
,
pooling_type
=
MaxPooling
(
output_max_index
=
True
)))
outputs
(
opts
)
python/paddle/trainer_config_helpers/tests/configs/test_split_datasource.py
浏览文件 @
58e1b3b3
from
paddle.trainer_config_helpers
import
*
define_py_data_sources2
(
train_list
=
"train.list"
,
define_py_data_sources2
(
train_list
=
"train.list"
,
test_list
=
"test.list"
,
module
=
[
"a"
,
"b"
],
obj
=
(
"c"
,
"d"
))
settings
(
learning_rate
=
1e-3
,
batch_size
=
1000
)
settings
(
learning_rate
=
1e-3
,
batch_size
=
1000
)
outputs
(
data_layer
(
name
=
"a"
,
size
=
10
))
python/paddle/trainer_config_helpers/tests/configs/test_spp_layer.py
浏览文件 @
58e1b3b3
from
paddle.trainer_config_helpers
import
*
settings
(
batch_size
=
100
,
learning_rate
=
1e-5
)
settings
(
batch_size
=
100
,
learning_rate
=
1e-5
)
data
=
data_layer
(
name
=
'data'
,
size
=
3200
)
spp
=
spp_layer
(
input
=
data
,
spp
=
spp_layer
(
input
=
data
,
pyramid_height
=
2
,
num_channels
=
16
,
pool_type
=
MaxPooling
(),
...
...
python/paddle/trainer_config_helpers/tests/configs/unused_layers.py
浏览文件 @
58e1b3b3
from
paddle.trainer_config_helpers
import
*
settings
(
batch_size
=
1000
,
learning_rate
=
1e-4
)
settings
(
batch_size
=
1000
,
learning_rate
=
1e-4
)
probs
=
data_layer
(
name
=
'probs'
,
size
=
100
)
...
...
python/paddle/trainer_config_helpers/tests/configs/util_layers.py
浏览文件 @
58e1b3b3
...
...
@@ -7,9 +7,7 @@ b = data_layer(name='b', size=10)
result
=
addto_layer
(
input
=
[
a
,
b
])
concat1
=
concat_layer
(
input
=
[
a
,
b
])
concat2
=
concat_layer
(
input
=
[
identity_projection
(
input
=
a
),
identity_projection
(
input
=
b
)
])
concat2
=
concat_layer
(
input
=
[
identity_projection
(
input
=
a
),
identity_projection
(
input
=
b
)])
outputs
(
result
,
concat1
,
concat2
)
python/paddle/trainer_config_helpers/tests/layers_test_config.py
浏览文件 @
58e1b3b3
...
...
@@ -24,13 +24,17 @@ z = out_prod_layer(input1=x, input2=y)
x1
=
fc_layer
(
input
=
x
,
size
=
5
)
y1
=
fc_layer
(
input
=
y
,
size
=
5
)
z1
=
mixed_layer
(
act
=
LinearActivation
(),
input
=
[
conv_operator
(
img
=
x1
,
z1
=
mixed_layer
(
act
=
LinearActivation
(),
input
=
[
conv_operator
(
img
=
x1
,
filter
=
y1
,
filter_size
=
1
,
num_filters
=
5
,
num_channels
=
5
,
stride
=
1
)])
stride
=
1
)
])
assert
z1
.
size
>
0
...
...
@@ -41,7 +45,8 @@ cos3 = cos_sim(a=x1, b=y2, size=3)
linear_comb
=
linear_comb_layer
(
weights
=
x1
,
vectors
=
y2
,
size
=
3
)
out
=
fc_layer
(
input
=
[
cos1
,
cos3
,
linear_comb
,
z
,
z1
],
out
=
fc_layer
(
input
=
[
cos1
,
cos3
,
linear_comb
,
z
,
z1
],
size
=
num_classes
,
act
=
SoftmaxActivation
())
...
...
@@ -49,26 +54,27 @@ print_layer(input=[out])
outputs
(
classification_cost
(
out
,
data_layer
(
name
=
"label"
,
size
=
num_classes
)))
dotmul
=
mixed_layer
(
input
=
[
dotmul_operator
(
a
=
x1
,
b
=
x1
),
dotmul_projection
(
input
=
y1
)])
proj_with_attr_init
=
mixed_layer
(
input
=
full_matrix_projection
(
input
=
y1
,
param_attr
=
ParamAttr
(
learning_rate
=
0
,
initial_mean
=
0
,
initial_std
=
0
)),
bias_attr
=
ParamAttr
(
initial_mean
=
0
,
initial_std
=
0
,
learning_rate
=
0
),
act
=
LinearActivation
(),
size
=
5
,
dotmul
=
mixed_layer
(
input
=
[
dotmul_operator
(
a
=
x1
,
b
=
x1
),
dotmul_projection
(
input
=
y1
)])
proj_with_attr_init
=
mixed_layer
(
input
=
full_matrix_projection
(
input
=
y1
,
param_attr
=
ParamAttr
(
learning_rate
=
0
,
initial_mean
=
0
,
initial_std
=
0
)),
bias_attr
=
ParamAttr
(
initial_mean
=
0
,
initial_std
=
0
,
learning_rate
=
0
),
act
=
LinearActivation
(),
size
=
5
,
name
=
'proj_with_attr_init'
)
# for ctc
tmp
=
fc_layer
(
input
=
[
x1
,
dotmul
,
proj_with_attr_init
],
tmp
=
fc_layer
(
input
=
[
x1
,
dotmul
,
proj_with_attr_init
],
size
=
num_classes
+
1
,
act
=
SoftmaxActivation
())
ctc
=
ctc_layer
(
input
=
tmp
,
label
=
y
,
size
=
num_classes
+
1
)
ctc
=
ctc_layer
(
input
=
tmp
,
label
=
y
,
size
=
num_classes
+
1
)
ctc_eval
=
ctc_error_evaluator
(
input
=
tmp
,
label
=
y
)
settings
(
...
...
@@ -76,5 +82,4 @@ settings(
learning_rate
=
2e-3
,
learning_method
=
AdamOptimizer
(),
regularization
=
L2Regularization
(
8e-4
),
gradient_clipping_threshold
=
25
)
gradient_clipping_threshold
=
25
)
python/paddle/trainer_config_helpers/utils.py
浏览文件 @
58e1b3b3
...
...
@@ -23,8 +23,8 @@ def deprecated(instead):
@
functools
.
wraps
(
func
)
def
__wrapper__
(
*
args
,
**
kwargs
):
logger
.
warning
(
"The interface %s is deprecated, "
"will be removed soon. Please use %s instead."
%
(
func
.
__name__
,
instead
))
"will be removed soon. Please use %s instead."
%
(
func
.
__name__
,
instead
))
return
func
(
*
args
,
**
kwargs
)
...
...
python/paddle/utils/image_util.py
浏览文件 @
58e1b3b3
...
...
@@ -16,17 +16,20 @@ import numpy as np
from
PIL
import
Image
from
cStringIO
import
StringIO
def
resize_image
(
img
,
target_size
):
"""
Resize an image so that the shorter edge has length target_size.
img: the input image to be resized.
target_size: the target resized image size.
"""
percent
=
(
target_size
/
float
(
min
(
img
.
size
[
0
],
img
.
size
[
1
])))
resized_size
=
int
(
round
(
img
.
size
[
0
]
*
percent
)),
int
(
round
(
img
.
size
[
1
]
*
percent
))
percent
=
(
target_size
/
float
(
min
(
img
.
size
[
0
],
img
.
size
[
1
])))
resized_size
=
int
(
round
(
img
.
size
[
0
]
*
percent
)),
int
(
round
(
img
.
size
[
1
]
*
percent
))
img
=
img
.
resize
(
resized_size
,
Image
.
ANTIALIAS
)
return
img
def
flip
(
im
):
"""
Return the flipped image.
...
...
@@ -38,6 +41,7 @@ def flip(im):
else
:
return
im
[:,
::
-
1
]
def
crop_img
(
im
,
inner_size
,
color
=
True
,
test
=
True
):
"""
Return cropped image.
...
...
@@ -50,20 +54,22 @@ def crop_img(im, inner_size, color=True, test=True):
If True, crop the center of images.
"""
if
color
:
height
,
width
=
max
(
inner_size
,
im
.
shape
[
1
]),
max
(
inner_size
,
im
.
shape
[
2
])
height
,
width
=
max
(
inner_size
,
im
.
shape
[
1
]),
max
(
inner_size
,
im
.
shape
[
2
])
padded_im
=
np
.
zeros
((
3
,
height
,
width
))
startY
=
(
height
-
im
.
shape
[
1
])
/
2
startX
=
(
width
-
im
.
shape
[
2
])
/
2
endY
,
endX
=
startY
+
im
.
shape
[
1
],
startX
+
im
.
shape
[
2
]
padded_im
[:,
startY
:
endY
,
startX
:
endX
]
=
im
padded_im
[:,
startY
:
endY
,
startX
:
endX
]
=
im
else
:
im
=
im
.
astype
(
'float32'
)
height
,
width
=
max
(
inner_size
,
im
.
shape
[
0
]),
max
(
inner_size
,
im
.
shape
[
1
])
height
,
width
=
max
(
inner_size
,
im
.
shape
[
0
]),
max
(
inner_size
,
im
.
shape
[
1
])
padded_im
=
np
.
zeros
((
height
,
width
))
startY
=
(
height
-
im
.
shape
[
0
])
/
2
startX
=
(
width
-
im
.
shape
[
1
])
/
2
endY
,
endX
=
startY
+
im
.
shape
[
0
],
startX
+
im
.
shape
[
1
]
padded_im
[
startY
:
endY
,
startX
:
endX
]
=
im
padded_im
[
startY
:
endY
,
startX
:
endX
]
=
im
if
test
:
startY
=
(
height
-
inner_size
)
/
2
startX
=
(
width
-
inner_size
)
/
2
...
...
@@ -72,19 +78,21 @@ def crop_img(im, inner_size, color=True, test=True):
startX
=
np
.
random
.
randint
(
0
,
width
-
inner_size
+
1
)
endY
,
endX
=
startY
+
inner_size
,
startX
+
inner_size
if
color
:
pic
=
padded_im
[:,
startY
:
endY
,
startX
:
endX
]
pic
=
padded_im
[:,
startY
:
endY
,
startX
:
endX
]
else
:
pic
=
padded_im
[
startY
:
endY
,
startX
:
endX
]
pic
=
padded_im
[
startY
:
endY
,
startX
:
endX
]
if
(
not
test
)
and
(
np
.
random
.
randint
(
2
)
==
0
):
pic
=
flip
(
pic
)
return
pic
def
decode_jpeg
(
jpeg_string
):
np_array
=
np
.
array
(
Image
.
open
(
StringIO
(
jpeg_string
)))
if
len
(
np_array
.
shape
)
==
3
:
np_array
=
np
.
transpose
(
np_array
,
(
2
,
0
,
1
))
return
np_array
def
preprocess_img
(
im
,
img_mean
,
crop_size
,
is_train
,
color
=
True
):
"""
Does data augmentation for images.
...
...
@@ -99,6 +107,7 @@ def preprocess_img(im, img_mean, crop_size, is_train, color=True):
pic
-=
img_mean
return
pic
.
flatten
()
def
load_meta
(
meta_path
,
mean_img_size
,
crop_size
,
color
=
True
):
"""
Return the loaded meta file.
...
...
@@ -109,17 +118,18 @@ def load_meta(meta_path, mean_img_size, crop_size, color=True):
mean
=
np
.
load
(
meta_path
)[
'data_mean'
]
border
=
(
mean_img_size
-
crop_size
)
/
2
if
color
:
assert
(
mean_img_size
*
mean_img_size
*
3
==
mean
.
shape
[
0
])
assert
(
mean_img_size
*
mean_img_size
*
3
==
mean
.
shape
[
0
])
mean
=
mean
.
reshape
(
3
,
mean_img_size
,
mean_img_size
)
mean
=
mean
[:,
border
:
border
+
crop_size
,
border
:
border
+
crop_size
].
astype
(
'float32'
)
mean
=
mean
[:,
border
:
border
+
crop_size
,
border
:
border
+
crop_size
].
astype
(
'float32'
)
else
:
assert
(
mean_img_size
*
mean_img_size
==
mean
.
shape
[
0
])
assert
(
mean_img_size
*
mean_img_size
==
mean
.
shape
[
0
])
mean
=
mean
.
reshape
(
mean_img_size
,
mean_img_size
)
mean
=
mean
[
border
:
border
+
crop_size
,
border
:
border
+
crop_size
].
astype
(
'float32'
)
mean
=
mean
[
border
:
border
+
crop_size
,
border
:
border
+
crop_size
].
astype
(
'float32'
)
return
mean
def
load_image
(
img_path
,
is_color
=
True
):
"""
Load image and return.
...
...
@@ -130,6 +140,7 @@ def load_image(img_path, is_color=True):
img
.
load
()
return
img
def
oversample
(
img
,
crop_dims
):
"""
image : iterable of (H x W x K) ndarrays
...
...
@@ -152,26 +163,29 @@ def oversample(img, crop_dims):
for
j
in
w_indices
:
crops_ix
[
curr
]
=
(
i
,
j
,
i
+
crop_dims
[
0
],
j
+
crop_dims
[
1
])
curr
+=
1
crops_ix
[
4
]
=
np
.
tile
(
im_center
,
(
1
,
2
))
+
np
.
concatenate
([
-
crop_dims
/
2.0
,
crop_dims
/
2.0
])
crops_ix
[
4
]
=
np
.
tile
(
im_center
,
(
1
,
2
))
+
np
.
concatenate
(
[
-
crop_dims
/
2.0
,
crop_dims
/
2.0
])
crops_ix
=
np
.
tile
(
crops_ix
,
(
2
,
1
))
# Extract crops
crops
=
np
.
empty
((
10
*
len
(
img
),
crop_dims
[
0
],
crop_dims
[
1
],
im_shape
[
-
1
]),
dtype
=
np
.
float32
)
crops
=
np
.
empty
(
(
10
*
len
(
img
),
crop_dims
[
0
],
crop_dims
[
1
],
im_shape
[
-
1
]),
dtype
=
np
.
float32
)
ix
=
0
for
im
in
img
:
for
crop
in
crops_ix
:
crops
[
ix
]
=
im
[
crop
[
0
]:
crop
[
2
],
crop
[
1
]:
crop
[
3
],
:]
ix
+=
1
crops
[
ix
-
5
:
ix
]
=
crops
[
ix
-
5
:
ix
,
:,
::
-
1
,
:]
# flip for mirrors
crops
[
ix
-
5
:
ix
]
=
crops
[
ix
-
5
:
ix
,
:,
::
-
1
,
:]
# flip for mirrors
return
crops
class
ImageTransformer
:
def
__init__
(
self
,
transpose
=
None
,
channel_swap
=
None
,
mean
=
None
,
is_color
=
True
):
def
__init__
(
self
,
transpose
=
None
,
channel_swap
=
None
,
mean
=
None
,
is_color
=
True
):
self
.
transpose
=
transpose
self
.
channel_swap
=
None
self
.
mean
=
None
...
...
python/paddle/utils/make_model_diagram.py
浏览文件 @
58e1b3b3
...
...
@@ -15,7 +15,6 @@
# Generate dot diagram file for the given paddle model config
# The generated file can be viewed using Graphviz (http://graphviz.org)
import
sys
import
traceback
...
...
@@ -46,16 +45,16 @@ def make_diagram(config_file, dot_file, config_arg_str):
submodel_layers
=
set
()
def
make_link
(
link
):
return
'l%s -> l%s;'
%
(
name2id
[
link
.
layer_name
],
name2id
[
link
.
link_name
])
return
'l%s -> l%s;'
%
(
name2id
[
link
.
layer_name
],
name2id
[
link
.
link_name
])
def
make_mem
(
mem
):
s
=
''
if
mem
.
boot_layer_name
:
s
+=
'l%s -> l%s;
\n
'
%
(
name2id
[
mem
.
boot_layer_name
],
name2id
[
mem
.
layer_name
])
s
+=
'l%s -> l%s [style=dashed];'
%
(
name2id
[
mem
.
layer_name
],
name2id
[
mem
.
link_name
])
s
+=
'l%s -> l%s;
\n
'
%
(
name2id
[
mem
.
boot_layer_name
],
name2id
[
mem
.
layer_name
])
s
+=
'l%s -> l%s [style=dashed];'
%
(
name2id
[
mem
.
layer_name
],
name2id
[
mem
.
link_name
])
return
s
print
>>
f
,
'digraph graphname {'
...
...
@@ -110,8 +109,8 @@ def make_diagram(config_file, dot_file, config_arg_str):
def
usage
():
print
>>
sys
.
stderr
,
(
"Usage: python show_model_diagram.py"
+
" CONFIG_FILE DOT_FILE [config_str]"
)
print
>>
sys
.
stderr
,
(
"Usage: python show_model_diagram.py"
+
" CONFIG_FILE DOT_FILE [config_str]"
)
exit
(
1
)
...
...
python/paddle/utils/plotcurve.py
浏览文件 @
58e1b3b3
...
...
@@ -12,7 +12,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Plot training and testing curve from paddle log.
It takes input from a file or stdin, and output to a file or stdout.
...
...
@@ -59,8 +58,8 @@ import re
import
os
def
plot_paddle_curve
(
keys
,
inputfile
,
outputfile
,
format
=
'png'
,
show_fig
=
False
):
def
plot_paddle_curve
(
keys
,
inputfile
,
outputfile
,
format
=
'png'
,
show_fig
=
False
):
"""Plot curves from paddle log and save to outputfile.
:param keys: a list of strings to be plotted, e.g. AvgCost
...
...
@@ -93,10 +92,15 @@ def plot_paddle_curve(keys, inputfile, outputfile,
return
m
=
len
(
keys
)
+
1
for
i
in
xrange
(
1
,
m
):
pyplot
.
plot
(
x
[:,
0
],
x
[:,
i
],
color
=
cm
.
jet
(
1.0
*
(
i
-
1
)
/
(
2
*
m
)),
pyplot
.
plot
(
x
[:,
0
],
x
[:,
i
],
color
=
cm
.
jet
(
1.0
*
(
i
-
1
)
/
(
2
*
m
)),
label
=
keys
[
i
-
1
])
if
(
x_test
.
shape
[
0
]
>
0
):
pyplot
.
plot
(
x
[:,
0
],
x_test
[:,
i
],
pyplot
.
plot
(
x
[:,
0
],
x_test
[:,
i
],
color
=
cm
.
jet
(
1.0
-
1.0
*
(
i
-
1
)
/
(
2
*
m
)),
label
=
"Test "
+
keys
[
i
-
1
])
pyplot
.
xlabel
(
'number of epoch'
)
...
...
@@ -111,11 +115,19 @@ def main(argv):
"""
main method of plotting curves.
"""
cmdparser
=
argparse
.
ArgumentParser
(
"Plot training and testing curves from paddle log file."
)
cmdparser
.
add_argument
(
'key'
,
nargs
=
'*'
,
help
=
'keys of scores to plot, the default is AvgCost'
)
cmdparser
.
add_argument
(
'-i'
,
'--input'
,
help
=
'input filename of paddle log, '
cmdparser
=
argparse
.
ArgumentParser
(
"Plot training and testing curves from paddle log file."
)
cmdparser
.
add_argument
(
'key'
,
nargs
=
'*'
,
help
=
'keys of scores to plot, the default is AvgCost'
)
cmdparser
.
add_argument
(
'-i'
,
'--input'
,
help
=
'input filename of paddle log, '
'default will be standard input'
)
cmdparser
.
add_argument
(
'-o'
,
'--output'
,
help
=
'output filename of figure, '
cmdparser
.
add_argument
(
'-o'
,
'--output'
,
help
=
'output filename of figure, '
'default will be standard output'
)
cmdparser
.
add_argument
(
'--format'
,
help
=
'figure format(png|pdf|ps|eps|svg)'
)
args
=
cmdparser
.
parse_args
(
argv
)
...
...
python/paddle/utils/predefined_net.py
浏览文件 @
58e1b3b3
...
...
@@ -41,9 +41,8 @@ def image_data(data_dir,
the size of the mean image, the number of classes.
async_load_data: whether to load image data asynchronuously.
"""
data_creator
=
ImageClassificationDatasetCreater
(
data_dir
,
processed_image_size
,
color
)
data_creator
=
ImageClassificationDatasetCreater
(
data_dir
,
processed_image_size
,
color
)
batch_data_dir
=
data_dir
train_list
=
os
.
path
.
join
(
batch_data_dir
,
train_list
)
test_list
=
os
.
path
.
join
(
batch_data_dir
,
test_list
)
...
...
@@ -64,13 +63,17 @@ def image_data(data_dir,
'color'
:
color_string
}
define_py_data_sources2
(
train_list
,
test_list
,
define_py_data_sources2
(
train_list
,
test_list
,
module
=
'image_provider'
,
obj
=
'processData'
,
args
=
args
)
return
{
"image_size"
:
image_size
,
return
{
"image_size"
:
image_size
,
"num_classes"
:
num_classes
,
"is_color"
:
is_color
}
"is_color"
:
is_color
}
def
get_extra_layer_attr
(
drop_rate
):
...
...
@@ -80,8 +83,8 @@ def get_extra_layer_attr(drop_rate):
return
ExtraLayerAttribute
(
drop_rate
=
drop_rate
)
def
image_data_layers
(
image_size
,
num_classes
,
is_
color
=
False
,
is_
predict
=
False
):
def
image_data_layers
(
image_size
,
num_classes
,
is_color
=
False
,
is_predict
=
False
):
"""
Data layers for image classification.
image_size: image size.
...
...
@@ -109,14 +112,16 @@ def simple_conv_net(data_conf, is_color=False):
num_classes: num of classes.
is_color: whether the input images are color.
"""
for
k
,
v
in
data_conf
.
iteritems
():
globals
()[
k
]
=
v
for
k
,
v
in
data_conf
.
iteritems
():
globals
()[
k
]
=
v
data_input
,
label_input
,
num_image_channels
=
\
image_data_layers
(
image_size
,
num_classes
,
is_color
,
is_predict
)
filter_sizes
=
[
5
,
5
]
num_channels
=
[
32
,
64
]
strides
=
[
1
,
1
]
fc_dims
=
[
500
]
conv_bn_pool1
=
img_conv_bn_pool
(
name
=
"g1"
,
conv_bn_pool1
=
img_conv_bn_pool
(
name
=
"g1"
,
input
=
data_input
,
filter_size
=
filter_sizes
[
0
],
num_channel
=
num_image_channels
,
...
...
@@ -126,7 +131,8 @@ def simple_conv_net(data_conf, is_color=False):
pool_size
=
3
,
pool_stride
=
2
,
act
=
ReluActivation
())
conv_bn_pool2
=
img_conv_bn_pool
(
name
=
"g2"
,
conv_bn_pool2
=
img_conv_bn_pool
(
name
=
"g2"
,
input
=
conv_bn_pool1
,
filter_size
=
filter_sizes
[
1
],
num_channel
=
num_channels
[
0
],
...
...
@@ -136,29 +142,28 @@ def simple_conv_net(data_conf, is_color=False):
pool_size
=
3
,
pool_stride
=
2
,
act
=
ReluActivation
())
fc3
=
fc_layer
(
name
=
"fc3"
,
input
=
conv_bn_pool2
,
dim
=
fc_dims
[
0
],
act
=
ReluActivation
())
fc3_dropped
=
dropout_layer
(
name
=
"fc3_dropped"
,
input
=
fc3
,
dropout_rate
=
0.5
)
output
=
fc_layer
(
name
=
"output"
,
fc3
=
fc_layer
(
name
=
"fc3"
,
input
=
conv_bn_pool2
,
dim
=
fc_dims
[
0
],
act
=
ReluActivation
())
fc3_dropped
=
dropout_layer
(
name
=
"fc3_dropped"
,
input
=
fc3
,
dropout_rate
=
0.5
)
output
=
fc_layer
(
name
=
"output"
,
input
=
fc3_dropped
,
dim
=
fc_dims
[
0
],
act
=
SoftmaxActivation
())
if
is_predict
:
end_of_network
(
output
)
else
:
cost
=
classify
(
name
=
"cost"
,
input
=
output
,
label
=
label_input
)
cost
=
classify
(
name
=
"cost"
,
input
=
output
,
label
=
label_input
)
end_of_network
(
cost
)
def
conv_layer_group
(
prefix_num
,
num_layers
,
input
,
input_channels
,
output_channels
,
drop_rates
=
[],
strides
=
[],
def
conv_layer_group
(
prefix_num
,
num_layers
,
input
,
input_channels
,
output_channels
,
drop_rates
=
[],
strides
=
[],
with_bn
=
[]):
"""
A set of convolution layers, and batch normalization layers,
...
...
@@ -190,7 +195,8 @@ def conv_layer_group(prefix_num, num_layers, input,
i_conv_in
=
group_output
i_channels_conv
=
input_channels
if
i
==
1
else
output_channels
conv_act
=
LinearActivation
()
if
with_bn
[
i
-
1
]
else
ReluActivation
()
conv_output
=
img_conv_layer
(
name
=
"conv%d_%d"
%
(
prefix_num
,
i
),
conv_output
=
img_conv_layer
(
name
=
"conv%d_%d"
%
(
prefix_num
,
i
),
input
=
i_conv_in
,
filter_size
=
3
,
num_channels
=
i_channels_conv
,
...
...
@@ -199,16 +205,17 @@ def conv_layer_group(prefix_num, num_layers, input,
padding
=
1
,
act
=
conv_act
)
if
with_bn
[
i
-
1
]:
bn
=
batch_norm_layer
(
name
=
"conv%d_%d_bn"
%
(
prefix_num
,
i
),
bn
=
batch_norm_layer
(
name
=
"conv%d_%d_bn"
%
(
prefix_num
,
i
),
input
=
conv_output
,
num_channels
=
output_channels
,
act
=
ReluActivation
(),
layer_attr
=
get_extra_layer_attr
(
drop_rate
=
drop_rates
[
i
-
1
]))
layer_attr
=
get_extra_layer_attr
(
drop_rate
=
drop_rates
[
i
-
1
]))
group_output
=
bn
else
:
group_output
=
conv_output
pool
=
img_pool_layer
(
name
=
"pool%d"
%
prefix_num
,
pool
=
img_pool_layer
(
name
=
"pool%d"
%
prefix_num
,
input
=
group_output
,
pool_size
=
2
,
num_channels
=
output_channels
,
...
...
@@ -216,10 +223,17 @@ def conv_layer_group(prefix_num, num_layers, input,
return
pool
def
vgg_conv_net
(
image_size
,
num_classes
,
num_layers
,
channels
,
strides
,
with_bn
,
fc_dims
,
drop_rates
,
drop_rates_fc
=
[],
is_color
=
True
,
is_predict
=
False
):
def
vgg_conv_net
(
image_size
,
num_classes
,
num_layers
,
channels
,
strides
,
with_bn
,
fc_dims
,
drop_rates
,
drop_rates_fc
=
[],
is_color
=
True
,
is_predict
=
False
):
"""
A Wrapper for a VGG network for image classification.
It is a set of convolutional groups followed by several fully
...
...
@@ -248,7 +262,8 @@ def vgg_conv_net(image_size, num_classes, num_layers,
for
i
in
range
(
len
(
num_layers
)):
input_layer
=
data_input
if
i
==
0
else
group_output
input_channels
=
3
if
i
==
0
else
channels
[
i
-
1
]
group_output
=
conv_layer_group
(
prefix_num
=
i
+
1
,
group_output
=
conv_layer_group
(
prefix_num
=
i
+
1
,
num_layers
=
num_layers
[
i
],
input
=
input_layer
,
input_channels
=
input_channels
,
...
...
@@ -259,7 +274,8 @@ def vgg_conv_net(image_size, num_classes, num_layers,
conv_output_name
=
group_output
if
drop_rates_fc
[
0
]
!=
0.0
:
dropped_pool_name
=
"pool_dropped"
conv_output_name
=
dropout_layer
(
name
=
dropped_pool_name
,
conv_output_name
=
dropout_layer
(
name
=
dropped_pool_name
,
input
=
conv_output_name
,
dropout_rate
=
drop_rates_fc
[
0
])
for
i
in
range
(
len
(
fc_dims
)):
...
...
@@ -267,32 +283,28 @@ def vgg_conv_net(image_size, num_classes, num_layers,
active_type
=
LinearActivation
()
if
i
==
len
(
fc_dims
)
-
1
else
ReluActivation
()
drop_rate
=
0.0
if
i
==
len
(
fc_dims
)
-
1
else
drop_rates_fc
[
i
+
1
]
fc_output
=
fc_layer
(
name
=
"fc%d"
%
(
i
+
1
),
fc_output
=
fc_layer
(
name
=
"fc%d"
%
(
i
+
1
),
input
=
input_layer_name
,
size
=
fc_dims
[
i
],
act
=
active_type
,
layer_attr
=
get_extra_layer_attr
(
drop_rate
))
bn
=
batch_norm_layer
(
name
=
"fc_bn"
,
bn
=
batch_norm_layer
(
name
=
"fc_bn"
,
input
=
fc_output
,
num_channels
=
fc_dims
[
len
(
fc_dims
)
-
1
],
act
=
ReluActivation
(),
layer_attr
=
get_extra_layer_attr
(
drop_rate
=
drop_rates_fc
[
-
1
]))
output
=
fc_layer
(
name
=
"output"
,
input
=
bn
,
size
=
num_classes
,
act
=
SoftmaxActivation
())
layer_attr
=
get_extra_layer_attr
(
drop_rate
=
drop_rates_fc
[
-
1
]))
output
=
fc_layer
(
name
=
"output"
,
input
=
bn
,
size
=
num_classes
,
act
=
SoftmaxActivation
())
if
is_predict
:
outputs
(
output
)
else
:
cost
=
classification_cost
(
name
=
"cost"
,
input
=
output
,
label
=
label_input
)
cost
=
classification_cost
(
name
=
"cost"
,
input
=
output
,
label
=
label_input
)
outputs
(
cost
)
def
vgg16_conv_net
(
image_size
,
num_classes
,
is_color
=
True
,
is_predict
=
False
):
def
vgg16_conv_net
(
image_size
,
num_classes
,
is_color
=
True
,
is_predict
=
False
):
"""
A Wrapper for a 16 layers VGG network for image classification.
The detailed architecture of the paper can be found here:
...
...
@@ -314,8 +326,7 @@ def vgg16_conv_net(image_size, num_classes,
is_predict
=
is_predict
)
def
small_vgg
(
data_conf
,
is_predict
=
False
):
def
small_vgg
(
data_conf
,
is_predict
=
False
):
"""
A Wrapper for a small VGG network for CIFAR-10 image classification.
The detailed architecture of the paper can be found here:
...
...
@@ -329,7 +340,8 @@ def small_vgg(data_conf,
num_classes: num of classes.
is_color: whether the input images are color.
"""
for
k
,
v
in
data_conf
.
iteritems
():
globals
()[
k
]
=
v
for
k
,
v
in
data_conf
.
iteritems
():
globals
()[
k
]
=
v
vgg_conv_net
(
image_size
,
num_classes
,
num_layers
=
[
2
,
2
,
3
,
3
],
channels
=
[
64
,
128
,
256
,
512
],
...
...
@@ -343,8 +355,11 @@ def small_vgg(data_conf,
is_predict
=
is_predict
)
def
training_settings
(
learning_rate
=
0.1
,
batch_size
=
128
,
algorithm
=
"sgd"
,
momentum
=
0.9
,
decay_rate
=
0.001
):
def
training_settings
(
learning_rate
=
0.1
,
batch_size
=
128
,
algorithm
=
"sgd"
,
momentum
=
0.9
,
decay_rate
=
0.001
):
"""
Training settings.
learning_rate: learning rate of the training.
...
...
@@ -357,7 +372,8 @@ def training_settings(learning_rate=0.1, batch_size=128, algorithm="sgd",
momentum: momentum of the training algorithm.
decay_rate: weight decay rate.
"""
Settings
(
algorithm
=
algorithm
,
Settings
(
algorithm
=
algorithm
,
batch_size
=
batch_size
,
learning_rate
=
learning_rate
/
float
(
batch_size
))
default_momentum
(
momentum
)
...
...
python/paddle/utils/preprocess_img.py
浏览文件 @
58e1b3b3
...
...
@@ -28,16 +28,18 @@ def resize_image(img, target_size):
img: the input image to be resized.
target_size: the target resized image size.
"""
percent
=
(
target_size
/
float
(
min
(
img
.
size
[
0
],
img
.
size
[
1
])))
percent
=
(
target_size
/
float
(
min
(
img
.
size
[
0
],
img
.
size
[
1
])))
resized_size
=
int
(
round
(
img
.
size
[
0
]
*
percent
)),
\
int
(
round
(
img
.
size
[
1
]
*
percent
))
img
=
img
.
resize
(
resized_size
,
Image
.
ANTIALIAS
)
return
img
class
DiskImage
:
"""
A class of image data on disk.
"""
def
__init__
(
self
,
path
,
target_size
):
"""
path: path of the image.
...
...
@@ -77,6 +79,7 @@ class ImageClassificationDatasetCreater(preprocess_util.DatasetCreater):
"""
A class to process data for image classification.
"""
def
__init__
(
self
,
data_path
,
target_size
,
color
=
True
):
"""
data_path: the path to store the training data and batches.
...
...
@@ -95,8 +98,7 @@ class ImageClassificationDatasetCreater(preprocess_util.DatasetCreater):
The meta file contains the meam image, as well as some configs.
data: the training Dataaet.
"""
output_path
=
os
.
path
.
join
(
self
.
data_path
,
self
.
batch_dir_name
,
output_path
=
os
.
path
.
join
(
self
.
data_path
,
self
.
batch_dir_name
,
self
.
meta_filename
)
if
self
.
color
:
mean_img
=
np
.
zeros
((
3
,
self
.
target_size
,
self
.
target_size
))
...
...
@@ -108,12 +110,13 @@ class ImageClassificationDatasetCreater(preprocess_util.DatasetCreater):
mean_img
+=
cropped_img
mean_img
/=
len
(
data
.
data
)
mean_img
=
mean_img
.
astype
(
'int32'
).
flatten
()
preprocess_util
.
save_file
({
"data_mean"
:
mean_img
,
preprocess_util
.
save_file
({
"data_mean"
:
mean_img
,
"image_size"
:
self
.
target_size
,
"mean_image_size"
:
self
.
target_size
,
"num_classes"
:
self
.
num_classes
,
"color"
:
self
.
color
},
output_path
)
"color"
:
self
.
color
},
output_path
)
pass
def
create_dataset_from_list
(
self
,
path
):
...
...
@@ -125,12 +128,11 @@ class ImageClassificationDatasetCreater(preprocess_util.DatasetCreater):
label_name
=
items
[
1
]
if
not
label_name
in
label_set
:
label_set
[
label_name
]
=
len
(
label_set
.
keys
())
img
=
DiskImage
(
path
=
image_path
,
target_size
=
self
.
target_size
)
label
=
preprocess_util
.
Lablel
(
label
=
label_set
[
label_name
],
name
=
label_name
)
img
=
DiskImage
(
path
=
image_path
,
target_size
=
self
.
target_size
)
label
=
preprocess_util
.
Lablel
(
label
=
label_set
[
label_name
],
name
=
label_name
)
return
preprocess_util
.
Dataset
(
data
,
self
.
keys
),
label_set
def
create_dataset_from_dir
(
self
,
path
):
"""
Create a Dataset object for image classfication.
...
...
@@ -143,11 +145,12 @@ class ImageClassificationDatasetCreater(preprocess_util.DatasetCreater):
label_set
=
preprocess_util
.
get_label_set_from_dir
(
path
)
data
=
[]
for
l_name
in
label_set
.
keys
():
image_paths
=
preprocess_util
.
list_images
(
os
.
path
.
join
(
path
,
l_name
))
image_paths
=
preprocess_util
.
list_images
(
os
.
path
.
join
(
path
,
l_name
))
for
p
in
image_paths
:
img
=
DiskImage
(
path
=
p
,
target_size
=
self
.
target_size
)
label
=
preprocess_util
.
Label
(
label
=
label_set
[
l_name
],
name
=
l_name
)
img
=
DiskImage
(
path
=
p
,
target_size
=
self
.
target_size
)
label
=
preprocess_util
.
Label
(
label
=
label_set
[
l_name
],
name
=
l_name
)
data
.
append
((
img
,
label
))
random
.
shuffle
(
data
)
return
preprocess_util
.
Dataset
(
data
,
self
.
keys
),
label_set
python/paddle/utils/preprocess_util.py
浏览文件 @
58e1b3b3
...
...
@@ -18,6 +18,7 @@ import cPickle as pickle
import
random
import
collections
def
save_file
(
data
,
filename
):
"""
Save data into pickle format.
...
...
@@ -26,6 +27,7 @@ def save_file(data, filename):
"""
pickle
.
dump
(
data
,
open
(
filename
,
'wb'
),
protocol
=
pickle
.
HIGHEST_PROTOCOL
)
def
save_list
(
l
,
outfile
):
"""
Save a list of string into a text file. There is one line for each string.
...
...
@@ -42,15 +44,20 @@ def exclude_pattern(f):
"""
return
f
.
startswith
(
"."
)
or
f
.
endswith
(
"~"
)
def
list_dirs
(
path
):
"""
Return a list of directories in path. Exclude all the directories that
start with '.'.
path: the base directory to search over.
"""
return
[
os
.
path
.
join
(
path
,
d
)
for
d
in
next
(
os
.
walk
(
path
))[
1
]
if
not
exclude_pattern
(
d
)]
return
[
os
.
path
.
join
(
path
,
d
)
for
d
in
next
(
os
.
walk
(
path
))[
1
]
if
not
exclude_pattern
(
d
)
]
def
list_images
(
path
,
exts
=
set
([
"jpg"
,
"png"
,
"bmp"
,
"jpeg"
])):
def
list_images
(
path
,
exts
=
set
([
"jpg"
,
"png"
,
"bmp"
,
"jpeg"
])):
"""
Return a list of images in path.
path: the base directory to search over.
...
...
@@ -60,6 +67,7 @@ def list_images(path, exts = set(["jpg", "png", "bmp", "jpeg"])):
if
os
.
path
.
isfile
(
os
.
path
.
join
(
path
,
d
))
and
not
exclude_pattern
(
d
)
\
and
os
.
path
.
splitext
(
d
)[
-
1
][
1
:]
in
exts
]
def
list_files
(
path
):
"""
Return a list of files in path.
...
...
@@ -69,6 +77,7 @@ def list_files(path):
return
[
os
.
path
.
join
(
path
,
d
)
for
d
in
os
.
listdir
(
path
)
\
if
os
.
path
.
isfile
(
os
.
path
.
join
(
path
,
d
))
and
not
exclude_pattern
(
d
)]
def
get_label_set_from_dir
(
path
):
"""
Return a dictionary of the labels and label ids from a path.
...
...
@@ -84,6 +93,7 @@ class Label:
"""
A class of label data.
"""
def
__init__
(
self
,
label
,
name
):
"""
label: the id of the label.
...
...
@@ -101,6 +111,7 @@ class Label:
def
__hash__
(
self
):
return
hash
((
self
.
label
))
class
Dataset
:
"""
A class to represent a dataset. A dataset contains a set of items.
...
...
@@ -108,6 +119,7 @@ class Dataset:
For example: in image classification dataset, each item contains two slot,
The first slot is an image, and the second slot is a label.
"""
def
__init__
(
self
,
data
,
keys
):
"""
data: a list of data.
...
...
@@ -120,7 +132,7 @@ class Dataset:
def
check_valid
(
self
):
for
d
in
self
.
data
:
assert
(
len
(
d
)
==
len
(
self
.
keys
))
assert
(
len
(
d
)
==
len
(
self
.
keys
))
def
permute
(
self
,
key_id
,
num_per_batch
):
"""
...
...
@@ -167,7 +179,8 @@ class Dataset:
while
len
(
permuted_data
)
<
len
(
self
.
data
):
for
k
in
keyvalue_indices
:
begin_idx
=
keyvalue_readpointer
[
k
]
end_idx
=
int
(
min
(
begin_idx
+
num_data_per_key_batch
,
end_idx
=
int
(
min
(
begin_idx
+
num_data_per_key_batch
,
len
(
keyvalue_indices
[
k
])))
print
"begin_idx, end_idx"
print
begin_idx
,
end_idx
...
...
@@ -177,12 +190,12 @@ class Dataset:
self
.
data
=
permuted_data
class
DataBatcher
:
"""
A class that is used to create batches for both training and testing
datasets.
"""
def
__init__
(
self
,
train_data
,
test_data
,
label_set
):
"""
train_data, test_data: Each one is a dataset object repesenting
...
...
@@ -193,7 +206,7 @@ class DataBatcher:
self
.
test_data
=
test_data
self
.
label_set
=
label_set
self
.
num_per_batch
=
5000
assert
(
self
.
train_data
.
keys
==
self
.
test_data
.
keys
)
assert
(
self
.
train_data
.
keys
==
self
.
test_data
.
keys
)
def
create_batches_and_list
(
self
,
output_path
,
train_list_name
,
test_list_name
,
label_set_name
):
...
...
@@ -202,16 +215,19 @@ class DataBatcher:
It also create train.list and test.list to indicate the list
of the batch files for training and testing data, respectively.
"""
train_list
=
self
.
create_batches
(
self
.
train_data
,
output_path
,
"train_"
,
self
.
num_per_batch
)
train_list
=
self
.
create_batches
(
self
.
train_data
,
output_path
,
"train_"
,
self
.
num_per_batch
)
test_list
=
self
.
create_batches
(
self
.
test_data
,
output_path
,
"test_"
,
self
.
num_per_batch
)
save_list
(
train_list
,
os
.
path
.
join
(
output_path
,
train_list_name
))
save_list
(
test_list
,
os
.
path
.
join
(
output_path
,
test_list_name
))
save_file
(
self
.
label_set
,
os
.
path
.
join
(
output_path
,
label_set_name
))
def
create_batches
(
self
,
data
,
output_path
,
prefix
=
""
,
num_data_per_batch
=
5000
):
def
create_batches
(
self
,
data
,
output_path
,
prefix
=
""
,
num_data_per_batch
=
5000
):
"""
Create batches for a Dataset object.
data: the Dataset object to process.
...
...
@@ -244,6 +260,7 @@ class DatasetCreater(object):
- create_dataset()
- create_meta_file()
"""
def
__init__
(
self
,
data_path
):
"""
data_path: the path to store the training data and batches.
...
...
@@ -324,23 +341,21 @@ class DatasetCreater(object):
out_path
=
os
.
path
.
join
(
self
.
data_path
,
self
.
batch_dir_name
)
if
not
os
.
path
.
exists
(
out_path
):
os
.
makedirs
(
out_path
)
if
(
self
.
overwrite
or
not
os
.
path
.
exists
(
os
.
path
.
join
(
out_path
,
self
.
train_list_name
))):
if
(
self
.
overwrite
or
not
os
.
path
.
exists
(
os
.
path
.
join
(
out_path
,
self
.
train_list_name
))):
train_data
,
train_label_set
=
\
self
.
create_dataset
(
train_path
)
test_data
,
test_label_set
=
\
self
.
create_dataset
(
test_path
)
train_data
.
permute
(
self
.
keys
.
index
(
self
.
permutate_key
),
self
.
num_per_batch
)
train_data
.
permute
(
self
.
keys
.
index
(
self
.
permutate_key
),
self
.
num_per_batch
)
assert
(
train_label_set
==
test_label_set
)
data_batcher
=
DataBatcher
(
train_data
,
test_data
,
train_label_set
)
assert
(
train_label_set
==
test_label_set
)
data_batcher
=
DataBatcher
(
train_data
,
test_data
,
train_label_set
)
data_batcher
.
num_per_batch
=
self
.
num_per_batch
data_batcher
.
create_batches_and_list
(
self
.
output_path
,
self
.
train_list_name
,
self
.
test_list_name
,
data_batcher
.
create_batches_and_list
(
self
.
output_path
,
self
.
train_list_name
,
self
.
test_list_name
,
self
.
label_set_name
)
self
.
num_classes
=
len
(
train_label_set
.
keys
())
self
.
create_meta_file
(
train_data
)
...
...
python/paddle/utils/show_pb.py
浏览文件 @
58e1b3b3
...
...
@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Show the content of proto buffer data file of PADDLE
"""
...
...
@@ -21,6 +20,7 @@ import sys
from
google.protobuf.internal.decoder
import
_DecodeVarint
import
paddle.proto.DataFormat_pb2
as
DataFormat
def
read_proto
(
file
,
message
):
"""
read a protobuffer struct from file, the length of the struct is stored as
...
...
@@ -39,7 +39,7 @@ def read_proto(file, message):
def
usage
():
print
>>
sys
.
stderr
,
"Usage: python show_pb.py PROTO_DATA_FILE"
print
>>
sys
.
stderr
,
"Usage: python show_pb.py PROTO_DATA_FILE"
exit
(
1
)
...
...
@@ -55,6 +55,3 @@ if __name__ == '__main__':
sample
=
DataFormat
.
DataSample
()
while
read_proto
(
f
,
sample
):
print
sample
python/paddle/utils/torch2paddle.py
浏览文件 @
58e1b3b3
...
...
@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Convert torch parameter file to paddle model files.
...
...
@@ -28,10 +27,11 @@ import torchfile
import
cPickle
as
pickle
import
argparse
# save parameters
def
save_layer_parameters
(
outfile
,
feats
):
version
=
0
value_size
=
4
;
value_size
=
4
ret
=
""
for
feat
in
feats
:
ret
+=
feat
.
tostring
()
...
...
@@ -41,16 +41,18 @@ def save_layer_parameters(outfile, feats):
fo
.
write
(
ret
)
fo
.
close
()
def
save_net_parameters
(
layers
,
params
,
output_path
):
for
i
in
range
(
len
(
layers
)):
weight
=
params
[
i
*
2
]
biases
=
params
[
i
*
2
+
1
]
weight
=
params
[
i
*
2
]
biases
=
params
[
i
*
2
+
1
]
weight_file
=
os
.
path
.
join
(
output_path
,
'_%s.w0'
%
layers
[
i
])
biases_file
=
os
.
path
.
join
(
output_path
,
'_%s.wbias'
%
layers
[
i
])
print
"Saving for layer %s."
%
layers
[
i
]
save_layer_parameters
(
weight_file
,
[
weight
])
save_layer_parameters
(
biases_file
,
biases
)
def
load_layer_parameters
(
filename
):
fn
=
open
(
filename
,
'rb'
)
version
,
=
struct
.
unpack
(
'i'
,
fn
.
read
(
4
))
...
...
@@ -60,16 +62,20 @@ def load_layer_parameters(filename):
value
=
np
.
fromfile
(
fn
,
dtype
)
return
value
def
main
(
argv
):
"""
main method of converting torch to paddle files.
:param argv:
:return:
"""
cmdparser
=
argparse
.
ArgumentParser
(
"Convert torch parameter file to paddle model files."
)
cmdparser
.
add_argument
(
'-i'
,
'--input'
,
help
=
'input filename of torch parameters'
)
cmdparser
=
argparse
.
ArgumentParser
(
"Convert torch parameter file to paddle model files."
)
cmdparser
.
add_argument
(
'-i'
,
'--input'
,
help
=
'input filename of torch parameters'
)
cmdparser
.
add_argument
(
'-l'
,
'--layers'
,
help
=
'list of layer names'
)
cmdparser
.
add_argument
(
'-o'
,
'--output'
,
help
=
'output file path of paddle model'
)
cmdparser
.
add_argument
(
'-o'
,
'--output'
,
help
=
'output file path of paddle model'
)
args
=
cmdparser
.
parse_args
(
argv
)
if
args
.
input
and
args
.
layers
and
args
.
output
:
...
...
@@ -77,7 +83,10 @@ def main(argv):
layers
=
[
line
.
strip
()
for
line
in
open
(
args
.
layers
,
'r'
)]
save_net_parameters
(
layers
,
params
,
args
.
output
)
else
:
print
(
'Usage: python torch2paddle.py -i torchfile.t7 -l layers.txt -o path/to/paddle_model'
)
print
(
'Usage: python torch2paddle.py -i torchfile.t7 -l layers.txt -o path/to/paddle_model'
)
if
__name__
==
"__main__"
:
main
(
sys
.
argv
[
1
:])
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录