Commit 58e1b3b3 authored by Yu Yang, committed by GitHub

Merge pull request #446 from QiJune/format_py_code_2nd

format python code in python directory
@@ -11,4 +11,3 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
@@ -18,9 +18,8 @@ import collections
import functools
import itertools
-logging.basicConfig(
-    format="[%(levelname)s %(asctime)s %(filename)s:%(lineno)s]"
-    " %(message)s")
+logging.basicConfig(format="[%(levelname)s %(asctime)s %(filename)s:%(lineno)s]"
+                    " %(message)s")
class SequenceType(object):
@@ -132,8 +131,10 @@ class InputOrderWrapper(object):
    def __call__(self, obj, filename):
        for item in self.generator(obj, filename):
            if isinstance(item, dict):
-                yield [item.get(input_name, None) for input_name in
-                       self.input_order]
+                yield [
+                    item.get(input_name, None)
+                    for input_name in self.input_order
+                ]
            else:
                yield item
@@ -162,8 +163,8 @@ class CheckWrapper(object):
                yield items
            except AssertionError as e:
                self.logger.warning(
-                    "Item (%s) is not fit the input type with error %s"
-                    % (repr(item), repr(e)))
+                    "Item (%s) is not fit the input type with error %s" %
+                    (repr(item), repr(e)))
                if self.check_fail_continue:
                    continue
@@ -202,13 +203,17 @@ class CheckWrapper(object):
            callback(each)
-def provider(input_types=None, should_shuffle=None, pool_size=-1,
+def provider(input_types=None,
+             should_shuffle=None,
+             pool_size=-1,
             min_pool_size=-1,
             can_over_batch_size=True,
             calc_batch_size=None,
             cache=CacheType.NO_CACHE,
-             check=False, check_fail_continue=False,
-             init_hook=None, **kwargs):
+             check=False,
+             check_fail_continue=False,
+             init_hook=None,
+             **kwargs):
    """
    Provider decorator. Use it to make a function into PyDataProvider2 object.
    In this function, user only need to get each sample for some train/test
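A minimal sketch of how this decorator is typically used, to give the reformatted signature above some context. The input types (dense_vector, integer_value) and the file layout are assumptions for illustration, not taken from this diff; the decorated function receives the provider settings object plus a file name and yields one sample per iteration in the declared input order.

from paddle.trainer.PyDataProvider2 import provider, dense_vector, integer_value


@provider(input_types=[dense_vector(784), integer_value(10)])
def process(settings, filename):
    # hypothetical file format: 784 comma-separated floats, then the class label
    with open(filename) as f:
        for line in f:
            fields = line.split(',')
            yield [float(x) for x in fields[:-1]], int(fields[-1])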
@@ -318,9 +323,9 @@ def provider(input_types=None, should_shuffle=None, pool_size=-1,
                    "Could not recognize should_shuffle (%s), "
                    "just use default value of should_shuffle."
                    " Please set should_shuffle to bool value or "
-                    "something in %s" % (
-                        repr(self.should_shuffle),
-                        repr(true_table + false_table)))
+                    "something in %s" %
+                    (repr(self.should_shuffle),
+                     repr(true_table + false_table)))
                self.should_shuffle = None
        self.pool_size = pool_size
@@ -351,8 +356,7 @@ def provider(input_types=None, should_shuffle=None, pool_size=-1,
            self.generator = InputOrderWrapper(self.generator,
                                               self.input_order)
        if self.check:
-            self.generator = CheckWrapper(self.generator,
-                                          self.slots,
+            self.generator = CheckWrapper(self.generator, self.slots,
                                          check_fail_continue,
                                          self.logger)
@@ -368,4 +372,3 @@ def deserialize_args(args):
    :return:
    """
    return cPickle.loads(args)
@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" """
This module provide a wrapper(decorator) to wrap a data process method into a This module provide a wrapper(decorator) to wrap a data process method into a
PyDataProvider. Some examples are shown `here <data_provider/python_case.html>`_. PyDataProvider. Some examples are shown `here <data_provider/python_case.html>`_.
...@@ -47,6 +46,7 @@ except ImportError: ...@@ -47,6 +46,7 @@ except ImportError:
import io import io
class SlotType(object): # Just a hint for user. class SlotType(object): # Just a hint for user.
pass pass
@@ -83,6 +83,7 @@ class SparseNonValueSlot(SlotType):
      - **SubSeq**: [[[int, int, ...], [int, ....], ...] , \
                    [[int, int, ...], [int, ....], ...] , ...]
    """
    def __init__(self, dim):
        """
        :param dim: slot dimension
@@ -294,8 +295,9 @@ class GeneralPyDataProvider:
            fn = "%s_%d" % (self.profile_filename, self.profile_count)
            sortby = "cumulative"
            with open(fn, "w") as f:
-                pstats.Stats(self.profiler, stream=f).sort_stats(
-                    sortby).print_stats()
+                pstats.Stats(
+                    self.profiler,
+                    stream=f).sort_stats(sortby).print_stats()
            self.logger.info("saving profile to file %s" % fn)
            self.profile_count += 1
        self.logger.info("resetting profile")
@@ -453,9 +455,10 @@ class GeneralPyDataProvider:
            seq_stream.flush()
            subseq_stream.flush()
-            return "".join([self.int_packer.pack(current_batch_size),
-                            data_bytes.getvalue(),
-                            seq_bytes.getvalue(), subseq_bytes.getvalue()])
+            return "".join([
+                self.int_packer.pack(current_batch_size), data_bytes.getvalue(),
+                seq_bytes.getvalue(), subseq_bytes.getvalue()
+            ])
        finally:
            data_stream.close()
@@ -516,7 +519,7 @@ class GeneralPyDataProvider:
                                          self.data_pool[idx])
                idx -= 1
-        ret_list += self.data_pool[self.data_pool_idx: idx + 1]
+        ret_list += self.data_pool[self.data_pool_idx:idx + 1]
        # for speed reason, just shift left index, not delete data actually.
        self.data_pool_idx = idx + 1
@@ -537,8 +540,8 @@ class GeneralPyDataProvider:
        if self.max_pool_size == 0:
            for i in xrange(min(self.file_count, len(self.generators))):
                self.data_pool += list(self.generators[i])
-            self.generators = self.generators[
-                min(self.file_count, len(self.generators)):]
+            self.generators = self.generators[min(self.file_count,
+                                                  len(self.generators)):]
            self.max_pool_size = len(self.data_pool)
        else:
            while len(self.data_pool) < self.max_pool_size and len(
@@ -562,9 +565,15 @@ def default_init_hook(cls, *args, **kwargs):
    del cls, args, kwargs
-def provider(slots=None, use_seq=False, should_shuffle=True, pool_size=1,
-             can_over_batch_size=True, calc_batch_size=lambda data: 1,
-             debug=False, init_hook=default_init_hook, profile_filename=None):
+def provider(slots=None,
+             use_seq=False,
+             should_shuffle=True,
+             pool_size=1,
+             can_over_batch_size=True,
+             calc_batch_size=lambda data: 1,
+             debug=False,
+             init_hook=default_init_hook,
+             profile_filename=None):
    """
    The decorator for PyDataProvider. User should use this to create Provider class.
    User should only concern how to read sample from file.
@@ -663,7 +672,7 @@ def provider(slots=None, use_seq=False, should_shuffle=True, pool_size=1,
        def __init__(self, *file_list, **kwargs):
            logging.basicConfig(
                format="[%(levelname)s %(asctime)s %(filename)s:%(lineno)s]"
-                    " %(message)s")
+                " %(message)s")
            self.logger = logging.getLogger("")
            if debug:
...
@@ -11,4 +11,3 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
@@ -13,7 +13,6 @@
# limitations under the License.
from __future__ import print_function
'''
The following functions are available in the config file:
@@ -101,50 +100,45 @@ except Exception as e:
    raise
logging.basicConfig(
-    format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s',
-)
+    format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s', )
logger = logging.getLogger('paddle')
logger.setLevel(logging.INFO)
__real_print__ = print
-print=logger.info
+print = logger.info
# from layer type name to layer class
g_layer_type_map = {}
# Initialize global variables. We use this function so that we can
# call parse_config() multiple times
def init_config_environment(
-        g_default_momentum = None,
-        g_default_decay_rate = None,
-        g_default_initial_mean = 0.,
-        g_default_initial_std = 0.01,
-        g_default_num_batches_regularization = None,
-        g_default_initial_strategy = 0,
-        g_default_initial_smart = False,
-        g_default_gradient_clipping_threshold = None,
-        g_default_device = None,
-        g_default_update_hooks = None,
-        g_default_compact_func = None,
-
-        g_config = TrainerConfig(),
-        g_layer_map = {},
-        g_parameter_map = {},
-        g_extended_config_funcs = {},
+        g_default_momentum=None,
+        g_default_decay_rate=None,
+        g_default_initial_mean=0.,
+        g_default_initial_std=0.01,
+        g_default_num_batches_regularization=None,
+        g_default_initial_strategy=0,
+        g_default_initial_smart=False,
+        g_default_gradient_clipping_threshold=None,
+        g_default_device=None,
+        g_default_update_hooks=None,
+        g_default_compact_func=None,
+        g_config=TrainerConfig(),
+        g_layer_map={},
+        g_parameter_map={},
+        g_extended_config_funcs={},
        # store command args of paddle_trainer
-        g_command_config_args = {},
+        g_command_config_args={},
        # Used for PyDataProvider to avoid duplicate module name
-        g_py_module_name_list = [],
-
-        g_current_submodel = None,
-        g_root_submodel = None,
-        g_submodel_map = {},
-        g_submodel_stack = [],
-        g_add_submodel_suffix = False,
-):
+        g_py_module_name_list=[],
+        g_current_submodel=None,
+        g_root_submodel=None,
+        g_submodel_map={},
+        g_submodel_stack=[],
+        g_add_submodel_suffix=False, ):
    for k, v in locals().iteritems():
        globals()[k] = copy.deepcopy(v)
@@ -161,43 +155,54 @@ def config_assert(b, msg):
    if not b:
        logger.fatal(msg)
g_config_funcs = {}
# decorator for indicating a function which can be used in config file
def config_func(func):
    g_config_funcs[func.func_name] = func
    return func
# decorator for indicating a class which can be used in config file
def config_class(cls):
    g_config_funcs[cls.__name__] = cls
    return cls
# decorator for indicating a class for a layer type
def config_layer(layer_type):
    def wrap(cls):
        g_config_funcs[cls.__name__] = cls
        g_layer_type_map[layer_type] = cls
        return cls
    return wrap
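Taken together, these three decorators implement a small name-to-callable registry: whatever is decorated gets stored in g_config_funcs under its own name (and config_layer additionally records the class in g_layer_type_map keyed by layer type), which is how plain function calls in a config file are resolved. A self-contained sketch of the same pattern, with illustrative names (g_registry, register) that are not part of the patch:

g_registry = {}


def register(obj):
    # record the callable under its own name so a config file can call it
    g_registry[obj.__name__] = obj
    return obj


@register
def Inputs(*names):
    return list(names)


# a config file's call "Inputs('image', 'label')" is then dispatched via the registry
assert g_registry["Inputs"]("image", "label") == ["image", "label"]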
def gen_parameter_name(layer_name, input_index):
    return '_%s.w%d' % (layer_name, input_index)
def gen_bias_parameter_name(layer_name):
    return '_%s.wbias' % layer_name
def default(x, default_value):
    return default_value if x is None else x
class Cfg(object):
    def add_keys(self, locals):
        for k, v in locals.iteritems():
            if not k.startswith('_'):
                self.__setattr__(k, v)
# functions available in config file
# Define the name of the input layers of the NeuralNetwork.
# The type of these layers must be "data".
# These layers will be provided with the DataBatch obtained
@@ -216,6 +221,7 @@ def Inputs(*args):
        if g_current_submodel is g_root_submodel:
            g_config.model_config.input_layer_names.append(name)
@config_func
def HasInputsSet():
    return len(g_current_submodel.input_layer_names) != 0
@@ -244,7 +250,7 @@ def SubModelBegin(name):
    global g_current_submodel, g_root_submodel, g_submodel_stack
    g_submodel_stack.append(g_current_submodel)
-    name = MakeLayerNameInParentSubmodel(name) #rename in nested submodel
+    name = MakeLayerNameInParentSubmodel(name)  #rename in nested submodel
    config_assert(name not in g_submodel_map,
                  'Duplicated submodel name: %s' % name)
@@ -254,36 +260,42 @@ def SubModelBegin(name):
    g_submodel_map[name] = sub_model
    g_current_submodel = sub_model
@config_func
-def SubModelEnd(name = None):
+def SubModelEnd(name=None):
    global g_current_submodel, g_root_submodel, g_submodel_stack
-    config_assert(g_current_submodel is not g_root_submodel, "submodel not begin")
+    config_assert(g_current_submodel is not g_root_submodel,
+                  "submodel not begin")
    if name is not None:
-        config_assert(g_current_submodel.name == MakeLayerNameInParentSubmodel(name),
-                      "submodel name error")
+        config_assert(
+            g_current_submodel.name == MakeLayerNameInParentSubmodel(name),
+            "submodel name error")
    g_current_submodel = g_submodel_stack.pop()
def MakeLayerNameInParentSubmodel(name):
    suffix = ""
    if len(g_submodel_stack) > 1:
        suffix = "@" + g_submodel_stack[-1].name
    return name + suffix
def GetLayerBaseName(name):
    return name.split('@')[0]
-def MakeLayerNameInSubmodel(name, submodel_name = None):
+def MakeLayerNameInSubmodel(name, submodel_name=None):
    global g_current_submodel
    global g_add_submodel_suffix
-    if (submodel_name is None
-        and not g_add_submodel_suffix
-        and not g_current_submodel.is_recurrent_layer_group):
+    if (submodel_name is None and not g_add_submodel_suffix and
+            not g_current_submodel.is_recurrent_layer_group):
        return name
    if submodel_name is None:
        submodel_name = g_current_submodel.name
    return name + "@" + submodel_name
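In other words, a layer name only gets a suffix when it is declared inside a named submodel or a recurrent layer group: "fc1" declared while submodel "rnn_step" is current becomes "fc1@rnn_step", and GetLayerBaseName recovers "fc1". A tiny standalone sketch of that convention (names are illustrative only):

def make_name_in_submodel(name, submodel_name=None):
    # mirrors MakeLayerNameInSubmodel: append "@<submodel>" unless at the root
    if submodel_name is None:
        return name
    return name + "@" + submodel_name


def get_layer_base_name(name):
    # mirrors GetLayerBaseName: strip the submodel suffix
    return name.split('@')[0]


assert make_name_in_submodel("fc1", "rnn_step") == "fc1@rnn_step"
assert get_layer_base_name("fc1@rnn_step") == "fc1"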
# Define a recurrent layer group begin with RecurrentLayerGroupBegin # Define a recurrent layer group begin with RecurrentLayerGroupBegin
# and end with RecurrentLayerGroupEnd. # and end with RecurrentLayerGroupEnd.
# A recurrent layer group forward/backward one frame after previous frame # A recurrent layer group forward/backward one frame after previous frame
...@@ -332,8 +344,10 @@ def RecurrentLayerGroupWithoutOutLinksBegin(name, ...@@ -332,8 +344,10 @@ def RecurrentLayerGroupWithoutOutLinksBegin(name,
if in_links_count == 0: if in_links_count == 0:
in_links_has_subseq = has_subseq in_links_has_subseq = has_subseq
else: else:
config_assert(in_links_has_subseq == has_subseq, config_assert(
"The sequence type of in_links should be the same in RecurrentLayerGroup") in_links_has_subseq == has_subseq,
"The sequence type of in_links should be the same in RecurrentLayerGroup"
)
in_links_count += 1 in_links_count += 1
layer_name = MakeLayerNameInParentSubmodel(name) layer_name = MakeLayerNameInParentSubmodel(name)
layer = g_layer_map[layer_name] layer = g_layer_map[layer_name]
...@@ -347,6 +361,7 @@ def RecurrentLayerGroupWithoutOutLinksBegin(name, ...@@ -347,6 +361,7 @@ def RecurrentLayerGroupWithoutOutLinksBegin(name,
pair.link_name = MakeLayerNameInSubmodel(name) pair.link_name = MakeLayerNameInSubmodel(name)
pair.has_subseq = has_subseq pair.has_subseq = has_subseq
@config_func @config_func
def RecurrentLayerGroupSetOutLink(link): def RecurrentLayerGroupSetOutLink(link):
if isinstance(link, basestring): if isinstance(link, basestring):
...@@ -363,8 +378,7 @@ def RecurrentLayerGroupSetOutLink(link): ...@@ -363,8 +378,7 @@ def RecurrentLayerGroupSetOutLink(link):
def RecurrentLayerGroupSetGenerator(generator=None): def RecurrentLayerGroupSetGenerator(generator=None):
generator.eos_layer_name = MakeLayerNameInSubmodel( generator.eos_layer_name = MakeLayerNameInSubmodel(generator.eos_layer_name)
generator.eos_layer_name)
g_current_submodel.generator.CopyFrom(generator) g_current_submodel.generator.CopyFrom(generator)
@@ -375,21 +389,18 @@ def RecurrentLayerGroupBegin(name,
                             generator=None,
                             target_inlinkname="",
                             seq_reversed=False):
-    RecurrentLayerGroupWithoutOutLinksBegin(name,
-                                            in_links,
-                                            seq_reversed,
+    RecurrentLayerGroupWithoutOutLinksBegin(name, in_links, seq_reversed,
                                            target_inlinkname)
    for link in out_links:
        RecurrentLayerGroupSetOutLink(link)
    if generator is not None:
        RecurrentLayerGroupSetGenerator(generator)
-        config_assert(len(in_links) == 0,
-                      "no in_links should be passed to generator")
-        config_assert(len(out_links) >= 1,
-                      "one or more than one out_links should be passed to generator")
+        config_assert(
+            len(in_links) == 0, "no in_links should be passed to generator")
+        config_assert(
+            len(out_links) >= 1,
+            "one or more than one out_links should be passed to generator")
@config_func
@@ -397,9 +408,10 @@ def RecurrentLayerGroupEnd(name):
    global g_current_submodel
    config_assert(g_current_submodel.is_recurrent_layer_group,
                  "RecurrentLayerGroup not begin")
    for pair in g_current_submodel.memories:  #check exist
        layer = g_layer_map[pair.layer_name]
-        config_assert(layer is not None, "memory declare wrong name:%s" % pair.layer_name)
+        config_assert(layer is not None, "memory declare wrong name:%s" %
+                      pair.layer_name)
        memory_link = g_layer_map[pair.link_name]
        config_assert(layer.size == memory_link.size,
                      "memory declare wrong size:%d" % memory_link.size)
...@@ -418,12 +430,14 @@ def RecurrentLayerGroupEnd(name): ...@@ -418,12 +430,14 @@ def RecurrentLayerGroupEnd(name):
else: else:
GatherAgentLayer(name=agent_name, size=layer.size) GatherAgentLayer(name=agent_name, size=layer.size)
# Define the model type # Define the model type
# currently, the paddle supports "nn", "recurrent_nn", "recursive_nn" and "multi_nn" # currently, the paddle supports "nn", "recurrent_nn", "recursive_nn" and "multi_nn"
@config_func @config_func
def model_type(name): def model_type(name):
g_config.model_config.type = name g_config.model_config.type = name
@config_class @config_class
class Bias(Cfg): class Bias(Cfg):
def __init__( def __init__(
...@@ -441,10 +455,10 @@ class Bias(Cfg): ...@@ -441,10 +455,10 @@ class Bias(Cfg):
sparse_remote_update=None, sparse_remote_update=None,
gradient_clipping_threshold=None, gradient_clipping_threshold=None,
is_static=None, is_static=None,
is_shared=None, is_shared=None, ):
):
self.add_keys(locals()) self.add_keys(locals())
# Define one input for a layer # Define one input for a layer
@config_class @config_class
class Input(Cfg): class Input(Cfg):
...@@ -477,19 +491,20 @@ class Input(Cfg): ...@@ -477,19 +491,20 @@ class Input(Cfg):
is_static=None, is_static=None,
is_shared=None, is_shared=None,
update_hooks=None, update_hooks=None,
input_layer_argument=None, input_layer_argument=None, ):
):
self.add_keys(locals()) self.add_keys(locals())
self.input_layer_name = MakeLayerNameInSubmodel(input_layer_name) self.input_layer_name = MakeLayerNameInSubmodel(input_layer_name)
# Define a projection for iexed layer # Define a projection for iexed layer
@config_class @config_class
class Projection(Input): class Projection(Input):
type = None # subclass should set it correctly type = None # subclass should set it correctly
def __init__( def __init__(
self, self,
input_layer_name, input_layer_name,
size = 0, # projection output size size=0, # projection output size
parameter_name=None, parameter_name=None,
learning_rate=None, learning_rate=None,
momentum=None, momentum=None,
...@@ -509,8 +524,7 @@ class Projection(Input): ...@@ -509,8 +524,7 @@ class Projection(Input):
is_static=None, is_static=None,
is_shared=None, is_shared=None,
update_hooks=None, update_hooks=None,
input_layer_argument=None, input_layer_argument=None, ):
):
self.add_keys(locals()) self.add_keys(locals())
self.input_layer_name = MakeLayerNameInSubmodel(input_layer_name) self.input_layer_name = MakeLayerNameInSubmodel(input_layer_name)
...@@ -524,8 +538,10 @@ class Projection(Input): ...@@ -524,8 +538,10 @@ class Projection(Input):
# to indicate using the size from Layer config # to indicate using the size from Layer config
def calc_output_size(self, input_layer_config): def calc_output_size(self, input_layer_config):
return self.size return self.size
def calc_parameter_size(self, input_size, output_size): def calc_parameter_size(self, input_size, output_size):
raise NotimplementedError raise NotimplementedError
def calc_parameter_dims(self, input_size, output_size): def calc_parameter_dims(self, input_size, output_size):
raise NotimplementedError raise NotimplementedError
...@@ -536,31 +552,32 @@ class IdentityProjection(Projection): ...@@ -536,31 +552,32 @@ class IdentityProjection(Projection):
def calc_output_size(self, input_layer_config): def calc_output_size(self, input_layer_config):
return input_layer_config.size return input_layer_config.size
def calc_parameter_size(self, input_size, output_size): def calc_parameter_size(self, input_size, output_size):
return 0 return 0
def calc_parameter_dims(self, input_size, output_size): def calc_parameter_dims(self, input_size, output_size):
return [] return []
# Like IdentityProjection, but layer size may smaller than input size, # Like IdentityProjection, but layer size may smaller than input size,
# the projection select dimesions [offset, offset+layer_size) from input # the projection select dimesions [offset, offset+layer_size) from input
@config_class @config_class
class IdentityOffsetProjection(Projection): class IdentityOffsetProjection(Projection):
type = 'identity_offset' type = 'identity_offset'
def __init__( def __init__(self, input_layer_name, offset, **xargs):
self, super(IdentityOffsetProjection, self).__init__(input_layer_name,
input_layer_name, **xargs)
offset,
**xargs):
super(IdentityOffsetProjection, self).__init__(
input_layer_name, **xargs)
self.proj_conf.offset = offset self.proj_conf.offset = offset
def calc_parameter_size(self, input_size, output_size): def calc_parameter_size(self, input_size, output_size):
return 0 return 0
def calc_parameter_dims(self, input_size, output_size): def calc_parameter_dims(self, input_size, output_size):
return [] return []
# DotMulProjection performs element-wise multiplication with weight # DotMulProjection performs element-wise multiplication with weight
@config_class @config_class
class DotMulProjection(Projection): class DotMulProjection(Projection):
...@@ -568,49 +585,53 @@ class DotMulProjection(Projection): ...@@ -568,49 +585,53 @@ class DotMulProjection(Projection):
def calc_output_size(self, input_layer_config): def calc_output_size(self, input_layer_config):
return input_layer_config.size return input_layer_config.size
def calc_parameter_size(self, input_size, output_size): def calc_parameter_size(self, input_size, output_size):
return output_size return output_size
def calc_parameter_dims(self, input_size, output_size): def calc_parameter_dims(self, input_size, output_size):
return [1, output_size] return [1, output_size]
@config_class @config_class
class TableProjection(Projection): class TableProjection(Projection):
type = 'table' type = 'table'
def calc_parameter_size(self, input_size, output_size): def calc_parameter_size(self, input_size, output_size):
return input_size * output_size return input_size * output_size
def calc_parameter_dims(self, input_size, output_size): def calc_parameter_dims(self, input_size, output_size):
return [input_size, output_size] return [input_size, output_size]
@config_class @config_class
class FullMatrixProjection(Projection): class FullMatrixProjection(Projection):
type = 'fc' type = 'fc'
def calc_parameter_size(self, input_size, output_size): def calc_parameter_size(self, input_size, output_size):
return input_size * output_size return input_size * output_size
def calc_parameter_dims(self, input_size, output_size): def calc_parameter_dims(self, input_size, output_size):
return [input_size, output_size] return [input_size, output_size]
@config_class @config_class
class TransposedFullMatrixProjection(Projection): class TransposedFullMatrixProjection(Projection):
type = 'trans_fc' type = 'trans_fc'
def calc_parameter_size(self, input_size, output_size): def calc_parameter_size(self, input_size, output_size):
return input_size * output_size return input_size * output_size
def calc_parameter_dims(self, input_size, output_size): def calc_parameter_dims(self, input_size, output_size):
return [output_size, input_size] return [output_size, input_size]
@config_class @config_class
class ContextProjection(Projection): class ContextProjection(Projection):
type = 'context' type = 'context'
def __init__( def __init__(self, input_layer_name, context_start, context_length,
self, trainable_padding, **xargs):
input_layer_name,
context_start,
context_length,
trainable_padding,
**xargs):
super(ContextProjection, self).__init__(input_layer_name, **xargs) super(ContextProjection, self).__init__(input_layer_name, **xargs)
self.proj_conf.context_start = context_start self.proj_conf.context_start = context_start
self.proj_conf.context_length = context_length self.proj_conf.context_length = context_length
...@@ -638,23 +659,21 @@ class ContextProjection(Projection): ...@@ -638,23 +659,21 @@ class ContextProjection(Projection):
class ConvProjection(Projection): class ConvProjection(Projection):
type = 'conv' type = 'conv'
def __init__( def __init__(self,
self, input_layer_name,
input_layer_name, num_filters=None,
num_filters=None, conv_conf=None,
conv_conf=None, **xargs):
**xargs):
super(ConvProjection, self).__init__(input_layer_name, **xargs) super(ConvProjection, self).__init__(input_layer_name, **xargs)
if num_filters is not None: if num_filters is not None:
self.proj_conf.num_filters = num_filters self.proj_conf.num_filters = num_filters
parse_conv(conv_conf, parse_conv(conv_conf, input_layer_name, self.proj_conf.conv_conf,
input_layer_name,
self.proj_conf.conv_conf,
num_filters) num_filters)
# TODO: support rectangle input # TODO: support rectangle input
self.proj_conf.output_size = (self.proj_conf.conv_conf.output_x ** 2) * num_filters self.proj_conf.output_size = (self.proj_conf.conv_conf.output_x**
2) * num_filters
def calc_output_size(self, input_layer_config): def calc_output_size(self, input_layer_config):
return self.proj_conf.output_size return self.proj_conf.output_size
...@@ -672,14 +691,15 @@ class ConvProjection(Projection): ...@@ -672,14 +691,15 @@ class ConvProjection(Projection):
def calc_parameter_dims(self, input_size, output_size): def calc_parameter_dims(self, input_size, output_size):
return None return None
# Define a operator for mixed layer # Define a operator for mixed layer
@config_class @config_class
class Operator(Cfg): class Operator(Cfg):
type = None # subclass should set it correctly type = None # subclass should set it correctly
def __init__( def __init__(
self, self,
input_layer_names, input_layer_names, ):
):
self.add_keys(locals()) self.add_keys(locals())
self.operator_conf = OperatorConfig() self.operator_conf = OperatorConfig()
self.operator_conf.type = self.type self.operator_conf.type = self.type
...@@ -690,16 +710,13 @@ class Operator(Cfg): ...@@ -690,16 +710,13 @@ class Operator(Cfg):
def calc_output_size(self, input_sizes): def calc_output_size(self, input_sizes):
return 0 return 0
@config_class @config_class
class DotMulOperator(Operator): class DotMulOperator(Operator):
type = 'dot_mul' type = 'dot_mul'
def __init__(
self, def __init__(self, input_layer_names, scale=None, **xargs):
input_layer_names, super(DotMulOperator, self).__init__(input_layer_names, **xargs)
scale=None,
**xargs):
super(DotMulOperator, self).__init__(
input_layer_names, **xargs)
if scale is not None: if scale is not None:
self.operator_conf.dotmul_scale = scale self.operator_conf.dotmul_scale = scale
...@@ -715,26 +732,24 @@ class DotMulOperator(Operator): ...@@ -715,26 +732,24 @@ class DotMulOperator(Operator):
return input_sizes[0] return input_sizes[0]
@config_class @config_class
class ConvOperator(Operator): class ConvOperator(Operator):
type = 'conv' type = 'conv'
def __init__(
self, def __init__(self,
input_layer_names, input_layer_names,
num_filters=None, num_filters=None,
conv_conf=None, conv_conf=None,
**xargs): **xargs):
super(ConvOperator, self).__init__( super(ConvOperator, self).__init__(input_layer_names, **xargs)
input_layer_names, **xargs)
if num_filters is not None: if num_filters is not None:
self.operator_conf.num_filters = num_filters self.operator_conf.num_filters = num_filters
parse_conv(conv_conf, parse_conv(conv_conf,
MakeLayerNameInSubmodel(input_layer_names[0]), MakeLayerNameInSubmodel(input_layer_names[0]),
self.operator_conf.conv_conf, self.operator_conf.conv_conf, num_filters)
num_filters) self.operator_conf.output_size = (self.operator_conf.conv_conf.output_x
self.operator_conf.output_size = (self.operator_conf.conv_conf.output_x ** 2) * num_filters **2) * num_filters
config_assert(len(input_layer_names) == 2, "Conv is binary operator") config_assert(len(input_layer_names) == 2, "Conv is binary operator")
...@@ -745,119 +760,106 @@ class ConvOperator(Operator): ...@@ -745,119 +760,106 @@ class ConvOperator(Operator):
# please refer to the comments in proto/ModelConfig.proto # please refer to the comments in proto/ModelConfig.proto
@config_class @config_class
class Conv(Cfg): class Conv(Cfg):
def __init__( def __init__(self,
self, filter_size,
filter_size, channels,
channels, padding=None,
padding = None, stride=None,
stride = None, groups=None,
groups = None, filter_channels=None,
filter_channels = None, output_x=None,
output_x = None, img_size=None,
img_size = None, caffe_mode=True,
caffe_mode = True, filter_size_y=None,
filter_size_y = None, padding_y=None,
padding_y = None, stride_y=None):
stride_y = None):
self.add_keys(locals()) self.add_keys(locals())
if filter_size_y is None: if filter_size_y is None:
self.filter_size_y = filter_size self.filter_size_y = filter_size
if padding_y is None: if padding_y is None:
self.padding_y = padding self.padding_y = padding
if stride_y is None: if stride_y is None:
self.stride_y = stride self.stride_y = stride
if output_x is not None: if output_x is not None:
config_assert(output_x <= 0) config_assert(output_x <= 0)
# please refer to the comments in proto/ModelConfig.proto # please refer to the comments in proto/ModelConfig.proto
@config_class @config_class
class BilinearInterp(Cfg): class BilinearInterp(Cfg):
def __init__( def __init__(self, out_size_x=None, out_size_y=None, num_channels=None):
self,
out_size_x = None,
out_size_y = None,
num_channels = None):
self.add_keys(locals()) self.add_keys(locals())
# please refer to the comments in proto/ModelConfig.proto # please refer to the comments in proto/ModelConfig.proto
@config_class @config_class
class Pool(Cfg): class Pool(Cfg):
def __init__( def __init__(self,
self, pool_type,
pool_type, channels,
channels, size_x,
size_x, size_y=None,
size_y = None, img_width=None,
img_width = None, start=None,
start = None, stride=None,
stride = None, stride_y=None,
stride_y = None, padding=None,
padding = None, padding_y=None):
padding_y = None):
self.add_keys(locals()) self.add_keys(locals())
# please refer to the comments in proto/ModelConfig.proto # please refer to the comments in proto/ModelConfig.proto
@config_class @config_class
class SpatialPyramidPool(Cfg): class SpatialPyramidPool(Cfg):
def __init__( def __init__(self, pool_type, pyramid_height, channels, img_width=None):
self,
pool_type,
pyramid_height,
channels,
img_width = None):
self.add_keys(locals()) self.add_keys(locals())
# please refer to the comments in proto/ModelConfig.proto # please refer to the comments in proto/ModelConfig.proto
@config_class @config_class
class Norm(Cfg): class Norm(Cfg):
def __init__( def __init__(self,
self, norm_type,
norm_type, channels,
channels, size,
size, scale,
scale, pow,
pow, output_x=None,
output_x = None, img_size=None,
img_size = None, blocked=None):
blocked = None):
self.add_keys(locals()) self.add_keys(locals())
# please refer to the comments in proto/ModelConfig.proto # please refer to the comments in proto/ModelConfig.proto
@config_class @config_class
class Image(Cfg): class Image(Cfg):
def __init__( def __init__(self, channels, img_size=None):
self,
channels,
img_size = None):
self.add_keys(locals()) self.add_keys(locals())
@config_class @config_class
class BlockExpand(Cfg): class BlockExpand(Cfg):
def __init__( def __init__(self,
self, channels,
channels, padding_x=0,
padding_x = 0, padding_y=0,
padding_y = 0, stride_x=0,
stride_x = 0, stride_y=0,
stride_y = 0, block_x=0,
block_x = 0, block_y=0,
block_y = 0, img_size_x=0,
img_size_x = 0, img_size_y=0,
img_size_y = 0, output_x=0,
output_x = 0, output_y=0):
output_y = 0):
self.add_keys(locals()) self.add_keys(locals())
@config_class @config_class
class MaxOut(Cfg): class MaxOut(Cfg):
def __init__( def __init__(self, channels, groups, img_size_x=0, img_size_y=0):
self,
channels,
groups,
img_size_x = 0,
img_size_y = 0):
self.add_keys(locals()) self.add_keys(locals())
def DataBase(async_load_data=False, def DataBase(async_load_data=False,
constant_slots=None, constant_slots=None,
data_ratio=1, data_ratio=1,
...@@ -871,23 +873,23 @@ def DataBase(async_load_data=False, ...@@ -871,23 +873,23 @@ def DataBase(async_load_data=False,
if constant_slots: if constant_slots:
data_config.constant_slots.extend(constant_slots) data_config.constant_slots.extend(constant_slots)
data_config.data_ratio=data_ratio data_config.data_ratio = data_ratio
data_config.is_main_data=is_main_data data_config.is_main_data = is_main_data
usage_ratio=default(usage_ratio, settings_deprecated["usage_ratio"]) usage_ratio = default(usage_ratio, settings_deprecated["usage_ratio"])
config_assert(usage_ratio >= 0 and usage_ratio <= 1, config_assert(usage_ratio >= 0 and usage_ratio <= 1,
"The range of usage_ratio is [0, 1]") "The range of usage_ratio is [0, 1]")
data_config.usage_ratio = usage_ratio data_config.usage_ratio = usage_ratio
return data_config return data_config
@config_func @config_func
def SimpleData( def SimpleData(files=None,
files=None, feat_dim=None,
feat_dim=None, context_len=None,
context_len=None, buffer_capacity=None,
buffer_capacity=None, **xargs):
**xargs):
data_config = DataBase(**xargs) data_config = DataBase(**xargs)
data_config.type = 'simple' data_config.type = 'simple'
data_config.files = files data_config.files = files
...@@ -898,31 +900,36 @@ def SimpleData( ...@@ -898,31 +900,36 @@ def SimpleData(
data_config.buffer_capacity = buffer_capacity data_config.buffer_capacity = buffer_capacity
return data_config return data_config
@config_func
-def PyData(
-        files=None,
-        type=None,
-        file_group_queue_capacity=None,
-        load_data_module=None,
-        load_data_object=None,
-        load_data_args="",
-        load_file_count=None,
-        constant_slots=None,
-        load_thread_num=None,
-        **xargs):
+def PyData(files=None,
+           type=None,
+           file_group_queue_capacity=None,
+           load_data_module=None,
+           load_data_object=None,
+           load_data_args="",
+           load_file_count=None,
+           constant_slots=None,
+           load_thread_num=None,
+           **xargs):
    data_config = DataBase(**xargs)
    data_config.type = 'py'
    if load_data_module in g_py_module_name_list:
        def get_path(module):
            m = __import__(load_data_module)
            return os.path.split(os.path.realpath(m.__file__))[0]
        # python C-api is not thread safe, one module can only be import once,
        # so here we nedd to copy the module with different names if it has to be
        # imported several times.
-        module_new_name = "%s_copy_%d" % (load_data_module, len(g_py_module_name_list))
+        module_new_name = "%s_copy_%d" % (load_data_module,
+                                          len(g_py_module_name_list))
        g_py_module_name_list.append(module_new_name)
-        module_path = "%s/%s.py" % (get_path(load_data_module), load_data_module)
-        new_module_path = "%s/%s.py" % (get_path(load_data_module), module_new_name)
+        module_path = "%s/%s.py" % (get_path(load_data_module),
+                                    load_data_module)
+        new_module_path = "%s/%s.py" % (get_path(load_data_module),
+                                        module_new_name)
        if os.path.isfile(module_path) == False:
            raise Exception("File %s is not exist." % module_path)
        shutil.copy2(module_path, new_module_path)
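The copy-then-import trick described in the comment above can be reproduced in isolation: CPython caches modules by name, so importing the same provider file twice under one name returns the cached module, and the config parser therefore clones the .py file under a fresh name before importing it again. A small sketch of the idea under those assumptions, using importlib for brevity; the helper name import_fresh_copy is illustrative and not part of the patch:

import importlib
import os
import shutil


def import_fresh_copy(module_name, copy_index):
    # clone the module's source under a new name, then import the clone,
    # so the original and the copy coexist as separate entries in sys.modules
    m = importlib.import_module(module_name)
    src_dir = os.path.dirname(os.path.realpath(m.__file__))
    new_name = "%s_copy_%d" % (module_name, copy_index)
    shutil.copy2(os.path.join(src_dir, module_name + ".py"),
                 os.path.join(src_dir, new_name + ".py"))
    return importlib.import_module(new_name)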
...@@ -947,15 +954,15 @@ def PyData( ...@@ -947,15 +954,15 @@ def PyData(
data_config.constant_slots.extend(constant_slots) data_config.constant_slots.extend(constant_slots)
return data_config return data_config
@config_func @config_func
def ProtoData( def ProtoData(files=None,
files=None, type=None,
type=None, file_group_queue_capacity=None,
file_group_queue_capacity=None, load_file_count=None,
load_file_count=None, constant_slots=None,
constant_slots=None, load_thread_num=None,
load_thread_num=None, **xargs):
**xargs):
data_config = DataBase(**xargs) data_config = DataBase(**xargs)
if type is None: if type is None:
data_config.type = 'proto' data_config.type = 'proto'
...@@ -976,25 +983,24 @@ def ProtoData( ...@@ -976,25 +983,24 @@ def ProtoData(
data_config.constant_slots.extend(constant_slots) data_config.constant_slots.extend(constant_slots)
return data_config return data_config
#real data for training is actually provided by "sub_data" data providers. #real data for training is actually provided by "sub_data" data providers.
@config_func @config_func
def MultiData( def MultiData(sub_data=[]):
sub_data=[]
):
data_config = DataConfig() data_config = DataConfig()
data_config.type = 'multi' data_config.type = 'multi'
data_config.sub_data_configs.extend(sub_data) data_config.sub_data_configs.extend(sub_data)
return data_config return data_config
@config_func @config_func
def Data( def Data(type,
type, files=None,
files=None, feat_dim=None,
feat_dim=None, slot_dims=None,
slot_dims=None, context_len=None,
context_len=None, buffer_capacity=None,
buffer_capacity=None, **xargs):
**xargs):
data_config = DataBase(**xargs) data_config = DataBase(**xargs)
data_config.type = type data_config.type = type
@@ -1030,15 +1036,19 @@ def TestData(data_config, async_load_data=None):
                  " Data definition")
    g_config.test_data_config.async_load_data = async_load_data
def parse_bilinear(bilinear, input_layer_name, bilinear_conf):
-    bilinear_conf.out_size_x = bilinear.out_size_x;
-    bilinear_conf.out_size_y = bilinear.out_size_y;
-    bilinear_conf.num_channels = bilinear.num_channels;
+    bilinear_conf.out_size_x = bilinear.out_size_x
+    bilinear_conf.out_size_y = bilinear.out_size_y
+    bilinear_conf.num_channels = bilinear.num_channels
'''
caffe_mode: compute the output size using floor instead of ceil,
which is consistent of caffe and CuDNN's convention.
'''
def cnn_output_size(img_size, filter_size, padding, stride, caffe_mode):
    output = (2 * padding + img_size - filter_size) / float(stride)
    if caffe_mode:
@@ -1046,81 +1056,89 @@ def cnn_output_size(img_size, filter_size, padding, stride, caffe_mode):
    else:
        return 1 + int(math.ceil(output))
'''
calcualte image_size based on output_size for convolution.
It is the reverse function of cnn_output_size
'''
def cnn_image_size(output_size, filter_size, padding, stride, caffe_mode):
    if caffe_mode:
        img_size = (output_size - 1) * stride + filter_size - 2 * padding
    else:
        img_size = (output_size - 2) * stride + filter_size - 2 * padding + 1
    return img_size
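As a quick sanity check of the two formulas above (a worked example with illustrative numbers, not part of the patch): with img_size=28, filter_size=5, padding=0, stride=1 and caffe_mode=True, cnn_output_size gives 1 + floor((0 + 28 - 5) / 1) = 24, and cnn_image_size maps 24 back to (24 - 1) * 1 + 5 - 0 = 28, so the two functions are inverses in caffe mode.

import math


def cnn_output_size(img_size, filter_size, padding, stride, caffe_mode):
    # same formula as above: floor in caffe mode, ceil otherwise
    output = (2 * padding + img_size - filter_size) / float(stride)
    return 1 + int(math.floor(output) if caffe_mode else math.ceil(output))


def cnn_image_size(output_size, filter_size, padding, stride, caffe_mode):
    # reverse mapping from output size back to image size
    if caffe_mode:
        return (output_size - 1) * stride + filter_size - 2 * padding
    return (output_size - 2) * stride + filter_size - 2 * padding + 1


assert cnn_output_size(28, 5, 0, 1, True) == 24
assert cnn_image_size(24, 5, 0, 1, True) == 28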
def parse_pool(pool, input_layer_name, pool_conf):
    pool_conf.pool_type = pool.pool_type
-    config_assert(pool.pool_type in ['max-projection', 'avg-projection',
-                                     'cudnn-max-pool', 'cudnn-avg-pool'],
-                  "pool-type %s is not in "
+    config_assert(pool.pool_type in [
+        'max-projection', 'avg-projection', 'cudnn-max-pool', 'cudnn-avg-pool'
+    ], "pool-type %s is not in "
                  "['max-projection', 'avg-projection', "
-                  "'cudnn-max-pool', 'cudnn-avg-pool']"
-                  % pool.pool_type)
+                  "'cudnn-max-pool', 'cudnn-avg-pool']" % pool.pool_type)
    pool_conf.channels = pool.channels
    pool_conf.size_x = pool.size_x
    pool_conf.stride = pool.stride
    pool_conf.size_y = default(pool.size_y, pool_conf.size_x)
-    pool_conf.stride_y = default(pool.stride_y, pool_conf.stride);
+    pool_conf.stride_y = default(pool.stride_y, pool_conf.stride)
    img_pixels = g_layer_map[input_layer_name].size / pool.channels
    # the img_width may be removed,
    # and it can be calculated automatically later.
-    pool_conf.img_size = default(pool.img_width, int(img_pixels ** 0.5))
+    pool_conf.img_size = default(pool.img_width, int(img_pixels**0.5))
    pool_conf.img_size_y = img_pixels / pool_conf.img_size
    config_assert(pool_conf.img_size * pool_conf.img_size_y == img_pixels,
-                  "Incorrect input image size %d for input image pixels %d"
-                  % (pool_conf.img_size, img_pixels))
+                  "Incorrect input image size %d for input image pixels %d" %
+                  (pool_conf.img_size, img_pixels))
    config_assert(not pool.start, "start is deprecated in pooling.")
    if pool.padding is not None:
        pool_conf.padding = pool.padding
    pool_conf.padding_y = default(pool.padding_y, pool_conf.padding)
-    pool_conf.output_x = cnn_output_size(pool_conf.img_size, pool_conf.size_x,
-                                         pool_conf.padding, pool_conf.stride, False)
-    pool_conf.output_y = cnn_output_size(pool_conf.img_size_y, pool_conf.size_y,
-                                         pool_conf.padding_y, pool_conf.stride_y, False)
+    pool_conf.output_x = cnn_output_size(
+        pool_conf.img_size, pool_conf.size_x, pool_conf.padding,
+        pool_conf.stride, False)
+    pool_conf.output_y = cnn_output_size(
+        pool_conf.img_size_y, pool_conf.size_y, pool_conf.padding_y,
+        pool_conf.stride_y, False)
def parse_spp(spp, input_layer_name, spp_conf):
    spp_conf.pool_type = spp.pool_type
    config_assert(spp.pool_type in ['max-projection', 'avg-projection'],
-                  "pool-type %s is not in " "['max-projection', 'avg-projection']"
-                  % spp.pool_type)
+                  "pool-type %s is not in "
+                  "['max-projection', 'avg-projection']" % spp.pool_type)
    spp_conf.pyramid_height = spp.pyramid_height
    spp_conf.channels = spp.channels
    img_pixels = g_layer_map[input_layer_name].size / spp_conf.channels
-    spp_conf.img_size = default(spp.img_width, int(img_pixels ** 0.5))
+    spp_conf.img_size = default(spp.img_width, int(img_pixels**0.5))
    spp_conf.img_size_y = img_pixels / spp_conf.img_size
    config_assert(spp_conf.img_size * spp_conf.img_size_y == img_pixels,
-                  "Incorrect input image size %d for input image pixels %d"
-                  % (spp_conf.img_size, img_pixels))
+                  "Incorrect input image size %d for input image pixels %d" %
+                  (spp_conf.img_size, img_pixels))
def parse_image(image, input_layer_name, image_conf):
    image_conf.channels = image.channels
    image_pixels = g_layer_map[input_layer_name].size / image_conf.channels
-    image_conf.img_size = int(image_pixels ** 0.5)
-    config_assert((image_conf.img_size ** 2) == image_pixels,
-                  "Incorrect input image size %d for input image pixels %d"
-                  % (image_conf.img_size, image_pixels))
+    image_conf.img_size = int(image_pixels**0.5)
+    config_assert((image_conf.img_size**2) == image_pixels,
                  "Incorrect input image size %d for input image pixels %d" %
                  (image_conf.img_size, image_pixels))
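A worked example of the bookkeeping in parse_image, with numbers chosen purely for illustration: a "data" layer holding 32x32 RGB images has size 3 * 32 * 32 = 3072, so image_pixels = 3072 / 3 = 1024 and img_size = sqrt(1024) = 32, and the assertion 32**2 == 1024 passes; a layer size that is not channels times a perfect square would trip the config_assert.

layer_size = 3 * 32 * 32                # hypothetical data layer of 32x32 RGB images
channels = 3
image_pixels = layer_size // channels   # 1024
img_size = int(image_pixels ** 0.5)     # 32
assert img_size ** 2 == image_pixels    # mirrors the config_assert above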
def parse_norm(norm, input_layer_name, norm_conf): def parse_norm(norm, input_layer_name, norm_conf):
norm_conf.norm_type = norm.norm_type norm_conf.norm_type = norm.norm_type
config_assert(norm.norm_type in ['rnorm', 'cmrnorm-projection'], config_assert(norm.norm_type in ['rnorm', 'cmrnorm-projection'],
"norm-type %s is not in [rnorm, 'cmrnorm-projection']" "norm-type %s is not in [rnorm, 'cmrnorm-projection']" %
% norm.norm_type) norm.norm_type)
norm_conf.channels = norm.channels norm_conf.channels = norm.channels
norm_conf.size = norm.size norm_conf.size = norm.size
norm_conf.scale = norm.scale norm_conf.scale = norm.scale
...@@ -1128,20 +1146,23 @@ def parse_norm(norm, input_layer_name, norm_conf): ...@@ -1128,20 +1146,23 @@ def parse_norm(norm, input_layer_name, norm_conf):
norm_conf.blocked = norm.blocked norm_conf.blocked = norm.blocked
img_pixels = g_layer_map[input_layer_name].size / norm.channels img_pixels = g_layer_map[input_layer_name].size / norm.channels
norm_conf.img_size = int(img_pixels ** 0.5) norm_conf.img_size = int(img_pixels**0.5)
config_assert((norm_conf.img_size ** 2) == img_pixels, config_assert((norm_conf.img_size**2) == img_pixels,
"Incorrect input image size %d for input image pixels %d" "Incorrect input image size %d for input image pixels %d" %
% (norm_conf.img_size, img_pixels)) (norm_conf.img_size, img_pixels))
norm_conf.output_x = norm_conf.img_size norm_conf.output_x = norm_conf.img_size
if norm.norm_type in ['cmrnorm-projection']: if norm.norm_type in ['cmrnorm-projection']:
norm_conf.scale /= norm.size norm_conf.scale /= norm.size
else: else:
norm_conf.scale /= norm.size ** 2 norm_conf.scale /= norm.size**2
''' '''
caffe_mode: compute the output size using floor instead of ceil, caffe_mode: compute the output size using floor instead of ceil,
which is consistent of caffe and CuDNN's convention. which is consistent of caffe and CuDNN's convention.
''' '''
def parse_conv(conv, input_layer_name, conv_conf, num_filters, trans=False): def parse_conv(conv, input_layer_name, conv_conf, num_filters, trans=False):
conv_conf.filter_size = conv.filter_size conv_conf.filter_size = conv.filter_size
conv_conf.filter_size_y = conv.filter_size_y conv_conf.filter_size_y = conv.filter_size_y
...@@ -1152,36 +1173,37 @@ def parse_conv(conv, input_layer_name, conv_conf, num_filters, trans=False): ...@@ -1152,36 +1173,37 @@ def parse_conv(conv, input_layer_name, conv_conf, num_filters, trans=False):
conv_conf.stride_y = conv.stride_y conv_conf.stride_y = conv.stride_y
conv_conf.groups = conv.groups conv_conf.groups = conv.groups
conv_conf.caffe_mode = conv.caffe_mode conv_conf.caffe_mode = conv.caffe_mode
if not trans: if not trans:
conv_conf.filter_channels = conv.channels / conv.groups conv_conf.filter_channels = conv.channels / conv.groups
img_pixels = g_layer_map[input_layer_name].size / conv.channels img_pixels = g_layer_map[input_layer_name].size / conv.channels
print('channels=%d size=%d'%(conv.channels, print('channels=%d size=%d' % (conv.channels,
g_layer_map[input_layer_name].size)) g_layer_map[input_layer_name].size))
conv_conf.img_size = int(img_pixels ** 0.5) conv_conf.img_size = int(img_pixels**0.5)
config_assert((conv_conf.img_size ** 2) == img_pixels, config_assert((conv_conf.img_size**2) == img_pixels, (
("Input layer %s: Incorrect input image size %d for input " "Input layer %s: Incorrect input image size %d for input " +
+ "image pixels %d") "image pixels %d") %
% (input_layer_name, conv_conf.img_size, img_pixels)) (input_layer_name, conv_conf.img_size, img_pixels))
conv_conf.output_x = cnn_output_size( conv_conf.output_x = cnn_output_size(
conv_conf.img_size, conv_conf.filter_size, conv_conf.img_size, conv_conf.filter_size, conv_conf.padding,
conv_conf.padding, conv_conf.stride, conv_conf.caffe_mode) conv_conf.stride, conv_conf.caffe_mode)
else: else:
conv_conf.filter_channels = num_filters / conv.groups conv_conf.filter_channels = num_filters / conv.groups
outputSize = g_layer_map[input_layer_name].size / conv.channels outputSize = g_layer_map[input_layer_name].size / conv.channels
print('channels=%d size=%d'%(conv.channels, print('channels=%d size=%d' % (conv.channels,
g_layer_map[input_layer_name].size)) g_layer_map[input_layer_name].size))
conv_conf.output_x = int(outputSize ** 0.5) conv_conf.output_x = int(outputSize**0.5)
config_assert((conv_conf.output_x ** 2) == outputSize, config_assert((conv_conf.output_x**2) == outputSize, (
("Input layer %s: Incorrect input image size %d for input " "Input layer %s: Incorrect input image size %d for input " +
+ "image pixels %d") "image pixels %d") %
% (input_layer_name, conv_conf.output_x, outputSize)) (input_layer_name, conv_conf.output_x, outputSize))
conv_conf.img_size = cnn_image_size( conv_conf.img_size = cnn_image_size(
conv_conf.output_x, conv_conf.filter_size, conv_conf.output_x, conv_conf.filter_size, conv_conf.padding,
conv_conf.padding, conv_conf.stride, conv_conf.caffe_mode) conv_conf.stride, conv_conf.caffe_mode)
def parse_block_expand(block_expand, input_layer_name, block_expand_conf): def parse_block_expand(block_expand, input_layer_name, block_expand_conf):
block_expand_conf.channels = block_expand.channels block_expand_conf.channels = block_expand.channels
...@@ -1207,27 +1229,28 @@ def parse_block_expand(block_expand, input_layer_name, block_expand_conf): ...@@ -1207,27 +1229,28 @@ def parse_block_expand(block_expand, input_layer_name, block_expand_conf):
block_expand.img_size_y, block_expand.block_y, block_expand.img_size_y, block_expand.block_y,
block_expand.padding_y, block_expand.stride_y, False) block_expand.padding_y, block_expand.stride_y, False)
def parse_maxout(maxout, input_layer_name, maxout_conf): def parse_maxout(maxout, input_layer_name, maxout_conf):
maxout_conf.channels = maxout.channels maxout_conf.channels = maxout.channels
maxout_conf.groups = maxout.groups maxout_conf.groups = maxout.groups
maxout_conf.img_size_x = maxout.img_size_x maxout_conf.img_size_x = maxout.img_size_x
maxout_conf.img_size_y = maxout.img_size_y maxout_conf.img_size_y = maxout.img_size_y
# Define an evaluator
@config_func
def Evaluator(
        name,
        type,
        inputs,
-        chunk_scheme = None,
-        num_chunk_types = None,
-        classification_threshold = None,
-        positive_label = None,
-        dict_file = None,
-        result_file = None,
-        num_results = None,
-        delimited = None,
-):
+        chunk_scheme=None,
+        num_chunk_types=None,
+        classification_threshold=None,
+        positive_label=None,
+        dict_file=None,
+        result_file=None,
+        num_results=None,
+        delimited=None, ):
    evaluator = g_config.model_config.evaluators.add()
    evaluator.type = type
    evaluator.name = MakeLayerNameInSubmodel(name)
...@@ -1256,19 +1279,20 @@ def Evaluator( ...@@ -1256,19 +1279,20 @@ def Evaluator(
if delimited is not None: if delimited is not None:
evaluator.delimited = delimited evaluator.delimited = delimited
class LayerBase(object): class LayerBase(object):
def __init__( def __init__(
self, self,
name, name,
type, type,
size, # size can be 0. In this case, subclass should set it. size, # size can be 0. In this case, subclass should set it.
inputs, inputs,
device=None, device=None,
active_type="", active_type="",
drop_rate=0., drop_rate=0.,
coeff=None): coeff=None):
config_assert('@' not in name, config_assert('@' not in name,
"layer name: %s contain special character @" % name) "layer name: %s contain special character @" % name)
global g_current_submodel global g_current_submodel
name = MakeLayerNameInSubmodel(name) name = MakeLayerNameInSubmodel(name)
...@@ -1307,8 +1331,8 @@ class LayerBase(object): ...@@ -1307,8 +1331,8 @@ class LayerBase(object):
if type_of(input) == str: if type_of(input) == str:
input_layer_name = input input_layer_name = input
input_config = Input( input_config = Input(
input_layer_name = input, input_layer_name=input,
parameter_name = gen_parameter_name(name, input_index)) parameter_name=gen_parameter_name(name, input_index))
input_layer_name = input_config.input_layer_name input_layer_name = input_config.input_layer_name
elif isinstance(input, Input): elif isinstance(input, Input):
input_layer_name = input.input_layer_name input_layer_name = input.input_layer_name
...@@ -1317,16 +1341,15 @@ class LayerBase(object): ...@@ -1317,16 +1341,15 @@ class LayerBase(object):
input_config.parameter_name = \ input_config.parameter_name = \
gen_parameter_name(name, input_index) gen_parameter_name(name, input_index)
elif isinstance(input, Operator): elif isinstance(input, Operator):
self.operators.append(input); self.operators.append(input)
input.operator_conf.input_indices.append(input_index) input.operator_conf.input_indices.append(input_index)
input_config = Input(input.input_layer_names[0]) input_config = Input(input.input_layer_names[0])
input_layer_name = input_config.input_layer_name input_layer_name = input_config.input_layer_name
else: else:
raise ValueError( raise ValueError('Wrong type for inputs: %s' % type_of(input))
'Wrong type for inputs: %s' % type_of(input))
config_assert(input_layer_name in g_layer_map, config_assert(input_layer_name in g_layer_map,
"Unknown input layer '%s' for layer %s" "Unknown input layer '%s' for layer %s" %
% (input_layer_name, name)) (input_layer_name, name))
self.inputs[input_index] = input_config self.inputs[input_index] = input_config
layer_input = self.config.inputs.add() layer_input = self.config.inputs.add()
layer_input.input_layer_name = input_config.input_layer_name layer_input.input_layer_name = input_config.input_layer_name
...@@ -1338,26 +1361,26 @@ class LayerBase(object): ...@@ -1338,26 +1361,26 @@ class LayerBase(object):
g_current_submodel.layer_names.append(self.config.name) g_current_submodel.layer_names.append(self.config.name)
def get_input_layer(self, input_index): def get_input_layer(self, input_index):
return g_layer_map[self.config.inputs[input_index].input_layer_name] return g_layer_map[self.config.inputs[input_index].input_layer_name]
# will return the bias created if not *for_self* # will return the bias created if not *for_self*
def create_bias_parameter( def create_bias_parameter(
self, self,
bias, # True/False or BiasCfg bias, # True/False or BiasCfg
size, size,
dims = None, dims=None,
for_self = True, # whether create bias for layer self for_self=True, # whether create bias for layer self
): ):
if size == 0: if size == 0:
return return
if dims is None: if dims is None:
dims = [1, size] dims = [1, size]
config_assert(type_of(bias) == bool or type_of(bias) == Bias, config_assert(
'Incorrect type for bias: %s' % type_of(bias)) type_of(bias) == bool or type_of(bias) == Bias,
'Incorrect type for bias: %s' % type_of(bias))
if type_of(bias) == bool: if type_of(bias) == bool:
if bias: if bias:
...@@ -1372,7 +1395,8 @@ class LayerBase(object): ...@@ -1372,7 +1395,8 @@ class LayerBase(object):
Parameter( Parameter(
bias.parameter_name, bias.parameter_name,
size, size,
self.config.device if self.config.HasField('device') else None, self.config.device
if self.config.HasField('device') else None,
dims, dims,
bias.learning_rate, bias.learning_rate,
bias.momentum, bias.momentum,
...@@ -1384,22 +1408,21 @@ class LayerBase(object): ...@@ -1384,22 +1408,21 @@ class LayerBase(object):
initial_smart=bias.initial_smart, initial_smart=bias.initial_smart,
num_batches_regularization=bias.num_batches_regularization, num_batches_regularization=bias.num_batches_regularization,
sparse_remote_update=bias.sparse_remote_update, sparse_remote_update=bias.sparse_remote_update,
gradient_clipping_threshold=bias.gradient_clipping_threshold, gradient_clipping_threshold=bias.
gradient_clipping_threshold,
is_static=bias.is_static, is_static=bias.is_static,
is_shared=bias.is_shared, is_shared=bias.is_shared, )
)
if for_self: if for_self:
self.config.bias_parameter_name = bias.parameter_name self.config.bias_parameter_name = bias.parameter_name
else: else:
return bias.parameter_name return bias.parameter_name
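A minimal standalone sketch of the bias bookkeeping above (names are illustrative, not PaddlePaddle API): the bias argument may be a bool or a Bias-like config, a zero-sized layer gets no bias, and the default parameter dims are [1, size].

    def resolve_bias_dims(bias, size):
        # bias is either a bool or a Bias-like config object (sketch only)
        if size == 0:
            return None
        if isinstance(bias, bool) and not bias:
            return None
        return [1, size]   # default dims when the caller gives none

    assert resolve_bias_dims(True, 256) == [1, 256]
    assert resolve_bias_dims(False, 256) is None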
def create_input_parameter( def create_input_parameter(self,
self, input_index,
input_index, size,
size, dims=None,
dims=None, sparse=None,
sparse = None, format=None):
format = None):
if dims is None: if dims is None:
# TODO(yuyang18): print warning and callstack here! # TODO(yuyang18): print warning and callstack here!
dims = list() dims = list()
...@@ -1414,12 +1437,12 @@ class LayerBase(object): ...@@ -1414,12 +1437,12 @@ class LayerBase(object):
if input_config.parameter_name in g_parameter_map: if input_config.parameter_name in g_parameter_map:
para = g_parameter_map[input_config.parameter_name] para = g_parameter_map[input_config.parameter_name]
config_assert(size == para.size, ('Shared parameter "%s" does not ' config_assert(size == para.size, (
+ 'have same size: %s vs. %s') 'Shared parameter "%s" does not ' + 'have same size: %s vs. %s')
% (input_config.parameter_name, para.size, size)) % (input_config.parameter_name, para.size, size))
config_assert(dims == para.dims, ('Shared parameter "%s" does not ' config_assert(dims == para.dims, (
+ 'have same dims: %s vs. %s') 'Shared parameter "%s" does not ' + 'have same dims: %s vs. %s')
% (input_config.parameter_name, para.dims, dims)) % (input_config.parameter_name, para.dims, dims))
return return
...@@ -1439,13 +1462,13 @@ class LayerBase(object): ...@@ -1439,13 +1462,13 @@ class LayerBase(object):
num_batches_regularization=input_config.num_batches_regularization, num_batches_regularization=input_config.num_batches_regularization,
sparse_remote_update=input_config.sparse_remote_update, sparse_remote_update=input_config.sparse_remote_update,
sparse_update=input_config.sparse_update, sparse_update=input_config.sparse_update,
gradient_clipping_threshold=input_config.gradient_clipping_threshold, gradient_clipping_threshold=input_config.
gradient_clipping_threshold,
sparse=sparse, sparse=sparse,
format=format, format=format,
is_static=input_config.is_static, is_static=input_config.is_static,
is_shared=input_config.is_shared, is_shared=input_config.is_shared,
update_hooks=input_config.update_hooks update_hooks=input_config.update_hooks)
)
def set_layer_size(self, size): def set_layer_size(self, size):
if self.config.size == 0: if self.config.size == 0:
...@@ -1455,27 +1478,18 @@ class LayerBase(object): ...@@ -1455,27 +1478,18 @@ class LayerBase(object):
'Different inputs result in' + 'Different inputs result in' +
'different layer size at layer %s' % self.config.name) 'different layer size at layer %s' % self.config.name)
@config_layer('multi_class_cross_entropy_with_selfnorm') @config_layer('multi_class_cross_entropy_with_selfnorm')
class MultiClassCrossEntropySelfNormCostLayer(LayerBase): class MultiClassCrossEntropySelfNormCostLayer(LayerBase):
def __init__( def __init__(self, name, inputs, softmax_selfnorm_alpha=0.1, **xargs):
self, super(MultiClassCrossEntropySelfNormCostLayer, self).__init__(
name, name, 'multi_class_cross_entropy_with_selfnorm', 0, inputs, **xargs)
inputs,
softmax_selfnorm_alpha=0.1,
**xargs):
super(MultiClassCrossEntropySelfNormCostLayer, self).__init__(name,
'multi_class_cross_entropy_with_selfnorm', 0, inputs, **xargs)
self.config.softmax_selfnorm_alpha = softmax_selfnorm_alpha self.config.softmax_selfnorm_alpha = softmax_selfnorm_alpha
@config_layer('fc') @config_layer('fc')
class FCLayer(LayerBase): class FCLayer(LayerBase):
def __init__( def __init__(self, name, size, inputs, bias=True, **xargs):
self,
name,
size,
inputs,
bias=True,
**xargs):
super(FCLayer, self).__init__(name, 'fc', size, inputs=inputs, **xargs) super(FCLayer, self).__init__(name, 'fc', size, inputs=inputs, **xargs)
for input_index in xrange(len(self.inputs)): for input_index in xrange(len(self.inputs)):
input_layer = self.get_input_layer(input_index) input_layer = self.get_input_layer(input_index)
...@@ -1489,22 +1503,23 @@ class FCLayer(LayerBase): ...@@ -1489,22 +1503,23 @@ class FCLayer(LayerBase):
else: else:
sparse = None sparse = None
self.create_input_parameter(input_index, psize, dims, sparse, format) self.create_input_parameter(input_index, psize, dims, sparse,
format)
self.create_bias_parameter(bias, self.config.size) self.create_bias_parameter(bias, self.config.size)
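The per-input weight size computed inside the loop is hidden by the hunk marker; assuming the usual fully-connected sizing (an assumption, since psize and dims are not visible here), each weight matrix holds input_size * output_size entries and an enabled bias adds output_size more.

    def fc_param_counts(input_sizes, output_size, with_bias=True):
        weights = [n * output_size for n in input_sizes]   # one matrix per input
        bias = output_size if with_bias else 0
        return weights, bias

    # two inputs of size 784 and 256 feeding an fc layer of size 128
    assert fc_param_counts([784, 256], 128) == ([100352, 32768], 128)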
@config_layer('selective_fc') @config_layer('selective_fc')
class SelectiveFCLayer(LayerBase): class SelectiveFCLayer(LayerBase):
def __init__( def __init__(self,
self, name,
name, size,
size, inputs,
inputs, bias=True,
bias=True, selective_fc_pass_generation=False,
selective_fc_pass_generation=False, has_selected_colums=True,
has_selected_colums=True, selective_fc_full_mul_ratio=0.02,
selective_fc_full_mul_ratio=0.02, selective_fc_parallel_plain_mul_thread_num=None,
selective_fc_parallel_plain_mul_thread_num=None, **xargs):
**xargs):
super(SelectiveFCLayer, self).__init__( super(SelectiveFCLayer, self).__init__(
name, 'selective_fc', size, inputs=inputs, **xargs) name, 'selective_fc', size, inputs=inputs, **xargs)
# user MUST know if selective fc is used in training, # user MUST know if selective fc is used in training,
...@@ -1525,8 +1540,8 @@ class SelectiveFCLayer(LayerBase): ...@@ -1525,8 +1540,8 @@ class SelectiveFCLayer(LayerBase):
input_num = len(self.inputs) input_num = len(self.inputs)
if has_selected_colums: if has_selected_colums:
config_assert(input_num >= 2, config_assert(input_num >= 2,
("if indices of selected columns are not specified, " ("if indices of selected columns are not specified, "
"selective_fc Layer has at least two inputs")) "selective_fc Layer has at least two inputs"))
input_num -= 1 input_num -= 1
for input_index in xrange(input_num): for input_index in xrange(input_num):
...@@ -1539,26 +1554,23 @@ class SelectiveFCLayer(LayerBase): ...@@ -1539,26 +1554,23 @@ class SelectiveFCLayer(LayerBase):
if sparse: if sparse:
psize = self.inputs[input_index].nnz psize = self.inputs[input_index].nnz
self.create_input_parameter( self.create_input_parameter(input_index, psize, dims, sparse,
input_index, psize, dims, sparse, format) format)
self.create_bias_parameter(bias, self.config.size) self.create_bias_parameter(bias, self.config.size)
@config_layer('print') @config_layer('print')
class PrintLayer(LayerBase): class PrintLayer(LayerBase):
def __init__( def __init__(self, name, inputs):
self,
name,
inputs):
super(PrintLayer, self).__init__(name, 'print', 0, inputs) super(PrintLayer, self).__init__(name, 'print', 0, inputs)
@config_layer('data') @config_layer('data')
class DataLayer(LayerBase): class DataLayer(LayerBase):
def __init__( def __init__(self, name, size, device=None):
self, super(DataLayer, self).__init__(
name, name, 'data', size, inputs=[], device=device)
size,
device=None):
super(DataLayer, self).__init__(name, 'data' , size, inputs=[], device=device)
''' '''
DataNormLayer: A layer for data normalization DataNormLayer: A layer for data normalization
...@@ -1586,14 +1598,11 @@ Note: ...@@ -1586,14 +1598,11 @@ Note:
min-max: y = (x-min)/(max-min) min-max: y = (x-min)/(max-min)
decimal-scaling: y = x/10^j, where j is the smallest integer such that max(|y|)<1 decimal-scaling: y = x/10^j, where j is the smallest integer such that max(|y|)<1
''' '''
@config_layer('data_norm') @config_layer('data_norm')
class DataNormLayer(LayerBase): class DataNormLayer(LayerBase):
def __init__( def __init__(self, name, inputs, data_norm_strategy="z-score", device=None):
self,
name,
inputs,
data_norm_strategy="z-score",
device=None):
super(DataNormLayer, self).__init__( super(DataNormLayer, self).__init__(
name, 'data_norm', 0, inputs=inputs, device=device) name, 'data_norm', 0, inputs=inputs, device=device)
self.config.data_norm_strategy = data_norm_strategy self.config.data_norm_strategy = data_norm_strategy
...@@ -1605,15 +1614,12 @@ class DataNormLayer(LayerBase): ...@@ -1605,15 +1614,12 @@ class DataNormLayer(LayerBase):
self.inputs[0].is_static = True self.inputs[0].is_static = True
self.create_input_parameter(0, para_size, para_dims) self.create_input_parameter(0, para_size, para_dims)
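The three strategies listed in the DataNormLayer docstring can be sketched in plain Python; z-score is assumed to be the usual (x - mean) / std, which the elided part of the docstring presumably spells out.

    import math

    def z_score(xs):
        mean = sum(xs) / len(xs)
        std = math.sqrt(sum((x - mean) ** 2 for x in xs) / len(xs))
        return [(x - mean) / std for x in xs]

    def min_max(xs):
        lo, hi = min(xs), max(xs)
        return [(x - lo) / (hi - lo) for x in xs]

    def decimal_scaling(xs):
        # smallest integer j such that max(|x|) / 10**j < 1
        j, peak = 0, max(abs(x) for x in xs)
        while peak / 10 ** j >= 1:
            j += 1
        return [x / 10 ** j for x in xs]

    print(min_max([2.0, 4.0, 10.0]))        # [0.0, 0.25, 1.0]
    print(decimal_scaling([3.0, -250.0]))   # [0.003, -0.25]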
@config_layer('prelu') @config_layer('prelu')
class ParameterReluLayer(LayerBase): class ParameterReluLayer(LayerBase):
layer_type = 'prelu' layer_type = 'prelu'
def __init__(
self, def __init__(self, name, inputs, partial_sum=1, **args):
name,
inputs,
partial_sum = 1,
**args):
super(ParameterReluLayer, self).__init__( super(ParameterReluLayer, self).__init__(
name, self.layer_type, 0, inputs=inputs, **args) name, self.layer_type, 0, inputs=inputs, **args)
config_assert(len(self.inputs) == 1) config_assert(len(self.inputs) == 1)
...@@ -1622,17 +1628,18 @@ class ParameterReluLayer(LayerBase): ...@@ -1622,17 +1628,18 @@ class ParameterReluLayer(LayerBase):
self.set_layer_size(input_layer.size) self.set_layer_size(input_layer.size)
self.create_input_parameter(0, input_layer.size / partial_sum) self.create_input_parameter(0, input_layer.size / partial_sum)
@config_layer('conv') @config_layer('conv')
class ConvLayerBase(LayerBase): class ConvLayerBase(LayerBase):
layer_type = 'conv' layer_type = 'conv'
def __init__(
self, def __init__(self,
name, name,
inputs=[], inputs=[],
bias=True, bias=True,
num_filters=None, num_filters=None,
shared_biases=False, shared_biases=False,
**xargs): **xargs):
super(ConvLayerBase, self).__init__( super(ConvLayerBase, self).__init__(
name, self.layer_type, 0, inputs=inputs, **xargs) name, self.layer_type, 0, inputs=inputs, **xargs)
...@@ -1649,7 +1656,7 @@ class ConvLayerBase(LayerBase): ...@@ -1649,7 +1656,7 @@ class ConvLayerBase(LayerBase):
config_assert(use_gpu, "cudnn_conv only support GPU") config_assert(use_gpu, "cudnn_conv only support GPU")
if (use_gpu == 1 and self.layer_type != "exconv" and if (use_gpu == 1 and self.layer_type != "exconv" and
(parallel_nn == 0 or self.config.device > -1)): (parallel_nn == 0 or self.config.device > -1)):
self.layer_type = "cudnn_conv" self.layer_type = "cudnn_conv"
else: else:
self.layer_type = "exconv" self.layer_type = "exconv"
...@@ -1661,17 +1668,14 @@ class ConvLayerBase(LayerBase): ...@@ -1661,17 +1668,14 @@ class ConvLayerBase(LayerBase):
for input_index in xrange(len(self.inputs)): for input_index in xrange(len(self.inputs)):
input_layer = self.get_input_layer(input_index) input_layer = self.get_input_layer(input_index)
parse_conv( parse_conv(self.inputs[input_index].conv, input_layer.name,
self.inputs[input_index].conv, self.config.inputs[input_index].conv_conf, num_filters)
input_layer.name,
self.config.inputs[input_index].conv_conf,
num_filters)
conv_conf = self.config.inputs[input_index].conv_conf conv_conf = self.config.inputs[input_index].conv_conf
psize = self.calc_parameter_size(conv_conf) psize = self.calc_parameter_size(conv_conf)
print("output size for %s is %d " % (name, conv_conf.output_x)) print("output size for %s is %d " % (name, conv_conf.output_x))
self.create_input_parameter(input_index, psize) self.create_input_parameter(input_index, psize)
self.set_layer_size( self.set_layer_size(
(conv_conf.output_x ** 2) * self.config.num_filters) (conv_conf.output_x**2) * self.config.num_filters)
psize = self.config.size psize = self.config.size
if shared_biases: if shared_biases:
...@@ -1682,10 +1686,12 @@ class ConvLayerBase(LayerBase): ...@@ -1682,10 +1686,12 @@ class ConvLayerBase(LayerBase):
return self.config.num_filters * conv_conf.filter_channels \ return self.config.num_filters * conv_conf.filter_channels \
* (conv_conf.filter_size * conv_conf.filter_size_y) * (conv_conf.filter_size * conv_conf.filter_size_y)
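calc_parameter_size above multiplies num_filters, filter_channels and the filter area; with filter_channels assumed to be channels / groups for the forward convolution case (that assignment sits above this hunk), a quick worked example:

    def conv_param_count(num_filters, channels, groups, filter_w, filter_h):
        filter_channels = channels // groups   # assumed, as in parse_conv
        return num_filters * filter_channels * filter_w * filter_h

    # 64 filters over a 3-channel image with 5x5 kernels -> 4800 weights
    assert conv_param_count(64, 3, 1, 5, 5) == 4800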
@config_layer('exconv') @config_layer('exconv')
class ConvLayer(ConvLayerBase): class ConvLayer(ConvLayerBase):
layer_type = 'exconv' layer_type = 'exconv'
@config_layer('cudnn_conv') @config_layer('cudnn_conv')
class ConvLayer(ConvLayerBase): class ConvLayer(ConvLayerBase):
layer_type = 'cudnn_conv' layer_type = 'cudnn_conv'
...@@ -1694,14 +1700,14 @@ class ConvLayer(ConvLayerBase): ...@@ -1694,14 +1700,14 @@ class ConvLayer(ConvLayerBase):
@config_layer('convt') @config_layer('convt')
class ConvTransLayerBase(LayerBase): class ConvTransLayerBase(LayerBase):
layer_type = 'convt' layer_type = 'convt'
def __init__(
self, def __init__(self,
name, name,
inputs=[], inputs=[],
bias=True, bias=True,
num_filters=None, num_filters=None,
shared_biases=False, shared_biases=False,
**xargs): **xargs):
super(ConvTransLayerBase, self).__init__( super(ConvTransLayerBase, self).__init__(
name, self.layer_type, 0, inputs=inputs, **xargs) name, self.layer_type, 0, inputs=inputs, **xargs)
...@@ -1732,7 +1738,7 @@ class ConvTransLayerBase(LayerBase): ...@@ -1732,7 +1738,7 @@ class ConvTransLayerBase(LayerBase):
print("output size for %s is %d " % (name, conv_conf.output_x)) print("output size for %s is %d " % (name, conv_conf.output_x))
self.create_input_parameter(input_index, psize) self.create_input_parameter(input_index, psize)
self.set_layer_size( self.set_layer_size(
(conv_conf.img_size ** 2) * self.config.num_filters) (conv_conf.img_size**2) * self.config.num_filters)
psize = self.config.size psize = self.config.size
if shared_biases: if shared_biases:
...@@ -1743,85 +1749,76 @@ class ConvTransLayerBase(LayerBase): ...@@ -1743,85 +1749,76 @@ class ConvTransLayerBase(LayerBase):
return conv_conf.channels * conv_conf.filter_channels \ return conv_conf.channels * conv_conf.filter_channels \
* (conv_conf.filter_size * conv_conf.filter_size_y) * (conv_conf.filter_size * conv_conf.filter_size_y)
@config_layer('exconvt') @config_layer('exconvt')
class ConvTransLayer(ConvTransLayerBase): class ConvTransLayer(ConvTransLayerBase):
layer_type = 'exconvt' layer_type = 'exconvt'
@config_layer('norm') @config_layer('norm')
class NormLayer(LayerBase): class NormLayer(LayerBase):
def __init__( def __init__(self, name, inputs, device=None):
self, super(NormLayer, self).__init__(
name, name, 'norm', 0, inputs=inputs, device=device)
inputs,
device=None):
super(NormLayer, self).__init__(name, 'norm', 0, inputs=inputs, device=device)
for input_index in xrange(len(self.inputs)): for input_index in xrange(len(self.inputs)):
input_layer = self.get_input_layer(input_index) input_layer = self.get_input_layer(input_index)
parse_norm( parse_norm(self.inputs[input_index].norm, input_layer.name,
self.inputs[input_index].norm, self.config.inputs[input_index].norm_conf)
input_layer.name,
self.config.inputs[input_index].norm_conf)
norm_conf = self.config.inputs[input_index].norm_conf norm_conf = self.config.inputs[input_index].norm_conf
self.set_layer_size((norm_conf.output_x ** 2) * norm_conf.channels) self.set_layer_size((norm_conf.output_x**2) * norm_conf.channels)
@config_layer('pool') @config_layer('pool')
class PoolLayer(LayerBase): class PoolLayer(LayerBase):
def __init__( def __init__(self, name, inputs, device=None):
self, super(PoolLayer, self).__init__(
name, name, 'pool', 0, inputs=inputs, device=device)
inputs,
device=None):
super(PoolLayer, self).__init__(name, 'pool', 0, inputs=inputs, device=device)
for input_index in xrange(len(self.inputs)): for input_index in xrange(len(self.inputs)):
input_layer = self.get_input_layer(input_index) input_layer = self.get_input_layer(input_index)
parse_pool( parse_pool(self.inputs[input_index].pool, input_layer.name,
self.inputs[input_index].pool, self.config.inputs[input_index].pool_conf)
input_layer.name,
self.config.inputs[input_index].pool_conf)
pool_conf = self.config.inputs[input_index].pool_conf pool_conf = self.config.inputs[input_index].pool_conf
print("output size for %s is %d*%d " % ( print("output size for %s is %d*%d " % (name, pool_conf.output_y,
name, pool_conf.output_y, pool_conf.output_x)) pool_conf.output_x))
self.set_layer_size((pool_conf.output_x * pool_conf.output_y) * pool_conf.channels) self.set_layer_size(
(pool_conf.output_x * pool_conf.output_y) * pool_conf.channels)
@config_layer('spp') @config_layer('spp')
class SpatialPyramidPoolLayer(LayerBase): class SpatialPyramidPoolLayer(LayerBase):
def __init__( def __init__(self, name, inputs, device=None):
self, super(SpatialPyramidPoolLayer, self).__init__(
name, name, 'spp', 0, inputs=inputs, device=device)
inputs,
device=None):
super(SpatialPyramidPoolLayer, self).__init__(name, 'spp', 0, inputs=inputs, device=device)
for input_index in xrange(len(self.inputs)): for input_index in xrange(len(self.inputs)):
input_layer = self.get_input_layer(input_index) input_layer = self.get_input_layer(input_index)
parse_spp( parse_spp(self.inputs[input_index].spp, input_layer.name,
self.inputs[input_index].spp, self.config.inputs[input_index].spp_conf)
input_layer.name,
self.config.inputs[input_index].spp_conf)
spp_conf = self.config.inputs[input_index].spp_conf spp_conf = self.config.inputs[input_index].spp_conf
output_size = (pow(4, spp_conf.pyramid_height) - 1) / (4 - 1) output_size = (pow(4, spp_conf.pyramid_height) - 1) / (4 - 1)
print("output size for %s is %d " % (name, output_size)) print("output size for %s is %d " % (name, output_size))
self.set_layer_size(output_size * spp_conf.channels) self.set_layer_size(output_size * spp_conf.channels)
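The expression (4**pyramid_height - 1) / 3 above is just the geometric series 1 + 4 + 16 + ...: pyramid level i contributes a 2**i by 2**i grid of pooled bins, and every bin spans all channels.

    def spp_output_size(pyramid_height, channels):
        bins_per_channel = (4 ** pyramid_height - 1) // 3   # 1 + 4 + 16 + ...
        return bins_per_channel * channels

    assert spp_output_size(3, 256) == 21 * 256   # 1 + 4 + 16 = 21 bins per channel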
@config_layer('batch_norm') @config_layer('batch_norm')
class BatchNormLayer(LayerBase): class BatchNormLayer(LayerBase):
layer_type = 'batch_norm' layer_type = 'batch_norm'
def __init__(
self, def __init__(self,
name, name,
inputs, inputs,
active_type="linear", active_type="linear",
bias=True, bias=True,
device=None, device=None,
use_global_stats=True, use_global_stats=True,
moving_average_fraction=0.9, moving_average_fraction=0.9,
batch_norm_type=None, batch_norm_type=None,
**xargs): **xargs):
if inputs is None: if inputs is None:
inputs = [] inputs = []
elif not isinstance(inputs, list): elif not isinstance(inputs, list):
inputs = [inputs] inputs = [inputs]
config_assert(len(inputs) == 1, config_assert(
"BatchNormLayer must have one and only one input") len(inputs) == 1, "BatchNormLayer must have one and only one input")
# Create Input for moving mean and std, # Create Input for moving mean and std,
# in batch normalization layer. # in batch normalization layer.
# These parameters need no updates, so is_static is set to True. # These parameters need no updates, so is_static is set to True.
...@@ -1830,12 +1827,13 @@ class BatchNormLayer(LayerBase): ...@@ -1830,12 +1827,13 @@ class BatchNormLayer(LayerBase):
use_gpu = bool(int(g_command_config_args.get("use_gpu", 0))) use_gpu = bool(int(g_command_config_args.get("use_gpu", 0)))
is_shared = True if not use_gpu else False is_shared = True if not use_gpu else False
for i in xrange(2): for i in xrange(2):
inputs.append(Input(inputs[0].input_layer_name, inputs.append(
initial_std=0.0, Input(
initial_mean=0.0, inputs[0].input_layer_name,
is_static=True, initial_std=0.0,
is_shared=is_shared, initial_mean=0.0,
)) is_static=True,
is_shared=is_shared, ))
parallel_nn = bool(int(g_command_config_args.get("parallel_nn", 0))) parallel_nn = bool(int(g_command_config_args.get("parallel_nn", 0)))
cudnn_version = int(g_command_config_args.get("cudnn_version", 0)) cudnn_version = int(g_command_config_args.get("cudnn_version", 0))
...@@ -1845,21 +1843,25 @@ class BatchNormLayer(LayerBase): ...@@ -1845,21 +1843,25 @@ class BatchNormLayer(LayerBase):
((not parallel_nn) or self.config.device > -1) and \ ((not parallel_nn) or self.config.device > -1) and \
cudnn_version >= 4007 cudnn_version >= 4007
self.layer_type = "cudnn_batch_norm" if use_cudnn else "batch_norm" self.layer_type = "cudnn_batch_norm" if use_cudnn else "batch_norm"
super(BatchNormLayer, self).__init__(name, self.layer_type, 0, super(BatchNormLayer, self).__init__(
active_type=active_type, name,
inputs=inputs, device=device, **xargs) self.layer_type,
0,
active_type=active_type,
inputs=inputs,
device=device,
**xargs)
if use_global_stats is not None: if use_global_stats is not None:
self.config.use_global_stats = use_global_stats self.config.use_global_stats = use_global_stats
if moving_average_fraction is not None: if moving_average_fraction is not None:
self.config.moving_average_fraction = moving_average_fraction self.config.moving_average_fraction = moving_average_fraction
input_layer= self.get_input_layer(0) input_layer = self.get_input_layer(0)
parse_image(self.inputs[0].image, parse_image(self.inputs[0].image, input_layer.name,
input_layer.name,
self.config.inputs[0].image_conf) self.config.inputs[0].image_conf)
image_conf = self.config.inputs[0].image_conf image_conf = self.config.inputs[0].image_conf
self.set_layer_size((image_conf.img_size ** 2) * image_conf.channels) self.set_layer_size((image_conf.img_size**2) * image_conf.channels)
psize = self.calc_parameter_size(image_conf) psize = self.calc_parameter_size(image_conf)
dims = [1, psize] dims = [1, psize]
...@@ -1872,75 +1874,74 @@ class BatchNormLayer(LayerBase): ...@@ -1872,75 +1874,74 @@ class BatchNormLayer(LayerBase):
def calc_parameter_size(self, image_conf): def calc_parameter_size(self, image_conf):
return image_conf.channels return image_conf.channels
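Tying the BatchNormLayer sizes together: the layer size set above is img_size**2 * channels activations, while calc_parameter_size returns just channels, because every quantity this layer stores (the trained parameter as well as the two static inputs appended for the moving statistics) is assumed to hold one value per channel.

    def batch_norm_sizes(img_size, channels):
        layer_size = (img_size ** 2) * channels   # activations per sample
        per_param = channels                      # one value per channel
        return layer_size, per_param

    # a 28x28 feature map with 64 channels
    assert batch_norm_sizes(28, 64) == (28 * 28 * 64, 64)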
@config_layer('trans') @config_layer('trans')
class TransLayer(LayerBase): class TransLayer(LayerBase):
def __init__( def __init__(self, name, inputs, device=None):
self, super(TransLayer, self).__init__(
name, name, 'trans', 0, inputs=inputs, device=device)
inputs, config_assert(
device=None): len(self.inputs) == 1,
super(TransLayer, self).__init__(name, 'trans', 0, inputs=inputs, device=device) 'TransLayer must have one and only one input')
config_assert(len(self.inputs) == 1,
'TransLayer must have one and only one input')
self.set_layer_size(self.get_input_layer(0).size) self.set_layer_size(self.get_input_layer(0).size)
@config_layer('resize') @config_layer('resize')
class ResizeLayer(LayerBase): class ResizeLayer(LayerBase):
def __init__( def __init__(self, name, size, inputs, device=None):
self, super(ResizeLayer, self).__init__(
name, name, 'resize', size=size, inputs=inputs, device=device)
size, config_assert(
inputs, len(self.inputs) == 1,
device=None): 'ResizeLayer must have one and only one input')
super(ResizeLayer, self).__init__(name, 'resize', size=size, inputs=inputs, device=device)
config_assert(len(self.inputs) == 1,
'ResizeLayer must have one and only one input')
@config_layer('blockexpand') @config_layer('blockexpand')
class BlockExpandLayer(LayerBase): class BlockExpandLayer(LayerBase):
def __init__( def __init__(self, name, inputs, device=None):
self, super(BlockExpandLayer, self).__init__(
name, name, 'blockexpand', 0, inputs=inputs, device=device)
inputs,
device=None):
super(BlockExpandLayer, self).__init__(name, 'blockexpand', 0, inputs=inputs, device=device)
for input_index in xrange(len(self.inputs)): for input_index in xrange(len(self.inputs)):
input_layer = self.get_input_layer(input_index) input_layer = self.get_input_layer(input_index)
parse_block_expand(self.inputs[input_index].block_expand, parse_block_expand(
input_layer.name, self.inputs[input_index].block_expand, input_layer.name,
self.config.inputs[input_index].block_expand_conf) self.config.inputs[input_index].block_expand_conf)
block_expand_conf = self.config.inputs[input_index].block_expand_conf block_expand_conf = self.config.inputs[
self.set_layer_size(block_expand_conf.block_x * block_expand_conf.block_y input_index].block_expand_conf
* block_expand_conf.channels) self.set_layer_size(block_expand_conf.block_x *
block_expand_conf.block_y *
block_expand_conf.channels)
@config_layer('maxout') @config_layer('maxout')
class MaxOutLayer(LayerBase): class MaxOutLayer(LayerBase):
def __init__( def __init__(self, name, inputs, **xargs):
self, super(MaxOutLayer, self).__init__(
name, name, 'maxout', 0, inputs=inputs, **xargs)
inputs,
**xargs):
super(MaxOutLayer, self).__init__(name, 'maxout', 0, inputs=inputs, **xargs)
input_layer = self.get_input_layer(0) input_layer = self.get_input_layer(0)
parse_maxout(self.inputs[0].maxout, parse_maxout(self.inputs[0].maxout, input_layer.name,
input_layer.name,
self.config.inputs[0].maxout_conf) self.config.inputs[0].maxout_conf)
maxout_conf = self.config.inputs[0].maxout_conf maxout_conf = self.config.inputs[0].maxout_conf
self.set_layer_size(g_layer_map[input_layer.name].size / maxout_conf.groups) self.set_layer_size(g_layer_map[input_layer.name].size /
maxout_conf.groups)
# key: cost type # key: cost type
# value: cost class # value: cost class
g_cost_map = {} g_cost_map = {}
# define a cost layer without any parameters # define a cost layer without any parameters
def define_cost(class_name, cost_type): def define_cost(class_name, cost_type):
def init(cls, name, inputs, device=None, coeff=1.): def init(cls, name, inputs, device=None, coeff=1.):
super(type(cls), cls).__init__(name, cost_type, 1, inputs, device=device, coeff=coeff) super(type(cls), cls).__init__(
name, cost_type, 1, inputs, device=device, coeff=coeff)
cls = type(class_name, (LayerBase,), dict(__init__=init)) cls = type(class_name, (LayerBase, ), dict(__init__=init))
global g_cost_map global g_cost_map
g_cost_map[cost_type] = cls g_cost_map[cost_type] = cls
define_cost('MultiClassCrossEntropy', 'multi-class-cross-entropy') define_cost('MultiClassCrossEntropy', 'multi-class-cross-entropy')
define_cost('RankingCost', 'rank-cost') define_cost('RankingCost', 'rank-cost')
define_cost('AucValidation', 'auc-validation') define_cost('AucValidation', 'auc-validation')
...@@ -1951,19 +1952,15 @@ define_cost('SoftBinaryClassCrossEntropy', 'soft_binary_class_cross_entropy') ...@@ -1951,19 +1952,15 @@ define_cost('SoftBinaryClassCrossEntropy', 'soft_binary_class_cross_entropy')
define_cost('HuberTwoClass', 'huber') define_cost('HuberTwoClass', 'huber')
define_cost('SumCost', 'sum_cost') define_cost('SumCost', 'sum_cost')
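define_cost builds each cost class at import time with type(); the snippet below replays the mechanism against a dummy base class, so it is illustrative only and does not touch the real g_cost_map or LayerBase.

    class DummyLayerBase(object):
        def __init__(self, name, cost_type, size, inputs, device=None, coeff=1.):
            self.name, self.type, self.coeff = name, cost_type, coeff

    cost_map = {}

    def make_cost(class_name, cost_type):
        def init(cls, name, inputs, device=None, coeff=1.):
            super(type(cls), cls).__init__(
                name, cost_type, 1, inputs, device=device, coeff=coeff)
        cost_map[cost_type] = type(class_name, (DummyLayerBase, ),
                                   dict(__init__=init))

    make_cost('RankingCost', 'rank-cost')
    layer = cost_map['rank-cost']('cost1', inputs=[])
    print("%s %s" % (type(layer).__name__, layer.type))   # RankingCost rank-cost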
@config_layer('hsigmoid') @config_layer('hsigmoid')
class HierarchicalSigmoidLayer(LayerBase): class HierarchicalSigmoidLayer(LayerBase):
def __init__( def __init__(self, name, num_classes, inputs, device=None, bias=True):
self,
name,
num_classes,
inputs,
device=None,
bias=True):
super(HierarchicalSigmoidLayer, self).__init__( super(HierarchicalSigmoidLayer, self).__init__(
name, 'hsigmoid', 1, inputs=inputs, device=device) name, 'hsigmoid', 1, inputs=inputs, device=device)
config_assert(len(self.inputs) >= 2, config_assert(
'HierarchicalSigmoidLayer must have at least 2 inputs') len(self.inputs) >= 2,
'HierarchicalSigmoidLayer must have at least 2 inputs')
self.config.num_classes = num_classes self.config.num_classes = num_classes
for input_index in xrange(len(self.inputs) - 1): for input_index in xrange(len(self.inputs) - 1):
input_layer = self.get_input_layer(input_index) input_layer = self.get_input_layer(input_index)
...@@ -1972,6 +1969,7 @@ class HierarchicalSigmoidLayer(LayerBase): ...@@ -1972,6 +1969,7 @@ class HierarchicalSigmoidLayer(LayerBase):
self.create_input_parameter(input_index, psize, dims) self.create_input_parameter(input_index, psize, dims)
self.create_bias_parameter(bias, num_classes - 1) self.create_bias_parameter(bias, num_classes - 1)
''' '''
lambdaCost for lambdaRank LTR approach lambdaCost for lambdaRank LTR approach
...@@ -1996,59 +1994,57 @@ Usage: ...@@ -1996,59 +1994,57 @@ Usage:
max_sort_size can be greater than the size of a list, in which max_sort_size can be greater than the size of a list, in which
case the algorithm will sort the entire list to get gradient. case the algorithm will sort the entire list to get gradient.
''' '''
@config_layer('lambda_cost') @config_layer('lambda_cost')
class LambdaCost(LayerBase): class LambdaCost(LayerBase):
def __init__( def __init__(self, name, inputs, NDCG_num=5, max_sort_size=-1, device=None):
self,
name,
inputs,
NDCG_num = 5,
max_sort_size = -1,
device=None):
super(LambdaCost, self).__init__( super(LambdaCost, self).__init__(
name, 'lambda_cost', 1, inputs=inputs, device=device) name, 'lambda_cost', 1, inputs=inputs, device=device)
config_assert(len(self.inputs) == 2, config_assert(len(self.inputs) == 2, 'lambdaCost must have 2 inputs')
'lambdaCost must have 2 inputs')
self.config.NDCG_num = NDCG_num self.config.NDCG_num = NDCG_num
if max_sort_size != -1: if max_sort_size != -1:
config_assert(NDCG_num <= max_sort_size, config_assert(
'NDCG_num must be less than or equal to max_sort_size') NDCG_num <= max_sort_size,
'NDCG_num must be less than or equal to max_sort_size')
self.config.max_sort_size = max_sort_size self.config.max_sort_size = max_sort_size
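NDCG_num above is the truncation level of the NDCG metric that lambdaRank optimises. For reference, one common NDCG@k definition (standard formula, not code from this repository) compares the discounted gain of the predicted ordering against the ideal ordering:

    import math

    def dcg_at_k(relevances, k):
        # relevances are listed in the order the model ranked the documents
        return sum(r / math.log(i + 2, 2) for i, r in enumerate(relevances[:k]))

    def ndcg_at_k(relevances, k):
        best = dcg_at_k(sorted(relevances, reverse=True), k)
        return dcg_at_k(relevances, k) / best if best > 0 else 0.0

    print(ndcg_at_k([3, 2, 3, 0, 1, 2], 5))   # about 0.86 for this ordering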
@config_layer('nce') @config_layer('nce')
class NCELayer(LayerBase): class NCELayer(LayerBase):
def __init__( def __init__(self,
self, name,
name, num_classes,
num_classes, inputs,
inputs, num_neg_samples=10,
num_neg_samples=10, neg_sampling_dist=None,
neg_sampling_dist=None, bias=True,
bias=True, **xargs):
**xargs):
super(NCELayer, self).__init__(name, 'nce', 1, inputs=inputs, **xargs) super(NCELayer, self).__init__(name, 'nce', 1, inputs=inputs, **xargs)
config_assert(len(self.inputs) >= 2, config_assert(
'NCELayer must have at least 2 inputs') len(self.inputs) >= 2, 'NCELayer must have at least 2 inputs')
self.config.num_classes = num_classes self.config.num_classes = num_classes
if neg_sampling_dist is not None: if neg_sampling_dist is not None:
config_assert(len(neg_sampling_dist) == num_classes, config_assert(
'len(neg_sampling_dist)(%s) is not same as num_classes (%s)' len(neg_sampling_dist) == num_classes,
% (len(neg_sampling_dist), num_classes)) 'len(neg_sampling_dist)(%s) is not same as num_classes (%s)' %
(len(neg_sampling_dist), num_classes))
s = sum(neg_sampling_dist) s = sum(neg_sampling_dist)
config_assert(abs(s - 1) < 1e-5, config_assert(
'The sum of neg_sampling_dist (%s) is not 1' % s) abs(s - 1) < 1e-5,
'The sum of neg_sampling_dist (%s) is not 1' % s)
self.config.neg_sampling_dist.extend(neg_sampling_dist) self.config.neg_sampling_dist.extend(neg_sampling_dist)
self.config.num_neg_samples = num_neg_samples self.config.num_neg_samples = num_neg_samples
num_real_inputs = len(self.inputs) - 1 num_real_inputs = len(self.inputs) - 1
input_layer = self.get_input_layer(num_real_inputs) input_layer = self.get_input_layer(num_real_inputs)
config_assert(input_layer.type == 'data', config_assert(input_layer.type == 'data',
'Expecting the last input layer of an nce layer to be ' 'Expecting the last input layer of an nce layer to be '
'a data layer') 'a data layer')
if (num_real_inputs > 1 and input_layer.size == 1 if (num_real_inputs > 1 and input_layer.size == 1 and
and self.get_input_layer(num_real_inputs - 1).type == 'data'): self.get_input_layer(num_real_inputs - 1).type == 'data'):
# This input layer is assumed to be a sample weight layer # This input layer is assumed to be a sample weight layer
num_real_inputs -= 1 num_real_inputs -= 1
...@@ -2062,105 +2058,82 @@ class NCELayer(LayerBase): ...@@ -2062,105 +2058,82 @@ class NCELayer(LayerBase):
@config_layer('addto') @config_layer('addto')
class AddToLayer(LayerBase): class AddToLayer(LayerBase):
def __init__( def __init__(self, name, inputs, bias=True, **xargs):
self,
name,
inputs,
bias=True,
**xargs):
super(AddToLayer, self).__init__( super(AddToLayer, self).__init__(
name, 'addto', 0, inputs=inputs, **xargs) name, 'addto', 0, inputs=inputs, **xargs)
config_assert(len(inputs) > 0, config_assert(len(inputs) > 0, 'inputs cannot be empty for AddToLayer')
'inputs cannot be empty for AddToLayer')
for input_index in xrange(len(self.inputs)): for input_index in xrange(len(self.inputs)):
input_layer = self.get_input_layer(input_index) input_layer = self.get_input_layer(input_index)
self.set_layer_size(input_layer.size) self.set_layer_size(input_layer.size)
self.create_bias_parameter(bias, self.config.size) self.create_bias_parameter(bias, self.config.size)
@config_layer('agent') @config_layer('agent')
class AgentLayer(LayerBase): class AgentLayer(LayerBase):
def __init__( def __init__(self, name, size, device=None):
self, super(AgentLayer, self).__init__(
name, name, 'agent', size, inputs=[], device=device)
size,
device=None):
super(AgentLayer, self).__init__(name, 'agent', size, inputs=[], device=device)
@config_layer('sequence_agent') @config_layer('sequence_agent')
class SequenceAgentLayer(LayerBase): class SequenceAgentLayer(LayerBase):
def __init__( def __init__(self, name, size, device=None):
self,
name,
size,
device=None):
super(SequenceAgentLayer, self).__init__( super(SequenceAgentLayer, self).__init__(
name, 'sequence_agent', size, inputs=[], device=device) name, 'sequence_agent', size, inputs=[], device=device)
@config_layer('gather_agent') @config_layer('gather_agent')
class GatherAgentLayer(LayerBase): class GatherAgentLayer(LayerBase):
def __init__( def __init__(self, name, size, device=None):
self,
name,
size,
device=None):
super(GatherAgentLayer, self).__init__( super(GatherAgentLayer, self).__init__(
name, 'gather_agent', size, inputs=[], device=device) name, 'gather_agent', size, inputs=[], device=device)
@config_layer('scatter_agent') @config_layer('scatter_agent')
class ScatterAgentLayer(LayerBase): class ScatterAgentLayer(LayerBase):
def __init__( def __init__(self, name, size, device=None):
self,
name,
size,
device=None):
super(ScatterAgentLayer, self).__init__( super(ScatterAgentLayer, self).__init__(
name, 'scatter_agent', size, inputs=[], device=device) name, 'scatter_agent', size, inputs=[], device=device)
@config_layer('sequence_gather_agent') @config_layer('sequence_gather_agent')
class SequenceGatherAgentLayer(LayerBase): class SequenceGatherAgentLayer(LayerBase):
def __init__( def __init__(self, name, size, device=None):
self,
name,
size,
device=None):
super(SequenceGatherAgentLayer, self).__init__( super(SequenceGatherAgentLayer, self).__init__(
name, 'sequence_gather_agent', size, inputs=[], device=device) name, 'sequence_gather_agent', size, inputs=[], device=device)
@config_layer('sequence_scatter_agent') @config_layer('sequence_scatter_agent')
class SequenceScatterAgentLayer(LayerBase): class SequenceScatterAgentLayer(LayerBase):
def __init__( def __init__(self, name, size, device=None):
self,
name,
size,
device=None):
super(SequenceScatterAgentLayer, self).__init__( super(SequenceScatterAgentLayer, self).__init__(
name, 'sequence_scatter_agent', size, inputs=[], device=device) name, 'sequence_scatter_agent', size, inputs=[], device=device)
@config_layer('multiplex') @config_layer('multiplex')
class MultiplexLayer(LayerBase): class MultiplexLayer(LayerBase):
def __init__( def __init__(self, name, inputs, size, device=None):
self, super(MultiplexLayer, self).__init__(
name, name, 'multiplex', size, inputs=inputs, device=device)
inputs, config_assert(
size, len(inputs) > 2, 'MultiplexLayer should have more than 2 inputs.')
device=None):
super(MultiplexLayer, self).__init__(name, 'multiplex', size, inputs=inputs, device=device)
config_assert(len(inputs) > 2,
'MultiplexLayer should have more than 2 inputs.')
for i in range(1, len(inputs)): for i in range(1, len(inputs)):
config_assert(self.get_input_layer(i).size == size, config_assert(
"All the input layers except the first one should" self.get_input_layer(i).size == size,
"have the same size as the MultiplexLayer.") "All the input layers except the first one should"
"have the same size as the MultiplexLayer.")
@config_func @config_func
def Link(name, def Link(
has_subseq=False, name,
): has_subseq=False, ):
link_config = LinkConfig() link_config = LinkConfig()
link_config.link_name = name link_config.link_name = name
link_config.has_subseq = has_subseq link_config.has_subseq = has_subseq
return link_config return link_config
# memory for recurrent layer group. # memory for recurrent layer group.
# *name* and *size* are actual layer's name and size. # *name* and *size* are actual layer's name and size.
# will return name of the memory, # will return name of the memory,
...@@ -2175,43 +2148,46 @@ def Link(name, ...@@ -2175,43 +2148,46 @@ def Link(name,
# can only be initialized by a *boot_layer* which is a sequence. # can only be initialized by a *boot_layer* which is a sequence.
# #
@config_func @config_func
def Memory(name, def Memory(
size, name,
is_sequence=False, size,
boot_layer=None, is_sequence=False,
boot_bias=False, boot_layer=None,
boot_bias_active_type="", boot_bias=False,
boot_with_const_id=None, boot_bias_active_type="",
): boot_with_const_id=None, ):
agent_name = name + "+delay1" agent_name = name + "+delay1"
if is_sequence: if is_sequence:
agent_layer = SequenceAgentLayer(agent_name, size) agent_layer = SequenceAgentLayer(agent_name, size)
else: else:
agent_layer = AgentLayer(agent_name, size) agent_layer = AgentLayer(agent_name, size)
config_assert(g_current_submodel.is_recurrent_layer_group, config_assert(g_current_submodel.is_recurrent_layer_group,
'Memory should be used in recurrent layer group only') 'Memory should be used in recurrent layer group only')
memory = g_current_submodel.memories.add() memory = g_current_submodel.memories.add()
memory.layer_name = MakeLayerNameInSubmodel(name) memory.layer_name = MakeLayerNameInSubmodel(name)
memory.link_name = MakeLayerNameInSubmodel(agent_name) memory.link_name = MakeLayerNameInSubmodel(agent_name)
memory.is_sequence = is_sequence memory.is_sequence = is_sequence
options = sum((boot_layer is not None, options = sum((boot_layer is not None, bool(boot_bias),
bool(boot_bias),
boot_with_const_id is not None)) boot_with_const_id is not None))
config_assert(options <= 1, config_assert(
'take one option at most from boot_layer, boot_bias, or boot_with_const_id') options <= 1,
'take one option at most from boot_layer, boot_bias, or boot_with_const_id'
)
if boot_layer is not None: if boot_layer is not None:
boot_layer = MakeLayerNameInParentSubmodel(boot_layer) boot_layer = MakeLayerNameInParentSubmodel(boot_layer)
config_assert(boot_layer in g_layer_map, config_assert(boot_layer in g_layer_map,
'boot_layer "%s" does not correspond to a layer name' % boot_layer) 'boot_layer "%s" does not correspond to a layer name' %
boot_layer)
memory.boot_layer_name = boot_layer memory.boot_layer_name = boot_layer
elif boot_bias: elif boot_bias:
memory.boot_bias_parameter_name = agent_layer.create_bias_parameter( memory.boot_bias_parameter_name = agent_layer.create_bias_parameter(
boot_bias, size, for_self = False) boot_bias, size, for_self=False)
memory.boot_bias_active_type = boot_bias_active_type memory.boot_bias_active_type = boot_bias_active_type
elif boot_with_const_id is not None: elif boot_with_const_id is not None:
memory.boot_with_const_id = boot_with_const_id memory.boot_with_const_id = boot_with_const_id
return agent_name return agent_name
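The contract of Memory is easier to read in isolation: the returned agent layer is always named name + "+delay1", and at most one of boot_layer, boot_bias and boot_with_const_id may be supplied. A standalone sketch of just that bookkeeping (not the real config machinery):

    def memory_agent_name(name, boot_layer=None, boot_bias=False,
                          boot_with_const_id=None):
        options = sum((boot_layer is not None, bool(boot_bias),
                       boot_with_const_id is not None))
        assert options <= 1, ('take one option at most from boot_layer, '
                              'boot_bias, or boot_with_const_id')
        return name + "+delay1"   # the delayed agent that later layers read

    print(memory_agent_name("rnn_state", boot_with_const_id=0))   # rnn_state+delay1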
# Generator for recurrent layer group, to use it: # Generator for recurrent layer group, to use it:
# 1. define an id layer as output of layer group # 1. define an id layer as output of layer group
# 2. define a memory of this id layer, and assign a boot id(begin of sequence) # 2. define a memory of this id layer, and assign a boot id(begin of sequence)
...@@ -2223,11 +2199,10 @@ def Memory(name, ...@@ -2223,11 +2199,10 @@ def Memory(name,
@config_func @config_func
def Generator( def Generator(
max_num_frames, max_num_frames,
eos_layer_name = "eos_check", eos_layer_name="eos_check",
num_results_per_sample = 1, num_results_per_sample=1,
beam_size = 1, beam_size=1,
log_prob = None, log_prob=None, ):
):
generator_config = GeneratorConfig() generator_config = GeneratorConfig()
generator_config.max_num_frames = max_num_frames generator_config.max_num_frames = max_num_frames
generator_config.eos_layer_name = eos_layer_name generator_config.eos_layer_name = eos_layer_name
...@@ -2237,60 +2212,55 @@ def Generator( ...@@ -2237,60 +2212,55 @@ def Generator(
generator_config.log_prob = log_prob generator_config.log_prob = log_prob
return generator_config return generator_config
@config_layer('expand') @config_layer('expand')
class ExpandLayer(LayerBase): class ExpandLayer(LayerBase):
def __init__( def __init__(self,
self, name,
name, inputs,
inputs, trans_type='non-seq',
trans_type='non-seq', device=None,
device=None, bias=False):
bias=False): super(ExpandLayer, self).__init__(
super(ExpandLayer, self).__init__( name, 'expand', 0, inputs=inputs, device=device)
name, 'expand', 0, inputs=inputs, device=device) config_assert(
config_assert(len(self.inputs) == 2, len(self.inputs) == 2, 'ExpandLayer takes 2 and only 2 inputs')
'ExpandLayer takes 2 and only 2 inputs') self.config.trans_type = trans_type
self.config.trans_type = trans_type for input_index in xrange(len(self.inputs)):
for input_index in xrange(len(self.inputs)): input_layer = self.get_input_layer(input_index)
input_layer = self.get_input_layer(input_index) self.set_layer_size(self.get_input_layer(0).size)
self.set_layer_size(self.get_input_layer(0).size) self.create_bias_parameter(bias, self.config.size)
self.create_bias_parameter(bias, self.config.size)
@config_layer('featmap_expand') @config_layer('featmap_expand')
class FeatMapExpandLayer(LayerBase): class FeatMapExpandLayer(LayerBase):
def __init__( def __init__(self, name, inputs, device=None, num_filters=None, bias=False):
self, super(FeatMapExpandLayer, self).__init__(
name, name, 'featmap_expand', 0, inputs=inputs, device=device)
inputs, config_assert(
device=None, len(self.inputs) == 1, 'FeatMapExpandLayer takes 1 and only 1 input')
num_filters=None, if num_filters is not None:
bias=False):
super(FeatMapExpandLayer, self).__init__(
name, 'featmap_expand', 0, inputs=inputs, device=device)
config_assert(len(self.inputs) == 1,
'FeatMapExpandLayer takes 1 and only 1 input')
if num_filters is not None:
self.config.num_filters = num_filters self.config.num_filters = num_filters
else: else:
logger.fatal("FeatMapExpandLayer must specify num_filters.") logger.fatal("FeatMapExpandLayer must specify num_filters.")
self.set_layer_size(self.get_input_layer(0).size * num_filters) self.set_layer_size(self.get_input_layer(0).size * num_filters)
@config_layer('max') @config_layer('max')
class MaxLayer(LayerBase): class MaxLayer(LayerBase):
def __init__( def __init__(self,
self, name,
name, inputs,
inputs, trans_type='non-seq',
trans_type='non-seq', active_type='linear',
active_type='linear', device=None,
device=None, bias=False,
bias=False, output_max_index=None):
output_max_index=None): super(MaxLayer, self).__init__(
super(MaxLayer, self).__init__(name, 'max', 0, inputs=inputs, device=device) name, 'max', 0, inputs=inputs, device=device)
config_assert(len(self.inputs) == 1, 'MaxLayer must have 1 input') config_assert(len(self.inputs) == 1, 'MaxLayer must have 1 input')
self.config.trans_type = trans_type self.config.trans_type = trans_type
self.config.active_type = active_type self.config.active_type = active_type
for input_index in xrange(len(self.inputs)): for input_index in xrange(len(self.inputs)):
input_layer = self.get_input_layer(input_index) input_layer = self.get_input_layer(input_index)
self.set_layer_size(input_layer.size) self.set_layer_size(input_layer.size)
...@@ -2301,12 +2271,7 @@ class MaxLayer(LayerBase): ...@@ -2301,12 +2271,7 @@ class MaxLayer(LayerBase):
@config_layer('maxid') @config_layer('maxid')
class MaxIdLayer(LayerBase): class MaxIdLayer(LayerBase):
def __init__( def __init__(self, name, inputs, beam_size=None, device=None):
self,
name,
inputs,
beam_size=None,
device=None):
super(MaxIdLayer, self).__init__( super(MaxIdLayer, self).__init__(
name, 'maxid', 0, inputs=inputs, device=device) name, 'maxid', 0, inputs=inputs, device=device)
config_assert(len(self.inputs) == 1, 'MaxIdLayer must have 1 input') config_assert(len(self.inputs) == 1, 'MaxIdLayer must have 1 input')
...@@ -2324,37 +2289,39 @@ class MaxIdLayer(LayerBase): ...@@ -2324,37 +2289,39 @@ class MaxIdLayer(LayerBase):
@config_layer('eos_id') @config_layer('eos_id')
class EosIdLayer(LayerBase): class EosIdLayer(LayerBase):
def __init__( def __init__(self, name, inputs, eos_id, device=None):
self,
name,
inputs,
eos_id,
device=None):
super(EosIdLayer, self).__init__( super(EosIdLayer, self).__init__(
name, 'eos_id', 0, inputs=inputs, device=device) name, 'eos_id', 0, inputs=inputs, device=device)
config_assert(len(self.inputs) == 1, 'EosIdLayer must have 1 input') config_assert(len(self.inputs) == 1, 'EosIdLayer must have 1 input')
self.set_layer_size(2) # boolean output self.set_layer_size(2) # boolean output
self.config.eos_id = eos_id self.config.eos_id = eos_id
@config_layer('seqlastins') @config_layer('seqlastins')
class SequenceLastInstanceLayer(LayerBase): class SequenceLastInstanceLayer(LayerBase):
def __init__( def __init__(self,
self, name,
name, inputs,
inputs, active_type='linear',
active_type='linear', trans_type='non-seq',
trans_type='non-seq', device=None,
device=None, bias=False):
bias=False): super(SequenceLastInstanceLayer, self).__init__(
super(SequenceLastInstanceLayer, self).__init__(name, 'seqlastins', name,
0, inputs=inputs, device=device, active_type=active_type) 'seqlastins',
config_assert(len(inputs) == 1, 'SequenceLastInstanceLayer must have 1 input') 0,
self.config.trans_type = trans_type inputs=inputs,
device=device,
active_type=active_type)
config_assert(
len(inputs) == 1, 'SequenceLastInstanceLayer must have 1 input')
self.config.trans_type = trans_type
for input_index in xrange(len(self.inputs)): for input_index in xrange(len(self.inputs)):
input_layer = self.get_input_layer(input_index) input_layer = self.get_input_layer(input_index)
self.set_layer_size(input_layer.size) self.set_layer_size(input_layer.size)
self.create_bias_parameter(bias, self.config.size) self.create_bias_parameter(bias, self.config.size)
@config_layer('seqfirstins') @config_layer('seqfirstins')
class SequenceFirstInstanceLayer(SequenceLastInstanceLayer): class SequenceFirstInstanceLayer(SequenceLastInstanceLayer):
def __init__( def __init__(
...@@ -2364,167 +2331,163 @@ class SequenceFirstInstanceLayer(SequenceLastInstanceLayer): ...@@ -2364,167 +2331,163 @@ class SequenceFirstInstanceLayer(SequenceLastInstanceLayer):
active_type='linear', active_type='linear',
trans_type='non-seq', trans_type='non-seq',
device=None, device=None,
bias=False, bias=False, ):
): super(SequenceFirstInstanceLayer, self).__init__(
super(SequenceFirstInstanceLayer, self).__init__(name, name,
inputs=inputs, active_type=active_type, device=device, bias=bias) inputs=inputs,
self.config.trans_type = trans_type active_type=active_type,
device=device,
bias=bias)
self.config.trans_type = trans_type
self.config.select_first = True self.config.select_first = True
@config_layer('seqconcat') @config_layer('seqconcat')
class SequenceConcatLayer(LayerBase): class SequenceConcatLayer(LayerBase):
def __init__( def __init__(self,
self, name,
name, inputs,
inputs, active_type='linear',
active_type='linear', device=None,
device=None, bias=False):
bias=False): super(SequenceConcatLayer, self).__init__(
super(SequenceConcatLayer, self).__init__(name, 'seqconcat', name,
0, inputs=inputs, device=device, active_type=active_type) 'seqconcat',
config_assert(len(inputs) == 2, 'SequenceConcatLayer must have 2 inputs') 0,
inputs=inputs,
device=device,
active_type=active_type)
config_assert(
len(inputs) == 2, 'SequenceConcatLayer must have 2 inputs')
for input_index in xrange(len(self.inputs)): for input_index in xrange(len(self.inputs)):
input_layer = self.get_input_layer(input_index) input_layer = self.get_input_layer(input_index)
self.set_layer_size(input_layer.size) self.set_layer_size(input_layer.size)
self.create_bias_parameter(bias, self.config.size) self.create_bias_parameter(bias, self.config.size)
@config_layer('seqreshape') @config_layer('seqreshape')
class SequenceReshapeLayer(LayerBase): class SequenceReshapeLayer(LayerBase):
def __init__( def __init__(self,
self, name,
name, size,
inputs,
active_type='linear',
device=None,
bias=False):
super(SequenceReshapeLayer, self).__init__(
name,
'seqreshape',
size, size,
inputs, inputs=inputs,
active_type='linear', device=device,
device=None, active_type=active_type)
bias=False): config_assert(
super(SequenceReshapeLayer, self).__init__(name, 'seqreshape', len(inputs) == 1, 'SequenceReshapeLayer must have 1 inputs')
size, inputs=inputs, device=device, active_type=active_type)
config_assert(len(inputs) == 1, 'SequenceReshapeLayer must have 1 inputs')
self.set_layer_size(size) self.set_layer_size(size)
self.create_bias_parameter(bias, size) self.create_bias_parameter(bias, size)
@config_layer('subseq') @config_layer('subseq')
class SubSequenceLayer(LayerBase): class SubSequenceLayer(LayerBase):
def __init__( def __init__(self,
self, name,
name, inputs,
inputs, active_type='linear',
active_type='linear', device=None,
device=None, bias=False):
bias=False): super(SubSequenceLayer, self).__init__(
super(SubSequenceLayer, self).__init__(name, 'subseq', name,
0, inputs=inputs, device=device, active_type=active_type) 'subseq',
0,
inputs=inputs,
device=device,
active_type=active_type)
config_assert(len(inputs) == 3, 'SubSequenceLayer must have 3 inputs') config_assert(len(inputs) == 3, 'SubSequenceLayer must have 3 inputs')
input_layer0 = self.get_input_layer(0) input_layer0 = self.get_input_layer(0)
size = input_layer0.size size = input_layer0.size
self.set_layer_size(size) self.set_layer_size(size)
self.create_bias_parameter(bias, size) self.create_bias_parameter(bias, size)
@config_layer('out_prod') @config_layer('out_prod')
class OuterProdLayer(LayerBase): class OuterProdLayer(LayerBase):
def __init__( def __init__(self, name, inputs, device=None):
self, super(OuterProdLayer, self).__init__(
name, name, 'out_prod', 0, inputs=inputs, device=device)
inputs,
device=None):
super(OuterProdLayer, self).__init__(name, 'out_prod',
0, inputs=inputs, device=device)
config_assert(len(inputs) == 2, 'OuterProdLayer must have 2 inputs') config_assert(len(inputs) == 2, 'OuterProdLayer must have 2 inputs')
input_layer0 = self.get_input_layer(0) input_layer0 = self.get_input_layer(0)
input_layer1 = self.get_input_layer(1) input_layer1 = self.get_input_layer(1)
self.set_layer_size(input_layer0.size * input_layer1.size) self.set_layer_size(input_layer0.size * input_layer1.size)
@config_layer('power') @config_layer('power')
class PowerLayer(LayerBase): class PowerLayer(LayerBase):
def __init__( def __init__(self, name, inputs, device=None):
self, super(PowerLayer, self).__init__(
name, name, 'power', 0, inputs=inputs, device=device)
inputs,
device=None):
super(PowerLayer, self).__init__(name, 'power',
0, inputs=inputs, device=device)
config_assert(len(inputs) == 2, 'PowerLayer must have 2 inputs') config_assert(len(inputs) == 2, 'PowerLayer must have 2 inputs')
input_layer1 = self.get_input_layer(1) input_layer1 = self.get_input_layer(1)
self.set_layer_size(input_layer1.size) self.set_layer_size(input_layer1.size)
input_layer0 = self.get_input_layer(0) input_layer0 = self.get_input_layer(0)
config_assert(1==input_layer0.size, config_assert(1 == input_layer0.size,
'The left input is the exponent and should be of size 1') 'The left input is the exponent and should be of size 1')
@config_layer('slope_intercept') @config_layer('slope_intercept')
class SlopeInterceptLayer(LayerBase): class SlopeInterceptLayer(LayerBase):
def __init__( def __init__(self, name, inputs, slope=1.0, intercept=0.0, device=None):
self, super(SlopeInterceptLayer, self).__init__(
name, name, 'slope_intercept', 0, inputs=inputs, device=device)
inputs,
slope=1.0,
intercept=0.0,
device=None):
super(SlopeInterceptLayer, self).__init__(name, 'slope_intercept',
0, inputs=inputs, device=device)
self.config.slope = slope self.config.slope = slope
self.config.intercept = intercept self.config.intercept = intercept
config_assert(len(inputs) == 1, 'SlopeInterceptLayer must have 1 input') config_assert(len(inputs) == 1, 'SlopeInterceptLayer must have 1 input')
input_layer0 = self.get_input_layer(0) input_layer0 = self.get_input_layer(0)
self.set_layer_size(input_layer0.size) self.set_layer_size(input_layer0.size)
@config_layer('scaling') @config_layer('scaling')
class ScalingLayer(LayerBase): class ScalingLayer(LayerBase):
def __init__( def __init__(self, name, inputs, device=None):
self, super(ScalingLayer, self).__init__(
name, name, 'scaling', 0, inputs=inputs, device=device)
inputs,
device=None):
super(ScalingLayer, self).__init__(name, 'scaling',
0, inputs=inputs, device=device)
config_assert(len(inputs) == 2, 'ScalingLayer must have 2 inputs') config_assert(len(inputs) == 2, 'ScalingLayer must have 2 inputs')
input_layer1 = self.get_input_layer(1) input_layer1 = self.get_input_layer(1)
self.set_layer_size(input_layer1.size) self.set_layer_size(input_layer1.size)
input_layer0 = self.get_input_layer(0) input_layer0 = self.get_input_layer(0)
config_assert(1==input_layer0.size, config_assert(1 == input_layer0.size,
'The left input should be of size 1') 'The left input should be of size 1')
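# Editor's note: a minimal plain-Python sketch, added for illustration only, of the
# elementwise transforms configured by the 'power', 'slope_intercept' and 'scaling'
# layers above (assuming the usual reading of the size-1 left input as a per-sample
# exponent or scale; the helper functions below are hypothetical, not part of the
# config API).
def power_fn(exponent, vec):
    return [v ** exponent for v in vec]

def slope_intercept_fn(vec, slope=1.0, intercept=0.0):
    return [slope * v + intercept for v in vec]

def scaling_fn(weight, vec):
    return [weight * v for v in vec]

assert power_fn(2.0, [2.0, 3.0]) == [4.0, 9.0]
assert slope_intercept_fn([0.0, 1.0, 2.0], slope=2.0, intercept=1.0) == [1.0, 3.0, 5.0]
assert scaling_fn(0.5, [2.0, 4.0]) == [1.0, 2.0]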
@config_layer('conv_shift') @config_layer('conv_shift')
class ConvShiftLayer(LayerBase): class ConvShiftLayer(LayerBase):
def __init__( def __init__(self, name, inputs, device=None):
self, super(ConvShiftLayer, self).__init__(
name, name, 'conv_shift', 0, inputs=inputs, device=device)
inputs,
device=None):
super(ConvShiftLayer, self).__init__(name, 'conv_shift',
0, inputs=inputs, device=device)
config_assert(len(inputs) == 2, 'ConvShiftLayer must have 2 inputs') config_assert(len(inputs) == 2, 'ConvShiftLayer must have 2 inputs')
input_layer0 = self.get_input_layer(0) input_layer0 = self.get_input_layer(0)
self.set_layer_size(input_layer0.size) self.set_layer_size(input_layer0.size)
@config_layer('convex_comb') @config_layer('convex_comb')
class ConvexCombinationLayer(LayerBase): class ConvexCombinationLayer(LayerBase):
def __init__( def __init__(self, name, size, inputs, device=None):
self,
name,
size,
inputs,
device=None):
super(ConvexCombinationLayer, self).__init__( super(ConvexCombinationLayer, self).__init__(
name, 'convex_comb', size, inputs=inputs, device=device) name, 'convex_comb', size, inputs=inputs, device=device)
config_assert(len(self.inputs) == 2, config_assert(
'ConvexCombinationLayer must have 2 inputs') len(self.inputs) == 2, 'ConvexCombinationLayer must have 2 inputs')
config_assert( config_assert(
size * self.get_input_layer(0).size == self.get_input_layer(1).size, size * self.get_input_layer(0).size == self.get_input_layer(1).size,
'Wrong input size for ConvexCombinationLayer') 'Wrong input size for ConvexCombinationLayer')
self.set_layer_size(size) self.set_layer_size(size)
@config_layer('interpolation') @config_layer('interpolation')
class InterpolationLayer(LayerBase): class InterpolationLayer(LayerBase):
def __init__( def __init__(self, name, inputs, device=None):
self,
name,
inputs,
device=None):
super(InterpolationLayer, self).__init__( super(InterpolationLayer, self).__init__(
name, 'interpolation', 0, inputs=inputs, device=device) name, 'interpolation', 0, inputs=inputs, device=device)
config_assert(len(self.inputs) == 3, config_assert(
'InterpolationLayer must have 3 inputs') len(self.inputs) == 3, 'InterpolationLayer must have 3 inputs')
input_layer0 = self.get_input_layer(0) input_layer0 = self.get_input_layer(0)
input_layer1 = self.get_input_layer(1) input_layer1 = self.get_input_layer(1)
input_layer2 = self.get_input_layer(2) input_layer2 = self.get_input_layer(2)
...@@ -2533,64 +2496,51 @@ class InterpolationLayer(LayerBase): ...@@ -2533,64 +2496,51 @@ class InterpolationLayer(LayerBase):
config_assert(input_layer1.size == input_layer2.size, config_assert(input_layer1.size == input_layer2.size,
'the two vector inputs should be of the same size') 'the two vector inputs should be of the same size')
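# Editor's note: an editor-added sketch of the computation the 'interpolation' layer
# above is assumed to configure: a per-sample scalar weight w blends two equally
# sized vectors as w * a + (1 - w) * b.
def interpolate(w, a, b):
    assert len(a) == len(b), 'the two vector inputs should be of the same size'
    return [w * x + (1.0 - w) * y for x, y in zip(a, b)]

assert interpolate(0.25, [4.0, 8.0], [0.0, 0.0]) == [1.0, 2.0]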
@config_layer('bilinear_interp') @config_layer('bilinear_interp')
class BilinearInterpLayer(LayerBase): class BilinearInterpLayer(LayerBase):
def __init__( def __init__(self, name, inputs, **xargs):
self,
name,
inputs,
**xargs):
super(BilinearInterpLayer, self).__init__( super(BilinearInterpLayer, self).__init__(
name, 'bilinear_interp', 0, inputs=inputs, **xargs) name, 'bilinear_interp', 0, inputs=inputs, **xargs)
input_layer = self.get_input_layer(0) input_layer = self.get_input_layer(0)
parse_bilinear(self.inputs[0].bilinear_interp, parse_bilinear(self.inputs[0].bilinear_interp, input_layer.name,
input_layer.name, self.config.inputs[0].bilinear_interp_conf)
self.config.inputs[0].bilinear_interp_conf);
conf = self.inputs[0].bilinear_interp conf = self.inputs[0].bilinear_interp
self.set_layer_size(conf.out_size_x * conf.out_size_y * conf.num_channels) self.set_layer_size(conf.out_size_x * conf.out_size_y *
conf.num_channels)
@config_layer('sum_to_one_norm') @config_layer('sum_to_one_norm')
class SumToOneNormLayer(LayerBase): class SumToOneNormLayer(LayerBase):
def __init__( def __init__(self, name, inputs, device=None):
self,
name,
inputs,
device=None):
super(SumToOneNormLayer, self).__init__( super(SumToOneNormLayer, self).__init__(
name, 'sum_to_one_norm', 0, inputs=inputs, device=device) name, 'sum_to_one_norm', 0, inputs=inputs, device=device)
config_assert(len(self.inputs) == 1, config_assert(
'SumToOneNormLayer must have 1 input') len(self.inputs) == 1, 'SumToOneNormLayer must have 1 input')
input_layer0 = self.get_input_layer(0) input_layer0 = self.get_input_layer(0)
self.set_layer_size(input_layer0.size) self.set_layer_size(input_layer0.size)
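# Editor's note: editor-added sketch of the normalization 'sum_to_one_norm' is assumed
# to configure: divide each element by the total so the output sums to one.
def sum_to_one_norm(vec):
    total = float(sum(vec))
    return [v / total for v in vec]

assert sum_to_one_norm([1.0, 3.0]) == [0.25, 0.75]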
@config_layer('cos_vm') @config_layer('cos_vm')
class CosSimVecMatLayer(LayerBase): class CosSimVecMatLayer(LayerBase):
def __init__( def __init__(self, name, size, inputs, cos_scale=1.0, device=None):
self,
name,
size,
inputs,
cos_scale=1.0,
device=None):
super(CosSimVecMatLayer, self).__init__( super(CosSimVecMatLayer, self).__init__(
name, 'cos_vm', size, inputs=inputs, device=device) name, 'cos_vm', size, inputs=inputs, device=device)
self.config.cos_scale = cos_scale self.config.cos_scale = cos_scale
config_assert(len(self.inputs) == 2, config_assert(
'CosSimVecMatLayer must have 2 inputs') len(self.inputs) == 2, 'CosSimVecMatLayer must have 2 inputs')
config_assert( config_assert(
size * self.get_input_layer(0).size == self.get_input_layer(1).size, size * self.get_input_layer(0).size == self.get_input_layer(1).size,
'Wrong input size for CosSimVecMatLayer') 'Wrong input size for CosSimVecMatLayer')
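# Editor's note: editor-added sketch of the scaled cosine similarity that the
# 'cos_vm'/'cos' layers configure (cos_scale multiplies the plain cosine; the exact
# operand layout in the vec-mat case is assumed, so only the vector-vector form is
# shown).
import math

def scaled_cosine(a, b, cos_scale=1.0):
    dot = sum(x * y for x, y in zip(a, b))
    norm = math.sqrt(sum(x * x for x in a)) * math.sqrt(sum(y * y for y in b))
    return cos_scale * dot / norm if norm else 0.0

assert abs(scaled_cosine([1.0, 0.0], [1.0, 0.0], cos_scale=5.0) - 5.0) < 1e-6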
@config_layer('sampling_id') @config_layer('sampling_id')
class SamplingIdLayer(LayerBase): class SamplingIdLayer(LayerBase):
def __init__( def __init__(self, name, inputs, device=None):
self,
name,
inputs,
device=None):
super(SamplingIdLayer, self).__init__( super(SamplingIdLayer, self).__init__(
name, 'sampling_id', 0, inputs=inputs, device=device) name, 'sampling_id', 0, inputs=inputs, device=device)
config_assert(len(self.inputs) == 1, 'SamplingIdLayer must have 1 input') config_assert(
len(self.inputs) == 1, 'SamplingIdLayer must have 1 input')
for input_index in xrange(len(self.inputs)): for input_index in xrange(len(self.inputs)):
input_layer = self.get_input_layer(input_index) input_layer = self.get_input_layer(input_index)
self.set_layer_size(input_layer.size) self.set_layer_size(input_layer.size)
...@@ -2603,33 +2553,33 @@ class SamplingIdLayer(LayerBase): ...@@ -2603,33 +2553,33 @@ class SamplingIdLayer(LayerBase):
# 'squarerootn': sum each sample, but divide by sqrt(sample_num). # 'squarerootn': sum each sample, but divide by sqrt(sample_num).
@config_layer('average') @config_layer('average')
class AverageLayer(LayerBase): class AverageLayer(LayerBase):
def __init__( def __init__(self,
self, name,
name, inputs,
inputs, average_strategy='average',
average_strategy='average', trans_type='non-seq',
trans_type='non-seq', active_type='linear',
active_type='linear', device=None,
device=None, bias=False):
bias=False): super(AverageLayer, self).__init__(
super(AverageLayer, self).__init__(name, 'average', 0, inputs=inputs, name,
device=device, active_type=active_type) 'average',
0,
inputs=inputs,
device=device,
active_type=active_type)
self.config.average_strategy = average_strategy self.config.average_strategy = average_strategy
self.config.trans_type = trans_type self.config.trans_type = trans_type
config_assert(len(inputs) == 1, 'AverageLayer must have 1 input') config_assert(len(inputs) == 1, 'AverageLayer must have 1 input')
for input_index in xrange(len(self.inputs)): for input_index in xrange(len(self.inputs)):
input_layer = self.get_input_layer(input_index) input_layer = self.get_input_layer(input_index)
self.set_layer_size(input_layer.size) self.set_layer_size(input_layer.size)
self.create_bias_parameter(bias, self.config.size) self.create_bias_parameter(bias, self.config.size)
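# Editor's note: editor-added sketch of the pooling strategies named above for the
# 'average' layer: 'average' divides the per-dimension sum by the sequence length,
# 'squarerootn' divides it by sqrt(sample_num).
import math

def pool_sequence(samples, average_strategy='average'):
    n = len(samples)
    sums = [sum(col) for col in zip(*samples)]
    if average_strategy == 'average':
        return [s / n for s in sums]
    if average_strategy == 'squarerootn':
        return [s / math.sqrt(n) for s in sums]
    raise ValueError(average_strategy)

assert pool_sequence([[1.0, 2.0], [3.0, 4.0]]) == [2.0, 3.0]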
@config_layer('cos') @config_layer('cos')
class CosSimLayer(LayerBase): class CosSimLayer(LayerBase):
def __init__( def __init__(self, name, inputs, cos_scale=5, device=None):
self,
name,
inputs,
cos_scale=5,
device=None):
super(CosSimLayer, self).__init__( super(CosSimLayer, self).__init__(
name, 'cos', 1, inputs=inputs, device=device) name, 'cos', 1, inputs=inputs, device=device)
config_assert(len(self.inputs) == 2, 'CosSimLayer must have 2 inputs') config_assert(len(self.inputs) == 2, 'CosSimLayer must have 2 inputs')
...@@ -2641,18 +2591,13 @@ class CosSimLayer(LayerBase): ...@@ -2641,18 +2591,13 @@ class CosSimLayer(LayerBase):
@config_layer('tensor') @config_layer('tensor')
class TensorLayer(LayerBase): class TensorLayer(LayerBase):
def __init__( def __init__(self, name, size, inputs, device=None, bias=True, **xargs):
self, super(TensorLayer, self).__init__(
name, name, 'tensor', size, inputs=inputs, device=device, **xargs)
size,
inputs,
device=None,
bias=True,
**xargs):
super(TensorLayer, self).__init__(name, 'tensor', size, inputs=inputs, device=device, **xargs)
config_assert(len(self.inputs) == 2, 'TensorLayer must have 2 inputs') config_assert(len(self.inputs) == 2, 'TensorLayer must have 2 inputs')
config_assert(size > 0, 'size must be positive') config_assert(size > 0, 'size must be positive')
config_assert(inputs[1].parameter_name == None, 'second parameter should be None.') config_assert(inputs[1].parameter_name == None,
'second parameter should be None.')
input_layer0 = self.get_input_layer(0) input_layer0 = self.get_input_layer(0)
input_layer1 = self.get_input_layer(1) input_layer1 = self.get_input_layer(1)
psize = size * input_layer0.size * input_layer1.size psize = size * input_layer0.size * input_layer1.size
...@@ -2663,14 +2608,13 @@ class TensorLayer(LayerBase): ...@@ -2663,14 +2608,13 @@ class TensorLayer(LayerBase):
@config_layer('mixed') @config_layer('mixed')
class MixedLayer(LayerBase): class MixedLayer(LayerBase):
def __init__( def __init__(self,
self, name,
name, inputs,
inputs, size=0,
size=0, bias=True,
bias=True, error_clipping_threshold=None,
error_clipping_threshold=None, **xargs):
**xargs):
config_assert(inputs, 'inputs cannot be empty') config_assert(inputs, 'inputs cannot be empty')
super(MixedLayer, self).__init__( super(MixedLayer, self).__init__(
name, 'mixed', size, inputs=inputs, **xargs) name, 'mixed', size, inputs=inputs, **xargs)
...@@ -2695,24 +2639,28 @@ class MixedLayer(LayerBase): ...@@ -2695,24 +2639,28 @@ class MixedLayer(LayerBase):
else: else:
sz = operator.calc_output_size(operator_conf.input_sizes) sz = operator.calc_output_size(operator_conf.input_sizes)
if sz != 0: if sz != 0:
config_assert(sz == self.config.size, config_assert(
"different inputs have different size: %s vs. %s" % sz == self.config.size,
(sz, self.config.size)) "different inputs have different size: %s vs. %s" %
(sz, self.config.size))
for input_index in xrange(len(self.inputs)): for input_index in xrange(len(self.inputs)):
input_layer = self.get_input_layer(input_index) input_layer = self.get_input_layer(input_index)
input = self.inputs[input_index] input = self.inputs[input_index]
if input_index not in operator_input_index: if input_index not in operator_input_index:
config_assert(isinstance(input, Projection), "input should be projection or operation") config_assert(
isinstance(input, Projection),
"input should be projection or operation")
if self.config.size == 0 and isinstance(input, Projection): if self.config.size == 0 and isinstance(input, Projection):
size = input.calc_output_size(input_layer) size = input.calc_output_size(input_layer)
if size != 0: if size != 0:
self.set_layer_size(size) self.set_layer_size(size)
elif isinstance(input, Projection): elif isinstance(input, Projection):
sz = input.calc_output_size(input_layer) sz = input.calc_output_size(input_layer)
if sz != 0: if sz != 0:
config_assert(sz == self.config.size, config_assert(
"different inputs have different size: %s vs. %s" % sz == self.config.size,
(sz, self.config.size)) "different inputs have different size: %s vs. %s" %
(sz, self.config.size))
config_assert(size != 0, "size is not set") config_assert(size != 0, "size is not set")
for input_index in xrange(len(self.inputs)): for input_index in xrange(len(self.inputs)):
...@@ -2724,7 +2672,8 @@ class MixedLayer(LayerBase): ...@@ -2724,7 +2672,8 @@ class MixedLayer(LayerBase):
input_config = self.config.inputs[input_index] input_config = self.config.inputs[input_index]
input_config.proj_conf.CopyFrom(input.proj_conf) input_config.proj_conf.CopyFrom(input.proj_conf)
input_config.proj_conf.name = gen_parameter_name(name, input_index) input_config.proj_conf.name = gen_parameter_name(name,
input_index)
psize = input.calc_parameter_size(input_layer.size, size) psize = input.calc_parameter_size(input_layer.size, size)
dims = input.calc_parameter_dims(input_layer.size, size) dims = input.calc_parameter_dims(input_layer.size, size)
self.create_input_parameter(input_index, psize, dims) self.create_input_parameter(input_index, psize, dims)
...@@ -2750,21 +2699,16 @@ class MixedLayer(LayerBase): ...@@ -2750,21 +2699,16 @@ class MixedLayer(LayerBase):
if error_clipping_threshold is not None: if error_clipping_threshold is not None:
self.config.error_clipping_threshold = error_clipping_threshold self.config.error_clipping_threshold = error_clipping_threshold
# like MixedLayer, but no bias parameter # like MixedLayer, but no bias parameter
@config_func @config_func
def ExpressionLayer(name, def ExpressionLayer(name, inputs, **xargs):
inputs,
**xargs):
MixedLayer(name, inputs, bias=False, **xargs) MixedLayer(name, inputs, bias=False, **xargs)
@config_layer('concat') @config_layer('concat')
class ConcatenateLayer(LayerBase): class ConcatenateLayer(LayerBase):
def __init__( def __init__(self, name, inputs, bias=False, **xargs):
self,
name,
inputs,
bias=False,
**xargs):
config_assert(inputs, 'inputs cannot be empty') config_assert(inputs, 'inputs cannot be empty')
config_assert(not bias, 'ConcatenateLayer cannot support bias.') config_assert(not bias, 'ConcatenateLayer cannot support bias.')
super(ConcatenateLayer, self).__init__( super(ConcatenateLayer, self).__init__(
...@@ -2773,30 +2717,27 @@ class ConcatenateLayer(LayerBase): ...@@ -2773,30 +2717,27 @@ class ConcatenateLayer(LayerBase):
for input_index in xrange(len(self.inputs)): for input_index in xrange(len(self.inputs)):
input_layer = self.get_input_layer(input_index) input_layer = self.get_input_layer(input_index)
input = self.inputs[input_index] input = self.inputs[input_index]
if self.config.size == 0: if self.config.size == 0:
size += input_layer.size size += input_layer.size
self.set_layer_size(size) self.set_layer_size(size)
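# Editor's note: a tiny editor-added sketch of the size rule above: when no explicit
# size is given, the concatenated output width is the sum of the input widths.
def concat_size(input_sizes, size=0):
    return size if size else sum(input_sizes)

assert concat_size([16, 32, 8]) == 56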
# like concat layer, but each input layer was processed by a Projection. # like concat layer, but each input layer was processed by a Projection.
@config_layer('concat2') @config_layer('concat2')
class ConcatenateLayer2(LayerBase): class ConcatenateLayer2(LayerBase):
def __init__( def __init__(self, name, inputs, bias=False, **xargs):
self,
name,
inputs,
bias=False,
**xargs):
config_assert(inputs, 'inputs cannot be empty') config_assert(inputs, 'inputs cannot be empty')
super(ConcatenateLayer2, self).__init__( super(ConcatenateLayer2, self).__init__(
name, 'concat2', 0, inputs=inputs, **xargs) name, 'concat2', 0, inputs=inputs, **xargs)
if isinstance(self.inputs[0], ConvProjection): if isinstance(self.inputs[0], ConvProjection):
for input_index in xrange(len(self.inputs) - 1): for input_index in xrange(len(self.inputs) - 1):
input = self.inputs[input_index + 1] input = self.inputs[input_index + 1]
config_assert(isinstance(input, ConvProjection), config_assert(
"The first input of ConcatenateLayer2 is ConvProjection, " isinstance(input, ConvProjection),
"the other inputs should also be ConvProjection.") "The first input of ConcatenateLayer2 is ConvProjection, "
"the other inputs should also be ConvProjection.")
size = 0 size = 0
for input_index in xrange(len(self.inputs)): for input_index in xrange(len(self.inputs)):
...@@ -2818,9 +2759,9 @@ class ConcatenateLayer2(LayerBase): ...@@ -2818,9 +2759,9 @@ class ConcatenateLayer2(LayerBase):
input_config.proj_conf.CopyFrom(input.proj_conf) input_config.proj_conf.CopyFrom(input.proj_conf)
input_config.proj_conf.name = gen_parameter_name(name, input_index) input_config.proj_conf.name = gen_parameter_name(name, input_index)
psize = input.calc_parameter_size(input.proj_conf.input_size, psize = input.calc_parameter_size(input.proj_conf.input_size,
input.proj_conf.output_size) input.proj_conf.output_size)
dims = input.calc_parameter_dims(input.proj_conf.input_size, dims = input.calc_parameter_dims(input.proj_conf.input_size,
input.proj_conf.output_size) input.proj_conf.output_size)
self.create_input_parameter(input_index, psize, dims) self.create_input_parameter(input_index, psize, dims)
psize = self.config.size psize = self.config.size
...@@ -2834,16 +2775,12 @@ class ConcatenateLayer2(LayerBase): ...@@ -2834,16 +2775,12 @@ class ConcatenateLayer2(LayerBase):
self.config.bias_size = psize self.config.bias_size = psize
self.create_bias_parameter(bias, psize) self.create_bias_parameter(bias, psize)
@config_layer('recurrent') @config_layer('recurrent')
class RecurrentLayer(LayerBase): class RecurrentLayer(LayerBase):
def __init__( def __init__(self, name, inputs, reversed=False, bias=True, **xargs):
self, super(RecurrentLayer, self).__init__(name, 'recurrent', 0, inputs, **
name, xargs)
inputs,
reversed=False,
bias=True,
**xargs):
super(RecurrentLayer, self).__init__(name, 'recurrent', 0, inputs, **xargs)
config_assert(len(self.inputs) == 1, 'RecurrentLayer must have 1 input') config_assert(len(self.inputs) == 1, 'RecurrentLayer must have 1 input')
input_layer = self.get_input_layer(0) input_layer = self.get_input_layer(0)
size = input_layer.size size = input_layer.size
...@@ -2853,17 +2790,17 @@ class RecurrentLayer(LayerBase): ...@@ -2853,17 +2790,17 @@ class RecurrentLayer(LayerBase):
self.create_input_parameter(0, size * size, dims) self.create_input_parameter(0, size * size, dims)
self.create_bias_parameter(bias, self.config.size) self.create_bias_parameter(bias, self.config.size)
@config_layer('lstmemory') @config_layer('lstmemory')
class LstmLayer(LayerBase): class LstmLayer(LayerBase):
def __init__( def __init__(self,
self, name,
name, inputs,
inputs, reversed=False,
reversed=False, active_gate_type="sigmoid",
active_gate_type="sigmoid", active_state_type="sigmoid",
active_state_type="sigmoid", bias=True,
bias=True, **xargs):
**xargs):
super(LstmLayer, self).__init__(name, 'lstmemory', 0, inputs, **xargs) super(LstmLayer, self).__init__(name, 'lstmemory', 0, inputs, **xargs)
config_assert(len(self.inputs) == 1, 'LstmLayer must have 1 input') config_assert(len(self.inputs) == 1, 'LstmLayer must have 1 input')
input_layer = self.get_input_layer(0) input_layer = self.get_input_layer(0)
...@@ -2872,117 +2809,126 @@ class LstmLayer(LayerBase): ...@@ -2872,117 +2809,126 @@ class LstmLayer(LayerBase):
size = input_layer.size / 4 size = input_layer.size / 4
self.set_layer_size(size) self.set_layer_size(size)
self.config.reversed = reversed self.config.reversed = reversed
self.config.active_gate_type = active_gate_type self.config.active_gate_type = active_gate_type
self.config.active_state_type = active_state_type self.config.active_state_type = active_state_type
self.create_input_parameter(0, size * size * 4, [size, size, 4]) self.create_input_parameter(0, size * size * 4, [size, size, 4])
#bias includes 3 kinds of peephole, 4 + 3 = 7 #bias includes 3 kinds of peephole, 4 + 3 = 7
self.create_bias_parameter(bias, size * 7) self.create_bias_parameter(bias, size * 7)
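# Editor's note: editor-added sketch of the size arithmetic above: the lstmemory input
# carries 4 blocks per unit (input, input gate, forget gate, output gate), the
# recurrent weight is size x size x 4, and the bias holds 4 gate biases plus 3
# peephole vectors (4 + 3 = 7).
def lstm_sizes(input_size):
    assert input_size % 4 == 0, 'input size must be divisible by 4'
    size = input_size // 4
    return {'size': size, 'weight': size * size * 4, 'bias': size * 7}

assert lstm_sizes(512) == {'size': 128, 'weight': 128 * 128 * 4, 'bias': 128 * 7}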
@config_layer('lstm_step') @config_layer('lstm_step')
class LstmStepLayer(LayerBase): class LstmStepLayer(LayerBase):
def __init__( def __init__(self,
self, name,
name, size,
size, inputs,
inputs, active_gate_type="sigmoid",
active_gate_type="sigmoid", active_state_type="sigmoid",
active_state_type="sigmoid", bias=True,
bias=True, **xargs):
**xargs): super(LstmStepLayer, self).__init__(name, 'lstm_step', size, inputs,
super(LstmStepLayer, self).__init__(name, 'lstm_step', **xargs)
size, inputs, **xargs)
config_assert(len(inputs) == 2, 'LstmStepLayer must have 2 inputs') config_assert(len(inputs) == 2, 'LstmStepLayer must have 2 inputs')
input_layer0 = self.get_input_layer(0) input_layer0 = self.get_input_layer(0)
input_layer1 = self.get_input_layer(1) input_layer1 = self.get_input_layer(1)
config_assert(input_layer0.size == 4 * size, 'input_layer0.size != 4 * layer.size') config_assert(input_layer0.size == 4 * size,
config_assert(input_layer1.size == size, 'input_layer1.size != layer.size') 'input_layer0.size != 4 * layer.size')
self.config.active_gate_type = active_gate_type config_assert(input_layer1.size == size,
'input_layer1.size != layer.size')
self.config.active_gate_type = active_gate_type
self.config.active_state_type = active_state_type self.config.active_state_type = active_state_type
self.create_bias_parameter(bias, size * 3) self.create_bias_parameter(bias, size * 3)
# get the specific output from the input layer. # get the specific output from the input layer.
@config_layer('get_output') @config_layer('get_output')
class GetOutputLayer(LayerBase): class GetOutputLayer(LayerBase):
def __init__( def __init__(self, name, size, inputs):
self, super(GetOutputLayer, self).__init__(name, 'get_output', size, inputs)
name, config_assert(
size, len(self.inputs) == 1, 'GetOutputLayer must have 1 inputs')
inputs):
super(GetOutputLayer, self).__init__(name, 'get_output' , size, inputs)
config_assert(len(self.inputs) == 1, 'GetOutputLayer must have 1 input') len(self.inputs) == 1, 'GetOutputLayer must have 1 input')
inputs = self.inputs[0] inputs = self.inputs[0]
config_assert(inputs.input_layer_argument, config_assert(inputs.input_layer_argument,
'input_layer_argument cannot be empty') 'input_layer_argument cannot be empty')
@config_layer('mdlstmemory') @config_layer('mdlstmemory')
class MDLstmLayer(LayerBase): class MDLstmLayer(LayerBase):
def __init__( def __init__(self,
self, name,
name, inputs,
inputs, directions=True,
directions=True, active_gate_type="sigmoid",
active_gate_type="sigmoid", active_state_type="sigmoid",
active_state_type="sigmoid", bias=True,
bias=True, **xargs):
**xargs): super(MDLstmLayer, self).__init__(name, 'mdlstmemory', 0, inputs, **
super(MDLstmLayer, self).__init__(name, 'mdlstmemory', 0, inputs, **xargs) xargs)
config_assert(len(self.inputs) == 1, 'MDLstmLayer must have 1 input') config_assert(len(self.inputs) == 1, 'MDLstmLayer must have 1 input')
input_layer = self.get_input_layer(0) input_layer = self.get_input_layer(0)
dim_num = len(directions) dim_num = len(directions)
#check input_layer.size is divided by (3+dim_num) #check input_layer.size is divided by (3+dim_num)
config_assert(input_layer.size % (3+dim_num) == 0, "size % (dim_num) should be 0!") config_assert(input_layer.size %
size = input_layer.size / (3+dim_num) (3 + dim_num) == 0, "size % (dim_num) should be 0!")
size = input_layer.size / (3 + dim_num)
self.set_layer_size(size) self.set_layer_size(size)
self.config.active_gate_type = active_gate_type self.config.active_gate_type = active_gate_type
self.config.active_state_type = active_state_type self.config.active_state_type = active_state_type
for i in xrange(len(directions)): for i in xrange(len(directions)):
self.config.directions.append(int(directions[i])) self.config.directions.append(int(directions[i]))
self.create_input_parameter(0, size * size * (3+dim_num), [size, size, 3+dim_num]) self.create_input_parameter(0, size * size *
(3 + dim_num), [size, size, 3 + dim_num])
#bias includes 3 kinds of peephole, 3+dim_num+2+dim_num #bias includes 3 kinds of peephole, 3+dim_num+2+dim_num
self.create_bias_parameter(bias, size * (5+2*dim_num)) self.create_bias_parameter(bias, size * (5 + 2 * dim_num))
@config_layer('gated_recurrent') @config_layer('gated_recurrent')
class GatedRecurrentLayer(LayerBase): class GatedRecurrentLayer(LayerBase):
def __init__( def __init__(self,
self, name,
name, inputs,
inputs, reversed=False,
reversed=False, active_gate_type="sigmoid",
active_gate_type="sigmoid", bias=True,
bias=True, **xargs):
**xargs): super(GatedRecurrentLayer, self).__init__(name, 'gated_recurrent', 0,
super(GatedRecurrentLayer, self).__init__(name, 'gated_recurrent', 0, inputs, **xargs) inputs, **xargs)
config_assert(len(self.inputs) == 1, 'GatedRecurrentLayer must have 1 input') config_assert(
len(self.inputs) == 1, 'GatedRecurrentLayer must have 1 input')
input_layer = self.get_input_layer(0) input_layer = self.get_input_layer(0)
#check input_layer.size is divided by 3 #check input_layer.size is divided by 3
config_assert(input_layer.size % 3 == 0, "size % 3 should be 0!") config_assert(input_layer.size % 3 == 0, "size % 3 should be 0!")
size = input_layer.size / 3 size = input_layer.size / 3
self.set_layer_size(size) self.set_layer_size(size)
self.config.reversed = reversed self.config.reversed = reversed
self.config.active_gate_type = active_gate_type self.config.active_gate_type = active_gate_type
self.create_input_parameter(0, size * size * 3, [size, size * 3]) self.create_input_parameter(0, size * size * 3, [size, size * 3])
self.create_bias_parameter(bias, size * 3) self.create_bias_parameter(bias, size * 3)
@config_layer('gru_step') @config_layer('gru_step')
class GruStepLayer(LayerBase): class GruStepLayer(LayerBase):
def __init__( def __init__(self,
self, name,
name, size,
size, inputs,
inputs, active_gate_type="sigmoid",
active_gate_type="sigmoid", bias=True,
bias=True, **xargs):
**xargs): super(GruStepLayer, self).__init__(name, 'gru_step', size, inputs, **
super(GruStepLayer, self).__init__(name, 'gru_step', size, inputs, **xargs) xargs)
        config_assert(len(self.inputs) == 2, 'GruStepLayer must have 2 inputs')         config_assert(len(self.inputs) == 2, 'GruStepLayer must have 2 inputs')
input_layer0 = self.get_input_layer(0) input_layer0 = self.get_input_layer(0)
input_layer1 = self.get_input_layer(1) input_layer1 = self.get_input_layer(1)
config_assert(input_layer0.size == 3 * size, 'input_layer0.size != 3 * layer.size') config_assert(input_layer0.size == 3 * size,
config_assert(input_layer1.size == size, 'input_layer1.size != layer.size') 'input_layer0.size != 3 * layer.size')
self.config.active_gate_type = active_gate_type config_assert(input_layer1.size == size,
'input_layer1.size != layer.size')
self.config.active_gate_type = active_gate_type
self.create_input_parameter(0, size * size * 3, [size, size * 3]) self.create_input_parameter(0, size * size * 3, [size, size * 3])
self.create_bias_parameter(bias, size * 3) self.create_bias_parameter(bias, size * 3)
''' '''
A layer for calculating the cost of a sequential conditional random field model. A layer for calculating the cost of a sequential conditional random field model.
Example: CRFLayer(name="crf_cost", size=label_num, Example: CRFLayer(name="crf_cost", size=label_num,
...@@ -2990,20 +2936,18 @@ class GruStepLayer(LayerBase): ...@@ -2990,20 +2936,18 @@ class GruStepLayer(LayerBase):
where "weight" is optional, one weight for each sequence where "weight" is optional, one weight for each sequence
@param coeff: weight of the layer @param coeff: weight of the layer
''' '''
@config_layer('crf') @config_layer('crf')
class CRFLayer(LayerBase): class CRFLayer(LayerBase):
def __init__( def __init__(self, name, size, inputs, coeff=1.0, device=None):
self,
name,
size,
inputs,
coeff=1.0,
device=None):
super(CRFLayer, self).__init__(name, 'crf', size, inputs, device=device) super(CRFLayer, self).__init__(name, 'crf', size, inputs, device=device)
config_assert(2 <= len(self.inputs) <= 3, 'CRFLayer must have 2 or 3 inputs') config_assert(2 <= len(self.inputs) <= 3,
'CRFLayer must have 2 or 3 inputs')
self.create_input_parameter(0, size * (size + 2), [size, size + 2]) self.create_input_parameter(0, size * (size + 2), [size, size + 2])
self.config.coeff = coeff self.config.coeff = coeff
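# Editor's note: editor-added note on the parameter shape above: the CRF weight has
# size * (size + 2) entries, i.e. a size x size transition block plus two extra
# columns (assumed here to be start/end transition weights).
def crf_param_size(num_labels):
    return num_labels * (num_labels + 2)

assert crf_param_size(4) == 24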
''' '''
A layer for calculating the decoding sequence of a sequential conditional A layer for calculating the decoding sequence of a sequential conditional
random field model. random field model.
...@@ -3012,14 +2956,11 @@ class CRFLayer(LayerBase): ...@@ -3012,14 +2956,11 @@ class CRFLayer(LayerBase):
this layer will also calculate error, output_.value[i] is 1 for incorrect this layer will also calculate error, output_.value[i] is 1 for incorrect
decoding or 0 for correct decoding decoding or 0 for correct decoding
''' '''
@config_layer('crf_decoding') @config_layer('crf_decoding')
class CRFDecodingLayer(LayerBase): class CRFDecodingLayer(LayerBase):
def __init__( def __init__(self, name, size, inputs, device=None):
self,
name,
size,
inputs,
device=None):
super(CRFDecodingLayer, self).__init__( super(CRFDecodingLayer, self).__init__(
name, 'crf_decoding', size, inputs, device=device) name, 'crf_decoding', size, inputs, device=device)
config_assert( config_assert(
...@@ -3027,47 +2968,35 @@ class CRFDecodingLayer(LayerBase): ...@@ -3027,47 +2968,35 @@ class CRFDecodingLayer(LayerBase):
'CRFDecodingLayer cannot have more than 2 inputs') 'CRFDecodingLayer cannot have more than 2 inputs')
self.create_input_parameter(0, size * (size + 2), [size, size + 2]) self.create_input_parameter(0, size * (size + 2), [size, size + 2])
@config_layer('ctc') @config_layer('ctc')
class CTCLayer(LayerBase): class CTCLayer(LayerBase):
def __init__( def __init__(self, name, size, inputs, norm_by_times=False, device=None):
self,
name,
size,
inputs,
norm_by_times = False,
device=None):
super(CTCLayer, self).__init__(name, 'ctc', size, inputs, device=device) super(CTCLayer, self).__init__(name, 'ctc', size, inputs, device=device)
self.config.norm_by_times = norm_by_times self.config.norm_by_times = norm_by_times
config_assert(len(self.inputs) == 2, 'CTCLayer must have 2 inputs') config_assert(len(self.inputs) == 2, 'CTCLayer must have 2 inputs')
@config_layer('recurrent_layer_group') @config_layer('recurrent_layer_group')
class RecurrentLayerGroup(LayerBase): class RecurrentLayerGroup(LayerBase):
def __init__( def __init__(self, name, device=None):
self,
name,
device=None):
super(RecurrentLayerGroup, self).__init__( super(RecurrentLayerGroup, self).__init__(
name, 'recurrent_layer_group', 0, inputs=[], device=device) name, 'recurrent_layer_group', 0, inputs=[], device=device)
# Deprecated, use a new layer specific class instead # Deprecated, use a new layer specific class instead
@config_func @config_func
def Layer( def Layer(name, type, **xargs):
name,
type,
**xargs):
layers = {} layers = {}
layers.update(g_cost_map) layers.update(g_cost_map)
layers.update(g_layer_type_map) layers.update(g_layer_type_map)
layer_func = layers.get(type) layer_func = layers.get(type)
config_assert(layer_func, config_assert(layer_func, "layer type '%s' not supported." % type)
"layer type '%s' not supported." % type)
return layer_func(name, **xargs) return layer_func(name, **xargs)
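# Editor's note: an editor-added, self-contained illustration (hypothetical names,
# not the real g_layer_type_map/@config_layer machinery) of the name -> constructor
# dispatch that Layer() performs above.
def make_registry():
    registry = {}
    def register(type_name):
        def wrap(func):
            registry[type_name] = func
            return func
        return wrap
    return registry, register

demo_layers, demo_config_layer = make_registry()

@demo_config_layer('fc_demo')
def make_fc_demo(name, **xargs):
    return {'name': name, 'type': 'fc_demo'}

def DemoLayer(name, type, **xargs):
    layer_func = demo_layers.get(type)
    assert layer_func, "layer type '%s' not supported." % type
    return layer_func(name, **xargs)

assert DemoLayer('output', 'fc_demo') == {'name': 'output', 'type': 'fc_demo'}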
@config_func @config_func
def ParameterHook( def ParameterHook(type, **kwargs):
type,
**kwargs):
if type == 'pruning': if type == 'pruning':
mask_filename = kwargs.get('mask_filename', None) mask_filename = kwargs.get('mask_filename', None)
assert mask_filename is not None assert mask_filename is not None
...@@ -3080,30 +3009,28 @@ def ParameterHook( ...@@ -3080,30 +3009,28 @@ def ParameterHook(
@config_func @config_func
def Parameter( def Parameter(name,
name, size,
size, device,
device, dims,
dims, learning_rate=None,
learning_rate=None, momentum=None,
momentum=None, decay_rate=None,
decay_rate=None, decay_rate_l1=None,
decay_rate_l1=None, initial_mean=None,
initial_mean=None, initial_std=None,
initial_std=None, initial_strategy=None,
initial_strategy=None, initial_smart=None,
initial_smart=None, num_batches_regularization=None,
num_batches_regularization=None, sparse_remote_update=None,
sparse_remote_update=None, sparse_update=None,
sparse_update=None, gradient_clipping_threshold=None,
gradient_clipping_threshold=None, sparse=None,
sparse=None, format=None,
format=None, need_compact=None,
need_compact=None, is_static=None,
is_static=None, is_shared=None,
is_shared=None, update_hooks=None):
update_hooks=None
):
config_assert(name not in g_parameter_map, config_assert(name not in g_parameter_map,
'Duplicated parameter name: ' + name) 'Duplicated parameter name: ' + name)
...@@ -3134,8 +3061,8 @@ def Parameter( ...@@ -3134,8 +3061,8 @@ def Parameter(
para.initial_std = default(initial_std, g_default_initial_std) para.initial_std = default(initial_std, g_default_initial_std)
para.initial_mean = default(initial_mean, g_default_initial_mean) para.initial_mean = default(initial_mean, g_default_initial_mean)
num_batches_regularization = default( num_batches_regularization = default(num_batches_regularization,
num_batches_regularization, g_default_num_batches_regularization) g_default_num_batches_regularization)
if num_batches_regularization is not None: if num_batches_regularization is not None:
para.num_batches_regularization = int(num_batches_regularization) para.num_batches_regularization = int(num_batches_regularization)
...@@ -3145,18 +3072,21 @@ def Parameter( ...@@ -3145,18 +3072,21 @@ def Parameter(
g_config.opt_config.use_sparse_remote_updater = True g_config.opt_config.use_sparse_remote_updater = True
if sparse_update is not None: if sparse_update is not None:
para.sparse_update = sparse_update para.sparse_update = sparse_update
gradient_clipping_threshold = default( gradient_clipping_threshold = default(gradient_clipping_threshold,
gradient_clipping_threshold, g_default_gradient_clipping_threshold) g_default_gradient_clipping_threshold)
if gradient_clipping_threshold is not None: if gradient_clipping_threshold is not None:
para.gradient_clipping_threshold = gradient_clipping_threshold para.gradient_clipping_threshold = gradient_clipping_threshold
para.initial_strategy = default(initial_strategy, g_default_initial_strategy) para.initial_strategy = default(initial_strategy,
g_default_initial_strategy)
para.initial_smart = default(initial_smart, g_default_initial_smart) para.initial_smart = default(initial_smart, g_default_initial_smart)
if para.initial_smart: if para.initial_smart:
para.initial_mean = 0. para.initial_mean = 0.
if len(para.dims) != 0: if len(para.dims) != 0:
para.initial_std = 1. / math.sqrt(para.dims[0]) para.initial_std = 1. / math.sqrt(para.dims[0])
else: else:
print("Use initial_smart, but dims not set. Initial_smart may not be used in this layer") print(
"Use initial_smart, but dims not set. Initial_smart may not be used in this layer"
)
traceback.print_exc() traceback.print_exc()
para.initial_std = 1. / math.sqrt(para.size) para.initial_std = 1. / math.sqrt(para.size)
if g_default_compact_func is not None: if g_default_compact_func is not None:
...@@ -3195,64 +3125,78 @@ def default_initial_std(val): ...@@ -3195,64 +3125,78 @@ def default_initial_std(val):
global g_default_initial_std global g_default_initial_std
g_default_initial_std = val g_default_initial_std = val
@config_func @config_func
def default_initial_mean(val): def default_initial_mean(val):
global g_default_initial_mean global g_default_initial_mean
g_default_initial_mean = val g_default_initial_mean = val
@config_func @config_func
def default_initial_strategy(val): def default_initial_strategy(val):
global g_default_initial_strategy global g_default_initial_strategy
g_default_initial_strategy = val g_default_initial_strategy = val
@config_func @config_func
def default_initial_smart(val): def default_initial_smart(val):
global g_default_initial_smart global g_default_initial_smart
g_default_initial_smart = val g_default_initial_smart = val
@config_func @config_func
def default_momentum(val): def default_momentum(val):
global g_default_momentum global g_default_momentum
g_default_momentum = val g_default_momentum = val
@config_func @config_func
def default_decay_rate(val): def default_decay_rate(val):
global g_default_decay_rate global g_default_decay_rate
g_default_decay_rate = val g_default_decay_rate = val
@config_func @config_func
def default_num_batches_regularization(val): def default_num_batches_regularization(val):
global g_default_num_batches_regularization global g_default_num_batches_regularization
g_default_num_batches_regularization = val g_default_num_batches_regularization = val
@config_func @config_func
def default_gradient_clipping_threshold(val): def default_gradient_clipping_threshold(val):
global g_default_gradient_clipping_threshold global g_default_gradient_clipping_threshold
g_default_gradient_clipping_threshold = val g_default_gradient_clipping_threshold = val
@config_func @config_func
def default_device(val): def default_device(val):
global g_default_device global g_default_device
g_default_device = val g_default_device = val
@config_func @config_func
def default_update_hooks(val): def default_update_hooks(val):
global g_default_update_hooks global g_default_update_hooks
g_default_update_hooks = val g_default_update_hooks = val
@config_func @config_func
def default_compact_func(val): def default_compact_func(val):
global g_default_compact_func global g_default_compact_func
g_default_compact_func = val g_default_compact_func = val
def make_importer(config_dir, config_args): def make_importer(config_dir, config_args):
def Import(config_file, local_args={}): def Import(config_file, local_args={}):
if not config_file.startswith('/'): if not config_file.startswith('/'):
config_file = config_dir + '/' + config_file config_file = config_dir + '/' + config_file
g_config.config_files.append(config_file) g_config.config_files.append(config_file)
execfile(config_file, make_config_environment(config_file, config_args), local_args) execfile(config_file,
make_config_environment(config_file, config_args), local_args)
return Import return Import
settings = dict( settings = dict(
batch_size=None, batch_size=None,
mini_batch_size=None, mini_batch_size=None,
...@@ -3281,26 +3225,24 @@ settings = dict( ...@@ -3281,26 +3225,24 @@ settings = dict(
ada_rou=0.95, ada_rou=0.95,
delta_add_rate=1.0, delta_add_rate=1.0,
shrink_parameter_value=0, shrink_parameter_value=0,
adam_beta1 = 0.9, adam_beta1=0.9,
adam_beta2 = 0.999, adam_beta2=0.999,
adam_epsilon = 1e-8, adam_epsilon=1e-8, )
)
settings_deprecated = dict( settings_deprecated = dict(usage_ratio=1., )
usage_ratio=1.,
)
trainer_settings = dict( trainer_settings = dict(
save_dir="./output/model", save_dir="./output/model",
init_model_path=None, init_model_path=None,
start_pass=0, start_pass=0, )
)
@config_func @config_func
def Settings(**args): def Settings(**args):
for k, v in args.iteritems(): for k, v in args.iteritems():
if k == "usage_ratio": if k == "usage_ratio":
logger.warning("Deprecated: define usage_ratio in DataConfig instead") logger.warning(
"Deprecated: define usage_ratio in DataConfig instead")
if g_config.HasField("data_config"): if g_config.HasField("data_config"):
g_config.data_config.__setattr__(k, v) g_config.data_config.__setattr__(k, v)
settings_deprecated[k] = v settings_deprecated[k] = v
...@@ -3312,10 +3254,12 @@ def Settings(**args): ...@@ -3312,10 +3254,12 @@ def Settings(**args):
else: else:
            logger.fatal('Unknown setting: %s' % k)             logger.fatal('Unknown setting: %s' % k)
@config_func @config_func
def cluster_config(**args): def cluster_config(**args):
pass pass
@config_func @config_func
def EnableSubmodelSuffix(flag=True): def EnableSubmodelSuffix(flag=True):
""" """
...@@ -3325,10 +3269,12 @@ def EnableSubmodelSuffix(flag=True): ...@@ -3325,10 +3269,12 @@ def EnableSubmodelSuffix(flag=True):
global g_add_submodel_suffix global g_add_submodel_suffix
g_add_submodel_suffix = flag g_add_submodel_suffix = flag
def make_config_environment(config_file, config_args): def make_config_environment(config_file, config_args):
def make_setter(k): def make_setter(k):
def setter(v): def setter(v):
logger.fatal("Obsolete: use Settings(%s=%s, ...) instead" % (k, v)) logger.fatal("Obsolete: use Settings(%s=%s, ...) instead" % (k, v))
return setter return setter
funcs = {} funcs = {}
...@@ -3344,13 +3290,13 @@ def make_config_environment(config_file, config_args): ...@@ -3344,13 +3290,13 @@ def make_config_environment(config_file, config_args):
funcs.update( funcs.update(
Import=make_importer(config_dir, config_args), Import=make_importer(config_dir, config_args),
get_config_arg=make_get_config_arg(config_args), get_config_arg=make_get_config_arg(config_args), )
)
funcs.update(g_extended_config_funcs) funcs.update(g_extended_config_funcs)
return funcs return funcs
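# Editor's note: editor-added illustration of why make_setter(k) above is a separate
# factory: closures capture variables rather than values, so creating the setter
# directly inside the loop would bind every setter to the last key.
def make_setter_demo(key):
    def setter(value):
        return "Obsolete: use Settings(%s=%s, ...) instead" % (key, value)
    return setter

demo_setters = dict((k, make_setter_demo(k)) for k in ('batch_size', 'learning_rate'))
assert demo_setters['batch_size'](128) == "Obsolete: use Settings(batch_size=128, ...) instead"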
def make_get_config_arg(config_args): def make_get_config_arg(config_args):
def get_config_arg(name, type, default=None): def get_config_arg(name, type, default=None):
if type == bool: if type == bool:
...@@ -3367,6 +3313,7 @@ def make_get_config_arg(config_args): ...@@ -3367,6 +3313,7 @@ def make_get_config_arg(config_args):
return get_config_arg return get_config_arg
def importlib(name): def importlib(name):
__import__(name) __import__(name)
return sys.modules[name] return sys.modules[name]
...@@ -3379,10 +3326,12 @@ def find_caller(): ...@@ -3379,10 +3326,12 @@ def find_caller():
return s[0], s[1], s[2] return s[0], s[1], s[2]
return "(unknown file)", 0, "(unknown function)" return "(unknown file)", 0, "(unknown function)"
def my_fatal(s): def my_fatal(s):
logger.critical(s) logger.critical(s)
raise Exception() raise Exception()
def parse_config(config_file, config_arg_str): def parse_config(config_file, config_arg_str):
''' '''
@param config_arg_str: a string of the form var1=val1,var2=val2. It will be @param config_arg_str: a string of the form var1=val1,var2=val2. It will be
...@@ -3420,7 +3369,7 @@ def parse_config(config_file, config_arg_str): ...@@ -3420,7 +3369,7 @@ def parse_config(config_file, config_arg_str):
for k, v in settings.iteritems(): for k, v in settings.iteritems():
if v is None: if v is None:
continue continue
g_config.opt_config.__setattr__(k, v); g_config.opt_config.__setattr__(k, v)
for k, v in trainer_settings.iteritems(): for k, v in trainer_settings.iteritems():
if v is None: if v is None:
...@@ -3447,6 +3396,7 @@ def parse_config_and_serialize(config_file, config_arg_str): ...@@ -3447,6 +3396,7 @@ def parse_config_and_serialize(config_file, config_arg_str):
traceback.print_exc() traceback.print_exc()
raise raise
if __name__ == '__main__': if __name__ == '__main__':
try: try:
config = parse_config(sys.argv[1], '') config = parse_config(sys.argv[1], '')
......
...@@ -17,11 +17,10 @@ from paddle.proto.DataConfig_pb2 import DataConfig ...@@ -17,11 +17,10 @@ from paddle.proto.DataConfig_pb2 import DataConfig
g_config = None g_config = None
def SimpleData( def SimpleData(files=None,
files=None, feat_dim=None,
feat_dim=None, context_len=None,
context_len=None, buffer_capacity=None):
buffer_capacity=None):
data_config = DataConfig() data_config = DataConfig()
data_config.type = 'simple' data_config.type = 'simple'
...@@ -33,6 +32,7 @@ def SimpleData( ...@@ -33,6 +32,7 @@ def SimpleData(
data_config.buffer_capacity = buffer_capacity data_config.buffer_capacity = buffer_capacity
return data_config return data_config
def get_config_funcs(trainer_config): def get_config_funcs(trainer_config):
global g_config global g_config
g_config = trainer_config g_config = trainer_config
......
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# recurrent_units.py # recurrent_units.py
# Version 2.0 # Version 2.0
# #
...@@ -22,161 +22,175 @@ ...@@ -22,161 +22,175 @@
from paddle.trainer.config_parser import * from paddle.trainer.config_parser import *
# long short term memory, can be used in recurrent machine # long short term memory, can be used in recurrent machine
# *inputs* must be a list of Projections, for example: # *inputs* must be a list of Projections, for example:
# inputs = [FullMatrixProjection("input_layer_name")], # inputs = [FullMatrixProjection("input_layer_name")],
# *para_prefix* defines parameter names, if the *para_prefix* of # *para_prefix* defines parameter names, if the *para_prefix* of
# two LstmRecurrentUnits is the same, they share the same parameters # two LstmRecurrentUnits is the same, they share the same parameters
# *out_memory* can be defined outside if it's used outside # *out_memory* can be defined outside if it's used outside
def LstmRecurrentUnit(name, size, def LstmRecurrentUnit(name,
active_type, state_active_type, gate_active_type, size,
inputs, para_prefix = None, active_type,
error_clipping_threshold = 0, state_active_type,
out_memory = None): gate_active_type,
inputs,
para_prefix=None,
error_clipping_threshold=0,
out_memory=None):
if para_prefix is None: if para_prefix is None:
para_prefix = name para_prefix = name
if out_memory is None: if out_memory is None:
out_memory = Memory(name = name, size = size) out_memory = Memory(name=name, size=size)
state_memory = Memory(name=name + "_" + "state", size=size)
state_memory = Memory(name = name + "_" + "state", size = size)
Layer( Layer(
name = name + "_" + "input_recurrent", name=name + "_" + "input_recurrent",
type = "mixed", type="mixed",
size = size * 4, #(input_s, input_gate, forget_gate, output_gate) size=size * 4, #(input_s, input_gate, forget_gate, output_gate)
error_clipping_threshold = error_clipping_threshold, error_clipping_threshold=error_clipping_threshold,
bias = Bias(initial_std = 0, bias=Bias(
parameter_name = para_prefix + "_input_recurrent.b"), initial_std=0, parameter_name=para_prefix + "_input_recurrent.b"),
inputs = inputs + [ inputs=inputs + [
FullMatrixProjection(out_memory, FullMatrixProjection(
parameter_name = para_prefix + "_input_recurrent.w"), out_memory, parameter_name=para_prefix + "_input_recurrent.w"),
], ], )
)
LstmStepLayer( LstmStepLayer(
name = name, name=name,
size = size, size=size,
bias = Bias(parameter_name = para_prefix + "_check.b"), bias=Bias(parameter_name=para_prefix + "_check.b"),
inputs = [name + "_" + "input_recurrent", state_memory], inputs=[name + "_" + "input_recurrent", state_memory],
active_type = active_type, active_type=active_type,
active_gate_type = gate_active_type, active_gate_type=gate_active_type,
active_state_type = state_active_type, active_state_type=state_active_type, )
)
GetOutputLayer( GetOutputLayer(
name = name + "_" + "state", name=name + "_" + "state",
size = size, size=size,
inputs = Input(name, input_layer_argument = "state"), inputs=Input(
) name, input_layer_argument="state"), )
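# Editor's note: an editor-added, per-unit (scalar) sketch of the arithmetic that the
# LstmStepLayer call above and the LstmRecurrentUnitNaive decomposition that follows
# spell out: 4 pre-activation blocks, peephole ("check") weights on the gates, and a
# gated cell state.
import math

def sigmoid(x):
    return 1.0 / (1.0 + math.exp(-x))

def lstm_step(blocks, prev_state, checks,
              act=math.tanh, gate_act=sigmoid, state_act=math.tanh):
    z, zi, zf, zo = blocks            # (input_s, input_gate, forget_gate, output_gate)
    wi, wf, wo = checks               # (input_check, forget_check, output_check)
    input_s = act(z)
    input_gate = gate_act(zi + wi * prev_state)
    forget_gate = gate_act(zf + wf * prev_state)
    state = input_s * input_gate + prev_state * forget_gate
    output_gate = gate_act(zo + wo * state)
    return state_act(state) * output_gate, state

out, state = lstm_step((0.5, 1.0, -1.0, 0.2), prev_state=0.1, checks=(0.1, 0.1, 0.1))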
def LstmRecurrentUnitNaive(name, size,
active_type, state_active_type, gate_active_type, def LstmRecurrentUnitNaive(name,
inputs, para_prefix = None, size,
error_clipping_threshold = 0, active_type,
out_memory = None): state_active_type,
gate_active_type,
if para_prefix is None: inputs,
para_prefix=None,
error_clipping_threshold=0,
out_memory=None):
if para_prefix is None:
para_prefix = name para_prefix = name
if out_memory is None: if out_memory is None:
out_memory = Memory(name = name, size = size) out_memory = Memory(name=name, size=size)
state_memory = Memory(name=name + "_" + "state", size=size)
state_memory = Memory(name = name + "_" + "state", size = size)
Layer( Layer(
name = name + "_" + "input_recurrent", name=name + "_" + "input_recurrent",
type = "mixed", type="mixed",
size = size * 4, #(input_s, input_gate, forget_gate, output_gate) size=size * 4, #(input_s, input_gate, forget_gate, output_gate)
error_clipping_threshold = error_clipping_threshold, error_clipping_threshold=error_clipping_threshold,
bias = Bias(initial_std = 0, bias=Bias(
parameter_name = para_prefix + "_input_recurrent.b"), initial_std=0, parameter_name=para_prefix + "_input_recurrent.b"),
inputs = inputs + [ inputs=inputs + [
FullMatrixProjection(out_memory, FullMatrixProjection(
parameter_name = para_prefix + "_input_recurrent.w"), out_memory, parameter_name=para_prefix + "_input_recurrent.w"),
], ], )
)
ExpressionLayer( ExpressionLayer(
name = name + "_" + "input_s", name=name + "_" + "input_s",
size = size, size=size,
active_type = active_type, active_type=active_type,
inputs = [IdentityOffsetProjection(name + "_" + "input_recurrent", offset=0)], inputs=[
) IdentityOffsetProjection(
name + "_" + "input_recurrent", offset=0)
], )
ExpressionLayer( ExpressionLayer(
name = name + "_" + "input_gate", name=name + "_" + "input_gate",
active_type = gate_active_type, active_type=gate_active_type,
inputs = [IdentityOffsetProjection(name + "_" + "input_recurrent", offset=size), inputs=[
DotMulProjection(state_memory, IdentityOffsetProjection(
parameter_name = para_prefix + "_input_check.w")], name + "_" + "input_recurrent", offset=size), DotMulProjection(
) state_memory, parameter_name=para_prefix + "_input_check.w")
], )
ExpressionLayer( ExpressionLayer(
name = name + "_" + "forget_gate", name=name + "_" + "forget_gate",
active_type = gate_active_type, active_type=gate_active_type,
inputs = [IdentityOffsetProjection(name + "_" + "input_recurrent", offset=size*2), inputs=[
DotMulProjection(state_memory, IdentityOffsetProjection(
parameter_name = para_prefix + "_forget_check.w")], name + "_" + "input_recurrent", offset=size * 2),
) DotMulProjection(
state_memory, parameter_name=para_prefix + "_forget_check.w")
], )
ExpressionLayer( ExpressionLayer(
name = name + "_" + "state", name=name + "_" + "state",
inputs = [DotMulOperator([name + "_" + "input_s", inputs=[
name + "_" + "input_gate"]), DotMulOperator([name + "_" + "input_s", name + "_" + "input_gate"]),
DotMulOperator([state_memory, DotMulOperator([state_memory, name + "_" + "forget_gate"]),
name + "_" + "forget_gate"]), ], )
],
)
ExpressionLayer( ExpressionLayer(
name = name + "_" + "output_gate", name=name + "_" + "output_gate",
active_type = gate_active_type, active_type=gate_active_type,
inputs = [IdentityOffsetProjection(name + "_" + "input_recurrent", offset=size*3), inputs=[
DotMulProjection(name + "_" + "state", IdentityOffsetProjection(
parameter_name = para_prefix + "_output_check.w")], name + "_" + "input_recurrent", offset=size * 3),
) DotMulProjection(
name + "_" + "state",
parameter_name=para_prefix + "_output_check.w")
], )
ExpressionLayer( ExpressionLayer(
name = name + "_" + "state_atv", name=name + "_" + "state_atv",
active_type = state_active_type, active_type=state_active_type,
inputs = IdentityProjection(name + "_" + "state"), inputs=IdentityProjection(name + "_" + "state"), )
)
ExpressionLayer( ExpressionLayer(
name = name, name=name,
inputs = DotMulOperator([name + "_" + "state_atv", inputs=DotMulOperator(
name + "_" + "output_gate"]), [name + "_" + "state_atv", name + "_" + "output_gate"]), )
)
# like LstmRecurrentUnit, but it's a layer group. # like LstmRecurrentUnit, but it's a layer group.
# it is equivalent to LstmLayer # it is equivalent to LstmLayer
def LstmRecurrentLayerGroup(name, size, def LstmRecurrentLayerGroup(name,
active_type, state_active_type, gate_active_type, size,
inputs, para_prefix = None, active_type,
error_clipping_threshold = 0, state_active_type,
seq_reversed = False): gate_active_type,
inputs,
para_prefix=None,
error_clipping_threshold=0,
seq_reversed=False):
input_layer_name = name + "_" + "transform_input" input_layer_name = name + "_" + "transform_input"
Layer( Layer(
name = input_layer_name, name=input_layer_name,
type = "mixed", type="mixed",
size = size * 4, size=size * 4,
active_type = "", active_type="",
bias = False, bias=False,
inputs = inputs, inputs=inputs, )
)
RecurrentLayerGroupBegin(
RecurrentLayerGroupBegin(name + "_layer_group", name + "_layer_group",
in_links = [input_layer_name], in_links=[input_layer_name],
out_links = [name], out_links=[name],
seq_reversed = seq_reversed) seq_reversed=seq_reversed)
LstmRecurrentUnit( LstmRecurrentUnit(
name = name, name=name,
size = size, size=size,
active_type = active_type, active_type=active_type,
state_active_type = state_active_type, state_active_type=state_active_type,
gate_active_type = gate_active_type, gate_active_type=gate_active_type,
inputs = [IdentityProjection(input_layer_name)], inputs=[IdentityProjection(input_layer_name)],
para_prefix = para_prefix, para_prefix=para_prefix,
error_clipping_threshold = error_clipping_threshold, error_clipping_threshold=error_clipping_threshold, )
)
RecurrentLayerGroupEnd(name + "_layer_group") RecurrentLayerGroupEnd(name + "_layer_group")
# gated recurrent unit, can be used in recurrent machine # gated recurrent unit, can be used in recurrent machine
# *inputs* should be a list of Projections, for example: # *inputs* should be a list of Projections, for example:
# inputs = [FullMatrixProjection("input_layer_name")], # inputs = [FullMatrixProjection("input_layer_name")],
...@@ -184,142 +198,157 @@ def LstmRecurrentLayerGroup(name, size, ...@@ -184,142 +198,157 @@ def LstmRecurrentLayerGroup(name, size,
# two GatedRecurrentUnits is the same, they share the same parameters # two GatedRecurrentUnits is the same, they share the same parameters
# *out_memory* can be defined outside if it's used outside # *out_memory* can be defined outside if it's used outside
def GatedRecurrentUnit(name, size,
active_type, gate_active_type, def GatedRecurrentUnit(name,
inputs, para_prefix = None, size,
error_clipping_threshold = 0, active_type,
out_memory = None): gate_active_type,
if type_of(inputs) == str: #only used by GatedRecurrentLayerGroup inputs,
para_prefix=None,
error_clipping_threshold=0,
out_memory=None):
if type_of(inputs) == str: #only used by GatedRecurrentLayerGroup
input_layer_name = inputs input_layer_name = inputs
else: else:
input_layer_name = name + "_" + "transform_input" input_layer_name = name + "_" + "transform_input"
Layer( Layer(
name = input_layer_name, name=input_layer_name,
type = "mixed", type="mixed",
size = size * 3, size=size * 3,
active_type = "", active_type="",
bias = False, bias=False,
inputs = inputs, inputs=inputs, )
)
if para_prefix is None:
if para_prefix is None:
para_prefix = name para_prefix = name
if out_memory is None: if out_memory is None:
out_memory = Memory(name = name, size = size) out_memory = Memory(name=name, size=size)
GruStepLayer( GruStepLayer(
name = name, name=name,
size = size, size=size,
bias = Bias(parameter_name = para_prefix + "_gate.b"), bias=Bias(parameter_name=para_prefix + "_gate.b"),
inputs = [input_layer_name, inputs=[
Input(out_memory, parameter_name = para_prefix + "_gate.w")], input_layer_name, Input(
active_type = active_type, out_memory, parameter_name=para_prefix + "_gate.w")
active_gate_type = gate_active_type, ],
) active_type=active_type,
active_gate_type=gate_active_type, )
def GatedRecurrentUnitNaive(name, size,
active_type, gate_active_type,
inputs, para_prefix = None, def GatedRecurrentUnitNaive(name,
error_clipping_threshold = 0, size,
out_memory = None): active_type,
gate_active_type,
if type_of(inputs) == str: #only used by GatedRecurrentLayerGroup inputs,
para_prefix=None,
error_clipping_threshold=0,
out_memory=None):
if type_of(inputs) == str: #only used by GatedRecurrentLayerGroup
input_layer_name = inputs input_layer_name = inputs
else: else:
input_layer_name = name + "_" + "transform_input" input_layer_name = name + "_" + "transform_input"
Layer( Layer(
name = input_layer_name, name=input_layer_name,
type = "mixed", type="mixed",
size = size * 3, size=size * 3,
active_type = "", active_type="",
bias = False, bias=False,
inputs = inputs, inputs=inputs, )
)
if para_prefix is None:
if para_prefix is None:
para_prefix = name para_prefix = name
if out_memory is None: if out_memory is None:
out_memory = Memory(name = name, size = size) out_memory = Memory(name=name, size=size)
Layer( Layer(
name = name + "_" + "update_gate", name=name + "_" + "update_gate",
type = "mixed", type="mixed",
size = size, size=size,
active_type = gate_active_type, active_type=gate_active_type,
error_clipping_threshold = error_clipping_threshold, error_clipping_threshold=error_clipping_threshold,
bias = Bias(initial_std = 0, parameter_name = para_prefix + "_update_gate.b"), bias=Bias(
inputs = [IdentityOffsetProjection(input_layer_name, offset=0), initial_std=0, parameter_name=para_prefix + "_update_gate.b"),
FullMatrixProjection(out_memory, inputs=[
parameter_name = para_prefix + "_update_gate.w")], IdentityOffsetProjection(
) input_layer_name, offset=0), FullMatrixProjection(
out_memory, parameter_name=para_prefix + "_update_gate.w")
], )
Layer( Layer(
name = name + "_" + "reset_gate", name=name + "_" + "reset_gate",
type = "mixed", type="mixed",
size = size, size=size,
active_type = gate_active_type, active_type=gate_active_type,
error_clipping_threshold = error_clipping_threshold, error_clipping_threshold=error_clipping_threshold,
bias = Bias(initial_std = 0, parameter_name = para_prefix + "_reset_gate.b"), bias=Bias(
inputs = [IdentityOffsetProjection(input_layer_name, offset=size), initial_std=0, parameter_name=para_prefix + "_reset_gate.b"),
FullMatrixProjection(out_memory, inputs=[
parameter_name = para_prefix + "_reset_gate.w")], IdentityOffsetProjection(
) input_layer_name, offset=size), FullMatrixProjection(
out_memory, parameter_name=para_prefix + "_reset_gate.w")
], )
ExpressionLayer( ExpressionLayer(
name = name + "_" + "reset_output", name=name + "_" + "reset_output",
inputs = DotMulOperator([out_memory, name + "_" + "reset_gate"]), inputs=DotMulOperator([out_memory, name + "_" + "reset_gate"]), )
)
Layer( Layer(
name = name + "_" + "output_candidate", name=name + "_" + "output_candidate",
type = "mixed", type="mixed",
size = size, size=size,
active_type = active_type, active_type=active_type,
error_clipping_threshold = error_clipping_threshold, error_clipping_threshold=error_clipping_threshold,
bias = Bias(initial_std = 0, parameter_name = para_prefix + "_output_candidate.b"), bias=Bias(
inputs = [IdentityOffsetProjection(input_layer_name, offset=size*2), initial_std=0, parameter_name=para_prefix + "_output_candidate.b"),
FullMatrixProjection(name + "_" + "reset_output", inputs=[
parameter_name = para_prefix + "_output_candidate.w")], IdentityOffsetProjection(
) input_layer_name, offset=size * 2), FullMatrixProjection(
ExpressionLayer( #element-wise interpolation name + "_" + "reset_output",
name = name, parameter_name=para_prefix + "_output_candidate.w")
inputs = [IdentityProjection(out_memory), ], )
DotMulOperator([out_memory, ExpressionLayer( #element-wise interpolation
name + "_" + "update_gate"], scale=-1.0), name=name,
DotMulOperator([name + "_" + "output_candidate", inputs=[
name + "_" + "update_gate"]), IdentityProjection(out_memory),
], DotMulOperator(
) [out_memory, name + "_" + "update_gate"], scale=-1.0),
DotMulOperator(
[name + "_" + "output_candidate", name + "_" + "update_gate"]),
], )
# like GatedRecurrentUnit, but it's a layer group. # like GatedRecurrentUnit, but it's a layer group.
# it is equivalent to GatedRecurrentLayer. # it is equivalent to GatedRecurrentLayer.
def GatedRecurrentLayerGroup(name, size, def GatedRecurrentLayerGroup(name,
active_type, gate_active_type, size,
inputs, para_prefix = None, active_type,
error_clipping_threshold = 0, gate_active_type,
seq_reversed = False): inputs,
para_prefix=None,
error_clipping_threshold=0,
seq_reversed=False):
input_layer_name = name + "_" + "transform_input" input_layer_name = name + "_" + "transform_input"
Layer( Layer(
name = input_layer_name, name=input_layer_name,
type = "mixed", type="mixed",
size = size * 3, size=size * 3,
active_type = "", active_type="",
bias = False, bias=False,
inputs = inputs, inputs=inputs, )
)
RecurrentLayerGroupBegin(
RecurrentLayerGroupBegin(name + "_layer_group", name + "_layer_group",
in_links = [input_layer_name], in_links=[input_layer_name],
out_links = [name], out_links=[name],
seq_reversed = seq_reversed) seq_reversed=seq_reversed)
GatedRecurrentUnit( GatedRecurrentUnit(
name = name, name=name,
size = size, size=size,
active_type = active_type, active_type=active_type,
gate_active_type = gate_active_type, gate_active_type=gate_active_type,
inputs = input_layer_name, #transform outside inputs=input_layer_name, #transform outside
para_prefix = para_prefix, para_prefix=para_prefix,
error_clipping_threshold = error_clipping_threshold, error_clipping_threshold=error_clipping_threshold, )
)
RecurrentLayerGroupEnd(name + "_layer_group") RecurrentLayerGroupEnd(name + "_layer_group")
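For reference, a minimal hedged sketch of invoking the layer-group variant above from a trainer config, following the earlier hint about passing a list of projections as inputs; the layer names, sizes, and the upstream layer "emb" are illustrative assumptions.

GatedRecurrentLayerGroup(
    name="gru_group",
    size=256,
    active_type="tanh",
    gate_active_type="sigmoid",
    inputs=[FullMatrixProjection("emb")],  # projections feed the size*3 input transform
    seq_reversed=False)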
...@@ -12,13 +12,12 @@ ...@@ -12,13 +12,12 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
__all__ = ["TanhActivation", "SigmoidActivation", __all__ = [
"SoftmaxActivation", "IdentityActivation", "LinearActivation", "TanhActivation", "SigmoidActivation", "SoftmaxActivation",
'SequenceSoftmaxActivation', 'ExpActivation', "IdentityActivation", "LinearActivation", 'SequenceSoftmaxActivation',
"ReluActivation", "BReluActivation", "SoftReluActivation", 'ExpActivation', "ReluActivation", "BReluActivation", "SoftReluActivation",
"STanhActivation", "STanhActivation", "AbsActivation", "SquareActivation", "BaseActivation"
"AbsActivation", "SquareActivation", ]
"BaseActivation"]
class BaseActivation(object): class BaseActivation(object):
...@@ -51,7 +50,8 @@ class TanhActivation(BaseActivation): ...@@ -51,7 +50,8 @@ class TanhActivation(BaseActivation):
f(z)=tanh(z)=\\frac{e^z-e^{-z}}{e^z+e^{-z}} f(z)=tanh(z)=\\frac{e^z-e^{-z}}{e^z+e^{-z}}
""" """
def __init__(self): BaseActivation.__init__(self, 'tanh', True) def __init__(self):
BaseActivation.__init__(self, 'tanh', True)
class SigmoidActivation(BaseActivation): class SigmoidActivation(BaseActivation):
...@@ -63,7 +63,8 @@ class SigmoidActivation(BaseActivation): ...@@ -63,7 +63,8 @@ class SigmoidActivation(BaseActivation):
f(z) = \\frac{1}{1+exp(-z)} f(z) = \\frac{1}{1+exp(-z)}
""" """
def __init__(self): BaseActivation.__init__(self, 'sigmoid', True) def __init__(self):
BaseActivation.__init__(self, 'sigmoid', True)
class SoftmaxActivation(BaseActivation): class SoftmaxActivation(BaseActivation):
...@@ -104,7 +105,8 @@ class IdentityActivation(BaseActivation): ...@@ -104,7 +105,8 @@ class IdentityActivation(BaseActivation):
Just pass the output through unchanged in both forward and backward. Just pass the output through unchanged in both forward and backward.
""" """
def __init__(self): BaseActivation.__init__(self, '', False) def __init__(self):
BaseActivation.__init__(self, '', False)
LinearActivation = IdentityActivation LinearActivation = IdentityActivation
...@@ -124,7 +126,8 @@ class ReluActivation(BaseActivation): ...@@ -124,7 +126,8 @@ class ReluActivation(BaseActivation):
0 &\\quad\\mathrm{otherwise} 0 &\\quad\\mathrm{otherwise}
""" """
def __init__(self): BaseActivation.__init__(self, 'relu', True) def __init__(self):
BaseActivation.__init__(self, 'relu', True)
class BReluActivation(BaseActivation): class BReluActivation(BaseActivation):
...@@ -141,7 +144,8 @@ class BReluActivation(BaseActivation): ...@@ -141,7 +144,8 @@ class BReluActivation(BaseActivation):
0 &\\quad \\mathrm{otherwise} 0 &\\quad \\mathrm{otherwise}
""" """
def __init__(self): BaseActivation.__init__(self, 'brelu', False) def __init__(self):
BaseActivation.__init__(self, 'brelu', False)
class SoftReluActivation(BaseActivation): class SoftReluActivation(BaseActivation):
...@@ -149,7 +153,9 @@ class SoftReluActivation(BaseActivation): ...@@ -149,7 +153,9 @@ class SoftReluActivation(BaseActivation):
SoftRelu Activation. SoftRelu Activation.
""" """
def __init__(self): BaseActivation.__init__(self, 'softrelu', False) def __init__(self):
BaseActivation.__init__(self, 'softrelu', False)
class STanhActivation(BaseActivation): class STanhActivation(BaseActivation):
""" """
...@@ -160,7 +166,8 @@ class STanhActivation(BaseActivation): ...@@ -160,7 +166,8 @@ class STanhActivation(BaseActivation):
f(z) = 1.7159 * tanh(2/3*z) f(z) = 1.7159 * tanh(2/3*z)
""" """
def __init__(self): BaseActivation.__init__(self, 'stanh', False) def __init__(self):
BaseActivation.__init__(self, 'stanh', False)
class AbsActivation(BaseActivation): class AbsActivation(BaseActivation):
...@@ -178,7 +185,8 @@ class AbsActivation(BaseActivation): ...@@ -178,7 +185,8 @@ class AbsActivation(BaseActivation):
0 &\\quad if \\quad z = 0 0 &\\quad if \\quad z = 0
""" """
def __init__(self): BaseActivation.__init__(self, 'abs', False) def __init__(self):
BaseActivation.__init__(self, 'abs', False)
class SquareActivation(BaseActivation): class SquareActivation(BaseActivation):
...@@ -189,7 +197,9 @@ class SquareActivation(BaseActivation): ...@@ -189,7 +197,9 @@ class SquareActivation(BaseActivation):
f(z) = z^2. f(z) = z^2.
""" """
def __init__(self): BaseActivation.__init__(self, 'square', False) def __init__(self):
BaseActivation.__init__(self, 'square', False)
class ExpActivation(BaseActivation): class ExpActivation(BaseActivation):
""" """
...@@ -198,7 +208,10 @@ class ExpActivation(BaseActivation): ...@@ -198,7 +208,10 @@ class ExpActivation(BaseActivation):
.. math:: .. math::
f(z) = e^z. f(z) = e^z.
""" """
def __init__(self): BaseActivation.__init__(self, 'exponential', False)
def __init__(self):
BaseActivation.__init__(self, 'exponential', False)
class LogActivation(BaseActivation): class LogActivation(BaseActivation):
""" """
...@@ -207,4 +220,6 @@ class LogActivation(BaseActivation): ...@@ -207,4 +220,6 @@ class LogActivation(BaseActivation):
.. math:: .. math::
f(z) = log(z) f(z) = log(z)
""" """
def __init__(self): BaseActivation.__init__(self, 'log', False)
def __init__(self):
BaseActivation.__init__(self, 'log', False)
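A short usage sketch for the activation classes above: each one simply records the internal type name passed to BaseActivation.__init__ and is then handed to a layer helper. The import path is an assumption based on this tree's layout.

from paddle.trainer_config_helpers.activations import (TanhActivation,
                                                        SoftmaxActivation)

hidden_act = TanhActivation()      # registers the internal type name 'tanh'
output_act = SoftmaxActivation()
# Typically passed to layer helpers, e.g. fc_layer(..., act=hidden_act).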
...@@ -13,8 +13,9 @@ ...@@ -13,8 +13,9 @@
# limitations under the License. # limitations under the License.
from paddle.trainer.config_parser import * from paddle.trainer.config_parser import *
__all__ = ['ParamAttr', 'ExtraAttr', 'ParameterAttribute', __all__ = [
'ExtraLayerAttribute'] 'ParamAttr', 'ExtraAttr', 'ParameterAttribute', 'ExtraLayerAttribute'
]
def convert_and_compare(x, Type): def convert_and_compare(x, Type):
...@@ -25,7 +26,8 @@ def convert_and_compare(x, Type): ...@@ -25,7 +26,8 @@ def convert_and_compare(x, Type):
:param Type: target type to check x over :param Type: target type to check x over
""" """
return type(x)(Type(x))==x return type(x)(Type(x)) == x
def is_compatible_with(x, Type): def is_compatible_with(x, Type):
""" """
...@@ -38,9 +40,9 @@ def is_compatible_with(x, Type): ...@@ -38,9 +40,9 @@ def is_compatible_with(x, Type):
return True return True
try: try:
if float == Type or int == Type: if float == Type or int == Type:
# avoid those types that can be converted to float/int but not very # avoid those types that can be converted to float/int but not very
# meaningful and could potentially lead to error # meaningful and could potentially lead to error
# i.e., str and bool typed value should not be used for initializing float/int variable # i.e., str and bool typed value should not be used for initializing float/int variable
if not isinstance(x, str) and not isinstance(x, bool): if not isinstance(x, str) and not isinstance(x, bool):
return convert_and_compare(x, Type) return convert_and_compare(x, Type)
elif bool == Type: elif bool == Type:
...@@ -91,9 +93,17 @@ class ParameterAttribute(object): ...@@ -91,9 +93,17 @@ class ParameterAttribute(object):
:type sparse_update: bool :type sparse_update: bool
""" """
def __init__(self, name=None, is_static=False, initial_std=None, def __init__(self,
initial_mean=None, initial_max=None, initial_min=None, name=None,
l1_rate=None, l2_rate=None, learning_rate=None, momentum=None, is_static=False,
initial_std=None,
initial_mean=None,
initial_max=None,
initial_min=None,
l1_rate=None,
l2_rate=None,
learning_rate=None,
momentum=None,
sparse_update=False): sparse_update=False):
# initialize strategy. # initialize strategy.
if is_static: if is_static:
...@@ -183,7 +193,10 @@ class ExtraLayerAttribute(object): ...@@ -183,7 +193,10 @@ class ExtraLayerAttribute(object):
:type device: int :type device: int
""" """
def __init__(self, error_clipping_threshold=None, drop_rate=None, device=None): def __init__(self,
error_clipping_threshold=None,
drop_rate=None,
device=None):
self.attr = dict() self.attr = dict()
if isinstance(error_clipping_threshold, float): if isinstance(error_clipping_threshold, float):
assert error_clipping_threshold > 0 assert error_clipping_threshold > 0
...@@ -200,8 +213,8 @@ class ExtraLayerAttribute(object): ...@@ -200,8 +213,8 @@ class ExtraLayerAttribute(object):
for key in self.attr: for key in self.attr:
if not hasattr(self, 'can_%s' % key) or \ if not hasattr(self, 'can_%s' % key) or \
not getattr(self, 'can_%s' % key): not getattr(self, 'can_%s' % key):
raise NotImplementedError( raise NotImplementedError("Layer %s cannot support %s" %
"Layer %s cannot support %s" % (layer_name, key)) (layer_name, key))
@staticmethod @staticmethod
def to_kwargs(attr): def to_kwargs(attr):
......
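A hedged sketch of how ParamAttr and ExtraAttr (the short aliases exported in __all__ above) are typically constructed; the parameter name and numeric values are illustrative.

from paddle.trainer_config_helpers.attrs import ParamAttr, ExtraAttr

weight_attr = ParamAttr(
    name="fc.w",          # illustrative parameter name
    initial_mean=0.0,
    initial_std=0.01,
    learning_rate=1.0,
    l2_rate=8e-4)

layer_attr = ExtraAttr(
    error_clipping_threshold=10.0,  # must be a positive float (asserted above)
    drop_rate=0.5)
# Both objects are later passed to layer helpers via param_attr= / layer_attr=.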
...@@ -11,7 +11,6 @@ ...@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
""" """
Data Sources are helpers to define paddle training data or testing data. Data Sources are helpers to define paddle training data or testing data.
""" """
...@@ -26,8 +25,12 @@ except ImportError: ...@@ -26,8 +25,12 @@ except ImportError:
__all__ = ['define_py_data_sources2'] __all__ = ['define_py_data_sources2']
def define_py_data_source(file_list, cls, module, def define_py_data_source(file_list,
obj, args=None, async=False, cls,
module,
obj,
args=None,
async=False,
data_cls=PyData): data_cls=PyData):
""" """
Define a python data source. Define a python data source.
...@@ -76,8 +79,9 @@ def define_py_data_source(file_list, cls, module, ...@@ -76,8 +79,9 @@ def define_py_data_source(file_list, cls, module,
args = pickle.dumps(args, 0) args = pickle.dumps(args, 0)
if data_cls is None: if data_cls is None:
def py_data2(files, load_data_module, load_data_object, load_data_args, def py_data2(files, load_data_module, load_data_object, load_data_args,
**kwargs): **kwargs):
data = DataBase() data = DataBase()
data.type = 'py2' data.type = 'py2'
data.files = files data.files = files
...@@ -86,17 +90,25 @@ def define_py_data_source(file_list, cls, module, ...@@ -86,17 +90,25 @@ def define_py_data_source(file_list, cls, module,
data.load_data_args = load_data_args data.load_data_args = load_data_args
data.async_load_data = True data.async_load_data = True
return data return data
data_cls = py_data2
cls(data_cls(files=file_list,
load_data_module=module,
load_data_object=obj,
load_data_args=args,
async_load_data=async))
data_cls = py_data2
def define_py_data_sources(train_list, test_list, module, obj, args=None, cls(
train_async=False, data_cls=PyData): data_cls(
files=file_list,
load_data_module=module,
load_data_object=obj,
load_data_args=args,
async_load_data=async))
def define_py_data_sources(train_list,
test_list,
module,
obj,
args=None,
train_async=False,
data_cls=PyData):
""" """
The usage is almost the same as define_py_data_sources2, except that The usage is almost the same as define_py_data_sources2, except that
it can also specify train_async and data_cls. it can also specify train_async and data_cls.
...@@ -125,8 +137,8 @@ def define_py_data_sources(train_list, test_list, module, obj, args=None, ...@@ -125,8 +137,8 @@ def define_py_data_sources(train_list, test_list, module, obj, args=None,
""" """
def __is_splitable__(o): def __is_splitable__(o):
return (isinstance(o, list) or isinstance(o, tuple) return (isinstance(o, list) or
) and hasattr(o, '__len__') and len(o) == 2 isinstance(o, tuple)) and hasattr(o, '__len__') and len(o) == 2
assert train_list is not None or test_list is not None assert train_list is not None or test_list is not None
assert module is not None and obj is not None assert module is not None and obj is not None
...@@ -196,9 +208,10 @@ def define_py_data_sources2(train_list, test_list, module, obj, args=None): ...@@ -196,9 +208,10 @@ def define_py_data_sources2(train_list, test_list, module, obj, args=None):
:return: None :return: None
:rtype: None :rtype: None
""" """
define_py_data_sources(train_list=train_list, define_py_data_sources(
test_list=test_list, train_list=train_list,
module=module, test_list=test_list,
obj=obj, module=module,
args=args, obj=obj,
data_cls=None) args=args,
data_cls=None)
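A hedged example of calling define_py_data_sources2 from a trainer config. It assumes a module dataprovider.py on the Python path that exposes a data-provider callable named process; the file names and args dictionary are illustrative.

from paddle.trainer_config_helpers import *

define_py_data_sources2(
    train_list="data/train.list",
    test_list="data/test.list",          # may be None when there is no test set
    module="dataprovider",
    obj="process",
    args={"dict_file": "data/dict.txt"})  # pickled and forwarded to the provider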
...@@ -18,16 +18,18 @@ from .attrs import ParamAttr ...@@ -18,16 +18,18 @@ from .attrs import ParamAttr
from .activations import TanhActivation from .activations import TanhActivation
from paddle.trainer.config_parser import * from paddle.trainer.config_parser import *
__all__ = ['wrap_name_default', 'wrap_param_attr_default', __all__ = [
'wrap_bias_attr_default', 'wrap_act_default', 'wrap_name_default', 'wrap_param_attr_default', 'wrap_bias_attr_default',
'wrap_param_default'] 'wrap_act_default', 'wrap_param_default'
]
def __default_not_set_callback__(kwargs, name): def __default_not_set_callback__(kwargs, name):
return name not in kwargs or kwargs[name] is None return name not in kwargs or kwargs[name] is None
def wrap_param_default(param_names=None, default_factory=None, def wrap_param_default(param_names=None,
default_factory=None,
not_set_callback=__default_not_set_callback__): not_set_callback=__default_not_set_callback__):
assert param_names is not None assert param_names is not None
assert isinstance(param_names, list) or isinstance(param_names, tuple) assert isinstance(param_names, list) or isinstance(param_names, tuple)
...@@ -43,7 +45,8 @@ def wrap_param_default(param_names=None, default_factory=None, ...@@ -43,7 +45,8 @@ def wrap_param_default(param_names=None, default_factory=None,
if argspec.defaults: if argspec.defaults:
num_positional -= len(argspec.defaults) num_positional -= len(argspec.defaults)
if not argspec.varargs and len(args) > num_positional: if not argspec.varargs and len(args) > num_positional:
logger.fatal("Must use keyword arguments for non-positional args") logger.fatal(
"Must use keyword arguments for non-positional args")
for name in param_names: for name in param_names:
if not_set_callback(kwargs, name): # Not set if not_set_callback(kwargs, name): # Not set
kwargs[name] = default_factory(func) kwargs[name] = default_factory(func)
...@@ -112,13 +115,13 @@ def wrap_param_attr_default(param_names=None, default_factory=None): ...@@ -112,13 +115,13 @@ def wrap_param_attr_default(param_names=None, default_factory=None):
return wrap_param_default(param_names, default_factory) return wrap_param_default(param_names, default_factory)
def wrap_bias_attr_default(param_names=None, default_factory=None, def wrap_bias_attr_default(param_names=None,
default_factory=None,
has_bias=True): has_bias=True):
if param_names is None: if param_names is None:
param_names = ['bias_attr'] param_names = ['bias_attr']
if default_factory is None: if default_factory is None:
default_factory = lambda _: ParamAttr(initial_std=0., default_factory = lambda _: ParamAttr(initial_std=0., initial_mean=0.)
initial_mean=0.)
def __bias_attr_not_set__(kwargs, name): def __bias_attr_not_set__(kwargs, name):
if has_bias: if has_bias:
......
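To illustrate the decorator above, a minimal hedged sketch: wrap_param_default fills a keyword argument with default_factory(func) whenever the caller leaves it unset (per __default_not_set_callback__). The import path and the toy function are assumptions.

from paddle.trainer_config_helpers.default_decorators import wrap_param_default

@wrap_param_default(param_names=['bias_attr'], default_factory=lambda func: False)
def my_layer(input, bias_attr=None):
    # bias_attr arrives as False whenever the caller leaves it unset or passes None
    return bias_attr

assert my_layer(input="some_layer") is False          # default filled in
assert my_layer(input="some_layer", bias_attr=True) is True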
...@@ -15,13 +15,14 @@ ...@@ -15,13 +15,14 @@
from paddle.trainer.config_parser import * from paddle.trainer.config_parser import *
from default_decorators import * from default_decorators import *
__all__ = ["evaluator_base","classification_error_evaluator", "auc_evaluator", __all__ = [
"pnpair_evaluator", "precision_recall_evaluator", "evaluator_base", "classification_error_evaluator", "auc_evaluator",
"ctc_error_evaluator", "chunk_evaluator", "sum_evaluator", "pnpair_evaluator", "precision_recall_evaluator", "ctc_error_evaluator",
"column_sum_evaluator", "value_printer_evaluator", "chunk_evaluator", "sum_evaluator", "column_sum_evaluator",
"gradient_printer_evaluator", "maxid_printer_evaluator", "value_printer_evaluator", "gradient_printer_evaluator",
"maxframe_printer_evaluator", "seqtext_printer_evaluator", "maxid_printer_evaluator", "maxframe_printer_evaluator",
"classification_error_printer_evaluator"] "seqtext_printer_evaluator", "classification_error_printer_evaluator"
]
class EvaluatorAttribute(object): class EvaluatorAttribute(object):
...@@ -32,10 +33,7 @@ class EvaluatorAttribute(object): ...@@ -32,10 +33,7 @@ class EvaluatorAttribute(object):
FOR_UTILS = 1 << 4 FOR_UTILS = 1 << 4
KEYS = [ KEYS = [
"for_classification", "for_classification", "for_regression", "for_rank", "for_print",
"for_regression",
"for_rank",
"for_print",
"for_utils" "for_utils"
] ]
...@@ -55,22 +53,23 @@ def evaluator(*attrs): ...@@ -55,22 +53,23 @@ def evaluator(*attrs):
setattr(method, EvaluatorAttribute.to_key(attr), True) setattr(method, EvaluatorAttribute.to_key(attr), True)
method.is_evaluator = True method.is_evaluator = True
return method return method
return impl return impl
def evaluator_base(
input, def evaluator_base(input,
type, type,
label=None, label=None,
weight=None, weight=None,
name=None, name=None,
chunk_scheme=None, chunk_scheme=None,
num_chunk_types=None, num_chunk_types=None,
classification_threshold=None, classification_threshold=None,
positive_label=None, positive_label=None,
dict_file=None, dict_file=None,
result_file=None, result_file=None,
num_results=None, num_results=None,
delimited=None): delimited=None):
""" """
Evaluator will evaluate the network status while training/testing. Evaluator will evaluate the network status while training/testing.
...@@ -130,14 +129,14 @@ def evaluator_base( ...@@ -130,14 +129,14 @@ def evaluator_base(
result_file=result_file, result_file=result_file,
delimited=delimited) delimited=delimited)
@evaluator(EvaluatorAttribute.FOR_CLASSIFICATION) @evaluator(EvaluatorAttribute.FOR_CLASSIFICATION)
@wrap_name_default() @wrap_name_default()
def classification_error_evaluator( def classification_error_evaluator(input,
input, label,
label, name=None,
name=None, weight=None,
weight=None, threshold=None):
threshold=None):
""" """
Classification Error Evaluator. It will print error rate for classification. Classification Error Evaluator. It will print error rate for classification.
...@@ -170,13 +169,14 @@ def classification_error_evaluator( ...@@ -170,13 +169,14 @@ def classification_error_evaluator(
:return: None. :return: None.
""" """
evaluator_base(name=name, evaluator_base(
type="classification_error", name=name,
input=input, type="classification_error",
label=label, input=input,
weight=weight, label=label,
classification_threshold=threshold, weight=weight,
) classification_threshold=threshold, )
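A hedged sketch of attaching this evaluator inside a network config; the data sizes and layer names are illustrative, and the layer helpers come from the layers module later in this diff.

from paddle.trainer_config_helpers import *

feature = data_layer(name="feature", size=784)
label = data_layer(name="label", size=10)
prediction = fc_layer(input=feature, size=10, act=SoftmaxActivation())

classification_error_evaluator(input=prediction, label=label)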
@evaluator(EvaluatorAttribute.FOR_CLASSIFICATION) @evaluator(EvaluatorAttribute.FOR_CLASSIFICATION)
@wrap_name_default() @wrap_name_default()
...@@ -184,8 +184,7 @@ def auc_evaluator( ...@@ -184,8 +184,7 @@ def auc_evaluator(
input, input,
label, label,
name=None, name=None,
weight=None, weight=None, ):
):
""" """
Auc Evaluator which adapts to binary classification. Auc Evaluator which adapts to binary classification.
...@@ -205,11 +204,13 @@ def auc_evaluator( ...@@ -205,11 +204,13 @@ def auc_evaluator(
[sample_num, 1]. [sample_num, 1].
:type weight: LayerOutput :type weight: LayerOutput
""" """
evaluator_base(name=name, evaluator_base(
type="last-column-auc", name=name,
input=input, type="last-column-auc",
label=label, input=input,
weight=weight) label=label,
weight=weight)
@evaluator(EvaluatorAttribute.FOR_RANK) @evaluator(EvaluatorAttribute.FOR_RANK)
@wrap_name_default() @wrap_name_default()
...@@ -218,8 +219,7 @@ def pnpair_evaluator( ...@@ -218,8 +219,7 @@ def pnpair_evaluator(
label, label,
info, info,
name=None, name=None,
weight=None, weight=None, ):
):
""" """
Positive-negative pair rate Evaluator which adapts to ranking tasks like Positive-negative pair rate Evaluator which adapts to ranking tasks like
learning to rank. This evaluator must contain at least three layers. learning to rank. This evaluator must contain at least three layers.
...@@ -242,12 +242,14 @@ def pnpair_evaluator( ...@@ -242,12 +242,14 @@ def pnpair_evaluator(
[sample_num, 1]. (TODO, explanation) [sample_num, 1]. (TODO, explanation)
:type weight: LayerOutput :type weight: LayerOutput
""" """
evaluator_base(name=name, evaluator_base(
type="pnpair", name=name,
input=input, type="pnpair",
label=label, input=input,
info=info, label=label,
weight=weight) info=info,
weight=weight)
@evaluator(EvaluatorAttribute.FOR_CLASSIFICATION) @evaluator(EvaluatorAttribute.FOR_CLASSIFICATION)
@wrap_name_default() @wrap_name_default()
...@@ -256,8 +258,7 @@ def precision_recall_evaluator( ...@@ -256,8 +258,7 @@ def precision_recall_evaluator(
label, label,
positive_label=None, positive_label=None,
weight=None, weight=None,
name=None, name=None, ):
):
""" """
An Evaluator to calculate precision and recall, F1-score. An Evaluator to calculate precision and recall, F1-score.
It is adapted to tasks with multiple labels. It is adapted to tasks with multiple labels.
...@@ -286,20 +287,21 @@ def precision_recall_evaluator( ...@@ -286,20 +287,21 @@ def precision_recall_evaluator(
[sample_num, 1]. (TODO, explanation) [sample_num, 1]. (TODO, explanation)
:type weight: LayerOutput :type weight: LayerOutput
""" """
evaluator_base(name=name, evaluator_base(
type="precision_recall", name=name,
input=input, type="precision_recall",
label=label, input=input,
positive_label=positive_label, label=label,
weight=weight) positive_label=positive_label,
weight=weight)
@evaluator(EvaluatorAttribute.FOR_CLASSIFICATION) @evaluator(EvaluatorAttribute.FOR_CLASSIFICATION)
@wrap_name_default() @wrap_name_default()
def ctc_error_evaluator( def ctc_error_evaluator(
input, input,
label, label,
name=None, name=None, ):
):
""" """
This evaluator calculates the sequence-to-sequence edit distance. This evaluator calculates the sequence-to-sequence edit distance.
...@@ -317,10 +319,9 @@ def ctc_error_evaluator( ...@@ -317,10 +319,9 @@ def ctc_error_evaluator(
label for ctc_layer label for ctc_layer
:type label: LayerOutput :type label: LayerOutput
""" """
evaluator_base(name=name, evaluator_base(
type="ctc_edit_distance", name=name, type="ctc_edit_distance", input=input, label=label)
input=input,
label=label)
@evaluator(EvaluatorAttribute.FOR_CLASSIFICATION) @evaluator(EvaluatorAttribute.FOR_CLASSIFICATION)
@wrap_name_default() @wrap_name_default()
...@@ -328,8 +329,7 @@ def chunk_evaluator( ...@@ -328,8 +329,7 @@ def chunk_evaluator(
input, input,
name=None, name=None,
chunk_scheme=None, chunk_scheme=None,
num_chunk_types=None, num_chunk_types=None, ):
):
""" """
Chunk evaluator is used to evaluate segment labelling accuracy for a Chunk evaluator is used to evaluate segment labelling accuracy for a
sequence. It calculates the chunk detection F1 score. sequence. It calculates the chunk detection F1 score.
...@@ -375,19 +375,20 @@ def chunk_evaluator( ...@@ -375,19 +375,20 @@ def chunk_evaluator(
:type chunk_scheme: basestring :type chunk_scheme: basestring
:param num_chunk_types: number of chunk types other than "other" :param num_chunk_types: number of chunk types other than "other"
""" """
evaluator_base(name=name, evaluator_base(
type="chunk", name=name,
input=input, type="chunk",
chunk_scheme=chunk_scheme, input=input,
num_chunk_types=num_chunk_types) chunk_scheme=chunk_scheme,
num_chunk_types=num_chunk_types)
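A hedged usage sketch for sequence labelling; "ner_output" stands for an assumed LayerOutput carrying predicted chunk labels (e.g. from a CRF decoding layer), and the scheme and type count are illustrative.

chunk_evaluator(
    input=ner_output,        # assumed LayerOutput with predicted labels
    chunk_scheme="IOB",      # assumed to be one of the supported schemes
    num_chunk_types=3)       # e.g. PER / ORG / LOC, excluding "other"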
@evaluator(EvaluatorAttribute.FOR_UTILS) @evaluator(EvaluatorAttribute.FOR_UTILS)
@wrap_name_default() @wrap_name_default()
def sum_evaluator( def sum_evaluator(
input, input,
name=None, name=None,
weight=None, weight=None, ):
):
""" """
An Evaluator to sum the result of input. An Evaluator to sum the result of input.
...@@ -405,18 +406,15 @@ def sum_evaluator( ...@@ -405,18 +406,15 @@ def sum_evaluator(
[sample_num, 1]. (TODO, explanation) [sample_num, 1]. (TODO, explanation)
:type weight: LayerOutput :type weight: LayerOutput
""" """
evaluator_base(name=name, evaluator_base(name=name, type="sum", input=input, weight=weight)
type="sum",
input=input,
weight=weight)
@evaluator(EvaluatorAttribute.FOR_UTILS) @evaluator(EvaluatorAttribute.FOR_UTILS)
@wrap_name_default() @wrap_name_default()
def column_sum_evaluator( def column_sum_evaluator(
input, input,
name=None, name=None,
weight=None, weight=None, ):
):
""" """
This Evaluator is used to sum the last column of input. This Evaluator is used to sum the last column of input.
...@@ -431,22 +429,22 @@ def column_sum_evaluator( ...@@ -431,22 +429,22 @@ def column_sum_evaluator(
:param input: Input Layer name. :param input: Input Layer name.
:type input: LayerOutput :type input: LayerOutput
""" """
evaluator_base(name=name, evaluator_base(
type="last-column-sum", name=name, type="last-column-sum", input=input, weight=weight)
input=input,
weight=weight)
""" """
The following are printer Evaluators which are usually used to The following are printer Evaluators which are usually used to
print the result, like value or gradient of input layers, the print the result, like value or gradient of input layers, the
results generated in machine translation, the classification error etc. results generated in machine translation, the classification error etc.
""" """
@evaluator(EvaluatorAttribute.FOR_PRINT) @evaluator(EvaluatorAttribute.FOR_PRINT)
@wrap_name_default() @wrap_name_default()
def value_printer_evaluator( def value_printer_evaluator(
input, input,
name=None, name=None, ):
):
""" """
This Evaluator is used to print the values of input layers. It contains This Evaluator is used to print the values of input layers. It contains
one or more input layers. one or more input layers.
...@@ -462,16 +460,14 @@ def value_printer_evaluator( ...@@ -462,16 +460,14 @@ def value_printer_evaluator(
:param name: Evaluator name. :param name: Evaluator name.
:type name: None|basestring :type name: None|basestring
""" """
evaluator_base(name=name, evaluator_base(name=name, type="value_printer", input=input)
type="value_printer",
input=input)
@evaluator(EvaluatorAttribute.FOR_PRINT) @evaluator(EvaluatorAttribute.FOR_PRINT)
@wrap_name_default() @wrap_name_default()
def gradient_printer_evaluator( def gradient_printer_evaluator(
input, input,
name=None, name=None, ):
):
""" """
This Evaluator is used to print the gradient of input layers. It contains This Evaluator is used to print the gradient of input layers. It contains
one or more input layers. one or more input layers.
...@@ -487,17 +483,15 @@ def gradient_printer_evaluator( ...@@ -487,17 +483,15 @@ def gradient_printer_evaluator(
:param name: Evaluator name. :param name: Evaluator name.
:type name: None|basestring :type name: None|basestring
""" """
evaluator_base(name=name, evaluator_base(name=name, type="gradient_printer", input=input)
type="gradient_printer",
input=input)
@evaluator(EvaluatorAttribute.FOR_PRINT) @evaluator(EvaluatorAttribute.FOR_PRINT)
@wrap_name_default() @wrap_name_default()
def maxid_printer_evaluator( def maxid_printer_evaluator(
input, input,
num_results=None, num_results=None,
name=None, name=None, ):
):
""" """
This Evaluator is used to print the top k maximum values and their indexes This Evaluator is used to print the top k maximum values and their indexes
of each row of the input layers. It contains one or more input layers. of each row of the input layers. It contains one or more input layers.
...@@ -517,18 +511,16 @@ def maxid_printer_evaluator( ...@@ -517,18 +511,16 @@ def maxid_printer_evaluator(
:param name: Evaluator name. :param name: Evaluator name.
:type name: None|basestring :type name: None|basestring
""" """
evaluator_base(name=name, evaluator_base(
type="max_id_printer", name=name, type="max_id_printer", input=input, num_results=num_results)
input=input,
num_results=num_results)
@evaluator(EvaluatorAttribute.FOR_PRINT) @evaluator(EvaluatorAttribute.FOR_PRINT)
@wrap_name_default() @wrap_name_default()
def maxframe_printer_evaluator( def maxframe_printer_evaluator(
input, input,
num_results=None, num_results=None,
name=None, name=None, ):
):
""" """
This Evaluator is used to print the top k frames of each input layer. This Evaluator is used to print the top k frames of each input layer.
The input layers should contain sequence information or be of a sequence type. The input layers should contain sequence information or be of a sequence type.
...@@ -549,10 +541,12 @@ def maxframe_printer_evaluator( ...@@ -549,10 +541,12 @@ def maxframe_printer_evaluator(
:param name: Evaluator name. :param name: Evaluator name.
:type name: None|basestring :type name: None|basestring
""" """
evaluator_base(name=name, evaluator_base(
type="max_frame_printer", name=name,
input=input, type="max_frame_printer",
num_results=num_results) input=input,
num_results=num_results)
@evaluator(EvaluatorAttribute.FOR_PRINT) @evaluator(EvaluatorAttribute.FOR_PRINT)
@wrap_name_default() @wrap_name_default()
...@@ -562,8 +556,7 @@ def seqtext_printer_evaluator( ...@@ -562,8 +556,7 @@ def seqtext_printer_evaluator(
id_input=None, id_input=None,
dict_file=None, dict_file=None,
delimited=None, delimited=None,
name=None, name=None, ):
):
""" """
Sequence text printer will print text according to an index matrix and a Sequence text printer will print text according to an index matrix and a
dictionary. There can be multiple inputs to this layer: dictionary. There can be multiple inputs to this layer:
...@@ -636,12 +629,14 @@ def seqtext_printer_evaluator( ...@@ -636,12 +629,14 @@ def seqtext_printer_evaluator(
inputs = [id_input, input] inputs = [id_input, input]
input.parents.append(id_input) input.parents.append(id_input)
evaluator_base(name=name, evaluator_base(
type="seq_text_printer", name=name,
input=inputs, type="seq_text_printer",
dict_file=dict_file, input=inputs,
result_file=result_file, dict_file=dict_file,
delimited=delimited) result_file=result_file,
delimited=delimited)
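A hedged sketch of printing generated sequences as text; "gen_ids" is an assumed id-matrix LayerOutput (e.g. produced by beam search), and result_file is assumed to be a parameter of this evaluator since it is forwarded to evaluator_base above.

seqtext_printer_evaluator(
    input=gen_ids,                   # assumed id-matrix layer
    dict_file="data/word_dict.txt",  # maps ids back to words
    result_file="gen_result.txt",
    delimited=True)                  # assumed: separate printed tokens with a delimiter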
@evaluator(EvaluatorAttribute.FOR_PRINT) @evaluator(EvaluatorAttribute.FOR_PRINT)
@wrap_name_default() @wrap_name_default()
...@@ -649,8 +644,7 @@ def classification_error_printer_evaluator( ...@@ -649,8 +644,7 @@ def classification_error_printer_evaluator(
input, input,
label, label,
threshold=0.5, threshold=0.5,
name=None, name=None, ):
):
""" """
This Evaluator is used to print the classification error of each sample. This Evaluator is used to print the classification error of each sample.
...@@ -667,8 +661,9 @@ def classification_error_printer_evaluator( ...@@ -667,8 +661,9 @@ def classification_error_printer_evaluator(
:param name: Evaluator name. :param name: Evaluator name.
:type name: None|basestring :type name: None|basestring
""" """
evaluator_base(name=name, evaluator_base(
type="classification_error_printer", name=name,
input=input, type="classification_error_printer",
label=label, input=input,
classification_threshold=threshold) label=label,
classification_threshold=threshold)
...@@ -29,36 +29,83 @@ except ImportError: ...@@ -29,36 +29,83 @@ except ImportError:
import pickle import pickle
import copy import copy
__all__ = ["full_matrix_projection", "AggregateLevel", "ExpandLevel", __all__ = [
"identity_projection", "dotmul_projection", "dotmul_operator", "full_matrix_projection",
"repeat_layer", "AggregateLevel",
"table_projection", "mixed_layer", "data_layer", "ExpandLevel",
"embedding_layer", "fc_layer", "grumemory", "identity_projection",
"pooling_layer", "lstmemory", "last_seq", "first_seq", "dotmul_projection",
"cos_sim", "hsigmoid", "conv_projection", "dotmul_operator",
"regression_cost", 'classification_cost', "LayerOutput", "repeat_layer",
'img_conv_layer', 'img_pool_layer', 'batch_norm_layer', "table_projection",
'img_cmrnorm_layer', 'addto_layer', "mixed_layer",
'concat_layer', 'lstm_step_layer', 'recurrent_group', "data_layer",
'memory', 'StaticInput', 'expand_layer', 'scaling_layer', "embedding_layer",
'power_layer', 'interpolation_layer', 'bilinear_interp_layer', "fc_layer",
'trans_layer', 'sum_to_one_norm_layer', "grumemory",
'get_output_layer', 'LayerType', 'context_projection', "pooling_layer",
'beam_search', 'maxid_layer', 'GeneratedInput', 'SubsequenceInput', "lstmemory",
'gru_step_layer', 'recurrent_layer', "last_seq",
'BaseGeneratedInput', 'conv_operator', 'conv_shift_layer', "first_seq",
'tensor_layer', 'selective_fc_layer', 'sampling_id_layer', "cos_sim",
'slope_intercept_layer', 'trans_full_matrix_projection', "hsigmoid",
'linear_comb_layer', "conv_projection",
'convex_comb_layer', 'ctc_layer', 'crf_layer', 'crf_decoding_layer', "regression_cost",
'nce_layer', 'classification_cost',
'cross_entropy_with_selfnorm', 'cross_entropy', "LayerOutput",
'multi_binary_label_cross_entropy', 'sum_cost', 'img_conv_layer',
'rank_cost', 'lambda_cost', 'huber_cost', 'img_pool_layer',
'block_expand_layer', 'batch_norm_layer',
'maxout_layer', 'out_prod_layer', 'print_layer', 'img_cmrnorm_layer',
'spp_layer', 'addto_layer',
] 'concat_layer',
'lstm_step_layer',
'recurrent_group',
'memory',
'StaticInput',
'expand_layer',
'scaling_layer',
'power_layer',
'interpolation_layer',
'bilinear_interp_layer',
'trans_layer',
'sum_to_one_norm_layer',
'get_output_layer',
'LayerType',
'context_projection',
'beam_search',
'maxid_layer',
'GeneratedInput',
'SubsequenceInput',
'gru_step_layer',
'recurrent_layer',
'BaseGeneratedInput',
'conv_operator',
'conv_shift_layer',
'tensor_layer',
'selective_fc_layer',
'sampling_id_layer',
'slope_intercept_layer',
'trans_full_matrix_projection',
'linear_comb_layer',
'convex_comb_layer',
'ctc_layer',
'crf_layer',
'crf_decoding_layer',
'nce_layer',
'cross_entropy_with_selfnorm',
'cross_entropy',
'multi_binary_label_cross_entropy',
'sum_cost',
'rank_cost',
'lambda_cost',
'huber_cost',
'block_expand_layer',
'maxout_layer',
'out_prod_layer',
'print_layer',
'spp_layer',
]
class LayerType(object): class LayerType(object):
...@@ -181,8 +228,15 @@ class LayerOutput(object): ...@@ -181,8 +228,15 @@ class LayerOutput(object):
:type parents: list|tuple|collections.Sequence :type parents: list|tuple|collections.Sequence
""" """
def __init__(self, name, layer_type, parents=None, activation=None, def __init__(self,
num_filters=None, img_norm_type=None, size=None, outputs=None, name,
layer_type,
parents=None,
activation=None,
num_filters=None,
img_norm_type=None,
size=None,
outputs=None,
reverse=None): reverse=None):
assert isinstance(name, basestring) assert isinstance(name, basestring)
assert isinstance(layer_type, basestring) assert isinstance(layer_type, basestring)
...@@ -223,6 +277,7 @@ DEVICE = 'device' ...@@ -223,6 +277,7 @@ DEVICE = 'device'
def layer_support(*attrs): def layer_support(*attrs):
attrs_list = list(attrs) attrs_list = list(attrs)
attrs_list.append(DEVICE) attrs_list.append(DEVICE)
def decorator(method): def decorator(method):
@functools.wraps(method) @functools.wraps(method)
def wrapper(*args, **kwargs): def wrapper(*args, **kwargs):
...@@ -282,9 +337,8 @@ def full_matrix_projection(input, size=0, param_attr=None): ...@@ -282,9 +337,8 @@ def full_matrix_projection(input, size=0, param_attr=None):
:return: A FullMatrixProjection Object. :return: A FullMatrixProjection Object.
:rtype: FullMatrixProjection :rtype: FullMatrixProjection
""" """
proj = FullMatrixProjection(input_layer_name=input.name, proj = FullMatrixProjection(
size=size, input_layer_name=input.name, size=size, **param_attr.attr)
**param_attr.attr)
proj.origin = input proj.origin = input
return proj return proj
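A hedged sketch showing where a projection lives: projections are added to a mixed_layer (defined later in this file) rather than used standalone. Layer names and sizes are illustrative.

from paddle.trainer_config_helpers import *

img_vec = data_layer(name="img_vec", size=4096)
txt_vec = data_layer(name="txt_vec", size=300)

with mixed_layer(size=256) as m:
    m += full_matrix_projection(input=img_vec,
                                param_attr=ParamAttr(initial_std=0.01))
    m += full_matrix_projection(input=txt_vec)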
...@@ -319,9 +373,8 @@ def trans_full_matrix_projection(input, size=0, param_attr=None): ...@@ -319,9 +373,8 @@ def trans_full_matrix_projection(input, size=0, param_attr=None):
:return: A TransposedFullMatrixProjection Object. :return: A TransposedFullMatrixProjection Object.
:rtype: TransposedFullMatrixProjection :rtype: TransposedFullMatrixProjection
""" """
proj = TransposedFullMatrixProjection(input_layer_name=input.name, proj = TransposedFullMatrixProjection(
size=size, input_layer_name=input.name, size=size, **param_attr.attr)
**param_attr.attr)
proj.origin = input proj.origin = input
return proj return proj
...@@ -365,9 +418,8 @@ def table_projection(input, size=0, param_attr=None): ...@@ -365,9 +418,8 @@ def table_projection(input, size=0, param_attr=None):
:return: A TableProjection Object. :return: A TableProjection Object.
:rtype: TableProjection :rtype: TableProjection
""" """
proj = TableProjection(input_layer_name=input.name, proj = TableProjection(
size=size, input_layer_name=input.name, size=size, **param_attr.attr)
**param_attr.attr)
proj.origin = input proj.origin = input
return proj return proj
...@@ -413,8 +465,8 @@ def identity_projection(input, offset=None): ...@@ -413,8 +465,8 @@ def identity_projection(input, offset=None):
proj = IdentityProjection(input_layer_name=input.name) proj = IdentityProjection(input_layer_name=input.name)
proj.origin = input proj.origin = input
else: else:
proj = IdentityOffsetProjection(input_layer_name=input.name, proj = IdentityOffsetProjection(
offset=offset) input_layer_name=input.name, offset=offset)
proj.origin = input proj.origin = input
return proj return proj
...@@ -443,9 +495,8 @@ def dotmul_projection(input, param_attr=None): ...@@ -443,9 +495,8 @@ def dotmul_projection(input, param_attr=None):
:return: A DotMulProjection Object. :return: A DotMulProjection Object.
:rtype: DotMulProjection :rtype: DotMulProjection
""" """
proj = DotMulProjection(input_layer_name=input.name, proj = DotMulProjection(
size=input.size, input_layer_name=input.name, size=input.size, **param_attr.attr)
**param_attr.attr)
proj.origin = input proj.origin = input
return proj return proj
...@@ -478,21 +529,22 @@ def dotmul_operator(a=None, b=None, scale=1, **kwargs): ...@@ -478,21 +529,22 @@ def dotmul_operator(a=None, b=None, scale=1, **kwargs):
if 'x' in kwargs or 'y' in kwargs: if 'x' in kwargs or 'y' in kwargs:
logger.warning('x and y arguments for dotmul_operator are deprecated. ' logger.warning('x and y arguments for dotmul_operator are deprecated. '
'Please use a and b as parameters.') 'Please use a and b as parameters.')
a = kwargs.get('x', a) # For backward compatibility. a = kwargs.get('x', a) # For backward compatibility.
b = kwargs.get('y', b) b = kwargs.get('y', b)
assert isinstance(a, LayerOutput) assert isinstance(a, LayerOutput)
assert isinstance(b, LayerOutput) assert isinstance(b, LayerOutput)
if a.size is not None and b.size is not None: if a.size is not None and b.size is not None:
assert a.size == b.size assert a.size == b.size
op = DotMulOperator(input_layer_names=[a.name, b.name], op = DotMulOperator(input_layer_names=[a.name, b.name], scale=scale)
scale=scale)
op.origin = [a, b] op.origin = [a, b]
return op return op
@wrap_bias_attr_default(['padding_attr']) @wrap_bias_attr_default(['padding_attr'])
def context_projection(input, context_len, context_start=None, def context_projection(input,
context_len,
context_start=None,
padding_attr=False): padding_attr=False):
""" """
Context Projection. Context Projection.
...@@ -529,11 +581,12 @@ def context_projection(input, context_len, context_start=None, ...@@ -529,11 +581,12 @@ def context_projection(input, context_len, context_start=None,
if trainable: if trainable:
extra_dict = padding_attr.attr extra_dict = padding_attr.attr
proj = ContextProjection(input_layer_name=input.name, proj = ContextProjection(
context_length=context_len, input_layer_name=input.name,
context_start=context_start, context_length=context_len,
trainable_padding=trainable, context_start=context_start,
**extra_dict) trainable_padding=trainable,
**extra_dict)
proj.origin = input proj.origin = input
return proj return proj
...@@ -547,8 +600,7 @@ class MixedLayerType(LayerOutput): ...@@ -547,8 +600,7 @@ class MixedLayerType(LayerOutput):
def __init__(self): def __init__(self):
Exception.__init__(self) Exception.__init__(self)
def __init__(self, name, size, act, bias_attr, layer_attr, def __init__(self, name, size, act, bias_attr, layer_attr, parents=None):
parents=None):
""" """
Ctor. Ctor.
:param name: layer name. :param name: layer name.
...@@ -565,8 +617,13 @@ class MixedLayerType(LayerOutput): ...@@ -565,8 +617,13 @@ class MixedLayerType(LayerOutput):
:param layer_attr: Extra Layer Attribute. :param layer_attr: Extra Layer Attribute.
:type layer_attr: ExtraLayerAttribute or None :type layer_attr: ExtraLayerAttribute or None
""" """
LayerOutput.__init__(self, name, LayerType.MIXED_LAYER, parents, LayerOutput.__init__(
size=size, activation=act) self,
name,
LayerType.MIXED_LAYER,
parents,
size=size,
activation=act)
self.bias_attr = bias_attr self.bias_attr = bias_attr
self.layer_attr = layer_attr self.layer_attr = layer_attr
self.inputs = [] self.inputs = []
...@@ -604,8 +661,7 @@ class MixedLayerType(LayerOutput): ...@@ -604,8 +661,7 @@ class MixedLayerType(LayerOutput):
active_type=self.activation.name, active_type=self.activation.name,
bias=ParamAttr.to_bias(self.bias_attr), bias=ParamAttr.to_bias(self.bias_attr),
inputs=self.inputs, inputs=self.inputs,
**ExtraLayerAttribute.to_kwargs(self.layer_attr) **ExtraLayerAttribute.to_kwargs(self.layer_attr))
)
# update the size which might be computed inside MixedLayer # update the size which might be computed inside MixedLayer
# according to the operator's output size # according to the operator's output size
self.size = ml.config.size self.size = ml.config.size
...@@ -615,7 +671,11 @@ class MixedLayerType(LayerOutput): ...@@ -615,7 +671,11 @@ class MixedLayerType(LayerOutput):
@wrap_act_default(act=LinearActivation()) @wrap_act_default(act=LinearActivation())
@wrap_bias_attr_default(has_bias=False) @wrap_bias_attr_default(has_bias=False)
@layer_support(ERROR_CLIPPING, DROPOUT) @layer_support(ERROR_CLIPPING, DROPOUT)
def mixed_layer(size=0, input=None, name=None, act=None, bias_attr=False, def mixed_layer(size=0,
input=None,
name=None,
act=None,
bias_attr=False,
layer_attr=None): layer_attr=None):
""" """
Mixed Layer. A mixed layer will add all inputs together, then activate. Mixed Layer. A mixed layer will add all inputs together, then activate.
...@@ -660,8 +720,12 @@ def mixed_layer(size=0, input=None, name=None, act=None, bias_attr=False, ...@@ -660,8 +720,12 @@ def mixed_layer(size=0, input=None, name=None, act=None, bias_attr=False,
if input is None: if input is None:
return MixedLayerType(name, size, act, bias_attr, layer_attr) return MixedLayerType(name, size, act, bias_attr, layer_attr)
else: else:
with mixed_layer(name=name, size=size, act=act, bias_attr=bias_attr, with mixed_layer(
layer_attr=layer_attr) as m: name=name,
size=size,
act=act,
bias_attr=bias_attr,
layer_attr=layer_attr) as m:
if isinstance(input, collections.Sequence): if isinstance(input, collections.Sequence):
for each in input: for each in input:
m += each m += each
...@@ -691,8 +755,11 @@ def data_layer(name, size, layer_attr=None): ...@@ -691,8 +755,11 @@ def data_layer(name, size, layer_attr=None):
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
Layer(type=LayerType.DATA, name=name, size=size, Layer(
**ExtraLayerAttribute.to_kwargs(layer_attr)) type=LayerType.DATA,
name=name,
size=size,
**ExtraLayerAttribute.to_kwargs(layer_attr))
return LayerOutput(name, LayerType.DATA, size=size) return LayerOutput(name, LayerType.DATA, size=size)
...@@ -718,9 +785,12 @@ def embedding_layer(input, size, name=None, param_attr=None, layer_attr=None): ...@@ -718,9 +785,12 @@ def embedding_layer(input, size, name=None, param_attr=None, layer_attr=None):
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
with mixed_layer(name=name, size=size, act=LinearActivation(), with mixed_layer(
bias_attr=False, name=name,
layer_attr=layer_attr) as mix: size=size,
act=LinearActivation(),
bias_attr=False,
layer_attr=layer_attr) as mix:
mix += table_projection(input=input, size=size, param_attr=param_attr) mix += table_projection(input=input, size=size, param_attr=param_attr)
return mix return mix
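A hedged sketch of the embedding helper above; the vocabulary size, embedding width, and parameter name are illustrative.

from paddle.trainer_config_helpers import *

word = data_layer(name="word", size=10000)   # word ids over a 10k vocabulary
emb = embedding_layer(
    input=word,
    size=256,
    param_attr=ParamAttr(name="_emb", sparse_update=True))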
...@@ -730,8 +800,13 @@ def embedding_layer(input, size, name=None, param_attr=None, layer_attr=None): ...@@ -730,8 +800,13 @@ def embedding_layer(input, size, name=None, param_attr=None, layer_attr=None):
@wrap_bias_attr_default() @wrap_bias_attr_default()
@wrap_act_default() @wrap_act_default()
@layer_support(ERROR_CLIPPING, DROPOUT) @layer_support(ERROR_CLIPPING, DROPOUT)
def fc_layer(input, size, act=None, name=None, def fc_layer(input,
param_attr=None, bias_attr=None, layer_attr=None): size,
act=None,
name=None,
param_attr=None,
bias_attr=None,
layer_attr=None):
""" """
Helper to declare a fully connected layer. Helper to declare a fully connected layer.
...@@ -783,17 +858,17 @@ def fc_layer(input, size, act=None, name=None, ...@@ -783,17 +858,17 @@ def fc_layer(input, size, act=None, name=None,
assert isinstance(input, collections.Sequence) assert isinstance(input, collections.Sequence)
Layer( Layer(
inputs=[Input(ipt.name, **attr.attr) for ipt, attr in zip( inputs=[
input, param_attr)], Input(ipt.name, **attr.attr) for ipt, attr in zip(input, param_attr)
],
name=name, name=name,
type=LayerType.FC_LAYER, type=LayerType.FC_LAYER,
size=size, size=size,
bias=ParamAttr.to_bias(bias_attr), bias=ParamAttr.to_bias(bias_attr),
active_type=act.name, active_type=act.name,
**ExtraLayerAttribute.to_kwargs(layer_attr) **ExtraLayerAttribute.to_kwargs(layer_attr))
) return LayerOutput(
return LayerOutput(name, LayerType.FC_LAYER, input, activation=act, name, LayerType.FC_LAYER, input, activation=act, size=size)
size=size)
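A hedged sketch of stacking fc_layer into a small classifier; sizes and names are illustrative.

from paddle.trainer_config_helpers import *

feature = data_layer(name="feature", size=128)
hidden = fc_layer(input=feature, size=64, act=TanhActivation())
prediction = fc_layer(input=hidden, size=10, act=SoftmaxActivation())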
@wrap_name_default("print") @wrap_name_default("print")
...@@ -816,8 +891,7 @@ def print_layer(input, name=None): ...@@ -816,8 +891,7 @@ def print_layer(input, name=None):
Layer( Layer(
name=name, name=name,
type=LayerType.PRINT_LAYER, type=LayerType.PRINT_LAYER,
inputs=[l.name for l in input], inputs=[l.name for l in input], )
)
# this layer don't return anything, can not be input of other layer. # this layer don't return anything, can not be input of other layer.
...@@ -825,7 +899,10 @@ def print_layer(input, name=None): ...@@ -825,7 +899,10 @@ def print_layer(input, name=None):
@wrap_bias_attr_default(has_bias=False) @wrap_bias_attr_default(has_bias=False)
@wrap_param_default(['pooling_type'], default_factory=lambda _: MaxPooling()) @wrap_param_default(['pooling_type'], default_factory=lambda _: MaxPooling())
@layer_support() @layer_support()
def pooling_layer(input, pooling_type=None, name=None, bias_attr=None, def pooling_layer(input,
pooling_type=None,
name=None,
bias_attr=None,
agg_level=AggregateLevel.EACH_TIMESTEP, agg_level=AggregateLevel.EACH_TIMESTEP,
layer_attr=None): layer_attr=None):
""" """
...@@ -872,24 +949,27 @@ def pooling_layer(input, pooling_type=None, name=None, bias_attr=None, ...@@ -872,24 +949,27 @@ def pooling_layer(input, pooling_type=None, name=None, bias_attr=None,
inputs=[Input(input.name)], inputs=[Input(input.name)],
bias=ParamAttr.to_bias(bias_attr), bias=ParamAttr.to_bias(bias_attr),
trans_type=agg_level, trans_type=agg_level,
**extra_dict **extra_dict)
)
return LayerOutput(name, pooling_type.name, parents=[input],
size=input.size)
return LayerOutput(
name, pooling_type.name, parents=[input], size=input.size)
@wrap_bias_attr_default() @wrap_bias_attr_default()
@wrap_param_attr_default() @wrap_param_attr_default()
@wrap_act_default(param_names=['gate_act'], @wrap_act_default(param_names=['gate_act'], act=SigmoidActivation())
act=SigmoidActivation())
@wrap_act_default(param_names=["act", 'state_act'], act=TanhActivation()) @wrap_act_default(param_names=["act", 'state_act'], act=TanhActivation())
@wrap_name_default("lstmemory") @wrap_name_default("lstmemory")
@layer_support(DROPOUT) @layer_support(DROPOUT)
def lstmemory(input, name=None, reverse=False, act=None, def lstmemory(input,
gate_act=None, size=None, name=None,
state_act=None, bias_attr=None, param_attr=None, reverse=False,
act=None,
gate_act=None,
size=None,
state_act=None,
bias_attr=None,
param_attr=None,
layer_attr=None): layer_attr=None):
""" """
Long Short-term Memory Cell. Long Short-term Memory Cell.
...@@ -964,30 +1044,38 @@ def lstmemory(input, name=None, reverse=False, act=None, ...@@ -964,30 +1044,38 @@ def lstmemory(input, name=None, reverse=False, act=None,
"layer. The lstm size should be equal with input layer size/4. The" "layer. The lstm size should be equal with input layer size/4. The"
" size which is set explicitly will be ignored." % name) " size which is set explicitly will be ignored." % name)
Layer(name=name, Layer(
type=LayerType.LSTMEMORY, name=name,
active_type=act.name, type=LayerType.LSTMEMORY,
active_state_type=state_act.name, active_type=act.name,
active_gate_type=gate_act.name, active_state_type=state_act.name,
reversed=reverse, active_gate_type=gate_act.name,
bias=ParamAttr.to_bias(bias_attr), reversed=reverse,
inputs=[Input(input.name, **param_attr.attr)], bias=ParamAttr.to_bias(bias_attr),
**ExtraLayerAttribute.to_kwargs(layer_attr)) inputs=[Input(input.name, **param_attr.attr)],
**ExtraLayerAttribute.to_kwargs(layer_attr))
return LayerOutput(name, LayerType.LSTMEMORY, [input], size=input.size / 4, return LayerOutput(
reverse=reverse) name,
LayerType.LSTMEMORY, [input],
size=input.size / 4,
reverse=reverse)
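A hedged sketch wiring lstmemory: since the returned size is input.size / 4, the input is usually projected to 4 * hidden_size first; all names and sizes are illustrative.

from paddle.trainer_config_helpers import *

hidden_size = 128
seq = data_layer(name="word_vectors", size=256)   # a sequence input
proj = fc_layer(input=seq, size=hidden_size * 4,
                act=LinearActivation(), bias_attr=False)
lstm = lstmemory(input=proj)   # lstm.size == hidden_size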
@wrap_bias_attr_default() @wrap_bias_attr_default()
@wrap_param_attr_default() @wrap_param_attr_default()
@wrap_act_default(param_names=['gate_act'], @wrap_act_default(param_names=['gate_act'], act=SigmoidActivation())
act=SigmoidActivation())
@wrap_act_default(param_names=["act"], act=TanhActivation()) @wrap_act_default(param_names=["act"], act=TanhActivation())
@wrap_name_default("gru") @wrap_name_default("gru")
@layer_support(DROPOUT) @layer_support(DROPOUT)
def grumemory(input, name=None, reverse=False, act=None, def grumemory(input,
gate_act=None, size=None, name=None,
bias_attr=None, param_attr=None, reverse=False,
act=None,
gate_act=None,
size=None,
bias_attr=None,
param_attr=None,
layer_attr=None): layer_attr=None):
""" """
Gated Recurrent Unit Layer. Gated Recurrent Unit Layer.
...@@ -1078,23 +1166,28 @@ def grumemory(input, name=None, reverse=False, act=None, ...@@ -1078,23 +1166,28 @@ def grumemory(input, name=None, reverse=False, act=None,
" and should be input size / 3. Set size explicitly will be " " and should be input size / 3. Set size explicitly will be "
"ignored.") "ignored.")
Layer(name=name, Layer(
type=LayerType.GRUMEMORY, name=name,
active_type=act.name, type=LayerType.GRUMEMORY,
active_gate_type=gate_act.name, active_type=act.name,
reversed=reverse, active_gate_type=gate_act.name,
bias=ParamAttr.to_bias(bias_attr), reversed=reverse,
inputs=[Input(input.name, **param_attr.attr)], bias=ParamAttr.to_bias(bias_attr),
**ExtraLayerAttribute.to_kwargs(layer_attr) inputs=[Input(input.name, **param_attr.attr)],
) **ExtraLayerAttribute.to_kwargs(layer_attr))
return LayerOutput(name, LayerType.GRUMEMORY, [input], size=input.size / 3, return LayerOutput(
reverse=reverse) name,
LayerType.GRUMEMORY, [input],
size=input.size / 3,
reverse=reverse)
@wrap_name_default() @wrap_name_default()
@layer_support() @layer_support()
def last_seq(input, name=None, agg_level=AggregateLevel.EACH_TIMESTEP, def last_seq(input,
name=None,
agg_level=AggregateLevel.EACH_TIMESTEP,
layer_attr=None): layer_attr=None):
""" """
Get Last Timestamp Activation of a sequence. Get Last Timestamp Activation of a sequence.
...@@ -1120,15 +1213,19 @@ def last_seq(input, name=None, agg_level=AggregateLevel.EACH_TIMESTEP, ...@@ -1120,15 +1213,19 @@ def last_seq(input, name=None, agg_level=AggregateLevel.EACH_TIMESTEP,
type=LayerType.SEQUENCE_LAST_INSTANCE, type=LayerType.SEQUENCE_LAST_INSTANCE,
inputs=[input.name], inputs=[input.name],
trans_type=agg_level, trans_type=agg_level,
**ExtraLayerAttribute.to_kwargs(layer_attr) **ExtraLayerAttribute.to_kwargs(layer_attr))
) return LayerOutput(
return LayerOutput(name, LayerType.SEQUENCE_LAST_INSTANCE, parents=[input], name,
size=input.size) LayerType.SEQUENCE_LAST_INSTANCE,
parents=[input],
size=input.size)
@wrap_name_default() @wrap_name_default()
@layer_support() @layer_support()
def first_seq(input, name=None, agg_level=AggregateLevel.EACH_TIMESTEP, def first_seq(input,
name=None,
agg_level=AggregateLevel.EACH_TIMESTEP,
layer_attr=None): layer_attr=None):
""" """
Get First Timestamp Activation of a sequence. Get First Timestamp Activation of a sequence.
...@@ -1155,10 +1252,12 @@ def first_seq(input, name=None, agg_level=AggregateLevel.EACH_TIMESTEP, ...@@ -1155,10 +1252,12 @@ def first_seq(input, name=None, agg_level=AggregateLevel.EACH_TIMESTEP,
type=LayerType.SEQUENCE_FIRST_INSTANCE, type=LayerType.SEQUENCE_FIRST_INSTANCE,
inputs=[input.name], inputs=[input.name],
trans_type=agg_level, trans_type=agg_level,
**ExtraLayerAttribute.to_kwargs(layer_attr) **ExtraLayerAttribute.to_kwargs(layer_attr))
) return LayerOutput(
return LayerOutput(name, LayerType.SEQUENCE_FIRST_INSTANCE, name,
parents=[input], size=input.size) LayerType.SEQUENCE_FIRST_INSTANCE,
parents=[input],
size=input.size)
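A hedged sketch combining grumemory (above) with last_seq to pool a sequence into a fixed-size vector; grumemory expects an input of 3 * hidden_size since its output size is input.size / 3. Names and sizes are illustrative.

from paddle.trainer_config_helpers import *

hidden_size = 128
seq = data_layer(name="word_vectors", size=256)
gru_in = fc_layer(input=seq, size=hidden_size * 3,
                  act=LinearActivation(), bias_attr=False)
gru = grumemory(input=gru_in)      # gru.size == hidden_size
sentence = last_seq(input=gru)     # activation of the last time step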
class ExpandLevel(object): class ExpandLevel(object):
...@@ -1168,7 +1267,8 @@ class ExpandLevel(object): ...@@ -1168,7 +1267,8 @@ class ExpandLevel(object):
@wrap_name_default() @wrap_name_default()
@layer_support() @layer_support()
def expand_layer(input, expand_as, def expand_layer(input,
expand_as,
name=None, name=None,
bias_attr=False, bias_attr=False,
expand_level=ExpandLevel.FROM_TIMESTEP, expand_level=ExpandLevel.FROM_TIMESTEP,
...@@ -1208,19 +1308,17 @@ def expand_layer(input, expand_as, ...@@ -1208,19 +1308,17 @@ def expand_layer(input, expand_as,
bias=ParamAttr.to_bias(bias_attr=bias_attr), bias=ParamAttr.to_bias(bias_attr=bias_attr),
type=LayerType.EXPAND_LAYER, type=LayerType.EXPAND_LAYER,
trans_type=expand_level, trans_type=expand_level,
**ExtraAttr.to_kwargs(layer_attr) **ExtraAttr.to_kwargs(layer_attr))
) return LayerOutput(
return LayerOutput(name=name, name=name,
size=input.size, size=input.size,
layer_type=LayerType.EXPAND_LAYER, layer_type=LayerType.EXPAND_LAYER,
parents=[input, expand_as]) parents=[input, expand_as])
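expand_layer broadcasts a per-sequence (or per-timestep, depending on expand_level) vector so that it matches the layout of expand_as. A hedged numpy sketch of the FROM_TIMESTEP case for one sequence:

    import numpy as np

    expand_as = np.zeros((4, 3), dtype=np.float32)      # a sequence of 4 timesteps
    vec = np.array([1.0, 2.0, 3.0], dtype=np.float32)   # one vector for the sequence

    expanded = np.tile(vec, (expand_as.shape[0], 1))    # vec copied to every timestep
    assert expanded.shape == expand_as.shape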
@wrap_name_default() @wrap_name_default()
@layer_support() @layer_support()
def repeat_layer(input, num_repeats, def repeat_layer(input, num_repeats, name=None, layer_attr=None):
name=None,
layer_attr=None):
""" """
A layer for repeating the input for num_repeats times. This is equivalent A layer for repeating the input for num_repeats times. This is equivalent
to apply concat_layer() with num_repeats same input. to apply concat_layer() with num_repeats same input.
...@@ -1251,12 +1349,13 @@ def repeat_layer(input, num_repeats, ...@@ -1251,12 +1349,13 @@ def repeat_layer(input, num_repeats,
name=name, name=name,
num_filters=num_repeats, num_filters=num_repeats,
type=LayerType.FEATURE_MAP_EXPAND_LAYER, type=LayerType.FEATURE_MAP_EXPAND_LAYER,
**ExtraAttr.to_kwargs(layer_attr) **ExtraAttr.to_kwargs(layer_attr))
) return LayerOutput(
return LayerOutput(name=name, name=name,
size=l.config.size, size=l.config.size,
layer_type=LayerType.FEATURE_MAP_EXPAND_LAYER, layer_type=LayerType.FEATURE_MAP_EXPAND_LAYER,
parents=[input]) parents=[input])
@wrap_name_default() @wrap_name_default()
@layer_support() @layer_support()
...@@ -1302,11 +1401,12 @@ def interpolation_layer(input, weight, name=None, layer_attr=None): ...@@ -1302,11 +1401,12 @@ def interpolation_layer(input, weight, name=None, layer_attr=None):
name=name, name=name,
type=LayerType.INTERPOLATION_LAYER, type=LayerType.INTERPOLATION_LAYER,
inputs=[weight.name, input[0].name, input[1].name], inputs=[weight.name, input[0].name, input[1].name],
**ExtraAttr.to_kwargs(layer_attr) **ExtraAttr.to_kwargs(layer_attr))
) return LayerOutput(
return LayerOutput(name, LayerType.INTERPOLATION_LAYER, name,
parents=[weight, input[0], input[1]], LayerType.INTERPOLATION_LAYER,
size=input[0].size) parents=[weight, input[0], input[1]],
size=input[0].size)
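interpolation_layer takes a scalar weight layer plus two inputs of equal size; as its name and input order suggest, the output is the per-sample convex combination of the two inputs. Illustrative arithmetic only (a minimal numpy sketch, not the layer implementation):

    import numpy as np

    a = np.array([0.0, 10.0, 20.0], dtype=np.float32)
    b = np.array([100.0, 110.0, 120.0], dtype=np.float32)
    w = 0.3                                             # per-sample scalar weight

    out = w * a + (1.0 - w) * b                         # -> [70., 80., 90.]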
@wrap_name_default() @wrap_name_default()
...@@ -1345,15 +1445,23 @@ def bilinear_interp_layer(input, ...@@ -1345,15 +1445,23 @@ def bilinear_interp_layer(input,
assert out_size_x > 0 and out_size_y > 0 assert out_size_x > 0 and out_size_y > 0
assert input.num_filters is not None assert input.num_filters is not None
num_channels = input.num_filters num_channels = input.num_filters
l = Layer(name=name, l = Layer(
inputs=Input(input.name, name=name,
bilinear_interp=BilinearInterp(out_size_x=out_size_x, inputs=Input(
out_size_y=out_size_y, input.name,
num_channels=num_channels)), bilinear_interp=BilinearInterp(
type=LayerType.BILINEAR_INTERP_LAYER, out_size_x=out_size_x,
**ExtraLayerAttribute.to_kwargs(layer_attr)) out_size_y=out_size_y,
return LayerOutput(name, LayerType.BILINEAR_INTERP_LAYER, parents=[input], num_channels=num_channels)),
num_filters=num_channels, size=l.config.size) type=LayerType.BILINEAR_INTERP_LAYER,
**ExtraLayerAttribute.to_kwargs(layer_attr))
return LayerOutput(
name,
LayerType.BILINEAR_INTERP_LAYER,
parents=[input],
num_filters=num_channels,
size=l.config.size)
@wrap_name_default() @wrap_name_default()
@layer_support() @layer_support()
...@@ -1392,10 +1500,9 @@ def power_layer(input, weight, name=None, layer_attr=None): ...@@ -1392,10 +1500,9 @@ def power_layer(input, weight, name=None, layer_attr=None):
name=name, name=name,
type=LayerType.POWER_LAYER, type=LayerType.POWER_LAYER,
inputs=[weight.name, input.name], inputs=[weight.name, input.name],
**ExtraAttr.to_kwargs(layer_attr) **ExtraAttr.to_kwargs(layer_attr))
) return LayerOutput(
return LayerOutput(name, LayerType.POWER_LAYER, name, LayerType.POWER_LAYER, parents=[input, weight], size=input.size)
parents=[input, weight], size=input.size)
@wrap_name_default() @wrap_name_default()
...@@ -1437,10 +1544,9 @@ def scaling_layer(input, weight, name=None, layer_attr=None): ...@@ -1437,10 +1544,9 @@ def scaling_layer(input, weight, name=None, layer_attr=None):
name=name, name=name,
type=LayerType.SCALING_LAYER, type=LayerType.SCALING_LAYER,
inputs=[weight.name, input.name], inputs=[weight.name, input.name],
**ExtraAttr.to_kwargs(layer_attr) **ExtraAttr.to_kwargs(layer_attr))
) return LayerOutput(
return LayerOutput(name, LayerType.SCALING_LAYER, parents=[weight, input], name, LayerType.SCALING_LAYER, parents=[weight, input], size=input.size)
size=input.size)
@wrap_name_default() @wrap_name_default()
...@@ -1473,10 +1579,9 @@ def trans_layer(input, name=None, layer_attr=None): ...@@ -1473,10 +1579,9 @@ def trans_layer(input, name=None, layer_attr=None):
name=name, name=name,
type=LayerType.TRANS_LAYER, type=LayerType.TRANS_LAYER,
inputs=[input.name], inputs=[input.name],
**ExtraAttr.to_kwargs(layer_attr) **ExtraAttr.to_kwargs(layer_attr))
) return LayerOutput(
return LayerOutput(name, LayerType.TRANS_LAYER, parents=[input], name, LayerType.TRANS_LAYER, parents=[input], size=input.size)
size=input.size)
@wrap_name_default() @wrap_name_default()
...@@ -1518,8 +1623,7 @@ def cos_sim(a, b, scale=5, size=1, name=None, layer_attr=None): ...@@ -1518,8 +1623,7 @@ def cos_sim(a, b, scale=5, size=1, name=None, layer_attr=None):
type=LayerType.COSINE_SIM, type=LayerType.COSINE_SIM,
cos_scale=scale, cos_scale=scale,
inputs=[a.name, b.name], inputs=[a.name, b.name],
**ExtraLayerAttribute.to_kwargs(layer_attr) **ExtraLayerAttribute.to_kwargs(layer_attr))
)
else: else:
if a.size is not None and b.size is not None: if a.size is not None and b.size is not None:
assert size == b.size / a.size assert size == b.size / a.size
...@@ -1529,8 +1633,7 @@ def cos_sim(a, b, scale=5, size=1, name=None, layer_attr=None): ...@@ -1529,8 +1633,7 @@ def cos_sim(a, b, scale=5, size=1, name=None, layer_attr=None):
size=size, size=size,
cos_scale=scale, cos_scale=scale,
inputs=[a.name, b.name], inputs=[a.name, b.name],
**ExtraLayerAttribute.to_kwargs(layer_attr) **ExtraLayerAttribute.to_kwargs(layer_attr))
)
return LayerOutput(name, LayerType.COSINE_SIM, parents=[a, b], size=size) return LayerOutput(name, LayerType.COSINE_SIM, parents=[a, b], size=size)
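cos_sim multiplies the cosine similarity by cos_scale (default 5). A small numpy sketch of the size == 1 case; when size > 1 the second input is split into size chunks, which this omits:

    import numpy as np

    def scaled_cosine(a, b, scale=5.0):
        # cosine similarity of two vectors, multiplied by the layer's cos_scale
        return scale * np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))

    print(scaled_cosine(np.array([1.0, 0.0]), np.array([1.0, 1.0])))  # ~3.54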
...@@ -1538,8 +1641,13 @@ def cos_sim(a, b, scale=5, size=1, name=None, layer_attr=None): ...@@ -1538,8 +1641,13 @@ def cos_sim(a, b, scale=5, size=1, name=None, layer_attr=None):
@wrap_bias_attr_default(has_bias=True) @wrap_bias_attr_default(has_bias=True)
@wrap_param_attr_default() @wrap_param_attr_default()
@layer_support() @layer_support()
def hsigmoid(input, label, num_classes, name=None, bias_attr=None, def hsigmoid(input,
param_attr=None, layer_attr=None): label,
num_classes,
name=None,
bias_attr=None,
param_attr=None,
layer_attr=None):
""" """
Organize the classes into a binary tree. At each node, a sigmoid function Organize the classes into a binary tree. At each node, a sigmoid function
is used to calculate the probability of belonging to the right branch. is used to calculate the probability of belonging to the right branch.
...@@ -1600,10 +1708,9 @@ def hsigmoid(input, label, num_classes, name=None, bias_attr=None, ...@@ -1600,10 +1708,9 @@ def hsigmoid(input, label, num_classes, name=None, bias_attr=None,
num_classes=num_classes, num_classes=num_classes,
bias=ParamAttr.to_bias(bias_attr), bias=ParamAttr.to_bias(bias_attr),
inputs=ipts_for_layer, inputs=ipts_for_layer,
**ExtraLayerAttribute.to_kwargs(layer_attr) **ExtraLayerAttribute.to_kwargs(layer_attr))
) return LayerOutput(
return LayerOutput(name, LayerType.HSIGMOID, parents=parents, name, LayerType.HSIGMOID, parents=parents, size=l.config.size)
size=l.config.size)
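A minimal wiring sketch for hsigmoid, assuming the v1-style helper import and hypothetical data_layer / fc_layer names (only the hsigmoid signature itself comes from this diff):

    from paddle.trainer_config_helpers import *   # assumed import path

    feature = fc_layer(input=data_layer(name='x', size=64), size=128)
    lbl = data_layer(name='label', size=10)        # integer class ids, 10 classes
    cost = hsigmoid(input=feature, label=lbl, num_classes=10)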
@wrap_name_default("conv") @wrap_name_default("conv")
...@@ -1611,11 +1718,22 @@ def hsigmoid(input, label, num_classes, name=None, bias_attr=None, ...@@ -1611,11 +1718,22 @@ def hsigmoid(input, label, num_classes, name=None, bias_attr=None,
@wrap_bias_attr_default() @wrap_bias_attr_default()
@wrap_act_default(act=ReluActivation()) @wrap_act_default(act=ReluActivation())
@layer_support(DROPOUT) @layer_support(DROPOUT)
def img_conv_layer(input, filter_size, num_filters, def img_conv_layer(input,
name=None, num_channels=None, filter_size,
act=None, groups=1, stride=1, padding=0, bias_attr=None, num_filters,
param_attr=None, shared_biases=True, layer_attr=None, name=None,
filter_size_y=None, stride_y=None, padding_y=None, num_channels=None,
act=None,
groups=1,
stride=1,
padding=0,
bias_attr=None,
param_attr=None,
shared_biases=True,
layer_attr=None,
filter_size_y=None,
stride_y=None,
padding_y=None,
trans=False): trans=False):
""" """
Convolution layer for image. Paddle only support square input currently and Convolution layer for image. Paddle only support square input currently and
...@@ -1713,40 +1831,56 @@ def img_conv_layer(input, filter_size, num_filters, ...@@ -1713,40 +1831,56 @@ def img_conv_layer(input, filter_size, num_filters,
if param_attr.attr.get('initial_smart'): if param_attr.attr.get('initial_smart'):
# special initial for conv layers. # special initial for conv layers.
init_w = (2.0 / (filter_size ** 2 * num_channels)) ** 0.5 init_w = (2.0 / (filter_size**2 * num_channels))**0.5
param_attr.attr["initial_mean"] = 0.0 param_attr.attr["initial_mean"] = 0.0
param_attr.attr["initial_std"] = init_w param_attr.attr["initial_std"] = init_w
param_attr.attr["initial_strategy"] = 0 param_attr.attr["initial_strategy"] = 0
param_attr.attr["initial_smart"] = False param_attr.attr["initial_smart"] = False
lt = LayerType.CONVTRANS_LAYER if trans else LayerType.CONV_LAYER lt = LayerType.CONVTRANS_LAYER if trans else LayerType.CONV_LAYER
l = Layer( l = Layer(
name=name, name=name,
inputs=Input(input.name, conv=Conv( inputs=Input(
filter_size=filter_size, padding=padding, stride=stride, input.name,
channels=num_channels, groups=groups, conv=Conv(
filter_size_y=filter_size_y, padding_y=padding_y, filter_size=filter_size,
stride_y=stride_y), padding=padding,
**param_attr.attr), stride=stride,
channels=num_channels,
groups=groups,
filter_size_y=filter_size_y,
padding_y=padding_y,
stride_y=stride_y),
**param_attr.attr),
active_type=act.name, active_type=act.name,
num_filters=num_filters, num_filters=num_filters,
bias=ParamAttr.to_bias(bias_attr), bias=ParamAttr.to_bias(bias_attr),
shared_biases=shared_biases, shared_biases=shared_biases,
type=lt, type=lt,
**ExtraLayerAttribute.to_kwargs(layer_attr) **ExtraLayerAttribute.to_kwargs(layer_attr))
) return LayerOutput(
return LayerOutput(name, lt, parents=[input], name,
activation=act, num_filters=num_filters, lt,
size=l.config.size) parents=[input],
activation=act,
num_filters=num_filters,
size=l.config.size)
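The reformatted initializer above uses init_w = (2.0 / (filter_size**2 * num_channels))**0.5, i.e. a fan-in based standard deviation. For readers checking shapes, a hedged helper for the usual floor-mode output-size arithmetic per spatial dimension (Paddle's caffe_mode rounding may differ slightly):

    def conv_output_size(img_size, filter_size, padding, stride):
        # common floor-mode convolution output size for one spatial dimension
        return (img_size + 2 * padding - filter_size) // stride + 1

    print(conv_output_size(img_size=32, filter_size=3, padding=1, stride=1))  # 32
    print(conv_output_size(img_size=32, filter_size=3, padding=0, stride=2))  # 15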
@wrap_name_default("pool") @wrap_name_default("pool")
@layer_support() @layer_support()
def img_pool_layer(input, pool_size, name=None, def img_pool_layer(input,
num_channels=None, pool_type=None, pool_size,
stride=1, padding=0, layer_attr=None, name=None,
pool_size_y=None, stride_y=None, padding_y=None, num_channels=None,
pool_type=None,
stride=1,
padding=0,
layer_attr=None,
pool_size_y=None,
stride_y=None,
padding_y=None,
img_width=None): img_width=None):
""" """
Image pooling Layer. Image pooling Layer.
...@@ -1804,29 +1938,39 @@ def img_pool_layer(input, pool_size, name=None, ...@@ -1804,29 +1938,39 @@ def img_pool_layer(input, pool_size, name=None,
l = Layer( l = Layer(
name=name, name=name,
type=LayerType.POOL_LAYER, type=LayerType.POOL_LAYER,
inputs=[Input(input.name, inputs=[
pool=Pool( Input(
pool_type=type_name, input.name,
channels=num_channels, pool=Pool(
size_x=pool_size, pool_type=type_name,
start=None, channels=num_channels,
stride=stride, size_x=pool_size,
padding=padding, start=None,
size_y=pool_size_y, stride=stride,
stride_y=stride_y, padding=padding,
padding_y=padding_y, size_y=pool_size_y,
img_width=img_width stride_y=stride_y,
))], padding_y=padding_y,
**ExtraLayerAttribute.to_kwargs(layer_attr) img_width=img_width))
) ],
return LayerOutput(name, LayerType.POOL_LAYER, parents=[input], **ExtraLayerAttribute.to_kwargs(layer_attr))
num_filters=num_channels, size=l.config.size) return LayerOutput(
name,
LayerType.POOL_LAYER,
parents=[input],
num_filters=num_channels,
size=l.config.size)
@wrap_name_default("spp") @wrap_name_default("spp")
@layer_support() @layer_support()
def spp_layer(input, name=None, num_channels=None, pool_type=None, def spp_layer(input,
pyramid_height=None, img_width=None, layer_attr=None): name=None,
num_channels=None,
pool_type=None,
pyramid_height=None,
img_width=None,
layer_attr=None):
""" """
Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition. Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition.
The details please refer to The details please refer to
...@@ -1866,42 +2010,58 @@ def spp_layer(input, name=None, num_channels=None, pool_type=None, ...@@ -1866,42 +2010,58 @@ def spp_layer(input, name=None, num_channels=None, pool_type=None,
l = Layer( l = Layer(
name=name, name=name,
type=LayerType.SPP_LAYER, type=LayerType.SPP_LAYER,
inputs=Input(input.name, inputs=Input(
spp=SpatialPyramidPool(pool_type=type_name, input.name,
channels=num_channels, spp=SpatialPyramidPool(
pyramid_height=pyramid_height, pool_type=type_name,
img_width=img_width) channels=num_channels,
), pyramid_height=pyramid_height,
**ExtraLayerAttribute.to_kwargs(layer_attr) img_width=img_width)),
) **ExtraLayerAttribute.to_kwargs(layer_attr))
return LayerOutput(name, layer_type=LayerType.SPP_LAYER, parents=[input], return LayerOutput(
num_filters=num_channels, size=l.config.size) name,
layer_type=LayerType.SPP_LAYER,
parents=[input],
def __img_norm_layer__(name, input, size, norm_type, scale, power, num_filters=num_channels,
num_channels, blocked, layer_attr): size=l.config.size)
def __img_norm_layer__(name, input, size, norm_type, scale, power, num_channels,
blocked, layer_attr):
if num_channels is None: if num_channels is None:
assert input.num_filters is not None assert input.num_filters is not None
num_channels = input.num_filters num_channels = input.num_filters
l = Layer( l = Layer(
name=name, type=LayerType.NORM_LAYER, inputs=Input( name=name,
input.name, norm=Norm(norm_type=norm_type, type=LayerType.NORM_LAYER,
channels=num_channels, size=size, inputs=Input(
scale=scale, input.name,
pow=power, blocked=blocked) norm=Norm(
), norm_type=norm_type,
**ExtraLayerAttribute.to_kwargs(layer_attr) channels=num_channels,
) size=size,
return LayerOutput(name, layer_type=LayerType.NORM_LAYER, parents=[input], scale=scale,
num_filters=num_channels, img_norm_type=norm_type, pow=power,
size=l.config.size) blocked=blocked)),
**ExtraLayerAttribute.to_kwargs(layer_attr))
return LayerOutput(
name,
layer_type=LayerType.NORM_LAYER,
parents=[input],
num_filters=num_channels,
img_norm_type=norm_type,
size=l.config.size)
@wrap_name_default("crmnorm") @wrap_name_default("crmnorm")
@layer_support() @layer_support()
def img_cmrnorm_layer(input, size, scale=0.0128, power=0.75, def img_cmrnorm_layer(input,
name=None, num_channels=None, size,
scale=0.0128,
power=0.75,
name=None,
num_channels=None,
layer_attr=None): layer_attr=None):
""" """
Response normalization across feature maps. Response normalization across feature maps.
...@@ -1935,8 +2095,13 @@ def img_cmrnorm_layer(input, size, scale=0.0128, power=0.75, ...@@ -1935,8 +2095,13 @@ def img_cmrnorm_layer(input, size, scale=0.0128, power=0.75,
@wrap_act_default(act=ReluActivation()) @wrap_act_default(act=ReluActivation())
@wrap_name_default("batch_norm") @wrap_name_default("batch_norm")
@layer_support(DROPOUT) @layer_support(DROPOUT)
def batch_norm_layer(input, act=None, name=None, num_channels=None, def batch_norm_layer(input,
bias_attr=None, param_attr=None, layer_attr=None, act=None,
name=None,
num_channels=None,
bias_attr=None,
param_attr=None,
layer_attr=None,
batch_norm_type=None, batch_norm_type=None,
moving_average_fraction=0.9, moving_average_fraction=0.9,
use_global_stats=None): use_global_stats=None):
...@@ -2022,22 +2187,23 @@ def batch_norm_layer(input, act=None, name=None, num_channels=None, ...@@ -2022,22 +2187,23 @@ def batch_norm_layer(input, act=None, name=None, num_channels=None,
(batch_norm_type == "cudnn_batch_norm") (batch_norm_type == "cudnn_batch_norm")
l = Layer( l = Layer(
name=name, name=name,
inputs=Input(input.name, inputs=Input(
image=Image(channels=num_channels), input.name, image=Image(channels=num_channels), **param_attr.attr),
**param_attr.attr),
active_type=act.name, active_type=act.name,
type=LayerType.BATCH_NORM_LAYER, type=LayerType.BATCH_NORM_LAYER,
batch_norm_type=batch_norm_type, batch_norm_type=batch_norm_type,
bias=ParamAttr.to_bias(bias_attr), bias=ParamAttr.to_bias(bias_attr),
moving_average_fraction=moving_average_fraction, moving_average_fraction=moving_average_fraction,
use_global_stats=use_global_stats, use_global_stats=use_global_stats,
**ExtraLayerAttribute.to_kwargs(layer_attr) **ExtraLayerAttribute.to_kwargs(layer_attr))
)
return LayerOutput(name=name, layer_type=LayerType.BATCH_NORM_LAYER, return LayerOutput(
parents=[input], activation=act, name=name,
num_filters=num_channels, layer_type=LayerType.BATCH_NORM_LAYER,
size=l.config.size) parents=[input],
activation=act,
num_filters=num_channels,
size=l.config.size)
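A numpy sketch of the arithmetic batch_norm_layer performs, including the moving_average_fraction update kept for use_global_stats at test time; the exact epsilon and update rule of the Paddle/cuDNN kernels are assumptions here:

    import numpy as np

    x = np.random.randn(8, 4).astype(np.float32)          # batch of 8 samples, 4 channels
    gamma, beta = np.ones(4), np.zeros(4)                  # learned scale (w) and bias

    mu, var = x.mean(axis=0), x.var(axis=0)
    y = gamma * (x - mu) / np.sqrt(var + 1e-5) + beta      # training-time normalization

    fraction = 0.9                                         # moving_average_fraction
    running_mu = np.zeros(4)
    running_mu = fraction * running_mu + (1.0 - fraction) * mu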
@wrap_name_default() @wrap_name_default()
...@@ -2072,18 +2238,16 @@ def sum_to_one_norm_layer(input, name=None, layer_attr=None): ...@@ -2072,18 +2238,16 @@ def sum_to_one_norm_layer(input, name=None, layer_attr=None):
name=name, name=name,
type=LayerType.SUM_TO_ONE_NORM_LAYER, type=LayerType.SUM_TO_ONE_NORM_LAYER,
inputs=[input.name], inputs=[input.name],
**ExtraAttr.to_kwargs(layer_attr) **ExtraAttr.to_kwargs(layer_attr))
) return LayerOutput(
return LayerOutput(name, LayerType.SUM_TO_ONE_NORM_LAYER, parents=[input], name, LayerType.SUM_TO_ONE_NORM_LAYER, parents=[input], size=input.size)
size=input.size)
@wrap_name_default("addto") @wrap_name_default("addto")
@wrap_act_default(act=LinearActivation()) @wrap_act_default(act=LinearActivation())
@wrap_bias_attr_default(has_bias=False) @wrap_bias_attr_default(has_bias=False)
@layer_support(DROPOUT) @layer_support(DROPOUT)
def addto_layer(input, act=None, name=None, bias_attr=None, def addto_layer(input, act=None, name=None, bias_attr=None, layer_attr=None):
layer_attr=None):
""" """
AddtoLayer. AddtoLayer.
...@@ -2143,15 +2307,20 @@ def addto_layer(input, act=None, name=None, bias_attr=None, ...@@ -2143,15 +2307,20 @@ def addto_layer(input, act=None, name=None, bias_attr=None,
num_filters = each_input.num_filters num_filters = each_input.num_filters
l = Layer( l = Layer(
name=name, type=LayerType.ADDTO_LAYER, inputs=ipts_for_layer, name=name,
type=LayerType.ADDTO_LAYER,
inputs=ipts_for_layer,
bias=ParamAttr.to_bias(bias_attr), bias=ParamAttr.to_bias(bias_attr),
active_type=act.name, active_type=act.name,
**ExtraLayerAttribute.to_kwargs(layer_attr) **ExtraLayerAttribute.to_kwargs(layer_attr))
)
return LayerOutput(name, LayerType.ADDTO_LAYER, parents=input, return LayerOutput(
activation=act, num_filters=num_filters, name,
size=l.config.size) LayerType.ADDTO_LAYER,
parents=input,
activation=act,
num_filters=num_filters,
size=l.config.size)
@wrap_act_default(act=IdentityActivation()) @wrap_act_default(act=IdentityActivation())
...@@ -2210,22 +2379,22 @@ def concat_layer(input, act=None, name=None, layer_attr=None, bias_attr=None): ...@@ -2210,22 +2379,22 @@ def concat_layer(input, act=None, name=None, layer_attr=None, bias_attr=None):
LayerOutput) LayerOutput)
return a return a
is_concat_layer = __is_type__(reduce(__reduce_concat_type__, is_concat_layer = __is_type__(
map(type, input)), LayerOutput) reduce(__reduce_concat_type__, map(type, input)), LayerOutput)
layer_type = (LayerType.CONCAT_LAYER if is_concat_layer layer_type = (LayerType.CONCAT_LAYER
else LayerType.CONCAT_PROJ_LAYER) if is_concat_layer else LayerType.CONCAT_PROJ_LAYER)
if layer_type == LayerType.CONCAT_LAYER: if layer_type == LayerType.CONCAT_LAYER:
assert not bias_attr assert not bias_attr
Layer( Layer(
name=name, type=layer_type, name=name,
type=layer_type,
inputs=[x.name for x in input] if is_concat_layer else input, inputs=[x.name for x in input] if is_concat_layer else input,
active_type=act.name, active_type=act.name,
bias=ParamAttr.to_bias(bias_attr), bias=ParamAttr.to_bias(bias_attr),
**ExtraLayerAttribute.to_kwargs(layer_attr) **ExtraLayerAttribute.to_kwargs(layer_attr))
)
sz = 0 sz = 0
for each_input in input: for each_input in input:
...@@ -2235,14 +2404,20 @@ def concat_layer(input, act=None, name=None, layer_attr=None, bias_attr=None): ...@@ -2235,14 +2404,20 @@ def concat_layer(input, act=None, name=None, layer_attr=None, bias_attr=None):
sz = None sz = None
break break
return LayerOutput(name, layer_type=layer_type, return LayerOutput(
parents=input if is_concat_layer else [ name,
x.origin for x in input], layer_type=layer_type,
activation=act, size=sz) parents=input if is_concat_layer else [x.origin for x in input],
activation=act,
size=sz)
def memory(name, size, is_seq=False, boot_layer=None,
boot_bias=None, boot_bias_active_type=None,
def memory(name,
size,
is_seq=False,
boot_layer=None,
boot_bias=None,
boot_bias_active_type=None,
boot_with_const_id=None): boot_with_const_id=None):
""" """
The memory layers is a layer cross each time step. Reference this output The memory layers is a layer cross each time step. Reference this output
...@@ -2290,30 +2465,33 @@ def memory(name, size, is_seq=False, boot_layer=None, ...@@ -2290,30 +2465,33 @@ def memory(name, size, is_seq=False, boot_layer=None,
assert boot_layer is None or isinstance(boot_layer, LayerOutput) assert boot_layer is None or isinstance(boot_layer, LayerOutput)
agent_name = Memory(name, size, agent_name = Memory(name, size, is_seq, boot_layer.name
is_seq, if boot_layer is not None else None, boot_bias,
boot_layer.name if boot_layer is not None else None, boot_bias_active_type.name, boot_with_const_id)
boot_bias,
boot_bias_active_type.name, lout = LayerOutput(
boot_with_const_id) name=agent_name,
size=size,
lout = LayerOutput(name=agent_name, size=size, layer_type=LayerType.MEMORY,
layer_type=LayerType.MEMORY, parents=[boot_layer] if boot_layer is not None else None)
parents=[boot_layer] if boot_layer is not None
else None)
return lout return lout
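memory() is normally paired with a layer of the same name inside a recurrent_group step, so that each step sees the previous step's output. A sketch under stated assumptions (fc_layer, TanhActivation and the exact wiring are illustrative, not part of this diff):

    def step(x):
        mem = memory(name='rnn_state', size=128)           # previous step's output
        out = fc_layer(input=[x, mem], size=128,
                       act=TanhActivation(), name='rnn_state')
        return out

    # then, e.g.: recurrent_group(step=step, input=some_sequence_layer)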
@wrap_bias_attr_default() @wrap_bias_attr_default()
@wrap_act_default(param_names=['gate_act', @wrap_act_default(
'state_act'], param_names=['gate_act', 'state_act'], act=SigmoidActivation())
act=SigmoidActivation())
@wrap_act_default(act=TanhActivation()) @wrap_act_default(act=TanhActivation())
@wrap_name_default('lstm_step') @wrap_name_default('lstm_step')
@layer_support() @layer_support()
def lstm_step_layer(input, state, size, act=None, def lstm_step_layer(input,
name=None, gate_act=None, state_act=None, state,
bias_attr=None, layer_attr=None): size,
act=None,
name=None,
gate_act=None,
state_act=None,
bias_attr=None,
layer_attr=None):
""" """
LSTM Step Layer. It used in recurrent_group. The lstm equations are shown LSTM Step Layer. It used in recurrent_group. The lstm equations are shown
as follow. as follow.
...@@ -2380,24 +2558,32 @@ def lstm_step_layer(input, state, size, act=None, ...@@ -2380,24 +2558,32 @@ def lstm_step_layer(input, state, size, act=None,
active_gate_type=gate_act.name, active_gate_type=gate_act.name,
active_state_type=state_act.name, active_state_type=state_act.name,
bias=ParamAttr.to_bias(bias_attr), bias=ParamAttr.to_bias(bias_attr),
size=size, inputs=[input.name, state.name], size=size,
**ExtraLayerAttribute.to_kwargs(layer_attr) inputs=[input.name, state.name],
) **ExtraLayerAttribute.to_kwargs(layer_attr))
return LayerOutput(name=name, layer_type=LayerType.LSTM_STEP_LAYER, return LayerOutput(
parents=[input, state], activation=act, name=name,
size=size, outputs=['default', 'state']) layer_type=LayerType.LSTM_STEP_LAYER,
parents=[input, state],
activation=act,
size=size,
outputs=['default', 'state'])
@wrap_bias_attr_default() @wrap_bias_attr_default()
@wrap_act_default(param_names=['gate_act'], @wrap_act_default(param_names=['gate_act'], act=SigmoidActivation())
act=SigmoidActivation())
@wrap_act_default(act=TanhActivation()) @wrap_act_default(act=TanhActivation())
@wrap_name_default('gru_step') @wrap_name_default('gru_step')
@layer_support() @layer_support()
def gru_step_layer(input, output_mem, size=None, act=None, def gru_step_layer(input,
name=None, gate_act=None, output_mem,
bias_attr=None, layer_attr=None): size=None,
act=None,
name=None,
gate_act=None,
bias_attr=None,
layer_attr=None):
""" """
:param input: :param input:
...@@ -2418,20 +2604,18 @@ def gru_step_layer(input, output_mem, size=None, act=None, ...@@ -2418,20 +2604,18 @@ def gru_step_layer(input, output_mem, size=None, act=None,
Layer( Layer(
name=name, name=name,
type=LayerType.GRU_STEP_LAYER, type=LayerType.GRU_STEP_LAYER,
inputs=[ inputs=[input.name, output_mem.name],
input.name,
output_mem.name
],
bias=ParamAttr.to_bias(bias_attr), bias=ParamAttr.to_bias(bias_attr),
size=size, size=size,
active_type=act.name, active_type=act.name,
active_gate_type=gate_act.name, active_gate_type=gate_act.name,
**ExtraAttr.to_kwargs(layer_attr) **ExtraAttr.to_kwargs(layer_attr))
)
return LayerOutput( return LayerOutput(
name=name, layer_type=LayerType.GRU_STEP_LAYER, name=name,
layer_type=LayerType.GRU_STEP_LAYER,
parents=[input, output_mem], parents=[input, output_mem],
size=size, activation=act) size=size,
activation=act)
@wrap_name_default() @wrap_name_default()
...@@ -2459,13 +2643,19 @@ def get_output_layer(input, arg_name, name=None, layer_attr=None): ...@@ -2459,13 +2643,19 @@ def get_output_layer(input, arg_name, name=None, layer_attr=None):
' The get output name is %s, which not' \ ' The get output name is %s, which not' \
' in %s' % ( ' in %s' % (
arg_name, ",".join(input.outputs)) arg_name, ",".join(input.outputs))
Layer(name=name, type=LayerType.GET_OUTPUT_LAYER, Layer(
inputs=[Input(input.name, input_layer_argument=arg_name)], name=name,
size=input.size, type=LayerType.GET_OUTPUT_LAYER,
**ExtraLayerAttribute.to_kwargs(layer_attr)) inputs=[Input(
input.name, input_layer_argument=arg_name)],
size=input.size,
**ExtraLayerAttribute.to_kwargs(layer_attr))
return LayerOutput(name=name, layer_type=LayerType.GET_OUTPUT_LAYER, return LayerOutput(
parents=[input], size=input.size) name=name,
layer_type=LayerType.GET_OUTPUT_LAYER,
parents=[input],
size=input.size)
@wrap_name_default() @wrap_name_default()
...@@ -2473,8 +2663,13 @@ def get_output_layer(input, arg_name, name=None, layer_attr=None): ...@@ -2473,8 +2663,13 @@ def get_output_layer(input, arg_name, name=None, layer_attr=None):
@wrap_bias_attr_default() @wrap_bias_attr_default()
@wrap_param_attr_default() @wrap_param_attr_default()
@layer_support() @layer_support()
def recurrent_layer(input, act=None, bias_attr=None, def recurrent_layer(input,
param_attr=None, name=None, reverse=False, layer_attr=None): act=None,
bias_attr=None,
param_attr=None,
name=None,
reverse=False,
layer_attr=None):
""" """
Simple recurrent unit layer. It is just a fully connect layer through both Simple recurrent unit layer. It is just a fully connect layer through both
time and neural network. time and neural network.
...@@ -2509,16 +2704,21 @@ def recurrent_layer(input, act=None, bias_attr=None, ...@@ -2509,16 +2704,21 @@ def recurrent_layer(input, act=None, bias_attr=None,
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
Layer(name=name, Layer(
type=LayerType.RECURRENT_LAYER, name=name,
inputs=Input(input.name, **param_attr.attr), type=LayerType.RECURRENT_LAYER,
active_type=act.name, inputs=Input(input.name, **param_attr.attr),
bias=ParamAttr.to_bias(bias_attr), active_type=act.name,
reversed=reverse, bias=ParamAttr.to_bias(bias_attr),
**ExtraAttr.to_kwargs(layer_attr)) reversed=reverse,
return LayerOutput(name=name, layer_type=LayerType.RECURRENT_LAYER, **ExtraAttr.to_kwargs(layer_attr))
parents=[input], size=input.size, activation=act, return LayerOutput(
reverse=reverse) name=name,
layer_type=LayerType.RECURRENT_LAYER,
parents=[input],
size=input.size,
activation=act,
reverse=reverse)
class StaticInput(object): class StaticInput(object):
...@@ -2646,7 +2846,7 @@ def recurrent_group(step, input, reverse=False, name=None, targetInlink=None): ...@@ -2646,7 +2846,7 @@ def recurrent_group(step, input, reverse=False, name=None, targetInlink=None):
return True return True
return False return False
assert(targetInlink == None or targetInlink_in_inlinks()) assert (targetInlink == None or targetInlink_in_inlinks())
targetInlinkName = None if targetInlink == None \ targetInlinkName = None if targetInlink == None \
else targetInlink.name if isinstance(targetInlink, LayerOutput) \ else targetInlink.name if isinstance(targetInlink, LayerOutput) \
else targetInlink.input.name else targetInlink.input.name
...@@ -2661,7 +2861,8 @@ def recurrent_group(step, input, reverse=False, name=None, targetInlink=None): ...@@ -2661,7 +2861,8 @@ def recurrent_group(step, input, reverse=False, name=None, targetInlink=None):
return x.name return x.name
RecurrentLayerGroupWithoutOutLinksBegin( RecurrentLayerGroupWithoutOutLinksBegin(
name=name, in_links=map(map_in_links, in_links), name=name,
in_links=map(map_in_links, in_links),
seq_reversed=reverse, seq_reversed=reverse,
target_inlinkname=targetInlinkName) target_inlinkname=targetInlinkName)
in_args = [] in_args = []
...@@ -2673,12 +2874,15 @@ def recurrent_group(step, input, reverse=False, name=None, targetInlink=None): ...@@ -2673,12 +2874,15 @@ def recurrent_group(step, input, reverse=False, name=None, targetInlink=None):
in_args.append(each_input.input) in_args.append(each_input.input)
else: else:
mem_name = "__%s_memory__" % each_input.input.name mem_name = "__%s_memory__" % each_input.input.name
mem = memory(name=mem_name, mem = memory(
is_seq=each_input.is_seq, name=mem_name,
size=each_input.input.size, is_seq=each_input.is_seq,
boot_layer=each_input.input) size=each_input.input.size,
with mixed_layer(name=mem_name, size=each_input.input.size, boot_layer=each_input.input)
act=IdentityActivation()) as mix: with mixed_layer(
name=mem_name,
size=each_input.input.size,
act=IdentityActivation()) as mix:
mix += identity_projection(mem) mix += identity_projection(mem)
in_args.append(mem) in_args.append(mem)
...@@ -2720,14 +2924,15 @@ class GeneratedInput(BaseGeneratedInput): ...@@ -2720,14 +2924,15 @@ class GeneratedInput(BaseGeneratedInput):
return maxid_layer(input=input, name='__beam_search_predict__') return maxid_layer(input=input, name='__beam_search_predict__')
def before_real_step(self): def before_real_step(self):
predict_id = memory(name='__beam_search_predict__', predict_id = memory(
size=self.size, name='__beam_search_predict__',
boot_with_const_id=self.bos_id) size=self.size,
boot_with_const_id=self.bos_id)
trg_emb = embedding_layer(input=predict_id,
size=self.embedding_size, trg_emb = embedding_layer(
param_attr=ParamAttr( input=predict_id,
name=self.embedding_name)) size=self.embedding_size,
param_attr=ParamAttr(name=self.embedding_name))
return trg_emb return trg_emb
def __init__(self, size, embedding_name, embedding_size): def __init__(self, size, embedding_name, embedding_size):
...@@ -2760,14 +2965,16 @@ def maxid_layer(input, name=None, layer_attr=None): ...@@ -2760,14 +2965,16 @@ def maxid_layer(input, name=None, layer_attr=None):
""" """
assert isinstance(input, LayerOutput) assert isinstance(input, LayerOutput)
l = Layer(name=name, l = Layer(
type='maxid', name=name,
inputs=[input.name], type='maxid',
**ExtraLayerAttribute.to_kwargs(layer_attr)) inputs=[input.name],
return LayerOutput(name=name, **ExtraLayerAttribute.to_kwargs(layer_attr))
layer_type=LayerType.MAXID_LAYER, return LayerOutput(
parents=[input], name=name,
size=l.config.size) layer_type=LayerType.MAXID_LAYER,
parents=[input],
size=l.config.size)
@wrap_name_default() @wrap_name_default()
...@@ -2796,14 +3003,16 @@ def out_prod_layer(input1, input2, name=None, layer_attr=None): ...@@ -2796,14 +3003,16 @@ def out_prod_layer(input1, input2, name=None, layer_attr=None):
assert isinstance(input1, LayerOutput) assert isinstance(input1, LayerOutput)
assert isinstance(input2, LayerOutput) assert isinstance(input2, LayerOutput)
l = Layer(name=name, l = Layer(
type=LayerType.OUT_PROD_LAYER, name=name,
inputs=[input1.name, input2.name], type=LayerType.OUT_PROD_LAYER,
**ExtraLayerAttribute.to_kwargs(layer_attr)) inputs=[input1.name, input2.name],
return LayerOutput(name=name, **ExtraLayerAttribute.to_kwargs(layer_attr))
layer_type=LayerType.OUT_PROD_LAYER, return LayerOutput(
parents=[input1, input2], name=name,
size=l.config.size) layer_type=LayerType.OUT_PROD_LAYER,
parents=[input1, input2],
size=l.config.size)
@wrap_name_default() @wrap_name_default()
...@@ -2832,19 +3041,27 @@ def eos_layer(input, eos_id, name=None, layer_attr=None): ...@@ -2832,19 +3041,27 @@ def eos_layer(input, eos_id, name=None, layer_attr=None):
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
l = Layer(name=name, l = Layer(
type=LayerType.EOSID_LAYER, name=name,
eos_id=eos_id, type=LayerType.EOSID_LAYER,
inputs=[input.name], eos_id=eos_id,
**ExtraLayerAttribute.to_kwargs(layer_attr)) inputs=[input.name],
return LayerOutput(name=name, layer_type=LayerType.EOSID_LAYER, **ExtraLayerAttribute.to_kwargs(layer_attr))
parents=[input], return LayerOutput(
size=l.config.size) name=name,
layer_type=LayerType.EOSID_LAYER,
parents=[input],
size=l.config.size)
@wrap_name_default() @wrap_name_default()
def beam_search(step, input, bos_id, eos_id, beam_size, def beam_search(step,
max_length=500, name=None, input,
bos_id,
eos_id,
beam_size,
max_length=500,
name=None,
num_results_per_sample=None): num_results_per_sample=None):
""" """
Beam search is a heuristic search algorithm used in sequence generation. Beam search is a heuristic search algorithm used in sequence generation.
...@@ -2918,8 +3135,7 @@ def beam_search(step, input, bos_id, eos_id, beam_size, ...@@ -2918,8 +3135,7 @@ def beam_search(step, input, bos_id, eos_id, beam_size,
if num_results_per_sample > beam_size: if num_results_per_sample > beam_size:
logger.warning("num_results_per_sample should be less than beam_size") logger.warning("num_results_per_sample should be less than beam_size")
if isinstance(input, StaticInput) or isinstance(input, if isinstance(input, StaticInput) or isinstance(input, BaseGeneratedInput):
BaseGeneratedInput):
input = [input] input = [input]
generated_input_index = -1 generated_input_index = -1
...@@ -2944,11 +3160,12 @@ def beam_search(step, input, bos_id, eos_id, beam_size, ...@@ -2944,11 +3160,12 @@ def beam_search(step, input, bos_id, eos_id, beam_size,
def __real_step__(*args): def __real_step__(*args):
eos_name = "__%s_eos_layer__" % name eos_name = "__%s_eos_layer__" % name
RecurrentLayerGroupSetGenerator(Generator( RecurrentLayerGroupSetGenerator(
eos_layer_name=eos_name, Generator(
max_num_frames=max_length, eos_layer_name=eos_name,
beam_size=beam_size, max_num_frames=max_length,
num_results_per_sample=num_results_per_sample)) beam_size=beam_size,
num_results_per_sample=num_results_per_sample))
args = list(args) args = list(args)
args.insert(generated_input_index, gipt.before_real_step()) args.insert(generated_input_index, gipt.before_real_step())
...@@ -2959,11 +3176,12 @@ def beam_search(step, input, bos_id, eos_id, beam_size, ...@@ -2959,11 +3176,12 @@ def beam_search(step, input, bos_id, eos_id, beam_size,
return predict return predict
tmp = recurrent_group(step=__real_step__, input=real_input, reverse=False, tmp = recurrent_group(
name=name) step=__real_step__, input=real_input, reverse=False, name=name)
return tmp return tmp
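A hedged generation-time sketch of beam_search with the GeneratedInput defined earlier in this file; the decoder step and vocabulary size are assumptions for illustration only:

    def decoder_step(trg_emb):
        # one decoding step; it must end in a softmax over the target vocabulary
        return fc_layer(input=trg_emb, size=10000, act=SoftmaxActivation())

    gen_in = GeneratedInput(size=10000,
                            embedding_name='trg_embedding',
                            embedding_size=512)
    beam = beam_search(step=decoder_step,
                       input=[gen_in],
                       bos_id=0, eos_id=1,
                       beam_size=5, max_length=100)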
def __cost_input__(input, label, weight=None): def __cost_input__(input, label, weight=None):
""" """
inputs and parents for cost layers. inputs and parents for cost layers.
...@@ -2979,8 +3197,7 @@ def __cost_input__(input, label, weight=None): ...@@ -2979,8 +3197,7 @@ def __cost_input__(input, label, weight=None):
@wrap_name_default() @wrap_name_default()
@layer_support() @layer_support()
def regression_cost(input, label, weight=None, name=None, def regression_cost(input, label, weight=None, name=None, layer_attr=None):
layer_attr=None):
""" """
Regression Layer. Regression Layer.
...@@ -3002,14 +3219,20 @@ def regression_cost(input, label, weight=None, name=None, ...@@ -3002,14 +3219,20 @@ def regression_cost(input, label, weight=None, name=None,
""" """
ipts, parents = __cost_input__(input, label, weight) ipts, parents = __cost_input__(input, label, weight)
Layer(inputs=ipts, type="square_error", name=name, Layer(
**ExtraLayerAttribute.to_kwargs(layer_attr)) inputs=ipts,
type="square_error",
name=name,
**ExtraLayerAttribute.to_kwargs(layer_attr))
return LayerOutput(name, LayerType.COST, parents=parents, size=1) return LayerOutput(name, LayerType.COST, parents=parents, size=1)
@wrap_name_default("cost") @wrap_name_default("cost")
@layer_support() @layer_support()
def classification_cost(input, label, weight=None, name=None, def classification_cost(input,
label,
weight=None,
name=None,
evaluator=classification_error_evaluator, evaluator=classification_error_evaluator,
layer_attr=None): layer_attr=None):
""" """
...@@ -3036,8 +3259,11 @@ def classification_cost(input, label, weight=None, name=None, ...@@ -3036,8 +3259,11 @@ def classification_cost(input, label, weight=None, name=None,
ipts, parents = __cost_input__(input, label, weight) ipts, parents = __cost_input__(input, label, weight)
Layer(name=name, type="multi-class-cross-entropy", inputs=ipts, Layer(
**ExtraLayerAttribute.to_kwargs(layer_attr)) name=name,
type="multi-class-cross-entropy",
inputs=ipts,
**ExtraLayerAttribute.to_kwargs(layer_attr))
def __add_evaluator__(e): def __add_evaluator__(e):
assert callable(e) assert callable(e)
...@@ -3059,9 +3285,16 @@ def classification_cost(input, label, weight=None, name=None, ...@@ -3059,9 +3285,16 @@ def classification_cost(input, label, weight=None, name=None,
return LayerOutput(name, LayerType.COST, parents=parents, size=1) return LayerOutput(name, LayerType.COST, parents=parents, size=1)
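A minimal cost wiring sketch, assuming hypothetical data_layer / fc_layer names; classification_cost also attaches a classification_error_evaluator by default:

    from paddle.trainer_config_helpers import *   # assumed import path

    prob = fc_layer(input=data_layer(name='x', size=64),
                    size=10, act=SoftmaxActivation())
    lbl = data_layer(name='label', size=10)
    cost = classification_cost(input=prob, label=lbl)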
def conv_operator(img, filter, filter_size, num_filters, def conv_operator(img,
num_channels=None, stride=1, padding=0, filter,
filter_size_y=None, stride_y=None, padding_y=None): filter_size,
num_filters,
num_channels=None,
stride=1,
padding=0,
filter_size_y=None,
stride_y=None,
padding_y=None):
""" """
Different from img_conv_layer, conv_op is an Operator, which can be used Different from img_conv_layer, conv_op is an Operator, which can be used
in mixed_layer. And conv_op takes two inputs to perform convolution. in mixed_layer. And conv_op takes two inputs to perform convolution.
...@@ -3117,24 +3350,34 @@ def conv_operator(img, filter, filter_size, num_filters, ...@@ -3117,24 +3350,34 @@ def conv_operator(img, filter, filter_size, num_filters,
if filter.size is not None: if filter.size is not None:
filter.size = filter_size * filter_size_y * num_filters * num_channels filter.size = filter_size * filter_size_y * num_filters * num_channels
op = ConvOperator(input_layer_names=[img.name, filter.name], op = ConvOperator(
num_filters=num_filters, input_layer_names=[img.name, filter.name],
conv_conf=Conv(filter_size=filter_size, num_filters=num_filters,
padding=padding, conv_conf=Conv(
stride=stride, filter_size=filter_size,
channels=num_channels, padding=padding,
filter_size_y=filter_size_y, stride=stride,
padding_y=padding_y, channels=num_channels,
stride_y=stride_y, filter_size_y=filter_size_y,
groups=1)) padding_y=padding_y,
stride_y=stride_y,
groups=1))
op.origin = [img, filter] op.origin = [img, filter]
return op return op
@wrap_param_attr_default() @wrap_param_attr_default()
def conv_projection(input, filter_size, num_filters, def conv_projection(input,
num_channels=None, stride=1, padding=0, filter_size,
filter_size_y=None, stride_y=None, padding_y=None, num_filters,
groups=1, param_attr=None): num_channels=None,
stride=1,
padding=0,
filter_size_y=None,
stride_y=None,
padding_y=None,
groups=1,
param_attr=None):
""" """
ConvProjection with a layer as input. ConvProjection with a layer as input.
It performs element-wise multiplication with weight. It performs element-wise multiplication with weight.
...@@ -3206,23 +3449,25 @@ def conv_projection(input, filter_size, num_filters, ...@@ -3206,23 +3449,25 @@ def conv_projection(input, filter_size, num_filters,
if param_attr.attr.get('initial_smart'): if param_attr.attr.get('initial_smart'):
# special initial for conv layers. # special initial for conv layers.
init_w = (2.0 / (filter_size ** 2 * num_channels)) ** 0.5 init_w = (2.0 / (filter_size**2 * num_channels))**0.5
param_attr.attr["initial_mean"] = 0.0 param_attr.attr["initial_mean"] = 0.0
param_attr.attr["initial_std"] = init_w param_attr.attr["initial_std"] = init_w
param_attr.attr["initial_strategy"] = 0 param_attr.attr["initial_strategy"] = 0
param_attr.attr["initial_smart"] = False param_attr.attr["initial_smart"] = False
proj = ConvProjection(input_layer_name=input.name, proj = ConvProjection(
num_filters=num_filters, input_layer_name=input.name,
conv_conf=Conv(filter_size=filter_size, num_filters=num_filters,
padding=padding, conv_conf=Conv(
stride=stride, filter_size=filter_size,
channels=num_channels, padding=padding,
filter_size_y=filter_size_y, stride=stride,
padding_y=padding_y, channels=num_channels,
stride_y=stride_y, filter_size_y=filter_size_y,
groups=groups), padding_y=padding_y,
**param_attr.attr) stride_y=stride_y,
groups=groups),
**param_attr.attr)
proj.origin = input proj.origin = input
return proj return proj
...@@ -3270,11 +3515,10 @@ def conv_shift_layer(a, b, name=None, layer_attr=None): ...@@ -3270,11 +3515,10 @@ def conv_shift_layer(a, b, name=None, layer_attr=None):
name=name, name=name,
type=LayerType.CONV_SHIFT_LAYER, type=LayerType.CONV_SHIFT_LAYER,
inputs=[a.name, b.name], inputs=[a.name, b.name],
**ExtraLayerAttribute.to_kwargs(layer_attr) **ExtraLayerAttribute.to_kwargs(layer_attr))
)
return LayerOutput(name, LayerType.CONV_SHIFT_LAYER, parents=[a, b], return LayerOutput(
size=a.size) name, LayerType.CONV_SHIFT_LAYER, parents=[a, b], size=a.size)
@wrap_name_default() @wrap_name_default()
...@@ -3282,8 +3526,14 @@ def conv_shift_layer(a, b, name=None, layer_attr=None): ...@@ -3282,8 +3526,14 @@ def conv_shift_layer(a, b, name=None, layer_attr=None):
@wrap_bias_attr_default() @wrap_bias_attr_default()
@wrap_act_default(act=LinearActivation()) @wrap_act_default(act=LinearActivation())
@layer_support(ERROR_CLIPPING, DROPOUT) @layer_support(ERROR_CLIPPING, DROPOUT)
def tensor_layer(a, b, size, act=None, name=None, def tensor_layer(a,
param_attr=None, bias_attr=None, layer_attr=None): b,
size,
act=None,
name=None,
param_attr=None,
bias_attr=None,
layer_attr=None):
""" """
This layer performs tensor operation for two input. This layer performs tensor operation for two input.
For example, each sample: For example, each sample:
...@@ -3332,12 +3582,10 @@ def tensor_layer(a, b, size, act=None, name=None, ...@@ -3332,12 +3582,10 @@ def tensor_layer(a, b, size, act=None, name=None,
type=LayerType.TENSOR_LAYER, type=LayerType.TENSOR_LAYER,
active_type=act.name, active_type=act.name,
bias=ParamAttr.to_bias(bias_attr), bias=ParamAttr.to_bias(bias_attr),
inputs=[Input(a.name, **param_attr.attr), inputs=[Input(a.name, **param_attr.attr), Input(b.name)],
Input(b.name)], **ExtraLayerAttribute.to_kwargs(layer_attr))
**ExtraLayerAttribute.to_kwargs(layer_attr) return LayerOutput(
) name, LayerType.TENSOR_LAYER, parents=[a, b], activation=act, size=size)
return LayerOutput(name, LayerType.TENSOR_LAYER, parents=[a, b],
activation=act, size=size)
@wrap_name_default() @wrap_name_default()
...@@ -3345,11 +3593,17 @@ def tensor_layer(a, b, size, act=None, name=None, ...@@ -3345,11 +3593,17 @@ def tensor_layer(a, b, size, act=None, name=None,
@wrap_bias_attr_default() @wrap_bias_attr_default()
@wrap_act_default() @wrap_act_default()
@layer_support() @layer_support()
def selective_fc_layer(input, select, size, act=None, name=None, def selective_fc_layer(input,
select,
size,
act=None,
name=None,
pass_generation=False, pass_generation=False,
has_selected_colums=True, has_selected_colums=True,
mul_ratio=0.02, mul_ratio=0.02,
param_attr=None, bias_attr=None, layer_attr=None): param_attr=None,
bias_attr=None,
layer_attr=None):
""" """
Selectived fully connected layer. Different from fc_layer, the output Selectived fully connected layer. Different from fc_layer, the output
of this layer maybe sparse. It requires an additional input to indicate of this layer maybe sparse. It requires an additional input to indicate
...@@ -3399,8 +3653,9 @@ def selective_fc_layer(input, select, size, act=None, name=None, ...@@ -3399,8 +3653,9 @@ def selective_fc_layer(input, select, size, act=None, name=None,
if select.size is not None: if select.size is not None:
assert select.size == size assert select.size == size
Layer( Layer(
inputs=[Input(ipt.name, **attr.attr) for ipt, attr in zip( inputs=[
input, param_attr)] + [select.name], Input(ipt.name, **attr.attr) for ipt, attr in zip(input, param_attr)
] + [select.name],
name=name, name=name,
type=LayerType.SEL_FC_LAYER, type=LayerType.SEL_FC_LAYER,
size=size, size=size,
...@@ -3409,11 +3664,13 @@ def selective_fc_layer(input, select, size, act=None, name=None, ...@@ -3409,11 +3664,13 @@ def selective_fc_layer(input, select, size, act=None, name=None,
selective_fc_pass_generation=pass_generation, selective_fc_pass_generation=pass_generation,
has_selected_colums=has_selected_colums, has_selected_colums=has_selected_colums,
selective_fc_full_mul_ratio=mul_ratio, selective_fc_full_mul_ratio=mul_ratio,
**ExtraLayerAttribute.to_kwargs(layer_attr) **ExtraLayerAttribute.to_kwargs(layer_attr))
) return LayerOutput(
return LayerOutput(name, LayerType.SEL_FC_LAYER, list(input) + [select], name,
activation=act, LayerType.SEL_FC_LAYER,
size=size) list(input) + [select],
activation=act,
size=size)
@wrap_name_default() @wrap_name_default()
...@@ -3442,15 +3699,17 @@ def sampling_id_layer(input, name=None, layer_attr=None): ...@@ -3442,15 +3699,17 @@ def sampling_id_layer(input, name=None, layer_attr=None):
name=name, name=name,
type=LayerType.SAMPLING_ID_LAYER, type=LayerType.SAMPLING_ID_LAYER,
inputs=[Input(input.name)], inputs=[Input(input.name)],
**ExtraLayerAttribute.to_kwargs(layer_attr) **ExtraLayerAttribute.to_kwargs(layer_attr))
) return LayerOutput(
return LayerOutput(name, LayerType.SAMPLING_ID_LAYER, input, name, LayerType.SAMPLING_ID_LAYER, input, size=l.config.size)
size=l.config.size)
@wrap_name_default() @wrap_name_default()
@layer_support() @layer_support()
def slope_intercept_layer(input, name=None, slope=1.0, intercept=0.0, def slope_intercept_layer(input,
name=None,
slope=1.0,
intercept=0.0,
layer_attr=None): layer_attr=None):
""" """
This layer for applying a slope and an intercept to the input This layer for applying a slope and an intercept to the input
...@@ -3484,16 +3743,14 @@ def slope_intercept_layer(input, name=None, slope=1.0, intercept=0.0, ...@@ -3484,16 +3743,14 @@ def slope_intercept_layer(input, name=None, slope=1.0, intercept=0.0,
slope=slope, slope=slope,
intercept=intercept, intercept=intercept,
inputs=[Input(input.name)], inputs=[Input(input.name)],
**ExtraLayerAttribute.to_kwargs(layer_attr) **ExtraLayerAttribute.to_kwargs(layer_attr))
) return LayerOutput(
return LayerOutput(name, LayerType.SLOPE_INTERCEPT_LAYER, input, name, LayerType.SLOPE_INTERCEPT_LAYER, input, size=input.size)
size=input.size)
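slope_intercept_layer is an elementwise affine transform with fixed (non-learned) slope and intercept; illustrative numpy arithmetic:

    import numpy as np

    x = np.array([0.0, 1.0, 2.0], dtype=np.float32)
    slope, intercept = 2.0, -1.0
    y = slope * x + intercept                     # -> [-1., 1., 3.]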
@wrap_name_default() @wrap_name_default()
@layer_support() @layer_support()
def linear_comb_layer(weights, vectors, size=None, name=None, def linear_comb_layer(weights, vectors, size=None, name=None, layer_attr=None):
layer_attr=None):
""" """
A layer for weighted sum of vectors takes two inputs. A layer for weighted sum of vectors takes two inputs.
- Input: size of weights is M - Input: size of weights is M
...@@ -3543,7 +3800,7 @@ def linear_comb_layer(weights, vectors, size=None, name=None, ...@@ -3543,7 +3800,7 @@ def linear_comb_layer(weights, vectors, size=None, name=None,
if vectors.size is not None and weights.size is not None: if vectors.size is not None and weights.size is not None:
assert vectors.size % weights.size == 0 assert vectors.size % weights.size == 0
if size is None: if size is None:
size = vectors.size / weights.size size = vectors.size / weights.size
else: else:
assert size == vectors.size / weights.size assert size == vectors.size / weights.size
Layer( Layer(
...@@ -3551,10 +3808,9 @@ def linear_comb_layer(weights, vectors, size=None, name=None, ...@@ -3551,10 +3808,9 @@ def linear_comb_layer(weights, vectors, size=None, name=None,
type=LayerType.LINEAR_COMBINATION_LAYER, type=LayerType.LINEAR_COMBINATION_LAYER,
size=size, size=size,
inputs=[Input(weights.name), Input(vectors.name)], inputs=[Input(weights.name), Input(vectors.name)],
**ExtraLayerAttribute.to_kwargs(layer_attr) **ExtraLayerAttribute.to_kwargs(layer_attr))
) return LayerOutput(
return LayerOutput(name, LayerType.LINEAR_COMBINATION_LAYER, name, LayerType.LINEAR_COMBINATION_LAYER, [weights, vectors], size=size)
[weights, vectors], size=size)
convex_comb_layer = linear_comb_layer convex_comb_layer = linear_comb_layer
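linear_comb_layer (alias convex_comb_layer) combines M weights with an M*N vector input into an N-dimensional weighted sum. A numpy sketch; the exact memory layout of the flattened M*N input is an assumption here:

    import numpy as np

    M, N = 4, 3
    w = np.array([0.1, 0.2, 0.3, 0.4], dtype=np.float32)   # M weights
    v = np.arange(M * N, dtype=np.float32).reshape(M, N)   # M vectors of size N

    out = (w[:, None] * v).sum(axis=0)                     # weighted sum of the M rows
    assert out.shape == (N,)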
...@@ -3626,21 +3882,23 @@ def block_expand_layer(input, ...@@ -3626,21 +3882,23 @@ def block_expand_layer(input,
if num_channels is None: if num_channels is None:
assert input.num_filters is not None assert input.num_filters is not None
num_channels = input.num_filters num_channels = input.num_filters
l = Layer(name=name, l = Layer(
inputs=Input(input.name, name=name,
block_expand=BlockExpand(channels=num_channels, inputs=Input(
block_x=block_x, input.name,
block_y=block_y, block_expand=BlockExpand(
stride_x=stride_x, channels=num_channels,
stride_y=stride_y, block_x=block_x,
padding_x=padding_x, block_y=block_y,
padding_y=padding_y)), stride_x=stride_x,
type=LayerType.BLOCK_EXPAND, stride_y=stride_y,
**ExtraLayerAttribute.to_kwargs(layer_attr) padding_x=padding_x,
) padding_y=padding_y)),
type=LayerType.BLOCK_EXPAND,
return LayerOutput(name, LayerType.BLOCK_EXPAND, parents=[input], **ExtraLayerAttribute.to_kwargs(layer_attr))
size=l.config.size)
return LayerOutput(
name, LayerType.BLOCK_EXPAND, parents=[input], size=l.config.size)
@wrap_name_default() @wrap_name_default()
...@@ -3701,19 +3959,24 @@ def maxout_layer(input, ...@@ -3701,19 +3959,24 @@ def maxout_layer(input,
assert input.num_filters is not None assert input.num_filters is not None
num_channels = input.num_filters num_channels = input.num_filters
assert num_channels % groups == 0 assert num_channels % groups == 0
l = Layer(name=name, l = Layer(
inputs=Input(input.name, name=name,
maxout=MaxOut(channels=num_channels, inputs=Input(
groups=groups)), input.name, maxout=MaxOut(
type=LayerType.MAXOUT, channels=num_channels, groups=groups)),
**ExtraLayerAttribute.to_kwargs(layer_attr)) type=LayerType.MAXOUT,
return LayerOutput(name, LayerType.MAXOUT, parents=[input], **ExtraLayerAttribute.to_kwargs(layer_attr))
size=l.config.size) return LayerOutput(
name, LayerType.MAXOUT, parents=[input], size=l.config.size)
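maxout_layer divides the channels into groups and keeps the elementwise maximum within each group, so num_channels must be divisible by groups (asserted above). A numpy sketch; the exact channel ordering inside a group is an assumption:

    import numpy as np

    channels, groups, pixels = 8, 2, 5
    x = np.random.randn(channels, pixels).astype(np.float32)

    out = x.reshape(channels // groups, groups, pixels).max(axis=1)
    assert out.shape == (channels // groups, pixels)        # 8 channels -> 4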
@wrap_name_default() @wrap_name_default()
@layer_support() @layer_support()
def ctc_layer(input, label, size=None, name=None, norm_by_times=False, def ctc_layer(input,
label,
size=None,
name=None,
norm_by_times=False,
layer_attr=None): layer_attr=None):
""" """
Connectionist Temporal Classification (CTC) is designed for temporal Connectionist Temporal Classification (CTC) is designed for temporal
...@@ -3769,15 +4032,19 @@ def ctc_layer(input, label, size=None, name=None, norm_by_times=False, ...@@ -3769,15 +4032,19 @@ def ctc_layer(input, label, size=None, name=None, norm_by_times=False,
size=size, size=size,
norm_by_times=norm_by_times, norm_by_times=norm_by_times,
inputs=[input.name, label.name], inputs=[input.name, label.name],
**ExtraLayerAttribute.to_kwargs(layer_attr) **ExtraLayerAttribute.to_kwargs(layer_attr))
)
return LayerOutput(name, LayerType.CTC_LAYER, [input, label], size=size) return LayerOutput(name, LayerType.CTC_LAYER, [input, label], size=size)
@wrap_name_default() @wrap_name_default()
@wrap_param_attr_default() @wrap_param_attr_default()
@layer_support() @layer_support()
def crf_layer(input, label, size=None, weight=None, param_attr=None, name=None, def crf_layer(input,
label,
size=None,
weight=None,
param_attr=None,
name=None,
layer_attr=None): layer_attr=None):
""" """
A layer for calculating the cost of sequential conditional random A layer for calculating the cost of sequential conditional random
...@@ -3819,8 +4086,7 @@ def crf_layer(input, label, size=None, weight=None, param_attr=None, name=None, ...@@ -3819,8 +4086,7 @@ def crf_layer(input, label, size=None, weight=None, param_attr=None, name=None,
else: else:
assert size == input.size assert size == input.size
ipts = [Input(input.name, **param_attr.attr), ipts = [Input(input.name, **param_attr.attr), Input(label.name)]
Input(label.name)]
if weight is not None: if weight is not None:
ipts.append(Input(weight.name)) ipts.append(Input(weight.name))
...@@ -3829,8 +4095,7 @@ def crf_layer(input, label, size=None, weight=None, param_attr=None, name=None, ...@@ -3829,8 +4095,7 @@ def crf_layer(input, label, size=None, weight=None, param_attr=None, name=None,
type=LayerType.CRF_LAYER, type=LayerType.CRF_LAYER,
size=size, size=size,
inputs=ipts, inputs=ipts,
**ExtraLayerAttribute.to_kwargs(layer_attr) **ExtraLayerAttribute.to_kwargs(layer_attr))
)
parents = [input, label] parents = [input, label]
if weight is not None: if weight is not None:
parents.append(weight) parents.append(weight)
...@@ -3843,7 +4108,11 @@ def crf_layer(input, label, size=None, weight=None, param_attr=None, name=None, ...@@ -3843,7 +4108,11 @@ def crf_layer(input, label, size=None, weight=None, param_attr=None, name=None,
@wrap_name_default() @wrap_name_default()
@wrap_param_attr_default() @wrap_param_attr_default()
@layer_support() @layer_support()
def crf_decoding_layer(input, size, label=None, param_attr=None, name=None, def crf_decoding_layer(input,
size,
label=None,
param_attr=None,
name=None,
layer_attr=None): layer_attr=None):
""" """
A layer for calculating the decoding sequence of sequential conditional A layer for calculating the decoding sequence of sequential conditional
...@@ -3880,8 +4149,7 @@ def crf_decoding_layer(input, size, label=None, param_attr=None, name=None, ...@@ -3880,8 +4149,7 @@ def crf_decoding_layer(input, size, label=None, param_attr=None, name=None,
type=LayerType.CRF_DECODING_LAYER, type=LayerType.CRF_DECODING_LAYER,
size=size, size=size,
inputs=ipts, inputs=ipts,
**ExtraLayerAttribute.to_kwargs(layer_attr) **ExtraLayerAttribute.to_kwargs(layer_attr))
)
parents = [input] parents = [input]
if label is not None: if label is not None:
parents.append(label) parents.append(label)
...@@ -3890,12 +4158,19 @@ def crf_decoding_layer(input, size, label=None, param_attr=None, name=None, ...@@ -3890,12 +4158,19 @@ def crf_decoding_layer(input, size, label=None, param_attr=None, name=None,
# classes. # classes.
return LayerOutput(name, LayerType.CRF_DECODING_LAYER, parents, size=1) return LayerOutput(name, LayerType.CRF_DECODING_LAYER, parents, size=1)
@wrap_bias_attr_default(has_bias=True) @wrap_bias_attr_default(has_bias=True)
@wrap_name_default() @wrap_name_default()
@layer_support() @layer_support()
def nce_layer(input, label, num_classes, weight=None, def nce_layer(input,
num_neg_samples=10, neg_distribution=None, label,
name=None, bias_attr=None, layer_attr=None): num_classes,
weight=None,
num_neg_samples=10,
neg_distribution=None,
name=None,
bias_attr=None,
layer_attr=None):
""" """
Noise-contrastive estimation. Noise-contrastive estimation.
Implements the method in the following paper: Implements the method in the following paper:
...@@ -3964,10 +4239,10 @@ def nce_layer(input, label, num_classes, weight=None, ...@@ -3964,10 +4239,10 @@ def nce_layer(input, label, num_classes, weight=None,
num_neg_samples=num_neg_samples, num_neg_samples=num_neg_samples,
inputs=ipts_for_layer, inputs=ipts_for_layer,
bias=ParamAttr.to_bias(bias_attr), bias=ParamAttr.to_bias(bias_attr),
**ExtraLayerAttribute.to_kwargs(layer_attr) **ExtraLayerAttribute.to_kwargs(layer_attr))
) return LayerOutput(
return LayerOutput(name, LayerType.NCE_LAYER, parents=parents, name, LayerType.NCE_LAYER, parents=parents, size=l.config.size)
size=l.config.size)
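A minimal usage sketch for nce_layer as reformatted above; the vocabulary size, hidden width, and data layer names are placeholders.

from paddle.trainer_config_helpers import *

settings(batch_size=128, learning_rate=1e-3)
feats = data_layer(name='context', size=128)           # hypothetical context features
next_word = data_layer(name='next_word', size=10000)   # hypothetical label layer
hidden = fc_layer(input=feats, size=256)
cost = nce_layer(input=hidden, label=next_word, num_classes=10000,
                 num_neg_samples=25)
outputs(cost)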
""" """
The following are cost layers. The following are cost layers.
...@@ -3976,7 +4251,13 @@ following are cost Layers. ...@@ -3976,7 +4251,13 @@ following are cost Layers.
@wrap_name_default() @wrap_name_default()
@layer_support() @layer_support()
def rank_cost(left, right, label, weight=None, name=None, coeff=1.0, layer_attr=None): def rank_cost(left,
right,
label,
weight=None,
name=None,
coeff=1.0,
layer_attr=None):
""" """
A cost Layer for learning to rank using gradient descent. Details can be A cost Layer for learning to rank using gradient descent. Details can be
found in `papers <http://research.microsoft.com/en-us/um/people/cburges/papers/ found in `papers <http://research.microsoft.com/en-us/um/people/cburges/papers/
...@@ -4035,19 +4316,24 @@ def rank_cost(left, right, label, weight=None, name=None, coeff=1.0, layer_attr= ...@@ -4035,19 +4316,24 @@ def rank_cost(left, right, label, weight=None, name=None, coeff=1.0, layer_attr=
ipts.append(weight.name) ipts.append(weight.name)
parents.append(weight) parents.append(weight)
Layer(name=name, Layer(
type=LayerType.RANK_COST, name=name,
inputs=ipts, type=LayerType.RANK_COST,
coeff=coeff, inputs=ipts,
**ExtraLayerAttribute.to_kwargs(layer_attr) coeff=coeff,
) **ExtraLayerAttribute.to_kwargs(layer_attr))
return LayerOutput(name, LayerType.RANK_COST, parents=parents, size=1) return LayerOutput(name, LayerType.RANK_COST, parents=parents, size=1)
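A small pairwise-ranking sketch using rank_cost; the two document towers and the size-1 label layer are illustrative, and in practice the two scoring branches would usually share parameters.

from paddle.trainer_config_helpers import *

settings(batch_size=128, learning_rate=1e-3)
doc_a = data_layer(name='doc_a', size=300)
doc_b = data_layer(name='doc_b', size=300)
label = data_layer(name='label', size=1)
score_a = fc_layer(input=doc_a, size=1, act=LinearActivation())
score_b = fc_layer(input=doc_b, size=1, act=LinearActivation())
outputs(rank_cost(left=score_a, right=score_b, label=label))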
@wrap_name_default() @wrap_name_default()
@layer_support() @layer_support()
def lambda_cost(input, score, name, NDCG_num=5, max_sort_size=-1, layer_attr=None): def lambda_cost(input,
score,
name,
NDCG_num=5,
max_sort_size=-1,
layer_attr=None):
""" """
lambdaCost for lambdaRank LTR approach. lambdaCost for lambdaRank LTR approach.
...@@ -4086,16 +4372,16 @@ def lambda_cost(input, score, name, NDCG_num=5, max_sort_size=-1, layer_attr=Non ...@@ -4086,16 +4372,16 @@ def lambda_cost(input, score, name, NDCG_num=5, max_sort_size=-1, layer_attr=Non
assert isinstance(input, LayerOutput) and isinstance(score, LayerOutput) assert isinstance(input, LayerOutput) and isinstance(score, LayerOutput)
if score.size is not None: if score.size is not None:
assert score.size == 1 assert score.size == 1
Layer(name=name, Layer(
type=LayerType.LAMBDA_COST, name=name,
inputs=[input.name, score.name], type=LayerType.LAMBDA_COST,
NDCG_num=NDCG_num, inputs=[input.name, score.name],
max_sort_size=max_sort_size, NDCG_num=NDCG_num,
**ExtraLayerAttribute.to_kwargs(layer_attr) max_sort_size=max_sort_size,
) **ExtraLayerAttribute.to_kwargs(layer_attr))
return LayerOutput(name, LayerType.LAMBDA_COST, parents=[input, score], return LayerOutput(
size=1) name, LayerType.LAMBDA_COST, parents=[input, score], size=1)
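A listwise LTR sketch with lambda_cost; here input carries the model scores and score carries the ground-truth relevance, both size 1 per document, which matches the size assertion above. The feature width and layer names are assumptions.

from paddle.trainer_config_helpers import *

settings(batch_size=128, learning_rate=1e-3)
query_doc = data_layer(name='query_doc', size=200)
relevance = data_layer(name='relevance', size=1)
model_score = fc_layer(input=query_doc, size=1, act=LinearActivation())
outputs(lambda_cost(input=model_score, score=relevance,
                    name='lambda_cost', NDCG_num=5))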
@wrap_name_default() @wrap_name_default()
...@@ -4123,19 +4409,22 @@ def cross_entropy(input, label, name=None, coeff=1.0, layer_attr=None): ...@@ -4123,19 +4409,22 @@ def cross_entropy(input, label, name=None, coeff=1.0, layer_attr=None):
:rtype: LayerOutput. :rtype: LayerOutput.
""" """
Layer(name=name, Layer(
type=LayerType.CROSS_ENTROPY, name=name,
inputs=[input.name, label.name], type=LayerType.CROSS_ENTROPY,
coeff=coeff, inputs=[input.name, label.name],
**ExtraLayerAttribute.to_kwargs(layer_attr) coeff=coeff,
) **ExtraLayerAttribute.to_kwargs(layer_attr))
return LayerOutput(name, LayerType.CROSS_ENTROPY, parents=[input, label], return LayerOutput(
size=1) name, LayerType.CROSS_ENTROPY, parents=[input, label], size=1)
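A minimal classification sketch with cross_entropy; the 784-dimensional input and 10 classes are placeholders.

from paddle.trainer_config_helpers import *

settings(batch_size=128, learning_rate=1e-3)
feats = data_layer(name='pixels', size=784)
label = data_layer(name='label', size=10)
prob = fc_layer(input=feats, size=10, act=SoftmaxActivation())
outputs(cross_entropy(input=prob, label=label))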
@wrap_name_default() @wrap_name_default()
@layer_support() @layer_support()
def cross_entropy_with_selfnorm(input, label, name=None, coeff=1.0, def cross_entropy_with_selfnorm(input,
label,
name=None,
coeff=1.0,
softmax_selfnorm_alpha=0.1, softmax_selfnorm_alpha=0.1,
layer_attr=None): layer_attr=None):
""" """
...@@ -4161,17 +4450,19 @@ def cross_entropy_with_selfnorm(input, label, name=None, coeff=1.0, ...@@ -4161,17 +4450,19 @@ def cross_entropy_with_selfnorm(input, label, name=None, coeff=1.0,
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput. :rtype: LayerOutput.
""" """
Layer(name=name, Layer(
type=LayerType.CROSS_ENTROPY_WITH_SELFNORM, name=name,
inputs=[input.name, label.name], type=LayerType.CROSS_ENTROPY_WITH_SELFNORM,
coeff=coeff, inputs=[input.name, label.name],
softmax_selfnorm_alpha=softmax_selfnorm_alpha, coeff=coeff,
**ExtraLayerAttribute.to_kwargs(layer_attr) softmax_selfnorm_alpha=softmax_selfnorm_alpha,
) **ExtraLayerAttribute.to_kwargs(layer_attr))
return LayerOutput(name, return LayerOutput(
LayerType.CROSS_ENTROPY_WITH_SELFNORM, name,
parents=[input, label], size=1) LayerType.CROSS_ENTROPY_WITH_SELFNORM,
parents=[input, label],
size=1)
@wrap_name_default() @wrap_name_default()
...@@ -4194,16 +4485,13 @@ def sum_cost(input, name=None, layer_attr=None): ...@@ -4194,16 +4485,13 @@ def sum_cost(input, name=None, layer_attr=None):
:rtype: LayerOutput. :rtype: LayerOutput.
""" """
assert isinstance(input, LayerOutput) assert isinstance(input, LayerOutput)
Layer(name=name, Layer(
type=LayerType.SUM_COST, name=name,
inputs=[input.name], type=LayerType.SUM_COST,
**ExtraLayerAttribute.to_kwargs(layer_attr) inputs=[input.name],
) **ExtraLayerAttribute.to_kwargs(layer_attr))
return LayerOutput(name, return LayerOutput(name, LayerType.SUM_COST, parents=[input], size=1)
LayerType.SUM_COST,
parents=[input],
size=1)
@wrap_name_default() @wrap_name_default()
...@@ -4233,18 +4521,21 @@ def huber_cost(input, label, name=None, coeff=1.0, layer_attr=None): ...@@ -4233,18 +4521,21 @@ def huber_cost(input, label, name=None, coeff=1.0, layer_attr=None):
assert isinstance(input, LayerOutput) assert isinstance(input, LayerOutput)
if input.size is not None: if input.size is not None:
assert input.size == 1 assert input.size == 1
Layer(name=name, Layer(
type=LayerType.HUBER, name=name,
inputs=[input.name, label.name], type=LayerType.HUBER,
coeff=coeff, inputs=[input.name, label.name],
**ExtraLayerAttribute.to_kwargs(layer_attr) coeff=coeff,
) **ExtraLayerAttribute.to_kwargs(layer_attr))
return LayerOutput(name, LayerType.HUBER, parents=[input, label], size=1) return LayerOutput(name, LayerType.HUBER, parents=[input, label], size=1)
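A sketch for huber_cost; as the assertion above requires, the prediction layer has size 1, and the remaining names and sizes are placeholders.

from paddle.trainer_config_helpers import *

settings(batch_size=128, learning_rate=1e-3)
feats = data_layer(name='features', size=100)
label = data_layer(name='label', size=1)
pred = fc_layer(input=feats, size=1, act=LinearActivation())
outputs(huber_cost(input=pred, label=label))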
@wrap_name_default() @wrap_name_default()
@layer_support() @layer_support()
def multi_binary_label_cross_entropy(input, label, name=None, coeff=1.0, def multi_binary_label_cross_entropy(input,
label,
name=None,
coeff=1.0,
layer_attr=None): layer_attr=None):
""" """
A loss layer for multi binary label cross entropy. A loss layer for multi binary label cross entropy.
...@@ -4272,15 +4563,19 @@ def multi_binary_label_cross_entropy(input, label, name=None, coeff=1.0, ...@@ -4272,15 +4563,19 @@ def multi_binary_label_cross_entropy(input, label, name=None, coeff=1.0,
if input.activation is None or \ if input.activation is None or \
not isinstance(input.activation, SigmoidActivation): not isinstance(input.activation, SigmoidActivation):
logger.log(logging.WARN, logger.log(
"%s is not recommend for multi_binary_label_cross_entropy's activation, " logging.WARN,
"maybe the sigmoid is better" % repr(input.activation)) "%s is not recommend for multi_binary_label_cross_entropy's activation, "
"maybe the sigmoid is better" % repr(input.activation))
Layer(name=name,
type=LayerType.MULTI_BIN_LABEL_CROSS_ENTROPY, Layer(
inputs=[input.name, label.name], name=name,
coeff=coeff, type=LayerType.MULTI_BIN_LABEL_CROSS_ENTROPY,
**ExtraLayerAttribute.to_kwargs(layer_attr) inputs=[input.name, label.name],
) coeff=coeff,
return LayerOutput(name, LayerType.MULTI_BIN_LABEL_CROSS_ENTROPY, **ExtraLayerAttribute.to_kwargs(layer_attr))
parents=[input, label], size=1) return LayerOutput(
name,
LayerType.MULTI_BIN_LABEL_CROSS_ENTROPY,
parents=[input, label],
size=1)
...@@ -21,16 +21,18 @@ from paddle.trainer.config_parser import logger ...@@ -21,16 +21,18 @@ from paddle.trainer.config_parser import logger
__all__ = [] __all__ = []
def register_unary_math_op(op_name, act): def register_unary_math_op(op_name, act):
def op(input, name=None): def op(input, name=None):
return mixed_layer(input=[identity_projection(input=input)], return mixed_layer(
name=name, input=[identity_projection(input=input)], name=name, act=act)
act=act)
op = wrap_name_default(op_name)(op) op = wrap_name_default(op_name)(op)
op.__doc__ = type(act).__doc__ op.__doc__ = type(act).__doc__
globals()[op_name] = op globals()[op_name] = op
__all__.append(op_name) __all__.append(op_name)
register_unary_math_op('exp', act.ExpActivation()) register_unary_math_op('exp', act.ExpActivation())
register_unary_math_op('log', act.LogActivation()) register_unary_math_op('log', act.LogActivation())
register_unary_math_op('abs', act.AbsActivation()) register_unary_math_op('abs', act.AbsActivation())
...@@ -38,6 +40,7 @@ register_unary_math_op('sigmoid', act.SigmoidActivation()) ...@@ -38,6 +40,7 @@ register_unary_math_op('sigmoid', act.SigmoidActivation())
register_unary_math_op('tanh', act.TanhActivation()) register_unary_math_op('tanh', act.TanhActivation())
register_unary_math_op('square', act.SquareActivation()) register_unary_math_op('square', act.SquareActivation())
def add(layeroutput, other): def add(layeroutput, other):
if is_compatible_with(other, float): if is_compatible_with(other, float):
return slope_intercept_layer(input=layeroutput, intercept=other) return slope_intercept_layer(input=layeroutput, intercept=other)
...@@ -45,8 +48,10 @@ def add(layeroutput, other): ...@@ -45,8 +48,10 @@ def add(layeroutput, other):
logger.fatal("LayerOutput can only be added with" logger.fatal("LayerOutput can only be added with"
" another LayerOutput or a number") " another LayerOutput or a number")
if layeroutput.size == other.size: if layeroutput.size == other.size:
return mixed_layer(input=[identity_projection(input=layeroutput), return mixed_layer(input=[
identity_projection(input=other)]) identity_projection(input=layeroutput),
identity_projection(input=other)
])
if other.size != 1 and layeroutput.size != 1: if other.size != 1 and layeroutput.size != 1:
logger.fatal("Two LayerOutput can be added only if they have equal size" logger.fatal("Two LayerOutput can be added only if they have equal size"
" or one of their sizes is 1. sizes are %s and %s" % " or one of their sizes is 1. sizes are %s and %s" %
...@@ -56,12 +61,15 @@ def add(layeroutput, other): ...@@ -56,12 +61,15 @@ def add(layeroutput, other):
layeroutput = other layeroutput = other
other = tmp other = tmp
other = repeat_layer(other, layeroutput.size) other = repeat_layer(other, layeroutput.size)
return mixed_layer(input=[identity_projection(input=layeroutput), return mixed_layer(input=[
identity_projection(input=other)]) identity_projection(input=layeroutput), identity_projection(input=other)
])
LayerOutput.__radd__ = add LayerOutput.__radd__ = add
LayerOutput.__add__ = add LayerOutput.__add__ = add
def sub(layeroutput, other): def sub(layeroutput, other):
if is_compatible_with(other, float): if is_compatible_with(other, float):
return slope_intercept_layer(input=layeroutput, intercept=other) return slope_intercept_layer(input=layeroutput, intercept=other)
...@@ -71,14 +79,18 @@ def sub(layeroutput, other): ...@@ -71,14 +79,18 @@ def sub(layeroutput, other):
neg = slope_intercept_layer(input=other, slope=-1.0) neg = slope_intercept_layer(input=other, slope=-1.0)
return add(layeroutput, neg) return add(layeroutput, neg)
LayerOutput.__sub__ = sub LayerOutput.__sub__ = sub
def rsub(layeroutput, other): def rsub(layeroutput, other):
neg = slope_intercept_layer(input=layeroutput, slope=-1.0) neg = slope_intercept_layer(input=layeroutput, slope=-1.0)
return add(neg, other) return add(neg, other)
LayerOutput.__rsub__ = rsub LayerOutput.__rsub__ = rsub
def mul(layeroutput, other): def mul(layeroutput, other):
if is_compatible_with(other, float): if is_compatible_with(other, float):
return slope_intercept_layer(input=layeroutput, slope=other) return slope_intercept_layer(input=layeroutput, slope=other)
...@@ -93,5 +105,6 @@ def mul(layeroutput, other): ...@@ -93,5 +105,6 @@ def mul(layeroutput, other):
logger.fatal("At least one of the operand of '*' must be a number" logger.fatal("At least one of the operand of '*' must be a number"
" or a LayerOutput with size=1") " or a LayerOutput with size=1")
LayerOutput.__mul__ = mul LayerOutput.__mul__ = mul
LayerOutput.__rmul__ = mul LayerOutput.__rmul__ = mul
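A short sketch of what the registered unary ops and operator overloads enable; the import path paddle.trainer_config_helpers.math for this module is an assumption, and the layer sizes are placeholders.

from paddle.trainer_config_helpers import *
import paddle.trainer_config_helpers.math as layer_math  # assumed module path

settings(batch_size=128, learning_rate=1e-3)
a = data_layer(name='a', size=10)
b = data_layer(name='b', size=10)
c = a + b                      # element-wise sum via identity projections
d = 2.0 * c                    # scaling through slope_intercept_layer
outputs(layer_math.square(d))  # one of the unary ops registered above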
...@@ -11,7 +11,6 @@ ...@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
""" """
""" """
...@@ -25,28 +24,32 @@ from layers import * # There are too many layers used in network, so import * ...@@ -25,28 +24,32 @@ from layers import * # There are too many layers used in network, so import *
from poolings import MaxPooling, SumPooling from poolings import MaxPooling, SumPooling
from paddle.trainer.config_parser import * from paddle.trainer.config_parser import *
__all__ = ['sequence_conv_pool', 'simple_lstm', "simple_img_conv_pool", __all__ = [
"img_conv_bn_pool", 'dropout_layer', 'lstmemory_group', 'sequence_conv_pool', 'simple_lstm', "simple_img_conv_pool",
'lstmemory_unit', 'small_vgg', 'img_conv_group', 'vgg_16_network', "img_conv_bn_pool", 'dropout_layer', 'lstmemory_group', 'lstmemory_unit',
'gru_unit', 'gru_group', 'simple_gru', 'simple_attention', 'small_vgg', 'img_conv_group', 'vgg_16_network', 'gru_unit', 'gru_group',
'simple_gru2', 'bidirectional_gru', 'text_conv_pool', 'simple_gru', 'simple_attention', 'simple_gru2', 'bidirectional_gru',
'bidirectional_lstm', 'inputs', 'outputs'] 'text_conv_pool', 'bidirectional_lstm', 'inputs', 'outputs'
]
###################################################### ######################################################
# Text CNN # # Text CNN #
###################################################### ######################################################
@wrap_name_default("sequence_conv_pooling") @wrap_name_default("sequence_conv_pooling")
def sequence_conv_pool(input, def sequence_conv_pool(input,
context_len, hidden_size, context_len,
hidden_size,
name=None, name=None,
context_start=None, context_start=None,
pool_type=None, context_proj_layer_name=None, pool_type=None,
context_proj_layer_name=None,
context_proj_param_attr=False, context_proj_param_attr=False,
fc_layer_name=None, fc_layer_name=None,
fc_param_attr=None, fc_param_attr=None,
fc_bias_attr=None, fc_act=None, fc_bias_attr=None,
fc_act=None,
pool_bias_attr=None, pool_bias_attr=None,
fc_attr=None, fc_attr=None,
context_attr=None, context_attr=None,
...@@ -101,40 +104,62 @@ def sequence_conv_pool(input, ...@@ -101,40 +104,62 @@ def sequence_conv_pool(input,
context_proj_layer_name = "%s_conv_proj" % name \ context_proj_layer_name = "%s_conv_proj" % name \
if context_proj_layer_name is None else context_proj_layer_name if context_proj_layer_name is None else context_proj_layer_name
with mixed_layer(name=context_proj_layer_name, with mixed_layer(
size=input.size * context_len, name=context_proj_layer_name,
act=LinearActivation(), size=input.size * context_len,
layer_attr=context_attr) as m: act=LinearActivation(),
m += context_projection(input, context_len=context_len, layer_attr=context_attr) as m:
context_start=context_start, m += context_projection(
padding_attr=context_proj_param_attr) input,
context_len=context_len,
context_start=context_start,
padding_attr=context_proj_param_attr)
fc_layer_name = "%s_conv_fc" % name \ fc_layer_name = "%s_conv_fc" % name \
if fc_layer_name is None else fc_layer_name if fc_layer_name is None else fc_layer_name
fl = fc_layer(name=fc_layer_name, input=m, size=hidden_size, fl = fc_layer(
act=fc_act, layer_attr=fc_attr, name=fc_layer_name,
param_attr=fc_param_attr, bias_attr=fc_bias_attr) input=m,
size=hidden_size,
act=fc_act,
layer_attr=fc_attr,
param_attr=fc_param_attr,
bias_attr=fc_bias_attr)
return pooling_layer(name=name, input=fl, return pooling_layer(
pooling_type=pool_type, name=name,
bias_attr=pool_bias_attr, input=fl,
layer_attr=pool_attr) pooling_type=pool_type,
bias_attr=pool_bias_attr,
layer_attr=pool_attr)
text_conv_pool = sequence_conv_pool text_conv_pool = sequence_conv_pool
############################################################################ ############################################################################
# Images # # Images #
############################################################################ ############################################################################
@wrap_name_default("conv_pool") @wrap_name_default("conv_pool")
def simple_img_conv_pool(input, filter_size, num_filters, pool_size, name=None, def simple_img_conv_pool(input,
pool_type=None, act=None, groups=1, conv_stride=1, filter_size,
conv_padding=0, bias_attr=None, num_channel=None, num_filters,
param_attr=None, shared_bias=True, pool_size,
conv_layer_attr=None, pool_stride=1, name=None,
pool_padding=0, pool_layer_attr=None): pool_type=None,
act=None,
groups=1,
conv_stride=1,
conv_padding=0,
bias_attr=None,
num_channel=None,
param_attr=None,
shared_bias=True,
conv_layer_attr=None,
pool_stride=1,
pool_padding=0,
pool_layer_attr=None):
""" """
Simple image convolution and pooling group. Simple image convolution and pooling group.
...@@ -179,29 +204,52 @@ def simple_img_conv_pool(input, filter_size, num_filters, pool_size, name=None, ...@@ -179,29 +204,52 @@ def simple_img_conv_pool(input, filter_size, num_filters, pool_size, name=None,
:return: Layer's output :return: Layer's output
:rtype: LayerOutput :rtype: LayerOutput
""" """
_conv_ = img_conv_layer(name="%s_conv" % name, input=input, _conv_ = img_conv_layer(
filter_size=filter_size, name="%s_conv" % name,
num_filters=num_filters, num_channels=num_channel, input=input,
act=act, groups=groups, filter_size=filter_size,
stride=conv_stride, num_filters=num_filters,
padding=conv_padding, bias_attr=bias_attr, num_channels=num_channel,
param_attr=param_attr, shared_biases=shared_bias, act=act,
layer_attr=conv_layer_attr) groups=groups,
return img_pool_layer(name="%s_pool" % name, input=_conv_, stride=conv_stride,
pool_size=pool_size, padding=conv_padding,
pool_type=pool_type, stride=pool_stride, bias_attr=bias_attr,
padding=pool_padding, param_attr=param_attr,
layer_attr=pool_layer_attr) shared_biases=shared_bias,
layer_attr=conv_layer_attr)
return img_pool_layer(
name="%s_pool" % name,
input=_conv_,
pool_size=pool_size,
pool_type=pool_type,
stride=pool_stride,
padding=pool_padding,
layer_attr=pool_layer_attr)
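An illustrative single conv + pool block built with simple_img_conv_pool; the 32x32 single-channel image and the filter counts are placeholders.

from paddle.trainer_config_helpers import *

settings(batch_size=128, learning_rate=1e-3)
img = data_layer(name='image', size=32 * 32)
feat = simple_img_conv_pool(input=img, num_channel=1, num_filters=16,
                            filter_size=3, pool_size=2, pool_stride=2,
                            pool_type=MaxPooling(), act=ReluActivation())
outputs(fc_layer(input=feat, size=10, act=SoftmaxActivation()))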
@wrap_name_default("conv_bn_pool") @wrap_name_default("conv_bn_pool")
def img_conv_bn_pool(input, filter_size, num_filters, pool_size, name=None, def img_conv_bn_pool(input,
pool_type=None, act=None, groups=1, conv_stride=1, filter_size,
conv_padding=0, conv_bias_attr=None, num_channel=None, num_filters,
conv_param_attr=None, shared_bias=True, pool_size,
conv_layer_attr=None, bn_param_attr=None, name=None,
bn_bias_attr=None, bn_layer_attr=None, pool_stride=1, pool_type=None,
pool_padding=0, pool_layer_attr=None): act=None,
groups=1,
conv_stride=1,
conv_padding=0,
conv_bias_attr=None,
num_channel=None,
conv_param_attr=None,
shared_bias=True,
conv_layer_attr=None,
bn_param_attr=None,
bn_bias_attr=None,
bn_layer_attr=None,
pool_stride=1,
pool_padding=0,
pool_layer_attr=None):
""" """
Convolution, batch normalization, pooling group. Convolution, batch normalization, pooling group.
...@@ -248,31 +296,42 @@ def img_conv_bn_pool(input, filter_size, num_filters, pool_size, name=None, ...@@ -248,31 +296,42 @@ def img_conv_bn_pool(input, filter_size, num_filters, pool_size, name=None,
:return: Layer groups output :return: Layer groups output
:rtype: LayerOutput :rtype: LayerOutput
""" """
__conv__ = img_conv_layer(name="%s_conv" % name, __conv__ = img_conv_layer(
input=input, filter_size=filter_size, name="%s_conv" % name,
num_filters=num_filters, num_channels=num_channel, input=input,
act=LinearActivation(), groups=groups, filter_size=filter_size,
stride=conv_stride, padding=conv_padding, num_filters=num_filters,
bias_attr=conv_bias_attr, num_channels=num_channel,
param_attr=conv_param_attr, act=LinearActivation(),
shared_biases=shared_bias, groups=groups,
layer_attr=conv_layer_attr) stride=conv_stride,
__bn__ = batch_norm_layer(name="%s_bn" % name, padding=conv_padding,
input=__conv__, act=act, bias_attr=conv_bias_attr,
bias_attr=bn_bias_attr, param_attr=bn_param_attr, param_attr=conv_param_attr,
layer_attr=bn_layer_attr) shared_biases=shared_bias,
return img_pool_layer(name="%s_pool" % name, layer_attr=conv_layer_attr)
input=__bn__, pool_type=pool_type, __bn__ = batch_norm_layer(
pool_size=pool_size, stride=pool_stride, name="%s_bn" % name,
padding=pool_padding, input=__conv__,
layer_attr=pool_layer_attr) act=act,
bias_attr=bn_bias_attr,
param_attr=bn_param_attr,
@wrap_act_default(param_names=['conv_act'], layer_attr=bn_layer_attr)
act=ReluActivation()) return img_pool_layer(
@wrap_param_default(param_names=['pool_type'], name="%s_pool" % name,
default_factory=lambda _: MaxPooling()) input=__bn__,
def img_conv_group(input, conv_num_filter, pool_type=pool_type,
pool_size=pool_size,
stride=pool_stride,
padding=pool_padding,
layer_attr=pool_layer_attr)
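The same kind of sketch for img_conv_bn_pool, which inserts batch normalization between the convolution and the pooling; all sizes are placeholders.

from paddle.trainer_config_helpers import *

settings(batch_size=128, learning_rate=1e-3)
img = data_layer(name='image', size=32 * 32)
feat = img_conv_bn_pool(input=img, num_channel=1, num_filters=16,
                        filter_size=3, pool_size=2, pool_stride=2,
                        act=ReluActivation(), pool_type=MaxPooling())
outputs(fc_layer(input=feat, size=10, act=SoftmaxActivation()))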
@wrap_act_default(param_names=['conv_act'], act=ReluActivation())
@wrap_param_default(
param_names=['pool_type'], default_factory=lambda _: MaxPooling())
def img_conv_group(input,
conv_num_filter,
pool_size, pool_size,
num_channels=None, num_channels=None,
conv_padding=1, conv_padding=1,
...@@ -333,10 +392,12 @@ def img_conv_group(input, conv_num_filter, ...@@ -333,10 +392,12 @@ def img_conv_group(input, conv_num_filter,
else: else:
extra_kwargs['act'] = conv_act[i] extra_kwargs['act'] = conv_act[i]
tmp = img_conv_layer(input=tmp, padding=conv_padding[i], tmp = img_conv_layer(
filter_size=conv_filter_size[i], input=tmp,
num_filters=conv_num_filter[i], padding=conv_padding[i],
**extra_kwargs) filter_size=conv_filter_size[i],
num_filters=conv_num_filter[i],
**extra_kwargs)
# logger.debug("tmp.num_filters = %d" % tmp.num_filters) # logger.debug("tmp.num_filters = %d" % tmp.num_filters)
...@@ -345,34 +406,41 @@ def img_conv_group(input, conv_num_filter, ...@@ -345,34 +406,41 @@ def img_conv_group(input, conv_num_filter,
if dropout == 0 or abs(dropout) < 1e-5: # dropout not set if dropout == 0 or abs(dropout) < 1e-5: # dropout not set
tmp = batch_norm_layer(input=tmp, act=conv_act[i]) tmp = batch_norm_layer(input=tmp, act=conv_act[i])
else: else:
tmp = batch_norm_layer(input=tmp, act=conv_act[i], tmp = batch_norm_layer(
layer_attr=ExtraAttr(drop_rate=dropout)) input=tmp,
act=conv_act[i],
layer_attr=ExtraAttr(drop_rate=dropout))
return img_pool_layer(input=tmp, stride=pool_stride, pool_size=pool_size, return img_pool_layer(
pool_type=pool_type) input=tmp, stride=pool_stride, pool_size=pool_size, pool_type=pool_type)
def small_vgg(input_image, num_channels, num_classes): def small_vgg(input_image, num_channels, num_classes):
def __vgg__(ipt, num_filter, times, dropouts, num_channels_=None): def __vgg__(ipt, num_filter, times, dropouts, num_channels_=None):
return img_conv_group(input=ipt, num_channels=num_channels_, return img_conv_group(
pool_size=2, input=ipt,
pool_stride=2, num_channels=num_channels_,
conv_num_filter=[num_filter] * times, pool_size=2,
conv_filter_size=3, pool_stride=2,
conv_act=ReluActivation(), conv_num_filter=[num_filter] * times,
conv_with_batchnorm=True, conv_filter_size=3,
conv_batchnorm_drop_rate=dropouts, conv_act=ReluActivation(),
pool_type=MaxPooling()) conv_with_batchnorm=True,
conv_batchnorm_drop_rate=dropouts,
pool_type=MaxPooling())
tmp = __vgg__(input_image, 64, 2, [0.3, 0], num_channels) tmp = __vgg__(input_image, 64, 2, [0.3, 0], num_channels)
tmp = __vgg__(tmp, 128, 2, [0.4, 0]) tmp = __vgg__(tmp, 128, 2, [0.4, 0])
tmp = __vgg__(tmp, 256, 3, [0.4, 0.4, 0]) tmp = __vgg__(tmp, 256, 3, [0.4, 0.4, 0])
tmp = __vgg__(tmp, 512, 3, [0.4, 0.4, 0]) tmp = __vgg__(tmp, 512, 3, [0.4, 0.4, 0])
tmp = img_pool_layer(input=tmp, stride=2, tmp = img_pool_layer(
pool_size=2, pool_type=MaxPooling()) input=tmp, stride=2, pool_size=2, pool_type=MaxPooling())
tmp = dropout_layer(input=tmp, dropout_rate=0.5) tmp = dropout_layer(input=tmp, dropout_rate=0.5)
tmp = fc_layer(input=tmp, size=512, layer_attr=ExtraAttr(drop_rate=0.5), tmp = fc_layer(
act=LinearActivation()) input=tmp,
size=512,
layer_attr=ExtraAttr(drop_rate=0.5),
act=LinearActivation())
tmp = batch_norm_layer(input=tmp, act=ReluActivation()) tmp = batch_norm_layer(input=tmp, act=ReluActivation())
return fc_layer(input=tmp, size=num_classes, act=SoftmaxActivation()) return fc_layer(input=tmp, size=num_classes, act=SoftmaxActivation())
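A CIFAR-style sketch that calls small_vgg end to end; the 3x32x32 image, the 10 classes, and the cross_entropy cost are illustrative choices.

from paddle.trainer_config_helpers import *

settings(batch_size=128, learning_rate=1e-3)
img = data_layer(name='image', size=3 * 32 * 32)
label = data_layer(name='label', size=10)
predict = small_vgg(input_image=img, num_channels=3, num_classes=10)
outputs(cross_entropy(input=predict, label=label))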
...@@ -389,37 +457,67 @@ def vgg_16_network(input_image, num_channels, num_classes=1000): ...@@ -389,37 +457,67 @@ def vgg_16_network(input_image, num_channels, num_classes=1000):
:return: :return:
""" """
tmp = img_conv_group(input=input_image, num_channels=num_channels, tmp = img_conv_group(
conv_padding=1, conv_num_filter=[64, 64], input=input_image,
conv_filter_size=3, num_channels=num_channels,
conv_act=ReluActivation(), pool_size=2, conv_padding=1,
pool_stride=2, conv_num_filter=[64, 64],
pool_type=MaxPooling()) conv_filter_size=3,
conv_act=ReluActivation(),
tmp = img_conv_group(input=tmp, conv_num_filter=[128, 128], conv_padding=1, pool_size=2,
conv_filter_size=3, conv_act=ReluActivation(), pool_stride=2,
pool_stride=2, pool_type=MaxPooling(), pool_type=MaxPooling())
pool_size=2)
tmp = img_conv_group(
tmp = img_conv_group(input=tmp, conv_num_filter=[256, 256, 256], input=tmp,
conv_padding=1, conv_num_filter=[128, 128],
conv_filter_size=3, conv_act=ReluActivation(), conv_padding=1,
pool_stride=2, pool_type=MaxPooling(), pool_size=2) conv_filter_size=3,
conv_act=ReluActivation(),
tmp = img_conv_group(input=tmp, conv_num_filter=[512, 512, 512], pool_stride=2,
conv_padding=1, pool_type=MaxPooling(),
conv_filter_size=3, conv_act=ReluActivation(), pool_size=2)
pool_stride=2, pool_type=MaxPooling(), pool_size=2)
tmp = img_conv_group(input=tmp, conv_num_filter=[512, 512, 512], tmp = img_conv_group(
conv_padding=1, input=tmp,
conv_filter_size=3, conv_act=ReluActivation(), conv_num_filter=[256, 256, 256],
pool_stride=2, pool_type=MaxPooling(), pool_size=2) conv_padding=1,
conv_filter_size=3,
tmp = fc_layer(input=tmp, size=4096, act=ReluActivation(), conv_act=ReluActivation(),
layer_attr=ExtraAttr(drop_rate=0.5)) pool_stride=2,
pool_type=MaxPooling(),
tmp = fc_layer(input=tmp, size=4096, act=ReluActivation(), pool_size=2)
layer_attr=ExtraAttr(drop_rate=0.5))
tmp = img_conv_group(
input=tmp,
conv_num_filter=[512, 512, 512],
conv_padding=1,
conv_filter_size=3,
conv_act=ReluActivation(),
pool_stride=2,
pool_type=MaxPooling(),
pool_size=2)
tmp = img_conv_group(
input=tmp,
conv_num_filter=[512, 512, 512],
conv_padding=1,
conv_filter_size=3,
conv_act=ReluActivation(),
pool_stride=2,
pool_type=MaxPooling(),
pool_size=2)
tmp = fc_layer(
input=tmp,
size=4096,
act=ReluActivation(),
layer_attr=ExtraAttr(drop_rate=0.5))
tmp = fc_layer(
input=tmp,
size=4096,
act=ReluActivation(),
layer_attr=ExtraAttr(drop_rate=0.5))
return fc_layer(input=tmp, size=num_classes, act=SoftmaxActivation()) return fc_layer(input=tmp, size=num_classes, act=SoftmaxActivation())
...@@ -428,10 +526,19 @@ def vgg_16_network(input_image, num_channels, num_classes=1000): ...@@ -428,10 +526,19 @@ def vgg_16_network(input_image, num_channels, num_classes=1000):
# Recurrent # # Recurrent #
############################################################################ ############################################################################
@wrap_name_default("lstm") @wrap_name_default("lstm")
def simple_lstm(input, size, name=None, reverse=False, mat_param_attr=None, def simple_lstm(input,
bias_param_attr=None, inner_param_attr=None, act=None, size,
gate_act=None, state_act=None, mixed_layer_attr=None, name=None,
reverse=False,
mat_param_attr=None,
bias_param_attr=None,
inner_param_attr=None,
act=None,
gate_act=None,
state_act=None,
mixed_layer_attr=None,
lstm_cell_attr=None): lstm_cell_attr=None):
""" """
Simple LSTM Cell. Simple LSTM Cell.
...@@ -485,23 +592,38 @@ def simple_lstm(input, size, name=None, reverse=False, mat_param_attr=None, ...@@ -485,23 +592,38 @@ def simple_lstm(input, size, name=None, reverse=False, mat_param_attr=None,
:rtype: LayerOutput :rtype: LayerOutput
""" """
fc_name = 'lstm_transform_%s' % name fc_name = 'lstm_transform_%s' % name
with mixed_layer(name=fc_name, size=size * 4, with mixed_layer(
act=IdentityActivation(), name=fc_name,
layer_attr=mixed_layer_attr, bias_attr=False) as m: size=size * 4,
act=IdentityActivation(),
layer_attr=mixed_layer_attr,
bias_attr=False) as m:
m += full_matrix_projection(input, param_attr=mat_param_attr) m += full_matrix_projection(input, param_attr=mat_param_attr)
return lstmemory(name=name, input=m, reverse=reverse, return lstmemory(
bias_attr=bias_param_attr, name=name,
param_attr=inner_param_attr, act=act, input=m,
gate_act=gate_act, state_act=state_act, reverse=reverse,
layer_attr=lstm_cell_attr) bias_attr=bias_param_attr,
param_attr=inner_param_attr,
act=act,
gate_act=gate_act,
state_act=state_act,
layer_attr=lstm_cell_attr)
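A minimal sequence-classification sketch around simple_lstm; the feature width, LSTM size, and the two output classes are placeholders.

from paddle.trainer_config_helpers import *

settings(batch_size=128, learning_rate=1e-3)
seq = data_layer(name='word_vectors', size=128)
lstm = simple_lstm(input=seq, size=256)
last = last_seq(input=lstm)
outputs(fc_layer(input=last, size=2, act=SoftmaxActivation()))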
@wrap_name_default('lstm_unit') @wrap_name_default('lstm_unit')
def lstmemory_unit(input, name=None, size=None, param_attr=None, def lstmemory_unit(input,
act=None, gate_act=None, state_act=None, name=None,
mixed_bias_attr=None, lstm_bias_attr=None, size=None,
mixed_layer_attr=None, lstm_layer_attr=None, param_attr=None,
act=None,
gate_act=None,
state_act=None,
mixed_bias_attr=None,
lstm_bias_attr=None,
mixed_layer_attr=None,
lstm_layer_attr=None,
get_output_layer_attr=None): get_output_layer_attr=None):
""" """
Define calculations that an LSTM unit performs in a single time step. Define calculations that an LSTM unit performs in a single time step.
...@@ -572,10 +694,12 @@ def lstmemory_unit(input, name=None, size=None, param_attr=None, ...@@ -572,10 +694,12 @@ def lstmemory_unit(input, name=None, size=None, param_attr=None,
out_mem = memory(name=name, size=size) out_mem = memory(name=name, size=size)
state_mem = memory(name="%s_state" % name, size=size) state_mem = memory(name="%s_state" % name, size=size)
with mixed_layer(name="%s_input_recurrent" % name, with mixed_layer(
size=size * 4, bias_attr=mixed_bias_attr, name="%s_input_recurrent" % name,
layer_attr=mixed_layer_attr, size=size * 4,
act=IdentityActivation()) as m: bias_attr=mixed_bias_attr,
layer_attr=mixed_layer_attr,
act=IdentityActivation()) as m:
m += identity_projection(input=input) m += identity_projection(input=input)
m += full_matrix_projection(input=out_mem, param_attr=param_attr) m += full_matrix_projection(input=out_mem, param_attr=param_attr)
...@@ -588,22 +712,29 @@ def lstmemory_unit(input, name=None, size=None, param_attr=None, ...@@ -588,22 +712,29 @@ def lstmemory_unit(input, name=None, size=None, param_attr=None,
act=act, act=act,
gate_act=gate_act, gate_act=gate_act,
state_act=state_act, state_act=state_act,
layer_attr=lstm_layer_attr layer_attr=lstm_layer_attr)
) get_output_layer(
get_output_layer(name='%s_state' % name, name='%s_state' % name,
input=lstm_out, input=lstm_out,
arg_name='state', arg_name='state',
layer_attr=get_output_layer_attr) layer_attr=get_output_layer_attr)
return lstm_out return lstm_out
@wrap_name_default('lstm_group') @wrap_name_default('lstm_group')
def lstmemory_group(input, size=None, name=None, def lstmemory_group(input,
reverse=False, param_attr=None, size=None,
act=None, gate_act=None, state_act=None, name=None,
mixed_bias_attr=None, lstm_bias_attr=None, reverse=False,
mixed_layer_attr=None, lstm_layer_attr=None, param_attr=None,
act=None,
gate_act=None,
state_act=None,
mixed_bias_attr=None,
lstm_bias_attr=None,
mixed_layer_attr=None,
lstm_layer_attr=None,
get_output_layer_attr=None): get_output_layer_attr=None):
""" """
lstm_group is a recurrent layer group version of Long Short Term Memory. It lstm_group is a recurrent layer group version of Long Short Term Memory. It
...@@ -665,20 +796,25 @@ def lstmemory_group(input, size=None, name=None, ...@@ -665,20 +796,25 @@ def lstmemory_group(input, size=None, name=None,
""" """
def __lstm_step__(ipt): def __lstm_step__(ipt):
return lstmemory_unit(input=ipt, name=name, return lstmemory_unit(
size=size, mixed_bias_attr=mixed_bias_attr, input=ipt,
mixed_layer_attr=mixed_layer_attr, name=name,
param_attr=param_attr, size=size,
lstm_bias_attr=lstm_bias_attr, mixed_bias_attr=mixed_bias_attr,
act=act, gate_act=gate_act, mixed_layer_attr=mixed_layer_attr,
state_act=state_act, param_attr=param_attr,
lstm_layer_attr=lstm_layer_attr, lstm_bias_attr=lstm_bias_attr,
get_output_layer_attr=get_output_layer_attr) act=act,
gate_act=gate_act,
return recurrent_group(name='%s_recurrent_group' % name, state_act=state_act,
step=__lstm_step__, lstm_layer_attr=lstm_layer_attr,
reverse=reverse, get_output_layer_attr=get_output_layer_attr)
input=input)
return recurrent_group(
name='%s_recurrent_group' % name,
step=__lstm_step__,
reverse=reverse,
input=input)
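A sketch for lstmemory_group; as the identity projection into the 4 * size mixed layer above suggests, the caller provides an input that is already projected to four times the cell size. Sizes and names are placeholders.

from paddle.trainer_config_helpers import *

settings(batch_size=128, learning_rate=1e-3)
seq = data_layer(name='word_vectors', size=128)
proj = fc_layer(input=seq, size=256 * 4, act=LinearActivation())  # 4 * cell size
lstm_seq = lstmemory_group(input=proj, size=256)
outputs(last_seq(input=lstm_seq))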
@wrap_name_default('gru_unit') @wrap_name_default('gru_unit')
...@@ -728,8 +864,7 @@ def gru_unit(input, ...@@ -728,8 +864,7 @@ def gru_unit(input,
bias_attr=gru_bias_attr, bias_attr=gru_bias_attr,
act=act, act=act,
gate_act=gate_act, gate_act=gate_act,
layer_attr=gru_layer_attr layer_attr=gru_layer_attr)
)
return gru_out return gru_out
...@@ -739,7 +874,8 @@ def gru_group(input, ...@@ -739,7 +874,8 @@ def gru_group(input,
name=None, name=None,
reverse=False, reverse=False,
gru_bias_attr=None, gru_bias_attr=None,
act=None, gate_act=None, act=None,
gate_act=None,
gru_layer_attr=None): gru_layer_attr=None):
""" """
gru_group is a recurrent layer group version of Gated Recurrent Unit. It gru_group is a recurrent layer group version of Gated Recurrent Unit. It
...@@ -788,13 +924,13 @@ def gru_group(input, ...@@ -788,13 +924,13 @@ def gru_group(input,
gru_bias_attr=gru_bias_attr, gru_bias_attr=gru_bias_attr,
act=act, act=act,
gate_act=gate_act, gate_act=gate_act,
gru_layer_attr=gru_layer_attr gru_layer_attr=gru_layer_attr)
)
return recurrent_group(name='%s_recurrent_group' % name, return recurrent_group(
step=__gru_step__, name='%s_recurrent_group' % name,
reverse=reverse, step=__gru_step__,
input=input) reverse=reverse,
input=input)
@wrap_name_default('simple_gru') @wrap_name_default('simple_gru')
...@@ -808,8 +944,7 @@ def simple_gru(input, ...@@ -808,8 +944,7 @@ def simple_gru(input,
gru_bias_attr=None, gru_bias_attr=None,
act=None, act=None,
gate_act=None, gate_act=None,
gru_layer_attr=None gru_layer_attr=None):
):
""" """
You may see gru_step_layer, grumemory in layers.py, gru_unit, gru_group, You may see gru_step_layer, grumemory in layers.py, gru_unit, gru_group,
simple_gru in network.py. The reason why there are so many interfaces is simple_gru in network.py. The reason why there are so many interfaces is
...@@ -862,20 +997,22 @@ def simple_gru(input, ...@@ -862,20 +997,22 @@ def simple_gru(input,
:return: the gru group. :return: the gru group.
:rtype: LayerOutput :rtype: LayerOutput
""" """
with mixed_layer(name='%s_transform' % name, with mixed_layer(
size=size * 3, name='%s_transform' % name,
bias_attr=mixed_bias_param_attr, size=size * 3,
layer_attr=mixed_layer_attr) as m: bias_attr=mixed_bias_param_attr,
layer_attr=mixed_layer_attr) as m:
m += full_matrix_projection(input=input, param_attr=mixed_param_attr) m += full_matrix_projection(input=input, param_attr=mixed_param_attr)
return gru_group(name=name, return gru_group(
size=size, name=name,
input=m, size=size,
reverse=reverse, input=m,
gru_bias_attr=gru_bias_attr, reverse=reverse,
act=act, gru_bias_attr=gru_bias_attr,
gate_act=gate_act, act=act,
gru_layer_attr=gru_layer_attr) gate_act=gate_act,
gru_layer_attr=gru_layer_attr)
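A sketch for simple_gru, which performs the 3 * size input transform internally, so a raw feature sequence can be fed directly; sizes are placeholders.

from paddle.trainer_config_helpers import *

settings(batch_size=128, learning_rate=1e-3)
seq = data_layer(name='word_vectors', size=128)
gru_seq = simple_gru(input=seq, size=256)
outputs(last_seq(input=gru_seq))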
@wrap_name_default('simple_gru2') @wrap_name_default('simple_gru2')
...@@ -890,8 +1027,7 @@ def simple_gru2(input, ...@@ -890,8 +1027,7 @@ def simple_gru2(input,
act=None, act=None,
gate_act=None, gate_act=None,
mixed_layer_attr=None, mixed_layer_attr=None,
gru_cell_attr=None gru_cell_attr=None):
):
""" """
simple_gru2 is the same as simple_gru, but using grumemory instead. simple_gru2 is the same as simple_gru, but using grumemory instead.
Please see grumemory in layers.py for more detail about the maths. Please see grumemory in layers.py for more detail about the maths.
...@@ -922,37 +1058,50 @@ def simple_gru2(input, ...@@ -922,37 +1058,50 @@ def simple_gru2(input,
:return: the gru group. :return: the gru group.
:rtype: LayerOutput :rtype: LayerOutput
""" """
with mixed_layer(name='%s_transform' % name, with mixed_layer(
size=size * 3, name='%s_transform' % name,
bias_attr=mixed_bias_attr, size=size * 3,
layer_attr=mixed_layer_attr) as m: bias_attr=mixed_bias_attr,
layer_attr=mixed_layer_attr) as m:
m += full_matrix_projection(input=input, param_attr=mixed_param_attr) m += full_matrix_projection(input=input, param_attr=mixed_param_attr)
return grumemory(name=name, return grumemory(
size=size, name=name,
input=m, size=size,
reverse=reverse, input=m,
bias_attr=gru_bias_attr, reverse=reverse,
param_attr=gru_param_attr, bias_attr=gru_bias_attr,
act=act, param_attr=gru_param_attr,
gate_act=gate_act, act=act,
layer_attr=gru_cell_attr) gate_act=gate_act,
layer_attr=gru_cell_attr)
@wrap_name_default("bidirectional_gru") @wrap_name_default("bidirectional_gru")
def bidirectional_gru(input, size, name=None, return_seq=False, def bidirectional_gru(input,
fwd_mixed_param_attr=None, fwd_mixed_bias_attr=None, size,
fwd_gru_param_attr=None, fwd_gru_bias_attr=None, name=None,
fwd_act=None, fwd_gate_act=None, return_seq=False,
fwd_mixed_layer_attr=None, fwd_gru_cell_attr=None, fwd_mixed_param_attr=None,
fwd_mixed_bias_attr=None,
bwd_mixed_param_attr=None, bwd_mixed_bias_attr=None, fwd_gru_param_attr=None,
bwd_gru_param_attr=None, bwd_gru_bias_attr=None, fwd_gru_bias_attr=None,
bwd_act=None, bwd_gate_act=None, fwd_act=None,
bwd_mixed_layer_attr=None, bwd_gru_cell_attr=None, fwd_gate_act=None,
fwd_mixed_layer_attr=None,
last_seq_attr=None, first_seq_attr=None, fwd_gru_cell_attr=None,
concat_attr=None, concat_act=None): bwd_mixed_param_attr=None,
bwd_mixed_bias_attr=None,
bwd_gru_param_attr=None,
bwd_gru_bias_attr=None,
bwd_act=None,
bwd_gate_act=None,
bwd_mixed_layer_attr=None,
bwd_gru_cell_attr=None,
last_seq_attr=None,
first_seq_attr=None,
concat_attr=None,
concat_act=None):
""" """
A bidirectional_gru is a recurrent unit that iterates over the input A bidirectional_gru is a recurrent unit that iterates over the input
sequence both in forward and backward orders, and then concatenates two sequence both in forward and backward orders, and then concatenates two
...@@ -983,41 +1132,61 @@ def bidirectional_gru(input, size, name=None, return_seq=False, ...@@ -983,41 +1132,61 @@ def bidirectional_gru(input, size, name=None, return_seq=False,
""" """
args = locals() args = locals()
fw = simple_gru2(name='%s_fw' % name, input=input, size=size, fw = simple_gru2(
**dict((k[len('fwd_'):], v) for k, v in args.iteritems() name='%s_fw' % name,
if k.startswith('fwd_'))) input=input,
size=size,
**dict((k[len('fwd_'):], v) for k, v in args.iteritems()
if k.startswith('fwd_')))
bw = simple_gru2(name="%s_bw" % name, input=input, size=size, bw = simple_gru2(
reverse=True, name="%s_bw" % name,
**dict((k[len('bwd_'):], v) for k, v in args.iteritems() input=input,
if k.startswith('bwd_'))) size=size,
reverse=True,
**dict((k[len('bwd_'):], v) for k, v in args.iteritems()
if k.startswith('bwd_')))
if return_seq: if return_seq:
return concat_layer(name=name, input=[fw, bw], layer_attr=concat_attr, return concat_layer(
act=concat_act) name=name, input=[fw, bw], layer_attr=concat_attr, act=concat_act)
else: else:
fw_seq = last_seq(name="%s_fw_last" % name, input=fw, fw_seq = last_seq(
layer_attr=last_seq_attr) name="%s_fw_last" % name, input=fw, layer_attr=last_seq_attr)
bw_seq = first_seq(name="%s_bw_last" % name, input=bw, bw_seq = first_seq(
layer_attr=first_seq_attr) name="%s_bw_last" % name, input=bw, layer_attr=first_seq_attr)
return concat_layer(name=name, input=[fw_seq, bw_seq], return concat_layer(
layer_attr=concat_attr, act=concat_act) name=name,
input=[fw_seq, bw_seq],
layer_attr=concat_attr,
act=concat_act)
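A sketch for bidirectional_gru on the default return_seq=False path, where the last forward state and the first backward state are concatenated; sizes are placeholders.

from paddle.trainer_config_helpers import *

settings(batch_size=128, learning_rate=1e-3)
seq = data_layer(name='word_vectors', size=128)
bi_gru = bidirectional_gru(input=seq, size=256)
outputs(fc_layer(input=bi_gru, size=2, act=SoftmaxActivation()))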
@wrap_name_default("bidirectional_lstm") @wrap_name_default("bidirectional_lstm")
def bidirectional_lstm(input, size, name=None, return_seq=False, def bidirectional_lstm(input,
fwd_mat_param_attr=None, fwd_bias_param_attr=None, size,
fwd_inner_param_attr=None, fwd_act=None, name=None,
fwd_gate_act=None, fwd_state_act=None, return_seq=False,
fwd_mixed_layer_attr=None, fwd_lstm_cell_attr=None, fwd_mat_param_attr=None,
fwd_bias_param_attr=None,
bwd_mat_param_attr=None, bwd_bias_param_attr=None, fwd_inner_param_attr=None,
bwd_inner_param_attr=None, bwd_act=None, fwd_act=None,
bwd_gate_act=None, bwd_state_act=None, fwd_gate_act=None,
bwd_mixed_layer_attr=None, bwd_lstm_cell_attr=None, fwd_state_act=None,
fwd_mixed_layer_attr=None,
last_seq_attr=None, first_seq_attr=None, fwd_lstm_cell_attr=None,
concat_attr=None, concat_act=None): bwd_mat_param_attr=None,
bwd_bias_param_attr=None,
bwd_inner_param_attr=None,
bwd_act=None,
bwd_gate_act=None,
bwd_state_act=None,
bwd_mixed_layer_attr=None,
bwd_lstm_cell_attr=None,
last_seq_attr=None,
first_seq_attr=None,
concat_attr=None,
concat_act=None):
""" """
A bidirectional_lstm is a recurrent unit that iterates over the input A bidirectional_lstm is a recurrent unit that iterates over the input
sequence both in forward and backward orders, and then concatenates two sequence both in forward and backward orders, and then concatenates two
...@@ -1053,25 +1222,34 @@ def bidirectional_lstm(input, size, name=None, return_seq=False, ...@@ -1053,25 +1222,34 @@ def bidirectional_lstm(input, size, name=None, return_seq=False,
""" """
args = locals() args = locals()
fw = simple_lstm(name='%s_fw' % name, input=input, size=size, fw = simple_lstm(
**dict((k[len('fwd_'):], v) for k, v in args.iteritems() name='%s_fw' % name,
if k.startswith('fwd_'))) input=input,
size=size,
**dict((k[len('fwd_'):], v) for k, v in args.iteritems()
if k.startswith('fwd_')))
bw = simple_lstm(name="%s_bw" % name, input=input, size=size, bw = simple_lstm(
reverse=True, name="%s_bw" % name,
**dict((k[len('bwd_'):], v) for k, v in args.iteritems() input=input,
if k.startswith('bwd_'))) size=size,
reverse=True,
**dict((k[len('bwd_'):], v) for k, v in args.iteritems()
if k.startswith('bwd_')))
if return_seq: if return_seq:
return concat_layer(name=name, input=[fw, bw], layer_attr=concat_attr, return concat_layer(
act=concat_act) name=name, input=[fw, bw], layer_attr=concat_attr, act=concat_act)
else: else:
fw_seq = last_seq(name="%s_fw_last" % name, input=fw, fw_seq = last_seq(
layer_attr=last_seq_attr) name="%s_fw_last" % name, input=fw, layer_attr=last_seq_attr)
bw_seq = first_seq(name="%s_bw_last" % name, input=bw, bw_seq = first_seq(
layer_attr=first_seq_attr) name="%s_bw_last" % name, input=bw, layer_attr=first_seq_attr)
return concat_layer(name=name, input=[fw_seq, bw_seq], return concat_layer(
layer_attr=concat_attr, act=concat_act) name=name,
input=[fw_seq, bw_seq],
layer_attr=concat_attr,
act=concat_act)
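A sketch for bidirectional_lstm with return_seq=True, which keeps the full concatenated sequence so it can be pooled afterwards; sizes are placeholders.

from paddle.trainer_config_helpers import *

settings(batch_size=128, learning_rate=1e-3)
seq = data_layer(name='word_vectors', size=128)
bi_lstm_seq = bidirectional_lstm(input=seq, size=256, return_seq=True)
outputs(pooling_layer(input=bi_lstm_seq, pooling_type=MaxPooling()))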
@wrap_name_default() @wrap_name_default()
...@@ -1142,37 +1320,41 @@ def simple_attention(encoded_sequence, ...@@ -1142,37 +1320,41 @@ def simple_attention(encoded_sequence,
proj_size = encoded_proj.size proj_size = encoded_proj.size
with mixed_layer(size=proj_size, name="%s_transform" % name) as m: with mixed_layer(size=proj_size, name="%s_transform" % name) as m:
m += full_matrix_projection(decoder_state, m += full_matrix_projection(
param_attr=transform_param_attr) decoder_state, param_attr=transform_param_attr)
expanded = expand_layer(input=m, expand_as=encoded_sequence, expanded = expand_layer(
name='%s_expand' % name) input=m, expand_as=encoded_sequence, name='%s_expand' % name)
with mixed_layer(size=proj_size, act=weight_act, with mixed_layer(
name="%s_combine" % name) as m: size=proj_size, act=weight_act, name="%s_combine" % name) as m:
m += identity_projection(expanded) m += identity_projection(expanded)
m += identity_projection(encoded_proj) m += identity_projection(encoded_proj)
# sequence softmax is used to normalize similarities between decoder state # sequence softmax is used to normalize similarities between decoder state
# and encoder outputs into a distribution # and encoder outputs into a distribution
attention_weight = fc_layer(input=m, attention_weight = fc_layer(
size=1, input=m,
act=SequenceSoftmaxActivation(), size=1,
param_attr=softmax_param_attr, act=SequenceSoftmaxActivation(),
name="%s_softmax" % name, param_attr=softmax_param_attr,
bias_attr=False) name="%s_softmax" % name,
bias_attr=False)
scaled = scaling_layer(weight=attention_weight, input=encoded_sequence, scaled = scaling_layer(
name='%s_scaling' % name) weight=attention_weight,
input=encoded_sequence,
name='%s_scaling' % name)
return pooling_layer(input=scaled, pooling_type=SumPooling(), return pooling_layer(
name="%s_pooling" % name) input=scaled, pooling_type=SumPooling(), name="%s_pooling" % name)
############################################################################ ############################################################################
# Miscs # # Miscs #
############################################################################ ############################################################################
@wrap_name_default("dropout") @wrap_name_default("dropout")
def dropout_layer(input, dropout_rate, name=None): def dropout_layer(input, dropout_rate, name=None):
""" """
...@@ -1183,9 +1365,12 @@ def dropout_layer(input, dropout_rate, name=None): ...@@ -1183,9 +1365,12 @@ def dropout_layer(input, dropout_rate, name=None):
:param dropout_rate: :param dropout_rate:
:return: :return:
""" """
return addto_layer(name=name, input=input, act=LinearActivation(), return addto_layer(
bias_attr=False, name=name,
layer_attr=ExtraAttr(drop_rate=dropout_rate)) input=input,
act=LinearActivation(),
bias_attr=False,
layer_attr=ExtraAttr(drop_rate=dropout_rate))
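A sketch that applies dropout_layer on top of a hidden representation; the widths and the dropout rate are placeholders.

from paddle.trainer_config_helpers import *

settings(batch_size=128, learning_rate=1e-3)
feats = data_layer(name='features', size=100)
hidden = fc_layer(input=feats, size=128, act=ReluActivation())
dropped = dropout_layer(input=hidden, dropout_rate=0.5)
outputs(fc_layer(input=dropped, size=10, act=SoftmaxActivation()))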
def inputs(layers, *args): def inputs(layers, *args):
...@@ -1218,7 +1403,6 @@ def outputs(layers, *args): ...@@ -1218,7 +1403,6 @@ def outputs(layers, *args):
def __dfs_travel__(layer, def __dfs_travel__(layer,
predicate=lambda x: x.layer_type == LayerType.DATA): predicate=lambda x: x.layer_type == LayerType.DATA):
""" """
DFS LRV traversal for the output layer. DFS LRV traversal for the output layer.
...@@ -1259,8 +1443,9 @@ def outputs(layers, *args): ...@@ -1259,8 +1443,9 @@ def outputs(layers, *args):
for each_layer in layers: for each_layer in layers:
assert isinstance(each_layer, LayerOutput) assert isinstance(each_layer, LayerOutput)
inputs.extend(__dfs_travel__(each_layer)) inputs.extend(__dfs_travel__(each_layer))
outputs_.extend(__dfs_travel__( outputs_.extend(
each_layer, lambda x: x.layer_type == LayerType.COST)) __dfs_travel__(each_layer,
lambda x: x.layer_type == LayerType.COST))
# Currently, we got each leaf node's inputs order, output order. # Currently, we got each leaf node's inputs order, output order.
# We merge them together. # We merge them together.
...@@ -1278,16 +1463,13 @@ def outputs(layers, *args): ...@@ -1278,16 +1463,13 @@ def outputs(layers, *args):
if each_output.name not in final_outputs: if each_output.name not in final_outputs:
final_outputs.append(each_output.name) final_outputs.append(each_output.name)
logger.info( logger.info("".join(["The input order is [", ", ".join(final_inputs), "]"]))
"".join(["The input order is [", ", ".join(final_inputs), "]"])
)
if len(final_outputs) == 0: if len(final_outputs) == 0:
final_outputs = map(lambda x: x.name, layers) final_outputs = map(lambda x: x.name, layers)
logger.info( logger.info("".join(
"".join(["The output order is [", ", ".join(final_outputs), "]" ["The output order is [", ", ".join(final_outputs), "]"]))
]))
Inputs(*final_inputs) Inputs(*final_inputs)
Outputs(*final_outputs) Outputs(*final_outputs)
...@@ -17,11 +17,12 @@ from paddle.trainer.config_parser import Settings, default_decay_rate, \ ...@@ -17,11 +17,12 @@ from paddle.trainer.config_parser import Settings, default_decay_rate, \
from .default_decorators import wrap_param_default from .default_decorators import wrap_param_default
__all__ = ['Optimizer', 'BaseSGDOptimizer', 'MomentumOptimizer', __all__ = [
'AdamaxOptimizer', 'AdamOptimizer', 'AdaGradOptimizer', 'Optimizer', 'BaseSGDOptimizer', 'MomentumOptimizer', 'AdamaxOptimizer',
'RMSPropOptimizer', 'DecayedAdaGradOptimizer', 'AdamOptimizer', 'AdaGradOptimizer', 'RMSPropOptimizer',
'AdaDeltaOptimizer', 'BaseRegularization', 'L2Regularization', 'DecayedAdaGradOptimizer', 'AdaDeltaOptimizer', 'BaseRegularization',
'settings', 'ModelAverage'] 'L2Regularization', 'settings', 'ModelAverage'
]
class Optimizer(object): class Optimizer(object):
...@@ -90,18 +91,15 @@ class MomentumOptimizer(BaseSGDOptimizer): ...@@ -90,18 +91,15 @@ class MomentumOptimizer(BaseSGDOptimizer):
:param sparse: with sparse support or not. :param sparse: with sparse support or not.
:type sparse: bool :type sparse: bool
""" """
def extra_settings(self): def extra_settings(self):
default_momentum(self.momentum) default_momentum(self.momentum)
def to_setting_kwargs(self): def to_setting_kwargs(self):
if self.sparse: if self.sparse:
return { return {'learning_method': 'sparse_momentum'}
'learning_method': 'sparse_momentum'
}
else: else:
return { return {'learning_method': 'momentum'}
'learning_method': 'momentum'
}
def __init__(self, momentum=None, sparse=False): def __init__(self, momentum=None, sparse=False):
self.momentum = momentum self.momentum = momentum
...@@ -197,9 +195,7 @@ class AdaGradOptimizer(BaseSGDOptimizer): ...@@ -197,9 +195,7 @@ class AdaGradOptimizer(BaseSGDOptimizer):
""" """
def to_setting_kwargs(self): def to_setting_kwargs(self):
return { return {'learning_method': 'adagrad'}
'learning_method': 'adagrad'
}
def __init__(self): def __init__(self):
pass pass
...@@ -311,9 +307,7 @@ class L2Regularization(BaseRegularization): ...@@ -311,9 +307,7 @@ class L2Regularization(BaseRegularization):
def to_setting_kwargs(self): def to_setting_kwargs(self):
if self.algorithm == 'owlqn': if self.algorithm == 'owlqn':
return { return {'l2weight': self.decay_rate}
'l2weight': self.decay_rate
}
else: else:
return dict() return dict()
...@@ -330,7 +324,8 @@ class ModelAverage(Optimizer): ...@@ -330,7 +324,8 @@ class ModelAverage(Optimizer):
'do_average_in_cpu': self.do_average_in_cpu 'do_average_in_cpu': self.do_average_in_cpu
} }
def __init__(self, average_window, def __init__(self,
average_window,
max_average_window=None, max_average_window=None,
do_average_in_cpu=False): do_average_in_cpu=False):
self.average_window = average_window self.average_window = average_window
...@@ -356,10 +351,10 @@ def __extends__(dict1, dict2): ...@@ -356,10 +351,10 @@ def __extends__(dict1, dict2):
return dict1 return dict1
@wrap_param_default(['learning_method'], @wrap_param_default(
default_factory=lambda _: MomentumOptimizer()) ['learning_method'], default_factory=lambda _: MomentumOptimizer())
@wrap_param_default(['regularization'], @wrap_param_default(
default_factory=lambda _: BaseRegularization()) ['regularization'], default_factory=lambda _: BaseRegularization())
def settings(batch_size, def settings(batch_size,
learning_rate=1e-3, learning_rate=1e-3,
learning_rate_decay_a=0., learning_rate_decay_a=0.,
...@@ -373,8 +368,7 @@ def settings(batch_size, ...@@ -373,8 +368,7 @@ def settings(batch_size,
regularization=None, regularization=None,
is_async=False, is_async=False,
model_average=None, model_average=None,
gradient_clipping_threshold=None gradient_clipping_threshold=None):
):
""" """
Set the optimization method, learning rate, batch size, and other training Set the optimization method, learning rate, batch size, and other training
settings. The currently supported algorithms are SGD and Async-SGD. settings. The currently supported algorithms are SGD and Async-SGD.
...@@ -415,10 +409,11 @@ def settings(batch_size, ...@@ -415,10 +409,11 @@ def settings(batch_size,
else: else:
algorithm = 'owlqn' algorithm = 'owlqn'
args=['batch_size', 'learning_rate', 'learning_rate_decay_a', args = [
'learning_rate_decay_b', 'learning_rate_schedule', 'batch_size', 'learning_rate', 'learning_rate_decay_a',
'learning_rate_args', 'average_window', 'do_average_in_cpu', 'learning_rate_decay_b', 'learning_rate_schedule', 'learning_rate_args',
'max_average_window'] 'average_window', 'do_average_in_cpu', 'max_average_window'
]
kwargs = dict() kwargs = dict()
kwargs['algorithm'] = algorithm kwargs['algorithm'] = algorithm
for arg in args: for arg in args:
......
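For reference, a settings() call that combines several of the helpers reformatted above; the concrete hyper-parameter values and the small network around it are placeholders.

from paddle.trainer_config_helpers import *

settings(batch_size=128,
         learning_rate=1e-3,
         learning_method=MomentumOptimizer(momentum=0.9),
         model_average=ModelAverage(average_window=0.5),
         gradient_clipping_threshold=25)
din = data_layer(name='input', size=100)
label = data_layer(name='label', size=10)
prob = fc_layer(input=din, size=10, act=SoftmaxActivation())
outputs(cross_entropy(input=prob, label=label))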
...@@ -11,18 +11,12 @@ ...@@ -11,18 +11,12 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
""" """
""" """
__all__ = [ __all__ = [
"BasePoolingType", "BasePoolingType", "MaxPooling", "AvgPooling", "CudnnMaxPooling",
"MaxPooling", "CudnnAvgPooling", "SumPooling", "SquareRootNPooling"
"AvgPooling",
"CudnnMaxPooling",
"CudnnAvgPooling",
"SumPooling",
"SquareRootNPooling"
] ]
...@@ -36,6 +30,7 @@ class BasePoolingType(object): ...@@ -36,6 +30,7 @@ class BasePoolingType(object):
:type name: basestring :type name: basestring
""" """
def __init__(self, name): def __init__(self, name):
self.name = name self.name = name
...@@ -54,6 +49,7 @@ class MaxPooling(BasePoolingType): ...@@ -54,6 +49,7 @@ class MaxPooling(BasePoolingType):
value. None means use default value in proto. value. None means use default value in proto.
:type output_max_index: bool|None :type output_max_index: bool|None
""" """
def __init__(self, output_max_index=None): def __init__(self, output_max_index=None):
BasePoolingType.__init__(self, "max") BasePoolingType.__init__(self, "max")
self.output_max_index = output_max_index self.output_max_index = output_max_index
...@@ -64,6 +60,7 @@ class CudnnMaxPooling(BasePoolingType): ...@@ -64,6 +60,7 @@ class CudnnMaxPooling(BasePoolingType):
Cudnn max pooling only supports GPU. Returns the maximum value in the Cudnn max pooling only supports GPU. Returns the maximum value in the
pooling window. pooling window.
""" """
def __init__(self): def __init__(self):
BasePoolingType.__init__(self, "cudnn-max-pool") BasePoolingType.__init__(self, "cudnn-max-pool")
...@@ -73,9 +70,11 @@ class CudnnAvgPooling(BasePoolingType): ...@@ -73,9 +70,11 @@ class CudnnAvgPooling(BasePoolingType):
Cudnn average pooling only supports GPU. Returns the average value in the Cudnn average pooling only supports GPU. Returns the average value in the
pooling window. pooling window.
""" """
def __init__(self): def __init__(self):
BasePoolingType.__init__(self, "cudnn-avg-pool") BasePoolingType.__init__(self, "cudnn-avg-pool")
class AvgPooling(BasePoolingType): class AvgPooling(BasePoolingType):
""" """
Average pooling. Average pooling.
...@@ -105,7 +104,9 @@ class SumPooling(AvgPooling): ...@@ -105,7 +104,9 @@ class SumPooling(AvgPooling):
sum(samples\\_of\\_a\\_sequence) sum(samples\\_of\\_a\\_sequence)
""" """
def __init__(self): AvgPooling.__init__(self, AvgPooling.STRATEGY_SUM)
def __init__(self):
AvgPooling.__init__(self, AvgPooling.STRATEGY_SUM)
class SquareRootNPooling(AvgPooling): class SquareRootNPooling(AvgPooling):
...@@ -118,4 +119,6 @@ class SquareRootNPooling(AvgPooling): ...@@ -118,4 +119,6 @@ class SquareRootNPooling(AvgPooling):
sum(samples\\_of\\_a\\_sequence)/sqrt(sample\\_num) sum(samples\\_of\\_a\\_sequence)/sqrt(sample\\_num)
""" """
def __init__(self): AvgPooling.__init__(self, AvgPooling.STRATEGY_SQROOTN)
def __init__(self):
AvgPooling.__init__(self, AvgPooling.STRATEGY_SQROOTN)
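A short sketch of the pooling types above in use with pooling_layer over a sequence input; the layer names and sizes are placeholders.

from paddle.trainer_config_helpers import *

settings(batch_size=128, learning_rate=1e-3)
seq = data_layer(name='word_vectors', size=128)
avg = pooling_layer(input=seq, pooling_type=AvgPooling())
summed = pooling_layer(input=seq, pooling_type=SumPooling())
outputs(avg, summed)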
from paddle.trainer_config_helpers import * from paddle.trainer_config_helpers import *
settings( settings(learning_rate=1e-3, batch_size=1000)
learning_rate=1e-3,
batch_size=1000
)
img = data_layer(name='image', size=256*256) img = data_layer(name='image', size=256 * 256)
# the parse_conv in config_parse.py is not strictly accurate when filter_size # the parse_conv in config_parse.py is not strictly accurate when filter_size
# is not square. So here set square filter_size. # is not square. So here set square filter_size.
img_conv = img_conv_layer(input=img, num_channels=1, num_filters=64, img_conv = img_conv_layer(
filter_size=(32, 32), padding=(1, 1), stride=(1, 1), input=img,
act=LinearActivation()) num_channels=1,
num_filters=64,
filter_size=(32, 32),
padding=(1, 1),
stride=(1, 1),
act=LinearActivation())
img_bn = batch_norm_layer(input=img_conv, act=ReluActivation()) img_bn = batch_norm_layer(input=img_conv, act=ReluActivation())
img_norm = img_cmrnorm_layer(input=img_bn, size=32) img_norm = img_cmrnorm_layer(input=img_bn, size=32)
img_pool = img_pool_layer(input=img_conv, pool_size=32, pool_type=MaxPooling()) img_pool = img_pool_layer(input=img_conv, pool_size=32, pool_type=MaxPooling())
outputs(img_pool, img_norm) outputs(img_pool, img_norm)
from paddle.trainer_config_helpers import * from paddle.trainer_config_helpers import *
settings( settings(learning_rate=1e-3, batch_size=1000)
learning_rate=1e-3,
batch_size=1000
)
img = data_layer(name='image', size=227*227) img = data_layer(name='image', size=227 * 227)
# the parse_conv in config_parse.py is not strictly accurate when filter_size # the parse_conv in config_parse.py is not strictly accurate when filter_size
# is not square. So here set square filter_size. # is not square. So here set square filter_size.
img_conv = img_conv_layer(input=img, num_channels=1, num_filters=64, img_conv = img_conv_layer(
filter_size=(32, 32), padding=(1, 1), stride=(1, 1), input=img,
act=LinearActivation(), trans=True) num_channels=1,
num_filters=64,
filter_size=(32, 32),
padding=(1, 1),
stride=(1, 1),
act=LinearActivation(),
trans=True)
img_bn = batch_norm_layer(input=img_conv, act=ReluActivation()) img_bn = batch_norm_layer(input=img_conv, act=ReluActivation())
img_norm = img_cmrnorm_layer(input=img_bn, size=32) img_norm = img_cmrnorm_layer(input=img_bn, size=32)
img_pool = img_pool_layer(input=img_conv, pool_size=32, pool_type=MaxPooling()) img_pool = img_pool_layer(input=img_conv, pool_size=32, pool_type=MaxPooling())
outputs(img_pool, img_norm) outputs(img_pool, img_norm)
from paddle.trainer_config_helpers import * from paddle.trainer_config_helpers import *
settings( settings(batch_size=1000, learning_rate=1e-5)
batch_size=1000,
learning_rate=1e-5
)
din = data_layer(name='data', size=30) din = data_layer(name='data', size=30)
seq_op = [ seq_op = [first_seq, last_seq]
first_seq,
last_seq
]
agg_level = [ agg_level = [AggregateLevel.EACH_SEQUENCE, AggregateLevel.EACH_TIMESTEP]
AggregateLevel.EACH_SEQUENCE,
AggregateLevel.EACH_TIMESTEP
]
opts = [] opts = []
...@@ -23,4 +14,4 @@ for op in seq_op: ...@@ -23,4 +14,4 @@ for op in seq_op:
for al in agg_level: for al in agg_level:
opts.append(op(input=din, agg_level=al)) opts.append(op(input=din, agg_level=al))
outputs(opts) outputs(opts)
\ No newline at end of file
...@@ -4,18 +4,18 @@ Test all activations. ...@@ -4,18 +4,18 @@ Test all activations.
from paddle.trainer_config_helpers import * from paddle.trainer_config_helpers import *
settings( settings(learning_rate=1e-4, batch_size=1000)
learning_rate=1e-4,
batch_size=1000
)
din = data_layer(name='input', size=100) din = data_layer(name='input', size=100)
acts = [ acts = [
TanhActivation, SigmoidActivation, SoftmaxActivation, IdentityActivation, TanhActivation, SigmoidActivation, SoftmaxActivation, IdentityActivation,
LinearActivation, ExpActivation, ReluActivation, BReluActivation, LinearActivation, ExpActivation, ReluActivation, BReluActivation,
SoftReluActivation, STanhActivation, AbsActivation, SquareActivation] SoftReluActivation, STanhActivation, AbsActivation, SquareActivation
]
outputs( outputs([
[fc_layer(input=din, size=100, act=act(), name="layer_%d" % i) for i, act in fc_layer(
enumerate(acts)]) input=din, size=100, act=act(), name="layer_%d" % i)
for i, act in enumerate(acts)
])
from paddle.trainer_config_helpers import * from paddle.trainer_config_helpers import *
from paddle.trainer_config_helpers import math from paddle.trainer_config_helpers import math
settings( settings(batch_size=1000, learning_rate=1e-5)
batch_size=1000,
learning_rate=1e-5
)
x = data_layer(name='data', size=100) x = data_layer(name='data', size=100)
x = math.exp(x) x = math.exp(x)
...@@ -21,10 +18,9 @@ y = y - 2 ...@@ -21,10 +18,9 @@ y = y - 2
y = 2 - y y = 2 - y
y = 2 * y y = 2 * y
y = y * 3 y = y * 3
z= data_layer(name='data_2', size=1) z = data_layer(name='data_2', size=1)
y = y * z y = y * z
y = z * y y = z * y
y = y + z y = y + z
y = z + y y = z + y
outputs(y) outputs(y)
...@@ -3,10 +3,7 @@ Test mixed layer, projections and operators. ...@@ -3,10 +3,7 @@ Test mixed layer, projections and operators.
''' '''
from paddle.trainer_config_helpers import * from paddle.trainer_config_helpers import *
settings( settings(batch_size=1000, learning_rate=1e-4)
batch_size=1000,
learning_rate=1e-4
)
din = data_layer(name='test', size=100) din = data_layer(name='test', size=100)
...@@ -30,18 +27,20 @@ with mixed_layer() as m5: ...@@ -30,18 +27,20 @@ with mixed_layer() as m5:
with mixed_layer() as m6: with mixed_layer() as m6:
m6 += dotmul_operator(a=m3, b=m4) m6 += dotmul_operator(a=m3, b=m4)
img = data_layer(name='img', size=32*32) img = data_layer(name='img', size=32 * 32)
flt = data_layer(name='filter', size=3*3*1*64) flt = data_layer(name='filter', size=3 * 3 * 1 * 64)
with mixed_layer() as m7: with mixed_layer() as m7:
m7 += conv_operator(img=img, filter=flt, num_filters=64, m7 += conv_operator(
num_channels=1, filter_size=3) img=img, filter=flt, num_filters=64, num_channels=1, filter_size=3)
end = mixed_layer(input=[full_matrix_projection(input=m5), end = mixed_layer(
trans_full_matrix_projection(input=m6), input=[
full_matrix_projection(input=m7)], full_matrix_projection(input=m5),
size=100, trans_full_matrix_projection(input=m6), full_matrix_projection(input=m7)
layer_attr=ExtraAttr(drop_rate=0.5, ],
error_clipping_threshold=40)) size=100,
layer_attr=ExtraAttr(
drop_rate=0.5, error_clipping_threshold=40))
outputs(end) outputs(end)
from paddle.trainer_config_helpers import * from paddle.trainer_config_helpers import *
settings( settings(learning_rate=1e-4, batch_size=1000)
learning_rate=1e-4,
batch_size=1000
)
a = data_layer(name='feature_a', size=200) a = data_layer(name='feature_a', size=200)
b = data_layer(name='feature_b', size=200) b = data_layer(name='feature_b', size=200)
...@@ -11,12 +8,22 @@ b = data_layer(name='feature_b', size=200) ...@@ -11,12 +8,22 @@ b = data_layer(name='feature_b', size=200)
fc_param = ParamAttr(name='fc_param', initial_max=1.0, initial_min=-1.0) fc_param = ParamAttr(name='fc_param', initial_max=1.0, initial_min=-1.0)
bias_param = ParamAttr(name='bias_param', initial_mean=0.0, initial_std=0.0) bias_param = ParamAttr(name='bias_param', initial_mean=0.0, initial_std=0.0)
softmax_param = ParamAttr(name='softmax_param', initial_max=1.0, initial_min=-1.0) softmax_param = ParamAttr(
name='softmax_param', initial_max=1.0, initial_min=-1.0)
hidden_a = fc_layer(input=a, size=200, param_attr=fc_param, bias_attr=bias_param) hidden_a = fc_layer(
hidden_b = fc_layer(input=b, size=200, param_attr=fc_param, bias_attr=bias_param) input=a, size=200, param_attr=fc_param, bias_attr=bias_param)
hidden_b = fc_layer(
input=b, size=200, param_attr=fc_param, bias_attr=bias_param)
predict = fc_layer(input=[hidden_a, hidden_b], param_attr=[softmax_param, softmax_param], predict = fc_layer(
bias_attr=False, size=10, act=SoftmaxActivation()) input=[hidden_a, hidden_b],
param_attr=[softmax_param, softmax_param],
bias_attr=False,
size=10,
act=SoftmaxActivation())
outputs(classification_cost(input=predict, label=data_layer(name='label', size=10))) outputs(
classification_cost(
input=predict, label=data_layer(
name='label', size=10)))
...@@ -16,14 +16,26 @@ with mixed_layer(size=400, bias_attr=False) as m2: ...@@ -16,14 +16,26 @@ with mixed_layer(size=400, bias_attr=False) as m2:
lstm_param = ParamAttr(name='lstm_param') lstm_param = ParamAttr(name='lstm_param')
lstm_bias = ParamAttr(name='lstm_bias', initial_mean=0., initial_std=0.) lstm_bias = ParamAttr(name='lstm_bias', initial_mean=0., initial_std=0.)
lstm1 = lstmemory_group(input=m1, param_attr=lstm_param, lstm_bias_attr=lstm_bias, mixed_bias_attr=False) lstm1 = lstmemory_group(
lstm2 = lstmemory_group(input=m2, param_attr=lstm_param, lstm_bias_attr=lstm_bias, mixed_bias_attr=False) input=m1,
param_attr=lstm_param,
lstm_bias_attr=lstm_bias,
mixed_bias_attr=False)
lstm2 = lstmemory_group(
input=m2,
param_attr=lstm_param,
lstm_bias_attr=lstm_bias,
mixed_bias_attr=False)
softmax_param = ParamAttr(name='softmax_param') softmax_param = ParamAttr(name='softmax_param')
predict = fc_layer(input=[last_seq(input=lstm1), last_seq(input=lstm2)], predict = fc_layer(
size=10, input=[last_seq(input=lstm1), last_seq(input=lstm2)],
param_attr=[softmax_param, softmax_param], size=10,
bias_attr=False, param_attr=[softmax_param, softmax_param],
act=SoftmaxActivation()) bias_attr=False,
outputs(classification_cost(input=predict, label=data_layer(name='label', size=10))) act=SoftmaxActivation())
outputs(
classification_cost(
input=predict, label=data_layer(
name='label', size=10)))
from paddle.trainer_config_helpers import * from paddle.trainer_config_helpers import *
settings( settings(batch_size=1000, learning_rate=1e-4)
batch_size=1000,
learning_rate=1e-4
)
din = data_layer(name='data', size=200) din = data_layer(name='data', size=200)
...@@ -13,24 +10,28 @@ rnn = recurrent_layer(input=hidden, act=SigmoidActivation()) ...@@ -13,24 +10,28 @@ rnn = recurrent_layer(input=hidden, act=SigmoidActivation())
rnn2 = recurrent_layer(input=hidden, act=SigmoidActivation(), reverse=True) rnn2 = recurrent_layer(input=hidden, act=SigmoidActivation(), reverse=True)
lstm1_param = fc_layer(input=hidden, size=200*4, act=LinearActivation(), lstm1_param = fc_layer(
bias_attr=False) input=hidden, size=200 * 4, act=LinearActivation(), bias_attr=False)
lstm1 = lstmemory(input=lstm1_param, act=SigmoidActivation()) lstm1 = lstmemory(input=lstm1_param, act=SigmoidActivation())
lstm2_param = fc_layer(input=hidden, size=200*4, act=LinearActivation(), lstm2_param = fc_layer(
bias_attr=False) input=hidden, size=200 * 4, act=LinearActivation(), bias_attr=False)
lstm2 = lstmemory(input=lstm2_param, act=SigmoidActivation(), reverse=True) lstm2 = lstmemory(input=lstm2_param, act=SigmoidActivation(), reverse=True)
gru1_param = fc_layer(input=hidden, size=200*3, act=LinearActivation(), gru1_param = fc_layer(
bias_attr=False) input=hidden, size=200 * 3, act=LinearActivation(), bias_attr=False)
gru1 = grumemory(input=gru1_param, act=SigmoidActivation()) gru1 = grumemory(input=gru1_param, act=SigmoidActivation())
gru2_param = fc_layer(input=hidden, size=200*3, act=LinearActivation(), gru2_param = fc_layer(
bias_attr=False) input=hidden, size=200 * 3, act=LinearActivation(), bias_attr=False)
gru2 = grumemory(input=gru2_param, act=SigmoidActivation(), reverse=True) gru2 = grumemory(input=gru2_param, act=SigmoidActivation(), reverse=True)
outputs(last_seq(input=rnn), first_seq(input=rnn2), outputs(
last_seq(input=lstm1), first_seq(input=lstm2), last_seq(input=rnn),
last_seq(input=gru1), first_seq(gru2)) first_seq(input=rnn2),
last_seq(input=lstm1),
first_seq(input=lstm2),
last_seq(input=gru1),
first_seq(gru2))
from paddle.trainer_config_helpers import * from paddle.trainer_config_helpers import *
settings( settings(batch_size=1000, learning_rate=1e-4)
batch_size=1000,
learning_rate=1e-4
)
din = data_layer(name='data', size=120) din = data_layer(name='data', size=120)
......
from paddle.trainer_config_helpers import * from paddle.trainer_config_helpers import *
settings( settings(batch_size=1000, learning_rate=1e-5)
batch_size=1000,
learning_rate=1e-5
)
data = data_layer(name='data', size=2304) data = data_layer(name='data', size=2304)
conv = img_conv_layer(input=data, conv = img_conv_layer(
filter_size = 3, input=data,
num_channels=1, filter_size=3,
num_filters=16, num_channels=1,
padding=1, num_filters=16,
act=LinearActivation(), padding=1,
bias_attr=True) act=LinearActivation(),
bias_attr=True)
bilinear = bilinear_interp_layer(input=conv, bilinear = bilinear_interp_layer(input=conv, out_size_x=64, out_size_y=64)
out_size_x=64,
out_size_y=64)
pool = img_pool_layer(input=bilinear, pool = img_pool_layer(
num_channels=4, input=bilinear,
pool_size=2, num_channels=4,
stride=2, pool_size=2,
pool_type=MaxPooling()) stride=2,
pool_type=MaxPooling())
fc = fc_layer(input=pool, size=384, bias_attr=False) fc = fc_layer(input=pool, size=384, bias_attr=False)
outputs(fc) outputs(fc)
\ No newline at end of file
from paddle.trainer_config_helpers import * from paddle.trainer_config_helpers import *
settings( settings(learning_rate=1e-4, batch_size=1000)
learning_rate=1e-4,
batch_size=1000
)
seq_in = data_layer(name='input', size=200) seq_in = data_layer(name='input', size=200)
labels = data_layer(name='labels', size=5000) labels = data_layer(name='labels', size=5000)
...@@ -12,17 +9,33 @@ probs = data_layer(name='probs', size=10) ...@@ -12,17 +9,33 @@ probs = data_layer(name='probs', size=10)
xe_label = data_layer(name='xe-label', size=10) xe_label = data_layer(name='xe-label', size=10)
hidden = fc_layer(input=seq_in, size=4) hidden = fc_layer(input=seq_in, size=4)
outputs(ctc_layer(input=seq_in, label=labels), outputs(
crf_layer(input=hidden, ctc_layer(
label=data_layer(name='crf_label', size=4)), input=seq_in, label=labels),
rank_cost(left=data_layer(name='left', size=1), crf_layer(
right=data_layer(name='right', size=1), input=hidden, label=data_layer(
label=data_layer(name='label', size=1)), name='crf_label', size=4)),
lambda_cost(input=data_layer(name='list_feature', size=100), rank_cost(
score=data_layer(name='list_scores', size=1)), left=data_layer(
cross_entropy(input=probs, label=xe_label), name='left', size=1),
cross_entropy_with_selfnorm(input=probs, label=xe_label), right=data_layer(
huber_cost(input=data_layer(name='huber_probs', size=1), name='right', size=1),
label=data_layer(name='huber_label', size=1)), label=data_layer(
multi_binary_label_cross_entropy(input=probs, label=xe_label), name='label', size=1)),
sum_cost(input=hidden)) lambda_cost(
input=data_layer(
name='list_feature', size=100),
score=data_layer(
name='list_scores', size=1)),
cross_entropy(
input=probs, label=xe_label),
cross_entropy_with_selfnorm(
input=probs, label=xe_label),
huber_cost(
input=data_layer(
name='huber_probs', size=1),
label=data_layer(
name='huber_label', size=1)),
multi_binary_label_cross_entropy(
input=probs, label=xe_label),
sum_cost(input=hidden))
from paddle.trainer_config_helpers import * from paddle.trainer_config_helpers import *
settings( settings(learning_rate=1e-4, batch_size=1000)
learning_rate=1e-4,
batch_size=1000
)
data = data_layer(name='input', size=300) data = data_layer(name='input', size=300)
lbl = data_layer(name='label', size=1) lbl = data_layer(name='label', size=1)
wt = data_layer(name='weight', size=1) wt = data_layer(name='weight', size=1)
fc = fc_layer(input=data, size=10, act=SoftmaxActivation()) fc = fc_layer(input=data, size=10, act=SoftmaxActivation())
outputs(classification_cost(input=fc, label=lbl, weight=wt), outputs(
regression_cost(input=fc, label=lbl, weight=wt)) classification_cost(
input=fc, label=lbl, weight=wt),
regression_cost(
input=fc, label=lbl, weight=wt))
from paddle.trainer_config_helpers import * from paddle.trainer_config_helpers import *
settings( settings(batch_size=1000, learning_rate=1e-5)
batch_size=1000,
learning_rate=1e-5
)
din = data_layer(name='data', size=30) din = data_layer(name='data', size=30)
data_seq = data_layer(name='data_seq', size=30) data_seq = data_layer(name='data_seq', size=30)
outputs(expand_layer(input=din, expand_as=data_seq, outputs(
expand_level=ExpandLevel.FROM_SEQUENCE), expand_layer(
expand_layer(input=din, expand_as=data_seq, input=din, expand_as=data_seq, expand_level=ExpandLevel.FROM_SEQUENCE),
expand_level=ExpandLevel.FROM_TIMESTEP)) expand_layer(
input=din, expand_as=data_seq, expand_level=ExpandLevel.FROM_TIMESTEP))
from paddle.trainer_config_helpers import * from paddle.trainer_config_helpers import *
settings( settings(batch_size=1000, learning_rate=1e-5)
batch_size=1000,
learning_rate=1e-5
)
din = data_layer(name='data', size=100) din = data_layer(name='data', size=100)
trans = trans_layer(input=din) trans = trans_layer(input=din)
hidden = fc_layer(input=trans, size=100, hidden = fc_layer(input=trans, size=100, bias_attr=False)
bias_attr=False)
mask = data_layer(name='mask', size=100) mask = data_layer(name='mask', size=100)
hidden_sel = selective_fc_layer(input=din, select=mask, size=100, hidden_sel = selective_fc_layer(
act=SigmoidActivation()) input=din, select=mask, size=100, act=SigmoidActivation())
outputs(hidden, hidden_sel) outputs(hidden, hidden_sel)
from paddle.trainer_config_helpers import * from paddle.trainer_config_helpers import *
settings( settings(batch_size=1000, learning_rate=1e-4)
batch_size=1000,
learning_rate=1e-4
)
din = data_layer(name='data', size=120) din = data_layer(name='data', size=120)
outputs(grumemory(input=din, size=40, reverse=True, gate_act=TanhActivation(), outputs(
act=SigmoidActivation())) grumemory(
input=din,
size=40,
reverse=True,
gate_act=TanhActivation(),
act=SigmoidActivation()))
from paddle.trainer_config_helpers import * from paddle.trainer_config_helpers import *
settings( settings(learning_rate=1e-4, batch_size=1000)
learning_rate=1e-4,
batch_size=1000
)
din = data_layer(name='data', size=100) din = data_layer(name='data', size=100)
label = data_layer(name='label', size=10) label = data_layer(name='label', size=10)
outputs(hsigmoid(input=din, label=label, num_classes=10)) outputs(hsigmoid(input=din, label=label, num_classes=10))
\ No newline at end of file
from paddle.trainer_config_helpers import * from paddle.trainer_config_helpers import *
settings( settings(batch_size=1000, learning_rate=1e-5)
batch_size=1000,
learning_rate=1e-5
)
din = data_layer(name='data', size=128) din = data_layer(name='data', size=128)
outputs(lstmemory(input=din, reverse=True, gate_act=TanhActivation(), outputs(
act=TanhActivation(), size=32)) lstmemory(
input=din,
reverse=True,
gate_act=TanhActivation(),
act=TanhActivation(),
size=32))
from paddle.trainer_config_helpers import * from paddle.trainer_config_helpers import *
settings( settings(batch_size=1000, learning_rate=1e-5)
batch_size=1000,
learning_rate=1e-5
)
data = data_layer(name='data', size=2304) data = data_layer(name='data', size=2304)
conv = img_conv_layer(input=data, conv = img_conv_layer(
filter_size = 3, input=data,
num_channels=1, filter_size=3,
num_filters=16, num_channels=1,
padding=1, num_filters=16,
act=LinearActivation(), padding=1,
bias_attr=True) act=LinearActivation(),
bias_attr=True)
maxout = maxout_layer(input=conv,
num_channels=16, maxout = maxout_layer(input=conv, num_channels=16, groups=2)
groups=2)
pool = img_pool_layer(
pool = img_pool_layer(input=maxout, input=maxout, num_channels=8, pool_size=2, stride=2, pool_type=MaxPooling())
num_channels=8,
pool_size=2, conv2 = img_conv_layer(
stride=2, input=pool,
pool_type=MaxPooling()) filter_size=3,
num_channels=32,
conv2 = img_conv_layer(input=pool, num_filters=128,
filter_size = 3, padding=1,
num_channels=32, act=LinearActivation(),
num_filters=128, bias_attr=True)
padding=1,
act=LinearActivation(), maxout2 = maxout_layer(input=conv, num_channels=128, groups=4)
bias_attr=True)
block = block_expand_layer(
maxout2 = maxout_layer(input=conv, input=maxout, num_channels=32, stride_x=1, stride_y=1, block_x=1, block_y=6)
num_channels=128,
groups=4)
block = block_expand_layer(input=maxout,
num_channels=32,
stride_x=1,
stride_y=1,
block_x=1,
block_y=6)
fc = fc_layer(input=block, size=384, bias_attr=False) fc = fc_layer(input=block, size=384, bias_attr=False)
......
from paddle.trainer_config_helpers import * from paddle.trainer_config_helpers import *
settings( settings(batch_size=1000, learning_rate=1e-5)
batch_size=1000,
learning_rate=1e-5
)
weight = data_layer(name='w', size=1) weight = data_layer(name='w', size=1)
a = data_layer(name='a', size=100) a = data_layer(name='a', size=100)
...@@ -11,13 +8,23 @@ b = data_layer(name='b', size=100) ...@@ -11,13 +8,23 @@ b = data_layer(name='b', size=100)
c = data_layer(name='c', size=200) c = data_layer(name='c', size=200)
d = data_layer(name='d', size=31) d = data_layer(name='d', size=31)
outputs(interpolation_layer(input=[a, b], weight=weight), outputs(
power_layer(input=a, weight=weight), interpolation_layer(
scaling_layer(input=a, weight=weight), input=[a, b], weight=weight),
cos_sim(a=a, b=b), power_layer(
cos_sim(a=a, b=c, size=2), input=a, weight=weight),
sum_to_one_norm_layer(input=a), scaling_layer(
conv_shift_layer(a=a, b=d), input=a, weight=weight),
tensor_layer(a=a, b=b, size=1000), cos_sim(
slope_intercept_layer(input=a, slope=0.7, intercept=0.9), a=a, b=b),
linear_comb_layer(weights=b, vectors=c)) cos_sim(
a=a, b=c, size=2),
sum_to_one_norm_layer(input=a),
conv_shift_layer(
a=a, b=d),
tensor_layer(
a=a, b=b, size=1000),
slope_intercept_layer(
input=a, slope=0.7, intercept=0.9),
linear_comb_layer(
weights=b, vectors=c))
from paddle.trainer_config_helpers import * from paddle.trainer_config_helpers import *
settings( settings(learning_rate=1e-4, batch_size=1000)
learning_rate=1e-4,
batch_size=1000
)
din = data_layer(name='input', size=100) din = data_layer(name='input', size=100)
......
from paddle.trainer_config_helpers import * from paddle.trainer_config_helpers import *
settings( settings(learning_rate=1e-4, batch_size=1000)
learning_rate=1e-4,
batch_size=1000
)
seq = data_layer(name='seq_input', size=100) seq = data_layer(name='seq_input', size=100)
sub_seq = data_layer(name='sub_seq_input', size=100) sub_seq = data_layer(name='sub_seq_input', size=100)
...@@ -25,11 +22,15 @@ with mixed_layer() as lstm_param: # test lstm unit, rnn group ...@@ -25,11 +22,15 @@ with mixed_layer() as lstm_param: # test lstm unit, rnn group
with mixed_layer() as gru_param: with mixed_layer() as gru_param:
gru_param += full_matrix_projection(input=seq, size=100 * 3) gru_param += full_matrix_projection(input=seq, size=100 * 3)
outputs(last_seq(input=recurrent_group(step=generate_rnn_simple('rnn_forward'), outputs(
input=seq)), last_seq(input=recurrent_group(
first_seq(input=recurrent_group(step=generate_rnn_simple('rnn_back'), step=generate_rnn_simple('rnn_forward'), input=seq)),
input=seq, reverse=True)), first_seq(input=recurrent_group(
last_seq(input=recurrent_group(step=generate_rnn_simple( step=generate_rnn_simple('rnn_back'), input=seq, reverse=True)),
'rnn_subseq_forward'), input=SubsequenceInput(input=sub_seq))), last_seq(input=recurrent_group(
last_seq(input=lstmemory_group(input=lstm_param, size=100)), step=generate_rnn_simple('rnn_subseq_forward'),
last_seq(input=gru_group(input=gru_param, size=100))) input=SubsequenceInput(input=sub_seq))),
last_seq(input=lstmemory_group(
input=lstm_param, size=100)),
last_seq(input=gru_group(
input=gru_param, size=100)))
from paddle.trainer_config_helpers import * from paddle.trainer_config_helpers import *
settings( settings(learning_rate=1e-4, batch_size=1000)
learning_rate=1e-4,
batch_size=1000
)
din = data_layer(name='dat_in', size=100) din = data_layer(name='dat_in', size=100)
POOL_TYPE = [ POOL_TYPE = [MaxPooling, AvgPooling, SumPooling]
MaxPooling,
AvgPooling,
SumPooling
]
AGG_LEVEL = [ AGG_LEVEL = [AggregateLevel.EACH_SEQUENCE, AggregateLevel.EACH_TIMESTEP]
AggregateLevel.EACH_SEQUENCE,
AggregateLevel.EACH_TIMESTEP
]
opts = [] opts = []
...@@ -24,7 +14,8 @@ for pt in POOL_TYPE: ...@@ -24,7 +14,8 @@ for pt in POOL_TYPE:
for al in AGG_LEVEL: for al in AGG_LEVEL:
opts.append(pooling_layer(input=din, agg_level=al, pooling_type=pt())) opts.append(pooling_layer(input=din, agg_level=al, pooling_type=pt()))
opts.append(pooling_layer(input=din, opts.append(
pooling_type=MaxPooling(output_max_index=True))) pooling_layer(
input=din, pooling_type=MaxPooling(output_max_index=True)))
outputs(opts) outputs(opts)
from paddle.trainer_config_helpers import * from paddle.trainer_config_helpers import *
define_py_data_sources2(train_list="train.list", define_py_data_sources2(
test_list="test.list", train_list="train.list",
module=["a", "b"], test_list="test.list",
obj=("c", "d")) module=["a", "b"],
settings( obj=("c", "d"))
learning_rate=1e-3, settings(learning_rate=1e-3, batch_size=1000)
batch_size=1000
)
outputs(data_layer(name="a", size=10)) outputs(data_layer(name="a", size=10))
from paddle.trainer_config_helpers import * from paddle.trainer_config_helpers import *
settings( settings(batch_size=100, learning_rate=1e-5)
batch_size=100,
learning_rate=1e-5
)
data = data_layer(name='data', size=3200) data = data_layer(name='data', size=3200)
spp = spp_layer(input=data, spp = spp_layer(
pyramid_height=2, input=data,
num_channels=16, pyramid_height=2,
pool_type=MaxPooling(), num_channels=16,
img_width=10) pool_type=MaxPooling(),
img_width=10)
outputs(spp) outputs(spp)
from paddle.trainer_config_helpers import * from paddle.trainer_config_helpers import *
settings( settings(batch_size=1000, learning_rate=1e-4)
batch_size=1000,
learning_rate=1e-4
)
probs = data_layer(name='probs', size=100) probs = data_layer(name='probs', size=100)
...@@ -11,4 +8,4 @@ outputs( ...@@ -11,4 +8,4 @@ outputs(
# It seems this layer is not correct, and should be rewritten. # It seems this layer is not correct, and should be rewritten.
# block_expand_layer(input=probs, channel=1, block_x=1, block_y=3), # block_expand_layer(input=probs, channel=1, block_x=1, block_y=3),
) )
\ No newline at end of file
...@@ -7,9 +7,7 @@ b = data_layer(name='b', size=10) ...@@ -7,9 +7,7 @@ b = data_layer(name='b', size=10)
result = addto_layer(input=[a, b]) result = addto_layer(input=[a, b])
concat1 = concat_layer(input=[a, b]) concat1 = concat_layer(input=[a, b])
concat2 = concat_layer(input=[ concat2 = concat_layer(
identity_projection(input=a), input=[identity_projection(input=a), identity_projection(input=b)])
identity_projection(input=b)
])
outputs(result, concat1, concat2) outputs(result, concat1, concat2)
\ No newline at end of file
...@@ -24,13 +24,17 @@ z = out_prod_layer(input1=x, input2=y) ...@@ -24,13 +24,17 @@ z = out_prod_layer(input1=x, input2=y)
x1 = fc_layer(input=x, size=5) x1 = fc_layer(input=x, size=5)
y1 = fc_layer(input=y, size=5) y1 = fc_layer(input=y, size=5)
z1 = mixed_layer(act=LinearActivation(), z1 = mixed_layer(
input=[conv_operator(img=x1, act=LinearActivation(),
filter=y1, input=[
filter_size=1, conv_operator(
num_filters=5, img=x1,
num_channels=5, filter=y1,
stride=1)]) filter_size=1,
num_filters=5,
num_channels=5,
stride=1)
])
assert z1.size > 0 assert z1.size > 0
...@@ -41,34 +45,36 @@ cos3 = cos_sim(a=x1, b=y2, size=3) ...@@ -41,34 +45,36 @@ cos3 = cos_sim(a=x1, b=y2, size=3)
linear_comb = linear_comb_layer(weights=x1, vectors=y2, size=3) linear_comb = linear_comb_layer(weights=x1, vectors=y2, size=3)
out = fc_layer(input=[cos1, cos3, linear_comb, z, z1], out = fc_layer(
size=num_classes, input=[cos1, cos3, linear_comb, z, z1],
act=SoftmaxActivation()) size=num_classes,
act=SoftmaxActivation())
print_layer(input=[out]) print_layer(input=[out])
outputs(classification_cost(out, data_layer(name="label", size=num_classes))) outputs(classification_cost(out, data_layer(name="label", size=num_classes)))
dotmul = mixed_layer(input=[dotmul_operator(a=x1, b=x1), dotmul = mixed_layer(
dotmul_projection(input=y1)]) input=[dotmul_operator(
a=x1, b=x1), dotmul_projection(input=y1)])
proj_with_attr_init = mixed_layer(input=full_matrix_projection(input=y1,
param_attr=ParamAttr(learning_rate = 0, proj_with_attr_init = mixed_layer(
initial_mean = 0, input=full_matrix_projection(
initial_std = 0)), input=y1,
bias_attr = ParamAttr(initial_mean=0, initial_std=0, learning_rate=0), param_attr=ParamAttr(
act = LinearActivation(), learning_rate=0, initial_mean=0, initial_std=0)),
size = 5, bias_attr=ParamAttr(
name='proj_with_attr_init') initial_mean=0, initial_std=0, learning_rate=0),
act=LinearActivation(),
size=5,
name='proj_with_attr_init')
# for ctc # for ctc
tmp = fc_layer(input=[x1, dotmul, proj_with_attr_init], tmp = fc_layer(
size=num_classes + 1, input=[x1, dotmul, proj_with_attr_init],
act=SoftmaxActivation()) size=num_classes + 1,
ctc = ctc_layer(input=tmp, act=SoftmaxActivation())
label=y, ctc = ctc_layer(input=tmp, label=y, size=num_classes + 1)
size=num_classes + 1)
ctc_eval = ctc_error_evaluator(input=tmp, label=y) ctc_eval = ctc_error_evaluator(input=tmp, label=y)
settings( settings(
...@@ -76,5 +82,4 @@ settings( ...@@ -76,5 +82,4 @@ settings(
learning_rate=2e-3, learning_rate=2e-3,
learning_method=AdamOptimizer(), learning_method=AdamOptimizer(),
regularization=L2Regularization(8e-4), regularization=L2Regularization(8e-4),
gradient_clipping_threshold=25 gradient_clipping_threshold=25)
)
...@@ -23,8 +23,8 @@ def deprecated(instead): ...@@ -23,8 +23,8 @@ def deprecated(instead):
@functools.wraps(func) @functools.wraps(func)
def __wrapper__(*args, **kwargs): def __wrapper__(*args, **kwargs):
logger.warning("The interface %s is deprecated, " logger.warning("The interface %s is deprecated, "
"will be removed soon. Please use %s instead." "will be removed soon. Please use %s instead." %
% (func.__name__, instead)) (func.__name__, instead))
return func(*args, **kwargs) return func(*args, **kwargs)
......
...@@ -16,17 +16,20 @@ import numpy as np ...@@ -16,17 +16,20 @@ import numpy as np
from PIL import Image from PIL import Image
from cStringIO import StringIO from cStringIO import StringIO
def resize_image(img, target_size): def resize_image(img, target_size):
""" """
Resize an image so that the shorter edge has length target_size. Resize an image so that the shorter edge has length target_size.
img: the input image to be resized. img: the input image to be resized.
target_size: the target resized image size. target_size: the target resized image size.
""" """
percent = (target_size/float(min(img.size[0], img.size[1]))) percent = (target_size / float(min(img.size[0], img.size[1])))
resized_size = int(round(img.size[0] * percent)), int(round(img.size[1] * percent)) resized_size = int(round(img.size[0] * percent)), int(
round(img.size[1] * percent))
img = img.resize(resized_size, Image.ANTIALIAS) img = img.resize(resized_size, Image.ANTIALIAS)
return img return img
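A tiny sanity check of the scaling above, with hypothetical sizes and reusing the resize_image just defined (Pillow assumed available):

from PIL import Image

im = Image.new('RGB', (640, 480))        # hypothetical 640 x 480 input
out = resize_image(im, target_size=256)  # percent = 256 / 480.0
assert out.size == (341, 256)            # the shorter edge ends up at target_size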
def flip(im): def flip(im):
""" """
Return the flipped image. Return the flipped image.
...@@ -38,6 +41,7 @@ def flip(im): ...@@ -38,6 +41,7 @@ def flip(im):
else: else:
return im[:, ::-1] return im[:, ::-1]
def crop_img(im, inner_size, color=True, test=True): def crop_img(im, inner_size, color=True, test=True):
""" """
Return cropped image. Return cropped image.
...@@ -50,20 +54,22 @@ def crop_img(im, inner_size, color=True, test=True): ...@@ -50,20 +54,22 @@ def crop_img(im, inner_size, color=True, test=True):
If True, crop the center of images. If True, crop the center of images.
""" """
if color: if color:
height, width = max(inner_size, im.shape[1]), max(inner_size, im.shape[2]) height, width = max(inner_size, im.shape[1]), max(inner_size,
im.shape[2])
padded_im = np.zeros((3, height, width)) padded_im = np.zeros((3, height, width))
startY = (height - im.shape[1]) / 2 startY = (height - im.shape[1]) / 2
startX = (width - im.shape[2]) / 2 startX = (width - im.shape[2]) / 2
endY, endX = startY + im.shape[1], startX + im.shape[2] endY, endX = startY + im.shape[1], startX + im.shape[2]
padded_im[:, startY: endY, startX: endX] = im padded_im[:, startY:endY, startX:endX] = im
else: else:
im = im.astype('float32') im = im.astype('float32')
height, width = max(inner_size, im.shape[0]), max(inner_size, im.shape[1]) height, width = max(inner_size, im.shape[0]), max(inner_size,
im.shape[1])
padded_im = np.zeros((height, width)) padded_im = np.zeros((height, width))
startY = (height - im.shape[0]) / 2 startY = (height - im.shape[0]) / 2
startX = (width - im.shape[1]) / 2 startX = (width - im.shape[1]) / 2
endY, endX = startY + im.shape[0], startX + im.shape[1] endY, endX = startY + im.shape[0], startX + im.shape[1]
padded_im[startY: endY, startX: endX] = im padded_im[startY:endY, startX:endX] = im
if test: if test:
startY = (height - inner_size) / 2 startY = (height - inner_size) / 2
startX = (width - inner_size) / 2 startX = (width - inner_size) / 2
...@@ -72,19 +78,21 @@ def crop_img(im, inner_size, color=True, test=True): ...@@ -72,19 +78,21 @@ def crop_img(im, inner_size, color=True, test=True):
startX = np.random.randint(0, width - inner_size + 1) startX = np.random.randint(0, width - inner_size + 1)
endY, endX = startY + inner_size, startX + inner_size endY, endX = startY + inner_size, startX + inner_size
if color: if color:
pic = padded_im[:, startY: endY, startX: endX] pic = padded_im[:, startY:endY, startX:endX]
else: else:
pic = padded_im[startY: endY, startX: endX] pic = padded_im[startY:endY, startX:endX]
if (not test) and (np.random.randint(2) == 0): if (not test) and (np.random.randint(2) == 0):
pic = flip(pic) pic = flip(pic)
return pic return pic
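A minimal sketch of how crop_img might be called (shapes are hypothetical): a color image arrives as a (channels, height, width) array; test mode takes the deterministic center patch, train mode a random patch that may also be flipped:

import numpy as np

im = np.random.rand(3, 256, 320)                   # hypothetical C x H x W image
center = crop_img(im, inner_size=224, color=True, test=True)
assert center.shape == (3, 224, 224)               # center crop, never flipped
train_patch = crop_img(im, inner_size=224, color=True, test=False)
assert train_patch.shape == (3, 224, 224)          # random crop, flipped half the time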
def decode_jpeg(jpeg_string): def decode_jpeg(jpeg_string):
np_array = np.array(Image.open(StringIO(jpeg_string))) np_array = np.array(Image.open(StringIO(jpeg_string)))
if len(np_array.shape) == 3: if len(np_array.shape) == 3:
np_array = np.transpose(np_array, (2, 0, 1)) np_array = np.transpose(np_array, (2, 0, 1))
return np_array return np_array
def preprocess_img(im, img_mean, crop_size, is_train, color=True): def preprocess_img(im, img_mean, crop_size, is_train, color=True):
""" """
Does data augmentation for images. Does data augmentation for images.
...@@ -99,6 +107,7 @@ def preprocess_img(im, img_mean, crop_size, is_train, color=True): ...@@ -99,6 +107,7 @@ def preprocess_img(im, img_mean, crop_size, is_train, color=True):
pic -= img_mean pic -= img_mean
return pic.flatten() return pic.flatten()
def load_meta(meta_path, mean_img_size, crop_size, color=True): def load_meta(meta_path, mean_img_size, crop_size, color=True):
""" """
Return the loaded meta file. Return the loaded meta file.
...@@ -109,17 +118,18 @@ def load_meta(meta_path, mean_img_size, crop_size, color=True): ...@@ -109,17 +118,18 @@ def load_meta(meta_path, mean_img_size, crop_size, color=True):
mean = np.load(meta_path)['data_mean'] mean = np.load(meta_path)['data_mean']
border = (mean_img_size - crop_size) / 2 border = (mean_img_size - crop_size) / 2
if color: if color:
assert(mean_img_size * mean_img_size * 3 == mean.shape[0]) assert (mean_img_size * mean_img_size * 3 == mean.shape[0])
mean = mean.reshape(3, mean_img_size, mean_img_size) mean = mean.reshape(3, mean_img_size, mean_img_size)
mean = mean[:, border: border + crop_size, mean = mean[:, border:border + crop_size, border:border +
border: border + crop_size].astype('float32') crop_size].astype('float32')
else: else:
assert(mean_img_size * mean_img_size == mean.shape[0]) assert (mean_img_size * mean_img_size == mean.shape[0])
mean = mean.reshape(mean_img_size, mean_img_size) mean = mean.reshape(mean_img_size, mean_img_size)
mean = mean[border: border + crop_size, mean = mean[border:border + crop_size, border:border +
border: border + crop_size].astype('float32') crop_size].astype('float32')
return mean return mean
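The border arithmetic above simply trims the stored mean image down to the crop size; with hypothetical sizes:

# Hypothetical numbers for the slicing above:
#   mean_img_size = 32, crop_size = 28
#   border = (32 - 28) / 2 = 2
#   color:     mean[:, 2:30, 2:30] -> shape (3, 28, 28)
#   grayscale: mean[2:30, 2:30]    -> shape (28, 28)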
def load_image(img_path, is_color=True): def load_image(img_path, is_color=True):
""" """
Load image and return. Load image and return.
...@@ -130,6 +140,7 @@ def load_image(img_path, is_color=True): ...@@ -130,6 +140,7 @@ def load_image(img_path, is_color=True):
img.load() img.load()
return img return img
def oversample(img, crop_dims): def oversample(img, crop_dims):
""" """
image : iterable of (H x W x K) ndarrays image : iterable of (H x W x K) ndarrays
...@@ -152,50 +163,53 @@ def oversample(img, crop_dims): ...@@ -152,50 +163,53 @@ def oversample(img, crop_dims):
for j in w_indices: for j in w_indices:
crops_ix[curr] = (i, j, i + crop_dims[0], j + crop_dims[1]) crops_ix[curr] = (i, j, i + crop_dims[0], j + crop_dims[1])
curr += 1 curr += 1
crops_ix[4] = np.tile(im_center, (1, 2)) + np.concatenate([ crops_ix[4] = np.tile(im_center, (1, 2)) + np.concatenate(
-crop_dims / 2.0, [-crop_dims / 2.0, crop_dims / 2.0])
crop_dims / 2.0
])
crops_ix = np.tile(crops_ix, (2, 1)) crops_ix = np.tile(crops_ix, (2, 1))
# Extract crops # Extract crops
crops = np.empty((10 * len(img), crop_dims[0], crop_dims[1], crops = np.empty(
im_shape[-1]), dtype=np.float32) (10 * len(img), crop_dims[0], crop_dims[1], im_shape[-1]),
dtype=np.float32)
ix = 0 ix = 0
for im in img: for im in img:
for crop in crops_ix: for crop in crops_ix:
crops[ix] = im[crop[0]:crop[2], crop[1]:crop[3], :] crops[ix] = im[crop[0]:crop[2], crop[1]:crop[3], :]
ix += 1 ix += 1
crops[ix-5:ix] = crops[ix-5:ix, :, ::-1, :] # flip for mirrors crops[ix - 5:ix] = crops[ix - 5:ix, :, ::-1, :] # flip for mirrors
return crops return crops
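oversample above follows the usual 10-crop scheme (four corners plus center, each also mirrored). A usage sketch with hypothetical H x W x K inputs:

import numpy as np

imgs = [np.random.rand(256, 256, 3) for _ in range(2)]  # hypothetical inputs
crops = oversample(imgs, np.array([224, 224]))          # crop_dims as an ndarray
# 5 windows (4 corners + center) plus their mirrors => 10 crops per image.
assert crops.shape == (20, 224, 224, 3)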
class ImageTransformer: class ImageTransformer:
def __init__(self, transpose = None, def __init__(self,
channel_swap = None, mean = None, is_color = True): transpose=None,
channel_swap=None,
mean=None,
is_color=True):
self.transpose = transpose self.transpose = transpose
self.channel_swap = None self.channel_swap = None
self.mean = None self.mean = None
self.is_color = is_color self.is_color = is_color
def set_transpose(self, order): def set_transpose(self, order):
if self.is_color: if self.is_color:
assert 3 == len(order) assert 3 == len(order)
self.transpose = order self.transpose = order
def set_channel_swap(self, order): def set_channel_swap(self, order):
if self.is_color: if self.is_color:
assert 3 == len(order) assert 3 == len(order)
self.channel_swap = order self.channel_swap = order
def set_mean(self, mean): def set_mean(self, mean):
# mean value, may be one value per channel # mean value, may be one value per channel
if mean.ndim == 1: if mean.ndim == 1:
mean = mean[:, np.newaxis, np.newaxis] mean = mean[:, np.newaxis, np.newaxis]
else: else:
# elementwise mean # elementwise mean
if self.is_color: if self.is_color:
assert len(mean.shape) == 3 assert len(mean.shape) == 3
self.mean = mean self.mean = mean
def transformer(self, data): def transformer(self, data):
if self.transpose is not None: if self.transpose is not None:
......
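A rough usage sketch of ImageTransformer above; the concrete order and mean values are hypothetical, and the truncated transformer method is assumed to return the processed array:

import numpy as np

transformer = ImageTransformer(is_color=True)
transformer.set_transpose((2, 0, 1))        # H x W x C -> C x H x W
transformer.set_channel_swap((2, 1, 0))     # e.g. RGB -> BGR
transformer.set_mean(np.array([103.939, 116.779, 123.68]))   # per-channel mean
data = transformer.transformer(np.random.rand(224, 224, 3))  # hypothetical image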
...@@ -15,7 +15,6 @@ ...@@ -15,7 +15,6 @@
# Generate dot diagram file for the given paddle model config # Generate dot diagram file for the given paddle model config
# The generated file can be viewed using Graphviz (http://graphviz.org) # The generated file can be viewed using Graphviz (http://graphviz.org)
import sys import sys
import traceback import traceback
...@@ -46,16 +45,16 @@ def make_diagram(config_file, dot_file, config_arg_str): ...@@ -46,16 +45,16 @@ def make_diagram(config_file, dot_file, config_arg_str):
submodel_layers = set() submodel_layers = set()
def make_link(link): def make_link(link):
return 'l%s -> l%s;' % ( return 'l%s -> l%s;' % (name2id[link.layer_name],
name2id[link.layer_name], name2id[link.link_name]) name2id[link.link_name])
def make_mem(mem): def make_mem(mem):
s = '' s = ''
if mem.boot_layer_name: if mem.boot_layer_name:
s += 'l%s -> l%s;\n' % ( s += 'l%s -> l%s;\n' % (name2id[mem.boot_layer_name],
name2id[mem.boot_layer_name], name2id[mem.layer_name]) name2id[mem.layer_name])
s += 'l%s -> l%s [style=dashed];' % ( s += 'l%s -> l%s [style=dashed];' % (name2id[mem.layer_name],
name2id[mem.layer_name], name2id[mem.link_name]) name2id[mem.link_name])
return s return s
print >> f, 'digraph graphname {' print >> f, 'digraph graphname {'
...@@ -110,8 +109,8 @@ def make_diagram(config_file, dot_file, config_arg_str): ...@@ -110,8 +109,8 @@ def make_diagram(config_file, dot_file, config_arg_str):
def usage(): def usage():
print >> sys.stderr, ("Usage: python show_model_diagram.py" print >> sys.stderr, ("Usage: python show_model_diagram.py" +
+ " CONFIG_FILE DOT_FILE [config_str]") " CONFIG_FILE DOT_FILE [config_str]")
exit(1) exit(1)
......
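For reference, the helpers above emit plain Graphviz edges. With a hypothetical name2id mapping, the generated .dot text would look roughly like this (kept as comments, since it is Graphviz rather than Python):

# Hypothetical fragment, assuming name2id == {'input': 0, 'fc1': 1, 'mem': 2}:
#
#   digraph graphname {
#       l0 -> l1;                 # make_link: layer_name -> link_name
#       l0 -> l2;                 # make_mem:  boot_layer_name -> layer_name
#       l2 -> l1 [style=dashed];  # make_mem:  layer_name -> link_name
#   }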
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
"""Plot training and testing curve from paddle log. """Plot training and testing curve from paddle log.
It takes input from a file or stdin, and output to a file or stdout. It takes input from a file or stdin, and output to a file or stdout.
...@@ -59,8 +58,8 @@ import re ...@@ -59,8 +58,8 @@ import re
import os import os
def plot_paddle_curve(keys, inputfile, outputfile, def plot_paddle_curve(keys, inputfile, outputfile, format='png',
format='png', show_fig = False): show_fig=False):
"""Plot curves from paddle log and save to outputfile. """Plot curves from paddle log and save to outputfile.
:param keys: a list of strings to be plotted, e.g. AvgCost :param keys: a list of strings to be plotted, e.g. AvgCost
...@@ -93,12 +92,17 @@ def plot_paddle_curve(keys, inputfile, outputfile, ...@@ -93,12 +92,17 @@ def plot_paddle_curve(keys, inputfile, outputfile,
return return
m = len(keys) + 1 m = len(keys) + 1
for i in xrange(1, m): for i in xrange(1, m):
pyplot.plot(x[:, 0], x[:, i], color=cm.jet(1.0 * (i - 1) / (2 * m)), pyplot.plot(
label=keys[i - 1]) x[:, 0],
x[:, i],
color=cm.jet(1.0 * (i - 1) / (2 * m)),
label=keys[i - 1])
if (x_test.shape[0] > 0): if (x_test.shape[0] > 0):
pyplot.plot(x[:, 0], x_test[:, i], pyplot.plot(
color=cm.jet(1.0 - 1.0 * (i - 1) / (2 * m)), x[:, 0],
label="Test " + keys[i - 1]) x_test[:, i],
color=cm.jet(1.0 - 1.0 * (i - 1) / (2 * m)),
label="Test " + keys[i - 1])
pyplot.xlabel('number of epoch') pyplot.xlabel('number of epoch')
pyplot.legend(loc='best') pyplot.legend(loc='best')
if show_fig: if show_fig:
...@@ -111,12 +115,20 @@ def main(argv): ...@@ -111,12 +115,20 @@ def main(argv):
""" """
main method of plotting curves. main method of plotting curves.
""" """
cmdparser = argparse.ArgumentParser("Plot training and testing curves from paddle log file.") cmdparser = argparse.ArgumentParser(
cmdparser.add_argument('key', nargs='*', help='keys of scores to plot, the default is AvgCost') "Plot training and testing curves from paddle log file.")
cmdparser.add_argument('-i', '--input', help='input filename of paddle log, ' cmdparser.add_argument(
'default will be standard input') 'key', nargs='*', help='keys of scores to plot, the default is AvgCost')
cmdparser.add_argument('-o', '--output', help='output filename of figure, ' cmdparser.add_argument(
'default will be standard output') '-i',
'--input',
help='input filename of paddle log, '
'default will be standard input')
cmdparser.add_argument(
'-o',
'--output',
help='output filename of figure, '
'default will be standard output')
cmdparser.add_argument('--format', help='figure format(png|pdf|ps|eps|svg)') cmdparser.add_argument('--format', help='figure format(png|pdf|ps|eps|svg)')
args = cmdparser.parse_args(argv) args = cmdparser.parse_args(argv)
keys = args.key keys = args.key
......
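A hypothetical way to drive the entry point above (the script name is assumed; it is not shown in this diff):

# From the shell, reading the log from stdin:
#   cat train.log | python plot_curve.py AvgCost -o curve.png --format png
# or equivalently, calling the parser-backed entry point directly:
main(['AvgCost', '-o', 'curve.png', '--format', 'png'])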
...@@ -41,9 +41,8 @@ def image_data(data_dir, ...@@ -41,9 +41,8 @@ def image_data(data_dir,
the size of the mean image, the number of classes. the size of the mean image, the number of classes.
async_load_data: whether to load image data asynchronously. async_load_data: whether to load image data asynchronously.
""" """
data_creator = ImageClassificationDatasetCreater(data_dir, data_creator = ImageClassificationDatasetCreater(
processed_image_size, data_dir, processed_image_size, color)
color)
batch_data_dir = data_dir batch_data_dir = data_dir
train_list = os.path.join(batch_data_dir, train_list) train_list = os.path.join(batch_data_dir, train_list)
test_list = os.path.join(batch_data_dir, test_list) test_list = os.path.join(batch_data_dir, test_list)
...@@ -64,13 +63,17 @@ def image_data(data_dir, ...@@ -64,13 +63,17 @@ def image_data(data_dir,
'color': color_string 'color': color_string
} }
define_py_data_sources2(train_list, test_list, define_py_data_sources2(
module='image_provider', train_list,
obj='processData', test_list,
args=args) module='image_provider',
return {"image_size": image_size, obj='processData',
"num_classes": num_classes, args=args)
"is_color": is_color} return {
"image_size": image_size,
"num_classes": num_classes,
"is_color": is_color
}
def get_extra_layer_attr(drop_rate): def get_extra_layer_attr(drop_rate):
...@@ -80,8 +83,8 @@ def get_extra_layer_attr(drop_rate): ...@@ -80,8 +83,8 @@ def get_extra_layer_attr(drop_rate):
return ExtraLayerAttribute(drop_rate=drop_rate) return ExtraLayerAttribute(drop_rate=drop_rate)
def image_data_layers(image_size, num_classes, def image_data_layers(image_size, num_classes, is_color=False,
is_color=False, is_predict=False): is_predict=False):
""" """
Data layers for image classification. Data layers for image classification.
image_size: image size. image_size: image size.
...@@ -109,56 +112,58 @@ def simple_conv_net(data_conf, is_color=False): ...@@ -109,56 +112,58 @@ def simple_conv_net(data_conf, is_color=False):
num_classes: num of classes. num_classes: num of classes.
is_color: whether the input images are color. is_color: whether the input images are color.
""" """
for k, v in data_conf.iteritems(): globals()[k] = v for k, v in data_conf.iteritems():
globals()[k] = v
data_input, label_input, num_image_channels = \ data_input, label_input, num_image_channels = \
image_data_layers(image_size, num_classes, is_color, is_predict) image_data_layers(image_size, num_classes, is_color, is_predict)
filter_sizes = [5, 5] filter_sizes = [5, 5]
num_channels = [32, 64] num_channels = [32, 64]
strides = [1, 1] strides = [1, 1]
fc_dims = [500] fc_dims = [500]
conv_bn_pool1 = img_conv_bn_pool(name="g1", conv_bn_pool1 = img_conv_bn_pool(
input=data_input, name="g1",
filter_size=filter_sizes[0], input=data_input,
num_channel=num_image_channels, filter_size=filter_sizes[0],
num_filters=num_channels[0], num_channel=num_image_channels,
conv_stride=1, num_filters=num_channels[0],
conv_padding=0, conv_stride=1,
pool_size=3, conv_padding=0,
pool_stride=2, pool_size=3,
act=ReluActivation()) pool_stride=2,
conv_bn_pool2 = img_conv_bn_pool(name="g2", act=ReluActivation())
input=conv_bn_pool1, conv_bn_pool2 = img_conv_bn_pool(
filter_size=filter_sizes[1], name="g2",
num_channel=num_channels[0], input=conv_bn_pool1,
num_filters=num_channels[1], filter_size=filter_sizes[1],
conv_stride=1, num_channel=num_channels[0],
conv_padding=0, num_filters=num_channels[1],
pool_size=3, conv_stride=1,
pool_stride=2, conv_padding=0,
act=ReluActivation()) pool_size=3,
fc3 = fc_layer(name="fc3", pool_stride=2,
input=conv_bn_pool2, act=ReluActivation())
dim=fc_dims[0], fc3 = fc_layer(
act=ReluActivation()) name="fc3", input=conv_bn_pool2, dim=fc_dims[0], act=ReluActivation())
fc3_dropped = dropout_layer(name="fc3_dropped", fc3_dropped = dropout_layer(name="fc3_dropped", input=fc3, dropout_rate=0.5)
input=fc3, output = fc_layer(
dropout_rate=0.5) name="output",
output = fc_layer(name="output", input=fc3_dropped,
input=fc3_dropped, dim=fc_dims[0],
dim=fc_dims[0], act=SoftmaxActivation())
act=SoftmaxActivation())
if is_predict: if is_predict:
end_of_network(output) end_of_network(output)
else: else:
cost = classify(name="cost", cost = classify(name="cost", input=output, label=label_input)
input=output,
label=label_input)
end_of_network(cost) end_of_network(cost)
def conv_layer_group(prefix_num, num_layers, input, def conv_layer_group(prefix_num,
input_channels, output_channels, num_layers,
drop_rates=[], strides=[], input,
input_channels,
output_channels,
drop_rates=[],
strides=[],
with_bn=[]): with_bn=[]):
""" """
A set of convolution layers, and batch normalization layers, A set of convolution layers, and batch normalization layers,
...@@ -190,36 +195,45 @@ def conv_layer_group(prefix_num, num_layers, input, ...@@ -190,36 +195,45 @@ def conv_layer_group(prefix_num, num_layers, input,
i_conv_in = group_output i_conv_in = group_output
i_channels_conv = input_channels if i == 1 else output_channels i_channels_conv = input_channels if i == 1 else output_channels
conv_act = LinearActivation() if with_bn[i - 1] else ReluActivation() conv_act = LinearActivation() if with_bn[i - 1] else ReluActivation()
conv_output = img_conv_layer(name="conv%d_%d" % (prefix_num, i), conv_output = img_conv_layer(
input=i_conv_in, name="conv%d_%d" % (prefix_num, i),
filter_size=3, input=i_conv_in,
num_channels=i_channels_conv, filter_size=3,
num_filters=output_channels, num_channels=i_channels_conv,
stride=strides[i - 1], num_filters=output_channels,
padding=1, stride=strides[i - 1],
act=conv_act) padding=1,
act=conv_act)
if with_bn[i - 1]: if with_bn[i - 1]:
bn = batch_norm_layer(name="conv%d_%d_bn" % (prefix_num, i), bn = batch_norm_layer(
input=conv_output, name="conv%d_%d_bn" % (prefix_num, i),
num_channels=output_channels, input=conv_output,
act=ReluActivation(), num_channels=output_channels,
layer_attr=get_extra_layer_attr( act=ReluActivation(),
drop_rate=drop_rates[i - 1])) layer_attr=get_extra_layer_attr(drop_rate=drop_rates[i - 1]))
group_output = bn group_output = bn
else: else:
group_output = conv_output group_output = conv_output
pool = img_pool_layer(name="pool%d" % prefix_num, pool = img_pool_layer(
input=group_output, name="pool%d" % prefix_num,
pool_size=2, input=group_output,
num_channels=output_channels, pool_size=2,
stride=2) num_channels=output_channels,
stride=2)
return pool return pool
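A minimal, hypothetical call of conv_layer_group above: two 3x3 convolutions going from 3 to 64 channels, batch normalization plus dropout on the second one, followed by the 2x2 pooling the helper appends:

group1 = conv_layer_group(
    prefix_num=1,
    num_layers=2,
    input=data_input,        # an image data layer defined elsewhere
    input_channels=3,
    output_channels=64,
    drop_rates=[0.0, 0.3],   # hypothetical values, used only where with_bn is True
    strides=[1, 1],
    with_bn=[False, True])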
def vgg_conv_net(image_size, num_classes, num_layers, def vgg_conv_net(image_size,
channels, strides, with_bn, fc_dims, num_classes,
drop_rates, drop_rates_fc=[], num_layers,
is_color=True, is_predict=False): channels,
strides,
with_bn,
fc_dims,
drop_rates,
drop_rates_fc=[],
is_color=True,
is_predict=False):
""" """
A Wrapper for a VGG network for image classification. A Wrapper for a VGG network for image classification.
It is a set of convolutional groups followed by several fully It is a set of convolutional groups followed by several fully
...@@ -248,51 +262,49 @@ def vgg_conv_net(image_size, num_classes, num_layers, ...@@ -248,51 +262,49 @@ def vgg_conv_net(image_size, num_classes, num_layers,
for i in range(len(num_layers)): for i in range(len(num_layers)):
input_layer = data_input if i == 0 else group_output input_layer = data_input if i == 0 else group_output
input_channels = 3 if i == 0 else channels[i - 1] input_channels = 3 if i == 0 else channels[i - 1]
group_output = conv_layer_group(prefix_num=i + 1, group_output = conv_layer_group(
num_layers=num_layers[i], prefix_num=i + 1,
input=input_layer, num_layers=num_layers[i],
input_channels=input_channels, input=input_layer,
output_channels=channels[i], input_channels=input_channels,
drop_rates=drop_rates[i], output_channels=channels[i],
strides=strides[i], drop_rates=drop_rates[i],
with_bn=with_bn[i]) strides=strides[i],
with_bn=with_bn[i])
conv_output_name = group_output conv_output_name = group_output
if drop_rates_fc[0] != 0.0: if drop_rates_fc[0] != 0.0:
dropped_pool_name = "pool_dropped" dropped_pool_name = "pool_dropped"
conv_output_name = dropout_layer(name=dropped_pool_name, conv_output_name = dropout_layer(
input=conv_output_name, name=dropped_pool_name,
dropout_rate=drop_rates_fc[0]) input=conv_output_name,
dropout_rate=drop_rates_fc[0])
for i in range(len(fc_dims)): for i in range(len(fc_dims)):
input_layer_name = conv_output_name if i == 0 else fc_output input_layer_name = conv_output_name if i == 0 else fc_output
active_type = LinearActivation() if i == len( active_type = LinearActivation() if i == len(
fc_dims) - 1 else ReluActivation() fc_dims) - 1 else ReluActivation()
drop_rate = 0.0 if i == len(fc_dims) - 1 else drop_rates_fc[i + 1] drop_rate = 0.0 if i == len(fc_dims) - 1 else drop_rates_fc[i + 1]
fc_output = fc_layer(name="fc%d" % (i + 1), fc_output = fc_layer(
input=input_layer_name, name="fc%d" % (i + 1),
size=fc_dims[i], input=input_layer_name,
act=active_type, size=fc_dims[i],
layer_attr=get_extra_layer_attr(drop_rate)) act=active_type,
bn = batch_norm_layer(name="fc_bn", layer_attr=get_extra_layer_attr(drop_rate))
input=fc_output, bn = batch_norm_layer(
num_channels=fc_dims[len(fc_dims) - 1], name="fc_bn",
act=ReluActivation(), input=fc_output,
layer_attr=get_extra_layer_attr( num_channels=fc_dims[len(fc_dims) - 1],
drop_rate=drop_rates_fc[-1])) act=ReluActivation(),
output = fc_layer(name="output", layer_attr=get_extra_layer_attr(drop_rate=drop_rates_fc[-1]))
input=bn, output = fc_layer(
size=num_classes, name="output", input=bn, size=num_classes, act=SoftmaxActivation())
act=SoftmaxActivation())
if is_predict: if is_predict:
outputs(output) outputs(output)
else: else:
cost = classification_cost(name="cost", cost = classification_cost(name="cost", input=output, label=label_input)
input=output,
label=label_input)
outputs(cost) outputs(cost)
def vgg16_conv_net(image_size, num_classes, def vgg16_conv_net(image_size, num_classes, is_color=True, is_predict=False):
is_color=True, is_predict=False):
""" """
A Wrapper for a 16 layers VGG network for image classification. A Wrapper for a 16 layers VGG network for image classification.
The detailed architecture of the paper can be found here: The detailed architecture of the paper can be found here:
...@@ -314,8 +326,7 @@ def vgg16_conv_net(image_size, num_classes, ...@@ -314,8 +326,7 @@ def vgg16_conv_net(image_size, num_classes,
is_predict=is_predict) is_predict=is_predict)
def small_vgg(data_conf, def small_vgg(data_conf, is_predict=False):
is_predict=False):
""" """
A Wrapper for a small VGG network for CIFAR-10 image classification. A Wrapper for a small VGG network for CIFAR-10 image classification.
The detailed architecture of the paper can be found here: The detailed architecture of the paper can be found here:
...@@ -329,7 +340,8 @@ def small_vgg(data_conf, ...@@ -329,7 +340,8 @@ def small_vgg(data_conf,
num_classes: num of classes. num_classes: num of classes.
is_color: whether the input images are color. is_color: whether the input images are color.
""" """
for k, v in data_conf.iteritems(): globals()[k] = v for k, v in data_conf.iteritems():
globals()[k] = v
vgg_conv_net(image_size, num_classes, vgg_conv_net(image_size, num_classes,
num_layers=[2, 2, 3, 3], num_layers=[2, 2, 3, 3],
channels=[64, 128, 256, 512], channels=[64, 128, 256, 512],
...@@ -343,8 +355,11 @@ def small_vgg(data_conf, ...@@ -343,8 +355,11 @@ def small_vgg(data_conf,
is_predict=is_predict) is_predict=is_predict)
def training_settings(learning_rate=0.1, batch_size=128, algorithm="sgd", def training_settings(learning_rate=0.1,
momentum=0.9, decay_rate=0.001): batch_size=128,
algorithm="sgd",
momentum=0.9,
decay_rate=0.001):
""" """
Training settings. Training settings.
learning_rate: learning rate of the training. learning_rate: learning rate of the training.
...@@ -357,8 +372,9 @@ def training_settings(learning_rate=0.1, batch_size=128, algorithm="sgd", ...@@ -357,8 +372,9 @@ def training_settings(learning_rate=0.1, batch_size=128, algorithm="sgd",
momentum: momentum of the training algorithm. momentum: momentum of the training algorithm.
decay_rate: weight decay rate. decay_rate: weight decay rate.
""" """
Settings(algorithm=algorithm, Settings(
batch_size=batch_size, algorithm=algorithm,
learning_rate=learning_rate / float(batch_size)) batch_size=batch_size,
learning_rate=learning_rate / float(batch_size))
default_momentum(momentum) default_momentum(momentum)
default_decay_rate(decay_rate * batch_size) default_decay_rate(decay_rate * batch_size)
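The two rescalings above are easy to miss; with the default arguments the values actually handed to the trainer are:

# learning_rate=0.1, batch_size=128, decay_rate=0.001 (the defaults above):
#   per-sample learning rate = 0.1 / 128   = 0.00078125
#   effective decay rate     = 0.001 * 128 = 0.128
training_settings()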
...@@ -28,16 +28,18 @@ def resize_image(img, target_size): ...@@ -28,16 +28,18 @@ def resize_image(img, target_size):
img: the input image to be resized. img: the input image to be resized.
target_size: the target resized image size. target_size: the target resized image size.
""" """
percent = (target_size/float(min(img.size[0], img.size[1]))) percent = (target_size / float(min(img.size[0], img.size[1])))
resized_size = int(round(img.size[0] * percent)),\ resized_size = int(round(img.size[0] * percent)),\
int(round(img.size[1] * percent)) int(round(img.size[1] * percent))
img = img.resize(resized_size, Image.ANTIALIAS) img = img.resize(resized_size, Image.ANTIALIAS)
return img return img
class DiskImage: class DiskImage:
""" """
A class of image data on disk. A class of image data on disk.
""" """
def __init__(self, path, target_size): def __init__(self, path, target_size):
""" """
path: path of the image. path: path of the image.
...@@ -77,6 +79,7 @@ class ImageClassificationDatasetCreater(preprocess_util.DatasetCreater): ...@@ -77,6 +79,7 @@ class ImageClassificationDatasetCreater(preprocess_util.DatasetCreater):
""" """
A class to process data for image classification. A class to process data for image classification.
""" """
def __init__(self, data_path, target_size, color=True): def __init__(self, data_path, target_size, color=True):
""" """
data_path: the path to store the training data and batches. data_path: the path to store the training data and batches.
...@@ -95,8 +98,7 @@ class ImageClassificationDatasetCreater(preprocess_util.DatasetCreater): ...@@ -95,8 +98,7 @@ class ImageClassificationDatasetCreater(preprocess_util.DatasetCreater):
The meta file contains the mean image, as well as some configs. The meta file contains the mean image, as well as some configs.
data: the training Dataset. data: the training Dataset.
""" """
output_path = os.path.join(self.data_path, output_path = os.path.join(self.data_path, self.batch_dir_name,
self.batch_dir_name,
self.meta_filename) self.meta_filename)
if self.color: if self.color:
mean_img = np.zeros((3, self.target_size, self.target_size)) mean_img = np.zeros((3, self.target_size, self.target_size))
...@@ -108,12 +110,13 @@ class ImageClassificationDatasetCreater(preprocess_util.DatasetCreater): ...@@ -108,12 +110,13 @@ class ImageClassificationDatasetCreater(preprocess_util.DatasetCreater):
mean_img += cropped_img mean_img += cropped_img
mean_img /= len(data.data) mean_img /= len(data.data)
mean_img = mean_img.astype('int32').flatten() mean_img = mean_img.astype('int32').flatten()
preprocess_util.save_file({"data_mean": mean_img, preprocess_util.save_file({
"image_size": self.target_size, "data_mean": mean_img,
"mean_image_size": self.target_size, "image_size": self.target_size,
"num_classes": self.num_classes, "mean_image_size": self.target_size,
"color": self.color}, "num_classes": self.num_classes,
output_path) "color": self.color
}, output_path)
pass pass
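The meta file written here is an ordinary pickle, so it can be inspected directly. A minimal sketch, assuming a batches directory produced by this class; the path and file name are hypothetical:

    import cPickle as pickle

    meta = pickle.load(open("data/batches/batches.meta", "rb"))  # hypothetical path
    print meta["image_size"], meta["num_classes"], meta["color"]
    print meta["data_mean"].shape   # the flattened int32 mean image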
def create_dataset_from_list(self, path): def create_dataset_from_list(self, path):
...@@ -125,12 +128,11 @@ class ImageClassificationDatasetCreater(preprocess_util.DatasetCreater): ...@@ -125,12 +128,11 @@ class ImageClassificationDatasetCreater(preprocess_util.DatasetCreater):
label_name = items[1] label_name = items[1]
if not label_name in label_set: if not label_name in label_set:
label_set[label_name] = len(label_set.keys()) label_set[label_name] = len(label_set.keys())
img = DiskImage(path = image_path, target_size = self.target_size) img = DiskImage(path=image_path, target_size=self.target_size)
label = preprocess_util.Label(label = label_set[label_name], label = preprocess_util.Label(
name=label_name) label=label_set[label_name], name=label_name)
return preprocess_util.Dataset(data, self.keys), label_set return preprocess_util.Dataset(data, self.keys), label_set
def create_dataset_from_dir(self, path): def create_dataset_from_dir(self, path):
""" """
Create a Dataset object for image classification. Create a Dataset object for image classification.
...@@ -143,11 +145,12 @@ class ImageClassificationDatasetCreater(preprocess_util.DatasetCreater): ...@@ -143,11 +145,12 @@ class ImageClassificationDatasetCreater(preprocess_util.DatasetCreater):
label_set = preprocess_util.get_label_set_from_dir(path) label_set = preprocess_util.get_label_set_from_dir(path)
data = [] data = []
for l_name in label_set.keys(): for l_name in label_set.keys():
image_paths = preprocess_util.list_images(os.path.join(path, l_name)) image_paths = preprocess_util.list_images(
os.path.join(path, l_name))
for p in image_paths: for p in image_paths:
img = DiskImage(path = p, target_size = self.target_size) img = DiskImage(path=p, target_size=self.target_size)
label = preprocess_util.Label(label = label_set[l_name], label = preprocess_util.Label(
name = l_name) label=label_set[l_name], name=l_name)
data.append((img, label)) data.append((img, label))
random.shuffle(data) random.shuffle(data)
return preprocess_util.Dataset(data, self.keys), label_set return preprocess_util.Dataset(data, self.keys), label_set
...@@ -18,6 +18,7 @@ import cPickle as pickle ...@@ -18,6 +18,7 @@ import cPickle as pickle
import random import random
import collections import collections
def save_file(data, filename): def save_file(data, filename):
""" """
Save data into pickle format. Save data into pickle format.
...@@ -26,6 +27,7 @@ def save_file(data, filename): ...@@ -26,6 +27,7 @@ def save_file(data, filename):
""" """
pickle.dump(data, open(filename, 'wb'), protocol=pickle.HIGHEST_PROTOCOL) pickle.dump(data, open(filename, 'wb'), protocol=pickle.HIGHEST_PROTOCOL)
def save_list(l, outfile): def save_list(l, outfile):
""" """
Save a list of string into a text file. There is one line for each string. Save a list of string into a text file. There is one line for each string.
...@@ -42,15 +44,20 @@ def exclude_pattern(f): ...@@ -42,15 +44,20 @@ def exclude_pattern(f):
""" """
return f.startswith(".") or f.endswith("~") return f.startswith(".") or f.endswith("~")
def list_dirs(path): def list_dirs(path):
""" """
Return a list of directories in path. Exclude all the directories that Return a list of directories in path. Exclude all the directories that
start with '.' or end with '~'. start with '.' or end with '~'.
path: the base directory to search over. path: the base directory to search over.
""" """
return [os.path.join(path, d) for d in next(os.walk(path))[1] if not exclude_pattern(d)] return [
os.path.join(path, d) for d in next(os.walk(path))[1]
if not exclude_pattern(d)
]
def list_images(path, exts = set(["jpg", "png", "bmp", "jpeg"])):
def list_images(path, exts=set(["jpg", "png", "bmp", "jpeg"])):
""" """
Return a list of images in path. Return a list of images in path.
path: the base directory to search over. path: the base directory to search over.
...@@ -60,6 +67,7 @@ def list_images(path, exts = set(["jpg", "png", "bmp", "jpeg"])): ...@@ -60,6 +67,7 @@ def list_images(path, exts = set(["jpg", "png", "bmp", "jpeg"])):
if os.path.isfile(os.path.join(path, d)) and not exclude_pattern(d)\ if os.path.isfile(os.path.join(path, d)) and not exclude_pattern(d)\
and os.path.splitext(d)[-1][1:] in exts] and os.path.splitext(d)[-1][1:] in exts]
def list_files(path): def list_files(path):
""" """
Return a list of files in path. Return a list of files in path.
...@@ -69,6 +77,7 @@ def list_files(path): ...@@ -69,6 +77,7 @@ def list_files(path):
return [os.path.join(path, d) for d in os.listdir(path) \ return [os.path.join(path, d) for d in os.listdir(path) \
if os.path.isfile(os.path.join(path, d)) and not exclude_pattern(d)] if os.path.isfile(os.path.join(path, d)) and not exclude_pattern(d)]
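The three helpers above differ only in what they keep from the directory listing; all of them drop dot-files and editor backup files via exclude_pattern. A small usage sketch with a hypothetical layout of one sub-directory per label:

    # Hypothetical layout: data/train/<label_name>/<image files>
    print list_dirs("data/train")        # e.g. ['data/train/cat', 'data/train/dog']
    print list_images("data/train/cat")  # only jpg/png/bmp/jpeg files
    print list_files("data/train/cat")   # every regular file, any extension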
def get_label_set_from_dir(path): def get_label_set_from_dir(path):
""" """
Return a dictionary of the labels and label ids from a path. Return a dictionary of the labels and label ids from a path.
...@@ -84,6 +93,7 @@ class Label: ...@@ -84,6 +93,7 @@ class Label:
""" """
A class of label data. A class of label data.
""" """
def __init__(self, label, name): def __init__(self, label, name):
""" """
label: the id of the label. label: the id of the label.
...@@ -98,9 +108,10 @@ class Label: ...@@ -98,9 +108,10 @@ class Label:
""" """
return int(self.label) return int(self.label)
def __hash__(self): def __hash__(self):
return hash((self.label)) return hash((self.label))
class Dataset: class Dataset:
""" """
A class to represent a dataset. A dataset contains a set of items. A class to represent a dataset. A dataset contains a set of items.
...@@ -108,6 +119,7 @@ class Dataset: ...@@ -108,6 +119,7 @@ class Dataset:
For example: in an image classification dataset, each item contains two slots. For example: in an image classification dataset, each item contains two slots.
The first slot is an image, and the second slot is a label. The first slot is an image, and the second slot is a label.
""" """
def __init__(self, data, keys): def __init__(self, data, keys):
""" """
data: a list of data. data: a list of data.
...@@ -120,7 +132,7 @@ class Dataset: ...@@ -120,7 +132,7 @@ class Dataset:
def check_valid(self): def check_valid(self):
for d in self.data: for d in self.data:
assert(len(d) == len(self.keys)) assert (len(d) == len(self.keys))
def permute(self, key_id, num_per_batch): def permute(self, key_id, num_per_batch):
""" """
...@@ -167,8 +179,9 @@ class Dataset: ...@@ -167,8 +179,9 @@ class Dataset:
while len(permuted_data) < len(self.data): while len(permuted_data) < len(self.data):
for k in keyvalue_indices: for k in keyvalue_indices:
begin_idx = keyvalue_readpointer[k] begin_idx = keyvalue_readpointer[k]
end_idx = int(min(begin_idx + num_data_per_key_batch, end_idx = int(
len(keyvalue_indices[k]))) min(begin_idx + num_data_per_key_batch,
len(keyvalue_indices[k])))
print "begin_idx, end_idx" print "begin_idx, end_idx"
print begin_idx, end_idx print begin_idx, end_idx
for idx in range(begin_idx, end_idx): for idx in range(begin_idx, end_idx):
...@@ -177,12 +190,12 @@ class Dataset: ...@@ -177,12 +190,12 @@ class Dataset:
self.data = permuted_data self.data = permuted_data
class DataBatcher: class DataBatcher:
""" """
A class that is used to create batches for both training and testing A class that is used to create batches for both training and testing
datasets. datasets.
""" """
def __init__(self, train_data, test_data, label_set): def __init__(self, train_data, test_data, label_set):
""" """
train_data, test_data: Each one is a dataset object representing train_data, test_data: Each one is a dataset object representing
...@@ -190,10 +203,10 @@ class DataBatcher: ...@@ -190,10 +203,10 @@ class DataBatcher:
label_set: a dictionary storing the mapping from label name to label id. label_set: a dictionary storing the mapping from label name to label id.
""" """
self.train_data = train_data self.train_data = train_data
self.test_data = test_data self.test_data = test_data
self.label_set = label_set self.label_set = label_set
self.num_per_batch = 5000 self.num_per_batch = 5000
assert(self.train_data.keys == self.test_data.keys) assert (self.train_data.keys == self.test_data.keys)
def create_batches_and_list(self, output_path, train_list_name, def create_batches_and_list(self, output_path, train_list_name,
test_list_name, label_set_name): test_list_name, label_set_name):
...@@ -202,16 +215,19 @@ class DataBatcher: ...@@ -202,16 +215,19 @@ class DataBatcher:
It also creates train.list and test.list to indicate the list It also creates train.list and test.list to indicate the list
of the batch files for training and testing data, respectively. of the batch files for training and testing data, respectively.
""" """
train_list = self.create_batches(self.train_data, output_path, train_list = self.create_batches(self.train_data, output_path, "train_",
"train_", self.num_per_batch) self.num_per_batch)
test_list = self.create_batches(self.test_data, output_path, "test_", test_list = self.create_batches(self.test_data, output_path, "test_",
self.num_per_batch) self.num_per_batch)
save_list(train_list, os.path.join(output_path, train_list_name)) save_list(train_list, os.path.join(output_path, train_list_name))
save_list(test_list, os.path.join(output_path, test_list_name)) save_list(test_list, os.path.join(output_path, test_list_name))
save_file(self.label_set, os.path.join(output_path, label_set_name)) save_file(self.label_set, os.path.join(output_path, label_set_name))
def create_batches(self, data, output_path, def create_batches(self,
prefix = "", num_data_per_batch=5000): data,
output_path,
prefix="",
num_data_per_batch=5000):
""" """
Create batches for a Dataset object. Create batches for a Dataset object.
data: the Dataset object to process. data: the Dataset object to process.
...@@ -244,6 +260,7 @@ class DatasetCreater(object): ...@@ -244,6 +260,7 @@ class DatasetCreater(object):
- create_dataset() - create_dataset()
- create_meta_file() - create_meta_file()
""" """
def __init__(self, data_path): def __init__(self, data_path):
""" """
data_path: the path to store the training data and batches. data_path: the path to store the training data and batches.
...@@ -324,24 +341,22 @@ class DatasetCreater(object): ...@@ -324,24 +341,22 @@ class DatasetCreater(object):
out_path = os.path.join(self.data_path, self.batch_dir_name) out_path = os.path.join(self.data_path, self.batch_dir_name)
if not os.path.exists(out_path): if not os.path.exists(out_path):
os.makedirs(out_path) os.makedirs(out_path)
if (self.overwrite or if (self.overwrite or not os.path.exists(
not os.path.exists(os.path.join(out_path, self.train_list_name))): os.path.join(out_path, self.train_list_name))):
train_data, train_label_set = \ train_data, train_label_set = \
self.create_dataset(train_path) self.create_dataset(train_path)
test_data, test_label_set = \ test_data, test_label_set = \
self.create_dataset(test_path) self.create_dataset(test_path)
train_data.permute(self.keys.index(self.permutate_key), train_data.permute(
self.num_per_batch) self.keys.index(self.permutate_key), self.num_per_batch)
assert(train_label_set == test_label_set) assert (train_label_set == test_label_set)
data_batcher = DataBatcher(train_data, test_data, data_batcher = DataBatcher(train_data, test_data, train_label_set)
train_label_set)
data_batcher.num_per_batch = self.num_per_batch data_batcher.num_per_batch = self.num_per_batch
data_batcher.create_batches_and_list(self.output_path, data_batcher.create_batches_and_list(
self.train_list_name, self.output_path, self.train_list_name, self.test_list_name,
self.test_list_name, self.label_set_name)
self.label_set_name)
self.num_classes = len(train_label_set.keys()) self.num_classes = len(train_label_set.keys())
self.create_meta_file(train_data) self.create_meta_file(train_data)
return out_path return out_path
...@@ -11,7 +11,6 @@ ...@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
""" """
Show the content of a protobuf data file of PADDLE Show the content of a protobuf data file of PADDLE
""" """
...@@ -21,6 +20,7 @@ import sys ...@@ -21,6 +20,7 @@ import sys
from google.protobuf.internal.decoder import _DecodeVarint from google.protobuf.internal.decoder import _DecodeVarint
import paddle.proto.DataFormat_pb2 as DataFormat import paddle.proto.DataFormat_pb2 as DataFormat
def read_proto(file, message): def read_proto(file, message):
""" """
read a protobuffer struct from file, the length of the struct is stored as read a protobuffer struct from file, the length of the struct is stored as
...@@ -39,7 +39,7 @@ def read_proto(file, message): ...@@ -39,7 +39,7 @@ def read_proto(file, message):
def usage(): def usage():
print >>sys.stderr, "Usage: python show_pb.py PROTO_DATA_FILE" print >> sys.stderr, "Usage: python show_pb.py PROTO_DATA_FILE"
exit(1) exit(1)
...@@ -51,10 +51,7 @@ if __name__ == '__main__': ...@@ -51,10 +51,7 @@ if __name__ == '__main__':
header = DataFormat.DataHeader() header = DataFormat.DataHeader()
read_proto(f, header) read_proto(f, header)
print header print header
sample = DataFormat.DataSample() sample = DataFormat.DataSample()
while read_proto(f, sample): while read_proto(f, sample):
print sample print sample
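The loop above relies on read_proto returning something falsy at end of file; each record is stored with its length first, presumably as a varint given the _DecodeVarint import, and the body of read_proto itself is elided from this diff. A minimal sketch of how such varint-length-prefixed records can be read, reusing the imported _DecodeVarint helper; this is an assumption about the framing, not the actual PaddlePaddle implementation:

    def read_length_prefixed(f, message):
        # Decode the varint length prefix one byte at a time.
        buf = ""
        while True:
            b = f.read(1)
            if not b:
                return False            # clean EOF before a new record
            buf += b
            if ord(b) < 0x80:           # the last varint byte has the high bit clear
                break
        length, _ = _DecodeVarint(buf, 0)
        message.ParseFromString(f.read(length))
        return True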
...@@ -11,7 +11,6 @@ ...@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
""" """
Convert torch parameter file to paddle model files. Convert torch parameter file to paddle model files.
...@@ -28,10 +27,11 @@ import torchfile ...@@ -28,10 +27,11 @@ import torchfile
import cPickle as pickle import cPickle as pickle
import argparse import argparse
# save parameters # save parameters
def save_layer_parameters(outfile, feats): def save_layer_parameters(outfile, feats):
version = 0 version = 0
value_size = 4; value_size = 4
ret = "" ret = ""
for feat in feats: for feat in feats:
ret += feat.tostring() ret += feat.tostring()
...@@ -41,16 +41,18 @@ def save_layer_parameters(outfile, feats): ...@@ -41,16 +41,18 @@ def save_layer_parameters(outfile, feats):
fo.write(ret) fo.write(ret)
fo.close() fo.close()
def save_net_parameters(layers, params, output_path): def save_net_parameters(layers, params, output_path):
for i in range(len(layers)): for i in range(len(layers)):
weight = params[i*2] weight = params[i * 2]
biases = params[i*2+1] biases = params[i * 2 + 1]
weight_file = os.path.join(output_path, '_%s.w0' % layers[i]) weight_file = os.path.join(output_path, '_%s.w0' % layers[i])
biases_file = os.path.join(output_path, '_%s.wbias' % layers[i]) biases_file = os.path.join(output_path, '_%s.wbias' % layers[i])
print "Saving for layer %s." % layers[i] print "Saving for layer %s." % layers[i]
save_layer_parameters(weight_file, [weight]) save_layer_parameters(weight_file, [weight])
save_layer_parameters(biases_file, biases) save_layer_parameters(biases_file, biases)
def load_layer_parameters(filename): def load_layer_parameters(filename):
fn = open(filename, 'rb') fn = open(filename, 'rb')
version, = struct.unpack('i', fn.read(4)) version, = struct.unpack('i', fn.read(4))
...@@ -60,16 +62,20 @@ def load_layer_parameters(filename): ...@@ -60,16 +62,20 @@ def load_layer_parameters(filename):
value = np.fromfile(fn, dtype) value = np.fromfile(fn, dtype)
return value return value
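Taken together, save_layer_parameters and load_layer_parameters give an easy way to sanity-check a converted layer: write a float32 array and read it back. A minimal round-trip sketch; it assumes the header handling elided from this diff is symmetric between the two functions, and the file name is made up:

    import numpy as np

    feats = np.arange(6, dtype=np.float32).reshape(2, 3)
    save_layer_parameters("_fc1.w0", [feats])       # hypothetical layer file name
    restored = load_layer_parameters("_fc1.w0")
    assert np.allclose(restored, feats.flatten())   # comes back as a flat float32 array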
def main(argv): def main(argv):
""" """
main method of converting torch to paddle files. main method of converting torch to paddle files.
:param argv: :param argv:
:return: :return:
""" """
cmdparser = argparse.ArgumentParser("Convert torch parameter file to paddle model files.") cmdparser = argparse.ArgumentParser(
cmdparser.add_argument('-i', '--input', help='input filename of torch parameters') "Convert torch parameter file to paddle model files.")
cmdparser.add_argument(
'-i', '--input', help='input filename of torch parameters')
cmdparser.add_argument('-l', '--layers', help='list of layer names') cmdparser.add_argument('-l', '--layers', help='list of layer names')
cmdparser.add_argument('-o', '--output', help='output file path of paddle model') cmdparser.add_argument(
'-o', '--output', help='output file path of paddle model')
args = cmdparser.parse_args(argv) args = cmdparser.parse_args(argv)
if args.input and args.layers and args.output: if args.input and args.layers and args.output:
...@@ -77,7 +83,10 @@ def main(argv): ...@@ -77,7 +83,10 @@ def main(argv):
layers = [line.strip() for line in open(args.layers, 'r')] layers = [line.strip() for line in open(args.layers, 'r')]
save_net_parameters(layers, params, args.output) save_net_parameters(layers, params, args.output)
else: else:
print('Usage: python torch2paddle.py -i torchfile.t7 -l layers.txt -o path/to/paddle_model') print(
'Usage: python torch2paddle.py -i torchfile.t7 -l layers.txt -o path/to/paddle_model'
)
if __name__ == "__main__": if __name__ == "__main__":
main(sys.argv[1:]) main(sys.argv[1:])
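The -l file is read one layer name per line, and each name determines the output file names (_<name>.w0 for the weights, _<name>.wbias for the biases). A hypothetical example; the layer names and paths are made up:

    # Hypothetical invocation:
    #   python torch2paddle.py -i vgg.t7 -l layers.txt -o ./paddle_model
    # where layers.txt lists one Torch layer name per line, e.g.:
    layers = ["fc1", "fc2"]
    # -> the script writes ./paddle_model/_fc1.w0, _fc1.wbias, _fc2.w0, _fc2.wbias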