diff --git a/python/paddle/trainer/PyDataProvider2.py b/python/paddle/trainer/PyDataProvider2.py deleted file mode 100644 index 05635833bf1645f78f5ba15caee3e9b8da9f5544..0000000000000000000000000000000000000000 --- a/python/paddle/trainer/PyDataProvider2.py +++ /dev/null @@ -1,541 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import cPickle -import logging -import collections -import functools -import itertools - -logging.basicConfig(format="[%(levelname)s %(asctime)s %(filename)s:%(lineno)s]" - " %(message)s") - - -class SequenceType(object): - NO_SEQUENCE = 0 - SEQUENCE = 1 - SUB_SEQUENCE = 2 - - @classmethod - def tostring(cls, value): - for k in cls.__dict__: - if not k.startswith('__'): - if getattr(cls, k) == value: - return cls.__name__ + '.' + k - return 'INVALID(' + str(value) + ')' - - -# TODO(yuyang18): Add string data type here. -class DataType(object): - Dense = 0 - SparseNonValue = 1 - SparseValue = 2 - Index = 3 - - @classmethod - def tostring(cls, value): - for k in cls.__dict__: - if not k.startswith('__'): - if getattr(cls, k) == value: - return cls.__name__ + '.' + k - return 'INVALID(' + str(value) + ')' - - -class CacheType(object): - NO_CACHE = 0 # No cache at all - - # First pass, read data from python. And store them in memory. Read from - # memory during rest passes. - CACHE_PASS_IN_MEM = 1 - - -class InputType(object): - """ - InputType is the base class for paddle input types. - - .. note:: - - this is a base class, and should never be used by user. - - :param dim: dimension of input. If the input is an integer, it means the - value range. Otherwise, it means the size of layer. - :type dim: int - :param seq_type: sequence type of input. 0 means it is not a sequence. 1 - means it is a variable length sequence. 2 means it is a - nested sequence. - :type seq_type: int - :param type: data type of input. - :type type: int - """ - __slots__ = ['dim', 'seq_type', 'type'] - - def __init__(self, dim, seq_type, tp): - self.dim = dim - self.seq_type = seq_type - self.type = tp - - def __repr__(self): - """ - Return a human readable representation like 'InputType(dim=25921, - seq_type=SequenceType.NO_SEQUENCE, type=DataType.Dense)' - """ - repr_str = type(self).__name__ - repr_str += '(' - serialize_func_map = { - 'dim': repr, - 'seq_type': SequenceType.tostring, - 'type': DataType.tostring - } - for idx, k in enumerate(self.__slots__): - if idx != 0: - repr_str += ', ' - repr_str += ( - k + '=' + serialize_func_map.get(k, repr)(getattr(self, k))) - repr_str += ')' - return repr_str - - -def dense_slot(dim, seq_type=SequenceType.NO_SEQUENCE): - """ - Dense Array. It means the input feature is dense array with float type. - For example, if the input is an image with 28*28 pixels, the input of - Paddle neural network could be a dense vector with dimension 784 or a - numpy array with shape (28, 28). 
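    For example (a hedged sketch; both forms describe the same 784-dim
    feature):

    .. code-block:: python

        import numpy

        flat = [0.0] * 784                            # a plain list of 784 floats
        arr = numpy.zeros((28, 28), dtype='float32')  # or a numpy array, shape (28, 28)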
- - For the 2-D convolution operation, each sample in one mini-batch must have - the similarly size in PaddlePaddle now. But, it supports variable-dimension - feature across mini-batch. For the variable-dimension, the param dim is not - used. While the data reader must yield numpy array and the data feeder will - set the data shape correctly. - - :param dim: dimension of this vector. - :type dim: int - :param seq_type: sequence type of input. - :type seq_type: int - :return: An input type object. - :rtype: InputType - """ - return InputType(dim, seq_type, DataType.Dense) - - -def sparse_non_value_slot(dim, seq_type=SequenceType.NO_SEQUENCE): - """ - Sparse binary vector. It means the input feature is a sparse vector and the - every element in this vector is either zero or one. - - :param dim: dimension of this vector. - :type dim: int - :param seq_type: sequence type of this input. - :type seq_type: int - :return: An input type object. - :rtype: InputType - """ - return InputType(dim, seq_type, DataType.SparseNonValue) - - -def sparse_value_slot(dim, seq_type=SequenceType.NO_SEQUENCE): - """ - Sparse vector. It means the input feature is a sparse vector. Most of the - elements in this vector are zero, others could be any float value. - - :param dim: dimension of this vector. - :type dim: int - :param seq_type: sequence type of this input. - :type seq_type: int - :return: An input type object. - :rtype: InputType - """ - return InputType(dim, seq_type, DataType.SparseValue) - - -def index_slot(value_range, seq_type=SequenceType.NO_SEQUENCE): - """ - Data type of integer. - - :param seq_type: sequence type of this input. - :type seq_type: int - :param value_range: range of this integer. - :type value_range: int - :return: An input type object - :rtype: InputType - """ - return InputType(value_range, seq_type, DataType.Index) - - -dense_vector = dense_slot -sparse_binary_vector = sparse_non_value_slot -sparse_float_vector = sparse_value_slot -integer_value = index_slot - -# dense_array can be used for variable-length input feature. -# Each feature is not a vector, but a multi-dimensional array. -dense_array = dense_slot - - -def dense_vector_sequence(dim): - """ - Data type of a sequence of dense vector. - - :param dim: dimension of dense vector. - :type dim: int - :return: An input type object - :rtype: InputType - """ - return dense_vector(dim, seq_type=SequenceType.SEQUENCE) - - -def dense_vector_sub_sequence(dim): - return dense_vector(dim, seq_type=SequenceType.SUB_SEQUENCE) - - -def sparse_binary_vector_sequence(dim): - """ - Data type of a sequence of sparse vector, which every element is either zero - or one. - - :param dim: dimension of sparse vector. - :type dim: int - :return: An input type object - :rtype: InputType - """ - return sparse_binary_vector(dim, seq_type=SequenceType.SEQUENCE) - - -def sparse_binary_vector_sub_sequence(dim): - return sparse_binary_vector(dim, seq_type=SequenceType.SUB_SEQUENCE) - - -def sparse_float_vector_sequence(dim): - """ - Data type of a sequence of sparse vector, which most elements are zero, - others could be any float value. - - :param dim: dimension of sparse vector. - :type dim: int - :return: An input type object - :rtype: InputType - """ - return sparse_float_vector(dim, seq_type=SequenceType.SEQUENCE) - - -def sparse_float_vector_sub_sequence(dim): - return sparse_float_vector(dim, seq_type=SequenceType.SUB_SEQUENCE) - - -def integer_value_sequence(value_range): - """ - Data type of a sequence of integer. 
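    For example (a hedged sketch; the vocabulary size 10000 is illustrative):

    .. code-block:: python

        # each sample is a sequence of word ids, e.g. [23, 401, 7]
        input_types = [integer_value_sequence(10000)]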
- - :param value_range: range of each element. - :type value_range: int - """ - return integer_value(value_range, seq_type=SequenceType.SEQUENCE) - - -def integer_value_sub_sequence(dim): - return integer_value(dim, seq_type=SequenceType.SUB_SEQUENCE) - - -integer_sequence = integer_value_sequence - - -class SingleSlotWrapper(object): - def __init__(self, generator): - self.generator = generator - - def __call__(self, obj, filename): - for item in self.generator(obj, filename): - if isinstance(item, dict): - yield item - else: - yield [item] - - -class InputOrderWrapper(object): - def __init__(self, generator, input_order): - self.generator = generator - self.input_order = input_order - - def __call__(self, obj, filename): - for item in self.generator(obj, filename): - if isinstance(item, dict): - yield [ - item.get(input_name, None) - for input_name in self.input_order - ] - else: - yield item - - -class CheckWrapper(object): - def __init__(self, generator, input_types, check_fail_continue, logger): - self.generator = generator - self.input_types = input_types - self.check_fail_continue = check_fail_continue - self.logger = logger - - def __call__(self, obj, filename): - for items in self.generator(obj, filename): - try: - assert len(items) == len(self.input_types) - assert len(filter(lambda x: x is None, items)) == 0 - for item, input_type in itertools.izip(items, self.input_types): - callback = functools.partial(CheckWrapper.loop_callback, - input_type) - - for _ in xrange(input_type.seq_type): - callback = functools.partial(CheckWrapper.loop_check, - callback) - callback(item) - - yield items - except AssertionError as e: - self.logger.warning( - "Item (%s) is not fit the input type with error %s" % - (repr(item), repr(e))) - - if self.check_fail_continue: - continue - else: - raise - - @staticmethod - def loop_callback(input_type, each): - assert isinstance(input_type, InputType) - if input_type.type == DataType.Dense: - assert isinstance(each, collections.Sequence) - for d in each: - assert isinstance(d, float) - assert len(each) == input_type.dim - elif input_type.type == DataType.Index: - assert isinstance(each, int) - assert each < input_type.dim - elif input_type.type == DataType.SparseNonValue \ - or input_type.type == DataType.SparseValue: - assert isinstance(each, collections.Sequence) - sparse_id = set() - for k in each: - if input_type.type == DataType.SparseValue: - k, v = k - assert isinstance(v, float) - assert isinstance(k, int) - assert k < input_type.dim - sparse_id.add(k) - assert len(sparse_id) == len(each) - else: - raise RuntimeError("Not support input type") - - @staticmethod - def loop_check(callback, item): - for each in item: - callback(each) - - -class CheckInputTypeWrapper(object): - def __init__(self, generator, input_types, logger): - self.generator = generator - self.input_types = input_types - self.logger = logger - - def __call__(self, obj, filename): - for items in self.generator(obj, filename): - try: - # dict type is required for input_types when item is dict type - assert (isinstance(items, dict) and \ - not isinstance(self.input_types, dict))==False - yield items - except AssertionError as e: - self.logger.error( - "%s type is required for input type but got %s" % - (repr(type(items)), repr(type(self.input_types)))) - raise - - -def provider(input_types=None, - should_shuffle=None, - pool_size=-1, - min_pool_size=-1, - can_over_batch_size=True, - calc_batch_size=None, - cache=CacheType.NO_CACHE, - check=False, - check_fail_continue=False, - 
init_hook=None, - **outter_kwargs): - """ - Provider decorator. Use it to make a function into PyDataProvider2 object. - In this function, user only need to get each sample for some train/test - file. - - The basic usage is: - - .. code-block:: python - - @provider(some data provider config here...) - def process(settings, file_name): - while not at end of file_name: - sample = readOneSampleFromFile(file_name) - yield sample. - - The configuration of data provider should be setup by\: - - :param input_types: Specify the input types, can also be set in init_hook. - It could be a list of InputType object. For example, - input_types=[dense_vector(9), integer_value(2)]. Or user - can set a dict of InputType object, which key is - data_layer's name. For example, input_types=\ - {'img': img_features, 'label': label}. when using dict of - InputType, user could yield a dict of feature values, which - key is also data_layer's name. - - :type input_types: list|tuple|dict - - :param should_shuffle: True if data should shuffle. Pass None means shuffle - when is training and not to shuffle when is testing. - :type should_shuffle: bool - - :param pool_size: Max number of sample in data pool. - :type pool_size: int - - :param min_pool_size: Set minimal sample in data pool. The PaddlePaddle will - random pick sample in pool. So the min_pool_size - effect the randomize of data. - :type min_pool_size: int - - :param can_over_batch_size: True if paddle can return a mini-batch larger - than batch size in settings. It is useful when - custom calculate one sample's batch_size. - - It is very danger to set it to false and use - calc_batch_size together. Default is true. - :type can_over_batch_size: bool - - :param calc_batch_size: a method to calculate each sample's batch size. - Default each sample's batch size is 1. But to you - can customize each sample's batch size. - :type calc_batch_size: callable - - :param cache: Cache strategy of Data Provider. Default is CacheType.NO_CACHE - :type cache: int - - :param init_hook: Initialize hook. Useful when data provider need load some - external data like dictionary. The parameter is - (settings, file_list, \*\*kwargs). - - - settings. It is the global settings object. User can set - settings.input_types here. - - file_list. All file names for passed to data provider. - - is_train. Is this data provider used for training or not. - - kwargs. Other keyword arguments passed from - trainer_config's args parameter. - :type init_hook: callable - - :param check: Check the yield data format is as same as input_types. Enable - this will make data provide process slow but it is very useful - for debug. Default is disabled. - :type check: bool - - :param check_fail_continue: Continue train or not when check failed. Just - drop the wrong format data when it is True. Has - no effect when check set to False. 
- :type check_fail_continue: bool - """ - - def __wrapper__(generator): - class DataProvider(object): - def __init__(self, file_list, **kwargs): - self.logger = logging.getLogger("") - self.logger.setLevel(logging.INFO) - self.input_types = None - self.should_shuffle = should_shuffle - - true_table = [1, 't', 'true', 'on'] - false_table = [0, 'f', 'false', 'off'] - if not isinstance(self.should_shuffle, bool) and \ - self.should_shuffle is not None: - - if isinstance(self.should_shuffle, basestring): - self.should_shuffle = self.should_shuffle.lower() - - if self.should_shuffle in true_table: - self.should_shuffle = True - elif self.should_shuffle in false_table: - self.should_shuffle = False - else: - self.logger.warning( - "Could not recognize should_shuffle (%s), " - "just use default value of should_shuffle." - " Please set should_shuffle to bool value or " - "something in %s" % - (repr(self.should_shuffle), - repr(true_table + false_table))) - self.should_shuffle = None - - self.pool_size = pool_size - self.can_over_batch_size = can_over_batch_size - self.calc_batch_size = calc_batch_size - self.file_list = file_list - self.generator = generator - self.cache = cache - self.min_pool_size = min_pool_size - self.input_order = kwargs['input_order'] - self.check = check - if init_hook is not None: - init_hook(self, file_list=file_list, **kwargs) - - if 'slots' in outter_kwargs: - self.logger.warning('setting slots value is deprecated, ' - 'please use input_types instead.') - self.slots = outter_kwargs['slots'] - if input_types is not None: - self.slots = input_types - - if self.input_types is not None: - self.slots = self.input_types - - assert self.slots is not None, \ - "Data Provider's input_types must be set" - assert self.generator is not None - - use_dynamic_order = False - if isinstance(self.slots, dict): # reorder input_types - self.slots = [self.slots[ipt] for ipt in self.input_order] - use_dynamic_order = True - - if len(self.slots) == 1: - self.generator = SingleSlotWrapper(self.generator) - - if use_dynamic_order: - self.generator = InputOrderWrapper(self.generator, - self.input_order) - else: - self.generator = CheckInputTypeWrapper( - self.generator, self.slots, self.logger) - if self.check: - self.generator = CheckWrapper(self.generator, self.slots, - check_fail_continue, - self.logger) - - return DataProvider - - return __wrapper__ - - -def deserialize_args(args): - """ - Internal use only. - :param args: - :return: - """ - return cPickle.loads(args) diff --git a/python/paddle/trainer/PyDataProviderWrapper.py b/python/paddle/trainer/PyDataProviderWrapper.py deleted file mode 100644 index 374976db9f17ad9b1fd33c5d4adf77155336d100..0000000000000000000000000000000000000000 --- a/python/paddle/trainer/PyDataProviderWrapper.py +++ /dev/null @@ -1,749 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -This module provide a wrapper(decorator) to wrap a data process method into a -PyDataProvider. 
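A minimal, hedged sketch of that wrapping (the slot layout and one-label-per-line
file format are illustrative):

.. code-block:: python

    @provider(slots=[IndexSlot(2)])
    def process(obj, file_name):
        for line in open(file_name):
            yield int(line)  # one 0/1 label per line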
Some examples are shown `here `_. -""" - -import struct -import array -import random -import gc -import logging -import pstats -import sys -import numpy -import functools - -__all__ = [ - 'DenseSlot', 'SlotType', 'SparseNonValueSlot', 'StringSlot', - 'SparseValueSlot', 'IndexSlot', 'PoolSize', 'GeneralPyDataProvider', - 'provider', 'init_hook_wrapper' -] - -try: # Just for profile mode, will try to import cProfile first. - # Most python will contains cProfile, cProfile/profile are basically same. - # ref: https://docs.python.org/2/library/profile.html#introduction-to-the-profilers - import cProfile as profile -except ImportError: - import profile - -try: - import cPickle as pickle -except ImportError: - import six.moves.cPickle as pickle - -import io - - -class SlotType(object): # Just a hint for user. - pass - - -class DenseSlot(SlotType): - """ - Dense Slot Type: Each item is the value of a Dense Vector. - - Its yield format for :code:`provider` is: - - - **NonSeq**: [float, float, ... ] - - **Seq**: [[float, float, ...], [float, float ....], ... ] - - **SubSeq**: [[[float, float, ...], [float ....], ...] , \ - [[float, float, ...], [float ....], ...] , ...] - """ - - def __init__(self, dim): - """ - :param dim: slot dimension - :type dim: int - """ - self.dim = dim - self.type = 0 - - -class SparseNonValueSlot(SlotType): - """ - Sparse NonValue Slot Type: Each item is the id of a Sparse Vector. - - Its yield format for :code:`provider` is: - - - **NonSeq**: [int, int, ...] - - **Seq**: [[int, int, ...], [int, int, ...], ... ] - - **SubSeq**: [[[int, int, ...], [int, ....], ...] , \ - [[int, int, ...], [int, ....], ...] , ...] - """ - - def __init__(self, dim): - """ - :param dim: slot dimension - :type dim: int - """ - self.dim = dim - self.type = 1 - - -class SparseValueSlot(SlotType): - """ - Sparse Value Slot Type: Each item is the id and value of a Sparse Vector. - - Its yield format for :code:`provider` is: - - - **NonSeq**: [(int, float), (int, float), ... ] - - **Seq**: [[(int,float), (int, float), ... ], \ - [(int, float), (int, float), ...], ... ] - - **SubSeq**: [[[(int,float), ...], [(int, float), ....], ...] , \ - [[(int,float), ...], [(int, float), ....], ...] , ...] - """ - - def __init__(self, dim): - """ - :param dim: slot dimension. - :type dim: int - """ - self.dim = dim - self.type = 2 - - -class IndexSlot(SlotType): - """ - Index Value Slot Type: Each item is the id of Label. - - Its yield format for :code:`provider` is: - - - **NonSeq**: int - - **Seq**: [int, int, ....] - - **SubSeq**: [[int, int, ...], [int, int, ...], ... ] - """ - - def __init__(self, dim): - """ - :param dim: slot dimension - :type dim: int - """ - self.dim = dim - self.type = 3 - - -class StringSlot(SlotType): - """ - String Value Slot Type: Each item is a string for printout, \ - can be used in DataLayer too. - - Its yield format for :code:`provider` is: - - - **NonSeq**: string - - **Seq**: [string, string, ....] - - **SubSeq**: [[string, string, ...], [string, string, ...], ... ] - """ - - def __init__(self, dim): - """ - :param dim: slot dimension - :type dim: string - """ - self.dim = dim - self.type = 6 - - -class SparseNonValueHandler(object): - """ - Private Class, Use for converting python object to paddle string. - """ - - def __init__(self): - self.offsets = [] - self.value = [] - self.offset_count = 0 - - def __call__(self, ele): - """ - It will be invoked when scan each sparse data. - - :param ele: list of sparse data, maybe non-value [ idx, ... ] or value. - [ (idx, val), ... 
] - :type ele: list - """ - self.offsets.append(self.offset_count) - self.offset_count += len(ele) - self.processElement(ele) - - def processElement(self, ele): - """ - Process for element list. See __call__ for more document. - """ - self.value += ele - - def done(self, data_stream, int_packer): - """ - Dump data to stream. - :param data_stream: Output Stream. - :param int_packer: A struct.Struct("i") object - """ - data_stream.write(array.array("i", self.offsets).tostring()) - data_stream.write(int_packer.pack(self.offset_count)) - data_stream.write(array.array("i", self.value).tostring()) - - -class SparseValueHandler(SparseNonValueHandler): - """ - Private class, use for converting python obj to paddle string. - """ - - def __init__(self): - SparseNonValueHandler.__init__(self) - self.weight = [] - - def processElement(self, ele): - for idx, w in ele: - self.value.append(idx) - self.weight.append(w) - - def done(self, data_stream, int_packer): - SparseNonValueHandler.done(self, data_stream, int_packer) - data_stream.write(int_packer.pack(self.offset_count)) - data_stream.write(array.array("f", self.weight).tostring()) - - -class StringHandler(object): - """ - Private Class, Use for converting python object to paddle string. - """ - - def __init__(self, data_stream, int_packer): - self.data_stream = data_stream - self.int_packer = int_packer - - def __call__(self, ele): - """ - It will be invoked when scan each string data. - :param ele: string data - :type ele: str - """ - self.data_stream.write(self.int_packer.pack(len(ele))) - self.data_stream.write(array.array("c", ele).tostring()) - - -class GeneralPyDataProvider: - def __init__(self, *file_list, **kwargs): - """ - :param file_list: input file_list - """ - del kwargs # unused - gc.disable() - assert isinstance(self.logger, logging.Logger) - self.use_seq_flag = hasattr(self, "use_seq_flag") and self.use_seq_flag - self.slots_num = len(self.getSlots()) - self.file_list = list(file_list) - self.generators = map(self.generateData, self.file_list) - self.int_packer = struct.Struct("i") - self.head_packer = struct.Struct("ii") - self.float_packer = struct.Struct("f") - self.shuffler = lambda *args, **kwargs: None - self.data_pool = [] - self.has_subseq = [] - self.has_checked = False - - self.debug = hasattr(self, "debug") and self.debug - - if hasattr(self, "profile_filename") and isinstance( - self.profile_filename, str): - self.profile_count = 0 - self.is_profile = True - else: - self.is_profile = False - - if not hasattr(self, "file_count") or not isinstance(self.file_count, - int): - self.file_count = sys.maxint - - if not hasattr(self, "can_over_batch_size"): - self.can_over_batch_size = True - elif not self.can_over_batch_size: - self.logger.warn( - "User should ensure every data size is not larger than batch" - " size when can_over_batch_size = False") - - self.data_pool_idx = 0 - - def reset(self): - """Reset all data in provider.""" - - self.logger.debug("reset dataprovider.") - self.generators = map(self.generateData, self.file_list) - self.shuffler = lambda *args, **kwargs: None - self.data_pool = [] - self.data_pool_idx = 0 - if self.file_count != 0: - self.max_pool_size = 0 - - # When use Profile, each pass will print a profile result. 
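            # (A hedged illustration of the dump naming used below: with
            #  profile_filename = "dp.prof", pass 0 writes "dp.prof_0",
            #  pass 1 writes "dp.prof_1", and so on.)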
- if self.is_profile: - if hasattr(self, "profiler") and isinstance(self.profiler, - profile.Profile): - self.profiler.disable() - fn = "%s_%d" % (self.profile_filename, self.profile_count) - sortby = "cumulative" - with open(fn, "w") as f: - pstats.Stats( - self.profiler, - stream=f).sort_stats(sortby).print_stats() - self.logger.info("saving profile to file %s" % fn) - self.profile_count += 1 - self.logger.info("resetting profile") - self.profiler = profile.Profile() - self.profiler.enable() - - def shuffle(self): - """ shuffle data""" - if not self.should_shuffle: - return - else: - self.logger.debug("shuffling data.") - random.shuffle(self.generators) - self.shuffler = random.shuffle - - def getSlots(self): - """ - :return : return a list of SlotType - :rtype: list - """ - return [] - - def generateData(self, fn): - """ - :param fn: file name - :return: a generator to yield data one by one. - """ - raise NotImplementedError - - def calculateDataBatchSize(self, data): - """ - :param data: One sample which yield by generateData - :type data: list - :return: The batch size that the data contribute. - :rtype: int - """ - return 1 - - def getHeader(self): - """return paddle header format""" - ret = self.head_packer.pack(self.slots_num, self.use_seq_flag) - for obj in self.getSlots(): - ret += self.head_packer.pack(obj.type, obj.dim) - return ret - - def getHeaderNative(self): - return self.use_seq_flag, self.getSlots() - - def getNextBatchNative(self, batch_size): - ret_list = [] - self.__prepareData(batch_size, ret_list) - return ret_list - - def getNextBatch(self, batch_size): - """ - :param batch_size: the batch_size approximately return. - :return: return paddle pyDataProvider format, just see documents. - :rtype: str - - NOTE: If can_over_batch_size is True, the return batch_size >= input batch_size. - Otherwise, the return batch_size < input batch_size, BUT USER MUST ENSURE THAT each data's batch size - is less than input batch_size. - """ - ret_list = [] - current_batch_size = self.__prepareData(batch_size, ret_list) - # create unified format for ret_list with differnt slots_num - if self.slots_num == 1: - ret_list = [ret_list] - - if current_batch_size == 0: - return self.int_packer.pack(current_batch_size) - data_bytes = io.BytesIO() - seq_bytes = io.BytesIO() - subseq_bytes = io.BytesIO() - data_stream = io.BufferedWriter(data_bytes) - seq_stream = io.BufferedWriter(seq_bytes) - subseq_stream = io.BufferedWriter(subseq_bytes) - - def convertDataImpl(idx, data_callback): - """ - This method will handle sequence in return data. invoke data_callback one by one. - :param idx: the slot index. - :param data_callback: a callback, which type is (each sample) => None. 
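            For example (a hedged sketch, mirroring the dense-slot case
            handled further below):

            .. code-block:: python

                data_callback = lambda e: data_stream.write(
                    array.array("f", e).tostring())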
- """ - indices = 0 - slot_sample_num = len(ret_list) - if self.use_seq_flag: - slot_sample_num = 0 - if self.has_subseq[idx]: # has sub-sequence - slot_subseq_num = 0 - for dat in ret_list: - dat = dat[idx] - slot_subseq_num += len(dat) - for sub_dat in dat: - slot_sample_num += len(sub_dat) - subseq_stream.write(self.int_packer.pack(slot_subseq_num)) - else: - for dat in ret_list: - dat = dat[idx] - slot_sample_num += len(dat) - seq_stream.write(self.int_packer.pack(len(ret_list))) - data_stream.write(self.int_packer.pack(slot_sample_num)) - - for dat in ret_list: - dat = dat[idx] - if self.use_seq_flag: - seq_stream.write(self.int_packer.pack(indices)) - if self.has_subseq[idx]: # has sub-sequence - for sub_dat in dat: - writeDataStream(sub_dat, data_callback) - subseq_stream.write(self.int_packer.pack(indices)) - indices += len(sub_dat) - else: - writeDataStream(dat, data_callback) - indices += len(dat) - else: - writeDataStream(dat, data_callback) - - def writeDataStream(dat, data_callback): - if self.use_seq_flag > 0: - if data_callback is None: # Special for index slot - data_stream.write(array.array("i", dat).tostring()) - else: - for ele in dat: - data_callback(ele) - else: - if data_callback is None: # Special for index slot - data_stream.write(self.int_packer.pack(dat)) - else: - data_callback(dat) - - try: - for i in range(self.slots_num): - slot = self.getSlots()[i] - # According to the data_type, each slot data will be converted to binary - if isinstance(slot, DenseSlot): - convertDataImpl(i, lambda e: data_stream.write( - array.array("f", e).tostring())) - elif isinstance(slot, SparseNonValueSlot): - handler = SparseNonValueHandler() - convertDataImpl(i, handler) - handler.done(data_stream, self.int_packer) - elif isinstance(slot, SparseValueSlot): - handler = SparseValueHandler() - convertDataImpl(i, handler) - handler.done(data_stream, self.int_packer) - elif isinstance(slot, IndexSlot): - convertDataImpl(i, None) - elif isinstance(slot, StringSlot): - handler = StringHandler(data_stream, self.int_packer) - convertDataImpl(i, handler) - else: - raise RuntimeError("The data_type must be 0/1/2/3/6") - data_stream.flush() - seq_stream.flush() - subseq_stream.flush() - - return "".join([ - self.int_packer.pack(current_batch_size), data_bytes.getvalue(), - seq_bytes.getvalue(), subseq_bytes.getvalue() - ]) - - finally: - data_stream.close() - seq_stream.close() - subseq_stream.close() - data_bytes.close() - seq_bytes.close() - subseq_bytes.close() - - def hasSubseq(self, ret_list): - # create unified format for ret_list with differnt slots_num - if self.slots_num == 1: - ret_list = [ret_list] - # decide whether slot has sub-sequence using its first sample - for i in range(self.slots_num): - slot = self.getSlots()[i] - dat = ret_list[0][i][0] - if isinstance(slot, IndexSlot) or isinstance(slot, StringSlot): - if isinstance(dat, list) or isinstance(dat, numpy.ndarray): - self.has_subseq.append(1) # has_subseq = True - continue - elif isinstance(dat[0], list) or isinstance(dat[0], numpy.ndarray): - self.has_subseq.append(1) # has_subseq = True - continue - self.has_subseq.append(0) # has_subseq = False - - def checkOrder(self): - first_noSubseq_slot = self.slots_num - last_subseq_slot = -1 - for i in range(self.slots_num): - if not self.has_subseq[i]: - first_noSubseq_slot = i - break - for i in range(self.slots_num): - if self.has_subseq[i]: - last_subseq_slot = i - if first_noSubseq_slot < last_subseq_slot: - raise RuntimeError( - "slot hasSubseq must put before than slot 
without subseq") - self.has_checked = True - - def __prepareData(self, batch_size, ret_list): - current_batch_size = 0 - could_exit = False - while not could_exit: - if len(self.data_pool) == 0: - self.data_pool_idx = 0 - self.fillPool() - if len(self.data_pool) != 0: - for idx in xrange(self.data_pool_idx, len(self.data_pool)): - current_batch_size += self.calculateDataBatchSize( - self.data_pool[idx]) - if current_batch_size >= batch_size: - could_exit = True - break - if current_batch_size > batch_size and not self.can_over_batch_size: # if cannot over batch size - current_batch_size -= self.calculateDataBatchSize( - self.data_pool[idx]) - idx -= 1 - - ret_list += self.data_pool[self.data_pool_idx:idx + 1] - - # for speed reason, just shift left index, not delete data actually. - self.data_pool_idx = idx + 1 - - if self.data_pool_idx == len(self.data_pool): - self.data_pool = [] - else: - break - if self.use_seq_flag and not self.has_checked: # compute self.has_subseq and checkOrder only at first time - self.hasSubseq(ret_list) - self.checkOrder() - return current_batch_size - - def fillPool(self): - """ - Fill the pool to max_pool_size. If max_pool_size is None, then read file_count to pool. - """ - if self.max_pool_size == 0: - for i in xrange(min(self.file_count, len(self.generators))): - self.data_pool += list(self.generators[i]) - self.generators = self.generators[min(self.file_count, - len(self.generators)):] - self.max_pool_size = len(self.data_pool) - else: - while len(self.data_pool) < self.max_pool_size and len( - self.generators) != 0: - try: - self.data_pool.append(self.generators[0].next()) - except StopIteration: - self.generators.pop(0) - self.shuffler(self.data_pool) - - -class PoolSize(object): - """Max number of sample which contains in provider.""" - - def __init__(self, pool_size): - self.size = pool_size - - -def default_init_hook(cls, *args, **kwargs): - """ default hook, do nothing """ - del cls, args, kwargs - - -def provider(slots=None, - use_seq=False, - should_shuffle=True, - pool_size=1, - can_over_batch_size=True, - calc_batch_size=lambda data: 1, - debug=False, - init_hook=default_init_hook, - profile_filename=None): - """ - The decorator for PyDataProvider. User should use this to create Provider class. - User should only concern how to read sample from file. - - So the basic usage is: - - .. code-block:: python - - @provider(some data provider config here...) - def process(obj, file_name): - while not at end of file_name: - sample = readOneSampleFromFile(file_name) - yield sample. - - The configuration of data provider should be setup by: - - :param init_hook: A callback will be invoked when PyDataProvider instance \ - created. The parameter is (obj, \*args, \*\*kwargs). - - - **obj**: actually data provider instance, which \ - contains some global objects in obj.xxxxx, \ - and is used by process function. - - 1. **obj.slots**: a list of SlotType Object. Can be \ - set in init. For example, obj.slots = \ - [DenseSlot(9), IndexSlot(2)]. - 2. **obj.logger**: a logger object. User can invoke \ - obj.logger.info(), obj.logger.fatal(), etc. - - - **args** and **kwargs**: the data provider __init__ \ - parameters. For example, load_data_args \ - will be found in \*\*kwargs, \ - and if you want to recieve \ - it from trainer_config, \ - recommand to use init_hook_wrapper - :type init_hook: callable - - :param pool_size: - - **int**: it will read at most pool_size files to memory. - - **PoolSize**: it will read at most PoolSize.size samples to memory. 
- - If not set, it will read all the files to memory. - :type pool_size: int | PoolSize - - :param slots: Specify the SlotTypes, can also be set in init_hook. It has two formats: - - - A list of SlotType objects. For example, slots = \ - [DenseSlot(9), IndexSlot(2)]. - - A method return a list of SlotTypes, and the parameter of \ - method is (obj, \*file_list, \*\*kwargs). - :type slots: list | callable - - :param use_seq: False if use no sequence (Default). True if use sequence: - - - If sequence has **no sub-sequence**: Each slot will \ - return a list of data. This list is one sequence. \ - So the return format likes \ - [[a0, a1, a2], [b1, b2, b3, b4], [c1]]. - - If sequence has **sub-sequence**: Each slot will return \ - a nested-list of data. This list contains several \ - sub-lists, each sub-list is one sub-sequence. \ - So the return format likes \ - [[[a0, a1, a2], [a4, a5]], [[b1, b2, b3, b4], [b5, b6]], [[c1], [c2]]]. - :type use_seq: bool - - :param should_shuffle: True if data should shuffle. - :type should_shuffle: bool - - :param calc_batch_size: The method calculate each data's batch size. - - - Default is the batch size of one sample. - - User can customize by **lamda** funtion. For example, \ - :code:`calc_batch_size = lambda data : len(data)` \ - means calculating the token number of a sequence data. - :type calc_batch_size: callable - - :param can_over_batch_size: Whether :code:`actual batch size >= input batch size` - - - **True** (>=): getNextBatch method can return more data (Default). - - **False** (<): user must ensure that each data's batch size < input batch size. - :type can_over_batch_size: bool - - :param debug: True if enable debug logger and some debug check. Default is False. - :type debug: bool - - :param profile_filename: None if disable profile (Default). Otherwise, \ - the data provider will dump profile result when \ - reset. And the dump filename is \ - **_**. - :type profile_filename: None | Str - """ - - def _wrapper(handler): - class Cls(GeneralPyDataProvider): - """ Real PyDataProvider Class. """ - - def __init__(self, *file_list, **kwargs): - logging.basicConfig( - format="[%(levelname)s %(asctime)s %(filename)s:%(lineno)s]" - " %(message)s") - - self.logger = logging.getLogger("") - if debug: - self.logger.setLevel(logging.DEBUG) - self.logger.debug("Running pydataprovider in debug mode.") - else: - self.logger.setLevel(logging.INFO) - - init_hook(self, *file_list, **kwargs) - if callable(slots): - self.slots = slots(self, *file_list, **kwargs) - elif slots is not None: - self.slots = slots - - if isinstance(pool_size, int): - self.max_pool_size = 0 - self.file_count = pool_size - elif isinstance(pool_size, PoolSize): - self.max_pool_size = pool_size.size - self.file_count = 0 - else: - raise RuntimeError - self.can_over_batch_size = can_over_batch_size - self.debug = debug - self.profile_filename = profile_filename - self.use_seq_flag = use_seq - self.should_shuffle = should_shuffle - GeneralPyDataProvider.__init__(self, *file_list, **kwargs) - - def getSlots(self): - return self.slots - - def generateData(self, f): - return handler(self, f) - - def calculateDataBatchSize(self, data): - return calc_batch_size(data) - - return Cls - - return _wrapper - - -def init_hook_wrapper(func): - """ - Wrap a method for PyDataProviderWrapper's init_hook. This method can - receive parameter from trainer_config's load_data_args. The load_data_args - must pass a pickle.dumps() value, and dump a map as keyword args. 
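    For example (a hedged sketch of the trainer_config side; the key name
    "dictionary" and path are illustrative):

    .. code-block:: python

        import cPickle as pickle

        load_data_args = pickle.dumps({"dictionary": "./dict.txt"})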
The - wrapped method :code:`func` will receive them as keyword args. - - So an example usage is: - - .. code-block:: python - - @init_hook_wrapper - def hook(obj, dictionary, file_list, **kwargs): - obj.dictionary = dictionary - obj.slots = [IndexSlot(len(obj.dictionary)), - IndexSlot(len(open(file_list[0], "r").readlines()))] - - :param func: init_hook function - :type func: callable - :return: wrapped method, can be passed into @provider. - """ - - @functools.wraps(func) - def wrapper(obj, *file_list, **kwargs): - args = kwargs.get("load_data_args", dict()) - if isinstance(args, basestring): - args = pickle.loads(args) - args['file_list'] = file_list - func(obj=obj, **args) - - return wrapper diff --git a/python/paddle/trainer/__init__.py b/python/paddle/trainer/__init__.py deleted file mode 100644 index f662d6826321eb840739382558f76327d27b5847..0000000000000000000000000000000000000000 --- a/python/paddle/trainer/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py deleted file mode 100644 index 5b90facd49d655f56c037e087d86e41372cbfdb9..0000000000000000000000000000000000000000 --- a/python/paddle/trainer/config_parser.py +++ /dev/null @@ -1,4447 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function -''' -The following functions are available in the config file: - -Bias: define bias. To be used as value of bias argument in Layer(). - -Data: define data provider. - -Input: define input layer for a layer. To be used as element of inputs argument - in Layer(). - -Conv: define a convolution operation for an input of a layer. - -Norm: define a normalization operation for an input of a layer. - -Pool: define a pooling operation for an input of a layer. - -Layer: define a layer. - -Parameter: define a parameter. - -Import: import another config file. If the imported config file name is - a relative path, then it will be searched under the directory of the - current config file. - -Inputs(layer_names...): - Define the name of the input layers of the NeuralNetwork. - The type of these layers must be "data". - These layers will be provided with the DataBatch obtained - from DataProvider. The data streams from DataProvider must - have the same order. 
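For example (a hedged sketch; the layer names are illustrative):

    Inputs("image", "label")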
- -Outputs(layer_names...): - Define the name of the output layers of the NeuralNetwork. - Usually the output is simply the cost layer. - You can specify other layers as outputs and calculate the - cost (and its derivative) yourself. - - -default_initial_std(val) -default_initial_mean(val) -default_momentum(val): -default_decay_rate(val): Set the default value for these parameters - - -get_config_arg(name, type, default): Get the value for a config parameter. - - -*** customized extension to config_parser *** -The functionality of the config_parser can be extended. -If the config_arg_str for parse_config() contains -extension_module_name=[MODULE_NAME], then config_parser will call -MODULE_NAME.get_config_funcs(g_config) -MODULE_NAME.get_config_funcs() should return a dictionary of name to functions, -those functions will be available in the config file. -See legacy/trainer/tests/config_parser_test.py for example - -To use this from paddle_trainer, paddle_trainer should be called with ---config_args=extension_module_name=[MODULE_NAME] - -''' -import copy -import logging -import os -import sys -import traceback -import math -import shutil - -try: - from paddle.proto.DataConfig_pb2 import DataConfig - from paddle.proto.ModelConfig_pb2 import ModelConfig - from paddle.proto.ModelConfig_pb2 import LayerConfig - from paddle.proto.ModelConfig_pb2 import LayerInputConfig - from paddle.proto.ModelConfig_pb2 import ProjectionConfig - from paddle.proto.ModelConfig_pb2 import OperatorConfig - from paddle.proto.ModelConfig_pb2 import GeneratorConfig - from paddle.proto.ModelConfig_pb2 import LinkConfig - from paddle.proto.ParameterConfig_pb2 import ParameterConfig - from paddle.proto.ParameterConfig_pb2 import ParameterUpdaterHookConfig - from paddle.proto.TrainerConfig_pb2 import TrainerConfig - -except Exception as e: - traceback.print_exc() - raise - -logging.basicConfig( - format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s', ) -logger = logging.getLogger('paddle') -logger.setLevel(logging.INFO) -__real_print__ = print -print = logger.info - -# from layer type name to layer class -g_layer_type_map = {} - - -# Initialize global variables. We use this function so that we can -# call parse_config() multiple times -def init_config_environment( - g_default_momentum=None, - g_default_decay_rate=None, - g_default_initial_mean=0., - g_default_initial_std=0.01, - g_default_num_batches_regularization=None, - g_default_initial_strategy=0, - g_default_initial_smart=False, - g_default_gradient_clipping_threshold=None, - g_default_device=None, - g_default_update_hooks=None, - g_default_compact_func=None, - g_config=TrainerConfig(), - g_layer_map={}, - g_parameter_map={}, - g_parameter_initializer_map={}, - g_extended_config_funcs={}, - - # store command args of paddle_trainer - g_command_config_args={}, - - # Used for PyDataProvider to avoid duplicate module name - g_py_module_name_list=[], - g_current_submodel=None, - g_root_submodel=None, - g_submodel_map={}, - g_submodel_stack=[], - g_add_submodel_suffix=False, ): - - # directly iterate through locals().iteritems() will change - # the size of locals() due to introducing k, v into scope - # which will break the process in some env - - local_vars = copy.deepcopy(locals()) - for k, v in local_vars.iteritems(): - globals()[k] = v - - -# Because type is widely used as a variable name in this code. 
-# we need a different function name for the builtin type() -def type_of(x): - return type(x) - - -# Check a condition derived config file -def config_assert(b, msg): - if not b: - logger.fatal(msg) - - -g_config_funcs = {} - - -# decorator for indicating a function which can be used in config file -def config_func(func): - g_config_funcs[func.func_name] = func - return func - - -# decorator for indicating a class which can be used in config file -def config_class(cls): - g_config_funcs[cls.__name__] = cls - return cls - - -# decorator for indicating a class for a layer type -def config_layer(layer_type): - def wrap(cls): - g_config_funcs[cls.__name__] = cls - g_layer_type_map[layer_type] = cls - return cls - - return wrap - - -def gen_parameter_name(layer_name, input_index): - return '_%s.w%d' % (layer_name, input_index) - - -def gen_bias_parameter_name(layer_name): - return '_%s.wbias' % layer_name - - -def default(x, default_value): - return default_value if x is None else x - - -class Cfg(object): - def add_keys(self, locals): - for k, v in locals.iteritems(): - if not k.startswith('_'): - self.__setattr__(k, v) - - -# functions available in config file - - -# Define the name of the input layers of the NeuralNetwork. -# The type of these layers must be "data". -# These layers will be provided with the DataBatch obtained -# from DataProvider. The data streams from DataProvider must -# have the same order. -@config_func -def Inputs(*args): - for name in args: - name = MakeLayerNameInSubmodel(name) - global g_current_submodel, g_root_submodel - if g_current_submodel.is_recurrent_layer_group: - config_assert(False, "Do not set Inputs in recurrent layer group") - else: - g_current_submodel.input_layer_names.append(name) - - if g_current_submodel is g_root_submodel: - g_config.model_config.input_layer_names.append(name) - - -@config_func -def HasInputsSet(): - return len(g_current_submodel.input_layer_names) != 0 - - -# Define the name of the output layers of the NeuralNetwork. -# Usually the output is simply the cost layer. -# You can specify other layers as outputs and calculate the -# cost (and its derivative) yourself. 
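# A hedged usage sketch (the layer name is illustrative):
#
#     Outputs("cost")           # the cost layer as the single output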
-@config_func -def Outputs(*args): - for name in args: - name = MakeLayerNameInSubmodel(name) - global g_current_submodel, g_root_submodel - if g_current_submodel.is_recurrent_layer_group: - config_assert(False, "Do not set Outputs in recurrent layer group") - else: - g_current_submodel.output_layer_names.append(name) - - if g_current_submodel is g_root_submodel: - g_config.model_config.output_layer_names.append(name) - - -@config_func -def SubModelBegin(name): - global g_current_submodel, g_root_submodel, g_submodel_stack - g_submodel_stack.append(g_current_submodel) - - name = MakeLayerNameInParentSubmodel(name) #rename in nested submodel - - config_assert(name not in g_submodel_map, - 'Duplicated submodel name: %s' % name) - - sub_model = g_config.model_config.sub_models.add() - sub_model.name = name - g_submodel_map[name] = sub_model - g_current_submodel = sub_model - - -@config_func -def SubModelEnd(name=None): - global g_current_submodel, g_root_submodel, g_submodel_stack - config_assert(g_current_submodel is not g_root_submodel, - "submodel not begin") - if name is not None: - config_assert( - g_current_submodel.name == MakeLayerNameInParentSubmodel(name), - "submodel name error") - - g_current_submodel = g_submodel_stack.pop() - - -def MakeLayerNameInParentSubmodel(name): - suffix = "" - if len(g_submodel_stack) > 1: - suffix = "@" + g_submodel_stack[-1].name - return name + suffix - - -def GetLayerBaseName(name): - return name.split('@')[0] - - -def MakeLayerNameInSubmodel(name, submodel_name=None): - global g_current_submodel - global g_add_submodel_suffix - if (submodel_name is None and not g_add_submodel_suffix and - not g_current_submodel.is_recurrent_layer_group): - return name - if submodel_name is None: - submodel_name = g_current_submodel.name - return name + "@" + submodel_name - - -# Define a recurrent layer group begin with RecurrentLayerGroupBegin -# and end with RecurrentLayerGroupEnd. -# A recurrent layer group forward/backward one frame after previous frame -# forward/backward through all layers in layer group. -# in_links are names of layer used as input layer in the layer group. -# out_links are names of layer in layer group used as outside layer's input. -# -# If generator is set, the layer group need one or more than one outlinks. -# The first outlink should always be the generated token ids. -# If generator.num_results_per_sample is not set, the output for one sample is -# a ids sequence. Else if num_results_per_sample is more than one, -# the output for one sample is up to #num_results_per_sample generated -# sequences, which are packed in one sequence in output ids vector. Each -# generated sequence has a generation probability. The probabilities for one -# sample are stored in one row of output value matrix. -# Packed generated sequences format, for each i: -# seq_i_length: one interger, seq_i content length, -# [seq_i content], length = seq_i_length -# seq_i_end_mark: one interger, for format check, always -1 -# You can use "seq_text_printer" to print the output of the generator. 
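# A hedged, worked illustration of the packed format described above: two
# generated sequences [3, 7] and [5] for one sample pack into the output
# ids vector as
#     2, 3, 7, -1,    # seq_0: length 2, content [3, 7], end mark -1
#     1, 5, -1        # seq_1: length 1, content [5],    end mark -1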
-@config_func -def RecurrentLayerGroupWithoutOutLinksBegin(name, - in_links, - seq_reversed=False, - target_inlinkname=""): - global g_current_submodel - config_assert(g_config.model_config.type == "recurrent_nn", - "RecurrentLayerGroup should be used only in recurrent_nn") - RecurrentLayerGroup(name=name) # add to father model - SubModelBegin(name) - g_current_submodel.is_recurrent_layer_group = True - g_current_submodel.reversed = seq_reversed - in_links_count = 0 - for linkid, link in enumerate(in_links): - if isinstance(link, basestring): - name = link - else: - name = link.link_name - - in_links_count += 1 - layer_name = MakeLayerNameInParentSubmodel(name) - layer = g_layer_map[layer_name] - ScatterAgentLayer( - name=name, size=layer.size, width=layer.width, height=layer.height) - - pair = g_current_submodel.in_links.add() - pair.layer_name = layer_name - pair.link_name = MakeLayerNameInSubmodel(name) - - -@config_func -def RecurrentLayerGroupSetOutLink(link): - if isinstance(link, basestring): - name = link - else: - name = link.link_name - layer_name = MakeLayerNameInParentSubmodel(name) - pair = g_current_submodel.out_links.add() - pair.layer_name = MakeLayerNameInSubmodel(name) - pair.link_name = layer_name - - -def RecurrentLayerGroupSetGenerator(generator=None): - generator.eos_layer_name = MakeLayerNameInSubmodel(generator.eos_layer_name) - g_current_submodel.generator.CopyFrom(generator) - - -@config_func -def RecurrentLayerGroupBegin(name, - in_links, - out_links, - generator=None, - target_inlinkname="", - seq_reversed=False): - RecurrentLayerGroupWithoutOutLinksBegin(name, in_links, seq_reversed) - for link in out_links: - RecurrentLayerGroupSetOutLink(link) - - if generator is not None: - RecurrentLayerGroupSetGenerator(generator) - config_assert( - len(in_links) == 0, "no in_links should be passed to generator") - config_assert( - len(out_links) >= 1, - "one or more than one out_links should be passed to generator") - - -@config_func -def RecurrentLayerGroupEnd(name): - global g_current_submodel - config_assert(g_current_submodel.is_recurrent_layer_group, - "RecurrentLayerGroup not begin") - for pair in g_current_submodel.memories: #check exist - layer = g_layer_map[pair.layer_name] - config_assert(layer is not None, - "memory declare wrong name:%s" % pair.layer_name) - memory_link = g_layer_map[pair.link_name] - config_assert(layer.size == memory_link.size, - "memory declare wrong size:%d" % memory_link.size) - - prev_submodel = g_current_submodel - SubModelEnd(name) - - for pair in prev_submodel.out_links: - layer = g_layer_map[pair.layer_name] - # add out agent to father model - agent_name = GetLayerBaseName(pair.link_name) - if prev_submodel.HasField("generator"): - DataLayer(name=agent_name, size=layer.size) - else: - GatherAgentLayer(name=agent_name, size=layer.size) - - -# Define the model type -# currently, the paddle supports "nn", "recurrent_nn", "recursive_nn" and "multi_nn" -@config_func -def model_type(name): - g_config.model_config.type = name - - -@config_class -class Bias(Cfg): - def __init__(self, - parameter_name=None, - learning_rate=None, - momentum=None, - decay_rate=None, - decay_rate_l1=None, - initial_mean=None, - initial_std=None, - initial_strategy=None, - initial_smart=None, - num_batches_regularization=None, - sparse_remote_update=None, - gradient_clipping_threshold=None, - is_static=None, - is_shared=None, - initializer=None): - self.add_keys(locals()) - - -# Define one input for a layer -@config_class -class Input(Cfg): - def __init__( - 
self, - input_layer_name, - parameter_name=None, - initializer=None, - learning_rate=None, - momentum=None, - decay_rate=None, - decay_rate_l1=None, - initial_mean=None, - initial_std=None, - initial_strategy=None, - initial_smart=None, - num_batches_regularization=None, - sparse_remote_update=None, - sparse_update=None, - gradient_clipping_threshold=None, - conv=None, - bilinear_interp=None, - norm=None, - pool=None, - image=None, - block_expand=None, - maxout=None, - spp=None, - pad=None, - upsample=None, - format=None, - nnz=None, - is_static=None, - is_shared=None, - update_hooks=None, - input_layer_argument=None, - make_layer_name_in_submodel=True, ): - """ - @param make_layer_name_in_submodel True by defalut, you might need to - set it carefully when adding Input in config_parser.py. - """ - self.add_keys(locals()) - self.input_layer_name = MakeLayerNameInSubmodel( - input_layer_name - ) if make_layer_name_in_submodel else input_layer_name - - -# Define a projection for iexed layer -@config_class -class Projection(Input): - type = None # subclass should set it correctly - - def __init__( - self, - input_layer_name, - size=0, # projection output size - parameter_name=None, - learning_rate=None, - momentum=None, - decay_rate=None, - decay_rate_l1=None, - initial_mean=None, - initial_std=None, - initial_strategy=None, - initial_smart=None, - initializer=None, - num_batches_regularization=None, - sparse_remote_update=None, - sparse_update=None, - gradient_clipping_threshold=None, - ptype=None, - format=None, - nnz=None, - is_static=None, - is_shared=None, - update_hooks=None, - input_layer_argument=None, ): - self.add_keys(locals()) - self.input_layer_name = MakeLayerNameInSubmodel(input_layer_name) - - self.proj_conf = ProjectionConfig() - if ptype is not None: - self.proj_conf.type = ptype - else: - self.proj_conf.type = self.type - - # calculate the output_size given input_size. 
return 0 - # to indicate using the size from Layer config - def calc_output_size(self, input_layer_config): - return self.size - - def calc_parameter_size(self, input_size, output_size): - raise NotimplementedError - - def calc_parameter_dims(self, input_size, output_size): - raise NotimplementedError - - -@config_class -class IdentityProjection(Projection): - type = 'identity' - - def calc_output_size(self, input_layer_config): - return input_layer_config.size - - def calc_parameter_size(self, input_size, output_size): - return 0 - - def calc_parameter_dims(self, input_size, output_size): - return [] - - -# Like IdentityProjection, but layer size may smaller than input size, -# the projection select dimesions [offset, offset+layer_size) from input -@config_class -class IdentityOffsetProjection(Projection): - type = 'identity_offset' - - def __init__(self, input_layer_name, offset, **xargs): - super(IdentityOffsetProjection, self).__init__(input_layer_name, - **xargs) - self.proj_conf.offset = offset - - def calc_output_size(self, input_layer_config): - return 0 # depends on the outside MixedLayer - - def calc_parameter_size(self, input_size, output_size): - return 0 - - def calc_parameter_dims(self, input_size, output_size): - return [] - - -@config_class -class SliceProjection(Projection): - type = 'slice' - - def __init__(self, input_layer_name, slices, **xargs): - super(SliceProjection, self).__init__(input_layer_name, **xargs) - input = g_layer_map[input_layer_name] - if input.type in ["exconv", "cudnn_conv"]: - # the slice operator is for the channel dimension - assert input.num_filters is not None - channels = input.num_filters - image_size = input.size / channels - assert slices[len(slices) - 1][1] <= channels - for i in xrange(len(slices)): - slice = self.proj_conf.slices.add() - slice.start = slices[i][0] * image_size - slice.end = slices[i][1] * image_size - self.size += slice.end - slice.start - else: - config_assert(False, - 'Currently the input should be convolution layer') - - def calc_parameter_size(self, input_size, output_size): - return 0 - - def calc_parameter_dims(self, input_size, output_size): - return [] - - -# DotMulProjection performs element-wise multiplication with weight -@config_class -class DotMulProjection(Projection): - type = 'dot_mul' - - def calc_output_size(self, input_layer_config): - return input_layer_config.size - - def calc_parameter_size(self, input_size, output_size): - return output_size - - def calc_parameter_dims(self, input_size, output_size): - return [1, output_size] - - -# ScalingProjection -@config_class -class ScalingProjection(Projection): - type = 'scaling' - - def calc_output_size(self, input_layer_config): - return input_layer_config.size - - def calc_parameter_size(self, input_size, output_size): - return 1 - - def calc_parameter_dims(self, input_size, output_size): - return [1, 1] - - -@config_class -class TableProjection(Projection): - type = 'table' - - def calc_parameter_size(self, input_size, output_size): - return input_size * output_size - - def calc_parameter_dims(self, input_size, output_size): - return [input_size, output_size] - - -@config_class -class FullMatrixProjection(Projection): - type = 'fc' - - def calc_parameter_size(self, input_size, output_size): - return input_size * output_size - - def calc_parameter_dims(self, input_size, output_size): - return [input_size, output_size] - - -@config_class -class TransposedFullMatrixProjection(Projection): - type = 'trans_fc' - - def calc_parameter_size(self, input_size, 
output_size): - return input_size * output_size - - def calc_parameter_dims(self, input_size, output_size): - return [output_size, input_size] - - -@config_class -class ContextProjection(Projection): - type = 'context' - - def __init__(self, input_layer_name, context_start, context_length, - trainable_padding, **xargs): - super(ContextProjection, self).__init__(input_layer_name, **xargs) - self.proj_conf.context_start = context_start - self.proj_conf.context_length = context_length - self.proj_conf.trainable_padding = trainable_padding - self._total_pad = max(0, -self.proj_conf.context_start) \ - + max(0, self.proj_conf.context_start \ - + self.proj_conf.context_length - 1) - - def calc_output_size(self, input_layer_config): - return input_layer_config.size * self.proj_conf.context_length - - def calc_parameter_size(self, input_size, output_size): - if self.proj_conf.trainable_padding == False: - return 0 - else: - return input_size * self._total_pad - - def calc_parameter_dims(self, input_size, output_size): - return [self._total_pad, input_size] - - _total_pad = 0 - - -@config_class -class ConvBaseProjection(Projection): - def __init__(self, - input_layer_name, - num_filters=None, - conv_conf=None, - **xargs): - super(ConvBaseProjection, self).__init__(input_layer_name, **xargs) - - if num_filters is not None: - self.proj_conf.num_filters = num_filters - - def calc_output_size(self, input_layer_config): - return self.proj_conf.output_size - - def calc_parameter_size(self, input_size, output_size): - co = self.proj_conf.num_filters - ci = self.proj_conf.conv_conf.channels - fh = self.proj_conf.conv_conf.filter_size - fw = self.proj_conf.conv_conf.filter_size_y - gr = self.proj_conf.conv_conf.groups - return co * ci * fh * fw / gr - - def calc_bias_size(self): - return self.proj_conf.num_filters - - def calc_parameter_dims(self, input_size, output_size): - return None - - -@config_class -class ConvProjection(ConvBaseProjection): - type = 'conv' - - def __init__(self, - input_layer_name, - num_filters=None, - conv_conf=None, - **xargs): - super(ConvProjection, self).__init__(input_layer_name, num_filters, - conv_conf, **xargs) - - parse_conv(conv_conf, self.input_layer_name, self.proj_conf.conv_conf, - num_filters) - self.proj_conf.output_size = self.proj_conf.conv_conf.output_x * \ - self.proj_conf.conv_conf.output_y * \ - num_filters - - -@config_class -class ConvTransProjection(ConvBaseProjection): - type = 'convt' - - def __init__(self, - input_layer_name, - num_filters=None, - conv_conf=None, - **xargs): - super(ConvTransProjection, self).__init__(input_layer_name, num_filters, - conv_conf, **xargs) - - parse_conv( - conv_conf, - self.input_layer_name, - self.proj_conf.conv_conf, - num_filters, - trans=True) - self.proj_conf.output_size = self.proj_conf.conv_conf.img_size_y * \ - self.proj_conf.conv_conf.img_size * \ - num_filters - - -# Define a operator for mixed layer -@config_class -class Operator(Cfg): - type = None # subclass should set it correctly - - def __init__( - self, - input_layer_names, ): - self.add_keys(locals()) - self.operator_conf = OperatorConfig() - self.operator_conf.type = self.type - - def check_dims(self): - pass - - def calc_output_size(self, input_sizes): - return 0 - - -@config_class -class DotMulOperator(Operator): - type = 'dot_mul' - - def __init__(self, input_layer_names, scale=None, **xargs): - super(DotMulOperator, self).__init__(input_layer_names, **xargs) - if scale is not None: - self.operator_conf.dotmul_scale = scale - - 
config_assert(len(input_layer_names) == 2, "DotMul is binary operator") - - def check_dims(self): - for i in range(2): - config_assert(self.operator_conf.input_sizes[i] == - self.operator_conf.output_size, - "DotMul input_size != output_size") - - def calc_output_size(self, input_sizes): - return input_sizes[0] - - -@config_class -class ConvOperator(Operator): - type = 'conv' - - def __init__(self, - input_layer_names, - num_filters=None, - conv_conf=None, - **xargs): - super(ConvOperator, self).__init__(input_layer_names, **xargs) - if num_filters is not None: - self.operator_conf.num_filters = num_filters - - parse_conv(conv_conf, - MakeLayerNameInSubmodel(input_layer_names[0]), - self.operator_conf.conv_conf, num_filters) - self.operator_conf.output_size = self.operator_conf.conv_conf.output_x * \ - self.operator_conf.conv_conf.output_y * \ - num_filters - - config_assert(len(input_layer_names) == 2, "Conv is binary operator") - - def calc_output_size(self, input_sizes): - return self.operator_conf.output_size - - -@config_class -class ConvTransOperator(Operator): - type = 'convt' - - def __init__(self, - input_layer_names, - num_filters=None, - conv_conf=None, - **xargs): - super(ConvTransOperator, self).__init__(input_layer_names, **xargs) - if num_filters is not None: - self.operator_conf.num_filters = num_filters - - parse_conv( - conv_conf, - MakeLayerNameInSubmodel(input_layer_names[0]), - self.operator_conf.conv_conf, - num_filters, - trans=True) - self.operator_conf.output_size = \ - self.operator_conf.conv_conf.img_size * \ - self.operator_conf.conv_conf.img_size_y * \ - num_filters - - config_assert(len(input_layer_names) == 2, "Conv is binary operator") - - def calc_output_size(self, input_sizes): - return self.operator_conf.output_size - - -# please refer to the comments in proto/ModelConfig.proto -@config_class -class Conv(Cfg): - def __init__(self, - filter_size, - channels, - padding=None, - stride=None, - groups=None, - filter_channels=None, - output_x=None, - img_size=None, - caffe_mode=True, - filter_size_y=None, - padding_y=None, - stride_y=None, - dilation=None, - dilation_y=None): - self.add_keys(locals()) - if filter_size_y is None: - self.filter_size_y = filter_size - if padding_y is None: - self.padding_y = padding - if dilation_y is None: - self.dilation_y = dilation - if stride_y is None: - self.stride_y = stride - if output_x is not None: - config_assert(output_x <= 0) - - -# please refer to the comments in proto/ModelConfig.proto -@config_class -class Conv3D(Cfg): - def __init__(self, - filter_size, - channels, - padding=None, - stride=None, - groups=None, - filter_channels=None, - output_x=None, - img_size=None, - caffe_mode=True, - filter_size_y=None, - padding_y=None, - stride_y=None, - filter_size_z=None, - padding_z=None, - stride_z=None): - self.add_keys(locals()) - self.filter_size_y = filter_size_y if filter_size_y else filter_size - self.filter_size_z = filter_size_z if filter_size_z else filter_size - self.padding_y = padding_y if padding_y else padding - self.padding_z = padding_z if padding_z else padding - self.stride_y = stride_y if stride_y else stride - self.stride_z = stride_z if stride_z else stride - if output_x is not None: - config_assert(output_x <= 0) - - -@config_class -class BilinearInterp(Cfg): - def __init__(self, out_size_x=None, out_size_y=None, channels=None): - self.add_keys(locals()) - - -@config_class -class Pool(Cfg): - def __init__( - self, - pool_type, - channels, - size_x, - size_y=None, - start=None, - stride=None, # 1 by 
default in protobuf
-            stride_y=None,
-            padding=None,  # 0 by default in protobuf
-            padding_y=None):
-        self.add_keys(locals())
-
-
-@config_class
-class Pool3d(Cfg):
-    def __init__(
-            self,
-            pool_type,
-            channels,
-            size_x,
-            size_y=None,
-            size_z=None,
-            start=None,
-            stride=None,  # 1 by default in protobuf
-            stride_y=None,
-            stride_z=None,
-            padding=None,  # 0 by default in protobuf
-            padding_y=None,
-            padding_z=None):
-        self.add_keys(locals())
-        self.filter_size_y = size_y if size_y else size_x
-        self.filter_size_z = size_z if size_z else size_x
-        self.padding_y = padding_y if padding_y else padding
-        self.padding_z = padding_z if padding_z else padding
-        self.stride_y = stride_y if stride_y else stride
-        self.stride_z = stride_z if stride_z else stride
-
-
-@config_class
-class SpatialPyramidPool(Cfg):
-    def __init__(self, pool_type, pyramid_height, channels):
-        self.add_keys(locals())
-
-
-@config_class
-class Pad(Cfg):
-    def __init__(self, channels, pad_c, pad_h, pad_w):
-        self.add_keys(locals())
-
-
-@config_class
-class Upsample(Cfg):
-    def __init__(self, scale, scale_y, pad_out_x, pad_out_y, upsample_size,
-                 upsample_size_y):
-        self.add_keys(locals())
-
-
-@config_class
-class Norm(Cfg):
-    def __init__(self,
-                 norm_type,
-                 channels,
-                 size,
-                 scale,
-                 pow,
-                 output_x=None,
-                 img_size=None,
-                 blocked=None):
-        self.add_keys(locals())
-
-
-@config_class
-class Image(Cfg):
-    def __init__(self, channels, img_size=None):
-        self.add_keys(locals())
-
-
-@config_class
-class BlockExpand(Cfg):
-    def __init__(self,
-                 channels,
-                 padding_x=0,
-                 padding_y=0,
-                 stride_x=0,
-                 stride_y=0,
-                 block_x=0,
-                 block_y=0,
-                 img_size_x=0,
-                 img_size_y=0,
-                 output_x=0,
-                 output_y=0):
-        self.add_keys(locals())
-
-
-@config_class
-class MaxOut(Cfg):
-    def __init__(self, channels, groups, img_size_x=0, img_size_y=0):
-        self.add_keys(locals())
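
These Cfg classes mostly just record their keyword arguments; the y/z variants fall back to the x value when left unset, as Conv and Pool3d above illustrate. A minimal sketch of that cascade (argument values are illustrative only):

    conv = Conv(filter_size=3, channels=64, padding=1, stride=2)
    # with filter_size_y, padding_y and stride_y unset, the constructor
    # falls back to the x values:
    assert conv.filter_size_y == 3
    assert conv.padding_y == 1
    assert conv.stride_y == 2
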
-def create_data_config_proto(async_load_data=False,
-                             constant_slots=None,
-                             data_ratio=1,
-                             is_main_data=True,
-                             usage_ratio=None):
-    # default: all sub dataproviders are treated as "main data";
-    # see proto/DataConfig.proto for is_main_data
-    data_config = DataConfig()
-
-    data_config.async_load_data = async_load_data
-
-    if constant_slots:
-        data_config.constant_slots.extend(constant_slots)
-    data_config.data_ratio = data_ratio
-    data_config.is_main_data = is_main_data
-
-    usage_ratio = default(usage_ratio, settings_deprecated["usage_ratio"])
-    config_assert(usage_ratio >= 0 and usage_ratio <= 1,
-                  "The range of usage_ratio is [0, 1]")
-    data_config.usage_ratio = usage_ratio
-
-    return data_config
-
-
-@config_func
-def SimpleData(files=None,
-               feat_dim=None,
-               context_len=None,
-               buffer_capacity=None,
-               **xargs):
-    data_config = create_data_config_proto(**xargs)
-    data_config.type = 'simple'
-    data_config.files = files
-    data_config.feat_dim = feat_dim
-    if context_len is not None:
-        data_config.context_len = context_len
-    if buffer_capacity:
-        data_config.buffer_capacity = buffer_capacity
-    return data_config
-
-
-@config_func
-def PyData(files=None,
-           type=None,
-           file_group_queue_capacity=None,
-           load_data_module=None,
-           load_data_object=None,
-           load_data_args="",
-           load_file_count=None,
-           constant_slots=None,
-           load_thread_num=None,
-           **xargs):
-    data_config = create_data_config_proto(**xargs)
-    data_config.type = 'py'
-    if load_data_module in g_py_module_name_list:
-
-        def get_path(module):
-            m = __import__(load_data_module)
-            return os.path.split(os.path.realpath(m.__file__))[0]
-
-        # The Python C-API is not thread safe and one module can only be
-        # imported once, so here we need to copy the module under a
-        # different name if it has to be imported several times.
-        module_new_name = "%s_copy_%d" % (load_data_module,
-                                          len(g_py_module_name_list))
-        g_py_module_name_list.append(module_new_name)
-        module_path = "%s/%s.py" % (get_path(load_data_module),
-                                    load_data_module)
-        new_module_path = "%s/%s.py" % (get_path(load_data_module),
-                                        module_new_name)
-        if os.path.isfile(module_path) == False:
-            raise Exception("File %s does not exist." % module_path)
-        shutil.copy2(module_path, new_module_path)
-        load_data_module = module_new_name
-    else:
-        g_py_module_name_list.append(load_data_module)
-    if load_data_module is not None and load_data_object is not None:
-        data_config.load_data_module = load_data_module
-        data_config.load_data_object = load_data_object
-    else:
-        raise ValueError('load_data_module, load_data_object is not defined.')
-    data_config.load_data_args = load_data_args
-
-    data_config.files = files or ''
-    if file_group_queue_capacity is not None:
-        data_config.file_group_conf.queue_capacity = file_group_queue_capacity
-    if load_file_count is not None:
-        data_config.file_group_conf.load_file_count = load_file_count
-    if load_thread_num is not None:
-        data_config.file_group_conf.load_thread_num = load_thread_num
-    if constant_slots:
-        data_config.constant_slots.extend(constant_slots)
-    return data_config
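
A minimal usage sketch inside a trainer config (file name and dimension are illustrative, not from the original file): these config functions return a populated DataConfig proto, e.g.

    data_config = SimpleData(
        files='train.list',   # list of data files
        feat_dim=128,         # dense feature dimension
        buffer_capacity=1000000)
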
-# real data for training is actually provided by "sub_data" data providers.
-@config_func
-def MultiData(sub_data=[]):
-    data_config = DataConfig()
-    data_config.type = 'multi'
-    data_config.sub_data_configs.extend(sub_data)
-    return data_config
-
-
-@config_func
-def Data(type,
-         files=None,
-         feat_dim=None,
-         slot_dims=None,
-         context_len=None,
-         buffer_capacity=None,
-         **xargs):
-
-    data_config = create_data_config_proto(**xargs)
-    data_config.type = type
-    data_config.files = files
-    data_config.feat_dim = feat_dim
-    data_config.slot_dims.extend(slot_dims)
-    if context_len is not None:
-        data_config.context_len = context_len
-    data_config.buffer_capacity = buffer_capacity
-    return data_config
-
-
-@config_func
-def TrainData(data_config, async_load_data=None):
-    config_assert(not g_config.HasField('data_config'),
-                  'Only one TrainData definition is allowed')
-    g_config.data_config.CopyFrom(data_config)
-    g_config.data_config.for_test = False
-    if async_load_data is not None:
-        logger.warning("Deprecated: async_load_data should be used inside"
-                       " Data definition")
-        g_config.data_config.async_load_data = async_load_data
-
-
-@config_func
-def TestData(data_config, async_load_data=None):
-    config_assert(not g_config.HasField('test_data_config'),
-                  'Only one TestData definition is allowed')
-    g_config.test_data_config.CopyFrom(data_config)
-    g_config.test_data_config.for_test = True
-    if async_load_data is not None:
-        logger.warning("Deprecated: async_load_data should be used inside"
-                       " Data definition")
-        g_config.test_data_config.async_load_data = async_load_data
-
-
-# caffe_mode: compute the output size using floor instead of ceil,
-# which is consistent with Caffe's and cuDNN's convention.
-def cnn_output_size(img_size,
-                    filter_size,
-                    padding,
-                    stride,
-                    caffe_mode,
-                    dilation=1):
-    filter_s = (filter_size - 1) * dilation + 1
-    output = (2 * padding + img_size - filter_s) / float(stride)
-    if caffe_mode:
-        return 1 + int(math.floor(output))
-    else:
-        return 1 + int(math.ceil(output))
-
-
-# calculate image_size based on output_size for de-convolution (ConvTransLayer).
-#It is the reverse function of cnn_output_size -def cnn_image_size(output_size, - filter_size, - padding, - stride, - caffe_mode, - dilation=1): - filter_s = (filter_size - 1) * dilation + 1 - img_size = (output_size - 1) * stride + filter_s - 2 * padding - if not caffe_mode: - img_size = img_size + 1 - return img_size - - -def get_img_size(input_layer_name, channels): - input = g_layer_map[input_layer_name] - img_pixels = input.size / channels - img_size = input.width if input.width > 0 else int(img_pixels**0.5) - img_size_y = input.height if input.height > 0 else int(img_pixels / - img_size) - config_assert( - img_size * img_size_y == img_pixels, - "Input layer %s: Incorrect input image size %d * %d for input image pixels %d" - % (input_layer_name, img_size, img_size_y, img_pixels)) - return img_size, img_size_y - - -def get_img3d_size(input_layer_name, channels): - input = g_layer_map[input_layer_name] - img_pixels = input.size / channels - img_size = input.width - img_size_y = input.height - img_size_z = input.depth - - config_assert( - img_size * img_size_y * img_size_z == img_pixels, - "Input layer %s: Incorrect input image size %d * %d * %d for input image pixels %d" - % (input_layer_name, img_size, img_size_y, img_size_z, img_pixels)) - return img_size, img_size_y, img_size_z - - -def parse_bilinear(bilinear, input_layer_name, bilinear_conf): - parse_image(bilinear, input_layer_name, bilinear_conf.image_conf) - bilinear_conf.out_size_x = bilinear.out_size_x - bilinear_conf.out_size_y = bilinear.out_size_y - - -def parse_pool(pool, input_layer_name, pool_conf, ceil_mode, exclude_mode): - pool_conf.pool_type = pool.pool_type - config_assert(pool.pool_type in [ - 'max-projection', 'avg-projection', 'max-pool-with-mask', 'cudnn-max-pool', 'cudnn-avg-pool' - ], "pool-type %s is not in " \ - "['max-projection', 'avg-projection', 'max-pool-with-mask'," \ - "'cudnn-max-pool', 'cudnn-avg-pool']" % pool.pool_type) - - pool_conf.channels = pool.channels - pool_conf.size_x = pool.size_x - pool_conf.stride = pool.stride - - pool_conf.size_y = default(pool.size_y, pool_conf.size_x) - pool_conf.stride_y = default(pool.stride_y, pool_conf.stride) - - pool_conf.img_size, pool_conf.img_size_y = \ - get_img_size(input_layer_name, pool.channels) - - config_assert(not pool.start, "start is deprecated in pooling.") - - if pool.padding is not None: - pool_conf.padding = pool.padding - pool_conf.padding_y = default(pool.padding_y, pool_conf.padding) - pool_conf.output_x = cnn_output_size(pool_conf.img_size, pool_conf.size_x, - pool_conf.padding, pool_conf.stride, - not ceil_mode) - pool_conf.output_y = cnn_output_size(pool_conf.img_size_y, pool_conf.size_y, - pool_conf.padding_y, - pool_conf.stride_y, not ceil_mode) - if exclude_mode != None: - pool_conf.exclude_mode = exclude_mode - - -def parse_pool3d(pool, input_layer_name, pool_conf, ceil_mode): - pool_conf.pool_type = pool.pool_type - config_assert(pool.pool_type in ['max-projection', 'avg-projection'], - "pool-type %s is not in " - "['max-projection', 'avg-projection']" % pool.pool_type) - - pool_conf.channels = pool.channels - - pool_conf.size_x = pool.size_x - pool_conf.stride = pool.stride - pool_conf.padding = pool.padding - - pool_conf.size_y = default(pool.size_y, pool_conf.size_x) - pool_conf.size_z = default(pool.size_z, pool_conf.size_x) - pool_conf.stride_y = default(pool.stride_y, pool_conf.stride) - pool_conf.stride_z = default(pool.stride_z, pool_conf.stride) - pool_conf.padding_y = default(pool.padding_y, pool_conf.padding) - 
pool_conf.padding_z = default(pool.padding_z, pool_conf.padding) - - pool_conf.img_size, pool_conf.img_size_y, pool_conf.img_size_z = \ - get_img3d_size(input_layer_name, pool.channels) - - config_assert(not pool.start, "start is deprecated in pooling.") - - if pool.padding is not None: - pool_conf.padding = pool.padding - pool_conf.padding_y = default(pool.padding_y, pool_conf.padding) - pool_conf.padding_z = default(pool.padding_z, pool_conf.padding) - pool_conf.output_x = cnn_output_size(pool_conf.img_size, pool_conf.size_x, - pool_conf.padding, pool_conf.stride, - not ceil_mode) - pool_conf.output_y = cnn_output_size(pool_conf.img_size_y, pool_conf.size_y, - pool_conf.padding_y, - pool_conf.stride_y, not ceil_mode) - pool_conf.output_z = cnn_output_size(pool_conf.img_size_z, pool_conf.size_z, - pool_conf.padding_z, - pool_conf.stride_z, not ceil_mode) - - -def parse_spp(spp, input_layer_name, spp_conf): - parse_image(spp, input_layer_name, spp_conf.image_conf) - spp_conf.pool_type = spp.pool_type - config_assert(spp.pool_type in ['max-projection', 'avg-projection'], - "pool-type %s is not in " - "['max-projection', 'avg-projection']" % spp.pool_type) - spp_conf.pyramid_height = spp.pyramid_height - - -def parse_image(image, input_layer_name, image_conf): - image_conf.channels = image.channels - image_conf.img_size, image_conf.img_size_y = \ - get_img_size(input_layer_name, image_conf.channels) - - -def parse_image3d(image, input_layer_name, image_conf): - image_conf.channels = image.channels - image_conf.img_size, image_conf.img_size_y, image_conf.img_size_z = \ - get_img3d_size(input_layer_name, image_conf.channels) - - -def parse_norm(norm, input_layer_name, norm_conf): - norm_conf.norm_type = norm.norm_type - config_assert( - norm.norm_type in - ['rnorm', 'cmrnorm-projection', 'cross-channel-norm'], - "norm-type %s is not in [rnorm, cmrnorm-projection, cross-channel-norm]" - % norm.norm_type) - norm_conf.channels = norm.channels - norm_conf.size = norm.size - norm_conf.scale = norm.scale - norm_conf.pow = norm.pow - norm_conf.blocked = norm.blocked - - norm_conf.img_size, norm_conf.img_size_y = \ - get_img_size(input_layer_name, norm.channels) - norm_conf.output_x = norm_conf.img_size - norm_conf.output_y = norm_conf.img_size_y - if norm.norm_type in ['cmrnorm-projection']: - norm_conf.scale /= norm.size - else: - norm_conf.scale /= norm.size**2 - - -#caffe_mode: compute the output size using floor instead of ceil, -# which is consistent of caffe and CuDNN's convention. 
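
A worked example of the two size formulas above (numbers illustrative): with img_size=32, filter_size=3, padding=2, stride=1, dilation=2 and caffe_mode=True,

    # effective filter extent: (3 - 1) * 2 + 1 = 5
    # output = (2 * 2 + 32 - 5) / 1.0 = 31.0, so output_x = 1 + floor(31.0) = 32
    assert cnn_output_size(32, 3, 2, 1, True, dilation=2) == 32
    # cnn_image_size inverts it: (32 - 1) * 1 + 5 - 2 * 2 = 32
    assert cnn_image_size(32, 3, 2, 1, True, dilation=2) == 32
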
-def parse_conv(conv, input_layer_name, conv_conf, num_filters, trans=False): - conv_conf.filter_size = conv.filter_size - conv_conf.filter_size_y = conv.filter_size_y - conv_conf.channels = conv.channels - conv_conf.padding = conv.padding - conv_conf.padding_y = conv.padding_y - conv_conf.stride = conv.stride - conv_conf.stride_y = conv.stride_y - conv_conf.groups = conv.groups - conv_conf.caffe_mode = conv.caffe_mode - if not conv.dilation: - conv.dilation = 1 - conv.dilation_y = 1 - else: - conv_conf.dilation = conv.dilation - conv_conf.dilation_y = conv.dilation_y - - if not trans: - conv_conf.filter_channels = conv.channels / conv.groups - conv_conf.img_size, conv_conf.img_size_y = \ - get_img_size(input_layer_name, conv.channels) - conv_conf.output_x = cnn_output_size( - conv_conf.img_size, conv_conf.filter_size, conv_conf.padding, - conv_conf.stride, conv_conf.caffe_mode, conv.dilation) - conv_conf.output_y = cnn_output_size( - conv_conf.img_size_y, conv_conf.filter_size_y, conv_conf.padding_y, - conv_conf.stride_y, conv_conf.caffe_mode, conv.dilation_y) - else: - conv_conf.filter_channels = num_filters / conv.groups - conv_conf.output_x, conv_conf.output_y = \ - get_img_size(input_layer_name, conv.channels) - conv_conf.img_size = cnn_image_size( - conv_conf.output_x, conv_conf.filter_size, conv_conf.padding, - conv_conf.stride, conv_conf.caffe_mode, conv.dilation) - conv_conf.img_size_y = cnn_image_size( - conv_conf.output_y, conv_conf.filter_size_y, conv_conf.padding_y, - conv_conf.stride_y, conv_conf.caffe_mode, conv.dilation_y) - - -#caffe_mode: compute the output size using floor instead of ceil, -# which is consistent of caffe and CuDNN's convention. -def parse_conv3d(conv, input_layer_name, conv_conf, num_filters, trans=False): - conv_conf.filter_size = conv.filter_size - conv_conf.filter_size_y = conv.filter_size_y - conv_conf.filter_size_z = conv.filter_size_z - conv_conf.channels = conv.channels - conv_conf.padding = conv.padding - conv_conf.padding_y = conv.padding_y - conv_conf.padding_z = conv.padding_z - conv_conf.stride = conv.stride - conv_conf.stride_y = conv.stride_y - conv_conf.stride_z = conv.stride_z - conv_conf.groups = conv.groups - conv_conf.caffe_mode = conv.caffe_mode - - if not trans: - conv_conf.filter_channels = conv.channels / conv.groups - conv_conf.img_size, conv_conf.img_size_y, conv_conf.img_size_z = \ - get_img3d_size(input_layer_name, conv.channels) - conv_conf.output_x = cnn_output_size( - conv_conf.img_size, conv_conf.filter_size, conv_conf.padding, - conv_conf.stride, conv_conf.caffe_mode) - conv_conf.output_y = cnn_output_size( - conv_conf.img_size_y, conv_conf.filter_size_y, conv_conf.padding_y, - conv_conf.stride_y, conv_conf.caffe_mode) - conv_conf.output_z = cnn_output_size( - conv_conf.img_size_z, conv_conf.filter_size_z, conv_conf.padding_z, - conv_conf.stride_z, conv_conf.caffe_mode) - else: - conv_conf.filter_channels = num_filters / conv.groups - conv_conf.output_x, conv_conf.output_y, conv_conf.output_z = \ - get_img3d_size(input_layer_name, conv.channels) - conv_conf.img_size = cnn_image_size( - conv_conf.output_x, conv_conf.filter_size, conv_conf.padding, - conv_conf.stride, conv_conf.caffe_mode) - conv_conf.img_size_y = cnn_image_size( - conv_conf.output_y, conv_conf.filter_size_y, conv_conf.padding_y, - conv_conf.stride_y, conv_conf.caffe_mode) - conv_conf.img_size_z = cnn_image_size( - conv_conf.output_z, conv_conf.filter_size_z, conv_conf.padding_z, - conv_conf.stride_z, conv_conf.caffe_mode) - - -def 
parse_block_expand(block_expand, input_layer_name, block_expand_conf): - block_expand_conf.channels = block_expand.channels - block_expand_conf.stride_x = block_expand.stride_x - block_expand_conf.stride_y = block_expand.stride_y - block_expand_conf.padding_x = block_expand.padding_x - block_expand_conf.padding_y = block_expand.padding_y - block_expand_conf.block_x = block_expand.block_x - block_expand_conf.block_y = block_expand.block_y - block_expand_conf.img_size_x = block_expand.img_size_x - block_expand_conf.img_size_y = block_expand.img_size_y - if block_expand_conf.img_size_x == 0: - block_expand_conf.output_x = 0 - else: - block_expand_conf.output_x = cnn_output_size( - block_expand.img_size_x, block_expand.block_x, - block_expand.padding_x, block_expand.stride_x, False) - - if block_expand_conf.img_size_y == 0: - block_expand_conf.output_y = 0 - else: - block_expand_conf.output_y = cnn_output_size( - block_expand.img_size_y, block_expand.block_y, - block_expand.padding_y, block_expand.stride_y, False) - - -def parse_maxout(maxout, input_layer_name, maxout_conf): - parse_image(maxout, input_layer_name, maxout_conf.image_conf) - maxout_conf.groups = maxout.groups - - -# Define an evaluator -@config_func -def Evaluator(name, - type, - inputs, - chunk_scheme=None, - num_chunk_types=None, - classification_threshold=None, - positive_label=None, - dict_file=None, - result_file=None, - num_results=None, - top_k=None, - delimited=None, - excluded_chunk_types=None, - overlap_threshold=None, - background_id=None, - evaluate_difficult=None, - ap_type=None): - evaluator = g_config.model_config.evaluators.add() - evaluator.type = type - evaluator.name = MakeLayerNameInSubmodel(name) - if type_of(inputs) == str: - inputs = [inputs] - - evaluator.input_layers.extend( - [MakeLayerNameInSubmodel(name) for name in inputs]) - - if chunk_scheme is not None: - evaluator.chunk_scheme = chunk_scheme - evaluator.num_chunk_types = num_chunk_types - g_current_submodel.evaluator_names.append(evaluator.name) - - if classification_threshold is not None: - evaluator.classification_threshold = classification_threshold - if positive_label is not None: - evaluator.positive_label = positive_label - if dict_file is not None: - evaluator.dict_file = dict_file - - if result_file is not None: - evaluator.result_file = result_file - if num_results is not None: - evaluator.num_results = num_results - if top_k is not None: - evaluator.top_k = top_k - if delimited is not None: - evaluator.delimited = delimited - - if excluded_chunk_types: - evaluator.excluded_chunk_types.extend(excluded_chunk_types) - - if overlap_threshold is not None: - evaluator.overlap_threshold = overlap_threshold - - if background_id is not None: - evaluator.background_id = background_id - - if evaluate_difficult is not None: - evaluator.evaluate_difficult = evaluate_difficult - - if ap_type is not None: - evaluator.ap_type = ap_type - - -class LayerBase(object): - def __init__( - self, - name, - type, - size, # size can be 0. In this case, subclass should set it. 
- inputs, - device=None, - active_type="", - drop_rate=0., - coeff=None, - error_clipping_threshold=None): - config_assert('@' not in name, - "layer name: %s contain special character @" % name) - global g_current_submodel - name = MakeLayerNameInSubmodel(name) - - config_assert(name not in g_layer_map, - 'Duplicated layer name: %s' % name) - - self.inputs = copy.deepcopy(inputs) - self.operators = [] - - if self.inputs is None: - self.inputs = [] - elif type_of(self.inputs) != list: - self.inputs = [self.inputs] - - self.config = g_config.model_config.layers.add() - assert isinstance(self.config, LayerConfig) - use_mkldnn = bool(int(g_command_config_args.get("use_mkldnn", 0))) - mkldnn_acts = ['relu', 'tanh', 'softmax'] - if use_mkldnn and active_type in mkldnn_acts: - active_type = "mkldnn_" + active_type - self.config.name = name - self.config.type = type - self.config.active_type = active_type - if coeff is not None: - self.config.coeff = float(coeff) - if size != 0: - self.config.size = size - if drop_rate != 0: - self.config.drop_rate = drop_rate - - if device is not None: - self.config.device = device - elif g_default_device is not None: - self.config.device = g_default_device - - if error_clipping_threshold is not None: - self.config.error_clipping_threshold = error_clipping_threshold - - for input_index in xrange(len(self.inputs)): - input = self.inputs[input_index] - input_config = None - input_layer_name = '' - if type_of(input) == str: - input_layer_name = input - input_config = Input( - input_layer_name=input, - parameter_name=gen_parameter_name(name, input_index)) - input_layer_name = input_config.input_layer_name - elif isinstance(input, Input): - input_layer_name = input.input_layer_name - input_config = input - if input_config.parameter_name is None: - input_config.parameter_name = \ - gen_parameter_name(name, input_index) - elif isinstance(input, Operator): - self.operators.append(input) - input.operator_conf.input_indices.append(input_index) - input_config = Input(input.input_layer_names[0]) - input_layer_name = input_config.input_layer_name - else: - raise ValueError('Wrong type for inputs: %s' % type_of(input)) - config_assert(input_layer_name in g_layer_map, - "Unknown input layer '%s' for layer %s" % - (input_layer_name, name)) - self.inputs[input_index] = input_config - layer_input = self.config.inputs.add() - layer_input.input_layer_name = input_config.input_layer_name - if input_config.input_layer_argument is not None: - layer_input.input_layer_argument = \ - input_config.input_layer_argument - - g_layer_map[name] = self.config - - g_current_submodel.layer_names.append(self.config.name) - - def get_input_layer(self, input_index): - return g_layer_map[self.config.inputs[input_index].input_layer_name] - - # will return the bias created if not *for_self* - def create_bias_parameter( - self, - bias, # True/False or BiasCfg - size, - dims=None, - for_self=True, # whether create bias for layer self - ): - - if size == 0: - return - if dims is None: - dims = [1, size] - - config_assert( - type_of(bias) == bool or type_of(bias) == Bias, - 'Incorrect type for bias: %s' % type_of(bias)) - - if type_of(bias) == bool: - if bias: - bias = Bias() - - if type_of(bias) == Bias: - if bias.parameter_name is None: - bias.parameter_name = gen_bias_parameter_name(self.config.name) - if bias.parameter_name not in g_parameter_map: - assert isinstance(self.config, LayerConfig) - - Parameter( - bias.parameter_name, - size, - self.config.device - if self.config.HasField('device') else 
None, - dims, - bias.learning_rate, - bias.momentum, - decay_rate=bias.decay_rate, - decay_rate_l1=bias.decay_rate_l1, - initial_mean=bias.initial_mean, - initial_std=bias.initial_std, - initial_strategy=bias.initial_strategy, - initial_smart=bias.initial_smart, - num_batches_regularization=bias.num_batches_regularization, - sparse_remote_update=bias.sparse_remote_update, - gradient_clipping_threshold=bias. - gradient_clipping_threshold, - is_static=bias.is_static, - is_shared=bias.is_shared, - initializer=bias.initializer) - if for_self: - self.config.bias_parameter_name = bias.parameter_name - else: - return bias.parameter_name - - def create_input_parameter(self, - input_index, - size, - dims=None, - sparse=None, - format=None): - if dims is None: - # TODO(yuyang18): print warning and callstack here! - dims = list() - - if size == 0: - return - - input_config = self.inputs[input_index] - - self.config.inputs[input_index].input_parameter_name = \ - input_config.parameter_name - - if input_config.parameter_name in g_parameter_map: - para = g_parameter_map[input_config.parameter_name] - config_assert(size == para.size, ( - 'Shared parameter "%s" does not ' + 'have same size: %s vs. %s') - % (input_config.parameter_name, para.size, size)) - - config_assert(dims == para.dims, ( - 'Shared parameter "%s" does not ' + 'have same dims: %s vs. %s') - % (input_config.parameter_name, para.dims, dims)) - return - - Parameter( - input_config.parameter_name, - size, - self.config.device if self.config.HasField("device") else None, - dims, - input_config.learning_rate, - input_config.momentum, - decay_rate=input_config.decay_rate, - decay_rate_l1=input_config.decay_rate_l1, - initial_mean=input_config.initial_mean, - initial_std=input_config.initial_std, - initial_strategy=input_config.initial_strategy, - initial_smart=input_config.initial_smart, - num_batches_regularization=input_config.num_batches_regularization, - sparse_remote_update=input_config.sparse_remote_update, - sparse_update=input_config.sparse_update, - gradient_clipping_threshold=input_config. 
- gradient_clipping_threshold, - sparse=sparse, - format=format, - is_static=input_config.is_static, - is_shared=input_config.is_shared, - update_hooks=input_config.update_hooks, - initializer=input_config.initializer) - - def set_layer_size(self, size): - if self.config.size == 0: - self.config.size = size - else: - config_assert(self.config.size == size, - 'Different inputs result in' + - 'different layer size at layer %s' % self.config.name) - - def set_layer_height_width(self, height, width): - self.config.height = height - self.config.width = width - - def set_layer_depth(self, depth): - self.config.depth = depth - - def set_cnn_layer(self, - input_layer_name, - height, - width, - channels, - is_print=True): - size = height * width * channels - self.set_layer_size(size) - self.set_layer_height_width(height, width) - if is_print: - print("output for %s: c = %d, h = %d, w = %d, size = %d" % - (input_layer_name, channels, height, width, size)) - - -@config_layer('multi_class_cross_entropy_with_selfnorm') -class MultiClassCrossEntropySelfNormCostLayer(LayerBase): - def __init__(self, name, inputs, softmax_selfnorm_alpha=0.1, **xargs): - super(MultiClassCrossEntropySelfNormCostLayer, self).__init__( - name, 'multi_class_cross_entropy_with_selfnorm', 0, inputs, **xargs) - self.config.softmax_selfnorm_alpha = softmax_selfnorm_alpha - - -@config_layer('cross_entropy_over_beam') -class CrossEntropyOverBeamLayer(LayerBase): - def __init__(self, name, inputs, **xargs): - config_assert(len(inputs) % 3 == 0, "Error input number.") - super(CrossEntropyOverBeamLayer, self).__init__( - name, 'cross_entropy_over_beam', 0, inputs, **xargs) - input_num = len(inputs) / 3 - for i in range(input_num): - input_layer = self.get_input_layer(i * 3) - config_assert(input_layer.size == 1, ( - "Inputs for this layer are made up of " - "several triples, in which the first one is scores over " - "all candidate paths, whose size should be equal to 1.")) - - -@config_layer('fc') -class FCLayer(LayerBase): - layer_type = 'fc' - - def __init__(self, - name, - size, - inputs, - bias=True, - error_clipping_threshold=None, - **xargs): - use_mkldnn = bool(int(g_command_config_args.get("use_mkldnn", 0))) - use_mkldnn_wgt = bool( - int(g_command_config_args.get("use_mkldnn_wgt", 0))) - if use_mkldnn: - self.layer_type = 'mkldnn_fc' - config_assert( - len(inputs) == 1, - "MKLDNNFCLayer support one and only one input!") - super(FCLayer, self).__init__( - name, self.layer_type, size, inputs=inputs, **xargs) - for input_index in xrange(len(self.inputs)): - input_layer = self.get_input_layer(input_index) - psize = self.config.size * input_layer.size - dims = [input_layer.size, self.config.size] - format = self.inputs[input_index].format - sparse = format == "csr" or format == "csc" - if use_mkldnn: - config_assert(not sparse, - "MKLDNNFCLayer do not support sparse format yet") - if use_mkldnn_wgt: - dims = [self.config.size, input_layer.size] - if sparse: - psize = self.inputs[input_index].nnz - else: - sparse = None - - self.create_input_parameter(input_index, psize, dims, sparse, - format) - self.create_bias_parameter(bias, self.config.size) - if error_clipping_threshold is not None: - self.config.error_clipping_threshold = error_clipping_threshold - - -@config_layer('mkldnn_fc') -class MKLDNNFcLayer(FCLayer): - layer_type = 'mkldnn_fc' - - -@config_layer('selective_fc') -class SelectiveFCLayer(LayerBase): - def __init__(self, - name, - size, - inputs, - bias=True, - selective_fc_pass_generation=False, - 
has_selected_colums=True,
-                 selective_fc_full_mul_ratio=0.02,
-                 selective_fc_parallel_plain_mul_thread_num=None,
-                 **xargs):
-        super(SelectiveFCLayer, self).__init__(
-            name, 'selective_fc', size, inputs=inputs, **xargs)
-        # The user MUST know that if selective fc is used in training,
-        # the parameter matrices saved by this layer are automatically
-        # transposed, BUT the bias is not.
-
-        # If selective_fc is used only in testing mode, and the parameters for
-        # this layer are trained by fully connected layers,
-        # then TransposedFullMatrixProjection MUST be used in training
-        # to avoid a manual transpose in testing.
-
-        self.config.selective_fc_pass_generation = selective_fc_pass_generation
-        self.config.has_selected_colums = has_selected_colums
-        self.config.selective_fc_full_mul_ratio = selective_fc_full_mul_ratio
-        if selective_fc_parallel_plain_mul_thread_num is not None:
-            self.config.selective_fc_parallel_plain_mul_thread_num = selective_fc_parallel_plain_mul_thread_num
-
-        input_num = len(self.inputs)
-        if has_selected_colums:
-            config_assert(input_num >= 2,
-                          ("if indices of selected columns are not specified, "
-                           "the selective_fc layer has at least two inputs"))
-            input_num -= 1
-
-        for input_index in xrange(input_num):
-            input_layer = self.get_input_layer(input_index)
-            psize = self.config.size * input_layer.size
-            dims = [input_layer.size, self.config.size]
-            dims = dims[::-1]  # transpose the parameter
-            format = self.inputs[input_index].format
-            sparse = format == "csr" or format == "csc"
-            if sparse:
-                psize = self.inputs[input_index].nnz
-
-            self.create_input_parameter(input_index, psize, dims, sparse,
-                                        format)
-        self.create_bias_parameter(bias, self.config.size)
-
-
-@config_layer('print')
-class PrintLayer(LayerBase):
-    def __init__(self, name, inputs, format=None):
-        super(PrintLayer, self).__init__(name, 'print', 0, inputs)
-        if format is None:
-            format = "\n".join([
-                "layer=" + input.input_layer_name + " %s"
-                for input in self.inputs
-            ])
-        self.config.user_arg = format
-
-
-@config_layer('priorbox')
-class PriorBoxLayer(LayerBase):
-    def __init__(self, name, inputs, size, min_size, max_size, aspect_ratio,
-                 variance):
-        super(PriorBoxLayer, self).__init__(name, 'priorbox', 0, inputs)
-        config_assert(len(inputs) == 2, 'PriorBoxLayer must have 2 inputs')
-        input_layer = self.get_input_layer(1)
-        config_assert(
-            input_layer.type == 'data',
-            'Expecting the second input layer of a priorbox layer to be '
-            'a data layer')
-        config_assert(input_layer.width > 0, 'The data layer must set width')
-        config_assert(input_layer.height > 0, 'The data layer must set height')
-        config_assert(len(variance) == 4, 'The variance must have 4 inputs')
-        self.config.inputs[0].priorbox_conf.min_size.extend(min_size)
-        self.config.inputs[0].priorbox_conf.max_size.extend(max_size)
-        self.config.inputs[0].priorbox_conf.aspect_ratio.extend(aspect_ratio)
-        self.config.inputs[0].priorbox_conf.variance.extend(variance)
-        self.config.size = size
-
-
-@config_layer('multibox_loss')
-class MultiBoxLossLayer(LayerBase):
-    def __init__(self, name, inputs, input_num, num_classes, overlap_threshold,
-                 neg_pos_ratio, neg_overlap, background_id, **xargs):
-        super(MultiBoxLossLayer, self).__init__(name, 'multibox_loss', 0,
-                                                inputs)
-        config_assert(
-            len(inputs) == (input_num * 2 + 2),
-            'MultiBoxLossLayer does not have enough inputs')
-        config_assert(num_classes > background_id,
-                      'The number of classes must be greater than the '
-                      'background ID')
-        self.config.inputs[0].multibox_loss_conf.num_classes = num_classes
-        self.config.inputs[
0].multibox_loss_conf.overlap_threshold = overlap_threshold - self.config.inputs[0].multibox_loss_conf.neg_pos_ratio = neg_pos_ratio - self.config.inputs[0].multibox_loss_conf.neg_overlap = neg_overlap - self.config.inputs[0].multibox_loss_conf.background_id = background_id - self.config.inputs[0].multibox_loss_conf.input_num = input_num - self.config.size = 1 - - -@config_layer('detection_output') -class DetectionOutputLayer(LayerBase): - def __init__(self, name, inputs, size, input_num, num_classes, - nms_threshold, nms_top_k, keep_top_k, confidence_threshold, - background_id, **xargs): - super(DetectionOutputLayer, self).__init__(name, 'detection_output', 0, - inputs) - config_assert( - len(inputs) == (input_num * 2 + 1), - 'DetectionOutputLayer does not have enough inputs') - config_assert(num_classes > background_id, - 'Classes number must greater than background ID') - self.config.inputs[0].detection_output_conf.num_classes = num_classes - self.config.inputs[ - 0].detection_output_conf.nms_threshold = nms_threshold - self.config.inputs[0].detection_output_conf.nms_top_k = nms_top_k - self.config.inputs[0].detection_output_conf.keep_top_k = keep_top_k - self.config.inputs[ - 0].detection_output_conf.confidence_threshold = confidence_threshold - self.config.inputs[ - 0].detection_output_conf.background_id = background_id - self.config.inputs[0].detection_output_conf.input_num = input_num - self.config.size = size - - -@config_layer('roi_pool') -class ROIPoolLayer(LayerBase): - def __init__(self, name, inputs, pooled_width, pooled_height, spatial_scale, - num_channels, **xargs): - super(ROIPoolLayer, self).__init__(name, 'roi_pool', 0, inputs) - config_assert(len(inputs) == 2, 'ROIPoolLayer must have 2 inputs') - self.config.inputs[0].roi_pool_conf.pooled_width = pooled_width - self.config.inputs[0].roi_pool_conf.pooled_height = pooled_height - self.config.inputs[0].roi_pool_conf.spatial_scale = spatial_scale - self.set_cnn_layer(name, pooled_height, pooled_width, num_channels) - - -@config_layer('data') -class DataLayer(LayerBase): - def __init__(self, - name, - size, - depth=None, - height=None, - width=None, - device=None): - super(DataLayer, self).__init__( - name, 'data', size, inputs=[], device=device) - if height and width: - self.set_layer_height_width(height, width) - if depth: - self.set_layer_depth(depth) - - -''' -DataNormLayer: A layer for data normalization -Input: One and only one input layer is accepted. The input layer must - be DataLayer with dense data type -Output: The normalization of the input data - -Reference: - LA Shalabi, Z Shaaban, B Kasasbeh. Data mining: A preprocessing engine - -Example: - Layer( - name = "norm_input_layer", - type = "data_norm", - inputs = [Input("input_layer", - parameter_name = "_slot0.stats")], - data_norm_strategy = "z-score", - ) - -Note: - (1) The parameter has been calculated in the preprocessing stage, - and should be initialized by --init_model_path when training. 
-    (2) Three data normalization methods are considered
-            z-score: y = (x-mean)/std
-            min-max: y = (x-min)/(max-min)
-            decimal-scaling: y = x/10^j, where j is the smallest integer such that max(|y|)<1
-'''
-
-
-@config_layer('data_norm')
-class DataNormLayer(LayerBase):
-    def __init__(self, name, inputs, data_norm_strategy="z-score", device=None):
-        super(DataNormLayer, self).__init__(
-            name, 'data_norm', 0, inputs=inputs, device=device)
-        self.config.data_norm_strategy = data_norm_strategy
-        config_assert(len(inputs) == 1, 'DataNormLayer must have 1 input')
-        input_layer = self.get_input_layer(0)
-        self.set_layer_size(input_layer.size)
-        para_size = 5 * input_layer.size
-        para_dims = [5, input_layer.size]
-        self.inputs[0].is_static = True
-        self.create_input_parameter(0, para_size, para_dims)
-
-
-@config_layer('prelu')
-class ParameterReluLayer(LayerBase):
-    layer_type = 'prelu'
-
-    def __init__(self, name, inputs, partial_sum=1, **args):
-        super(ParameterReluLayer, self).__init__(
-            name, self.layer_type, 0, inputs=inputs, **args)
-
-        input_layer = self.get_input_layer(0)
-        config_assert(len(self.inputs) == 1, "prelu layer has only one input.")
-        config_assert(input_layer.size % partial_sum == 0,
-                      "a wrong setting for partial_sum")
-
-        dims = [1, input_layer.size / partial_sum]
-        self.set_layer_size(input_layer.size)
-        self.config.partial_sum = partial_sum
-        self.create_input_parameter(0, input_layer.size / partial_sum, dims)
-
-        self.set_layer_height_width(self.get_input_layer(0).height, \
-                                    self.get_input_layer(0).width)
-        self.set_layer_depth(self.get_input_layer(0).depth)
-
-
-@config_layer('conv')
-class ConvLayerBase(LayerBase):
-    layer_type = 'conv'
-
-    def __init__(self,
-                 name,
-                 inputs=[],
-                 bias=True,
-                 num_filters=None,
-                 shared_biases=False,
-                 **xargs):
-        super(ConvLayerBase, self).__init__(
-            name, self.layer_type, 0, inputs=inputs, **xargs)
-
-        if num_filters is not None:
-            self.config.num_filters = num_filters
-
-        use_mkldnn = int(g_command_config_args.get("use_mkldnn", 0))
-        use_gpu = int(g_command_config_args.get("use_gpu", 0))
-        parallel_nn = int(g_command_config_args.get("parallel_nn", 0))
-
-        # Automatically select cudnn_conv for GPU, exconv for CPU
-        # and mkldnn_conv for MKLDNN when type=conv is set, but still
-        # preserve the way the user specifies
-        # exconv, mkldnn_conv or cudnn_conv manually.
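
The resulting selection can be summarized as follows (an illustrative truth table, not part of the original file; it assumes parallel_nn == 0):

    # requested type   use_gpu   use_mkldnn   ->  actual layer_type
    # 'conv'              1          any      ->  'cudnn_conv'
    # 'conv'              0          1        ->  'mkldnn_conv'
    # 'conv'              0          0        ->  'exconv'
    # 'exconv'            any        0        ->  'exconv'
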
- if self.layer_type == "cudnn_conv": - config_assert(use_gpu, "cudnn_conv only support GPU") - - if self.layer_type == "mkldnn_conv": - config_assert(use_mkldnn, "mkldnn_conv only support MKLDNN") - - if (use_gpu == 1 and self.layer_type != "exconv" and - self.layer_type != "mkldnn_conv" and - (parallel_nn == 0 or self.config.device > -1)): - self.layer_type = "cudnn_conv" - else: - self.layer_type = "mkldnn_conv" if use_mkldnn else "exconv" - # need to specify layer in config - self.config.type = self.layer_type - - if shared_biases is not None: - self.config.shared_biases = shared_biases - - for input_index in xrange(len(self.inputs)): - input_layer = self.get_input_layer(input_index) - conv_conf = self.config.inputs[input_index].conv_conf - parse_conv(self.inputs[input_index].conv, input_layer.name, - conv_conf, num_filters) - psize = self.calc_parameter_size(conv_conf) - self.create_input_parameter(input_index, psize) - self.set_cnn_layer(name, conv_conf.output_y, conv_conf.output_x, - self.config.num_filters) - - psize = self.config.size - if shared_biases: - psize = self.config.num_filters - self.create_bias_parameter(bias, psize, [psize, 1]) - - def calc_parameter_size(self, conv_conf): - return self.config.num_filters * conv_conf.filter_channels \ - * (conv_conf.filter_size * conv_conf.filter_size_y) - - -@config_layer('exconv') -class ConvLayer(ConvLayerBase): - layer_type = 'exconv' - - -@config_layer('mkldnn_conv') -class ConvLayer(ConvLayerBase): - layer_type = 'mkldnn_conv' - - -@config_layer('cudnn_conv') -class ConvLayer(ConvLayerBase): - layer_type = 'cudnn_conv' - - -@config_layer('convt') -class ConvTransLayerBase(LayerBase): - layer_type = 'convt' - - def __init__(self, - name, - inputs=[], - bias=True, - num_filters=None, - shared_biases=False, - **xargs): - super(ConvTransLayerBase, self).__init__( - name, self.layer_type, 0, inputs=inputs, **xargs) - - if num_filters is not None: - self.config.num_filters = num_filters - - use_gpu = int(g_command_config_args.get("use_gpu", 0)) - parallel_nn = int(g_command_config_args.get("parallel_nn", 0)) - - # Automatically select cudnn_type for GPU and exconvt for CPU - # if set type=exconvt, but still reserve the way user specify - # exconvt or cudnn_convt manually. 
- if self.layer_type == "cudnn_convt": - config_assert(use_gpu, "cudnn_convt only support GPU") - - if (use_gpu == 1 and self.layer_type != "exconvt" and - (parallel_nn == 0 or self.config.device > -1)): - self.layer_type = "cudnn_convt" - else: - self.layer_type = "exconvt" - # need to specify layer in config - self.config.type = self.layer_type - - if shared_biases is not None: - self.config.shared_biases = shared_biases - - for input_index in xrange(len(self.inputs)): - input_layer = self.get_input_layer(input_index) - parse_conv( - self.inputs[input_index].conv, - input_layer.name, - self.config.inputs[input_index].conv_conf, - num_filters, - trans=True) - conv_conf = self.config.inputs[input_index].conv_conf - psize = self.calc_parameter_size(conv_conf) - self.create_input_parameter(input_index, psize) - self.set_cnn_layer(name, conv_conf.img_size_y, conv_conf.img_size, - self.config.num_filters) - - psize = self.config.size - if shared_biases: - psize = self.config.num_filters - self.create_bias_parameter(bias, psize, [psize, 1]) - - def calc_parameter_size(self, conv_conf): - return conv_conf.channels * conv_conf.filter_channels \ - * (conv_conf.filter_size * conv_conf.filter_size_y) - - -@config_layer('exconvt') -class ConvTransLayer(ConvTransLayerBase): - layer_type = 'exconvt' - - -@config_layer('cudnn_convt') -class ConvTransLayer(ConvTransLayerBase): - layer_type = 'cudnn_convt' - - -@config_layer('conv_3d') -class Conv3DLayerBase(LayerBase): - def __init__(self, - name, - inputs=[], - bias=True, - num_filters=None, - shared_biases=True, - **xargs): - super(Conv3DLayerBase, self).__init__( - name, self.layer_type, 0, inputs=inputs, **xargs) - - if num_filters is not None: - self.config.num_filters = num_filters - - # need to specify layer in config - self.config.type = self.layer_type - - trans = False - if self.config.type == "deconv3d": - trans = True - - if shared_biases is not None: - self.config.shared_biases = shared_biases - - for input_index in xrange(len(self.inputs)): - input_layer = self.get_input_layer(input_index) - conv_conf = self.config.inputs[input_index].conv_conf - parse_conv3d( - self.inputs[input_index].conv, - input_layer.name, - conv_conf, - num_filters, - trans=trans - ) # for z-axis pad:0, strid:1, filter_size:1, img_size:1 - psize = self.calc_parameter_size(conv_conf) - self.create_input_parameter(input_index, psize) - if trans: - self.set_cnn_layer(name, conv_conf.img_size_z, - conv_conf.img_size_y, conv_conf.img_size, - self.config.num_filters) - else: - self.set_cnn_layer(name, conv_conf.output_z, conv_conf.output_y, - conv_conf.output_x, self.config.num_filters) - - psize = self.config.size - if shared_biases: - psize = self.config.num_filters - self.create_bias_parameter(bias, psize, [psize, 1]) - - def calc_parameter_size(self, conv_conf): - return self.config.num_filters * conv_conf.filter_channels \ - * (conv_conf.filter_size * conv_conf.filter_size_y \ - * conv_conf.filter_size_z) - - def set_cnn_layer(self, - input_layer_name, - depth, - height, - width, - channels, - is_print=True): - size = depth * height * width * channels - self.set_layer_size(size) - self.set_layer_height_width(height, width) - self.set_layer_depth(depth) - if is_print: - print("output for %s: c = %d, d = %d, h = %d, w = %d, size = %d" % - (input_layer_name, channels, depth, height, width, size)) - - -@config_layer('conv3d') -class Conv3DLayer(Conv3DLayerBase): - layer_type = 'conv3d' - - -@config_layer('deconv3d') -class Conv3DLayer(Conv3DLayerBase): - layer_type = 
'deconv3d' - - -@config_layer('norm') -class NormLayer(LayerBase): - def __init__(self, name, inputs, **xargs): - super(NormLayer, self).__init__(name, 'norm', 0, inputs=inputs, **xargs) - use_mkldnn = bool(int(g_command_config_args.get("use_mkldnn", 0))) - use_mkldnn = True if use_mkldnn and self.inputs[ - 0].norm.norm_type == 'cmrnorm-projection' else False - self.config.type = 'mkldnn_lrn' if use_mkldnn else self.config.type - for input_index in xrange(len(self.inputs)): - input_layer = self.get_input_layer(input_index) - norm_conf = self.config.inputs[input_index].norm_conf - parse_norm(self.inputs[input_index].norm, input_layer.name, - norm_conf) - norm_conf.scale = self.inputs[ - input_index].norm.scale if use_mkldnn else norm_conf.scale - self.set_cnn_layer(name, norm_conf.output_y, norm_conf.output_x, - norm_conf.channels, False) - if norm_conf.norm_type == "cross-channel-norm": - self.create_input_parameter(0, norm_conf.channels, - [norm_conf.channels, 1]) - - -@config_layer('pool') -class PoolLayer(LayerBase): - layer_type = 'pool' - - def __init__(self, name, inputs, ceil_mode=True, exclude_mode=None, - **xargs): - use_mkldnn = int(g_command_config_args.get("use_mkldnn", 0)) - if self.layer_type == "mkldnn_pool": - config_assert(use_mkldnn, "mkldnn_pool only support MKLDNN") - self.layer_type = 'mkldnn_pool' if use_mkldnn else 'pool' - super(PoolLayer, self).__init__( - name, self.layer_type, 0, inputs=inputs, **xargs) - for input_index in xrange(len(self.inputs)): - input_layer = self.get_input_layer(input_index) - pool_conf = self.config.inputs[input_index].pool_conf - parse_pool(self.inputs[input_index].pool, input_layer.name, - pool_conf, ceil_mode, exclude_mode) - self.set_cnn_layer(name, pool_conf.output_y, pool_conf.output_x, - pool_conf.channels) - - -@config_layer('mkldnn_pool') -class MKLDNNPoolLayer(PoolLayer): - layer_type = 'mkldnn_pool' - - -@config_layer('pool3d') -class Pool3DLayer(LayerBase): - def __init__(self, name, inputs, ceil_mode=True, **xargs): - super(Pool3DLayer, self).__init__( - name, 'pool3d', 0, inputs=inputs, **xargs) - for input_index in xrange(len(self.inputs)): - input_layer = self.get_input_layer(input_index) - pool_conf = self.config.inputs[input_index].pool_conf - parse_pool3d(self.inputs[input_index].pool, input_layer.name, - pool_conf, ceil_mode) - self.set_cnn_layer(name, pool_conf.output_z, pool_conf.output_y, - pool_conf.output_x, pool_conf.channels) - - def set_cnn_layer(self, - input_layer_name, - depth, - height, - width, - channels, - is_print=True): - size = depth * height * width * channels - self.set_layer_size(size) - self.set_layer_height_width(height, width) - self.set_layer_depth(depth) - if is_print: - print("output for %s: c = %d, d = %d, h = %d, w = %d, size = %d" % - (input_layer_name, channels, depth, height, width, size)) - - -@config_layer('spp') -class SpatialPyramidPoolLayer(LayerBase): - def __init__(self, name, inputs, **xargs): - super(SpatialPyramidPoolLayer, self).__init__( - name, 'spp', 0, inputs=inputs, **xargs) - for input_index in xrange(len(self.inputs)): - input_layer = self.get_input_layer(input_index) - spp_conf = self.config.inputs[input_index].spp_conf - parse_spp(self.inputs[input_index].spp, input_layer.name, spp_conf) - output_x = (pow(4, spp_conf.pyramid_height) - 1) / (4 - 1) - self.set_cnn_layer(name, 1, output_x, spp_conf.image_conf.channels) - - -@config_layer('upsample') -class UpsampleLayer(LayerBase): - def __init__(self, name, inputs, **xargs): - super(UpsampleLayer, self).__init__( - 
name, 'upsample', 0, inputs=inputs, **xargs) - - input_layer = self.get_input_layer(0) - image_conf = self.config.inputs[0].upsample_conf.image_conf - image_conf.img_size = input_layer.width - image_conf.img_size_y = input_layer.height - image_conf.channels = input_layer.size / (input_layer.width * - input_layer.height) - - upsample = self.inputs[0].upsample - output_x = 0 - output_y = 0 - output_size = 0 - - if upsample.scale: - self.config.inputs[0].upsample_conf.scale = upsample.scale - self.config.inputs[0].upsample_conf.scale_y = upsample.scale_y - output_x = input_layer.width * upsample.scale - output_y = input_layer.height * upsample.scale_y - self.config.inputs[0].upsample_conf.pad_out_x = upsample.pad_out_x - self.config.inputs[0].upsample_conf.pad_out_y = upsample.pad_out_y - if upsample.upsample_size: - self.config.inputs[ - 0].upsample_conf.upsample_size = upsample.upsample_size - self.config.inputs[ - 0].upsample_conf.upsample_size_y = upsample.upsample_size_y - output_x = upsample.upsample_size - output_y = upsample.upsample_size_y - - output_size = image_conf.channels * output_x * output_y - - self.set_layer_height_width(output_y, output_x) - self.set_layer_depth(input_layer.depth) - self.set_layer_size(output_size) - - -@config_layer('pad') -class PadLayer(LayerBase): - def __init__(self, name, inputs, **xargs): - super(PadLayer, self).__init__(name, 'pad', 0, inputs=inputs, **xargs) - pad = self.inputs[0].pad - self.config.inputs[0].pad_conf.pad_c.extend(pad.pad_c) - self.config.inputs[0].pad_conf.pad_h.extend(pad.pad_h) - self.config.inputs[0].pad_conf.pad_w.extend(pad.pad_w) - - input_layer = self.get_input_layer(0) - image_conf = self.config.inputs[0].pad_conf.image_conf - parse_image(pad, input_layer.name, image_conf) - out_ch = pad.channels + pad.pad_c[0] + pad.pad_c[1] - out_h = image_conf.img_size_y + pad.pad_h[0] + pad.pad_h[1] - out_w = image_conf.img_size + pad.pad_w[0] + pad.pad_w[1] - self.set_cnn_layer(name, out_h, out_w, out_ch) - self.config.size = out_ch * out_h * out_w - - -@config_layer('crop') -class CropLayer(LayerBase): - def __init__(self, name, inputs, axis, offset, shape, **xargs): - super(CropLayer, self).__init__(name, 'crop', 0, inputs=inputs, **xargs) - self.config.axis = axis - self.config.offset.extend(offset) - self.config.shape.extend(shape) - - # get channel, width and height from input_0 layer - input_layer = self.get_input_layer(0) - image_conf = self.config.inputs[0].image_conf - image_conf.img_size = input_layer.width - image_conf.img_size_y = input_layer.height - image_conf.channels = input_layer.size / (input_layer.width * - input_layer.height) - # only support for 4-dims inputs and NCHW order - if (len(self.config.inputs) == 2): - self.set_layer_height_width( - self.get_input_layer(1).height, self.get_input_layer(1).width) - self.set_layer_size(self.get_input_layer(1).size) - else: - self.set_layer_height_width(shape[-2], shape[-1]) - self.set_layer_size(reduce(lambda x, y: x * y, shape[1:])) - - -@config_layer('batch_norm') -class BatchNormLayer(LayerBase): - layer_type = 'batch_norm' - - def __init__(self, - name, - inputs, - bias=True, - img3D=False, - use_global_stats=True, - epsilon=1e-5, - moving_average_fraction=0.9, - batch_norm_type=None, - mean_var_names=None, - **xargs): - if inputs is None: - inputs = [] - elif not isinstance(inputs, list): - inputs = [inputs] - config_assert( - len(inputs) == 1, "BatchNormLayer must have one and only one input") - # Create Input for moving mean and std, - # in batch normalization layer. 
-        # These parameters do not need to be updated, so is_static is set
-        # to true. Without is_static, even with learning_rate = 0 and
-        # decay_rate = 0, these parameters would still change whenever
-        # average_window is set in the configuration.
-        use_gpu = bool(int(g_command_config_args.get("use_gpu", 0)))
-        use_mkldnn = bool(int(g_command_config_args.get("use_mkldnn", 0)))
-        is_shared = True if not use_gpu else False
-        for i in xrange(2):
-            inputs.append(
-                Input(
-                    inputs[0].input_layer_name,
-                    initial_std=0.0,
-                    initial_mean=0.0,
-                    is_static=True,
-                    is_shared=is_shared,
-                    make_layer_name_in_submodel=False, ))
-
-        parallel_nn = bool(int(g_command_config_args.get("parallel_nn", 0)))
-        cudnn_version = int(g_command_config_args.get("cudnn_version", 0))
-        # Automatically select cudnn_batch_norm for GPU, batch_norm for CPU
-        # and mkldnn_batch_norm for MKLDNN, also depending on the cudnn version.
-        if batch_norm_type == "mkldnn_batch_norm":
-            config_assert(use_mkldnn, "mkldnn_batch_norm only supports MKLDNN")
-        use_cudnn = use_gpu and batch_norm_type != "batch_norm" and \
-            not use_mkldnn and batch_norm_type != "mkldnn_batch_norm" and \
-            ((not parallel_nn) or self.config.device > -1)
-        if use_cudnn:
-            self.layer_type = "cudnn_batch_norm"
-        else:
-            self.layer_type = "mkldnn_batch_norm" if use_mkldnn else "batch_norm"
-        super(BatchNormLayer, self).__init__(
-            name, self.layer_type, 0, inputs=inputs, **xargs)
-
-        if use_global_stats is not None:
-            self.config.use_global_stats = use_global_stats
-        if moving_average_fraction is not None:
-            self.config.moving_average_fraction = moving_average_fraction
-        if epsilon is not None:
-            assert epsilon >= 1e-5, "epsilon must be no less than 1e-5."
-            self.config.epsilon = epsilon
-
-        input_layer = self.get_input_layer(0)
-        image_conf = self.config.inputs[0].image_conf
-        if img3D:
-            parse_image3d(self.inputs[0].image, input_layer.name, image_conf)
-            # Only pass the width and height of the input to the batch_norm
-            # layer when either of them is non-zero.
-            if input_layer.width != 0 or input_layer.height != 0:
-                self.set_cnn_layer(
-                    input_layer_name=name,
-                    depth=image_conf.img_size_z,
-                    height=image_conf.img_size_y,
-                    width=image_conf.img_size,
-                    channels=image_conf.channels,
-                    is_print=True)
-            else:
-                self.set_layer_size(input_layer.size)
-        else:
-            parse_image(self.inputs[0].image, input_layer.name, image_conf)
-            # Only pass the width and height of the input to the batch_norm
-            # layer when either of them is non-zero.
- if input_layer.width != 0 or input_layer.height != 0: - self.set_cnn_layer( - input_layer_name=name, - height=image_conf.img_size_y, - width=image_conf.img_size, - channels=image_conf.channels, - is_print=True) - else: - self.set_layer_size(input_layer.size) - - psize = self.calc_parameter_size(image_conf) - dims = [1, psize] - if mean_var_names is not None: - assert len(mean_var_names) == 2 - self.inputs[1].parameter_name = mean_var_names[0] - self.inputs[2].parameter_name = mean_var_names[1] - - self.create_input_parameter(0, psize) - self.create_input_parameter(1, psize, dims) - self.create_input_parameter(2, psize, dims) - - self.create_bias_parameter(bias, psize) - - def set_cnn_layer(self, - input_layer_name, - depth=None, - height=None, - width=None, - channels=None, - is_print=True): - depthIsNone = False - if depth is None: - depth = 1 - depthIsNone = True - size = depth * height * width * channels - self.set_layer_size(size) - self.set_layer_height_width(height, width) - self.set_layer_depth(depth) - if is_print and depthIsNone: - print("output for %s: c = %d, h = %d, w = %d, size = %d" % - (input_layer_name, channels, height, width, size)) - elif is_print: - print("output for %s: c = %d, d = %d, h = %d, w = %d, size = %d" % - (input_layer_name, channels, depth, height, width, size)) - - def calc_parameter_size(self, image_conf): - return image_conf.channels - - -@config_layer('trans') -class TransLayer(LayerBase): - def __init__(self, name, inputs, **xargs): - super(TransLayer, self).__init__( - name, 'trans', 0, inputs=inputs, **xargs) - config_assert( - len(self.inputs) == 1, - 'TransLayer must have one and only one input') - self.set_layer_size(self.get_input_layer(0).size) - - -@config_layer('resize') -class ResizeLayer(LayerBase): - def __init__(self, name, size, inputs, **xargs): - super(ResizeLayer, self).__init__( - name, 'resize', size=size, inputs=inputs, **xargs) - config_assert( - len(self.inputs) == 1, - 'ResizeLayer must have one and only one input') - - -@config_layer('rotate') -class RotateLayer(LayerBase): - def __init__(self, name, inputs, height, width, device=None): - super(RotateLayer, self).__init__( - name, 'rotate', 0, inputs=inputs, device=device) - config_assert( - len(self.inputs) == 1, - 'RotateLayer must have one and only one input') - self.set_layer_height_width(height, width) - self.set_layer_size(self.get_input_layer(0).size) - - -@config_layer('blockexpand') -class BlockExpandLayer(LayerBase): - def __init__(self, name, inputs, **xargs): - super(BlockExpandLayer, self).__init__( - name, 'blockexpand', 0, inputs=inputs, **xargs) - for input_index in xrange(len(self.inputs)): - input_layer = self.get_input_layer(input_index) - parse_block_expand( - self.inputs[input_index].block_expand, input_layer.name, - self.config.inputs[input_index].block_expand_conf) - block_expand_conf = self.config.inputs[ - input_index].block_expand_conf - self.set_layer_size(block_expand_conf.block_x * - block_expand_conf.block_y * - block_expand_conf.channels) - - -@config_layer('maxout') -class MaxOutLayer(LayerBase): - def __init__(self, name, inputs, **xargs): - super(MaxOutLayer, self).__init__( - name, 'maxout', 0, inputs=inputs, **xargs) - input_layer = self.get_input_layer(0) - maxout_conf = self.config.inputs[0].maxout_conf - parse_maxout(self.inputs[0].maxout, input_layer.name, maxout_conf) - out_channels = maxout_conf.image_conf.channels / maxout_conf.groups - self.set_cnn_layer(name, maxout_conf.image_conf.img_size_y, - maxout_conf.image_conf.img_size, 
out_channels) - - -@config_layer('row_conv') -class RowConvLayer(LayerBase): - def __init__(self, name, inputs, context_length, **xargs): - super(RowConvLayer, self).__init__( - name, 'row_conv', 0, inputs=inputs, **xargs) - config_assert( - len(self.inputs) == 1, - 'row convolution layer must have one and only one input.') - input_layer = self.get_input_layer(0) - row_conv_conf = self.config.inputs[0].row_conv_conf - row_conv_conf.context_length = context_length - self.set_layer_size(input_layer.size) - psize = context_length * input_layer.size - dims = [context_length, input_layer.size] - self.create_input_parameter(0, psize, dims) - - -@config_layer('clip') -class ClipLayer(LayerBase): - def __init__(self, name, inputs, min, max, **xargs): - super(ClipLayer, self).__init__(name, 'clip', 0, inputs=inputs, **xargs) - config_assert( - len(self.inputs) == 1, - 'ClipLayer must have one and only one input.') - config_assert(min < max, 'min must be less than max.') - input_layer = self.get_input_layer(0) - self.set_layer_size(input_layer.size) - self.config.inputs[0].clip_conf.min = min - self.config.inputs[0].clip_conf.max = max - - -@config_layer('scale_shift') -class ScaleShiftLayer(LayerBase): - def __init__(self, name, inputs, bias=True, **xargs): - super(ScaleShiftLayer, self).__init__( - name, 'scale_shift', 0, inputs=inputs, **xargs) - config_assert( - len(self.inputs) == 1, - 'ScaleShiftLayer must have one and only one input.') - input_layer = self.get_input_layer(0) - self.set_layer_size(input_layer.size) - self.create_input_parameter(0, 1, [1, 1]) - self.create_bias_parameter(bias, 1) - - -# key: cost type -# value: cost class -g_cost_map = {} - - -# define a cost layer without any parameters -def define_cost(class_name, cost_type): - def init(cls, name, inputs, device=None, coeff=1.): - super(type(cls), cls).__init__( - name, cost_type, 1, inputs, device=device, coeff=coeff) - - cls = type(class_name, (LayerBase, ), dict(__init__=init)) - global g_cost_map - g_cost_map[cost_type] = cls - - -define_cost('MultiClassCrossEntropy', 'multi-class-cross-entropy') -define_cost('CrossEntropyOverBeamCostLayer', 'cross_entropy_over_beam') -define_cost('RankingCost', 'rank-cost') -define_cost('AucValidation', 'auc-validation') -define_cost('PnpairValidation', 'pnpair-validation') -define_cost('SumOfSquaresCostLayer', 'square_error') -define_cost('MultiBinaryLabelCrossEntropy', 'multi_binary_label_cross_entropy') -define_cost('SoftBinaryClassCrossEntropy', 'soft_binary_class_cross_entropy') -define_cost('HuberTwoClassification', 'huber_classification') -define_cost('SumCost', 'sum_cost') -define_cost('SmoothL1Cost', 'smooth_l1') - - -@config_layer('hsigmoid') -class HierarchicalSigmoidLayer(LayerBase): - def __init__(self, name, num_classes, inputs, device=None, bias=True): - super(HierarchicalSigmoidLayer, self).__init__( - name, 'hsigmoid', 1, inputs=inputs, device=device) - config_assert( - len(self.inputs) >= 2, - 'HierarchicalSigmoidLayer must have at least 2 inputs') - self.config.num_classes = num_classes - for input_index in xrange(len(self.inputs) - 1): - input_layer = self.get_input_layer(input_index) - psize = (num_classes - 1) * input_layer.size - dims = [num_classes - 1, input_layer.size] - self.create_input_parameter(input_index, psize, dims) - self.create_bias_parameter(bias, num_classes - 1) - - -''' -lambdaCost for lambdaRank LTR approach - -Usage: - Example: Layer(name = "cost", type = "lambda_cost", NDCG_num = 8, - max_sort_size = -1, inputs = ["output", "score"]) - - Input 
data: Samples of the same query should be loaded as a sequence, - by PyDataProvider etc.. User should provide - scores for each sample. The score slot should be the 2nd - input of lambdaRank layer. - - NDCG_num = the size of NDCG, e.g., 5 for NDCG@5. - Note: NDCG_num must be less than or equal to the minimum - size of lists. - - max_sort_size = the size of partial sorting in calculating gradient. - Note: If max_sort_size = -1, then for each list, the algorithm will - sort the entire list to get gradient. - In other cases, max_sort_size must be greater than or equal - to NDCG_num. - max_sort_size can be greater than the size of a list, in which - case the algorithm will sort the entire list to get gradient. -''' - - -@config_layer('lambda_cost') -class LambdaCost(LayerBase): - def __init__(self, name, inputs, NDCG_num=5, max_sort_size=-1, device=None): - super(LambdaCost, self).__init__( - name, 'lambda_cost', 1, inputs=inputs, device=device) - config_assert(len(self.inputs) == 2, 'lambdaCost must have 2 inputs') - self.config.NDCG_num = NDCG_num - if max_sort_size != -1: - config_assert( - NDCG_num <= max_sort_size, - 'NDCG_num must be less than or equal to max_sort_size') - self.config.max_sort_size = max_sort_size - - -@config_layer('huber_regression') -class HuberRegressionLoss(LayerBase): - def __init__(self, name, inputs, delta=1., coeff=1., device=None): - super(HuberRegressionLoss, self).__init__( - name, 'huber_regression', 1, inputs=inputs, device=device) - config_assert( - len(self.inputs) == 2, 'HuberRegression must have 2 inputs') - self.config.delta = delta - self.config.coeff = coeff - - -@config_layer('nce') -class NCELayer(LayerBase): - def __init__(self, - name, - num_classes, - inputs, - num_neg_samples=10, - neg_sampling_dist=None, - bias=True, - **xargs): - super(NCELayer, self).__init__(name, 'nce', 1, inputs=inputs, **xargs) - config_assert( - len(self.inputs) >= 2, 'NCELayer must have at least 2 inputs') - self.config.num_classes = num_classes - if neg_sampling_dist is not None: - config_assert( - len(neg_sampling_dist) == num_classes, - 'len(neg_sampling_dist)(%s) is not same as num_classes (%s)' % - (len(neg_sampling_dist), num_classes)) - s = sum(neg_sampling_dist) - config_assert( - abs(s - 1) < 1e-5, - 'The sum of neg_sampling_dist (%s) is not 1' % s) - - self.config.neg_sampling_dist.extend(neg_sampling_dist) - - self.config.num_neg_samples = num_neg_samples - num_real_inputs = len(self.inputs) - 1 - input_layer = self.get_input_layer(num_real_inputs) - config_assert(input_layer.type == 'data', - 'Expecting the last input layer of an nce layer to be ' - 'a data layer') - - if (num_real_inputs > 1 and input_layer.size == 1 and - self.get_input_layer(num_real_inputs - 1).type == 'data'): - # This input layer is assumed to be a sample weight layer - num_real_inputs -= 1 - - for input_index in xrange(num_real_inputs): - input_layer = self.get_input_layer(input_index) - psize = num_classes * input_layer.size - dims = [num_classes, input_layer.size] - self.create_input_parameter(input_index, psize, dims) - self.create_bias_parameter(bias, num_classes) - - -@config_layer('addto') -class AddToLayer(LayerBase): - layer_type = 'addto' - - def __init__(self, name, inputs, bias=True, **xargs): - use_mkldnn = bool(int(g_command_config_args.get("use_mkldnn", 0))) - if self.layer_type == "mkldnn_addto": - config_assert(use_mkldnn, "mkldnn_addto only support MKLDNN") - self.layer_type = 'mkldnn_addto' if use_mkldnn else 'addto' - super(AddToLayer, self).__init__( - name, 
self.layer_type, 0, inputs=inputs, **xargs)
-        config_assert(len(inputs) > 0, 'inputs cannot be empty for AddToLayer')
-
-        layer_size = self.get_input_layer(0).size
-        # To preserve height, width and depth.
-        layer_with_hwc = self.get_input_layer(0)
-        for input_index in xrange(len(self.inputs)):
-            input_layer = self.get_input_layer(input_index)
-            assert layer_size == input_layer.size
-            if input_layer.height and input_layer.width and input_layer.depth:
-                layer_with_hwc = input_layer
-
-        self.set_layer_size(layer_with_hwc.size)
-        self.set_layer_height_width(layer_with_hwc.height, layer_with_hwc.width)
-        self.set_layer_depth(layer_with_hwc.depth)
-        self.create_bias_parameter(bias, self.config.size)
-
-
-@config_layer('mkldnn_addto')
-class MKLDNNAddtoLayer(AddToLayer):
-    layer_type = 'mkldnn_addto'
-
-
-@config_layer('agent')
-class AgentLayer(LayerBase):
-    def __init__(self, name, size, device=None):
-        super(AgentLayer, self).__init__(
-            name, 'agent', size, inputs=[], device=device)
-
-
-@config_layer('gather_agent')
-class GatherAgentLayer(LayerBase):
-    def __init__(self, name, size, device=None):
-        super(GatherAgentLayer, self).__init__(
-            name, 'gather_agent', size, inputs=[], device=device)
-
-
-@config_layer('scatter_agent')
-class ScatterAgentLayer(LayerBase):
-    def __init__(self, name, size, width=None, height=None, device=None):
-        super(ScatterAgentLayer, self).__init__(
-            name, 'scatter_agent', size, inputs=[], device=device)
-        if height and width:
-            self.set_layer_height_width(height, width)
-
-
-@config_layer('multiplex')
-class MultiplexLayer(LayerBase):
-    def __init__(self, name, inputs, size, device=None):
-        super(MultiplexLayer, self).__init__(
-            name, 'multiplex', size, inputs=inputs, device=device)
-        config_assert(
-            len(inputs) > 2, 'MultiplexLayer should have more than 2 inputs.')
-        for i in range(1, len(inputs)):
-            config_assert(
-                self.get_input_layer(i).size == size,
-                "All the input layers except the first one should "
-                "have the same size as the MultiplexLayer.")
-
-
-@config_func
-def Link(name, has_subseq=False):
-    """
-    Still keeping has_subseq for backward compatibility
-    """
-    link_config = LinkConfig()
-    link_config.link_name = name
-    return link_config
-
-
-# Memory for a recurrent layer group.
-# *name* and *size* are the actual layer's name and size.
-# If *name* is None, *memory_name* must be provided, and SetMemoryInput()
-# must be used later to specify the layer this memory remembers.
-#
-# Returns the name of the memory; use this name when assigning the memory
-# as another layer's input.
-#
-# The boot frame of the memory is zeroed by default, or initialized by the
-# boot layer's output if *boot_layer* is set, or by a trainable bias if
-# *boot_bias* is set, or by a constant id if *boot_with_const_id* is set.
-#
-# The memory can be a sequence if *is_sequence* is set; this type of memory
-# can only be initialized by a *boot_layer* which is a sequence.
-#
-@config_func
-def Memory(name,
-           size,
-           is_sequence=False,
-           boot_layer=None,
-           boot_bias=False,
-           boot_bias_active_type="",
-           boot_with_const_id=None,
-           memory_name=None):
-    if not memory_name:
-        config_assert(name is not None, "name cannot be None")
-        memory_name = name + "+delay1"
-    agent_name = memory_name
-    agent_layer = AgentLayer(agent_name, size)
-    config_assert(g_current_submodel.is_recurrent_layer_group,
-                  'Memory should be used in recurrent layer group only')
-    memory = g_current_submodel.memories.add()
-    if name is not None:
-        memory.layer_name = MakeLayerNameInSubmodel(name)
-    memory.link_name = MakeLayerNameInSubmodel(agent_name)
-    options = sum((boot_layer is not None, bool(boot_bias),
-                   boot_with_const_id is not None))
-    config_assert(
-        options <= 1,
-        'take at most one option from boot_layer, boot_bias, or boot_with_const_id'
-    )
-    if boot_layer is not None:
-        boot_layer = MakeLayerNameInParentSubmodel(boot_layer)
-        config_assert(boot_layer in g_layer_map,
-                      'boot_layer "%s" does not correspond to a layer name' %
-                      boot_layer)
-        memory.boot_layer_name = boot_layer
-    elif boot_bias:
-        memory.boot_bias_parameter_name = agent_layer.create_bias_parameter(
-            boot_bias, size, for_self=False)
-        memory.boot_bias_active_type = boot_bias_active_type
-    elif boot_with_const_id is not None:
-        memory.boot_with_const_id = boot_with_const_id
-    return agent_name
-
-
-@config_func
-def SetMemoryInput(memory_name, layer_name):
-    memory_name = MakeLayerNameInSubmodel(memory_name)
-    layer_name = MakeLayerNameInSubmodel(layer_name)
-    for mem in g_current_submodel.memories:
-        if mem.link_name == memory_name:
-            mem.layer_name = layer_name
-            return
-    logger.fatal("Nonexistent memory name: " + memory_name)
-
-
-# Generator for a recurrent layer group. To use it:
-# 1. define an id layer as the output of the layer group
-# 2. define a memory of this id layer, and assign a boot id (begin of sequence)
-# 3. define an eos check layer and fill its name in the generator's *eos_layer_name*
-# Sequence generation stops when the eos check returns 1 or *max_num_frames* is
-# reached. If *beam_size* is greater than one, the generator uses beam search;
-# in beam search, if *num_results_per_sample* is set, one sample sequence can
-# output multiple results, each with a probability.
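# A minimal sketch of the three steps above, assuming hypothetical layer
# names ("predicted_id", "eos_check"); the sizes and ids are illustrative
# only, but all three helpers (Memory, Layer, Generator) are defined in
# this file:
#
#     # inside a recurrent layer group: remember the last generated id,
#     # booted with a constant begin-of-sequence id
#     Memory(name="predicted_id", size=1, boot_with_const_id=0)
#     # an eos_id layer whose boolean output flags the end-of-sequence id
#     Layer(name="eos_check", type="eos_id",
#           inputs=["predicted_id"], eos_id=1)
#     # stop at eos or after 100 frames; beam search with 5 beams,
#     # returning 3 results per sample
#     Generator(max_num_frames=100, eos_layer_name="eos_check",
#               beam_size=5, num_results_per_sample=3)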
-@config_func
-def Generator(
-        max_num_frames,
-        eos_layer_name="eos_check",
-        num_results_per_sample=1,
-        beam_size=1,
-        log_prob=None, ):
-    generator_config = GeneratorConfig()
-    generator_config.max_num_frames = max_num_frames
-    generator_config.eos_layer_name = eos_layer_name
-    generator_config.num_results_per_sample = num_results_per_sample
-    generator_config.beam_size = beam_size
-    if log_prob is not None:
-        generator_config.log_prob = log_prob
-    return generator_config
-
-
-@config_layer('expand')
-class ExpandLayer(LayerBase):
-    def __init__(self, name, inputs, trans_type='non-seq', bias=False, **xargs):
-        super(ExpandLayer, self).__init__(
-            name, 'expand', 0, inputs=inputs, **xargs)
-        config_assert(
-            len(self.inputs) == 2, 'ExpandLayer takes 2 and only 2 inputs')
-        self.config.trans_type = trans_type
-        for input_index in xrange(len(self.inputs)):
-            input_layer = self.get_input_layer(input_index)
-        self.set_layer_size(self.get_input_layer(0).size)
-        self.create_bias_parameter(bias, self.config.size)
-
-
-@config_layer('featmap_expand')
-class FeatMapExpandLayer(LayerBase):
-    def __init__(self,
-                 name,
-                 inputs,
-                 num_filters=None,
-                 as_row_vector=True,
-                 bias=False,
-                 **xargs):
-        super(FeatMapExpandLayer, self).__init__(
-            name, 'featmap_expand', 0, inputs=inputs, **xargs)
-        config_assert(
-            len(self.inputs) == 1, 'FeatMapExpandLayer takes 1 and only 1 input')
-        if num_filters is not None:
-            self.config.num_filters = num_filters
-        else:
-            logger.fatal("FeatMapExpandLayer must specify num_filters.")
-        if not as_row_vector:
-            self.config.user_arg = "as_col_vec"
-        self.set_layer_size(self.get_input_layer(0).size * num_filters)
-
-
-@config_layer('max')
-class MaxLayer(LayerBase):
-    def __init__(self,
-                 name,
-                 inputs,
-                 trans_type='non-seq',
-                 bias=False,
-                 output_max_index=None,
-                 stride=-1,
-                 **xargs):
-        super(MaxLayer, self).__init__(name, 'max', 0, inputs=inputs, **xargs)
-        config_assert(len(self.inputs) == 1, 'MaxLayer must have 1 input')
-        if trans_type == 'seq':
-            config_assert(stride == -1, 'subseq does not support stride window')
-        self.config.trans_type = trans_type
-        self.config.seq_pool_stride = stride
-        for input_index in xrange(len(self.inputs)):
-            input_layer = self.get_input_layer(input_index)
-            self.set_layer_size(input_layer.size)
-        self.create_bias_parameter(bias, self.config.size)
-        if output_max_index is not None:
-            self.config.output_max_index = output_max_index
-
-
-@config_layer('maxid')
-class MaxIdLayer(LayerBase):
-    def __init__(self, name, inputs, beam_size=None, device=None):
-        super(MaxIdLayer, self).__init__(
-            name, 'maxid', 0, inputs=inputs, device=device)
-        config_assert(len(self.inputs) == 1, 'MaxIdLayer must have 1 input')
-        for input_index in xrange(len(self.inputs)):
-            input_layer = self.get_input_layer(input_index)
-            self.set_layer_size(input_layer.size)
-
-        if beam_size is None:
-            global g_current_submodel
-            if g_current_submodel.HasField("generator"):
-                self.config.beam_size = g_current_submodel.generator.beam_size
-        else:
-            self.config.beam_size = beam_size
-
-
-@config_layer('eos_id')
-class EosIdLayer(LayerBase):
-    def __init__(self, name, inputs, eos_id, device=None):
-        super(EosIdLayer, self).__init__(
-            name, 'eos_id', 0, inputs=inputs, device=device)
-        config_assert(len(self.inputs) == 1, 'EosIdLayer must have 1 input')
-        self.set_layer_size(2)  # boolean output
-        self.config.eos_id = eos_id
-
-
-@config_layer('seqlastins')
-class SequenceLastInstanceLayer(LayerBase):
-    def __init__(self,
-                 name,
-                 inputs,
-                 trans_type='non-seq',
-                 bias=False,
-                 stride=-1,
-                 **xargs):
-        super(SequenceLastInstanceLayer, self).__init__(
-            name, 'seqlastins', 0, inputs=inputs, **xargs)
-        config_assert(
-            len(inputs) == 1, 'SequenceLastInstanceLayer must have 1 input')
-        if trans_type == 'seq':
-            config_assert(stride == -1, 'subseq does not support stride window')
-        self.config.trans_type = trans_type
-        self.config.seq_pool_stride = stride
-        self.set_layer_size(self.get_input_layer(0).size)
-        self.create_bias_parameter(bias, self.config.size)
-
-
-@config_layer('seqfirstins')
-class SequenceFirstInstanceLayer(SequenceLastInstanceLayer):
-    def __init__(self,
-                 name,
-                 inputs,
-                 trans_type='non-seq',
-                 bias=False,
-                 stride=-1,
-                 **xargs):
-        super(SequenceFirstInstanceLayer, self).__init__(
-            name,
-            inputs=inputs,
-            trans_type=trans_type,
-            bias=bias,
-            stride=stride,
-            **xargs)
-        self.config.select_first = True
-
-
-@config_layer('seqconcat')
-class SequenceConcatLayer(LayerBase):
-    def __init__(self, name, inputs, bias=False, **xargs):
-        super(SequenceConcatLayer, self).__init__(
-            name, 'seqconcat', 0, inputs=inputs, **xargs)
-        config_assert(
-            len(inputs) == 2, 'SequenceConcatLayer must have 2 inputs')
-        for input_index in xrange(len(self.inputs)):
-            input_layer = self.get_input_layer(input_index)
-            self.set_layer_size(input_layer.size)
-        self.create_bias_parameter(bias, self.config.size)
-
-
-@config_layer('seqreshape')
-class SequenceReshapeLayer(LayerBase):
-    def __init__(self, name, size, inputs, bias=False, **xargs):
-        super(SequenceReshapeLayer, self).__init__(
-            name, 'seqreshape', size, inputs=inputs, **xargs)
-        config_assert(
-            len(inputs) == 1, 'SequenceReshapeLayer must have 1 input')
-        self.set_layer_size(size)
-        self.create_bias_parameter(bias, size)
-
-
-@config_layer('subseq')
-class SubSequenceLayer(LayerBase):
-    def __init__(self, name, inputs, bias=False, **xargs):
-        super(SubSequenceLayer, self).__init__(
-            name, 'subseq', 0, inputs=inputs, **xargs)
-        config_assert(len(inputs) == 3, 'SubSequenceLayer must have 3 inputs')
-        input_layer0 = self.get_input_layer(0)
-        size = input_layer0.size
-        self.set_layer_size(size)
-        self.create_bias_parameter(bias, size)
-
-
-@config_layer('seq_slice')
-class SeqSliceLayer(LayerBase):
-    def __init__(self, name, inputs, starts, ends, bias=False, **xargs):
-        if isinstance(inputs, list):
-            assert len(inputs) == 1, ('the first input of sequence slice layer '
-                                      'is a single sequence input.')
-        else:
-            inputs = [inputs]
-
-        if starts is not None:
-            if isinstance(starts, list):
-                assert len(starts) == 1, (
-                    'the start indices for sequence slice layer cannot '
-                    'be a list having more than one element.')
-                starts = starts[0]
-            inputs.append(starts)
-
-        if ends is not None:
-            if isinstance(ends, list):
-                assert len(ends) == 1, (
-                    'the end indices for sequence slice layer cannot '
-                    'be a list having more than one element.')
-                ends = ends[0]
-            inputs.append(ends)
-        assert len(inputs) >= 2, (
-            'the sequence slice layer has at least two inputs.')
-
-        super(SeqSliceLayer, self).__init__(
-            name, 'seq_slice', 0, inputs=inputs, **xargs)
-
-        input_layer0 = self.get_input_layer(0)
-        size = input_layer0.size
-        self.set_layer_size(size)
-
-        if len(inputs) == 3:
-            assert (
-                self.get_input_layer(1).size == self.get_input_layer(2).size), (
-                    'If start and end indices are both given to the '
-                    'sequence slice layer, they should have the same width.')
-        elif len(inputs) == 2:
-            self.config.select_first = (starts is not None)
-
-
-@config_layer('sub_nested_seq')
-class SubNestedSequenceLayer(LayerBase):
-
def __init__(self, name, inputs, selected_indices, bias=False, **xargs): - if isinstance(inputs, list): - assert len(inputs) == 1, ('the first input of sub_nested_seq ' - 'layer is a single nested sequence.') - inputs = inputs[0] - if isinstance(selected_indices, list): - assert len(selected_indices) == 1, ( - 'the second input of ' - 'sub_nested_seq layer is a single layer which is a ' - 'set of selected indices.') - selected_indices = selected_indices[0] - - super(SubNestedSequenceLayer, self).__init__( - name, - 'sub_nested_seq', - 0, - inputs=[inputs, selected_indices], - **xargs) - input_layer0 = self.get_input_layer(0) - size = input_layer0.size - self.set_layer_size(size) - - -@config_layer('dot_prod') -class DotProdLayer(LayerBase): - def __init__(self, name, inputs, device=None): - super(DotProdLayer, self).__init__( - name, 'dot_prod', 0, inputs, device=device) - config_assert(len(inputs) == 2, 'DotProdLayer must have 2 inputs.') - config_assert( - self.get_input_layer(0).size == self.get_input_layer(1).size, - "Two inputs should have the same size.") - self.set_layer_size(1) - - -@config_layer('out_prod') -class OuterProdLayer(LayerBase): - def __init__(self, name, inputs, device=None): - super(OuterProdLayer, self).__init__( - name, 'out_prod', 0, inputs=inputs, device=device) - config_assert(len(inputs) == 2, 'OuterProdLayer must have 2 inputs') - input_layer0 = self.get_input_layer(0) - input_layer1 = self.get_input_layer(1) - self.set_layer_size(input_layer0.size * input_layer1.size) - - -@config_layer('power') -class PowerLayer(LayerBase): - def __init__(self, name, inputs, device=None): - super(PowerLayer, self).__init__( - name, 'power', 0, inputs=inputs, device=device) - config_assert(len(inputs) == 2, 'PowerLayer must have 2 inputs') - input_layer1 = self.get_input_layer(1) - self.set_layer_size(input_layer1.size) - input_layer0 = self.get_input_layer(0) - config_assert(1 == input_layer0.size, - 'The left input is the exponent and should be of size 1') - - -@config_layer('slope_intercept') -class SlopeInterceptLayer(LayerBase): - def __init__(self, name, inputs, slope=1.0, intercept=0.0, device=None): - super(SlopeInterceptLayer, self).__init__( - name, 'slope_intercept', 0, inputs=inputs, device=device) - self.config.slope = slope - self.config.intercept = intercept - config_assert(len(inputs) == 1, 'SlopeInterceptLayer must have 1 input') - input_layer0 = self.get_input_layer(0) - self.set_layer_size(input_layer0.size) - - -@config_layer('scaling') -class ScalingLayer(LayerBase): - def __init__(self, name, inputs, device=None): - super(ScalingLayer, self).__init__( - name, 'scaling', 0, inputs=inputs, device=device) - config_assert(len(inputs) == 2, 'ScalingLayer must have 2 inputs') - input_layer1 = self.get_input_layer(1) - self.set_layer_size(input_layer1.size) - input_layer0 = self.get_input_layer(0) - config_assert(1 == input_layer0.size, - 'The left input should be of size 1') - - -@config_layer('conv_shift') -class ConvShiftLayer(LayerBase): - def __init__(self, name, inputs, device=None): - super(ConvShiftLayer, self).__init__( - name, 'conv_shift', 0, inputs=inputs, device=device) - config_assert(len(inputs) == 2, 'ConvShiftLayer must have 2 inputs') - input_layer0 = self.get_input_layer(0) - self.set_layer_size(input_layer0.size) - - -@config_layer('convex_comb') -class ConvexCombinationLayer(LayerBase): - def __init__(self, name, size, inputs, device=None): - super(ConvexCombinationLayer, self).__init__( - name, 'convex_comb', size, inputs=inputs, 
device=device) - config_assert( - len(self.inputs) == 2, 'ConvexCombinationLayer must have 2 inputs') - config_assert( - size * self.get_input_layer(0).size == self.get_input_layer(1).size, - 'Wrong input size for ConvexCombinationLayer') - self.set_layer_size(size) - - -@config_layer('interpolation') -class InterpolationLayer(LayerBase): - def __init__(self, name, inputs, device=None): - super(InterpolationLayer, self).__init__( - name, 'interpolation', 0, inputs=inputs, device=device) - config_assert( - len(self.inputs) == 3, 'InterpolationLayer must have 3 inputs') - input_layer0 = self.get_input_layer(0) - input_layer1 = self.get_input_layer(1) - input_layer2 = self.get_input_layer(2) - self.set_layer_size(input_layer1.size) - config_assert(input_layer0.size == 1, 'weight should be of size 1') - config_assert(input_layer1.size == input_layer2.size, - 'the two vector inputs should be of the same size') - - -@config_layer('bilinear_interp') -class BilinearInterpLayer(LayerBase): - def __init__(self, name, inputs, **xargs): - super(BilinearInterpLayer, self).__init__( - name, 'bilinear_interp', 0, inputs=inputs, **xargs) - input_layer = self.get_input_layer(0) - conf = self.config.inputs[0].bilinear_interp_conf - parse_bilinear(self.inputs[0].bilinear_interp, input_layer.name, conf) - self.set_cnn_layer(name, conf.out_size_y, conf.out_size_x, - conf.image_conf.channels) - - -@config_layer('sum_to_one_norm') -class SumToOneNormLayer(LayerBase): - def __init__(self, name, inputs, device=None): - super(SumToOneNormLayer, self).__init__( - name, 'sum_to_one_norm', 0, inputs=inputs, device=device) - config_assert( - len(self.inputs) == 1, 'SumToOneNormLayer must have 1 input') - input_layer0 = self.get_input_layer(0) - self.set_layer_size(input_layer0.size) - - -@config_layer('row_l2_norm') -class RowL2NormLayer(LayerBase): - def __init__(self, name, inputs, **xargs): - super(RowL2NormLayer, self).__init__( - name, 'row_l2_norm', 0, inputs=inputs, **xargs) - config_assert(len(self.inputs) == 1, 'RowL2NormLayer must have 1 input') - input_layer = self.get_input_layer(0) - self.set_layer_size(input_layer.size) - - -@config_layer('cos') -class CosSimLayer(LayerBase): - def __init__(self, name, inputs, cos_scale=1, device=None): - super(CosSimLayer, self).__init__( - name, 'cos', 1, inputs=inputs, device=device) - config_assert( - len(self.inputs) == 2, - 'The CosSimLayer expects two and only two inputs.') - config_assert( - self.get_input_layer(0).size == self.get_input_layer(1).size, - 'The two inputs of CosSimLayer must have the same dimensionality.') - self.config.cos_scale = cos_scale - - -@config_layer('cos_vm') -class CosSimVecMatLayer(LayerBase): - def __init__(self, name, size, inputs, cos_scale=1.0, device=None): - super(CosSimVecMatLayer, self).__init__( - name, 'cos_vm', size, inputs=inputs, device=device) - self.config.cos_scale = cos_scale - config_assert( - len(self.inputs) == 2, 'The CosSimVecMatLayer must have 2 inputs.') - config_assert( - size * self.get_input_layer(0).size == self.get_input_layer(1).size, - 'Wrong input size for CosSimVecMatLayer.') - - -@config_layer('l2_distance') -class L2DistanceLayer(LayerBase): - def __init__(self, name, inputs, device=None): - super(L2DistanceLayer, self).__init__( - name, 'l2_distance', 1, inputs=inputs, device=device) - config_assert( - len(self.inputs) == 2, ('The L2DistanceLayer must have ' - 'and only have 2 inputs.')) - config_assert( - self.get_input_layer(0).size == self.get_input_layer(1).size, - ('Two inputs of the 
L2DistanceLayer must have '
-                                'the same dimensionality.'))
-
-
-@config_layer('sampling_id')
-class SamplingIdLayer(LayerBase):
-    def __init__(self, name, inputs, device=None):
-        super(SamplingIdLayer, self).__init__(
-            name, 'sampling_id', 0, inputs=inputs, device=device)
-        config_assert(
-            len(self.inputs) == 1, 'SamplingIdLayer must have 1 input')
-        for input_index in xrange(len(self.inputs)):
-            input_layer = self.get_input_layer(input_index)
-            self.set_layer_size(input_layer.size)
-
-
-# AverageLayer: "average" for each sample within a sequence.
-# average_strategy: set to one of the following:
-#     'average': plain average.
-#     'sum': sum each sample instead of averaging (no division by sample_num).
-#     'squarerootn': sum each sample, then divide by sqrt(sample_num).
-@config_layer('average')
-class AverageLayer(LayerBase):
-    def __init__(self,
-                 name,
-                 inputs,
-                 average_strategy='average',
-                 trans_type='non-seq',
-                 bias=False,
-                 stride=-1,
-                 **xargs):
-        super(AverageLayer, self).__init__(
-            name, 'average', 0, inputs=inputs, **xargs)
-        self.config.average_strategy = average_strategy
-        if trans_type == 'seq':
-            config_assert(stride == -1, 'subseq does not support stride window')
-        self.config.trans_type = trans_type
-        self.config.seq_pool_stride = stride
-        config_assert(len(inputs) == 1, 'AverageLayer must have 1 input')
-        for input_index in xrange(len(self.inputs)):
-            input_layer = self.get_input_layer(input_index)
-            self.set_layer_size(input_layer.size)
-        self.create_bias_parameter(bias, self.config.size)
-
-
-@config_layer('tensor')
-class TensorLayer(LayerBase):
-    def __init__(self, name, size, inputs, bias=True, **xargs):
-        super(TensorLayer, self).__init__(
-            name, 'tensor', size, inputs=inputs, **xargs)
-        config_assert(len(self.inputs) == 2, 'TensorLayer must have 2 inputs')
-        config_assert(size > 0, 'size must be positive')
-        config_assert(inputs[1].parameter_name == None,
-                      'the second parameter should be None.')
-        input_layer0 = self.get_input_layer(0)
-        input_layer1 = self.get_input_layer(1)
-        psize = size * input_layer0.size * input_layer1.size
-        dims = [input_layer0.size, input_layer1.size, size]
-        self.create_input_parameter(0, psize, dims)
-        self.create_bias_parameter(bias, size)
-
-
-@config_layer('mixed')
-class MixedLayer(LayerBase):
-    def __init__(self, name, inputs, size=0, bias=True, **xargs):
-        config_assert(inputs, 'inputs cannot be empty')
-        super(MixedLayer, self).__init__(
-            name, 'mixed', size, inputs=inputs, **xargs)
-        operator_input_index = []
-        for operator in self.operators:
-            operator_conf = operator.operator_conf
-            for i in xrange(1, len(operator.input_layer_names)):
-                input_index = len(self.config.inputs)
-                operator_conf.input_indices.append(input_index)
-                input_config = Input(operator.input_layer_names[i])
-                self.inputs.append(input_config)
-                layer_input = self.config.inputs.add()
-                layer_input.input_layer_name = input_config.input_layer_name
-            for input_index in operator_conf.input_indices:
-                input_layer = self.get_input_layer(input_index)
-                operator_conf.input_sizes.append(input_layer.size)
-                operator_input_index.append(input_index)
-            if self.config.size == 0:
-                size = operator.calc_output_size(operator_conf.input_sizes)
-                if size != 0:
-                    self.set_layer_size(size)
-            else:
-                sz = operator.calc_output_size(operator_conf.input_sizes)
-                if sz != 0:
-                    config_assert(
-                        sz == self.config.size,
-                        "different inputs have different size: %s vs.
%s" % - (sz, self.config.size)) - for input_index in xrange(len(self.inputs)): - input_layer = self.get_input_layer(input_index) - input = self.inputs[input_index] - if input_index not in operator_input_index: - config_assert( - isinstance(input, Projection), - "input should be projection or operation") - if self.config.size == 0 and isinstance(input, Projection): - size = input.calc_output_size(input_layer) - if size != 0: - self.set_layer_size(size) - elif isinstance(input, Projection): - sz = input.calc_output_size(input_layer) - if sz != 0: - config_assert( - sz == self.config.size, - "different inputs have different size: %s vs. %s" % - (sz, self.config.size)) - config_assert(size != 0, "size is not set") - - for input_index in xrange(len(self.inputs)): - input = self.inputs[input_index] - if isinstance(input, Projection): - input_layer = self.get_input_layer(input_index) - input.proj_conf.input_size = input_layer.size - input.proj_conf.output_size = size - - input_config = self.config.inputs[input_index] - input_config.proj_conf.CopyFrom(input.proj_conf) - input_config.proj_conf.name = gen_parameter_name(name, - input_index) - psize = input.calc_parameter_size(input_layer.size, size) - dims = input.calc_parameter_dims(input_layer.size, size) - self.create_input_parameter(input_index, psize, dims) - - for operator in self.operators: - operator_conf = operator.operator_conf - operator_conf.output_size = self.config.size - operator.check_dims() - record_operator_conf = self.config.operator_confs.add() - record_operator_conf.CopyFrom(operator_conf) - - psize = self.config.size - if isinstance(self.inputs[0], ConvProjection): - self.config.shared_biases = True - psize = 0 - for input in self.inputs: - psize += input.calc_bias_size() - - if bias: - self.config.bias_size = psize - self.create_bias_parameter(bias, psize) - - -# like MixedLayer, but no bias parameter -@config_func -def ExpressionLayer(name, inputs, **xargs): - MixedLayer(name, inputs, bias=False, **xargs) - - -@config_layer('concat') -class ConcatenateLayer(LayerBase): - layer_type = 'concat' - - def __init__(self, name, inputs, bias=False, **xargs): - config_assert(inputs, 'inputs cannot be empty') - config_assert(not bias, 'ConcatenateLayer cannot support bias.') - use_mkldnn = bool(int(g_command_config_args.get("use_mkldnn", 0))) - if self.layer_type == "mkldnn_concat": - config_assert(use_mkldnn, "mkldnn_concat only support MKLDNN") - self.layer_type = 'mkldnn_concat' if use_mkldnn else 'concat' - super(ConcatenateLayer, self).__init__( - name, self.layer_type, 0, inputs=inputs, **xargs) - size = 0 - for input_index in xrange(len(self.inputs)): - assert self.get_input_layer(0).height == self.get_input_layer( - input_index).height - assert self.get_input_layer(0).width == self.get_input_layer( - input_index).width - assert self.get_input_layer(0).depth == self.get_input_layer( - input_index).depth - input_layer = self.get_input_layer(input_index) - input = self.inputs[input_index] - if self.config.size == 0: - size += input_layer.size - - self.set_layer_height_width(self.get_input_layer(0).height, \ - self.get_input_layer(0).width) - self.set_layer_depth(self.get_input_layer(0).depth) - self.set_layer_size(size) - - -@config_layer('mkldnn_concat') -class MKLDNNConcatLayer(ConcatenateLayer): - layer_type = 'mkldnn_concat' - - -# like concat layer, but each input layer was processed by a Projection. 
-@config_layer('concat2') -class ConcatenateLayer2(LayerBase): - def __init__(self, name, inputs, bias=False, **xargs): - config_assert(inputs, 'inputs cannot be empty') - super(ConcatenateLayer2, self).__init__( - name, 'concat2', 0, inputs=inputs, **xargs) - - if isinstance(self.inputs[0], ConvProjection): - for input_index in xrange(len(self.inputs) - 1): - input = self.inputs[input_index + 1] - config_assert( - isinstance(input, ConvProjection), - "The first input of ConcatenateLayer2 is ConvProjection, " - "the other inputs should also be ConvProjection.") - - size = 0 - for input_index in xrange(len(self.inputs)): - input_layer = self.get_input_layer(input_index) - input = self.inputs[input_index] - output_size = input.calc_output_size(input_layer) - config_assert(output_size != 0, "proj output size is not set") - size += output_size - - self.set_layer_size(size) - - for input_index in xrange(len(self.inputs)): - input_layer = self.get_input_layer(input_index) - input = self.inputs[input_index] - input.proj_conf.input_size = input_layer.size - input.proj_conf.output_size = input.calc_output_size(input_layer) - - input_config = self.config.inputs[input_index] - input_config.proj_conf.CopyFrom(input.proj_conf) - input_config.proj_conf.name = gen_parameter_name(name, input_index) - psize = input.calc_parameter_size(input.proj_conf.input_size, - input.proj_conf.output_size) - dims = input.calc_parameter_dims(input.proj_conf.input_size, - input.proj_conf.output_size) - self.create_input_parameter(input_index, psize, dims) - - psize = self.config.size - if isinstance(self.inputs[0], ConvProjection): - self.config.shared_biases = True - psize = 0 - for input in self.inputs: - psize += input.calc_bias_size() - - if bias: - self.config.bias_size = psize - self.create_bias_parameter(bias, psize) - - -@config_layer('recurrent') -class RecurrentLayer(LayerBase): - layer_type = 'recurrent' - - def __init__(self, name, inputs, reversed=False, bias=True, **xargs): - use_mkl_packed = bool( - int(g_command_config_args.get("use_mkl_packed", 0))) - self.layer_type = 'mkl_packed_recurrent' if use_mkl_packed else 'recurrent' - super(RecurrentLayer, self).__init__(name, self.layer_type, 0, inputs, - **xargs) - config_assert(len(self.inputs) == 1, 'RecurrentLayer must have 1 input') - input_layer = self.get_input_layer(0) - size = input_layer.size - self.set_layer_size(size) - self.config.reversed = reversed - dims = [size, size] - self.create_input_parameter(0, size * size, dims) - self.create_bias_parameter(bias, self.config.size) - - -@config_layer('lstmemory') -class LstmLayer(LayerBase): - def __init__(self, - name, - inputs, - reversed=False, - active_gate_type="sigmoid", - active_state_type="sigmoid", - bias=True, - **xargs): - super(LstmLayer, self).__init__(name, 'lstmemory', 0, inputs, **xargs) - config_assert(len(self.inputs) == 1, 'LstmLayer must have 1 input') - input_layer = self.get_input_layer(0) - #check input_layer.size is divided by 4 - config_assert(input_layer.size % 4 == 0, "size % 4 should be 0!") - size = input_layer.size / 4 - self.set_layer_size(size) - self.config.reversed = reversed - self.config.active_gate_type = active_gate_type - self.config.active_state_type = active_state_type - self.create_input_parameter(0, size * size * 4, [size, size, 4]) - #bias includes 3 kinds of peephole, 4 + 3 = 7 - self.create_bias_parameter(bias, size * 7) - - -@config_layer('lstm_step') -class LstmStepLayer(LayerBase): - def __init__(self, - name, - size, - inputs, - active_gate_type="sigmoid", 
-                 active_state_type="sigmoid",
-                 bias=True,
-                 **xargs):
-        super(LstmStepLayer, self).__init__(name, 'lstm_step', size, inputs,
-                                            **xargs)
-        config_assert(len(inputs) == 2, 'LstmStepLayer must have 2 inputs')
-        input_layer0 = self.get_input_layer(0)
-        input_layer1 = self.get_input_layer(1)
-        config_assert(input_layer0.size == 4 * size,
-                      'input_layer0.size != 4 * layer.size')
-        config_assert(input_layer1.size == size,
-                      'input_layer1.size != layer.size')
-        self.config.active_gate_type = active_gate_type
-        self.config.active_state_type = active_state_type
-        self.create_bias_parameter(bias, size * 3)
-
-
-# Get a specific output from the input layer.
-@config_layer('get_output')
-class GetOutputLayer(LayerBase):
-    def __init__(self, name, size, inputs):
-        super(GetOutputLayer, self).__init__(name, 'get_output', size, inputs)
-        config_assert(
-            len(self.inputs) == 1, 'GetOutputLayer must have 1 input')
-        inputs = self.inputs[0]
-        config_assert(inputs.input_layer_argument,
-                      'input_layer_argument cannot be empty')
-
-
-@config_layer('mdlstmemory')
-class MDLstmLayer(LayerBase):
-    def __init__(self,
-                 name,
-                 inputs,
-                 directions=True,
-                 active_gate_type="sigmoid",
-                 active_state_type="sigmoid",
-                 bias=True,
-                 **xargs):
-        super(MDLstmLayer, self).__init__(name, 'mdlstmemory', 0, inputs,
-                                          **xargs)
-        config_assert(len(self.inputs) == 1, 'MDLstmLayer must have 1 input')
-        input_layer = self.get_input_layer(0)
-        dim_num = len(directions)
-        # check that input_layer.size is divisible by (3 + dim_num)
-        config_assert(input_layer.size % (3 + dim_num) == 0,
-                      "size % (3 + dim_num) should be 0!")
-        size = input_layer.size / (3 + dim_num)
-        self.set_layer_size(size)
-        self.config.active_gate_type = active_gate_type
-        self.config.active_state_type = active_state_type
-        for i in xrange(len(directions)):
-            self.config.directions.append(int(directions[i]))
-        self.create_input_parameter(0, size * size * (3 + dim_num),
-                                    [size, size, 3 + dim_num])
-        # bias includes 3 kinds of peephole: 3 + dim_num + 2 + dim_num
-        self.create_bias_parameter(bias, size * (5 + 2 * dim_num))
-
-
-@config_layer('gated_recurrent')
-class GatedRecurrentLayer(LayerBase):
-    def __init__(self,
-                 name,
-                 inputs,
-                 reversed=False,
-                 active_gate_type="sigmoid",
-                 bias=True,
-                 **xargs):
-        super(GatedRecurrentLayer, self).__init__(name, 'gated_recurrent', 0,
-                                                  inputs, **xargs)
-        config_assert(
-            len(self.inputs) == 1, 'GatedRecurrentLayer must have 1 input')
-        input_layer = self.get_input_layer(0)
-        # check that input_layer.size is divisible by 3
-        config_assert(input_layer.size % 3 == 0, "size % 3 should be 0!")
-        size = input_layer.size / 3
-        self.set_layer_size(size)
-        self.config.reversed = reversed
-        self.config.active_gate_type = active_gate_type
-        self.create_input_parameter(0, size * size * 3, [size, size * 3])
-        self.create_bias_parameter(bias, size * 3)
-
-
-@config_layer('gru_step')
-class GruStepLayer(LayerBase):
-    def __init__(self,
-                 name,
-                 size,
-                 inputs,
-                 active_gate_type="sigmoid",
-                 bias=True,
-                 **xargs):
-        super(GruStepLayer, self).__init__(name, 'gru_step', size, inputs,
-                                           **xargs)
-        config_assert(len(self.inputs) == 2, 'GruStepLayer must have 2 inputs')
-        input_layer0 = self.get_input_layer(0)
-        input_layer1 = self.get_input_layer(1)
-        config_assert(input_layer0.size == 3 * size,
-                      'input_layer0.size != 3 * layer.size')
-        config_assert(input_layer1.size == size,
-                      'input_layer1.size != layer.size')
-        self.config.active_gate_type = active_gate_type
-        self.create_input_parameter(0, size * size * 3, [size, size * 3])
-
self.create_bias_parameter(bias, size * 3) - - -''' - A layer for calculating the cost of sequential conditional random field model. - Example: CRFLayer(name="crf_cost", size=label_num, - inputs=["output", "label", "weight"]) - where "weight" is optional, one weight for each sequence - @param coeff: weight of the layer -''' - - -@config_layer('crf') -class CRFLayer(LayerBase): - def __init__(self, name, size, inputs, coeff=1.0, device=None): - super(CRFLayer, self).__init__(name, 'crf', size, inputs, device=device) - config_assert(2 <= len(self.inputs) <= 3, - 'CRFLayer must have 2 or 3 inputs') - self.create_input_parameter(0, size * (size + 2), [size + 2, size]) - self.config.coeff = coeff - - -''' - A layer for calculating the decoding sequence of sequential conditional - random field model. - The decoding sequence is stored in output_.ids - If a second input is provided, it is treated as the ground-truth label, and - this layer will also calculate error, output_.value[i] is 1 for incorrect - decoding or 0 for correct decoding -''' - - -@config_layer('crf_decoding') -class CRFDecodingLayer(LayerBase): - def __init__(self, name, size, inputs, device=None): - super(CRFDecodingLayer, self).__init__( - name, 'crf_decoding', size, inputs, device=device) - config_assert( - len(self.inputs) <= 2, - 'CRFDecodingLayer cannot have more than 2 inputs') - self.create_input_parameter(0, size * (size + 2), [size + 2, size]) - - -@config_layer('ctc') -class CTCLayer(LayerBase): - def __init__(self, name, size, inputs, norm_by_times=False, device=None): - super(CTCLayer, self).__init__(name, 'ctc', size, inputs, device=device) - self.config.norm_by_times = norm_by_times - config_assert(len(self.inputs) == 2, 'CTCLayer must have 2 inputs') - - -@config_layer('kmax_seq_score') -class KmaxSeqScoreLayer(LayerBase): - def __init__(self, name, inputs, beam_size, **xargs): - super(KmaxSeqScoreLayer, self).__init__( - name, 'kmax_seq_score', 0, inputs=inputs, **xargs) - config_assert( - len(self.inputs) == 1, 'KmaxSeqScoreLayer has only one input.') - self.config.beam_size = beam_size - - -@config_layer('warp_ctc') -class WarpCTCLayer(LayerBase): - def __init__(self, - name, - size, - inputs, - blank=0, - norm_by_times=False, - device=None): - super(WarpCTCLayer, self).__init__( - name, 'warp_ctc', size=size, inputs=inputs, device=device) - self.config.blank = blank - self.config.norm_by_times = norm_by_times - config_assert(len(self.inputs) == 2, 'WarpCTCLayer must have 2 inputs') - input_layer = self.get_input_layer(0) - config_assert( - (input_layer.active_type == '' or - input_layer.active_type == 'linear'), - "Expecting the active_type of input layer to be linear or null") - - -@config_layer('recurrent_layer_group') -class RecurrentLayerGroup(LayerBase): - def __init__(self, name, device=None): - super(RecurrentLayerGroup, self).__init__( - name, 'recurrent_layer_group', 0, inputs=[], device=device) - - -@config_layer('switch_order') -class SwitchOrderLayer(LayerBase): - def __init__(self, name, inputs, reshape, **xargs): - super(SwitchOrderLayer, self).__init__( - name, 'switch_order', 0, inputs=inputs, **xargs) - self.config.reshape_conf.height_axis.extend(reshape['height']) - self.config.reshape_conf.width_axis.extend(reshape['width']) - input_layer = self.get_input_layer(0) - if reshape is None: - self.set_layer_size(input_layer.size) - else: - in_h = input_layer.height - in_w = input_layer.width - out_dims = None - if input_layer.has_depth(): - in_d = input_layer.depth - in_c = input_layer.size / 
in_h / in_w / in_d
-                # batch_size, depth, height, width, channel
-                out_dims = [0, in_d, in_h, in_w, in_c]
-            else:
-                in_c = input_layer.size / in_h / in_w
-                # batch_size, height, width, channel
-                out_dims = [0, in_h, in_w, in_c]
-            # Because reshape['width'][0] > 0 always holds, out_dims[0]
-            # is never used.
-            size = reduce(lambda x, y: x * y, out_dims[reshape['width'][0]:])
-            self.set_layer_size(size)
-
-
-@config_layer('scale_sub_region')
-class ScaleSubRegionLayer(LayerBase):
-    def __init__(self, name, inputs, value, **xargs):
-        super(ScaleSubRegionLayer, self).__init__(
-            name, 'scale_sub_region', 0, inputs=inputs, **xargs)
-        scale_sub_region_conf = self.config.inputs[0].scale_sub_region_conf
-        scale_sub_region_conf.value = value
-
-        # get channel, width and height from the input_0 layer
-        input_layer = self.get_input_layer(0)
-        image_conf = scale_sub_region_conf.image_conf
-        image_conf.img_size = input_layer.width
-        image_conf.img_size_y = input_layer.height
-        image_conf.channels = input_layer.size / (input_layer.width *
-                                                  input_layer.height)
-        self.set_cnn_layer(name, image_conf.img_size_y, image_conf.img_size,
-                           image_conf.channels)
-
-
-@config_layer('factorization_machine')
-class FactorizationMachineLayer(LayerBase):
-    def __init__(self, name, inputs, factor_size, **xargs):
-        super(FactorizationMachineLayer, self).__init__(
-            name, 'factorization_machine', size=1, inputs=inputs, **xargs)
-        config_assert(
-            len(self.inputs) == 1,
-            'factorization machine layer must have one and only one input.')
-        self.config.factor_size = factor_size
-        input_layer = self.get_input_layer(0)
-        psize = input_layer.size * factor_size
-        dims = [input_layer.size, factor_size]
-        self.create_input_parameter(0, psize, dims)
-
-
-# Deprecated, use a new layer-specific class instead
-@config_func
-def Layer(name, type, **xargs):
-    layers = {}
-    layers.update(g_cost_map)
-    layers.update(g_layer_type_map)
-    layer_func = layers.get(type)
-    config_assert(layer_func, "layer type '%s' not supported."
% type) - return layer_func(name, **xargs) - - -@config_func -def ParameterHook(type, **kwargs): - if type == 'pruning': - hook = ParameterUpdaterHookConfig() - hook.type = type - sparsity_ratio = kwargs.get('sparsity_ratio', None) - if sparsity_ratio is not None: - hook.sparsity_ratio = sparsity_ratio - return hook - elif type == 'dpruning': - hook = ParameterUpdaterHookConfig() - hook.type = type - return hook - else: - return None - - -@config_func -def Parameter(name, - size, - device, - dims, - learning_rate=None, - momentum=None, - decay_rate=None, - decay_rate_l1=None, - initial_mean=None, - initial_std=None, - initial_strategy=None, - initial_smart=None, - num_batches_regularization=None, - sparse_remote_update=None, - sparse_update=None, - gradient_clipping_threshold=None, - sparse=None, - format=None, - need_compact=None, - is_static=None, - is_shared=None, - update_hooks=None, - initializer=None): - - config_assert(name not in g_parameter_map, - 'Duplicated parameter name: ' + name) - - para = g_config.model_config.parameters.add() - para.name = name - para.size = size - if device is not None: - para.device = int(device) - para.dims.extend(dims) - - if learning_rate is not None: - para.learning_rate = float(learning_rate) - - momentum = default(momentum, g_default_momentum) - if momentum is not None: - para.momentum = float(momentum) - - config_assert(not momentum or not decay_rate_l1, - "momentum and decay_rate_l1 cannot both be non-zero") - - decay_rate = default(decay_rate, g_default_decay_rate) - if decay_rate is not None: - para.decay_rate = decay_rate - - if decay_rate_l1 is not None: - para.decay_rate_l1 = decay_rate_l1 - para.initial_std = default(initial_std, g_default_initial_std) - para.initial_mean = default(initial_mean, g_default_initial_mean) - - num_batches_regularization = default(num_batches_regularization, - g_default_num_batches_regularization) - if num_batches_regularization is not None: - para.num_batches_regularization = int(num_batches_regularization) - - if sparse_remote_update is not None: - para.sparse_remote_update = sparse_remote_update - if sparse_remote_update: - g_config.opt_config.use_sparse_remote_updater = True - if sparse_update is not None: - para.sparse_update = sparse_update - gradient_clipping_threshold = default(gradient_clipping_threshold, - g_default_gradient_clipping_threshold) - if gradient_clipping_threshold is not None: - para.gradient_clipping_threshold = gradient_clipping_threshold - para.initial_strategy = default(initial_strategy, - g_default_initial_strategy) - para.initial_smart = default(initial_smart, g_default_initial_smart) - if para.initial_smart: - para.initial_mean = 0. - if len(para.dims) != 0: - para.initial_std = 1. / math.sqrt(para.dims[0]) - else: - print( - "Use initial_smart, but dims not set. Initial_smart may not be used in this layer" - ) - traceback.print_exc() - para.initial_std = 1. 
/ math.sqrt(para.size) - if g_default_compact_func is not None: - sparse, format, need_compact = g_default_compact_func(para.name) - - if sparse is not None: - para.is_sparse = sparse - if format is not None: - para.format = format - if need_compact is not None: - para.need_compact = need_compact - if is_static is not None: - para.is_static = is_static - config_assert(not para.sparse_remote_update or not para.is_static, - "sparse_remote_update and is_static cannot both be true") - if is_shared is not None: - para.is_shared = is_shared - - update_hooks = default(update_hooks, g_default_update_hooks) - - if update_hooks is not None: - if hasattr(update_hooks, '__call__'): - update_hooks = update_hooks() - - if isinstance(update_hooks, list): - for hook in update_hooks: - para.update_hooks.extend([hook]) - else: - para.update_hooks.extend([update_hooks]) - - g_parameter_map[name] = para - if initializer is not None: - config_assert( - callable(initializer), - "parameter initializer should be a callable object") - g_parameter_initializer_map[name] = initializer - - -@config_func -def default_initial_std(val): - global g_default_initial_std - g_default_initial_std = val - - -@config_func -def default_initial_mean(val): - global g_default_initial_mean - g_default_initial_mean = val - - -@config_func -def default_initial_strategy(val): - global g_default_initial_strategy - g_default_initial_strategy = val - - -@config_func -def default_initial_smart(val): - global g_default_initial_smart - g_default_initial_smart = val - - -@config_func -def default_momentum(val): - global g_default_momentum - g_default_momentum = val - - -@config_func -def default_decay_rate(val): - global g_default_decay_rate - g_default_decay_rate = val - - -@config_func -def default_num_batches_regularization(val): - global g_default_num_batches_regularization - g_default_num_batches_regularization = val - - -@config_func -def default_gradient_clipping_threshold(val): - global g_default_gradient_clipping_threshold - g_default_gradient_clipping_threshold = val - - -@config_func -def default_device(val): - global g_default_device - g_default_device = val - - -@config_func -def default_update_hooks(val): - global g_default_update_hooks - g_default_update_hooks = val - - -@config_func -def default_compact_func(val): - global g_default_compact_func - g_default_compact_func = val - - -def make_importer(config_dir, config_args): - def Import(config_file, local_args={}): - if not config_file.startswith('/'): - config_file = config_dir + '/' + config_file - g_config.config_files.append(config_file) - execfile(config_file, - make_config_environment(config_file, config_args), local_args) - - return Import - - -DEFAULT_SETTING = dict( - batch_size=None, - mini_batch_size=None, - algorithm='async_sgd', - async_lagged_grad_discard_ratio=1.5, - learning_method='momentum', - gradient_clipping_threshold=None, - num_batches_per_send_parameter=None, - num_batches_per_get_parameter=None, - center_parameter_update_method=None, - learning_rate=1., - learning_rate_decay_a=0., - learning_rate_decay_b=0., - learning_rate_schedule='poly', - learning_rate_args='', - l1weight=0.1, - l2weight=0., - l2weight_zero_iter=0, - c1=0.0001, - backoff=0.5, - owlqn_steps=10, - max_backoff=5, - average_window=0, - do_average_in_cpu=False, - max_average_window=None, - ada_epsilon=1e-6, - ada_rou=0.95, - delta_add_rate=1.0, - shrink_parameter_value=0, - adam_beta1=0.9, - adam_beta2=0.999, - adam_epsilon=1e-8, ) - -settings = copy.deepcopy(DEFAULT_SETTING) - 
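# For context, a trainer config script typically overrides these defaults
# through the Settings() helper defined below. A minimal sketch (the values
# are illustrative only; every key must exist in DEFAULT_SETTING or
# trainer_settings, otherwise Settings() rejects it):
#
#     Settings(
#         batch_size=128,
#         learning_method='adam',
#         learning_rate=1e-3,
#         adam_beta1=0.9,
#         adam_beta2=0.999, )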
-settings_deprecated = dict(usage_ratio=1., )
-
-trainer_settings = dict(
-    save_dir="./output/model",
-    init_model_path=None,
-    start_pass=0, )
-
-
-@config_func
-def Settings(**args):
-    for k, v in args.iteritems():
-        if k == "usage_ratio":
-            logger.warning(
-                "Deprecated: define usage_ratio in DataConfig instead")
-            if g_config.HasField("data_config"):
-                g_config.data_config.__setattr__(k, v)
-            settings_deprecated[k] = v
-            continue
-        elif k in settings:
-            settings[k] = v
-        elif k in trainer_settings:
-            trainer_settings[k] = v
-        else:
-            logger.fatal('Unknown setting: %s' % k)
-
-
-@config_func
-def cluster_config(**args):
-    pass
-
-
-@config_func
-def EnableSubmodelSuffix(flag=True):
-    """
-    If enabled, the layer and evaluator names in a submodel will automatically
-    be appended with @submodel_name.
-    """
-    global g_add_submodel_suffix
-    g_add_submodel_suffix = flag
-
-
-def make_config_environment(config_file, config_args):
-    def make_setter(k):
-        def setter(v):
-            logger.fatal("Obsolete: use Settings(%s=%s, ...) instead" % (k, v))
-
-        return setter
-
-    funcs = {}
-    funcs.update(g_config_funcs)
-
-    for k in settings.iterkeys():
-        funcs[k] = make_setter(k)
-    for k in settings_deprecated.iterkeys():
-        funcs[k] = make_setter(k)
-    config_dir = os.path.dirname(config_file)
-    if not config_dir:
-        config_dir = '.'
-
-    funcs.update(
-        Import=make_importer(config_dir, config_args),
-        get_config_arg=make_get_config_arg(config_args), )
-
-    funcs.update(g_extended_config_funcs)
-
-    return funcs
-
-
-def make_get_config_arg(config_args):
-    def get_config_arg(name, type, default=None):
-        if type == bool:
-            s = config_args.get(name)
-            if not s:
-                return default
-            if s == 'True' or s == '1' or s == 'true':
-                return True
-            if s == 'False' or s == '0' or s == 'false':
-                return False
-            raise ValueError('Value of config_arg %s is not boolean' % name)
-        else:
-            return type(config_args.get(name, default))

-    return get_config_arg
-
-
-def importlib(name):
-    __import__(name)
-    return sys.modules[name]
-
-
-def find_caller():
-    stack = traceback.extract_stack()
-    for s in stack[-4::-1]:
-        if not s[0].endswith('config_parser.py'):
-            return s[0], s[1], s[2]
-    return "(unknown file)", 0, "(unknown function)"
-
-
-def my_fatal(s):
-    logger.critical(s)
-    raise Exception()
-
-
-_parse_config_hooks = set()
-
-
-def register_parse_config_hook(f):
-    """
-    Register a hook function for parse_config. parse_config will invoke the hook
-    at the beginning of parsing. This makes it possible to reset global state
-    before constructing the model.
-    """
-    _parse_config_hooks.add(f)
-
-
-def update_g_config():
-    '''
-    Update g_config after executing config_file or config functions.
- ''' - for k, v in settings.iteritems(): - if v is None: - continue - g_config.opt_config.__setattr__(k, v) - - for k, v in trainer_settings.iteritems(): - if v is None: - continue - g_config.__setattr__(k, v) - - for name in g_config.model_config.input_layer_names: - assert name in g_layer_map, \ - 'input name "%s" does not correspond to a layer name' % name - assert (g_layer_map[name].type == "data" or g_layer_map[name].type == "data_trim"), \ - 'The type of input layer "%s" is not "data"' % name - for name in g_config.model_config.output_layer_names: - assert name in g_layer_map, \ - 'input name "%s" does not correspond to a layer name' % name - return g_config - - -def begin_parse(): - init_config_environment() - for hook in _parse_config_hooks: - hook() - - logger.findCaller = find_caller - logger.fatal = my_fatal - - g_config.model_config.type = "nn" - - global g_current_submodel, g_root_submodel - g_root_submodel = g_config.model_config.sub_models.add() - g_root_submodel.name = 'root' - g_root_submodel.is_recurrent_layer_group = False - g_current_submodel = g_root_submodel - - -def parse_config(trainer_config, config_arg_str): - ''' - @param config_arg_str: a string of the form var1=val1,var2=val2. It will be - passed to config script as a dictionary CONFIG_ARGS - ''' - - begin_parse() - config_args = {} - - if config_arg_str: - config_args = dict([f.split('=') for f in config_arg_str.split(',')]) - - global g_command_config_args - g_command_config_args.update(config_args) - - extension_module_name = config_args.get('extension_module_name') - if extension_module_name: - global g_extended_config_funcs - extension_module = importlib(extension_module_name) - g_extended_config_funcs = extension_module.get_config_funcs(g_config) - - if hasattr(trainer_config, '__call__'): - trainer_config.func_globals.update( - make_config_environment("", config_args)) - trainer_config() - else: - execfile(trainer_config, - make_config_environment(trainer_config, config_args)) - - return update_g_config() - - -def parse_config_and_serialize(trainer_config, config_arg_str): - try: - config = parse_config(trainer_config, config_arg_str) - #logger.info(config) - return config.SerializeToString() - except: - traceback.print_exc() - raise - - -if __name__ == '__main__': - try: - config = parse_config(sys.argv[1], '') - config.SerializeToString() - __real_print__(str(config)) - except: - traceback.print_exc() - raise diff --git a/python/paddle/trainer/config_parser_extension.py b/python/paddle/trainer/config_parser_extension.py deleted file mode 100644 index b9e0f3eb13dd3f54e26a566f4ae937940134fa03..0000000000000000000000000000000000000000 --- a/python/paddle/trainer/config_parser_extension.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
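Before moving on to the extension module, a note on how the parser above is typically driven: parse_config() accepts a config file path (or a callable) and a comma-separated argument string. A minimal sketch, assuming a hypothetical trainer_config.py and made-up arguments:

.. code-block:: python

    from paddle.trainer.config_parser import parse_config

    # config_arg_str has the form 'var1=val1,var2=val2'; the config script
    # reads each value back with get_config_arg(name, type, default).
    conf = parse_config('trainer_config.py', 'with_cost=True,batch_size=32')
    print(conf.model_config.type)  # 'nn', set by begin_parse()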
- -from paddle.proto.DataConfig_pb2 import DataConfig - -g_config = None - - -def SimpleData(files=None, - feat_dim=None, - context_len=None, - buffer_capacity=None): - - data_config = DataConfig() - data_config.type = 'simple' - data_config.files = files - data_config.feat_dim = feat_dim - if context_len is not None: - data_config.context_len = context_len - if buffer_capacity: - data_config.buffer_capacity = buffer_capacity - return data_config - - -def get_config_funcs(trainer_config): - global g_config - g_config = trainer_config - return dict(SimpleData=SimpleData) diff --git a/python/paddle/trainer/recurrent_units.py b/python/paddle/trainer/recurrent_units.py deleted file mode 100644 index ef92107a1093d2ec2b2a41677e964fdaa60ac829..0000000000000000000000000000000000000000 --- a/python/paddle/trainer/recurrent_units.py +++ /dev/null @@ -1,357 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# recurrent_units.py -# Version 2.0 -# -# Some recurrent units can be used in recurrent layer group, -# to use these units, import this module in your config_file: -# import trainer.recurrent_units -# -# The modules in this file are DEPRECATED. -# If you would like to use lstm/gru -# please use the functions defined in paddle.trainer_config_helpers. 
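To make the deprecation note above concrete, this is roughly how a legacy config file used one of these units; the layer name, size, and activation types are hypothetical, and FullMatrixProjection is assumed to come from the config_parser environment:

.. code-block:: python

    # Hypothetical legacy config snippet; assumes a layer named "input"
    # already exists. New configs should use the lstm/gru helpers in
    # paddle.trainer_config_helpers instead.
    import trainer.recurrent_units as ru

    ru.LstmRecurrentLayerGroup(
        name="lstm",
        size=128,
        active_type="tanh",
        state_active_type="tanh",
        gate_active_type="sigmoid",
        inputs=[FullMatrixProjection("input")])  # must be a list of Projections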
- -from paddle.trainer.config_parser import * - - -# long short term memory, can be used in recurrent machine -# *inputs* must be a list of Projections, for example: -# inputs = [FullMatrixProjection("input_layer_name")], -# *para_prefix* defines parameter names, if the *para_prefix* of -# two LstmRecurrentUnit is same, they share same parameters -# *out_memory* can be defined outside if it's used outside -def LstmRecurrentUnit(name, - size, - active_type, - state_active_type, - gate_active_type, - inputs, - para_prefix=None, - error_clipping_threshold=0, - out_memory=None): - - if para_prefix is None: - para_prefix = name - if out_memory is None: - out_memory = Memory(name=name, size=size) - - state_memory = Memory(name=name + "_" + "state", size=size) - - Layer( - name=name + "_" + "input_recurrent", - type="mixed", - size=size * 4, #(input_s, input_gate, forget_gate, output_gate) - error_clipping_threshold=error_clipping_threshold, - bias=Bias( - initial_std=0, parameter_name=para_prefix + "_input_recurrent.b"), - inputs=inputs + [ - FullMatrixProjection( - out_memory, parameter_name=para_prefix + "_input_recurrent.w"), - ], ) - LstmStepLayer( - name=name, - size=size, - bias=Bias(parameter_name=para_prefix + "_check.b"), - inputs=[name + "_" + "input_recurrent", state_memory], - active_type=active_type, - active_gate_type=gate_active_type, - active_state_type=state_active_type, ) - GetOutputLayer( - name=name + "_" + "state", - size=size, - inputs=Input( - name, input_layer_argument="state"), ) - - -def LstmRecurrentUnitNaive(name, - size, - active_type, - state_active_type, - gate_active_type, - inputs, - para_prefix=None, - error_clipping_threshold=0, - out_memory=None): - - if para_prefix is None: - para_prefix = name - if out_memory is None: - out_memory = Memory(name=name, size=size) - - state_memory = Memory(name=name + "_" + "state", size=size) - - Layer( - name=name + "_" + "input_recurrent", - type="mixed", - size=size * 4, #(input_s, input_gate, forget_gate, output_gate) - error_clipping_threshold=error_clipping_threshold, - bias=Bias( - initial_std=0, parameter_name=para_prefix + "_input_recurrent.b"), - inputs=inputs + [ - FullMatrixProjection( - out_memory, parameter_name=para_prefix + "_input_recurrent.w"), - ], ) - ExpressionLayer( - name=name + "_" + "input_s", - size=size, - active_type=active_type, - inputs=[ - IdentityOffsetProjection( - name + "_" + "input_recurrent", offset=0) - ], ) - ExpressionLayer( - name=name + "_" + "input_gate", - active_type=gate_active_type, - inputs=[ - IdentityOffsetProjection( - name + "_" + "input_recurrent", offset=size), DotMulProjection( - state_memory, parameter_name=para_prefix + "_input_check.w") - ], ) - ExpressionLayer( - name=name + "_" + "forget_gate", - active_type=gate_active_type, - inputs=[ - IdentityOffsetProjection( - name + "_" + "input_recurrent", offset=size * 2), - DotMulProjection( - state_memory, parameter_name=para_prefix + "_forget_check.w") - ], ) - ExpressionLayer( - name=name + "_" + "state", - inputs=[ - DotMulOperator([name + "_" + "input_s", name + "_" + "input_gate"]), - DotMulOperator([state_memory, name + "_" + "forget_gate"]), - ], ) - ExpressionLayer( - name=name + "_" + "output_gate", - active_type=gate_active_type, - inputs=[ - IdentityOffsetProjection( - name + "_" + "input_recurrent", offset=size * 3), - DotMulProjection( - name + "_" + "state", - parameter_name=para_prefix + "_output_check.w") - ], ) - ExpressionLayer( - name=name + "_" + "state_atv", - active_type=state_active_type, - 
inputs=IdentityProjection(name + "_" + "state"), ) - ExpressionLayer( - name=name, - inputs=DotMulOperator( - [name + "_" + "state_atv", name + "_" + "output_gate"]), ) - - -# like LstmRecurrentUnit, but it's a layer group. -# it is equivalent to LstmLayer -def LstmRecurrentLayerGroup(name, - size, - active_type, - state_active_type, - gate_active_type, - inputs, - para_prefix=None, - error_clipping_threshold=0, - seq_reversed=False): - - input_layer_name = name + "_" + "transform_input" - Layer( - name=input_layer_name, - type="mixed", - size=size * 4, - active_type="", - bias=False, - inputs=inputs, ) - - RecurrentLayerGroupBegin( - name + "_layer_group", - in_links=[input_layer_name], - out_links=[name], - seq_reversed=seq_reversed) - - LstmRecurrentUnit( - name=name, - size=size, - active_type=active_type, - state_active_type=state_active_type, - gate_active_type=gate_active_type, - inputs=[IdentityProjection(input_layer_name)], - para_prefix=para_prefix, - error_clipping_threshold=error_clipping_threshold, ) - - RecurrentLayerGroupEnd(name + "_layer_group") - - -# gated recurrent unit, can be used in recurrent machine -# *inputs* should be a list of Projections, for example: -# inputs = [FullMatrixProjection("input_layer_name")], -# *para_prefix* defines parameter names, if the *para_prefix* of -# two GatedRecurrentUnit is same, they share same parameters -# *out_memory* can be defined outside if it's used outside - - -def GatedRecurrentUnit(name, - size, - active_type, - gate_active_type, - inputs, - para_prefix=None, - error_clipping_threshold=0, - out_memory=None): - if type_of(inputs) == str: #only used by GatedRecurrentLayerGroup - input_layer_name = inputs - else: - input_layer_name = name + "_" + "transform_input" - Layer( - name=input_layer_name, - type="mixed", - size=size * 3, - active_type="", - bias=False, - inputs=inputs, ) - - if para_prefix is None: - para_prefix = name - if out_memory is None: - out_memory = Memory(name=name, size=size) - - GruStepLayer( - name=name, - size=size, - bias=Bias(parameter_name=para_prefix + "_gate.b"), - inputs=[ - input_layer_name, Input( - out_memory, parameter_name=para_prefix + "_gate.w") - ], - active_type=active_type, - active_gate_type=gate_active_type, ) - - -def GatedRecurrentUnitNaive(name, - size, - active_type, - gate_active_type, - inputs, - para_prefix=None, - error_clipping_threshold=0, - out_memory=None): - - if type_of(inputs) == str: #only used by GatedRecurrentLayerGroup - input_layer_name = inputs - else: - input_layer_name = name + "_" + "transform_input" - Layer( - name=input_layer_name, - type="mixed", - size=size * 3, - active_type="", - bias=False, - inputs=inputs, ) - - if para_prefix is None: - para_prefix = name - if out_memory is None: - out_memory = Memory(name=name, size=size) - - Layer( - name=name + "_" + "update_gate", - type="mixed", - size=size, - active_type=gate_active_type, - error_clipping_threshold=error_clipping_threshold, - bias=Bias( - initial_std=0, parameter_name=para_prefix + "_update_gate.b"), - inputs=[ - IdentityOffsetProjection( - input_layer_name, offset=0), FullMatrixProjection( - out_memory, parameter_name=para_prefix + "_update_gate.w") - ], ) - Layer( - name=name + "_" + "reset_gate", - type="mixed", - size=size, - active_type=gate_active_type, - error_clipping_threshold=error_clipping_threshold, - bias=Bias( - initial_std=0, parameter_name=para_prefix + "_reset_gate.b"), - inputs=[ - IdentityOffsetProjection( - input_layer_name, offset=size), FullMatrixProjection( - out_memory, 
parameter_name=para_prefix + "_reset_gate.w") - ], ) - ExpressionLayer( - name=name + "_" + "reset_output", - inputs=DotMulOperator([out_memory, name + "_" + "reset_gate"]), ) - Layer( - name=name + "_" + "output_candidate", - type="mixed", - size=size, - active_type=active_type, - error_clipping_threshold=error_clipping_threshold, - bias=Bias( - initial_std=0, parameter_name=para_prefix + "_output_candidate.b"), - inputs=[ - IdentityOffsetProjection( - input_layer_name, offset=size * 2), FullMatrixProjection( - name + "_" + "reset_output", - parameter_name=para_prefix + "_output_candidate.w") - ], ) - ExpressionLayer( #element-wise interpolation - name=name, - inputs=[ - IdentityProjection(out_memory), - DotMulOperator( - [out_memory, name + "_" + "update_gate"], scale=-1.0), - DotMulOperator( - [name + "_" + "output_candidate", name + "_" + "update_gate"]), - ], ) - - -# like GatedRecurrentUnit, but it's a layer group. -# it is equivalent to GatedRecurrentLayer. -def GatedRecurrentLayerGroup(name, - size, - active_type, - gate_active_type, - inputs, - para_prefix=None, - error_clipping_threshold=0, - seq_reversed=False): - - input_layer_name = name + "_" + "transform_input" - Layer( - name=input_layer_name, - type="mixed", - size=size * 3, - active_type="", - bias=False, - inputs=inputs, ) - - RecurrentLayerGroupBegin( - name + "_layer_group", - in_links=[input_layer_name], - out_links=[name], - seq_reversed=seq_reversed) - - GatedRecurrentUnit( - name=name, - size=size, - active_type=active_type, - gate_active_type=gate_active_type, - inputs=input_layer_name, #transform outside - para_prefix=para_prefix, - error_clipping_threshold=error_clipping_threshold, ) - - RecurrentLayerGroupEnd(name + "_layer_group") diff --git a/python/paddle/trainer_config_helpers/__init__.py b/python/paddle/trainer_config_helpers/__init__.py deleted file mode 100644 index 13155ebddbb49c502d9d4110704ab09f49825be2..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/__init__.py +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from activations import * -from data_sources import * -from poolings import * -from evaluators import * -from layers import * -from networks import * -from optimizers import * -from attrs import * -from config_parser_utils import * -# This will enable operator overload for LayerOutput -import layer_math diff --git a/python/paddle/trainer_config_helpers/activations.py b/python/paddle/trainer_config_helpers/activations.py deleted file mode 100644 index 3683968262266a2d654d2480b828173bc761152b..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/activations.py +++ /dev/null @@ -1,263 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -__all__ = [ - "TanhActivation", "SigmoidActivation", "SoftmaxActivation", - "IdentityActivation", "LinearActivation", 'SequenceSoftmaxActivation', - 'ExpActivation', "ReluActivation", "BReluActivation", "SoftReluActivation", - "STanhActivation", "AbsActivation", "SquareActivation", "BaseActivation", - "LogActivation", "SqrtActivation", "ReciprocalActivation", - "SoftSignActivation" -] - - -class BaseActivation(object): - """ - A mark for activation class. - Each activation inherit BaseActivation, which has two parameters. - - :param name: activation name in paddle config. - :type name: basestring - :param support_hppl: True if supported by hppl. HPPL is a library used by paddle - internally. Currently, lstm layer can only use activations - supported by hppl. - :type support_hppl: bool - """ - - def __init__(self, name, support_hppl): - self.name = name - self.support_hppl = support_hppl - - def __repr__(self): - return self.name - - -class TanhActivation(BaseActivation): - """ - Tanh activation. - - .. math:: - - f(z)=tanh(z)=\\frac{e^z-e^{-z}}{e^z+e^{-z}} - """ - - def __init__(self): - BaseActivation.__init__(self, 'tanh', True) - - -class SigmoidActivation(BaseActivation): - """ - Sigmoid activation. - - .. math:: - - f(z) = \\frac{1}{1+exp(-z)} - """ - - def __init__(self): - BaseActivation.__init__(self, 'sigmoid', True) - - -class SoftmaxActivation(BaseActivation): - """ - Softmax activation for simple input - - - - .. math:: - - P(y=j|x) = \\frac{e^{x_j}} {\\sum^K_{k=1} e^{x_k} } - """ - - def __init__(self): - BaseActivation.__init__(self, 'softmax', False) - - -class SequenceSoftmaxActivation(BaseActivation): - """ - Softmax activation for one sequence. The dimension of input feature must be - 1 and a sequence. - - .. code:: python - - result = softmax(for each_feature_vector[0] in input_feature) - for i, each_time_step_output in enumerate(output): - each_time_step_output = result[i] - """ - - def __init__(self): - BaseActivation.__init__(self, 'sequence_softmax', False) - - -class IdentityActivation(BaseActivation): - """ - Identity Activation. - - Just do nothing for output both forward/backward. - """ - - def __init__(self): - BaseActivation.__init__(self, '', False) - - -LinearActivation = IdentityActivation - - -class ReluActivation(BaseActivation): - """ - Relu activation. - - forward. :math:`y = max(0, z)` - - derivative: - - .. math:: - - 1 &\\quad if z > 0 \\\\ - 0 &\\quad\\mathrm{otherwize} - """ - - def __init__(self): - BaseActivation.__init__(self, 'relu', True) - - -class BReluActivation(BaseActivation): - """ - BRelu Activation. - - forward. :math:`y = min(24, max(0, z))` - - derivative: - - .. math:: - - 1 &\\quad if 0 < z < 24 \\\\ - 0 &\\quad \\mathrm{otherwise} - """ - - def __init__(self): - BaseActivation.__init__(self, 'brelu', False) - - -class SoftReluActivation(BaseActivation): - """ - SoftRelu Activation. - """ - - def __init__(self): - BaseActivation.__init__(self, 'softrelu', False) - - -class STanhActivation(BaseActivation): - """ - Scaled Tanh Activation. - - .. 
math:: - - f(z) = 1.7159 * tanh(2/3*z) - """ - - def __init__(self): - BaseActivation.__init__(self, 'stanh', False) - - -class AbsActivation(BaseActivation): - """ - Abs Activation. - - Forward: :math:`f(z) = abs(z)` - - Derivative: - - .. math:: - - 1 &\\quad if \\quad z > 0 \\\\ - -1 &\\quad if \\quad z < 0 \\\\ - 0 &\\quad if \\quad z = 0 - """ - - def __init__(self): - BaseActivation.__init__(self, 'abs', False) - - -class SquareActivation(BaseActivation): - """ - Square Activation. - - .. math:: - f(z) = z^2. - """ - - def __init__(self): - BaseActivation.__init__(self, 'square', False) - - -class ExpActivation(BaseActivation): - """ - Exponential Activation. - - .. math:: - f(z) = e^z. - """ - - def __init__(self): - BaseActivation.__init__(self, 'exponential', False) - - -class LogActivation(BaseActivation): - """ - Logarithm Activation. - - .. math:: - f(z) = log(z) - """ - - def __init__(self): - BaseActivation.__init__(self, 'log', False) - - -class SqrtActivation(BaseActivation): - """ - Square Root Activation. - - .. math:: - f(z) = sqrt(z) - """ - - def __init__(self): - BaseActivation.__init__(self, 'sqrt', False) - - -class ReciprocalActivation(BaseActivation): - """ - Reciprocal Activation. - - .. math:: - f(z)=\\frac{1}{z} - """ - - def __init__(self): - BaseActivation.__init__(self, 'reciprocal', False) - - -class SoftSignActivation(BaseActivation): - """ - SoftSign Activation. - - .. math:: - f(z)=\\frac{z}{1 + |z|} - """ - - def __init__(self): - BaseActivation.__init__(self, 'softsign', False) diff --git a/python/paddle/trainer_config_helpers/attrs.py b/python/paddle/trainer_config_helpers/attrs.py deleted file mode 100644 index 4e3beaf639bad9fed2862a5477095b66ef4b9aee..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/attrs.py +++ /dev/null @@ -1,291 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
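Before turning to the attribute helpers, a quick illustration of how the activation objects above behave; fc_layer and the data layer in the comment are assumptions borrowed from trainer_config_helpers.layers, which is not part of this hunk:

.. code-block:: python

    from paddle.trainer_config_helpers.activations import (
        LinearActivation, ReluActivation, TanhActivation)

    act = ReluActivation()
    print(act)               # 'relu': __repr__ returns the paddle config name
    print(act.support_hppl)  # True: usable inside lstm layers

    # Activations are passed to layer helpers, e.g. (hypothetical):
    #   fc = fc_layer(input=data, size=128, act=TanhActivation())
    # LinearActivation is simply an alias of IdentityActivation.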
-
-from paddle.trainer.config_parser import *
-__all__ = [
-    'HookAttr', 'ParamAttr', 'ExtraAttr', 'ParameterAttribute',
-    'ExtraLayerAttribute'
-]
-
-
-def convert_and_compare(x, Type):
-    """
-    Convert x to be the same type as Type and then convert back to
-    check whether there is a loss of information.
-    :param x: object to be checked
-    :param Type: target type to check x over
-
-    """
-    return type(x)(Type(x)) == x
-
-
-def is_compatible_with(x, Type):
-    """
-    Check if x has a type compatible with Type.
-    :param x: object to be checked
-    :param Type: target type to check x over
-
-    """
-    if type(x) == Type:
-        return True
-    try:
-        if float == Type or int == Type:
-            # avoid those types that can be converted to float/int but are not
-            # very meaningful and could potentially lead to error, i.e., str
-            # and bool typed values should not be used to initialize a
-            # float/int variable
-            if not isinstance(x, str) and not isinstance(x, bool):
-                return convert_and_compare(x, Type)
-        elif bool == Type:
-            # should not use string type to initialize bool variable
-            if not isinstance(x, str):
-                return convert_and_compare(x, Type)
-        else:
-            return False
-    except:
-        return False
-
-
-class HookAttribute(object):
-    """
-    Hook Attribute object. As a member of the ParameterAttribute class, the
-    hook is an auxiliary operation that occurs during the training process of
-    a layer with parameters, such as an img_conv layer or an fc layer.
-
-    :param type: Hook type, currently supported types:
-                 'pruning' : the user specifies a sparsity_ratio before
-                 training starts, and the network will prune the parameters
-                 based on the sparsity_ratio.
-                 e.g.: a Hook object can be defined as
-                 hk = HookAttribute('pruning', 0.6)
-                 and used as paddle.layer.img_conv(input=img, filter_size=3,
-                                      num_channels=3, num_filters=64,
-                                      param_attr=ParameterAttribute(update_hooks=hk))
-                 The pruning details can be found at
-                 https://arxiv.org/pdf/1506.02626.pdf
-    :type type: string
-
-    :param sparsity_ratio: Must be specified if the hook type is 'pruning'.
-                  It represents the ratio of elements of the Parameter that
-                  will be set to zero.
-    :type sparsity_ratio: float or None
-
-    """
-
-    def __init__(self, type, sparsity_ratio=None):
-        self.type = type
-        self.sparsity_ratio = sparsity_ratio
-        if self.sparsity_ratio is not None:
-            assert is_compatible_with(
-                self.sparsity_ratio,
-                float), 'sparsity_ratio must be float type'
-            assert self.sparsity_ratio <= 1 and self.sparsity_ratio >= 0, 'sparsity_ratio must be a float in [0, 1]'
-
-    def __call__(self):
-        return ParameterHook(self.type, sparsity_ratio=self.sparsity_ratio)
-
-
-class ParameterAttribute(object):
-    """
-    Parameter Attributes object. To fine-tune the network training process,
-    the user can set attributes to control training details, such as the
-    l1/l2 rate, the learning rate, and how to initialize the parameter.
-
-    NOTE: IT IS A HIGH LEVEL USER INTERFACE.
-
-    :param is_static: True if this parameter will be fixed while training.
-    :type is_static: bool
-
-    :param initial_std: Gauss Random initialization standard deviation.
-                        None if not using Gauss Random to initialize the
-                        parameter.
-    :type initial_std: float or None
-    :param initial_mean: Gauss Random initialization mean.
-                         None if not using Gauss Random to initialize the
-                         parameter.
-    :type initial_mean: float or None
-    :param initial_max: Uniform initialization max value.
-    :type initial_max: float or None
-    :param initial_min: Uniform initialization min value.
- :type initial_min: float or None - :param l1_rate: the l1 regularization factor - :type l1_rate: float or None - :param l2_rate: the l2 regularization factor - :type l2_rate: float or None - :param learning_rate: The parameter learning rate. None means 1. - The learning rate when optimize is LEARNING_RATE = - GLOBAL_LEARNING_RATE * PARAMETER_LEARNING_RATE - * SCHEDULER_FACTOR. - - :type learning_rate: float or None - :param momentum: The parameter momentum. None means use global value. - :type momentum: float or None - :param gradient_clipping_threshold: gradient clipping threshold. If gradient - value larger than some value, will be - clipped. - :type gradient_clipping_threshold: float - :param sparse_update: Enable sparse update for this parameter. It will - enable both local and remote sparse update. - :type sparse_update: bool - :param update_hooks: A HookAttribute object. - :type update_hooks: HookAttribute - :param initializer: If not None, it should be a callable object which accepts - a parameter name and returns numpy array for the initial - value of the parameter - :type initializer: callable object - """ - - def __init__(self, - name=None, - is_static=False, - initial_std=None, - initial_mean=None, - initial_max=None, - initial_min=None, - l1_rate=None, - l2_rate=None, - learning_rate=None, - momentum=None, - gradient_clipping_threshold=None, - sparse_update=False, - update_hooks=None, - initializer=None): - self.attr = {} - - if is_static: - self.attr['is_static'] = True - - if initial_std is None and initial_mean is None and initial_max \ - is None and initial_min is None: - self.attr['initial_smart'] = True - elif is_compatible_with(initial_std, float) or \ - is_compatible_with(initial_mean, float): - if initial_std is not None: - self.attr['initial_std'] = initial_std - if initial_mean is not None: - self.attr['initial_mean'] = initial_mean - self.attr['initial_strategy'] = 0 # Gauss Random - elif is_compatible_with(initial_max, float) and \ - is_compatible_with(initial_min, float): - initial_max = initial_max - initial_min = initial_min - assert initial_min < initial_max - initial_mean = (initial_max + initial_min) / 2 - initial_std = initial_mean - initial_min - self.attr['initial_mean'] = initial_mean - self.attr['initial_std'] = initial_std - self.attr['initial_strategy'] = 1 # Uniform Random - else: - raise RuntimeError("Unexpected branch.") - - if not is_static and is_compatible_with(l1_rate, float): - self.attr['decay_rate_l1'] = l1_rate - - if not is_static and is_compatible_with(l2_rate, float): - self.attr['decay_rate'] = l2_rate - - if not is_static and is_compatible_with(learning_rate, float): - self.attr['learning_rate'] = learning_rate - - if not is_static and is_compatible_with(momentum, float): - self.attr['momentum'] = momentum - - if name is not None: - self.attr['parameter_name'] = name - - if sparse_update: - self.attr['sparse_update'] = True - self.attr['sparse_remote_update'] = True - - if gradient_clipping_threshold is not None and \ - is_compatible_with(gradient_clipping_threshold, float): - self.attr['gradient_clipping_threshold'] = \ - gradient_clipping_threshold - if initializer is not None: - self.attr['initializer'] = initializer - - if update_hooks: - self.attr['update_hooks'] = update_hooks - - def set_default_parameter_name(self, name): - """ - Set default parameter name. If parameter not set, then will use default - parameter name. - - - :param name: default parameter name. 
-        :type name: basestring
-        """
-        if 'parameter_name' not in self.attr:
-            self.attr['parameter_name'] = name
-
-    @staticmethod
-    def to_bias(bias_attr):
-        if isinstance(bias_attr, ParameterAttribute):
-            return Bias(**bias_attr.attr)
-        else:
-            return False
-
-
-class ExtraLayerAttribute(object):
-    """
-    Some high level layer attributes config. You can set all attributes here,
-    but not every layer supports every attribute. If you set an attribute on a
-    layer that does not support it, paddle will print an error and dump core.
-
-    :param error_clipping_threshold: Error clipping threshold.
-    :type error_clipping_threshold: float
-    :param drop_rate: Dropout rate. Dropout will create a mask on the layer
-                      output. The dropout rate is the zero rate of this mask.
-                      For details of what dropout is, please refer to
-                      `JMLRdropout
-                      `_.
-    :type drop_rate: float
-    :param device: device ID of layer. device=-1, use CPU. device>=0, use GPU.
-                   For details of device allocation in parallel_nn, please
-                   refer to `use_case
-                   `_.
-    :type device: int
-    """
-
-    def __init__(self,
-                 error_clipping_threshold=None,
-                 drop_rate=None,
-                 device=None):
-        self.attr = dict()
-        if error_clipping_threshold is not None:
-            error_clipping_threshold = float(error_clipping_threshold)
-            if error_clipping_threshold < 0:
-                raise ValueError("Error clipping threshold must be >= 0")
-            self.attr['error_clipping_threshold'] = error_clipping_threshold
-        if drop_rate is not None:
-            drop_rate = float(drop_rate)
-            if drop_rate < 0:
-                raise ValueError("Dropout rate must be >= 0")
-            self.attr["drop_rate"] = drop_rate
-
-        if isinstance(device, int):
-            self.attr["device"] = device
-
-    def check(self, layer_name):
-        for key in self.attr:
-            if not hasattr(self, 'can_%s' % key) or \
-                    not getattr(self, 'can_%s' % key):
-                raise NotImplementedError("Layer %s does not support %s" %
-                                          (layer_name, key))
-
-    @staticmethod
-    def to_kwargs(attr):
-        if attr is None:
-            return dict()
-        else:
-            return attr.attr
-
-
-HookAttr = HookAttribute
-ParamAttr = ParameterAttribute
-ExtraAttr = ExtraLayerAttribute
diff --git a/python/paddle/trainer_config_helpers/config_parser_utils.py b/python/paddle/trainer_config_helpers/config_parser_utils.py
deleted file mode 100644
index ee5bbbfb2de7640ebef04edce34332ce4f44c67e..0000000000000000000000000000000000000000
--- a/python/paddle/trainer_config_helpers/config_parser_utils.py
+++ /dev/null
@@ -1,51 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import copy
-import paddle.trainer.config_parser as config_parser
-from paddle.proto.TrainerConfig_pb2 import OptimizationConfig
-'''
-This file is a wrapper of the formal config_parser. The main idea of this file
-is to separate different config logic into different functions, such as
-network configuration and optimizer configuration.
-''' - -__all__ = [ - "parse_trainer_config", "parse_network_config", "parse_optimizer_config", - "reset_parser" -] - - -def parse_trainer_config(trainer_conf, config_arg_str): - return config_parser.parse_config(trainer_conf, config_arg_str) - - -def parse_network_config(network_conf, config_arg_str=''): - config = config_parser.parse_config(network_conf, config_arg_str) - return config.model_config - - -def parse_optimizer_config(optimizer_conf, config_arg_str=''): - config_parser.settings = copy.deepcopy(config_parser.DEFAULT_SETTING) - optimizer_conf() - opt_config = OptimizationConfig() - for k, v in config_parser.settings.iteritems(): - if v is None: - continue - opt_config.__setattr__(k, v) - return opt_config - - -def reset_parser(): - config_parser.begin_parse() diff --git a/python/paddle/trainer_config_helpers/data_sources.py b/python/paddle/trainer_config_helpers/data_sources.py deleted file mode 100644 index a2a32d848cbc4200397e6a12a3662419102da0a9..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/data_sources.py +++ /dev/null @@ -1,213 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Data Sources are helpers to define paddle training data or testing data. -""" -from paddle.trainer.config_parser import * -from .utils import deprecated - -try: - import cPickle as pickle -except ImportError: - import six.moves.cPickle as pickle - -__all__ = ['define_py_data_sources2'] - - -def define_py_data_source(file_list, - cls, - module, - obj, - args=None, - async=False, - data_cls=PyData): - """ - Define a python data source. - - For example, the simplest usage in trainer_config.py as follow: - - .. code-block:: python - - define_py_data_source("train.list", TrainData, "data_provider", "process") - - Or. if you want to pass arguments from trainer_config to data_provider.py, then - - .. code-block:: python - - define_py_data_source("train.list", TrainData, "data_provider", "process", - args={"dictionary": dict_name}) - - :param data_cls: - :param file_list: file list name, which contains all data file paths - :type file_list: basestring - :param cls: Train or Test Class. - :type cls: TrainData or TestData - :param module: python module name. - :type module: basestring - :param obj: python object name. May be a function name if using - PyDataProviderWrapper. - :type obj: basestring - :param args: The best practice is using dict to pass arguments into - DataProvider, and use :code:`@init_hook_wrapper` to - receive arguments. - :type args: string or picklable object - :param async: Load Data asynchronously or not. 
-    :type async: bool
-    :return: None
-    :rtype: None
-    """
-    if isinstance(file_list, list):
-        file_list_name = 'train.list'
-        if cls == TestData:
-            file_list_name = 'test.list'
-        with open(file_list_name, 'w') as f:
-            f.writelines(file_list)
-        file_list = file_list_name
-
-    if not isinstance(args, basestring) and args is not None:
-        args = pickle.dumps(args, 0)
-
-    cls(
-        data_cls(
-            files=file_list,
-            load_data_module=module,
-            load_data_object=obj,
-            load_data_args=args,
-            async_load_data=async))
-
-
-def define_py_data_sources(train_list,
-                           test_list,
-                           module,
-                           obj,
-                           args=None,
-                           train_async=False,
-                           data_cls=PyData):
-    """
-    The annotation is almost the same as define_py_data_sources2, except that
-    it can specify train_async and data_cls.
-
-    :param data_cls:
-    :param train_list: Train list name.
-    :type train_list: basestring
-    :param test_list: Test list name.
-    :type test_list: basestring
-    :param module: python module name. If train and test are different, then
-                   pass a tuple or list to this argument.
-    :type module: basestring or tuple or list
-    :param obj: python object name. May be a function name if using
-                PyDataProviderWrapper. If train and test are different, then
-                pass a tuple or list to this argument.
-    :type obj: basestring or tuple or list
-    :param args: The best practice is using dict() to pass arguments into
-                 DataProvider, and use :code:`@init_hook_wrapper` to receive
-                 arguments. If train and test are different, then pass a tuple
-                 or list to this argument.
-    :type args: string or picklable object or list or tuple.
-    :param train_async: Whether to load training data asynchronously.
-    :type train_async: bool
-    :return: None
-    :rtype: None
-    """
-
-    def __is_splitable__(o):
-        return (isinstance(o, list) or
-                isinstance(o, tuple)) and hasattr(o, '__len__') and len(o) == 2
-
-    assert train_list is not None or test_list is not None
-    assert module is not None and obj is not None
-
-    test_module = module
-    train_module = module
-    if __is_splitable__(module):
-        train_module, test_module = module
-
-    test_obj = obj
-    train_obj = obj
-    if __is_splitable__(obj):
-        train_obj, test_obj = obj
-
-    if args is None:
-        args = ""
-
-    train_args = args
-    test_args = args
-    if __is_splitable__(args):
-        train_args, test_args = args
-
-    if train_list is not None:
-        define_py_data_source(train_list, TrainData, train_module, train_obj,
-                              train_args, train_async, data_cls)
-
-    if test_list is not None:
-        define_py_data_source(test_list, TestData, test_module, test_obj,
-                              test_args, False, data_cls)
-
-
-def define_py_data_sources2(train_list, test_list, module, obj, args=None):
-    """
-    Define python Train/Test data sources in one method. If train/test use
-    the same Data Provider configuration, module/obj/args contain one argument,
-    otherwise contain a list or tuple of arguments. For example\:
-
-    .. code-block:: python
-
-        define_py_data_sources2(train_list="train.list",
-                                test_list="test.list",
-                                module="data_provider",
-                                # if train/test use different configurations,
-                                # obj=["process_train", "process_test"]
-                                obj="process",
-                                args={"dictionary": dict_name})
-
-    The related data provider can refer to :ref:`api_pydataprovider2_sequential_model` .
-
-    :param train_list: Train list name.
-    :type train_list: basestring
-    :param test_list: Test list name.
-    :type test_list: basestring
-    :param module: python module name. If train and test are different, then
-                   pass a tuple or list to this argument.
-    :type module: basestring or tuple or list
-    :param obj: python object name. May be a function name if using
-                PyDataProviderWrapper. If train and test are different, then
-                pass a tuple or list to this argument.
-    :type obj: basestring or tuple or list
-    :param args: The best practice is using dict() to pass arguments into
-                 DataProvider, and use :code:`@init_hook_wrapper` to receive
-                 arguments. If train and test are different, then pass a tuple
-                 or list to this argument.
-    :type args: string or picklable object or list or tuple.
-    :return: None
-    :rtype: None
-    """
-
-    def py_data2(files, load_data_module, load_data_object, load_data_args,
-                 **kwargs):
-        data = create_data_config_proto()
-        data.type = 'py2'
-        data.files = files
-        data.load_data_module = load_data_module
-        data.load_data_object = load_data_object
-        data.load_data_args = load_data_args
-        data.async_load_data = False
-        return data
-
-    define_py_data_sources(
-        train_list=train_list,
-        test_list=test_list,
-        module=module,
-        obj=obj,
-        args=args,
-        data_cls=py_data2)
diff --git a/python/paddle/trainer_config_helpers/default_decorators.py b/python/paddle/trainer_config_helpers/default_decorators.py
deleted file mode 100644
index 69d860d9dab9c1d90e4d6a6940d66fcb551f6eb6..0000000000000000000000000000000000000000
--- a/python/paddle/trainer_config_helpers/default_decorators.py
+++ /dev/null
@@ -1,164 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
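The decorators defined below are easiest to understand by example: a minimal, hypothetical layer helper that stacks them, with the default each decorator supplies noted inline:

.. code-block:: python

    @wrap_name_default("demo")  # name -> '__demo_0__', '__demo_1__', ...
    @wrap_param_attr_default()  # param_attr -> ParamAttr()
    @wrap_bias_attr_default()   # bias_attr -> ParamAttr(initial_std=0., initial_mean=0.)
    @wrap_act_default()         # act -> TanhActivation()
    def demo_layer(input, name=None, param_attr=None, bias_attr=None, act=None):
        # every keyword left as None is filled in by its decorator
        return name, param_attr, bias_attr, act

    name, _, _, act = demo_layer(input=None)
    print(name)  # '__demo_0__'
    print(act)   # 'tanh'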
- -import functools -import inspect -from .attrs import ParamAttr -from .activations import TanhActivation -from paddle.trainer.config_parser import * - -__all__ = [ - 'wrap_name_default', 'wrap_param_attr_default', 'wrap_bias_attr_default', - 'wrap_act_default', 'wrap_param_default' -] - - -def __default_not_set_callback__(kwargs, name): - return name not in kwargs or kwargs[name] is None - - -def wrap_param_default(param_names=None, - default_factory=None, - not_set_callback=__default_not_set_callback__): - assert param_names is not None - assert isinstance(param_names, list) or isinstance(param_names, tuple) - for each_param_name in param_names: - assert isinstance(each_param_name, basestring) - - def __impl__(func): - @functools.wraps(func) - def __wrapper__(*args, **kwargs): - if len(args) != 0: - argspec = inspect.getargspec(func) - num_positional = len(argspec.args) - if argspec.defaults: - num_positional -= len(argspec.defaults) - if not argspec.varargs and len(args) > num_positional: - logger.fatal( - "Must use keyword arguments for non-positional args") - for name in param_names: - if not_set_callback(kwargs, name): # Not set - kwargs[name] = default_factory(func) - return func(*args, **kwargs) - - if hasattr(func, 'argspec'): - __wrapper__.argspec = func.argspec - else: - __wrapper__.argspec = inspect.getargspec(func) - return __wrapper__ - - return __impl__ - - -class DefaultNameFactory(object): - def __init__(self, name_prefix): - self.__counter__ = 0 - self.__name_prefix__ = name_prefix - - def __call__(self, func): - if self.__name_prefix__ is None: - self.__name_prefix__ = func.__name__ - tmp = "__%s_%d__" % (self.__name_prefix__, self.__counter__) - self.__check_name__(tmp) - self.__counter__ += 1 - return tmp - - def __check_name__(self, nm): - """ - @TODO(yuyang18): Implement it! - @param nm: - @return: - """ - pass - - def reset(self): - self.__counter__ = 0 - - -_name_factories = [] - - -def reset_hook(): - for factory in _name_factories: - factory.reset() - - -register_parse_config_hook(reset_hook) - - -def wrap_name_default(name_prefix=None, name_param="name"): - """ - Decorator to set "name" arguments default to "{name_prefix}_{invoke_count}". - - .. code:: python - - @wrap_name_default("some_name") - def func(name=None): - print name # name will never be None. If name is not set, - # name will be "some_name_%d" - - :param name_prefix: name prefix. wrapped function's __name__ if None. - :type name_prefix: basestring - :return: a decorator to set default name - :rtype: callable - """ - factory = DefaultNameFactory(name_prefix) - _name_factories.append(factory) - return wrap_param_default([name_param], factory) - - -def wrap_param_attr_default(param_names=None, default_factory=None): - """ - Setting Default Parameter Attributes Decorator. - - :param default_factory: - :param param_names: Parameter Attribute's Names, list of string - :type param_names: list - :return: decorator - """ - if param_names is None: - param_names = ['param_attr'] - if default_factory is None: - default_factory = lambda _: ParamAttr() - - return wrap_param_default(param_names, default_factory) - - -def wrap_bias_attr_default(param_names=None, - default_factory=None, - has_bias=True): - if param_names is None: - param_names = ['bias_attr'] - if default_factory is None: - default_factory = lambda _: ParamAttr(initial_std=0., initial_mean=0.) 
- - def __bias_attr_not_set__(kwargs, name): - if has_bias: - return name not in kwargs or kwargs[name] is None or \ - kwargs[name] == True - else: - return name in kwargs and kwargs[name] == True - - return wrap_param_default(param_names, default_factory, - __bias_attr_not_set__) - - -def wrap_act_default(param_names=None, act=None): - if param_names is None: - param_names = ["act"] - - if act is None: - act = TanhActivation() - - return wrap_param_default(param_names, lambda _: act) diff --git a/python/paddle/trainer_config_helpers/evaluators.py b/python/paddle/trainer_config_helpers/evaluators.py deleted file mode 100644 index 0eeaf7eabb179f19d2af8dafe821f7baa153fead..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/evaluators.py +++ /dev/null @@ -1,813 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer.config_parser import * -from default_decorators import * - -__all__ = [ - "evaluator_base", - "classification_error_evaluator", - "auc_evaluator", - "pnpair_evaluator", - "precision_recall_evaluator", - "ctc_error_evaluator", - "chunk_evaluator", - "sum_evaluator", - "column_sum_evaluator", - "value_printer_evaluator", - "gradient_printer_evaluator", - "maxid_printer_evaluator", - "maxframe_printer_evaluator", - "seqtext_printer_evaluator", - "classification_error_printer_evaluator", - "detection_map_evaluator", -] - - -class EvaluatorAttribute(object): - FOR_CLASSIFICATION = 1 - FOR_REGRESSION = 1 << 1 - FOR_RANK = 1 << 2 - FOR_PRINT = 1 << 3 - FOR_UTILS = 1 << 4 - FOR_DETECTION = 1 << 5 - - KEYS = [ - "for_classification", "for_regression", "for_rank", "for_print", - "for_utils", "for_detection" - ] - - @staticmethod - def to_key(idx): - tmp = 1 - for i in xrange(0, len(EvaluatorAttribute.KEYS)): - if idx == tmp: - return EvaluatorAttribute.KEYS[i] - else: - tmp = (tmp << 1) - - -def evaluator(*attrs): - def impl(method): - for attr in attrs: - setattr(method, EvaluatorAttribute.to_key(attr), True) - method.is_evaluator = True - return method - - return impl - - -def evaluator_base(input, - type, - label=None, - weight=None, - name=None, - chunk_scheme=None, - num_chunk_types=None, - classification_threshold=None, - positive_label=None, - dict_file=None, - result_file=None, - num_results=None, - delimited=None, - top_k=None, - excluded_chunk_types=None, - overlap_threshold=None, - background_id=None, - evaluate_difficult=None, - ap_type=None): - """ - Evaluator will evaluate the network status while training/testing. - - User can use evaluator by classify/regression job. For example. - - .. code-block:: python - - classify(prediction, output, evaluator=classification_error_evaluator) - - And user could define evaluator separately as follow. - - .. code-block:: python - - classification_error_evaluator("ErrorRate", prediction, label) - - The evaluator often contains a name parameter. It will also be printed when - evaluating network. 
The printed information may look like the following.
-
-    .. code-block:: text
-
-        Batch=200 samples=20000 AvgCost=0.679655 CurrentCost=0.662179 Eval:
-        classification_error_evaluator=0.4486
-        CurrentEval: ErrorRate=0.3964
-
-    :param input: Input layers, an object of LayerOutput or a list of
-                  LayerOutput.
-    :type input: list|LayerOutput
-    :param label: An input layer containing the ground truth label.
-    :type label: LayerOutput|None
-    :param weight: An input layer which is a weight for each sample.
-                   Each evaluator may use this weight differently.
-    :type weight: LayerOutput
-    :param top_k: number k in the top-k error rate
-    :type top_k: int
-    :param overlap_threshold: Overlap threshold used in detection tasks to
-                              filter detection results
-    :type overlap_threshold: float
-    :param background_id: Identifier of the background class
-    :type background_id: int
-    :param evaluate_difficult: Whether to evaluate difficult objects
-    :type evaluate_difficult: bool
-    :param ap_type: How to calculate average precision
-    :type ap_type: str
-    """
-    # inputs type assertions.
-    assert classification_threshold is None or isinstance(
-        classification_threshold, float)
-    assert positive_label is None or isinstance(positive_label, int)
-    assert num_results is None or isinstance(num_results, int)
-    assert top_k is None or isinstance(top_k, int)
-
-    if not isinstance(input, list):
-        input = [input]
-
-    if label:
-        input.append(label)
-    if weight:
-        input.append(weight)
-
-    Evaluator(
-        name=name,
-        type=type,
-        inputs=[i.name for i in input],
-        chunk_scheme=chunk_scheme,
-        num_chunk_types=num_chunk_types,
-        classification_threshold=classification_threshold,
-        positive_label=positive_label,
-        dict_file=dict_file,
-        result_file=result_file,
-        delimited=delimited,
-        num_results=num_results,
-        top_k=top_k,
-        excluded_chunk_types=excluded_chunk_types,
-        overlap_threshold=overlap_threshold,
-        background_id=background_id,
-        evaluate_difficult=evaluate_difficult,
-        ap_type=ap_type)
-
-
-@evaluator(EvaluatorAttribute.FOR_DETECTION)
-@wrap_name_default()
-def detection_map_evaluator(input,
-                            label,
-                            overlap_threshold=0.5,
-                            background_id=0,
-                            evaluate_difficult=False,
-                            ap_type="11point",
-                            name=None):
-    """
-    Detection mAP Evaluator. It will print the mean Average Precision (mAP)
-    for detection.
-
-    Based on the output of the detection_output layer, the detection mAP
-    evaluator counts the true positive and false positive bboxes and
-    integrates them to get the mAP.
-
-    The simple usage is:
-
-    .. code-block:: python
-
-        eval = detection_map_evaluator(input=det_output, label=lbl)
-
-    :param input: Input layer.
-    :type input: LayerOutput
-    :param label: Label layer.
-    :type label: LayerOutput
-    :param overlap_threshold: The bbox overlap threshold of a true positive.
-    :type overlap_threshold: float
-    :param background_id: The background class index.
-    :type background_id: int
-    :param evaluate_difficult: Whether to evaluate difficult ground truth.
-    :type evaluate_difficult: bool
-    """
-    if not isinstance(input, list):
-        input = [input]
-
-    if label:
-        input.append(label)
-
-    evaluator_base(
-        name=name,
-        type="detection_map",
-        input=input,
-        label=label,
-        overlap_threshold=overlap_threshold,
-        background_id=background_id,
-        evaluate_difficult=evaluate_difficult,
-        ap_type=ap_type)
-
-
-@evaluator(EvaluatorAttribute.FOR_CLASSIFICATION)
-@wrap_name_default()
-def classification_error_evaluator(input,
-                                   label,
-                                   name=None,
-                                   weight=None,
-                                   top_k=None,
-                                   threshold=None):
-    """
-    Classification Error Evaluator.
It will print error rate for classification. - - The classification error is: - - .. math:: - - classification\\_error = \\frac{NumOfWrongPredicts}{NumOfAllSamples} - - The simple usage is: - - .. code-block:: python - - eval = classification_error_evaluator(input=prob,label=lbl) - - :param name: Evaluator name. - :type name: basestring - :param input: Input Layer name. The output prediction of network. - :type input: LayerOutput - :param label: Label layer name. - :type label: basestring - :param weight: Weight Layer name. It should be a matrix with size - [sample_num, 1]. And will just multiply to NumOfWrongPredicts - and NumOfAllSamples. So, the elements of weight are all one, - then means not set weight. The larger weight it is, the more - important this sample is. - :type weight: LayerOutput - :param top_k: number k in top-k error rate - :type top_k: int - :param threshold: The classification threshold. - :type threshold: float - :return: None. - """ - - evaluator_base( - name=name, - type="classification_error", - input=input, - label=label, - weight=weight, - top_k=top_k, - classification_threshold=threshold, ) - - -@evaluator(EvaluatorAttribute.FOR_CLASSIFICATION) -@wrap_name_default() -def auc_evaluator( - input, - label, - name=None, - weight=None, ): - """ - Auc Evaluator which adapts to binary classification. - - The simple usage: - - .. code-block:: python - - eval = auc_evaluator(input, label) - - :param name: Evaluator name. - :type name: None|basestring - :param input: Input Layer name. The output prediction of network. - :type input: LayerOutput - :param label: Label layer name. - :type label: None|basestring - :param weight: Weight Layer name. It should be a matrix with size - [sample_num, 1]. - :type weight: LayerOutput - """ - evaluator_base( - name=name, - type="last-column-auc", - input=input, - label=label, - weight=weight) - - -@evaluator(EvaluatorAttribute.FOR_RANK) -@wrap_name_default() -def pnpair_evaluator( - input, - label, - query_id, - weight=None, - name=None, ): - """ - Positive-negative pair rate Evaluator which adapts to rank task like - learning to rank. This evaluator must contain at least three layers. - - The simple usage: - - .. code-block:: python - - eval = pnpair_evaluator(input, label, query_id) - - :param input: Input Layer name. The output prediction of network. - :type input: LayerOutput - :param label: Label layer name. - :type label: LayerOutput - :param query_id: Query_id layer name. Query_id indicates that which query - each sample belongs to. Its shape should be - the same as output of Label layer. - :type query_id: LayerOutput - :param weight: Weight Layer name. It should be a matrix with size - [sample_num, 1] which indicates the weight of each sample. - The default weight of sample is 1 if the weight layer is None. - And the pair weight is the mean of the two samples' weight. - :type weight: LayerOutput - :param name: Evaluator name. - :type name: None|basestring - """ - if not isinstance(input, list): - input = [input] - if label: - input.append(label) - if query_id: - input.append(query_id) - evaluator_base( - input=input, - type="pnpair", - weight=weight, - name=name, ) - - -@evaluator(EvaluatorAttribute.FOR_CLASSIFICATION) -@wrap_name_default() -def precision_recall_evaluator( - input, - label, - positive_label=None, - weight=None, - name=None, ): - """ - An Evaluator to calculate precision and recall, F1-score. - It is adapt to the task with multiple labels. 
-
-    - If positive_label = -1, it will print the average precision, recall
-      and F1-score of all labels.
-
-    - If a specific positive_label is given, it will print the precision,
-      recall and F1-score of that label.
-
-    The simple usage:
-
-    .. code-block:: python
-
-       eval = precision_recall_evaluator(input, label)
-
-    :param name: Evaluator name.
-    :type name: None|basestring
-    :param input: Input Layer name. The output prediction of network.
-    :type input: LayerOutput
-    :param label: Label layer name.
-    :type label: LayerOutput
-    :param positive_label: The input label layer.
-    :type positive_label: LayerOutput
-    :param weight: Weight Layer name. It should be a matrix with size
-                   [sample_num, 1]. (TODO: explanation)
-    :type weight: LayerOutput
-    """
-    evaluator_base(
-        name=name,
-        type="precision_recall",
-        input=input,
-        label=label,
-        positive_label=positive_label,
-        weight=weight)
-
-
-@evaluator(EvaluatorAttribute.FOR_CLASSIFICATION)
-@wrap_name_default()
-def ctc_error_evaluator(
-        input,
-        label,
-        name=None, ):
-    """
-    This evaluator calculates the sequence-to-sequence edit distance.
-
-    The simple usage is:
-
-    .. code-block:: python
-
-       eval = ctc_error_evaluator(input=input, label=lbl)
-
-    :param name: Evaluator name.
-    :type name: None|basestring
-    :param input: Input Layer. Should be the same as the input for ctc_layer.
-    :type input: LayerOutput
-    :param label: The input label, which is a data_layer. Should be the same
-                  as the label for ctc_layer.
-    :type label: LayerOutput
-    """
-    evaluator_base(
-        name=name, type="ctc_edit_distance", input=input, label=label)
-
-
-@evaluator(EvaluatorAttribute.FOR_CLASSIFICATION)
-@wrap_name_default()
-def chunk_evaluator(
-        input,
-        label,
-        chunk_scheme,
-        num_chunk_types,
-        name=None,
-        excluded_chunk_types=None, ):
-    """
-    Chunk evaluator is used to evaluate segment labelling accuracy for a
-    sequence. It calculates the precision, recall and F1 scores for chunk
-    detection.
-
-    To use the chunk evaluator, several concepts need to be clarified first.
-
-    * **Chunk type** is the type of the whole chunk; a chunk consists of one
-      or several words. (For example in NER, ORG for organization name, PER
-      for person name, etc.)
-
-    * **Tag type** indicates the position of a word in a chunk. (B for
-      begin, I for inside, E for end, S for single)
-      We can name a label by combining tag type and chunk type. (e.g. B-ORG
-      for the beginning of an organization name)
-
-    The construction of the label dictionary should obey the following rules:
-
-    - Use one of the listed labelling schemes. These schemes differ in the
-      way they indicate chunk boundaries.
-
-    .. code-block:: text
-
-        Scheme  Description
-        plain   Use the same label for the whole chunk.
-        IOB     Two labels for chunk type X, B-X for chunk beginning and I-X for chunk inside.
-        IOE     Two labels for chunk type X, E-X for chunk ending and I-X for chunk inside.
-        IOBES   Four labels for chunk type X, B-X for chunk beginning, I-X for chunk inside, E-X for chunk end and S-X for single word chunk.
-
-    To make it clear, let's illustrate with an NER example.
-    Assume there are three named entity types, ORG, PER and LOC, which are
-    called 'chunk types' here. If the 'IOB' scheme is used, the label set is
-    extended to B-ORG, I-ORG, B-PER, I-PER, B-LOC, I-LOC and O, in which
-    B-ORG stands for the beginning of ORG and I-ORG for the inside of ORG.
-    The prefixes, which are called 'tag types' here, are added to the chunk
-    types, so there are two tag types, B and I.
-    Of course, the training data should be labeled accordingly.
-
-    - Map labels to tag types and chunk types according to the equations
-      and assignment protocol listed below.
-
-    The following equations show how to extract the tag type and chunk type
-    from a label:
-
-    .. code-block:: text
-
-        tagType = label % numTagType
-        chunkType = label / numTagType
-        otherChunkType = numChunkTypes
-
-    The following table shows the mapping between tagType values and tag
-    types in each scheme:
-
-    .. code-block:: text
-
-        Scheme  Begin  Inside  End  Single
-        plain   0      -       -    -
-        IOB     0      1       -    -
-        IOE     -      0       1    -
-        IOBES   0      1       2    3
-
-    Continuing the NER example, the label dict should look like this to
-    satisfy the above equations:
-
-    .. code-block:: text
-
-        B-ORG  0
-        I-ORG  1
-        B-PER  2
-        I-PER  3
-        B-LOC  4
-        I-LOC  5
-        O      6
-
-    In this example, chunkType has three values: 0 for ORG, 1 for PER and 2
-    for LOC. Because the scheme is "IOB", tagType has two values: 0 for B
-    and 1 for I.
-    Let's use I-LOC to explain the above mapping rules in detail.
-    For I-LOC the label id is 5, so tagType=1 and chunkType=2, which means
-    I-LOC is part of the NER chunk LOC and its tag is I.
-
-    The simple usage is:
-
-    .. code-block:: python
-
-       eval = chunk_evaluator(input, label, chunk_scheme, num_chunk_types)
-
-
-    :param input: The input layers.
-    :type input: LayerOutput
-    :param label: An input layer containing the ground truth label.
-    :type label: LayerOutput
-    :param chunk_scheme: The labelling scheme. It must be one of "IOB",
-                         "IOE", "IOBES" or "plain". It is required.
-    :type chunk_scheme: basestring
-    :param num_chunk_types: The number of chunk types other than "other".
-    :type num_chunk_types: int
-    :param name: The Evaluator name. It is optional.
-    :type name: basestring|None
-    :param excluded_chunk_types: Chunks of these types are not considered.
-    :type excluded_chunk_types: list of integer|None
-    """
-    evaluator_base(
-        name=name,
-        type="chunk",
-        input=input,
-        label=label,
-        chunk_scheme=chunk_scheme,
-        num_chunk_types=num_chunk_types,
-        excluded_chunk_types=excluded_chunk_types, )
-
-
-@evaluator(EvaluatorAttribute.FOR_UTILS)
-@wrap_name_default()
-def sum_evaluator(
-        input,
-        name=None,
-        weight=None, ):
-    """
-    An Evaluator to sum the result of input.
-
-    The simple usage:
-
-    .. code-block:: python
-
-       eval = sum_evaluator(input)
-
-    :param name: Evaluator name.
-    :type name: None|basestring
-    :param input: Input Layer name.
-    :type input: LayerOutput
-    :param weight: Weight Layer name. It should be a matrix with size
-                   [sample_num, 1]. (TODO: explanation)
-    :type weight: LayerOutput
-    """
-    evaluator_base(name=name, type="sum", input=input, weight=weight)
-
-
-@evaluator(EvaluatorAttribute.FOR_UTILS)
-@wrap_name_default()
-def column_sum_evaluator(
-        input,
-        name=None,
-        weight=None, ):
-    """
-    This Evaluator is used to sum the last column of input.
-
-    The simple usage is:
-
-    .. code-block:: python
-
-       eval = column_sum_evaluator(input)
-
-    :param name: Evaluator name.
-    :type name: None|basestring
-    :param input: Input Layer name.
-    :type input: LayerOutput
-    """
-    evaluator_base(
-        name=name, type="last-column-sum", input=input, weight=weight)
-
-
-"""
-The following are printer Evaluators, which are usually used to
-print results, such as the values or gradients of input layers, the
-results generated in machine translation, the classification error, etc.
-"""
-
-
-@evaluator(EvaluatorAttribute.FOR_PRINT)
-@wrap_name_default()
-def value_printer_evaluator(
-        input,
-        name=None, ):
-    """
-    This Evaluator is used to print the values of input layers. It contains
-    one or more input layers.
-
-    The simple usage is:
-
-    ..
code-block:: python - - eval = value_printer_evaluator(input) - - :param input: One or more input layers. - :type input: LayerOutput|list - :param name: Evaluator name. - :type name: None|basestring - """ - evaluator_base(name=name, type="value_printer", input=input) - - -@evaluator(EvaluatorAttribute.FOR_PRINT) -@wrap_name_default() -def gradient_printer_evaluator( - input, - name=None, ): - """ - This Evaluator is used to print the gradient of input layers. It contains - one or more input layers. - - The simple usage is: - - .. code-block:: python - - eval = gradient_printer_evaluator(input) - - :param input: One or more input layers. - :type input: LayerOutput|list - :param name: Evaluator name. - :type name: None|basestring - """ - evaluator_base(name=name, type="gradient_printer", input=input) - - -@evaluator(EvaluatorAttribute.FOR_PRINT) -@wrap_name_default() -def maxid_printer_evaluator( - input, - num_results=None, - name=None, ): - """ - This Evaluator is used to print maximum top k values and their indexes - of each row of input layers. It contains one or more input layers. - k is specified by num_results. - - The simple usage is: - - .. code-block:: python - - eval = maxid_printer_evaluator(input) - - :param input: Input Layer name. - :type input: LayerOutput|list - :param num_results: This number is used to specify the top k numbers. - It is 1 by default. - :type num_results: int. - :param name: Evaluator name. - :type name: None|basestring - """ - evaluator_base( - name=name, type="max_id_printer", input=input, num_results=num_results) - - -@evaluator(EvaluatorAttribute.FOR_PRINT) -@wrap_name_default() -def maxframe_printer_evaluator( - input, - num_results=None, - name=None, ): - """ - This Evaluator is used to print the top k frames of each input layers. - The input layers should contain sequences info or sequences type. - k is specified by num_results. - It contains one or more input layers. - - Note: - The width of each frame is 1. - - The simple usage is: - - .. code-block:: python - - eval = maxframe_printer_evaluator(input) - - :param input: Input Layer name. - :type input: LayerOutput|list - :param name: Evaluator name. - :type name: None|basestring - """ - evaluator_base( - name=name, - type="max_frame_printer", - input=input, - num_results=num_results) - - -@evaluator(EvaluatorAttribute.FOR_PRINT) -@wrap_name_default() -def seqtext_printer_evaluator( - input, - result_file, - id_input=None, - dict_file=None, - delimited=None, - name=None, ): - """ - Sequence text printer will print text according to index matrix and a - dictionary. There can be multiple input to this layer: - - 1. If there is no id_input, the input must be a matrix containing - the sequence of indices; - - 2. If there is id_input, it should be ids, and interpreted as sample ids. - - The output format will be: - - 1. sequence without sub-sequence, and there is probability. - - .. code-block:: python - - id \t prob space_seperated_tokens_from_dictionary_according_to_seq - - 2. sequence without sub-sequence, and there is not probability. - - .. code-block:: python - - id \t space_seperated_tokens_from_dictionary_according_to_seq - - 3. sequence with sub-sequence, and there is not probability. - - .. code-block:: python - - id \t space_seperated_tokens_from_dictionary_according_to_sub_seq - \t \t space_seperated_tokens_from_dictionary_according_to_sub_seq - ... - - Typically SequenceTextPrinter layer takes output of maxid or RecurrentGroup - with maxid (when generating) as an input. 
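For concreteness, a single line of the generated results in the first format (id, a tab, the probability, then the dictionary tokens of the sequence) might look like the following; the contents are purely illustrative:

.. code-block:: text

   0    -2.8494    i love paddle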
-
-    The simple usage is:
-
-    .. code-block:: python
-
-       eval = seqtext_printer_evaluator(input=maxid_layer,
-                                        id_input=sample_id,
-                                        dict_file=dict_file,
-                                        result_file=result_file)
-
-    :param input: Input Layer name.
-    :type input: LayerOutput|list
-    :param result_file: Path of the file to store the generated results.
-    :type result_file: basestring
-    :param id_input: Index of the input sequence. The specified index will
-                     be printed in the generated results. This is an
-                     optional parameter.
-    :type id_input: LayerOutput
-    :param dict_file: Path of the dictionary. This is an optional parameter.
-                      Every line is a word in the dictionary with
-                      (line number - 1) as the word index.
-                      If this parameter is set to None, or to an empty
-                      string, only word indices are printed in the generated
-                      results.
-    :type dict_file: basestring
-    :param delimited: Whether to use a space to separate output tokens.
-                      Default is True. No space is added if set to False.
-    :type delimited: bool
-    :param name: Evaluator name.
-    :type name: None|basestring
-    :return: The seq_text_printer that prints the generated sequence to a file.
-    :rtype: evaluator
-    """
-    assert isinstance(result_file, basestring)
-    if id_input is None:
-        inputs = [input]
-    else:
-        inputs = [id_input, input]
-        input.parents.append(id_input)
-
-    evaluator_base(
-        name=name,
-        type="seq_text_printer",
-        input=inputs,
-        dict_file=dict_file,
-        result_file=result_file,
-        delimited=delimited)
-
-
-@evaluator(EvaluatorAttribute.FOR_PRINT)
-@wrap_name_default()
-def classification_error_printer_evaluator(
-        input,
-        label,
-        threshold=0.5,
-        name=None, ):
-    """
-    This Evaluator is used to print the classification error of each sample.
-
-    The simple usage is:
-
-    .. code-block:: python
-
-       eval = classification_error_printer_evaluator(input)
-
-    :param input: Input layer.
-    :type input: LayerOutput
-    :param label: Input label layer.
-    :type label: LayerOutput
-    :param name: Evaluator name.
-    :type name: None|basestring
-    """
-    evaluator_base(
-        name=name,
-        type="classification_error_printer",
-        input=input,
-        label=label,
-        classification_threshold=threshold)
diff --git a/python/paddle/trainer_config_helpers/layer_math.py b/python/paddle/trainer_config_helpers/layer_math.py
deleted file mode 100644
index ee84188baccc8c246b7a791c77556b403698f2d9..0000000000000000000000000000000000000000
--- a/python/paddle/trainer_config_helpers/layer_math.py
+++ /dev/null
@@ -1,113 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
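The module below registers a family of unary math ops and overloads +, - and * on LayerOutput, so layer expressions can be written as ordinary arithmetic. A minimal usage sketch, assuming x and y are LayerOutput objects of equal size and the module's exported names are in scope:

.. code-block:: python

   y1 = exp(x)    # mixed_layer over identity_projection(x) with ExpActivation
   y2 = x + 1.0   # slope_intercept_layer with intercept=1.0
   y3 = 2.0 * x   # slope_intercept_layer with slope=2.0
   y4 = x + y     # mixed_layer summing two identity projections
   y5 = x - y     # x plus (-1.0 * y), built from slope_intercept_layer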
- -from .layers import LayerOutput, mixed_layer, identity_projection, \ - slope_intercept_layer, scaling_layer, repeat_layer -from .attrs import is_compatible_with -from .default_decorators import * -import activations as act -from paddle.trainer.config_parser import logger - -__all__ = [] - - -def register_unary_math_op(op_name, act): - def op(input, name=None): - return mixed_layer( - input=[identity_projection(input=input)], name=name, act=act) - - op = wrap_name_default(op_name)(op) - op.__doc__ = type(act).__doc__ - globals()[op_name] = op - __all__.append(op_name) - - -register_unary_math_op('exp', act.ExpActivation()) -register_unary_math_op('log', act.LogActivation()) -register_unary_math_op('abs', act.AbsActivation()) -register_unary_math_op('sigmoid', act.SigmoidActivation()) -register_unary_math_op('tanh', act.TanhActivation()) -register_unary_math_op('square', act.SquareActivation()) -register_unary_math_op('relu', act.ReluActivation()) -register_unary_math_op('sqrt', act.SqrtActivation()) -register_unary_math_op('reciprocal', act.ReciprocalActivation()) - - -def add(layeroutput, other): - if is_compatible_with(other, float): - return slope_intercept_layer(input=layeroutput, intercept=other) - if not isinstance(other, LayerOutput): - logger.fatal("LayerOutput can only be added with" - " another LayerOutput or a number") - if layeroutput.size == other.size: - return mixed_layer(input=[ - identity_projection(input=layeroutput), - identity_projection(input=other) - ]) - if other.size != 1 and layeroutput.size != 1: - logger.fatal("Two LayerOutput can be added only if they have equal size" - " or one of their sizes is 1. sizes are %s and %s" % - (layeroutput.size, other.size)) - elif layeroutput.size == 1: - tmp = layeroutput - layeroutput = other - other = tmp - other = repeat_layer(other, layeroutput.size) - return mixed_layer(input=[ - identity_projection(input=layeroutput), identity_projection(input=other) - ]) - - -LayerOutput.__radd__ = add -LayerOutput.__add__ = add - - -def sub(layeroutput, other): - if is_compatible_with(other, float): - return slope_intercept_layer(input=layeroutput, intercept=-other) - if not isinstance(other, LayerOutput): - logger.fatal("LayerOutput can only be subtracted with" - " another Layeroutput or a number") - neg = slope_intercept_layer(input=other, slope=-1.0) - return add(layeroutput, neg) - - -LayerOutput.__sub__ = sub - - -def rsub(layeroutput, other): - neg = slope_intercept_layer(input=layeroutput, slope=-1.0) - return add(neg, other) - - -LayerOutput.__rsub__ = rsub - - -def mul(layeroutput, other): - if is_compatible_with(other, float): - return slope_intercept_layer(input=layeroutput, slope=other) - if not isinstance(other, LayerOutput): - logger.fatal("LayerOutput can only be multiplied with" - " another Layeroutput or a number") - elif layeroutput.size == 1: - return scaling_layer(input=other, weight=layeroutput) - elif other.size == 1: - return scaling_layer(input=layeroutput, weight=other) - else: - logger.fatal("At least one of the operand of '*' must be a number" - " or a LayerOutput with size=1") - - -LayerOutput.__mul__ = mul -LayerOutput.__rmul__ = mul diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py deleted file mode 100644 index ee34c157334b533b9c330b8103424964d7df510b..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/layers.py +++ /dev/null @@ -1,7610 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. 
All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import functools -import collections -import inspect - -import paddle.trainer.config_parser as cp -from paddle.trainer.config_parser import * -from .activations import LinearActivation, SigmoidActivation, TanhActivation, \ - ReluActivation, IdentityActivation, SoftmaxActivation, BaseActivation -from .evaluators import * -from .poolings import MaxPooling, AvgPooling, MaxWithMaskPooling, BasePoolingType, \ - CudnnAvgPooling, CudnnAvgInclPadPooling, CudnnMaxPooling -from .attrs import * -from .default_decorators import * - -try: - import cPickle as pickle -except ImportError: - import six.moves.cPickle as pickle -import copy - -__all__ = [ - 'full_matrix_projection', - 'AggregateLevel', - 'ExpandLevel', - 'identity_projection', - 'dotmul_projection', - 'dotmul_operator', - 'repeat_layer', - 'seq_reshape_layer', - 'table_projection', - 'mixed_layer', - 'data_layer', - 'embedding_layer', - 'fc_layer', - 'grumemory', - 'pooling_layer', - 'lstmemory', - 'last_seq', - 'first_seq', - 'cos_sim', - 'l2_distance_layer', - 'hsigmoid', - 'conv_projection', - 'square_error_cost', - 'regression_cost', - 'classification_cost', - 'LayerOutput', - 'img_conv_layer', - 'img_pool_layer', - 'batch_norm_layer', - 'img_cmrnorm_layer', - 'addto_layer', - 'concat_layer', - 'seq_concat_layer', - 'lstm_step_layer', - 'recurrent_group', - 'memory', - 'StaticInput', - 'expand_layer', - 'scaling_layer', - 'scaling_projection', - 'power_layer', - 'interpolation_layer', - 'bilinear_interp_layer', - 'trans_layer', - 'rotate_layer', - 'sum_to_one_norm_layer', - 'row_l2_norm_layer', - 'get_output_layer', - 'LayerType', - 'context_projection', - 'beam_search', - 'maxid_layer', - 'GeneratedInput', - 'SubsequenceInput', - 'gru_step_layer', - 'gru_step_naive_layer', - 'recurrent_layer', - 'BaseGeneratedInput', - 'conv_operator', - 'conv_shift_layer', - 'tensor_layer', - 'selective_fc_layer', - 'sampling_id_layer', - 'slope_intercept_layer', - 'trans_full_matrix_projection', - 'linear_comb_layer', - 'convex_comb_layer', - 'ctc_layer', - 'warp_ctc_layer', - 'crf_layer', - 'crf_decoding_layer', - 'nce_layer', - 'cross_entropy_with_selfnorm', - 'cross_entropy', - 'BeamInput', - 'cross_entropy_over_beam', - 'multi_binary_label_cross_entropy', - 'sum_cost', - 'rank_cost', - 'lambda_cost', - 'huber_regression_cost', - 'huber_classification_cost', - 'block_expand_layer', - 'maxout_layer', - 'dot_prod_layer', - 'out_prod_layer', - 'printer_layer', - 'print_layer', - 'priorbox_layer', - 'cross_channel_norm_layer', - 'multibox_loss_layer', - 'detection_output_layer', - 'roi_pool_layer', - 'spp_layer', - 'pad_layer', - 'eos_layer', - 'smooth_l1_cost', - 'layer_support', - 'multiplex_layer', - 'row_conv_layer', - 'dropout_layer', - 'prelu_layer', - 'switch_order_layer', - 'gated_unit_layer', - 'crop_layer', - 'sub_nested_seq_layer', - 'clip_layer', - 'slice_projection', - 'seq_slice_layer', - 'kmax_seq_score_layer', - 'img_pool3d_layer', - 'scale_shift_layer', - 
'img_conv3d_layer', - 'resize_layer', - 'sub_seq_layer', - 'scale_sub_region_layer', - 'upsample_layer', - 'factorization_machine', -] - - -class LayerType(object): - """ - Layer type enumerations. - """ - - DATA = 'data' - MIXED_LAYER = 'mixed' - LSTMEMORY = 'lstmemory' - GRUMEMORY = 'gated_recurrent' - SEQUENCE_LAST_INSTANCE = 'seqlastins' - SEQUENCE_FIRST_INSTANCE = 'seqfirstins' - SEQUENCE_RESHAPE = 'seqreshape' - POOLING_MAX = 'max' - POOLING_AVG = 'average' - UPSAMPLE_LAYER = 'upsample' - FC_LAYER = 'fc' - COST = 'cost' - COSINE_SIM_VEC = 'cos_vm' - COSINE_SIM = 'cos' - L2_DISTANCE = 'l2_distance' - HSIGMOID = 'hsigmoid' - CONV_LAYER = 'conv' - CONVTRANS_LAYER = 'convt' - EXCONV_LAYER = 'exconv' - EXCONVTRANS_LAYER = 'exconvt' - CUDNNCONV_LAYER = 'cudnn_conv' - CUDNNCONVTRANS_LAYER = 'cudnn_convt' - POOL_LAYER = 'pool' - POOL3D_LAYER = 'pool3d' - BATCH_NORM_LAYER = 'batch_norm' - NORM_LAYER = 'norm' - SUM_TO_ONE_NORM_LAYER = 'sum_to_one_norm' - ROW_L2_NORM_LAYER = 'row_l2_norm' - ADDTO_LAYER = 'addto' - - CONCAT_LAYER = 'concat' - CONCAT_PROJ_LAYER = 'concat2' - SEQUENCE_CONCAT_LAYER = 'seqconcat' - - LSTM_STEP_LAYER = 'lstm_step' - GRU_STEP_LAYER = 'gru_step' - GET_OUTPUT_LAYER = 'get_output' - - EXPAND_LAYER = 'expand' - INTERPOLATION_LAYER = 'interpolation' - BILINEAR_INTERP_LAYER = 'bilinear_interp' - POWER_LAYER = 'power' - SCALING_LAYER = 'scaling' - TRANS_LAYER = 'trans' - ROTATE_LAYER = 'rotate' - DOT_PROD_LAYER = 'dot_prod' - OUT_PROD_LAYER = 'out_prod' - FEATURE_MAP_EXPAND_LAYER = 'featmap_expand' - - MEMORY = 'memory' - MAXID_LAYER = 'maxid' - EOSID_LAYER = 'eos_id' - RECURRENT_LAYER = 'recurrent' - - CONV_SHIFT_LAYER = "conv_shift" - TENSOR_LAYER = "tensor" - SEL_FC_LAYER = "selective_fc" - SAMPLING_ID_LAYER = "sampling_id" - SLOPE_INTERCEPT_LAYER = "slope_intercept" - LINEAR_COMBINATION_LAYER = "convex_comb" - BLOCK_EXPAND = "blockexpand" - MAXOUT = "maxout" - SPP_LAYER = "spp" - PAD_LAYER = "pad" - MULTIPLEX_LAYER = "multiplex" - ROW_CONV_LAYER = "row_conv" - - PRINT_LAYER = 'print' - PRIORBOX_LAYER = 'priorbox' - MULTIBOX_LOSS_LAYER = 'multibox_loss' - DETECTION_OUTPUT_LAYER = 'detection_output' - ROI_POOL_LAYER = 'roi_pool' - - CTC_LAYER = 'ctc' - WARP_CTC_LAYER = 'warp_ctc' - CRF_LAYER = 'crf' - CRF_DECODING_LAYER = 'crf_decoding' - NCE_LAYER = 'nce' - - CONV3D_LAYER = 'conv3d' - DECONV3D_LAYER = 'deconv3d' - - RANK_COST = 'rank-cost' - LAMBDA_COST = 'lambda_cost' - HUBER_REGRESSION = 'huber_regression' - HUBER_CLASSIFICATION = 'huber_classification' - CROSS_ENTROPY = 'multi-class-cross-entropy' - CROSS_ENTROPY_WITH_SELFNORM = 'multi_class_cross_entropy_with_selfnorm' - CROSS_ENTROPY_OVER_BEAM = 'cross_entropy_over_beam' - SOFT_BIN_CLASS_CROSS_ENTROPY = 'soft_binary_class_cross_entropy' - MULTI_BIN_LABEL_CROSS_ENTROPY = 'multi_binary_label_cross_entropy' - SUM_COST = 'sum_cost' - SMOOTH_L1 = 'smooth_l1' - - PRELU = 'prelu' - SWITCH_ORDER_LAYER = 'switch_order' - CROP_LAYER = 'crop' - SUB_NESTED_SEQ = 'sub_nested_seq' - CLIP_LAYER = 'clip' - SEQ_SLICE = 'seq_slice' - - KMAX_SEQ_SCORE = 'kmax_seq_score' - SCALE_SHIFT_LAYER = 'scale_shift' - - RESIZE = 'resize' - SUB_SEQ_LAYER = 'subseq' - - SCALE_SUB_REGION_LAYER = 'scale_sub_region' - - FACTORIZATION_MACHINE = 'factorization_machine' - - @staticmethod - def is_layer_type(type_name): - """ - Whether type_name is a layer type. - - :param type_name: layer type name. Because layer type enumerations are - strings. 
- :type type_name: basestring - :return: True if is a layer_type - :rtype: bool - """ - for key in dir(LayerType): - if key.isupper(): - att = getattr(LayerType, key) - if isinstance(att, basestring) and type_name == att: - return True - return False - - -class AggregateLevel(object): - """ - PaddlePaddle supports three sequence types: - - - :code:`SequenceType.NO_SEQUENCE` means the sample is not a sequence. - - :code:`SequenceType.SEQUENCE` means the sample is a sequence. - - :code:`SequenceType.SUB_SEQUENCE` means the sample is a nested sequence, - each timestep of which is also a sequence. - - Accordingly, AggregateLevel supports two modes: - - - :code:`AggregateLevel.TO_NO_SEQUENCE` means the aggregation acts on each - timestep of a sequence, both :code:`SUB_SEQUENCE` and :code:`SEQUENCE` will - be aggregated to :code:`NO_SEQUENCE`. - - - :code:`AggregateLevel.TO_SEQUENCE` means the aggregation acts on each - sequence of a nested sequence, :code:`SUB_SEQUENCE` will be aggregated to - :code:`SEQUENCE`. - """ - TO_NO_SEQUENCE = 'non-seq' - TO_SEQUENCE = 'seq' - # compatible with previous configuration - EACH_TIMESTEP = TO_NO_SEQUENCE - EACH_SEQUENCE = TO_SEQUENCE - - -class LayerOutput(object): - """ - LayerOutput is output for layer function. It is used internally by several - reasons. - - - Check layer connection make sense. - - - FC(Softmax) => Cost(MSE Error) is not good for example. - - - Tracking layer connection. - - - Pass to layer methods as input. - - :param name: Layer output name. - :type name: basestring - :param layer_type: Current Layer Type. One of LayerType enumeration. - :type layer_type: basestring - :param activation: Layer Activation. - :type activation: BaseActivation. - :param parents: Layer's parents. - :type parents: list | tuple | collections.Sequence - """ - - def __init__(self, - name, - layer_type, - parents=None, - activation=None, - num_filters=None, - img_norm_type=None, - size=None, - outputs=None, - reverse=None): - assert isinstance(name, basestring) - assert isinstance(layer_type, basestring) - assert size is not None - assert LayerType.is_layer_type(layer_type) - self.name = name - self.full_name = MakeLayerNameInSubmodel(name) - self.layer_type = layer_type - if parents is not None and type(parents) != list: - parents = [parents] - self.parents = [] if parents is None else parents - self.activation = activation - self.num_filters = num_filters - self.img_norm_type = img_norm_type - self.size = size - if outputs is None: - outputs = ['default'] - self.outputs = outputs - self.reverse = reverse - - @property - def width(self): - return cp.g_layer_map[self.full_name].width - - @property - def height(self): - return cp.g_layer_map[self.full_name].height - - @property - def depth(self): - return cp.g_layer_map[self.full_name].depth - - def set_input(self, input): - """ - Set the input for a memory layer. 
Can only be used for memory layer - """ - assert isinstance(input, LayerOutput) - assert self.layer_type == LayerType.MEMORY - SetMemoryInput(self.name, input.name) - - -ERROR_CLIPPING = 'error_clipping_threshold' -DROPOUT = 'drop_rate' -DEVICE = 'device' - - -def layer_support(*attrs): - attrs_list = list(attrs) - attrs_list.append(DEVICE) - - def decorator(method): - @functools.wraps(method) - def wrapper(*args, **kwargs): - for attr in attrs_list: - for each in args: - if isinstance(each, ExtraLayerAttribute): - setattr(each, '_'.join(['can', attr]), True) - for key in kwargs: - val = kwargs[key] - if isinstance(val, ExtraLayerAttribute): - setattr(val, '_'.join(['can', attr]), True) - for each in args: - if isinstance(each, ExtraLayerAttribute): - each.check(method.__name__) - for key in kwargs: - val = kwargs[key] - if isinstance(val, ExtraLayerAttribute): - val.check(method.__name__) - return method(*args, **kwargs) - - if hasattr(method, 'argspec'): - wrapper.argspec = method.argspec - else: - wrapper.argspec = inspect.getargspec(method) - - return wrapper - - return decorator - - -@wrap_param_attr_default() -def full_matrix_projection(input, size=0, param_attr=None): - """ - Full Matrix Projection. It performs full matrix multiplication. - - .. math:: - out.row[i] += in.row[i] * weight - - There are two styles of usage. - - 1. When used in mixed_layer like this, you can only set the input: - - .. code-block:: python - - with mixed_layer(size=100) as m: - m += full_matrix_projection(input=layer) - - 2. When used as an independent object like this, you must set the size: - - .. code-block:: python - - proj = full_matrix_projection(input=layer, - size=100, - param_attr=ParamAttr(name='_proj')) - - :param input: The input of this layer. - :type input: LayerOutput - :param size: The dimension of this layer. - :type size: int - :param param_attr: The parameter attribute. See ParameterAttribute for details. - :type param_attr: ParameterAttribute - :return: FullMatrixProjection Object. - :rtype: FullMatrixProjection - """ - proj = FullMatrixProjection( - input_layer_name=input.name, size=size, **param_attr.attr) - proj.origin = input - return proj - - -@wrap_param_attr_default() -def trans_full_matrix_projection(input, size=0, param_attr=None): - """ - Different from full_matrix_projection, this projection performs matrix - multiplication, using the transpose of weight. - - .. math:: - out.row[i] += in.row[i] * w^\mathrm{T} - - :math:`w^\mathrm{T}` means the transpose of weight. - The simply usage is: - - .. code-block:: python - - proj = trans_full_matrix_projection(input=layer, - size=100, - param_attr=ParamAttr( - name='_proj', - initial_mean=0.0, - initial_std=0.01)) - - :param input: The input of this layer. - :type input: LayerOutput - :param size: The parameter size. Means the width of parameter. - :type size: int - :param param_attr: The parameter attribute. See ParameterAttribute for details. - :type param_attr: ParameterAttribute - :return: TransposedFullMatrixProjection Object. - :rtype: TransposedFullMatrixProjection - """ - proj = TransposedFullMatrixProjection( - input_layer_name=input.name, size=size, **param_attr.attr) - proj.origin = input - return proj - - -@wrap_param_attr_default() -def table_projection(input, size=0, param_attr=None): - """ - Table Projection. It selects rows from parameter where row\_id - is in input\_ids. - - .. 
math:: - out.row[i] += table.row[ids[i]] - - where :math:`out` is output, :math:`table` is parameter, :math:`ids` is input\_ids, - and :math:`i` is row\_id. - - There are two styles of usage. - - 1. When used in mixed_layer like this, you can only set the input: - - .. code-block:: python - - with mixed_layer(size=100) as m: - m += table_projection(input=layer) - - 2. When used as an independent object like this, you must set the size: - - .. code-block:: python - - proj = table_projection(input=layer, - size=100, - param_attr=ParamAttr(name='_proj')) - - - :param input: The input of this layer, which must contains id fields. - :type input: LayerOutput - :param size: The dimension of the output. - :type size: int - :param param_attr: The parameter attribute. See ParameterAttribute for details. - :type param_attr: ParameterAttribute - :return: TableProjection Object. - :rtype: TableProjection - """ - proj = TableProjection( - input_layer_name=input.name, size=size, **param_attr.attr) - proj.origin = input - return proj - - -def identity_projection(input, offset=None, size=None): - """ - 1. If offset=None, it performs IdentityProjection as follows: - - .. math:: - out.row[i] += in.row[i] - - The example usage is: - - .. code-block:: python - - proj = identity_projection(input=layer) - - - 2. If offset!=None, It executes IdentityOffsetProjection and takes the - elements of the input in the range [offset, offset+size) as output. - - .. math:: - out.row[i] += in.row[i + \\textrm{offset}] - - The example usage is: - - .. code-block:: python - - proj = identity_projection(input=layer, - offset=10) - - Note that neither of the projections have trainable parameter. - - :param input: The input of this layer. - :type input: LayerOutput - :param offset: The offset from the start of the input. The input's - elements in the range [offset, offset+size) will be - taken as output. If this parameter is not set or set - to None, the output will be the same as the input. - :type offset: int - :param size: The dimension of this layer. It will be neglected - when offset is None or not set. - :type size: int - :return: IdentityProjection or IdentityOffsetProjection object - :rtype: IdentityProjection | IdentityOffsetProjection - """ - if offset is None: - proj = IdentityProjection(input_layer_name=input.name) - proj.origin = input - else: - if size is None: - size = input.size - offset - proj = IdentityOffsetProjection( - input_layer_name=input.name, offset=offset, size=size) - proj.origin = input - return proj - - -def slice_projection(input, slices): - """ - slice_projection slices the input value into multiple parts, - then selects and merges some of them into a new output. - - .. math:: - output = [input.slices()] - - The example usage is: - - .. code-block:: python - - proj = slice_projection(input=layer, slices=[(0, 10), (20, 30)]) - - Note that slice_projection has no trainable parameter. - - :param input: The input of this layer. - :type input: LayerOutput - :param slices: A list of start and end offsets of each slice. - :type slices: list of tuple - :return: SliceProjection object. - :rtype: SliceProjection - """ - assert len(slices) >= 1 - start = 0 - for i in xrange(len(slices)): - assert len(slices[i]) == 2 - # The start position of the next slice needs to be greater than - # or equal to the end position of the previous slice. 
- assert slices[i][0] >= start - assert slices[i][1] >= slices[i][0] - start = slices[i][1] - proj = SliceProjection(input_layer_name=input.name, slices=slices) - proj.origin = input - return proj - - -@wrap_param_attr_default() -def scaling_projection(input, param_attr=None): - """ - scaling_projection multiplies the input with a scalar parameter. - - .. math:: - out += w * in - - The example usage is: - - .. code-block:: python - - proj = scaling_projection(input=layer) - - :param input: The input of this layer. - :type input: LayerOutput - :param param_attr: The parameter attribute. See ParameterAttribute for details. - :type param_attr: ParameterAttribute - :return: ScalingProjection object. - :rtype: ScalingProjection - """ - proj = ScalingProjection(input_layer_name=input.name, **param_attr.attr) - proj.origin = input - return proj - - -@wrap_param_attr_default() -def dotmul_projection(input, param_attr=None): - """ - DotMulProjection takes a layer as input and performs - element-wise multiplication with weight. - - .. math:: - out.row[i] += in.row[i] .* weight - - where :math:`.*` means element-wise multiplication. - - The example usage is: - - .. code-block:: python - - proj = dotmul_projection(input=layer) - - :param input: The input of this layer. - :type input: LayerOutput - :param param_attr: The parameter attribute. See ParameterAttribute for details. - :type param_attr: ParameterAttribute - :return: DotMulProjection object. - :rtype: DotMulProjection - """ - proj = DotMulProjection( - input_layer_name=input.name, size=input.size, **param_attr.attr) - proj.origin = input - return proj - - -def dotmul_operator(a=None, b=None, scale=1, **kwargs): - """ - DotMulOperator takes two inputs and performs element-wise multiplication: - - .. math:: - out.row[i] += scale * (a.row[i] .* b.row[i]) - - where :math:`.*` means element-wise multiplication, and - scale is a config scalar, its default value is 1. - - The example usage is: - - .. code-block:: python - - op = dotmul_operator(a=layer1, b=layer2, scale=0.5) - - :param a: The first input of this layer. - :type a: LayerOutput - :param b: The second input of this layer. - :type b: LayerOutput - :param scale: A scalar to scale the product. Its default value is 1. - :type scale: float - :return: DotMulOperator object. - :rtype: DotMulOperator - """ - if 'x' in kwargs or 'y' in kwargs: - logger.warning('x and y arguments for dotmul_operator is deprecated. ' - 'Please use a and b as parameter.') - a = kwargs.get('x', a) # For Backward capacity. - b = kwargs.get('y', b) - assert isinstance(a, LayerOutput) - assert isinstance(b, LayerOutput) - if a.size is not None and b.size is not None: - assert a.size == b.size - - op = DotMulOperator(input_layer_names=[a.name, b.name], scale=scale) - op.origin = [a, b] - return op - - -@wrap_bias_attr_default(['padding_attr']) -def context_projection(input, - context_len, - context_start=None, - padding_attr=False): - """ - Context Projection. - - It just reorganizes input sequence, combines "context_len" elements of the - sequence to one context from context_start. "context_start" will be set to - -(context_len - 1) / 2 by default. When context position is out of sequence - length, padding will be filled as zero if padding_attr = False, otherwise - it is trainable. - - For example, origin sequence is [A B C D E F G], context len is 3, padding_attr - is not set, then after context projection, sequence will - be [ 0AB ABC BCD CDE DEF EFG FG0 ]. 
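The window arithmetic is easy to check in plain Python; the following sketch (not part of the original module) reproduces the [ 0AB ABC BCD CDE DEF EFG FG0 ] example above:

.. code-block:: python

   def context_windows(seq, context_len, pad='0'):
       # context_start defaults to -(context_len - 1) / 2, as in the layer.
       start = -(context_len - 1) // 2
       out = []
       for i in range(len(seq)):
           window = [seq[i + o] if 0 <= i + o < len(seq) else pad
                     for o in range(start, start + context_len)]
           out.append(''.join(window))
       return out

   context_windows(list('ABCDEFG'), 3)
   # ['0AB', 'ABC', 'BCD', 'CDE', 'DEF', 'EFG', 'FG0']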
- - :param input: The input of this layer, which should be a sequence. - :type input: LayerOutput - :param context_len: The length of the context. - :type context_len: int - :param context_start: The start position of the context. The default value is - -(context_len - 1)/2 - :type context_start: int - :param padding_attr: Parameter attribute of the padding. If the parameter is - set to False, padding will be zero. In other cases, the - padding is trainable, and its parameter attribute is set - by this parameter. - :type padding_attr: bool | ParameterAttribute - :return: Projection object. - :rtype: Projection - """ - context_start = -( - context_len - 1) / 2 if context_start is None else context_start - - extra_dict = dict() - trainable = isinstance(padding_attr, ParameterAttribute) - if trainable: - extra_dict = padding_attr.attr - - proj = ContextProjection( - input_layer_name=input.name, - context_length=context_len, - context_start=context_start, - trainable_padding=trainable, - **extra_dict) - proj.origin = input - return proj - - -class MixedLayerType(LayerOutput): - """ - The internal object for trainer_helpers. - """ - - class AddToSealedMixedLayerException(Exception): - def __init__(self): - Exception.__init__(self) - - def __init__(self, name, size, act, bias_attr, layer_attr, parents=None): - """ - :param name: The name of this layer. - :type name: basestring - :param size: The dimension of this layer. - :type size: int - :param act: Activation type. - :type act: BaseActivation - :param bias_attr: The bias attribute. If the parameter is set to False or an object - whose type is not ParameterAttribute, no bias is defined. If the - parameter is set to True, the bias is initialized to zero. - :type bias_attr: ParameterAttribute | None | bool | Any - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute | None - """ - LayerOutput.__init__( - self, - name, - LayerType.MIXED_LAYER, - parents, - size=size, - activation=act) - self.bias_attr = bias_attr - self.layer_attr = layer_attr - self.inputs = [] - self.finalized = False - - def __iadd__(self, other): - """ - + += operator - :param other: Other projection. - :type other: Projection - :return: self. - :rtype: MixedLayerType - """ - if not self.finalized: - assert isinstance(other, Projection) or isinstance(other, Operator) - self.inputs.append(other) - if isinstance(other, Projection): - self.parents.append(other.origin) - else: - self.parents.extend(other.origin) - return self - else: - raise MixedLayerType.AddToSealedMixedLayerException() - - def __enter__(self): - assert len(self.inputs) == 0 - return self - - def __exit__(self, exc_type, exc_value, tb): - if exc_value is not None: - raise exc_value - assert len(self.inputs) != 0 - ml = MixedLayer( - name=self.name, - size=self.size, - active_type=self.activation.name, - bias=ParamAttr.to_bias(self.bias_attr), - inputs=self.inputs, - **ExtraLayerAttribute.to_kwargs(self.layer_attr)) - # update the size which might be computed inside MixedLayer - # according to the operator's output size - self.size = ml.config.size - self.finalized = True - - -@wrap_name_default("mixed") -@wrap_act_default(act=LinearActivation()) -@wrap_bias_attr_default(has_bias=False) -@layer_support(ERROR_CLIPPING, DROPOUT) -def mixed_layer(size=0, - input=None, - name=None, - act=None, - bias_attr=False, - layer_attr=None): - """ - Mixed Layer. A mixed layer will add all inputs together, then activate the sum. 
- Each input is a projection or operator. - - There are two styles of usages. - - 1. When the parameter input is not set, use mixed_layer like this: - - .. code-block:: python - - with mixed_layer(size=256) as m: - m += full_matrix_projection(input=layer1) - m += identity_projection(input=layer2) - - 2. You can also set all inputs when invoke mixed_layer as follows: - - .. code-block:: python - - m = mixed_layer(size=256, - input=[full_matrix_projection(input=layer1), - full_matrix_projection(input=layer2)]) - - :param name: The name of this layer. It is optional. - :type name: basestring - :param size: The dimension of this layer. - :type size: int - :param input: The input of this layer. It is an optional parameter. - :param act: Activation Type. LinearActivation is the default activation. - :type act: BaseActivation - :param bias_attr: The bias attribute. If the parameter is set to False or an object - whose type is not ParameterAttribute, no bias is defined. If the - parameter is set to True, the bias is initialized to zero. - :type bias_attr: ParameterAttribute | None | bool | Any - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute - :return: MixedLayerType object. - :rtype: MixedLayerType - """ - - if input is None: - return MixedLayerType(name, size, act, bias_attr, layer_attr) - else: - with mixed_layer( - name=name, - size=size, - act=act, - bias_attr=bias_attr, - layer_attr=layer_attr) as m: - if isinstance(input, collections.Sequence): - for each in input: - m += each - else: - m += input - return m - - -@layer_support() -def data_layer(name, size, depth=None, height=None, width=None, - layer_attr=None): - """ - Define DataLayer For NeuralNetwork. - - The example usage is: - - .. code-block:: python - - data = data_layer(name="input", size=1000) - - :param name: The name of this layer. - :type name: basestring - :param size: The dimension of this data layer. - :type size: int - :param height: The height of the input image data. - :type height: int | None - :param width: The width of the input image data. - :type width: int | None - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute - :return: LayerOutput object. - :rtype: LayerOutput - """ - Layer( - type=LayerType.DATA, - name=name, - size=size, - depth=depth, - height=height, - width=width, - **ExtraLayerAttribute.to_kwargs(layer_attr)) - - if depth is None: - depth = 1 - num_filters = None - if height is not None and width is not None: - num_filters = size / (width * height * depth) - assert num_filters * width * height * depth == size, \ - "size=%s width=%s height=%s depth=%s" % (size, width, height, depth) - - return LayerOutput(name, LayerType.DATA, size=size, num_filters=num_filters) - - -@wrap_name_default("embedding") -@wrap_param_attr_default() -@layer_support(ERROR_CLIPPING, DROPOUT) -def embedding_layer(input, size, name=None, param_attr=None, layer_attr=None): - """ - Define a embedding Layer. - - :param name: The name of this layer. It is optional. - :type name: basestring - :param input: The input of this layer, whose type must be Index Data. - :type input: LayerOutput - :param size: The dimension of the embedding vector. - :type size: int - :param param_attr: The embedding parameter attribute. See ParameterAttribute - for details. - :type param_attr: ParameterAttribute - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. 
- :type layer_attr: ExtraLayerAttribute | None - :return: LayerOutput object. - :rtype: LayerOutput - """ - with mixed_layer( - name=name, - size=size, - act=LinearActivation(), - bias_attr=False, - layer_attr=layer_attr) as mix: - mix += table_projection(input=input, size=size, param_attr=param_attr) - return mix - - -@wrap_name_default() -@wrap_param_attr_default() -@wrap_bias_attr_default() -@wrap_act_default() -@layer_support(ERROR_CLIPPING, DROPOUT) -def fc_layer(input, - size, - act=None, - name=None, - param_attr=None, - bias_attr=None, - layer_attr=None): - """ - The fully connected layer. - - The example usage is: - - .. code-block:: python - - fc = fc_layer(input=layer, - size=1024, - act=LinearActivation(), - bias_attr=False) - - which is equal to: - - .. code-block:: python - - with mixed_layer(size=1024) as fc: - fc += full_matrix_projection(input=layer) - - :param name: The name of this layer. It is optional. - :type name: basestring - :param input: The input of this layer. - :type input: LayerOutput | list | tuple - :param size: The dimension of this layer. - :type size: int - :param act: Activation Type. TanhActivation is the default activation. - :type act: BaseActivation - :param param_attr: The parameter attribute. See ParameterAttribute for details. - :type param_attr: ParameterAttribute - :param bias_attr: The bias attribute. If the parameter is set to False or an object - whose type is not ParameterAttribute, no bias is defined. If the - parameter is set to True, the bias is initialized to zero. - :type bias_attr: ParameterAttribute | None | bool | Any - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute | None - :return: LayerOutput object. - :rtype: LayerOutput - """ - if isinstance(input, LayerOutput): - input = [input] - assert not isinstance(param_attr, collections.Sequence) - param_attr = [param_attr] - else: - if isinstance(param_attr, collections.Sequence): - assert len(input) == len(param_attr) - else: - if "parameter_name" in param_attr.attr and len(input) > 1: - logger.fatal( - "When the name field of param_attr is manually specified " - "and the input is a list, the param_attr should also be a " - "list with each item being the param_attr for each input " - "item. If only one named param_attr is provided, all the " - "input items would share this parameter.") - param_attr = [copy.deepcopy(param_attr) for _ in range(len(input))] - - assert isinstance(input, collections.Sequence) - - Layer( - inputs=[ - Input(ipt.name, **attr.attr) for ipt, attr in zip(input, param_attr) - ], - name=name, - type=LayerType.FC_LAYER, - size=size, - bias=ParamAttr.to_bias(bias_attr), - active_type=act.name, - **ExtraLayerAttribute.to_kwargs(layer_attr)) - return LayerOutput( - name, LayerType.FC_LAYER, input, activation=act, size=size) - - -@wrap_name_default("print") -def printer_layer(input, format=None, name=None): - """ - Print the output value of the layers specified by the parameter input. - This layer is useful for debugging. - - :param name: The name of this layer. It is optional. - :type name: basestring - :param input: The input of this layer. - :type input: LayerOutput | list | tuple - :return: LayerOutput object. 
-    :rtype: LayerOutput
-    """
-    if isinstance(input, LayerOutput):
-        input = [input]
-    assert isinstance(input, collections.Sequence)  # list or tuple
-    for each in input:
-        assert isinstance(each, LayerOutput)
-
-    Layer(
-        name=name,
-        format=format,
-        type=LayerType.PRINT_LAYER,
-        inputs=[l.name for l in input], )
-    # This layer does not return anything and cannot be used as the input
-    # of another layer.
-
-# Keep print_layer for compatibility with the V1 API.
-# 'print_layer' does not work for the V2 API because it will be changed to
-# 'print' for the V2 API. But 'print' is a reserved keyword in Python.
-
-
-print_layer = printer_layer
-
-
-@wrap_name_default("priorbox")
-def priorbox_layer(input,
-                   image,
-                   aspect_ratio,
-                   variance,
-                   min_size,
-                   max_size=[],
-                   name=None):
-    """
-    Compute the priorbox and set the variance. This layer is necessary for
-    SSD.
-
-    :param name: The name of this layer. It is optional.
-    :type name: basestring
-    :param input: The input of this layer.
-    :type input: LayerOutput
-    :param image: The network input image.
-    :type image: LayerOutput
-    :param aspect_ratio: The aspect ratio.
-    :type aspect_ratio: list
-    :param variance: The bounding box variance.
-    :type variance: list
-    :param min_size: The minimum size of the priorbox width/height.
-    :type min_size: list
-    :param max_size: The maximum size of the priorbox width/height. It could
-                     be empty.
-    :type max_size: list
-    :return: LayerOutput object.
-    :rtype: LayerOutput
-    """
-    # Plus one for ratio 1.
-    num_filters = (len(aspect_ratio) * 2 + 1 + len(max_size)) * 4
-    size = (input.size / input.num_filters) * num_filters * 2
-    Layer(
-        name=name,
-        type=LayerType.PRIORBOX_LAYER,
-        inputs=[input.name, image.name],
-        size=size,
-        min_size=min_size,
-        max_size=max_size,
-        aspect_ratio=aspect_ratio,
-        variance=variance)
-    return LayerOutput(
-        name,
-        LayerType.PRIORBOX_LAYER,
-        parents=[input, image],
-        num_filters=num_filters,
-        size=size)
-
-
-@wrap_name_default("multibox_loss")
-def multibox_loss_layer(input_loc,
-                        input_conf,
-                        priorbox,
-                        label,
-                        num_classes,
-                        overlap_threshold=0.5,
-                        neg_pos_ratio=3.0,
-                        neg_overlap=0.5,
-                        background_id=0,
-                        name=None):
-    """
-    Compute the location loss and the confidence loss for SSD.
-
-    :param name: The name of this layer. It is optional.
-    :type name: basestring
-    :param input_loc: The input predicted locations.
-    :type input_loc: LayerOutput | List of LayerOutput
-    :param input_conf: The input priorbox confidence.
-    :type input_conf: LayerOutput | List of LayerOutput
-    :param priorbox: The input priorbox location and the variance.
-    :type priorbox: LayerOutput
-    :param label: The input label.
-    :type label: LayerOutput
-    :param num_classes: The number of classes.
-    :type num_classes: int
-    :param overlap_threshold: The threshold of the overlap.
-    :type overlap_threshold: float
-    :param neg_pos_ratio: The ratio of the negative bounding boxes to
-                          the positive bounding boxes.
-    :type neg_pos_ratio: float
-    :param neg_overlap: The negative bounding box overlap threshold.
-    :type neg_overlap: float
-    :param background_id: The background class index.
-    :type background_id: int
-    :return: LayerOutput object.
- :rtype: LayerOutput - """ - if isinstance(input_loc, LayerOutput): - input_loc = [input_loc] - assert isinstance(input_loc, collections.Sequence) # list or tuple - for each in input_loc: - assert isinstance(each, LayerOutput) - input_loc_num = len(input_loc) - - if isinstance(input_conf, LayerOutput): - input_conf = [input_conf] - assert isinstance(input_conf, collections.Sequence) # list or tuple - for each in input_conf: - assert isinstance(each, LayerOutput) - input_conf_num = len(input_conf) - # Check the input layer number. - assert input_loc_num == input_conf_num - - inputs = [priorbox.name, label.name] - inputs.extend([l.name for l in input_loc]) - inputs.extend([l.name for l in input_conf]) - parents = [priorbox, label] - parents.extend(input_loc) - parents.extend(input_conf) - - Layer( - name=name, - type=LayerType.MULTIBOX_LOSS_LAYER, - inputs=inputs, - input_num=input_loc_num, - num_classes=num_classes, - overlap_threshold=overlap_threshold, - neg_pos_ratio=neg_pos_ratio, - neg_overlap=neg_overlap, - background_id=background_id) - return LayerOutput( - name, LayerType.MULTIBOX_LOSS_LAYER, parents=parents, size=1) - - -@wrap_name_default("detection_output") -def detection_output_layer(input_loc, - input_conf, - priorbox, - num_classes, - nms_threshold=0.45, - nms_top_k=400, - keep_top_k=200, - confidence_threshold=0.01, - background_id=0, - name=None): - """ - Apply the NMS to the output of network and compute the predict bounding - box location. The output's shape of this layer could be zero if there is - no valid bounding box. - - :param name: The name of this layer. It is optional. - :type name: basestring - :param input_loc: The input predict locations. - :type input_loc: LayerOutput | List of LayerOutput. - :param input_conf: The input priorbox confidence. - :type input_conf: LayerOutput | List of LayerOutput. - :param priorbox: The input priorbox location and the variance. - :type priorbox: LayerOutput - :param num_classes: The number of the classes. - :type num_classes: int - :param nms_threshold: The Non-maximum suppression threshold. - :type nms_threshold: float - :param nms_top_k: The bounding boxes number kept of the NMS's output. - :type nms_top_k: int - :param keep_top_k: The bounding boxes number kept of the layer's output. - :type keep_top_k: int - :param confidence_threshold: The classification confidence threshold. - :type confidence_threshold: float - :param background_id: The background class index. - :type background_id: int - :return: LayerOutput object. - :rtype: LayerOutput - """ - if isinstance(input_loc, LayerOutput): - input_loc = [input_loc] - assert isinstance(input_loc, collections.Sequence) # list or tuple - for each in input_loc: - assert isinstance(each, LayerOutput) - input_loc_num = len(input_loc) - - if isinstance(input_conf, LayerOutput): - input_conf = [input_conf] - assert isinstance(input_conf, collections.Sequence) # list or tuple - for each in input_conf: - assert isinstance(each, LayerOutput) - input_conf_num = len(input_conf) - - # Check the input layer number. 
- assert input_loc_num == input_conf_num - - inputs = [priorbox.name] - inputs.extend([l.name for l in input_loc]) - inputs.extend([l.name for l in input_conf]) - parents = [priorbox] - parents.extend(input_loc) - parents.extend(input_conf) - - size = keep_top_k * 7 - - Layer( - name=name, - type=LayerType.DETECTION_OUTPUT_LAYER, - inputs=inputs, - size=size, - input_num=input_loc_num, - num_classes=num_classes, - nms_threshold=nms_threshold, - nms_top_k=nms_top_k, - keep_top_k=keep_top_k, - confidence_threshold=confidence_threshold, - background_id=background_id) - return LayerOutput( - name, LayerType.DETECTION_OUTPUT_LAYER, parents=parents, size=size) - - -@wrap_name_default("roi_pool") -def roi_pool_layer(input, - rois, - pooled_width, - pooled_height, - spatial_scale, - num_channels=None, - name=None): - """ - A layer used by Fast R-CNN to extract feature maps of ROIs from the last - feature map. - - :param name: The name of this layer. It is optional. - :type name: basestring - :param input: The input layer. - :type input: LayerOutput. - :param rois: The input ROIs' data. - :type rois: LayerOutput. - :param pooled_width: The width after pooling. - :type pooled_width: int - :param pooled_height: The height after pooling. - :type pooled_height: int - :param spatial_scale: The spatial scale between the image and feature map. - :type spatial_scale: float - :param num_channels: The number of the input channels. - :type num_channels: int - :return: LayerOutput object. - :rtype: LayerOutput - """ - if num_channels is None: - assert input.num_filters is not None - num_channels = input.num_filters - size = num_channels * pooled_width * pooled_height - Layer( - name=name, - type=LayerType.ROI_POOL_LAYER, - inputs=[input.name, rois.name], - pooled_width=pooled_width, - pooled_height=pooled_height, - spatial_scale=spatial_scale, - num_channels=num_channels) - return LayerOutput( - name, LayerType.ROI_POOL_LAYER, parents=[input, rois], size=size) - - -@wrap_name_default("cross_channel_norm") -def cross_channel_norm_layer(input, name=None, param_attr=None): - """ - Normalize a layer's output. This layer is necessary for ssd. This - layer applys normalization across the channels of each sample to - a convolutional layer's output and scales the output by a group of - trainable factors whose dimensions equal to the channel's number. - - :param name: The name of this layer. It is optional. - :type name: basestring - :param input: The input of this layer. - :type input: LayerOutput - :param param_attr: The parameter attribute. See ParameterAttribute for details. - :type param_attr: ParameterAttribute - :return: LayerOutput object. - :rtype: LayerOutput - """ - assert input.num_filters is not None - Layer( - name=name, - type=LayerType.NORM_LAYER, - inputs=[ - Input( - input.name, - norm=Norm( - norm_type="cross-channel-norm", - channels=input.num_filters, - size=input.size, - scale=0, - pow=0, - blocked=0), - **param_attr.attr) - ]) - return LayerOutput( - name, - LayerType.NORM_LAYER, - parents=input, - num_filters=input.num_filters, - size=input.size) - - -@wrap_name_default("seq_pooling") -@wrap_bias_attr_default(has_bias=False) -@wrap_param_default(['pooling_type'], default_factory=lambda _: MaxPooling()) -@layer_support() -def pooling_layer(input, - pooling_type=None, - name=None, - bias_attr=None, - agg_level=AggregateLevel.TO_NO_SEQUENCE, - stride=-1, - layer_attr=None): - """ - Pooling layer for sequence inputs, not used for Image. 
- :param name: The name of this layer. It is optional. - :type name: basestring - :param input: The input layer. - :type input: LayerOutput. - :param rois: The input ROIs' data. - :type rois: LayerOutput. - :param pooled_width: The width after pooling. - :type pooled_width: int - :param pooled_height: The height after pooling. - :type pooled_height: int - :param spatial_scale: The spatial scale between the image and feature map. - :type spatial_scale: float - :param num_channels: The number of input channels. - :type num_channels: int - :return: LayerOutput object. - :rtype: LayerOutput - """ - if num_channels is None: - assert input.num_filters is not None - num_channels = input.num_filters - size = num_channels * pooled_width * pooled_height - Layer( - name=name, - type=LayerType.ROI_POOL_LAYER, - inputs=[input.name, rois.name], - pooled_width=pooled_width, - pooled_height=pooled_height, - spatial_scale=spatial_scale, - num_channels=num_channels) - return LayerOutput( - name, LayerType.ROI_POOL_LAYER, parents=[input, rois], size=size) - - -@wrap_name_default("cross_channel_norm") -def cross_channel_norm_layer(input, name=None, param_attr=None): - """ - Normalize a layer's output. This layer is necessary for SSD. This - layer applies normalization across the channels of each sample to - a convolutional layer's output and scales the output by a group of - trainable factors, whose number equals the number of channels. - - :param name: The name of this layer. It is optional. - :type name: basestring - :param input: The input of this layer. - :type input: LayerOutput - :param param_attr: The parameter attribute. See ParameterAttribute for details. - :type param_attr: ParameterAttribute - :return: LayerOutput object. - :rtype: LayerOutput - """ - assert input.num_filters is not None - Layer( - name=name, - type=LayerType.NORM_LAYER, - inputs=[ - Input( - input.name, - norm=Norm( - norm_type="cross-channel-norm", - channels=input.num_filters, - size=input.size, - scale=0, - pow=0, - blocked=0), - **param_attr.attr) - ]) - return LayerOutput( - name, - LayerType.NORM_LAYER, - parents=input, - num_filters=input.num_filters, - size=input.size) - - -@wrap_name_default("seq_pooling") -@wrap_bias_attr_default(has_bias=False) -@wrap_param_default(['pooling_type'], default_factory=lambda _: MaxPooling()) -@layer_support() -def pooling_layer(input, - pooling_type=None, - name=None, - bias_attr=None, - agg_level=AggregateLevel.TO_NO_SEQUENCE, - stride=-1, - layer_attr=None): - """ - Pooling layer for sequence inputs. It is not used for images. - - If stride > 0, this layer slides a window whose size is determined by stride, - and returns the pooling value of the sequence in the window as the output. Thus, - a long sequence will be shortened. Note that for sequences with sub-sequences, the - default value of stride is -1. - - The example usage is: - - .. code-block:: python - - seq_pool = pooling_layer(input=layer, - pooling_type=AvgPooling(), - agg_level=AggregateLevel.TO_NO_SEQUENCE) - - :param agg_level: AggregateLevel.TO_NO_SEQUENCE or - AggregateLevel.TO_SEQUENCE - :type agg_level: AggregateLevel - :param name: The name of this layer. It is optional. - :type name: basestring - :param input: The input of this layer. - :type input: LayerOutput - :param pooling_type: Type of pooling. MaxPooling is the default pooling. - :type pooling_type: BasePoolingType | None - :param stride: The step size between successive pooling regions. - :type stride: int - :param bias_attr: The bias attribute. If the parameter is set to False or an object - whose type is not ParameterAttribute, no bias is defined. If the - parameter is set to True, the bias is initialized to zero. - :type bias_attr: ParameterAttribute | None | bool | Any - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute | None - :return: LayerOutput object. - :rtype: LayerOutput - """ - extra_dict = dict() - # noinspection PyUnresolvedReferences - if isinstance(pooling_type, AvgPooling): - extra_dict['average_strategy'] = pooling_type.strategy - elif isinstance(pooling_type, MaxPooling) and \ - pooling_type.output_max_index is not None: - assert isinstance(pooling_type.output_max_index, bool) - extra_dict['output_max_index'] = pooling_type.output_max_index - extra_dict.update(ExtraLayerAttribute.to_kwargs(layer_attr)) - - if agg_level == AggregateLevel.TO_SEQUENCE: - assert stride == -1 - - Layer( - name=name, - type=pooling_type.name, - inputs=[Input(input.name)], - bias=ParamAttr.to_bias(bias_attr), - trans_type=agg_level, - stride=stride, - **extra_dict) - - return LayerOutput( - name, pooling_type.name, parents=[input], size=input.size) - - -@wrap_bias_attr_default() -@wrap_param_attr_default() -@wrap_act_default(param_names=['gate_act'], act=SigmoidActivation()) -@wrap_act_default(param_names=["act", 'state_act'], act=TanhActivation()) -@wrap_name_default("lstmemory") -@layer_support() -def lstmemory(input, - name=None, - size=None, - reverse=False, - act=None, - gate_act=None, - state_act=None, - bias_attr=None, - param_attr=None, - layer_attr=None): - """ - Long Short-term Memory Cell. - - The memory cell is implemented with the following equations. - - .. math:: - - i_t & = \\sigma(W_{xi}x_{t} + W_{hi}h_{t-1} + W_{ci}c_{t-1} + b_i) - - f_t & = \\sigma(W_{xf}x_{t} + W_{hf}h_{t-1} + W_{cf}c_{t-1} + b_f) - - c_t & = f_t c_{t-1} + i_t \\tanh(W_{xc}x_t + W_{hc}h_{t-1} + b_c) - - o_t & = \\sigma(W_{xo}x_{t} + W_{ho}h_{t-1} + W_{co}c_t + b_o) - - h_t & = o_t \\tanh(c_t) - - - NOTE: In PaddlePaddle's implementation, the multiplications - :math:`W_{xi}x_{t}` , :math:`W_{xf}x_{t}`, - :math:`W_{xc}x_t`, :math:`W_{xo}x_{t}` are not done in the lstmemory layer, - so an additional mixed_layer with full_matrix_projection or a fc_layer must - be included in the configuration file to complete the input-to-hidden - mappings before lstmemory is called. - - NOTE: This is a low level user interface. You can use network.simple_lstm - to configure a simple plain LSTM layer.
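- The example usage (a minimal sketch; prev is a hypothetical input layer, - and the fc_layer supplies the required input-to-hidden projection, so its - size must be four times the LSTM cell dimension) is: - - .. code-block:: python - - proj = fc_layer(input=prev, size=512 * 4, - act=LinearActivation(), bias_attr=False) - lstm = lstmemory(input=proj) -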
- - Reference: - `Generating Sequences With Recurrent Neural Networks - `_ - - :param name: The name of this layer. It is optional. - :type name: basestring - :param size: DEPRECATED. The dimension of the lstm cell. - :type size: int - :param input: The input of this layer. - :type input: LayerOutput - :param reverse: Whether the input sequence is processed in a reverse order. - :type reverse: bool - :param act: Activation type. TanhActivation is the default activation. - :type act: BaseActivation - :param gate_act: Activation type of this layer's gates. SigmoidActivation is the - default activation. - :type gate_act: BaseActivation - :param state_act: Activation type of the state. TanhActivation is the default activation. - :type state_act: BaseActivation - :param bias_attr: The bias attribute. If the parameter is set to False or an object - whose type is not ParameterAttribute, no bias is defined. If the - parameter is set to True, the bias is initialized to zero. - :type bias_attr: ParameterAttribute | None | bool | Any - :param param_attr: The parameter attribute. See ParameterAttribute for details. - :type param_attr: ParameterAttribute - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute | None - :return: LayerOutput object. - :rtype: LayerOutput - """ - - assert gate_act.support_hppl - assert state_act.support_hppl - assert act.support_hppl - assert input.size is not None and input.size % 4 == 0 - - if size is not None: - if input.size / 4 == size: - plog = logger.warning - else: - plog = logger.fatal - plog("the size of the lstmemory layer %s is automatically set to " - "the size of the input layer / 4. The parameter size passed to " - "this layer is ignored." % (name)) - - Layer( - name=name, - type=LayerType.LSTMEMORY, - active_type=act.name, - active_state_type=state_act.name, - active_gate_type=gate_act.name, - reversed=reverse, - bias=ParamAttr.to_bias(bias_attr), - inputs=[Input(input.name, **param_attr.attr)], - **ExtraLayerAttribute.to_kwargs(layer_attr)) - - return LayerOutput( - name, - LayerType.LSTMEMORY, [input], - size=input.size / 4, - reverse=reverse) - - -@wrap_bias_attr_default() -@wrap_param_attr_default() -@wrap_act_default(param_names=['gate_act'], act=SigmoidActivation()) -@wrap_act_default(param_names=["act"], act=TanhActivation()) -@wrap_name_default("gru") -@layer_support() -def grumemory(input, - size=None, - name=None, - reverse=False, - act=None, - gate_act=None, - bias_attr=None, - param_attr=None, - layer_attr=None): - """ - Gated Recurrent Unit Layer. - - The memory cell is implemented with the following equations. - - 1. update gate :math:`z`: defines how much of the previous memory to - keep around, i.e. how strongly the unit updates its activations. The update - gate is computed by: - - .. math:: - - z_t = \\sigma(W_{z}x_{t} + U_{z}h_{t-1} + b_z) - - 2. reset gate :math:`r`: determines how to combine the new input with the - previous memory. The reset gate is computed similarly to the update gate: - - .. math:: - - r_t = \\sigma(W_{r}x_{t} + U_{r}h_{t-1} + b_r) - - 3. The candidate activation :math:`\\tilde{h_t}` is computed similarly to - that of the traditional recurrent unit: - - .. math:: - - {\\tilde{h_t}} = \\tanh(W x_{t} + U (r_{t} \\odot h_{t-1}) + b) - - 4. The hidden activation :math:`h_t` of the GRU at time t is a linear - interpolation between the previous activation :math:`h_{t-1}` and the - candidate activation :math:`\\tilde{h_t}`: - - .. math:: - - h_t = (1 - z_t) h_{t-1} + z_t {\\tilde{h_t}} - - NOTE: In PaddlePaddle's implementation, the multiplication operations - :math:`W_{r}x_{t}`, :math:`W_{z}x_{t}` and :math:`W x_t` are not performed - in gate_recurrent layer. Consequently, an additional mixed_layer with - full_matrix_projection or a fc_layer must be included before grumemory - is called. - - Reference: - `Empirical Evaluation of Gated Recurrent Neural Networks on Sequence Modeling - `_ - - The simple usage is: - - .. code-block:: python - - gru = grumemory(input)
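- A fuller sketch (hedged; prev is a hypothetical input layer, and the - fc_layer provides the input-to-hidden projection required by the NOTE - above, so its size must be three times the GRU cell dimension): - - .. code-block:: python - - proj = fc_layer(input=prev, size=256 * 3, - act=LinearActivation(), bias_attr=False) - gru = grumemory(input=proj) -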
- - :param name: The name of this layer. It is optional. - :type name: basestring - :param input: The input of this layer. - :type input: LayerOutput. - :param size: DEPRECATED. The dimension of the gru cell. - :type size: int - :param reverse: Whether the input sequence is processed in a reverse order. - :type reverse: bool - :param act: Activation type, TanhActivation is the default. This activation - affects the :math:`{\\tilde{h_t}}`. - :type act: BaseActivation - :param gate_act: Activation type of this layer's two gates. SigmoidActivation is - the default activation. This activation affects the :math:`z_t` - and :math:`r_t`. It is the :math:`\\sigma` in the above formula. - :type gate_act: BaseActivation - :param bias_attr: The bias attribute. If the parameter is set to False or an object - whose type is not ParameterAttribute, no bias is defined. If the - parameter is set to True, the bias is initialized to zero. - :type bias_attr: ParameterAttribute | None | bool | Any - :param param_attr: The parameter attribute. See ParameterAttribute for details. - :type param_attr: ParameterAttribute - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute | None - :return: LayerOutput object. - :rtype: LayerOutput - """ - assert act.support_hppl - assert gate_act.support_hppl - assert input.size is not None and input.size % 3 == 0 - if size is not None: - if input.size / 3 == size: - plog = logger.warning - else: - plog = logger.fatal - plog("the size of the grumemory layer %s is automatically set to " - "the size of the input layer / 3. The parameter size passed to this " - "layer is ignored." % (name)) - - Layer( - name=name, - type=LayerType.GRUMEMORY, - active_type=act.name, - active_gate_type=gate_act.name, - reversed=reverse, - bias=ParamAttr.to_bias(bias_attr), - inputs=[Input(input.name, **param_attr.attr)], - **ExtraLayerAttribute.to_kwargs(layer_attr)) - - return LayerOutput( - name, - LayerType.GRUMEMORY, [input], - size=input.size / 3, - reverse=reverse) - - -@wrap_name_default() -@layer_support() -def last_seq(input, - name=None, - agg_level=AggregateLevel.TO_NO_SEQUENCE, - stride=-1, - layer_attr=None): - """ - Get Last Timestamp Activation of a sequence. - - If stride > 0, this layer will slide a window whose size is determined by stride, - and return the last value of the sequence in the window as the output. Thus, a - long sequence will be shortened. Note that for sequences with sub-sequences, the - default value of stride is -1. - - The simple usage is: - - .. code-block:: python - - seq = last_seq(input=layer)
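- A windowed sketch (hedged; with stride=5, one output is produced for - every window of five steps): - - .. code-block:: python - - seq = last_seq(input=layer, stride=5) -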
- - :param agg_level: The aggregation level. - :type agg_level: AggregateLevel - :param name: The name of this layer. It is optional. - :type name: basestring - :param input: The input of this layer. - :type input: LayerOutput - :param stride: The step size between successive pooling regions. - :type stride: int - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute - :return: LayerOutput object. - :rtype: LayerOutput - """ - if input.reverse is not None and input.reverse: - logger.warning("You are getting the last instance of a sequence that" - " is an output of a REVERSED layer. There is no time" - " series information at all. Maybe you want to use" - " first_seq instead.") - - if agg_level == AggregateLevel.TO_SEQUENCE: - assert stride == -1 - - Layer( - name=name, - type=LayerType.SEQUENCE_LAST_INSTANCE, - inputs=[input.name], - trans_type=agg_level, - stride=stride, - **ExtraLayerAttribute.to_kwargs(layer_attr)) - return LayerOutput( - name, - LayerType.SEQUENCE_LAST_INSTANCE, - parents=[input], - size=input.size) - - -@wrap_name_default() -@layer_support() -def first_seq(input, - name=None, - agg_level=AggregateLevel.TO_NO_SEQUENCE, - stride=-1, - layer_attr=None): - """ - Get First Timestamp Activation of a sequence. - - If stride > 0, this layer will slide a window whose size is determined by stride, - and return the first value of the sequence in the window as the output. Thus, a - long sequence will be shortened. Note that for sequences with sub-sequences, the - default value of stride is -1. - - The simple usage is: - - .. code-block:: python - - seq = first_seq(input=layer) - - :param agg_level: The aggregation level. - :type agg_level: AggregateLevel - :param name: The name of this layer. It is optional. - :type name: basestring - :param input: The input of this layer. - :type input: LayerOutput - :param stride: The step size between successive pooling regions. - :type stride: int - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute. - :return: LayerOutput object. - :rtype: LayerOutput - """ - - if input.reverse is not None and not input.reverse: - logger.warning('You are getting the first instance for a time series,' - ' and it is a normal recurrent layer output. There is no' - ' time series information at all. Maybe you want to use' - ' last_seq instead.') - - if agg_level == AggregateLevel.TO_SEQUENCE: - assert stride == -1 - - Layer( - name=name, - type=LayerType.SEQUENCE_FIRST_INSTANCE, - inputs=[input.name], - trans_type=agg_level, - stride=stride, - **ExtraLayerAttribute.to_kwargs(layer_attr)) - return LayerOutput( - name, - LayerType.SEQUENCE_FIRST_INSTANCE, - parents=[input], - size=input.size) - - -class ExpandLevel(object): - """ - Please refer to AggregateLevel first. - - ExpandLevel supports two modes: - - - :code:`ExpandLevel.FROM_NO_SEQUENCE` means the expansion acts on - :code:`NO_SEQUENCE`, which will be expanded to - :code:`SEQUENCE` or :code:`SUB_SEQUENCE`. - - - :code:`ExpandLevel.FROM_SEQUENCE` means the expansion acts on - :code:`SEQUENCE`, which will be expanded to - :code:`SUB_SEQUENCE`. - """ - FROM_NO_SEQUENCE = AggregateLevel.TO_NO_SEQUENCE - FROM_SEQUENCE = AggregateLevel.TO_SEQUENCE - # compatible with previous configuration - FROM_TIMESTEP = FROM_NO_SEQUENCE - - -@wrap_name_default() -@layer_support() -def expand_layer(input, - expand_as, - name=None, - bias_attr=False, - expand_level=ExpandLevel.FROM_NO_SEQUENCE, - layer_attr=None): - """ - A layer for expanding dense data, or sequence data in which each sequence - has length one, to sequence data. - - The example usage is: - - .. code-block:: python - - expand = expand_layer(input=layer1, - expand_as=layer2, - expand_level=ExpandLevel.FROM_NO_SEQUENCE)
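- For instance (a hedged shape sketch): if each sample of layer1 is a single - instance and the corresponding sequence in layer2 has five elements, every - layer1 instance is repeated five times in the output sequence. -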
- - :param input: The input of this layer. - :type input: LayerOutput - :param expand_as: Expand the input according to this layer's sequence information. - After the operation, the expanded input will have the same number of - elements as this layer. - :type expand_as: LayerOutput - :param name: The name of this layer. It is optional. - :type name: basestring - :param bias_attr: The bias attribute. If the parameter is set to False or an object - whose type is not ParameterAttribute, no bias is defined. If the - parameter is set to True, the bias is initialized to zero. - :type bias_attr: ParameterAttribute | None | bool | Any - :param expand_level: Whether the input layer is a sequence or the element of a sequence. - :type expand_level: ExpandLevel - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute. - :return: LayerOutput object. - :rtype: LayerOutput - """ - - Layer( - inputs=[input.name, expand_as.name], - name=name, - bias=ParamAttr.to_bias(bias_attr=bias_attr), - type=LayerType.EXPAND_LAYER, - trans_type=expand_level, - **ExtraAttr.to_kwargs(layer_attr)) - return LayerOutput( - name=name, - size=input.size, - layer_type=LayerType.EXPAND_LAYER, - parents=[input, expand_as]) - - -@wrap_name_default() -@wrap_act_default(act=IdentityActivation()) -@layer_support() -def repeat_layer(input, - num_repeats, - as_row_vector=True, - act=None, - name=None, - layer_attr=None): - """ - A layer that repeats the input num_repeats times. - - If as_row_vector: - - .. math:: - y = [x_1,\cdots, x_n, \cdots, x_1, \cdots, x_n] - - If not as_row_vector: - - .. math:: - y = [x_1,\cdots, x_1, \cdots, x_n, \cdots, x_n] - - - The example usage is: - - .. code-block:: python - - expand = repeat_layer(input=layer, num_repeats=4) - - :param input: The input of this layer. - :type input: LayerOutput - :param num_repeats: The number of times to repeat the input. - :type num_repeats: int - :param name: The name of this layer. It is optional. - :type name: basestring - :param as_row_vector: Whether to treat the input as row vectors or not. If - the parameter is set to True, the repeating operation - will be performed in the column direction. Otherwise, - it will be performed in the row direction. - :type as_row_vector: bool - :param act: Activation type. IdentityActivation is the default activation. - :type act: BaseActivation - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute. - :return: LayerOutput object. - :rtype: LayerOutput - """ - - l = Layer( - inputs=[input.name], - name=name, - active_type=act.name, - num_filters=num_repeats, - as_row_vector=as_row_vector, - type=LayerType.FEATURE_MAP_EXPAND_LAYER, - **ExtraAttr.to_kwargs(layer_attr)) - return LayerOutput( - name=name, - size=l.config.size, - layer_type=LayerType.FEATURE_MAP_EXPAND_LAYER, - activation=act, - parents=[input]) - - -@wrap_name_default("seqreshape") -@wrap_act_default(act=IdentityActivation()) -@wrap_bias_attr_default(has_bias=False) -@layer_support(ERROR_CLIPPING, DROPOUT) -def seq_reshape_layer(input, - reshape_size, - act=None, - name=None, - layer_attr=None, - bias_attr=None): - """ - A layer for reshaping the sequence. Assume the input sequence has T instances, - the dimension of each instance is M, and the input reshape_size is N. Then the - output sequence has T*M/N instances, each of dimension N. - - Note that T*M/N must be an integer. - - The example usage is: - - ..
code-block:: python - - reshape = seq_reshape_layer(input=layer, reshape_size=4) - - :param input: The input of this layer. - :type input: LayerOutput - :param reshape_size: The dimension of the reshaped sequence. - :type reshape_size: int - :param name: The name of this layer. It is optional. - :type name: basestring - :param act: Activation type. IdentityActivation is the default activation. - :type act: BaseActivation - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute. - :param bias_attr: The bias attribute. If the parameter is set to False or an object - whose type is not ParameterAttribute, no bias is defined. If the - parameter is set to True, the bias is initialized to zero. - :type bias_attr: ParameterAttribute | None | bool | Any - :return: LayerOutput object. - :rtype: LayerOutput - """ - - Layer( - inputs=[input.name], - name=name, - size=reshape_size, - type=LayerType.SEQUENCE_RESHAPE, - bias=ParamAttr.to_bias(bias_attr), - **ExtraAttr.to_kwargs(layer_attr)) - return LayerOutput( - name=name, - size=reshape_size, - layer_type=LayerType.SEQUENCE_RESHAPE, - parents=[input]) - - -@wrap_name_default() -@layer_support() -def interpolation_layer(input, weight, name=None, layer_attr=None): - """ - This layer performs linear interpolation on two inputs, - which is used in NEURAL TURING MACHINE. - - .. math:: - y.row[i] = w[i] * x_1.row[i] + (1 - w[i]) * x_2.row[i] - - where :math:`x_1` and :math:`x_2` are two (batchSize x dataDim) inputs, - :math:`w` is (batchSize x 1) weight vector, and :math:`y` is - (batchSize x dataDim) output. - - The example usage is: - - .. code-block:: python - - interpolation = interpolation_layer(input=[layer1, layer2], weight=layer3) - - :param input: The input of this layer. - :type input: list | tuple - :param weight: Weight layer. - :type weight: LayerOutput - :param name: The name of this layer. It is optional. - :type name: basestring - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute. - :return: LayerOutput object. - :rtype: LayerOutput - """ - assert isinstance(input, collections.Sequence) - assert len(input) == 2 - assert isinstance(input[0], LayerOutput) and isinstance(input[1], - LayerOutput) - if input[0].size is not None and input[1].size is not None: - assert input[0].size == input[1].size - assert isinstance(weight, LayerOutput) - if weight.size is not None: - assert weight.size == 1 - Layer( - name=name, - type=LayerType.INTERPOLATION_LAYER, - inputs=[weight.name, input[0].name, input[1].name], - **ExtraAttr.to_kwargs(layer_attr)) - return LayerOutput( - name, - LayerType.INTERPOLATION_LAYER, - parents=[weight, input[0], input[1]], - size=input[0].size) - - -@wrap_name_default() -@layer_support() -def bilinear_interp_layer(input, - out_size_x=None, - out_size_y=None, - name=None, - layer_attr=None): - """ - This layer implements bilinear interpolation on convolutional layer's output. - - Please refer to Wikipedia: https://en.wikipedia.org/wiki/Bilinear_interpolation - - The simple usage is: - - .. code-block:: python - - bilinear = bilinear_interp_layer(input=layer1, out_size_x=64, out_size_y=64) - - :param input: The input of this layer. - :type input: LayerOutput. - :param out_size_x: The width of the output. - :type out_size_x: int - :param out_size_y: The height of the output. - :type out_size_y: int - :param name: The name of this layer. It is optional. 
- :type name: basestring - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute - :return: LayerOutput object. - :rtype: LayerOutput - """ - assert input.layer_type == LayerType.CONV_LAYER - assert isinstance(input.activation, LinearActivation) - assert out_size_x > 0 and out_size_y > 0 - assert input.num_filters is not None - num_channels = input.num_filters - l = Layer( - name=name, - inputs=Input( - input.name, - bilinear_interp=BilinearInterp( - out_size_x=out_size_x, - out_size_y=out_size_y, - channels=num_channels)), - type=LayerType.BILINEAR_INTERP_LAYER, - **ExtraLayerAttribute.to_kwargs(layer_attr)) - return LayerOutput( - name, - LayerType.BILINEAR_INTERP_LAYER, - parents=[input], - num_filters=num_channels, - size=l.config.size) - - -@wrap_name_default() -@layer_support() -def power_layer(input, weight, name=None, layer_attr=None): - """ - This layer applies a power function to a vector element-wise, - which is used in NEURAL TURING MACHINE. - - .. math:: - y = x^w - - where :math:`x` is an input vector, :math:`w` is a scalar exponent, - and :math:`y` is an output vector. - - The example usage is: - - .. code-block:: python - - power = power_layer(input=layer1, weight=layer2) - - :param input: The input of this layer. - :type input: LayerOutput - :param weight: The exponent of the power. - :type weight: LayerOutput - :param name: The name of this layer. It is optional. - :type name: basestring - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute. - :return: LayerOutput object. - :rtype: LayerOutput - """ - assert isinstance(input, LayerOutput) and isinstance(weight, LayerOutput) - if weight.size is not None: - assert weight.size == 1 - Layer( - name=name, - type=LayerType.POWER_LAYER, - inputs=[weight.name, input.name], - **ExtraAttr.to_kwargs(layer_attr)) - return LayerOutput( - name, LayerType.POWER_LAYER, parents=[input, weight], size=input.size) - - -@wrap_name_default() -@layer_support() -def scaling_layer(input, weight, name=None, layer_attr=None): - """ - A layer for multiplying input vector by weight scalar. - - .. math:: - y = w x - - where :math:`x` is size=dataDim input, :math:`w` is size=1 weight, - and :math:`y` is size=dataDim output. - - Note that the above computation is for one sample. Multiple samples are - processed in one batch. - - The example usage is: - - .. code-block:: python - - scale = scaling_layer(input=layer1, weight=layer2) - - :param input: The input of this layer. - :type input: LayerOutput - :param weight: The weight of each sample. - :type weight: LayerOutput - :param name: The name of this layer. It is optional. - :type name: basestring - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute. - :return: LayerOutput object. - :rtype: LayerOutput - """ - assert isinstance(weight, LayerOutput) and isinstance(input, LayerOutput) - if weight.size is not None: - assert weight.size == 1 - Layer( - name=name, - type=LayerType.SCALING_LAYER, - inputs=[weight.name, input.name], - **ExtraAttr.to_kwargs(layer_attr)) - return LayerOutput( - name, LayerType.SCALING_LAYER, parents=[weight, input], size=input.size) - - -@wrap_name_default() -@layer_support() -def trans_layer(input, name=None, layer_attr=None): - """ - A layer for transposing a minibatch matrix. - - .. 
math:: - y = x^\mathrm{T} - - where :math:`x` is (M x N) input, and :math:`y` is (N x M) output. - - The example usage is: - - .. code-block:: python - - trans = trans_layer(input=layer) - - :param input: The input of this layer. - :type input: LayerOutput - :param name: The name of this layer. It is optional. - :type name: basestring - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute. - :return: LayerOutput object. - :rtype: LayerOutput - """ - Layer( - name=name, - type=LayerType.TRANS_LAYER, - inputs=[input.name], - **ExtraAttr.to_kwargs(layer_attr)) - return LayerOutput( - name, LayerType.TRANS_LAYER, parents=[input], size=input.size) - - -@wrap_name_default() -@layer_support() -def rotate_layer(input, height, width, name=None, layer_attr=None): - """ - A layer for rotating 90 degrees (clock-wise) for each feature channel, - usually used when the input sample is some image or feature map. - - .. math:: - y(j,i,:) = x(M-i-1,j,:) - - where :math:`x` is (M x N x C) input, and :math:`y` is (N x M x C) output. - - The example usage is: - - .. code-block:: python - - rot = rotate_layer(input=layer, - height=100, - width=100) - - :param input: The input of this layer. - :type input: LayerOutput - :param height: The height of the sample matrix. - :type height: int - :param width: The width of the sample matrix. - :type width: int - :param name: The name of this layer. It is optional. - :type name: basestring - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute. - :return: LayerOutput object. - :rtype: LayerOutput - """ - assert isinstance(input, LayerOutput) - l = Layer( - name=name, - height=height, - width=width, - type=LayerType.ROTATE_LAYER, - inputs=[input.name], - **ExtraLayerAttribute.to_kwargs(layer_attr)) - return LayerOutput( - name=name, - layer_type=LayerType.ROTATE_LAYER, - parents=[input], - size=l.config.size) - - -@wrap_name_default() -@layer_support() -def cos_sim(a, b, scale=1, size=1, name=None, layer_attr=None): - """ - Cosine Similarity Layer. The cosine similarity equation is here. - - .. math:: - similarity = cos(\\theta) = {\\mathbf{a} \\cdot \\mathbf{b} - \\over \\|\\mathbf{a}\\| \\|\\mathbf{b}\\|} - - The size of a is M, size of b is M*N, - Similarity will be calculated N times by step M. The output size is - N. The scale will be multiplied to similarity. - - Note that the above computation is for one sample. Multiple samples are - processed in one batch. - - The example usage is: - - .. code-block:: python - - cos = cos_sim(a=layer1, b=layer2, size=3) - - :param name: The name of this layer. It is optional. - :type name: basestring - :param a: The first input of this layer. - :type a: LayerOutput - :param b: The second input of this layer. - :type b: LayerOutput - :param scale: The scale of the cosine similarity. 1 is the default value. - :type scale: float - :param size: The dimension of this layer. NOTE size_a * size should equal size_b. - :type size: int - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for details. - :type layer_attr: ExtraLayerAttribute - :return: LayerOutput object. 
- :rtype: LayerOutput - """ - assert isinstance(a, LayerOutput) and isinstance(b, LayerOutput)
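- # When size == 1, a single cosine value is computed over the whole vectors. - # Otherwise b is treated as `size` chunks of length a.size, and one - # similarity is emitted per chunk (the COSINE_SIM_VEC branch below). -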
- if size == 1: - Layer( - name=name, - type=LayerType.COSINE_SIM, - cos_scale=scale, - inputs=[a.name, b.name], - **ExtraLayerAttribute.to_kwargs(layer_attr)) - else: - if a.size is not None and b.size is not None: - assert size == b.size / a.size - Layer( - name=name, - type=LayerType.COSINE_SIM_VEC, - size=size, - cos_scale=scale, - inputs=[a.name, b.name], - **ExtraLayerAttribute.to_kwargs(layer_attr)) - return LayerOutput(name, LayerType.COSINE_SIM, parents=[a, b], size=size) - - -@wrap_name_default() -@layer_support() -def l2_distance_layer(x, y, name=None, layer_attr=None): - """ - This layer calculates and returns the Euclidean distance between two input - vectors x and y. The equation is as follows: - - .. math:: - l2_distance(\\mathbf{x}, \\mathbf{y}) = \\sqrt{\\sum_{i=1}^D(x_i - y_i)^2} - - The output size of this layer is fixed to be 1. Note that the above - computation is for one sample. Multiple samples are processed in one batch. - - The example usage is: - - .. code-block:: python - - l2_sim = l2_distance_layer(x=layer1, y=layer2) - - :param name: The name of this layer. It is optional. - :type name: basestring - :param x: The first input x for this layer, whose output is a matrix with - dimensionality N x D. N is the sample number in a mini-batch. - D is the dimensionality of x's output. - :type x: LayerOutput - :param y: The second input y for this layer, whose output is a matrix with - dimensionality N x D. N is the sample number in a mini-batch. - D is the dimensionality of y's output. - :type y: LayerOutput - :param layer_attr: The extra layer attributes, for example, drop rate. - See ExtraLayerAttribute for more details. - :type layer_attr: ExtraLayerAttribute - :return: The returned LayerOutput object. - :rtype: LayerOutput - """ - - assert isinstance(x, LayerOutput) and isinstance(y, LayerOutput) - Layer( - name=name, - type=LayerType.L2_DISTANCE, - inputs=[x.name, y.name], - **ExtraLayerAttribute.to_kwargs(layer_attr)) - return LayerOutput(name, LayerType.L2_DISTANCE, parents=[x, y], size=1) - - -@wrap_name_default() -@wrap_bias_attr_default(has_bias=True) -@wrap_param_attr_default() -@layer_support() -def hsigmoid(input, - label, - num_classes=None, - name=None, - bias_attr=None, - param_attr=None, - layer_attr=None): - """ - Organize the classes into a binary tree. At each node, a sigmoid function - is used to calculate the probability of belonging to the right branch. - - Reference: - `Hierarchical Probabilistic Neural Network Language Model - `_ - - The example usage is: - - .. code-block:: python - - cost = hsigmoid(input=[layer1, layer2], - label=data_layer) - - :param input: The input of this layer. - :type input: LayerOutput | list | tuple - :param label: The input label. - :type label: LayerOutput - :param num_classes: The number of classes. It should be larger than 2. If the parameter - is not set or set to None, its actual value will be automatically set to - the number of labels. - :type num_classes: int - :param name: The name of this layer. It is optional. - :type name: basestring - :param bias_attr: The bias attribute. If the parameter is set to False or an object - whose type is not ParameterAttribute, no bias is defined. If the - parameter is set to True, the bias is initialized to zero. - :type bias_attr: ParameterAttribute | None | bool | Any - :param param_attr: The parameter attribute. See ParameterAttribute for details. - :type param_attr: ParameterAttribute - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for details. - :type layer_attr: ExtraLayerAttribute - :return: LayerOutput object. - :rtype: LayerOutput - """ - if isinstance(input, LayerOutput): - input = [input] - if not isinstance(param_attr, collections.Sequence): - param_attr = [param_attr] - else: - if not isinstance(param_attr, collections.Sequence): - param_attr = [param_attr] * len(input) - else: - assert len(param_attr) == len(input) - - assert isinstance(input, collections.Sequence) - assert isinstance(label, LayerOutput) - assert label.layer_type == LayerType.DATA - - if num_classes is None: - num_classes = label.size - if num_classes is None or num_classes <= 2: - raise ValueError("hsigmoid's num_classes must be larger than 2.") - - ipts_for_layer = [] - parents = [] - for each_input, each_param_attr in zip(input, param_attr): - assert isinstance(each_input, LayerOutput) - ipts_for_layer.append(Input(each_input.name, **each_param_attr.attr)) - parents.append(each_input) - ipts_for_layer.append(label.name) - parents.append(label) - - l = Layer( - name=name, - type=LayerType.HSIGMOID, - num_classes=num_classes, - bias=ParamAttr.to_bias(bias_attr), - inputs=ipts_for_layer, - **ExtraLayerAttribute.to_kwargs(layer_attr)) - return LayerOutput( - name, LayerType.HSIGMOID, parents=parents, size=l.config.size) - - -@wrap_name_default("conv") -@wrap_param_attr_default() -@wrap_bias_attr_default() -@wrap_act_default(act=ReluActivation()) -@layer_support(DROPOUT) -def img_conv_layer(input, - filter_size, - num_filters, - name=None, - num_channels=None, - act=None, - groups=1, - stride=1, - padding=0, - dilation=1, - bias_attr=None, - param_attr=None, - shared_biases=True, - layer_attr=None, - filter_size_y=None, - stride_y=None, - padding_y=None, - dilation_y=None, - trans=False, - layer_type=None): - """ - Convolution layer for image. Paddle can support both square and non-square - input currently. - - For details of the convolution layer, please refer to UFLDL's `convolution - `_ . - - Convolution Transpose (deconv) layer for image. Paddle can support both square - and non-square input currently. - - For details of the convolution transpose layer, - please refer to the following explanation and the references therein. - num_channels is the channel number of the input image. It may be 1 or 3 when - the input is raw image pixels (mono or RGB), or it may be the previous layer's - num_filters. - - There are several groups of filters in PaddlePaddle implementation. - If the groups attribute is greater than 1, for example groups=2, - the input will be split into 2 parts along the channel axis, and - the filters will also be split into 2 parts. The first half of the filters - is only connected to the first half of the input channels, while the second - half of the filters is only connected to the second half of the input. After - the computation of convolution for each part of the input, - the output will be obtained by concatenating the two results. - - For details of grouped convolution, please refer to: - `ImageNet Classification With Deep Convolutional Neural Networks - `_ - - The example usage is: - - .. code-block:: python - - conv = img_conv_layer(input=data, filter_size=1, filter_size_y=1, - num_channels=8, - num_filters=16, stride=1, - bias_attr=False, - act=ReluActivation())
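- A grouped-convolution sketch (hedged; the preceding layer is assumed to - output 8 channels, which groups=2 splits into two halves of 4): - - .. code-block:: python - - group_conv = img_conv_layer(input=data, filter_size=3, - num_channels=8, num_filters=16, - groups=2, stride=1, - act=ReluActivation()) -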
- - :param name: The name of this layer. It is optional. - :type name: basestring - :param input: The input of this layer. - :type input: LayerOutput - :param filter_size: The dimensions of the filter kernel. If the parameter is - set to one integer, the two dimensions on the x and y axes - will be the same when filter_size_y is not set. If it is set - to a list, the first element indicates the dimension on - the x axis, and the second is used to specify the dimension - on the y axis when filter_size_y is not provided. - :type filter_size: int | tuple | list - :param filter_size_y: The dimension of the filter kernel on the y axis. If the parameter - is not set, it will be set automatically according to filter_size. - :type filter_size_y: int - :param num_filters: The number of filters. It equals the number of output channels. - :type num_filters: int - :param act: Activation type. ReluActivation is the default activation. - :type act: BaseActivation - :param groups: The group number. 1 is the default group number. - :type groups: int - :param stride: The strides. If the parameter is set to one integer, the strides - on the x and y axes will be the same when stride_y is not set. If it is - set to a list, the first element indicates the stride on the x axis, - and the second is used to specify the stride on the y axis when - stride_y is not provided. 1 is the default value. - :type stride: int | tuple | list - :param stride_y: The stride on the y axis. - :type stride_y: int - :param padding: The padding sizes. If the parameter is set to one integer, the padding - sizes on the x and y axes will be the same when padding_y is not set. If it - is set to a list, the first element indicates the padding size on the - x axis, and the second is used to specify the padding size on the y axis - when padding_y is not provided. 0 is the default padding size. - :type padding: int | tuple | list - :param padding_y: The padding size on the y axis. - :type padding_y: int - :param dilation: The dimensions of the dilation. If the parameter is set to one integer, - the two dimensions on the x and y axes will be the same when dilation_y is not - set. If it is set to a list, the first element indicates the dimension - on the x axis, and the second is used to specify the dimension on the y - axis when dilation_y is not provided. 1 is the default dimension. - :type dilation: int | tuple | list - :param dilation_y: The dimension of the dilation on the y axis. - :type dilation_y: int - :param bias_attr: The bias attribute. If the parameter is set to False or an object - whose type is not ParameterAttribute, no bias is defined. If the - parameter is set to True, the bias is initialized to zero. - :type bias_attr: ParameterAttribute | None | bool | Any - :param num_channels: The number of input channels. If the parameter is not set or - set to None, its actual value will be automatically set to - the channel number of the input. - :type num_channels: int - :param param_attr: The parameter attribute. See ParameterAttribute for - details. - :type param_attr: ParameterAttribute - :param shared_biases: Whether biases will be shared between filters or not. - :type shared_biases: bool - :param layer_attr: The extra layer attributes. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute - :param trans: True if it is a convTransLayer, False if it is a convLayer. - :type trans: bool - :param layer_type: Specify the layer type. If the dilation's dimension on one axis is - larger than 1, layer_type has to be "cudnn_conv" or "cudnn_convt".
- If trans=True, layer_type has to be "exconvt" or "cudnn_convt", - otherwise layer_type has to be either "exconv" or "cudnn_conv". - :type layer_type: basestring - :return: LayerOutput object. - :rtype: LayerOutput - """ - if num_channels is None: - assert input.num_filters is not None - num_channels = input.num_filters - - if filter_size_y is None: - if isinstance(filter_size, collections.Sequence): - assert len(filter_size) == 2 - filter_size, filter_size_y = filter_size - else: - filter_size_y = filter_size - - if stride_y is None: - if isinstance(stride, collections.Sequence): - assert len(stride) == 2 - stride, stride_y = stride - else: - stride_y = stride - - if padding_y is None: - if isinstance(padding, collections.Sequence): - assert len(padding) == 2 - padding, padding_y = padding - else: - padding_y = padding - - if dilation_y is None: - if isinstance(dilation, collections.Sequence): - assert len(dilation) == 2 - dilation, dilation_y = dilation - else: - dilation_y = dilation - - if param_attr.attr.get('initial_smart'): - # special initial for conv layers. - init_w = (2.0 / (filter_size**2 * num_channels))**0.5 - param_attr.attr["initial_mean"] = 0.0 - param_attr.attr["initial_std"] = init_w - param_attr.attr["initial_strategy"] = 0 - param_attr.attr["initial_smart"] = False - - if layer_type: - if dilation > 1 or dilation_y > 1: - assert layer_type in [ - "cudnn_conv", "cudnn_convt", "exconv", "exconvt" - ] - if trans: - assert layer_type in ["exconvt", "cudnn_convt"] - else: - assert layer_type in ["exconv", "cudnn_conv"] - lt = layer_type - else: - lt = LayerType.CONVTRANS_LAYER if trans else LayerType.CONV_LAYER - - l = Layer( - name=name, - inputs=Input( - input.name, - conv=Conv( - filter_size=filter_size, - padding=padding, - dilation=dilation, - stride=stride, - channels=num_channels, - groups=groups, - filter_size_y=filter_size_y, - padding_y=padding_y, - dilation_y=dilation_y, - stride_y=stride_y), - **param_attr.attr), - active_type=act.name, - num_filters=num_filters, - bias=ParamAttr.to_bias(bias_attr), - shared_biases=shared_biases, - type=lt, - **ExtraLayerAttribute.to_kwargs(layer_attr)) - return LayerOutput( - name, - lt, - parents=[input], - activation=act, - num_filters=num_filters, - size=l.config.size) - - -@wrap_name_default("pool") -@layer_support() -def img_pool_layer(input, - pool_size, - name=None, - num_channels=None, - pool_type=None, - stride=1, - padding=0, - layer_attr=None, - pool_size_y=None, - stride_y=None, - padding_y=None, - ceil_mode=True, - exclude_mode=None): - """ - Image pooling Layer. - - The details of pooling layer, please refer to ufldl's pooling_ . - - .. _pooling: http://ufldl.stanford.edu/tutorial/supervised/Pooling/ - - - ceil_mode=True: - - .. math:: - - w & = 1 + ceil(\\frac{input\_width + 2 * padding - pool\_size}{stride}) - - h & = 1 + ceil(\\frac{input\_height + 2 * padding\_y - pool\_size\_y}{stride\_y}) - - - ceil_mode=False: - - .. math:: - - w & = 1 + floor(\\frac{input\_width + 2 * padding - pool\_size}{stride}) - - h & = 1 + floor(\\frac{input\_height + 2 * padding\_y - pool\_size\_y}{stride\_y}) - - The example usage is: - - .. code-block:: python - - maxpool = img_pool_layer(input=conv, - pool_size=3, - pool_size_y=5, - num_channels=8, - stride=1, - stride_y=2, - padding=1, - padding_y=2, - pool_type=MaxPooling()) - - :param padding: The padding size on the x axis. 0 is the default padding size. - :type padding: int - :param padding_y: The padding size on the y axis. 
If the parameter is not set - or set to None, it will be set to 'padding' automatically. - :type padding_y: int - :param name: The name of this layer. It is optional. - :type name: basestring - :param input: The input of this layer. - :type input: LayerOutput - :param pool_size: The pooling window length on the x axis. - :type pool_size: int - :param pool_size_y: The pooling window length on the y axis. If the parameter is - not set or set to None, its actual value will be automatically - set to pool_size. - :type pool_size_y: int - :param num_channels: The number of input channels. If the parameter is not set or - set to None, its actual value will be automatically set to - the channels number of the input. - :type num_channels: int - :param pool_type: Pooling type. MaxPooling is the default pooling. - :type pool_type: BasePoolingType - :param stride: The stride on the x axis. 1 is the default value. - :type stride: int - :param stride_y: The stride on the y axis. If the parameter is not set or set to - None, its actual value will be automatically set to 'stride'. - :type stride_y: int - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute - :param ceil_mode: Whether to use the ceil function to calculate output height and width. - True is the default. If it is set to False, the floor function will - be used. - :type ceil_mode: bool - :param exclude_mode: Whether to exclude the padding cells from the average calculation. - This parameter only takes effect when pool_type is AvgPooling. If it - is None, the padding cells are also excluded. When cuDNN is used, - choose CudnnAvgPooling or CudnnAvgInclPadPooling as pool_type to - select the mode instead. - :type exclude_mode: bool - :return: LayerOutput object. - :rtype: LayerOutput - """ - if num_channels is None: - assert input.num_filters is not None - num_channels = input.num_filters - - if pool_type is None: - pool_type = MaxPooling() - elif isinstance(pool_type, AvgPooling): - pool_type.name = 'avg' - - assert type(pool_type) in [AvgPooling, MaxPooling, MaxWithMaskPooling, CudnnAvgPooling, - CudnnMaxPooling, CudnnAvgInclPadPooling], \ - "only (Cudnn)AvgPooling, (Cudnn)MaxPooling, MaxWithMaskPooling and " \ - "CudnnAvgInclPadPooling are supported"
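- # AvgPooling and MaxPooling are emitted as the '-projection' variants of - # the pool operator in the generated config; the other pooling classes - # keep their own type names. -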
- type_name = pool_type.name + '-projection' \ - if ( - isinstance(pool_type, AvgPooling) or isinstance(pool_type, MaxPooling)) \ - else pool_type.name - pool_size_y = pool_size if pool_size_y is None else pool_size_y - stride_y = stride if stride_y is None else stride_y - padding_y = padding if padding_y is None else padding_y - - l = Layer( - name=name, - type=LayerType.POOL_LAYER, - inputs=[ - Input( - input.name, - pool=Pool( - pool_type=type_name, - channels=num_channels, - size_x=pool_size, - start=None, - stride=stride, - padding=padding, - size_y=pool_size_y, - stride_y=stride_y, - padding_y=padding_y)) - ], - ceil_mode=ceil_mode, - exclude_mode=exclude_mode, - **ExtraLayerAttribute.to_kwargs(layer_attr)) - return LayerOutput( - name, - LayerType.POOL_LAYER, - parents=[input], - num_filters=num_channels, - size=l.config.size) - - -@wrap_name_default("pool3d") -@layer_support() -def img_pool3d_layer(input, - pool_size, - name=None, - num_channels=None, - pool_type=None, - stride=1, - padding=0, - layer_attr=None, - pool_size_y=None, - stride_y=None, - padding_y=None, - pool_size_z=None, - stride_z=None, - padding_z=None, - ceil_mode=True): - """ - Image pooling Layer. - - For details of the pooling layer, please refer to UFLDL's pooling_ . - - .. _pooling: http://ufldl.stanford.edu/tutorial/supervised/Pooling/ - - - ceil_mode=True: - - .. math:: - - w & = 1 + ceil(\\frac{input\_width + 2 * padding - pool\_size}{stride}) - - h & = 1 + ceil(\\frac{input\_height + 2 * padding\_y - pool\_size\_y}{stride\_y}) - - d & = 1 + ceil(\\frac{input\_depth + 2 * padding\_z - pool\_size\_z}{stride\_z}) - - - ceil_mode=False: - - .. math:: - - w & = 1 + floor(\\frac{input\_width + 2 * padding - pool\_size}{stride}) - - h & = 1 + floor(\\frac{input\_height + 2 * padding\_y - pool\_size\_y}{stride\_y}) - - d & = 1 + floor(\\frac{input\_depth + 2 * padding\_z - pool\_size\_z}{stride\_z}) - - The example usage is: - - .. code-block:: python - - maxpool = img_pool3d_layer(input=conv, - pool_size=3, - num_channels=8, - stride=1, - padding=1, - pool_type=MaxPooling())
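- For instance (a worked sketch with the settings above and a hypothetical - 16 x 16 x 16 input): w = h = d = 1 + ceil((16 + 2 * 1 - 3) / 1) = 16, so - the spatial size is preserved. -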
- - :param name: The name of this layer. It is optional. - :type name: basestring. - :param input: The input of this layer. - :type input: LayerOutput - :param pool_size: The pooling window lengths along three axes. If the parameter - is set to one integer, the three lengths will be the same. - :type pool_size: int | tuple | list - :param num_channels: The number of input channels. If the parameter is not set or - set to None, its actual value will be automatically set to - the channels number of the input. - :type num_channels: int - :param pool_type: Pooling type. MaxPooling is the default pooling. - :type pool_type: BasePoolingType - :param stride: The strides of the pooling along three axes. If the parameter - is set to one integer, the three strides will be the same. 1 is the - default value. - :type stride: int | tuple | list - :param padding: The sizes of padding along three axes. If the parameter is set to - one integer, they will be the same. 0 is the default padding size. - :type padding: int | tuple | list - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute - :param ceil_mode: Whether to use the ceil function to calculate output height and width. - True is the default. If it is set to False, the floor function will - be used. - :type ceil_mode: bool - :return: LayerOutput object. - :rtype: LayerOutput - """ - if num_channels is None: - assert input.num_filters is not None - num_channels = input.num_filters - - if pool_type is None: - pool_type = MaxPooling() - elif isinstance(pool_type, AvgPooling): - pool_type.name = 'avg' - - type_name = pool_type.name + '-projection' \ - if ( - isinstance(pool_type, AvgPooling) or isinstance(pool_type, MaxPooling)) \ - else pool_type.name - - if isinstance(pool_size, collections.Sequence): - assert len(pool_size) == 3 - pool_size, pool_size_y, pool_size_z = pool_size - else: - pool_size_y = pool_size - pool_size_z = pool_size - - if isinstance(stride, collections.Sequence): - assert len(stride) == 3 - stride, stride_y, stride_z = stride - else: - stride_y = stride - stride_z = stride - - if isinstance(padding, collections.Sequence): - assert len(padding) == 3 - padding, padding_y, padding_z = padding - else: - padding_y = padding - padding_z = padding - - l = Layer( - name=name, - type=LayerType.POOL3D_LAYER, - inputs=[ - Input( - input.name, - pool=Pool3d( - pool_type=type_name, - channels=num_channels, - size_x=pool_size, - start=None, - stride=stride, - padding=padding, - size_y=pool_size_y, - stride_y=stride_y, - padding_y=padding_y, - size_z=pool_size_z, - stride_z=stride_z, - padding_z=padding_z)) - ], - ceil_mode=ceil_mode, - **ExtraLayerAttribute.to_kwargs(layer_attr)) - return LayerOutput( - name, - LayerType.POOL3D_LAYER, - parents=[input], - num_filters=num_channels, - size=l.config.size) - - -@wrap_name_default("upsample") -@layer_support() -def upsample_layer(input, - name=None, - scale=None, - scale_y=None, - upsample_size=None, - upsample_size_y=None, - pad_out_x=False, - pad_out_y=False, - layer_attr=None): - """ - The DePooling (unpooling) process. - The input should be a list of length 2: the first element is a layer, and - the second should be the output of a max-pooling layer with mask. - - The example usage is: - - .. code-block:: python - - pool1 = paddle.v2.layer.img_pool(input=input, pool_size=2, stride=2, - pool_type=paddle.pooling.MaxWithMask()) - upsample = paddle.v2.layer.upsample(input=[layer1, pool1]) - - :param name: The name of this layer. It is optional. - :type name: basestring - :param input: Contains an input layer and the paired max-pooling-with-mask layer. - :type input: list | tuple | collections.Sequence - :param scale: outputSize = scale * inputSize. - :type scale: int | list | tuple - :param scale_y: The scale on the y axis. It will be equal to scale if its value is None. - :type scale_y: int | None - :param upsample_size: Specify the output size. - :type upsample_size: int | list | tuple. - :param upsample_size_y: Specify the output size on the y axis. - :type upsample_size_y: int. - :param pad_out_x: Specify the exact x dimension size. This parameter only works when scale is 2. - :type pad_out_x: bool. - :param pad_out_y: Specify the exact y dimension size. This parameter only works when scale is 2. - :type pad_out_y: bool. - :param layer_attr: Extra Layer Attribute. - :type layer_attr: ExtraLayerAttribute - :return: LayerOutput object.
- :rtype: LayerOutput - """ - - assert (scale is not None) or (upsample_size is not None), \ - 'either scale or upsample_size must be specified' - - assert len(input) == 2, 'the input should be a list of two layers' - - assert input[1].layer_type == LayerType.POOL_LAYER, \ - 'the second input should be the output of a max-pooling layer with mask' - - scale_y = scale \ - if scale is not None else scale_y - upsample_size_y = upsample_size \ - if upsample_size is not None else upsample_size_y - - layer_type = LayerType.UPSAMPLE_LAYER - - layer = Layer( - name=name, - type=layer_type, - inputs=[ - Input( - input[0].name, - upsample=Upsample(scale, scale_y, pad_out_x, pad_out_y, - upsample_size, upsample_size_y)), - Input(input[1].name) - ], - **ExtraLayerAttribute.to_kwargs(layer_attr)) - - sz = layer.config.size - - return LayerOutput(name, layer_type=layer_type, parents=input, size=sz) - - -@wrap_name_default("spp") -@layer_support() -def spp_layer(input, - name=None, - num_channels=None, - pool_type=None, - pyramid_height=None, - layer_attr=None): - """ - A layer that performs spatial pyramid pooling. - - Reference: - `Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition - `_ - - The example usage is: - - .. code-block:: python - - spp = spp_layer(input=data, - pyramid_height=2, - num_channels=16, - pool_type=MaxPooling()) - - :param name: The name of this layer. It is optional. - :type name: basestring - :param input: The input of this layer. - :type input: LayerOutput - :param num_channels: The number of input channels. If the parameter is not set or - set to None, its actual value will be automatically set to - the channels number of the input. - :type num_channels: int - :param pool_type: Pooling type. MaxPooling is the default pooling. - :type pool_type: BasePoolingType - :param pyramid_height: The pyramid height of this pooling. - :type pyramid_height: int - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute - :return: LayerOutput object.
- :rtype: LayerOutput - """ - if num_channels is None: - assert input.num_filters is not None - num_channels = input.num_filters - - if pool_type is None: - pool_type = MaxPooling() - elif isinstance(pool_type, AvgPooling): - pool_type.name = 'avg' - - type_name = pool_type.name - if (isinstance(pool_type, AvgPooling) or isinstance(pool_type, MaxPooling)): - type_name += '-projection' - - l = Layer( - name=name, - type=LayerType.SPP_LAYER, - inputs=Input( - input.name, - spp=SpatialPyramidPool( - pool_type=type_name, - channels=num_channels, - pyramid_height=pyramid_height)), - **ExtraLayerAttribute.to_kwargs(layer_attr)) - return LayerOutput( - name, - layer_type=LayerType.SPP_LAYER, - parents=[input], - num_filters=num_channels, - size=l.config.size) - - -def __img_norm_layer__(name, input, size, norm_type, scale, power, num_channels, - blocked, layer_attr): - if num_channels is None: - assert input.num_filters is not None - num_channels = input.num_filters - - l = Layer( - name=name, - type=LayerType.NORM_LAYER, - inputs=Input( - input.name, - norm=Norm( - norm_type=norm_type, - channels=num_channels, - size=size, - scale=scale, - pow=power, - blocked=blocked)), - **ExtraLayerAttribute.to_kwargs(layer_attr)) - return LayerOutput( - name, - layer_type=LayerType.NORM_LAYER, - parents=[input], - num_filters=num_channels, - img_norm_type=norm_type, - size=l.config.size) - - -@wrap_name_default("crmnorm") -@layer_support() -def img_cmrnorm_layer(input, - size, - scale=0.0128, - power=0.75, - name=None, - num_channels=None, - layer_attr=None): - """ - Response normalization across feature maps. - - Reference: - `ImageNet Classification with Deep Convolutional Neural Networks - `_ - - The example usage is: - - .. code-block:: python - - norm = img_cmrnorm_layer(input=net, size=5) - - :param name: The name of this layer. It is optional. - :type name: basestring - :param input: The input of this layer. - :type input: LayerOutput - :param size: Normalize in number of :math:`size` feature maps. - :type size: int - :param scale: The hyper-parameter. - :type scale: float - :param power: The hyper-parameter. - :type power: float - :param num_channels: The number of input channels. If the parameter is not set or - set to None, its actual value will be automatically set to - the channels number of the input. - :param layer_attr: The extra layer attributes. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute - :return: LayerOutput object. - :rtype: LayerOutput - """ - return __img_norm_layer__(name, input, size, "cmrnorm-projection", scale, - power, num_channels, 0, layer_attr) - - -@wrap_bias_attr_default() -@wrap_param_attr_default( - default_factory=lambda _: ParamAttr(initial_mean=1.0, initial_std=0.)) -@wrap_act_default(act=ReluActivation()) -@wrap_name_default("batch_norm") -@layer_support(DROPOUT, ERROR_CLIPPING) -def batch_norm_layer(input, - act=None, - name=None, - img3D=False, - num_channels=None, - bias_attr=None, - param_attr=None, - layer_attr=None, - batch_norm_type=None, - epsilon=1e-5, - moving_average_fraction=0.9, - use_global_stats=None, - mean_var_names=None): - """ - Batch Normalization Layer. The notation of this layer is as follows. - - :math:`x` is the input features over a mini-batch. - - .. 
math:: - - \\mu_{\\beta} &\\gets \\frac{1}{m} \\sum_{i=1}^{m} x_i \\qquad &//\\ - \ mini-batch\ mean \\\\ - \\sigma_{\\beta}^{2} &\\gets \\frac{1}{m} \\sum_{i=1}^{m}(x_i - \\ - \\mu_{\\beta})^2 \\qquad &//\ mini-batch\ variance \\\\ - \\hat{x_i} &\\gets \\frac{x_i - \\mu_\\beta} {\\sqrt{\\ - \\sigma_{\\beta}^{2} + \\epsilon}} \\qquad &//\ normalize \\\\ - y_i &\\gets \\gamma \\hat{x_i} + \\beta \\qquad &//\ scale\ and\ shift - - Reference: - `Batch Normalization: Accelerating Deep Network Training by Reducing - Internal Covariate Shift - `_ - - The example usage is: - - .. code-block:: python - - norm = batch_norm_layer(input=net, act=ReluActivation()) - - :param name: The name of this layer. It is optional. - :type name: basestring - :param input: This layer's input, on which batch normalization will be performed. - :type input: LayerOutput - :param batch_norm_type: We have batch_norm, mkldnn_batch_norm and cudnn_batch_norm. - batch_norm supports CPU, MKLDNN and GPU. cudnn_batch_norm - requires a cuDNN version greater than or equal to v4 (>=v4). - But cudnn_batch_norm is faster and needs less - memory than batch_norm. mkldnn_batch_norm requires - use_mkldnn to be enabled. By default (None), we will - automatically select cudnn_batch_norm for GPU, - mkldnn_batch_norm for MKLDNN and batch_norm for CPU. - Users can specify the batch norm type. If you use - cudnn_batch_norm, we suggest you use the latest version, - such as v5.1. - :type batch_norm_type: None | string, None or "batch_norm" or "cudnn_batch_norm" - or "mkldnn_batch_norm" - :param act: Activation type. ReluActivation is the default activation. - :type act: BaseActivation - :param num_channels: The number of input channels. If the parameter is not set or - set to None, its actual value will be automatically set to - the channels number of the input. - :type num_channels: int - :param bias_attr: :math:`\\beta`. The bias attribute. If the parameter is set to - False or an object whose type is not ParameterAttribute, no - bias is defined. If the parameter is set to True, the bias is - initialized to zero. - :type bias_attr: ParameterAttribute | None | bool | Any - :param param_attr: :math:`\\gamma`. The parameter attribute. See ParameterAttribute - for details. - :type param_attr: ParameterAttribute - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute - :param use_global_stats: Whether to use moving mean/variance statistics during - the testing period. If the parameter is set to None or - True, it will use moving mean/variance statistics - during testing. If the parameter is set to False, it - will use the mean and variance of the current batch - of test data. - :type use_global_stats: bool | None. - :param epsilon: The small constant added to the variance to improve numeric stability. - :type epsilon: float. - :param moving_average_fraction: Factor used in the moving average computation. - :math:`runningMean = newMean*(1-factor) + runningMean*factor` - :type moving_average_fraction: float. - :param mean_var_names: [mean name, variance name] - :type mean_var_names: string list - :return: LayerOutput object.
-
-
-@wrap_name_default()
-@layer_support()
-def sum_to_one_norm_layer(input, name=None, layer_attr=None):
-    """
-    A layer for sum-to-one normalization,
-    which is used in the Neural Turing Machine.
-
-    .. math::
-        out[i] = \\frac {in[i]} {\\sum_{k=1}^N in[k]}
-
-    where :math:`in` is a (batchSize x dataDim) input vector,
-    and :math:`out` is a (batchSize x dataDim) output vector.
-
-    The example usage is:
-
-    .. code-block:: python
-
-        sum_to_one_norm = sum_to_one_norm_layer(input=layer)
-
-    :param input: The input of this layer.
-    :type input: LayerOutput
-    :param name: The name of this layer. It is optional.
-    :type name: basestring
-    :param layer_attr: The extra layer attribute. See ExtraLayerAttribute
-                       for details.
-    :type layer_attr: ExtraLayerAttribute
-    :return: LayerOutput object.
-    :rtype: LayerOutput
-    """
-    Layer(
-        name=name,
-        type=LayerType.SUM_TO_ONE_NORM_LAYER,
-        inputs=[input.name],
-        **ExtraAttr.to_kwargs(layer_attr))
-    return LayerOutput(
-        name, LayerType.SUM_TO_ONE_NORM_LAYER, parents=[input], size=input.size)
-
-
-@wrap_name_default()
-@layer_support()
-def row_l2_norm_layer(input, name=None, layer_attr=None):
-    """
-    A layer for L2 normalization of each row.
-
-    .. math::
-        out[i] = \\frac{in[i]} {\\sqrt{\\sum_{k=1}^N in[k]^{2}}}
-
-    where the size of :math:`in` is (batchSize x dataDim),
-    and the size of :math:`out` is (batchSize x dataDim).
-
-    The example usage is:
-
-    .. code-block:: python
-
-        row_l2_norm = row_l2_norm_layer(input=layer)
-
-    :param input: The input of this layer.
-    :type input: LayerOutput
-    :param name: The name of this layer. It is optional.
-    :type name: basestring
-    :param layer_attr: The extra layer attribute. See ExtraLayerAttribute
-                       for details.
-    :type layer_attr: ExtraLayerAttribute
-    :return: LayerOutput object.
-    :rtype: LayerOutput
-    """
-    Layer(
-        name=name,
-        type=LayerType.ROW_L2_NORM_LAYER,
-        inputs=[input.name],
-        **ExtraAttr.to_kwargs(layer_attr))
-    return LayerOutput(
-        name, LayerType.ROW_L2_NORM_LAYER, parents=[input], size=input.size)
-
-
-@wrap_name_default("addto")
-@wrap_act_default(act=LinearActivation())
-@wrap_bias_attr_default(has_bias=False)
-@layer_support(DROPOUT, ERROR_CLIPPING)
-def addto_layer(input, act=None, name=None, bias_attr=None, layer_attr=None):
-    """
-    AddtoLayer.
-
-    .. math::
-
-        y = f(\\sum_{i} x_i + b)
-
-    where :math:`y` is output, :math:`x` is input, :math:`b` is bias,
-    and :math:`f` is activation function.
-
-    The example usage is:
-
-    .. code-block:: python
-
-        addto = addto_layer(input=[layer1, layer2],
-                            act=ReluActivation(),
-                            bias_attr=False)
-
-    This layer simply adds all input layers together, then applies the
-    activation to the sum. All inputs should share the same dimension, which
-    is also the dimension of this layer's output.
-
-    There is no weight matrix for each input, because it is just a simple add
-    operation. If you want a complicated operation before the addition, please
-    use mixed_layer.
-
-    :param name: The name of this layer. It is optional.
-    :type name: basestring
-    :param input: The input layers. It could be a LayerOutput or list/tuple of
-                  LayerOutput.
-    :type input: LayerOutput | list | tuple
-    :param act: Activation Type. LinearActivation is the default activation.
-    :type act: BaseActivation
-    :param bias_attr: The bias attribute. If the parameter is set to False or an object
-                      whose type is not ParameterAttribute, no bias is defined. If the
-                      parameter is set to True, the bias is initialized to zero.
-    :type bias_attr: ParameterAttribute | None | bool | Any
-    :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
-                       details.
-    :type layer_attr: ExtraLayerAttribute
-    :return: LayerOutput object.
-    :rtype: LayerOutput
-    """
-    num_filters = None
-    if isinstance(input, LayerOutput):
-        input = [input]
-
-    assert isinstance(input, collections.Sequence)
-    ipts_for_layer = []
-    for each_input in input:
-        assert isinstance(each_input, LayerOutput)
-        ipts_for_layer.append(Input(each_input.name))
-        if each_input.num_filters is not None:
-            num_filters = each_input.num_filters
-
-    l = Layer(
-        name=name,
-        type=LayerType.ADDTO_LAYER,
-        inputs=ipts_for_layer,
-        bias=ParamAttr.to_bias(bias_attr),
-        active_type=act.name,
-        **ExtraLayerAttribute.to_kwargs(layer_attr))
-
-    return LayerOutput(
-        name,
-        LayerType.ADDTO_LAYER,
-        parents=input,
-        activation=act,
-        num_filters=num_filters,
-        size=l.config.size)
-
-
-@wrap_act_default(act=IdentityActivation())
-@wrap_name_default("concat")
-@layer_support(DROPOUT, ERROR_CLIPPING)
-def concat_layer(input, act=None, name=None, layer_attr=None, bias_attr=None):
-    """
-    Concatenate all input vectors into one vector.
-    Inputs can be a list of LayerOutput or a list of Projection.
-
-    The example usage is:
-
-    .. code-block:: python
-
-        concat = concat_layer(input=[layer1, layer2])
-
-    :param name: The name of this layer. It is optional.
-    :type name: basestring
-    :param input: The input layers or projections
-    :type input: list | tuple | collections.Sequence
-    :param act: Activation type. IdentityActivation is the default activation.
-    :type act: BaseActivation
-    :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
-                       details.
-    :type layer_attr: ExtraLayerAttribute
-    :return: LayerOutput object.
-    :rtype: LayerOutput
-    """
-
-    if isinstance(input, LayerOutput):
-        input = [input]
-    elif isinstance(input, Projection):
-        input = [input]
-    else:
-        assert isinstance(input, collections.Sequence)
-
-    def __is_type__(o, tp):
-        if not isinstance(o, collections.Sequence):
-            if o == tp:
-                return True
-            elif len(o.__bases__) == 0:
-                return False
-            else:
-                for bs in o.__bases__:
-                    if __is_type__(bs, tp):
-                        return True
-                return False
-        else:
-            tmp = map(lambda _x: __is_type__(_x, tp), o)
-            a = tmp[0]
-            for b in tmp[1:]:
-                assert a == b
-            return a
-
-    def __reduce_concat_type__(a, b):
-        assert __is_type__([a, b], Projection) or __is_type__([a, b],
-                                                              LayerOutput)
-        return a
-
-    is_concat_layer = __is_type__(
-        reduce(__reduce_concat_type__, map(type, input)), LayerOutput)
-
-    layer_type = (LayerType.CONCAT_LAYER
-                  if is_concat_layer else LayerType.CONCAT_PROJ_LAYER)
-
-    if layer_type == LayerType.CONCAT_LAYER:
-        assert not bias_attr
-
-    layer = Layer(
-        name=name,
-        type=layer_type,
-        inputs=[x.name for x in input] if is_concat_layer else input,
-        active_type=act.name,
-        bias=ParamAttr.to_bias(bias_attr),
-        **ExtraLayerAttribute.to_kwargs(layer_attr))
-
-    sz = layer.config.size
-
-    return LayerOutput(
-        name,
-        layer_type=layer_type,
-        parents=input if is_concat_layer else [x.origin for x in input],
-        activation=act,
-        size=sz)
-
-
-@wrap_name_default("seqconcat")
-@wrap_act_default(act=IdentityActivation())
-@wrap_bias_attr_default(has_bias=False)
-@layer_support(DROPOUT, ERROR_CLIPPING)
-def seq_concat_layer(a, b, act=None, name=None, layer_attr=None,
-                     bias_attr=None):
-    """
-    Concatenate sequence a and sequence b.
-
-    Inputs:
-      - a = [a1, a2, ..., am]
-      - b = [b1, b2, ..., bn]
-
-    Output: [a1, ..., am, b1, ..., bn]
-
-    Note that the above computation is for one sample. Multiple samples are
-    processed in one batch.
-
-    The example usage is:
-
-    .. code-block:: python
-
-        concat = seq_concat_layer(a=layer1, b=layer2)
-
-    :param name: The name of this layer. It is optional.
-    :type name: basestring
-    :param a: The first input sequence layer
-    :type a: LayerOutput
-    :param b: The second input sequence layer
-    :type b: LayerOutput
-    :param act: Activation type. IdentityActivation is the default activation.
-    :type act: BaseActivation
-    :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
-                       details.
-    :type layer_attr: ExtraLayerAttribute
-    :param bias_attr: The bias attribute. If the parameter is set to False or an object
-                      whose type is not ParameterAttribute, no bias is defined. If the
-                      parameter is set to True, the bias is initialized to zero.
-    :type bias_attr: ParameterAttribute | None | bool | Any
-    :return: LayerOutput object.
-    :rtype: LayerOutput
-    """
-    assert isinstance(a, LayerOutput) and isinstance(b, LayerOutput)
-    assert a.size == b.size
-    Layer(
-        name=name,
-        type=LayerType.SEQUENCE_CONCAT_LAYER,
-        inputs=[a.name, b.name],
-        active_type=act.name,
-        bias=ParamAttr.to_bias(bias_attr),
-        **ExtraLayerAttribute.to_kwargs(layer_attr))
-
-    return LayerOutput(
-        name,
-        layer_type=LayerType.SEQUENCE_CONCAT_LAYER,
-        parents=[a, b],
-        activation=act,
-        size=a.size)
-
-
-@wrap_name_default("memory", "memory_name")
-def memory(name,
-           size,
-           memory_name=None,
-           is_seq=False,
-           boot_layer=None,
-           boot_bias=None,
-           boot_bias_active_type=None,
-           boot_with_const_id=None):
-    """
-    The memory takes a layer's output at the previous time step as its own
-    output.
-
-    If boot_bias is set, the activation of the bias is the initial value of
-    the memory.
-
-    If boot_with_const_id is set, then the memory's output at the first time
-    step is an IndexSlot, and Arguments.ids()[0] is this :code:`cost_id`.
-
-    If boot_layer is specified, the memory's output at the first time step will
-    be the boot_layer's output.
-
-    Otherwise, the memory's output at the first time step defaults to zero.
-
-    .. code-block:: python
-
-        mem = memory(size=256, name='state')
-        state = fc_layer(input=mem, size=256, name='state')
-
-    If you do not want to specify the name, you can also use set_input()
-    to specify the layer to be remembered as the following:
-
-    .. code-block:: python
-
-        mem = memory(size=256)
-        state = fc_layer(input=mem, size=256)
-        mem.set_input(state)
-
-    :param name: The name of the layer which this memory remembers.
-                 If name is None, the user should call set_input() to specify the
-                 name of the layer which this memory remembers.
-    :type name: basestring
-    :param size: The dimensionality of memory.
-    :type size: int
-    :param memory_name: The name of the memory. It is ignored when name is provided.
-    :type memory_name: basestring
-    :param is_seq: DEPRECATED. Whether the boot_layer is a sequence.
-    :type is_seq: bool
-    :param boot_layer: This parameter specifies memory's output at the first time
-                       step and the output is boot_layer's output.
-    :type boot_layer: LayerOutput | None
-    :param boot_bias: The bias attribute of memory's output at the first time step.
-                      If the parameter is set to False or an object whose type is not
-                      ParameterAttribute, no bias is defined. If the parameter is set
-                      to True, the bias is initialized to zero.
-    :type boot_bias: ParameterAttribute | None
-    :param boot_bias_active_type: Activation type for memory's bias at the first time
-                                  step. LinearActivation is the default activation.
-    :type boot_bias_active_type: BaseActivation
-    :param boot_with_const_id: This parameter specifies memory's output at the first
-                               time step and the output is an index.
-    :type boot_with_const_id: int
-    :return: LayerOutput object.
-    :rtype: LayerOutput
-    """
-    if boot_bias_active_type is None:
-        boot_bias_active_type = LinearActivation()
-
-    assert boot_bias is None or isinstance(boot_bias, ParameterAttribute)
-    if isinstance(boot_bias, ParameterAttribute):
-        boot_bias = ParamAttr.to_bias(boot_bias)
-
-    assert boot_layer is None or isinstance(boot_layer, LayerOutput)
-    if name is not None:
-        memory_name = None
-
-    memory_name = Memory(
-        name,
-        size,
-        boot_layer=boot_layer.name if boot_layer is not None else None,
-        boot_bias=boot_bias,
-        boot_bias_active_type=boot_bias_active_type.name,
-        boot_with_const_id=boot_with_const_id,
-        memory_name=memory_name)
-
-    lout = LayerOutput(
-        name=memory_name,
-        size=size,
-        layer_type=LayerType.MEMORY,
-        parents=[boot_layer] if boot_layer is not None else None)
-    return lout
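A compact sketch of the usual pattern inside a recurrent_group step function (the names here are illustrative): the memory and the layer it remembers share a name, so each step sees the previous step's output.

.. code-block:: python

    def step(x):
        # 'rnn_state' is remembered across time steps.
        mem = memory(name='rnn_state', size=128)
        state = fc_layer(input=[x, mem], size=128, name='rnn_state')
        return state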
-
-
-@wrap_bias_attr_default()
-@wrap_act_default(param_names=['gate_act'], act=SigmoidActivation())
-@wrap_act_default(param_names=['state_act'], act=TanhActivation())
-@wrap_act_default(act=TanhActivation())
-@wrap_name_default('lstm_step')
-@layer_support()
-def lstm_step_layer(input,
-                    state,
-                    size=None,
-                    act=None,
-                    name=None,
-                    gate_act=None,
-                    state_act=None,
-                    bias_attr=None,
-                    layer_attr=None):
-    """
-    LSTM Step Layer. This function is used only in recurrent_group.
-    The lstm equations are shown as follows.
-
-    .. math::
-
-        i_t & = \\sigma(W_{x_i}x_{t} + W_{h_i}h_{t-1} + W_{c_i}c_{t-1} + b_i)
-
-        f_t & = \\sigma(W_{x_f}x_{t} + W_{h_f}h_{t-1} + W_{c_f}c_{t-1} + b_f)
-
-        c_t & = f_t c_{t-1} + i_t \\tanh(W_{x_c}x_t + W_{h_c}h_{t-1} + b_c)
-
-        o_t & = \\sigma(W_{x_o}x_{t} + W_{h_o}h_{t-1} + W_{c_o}c_t + b_o)
-
-        h_t & = o_t \\tanh(c_t)
-
-
-    The input of the lstm step is :math:`Wx_t + Wh_{t-1}`, and users should use
-    :code:`mixed_layer` and :code:`full_matrix_projection` to compute these
-    input vectors.
-
-    The state of the lstm step is :math:`c_{t-1}`, and the lstm step layer
-    computes
-
-    .. math::
-
-        i_t = \\sigma(input + W_{ci}c_{t-1} + b_i)
-
-        ...
-
-
-    This layer has two outputs. The default output is :math:`h_t`. The other
-    output is the cell state :math:`c_t`, whose name is 'state'; users can use
-    :code:`get_output_layer` to extract this output.
-
-    :param name: The name of this layer. It is optional.
-    :type name: basestring
-    :param size: The dimension of this layer's output, which must be
-                 equal to the dimension of the state.
-    :type size: int
-    :param input: The input of this layer.
-    :type input: LayerOutput
-    :param state: The state of the LSTM unit.
-    :type state: LayerOutput
-    :param act: Activation type. TanhActivation is the default activation.
-    :type act: BaseActivation
-    :param gate_act: Activation type of the gate. SigmoidActivation is the
-                     default activation.
-    :type gate_act: BaseActivation
-    :param state_act: Activation type of the state. TanhActivation is the
-                      default activation.
-    :type state_act: BaseActivation
-    :param bias_attr: The bias attribute. If the parameter is set to False or an object
-                      whose type is not ParameterAttribute, no bias is defined. If the
-                      parameter is set to True, the bias is initialized to zero.
-    :type bias_attr: ParameterAttribute | None | bool | Any
-    :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for details.
-    :type layer_attr: ExtraLayerAttribute
-    :return: LayerOutput object.
-    :rtype: LayerOutput
-    """
-
-    assert size is None or state.size == size
-    size = state.size
-    Layer(
-        name=name,
-        type=LayerType.LSTM_STEP_LAYER,
-        active_type=act.name,
-        active_gate_type=gate_act.name,
-        active_state_type=state_act.name,
-        bias=ParamAttr.to_bias(bias_attr),
-        size=state.size,
-        inputs=[input.name, state.name],
-        **ExtraLayerAttribute.to_kwargs(layer_attr))
-
-    return LayerOutput(
-        name=name,
-        layer_type=LayerType.LSTM_STEP_LAYER,
-        parents=[input, state],
-        activation=act,
-        size=size,
-        outputs=['default', 'state'])
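A condensed sketch of how lstm_step_layer is typically wired inside a recurrent_group step function, following the docstring above (names and sizes are illustrative; the projected input carries the four gate pre-activations, hence size * 4):

.. code-block:: python

    def lstm_step(x):
        state_mem = memory(name='lstm_state', size=128)   # c_{t-1}
        out_mem = memory(name='lstm_out', size=128)       # h_{t-1}
        with mixed_layer(size=128 * 4) as ipt:            # Wx_t + Wh_{t-1}
            ipt += full_matrix_projection(input=x)
            ipt += full_matrix_projection(input=out_mem)
        out = lstm_step_layer(input=ipt, state=state_mem, name='lstm_out')
        # Expose c_t under the name that the state memory remembers.
        get_output_layer(input=out, arg_name='state', name='lstm_state')
        return out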
-
-
-@wrap_bias_attr_default()
-@wrap_param_attr_default()
-@wrap_act_default(param_names=['gate_act'], act=SigmoidActivation())
-@wrap_act_default(act=TanhActivation())
-@wrap_name_default('gru_step')
-@layer_support()
-def gru_step_layer(input,
-                   output_mem,
-                   size=None,
-                   act=None,
-                   name=None,
-                   gate_act=None,
-                   bias_attr=None,
-                   param_attr=None,
-                   layer_attr=None):
-    """
-    GRU Step Layer.
-
-    :param input: The input of this layer, whose dimension must be divisible by 3.
-    :type input: LayerOutput
-    :param output_mem: A memory which memorizes the output of this layer at the
-                       previous time step.
-    :type output_mem: LayerOutput
-    :param size: The dimension of this layer's output. If it is not set or set to None,
-                 it will be set to one-third of the dimension of the input automatically.
-    :type size: int
-    :param act: Activation type of this layer's output. TanhActivation
-                is the default activation.
-    :type act: BaseActivation
-    :param name: The name of this layer. It is optional.
-    :type name: basestring
-    :param gate_act: Activation type of this layer's two gates. SigmoidActivation is
-                     the default activation.
-    :type gate_act: BaseActivation
-    :param bias_attr: The parameter attribute for bias. If this parameter is set to
-                      False or an object whose type is not ParameterAttribute, no bias
-                      is defined. If this parameter is set to True,
-                      the bias is initialized to zero.
-    :type bias_attr: ParameterAttribute | None | bool | Any
-    :param param_attr: The parameter attribute. See ParameterAttribute for details.
-    :type param_attr: ParameterAttribute
-    :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for details.
-    :type layer_attr: ExtraLayerAttribute
-    :return: LayerOutput object.
-    :rtype: LayerOutput
-    """
-    assert input.size % 3 == 0
-    if size is None:
-        size = input.size / 3
-    Layer(
-        name=name,
-        type=LayerType.GRU_STEP_LAYER,
-        # The parameter here is for transforming the output_mem. The input has
-        # already been transformed outside this module, so it does not need a
-        # parameter associated with it. The parameter is instead grouped with
-        # the input for backward model compatibility.
-        inputs=[Input(input.name, **param_attr.attr), output_mem.name],
-        bias=ParamAttr.to_bias(bias_attr),
-        size=size,
-        active_type=act.name,
-        active_gate_type=gate_act.name,
-        **ExtraAttr.to_kwargs(layer_attr))
-    return LayerOutput(
-        name=name,
-        layer_type=LayerType.GRU_STEP_LAYER,
-        parents=[input, output_mem],
-        size=size,
-        activation=act)
-
-
-@wrap_bias_attr_default()
-@wrap_param_attr_default()
-@wrap_act_default(param_names=['gate_act'], act=SigmoidActivation())
-@wrap_act_default(act=TanhActivation())
-@wrap_name_default('gru_step_naive')
-@layer_support(ERROR_CLIPPING, DROPOUT)
-def gru_step_naive_layer(input,
-                         output_mem,
-                         size=None,
-                         name=None,
-                         act=None,
-                         gate_act=None,
-                         bias_attr=None,
-                         param_attr=None,
-                         layer_attr=None):
-    """
-    GRU Step Layer, realized using the PaddlePaddle API. It supports
-    ERROR_CLIPPING and DROPOUT.
-
-    :param input: The input of this layer, whose dimension must be divisible by 3.
-    :type input: LayerOutput
-    :param output_mem: A memory which memorizes the output of this layer at the
-                       previous time step.
-    :type output_mem: LayerOutput
-    :param size: The dimension of this layer's output. If it is not set or set to None,
-                 it will be set to one-third of the dimension of the input automatically.
-    :type size: int
-    :param name: The name of this layer. It is optional.
-    :type name: basestring
-    :param act: Activation type of this layer's output. TanhActivation
-                is the default activation.
-    :type act: BaseActivation
-    :param gate_act: Activation type of this layer's two gates. SigmoidActivation
-                     is the default activation.
-    :type gate_act: BaseActivation
-    :param bias_attr: The parameter attribute for bias. If this parameter is set to
-                      False or an object whose type is not ParameterAttribute, no bias
-                      is defined. If this parameter is set to True,
-                      the bias is initialized to zero.
-    :type bias_attr: ParameterAttribute | None | bool | Any
-    :param param_attr: The parameter attribute. See ParameterAttribute for details.
-    :type param_attr: ParameterAttribute
-    :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for details.
-    :type layer_attr: ExtraLayerAttribute
-    :return: LayerOutput object.
-    :rtype: LayerOutput
-    """
-    if input.size % 3 != 0:
-        raise ValueError("GruStep input size must be divisible by 3")
-    if size is None:
-        size = input.size / 3
-
-    if bias_attr and bias_attr.attr.get("parameter_name", None) is not None:
-        raise ValueError("You should not specify the field `name` in bias_attr."
-                         " Otherwise, the three biases, which correspond to "
-                         " the two gates and the mixed layer for computing Wx+b"
-                         ", will unexpectedly share the same parameter matrix.")
-
-    def __gate__(gate_name, offset):
-        with mixed_layer(
-                name=name + "_" + gate_name,
-                size=size,
-                layer_attr=layer_attr,
-                bias_attr=bias_attr,
-                act=gate_act) as gate:
-            gate += identity_projection(input=input, offset=offset)
-            gate += full_matrix_projection(
-                input=output_mem, param_attr=param_attr)
-        return gate
-
-    update_gate = __gate__("update", 0)
-    reset_gate = __gate__("reset", size)
-
-    with mixed_layer(
-            name=name + "_reset_output", bias_attr=False) as reset_output:
-        reset_output += dotmul_operator(a=output_mem, b=reset_gate)
-
-    with mixed_layer(
-            name=name + "_output_candidate",
-            size=size,
-            layer_attr=layer_attr,
-            bias_attr=bias_attr,
-            act=act) as output_candidate:
-        output_candidate += identity_projection(input=input, offset=2 * size)
-        output_candidate += full_matrix_projection(
-            input=reset_output, param_attr=param_attr)
-
-    with mixed_layer(name=name) as output:
-        output += identity_projection(output_mem)
-        output += dotmul_operator(a=output_mem, b=update_gate, scale=-1.0)
-        output += dotmul_operator(a=output_candidate, b=update_gate)
-
-    return output
-
-
-@wrap_name_default()
-@layer_support()
-def get_output_layer(input, arg_name, name=None, layer_attr=None):
-    """
-    Get a layer's output by name. In PaddlePaddle, a layer may have multiple
-    outputs, but only the default one is returned. If the user wants to use
-    another output instead, get_output_layer extracts it from the input layer.
-
-    :param name: The name of this layer. It is optional.
-    :type name: basestring
-    :param input: The input layer. And this layer should contain
-                  multiple outputs.
-    :type input: LayerOutput
-    :param arg_name: The name of the output to be extracted from the input layer.
-    :type arg_name: basestring
-    :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
-                       details.
-    :return: LayerOutput object.
-    :rtype: LayerOutput
-    """
-    # GetOutputLayer
-    assert arg_name in input.outputs, \
-        'Get output from a non-existent input. The requested output name is ' \
-        '%s, which is not in %s' % (arg_name, ",".join(input.outputs))
-    Layer(
-        name=name,
-        type=LayerType.GET_OUTPUT_LAYER,
-        inputs=[Input(
-            input.name, input_layer_argument=arg_name)],
-        size=input.size,
-        **ExtraLayerAttribute.to_kwargs(layer_attr))
-
-    return LayerOutput(
-        name=name,
-        layer_type=LayerType.GET_OUTPUT_LAYER,
-        parents=[input],
-        size=input.size)
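For example, to read the cell state of an lstm_step_layer, which exposes the outputs 'default' and 'state' (see above; `lstm_out` here is an assumed lstm_step_layer result):

.. code-block:: python

    cell = get_output_layer(input=lstm_out, arg_name='state')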
-
-
-@wrap_name_default()
-@wrap_act_default()
-@wrap_bias_attr_default()
-@wrap_param_attr_default()
-@layer_support()
-def recurrent_layer(input,
-                    act=None,
-                    bias_attr=None,
-                    param_attr=None,
-                    name=None,
-                    reverse=False,
-                    layer_attr=None):
-    """
-    Simple recurrent unit layer. It is just a fully connected layer applied
-    through time.
-
-    For each sequence [start, end] it performs the following computation\:
-
-    .. math::
-
-        out_{i} = act(in_{i}) \\ \\ \\text{for} \\ i = start \\\\
-        out_{i} = act(in_{i} + out_{i-1} * W) \\ \\ \\text{for} \\ start < i <= end
-
-    If reverse is true, the order is reversed\:
-
-    .. math::
-
-        out_{i} = act(in_{i}) \\ \\ \\text{for} \\ i = end \\\\
-        out_{i} = act(in_{i} + out_{i+1} * W) \\ \\ \\text{for} \\ start <= i < end
-
-
-    :param input: The input of this layer.
-    :type input: LayerOutput
-    :param act: Activation type. TanhActivation is the default activation.
-    :type act: BaseActivation
-    :param bias_attr: The parameter attribute for bias. If this parameter is set to
-                      False or an object whose type is not ParameterAttribute,
-                      no bias is defined. If the parameter is set to True,
-                      the bias is initialized to zero.
-    :type bias_attr: ParameterAttribute | None | bool | Any
-    :param param_attr: The parameter attribute. See ParameterAttribute for
-                       details.
-    :type param_attr: ParameterAttribute
-    :param name: The name of this layer. It is optional.
-    :type name: basestring
-    :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
-                       details.
-    :type layer_attr: ExtraLayerAttribute
-    :return: LayerOutput object.
-    :rtype: LayerOutput
-    """
-    Layer(
-        name=name,
-        type=LayerType.RECURRENT_LAYER,
-        inputs=Input(input.name, **param_attr.attr),
-        active_type=act.name,
-        bias=ParamAttr.to_bias(bias_attr),
-        reversed=reverse,
-        **ExtraAttr.to_kwargs(layer_attr))
-    return LayerOutput(
-        name=name,
-        layer_type=LayerType.RECURRENT_LAYER,
-        parents=[input],
-        size=input.size,
-        activation=act,
-        reverse=reverse)
-
-
-class StaticInput(object):
-    """
-    StaticInput is only used in recurrent_group, which defines a read-only memory
-    and can be a sequence or non-sequence.
-    :param size: DEPRECATED
-    :param is_seq: DEPRECATED
-    """
-
-    def __init__(self, input, is_seq=False, size=None):
-        assert isinstance(input, LayerOutput)
-        self.input = input
-        assert input.size is not None
-        if size is not None:
-            assert input.size == size
-
-
-def SubsequenceInput(input):
-    """
-    DEPRECATED.
-    Input sequence has sub-sequence, used in recurrent_group.
-
-    The example usage is:
-
-    .. code-block:: python
-
-        input = SubsequenceInput(layer)
-    """
-    return input
-
-
-@wrap_name_default("recurrent_group")
-def recurrent_group(step, input, reverse=False, name=None, targetInlink=None):
-    """
-    Recurrent layer group is an extremely flexible recurrent unit in
-    PaddlePaddle. As long as the user defines the calculation done within a
-    time step, PaddlePaddle will iterate such a recurrent calculation over
-    the sequence input. This is useful for attention-based models, or Neural
-    Turing Machine-like models.
-
-    The basic usage (time steps) is:
-
-    .. code-block:: python
-
-        def step(input):
-            output = fc_layer(input=layer,
-                              size=1024,
-                              act=LinearActivation(),
-                              bias_attr=False)
-            return output
-
-        group = recurrent_group(input=layer,
-                                step=step)
-
-    You can see following configs for further usages:
-
-    - time steps: lstmemory_group, paddle/legacy/gserver/tests/sequence_layer_group.conf, \
-                  demo/seqToseq/seqToseq_net.py
-    - sequence steps: paddle/legacy/gserver/tests/sequence_nest_layer_group.conf
-
-    :param step: A step function which takes the input of recurrent_group as its own
-                 input and returns values as recurrent_group's output every time step.
-
-                 The recurrent group scatters a sequence into time steps, invokes the
-                 step function for each time step, and then gathers the per-step
-                 outputs into the layer group's output.
-
-    :type step: callable
-
-    :param name: The recurrent_group's name. It is optional.
-    :type name: basestring
-
-    :param input: Input links array.
-
-                  LayerOutput will be scattered into time steps.
-                  SubsequenceInput will be scattered into sequence steps.
-                  StaticInput will be imported to each time step, and doesn't change
-                  over time. It is a mechanism to access a layer outside the step
-                  function.
-
-    :type input: LayerOutput | StaticInput | SubsequenceInput | list | tuple
-
-    :param reverse: If reverse is set to True, the recurrent unit will process the
-                    input sequence in a reverse order.
-    :type reverse: bool
-
-    :param targetInlink: DEPRECATED.
-                         The input layer which shares info with the layer group's
-                         output.
-
-                         Param input specifies multiple input layers. For
-                         SubsequenceInput inputs, the config should assign one input
-                         layer that shares info (the number of sentences and the
-                         number of words in each sentence) with all of the layer
-                         group's outputs. targetInlink should be one of the layer
-                         group's inputs.
-
-    :type targetInlink: LayerOutput | SubsequenceInput
-
-    :return: LayerOutput object.
-    :rtype: LayerOutput
-    """
-    model_type('recurrent_nn')
-
-    if isinstance(input, LayerOutput) or isinstance(input, StaticInput):
-        input = [input]
-    assert isinstance(input, collections.Sequence)
-
-    def is_in_links(x):
-        return isinstance(x, LayerOutput)
-
-    in_links = filter(is_in_links, input)
-
-    RecurrentLayerGroupWithoutOutLinksBegin(
-        name=name,
-        in_links=map(lambda x: x.name, in_links),
-        seq_reversed=reverse)
-    in_args = []
-    for each_input in input:
-        if isinstance(each_input, StaticInput):  # StaticInput
-            mem_name = "__%s_memory__" % each_input.input.name
-            mem = memory(
-                name=None,
-                size=each_input.input.size,
-                boot_layer=each_input.input)
-            mem.set_input(mem)
-            in_args.append(mem)
-        else:
-            in_args.append(each_input)
-
-    layer_outs = step(*in_args)
-
-    if isinstance(layer_outs, LayerOutput):
-        layer_outs = [layer_outs]
-
-    for layer_out in layer_outs:
-        assert isinstance(
-            layer_out, LayerOutput
-        ), "Type of step function's return value must be LayerOutput."
-        layer_out.reverse = reverse
-        RecurrentLayerGroupSetOutLink(layer_out.name)
-
-    RecurrentLayerGroupEnd(name=name)
-
-    for layer_out in layer_outs:
-        # The previous full_name is the name inside the recurrent group.
-        # We need a full_name outside the recurrent group.
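-        # MakeLayerNameInSubmodel qualifies the layer's name with the submodel
-        # name, so the group's output can be referenced from outside the group.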
-        layer_out.full_name = MakeLayerNameInSubmodel(layer_out.name)
-
-    if len(layer_outs) == 1:
-        return layer_outs[0]
-    else:
-        return layer_outs
-
-
-class BaseGeneratedInput(object):
-    def __init__(self):
-        self.bos_id = None
-        self.eos_id = None
-
-    def before_real_step(self):
-        raise NotImplementedError()
-
-    def after_real_step(self, *args):
-        raise NotImplementedError()
-
-
-class GeneratedInput(BaseGeneratedInput):
-    def after_real_step(self, input):
-        if isinstance(input, LayerOutput):
-            input = [input]
-        elif isinstance(input, collections.Sequence):
-            input = list(input)
-            if len(input) > 1:
-                logger.info(
-                    ("More than one layer inside the recurrent_group "
-                     "is returned as output of the entire recurrent_group. "
-                     "Please guarantee that the first output is the "
-                     "probability of the predicted next word."))
-
-        return [maxid_layer(
-            input=input[0], name='__beam_search_predict__')] + (
-                input[1:] if len(input) > 1 else [])
-
-    def before_real_step(self):
-        predict_id = memory(
-            name='__beam_search_predict__',
-            size=self.size,
-            boot_with_const_id=self.bos_id)
-
-        trg_emb = embedding_layer(
-            input=predict_id,
-            size=self.embedding_size,
-            param_attr=ParamAttr(name=self.embedding_name))
-        return trg_emb
-
-    def __init__(self, size, embedding_name, embedding_size):
-        super(GeneratedInput, self).__init__()
-        self.size = size
-        self.embedding_name = embedding_name
-        self.embedding_size = embedding_size
-
-
-@wrap_name_default()
-def maxid_layer(input, name=None, layer_attr=None):
-    """
-    A layer for finding the id which has the maximal value for each sample.
-    The result is stored in output.ids.
-
-    The example usage is:
-
-    .. code-block:: python
-
-        maxid = maxid_layer(input=layer)
-
-    :param input: The input of this layer.
-    :type input: LayerOutput
-    :param name: The name of this layer. It is optional.
-    :type name: basestring
-    :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
-                       details.
-    :type layer_attr: ExtraLayerAttribute
-    :return: LayerOutput object.
-    :rtype: LayerOutput
-    """
-
-    assert isinstance(input, LayerOutput)
-    l = Layer(
-        name=name,
-        type='maxid',
-        inputs=[input.name],
-        **ExtraLayerAttribute.to_kwargs(layer_attr))
-    return LayerOutput(
-        name=name,
-        layer_type=LayerType.MAXID_LAYER,
-        parents=[input],
-        size=l.config.size)
-
-
-@wrap_name_default()
-def dot_prod_layer(input1, input2, name=None, layer_attr=None):
-    """
-    A layer for computing the dot product of two vectors.
-
-    The example usage is:
-
-    .. code-block:: python
-
-        dot_prod = dot_prod_layer(input1=vec1, input2=vec2)
-
-    :param name: The name of this layer. It is optional.
-    :type name: basestring
-    :param input1: The first input layer.
-    :type input1: LayerOutput
-    :param input2: The second input layer.
-    :type input2: LayerOutput
-    :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
-                       details.
-    :type layer_attr: ExtraLayerAttribute
-    :return: LayerOutput object.
-    :rtype: LayerOutput
-    """
-    assert isinstance(input1, LayerOutput)
-    assert isinstance(input2, LayerOutput)
-    assert input1.size == input2.size, ("Two inputs should have the same size.")
-
-    l = Layer(
-        name=name,
-        type=LayerType.DOT_PROD_LAYER,
-        inputs=[input1.name, input2.name],
-        **ExtraLayerAttribute.to_kwargs(layer_attr))
-    return LayerOutput(
-        name=name,
-        layer_type=LayerType.DOT_PROD_LAYER,
-        parents=[input1, input2],
-        size=l.config.size)
-
-
-@wrap_name_default()
-def out_prod_layer(input1, input2, name=None, layer_attr=None):
-    """
-    A layer for computing the outer product of two vectors.
-    The result is a matrix of size(input1) x size(input2).
-
-    The example usage is:
-
-    .. code-block:: python
-
-        out_prod = out_prod_layer(input1=vec1, input2=vec2)
-
-    :param name: The name of this layer. It is optional.
-    :type name: basestring
-    :param input1: The first input layer.
-    :type input1: LayerOutput
-    :param input2: The second input layer.
-    :type input2: LayerOutput
-    :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
-                       details.
-    :type layer_attr: ExtraLayerAttribute
-    :return: LayerOutput object.
-    :rtype: LayerOutput
-    """
-
-    assert isinstance(input1, LayerOutput)
-    assert isinstance(input2, LayerOutput)
-    l = Layer(
-        name=name,
-        type=LayerType.OUT_PROD_LAYER,
-        inputs=[input1.name, input2.name],
-        **ExtraLayerAttribute.to_kwargs(layer_attr))
-    return LayerOutput(
-        name=name,
-        layer_type=LayerType.OUT_PROD_LAYER,
-        parents=[input1, input2],
-        size=l.config.size)
-
-
-@wrap_name_default()
-def eos_layer(input, eos_id, name=None, layer_attr=None):
-    """
-    A layer for checking EOS for each sample:
-    - output_id = (input_id == conf.eos_id)
-
-    The result is stored in output.ids.
-    It is used by the recurrent layer group.
-
-    The example usage is:
-
-    .. code-block:: python
-
-        eos = eos_layer(input=layer, eos_id=id)
-
-    :param name: The name of this layer. It is optional.
-    :type name: basestring
-    :param input: The input of this layer.
-    :type input: LayerOutput
-    :param eos_id: The end id of the sequence.
-    :type eos_id: int
-    :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
-                       details.
-    :type layer_attr: ExtraLayerAttribute
-    :return: LayerOutput object.
-    :rtype: LayerOutput
-    """
-    l = Layer(
-        name=name,
-        type=LayerType.EOSID_LAYER,
-        eos_id=eos_id,
-        inputs=[input.name],
-        **ExtraLayerAttribute.to_kwargs(layer_attr))
-    return LayerOutput(
-        name=name,
-        layer_type=LayerType.EOSID_LAYER,
-        parents=[input],
-        size=l.config.size)
-
-
-@wrap_name_default()
-def beam_search(step,
-                input,
-                bos_id,
-                eos_id,
-                beam_size,
-                max_length=500,
-                name=None,
-                num_results_per_sample=None):
-    """
-    Beam search is a heuristic search algorithm used in sequence generation.
-    It explores a graph by expanding the most promising nodes in a limited set
-    to maintain tractability.
-
-    The example usage is:
-
-    .. code-block:: python
-
-        def rnn_step(input):
-            last_time_step_output = memory(name='rnn', size=512)
-            with mixed_layer(size=512, name='rnn') as simple_rnn:
-                simple_rnn += full_matrix_projection(input)
-                simple_rnn += last_time_step_output
-            return simple_rnn
-
-        generated_word_embedding = GeneratedInput(
-            size=target_dictionary_dim,
-            embedding_name="target_language_embedding",
-            embedding_size=word_vector_dim)
-
-        beam_gen = beam_search(name="decoder",
-                               step=rnn_step,
-                               input=[StaticInput(encoder_last),
-                                      generated_word_embedding],
-                               bos_id=0,
-                               eos_id=1,
-                               beam_size=5)
-
-    Please see the following demo for more details:
-
-    - machine translation : demo/seqToseq/translation/gen.conf \
-                            demo/seqToseq/seqToseq_net.py
-
-    :param name: The name of the recurrent unit that is responsible for
-                 generating sequences. It is optional.
-    :type name: basestring
-    :param step: A callable function that defines the calculation in a time
-                 step, and it is applied to sequences with arbitrary length by
-                 sharing a same set of weights.
-
-                 You can refer to the first parameter of recurrent_group, or
-                 demo/seqToseq/seqToseq_net.py for more details.
-    :type step: callable
-    :param input: Input data for the recurrent unit, which should include the
-                  previously generated words as a GeneratedInput object.
-                  In beam_search, none of the inputs should be of type LayerOutput.
-    :type input: list
-    :param bos_id: Index of the start symbol in the dictionary. The start symbol
-                   is a special token for NLP tasks, which indicates the
-                   beginning of a sequence. In the generation task, the start
-                   symbol is essential, since it is used to initialize the RNN
-                   internal state.
-    :type bos_id: int
-    :param eos_id: Index of the end symbol in the dictionary. The end symbol is
-                   a special token for NLP tasks, which indicates the end of a
-                   sequence. The generation process will stop once the end
-                   symbol is generated, or a pre-defined max iteration number
-                   is exceeded.
-    :type eos_id: int
-    :param max_length: The maximum length of the generated sequence.
-    :type max_length: int
-    :param beam_size: Beam search for sequence generation is an iterative search
-                      algorithm. To maintain tractability, every iteration only
-                      stores a predetermined number, called the beam_size,
-                      of the most promising next words. The greater the beam
-                      size, the fewer candidate words are pruned.
-    :type beam_size: int
-    :param num_results_per_sample: Number of the generated results per input
-                                   sequence. This number must not exceed the
-                                   beam size.
-    :type num_results_per_sample: int
-    :return: The generated word index.
-    :rtype: LayerOutput
-    """
-
-    if num_results_per_sample is None:
-        num_results_per_sample = beam_size
-    if num_results_per_sample > beam_size:
-        logger.warning("num_results_per_sample should not exceed beam_size")
-
-    if isinstance(input, StaticInput) or isinstance(input, BaseGeneratedInput):
-        input = [input]
-
-    generated_input_index = -1
-
-    real_input = []
-    for i, each_input in enumerate(input):
-        assert not isinstance(each_input, LayerOutput), (
-            "in beam_search, "
-            "none of the inputs should be of type LayerOutput.")
-        if isinstance(each_input, BaseGeneratedInput):
-            assert generated_input_index == -1, ("recurrent_group accepts "
-                                                 "only one GeneratedInput.")
-            generated_input_index = i
-
-        else:
-            real_input.append(each_input)
-
-    assert generated_input_index != -1, "No GeneratedInput is given."
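-    # The GeneratedInput found above drives the generation loop: it boots the
-    # first step with bos_id and, at every later step, feeds back the embedding
-    # of the previously generated word (see __real_step__ below).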
-
-    gipt = input[generated_input_index]
-
-    gipt.bos_id = bos_id
-    gipt.eos_id = eos_id
-
-    def __real_step__(*args):
-        eos_name = "__%s_eos_layer__" % name
-        RecurrentLayerGroupSetGenerator(
-            Generator(
-                eos_layer_name=eos_name,
-                max_num_frames=max_length,
-                beam_size=beam_size,
-                num_results_per_sample=num_results_per_sample))
-
-        args = list(args)
-        args.insert(generated_input_index, gipt.before_real_step())
-
-        predict = gipt.after_real_step(step(*args))
-
-        eos_layer(input=predict[0], eos_id=eos_id, name=eos_name)
-        return predict
-
-    return recurrent_group(
-        step=__real_step__, input=real_input, reverse=False, name=name)
-
-
-def __cost_input__(input, label, weight=None):
-    """
-    inputs and parents for cost layers.
-    """
-    if isinstance(input, LayerOutput):
-        input = [input]
-    if isinstance(label, LayerOutput):
-        label = [label]
-    ipts = [Input(ipt.name) for ipt in (input + label)]
-    parents = [ipt for ipt in (input + label)]
-    if weight is not None:
-        assert weight.size == 1
-        ipts.append(Input(weight.name))
-        parents.append(weight)
-    return ipts, parents
-
-
-@wrap_name_default()
-@layer_support()
-def square_error_cost(input,
-                      label,
-                      weight=None,
-                      name=None,
-                      coeff=1.0,
-                      layer_attr=None):
-    """
-    sum of square error cost:
-
-    .. math::
-
-        cost = \\sum_{i=1}^N(t_i-y_i)^2
-
-    :param name: The name of this layer. It is optional.
-    :type name: basestring
-    :param input: The first input layer.
-    :type input: LayerOutput
-    :param label: The input label.
-    :type label: LayerOutput
-    :param weight: The weight layer defines a weight for each sample in the
-                   mini-batch. It is optional.
-    :type weight: LayerOutput
-    :param coeff: The weight of the gradient in the back propagation.
-                  1.0 is the default value.
-    :type coeff: float
-    :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
-                       details.
-    :type layer_attr: ExtraLayerAttribute
-    :return: LayerOutput object.
-    :rtype: LayerOutput
-    """
-    ipts, parents = __cost_input__(input, label, weight)
-
-    Layer(
-        inputs=ipts,
-        type="square_error",
-        name=name,
-        coeff=coeff,
-        **ExtraLayerAttribute.to_kwargs(layer_attr))
-    return LayerOutput(name, LayerType.COST, parents=parents, size=1)
-
-
-regression_cost = square_error_cost
-
-
-@wrap_name_default("cost")
-@layer_support()
-def classification_cost(input,
-                        label,
-                        weight=None,
-                        name=None,
-                        evaluator=classification_error_evaluator,
-                        layer_attr=None,
-                        coeff=1.):
-    """
-    classification cost Layer.
-
-    :param name: The name of this layer. It is optional.
-    :type name: basestring
-    :param input: The first input layer.
-    :type input: LayerOutput
-    :param label: The input label.
-    :type label: LayerOutput
-    :param weight: The weight layer defines a weight for each sample in the
-                   mini-batch. It is optional.
-    :type weight: LayerOutput
-    :param evaluator: Evaluator method. classification_error_evaluator is the default.
-    :type evaluator: Evaluator method
-    :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
-                       details.
-    :type layer_attr: ExtraLayerAttribute
-    :param coeff: The weight of the gradient in the back propagation.
-                  1.0 is the default value.
-    :type coeff: float
-    :return: LayerOutput object.
-    :rtype: LayerOutput
-    """
-    assert input.layer_type != LayerType.DATA
-    assert isinstance(input.activation, SoftmaxActivation)
-    assert label.layer_type == LayerType.DATA
-
-    ipts, parents = __cost_input__(input, label, weight)
-
-    Layer(
-        name=name,
-        type="multi-class-cross-entropy",
-        inputs=ipts,
-        coeff=coeff,
-        **ExtraLayerAttribute.to_kwargs(layer_attr))
-
-    def __add_evaluator__(e):
-        assert callable(e)
-        assert hasattr(e, 'is_evaluator')
-        assert isinstance(e.is_evaluator, bool)
-        assert e.is_evaluator
-        assert hasattr(e, "for_classification")
-        assert isinstance(e.for_classification, bool)
-        assert e.for_classification
-
-        e(name=e.__name__, input=input, label=label, weight=weight)
-
-    if not isinstance(evaluator, collections.Sequence):
-        evaluator = [evaluator]
-
-    for each_evaluator in evaluator:
-        __add_evaluator__(each_evaluator)
-
-    return LayerOutput(name, LayerType.COST, parents=parents, size=1)
-
-
-def conv_operator(img,
-                  filter,
-                  filter_size,
-                  num_filters,
-                  num_channels=None,
-                  stride=1,
-                  padding=0,
-                  filter_size_y=None,
-                  stride_y=None,
-                  padding_y=None,
-                  trans=False):
-    """
-    Different from img_conv_layer, conv_operator is an Operator, which can be
-    used in mixed_layer. It takes two inputs to perform convolution: the first
-    input is the image and the second is the filter kernel. It only supports
-    GPU mode.
-
-    The example usage is:
-
-    .. code-block:: python
-
-        op = conv_operator(img=input1,
-                           filter=input2,
-                           filter_size=3,
-                           num_filters=64,
-                           num_channels=64)
-
-    :param img: The input image.
-    :type img: LayerOutput
-    :param filter: The input filter.
-    :type filter: LayerOutput
-    :param filter_size: The dimension of the filter kernel on the x axis.
-    :type filter_size: int
-    :param filter_size_y: The dimension of the filter kernel on the y axis.
-                          If the parameter is not set or set to None, it will
-                          be set to 'filter_size' automatically.
-    :type filter_size_y: int
-    :param num_filters: The number of the output channels.
-    :type num_filters: int
-    :param num_channels: The number of the input channels. If the parameter is not set
-                         or set to None, it will be automatically set to the channel
-                         number of the 'img'.
-    :type num_channels: int
-    :param stride: The stride on the x axis.
-    :type stride: int
-    :param stride_y: The stride on the y axis. If the parameter is not set or
-                     set to None, it will be set to 'stride' automatically.
-    :type stride_y: int
-    :param padding: The padding size on the x axis.
-    :type padding: int
-    :param padding_y: The padding size on the y axis. If the parameter is not set
-                      or set to None, it will be set to 'padding' automatically.
-    :type padding_y: int
-    :return: A ConvOperator object.
-    :rtype: ConvOperator
-    """
-    if filter_size_y is None:
-        filter_size_y = filter_size
-    if stride_y is None:
-        stride_y = stride
-    if padding_y is None:
-        padding_y = padding
-
-    if num_channels is None:
-        num_channels = img.num_filters
-
-    assert isinstance(filter, LayerOutput)
-    assert filter.size is not None
-
-    opCls = ConvTransOperator if trans else ConvOperator
-
-    op = opCls(
-        input_layer_names=[img.name, filter.name],
-        num_filters=num_filters,
-        conv_conf=Conv(
-            filter_size=filter_size,
-            padding=padding,
-            stride=stride,
-            channels=num_channels,
-            filter_size_y=filter_size_y,
-            padding_y=padding_y,
-            stride_y=stride_y,
-            groups=1))
-
-    op.origin = [img, filter]
-    return op
-
-
-@wrap_param_attr_default()
-def conv_projection(input,
-                    filter_size,
-                    num_filters,
-                    num_channels=None,
-                    stride=1,
-                    padding=0,
-                    filter_size_y=None,
-                    stride_y=None,
-                    padding_y=None,
-                    groups=1,
-                    param_attr=None,
-                    trans=False):
-    """
-    Different from img_conv_layer and conv_operator, conv_projection is a
-    Projection, which can be used in mixed_layer and concat_layer. It uses cudnn
-    to implement convolution and only supports GPU mode.
-
-    The example usage is:
-
-    .. code-block:: python
-
-        proj = conv_projection(input=input1,
-                               filter_size=3,
-                               num_filters=64,
-                               num_channels=64)
-
-    :param input: The input of this layer.
-    :type input: LayerOutput
-    :param filter_size: The dimensions of the filter kernel. If the parameter is
-                        set to one integer, the two dimensions on the x and y axes
-                        will be the same when filter_size_y is not set. If it is set
-                        to a list, the first element indicates the dimension on
-                        the x axis, and the second is used to specify the dimension
-                        on the y axis when filter_size_y is not provided.
-    :type filter_size: int | tuple | list
-    :param filter_size_y: The dimension of the filter kernel on the y axis. If the parameter
-                          is not set, it will be set automatically according to filter_size.
-    :type filter_size_y: int
-    :param num_filters: The number of filters.
-    :type num_filters: int
-    :param num_channels: The number of the input channels.
-    :type num_channels: int
-    :param stride: The strides. If the parameter is set to one integer, the strides
-                   on the x and y axes will be the same when stride_y is not set. If it
-                   is set to a list, the first element indicates the stride on the x
-                   axis, and the second is used to specify the stride on the y axis
-                   when stride_y is not provided.
-    :type stride: int | tuple | list
-    :param stride_y: The stride on the y axis.
-    :type stride_y: int
-    :param padding: The padding sizes. If the parameter is set to one integer, the
-                    padding sizes on the x and y axes will be the same when padding_y
-                    is not set. If it is set to a list, the first element indicates
-                    the padding size on the x axis, and the second is used to specify
-                    the padding size on the y axis when padding_y is not provided.
-    :type padding: int | tuple | list
-    :param padding_y: The padding size on the y axis.
-    :type padding_y: int
-    :param groups: The group number.
-    :type groups: int
-    :param param_attr: The parameter attribute of the convolution. See ParameterAttribute for
-                       details.
-    :type param_attr: ParameterAttribute
-    :param trans: Whether it is a ConvTransProjection or a ConvProjection.
-    :type trans: bool
-    :return: A Projection object.
-    :rtype: ConvTransProjection | ConvProjection
-    """
-    if num_channels is None:
-        assert input.num_filters is not None
-        num_channels = input.num_filters
-
-    if filter_size_y is None:
-        if isinstance(filter_size, collections.Sequence):
-            assert len(filter_size) == 2
-            filter_size, filter_size_y = filter_size
-        else:
-            filter_size_y = filter_size
-
-    if stride_y is None:
-        if isinstance(stride, collections.Sequence):
-            assert len(stride) == 2
-            stride, stride_y = stride
-        else:
-            stride_y = stride
-
-    if padding_y is None:
-        if isinstance(padding, collections.Sequence):
-            assert len(padding) == 2
-            padding, padding_y = padding
-        else:
-            padding_y = padding
-
-    if param_attr.attr.get('initial_smart'):
-        # special initialization for conv layers.
-        init_w = (2.0 / (filter_size**2 * num_channels))**0.5
-        param_attr.attr["initial_mean"] = 0.0
-        param_attr.attr["initial_std"] = init_w
-        param_attr.attr["initial_strategy"] = 0
-        param_attr.attr["initial_smart"] = False
-
-    projCls = ConvTransProjection if trans else ConvProjection
-
-    proj = projCls(
-        input_layer_name=input.name,
-        num_filters=num_filters,
-        conv_conf=Conv(
-            filter_size=filter_size,
-            padding=padding,
-            stride=stride,
-            channels=num_channels,
-            filter_size_y=filter_size_y,
-            padding_y=padding_y,
-            stride_y=stride_y,
-            groups=groups),
-        **param_attr.attr)
-
-    proj.origin = input
-    return proj
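Because conv_projection returns a Projection, it is meant to be embedded in a mixed_layer (or concat_layer); a minimal sketch with illustrative names and sizes:

.. code-block:: python

    # 64 filters over an assumed 32x32 feature map -> size 64*32*32.
    with mixed_layer(size=64 * 32 * 32, act=ReluActivation()) as conv:
        conv += conv_projection(input=prev, filter_size=3, num_filters=64,
                                num_channels=64, padding=1)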
-
-
-@wrap_name_default("pad")
-@layer_support()
-def pad_layer(input,
-              pad_c=None,
-              pad_h=None,
-              pad_w=None,
-              name=None,
-              layer_attr=None):
-    """
-    This operation pads zeros to the input data according to pad_c, pad_h
-    and pad_w. pad_c, pad_h and pad_w specify the size in the corresponding
-    dimension. The input data shape is NCHW.
-
-    For example, pad_c=[2,3] means padding 2 zeros before the input data
-    and 3 zeros after the input data in the channel dimension. pad_h means
-    padding zeros in the height dimension. pad_w means padding zeros in the
-    width dimension.
-
-    For example,
-
-    .. code-block:: python
-
-        input(2,2,2,3) = [
-                           [ [[1,2,3], [3,4,5]],
-                             [[2,3,5], [1,6,7]] ],
-                           [ [[4,3,1], [1,8,7]],
-                             [[3,8,9], [2,3,5]] ]
-                         ]
-
-        pad_c=[1,1], pad_h=[0,0], pad_w=[0,0]
-
-        output(2,4,2,3) = [
-                            [ [[0,0,0], [0,0,0]],
-                              [[1,2,3], [3,4,5]],
-                              [[2,3,5], [1,6,7]],
-                              [[0,0,0], [0,0,0]] ],
-                            [ [[0,0,0], [0,0,0]],
-                              [[4,3,1], [1,8,7]],
-                              [[3,8,9], [2,3,5]],
-                              [[0,0,0], [0,0,0]] ]
-                          ]
-
-    The simple usage is:
-
-    .. code-block:: python
-
-        pad = pad_layer(input=ipt,
-                        pad_c=[4,4],
-                        pad_h=[0,0],
-                        pad_w=[2,2])
-
-    :param input: The input of this layer.
-    :type input: LayerOutput
-    :param pad_c: The padding size in the channel dimension.
-    :type pad_c: list | None
-    :param pad_h: The padding size in the height dimension.
-    :type pad_h: list | None
-    :param pad_w: The padding size in the width dimension.
-    :type pad_w: list | None
-    :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
-                       details.
-    :type layer_attr: ExtraLayerAttribute
-    :param name: The name of this layer. It is optional.
-    :type name: basestring
-    :return: LayerOutput object.
-    :rtype: LayerOutput
-    """
-    if pad_c is not None:
-        assert isinstance(pad_c, collections.Sequence) and len(pad_c) == 2
-    else:
-        pad_c = [0, 0]
-
-    if pad_h is not None:
-        assert isinstance(pad_h, collections.Sequence) and len(pad_h) == 2
-    else:
-        pad_h = [0, 0]
-
-    if pad_w is not None:
-        assert isinstance(pad_w, collections.Sequence) and len(pad_w) == 2
-    else:
-        pad_w = [0, 0]
-
-    assert input.num_filters is not None
-    in_ch = input.num_filters
-    out_ch = in_ch + pad_c[0] + pad_c[1]
-
-    l = Layer(
-        name=name,
-        type=LayerType.PAD_LAYER,
-        inputs=Input(
-            input.name,
-            pad=Pad(
-                channels=in_ch,
-                pad_c=pad_c,
-                pad_h=pad_h,
-                pad_w=pad_w, )),
-        **ExtraLayerAttribute.to_kwargs(layer_attr))
-    return LayerOutput(
-        name,
-        layer_type=LayerType.PAD_LAYER,
-        parents=[input],
-        num_filters=out_ch,
-        size=l.config.size)
-
-
-@wrap_name_default()
-@layer_support()
-def conv_shift_layer(a, b, name=None, layer_attr=None):
-    """
-    This layer performs cyclic convolution on two inputs. For example:
-    - a[in]: contains M elements.
-    - b[in]: contains N elements (N should be odd).
-    - c[out]: contains M elements.
-
-    .. math::
-
-        c[i] = \\sum_{j=-(N-1)/2}^{(N-1)/2}a_{i+j} * b_{j}
-
-    In this formula:
-     - a's index is computed modulo M. When it is negative, the item is taken
-       from the right side (the end of the array).
-     - b's index is computed modulo N. When it is negative, the item is taken
-       from the right side (the end of the array).
-
-    The example usage is:
-
-    .. code-block:: python
-
-        conv_shift = conv_shift_layer(a=layer1, b=layer2)
-
-    :param name: The name of this layer. It is optional.
-    :type name: basestring
-    :param a: The first input of this layer.
-    :type a: LayerOutput
-    :param b: The second input of this layer.
-    :type b: LayerOutput
-    :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
-                       details.
-    :type layer_attr: ExtraLayerAttribute
-    :return: LayerOutput object.
-    :rtype: LayerOutput
-    """
-    assert isinstance(a, LayerOutput) and isinstance(b, LayerOutput)
-    assert b.size is None or b.size % 2 == 1  # size of b must be odd.
-    Layer(
-        name=name,
-        type=LayerType.CONV_SHIFT_LAYER,
-        inputs=[a.name, b.name],
-        **ExtraLayerAttribute.to_kwargs(layer_attr))
-
-    return LayerOutput(
-        name, LayerType.CONV_SHIFT_LAYER, parents=[a, b], size=a.size)
-
-
-@wrap_name_default()
-@wrap_param_attr_default()
-@wrap_bias_attr_default()
-@wrap_act_default(act=LinearActivation())
-@layer_support(ERROR_CLIPPING, DROPOUT)
-def tensor_layer(a,
-                 b,
-                 size,
-                 act=None,
-                 name=None,
-                 param_attr=None,
-                 bias_attr=None,
-                 layer_attr=None):
-    """
-    This layer performs tensor operation on two inputs.
-    For example:
-
-    .. math::
-        y_{i} = a * W_{i} * {b^\\mathrm{T}}, i=0,1,...,K-1
-
-    In this formula:
-      - :math:`a`: the first input contains M elements.
-      - :math:`b`: the second input contains N elements.
-      - :math:`y_{i}`: the i-th element of y.
-      - :math:`W_{i}`: the i-th learned weight, whose shape is [M, N].
-      - :math:`b^\\mathrm{T}`: the transpose of :math:`b`.
-
-    The simple usage is:
-
-    .. code-block:: python
-
-        tensor = tensor_layer(a=layer1, b=layer2, size=1000)
-
-    :param name: The name of this layer. It is optional.
-    :type name: basestring
-    :param a: The first input of this layer.
-    :type a: LayerOutput
-    :param b: The second input of this layer.
-    :type b: LayerOutput
-    :param size: The dimension of this layer.
-    :type size: int
-    :param act: Activation type. LinearActivation is the default activation.
-    :type act: BaseActivation
-    :param param_attr: The parameter attribute. See ParameterAttribute for
-                       details.
-    :type param_attr: ParameterAttribute
-    :param bias_attr: The parameter attribute for bias. If this parameter is set to
-                      False or an object whose type is not ParameterAttribute,
-                      no bias is defined. If this parameter is set to True,
-                      the bias is initialized to zero.
-    :type bias_attr: ParameterAttribute | None | bool | Any
-    :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
-                       details.
-    :type layer_attr: ExtraLayerAttribute | None
-    :return: LayerOutput object.
-    :rtype: LayerOutput
-    """
-    assert isinstance(a, LayerOutput) and isinstance(b, LayerOutput)
-    Layer(
-        name=name,
-        size=size,
-        type=LayerType.TENSOR_LAYER,
-        active_type=act.name,
-        bias=ParamAttr.to_bias(bias_attr),
-        inputs=[Input(a.name, **param_attr.attr), Input(b.name)],
-        **ExtraLayerAttribute.to_kwargs(layer_attr))
-    return LayerOutput(
-        name, LayerType.TENSOR_LAYER, parents=[a, b], activation=act, size=size)
-
-
-@wrap_name_default()
-@wrap_param_attr_default()
-@wrap_bias_attr_default()
-@wrap_act_default()
-@layer_support(DROPOUT, ERROR_CLIPPING)
-def selective_fc_layer(input,
-                       size,
-                       select=None,
-                       act=None,
-                       name=None,
-                       pass_generation=False,
-                       has_selected_colums=True,
-                       mul_ratio=0.02,
-                       param_attr=None,
-                       bias_attr=None,
-                       layer_attr=None):
-    """
-    Selective fully connected layer. Different from fc_layer, the output
-    of this layer can be sparse. It requires an additional input to indicate
-    several selected columns for output. If the selected columns are not
-    specified, selective_fc_layer acts exactly like fc_layer.
-
-    The simple usage is:
-
-    .. code-block:: python
-
-        sel_fc = selective_fc_layer(input=input, size=128, act=TanhActivation())
-
-    :param name: The name of this layer. It is optional.
-    :type name: basestring
-    :param input: The input of this layer.
-    :type input: LayerOutput | list | tuple
-    :param select: The layer to select columns to output. It should be a sparse
-                   binary matrix, and is treated as the mask of selective fc. If
-                   it is not set or set to None, selective_fc_layer acts exactly
-                   like fc_layer.
-    :type select: LayerOutput
-    :param size: The dimension of this layer, which should be equal to that of
-                 the layer 'select'.
-    :type size: int
-    :param act: Activation type. TanhActivation is the default activation.
-    :type act: BaseActivation
-    :param pass_generation: The flag which indicates whether it is during generation.
-    :type pass_generation: bool
-    :param has_selected_colums: The flag which indicates whether the parameter 'select'
-                                has been set. True is the default.
-    :type has_selected_colums: bool
-    :param mul_ratio: A ratio helps to judge how sparse the output is and determine
-                      the computation method for speed consideration.
-    :type mul_ratio: float
-    :param param_attr: The parameter attribute. See ParameterAttribute for
-                       details.
-    :type param_attr: ParameterAttribute
-    :param bias_attr: The parameter attribute for bias. If this parameter is set to
-                      False or an object whose type is not ParameterAttribute,
-                      no bias is defined. If this parameter is set to True,
-                      the bias is initialized to zero.
-    :type bias_attr: ParameterAttribute | None | bool | Any
-    :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
-                       details.
-    :type layer_attr: ExtraLayerAttribute | None
-    :return: LayerOutput object.
-    :rtype: LayerOutput
-    """
-    if isinstance(input, LayerOutput):
-        input = [input]
-        assert not isinstance(param_attr, collections.Sequence)
-        param_attr = [param_attr]
-    else:
-        if isinstance(param_attr, collections.Sequence):
-            assert len(input) == len(param_attr)
-        else:
-            if "parameter_name" in param_attr.attr and len(input) > 1:
-                logger.fatal(
-                    "When the name field of param_attr is manually specified "
-                    "and the input is a list, the param_attr should also be a "
-                    "list with each item being the param_attr for each input "
-                    "item. If only one named param_attr is provided, all the "
-                    "input items would share this parameter.")
-            param_attr = [copy.deepcopy(param_attr) for _ in range(len(input))]
-
-    assert isinstance(input, collections.Sequence)
-    assert isinstance(select, LayerOutput)
-    if select.size is not None:
-        assert select.size == size
-    Layer(
-        inputs=[
-            Input(ipt.name, **attr.attr) for ipt, attr in zip(input, param_attr)
-        ] + [select.name],
-        name=name,
-        type=LayerType.SEL_FC_LAYER,
-        size=size,
-        bias=ParameterAttribute.to_bias(bias_attr),
-        active_type=act.name,
-        selective_fc_pass_generation=pass_generation,
-        has_selected_colums=has_selected_colums,
-        selective_fc_full_mul_ratio=mul_ratio,
-        **ExtraLayerAttribute.to_kwargs(layer_attr))
-    return LayerOutput(
-        name,
-        LayerType.SEL_FC_LAYER,
-        list(input) + [select],
-        activation=act,
-        size=size)
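When the select input is given, only the marked columns are computed; a sketch with illustrative names, where `mask` would be a sparse binary layer marking the wanted output columns:

.. code-block:: python

    sel_fc = selective_fc_layer(input=hidden, select=mask, size=10000,
                                act=SoftmaxActivation())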
-    :rtype: LayerOutput
-    """
-    if isinstance(input, LayerOutput):
-        input = [input]
-        assert not isinstance(param_attr, collections.Sequence)
-        param_attr = [param_attr]
-    else:
-        if isinstance(param_attr, collections.Sequence):
-            assert len(input) == len(param_attr)
-        else:
-            if "parameter_name" in param_attr.attr and len(input) > 1:
-                logger.fatal(
-                    "When the name field of param_attr is manually specified "
-                    "and the input is a list, the param_attr should also be a "
-                    "list with each item being the param_attr for each input "
-                    "item. If only one named param_attr is provided, all the "
-                    "input items would share this parameter.")
-            param_attr = [copy.deepcopy(param_attr) for _ in range(len(input))]
-
-    assert isinstance(input, collections.Sequence)
-    assert isinstance(select, LayerOutput)
-    if select.size is not None:
-        assert select.size == size
-    Layer(
-        inputs=[
-            Input(ipt.name, **attr.attr) for ipt, attr in zip(input, param_attr)
-        ] + [select.name],
-        name=name,
-        type=LayerType.SEL_FC_LAYER,
-        size=size,
-        bias=ParameterAttribute.to_bias(bias_attr),
-        active_type=act.name,
-        selective_fc_pass_generation=pass_generation,
-        has_selected_colums=has_selected_colums,
-        selective_fc_full_mul_ratio=mul_ratio,
-        **ExtraLayerAttribute.to_kwargs(layer_attr))
-    return LayerOutput(
-        name,
-        LayerType.SEL_FC_LAYER,
-        list(input) + [select],
-        activation=act,
-        size=size)
-
-
-@wrap_name_default()
-@layer_support()
-def sampling_id_layer(input, name=None, layer_attr=None):
-    """
-    A layer for sampling an id from the multinomial distribution given by the
-    input layer. One id is sampled for each sample.
-
-    The simple usage is:
-
-    .. code-block:: python
-
-       sampling_id = sampling_id_layer(input=input)
-
-    :param input: The input of this layer.
-    :type input: LayerOutput
-    :param name: The name of this layer. It is optional.
-    :type name: basestring
-    :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
-                       details.
-    :type layer_attr: ExtraLayerAttribute
-    :return: LayerOutput object.
-    :rtype: LayerOutput
-    """
-    l = Layer(
-        name=name,
-        type=LayerType.SAMPLING_ID_LAYER,
-        inputs=[Input(input.name)],
-        **ExtraLayerAttribute.to_kwargs(layer_attr))
-    return LayerOutput(
-        name, LayerType.SAMPLING_ID_LAYER, input, size=l.config.size)
-
-
-@wrap_name_default()
-@layer_support()
-def slope_intercept_layer(input,
-                          name=None,
-                          slope=1.0,
-                          intercept=0.0,
-                          layer_attr=None):
-    """
-    This layer applies a slope and an intercept to the input.
-
-    .. math::
-       y = slope * x + intercept
-
-    The simple usage is:
-
-    .. code-block:: python
-
-       scale = slope_intercept_layer(input=input, slope=-1.0, intercept=1.0)
-
-    :param input: The input of this layer.
-    :type input: LayerOutput
-    :param name: The name of this layer. It is optional.
-    :type name: basestring
-    :param slope: The scale factor.
-    :type slope: float
-    :param intercept: The offset.
-    :type intercept: float
-    :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
-                       details.
-    :type layer_attr: ExtraLayerAttribute
-    :return: LayerOutput object.
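A quick illustration of composing this fixed affine transform (an editorial sketch; ``prob`` is a hypothetical layer of probabilities declared elsewhere):

.. code-block:: python

   # y = -1.0 * x + 1.0, i.e. y = 1 - x, computed element-wise with no
   # trainable parameters.
   flipped = slope_intercept_layer(input=prob, slope=-1.0, intercept=1.0)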
-    :rtype: LayerOutput
-    """
-    Layer(
-        name=name,
-        type=LayerType.SLOPE_INTERCEPT_LAYER,
-        slope=slope,
-        intercept=intercept,
-        inputs=[Input(input.name)],
-        **ExtraLayerAttribute.to_kwargs(layer_attr))
-    return LayerOutput(
-        name, LayerType.SLOPE_INTERCEPT_LAYER, input, size=input.size)
-
-
-@wrap_name_default()
-@layer_support()
-def linear_comb_layer(weights, vectors, size=None, name=None, layer_attr=None):
-    """
-    A layer that computes a weighted sum of vectors. It takes two inputs.
-    - Input: size of weights is M
-             size of vectors is M*N
-    - Output: a vector of size=N
-
-    .. math::
-
-       z(i) = \sum_{j=0}^{M-1} x(j) y(i+Nj)
-
-    where :math:`0 \le i \le N-1`
-
-    Or in the matrix notation:
-
-    .. math::
-
-       z = x^\mathrm{T} Y
-
-    In this formula:
-      - :math:`x`: weights
-      - :math:`y`: vectors.
-      - :math:`z`: the output.
-
-    Note that the above computation is for one sample. Multiple samples are
-    processed in one batch.
-
-    The simple usage is:
-
-    .. code-block:: python
-
-       linear_comb = linear_comb_layer(weights=weight, vectors=vectors,
-                                       size=elem_dim)
-
-    :param weights: The weight layer.
-    :type weights: LayerOutput
-    :param vectors: The vector layer.
-    :type vectors: LayerOutput
-    :param size: The dimension of this layer.
-    :type size: int
-    :param name: The name of this layer. It is optional.
-    :type name: basestring
-    :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
-                       details.
-    :type layer_attr: ExtraLayerAttribute
-    :return: LayerOutput object.
-    :rtype: LayerOutput
-    """
-    assert isinstance(weights, LayerOutput) and isinstance(vectors, LayerOutput)
-    if vectors.size is not None and weights.size is not None:
-        assert vectors.size % weights.size == 0
-        if size is None:
-            size = vectors.size / weights.size
-        else:
-            assert size == vectors.size / weights.size
-    Layer(
-        name=name,
-        type=LayerType.LINEAR_COMBINATION_LAYER,
-        size=size,
-        inputs=[Input(weights.name), Input(vectors.name)],
-        **ExtraLayerAttribute.to_kwargs(layer_attr))
-    return LayerOutput(
-        name, LayerType.LINEAR_COMBINATION_LAYER, [weights, vectors], size=size)
-
-
-convex_comb_layer = linear_comb_layer
-
-
-@wrap_name_default()
-@layer_support()
-def block_expand_layer(input,
-                       block_x=0,
-                       block_y=0,
-                       stride_x=0,
-                       stride_y=0,
-                       padding_x=0,
-                       padding_y=0,
-                       num_channels=None,
-                       name=None,
-                       layer_attr=None):
-    """
-    Expand feature map to minibatch matrix.
-       - matrix width is: block_y * block_x * num_channels
-       - matrix height is: outputH * outputW
-
-    .. math::
-
-       outputH = 1 + (2 * padding_y + imgSizeH - block_y + stride_y - 1) / stride_y
-
-       outputW = 1 + (2 * padding_x + imgSizeW - block_x + stride_x - 1) / stride_x
-
-    The expanding method is the same as that of ExpandConvLayer, but the
-    transposed value is saved. After expanding, output.sequenceStartPositions
-    will store the timeline. The number of time steps is outputH * outputW and
-    the dimension of each time step is block_y * block_x * num_channels. This
-    layer can be used after a convolutional neural network, and before a
-    recurrent neural network.
-
-    The simple usage is:
-
-    .. code-block:: python
-
-       block_expand = block_expand_layer(input=layer,
-                                         num_channels=128,
-                                         stride_x=1,
-                                         stride_y=1,
-                                         block_x=1,
-                                         block_y=3)
-
-    :param input: The input of this layer.
-    :type input: LayerOutput
-    :param num_channels: The number of input channels. If the parameter is not set or
-                         set to None, its actual value will be automatically set to
-                         the channels number of the input.
-    :type num_channels: int
-    :param block_x: The width of the sub block.
-    :type block_x: int
-    :param block_y: The height of the sub block.
-    :type block_y: int
-    :param stride_x: The stride size in horizontal direction.
-    :type stride_x: int
-    :param stride_y: The stride size in vertical direction.
-    :type stride_y: int
-    :param padding_x: The padding size in horizontal direction.
-    :type padding_x: int
-    :param padding_y: The padding size in vertical direction.
-    :type padding_y: int
-    :param name: The name of this layer. It is optional.
-    :type name: basestring
-    :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
-                       details.
-    :type layer_attr: ExtraLayerAttribute
-    :return: LayerOutput object.
-    :rtype: LayerOutput
-    """
-    if num_channels is None:
-        assert input.num_filters is not None
-        num_channels = input.num_filters
-    l = Layer(
-        name=name,
-        inputs=Input(
-            input.name,
-            block_expand=BlockExpand(
-                channels=num_channels,
-                block_x=block_x,
-                block_y=block_y,
-                stride_x=stride_x,
-                stride_y=stride_y,
-                padding_x=padding_x,
-                padding_y=padding_y)),
-        type=LayerType.BLOCK_EXPAND,
-        **ExtraLayerAttribute.to_kwargs(layer_attr))
-
-    return LayerOutput(
-        name, LayerType.BLOCK_EXPAND, parents=[input], size=l.config.size)
-
-
-@wrap_name_default()
-@layer_support()
-def maxout_layer(input, groups, num_channels=None, name=None, layer_attr=None):
-    """
-    A layer that performs maxout on the output of a convolutional layer.
-      - Input: the output of a convolutional layer.
-      - Output: feature map size same as the input's, and its channel number is
-        (input channel) / groups.
-
-    So groups should be larger than 1, and the number of channels should be
-    divisible by groups.
-
-    Reference:
-        `Maxout Networks
-        <https://arxiv.org/abs/1302.4389>`_
-        `Multi-digit Number Recognition from Street View Imagery using Deep Convolutional Neural Networks
-        <https://arxiv.org/abs/1312.6082>`_
-
-
-    .. math::
-
-       & out = \max_k (in[n, k, o_c , s])
-
-       & out_{i * s + j} = \max_k in_{k * o_{c} * s + i * s + j}
-
-       & s = \\frac{input.size}{num\_channels}
-
-       & o_{c} = \\frac{num\_channels}{groups}
-
-       & 0 \le i < o_{c}
-
-       & 0 \le j < s
-
-       & 0 \le k < groups
-
-
-    The simple usage is:
-
-    .. code-block:: python
-
-       maxout = maxout_layer(input,
-                             num_channels=128,
-                             groups=4)
-
-    :param input: The input of this layer.
-    :type input: LayerOutput
-    :param num_channels: The number of input channels. If the parameter is not set or
-                         set to None, its actual value will be automatically set to
-                         the channels number of the input.
-    :type num_channels: int
-    :param groups: The group number of input layer.
-    :type groups: int
-    :param name: The name of this layer. It is optional.
-    :type name: basestring
-    :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
-                       details.
-    :type layer_attr: ExtraLayerAttribute
-    :return: LayerOutput object.
-    :rtype: LayerOutput
-    """
-    assert isinstance(input.activation, LinearActivation)
-    assert groups > 1
-    if num_channels is None:
-        assert input.num_filters is not None
-        num_channels = input.num_filters
-    assert num_channels % groups == 0
-    l = Layer(
-        name=name,
-        inputs=Input(
-            input.name, maxout=MaxOut(
-                channels=num_channels, groups=groups)),
-        type=LayerType.MAXOUT,
-        **ExtraLayerAttribute.to_kwargs(layer_attr))
-    return LayerOutput(
-        name, LayerType.MAXOUT, parents=[input], size=l.config.size)
-
-
-@wrap_name_default()
-@layer_support()
-def ctc_layer(input,
-              label,
-              size=None,
-              name=None,
-              norm_by_times=False,
-              layer_attr=None):
-    """
-    Connectionist Temporal Classification (CTC) is designed for temporal
-    classification tasks, e.g.
-    sequence labeling problems where the
-    alignment between the inputs and the target labels is unknown.
-
-    Reference:
-        `Connectionist Temporal Classification: Labelling Unsegmented Sequence Data
-        with Recurrent Neural Networks
-        <http://www.cs.toronto.edu/~graves/icml_2006.pdf>`_
-
-    Note:
-        Considering the 'blank' label needed by CTC, you need to use (num_classes + 1)
-        as the size of the input, where num_classes is the category number.
-        And the 'blank' is the last category index. So the size of 'input' layer (e.g.
-        fc_layer with softmax activation) should be (num_classes + 1). The size of
-        ctc_layer should also be (num_classes + 1).
-
-    The example usage is:
-
-    .. code-block:: python
-
-      ctc = ctc_layer(input=input,
-                      label=label,
-                      size=9055,
-                      norm_by_times=True)
-
-    :param input: The input of this layer.
-    :type input: LayerOutput
-    :param label: The input label.
-    :type label: LayerOutput
-    :param size: The dimension of this layer, which must be equal to (category number + 1).
-    :type size: int
-    :param name: The name of this layer. It is optional.
-    :type name: basestring
-    :param norm_by_times: Whether to do normalization by times. False is the default.
-    :type norm_by_times: bool
-    :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
-                       details.
-    :type layer_attr: ExtraLayerAttribute
-    :return: LayerOutput object.
-    :rtype: LayerOutput
-    """
-    assert isinstance(input, LayerOutput)
-    assert isinstance(label, LayerOutput)
-    if label.size is not None:
-        if size is not None:
-            assert size == label.size + 1
-        else:
-            size = label.size + 1
-    Layer(
-        name=name,
-        type=LayerType.CTC_LAYER,
-        size=size,
-        norm_by_times=norm_by_times,
-        inputs=[input.name, label.name],
-        **ExtraLayerAttribute.to_kwargs(layer_attr))
-    return LayerOutput(name, LayerType.CTC_LAYER, [input, label], size=size)
-
-
-@wrap_name_default()
-@layer_support()
-def warp_ctc_layer(input,
-                   label,
-                   size=None,
-                   name=None,
-                   blank=0,
-                   norm_by_times=False,
-                   layer_attr=None):
-    """
-    A layer integrating the open-source `warp-ctc
-    <https://github.com/baidu-research/warp-ctc>`_ library, which is used in
-    `Deep Speech 2: End-to-End Speech Recognition in English and Mandarin
-    <https://arxiv.org/abs/1512.02595>`_, to compute Connectionist Temporal
-    Classification (CTC) loss. Besides, another `warp-ctc repository
-    <https://github.com/gangliao/warp-ctc>`_ , which is forked from
-    the official one, is maintained to enable more compiling options. During the
-    building process, PaddlePaddle will clone the source codes, build and
-    install it to :code:`third_party/install/warpctc` directory.
-
-    Reference:
-        `Connectionist Temporal Classification: Labelling Unsegmented Sequence Data
-        with Recurrent Neural Networks
-        <http://www.cs.toronto.edu/~graves/icml_2006.pdf>`_
-
-    Note:
-        - Let num_classes represent the number of categories. Considering the
-          'blank' label needed by CTC, you need to use (num_classes + 1) as the
-          size of the warp_ctc layer.
-        - You can set 'blank' to any value in the range [0, num_classes], which
-          should be consistent with the one used in your labels.
-        - As a native 'softmax' activation is integrated into the warp-ctc library,
-          'linear' activation is expected to be used instead in the 'input' layer.
-
-    The example usage is:
-
-    .. code-block:: python
-
-      ctc = warp_ctc_layer(input=input,
-                           label=label,
-                           size=1001,
-                           blank=1000,
-                           norm_by_times=False)
-
-    :param input: The input of this layer.
-    :type input: LayerOutput
-    :param label: The input label.
-    :type label: LayerOutput
-    :param size: The dimension of this layer, which must be equal to (category number + 1).
-    :type size: int
-    :param name: The name of this layer. It is optional.
- :type name: basestring - :param blank: The 'blank' label used in ctc. - :type blank: int - :param norm_by_times: Whether to do normalization by times. False is the default. - :type norm_by_times: bool - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute - :return: LayerOutput object. - :rtype: LayerOutput - """ - assert isinstance(input, LayerOutput) - assert isinstance(label, LayerOutput) - if label.size is not None: - if size is not None: - assert size == label.size + 1 - else: - size = label.size + 1 - Layer( - name=name, - type=LayerType.WARP_CTC_LAYER, - size=size, - blank=blank, - norm_by_times=norm_by_times, - inputs=[input.name, label.name], - **ExtraLayerAttribute.to_kwargs(layer_attr)) - return LayerOutput( - name, LayerType.WARP_CTC_LAYER, parents=[input, label], size=size) - - -@wrap_name_default() -@wrap_param_attr_default() -@layer_support() -def crf_layer(input, - label, - size=None, - weight=None, - param_attr=None, - name=None, - coeff=1.0, - layer_attr=None): - """ - A layer for calculating the cost of sequential conditional random - field model. - - The example usage is: - - .. code-block:: python - - crf = crf_layer(input=input, - label=label, - size=label_dim) - - :param input: The first input layer. - :type input: LayerOutput - :param label: The input label. - :type label: LayerOutput - :param size: The category number. - :type size: int - :param weight: The weight layer defines a weight for each sample in the - mini-batch. It is optional. - :type weight: LayerOutput - :param param_attr: The parameter attribute. See ParameterAttribute for - details. - :type param_attr: ParameterAttribute - :param name: The name of this layer. It is optional. - :type name: basestring - :param coeff: The weight of the gradient in the back propagation. - 1.0 is the default value. - :type coeff: float - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute - :return: LayerOutput object. - :rtype: LayerOutput - """ - assert isinstance(input, LayerOutput) - assert isinstance(label, LayerOutput) - assert weight is None or isinstance(weight, LayerOutput) - if input.size is not None and label.size is not None: - assert input.size == label.size - if size is None: - size = input.size - else: - assert size == input.size - - ipts = [Input(input.name, **param_attr.attr), Input(label.name)] - if weight is not None: - ipts.append(Input(weight.name)) - - Layer( - name=name, - type=LayerType.CRF_LAYER, - size=size, - inputs=ipts, - coeff=coeff, - **ExtraLayerAttribute.to_kwargs(layer_attr)) - parents = [input, label] - if weight is not None: - parents.append(weight) - # The size for LayerOutput means the dimension of the output. - # It's different from the meaning of crf layer, which is the number of - # classes. - return LayerOutput(name, LayerType.CRF_LAYER, parents, size=1) - - -@wrap_name_default() -@wrap_param_attr_default() -@layer_support() -def crf_decoding_layer(input, - size, - label=None, - param_attr=None, - name=None, - layer_attr=None): - """ - A layer for calculating the decoding sequence of sequential conditional - random field model. The decoding sequence is stored in output.ids. - If the input 'label' is provided, it is treated as the ground-truth label, and - this layer will also calculate error. output.value[i] is 1 for an incorrect - decoding and 0 for the correct. - - The example usage is: - - .. 
code-block:: python - - crf_decoding = crf_decoding_layer(input=input, - size=label_dim) - - :param input: The first input layer. - :type input: LayerOutput - :param size: The dimension of this layer. - :type size: int - :param label: The input label. - :type label: LayerOutput | None - :param param_attr: The parameter attribute. See ParameterAttribute for - details. - :type param_attr: ParameterAttribute - :param name: The name of this layer. It is optional. - :type name: basestring - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute - :return: LayerOutput object. - :rtype: LayerOutput - """ - - assert isinstance(input, LayerOutput) - assert label is None or isinstance(label, LayerOutput) - - ipts = [Input(input.name, **param_attr.attr)] - if label is not None: - ipts.append(Input(label.name)) - - Layer( - name=name, - type=LayerType.CRF_DECODING_LAYER, - size=size, - inputs=ipts, - **ExtraLayerAttribute.to_kwargs(layer_attr)) - parents = [input] - if label is not None: - parents.append(label) - # The size for LayerOutput means the dimension of the output. - # It's different from the meaning of crf layer, which is the number of - # classes. - return LayerOutput(name, LayerType.CRF_DECODING_LAYER, parents, size=1) - - -""" -Following are cost Layers. -""" - - -@wrap_bias_attr_default(has_bias=True) -@wrap_param_attr_default() -@wrap_name_default() -@layer_support() -def nce_layer(input, - label, - num_classes=None, - param_attr=None, - weight=None, - num_neg_samples=10, - neg_distribution=None, - name=None, - bias_attr=None, - layer_attr=None): - """ - Noise-contrastive estimation. - - Reference: - `A fast and simple algorithm for training neural probabilistic language - models. `_ - - The example usage is: - - .. code-block:: python - - cost = nce_layer(input=[layer1, layer2], label=layer2, - param_attr=[attr1, attr2], weight=layer3, - num_classes=3, neg_distribution=[0.1,0.3,0.6]) - - :param name: The name of this layer. It is optional. - :type name: basestring - :param input: The first input of this layer. - :type input: LayerOutput | list | tuple | collections.Sequence - :param label: The input label. - :type label: LayerOutput - :param weight: The weight layer defines a weight for each sample in the - mini-batch. It is optional. - :type weight: LayerOutput - :param num_classes: The number of classes. - :type num_classes: int - :param act: Activation type. SigmoidActivation is the default activation. - :type act: BaseActivation - :param param_attr: The parameter attribute. See ParameterAttribute for - details. - :type param_attr: ParameterAttribute - :param num_neg_samples: The number of sampled negative labels. 10 is the - default value. - :type num_neg_samples: int - :param neg_distribution: The discrete noisy distribution over the output - space from which num_neg_samples negative labels - are sampled. If this parameter is not set, a - uniform distribution will be used. A user-defined - distribution is a list whose length must be equal - to the num_classes. Each member of the list defines - the probability of a class given input x. - :type neg_distribution: list | tuple | collections.Sequence | None - :param bias_attr: The parameter attribute for bias. If this parameter is set to - False or an object whose type is not ParameterAttribute, - no bias is defined. If this parameter is set to True, - the bias is initialized to zero. 
- :type bias_attr: ParameterAttribute | None | bool | Any - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute - :return: LayerOutput object. - :rtype: LayerOutput - """ - if isinstance(input, LayerOutput): - input = [input] - assert not isinstance(param_attr, collections.Sequence) - param_attr = [param_attr] - else: - if isinstance(param_attr, collections.Sequence): - assert len(input) == len(param_attr) - else: - param_attr = [copy.deepcopy(param_attr) for _ in range(len(input))] - - assert isinstance(input, collections.Sequence) - - assert isinstance(label, LayerOutput) - assert label.layer_type == LayerType.DATA - if num_classes is None: - num_classes = label.size - if neg_distribution is not None: - assert isinstance(neg_distribution, collections.Sequence) - assert len(neg_distribution) == num_classes - assert abs(sum(neg_distribution) - 1.0) < 1e-5 - - ipts_for_layer = [] - parents = [] - for each_input, attr in zip(input, param_attr): - assert isinstance(each_input, LayerOutput) - ipts_for_layer.append(Input(each_input.name, **attr.attr)) - parents.append(each_input) - ipts_for_layer.append(label.name) - parents.append(label) - - if weight is not None: - assert isinstance(weight, LayerOutput) - assert weight.layer_type == LayerType.DATA - ipts_for_layer.append(weight.name) - parents.append(weight) - - l = Layer( - name=name, - type=LayerType.NCE_LAYER, - num_classes=num_classes, - neg_sampling_dist=neg_distribution, - active_type=SigmoidActivation().name, - num_neg_samples=num_neg_samples, - inputs=ipts_for_layer, - bias=ParamAttr.to_bias(bias_attr), - **ExtraLayerAttribute.to_kwargs(layer_attr)) - return LayerOutput( - name, - LayerType.NCE_LAYER, - parents=parents, - size=l.config.size, - activation=SigmoidActivation()) - - -@wrap_name_default() -@layer_support() -def rank_cost(left, - right, - label, - weight=None, - name=None, - coeff=1.0, - layer_attr=None): - """ - A cost Layer for learning to rank using gradient descent. - - Reference: - `Learning to Rank using Gradient Descent - `_ - - .. math:: - - C_{i,j} & = -\\tilde{P_{ij}} * o_{i,j} + log(1 + e^{o_{i,j}}) - - o_{i,j} & = o_i - o_j - - \\tilde{P_{i,j}} & = \\{0, 0.5, 1\\} \ or \ \\{0, 1\\} - - In this formula: - - :math:`C_{i,j}` is the cross entropy cost. - - :math:`\\tilde{P_{i,j}}` is the label. 1 means positive order - and 0 means reverse order. - - :math:`o_i` and :math:`o_j`: the left output and right output. - Their dimension is one. - - The example usage is: - - .. code-block:: python - - cost = rank_cost(left=out_left, - right=out_right, - label=label) - - :param left: The first input, the size of this layer is 1. - :type left: LayerOutput - :param right: The right input, the size of this layer is 1. - :type right: LayerOutput - :param label: Label is 1 or 0, means positive order and reverse order. - :type label: LayerOutput - :param weight: The weight layer defines a weight for each sample in the - mini-batch. It is optional. - :type weight: LayerOutput - :param name: The name of this layer. It is optional. - :type name: basestring - :param coeff: The weight of the gradient in the back propagation. - 1.0 is the default value. - :type coeff: float - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute - :return: LayerOutput object. 
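A minimal pairwise-ranking sketch (an editorial addition; the feature layers and the 0/1 order label are hypothetical and would be declared elsewhere):

.. code-block:: python

   # Each tower maps a document's features to a single relevance score.
   left_score = fc_layer(input=left_feat, size=1, act=LinearActivation())
   right_score = fc_layer(input=right_feat, size=1, act=LinearActivation())

   # order_label is 1 when the left document should rank above the right one.
   cost = rank_cost(left=left_score, right=right_score, label=order_label)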
-    :rtype: LayerOutput
-    """
-    assert left.size == 1
-    assert right.size == 1
-    assert label.size == 1
-
-    ipts = [left.name, right.name, label.name]
-    parents = [left, right, label]
-    if weight is not None:
-        ipts.append(weight.name)
-        parents.append(weight)
-
-    Layer(
-        name=name,
-        type=LayerType.RANK_COST,
-        inputs=ipts,
-        coeff=coeff,
-        **ExtraLayerAttribute.to_kwargs(layer_attr))
-
-    return LayerOutput(name, LayerType.RANK_COST, parents=parents, size=1)
-
-
-@wrap_name_default()
-@layer_support()
-def lambda_cost(input,
-                score,
-                name,
-                NDCG_num=5,
-                max_sort_size=-1,
-                layer_attr=None):
-    """
-    lambdaCost for lambdaRank LTR approach.
-
-    The example usage is:
-
-    .. code-block:: python
-
-      cost = lambda_cost(input=input,
-                         score=score,
-                         NDCG_num=8,
-                         max_sort_size=-1)
-
-    :param input: The first input of this layer, which is often a list of document
-                  samples of the same query and whose type must be sequence.
-    :type input: LayerOutput
-    :param score: The scores of the samples.
-    :type score: LayerOutput
-    :param NDCG_num: The size of NDCG (Normalized Discounted Cumulative Gain),
-                     e.g., 5 for NDCG@5. It must be less than or equal to the
-                     minimum size of the list.
-    :type NDCG_num: int
-    :param max_sort_size: The size of partial sorting in calculating gradient. If
-                          max_sort_size is equal to -1 or greater than the number
-                          of the samples in the list, then the algorithm will sort
-                          the entire list to compute the gradient. In other cases,
-                          max_sort_size must be greater than or equal to NDCG_num.
-    :type max_sort_size: int
-    :param name: The name of this layer. It is optional.
-    :type name: basestring
-    :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
-                       details.
-    :type layer_attr: ExtraLayerAttribute
-    :return: LayerOutput object.
-    :rtype: LayerOutput
-    """
-    assert isinstance(input, LayerOutput) and isinstance(score, LayerOutput)
-    if score.size is not None:
-        assert score.size == 1
-    Layer(
-        name=name,
-        type=LayerType.LAMBDA_COST,
-        inputs=[input.name, score.name],
-        NDCG_num=NDCG_num,
-        max_sort_size=max_sort_size,
-        **ExtraLayerAttribute.to_kwargs(layer_attr))
-
-    return LayerOutput(
-        name, LayerType.LAMBDA_COST, parents=[input, score], size=1)
-
-
-@wrap_name_default()
-@layer_support()
-def cross_entropy(input,
-                  label,
-                  name=None,
-                  coeff=1.0,
-                  weight=None,
-                  layer_attr=None):
-    """
-    A loss layer for multi-class cross entropy.
-
-    The example usage is:
-
-    .. code-block:: python
-
-       cost = cross_entropy(input=input_layer,
-                            label=label_layer)
-
-    :param input: The first input layer.
-    :type input: LayerOutput
-    :param label: The input label.
-    :type label: LayerOutput
-    :param name: The name of this layer. It is optional.
-    :type name: basestring
-    :param coeff: The weight of the gradient in the back propagation.
-                  1.0 is the default value.
-    :type coeff: float
-    :param weight: The weight layer defines a weight for each sample in the
-                   mini-batch. It is optional.
-    :type weight: LayerOutput
-    :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
-                       details.
-    :type layer_attr: ExtraLayerAttribute
-    :return: LayerOutput object.
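A minimal classification sketch (an editorial addition, assuming the usual data_layer/fc_layer helpers; the sizes are illustrative):

.. code-block:: python

   # A 10-way classifier: softmax output matched against an integer label.
   feat = data_layer(name='feat', size=128)
   label = data_layer(name='label', size=10)
   prob = fc_layer(input=feat, size=10, act=SoftmaxActivation())
   cost = cross_entropy(input=prob, label=label)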
- :rtype: LayerOutput - """ - - ipts, parents = __cost_input__(input, label, weight) - Layer( - name=name, - type=LayerType.CROSS_ENTROPY, - inputs=ipts, - coeff=coeff, - **ExtraLayerAttribute.to_kwargs(layer_attr)) - return LayerOutput(name, LayerType.CROSS_ENTROPY, parents=parents, size=1) - - -@wrap_name_default() -@layer_support() -def cross_entropy_with_selfnorm(input, - label, - name=None, - coeff=1.0, - softmax_selfnorm_alpha=0.1, - layer_attr=None): - """ - A loss layer for multi class entropy with selfnorm. - Input should be a vector of positive numbers, without normalization. - - The example usage is: - - .. code-block:: python - - cost = cross_entropy_with_selfnorm(input=input_layer, - label=label_layer) - - :param input: The first input layer. - :type input: LayerOutput - :param label: The input label. - :type input: LayerOutput - :param name: The name of this layer. It is optional. - :type name: basestring - :param coeff: The weight of the gradient in the back propagation. - 1.0 is the default value. - :type coeff: float - :param softmax_selfnorm_alpha: The scale factor affects the cost. - :type softmax_selfnorm_alpha: float - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute - :return: LayerOutput object. - :rtype: LayerOutput - """ - Layer( - name=name, - type=LayerType.CROSS_ENTROPY_WITH_SELFNORM, - inputs=[input.name, label.name], - coeff=coeff, - softmax_selfnorm_alpha=softmax_selfnorm_alpha, - **ExtraLayerAttribute.to_kwargs(layer_attr)) - - return LayerOutput( - name, - LayerType.CROSS_ENTROPY_WITH_SELFNORM, - parents=[input, label], - size=1) - - -@wrap_name_default() -@layer_support() -def sum_cost(input, name=None, layer_attr=None): - """ - A loss layer which calculates the sum of the input as loss. - - The example usage is: - - .. code-block:: python - - cost = sum_cost(input=input_layer) - - :param input: The input of this layer. - :type input: LayerOutput - :param name: The name of this layer. It is optional. - :type name: basestring - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute - :return: LayerOutput object. - :rtype: LayerOutput. - """ - assert isinstance(input, LayerOutput) - Layer( - name=name, - type=LayerType.SUM_COST, - inputs=[input.name], - **ExtraLayerAttribute.to_kwargs(layer_attr)) - - return LayerOutput(name, LayerType.SUM_COST, parents=[input], size=1) - - -@wrap_name_default() -@layer_support() -def huber_regression_cost(input, - label, - name=None, - delta=1.0, - coeff=1.0, - layer_attr=None): - """ - In statistics, the Huber loss is a loss function used in robust regression, - that is less sensitive to outliers in data than the squared error loss. - Given a prediction f(x), a label y and :math:`\delta`, the loss function - is defined as: - - .. math:: - - loss = 0.5*(y-f(x))^{2}, | y-f(x) | < \delta - - loss = \delta | y-f(x) | - 0.5 \delta ^2, otherwise - - The example usage is: - - .. code-block:: python - - cost = huber_regression_cost(input=input_layer, label=label_layer) - - :param input: The first input layer. - :type input: LayerOutput - :param label: The input label. - :type input: LayerOutput - :param name: The name of this layer. It is optional. - :type name: basestring - :param delta: The difference between the observed and predicted values. - :type delta: float - :param coeff: The weight of the gradient in the back propagation. - 1.0 is the default value. 
- :type coeff: float - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute - :return: LayerOutput object. - :rtype: LayerOutput. - """ - assert isinstance(input, LayerOutput) - Layer( - name=name, - type=LayerType.HUBER_REGRESSION, - inputs=[input.name, label.name], - delta=delta, - coeff=coeff, - **ExtraLayerAttribute.to_kwargs(layer_attr)) - return LayerOutput( - name, LayerType.HUBER_REGRESSION, parents=[input, label], size=1) - - -@wrap_name_default() -@layer_support() -def huber_classification_cost(input, - label, - name=None, - coeff=1.0, - layer_attr=None): - """ - For classification purposes, a variant of the Huber loss called modified Huber - is sometimes used. Given a prediction f(x) (a real-valued classifier score) and - a true binary class label :math:`y\in \{-1, 1 \}`, the modified Huber - loss is defined as: - - .. math: - - loss = \max ( 0, 1-yf(x) )^2, yf(x) \geq -1 - - loss = -4yf(x), otherwise - - The example usage is: - - .. code-block:: python - - cost = huber_classification_cost(input=input_layer, label=label_layer) - - :param input: The first input layer. - :type input: LayerOutput - :param label: The input label. - :type input: LayerOutput - :param name: The name of this layer. It is optional. - :type name: basestring - :param coeff: The weight of the gradient in the back propagation. - 1.0 is the default value. - :type coeff: float - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute - :return: LayerOutput object. - :rtype: LayerOutput - """ - assert isinstance(input, LayerOutput) - if input.size is not None: - assert input.size == 1 - Layer( - name=name, - type=LayerType.HUBER_CLASSIFICATION, - inputs=[input.name, label.name], - coeff=coeff, - **ExtraLayerAttribute.to_kwargs(layer_attr)) - return LayerOutput( - name, LayerType.HUBER_CLASSIFICATION, parents=[input, label], size=1) - - -@wrap_name_default() -@layer_support() -def multi_binary_label_cross_entropy(input, - label, - name=None, - coeff=1.0, - layer_attr=None): - """ - A loss layer for multi binary label cross entropy. - - The example usage is: - - .. code-block:: python - - cost = multi_binary_label_cross_entropy(input=input_layer, - label=label_layer) - - :param input: The first input layer. - :type input: LayerOutput - :param label: The input label. - :type input: LayerOutput - :param name: The name of this layer. It is optional. - :type name: basestring - :param coeff: The weight of the gradient in the back propagation. - 1.0 is the default value. - :type coeff: float - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute - :return: LayerOutput object. - :rtype: LayerOutput - """ - - if input.activation is None or \ - not isinstance(input.activation, SigmoidActivation): - logger.log(logging.WARN, - ("%s is not a recommended activation for " - "multi_binary_label_cross_entropy, sigmoid is better") % - repr(input.activation)) - - Layer( - name=name, - type=LayerType.MULTI_BIN_LABEL_CROSS_ENTROPY, - inputs=[input.name, label.name], - coeff=coeff, - **ExtraLayerAttribute.to_kwargs(layer_attr)) - return LayerOutput( - name, - LayerType.MULTI_BIN_LABEL_CROSS_ENTROPY, - parents=[input, label], - size=1) - - -class BeamInput(object): - """ - Define the input for cross_entropy_over_beam layer. 
-
-    A beam is made up of a triple: the first one is the scores over all
-    candidates; the second one is the indices of the top k selected
-    candidates; the third one is the index of the ground truth, which is
-    also called the gold.
-    """
-
-    def __init__(self, candidate_scores, selected_candidates, gold):
-        assert isinstance(candidate_scores, LayerOutput)
-        self.candidate_scores = candidate_scores
-        assert candidate_scores.size == 1
-
-        assert isinstance(selected_candidates, LayerOutput)
-        self.selected_candidates = selected_candidates
-
-        assert isinstance(gold, LayerOutput)
-        self.gold = gold
-
-
-@wrap_name_default()
-@layer_support()
-def cross_entropy_over_beam(input, name=None):
-    """
-    This layer is used in learning-to-search models, which solve complex
-    joint prediction problems based on learning to search through a
-    problem-defined search space.
-
-    Specifically, the learning-to-search process for this layer begins with
-    searching a target sequence from a nested sequence. In the first search
-    step, the top beam_size sequences with the highest scores, the indices of
-    these top k sequences in the original nested sequence, and the ground
-    truth (also called the gold) together (a triple) make up the first beam.
-
-    Then, several special positions, for example, start and end positions
-    that define meaningful segments, are searched. In these searches, the top
-    k positions with the highest scores are selected, and then the
-    sub-sequences, starting from the selected start positions till the ends of
-    the sequences (or a fixed position), are taken for the next search.
-
-    We call the possible top k results returned in one search the beam. This
-    search process can be repeated for a pre-defined number of turns and leads
-    to several beam expansions.
-
-    Finally, the layer cross_entropy_over_beam takes all the beam expansions,
-    which contain several candidate targets found along the multi-step search.
-    cross_entropy_over_beam calculates the cross entropy over the expanded
-    beams, with all the candidates in the beam as the normalization factor.
-
-    Note that, if the gold falls off the beam at search step t, then the cost
-    is calculated over the beam at step t.
-
-    This cost layer always works together with kmax_seq_score_layer,
-    sub_nested_seq_layer, and sequence_slice_layer to trim the input to form a
-    sub-search space.
-
-
-    The example usage is:
-
-    .. code-block:: python
-
-       cost = cross_entropy_over_beam(input=[
-           BeamInput(
-               candidate_scores=beam1_candidates,
-               selected_candidates=beam1_topk,
-               gold=gold1),
-           BeamInput(
-               candidate_scores=beam2_candidates,
-               selected_candidates=beam2_topk,
-               gold=gold2),
-       ])
-
-
-    :param input: Input beams for this layer.
-    :type input: BeamInput | list of BeamInput
-    :param name: The name of this layer. It is optional.
-    :type name: basestring
-    :return: LayerOutput object.
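To make one beam expansion concrete, here is a hedged sketch built from the companion layers named above (an editorial addition; every layer variable is hypothetical):

.. code-block:: python

   # Step 1: score every candidate sub-sequence in the nested input.
   scores = fc_layer(input=candidate_repr, size=1, act=LinearActivation())

   # Step 2: keep the indices of the top beam_size candidates ...
   topk = kmax_seq_score_layer(input=scores, beam_size=5)

   # ... and trim the nested sequence down to them for the next search step.
   trimmed = sub_nested_seq_layer(input=nested_input, selected_indices=topk)

   # The (scores, topk, gold) triple of each step becomes one BeamInput.
   cost = cross_entropy_over_beam(input=[
       BeamInput(candidate_scores=scores, selected_candidates=topk, gold=gold)
   ])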
-    :rtype: LayerOutput
-    """
-
-    if isinstance(input, BeamInput):
-        input = [input]
-    else:
-        assert isinstance(input, list), (
-            'input for cross_entropy_over_beam should be a python list '
-            'of BeamInput object.')
-        for ipt in input:
-            assert isinstance(ipt, BeamInput), (
-                'input for cross_entropy_over_beam '
-                'should be a BeamInput object.')
-
-    ipts = []
-    parents = []
-    for beam in input:
-        parents += [beam.candidate_scores, beam.selected_candidates, beam.gold]
-        ipts += [
-            beam.candidate_scores.name, beam.selected_candidates.name,
-            beam.gold.name
-        ]
-
-    Layer(name=name, type=LayerType.CROSS_ENTROPY_OVER_BEAM, inputs=ipts)
-    return LayerOutput(name, LayerType.CROSS_ENTROPY, parents=parents, size=1)
-
-
-@wrap_name_default()
-@layer_support()
-def smooth_l1_cost(input, label, name=None, coeff=1.0, layer_attr=None):
-    """
-    This is an L1 loss, but smoother. It requires that the
-    sizes of input and label are equal. The formula is as follows,
-
-    .. math::
-
-        L = \sum_{i} smooth_{L1}(input_i - label_i)
-
-    in which
-
-    .. math::
-
-        smooth_{L1}(x) = \\begin{cases} 0.5x^2& \\text{if}  \\ |x| < 1 \\\\ |x|-0.5& \\text{otherwise} \end{cases}
-
-    Reference:
-        `Fast R-CNN
-        <https://arxiv.org/abs/1504.08083>`_
-
-    The example usage is:
-
-    .. code-block:: python
-
-       cost = smooth_l1_cost(input=input_layer,
-                             label=label_layer)
-
-    :param input: The input layer.
-    :type input: LayerOutput
-    :param label: The input label.
-    :type label: LayerOutput
-    :param name: The name of this layer. It is optional.
-    :type name: basestring
-    :param coeff: The weight of the gradient in the back propagation.
-                  1.0 is the default value.
-    :type coeff: float
-    :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
-                       details.
-    :type layer_attr: ExtraLayerAttribute
-    :return: LayerOutput object.
-    :rtype: LayerOutput
-    """
-    assert isinstance(input, LayerOutput)
-    assert isinstance(label, LayerOutput)
-    assert input.size == label.size
-
-    Layer(
-        name=name,
-        type=LayerType.SMOOTH_L1,
-        inputs=[input.name, label.name],
-        coeff=coeff,
-        **ExtraLayerAttribute.to_kwargs(layer_attr))
-    return LayerOutput(
-        name, LayerType.SMOOTH_L1, parents=[input, label], size=1)
-
-
-@wrap_name_default()
-def multiplex_layer(input, name=None, layer_attr=None):
-    """
-    This layer multiplexes several input layers according to the indices
-    provided by the first input layer.
-      - inputs[0]: the indices of the layers to form the output, of size batchSize.
-      - inputs[1:N]: the candidate output data.
-
-    For each index i from 0 to batchSize - 1, the i-th row of the output is
-    the same as the i-th row of the (index[i] + 1)-th layer.
-
-    For the i-th row of the output:
-
-    .. math::
-
-        y[i][j] = x_{x_{0}[i] + 1}[i][j], j = 0,1, ... , (x_{1}.width - 1)
-
-    where y is the output. :math:`x_{k}` is the k-th input layer and
-    :math:`k = x_{0}[i] + 1`.
-
-    The example usage is:
-
-    .. code-block:: python
-
-       out = multiplex_layer(input=layers)
-
-    :param input: Input layers.
-    :type input: list of LayerOutput
-    :param name: The name of this layer. It is optional.
-    :type name: basestring
-    :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
-                       details.
-    :type layer_attr: ExtraLayerAttribute
-    :return: LayerOutput object.
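A small routing sketch (an editorial addition; ``index`` and the two candidate layers are hypothetical, and the candidates must share one size):

.. code-block:: python

   # index[i] in {0, 1} picks, per sample, which candidate row to emit.
   index = data_layer(name='index', size=1)
   out = multiplex_layer(input=[index, candidate_a, candidate_b])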
-    :rtype: LayerOutput
-    """
-
-    assert isinstance(input, collections.Sequence)
-    assert len(input) > 2, 'multiplex_layer should have more than 2 inputs'
-    for i in range(1, len(input)):
-        assert isinstance(input[i], LayerOutput)
-        assert input[i].size == input[1].size, \
-            "All the input layers except the first one should have the same size"
-
-    l = Layer(
-        name=name,
-        type='multiplex',
-        inputs=[x.name for x in input],
-        size=input[1].size,
-        **ExtraLayerAttribute.to_kwargs(layer_attr))
-    return LayerOutput(
-        name=name,
-        layer_type=LayerType.MULTIPLEX_LAYER,
-        parents=input,
-        size=l.config.size)
-
-
-@wrap_name_default("dropout")
-def dropout_layer(input, dropout_rate, name=None):
-    """
-    A layer that applies dropout to its input.
-
-    The example usage is:
-
-    .. code-block:: python
-
-       dropout = dropout_layer(input=input_layer, dropout_rate=0.5)
-
-    :param name: The name of this layer. It is optional.
-    :type name: basestring
-    :param input: The input of this layer.
-    :type input: LayerOutput
-    :param dropout_rate: The probability of dropout.
-    :type dropout_rate: float
-    :return: LayerOutput object.
-    :rtype: LayerOutput
-    """
-    return addto_layer(
-        name=name,
-        input=input,
-        act=LinearActivation(),
-        bias_attr=False,
-        layer_attr=ExtraAttr(drop_rate=dropout_rate))
-
-
-@wrap_name_default()
-@wrap_act_default(act=LinearActivation())
-@wrap_param_attr_default()
-@layer_support(DROPOUT)
-def row_conv_layer(input,
-                   context_len,
-                   act=None,
-                   name=None,
-                   param_attr=None,
-                   layer_attr=None):
-    """
-
-    The row convolution is also called lookahead convolution. It was first
-    introduced in the paper `Deep Speech 2: End-to-End Speech Recognition
-    in English and Mandarin <https://arxiv.org/abs/1512.02595>`_ .
-
-    A bidirectional RNN learns representations for a sequence by performing a
-    forward and a backward pass through the entire sequence. However, unlike
-    unidirectional RNNs, bidirectional RNNs are challenging to deploy in an
-    online and low-latency setting. The lookahead convolution incorporates
-    information from future subsequences in a computationally efficient manner
-    to improve unidirectional RNNs.
-
-    The connection of row convolution differs from the 1D sequence
-    convolution. Assume that the future context length is k, that is to say,
-    the output at time step t is computed from the input features from the
-    t-th time step to the (t+k)-th time step. Assuming the hidden dimension
-    of the input activations is d, the activations r_t of the new layer at
-    time step t are:
-
-    .. math::
-
-        r_{t,i} = \sum_{j=1}^{k + 1} {w_{i,j}h_{t+j-1, i}}
-                  \quad \\text{for} \quad  (1 \leq i \leq d)
-
-    Note:
-        The `context_len` is `k + 1`. That is to say, the lookahead step
-        number plus one equals context_len.
-
-
-    .. code-block:: python
-
-       row_conv = row_conv_layer(input=input_layer, context_len=3)
-
-
-    :param input: The input of this layer.
-    :type input: LayerOutput
-    :param context_len: The context length equals the lookahead step number
-                        plus one.
-    :type context_len: int
-    :param act: Activation Type. LinearActivation is the default activation.
-    :type act: BaseActivation
-    :param param_attr: The parameter attribute. See ParameterAttribute for
-                       details.
-    :type param_attr: ParameterAttribute
-    :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
-                       details.
-    :type layer_attr: ExtraLayerAttribute | None
-    :return: LayerOutput object.
-    :rtype: LayerOutput
-    """
-    assert isinstance(input, LayerOutput)
-    assert context_len > 0, "the context_len must be greater than 0."
-
-    Layer(
-        inputs=[Input(input.name, **param_attr.attr)],
-        name=name,
-        context_length=context_len,
-        type=LayerType.ROW_CONV_LAYER,
-        active_type=act.name,
-        **ExtraLayerAttribute.to_kwargs(layer_attr))
-    return LayerOutput(
-        name, LayerType.ROW_CONV_LAYER, input, activation=act, size=input.size)
-
-
-@layer_support()
-@wrap_name_default()
-def prelu_layer(input,
-                name=None,
-                partial_sum=1,
-                channel_shared=None,
-                num_channels=None,
-                param_attr=None,
-                layer_attr=None):
-    """
-    The Parametric ReLU activation, which activates its outputs with
-    learnable weights.
-
-    Reference:
-        `Delving Deep into Rectifiers: Surpassing Human-Level Performance on
-        ImageNet Classification <https://arxiv.org/abs/1502.01852>`_
-
-    .. math::
-
-       prelu(z_i) = \\begin{cases} z_i, & \\text{if} \\ z_i > 0 \\\\ a_i z_i, & \\text{otherwise} \\end{cases}
-
-    The example usage is:
-
-    .. code-block:: python
-
-       prelu = prelu_layer(input=layers, partial_sum=1)
-
-    :param name: The name of this layer. It is optional.
-    :type name: basestring
-    :param input: The input of this layer.
-    :type input: LayerOutput
-    :param partial_sum: this parameter makes a group of inputs share the same weight.
-
-        - partial_sum = 1, indicates the element-wise activation: each element has a weight.
-        - partial_sum = number of elements in one channel, indicates the channel-wise activation, elements in a channel share the same weight.
-        - partial_sum = number of outputs, indicates all elements share the same weight.
-
-    :type partial_sum: int
-    :param channel_shared: whether or not the parameters are shared across channels.
-
-        - channel_shared = True, we set the partial_sum to the number of outputs.
-        - channel_shared = False, we set the partial_sum to the number of elements in one channel.
-
-    :type channel_shared: bool
-    :param num_channels: The number of input channels.
-    :type num_channels: int
-    :param param_attr: The parameter attribute. See ParameterAttribute for details.
-    :type param_attr: ParameterAttribute
-    :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
-                       details.
-    :type layer_attr: ExtraLayerAttribute | None
-    :return: LayerOutput object.
-    :rtype: LayerOutput
-    """
-
-    assert isinstance(input, LayerOutput), 'prelu_layer accepts only one input.'
-
-    if not param_attr:
-        param_attr = ParamAttr(initial_mean=0.25, initial_std=0.0)
-    else:
-        assert isinstance(param_attr, ParameterAttribute)
-
-    if num_channels is None:
-        assert input.num_filters is not None, \
-            'the input channel cannot be detected, please specify the num_channels parameter'
-        num_channels = input.num_filters
-
-    if channel_shared is not None:
-        assert isinstance(channel_shared, bool)
-        assert (input.height != 0 and input.width != 0), \
-            'input height and width must be set'
-        if channel_shared:
-            partial_sum = input.height * input.width * num_channels
-        else:
-            partial_sum = input.height * input.width
-
-    l = Layer(
-        name=name,
-        type=LayerType.PRELU,
-        inputs=Input(input.name, **param_attr.attr),
-        partial_sum=partial_sum,
-        **ExtraLayerAttribute.to_kwargs(layer_attr))
-    return LayerOutput(
-        name=name,
-        layer_type=LayerType.PRELU,
-        parents=input,
-        num_filters=num_channels,
-        size=l.config.size)
-
-
-@wrap_name_default()
-@layer_support(ERROR_CLIPPING, DROPOUT)
-@wrap_act_default(act=LinearActivation())
-def gated_unit_layer(input,
-                     size,
-                     act=None,
-                     name=None,
-                     gate_attr=None,
-                     gate_param_attr=None,
-                     gate_bias_attr=True,
-                     inproj_attr=None,
-                     inproj_param_attr=None,
-                     inproj_bias_attr=True,
-                     layer_attr=None):
-    """
-    The gated unit layer implements a simple gating mechanism over the input.
-    The input :math:`X` is first projected into a new space :math:`X'`, and
-    it is also used to produce a gate weight :math:`\sigma`. The element-wise
-    product between :math:`X'` and :math:`\sigma` is finally returned.
-
-    Reference:
-        `Language Modeling with Gated Convolutional Networks
-        <https://arxiv.org/abs/1612.08083>`_
-
-    .. math::
-
-       y=\\text{act}(X \cdot W + b)\otimes \sigma(X \cdot V + c)
-
-    The example usage is:
-
-    .. code-block:: python
-
-       gated_unit = gated_unit_layer(size=128, input=input_layer)
-
-    :param input: The input of this layer.
-    :type input: LayerOutput
-    :param size: The dimension of this layer's output.
-    :type size: int
-    :param act: Activation type of the projection. LinearActivation is the default
-                activation.
-    :type act: BaseActivation
-    :param name: The name of this layer. It is optional.
-    :type name: basestring
-    :param gate_attr: The extra layer attribute of the gate. See ExtraLayerAttribute for
-                      details.
-    :type gate_attr: ExtraLayerAttribute | None
-    :param gate_param_attr: The parameter attribute of the gate. See ParameterAttribute
-                            for details.
-    :type gate_param_attr: ParameterAttribute
-    :param gate_bias_attr: The bias attribute of the gate. If this parameter is set to False or
-                           an object whose type is not ParameterAttribute, no bias is defined.
-                           If this parameter is set to True, the bias is initialized to zero.
-    :type gate_bias_attr: ParameterAttribute | bool | None | Any
-    :param inproj_attr: Extra layer attributes of the projection. See ExtraLayerAttribute for
-                        details.
-    :type inproj_attr: ExtraLayerAttribute | None
-    :param inproj_param_attr: The parameter attribute of the projection. See ParameterAttribute
-                              for details.
-    :type inproj_param_attr: ParameterAttribute
-    :param inproj_bias_attr: The bias attribute of the projection. If this parameter is set to False
-                             or an object whose type is not ParameterAttribute, no bias is defined.
-                             If this parameter is set to True, the bias is initialized to zero.
-    :type inproj_bias_attr: ParameterAttribute | bool | None | Any
-    :param layer_attr: Extra layer attribute of the product. See ExtraLayerAttribute for
-                       details.
-    :type layer_attr: ExtraLayerAttribute | None
-    :return: LayerOutput object.
-    :rtype: LayerOutput
-    """
-
-    assert isinstance(
-        input, LayerOutput), 'The gated linear unit accepts only one input.'
-
-    input_proj = fc_layer(
-        input=input,
-        name="%s_input_proj" % name,
-        size=size,
-        act=act,
-        layer_attr=inproj_attr,
-        param_attr=inproj_param_attr,
-        bias_attr=inproj_bias_attr)
-
-    gate = fc_layer(
-        size=size,
-        name="%s_gate" % name,
-        act=SigmoidActivation(),
-        input=input,
-        layer_attr=gate_attr,
-        param_attr=gate_param_attr,
-        bias_attr=gate_bias_attr)
-    return mixed_layer(
-        name="%s_gated_act" % name,
-        input=dotmul_operator(input_proj, gate),
-        layer_attr=layer_attr)
-
-
-@layer_support()
-@wrap_name_default('switch_order')
-def switch_order_layer(input,
-                       name=None,
-                       reshape_axis=None,
-                       act=None,
-                       layer_attr=None):
-    """
-    This layer switches the dimension order of the image input.
-    From order "batchSize, channels, height, width"
-    to order "batchSize, height, width, channels".
-
-    The example usage is:
-
-    .. code-block:: python
-
-       reshape_axis = 3
-       switch = switch_order(input=layer, name='switch', reshape_axis=reshape_axis)
-       reshape = {'height': [0, 1, 2], 'width': [3]}
-
-    :param input: The input of this layer.
-    :type input: LayerOutput
-    :param name: The name of this layer. It is optional.
-    :type name: basestring
-    :param reshape_axis: Specify the axes of 'height'.
-                         Its value should be positive and less than 4.
-    :type reshape_axis: int
-    :return: LayerOutput object.
-    :rtype: LayerOutput
-    """
-    assert isinstance(input, LayerOutput)
-    assert reshape_axis is not None and (reshape_axis > 0 and reshape_axis < 4)
-    height = [ele for ele in xrange(reshape_axis)]
-    width = [ele for ele in range(reshape_axis, 4)]
-    reshape = {'height': height, 'width': width}
-
-    l = Layer(
-        name=name,
-        inputs=input.name,
-        reshape=reshape,
-        type=LayerType.SWITCH_ORDER_LAYER,
-        active_type=act.name,
-        **ExtraLayerAttribute.to_kwargs(layer_attr))
-    return LayerOutput(
-        name=name,
-        layer_type=LayerType.SWITCH_ORDER_LAYER,
-        activation=act,
-        parents=input,
-        size=l.config.size)
-
-
-@wrap_name_default()
-@layer_support()
-def crop_layer(input, offset, axis=2, shape=None, name=None, layer_attr=None):
-    """
-    This layer crops images according to the offset and shape. Users can set
-    the crop shape through the argument 'shape' explicitly or by specifying a
-    reference input layer.
-
-    The example usage is:
-
-    .. code-block:: python
-
-       crop = crop_layer(input=[image_input, reference_input], axis=2, offset=[2, 3])
-
-    :param input: The input of this layer. If two inputs are given, the second one
-                  will be regarded as the reference.
-                  And the input must be 4-dims and in NCHW order.
-    :type input: LayerOutput | Sequence
-    :param offset: The crop offset.
-    :type offset: Sequence
-    :param axis: The start axis to be cropped. For image input layer:
-        - 0: batch size
-        - 1: channels
-        - 2: height
-        - 3: width
-    :type axis: int
-    :param shape: The shape to be cropped to. Default is None.
-    :type shape: Sequence | None
-    :param name: The name of this layer. It is optional.
-    :type name: basestring
-    :return: LayerOutput object.
-    :rtype: LayerOutput
-    """
-    if isinstance(input, LayerOutput):
-        input = [input]
-    else:
-        assert isinstance(input, collections.Sequence)
-    l = Layer(
-        inputs=[x.name for x in input],
-        axis=axis,
-        offset=offset,
-        shape=shape,
-        name=name,
-        type=LayerType.CROP_LAYER,
-        **ExtraLayerAttribute.to_kwargs(layer_attr))
-    return LayerOutput(
-        name=name,
-        layer_type=LayerType.CROP_LAYER,
-        parents=input,
-        size=l.config.size)
-
-
-@wrap_name_default()
-@layer_support()
-def sub_nested_seq_layer(input, selected_indices, name=None):
-    """
-    The sub_nested_seq_layer accepts two inputs: the first one is a nested
-    sequence; the second one is a set of selected indices in the nested
-    sequence.
-
-    Then sub_nested_seq_layer trims the first nested sequence input according
-    to the selected indices to form a new output. This layer is useful in
-    beam training.
-
-    The example usage is:
-
-    .. code-block:: python
-
-       sub_nest_seq = sub_nested_seq_layer(input=data, selected_indices=selected_ids)
-
-
-    :param input: The input of this layer. It is a nested sequence.
-    :type input: LayerOutput
-    :param selected_indices: A set of sequence indices in the nested sequence.
-    :type selected_indices: LayerOutput
-    :param name: The name of this layer. It is optional.
-    :type name: basestring
-    :return: LayerOutput object.
-    :rtype: LayerOutput
-    """
-
-    assert isinstance(input, LayerOutput), (
-        'The first input of '
-        'sub_nested_seq_layer must be a Paddle layer.')
-    assert isinstance(selected_indices, LayerOutput), (
-        'The second input of '
-        'sub_nested_seq_layer must be a Paddle layer.')
-
-    l = Layer(
-        inputs=input.name,
-        selected_indices=selected_indices.name,
-        name=name,
-        type=LayerType.SUB_NESTED_SEQ)
-    return LayerOutput(
-        name=name,
-        layer_type=LayerType.SUB_NESTED_SEQ,
-        parents=input,
-        size=l.config.size)
-
-
-@wrap_name_default("clip")
-def clip_layer(input, min, max, name=None):
-    """
-    A layer for clipping the input value by the given thresholds.
-
-    .. math::
-
-       out[i] = \min (\max (in[i],p_{1} ),p_{2} )
-
-    .. code-block:: python
-
-       clip = clip_layer(input=input_layer, min=-10, max=10)
-
-    :param name: The name of this layer. It is optional.
-    :type name: basestring
-    :param input: The input of this layer.
-    :type input: LayerOutput
-    :param min: The lower threshold for clipping.
-    :type min: float
-    :param max: The upper threshold for clipping.
-    :type max: float
-    :return: LayerOutput object.
-    :rtype: LayerOutput
-    """
-    Layer(
-        name=name,
-        type=LayerType.CLIP_LAYER,
-        inputs=[input.name],
-        min=min,
-        max=max)
-    return LayerOutput(
-        name, LayerType.CLIP_LAYER, parents=[input], size=input.size)
-
-
-@wrap_name_default()
-def seq_slice_layer(input, starts, ends, name=None):
-    """
-    seq_slice_layer will return one or several sub-sequences from the
-    input sequence layer given start and end indices.
-
-        - If only start indices are given, and end indices are set to None,
-          this layer slices the input sequence from the given start indices
-          to its end.
-        - If only end indices are given, and start indices are set to None,
-          this layer slices the input sequence from its beginning to the
-          given end indices.
-        - If start and end indices are both given, they should have the same
-          number of elements.
-
-    If start or end indices contain more than one element, the input sequence
-    will be sliced multiple times.
-
-
-    .. code-block:: python
-
-       seq_slice = seq_slice_layer(input=input_seq,
-                                   starts=start_pos, ends=end_pos)
-
-    :param name: The name of this layer. It is optional.
-    :type name: basestring
-    :param input: The input of this layer, which should be a sequence.
-    :type input: LayerOutput
-    :param starts: The start indices to slice the input sequence.
-    :type starts: LayerOutput | None
-    :param ends: The end indices to slice the input sequence.
-    :type ends: LayerOutput | None
-    :return: LayerOutput object.
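A small slicing sketch (an editorial addition; ``start_pos`` and ``end_pos`` are hypothetical index layers produced elsewhere):

.. code-block:: python

   # Keep everything from each start index to the end of the sequence.
   tails = seq_slice_layer(input=input_seq, starts=start_pos, ends=None)

   # Keep everything from the beginning up to each end index.
   heads = seq_slice_layer(input=input_seq, starts=None, ends=end_pos)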
-    :rtype: LayerOutput
-    """
-
-    assert isinstance(input, LayerOutput), (
-        'The first input of seq_slice layer must be a PaddlePaddle layer.')
-
-    if starts is not None:
-        assert isinstance(starts, LayerOutput), (
-            'The start indices for seq_slice layer '
-            'must be a PaddlePaddle layer.')
-    if ends is not None:
-        assert isinstance(ends, LayerOutput), (
-            'The end indices for seq_slice layer must be a PaddlePaddle layer.')
-    assert starts is not None or ends is not None, (
-        'start and end indices '
-        'cannot be set to None at the same time, at least one of '
-        'them should be given.')
-    if starts is not None and ends is not None:
-        assert starts.size == ends.size, (
-            'If start and end indices are both given to seq_slice_layer, '
-            'they should have the same width.')
-
-    Layer(
-        name=name,
-        type=LayerType.SEQ_SLICE,
-        inputs=input.name,
-        starts=starts.name if starts is not None else None,
-        ends=ends.name if ends is not None else None)
-    return LayerOutput(
-        name, LayerType.SEQ_SLICE, parents=[input], size=input.size)
-
-
-@wrap_name_default()
-@layer_support()
-def kmax_seq_score_layer(input, name=None, beam_size=1):
-    """
-    This layer accepts one input which is scores over a sequence or a nested
-    sequence, and returns the indices of the beam_size sequences with the
-    highest scores.
-
-    .. code-block:: python
-
-       kmax_indices = kmax_seq_score_layer(input=input_layer, beam_size=5)
-
-
-    :param name: The name of this layer. It is optional.
-    :type name: basestring
-    :param input: The input of this layer. It stores scores over a sequence or
-                  a nested sequence and its size must be 1.
-    :type input: LayerOutput
-    :param beam_size: The indices of the sequences with top beam_size scores are returned.
-    :type beam_size: int
-    :return: LayerOutput object.
-    :rtype: LayerOutput
-    """
-    assert isinstance(input, LayerOutput), ("kmax_seq_score_layer "
-                                            "accepts only one input.")
-    assert input.size == 1, (
-        "input of kmax_seq_score_layer is a score "
-        "over a sequence or a nested sequence, so its width must be 1.")
-
-    Layer(
-        name=name,
-        type=LayerType.KMAX_SEQ_SCORE,
-        inputs=[input.name],
-        beam_size=beam_size)
-
-    return LayerOutput(
-        name, LayerType.KMAX_SEQ_SCORE, parents=[input], size=input.size)
-
-
-@wrap_name_default("conv3d")
-@wrap_param_attr_default()
-@wrap_bias_attr_default()
-@wrap_act_default(act=ReluActivation())
-@layer_support(DROPOUT)
-def img_conv3d_layer(input,
-                     filter_size,
-                     num_filters,
-                     name=None,
-                     num_channels=None,
-                     act=None,
-                     groups=1,
-                     stride=1,
-                     padding=0,
-                     bias_attr=None,
-                     param_attr=None,
-                     shared_biases=True,
-                     layer_attr=None,
-                     trans=False,
-                     layer_type=None):
-    """
-
-    The example usage is:
-
-    .. code-block:: python
-
-       conv = img_conv3d_layer(input=data, filter_size=1,
-                               num_channels=8,
-                               num_filters=16, stride=1,
-                               bias_attr=False,
-                               act=ReluActivation())
-
-    :param name: The name of this layer. It is optional.
-    :type name: basestring
-    :param input: The input of this layer.
-    :type input: LayerOutput
-    :param filter_size: The dimensions of the filter kernel along three axes. If the parameter
-                        is set to one integer, the three dimensions will be the same.
-    :type filter_size: int | tuple | list
-    :param num_filters: The number of filters. It is the same as the number of output
-                        image channels.
-    :type num_filters: int
-    :param act: Activation type. ReluActivation is the default activation.
-    :type act: BaseActivation
-    :param groups: The number of the filter groups.
-    :type groups: int
-    :param stride: The strides of the convolution along three axes.
-                   If the parameter is set to one integer, the three strides
-                   will be the same.
-    :type stride: int | tuple | list
-    :param padding: The numbers of padding along three axes. If the parameter
-                    is set to one integer, they will be the same.
-    :type padding: int | tuple | list
-    :param bias_attr: The bias attribute. If the parameter is set to False or an object
-                      whose type is not ParameterAttribute, no bias is defined. If the
-                      parameter is set to True, the bias is initialized to zero.
-    :type bias_attr: ParameterAttribute | None | bool | Any
-    :param num_channels: The number of input channels. If the parameter is not set or
-                         set to None, its actual value will be automatically set to
-                         the channels number of the input.
-    :type num_channels: int
-    :param param_attr: The parameter attribute of the convolution. See ParameterAttribute for
-                       details.
-    :type param_attr: ParameterAttribute
-    :param shared_biases: Whether biases will be shared between filters or not.
-    :type shared_biases: bool
-    :param layer_attr: The extra layer attributes. See ExtraLayerAttribute for
-                       details.
-    :type layer_attr: ExtraLayerAttribute
-    :param trans: True if it is a convTransLayer, False if it is a convLayer.
-    :type trans: bool
-    :param layer_type: Specify the layer type. If the parameter is set, it must be "deconv3d"
-                       when trans=True. If not set, it will be automatically set to "deconv3d"
-                       when trans=True and "conv3d" when trans=False.
-    :type layer_type: basestring
-    :return: LayerOutput object.
-    :rtype: LayerOutput
-    """
-    if num_channels is None:
-        assert input.num_filters is not None
-        num_channels = input.num_filters
-
-    if isinstance(filter_size, collections.Sequence):
-        assert len(filter_size) == 3
-        filter_size, filter_size_y, filter_size_z = filter_size
-    else:
-        filter_size_y = filter_size
-        filter_size_z = filter_size
-
-    if isinstance(stride, collections.Sequence):
-        assert len(stride) == 3
-        stride, stride_y, stride_z = stride
-    else:
-        stride_y = stride
-        stride_z = stride
-
-    if isinstance(padding, collections.Sequence):
-        assert len(padding) == 3
-        padding, padding_y, padding_z = padding
-    else:
-        padding_y = padding
-        padding_z = padding
-
-    if param_attr.attr.get('initial_smart'):
-        # special initial for conv layers.
-        init_w = (2.0 / (filter_size**2 * num_channels))**0.5
-        param_attr.attr["initial_mean"] = 0.0
-        param_attr.attr["initial_std"] = init_w
-        param_attr.attr["initial_strategy"] = 0
-        param_attr.attr["initial_smart"] = False
-
-    if layer_type:
-        if trans:
-            assert layer_type in ["deconv3d"]
-        lt = layer_type
-    else:
-        lt = LayerType.DECONV3D_LAYER if trans else LayerType.CONV3D_LAYER
-
-    l = Layer(
-        name=name,
-        inputs=Input(
-            input.name,
-            conv=Conv3D(
-                filter_size=filter_size,
-                padding=padding,
-                stride=stride,
-                channels=num_channels,
-                groups=groups,
-                filter_size_y=filter_size_y,
-                padding_y=padding_y,
-                stride_y=stride_y,
-                filter_size_z=filter_size_z,
-                padding_z=padding_z,
-                stride_z=stride_z),
-            **param_attr.attr),
-        active_type=act.name,
-        num_filters=num_filters,
-        bias=ParamAttr.to_bias(bias_attr),
-        shared_biases=shared_biases,
-        type=lt,
-        **ExtraLayerAttribute.to_kwargs(layer_attr))
-    return LayerOutput(
-        name,
-        lt,
-        parents=[input],
-        activation=act,
-        num_filters=num_filters,
-        size=l.config.size)
-
-
-@wrap_name_default("scale_shift")
-@wrap_param_attr_default()
-@wrap_bias_attr_default()
-def scale_shift_layer(input, name=None, param_attr=None, bias_attr=None):
-    """
-    A layer that applies a linear transformation to each element in each row
-    of the input matrix.
-    For each element, the layer first re-scales it and then adds a bias to it.
-
-    This layer is very similar to the SlopeInterceptLayer, except that the
-    scale and bias are trainable.
-
-    .. math::
-
-        y = w * x + b
-
-    .. code-block:: python
-
-        scale_shift = scale_shift_layer(input=input_layer, bias_attr=False)
-
-    :param name: The name of this layer. It is optional.
-    :type name: basestring
-    :param input: The input of this layer.
-    :type input: LayerOutput
-    :param param_attr: The parameter attribute of scaling. See ParameterAttribute for
-                       details.
-    :type param_attr: ParameterAttribute
-    :param bias_attr: The bias attribute. If the parameter is set to False or an object
-                      whose type is not ParameterAttribute, no bias is defined. If the
-                      parameter is set to True, the bias is initialized to zero.
-    :type bias_attr: ParameterAttribute | None | bool | Any
-    :return: LayerOutput object.
-    :rtype: LayerOutput
-    """
-    Layer(
-        name=name,
-        type=LayerType.SCALE_SHIFT_LAYER,
-        inputs=Input(input.name, **param_attr.attr),
-        bias=ParamAttr.to_bias(bias_attr))
-    return LayerOutput(
-        name, LayerType.SCALE_SHIFT_LAYER, parents=[input], size=input.size)
-
-
-@wrap_name_default("resize")
-def resize_layer(input, size, name=None):
-    """
-    The resize layer resizes the input matrix with a shape of [Height, Width]
-    into the output matrix with a shape of [Height x Width / size, size],
-    where size is the parameter of this layer indicating the output dimension.
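-
-    The example usage below is a minimal sketch; the input layer and the
-    output dimension are illustrative:
-
-    .. code-block:: python
-
-        # reinterpret rows of width 256 as rows of width 64
-        resize = resize_layer(input=input_layer, size=64)
-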
-    :param input: The input of this layer.
-    :type input: LayerOutput
-    :param name: The name of this layer. It is optional.
-    :type name: basestring
-    :param size: The resized output dimension of this layer.
-    :type size: int
-    :return: A LayerOutput object.
-    :rtype: LayerOutput
-    """
-    Layer(name=name, type=LayerType.RESIZE, inputs=Input(input.name), size=size)
-    return LayerOutput(name, LayerType.RESIZE, parents=[input], size=input.size)
-
-
-@wrap_act_default(act=LinearActivation())
-@wrap_name_default('sub_seq')
-def sub_seq_layer(input, offsets, sizes, act=None, bias_attr=None, name=None):
-    """
-    sub_seq_layer will return sub-sequences from the input sequences. For each
-    sequence in the input sequence layer, sub_seq_layer will slice it by the
-    given offset and size. Note that the numbers of offset values and size
-    values are both equal to the number of sequences in the input layer.
-
-    .. code-block:: python
-
-        sub_seq = sub_seq_layer(input=input_seq, offsets=offsets, sizes=sizes)
-
-    :param name: The name of this layer. It is optional.
-    :type name: basestring
-    :param input: The input of this layer, which should be a sequence.
-    :type input: LayerOutput
-    :param offsets: The offset indices to slice the input sequence, which should
-                    be sequence type.
-    :type offsets: LayerOutput
-    :param sizes: The sizes of the sub-sequences, which should be sequence type.
-    :type sizes: LayerOutput
-    :param act: Activation type, LinearActivation is the default activation.
-    :type act: BaseActivation
-    :param bias_attr: The bias attribute. If the parameter is set to False or an object
-                      whose type is not ParameterAttribute, no bias is defined. If the
-                      parameter is set to True, the bias is initialized to zero.
-    :type bias_attr: ParameterAttribute | None | bool | Any
-    :return: LayerOutput object.
-    :rtype: LayerOutput
-    """
-
-    assert isinstance(input, LayerOutput), (
-        'The first input of sub_seq_layer must be a PaddlePaddle layer.')
-    assert isinstance(offsets, LayerOutput), (
-        'The offset indices for sub_seq_layer must be a PaddlePaddle layer.')
-    assert isinstance(sizes, LayerOutput), (
-        'The sizes of sub-sequences must be a PaddlePaddle layer.')
-
-    Layer(
-        name=name,
-        type=LayerType.SUB_SEQ_LAYER,
-        inputs=[input.name, offsets.name, sizes.name],
-        active_type=act.name,
-        bias=ParamAttr.to_bias(bias_attr))
-
-    return LayerOutput(
-        name,
-        LayerType.SUB_SEQ_LAYER,
-        parents=[input, offsets, sizes],
-        size=input.size)
-
-
-@wrap_name_default('scale_sub_region')
-def scale_sub_region_layer(input, indices, value, name=None):
-    """
-    Given an image or feature map with CHW information, scale_sub_region_layer
-    can be used to multiply the values of a continuous sub-region by a real
-    value. You can provide the start and end indices of C, H and W for each
-    instance. Note that all start indices count from 1.
-    The shape of indices should be [batch_size, 6] and the layout of each row
-    is [C_Start, C_End, H_Start, H_End, W_Start, W_End].
-
-    .. code-block:: python
-
-        scale_sub_region = scale_sub_region_layer(input=input,
-                                                  indices=indices,
-                                                  value=value)
-
-    :param name: The name of this layer. It is optional.
-    :type name: basestring
-    :param input: The input of this layer, which should contain CHW information.
-    :type input: LayerOutput
-    :param indices: The start and end indices for C, H and W. The input value
-                    should be a 2-D matrix with shape [batch_size, 6].
-    :type indices: LayerOutput
-    :param value: The value to multiply by.
-    :type value: float
-    :return: LayerOutput object.
-    :rtype: LayerOutput
-    """
-
-    assert isinstance(input, LayerOutput), (
-        'The first input of scale_sub_region_layer must be a PaddlePaddle layer.')
-    assert isinstance(indices, LayerOutput), (
-        'The start and end indices for CHW must be a PaddlePaddle layer.')
-    assert isinstance(value, float), (
-        'The value to multiply must be a real value.')
-
-    Layer(
-        name=name,
-        type=LayerType.SCALE_SUB_REGION_LAYER,
-        inputs=[input.name, indices.name],
-        value=value)
-
-    return LayerOutput(
-        name,
-        LayerType.SCALE_SUB_REGION_LAYER,
-        parents=[input, indices],
-        num_filters=input.num_filters,
-        size=input.size)
-
-
-@wrap_name_default()
-@wrap_act_default(act=LinearActivation())
-@wrap_param_attr_default()
-@layer_support()
-def factorization_machine(input,
-                          factor_size,
-                          act=None,
-                          name=None,
-                          param_attr=None,
-                          layer_attr=None):
-    """
-    The Factorization Machine models pairwise feature interactions as the inner
-    product of the learned latent vectors corresponding to each input feature.
-    The Factorization Machine can effectively capture feature interactions,
-    especially when the input is sparse.
-
-    This implementation only considers the second-order feature interactions
-    using a Factorization Machine with the formula:
-
-    .. math::
-        y = \sum_{i=1}^{n-1}\sum_{j=i+1}^n\langle v_i, v_j \\rangle x_i x_j
-
-    Note:
-        X is the input vector with size n. V is the factor matrix. Each row of V
-        is the latent vector corresponding to each input dimension. The size of
-        each latent vector is k.
-
-    For details of the Factorization Machine, please refer to the paper:
-    Factorization machines.
-
-    .. code-block:: python
-
-        first_order = paddle.layer.fc(input=input,
-                                      size=1,
-                                      act=paddle.activation.Linear())
-        second_order = paddle.layer.factorization_machine(input=input,
-                                                          factor_size=10)
-        fm = paddle.layer.addto(input=[first_order, second_order],
-                                act=paddle.activation.Linear(),
-                                bias_attr=False)
-
-    :param input: The input layer. Supported input types: all input data types
-                  on CPU, and only dense input types on GPU.
-    :type input: LayerOutput
-    :param factor_size: The hyperparameter that defines the dimensionality of
-                        the latent vector size.
-    :type factor_size: int
-    :param act: Activation Type. Default is linear activation.
-    :type act: BaseActivation
-    :param param_attr: The parameter attribute. See ParameterAttribute for
-                       details.
-    :type param_attr: ParameterAttribute
-    :param layer_attr: Extra Layer config.
-    :type layer_attr: ExtraLayerAttribute|None
-    :return: LayerOutput object.
-    :rtype: LayerOutput
-    """
-    assert isinstance(input, LayerOutput)
-    assert factor_size > 0, "the factor_size must be greater than 0."
-
-    Layer(
-        inputs=[Input(input.name, **param_attr.attr)],
-        name=name,
-        factor_size=factor_size,
-        type=LayerType.FACTORIZATION_MACHINE,
-        active_type=act.name,
-        **ExtraLayerAttribute.to_kwargs(layer_attr))
-    return LayerOutput(
-        name, LayerType.FACTORIZATION_MACHINE, input, activation=act, size=1)
diff --git a/python/paddle/trainer_config_helpers/networks.py b/python/paddle/trainer_config_helpers/networks.py
deleted file mode 100644
index b5cde7bac779ee1d54395b68941df2693e1ed0f5..0000000000000000000000000000000000000000
--- a/python/paddle/trainer_config_helpers/networks.py
+++ /dev/null
@@ -1,1813 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import math
-
-from activations import LinearActivation, ReluActivation, SoftmaxActivation, \
-    IdentityActivation, TanhActivation, SequenceSoftmaxActivation
-from attrs import ExtraAttr
-from default_decorators import wrap_name_default, wrap_act_default, \
-    wrap_param_default, wrap_bias_attr_default, wrap_param_attr_default
-from layers import *  # There are too many layers used in network, so import *
-from poolings import MaxPooling, SumPooling
-from paddle.trainer.config_parser import *
-
-__all__ = [
-    'sequence_conv_pool', 'simple_lstm', "simple_img_conv_pool",
-    "img_conv_bn_pool", 'lstmemory_group', 'lstmemory_unit', 'small_vgg',
-    'img_conv_group', 'img_separable_conv', 'vgg_16_network', 'gru_unit',
-    'gru_group', 'simple_gru', 'simple_attention', 'dot_product_attention',
-    'multi_head_attention', 'simple_gru2', 'bidirectional_gru',
-    'text_conv_pool', 'bidirectional_lstm', 'inputs', 'outputs'
-]
-
-######################################################
-#                     Text CNN                       #
-######################################################
-
-
-@wrap_name_default("sequence_conv_pooling")
-def sequence_conv_pool(input,
-                       context_len,
-                       hidden_size,
-                       name=None,
-                       context_start=None,
-                       pool_type=None,
-                       context_proj_layer_name=None,
-                       context_proj_param_attr=False,
-                       fc_layer_name=None,
-                       fc_param_attr=None,
-                       fc_bias_attr=None,
-                       fc_act=None,
-                       pool_bias_attr=None,
-                       fc_attr=None,
-                       context_attr=None,
-                       pool_attr=None):
-    """
-    Text convolution pooling group.
-
-    Text input => Context Projection => FC Layer => Pooling => Output.
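-
-    The example usage below is a minimal sketch; the embedding input and the
-    sizes are illustrative:
-
-    .. code-block:: python
-
-        seq_conv_pool = sequence_conv_pool(input=emb,
-                                           context_len=3,
-                                           hidden_size=256)
-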
-    :param name: group name.
-    :type name: basestring
-    :param input: input layer.
-    :type input: LayerOutput
-    :param context_len: context projection length. See
-                        context_projection's document.
-    :type context_len: int
-    :param hidden_size: FC Layer size.
-    :type hidden_size: int
-    :param context_start: context start position. See
-                          context_projection's context_start.
-    :type context_start: int|None
-    :param pool_type: pooling layer type. See pooling_layer's document.
-    :type pool_type: BasePoolingType
-    :param context_proj_layer_name: context projection layer name.
-                                    None if the user doesn't care.
-    :type context_proj_layer_name: basestring
-    :param context_proj_param_attr: padding parameter attribute of the context
-                                    projection layer. If set to False, the
-                                    padding is always zero.
-    :type context_proj_param_attr: ParameterAttribute|None
-    :param fc_layer_name: fc layer name. None if the user doesn't care.
-    :type fc_layer_name: basestring
-    :param fc_param_attr: fc layer parameter attribute. None if the user
-                          doesn't care.
-    :type fc_param_attr: ParameterAttribute|None
-    :param fc_bias_attr: fc bias parameter attribute. False if no bias,
-                         None if the user doesn't care.
-    :type fc_bias_attr: ParameterAttribute|False|None
-    :param fc_act: fc layer activation type. None means tanh.
-    :type fc_act: BaseActivation
-    :param pool_bias_attr: pooling layer bias attr. False if no bias.
-                           None if the user doesn't care.
-    :type pool_bias_attr: ParameterAttribute|False|None
-    :param fc_attr: fc layer extra attribute.
-    :type fc_attr: ExtraLayerAttribute
-    :param context_attr: context projection layer extra attribute.
-    :type context_attr: ExtraLayerAttribute
-    :param pool_attr: pooling layer extra attribute.
-    :type pool_attr: ExtraLayerAttribute
-    :return: layer's output.
-    :rtype: LayerOutput
-    """
-    # Set the default values of the parameters.
-    context_proj_layer_name = "%s_conv_proj" % name \
-        if context_proj_layer_name is None else context_proj_layer_name
-
-    with mixed_layer(
-            name=context_proj_layer_name,
-            size=input.size * context_len,
-            act=LinearActivation(),
-            layer_attr=context_attr) as m:
-        m += context_projection(
-            input,
-            context_len=context_len,
-            context_start=context_start,
-            padding_attr=context_proj_param_attr)
-
-    fc_layer_name = "%s_conv_fc" % name \
-        if fc_layer_name is None else fc_layer_name
-    fl = fc_layer(
-        name=fc_layer_name,
-        input=m,
-        size=hidden_size,
-        act=fc_act,
-        layer_attr=fc_attr,
-        param_attr=fc_param_attr,
-        bias_attr=fc_bias_attr)
-
-    return pooling_layer(
-        name=name,
-        input=fl,
-        pooling_type=pool_type,
-        bias_attr=pool_bias_attr,
-        layer_attr=pool_attr)
-
-
-text_conv_pool = sequence_conv_pool
-
-############################################################################
-#                              Images                                      #
-############################################################################
-
-
-@wrap_name_default("conv_pool")
-def simple_img_conv_pool(input,
-                         filter_size,
-                         num_filters,
-                         pool_size,
-                         name=None,
-                         pool_type=None,
-                         act=None,
-                         groups=1,
-                         conv_stride=1,
-                         conv_padding=0,
-                         bias_attr=None,
-                         num_channel=None,
-                         param_attr=None,
-                         shared_bias=True,
-                         conv_layer_attr=None,
-                         pool_stride=1,
-                         pool_padding=0,
-                         pool_layer_attr=None):
-    """
-    Simple image convolution and pooling group.
-
-    Img input => Conv => Pooling => Output.
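-
-    The example usage below is a minimal sketch; the image input and the
-    hyperparameters are illustrative:
-
-    .. code-block:: python
-
-        conv_pool = simple_img_conv_pool(input=img,
-                                         filter_size=3,
-                                         num_filters=64,
-                                         pool_size=2,
-                                         pool_stride=2,
-                                         act=ReluActivation())
-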
-    :param name: group name.
-    :type name: basestring
-    :param input: input layer.
-    :type input: LayerOutput
-    :param filter_size: see img_conv_layer for details.
-    :type filter_size: int
-    :param num_filters: see img_conv_layer for details.
-    :type num_filters: int
-    :param pool_size: see img_pool_layer for details.
-    :type pool_size: int
-    :param pool_type: see img_pool_layer for details.
-    :type pool_type: BasePoolingType
-    :param act: see img_conv_layer for details.
-    :type act: BaseActivation
-    :param groups: see img_conv_layer for details.
-    :type groups: int
-    :param conv_stride: see img_conv_layer for details.
-    :type conv_stride: int
-    :param conv_padding: see img_conv_layer for details.
-    :type conv_padding: int
-    :param bias_attr: see img_conv_layer for details.
-    :type bias_attr: ParameterAttribute
-    :param num_channel: see img_conv_layer for details.
-    :type num_channel: int
-    :param param_attr: see img_conv_layer for details.
-    :type param_attr: ParameterAttribute
-    :param shared_bias: see img_conv_layer for details.
-    :type shared_bias: bool
-    :param conv_layer_attr: see img_conv_layer for details.
-    :type conv_layer_attr: ExtraLayerAttribute
-    :param pool_stride: see img_pool_layer for details.
-    :type pool_stride: int
-    :param pool_padding: see img_pool_layer for details.
-    :type pool_padding: int
-    :param pool_layer_attr: see img_pool_layer for details.
-    :type pool_layer_attr: ExtraLayerAttribute
-    :return: layer's output
-    :rtype: LayerOutput
-    """
-    _conv_ = img_conv_layer(
-        name="%s_conv" % name,
-        input=input,
-        filter_size=filter_size,
-        num_filters=num_filters,
-        num_channels=num_channel,
-        act=act,
-        groups=groups,
-        stride=conv_stride,
-        padding=conv_padding,
-        bias_attr=bias_attr,
-        param_attr=param_attr,
-        shared_biases=shared_bias,
-        layer_attr=conv_layer_attr)
-    return img_pool_layer(
-        name="%s_pool" % name,
-        input=_conv_,
-        pool_size=pool_size,
-        pool_type=pool_type,
-        stride=pool_stride,
-        padding=pool_padding,
-        layer_attr=pool_layer_attr)
-
-
-@wrap_name_default("conv_bn_pool")
-def img_conv_bn_pool(input,
-                     filter_size,
-                     num_filters,
-                     pool_size,
-                     name=None,
-                     pool_type=None,
-                     act=None,
-                     groups=1,
-                     conv_stride=1,
-                     conv_padding=0,
-                     conv_bias_attr=None,
-                     num_channel=None,
-                     conv_param_attr=None,
-                     shared_bias=True,
-                     conv_layer_attr=None,
-                     bn_param_attr=None,
-                     bn_bias_attr=None,
-                     bn_layer_attr=None,
-                     pool_stride=1,
-                     pool_padding=0,
-                     pool_layer_attr=None):
-    """
-    Convolution, batch normalization, pooling group.
-
-    Img input => Conv => BN => Pooling => Output.
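-
-    The example usage below is a minimal sketch; the image input and the
-    hyperparameters are illustrative:
-
-    .. code-block:: python
-
-        conv_bn_pool = img_conv_bn_pool(input=img,
-                                        filter_size=3,
-                                        num_filters=64,
-                                        pool_size=2,
-                                        pool_stride=2,
-                                        act=ReluActivation())
-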
-    :param name: group name.
-    :type name: basestring
-    :param input: input layer.
-    :type input: LayerOutput
-    :param filter_size: see img_conv_layer for details.
-    :type filter_size: int
-    :param num_filters: see img_conv_layer for details.
-    :type num_filters: int
-    :param pool_size: see img_pool_layer for details.
-    :type pool_size: int
-    :param pool_type: see img_pool_layer for details.
-    :type pool_type: BasePoolingType
-    :param act: see batch_norm_layer for details.
-    :type act: BaseActivation
-    :param groups: see img_conv_layer for details.
-    :type groups: int
-    :param conv_stride: see img_conv_layer for details.
-    :type conv_stride: int
-    :param conv_padding: see img_conv_layer for details.
-    :type conv_padding: int
-    :param conv_bias_attr: see img_conv_layer for details.
-    :type conv_bias_attr: ParameterAttribute
-    :param num_channel: see img_conv_layer for details.
-    :type num_channel: int
-    :param conv_param_attr: see img_conv_layer for details.
-    :type conv_param_attr: ParameterAttribute
-    :param shared_bias: see img_conv_layer for details.
-    :type shared_bias: bool
-    :param conv_layer_attr: see img_conv_layer for details.
-    :type conv_layer_attr: ExtraLayerAttribute
-    :param bn_param_attr: see batch_norm_layer for details.
-    :type bn_param_attr: ParameterAttribute
-    :param bn_bias_attr: see batch_norm_layer for details.
-    :type bn_bias_attr: ParameterAttribute
-    :param bn_layer_attr: see batch_norm_layer for details.
-    :type bn_layer_attr: ExtraLayerAttribute
-    :param pool_stride: see img_pool_layer for details.
-    :type pool_stride: int
-    :param pool_padding: see img_pool_layer for details.
-    :type pool_padding: int
-    :param pool_layer_attr: see img_pool_layer for details.
-    :type pool_layer_attr: ExtraLayerAttribute
-    :return: layer's output
-    :rtype: LayerOutput
-    """
-    __conv__ = img_conv_layer(
-        name="%s_conv" % name,
-        input=input,
-        filter_size=filter_size,
-        num_filters=num_filters,
-        num_channels=num_channel,
-        act=LinearActivation(),
-        groups=groups,
-        stride=conv_stride,
-        padding=conv_padding,
-        bias_attr=conv_bias_attr,
-        param_attr=conv_param_attr,
-        shared_biases=shared_bias,
-        layer_attr=conv_layer_attr)
-    __bn__ = batch_norm_layer(
-        name="%s_bn" % name,
-        input=__conv__,
-        act=act,
-        bias_attr=bn_bias_attr,
-        param_attr=bn_param_attr,
-        layer_attr=bn_layer_attr)
-    return img_pool_layer(
-        name="%s_pool" % name,
-        input=__bn__,
-        pool_type=pool_type,
-        pool_size=pool_size,
-        stride=pool_stride,
-        padding=pool_padding,
-        layer_attr=pool_layer_attr)
-
-
-@wrap_act_default(param_names=['conv_act'], act=ReluActivation())
-@wrap_param_default(
-    param_names=['pool_type'], default_factory=lambda _: MaxPooling())
-def img_conv_group(input,
-                   conv_num_filter,
-                   pool_size,
-                   num_channels=None,
-                   conv_padding=1,
-                   conv_filter_size=3,
-                   conv_act=None,
-                   conv_with_batchnorm=False,
-                   conv_batchnorm_drop_rate=0,
-                   pool_stride=1,
-                   pool_type=None,
-                   param_attr=None):
-    """
-    Image convolution group, used for the VGG network.
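-
-    The example usage below is a minimal sketch; the image input and the
-    hyperparameters are illustrative:
-
-    .. code-block:: python
-
-        conv_group = img_conv_group(input=img,
-                                    num_channels=3,
-                                    conv_num_filter=[64, 64],
-                                    conv_filter_size=3,
-                                    conv_padding=1,
-                                    conv_act=ReluActivation(),
-                                    pool_size=2,
-                                    pool_stride=2,
-                                    pool_type=MaxPooling())
-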
-    :param conv_batchnorm_drop_rate: if conv_with_batchnorm[i] is true,
-                                     conv_batchnorm_drop_rate[i] represents the
-                                     drop rate of each batch norm.
-    :type conv_batchnorm_drop_rate: list
-    :param input: input layer.
-    :type input: LayerOutput
-    :param conv_num_filter: list of the numbers of output channels.
-    :type conv_num_filter: list|tuple
-    :param pool_size: pooling filter size.
-    :type pool_size: int
-    :param num_channels: number of input channels.
-    :type num_channels: int
-    :param conv_padding: convolution padding size.
-    :type conv_padding: int
-    :param conv_filter_size: convolution filter size.
-    :type conv_filter_size: int
-    :param conv_act: activation function after convolution.
-    :type conv_act: BaseActivation
-    :param conv_with_batchnorm: if conv_with_batchnorm[i] is true,
-                                there is a batch normalization operation after
-                                each convolution.
-    :type conv_with_batchnorm: list
-    :param pool_stride: pooling stride size.
-    :type pool_stride: int
-    :param pool_type: pooling type.
-    :type pool_type: BasePoolingType
-    :param param_attr: param attribute of the convolution layer,
-                       None means the default attribute.
-    :type param_attr: ParameterAttribute
-    :return: layer's output
-    :rtype: LayerOutput
-    """
-    tmp = input
-
-    # Type checks
-    assert isinstance(tmp, LayerOutput)
-    assert isinstance(conv_num_filter, list) or isinstance(conv_num_filter,
-                                                           tuple)
-    for each_num_filter in conv_num_filter:
-        assert isinstance(each_num_filter, int)
-
-    assert isinstance(pool_size, int)
-
-    def __extend_list__(obj):
-        if not hasattr(obj, '__len__'):
-            return [obj] * len(conv_num_filter)
-        else:
-            return obj
-
-    conv_padding = __extend_list__(conv_padding)
-    conv_filter_size = __extend_list__(conv_filter_size)
-    conv_act = __extend_list__(conv_act)
-    conv_with_batchnorm = __extend_list__(conv_with_batchnorm)
-    conv_batchnorm_drop_rate = __extend_list__(conv_batchnorm_drop_rate)
-
-    for i in xrange(len(conv_num_filter)):
-        extra_kwargs = dict()
-        if num_channels is not None:
-            extra_kwargs['num_channels'] = num_channels
-            num_channels = None
-        if conv_with_batchnorm[i]:
-            extra_kwargs['act'] = LinearActivation()
-        else:
-            extra_kwargs['act'] = conv_act[i]
-
-        tmp = img_conv_layer(
-            input=tmp,
-            padding=conv_padding[i],
-            filter_size=conv_filter_size[i],
-            num_filters=conv_num_filter[i],
-            param_attr=param_attr,
-            **extra_kwargs)
-
-        # logger.debug("tmp.num_filters = %d" % tmp.num_filters)
-
-        if conv_with_batchnorm[i]:
-            dropout = conv_batchnorm_drop_rate[i]
-            if dropout == 0 or abs(dropout) < 1e-5:  # dropout not set
-                tmp = batch_norm_layer(input=tmp, act=conv_act[i])
-            else:
-                tmp = batch_norm_layer(
-                    input=tmp,
-                    act=conv_act[i],
-                    layer_attr=ExtraAttr(drop_rate=dropout))
-
-    return img_pool_layer(
-        input=tmp, stride=pool_stride, pool_size=pool_size, pool_type=pool_type)
-
-
-@wrap_name_default("separable_conv")
-def img_separable_conv(input,
-                       num_channels,
-                       num_out_channels,
-                       filter_size,
-                       stride=1,
-                       padding=0,
-                       depth_multiplier=1,
-                       act=None,
-                       bias_attr=None,
-                       param_attr=None,
-                       shared_bias=True,
-                       layer_type='exconv',
-                       name=None):
-    """
-    Separable Convolution.
-
-    The separable convolution module consists of a depthwise convolution
-    that acts separately on the input channels, followed by a pointwise
-    convolution with 1*1 kernels that mixes the channels. It is used for
-    Xception: https://arxiv.org/pdf/1610.02357.pdf
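-
-    The example usage below is a minimal sketch; the image input and the
-    hyperparameters are illustrative:
-
-    .. code-block:: python
-
-        sep_conv = img_separable_conv(input=img,
-                                      num_channels=32,
-                                      num_out_channels=64,
-                                      filter_size=3,
-                                      padding=1,
-                                      act=ReluActivation())
-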
-    :param input: input layer.
-    :type input: LayerOutput
-    :param num_channels: the number of input channels.
-    :type num_channels: int
-    :param num_out_channels: the number of output channels.
-    :type num_out_channels: int
-    :param filter_size: the filter size for the depthwise convolution.
-    :type filter_size: int|tuple
-    :param stride: the stride size for the depthwise convolution.
-    :type stride: int|tuple
-    :param padding: the padding size for the depthwise convolution.
-    :type padding: int|tuple
-    :param depth_multiplier: the number of filters for each channel in the
-                             depthwise convolution.
-    :type depth_multiplier: int
-    :param act: the activation function for the output.
-    :type act: BaseActivation
-    :param bias_attr: see img_conv_layer for details.
-    :type bias_attr: ParameterAttribute
-    :param param_attr: see img_conv_layer for details.
-    :type param_attr: ParameterAttribute
-    :param shared_bias: see img_conv_layer for details.
-    :type shared_bias: bool
-    :param layer_type: see img_conv_layer for details.
-    :type layer_type: basestring
-    :return: layer's output
-    :rtype: LayerOutput
-    """
-    __depthwise_conv__ = img_conv_layer(
-        name="%s_depthwise_conv" % name,
-        input=input,
-        num_channels=num_channels,
-        num_filters=num_channels * depth_multiplier,
-        groups=num_channels,
-        filter_size=filter_size,
-        stride=stride,
-        padding=padding,
-        act=LinearActivation(),
-        bias_attr=bias_attr,
-        param_attr=param_attr,
-        shared_biases=shared_bias,
-        layer_type=layer_type)
-    __pointwise_conv__ = img_conv_layer(
-        name="%s_pointwise_conv" % name,
-        input=__depthwise_conv__,
-        num_channels=num_channels * depth_multiplier,
-        num_filters=num_out_channels,
-        filter_size=1,
-        stride=1,
-        padding=0,
-        act=act,
-        bias_attr=bias_attr,
-        param_attr=param_attr,
-        shared_biases=shared_bias)
-    return __pointwise_conv__
-
-
-def small_vgg(input_image, num_channels, num_classes):
-    def __vgg__(ipt, num_filter, times, dropouts, num_channels_=None):
-        return img_conv_group(
-            input=ipt,
-            num_channels=num_channels_,
-            pool_size=2,
-            pool_stride=2,
-            conv_num_filter=[num_filter] * times,
-            conv_filter_size=3,
-            conv_act=ReluActivation(),
-            conv_with_batchnorm=True,
-            conv_batchnorm_drop_rate=dropouts,
-            pool_type=MaxPooling())
-
-    tmp = __vgg__(input_image, 64, 2, [0.3, 0], num_channels)
-    tmp = __vgg__(tmp, 128, 2, [0.4, 0])
-    tmp = __vgg__(tmp, 256, 3, [0.4, 0.4, 0])
-    tmp = __vgg__(tmp, 512, 3, [0.4, 0.4, 0])
-    tmp = img_pool_layer(
-        input=tmp, stride=2, pool_size=2, pool_type=MaxPooling())
-    tmp = dropout_layer(input=tmp, dropout_rate=0.5)
-    tmp = fc_layer(
-        input=tmp,
-        size=512,
-        layer_attr=ExtraAttr(drop_rate=0.5),
-        act=LinearActivation())
-    tmp = batch_norm_layer(input=tmp, act=ReluActivation())
-    return fc_layer(input=tmp, size=num_classes, act=SoftmaxActivation())
-
-
-def vgg_16_network(input_image, num_channels, num_classes=1000):
-    """
-    Same model as https://gist.github.com/ksimonyan/211839e770f7b538e2d8
-
-    :param num_classes: number of classes.
-    :type num_classes: int
-    :param input_image: input layer.
-    :type input_image: LayerOutput
-    :param num_channels: number of input channels.
-    :type num_channels: int
-    :return: layer's output
-    :rtype: LayerOutput
-    """
-
-    tmp = img_conv_group(
-        input=input_image,
-        num_channels=num_channels,
-        conv_padding=1,
-        conv_num_filter=[64, 64],
-        conv_filter_size=3,
-        conv_act=ReluActivation(),
-        pool_size=2,
-        pool_stride=2,
-        pool_type=MaxPooling())
-
-    tmp = img_conv_group(
-        input=tmp,
-        conv_num_filter=[128, 128],
-        conv_padding=1,
-        conv_filter_size=3,
-        conv_act=ReluActivation(),
-        pool_stride=2,
-        pool_type=MaxPooling(),
-        pool_size=2)
-
-    tmp = img_conv_group(
-        input=tmp,
-        conv_num_filter=[256, 256, 256],
-        conv_padding=1,
-        conv_filter_size=3,
-        conv_act=ReluActivation(),
-        pool_stride=2,
-        pool_type=MaxPooling(),
-        pool_size=2)
-
-    tmp = img_conv_group(
-        input=tmp,
-        conv_num_filter=[512, 512, 512],
-        conv_padding=1,
-        conv_filter_size=3,
-        conv_act=ReluActivation(),
-        pool_stride=2,
-        pool_type=MaxPooling(),
-        pool_size=2)
-    tmp = img_conv_group(
-        input=tmp,
-        conv_num_filter=[512, 512, 512],
-        conv_padding=1,
-        conv_filter_size=3,
-        conv_act=ReluActivation(),
-        pool_stride=2,
-        pool_type=MaxPooling(),
-        pool_size=2)
-
-    tmp = fc_layer(
-        input=tmp,
-        size=4096,
-        act=ReluActivation(),
-        layer_attr=ExtraAttr(drop_rate=0.5))
-
-    tmp = fc_layer(
-        input=tmp,
-        size=4096,
-        act=ReluActivation(),
-        layer_attr=ExtraAttr(drop_rate=0.5))
-
-    return fc_layer(input=tmp, size=num_classes, act=SoftmaxActivation())
-
-
-############################################################################
-#                            Recurrent                                     #
-############################################################################
-
-
-@wrap_name_default("lstm")
-def simple_lstm(input,
-                size,
-                name=None,
-                reverse=False,
-                mat_param_attr=None,
-                bias_param_attr=None,
-                inner_param_attr=None,
-                act=None,
-                gate_act=None,
-                state_act=None,
-                mixed_layer_attr=None,
-                lstm_cell_attr=None):
-    """
-    Simple LSTM cell.
-
-    It just combines a mixed layer with a full_matrix_projection and a
-    lstmemory layer. The simple LSTM cell is implemented with the following
-    equations:
-
-    .. math::
-
-        i_t & = \\sigma(W_{xi}x_{t} + W_{hi}h_{t-1} + W_{ci}c_{t-1} + b_i)
-
-        f_t & = \\sigma(W_{xf}x_{t} + W_{hf}h_{t-1} + W_{cf}c_{t-1} + b_f)
-
-        c_t & = f_tc_{t-1} + i_t \\tanh(W_{xc}x_t + W_{hc}h_{t-1} + b_c)
-
-        o_t & = \\sigma(W_{xo}x_{t} + W_{ho}h_{t-1} + W_{co}c_t + b_o)
-
-        h_t & = o_t \\tanh(c_t)
-
-    Please refer to **Generating Sequences With Recurrent Neural Networks** for
-    more details about the LSTM. Link_ is here.
-
-    .. _Link: http://arxiv.org/abs/1308.0850
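-
-    The example usage below is a minimal sketch; the input layer and the size
-    are illustrative:
-
-    .. code-block:: python
-
-        lstm = simple_lstm(input=input_layer, size=256)
-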
-    :param name: lstm layer name.
-    :type name: basestring
-    :param input: layer's input.
-    :type input: LayerOutput
-    :param size: lstm layer size.
-    :type size: int
-    :param reverse: process the input in a reverse order or not.
-    :type reverse: bool
-    :param mat_param_attr: parameter attribute of the matrix projection in the
-                           mixed layer.
-    :type mat_param_attr: ParameterAttribute
-    :param bias_param_attr: bias parameter attribute. False means no bias, None
-                            means default bias.
-    :type bias_param_attr: ParameterAttribute|False
-    :param inner_param_attr: parameter attribute of the lstm cell.
-    :type inner_param_attr: ParameterAttribute
-    :param act: last activation type of the lstm.
-    :type act: BaseActivation
-    :param gate_act: gate activation type of the lstm.
-    :type gate_act: BaseActivation
-    :param state_act: state activation type of the lstm.
-    :type state_act: BaseActivation
-    :param mixed_layer_attr: extra attribute of the mixed layer.
-    :type mixed_layer_attr: ExtraLayerAttribute
-    :param lstm_cell_attr: extra attribute of the lstm.
-    :type lstm_cell_attr: ExtraLayerAttribute
-    :return: layer's output.
-    :rtype: LayerOutput
-    """
-    fc_name = 'lstm_transform_%s' % name
-    with mixed_layer(
-            name=fc_name,
-            size=size * 4,
-            act=IdentityActivation(),
-            layer_attr=mixed_layer_attr,
-            bias_attr=False) as m:
-        m += full_matrix_projection(input, param_attr=mat_param_attr)
-
-    return lstmemory(
-        name=name,
-        input=m,
-        reverse=reverse,
-        bias_attr=bias_param_attr,
-        param_attr=inner_param_attr,
-        act=act,
-        gate_act=gate_act,
-        state_act=state_act,
-        layer_attr=lstm_cell_attr)
-
-
-@wrap_name_default('lstm_unit')
-def lstmemory_unit(input,
-                   out_memory=None,
-                   name=None,
-                   size=None,
-                   param_attr=None,
-                   act=None,
-                   gate_act=None,
-                   state_act=None,
-                   input_proj_bias_attr=None,
-                   input_proj_layer_attr=None,
-                   lstm_bias_attr=None,
-                   lstm_layer_attr=None):
-    """
-    lstmemory_unit defines the calculation process of an LSTM unit during a
-    single time step. This function is not a recurrent layer, so it cannot be
-    directly used to process sequence input. This function is always used in
-    recurrent_group (see layers.py for more details) to implement attention
-    mechanisms.
-
-    Please refer to **Generating Sequences With Recurrent Neural Networks**
-    for more details about the LSTM. The link goes as follows:
-    .. _Link: https://arxiv.org/abs/1308.0850
-
-    .. math::
-
-        i_t & = \\sigma(W_{x_i}x_{t} + W_{h_i}h_{t-1} + W_{c_i}c_{t-1} + b_i)
-
-        f_t & = \\sigma(W_{x_f}x_{t} + W_{h_f}h_{t-1} + W_{c_f}c_{t-1} + b_f)
-
-        c_t & = f_tc_{t-1} + i_t \\tanh(W_{x_c}x_t + W_{h_c}h_{t-1} + b_c)
-
-        o_t & = \\sigma(W_{x_o}x_{t} + W_{h_o}h_{t-1} + W_{c_o}c_t + b_o)
-
-        h_t & = o_t \\tanh(c_t)
-
-    The example usage is:
-
-    .. code-block:: python
-
-        lstm_step = lstmemory_unit(input=[layer1],
-                                   size=256,
-                                   act=TanhActivation(),
-                                   gate_act=SigmoidActivation(),
-                                   state_act=TanhActivation())
-
-
-    :param input: Input layer.
-    :type input: LayerOutput
-    :param out_memory: The output of the previous time step.
-    :type out_memory: LayerOutput | None
-    :param name: The lstmemory unit name.
-    :type name: basestring
-    :param size: The lstmemory unit size.
-    :type size: int
-    :param param_attr: The parameter attribute for the weights in
-                       the input to hidden projection.
-                       None means default attribute.
-    :type param_attr: ParameterAttribute
-    :param act: The last activation type of the lstm.
-    :type act: BaseActivation
-    :param gate_act: The gate activation type of the lstm.
-    :type gate_act: BaseActivation
-    :param state_act: The state activation type of the lstm.
-    :type state_act: BaseActivation
-    :param input_proj_bias_attr: The parameter attribute for the bias in
-                                 the input to hidden projection.
-                                 False or None means no bias.
-                                 If this parameter is set to True,
-                                 the bias is initialized to zero.
-    :type input_proj_bias_attr: ParameterAttribute|bool|None
-    :param input_proj_layer_attr: The extra layer attribute for
-                                  the input to hidden projection of the LSTM
-                                  unit, such as dropout, error clipping.
-    :type input_proj_layer_attr: ExtraLayerAttribute
-    :param lstm_bias_attr: The parameter attribute for the bias in the lstm layer.
-                           False or None means no bias.
-                           If this parameter is set to True,
-                           the bias is initialized to zero.
-    :type lstm_bias_attr: ParameterAttribute|True|None
-    :param lstm_layer_attr: The extra attribute of the lstm layer.
-    :type lstm_layer_attr: ExtraLayerAttribute
-    :return: The lstmemory unit output.
-    :rtype: LayerOutput
-    """
-    if size is None:
-        assert input.size % 4 == 0
-        size = input.size / 4
-    if out_memory is None:
-        out_mem = memory(name=name, size=size)
-    else:
-        out_mem = out_memory
-
-    state_mem = memory(name="%s_state" % name, size=size)
-
-    with mixed_layer(
-            name="%s_input_recurrent" % name,
-            size=size * 4,
-            bias_attr=input_proj_bias_attr,
-            layer_attr=input_proj_layer_attr,
-            act=IdentityActivation()) as m:
-        m += identity_projection(input=input)
-        m += full_matrix_projection(input=out_mem, param_attr=param_attr)
-
-    lstm_out = lstm_step_layer(
-        name=name,
-        input=m,
-        state=state_mem,
-        size=size,
-        bias_attr=lstm_bias_attr,
-        act=act,
-        gate_act=gate_act,
-        state_act=state_act,
-        layer_attr=lstm_layer_attr)
-    get_output_layer(name='%s_state' % name, input=lstm_out, arg_name='state')
-
-    return lstm_out
-
-
-@wrap_name_default('lstm_group')
-def lstmemory_group(input,
-                    size=None,
-                    name=None,
-                    out_memory=None,
-                    reverse=False,
-                    param_attr=None,
-                    act=None,
-                    gate_act=None,
-                    state_act=None,
-                    input_proj_bias_attr=None,
-                    input_proj_layer_attr=None,
-                    lstm_bias_attr=None,
-                    lstm_layer_attr=None):
-    """
-    lstm_group is a recurrent_group version of the Long Short Term Memory. It
-    does exactly the same calculation as the lstmemory layer (see lstmemory in
-    layers.py for the maths) does. A notable benefit is that the LSTM memory
-    cell states (or hidden states) at every time step are accessible to the
-    user. This is especially useful in attention models. If you do not need to
-    access the internal states of the lstm and merely use its outputs,
-    it is recommended to use the lstmemory, which is relatively faster than
-    lstmemory_group.
-
-    NOTE: In PaddlePaddle's implementation, the following input-to-hidden
-    multiplications:
-    :math:`W_{x_i}x_{t}` , :math:`W_{x_f}x_{t}`,
-    :math:`W_{x_c}x_t`, :math:`W_{x_o}x_{t}` are not done in lstmemory_unit to
-    speed up the calculations. Consequently, an additional mixed_layer with
-    full_matrix_projection must be included before lstmemory_unit is called.
-
-    The example usage is:
-
-    .. code-block:: python
-
-        lstm_step = lstmemory_group(input=[layer1],
-                                    size=256,
-                                    act=TanhActivation(),
-                                    gate_act=SigmoidActivation(),
-                                    state_act=TanhActivation())
-
-    :param input: Input layer.
-    :type input: LayerOutput
-    :param size: The lstmemory group size.
-    :type size: int
-    :param name: The name of lstmemory group.
-    :type name: basestring
-    :param out_memory: The output of the previous time step.
-    :type out_memory: LayerOutput | None
-    :param reverse: Process the input in a reverse order or not.
-    :type reverse: bool
-    :param param_attr: The parameter attribute for the weights in
-                       the input to hidden projection.
-                       None means default attribute.
-    :type param_attr: ParameterAttribute
-    :param act: The last activation type of the lstm.
-    :type act: BaseActivation
-    :param gate_act: The gate activation type of the lstm.
-    :type gate_act: BaseActivation
-    :param state_act: The state activation type of the lstm.
-    :type state_act: BaseActivation
-    :param input_proj_bias_attr: The parameter attribute for the bias in
-                                 the input to hidden projection.
-                                 False or None means no bias.
-                                 If this parameter is set to True,
-                                 the bias is initialized to zero.
-    :type input_proj_bias_attr: ParameterAttribute|bool|None
-    :param input_proj_layer_attr: The extra layer attribute for
-                                  the input to hidden projection of the LSTM
-                                  unit, such as dropout, error clipping.
-    :type input_proj_layer_attr: ExtraLayerAttribute
-    :param lstm_bias_attr: The parameter attribute for the bias in the lstm layer.
-                           False or None means no bias.
-                           If this parameter is set to True,
-                           the bias is initialized to zero.
-    :type lstm_bias_attr: ParameterAttribute|True|None
-    :param lstm_layer_attr: The extra attribute of the lstm layer.
-    :type lstm_layer_attr: ExtraLayerAttribute
-    :return: the lstmemory group.
-    :rtype: LayerOutput
-    """
-
-    def __lstm_step__(ipt):
-        return lstmemory_unit(
-            input=ipt,
-            name=name,
-            size=size,
-            act=act,
-            gate_act=gate_act,
-            state_act=state_act,
-            out_memory=out_memory,
-            input_proj_bias_attr=input_proj_bias_attr,
-            input_proj_layer_attr=input_proj_layer_attr,
-            param_attr=param_attr,
-            lstm_layer_attr=lstm_layer_attr,
-            lstm_bias_attr=lstm_bias_attr)
-
-    return recurrent_group(
-        name='%s_recurrent_group' % name,
-        step=__lstm_step__,
-        reverse=reverse,
-        input=input)
-
-
-@wrap_name_default('gru_unit')
-def gru_unit(input,
-             memory_boot=None,
-             size=None,
-             name=None,
-             gru_bias_attr=None,
-             gru_param_attr=None,
-             act=None,
-             gate_act=None,
-             gru_layer_attr=None,
-             naive=False):
-    """
-    gru_unit defines the calculation process of a gated recurrent unit during a
-    single time step. This function is not a recurrent layer, so it cannot be
-    directly used to process sequence input. This function is always used in
-    the recurrent_group (see layers.py for more details) to implement attention
-    mechanisms.
-
-    Please see grumemory in layers.py for the details about the maths.
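-
-    The example usage below is a minimal sketch of calling gru_unit inside a
-    recurrent_group step function; the names and sizes are illustrative. Note
-    that the input must already be projected to 3 * size, e.g. by a
-    mixed_layer with a full_matrix_projection:
-
-    .. code-block:: python
-
-        def __step__(ipt):
-            with mixed_layer(size=256 * 3) as m:
-                m += full_matrix_projection(input=ipt)
-            return gru_unit(input=m, size=256)
-
-        gru_out = recurrent_group(step=__step__, input=emb)
-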
-    :param input: input layer.
-    :type input: LayerOutput
-    :param memory_boot: the initialization state of the GRU cell.
-    :type memory_boot: LayerOutput | None
-    :param name: name of the gru group.
-    :type name: basestring
-    :param size: hidden size of the gru.
-    :type size: int
-    :param act: activation type of the gru.
-    :type act: BaseActivation
-    :param gate_act: gate activation type of the gru.
-    :type gate_act: BaseActivation
-    :param gru_layer_attr: Extra attribute of the gru layer.
-    :type gru_layer_attr: ExtraLayerAttribute
-    :return: the gru output layer.
-    :rtype: LayerOutput
-    """
-
-    assert input.size % 3 == 0
-    if size is None:
-        size = input.size / 3
-
-    out_mem = memory(name=name, size=size, boot_layer=memory_boot)
-
-    if naive:
-        __step__ = gru_step_naive_layer
-    else:
-        __step__ = gru_step_layer
-
-    gru_out = __step__(
-        name=name,
-        input=input,
-        output_mem=out_mem,
-        size=size,
-        bias_attr=gru_bias_attr,
-        param_attr=gru_param_attr,
-        act=act,
-        gate_act=gate_act,
-        layer_attr=gru_layer_attr)
-    return gru_out
-
-
-@wrap_name_default('gru_group')
-def gru_group(input,
-              memory_boot=None,
-              size=None,
-              name=None,
-              reverse=False,
-              gru_bias_attr=None,
-              gru_param_attr=None,
-              act=None,
-              gate_act=None,
-              gru_layer_attr=None,
-              naive=False):
-    """
-    gru_group is a recurrent_group version of the Gated Recurrent Unit. It
-    does exactly the same calculation as the grumemory layer does. A notable
-    benefit is that the gru hidden states are accessible to the user. This is
-    especially useful in attention models. If you do not need to access
-    any internal state and merely use the outputs of a GRU, it is recommended
-    to use the grumemory, which is relatively faster.
-
-    Please see grumemory in layers.py for more details about the maths.
-
-    The example usage is:
-
-    .. code-block:: python
-
-        gru = gru_group(input=[layer1],
-                        size=256,
-                        act=TanhActivation(),
-                        gate_act=SigmoidActivation())
-
-    :param input: input layer.
-    :type input: LayerOutput
-    :param memory_boot: the initialization state of the GRU cell.
-    :type memory_boot: LayerOutput | None
-    :param name: name of the gru group.
-    :type name: basestring
-    :param size: hidden size of the gru.
-    :type size: int
-    :param reverse: process the input in a reverse order or not.
-    :type reverse: bool
-    :param act: activation type of the gru.
-    :type act: BaseActivation
-    :param gate_act: gate activation type of the gru.
-    :type gate_act: BaseActivation
-    :param gru_bias_attr: bias parameter attribute of the gru layer,
-                          False means no bias, None means default bias.
-    :type gru_bias_attr: ParameterAttribute|False|None
-    :param gru_layer_attr: Extra attribute of the gru layer.
-    :type gru_layer_attr: ExtraLayerAttribute
-    :return: the gru group.
-    :rtype: LayerOutput
-    """
-
-    def __gru_step__(ipt):
-        return gru_unit(
-            input=ipt,
-            memory_boot=memory_boot,
-            name=name,
-            size=size,
-            gru_bias_attr=gru_bias_attr,
-            gru_param_attr=gru_param_attr,
-            act=act,
-            gate_act=gate_act,
-            gru_layer_attr=gru_layer_attr,
-            naive=naive)
-
-    return recurrent_group(
-        name='%s_recurrent_group' % name,
-        step=__gru_step__,
-        reverse=reverse,
-        input=input)
-
-
-@wrap_name_default('simple_gru')
-def simple_gru(input,
-               size,
-               name=None,
-               reverse=False,
-               mixed_param_attr=None,
-               mixed_bias_param_attr=None,
-               mixed_layer_attr=None,
-               gru_bias_attr=None,
-               gru_param_attr=None,
-               act=None,
-               gate_act=None,
-               gru_layer_attr=None,
-               naive=False):
-    """
-    You may see gru_step_layer and grumemory in layers.py, and gru_unit,
-    gru_group and simple_gru in networks.py. The reason why there are so many
-    interfaces is that we have two ways to implement a recurrent neural
-    network. One way is to use one complete layer to implement the rnn
-    (including simple rnn, gru and lstm) with multiple time steps, such as
-    recurrent_layer, lstmemory and grumemory. But the multiplication operation
-    :math:`W x_t` is not computed in these layers. See the details in their
-    interfaces in layers.py.
-    The other implementation is to use a recurrent group which can assemble a
-    series of layers to compute an rnn step by step. This way is flexible for
-    attention mechanisms or other complex connections.
-
-    - gru_step_layer: only computes the rnn by one step. It needs a memory as
-      input and can be used in a recurrent group.
-    - gru_unit: a wrapper of gru_step_layer with memory.
-    - gru_group: a GRU cell implemented by a combination of multiple layers in
-      a recurrent group. But :math:`W x_t` is not done in the group.
-    - gru_memory: a GRU cell implemented by one layer, which does the same
-      calculation as gru_group and is faster than gru_group.
-    - simple_gru: a complete GRU implementation including :math:`W x_t` and
-      gru_group. :math:`W` contains :math:`W_r`, :math:`W_z` and :math:`W`, see
-      the formula in grumemory.
-
-    In terms of computational speed, grumemory is relatively faster than
-    gru_group, and gru_group is relatively faster than simple_gru.
-
-    The example usage is:
-
-    .. code-block:: python
-
-        gru = simple_gru(input=[layer1], size=256)
-
-    :param input: input layer.
-    :type input: LayerOutput
-    :param name: name of the gru group.
-    :type name: basestring
-    :param size: hidden size of the gru.
-    :type size: int
-    :param reverse: process the input in a reverse order or not.
-    :type reverse: bool
-    :param act: activation type of the gru.
-    :type act: BaseActivation
-    :param gate_act: gate activation type of the gru.
-    :type gate_act: BaseActivation
-    :param gru_bias_attr: bias parameter attribute of the gru layer,
-                          False means no bias, None means default bias.
-    :type gru_bias_attr: ParameterAttribute|False|None
-    :param gru_layer_attr: Extra attribute of the gru layer.
-    :type gru_layer_attr: ExtraLayerAttribute
-    :return: the gru group.
-    :rtype: LayerOutput
-    """
-    with mixed_layer(
-            name='%s_transform' % name,
-            size=size * 3,
-            bias_attr=mixed_bias_param_attr,
-            layer_attr=mixed_layer_attr) as m:
-        m += full_matrix_projection(input=input, param_attr=mixed_param_attr)
-
-    return gru_group(
-        name=name,
-        size=size,
-        input=m,
-        reverse=reverse,
-        gru_bias_attr=gru_bias_attr,
-        gru_param_attr=gru_param_attr,
-        act=act,
-        gate_act=gate_act,
-        gru_layer_attr=gru_layer_attr,
-        naive=naive)
-
-
-@wrap_name_default('simple_gru2')
-def simple_gru2(input,
-                size,
-                name=None,
-                reverse=False,
-                mixed_param_attr=None,
-                mixed_bias_attr=None,
-                gru_param_attr=None,
-                gru_bias_attr=None,
-                act=None,
-                gate_act=None,
-                mixed_layer_attr=None,
-                gru_cell_attr=None):
-    """
-    simple_gru2 is the same as simple_gru, but it uses grumemory instead.
-    Please refer to grumemory in layers.py for more details about the maths.
-    simple_gru2 is faster than simple_gru.
-
-    The example usage is:
-
-    .. code-block:: python
-
-        gru = simple_gru2(input=[layer1], size=256)
-
-    :param input: input layer.
-    :type input: LayerOutput
-    :param name: name of the gru group.
-    :type name: basestring
-    :param size: hidden size of the gru.
-    :type size: int
-    :param reverse: process the input in a reverse order or not.
-    :type reverse: bool
-    :param act: activation type of the gru.
-    :type act: BaseActivation
-    :param gate_act: gate activation type of the gru.
-    :type gate_act: BaseActivation
-    :param gru_bias_attr: bias parameter attribute of the gru layer,
-                          False means no bias, None means default bias.
-    :type gru_bias_attr: ParameterAttribute|False|None
-    :param gru_param_attr: parameter attribute of the gru layer,
-                           None means default parameter.
-    :type gru_param_attr: ParameterAttribute|None
-    :return: the gru group.
-    :rtype: LayerOutput
-    """
-    with mixed_layer(
-            name='%s_transform' % name,
-            size=size * 3,
-            bias_attr=mixed_bias_attr,
-            layer_attr=mixed_layer_attr) as m:
-        m += full_matrix_projection(input=input, param_attr=mixed_param_attr)
-
-    return grumemory(
-        name=name,
-        input=m,
-        reverse=reverse,
-        bias_attr=gru_bias_attr,
-        param_attr=gru_param_attr,
-        act=act,
-        gate_act=gate_act,
-        layer_attr=gru_cell_attr)
-
-
-@wrap_name_default("bidirectional_gru")
-def bidirectional_gru(input,
-                      size,
-                      name=None,
-                      return_seq=False,
-                      fwd_mixed_param_attr=None,
-                      fwd_mixed_bias_attr=None,
-                      fwd_gru_param_attr=None,
-                      fwd_gru_bias_attr=None,
-                      fwd_act=None,
-                      fwd_gate_act=None,
-                      fwd_mixed_layer_attr=None,
-                      fwd_gru_cell_attr=None,
-                      bwd_mixed_param_attr=None,
-                      bwd_mixed_bias_attr=None,
-                      bwd_gru_param_attr=None,
-                      bwd_gru_bias_attr=None,
-                      bwd_act=None,
-                      bwd_gate_act=None,
-                      bwd_mixed_layer_attr=None,
-                      bwd_gru_cell_attr=None,
-                      last_seq_attr=None,
-                      first_seq_attr=None,
-                      concat_attr=None,
-                      concat_act=None):
-    """
-    A bidirectional_gru is a recurrent unit that iterates over the input
-    sequence both in forward and backward orders, and then concatenates the
-    two outputs to form the final output.
-    However, concatenation of the two outputs is not the only way to form the
-    final output; you can also, for example, just add them together.
-
-    The example usage is:
-
-    .. code-block:: python
-
-        bi_gru = bidirectional_gru(input=[input1], size=512)
-
-    :param name: bidirectional gru layer name.
-    :type name: basestring
-    :param input: input layer.
-    :type input: LayerOutput
-    :param size: gru layer size.
-    :type size: int
-    :param return_seq: If set to False, the outputs at the last time step are
-                       concatenated and returned.
-                       If set to True, the entire output sequences in forward
-                       and backward directions are concatenated and returned.
-    :type return_seq: bool
-    :return: LayerOutput object.
-    :rtype: LayerOutput
-    """
-    args = locals()
-
-    fw = simple_gru2(
-        name='%s_fw' % name,
-        input=input,
-        size=size,
-        **dict((k[len('fwd_'):], v) for k, v in args.iteritems()
-               if k.startswith('fwd_')))
-
-    bw = simple_gru2(
-        name="%s_bw" % name,
-        input=input,
-        size=size,
-        reverse=True,
-        **dict((k[len('bwd_'):], v) for k, v in args.iteritems()
-               if k.startswith('bwd_')))
-
-    if return_seq:
-        return concat_layer(
-            name=name, input=[fw, bw], layer_attr=concat_attr, act=concat_act)
-    else:
-        fw_seq = last_seq(
-            name="%s_fw_last" % name, input=fw, layer_attr=last_seq_attr)
-        bw_seq = first_seq(
-            name="%s_bw_last" % name, input=bw, layer_attr=first_seq_attr)
-        return concat_layer(
-            name=name,
-            input=[fw_seq, bw_seq],
-            layer_attr=concat_attr,
-            act=concat_act)
-
-
-@wrap_name_default("bidirectional_lstm")
-def bidirectional_lstm(input,
-                       size,
-                       name=None,
-                       return_seq=False,
-                       fwd_mat_param_attr=None,
-                       fwd_bias_param_attr=None,
-                       fwd_inner_param_attr=None,
-                       fwd_act=None,
-                       fwd_gate_act=None,
-                       fwd_state_act=None,
-                       fwd_mixed_layer_attr=None,
-                       fwd_lstm_cell_attr=None,
-                       bwd_mat_param_attr=None,
-                       bwd_bias_param_attr=None,
-                       bwd_inner_param_attr=None,
-                       bwd_act=None,
-                       bwd_gate_act=None,
-                       bwd_state_act=None,
-                       bwd_mixed_layer_attr=None,
-                       bwd_lstm_cell_attr=None,
-                       last_seq_attr=None,
-                       first_seq_attr=None,
-                       concat_attr=None,
-                       concat_act=None):
-    """
-    A bidirectional_lstm is a recurrent unit that iterates over the input
-    sequence both in forward and backward orders, and then concatenates the
-    two outputs to form the final output. However, concatenation of the two
-    outputs is not the only way to form the final output; you can also, for
-    example, just add them together.
-
-    Please refer to **Neural Machine Translation by Jointly Learning to Align
-    and Translate** for more details about the bidirectional lstm.
-    The link goes as follows:
-    .. _Link: https://arxiv.org/pdf/1409.0473v3.pdf
-
-    The example usage is:
-
-    .. code-block:: python
-
-        bi_lstm = bidirectional_lstm(input=[input1], size=512)
-
-    :param name: bidirectional lstm layer name.
-    :type name: basestring
-    :param input: input layer.
-    :type input: LayerOutput
-    :param size: lstm layer size.
-    :type size: int
-    :param return_seq: If set to False, the outputs at the last time step are
-                       concatenated and returned.
-                       If set to True, the entire output sequences in forward
-                       and backward directions are concatenated and returned.
-    :type return_seq: bool
-    :return: LayerOutput object.
-    :rtype: LayerOutput
-    """
-    args = locals()
-
-    fw = simple_lstm(
-        name='%s_fw' % name,
-        input=input,
-        size=size,
-        **dict((k[len('fwd_'):], v) for k, v in args.iteritems()
-               if k.startswith('fwd_')))
-
-    bw = simple_lstm(
-        name="%s_bw" % name,
-        input=input,
-        size=size,
-        reverse=True,
-        **dict((k[len('bwd_'):], v) for k, v in args.iteritems()
-               if k.startswith('bwd_')))
-
-    if return_seq:
-        return concat_layer(
-            name=name, input=[fw, bw], layer_attr=concat_attr, act=concat_act)
-    else:
-        fw_seq = last_seq(
-            name="%s_fw_last" % name, input=fw, layer_attr=last_seq_attr)
-        bw_seq = first_seq(
-            name="%s_bw_last" % name, input=bw, layer_attr=first_seq_attr)
-        return concat_layer(
-            name=name,
-            input=[fw_seq, bw_seq],
-            layer_attr=concat_attr,
-            act=concat_act)
-
-
-@wrap_name_default()
-@wrap_act_default(param_names=['weight_act'], act=TanhActivation())
-def simple_attention(encoded_sequence,
-                     encoded_proj,
-                     decoder_state,
-                     transform_param_attr=None,
-                     softmax_param_attr=None,
-                     weight_act=None,
-                     name=None):
-    """
-    Calculate and return a context vector with an attention mechanism.
-    The size of the context vector equals the size of the encoded_sequence.
-
-    .. math::
-
-        a(s_{i-1},h_{j}) & = v_{a}f(W_{a}s_{i-1} + U_{a}h_{j})
-
-        e_{i,j} & = a(s_{i-1}, h_{j})
-
-        a_{i,j} & = \\frac{exp(e_{i,j})}{\\sum_{k=1}^{T_x}{exp(e_{i,k})}}
-
-        c_{i} & = \\sum_{j=1}^{T_{x}}a_{i,j}h_{j}
-
-    where :math:`h_{j}` is the jth element of encoded_sequence,
-    :math:`U_{a}h_{j}` is the jth element of encoded_proj,
-    :math:`s_{i-1}` is decoder_state, and
-    :math:`f` is weight_act, which is set to tanh by default.
-
-    Please refer to **Neural Machine Translation by Jointly Learning to
-    Align and Translate** for more details. The link is as follows:
-    https://arxiv.org/abs/1409.0473.
-
-    The example usage is:
-
-    .. code-block:: python
-
-        context = simple_attention(encoded_sequence=enc_seq,
-                                   encoded_proj=enc_proj,
-                                   decoder_state=decoder_prev)
-
-    :param name: name of the attention model.
-    :type name: basestring
-    :param softmax_param_attr: parameter attribute of the sequence softmax
-                               that is used to produce the attention weights.
-    :type softmax_param_attr: ParameterAttribute
-    :param weight_act: activation of the attention model.
-    :type weight_act: BaseActivation
-    :param encoded_sequence: output of the encoder.
-    :type encoded_sequence: LayerOutput
-    :param encoded_proj: the attention weight is computed by a feed-forward
-                         neural network which has two inputs: the decoder's
-                         hidden state of the previous time step and the
-                         encoder's output. encoded_proj is the output of the
-                         feed-forward network for the encoder's output. Here we
-                         pre-compute it outside simple_attention for speed
-                         considerations.
-    :type encoded_proj: LayerOutput
-    :param decoder_state: hidden state of the decoder in the previous time step.
-    :type decoder_state: LayerOutput
-    :param transform_param_attr: parameter attribute of the feed-forward
-                                 network that takes decoder_state as input to
-                                 compute the attention weights.
-    :type transform_param_attr: ParameterAttribute
-    :return: a context vector.
-    :rtype: LayerOutput
-    """
-    assert encoded_proj.size == decoder_state.size
-    proj_size = encoded_proj.size
-
-    with mixed_layer(size=proj_size, name="%s_transform" % name) as m:
-        m += full_matrix_projection(
-            decoder_state, param_attr=transform_param_attr)
-
-    expanded = expand_layer(
-        input=m, expand_as=encoded_sequence, name='%s_expand' % name)
-
-    with mixed_layer(
-            size=proj_size, act=weight_act, name="%s_combine" % name) as m:
-        m += identity_projection(expanded)
-        m += identity_projection(encoded_proj)
-
-    # sequence softmax is used to normalize similarities between decoder state
-    # and encoder outputs into a distribution
-    attention_weight = fc_layer(
-        input=m,
-        size=1,
-        act=SequenceSoftmaxActivation(),
-        param_attr=softmax_param_attr,
-        name="%s_softmax" % name,
-        bias_attr=False)
-
-    scaled = scaling_layer(
-        weight=attention_weight,
-        input=encoded_sequence,
-        name='%s_scaling' % name)
-
-    return pooling_layer(
-        input=scaled, pooling_type=SumPooling(), name="%s_pooling" % name)
-
-
-@wrap_name_default()
-def dot_product_attention(encoded_sequence,
-                          attended_sequence,
-                          transformed_state,
-                          softmax_param_attr=None,
-                          name=None):
-    """
-    Calculate and return a context vector with a dot-product attention
-    mechanism. The dimension of the context vector equals that of the
-    attended_sequence.
-
-    .. math::
-
-        a(s_{i-1},h_{j}) & = s_{i-1}^\mathrm{T} h_{j}
-
-        e_{i,j} & = a(s_{i-1}, h_{j})
-
-        a_{i,j} & = \\frac{exp(e_{i,j})}{\\sum_{k=1}^{T_x}{exp(e_{i,k})}}
-
-        c_{i} & = \\sum_{j=1}^{T_{x}}a_{i,j}z_{j}
-
-    where :math:`h_{j}` is the jth element of encoded_sequence,
-    :math:`z_{j}` is the jth element of attended_sequence, and
-    :math:`s_{i-1}` is transformed_state.
-
-    The example usage is:
-
-    .. code-block:: python
-
-        context = dot_product_attention(encoded_sequence=enc_seq,
-                                        attended_sequence=att_seq,
-                                        transformed_state=state)
-
-    :param name: A prefix attached to the name of each layer that is defined
-                 inside the dot_product_attention.
-    :type name: basestring
-    :param softmax_param_attr: The parameter attribute of the sequence softmax
-                               that is used to produce the attention weights.
-    :type softmax_param_attr: ParameterAttribute
-    :param encoded_sequence: The output hidden vectors of the encoder.
-    :type encoded_sequence: LayerOutput
-    :param attended_sequence: The attention weight is computed by a feed-forward
-                              neural network which has two inputs: the decoder's
-                              transformed hidden state of the previous time step
-                              and the encoder's output.
-                              attended_sequence is the sequence to be attended.
-    :type attended_sequence: LayerOutput
-    :param transformed_state: The transformed hidden state of the decoder in the
-                              previous time step. Since the dot-product operation
-                              will be performed on it and the encoded_sequence,
-                              their dimensions must be equal. For flexibility,
-                              we suppose transformations of the decoder's hidden
-                              state have been done outside dot_product_attention
-                              and no more will be performed inside. Then users
-                              can use either the original or the transformed one.
-    :type transformed_state: LayerOutput
-    :return: The context vector.
-
-
-@wrap_name_default()
-def dot_product_attention(encoded_sequence,
-                          attended_sequence,
-                          transformed_state,
-                          softmax_param_attr=None,
-                          name=None):
-    """
-    Calculate and return a context vector with a dot-product attention
-    mechanism. The dimension of the context vector equals that of the
-    attended_sequence.
-
-    .. math::
-
-        a(s_{i-1},h_{j}) & = s_{i-1}^\mathrm{T} h_{j}
-
-        e_{i,j} & = a(s_{i-1}, h_{j})
-
-        a_{i,j} & = \\frac{exp(e_{i,j})}{\\sum_{k=1}^{T_x}{exp(e_{i,k})}}
-
-        c_{i} & = \\sum_{j=1}^{T_{x}}a_{i,j}z_{j}
-
-    where :math:`h_{j}` is the jth element of encoded_sequence,
-    :math:`z_{j}` is the jth element of attended_sequence, and
-    :math:`s_{i-1}` is transformed_state.
-
-    The example usage is:
-
-    .. code-block:: python
-
-        context = dot_product_attention(encoded_sequence=enc_seq,
-                                        attended_sequence=att_seq,
-                                        transformed_state=state,)
-
-    :param name: A prefix attached to the name of each layer that is defined
-                 inside dot_product_attention.
-    :type name: basestring
-    :param softmax_param_attr: The parameter attribute of the sequence softmax
-                               that is used to produce the attention weights.
-    :type softmax_param_attr: ParameterAttribute
-    :param encoded_sequence: The output hidden vectors of the encoder.
-    :type encoded_sequence: LayerOutput
-    :param attended_sequence: The sequence to be attended, i.e. the values to
-                              which the attention weights are applied. It must
-                              have the same sequence length as encoded_sequence.
-    :type attended_sequence: LayerOutput
-    :param transformed_state: The transformed hidden state of the decoder in
-                              the previous time step. Since a dot-product
-                              operation will be performed on it and
-                              encoded_sequence, their dimensions must be equal.
-                              For flexibility, we assume transformations of the
-                              decoder's hidden state have been done outside
-                              dot_product_attention and no more will be
-                              performed inside. Then users can use either the
-                              original or the transformed one.
-    :type transformed_state: LayerOutput
-    :return: The context vector.
-    :rtype: LayerOutput
-    """
-    assert transformed_state.size == encoded_sequence.size
-
-    expanded = expand_layer(
-        input=transformed_state,
-        expand_as=encoded_sequence,
-        name='%s_expand' % name)
-
-    m = dot_prod_layer(
-        input1=expanded, input2=encoded_sequence, name='%s_dot-product' % name)
-
-    attention_weight = fc_layer(
-        input=m,
-        size=1,
-        act=SequenceSoftmaxActivation(),
-        param_attr=softmax_param_attr,
-        name="%s_softmax" % name,
-        bias_attr=False)
-
-    scaled = scaling_layer(
-        weight=attention_weight,
-        input=attended_sequence,
-        name='%s_scaling' % name)
-
-    return pooling_layer(
-        input=scaled, pooling_type=SumPooling(), name="%s_pooling" % name)
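For comparison with the additive sketch earlier, the dot-product variant reduces to a few lines of numpy. Again this is a hypothetical illustration, not the layer graph above; enc_seq, att_seq, and state are invented names.

.. code-block:: python

    import numpy as np

    def dot_product_attention(enc_seq, att_seq, state):
        # enc_seq: (T, d); att_seq: (T, d2); state: (d,)
        scores = enc_seq.dot(state)           # e_j = s^T h_j, shape (T,)
        weights = np.exp(scores - scores.max())
        weights /= weights.sum()              # sequence softmax over T
        return weights.dot(att_seq)           # c = sum_j a_j z_j, shape (d2,)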
- :rtype: LayerOutput - """ - assert attention_type in ['dot-product attention', 'additive attention'] - - with mixed_layer( - size=key_proj_size * head_num, - name='%s_query_proj' % name) as query_proj: - query_proj += full_matrix_projection(query) - query_proj = expand_layer(input=query_proj, expand_as=key) - - with mixed_layer( - size=key_proj_size * head_num, - name='%s_key_proj' % name) as key_proj: - key_proj += full_matrix_projection(key) - - with mixed_layer( - size=value_proj_size * head_num, - name='%s_value_proj' % name) as value_proj: - value_proj += full_matrix_projection(value) - - head_list = [] - for i in range(head_num): - with mixed_layer(size=key_proj_size) as sub_query_proj: - sub_query_proj += identity_projection( - query_proj, offset=key_proj_size * i, size=key_proj_size) - - with mixed_layer(size=key_proj_size) as sub_key_proj: - sub_key_proj += identity_projection( - key_proj, offset=key_proj_size * i, size=key_proj_size) - - with mixed_layer(size=value_proj_size) as sub_value_proj: - sub_value_proj += identity_projection( - value_proj, offset=value_proj_size * i, size=value_proj_size) - - if attention_type == 'dot-product attention': - m = dot_prod_layer( - input1=sub_query_proj, - input2=sub_key_proj, - name='%s_dot-product_%d' % (name, i)) - m = slope_intercept_layer( - input=m, - slope=math.sqrt(1.0 / key_proj_size), - name='%s_dot-product_scaling_%d' % (name, i)) - else: - with mixed_layer( - size=key_proj_size, - act=TanhActivation(), - name='%s_combine_%d' % (name, i)) as m: - m += identity_projection(sub_query_proj) - m += identity_projection(sub_key_proj) - - attention_weight = fc_layer( - input=m, - size=1, - act=SequenceSoftmaxActivation(), - param_attr=softmax_param_attr, - name="%s_softmax_%d" % (name, i), - bias_attr=False) - - scaled = scaling_layer( - weight=attention_weight, - input=sub_value_proj, - name='%s_scaling_%d' % (name, i)) - head = pooling_layer( - input=scaled, - pooling_type=SumPooling(), - name="%s_pooling_%d" % (name, i)) - - head_list.append(head) - - attended = concat_layer(head_list) - - return attended - - -def inputs(layers, *args): - """ - Declare the inputs of network. The order of input should be as same as - the data provider's return order. - - :param layers: Input Layers. - :type layers: list|tuple|LayerOutput. - :return: - """ - - if isinstance(layers, LayerOutput) or isinstance(layers, basestring): - layers = [layers] - if len(args) != 0: - layers.extend(args) - - Inputs(*[l.name for l in layers]) - - -def outputs(layers, *args): - """ - Declare the outputs of network. If user has not defined the inputs of - network, this method will calculate the input order by dfs travel. - - :param layers: Output layers. - :type layers: list|tuple|LayerOutput - :return: - """ - - traveled = set() - - def __dfs_travel__(layer, - predicate=lambda x: x.layer_type == LayerType.DATA): - """ - DFS LRV Travel for output layer. - - The return order is define order for data_layer in this leaf node. 
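The per-head offset slicing above can be mirrored in numpy by slicing the concatenated projections. A hypothetical sketch of the scaled dot-product branch only, with all names and shapes invented:

.. code-block:: python

    import numpy as np

    def multi_head(q_proj, k_proj, v_proj, head_num):
        # q_proj: (d_k*H,); k_proj: (T, d_k*H); v_proj: (T, d_v*H)
        d_k = k_proj.shape[1] // head_num
        d_v = v_proj.shape[1] // head_num
        heads = []
        for i in range(head_num):
            q = q_proj[i * d_k:(i + 1) * d_k]
            k = k_proj[:, i * d_k:(i + 1) * d_k]
            v = v_proj[:, i * d_v:(i + 1) * d_v]
            scores = k.dot(q) / np.sqrt(d_k)   # scaled dot product, (T,)
            w = np.exp(scores - scores.max())
            w /= w.sum()                       # sequence softmax
            heads.append(w.dot(v))             # weighted sum of values, (d_v,)
        return np.concatenate(heads)           # context vector, (d_v*H,)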
-
-
-def inputs(layers, *args):
-    """
-    Declare the inputs of the network. The order of the inputs should be the
-    same as the data provider's return order.
-
-    :param layers: Input Layers.
-    :type layers: list|tuple|LayerOutput.
-    :return:
-    """
-
-    if isinstance(layers, LayerOutput) or isinstance(layers, basestring):
-        layers = [layers]
-    if len(args) != 0:
-        layers.extend(args)
-
-    Inputs(*[l.name for l in layers])
-
-
-def outputs(layers, *args):
-    """
-    Declare the outputs of the network. If the user has not defined the inputs
-    of the network, this method will calculate the input order by DFS
-    traversal.
-
-    :param layers: Output layers.
-    :type layers: list|tuple|LayerOutput
-    :return:
-    """
-
-    traveled = set()
-
-    def __dfs_travel__(layer,
-                       predicate=lambda x: x.layer_type == LayerType.DATA):
-        """
-        Post-order (LRV) DFS traversal from an output layer.
-
-        The returned order is the definition order of the data_layers at the
-        leaf nodes.
-
-        :param layer:
-        :type layer: LayerOutput
-        :return:
-        """
-        if layer in traveled:
-            return []
-        else:
-            traveled.add(layer)
-
-        assert isinstance(layer, LayerOutput), "layer is %s" % (layer)
-        retv = []
-        if layer.parents is not None:
-            for p in layer.parents:
-                retv.extend(__dfs_travel__(p, predicate))
-
-        if predicate(layer):
-            retv.append(layer)
-        return retv
-
-    if isinstance(layers, LayerOutput):
-        layers = [layers]
-
-    if len(args) != 0:
-        layers.extend(args)
-
-    assert len(layers) > 0
-
-    if HasInputsSet():  # input already set
-        Outputs(*[l.name for l in layers])
-        return  # just return outputs.
-
-    if len(layers) != 1:
-        logger.warning("The `outputs` routine tries to calculate the"
                       " network's input and output order. It might not"
                       " work well. Please check the following log carefully.")
-    inputs = []
-    outputs_ = []
-    for each_layer in layers:
-        assert isinstance(each_layer, LayerOutput)
-        inputs.extend(__dfs_travel__(each_layer))
-        outputs_.extend(
-            __dfs_travel__(each_layer,
-                           lambda x: x.layer_type == LayerType.COST))
-
-    # Now we have each leaf node's input order and output order;
-    # merge them together.
-
-    final_inputs = []
-    final_outputs = []
-
-    for each_input in inputs:
-        assert isinstance(each_input, LayerOutput)
-        if each_input.name not in final_inputs:
-            final_inputs.append(each_input.name)
-
-    for each_output in outputs_:
-        assert isinstance(each_output, LayerOutput)
-        if each_output.name not in final_outputs:
-            final_outputs.append(each_output.name)
-
-    logger.info("".join(["The input order is [", ", ".join(final_inputs), "]"]))
-
-    if len(final_outputs) == 0:
-        final_outputs = map(lambda x: x.name, layers)
-
-    logger.info("".join(
-        ["The output order is [", ", ".join(final_outputs), "]"]))
-
-    Inputs(*final_inputs)
-    Outputs(*final_outputs)
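A minimal, hypothetical trainer config showing where outputs() sits in practice (the layer names and sizes are invented; declaring only the cost lets the DFS above discover the input order):

.. code-block:: python

    from paddle.trainer_config_helpers import *

    settings(batch_size=128, learning_rate=1e-4)

    img = data_layer(name='pixel', size=784)
    lbl = data_layer(name='label', size=10)
    hidden = fc_layer(input=img, size=200, act=TanhActivation())
    pred = fc_layer(input=hidden, size=10, act=SoftmaxActivation())

    # Declare only the cost; inputs are discovered by the DFS traversal.
    outputs(classification_cost(input=pred, label=lbl))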
diff --git a/python/paddle/trainer_config_helpers/optimizers.py b/python/paddle/trainer_config_helpers/optimizers.py
deleted file mode 100644
index 32698e5b2cb52acf960d83a05d2d49c632cd73fc..0000000000000000000000000000000000000000
--- a/python/paddle/trainer_config_helpers/optimizers.py
+++ /dev/null
@@ -1,447 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from paddle.trainer.config_parser import Settings, default_decay_rate, \
-    default_gradient_clipping_threshold, default_momentum
-
-from .default_decorators import wrap_param_default
-
-__all__ = [
-    'Optimizer', 'BaseSGDOptimizer', 'MomentumOptimizer', 'AdamaxOptimizer',
-    'AdamOptimizer', 'AdaGradOptimizer', 'RMSPropOptimizer',
-    'DecayedAdaGradOptimizer', 'AdaDeltaOptimizer', 'BaseRegularization',
-    'L2Regularization', 'settings', 'ModelAverage'
-]
-
-
-class Optimizer(object):
-    def to_setting_kwargs(self):
-        raise NotImplementedError()
-
-    def extra_settings(self):
-        pass
-
-    @property
-    def is_support_sparse(self):
-        return True
-
-
-class BaseSGDOptimizer(Optimizer):
-    """
-    SGD Optimizer.
-
-    SGD is an optimization method that tries to find the neural network
-    parameters that minimize the network's "cost/error" by iteration. In
-    Paddle's implementation, the SGD optimizer is synchronized: it waits for
-    all gradients to be calculated and reduced into one gradient, then does
-    the optimization operation.
-
-    The neural network considers the learning problem of minimizing an
-    objective function that has the form of a sum
-
-    .. math::
-
-        Q(w) = \\sum_{i}^{n} Q_i(w)
-
-    The value of the function Q is usually the cost of the neural network
-    (for example, the mean square error between prediction and label). The
-    function Q is parametrised by w, the weights/biases of the neural network,
-    which are what is to be learned. The index i refers to the i-th
-    observation in the training data.
-
-    So, the SGD method will optimize the weights by
-
-    .. math::
-
-        w = w - \\eta \\nabla Q(w) = w - \\eta \\sum_{i}^{n} \\nabla Q_i(w)
-
-    where :math:`\\eta` is the learning rate and :math:`n` is the batch size.
-    """
-
-    def to_setting_kwargs(self):
-        raise NotImplementedError()
-
-
-class MomentumOptimizer(BaseSGDOptimizer):
-    """
-    MomentumOptimizer.
-
-    When sparse=True, the update scheme:
-
-    .. math::
-
-        \\alpha_t &= \\alpha_{t-1} / k \\\\
-        \\beta_t &= \\beta_{t-1} / (1 + \\lambda \\gamma_t) \\\\
-        u_t &= u_{t-1} - \\alpha_t \\gamma_t g_t \\\\
-        v_t &= v_{t-1} + \\tau_{t-1} \\alpha_t \\gamma_t g_t \\\\
-        \\tau_t &= \\tau_{t-1} + \\beta_t / \\alpha_t
-
-    where :math:`k` is momentum, :math:`\\lambda` is decay rate,
-    :math:`\\gamma_t` is the learning rate at the t'th step.
-
-    :param sparse: whether to enable sparse update support.
-    :type sparse: bool
-    """
-
-    def extra_settings(self):
-        default_momentum(self.momentum)
-
-    def to_setting_kwargs(self):
-        if self.sparse:
-            return {'learning_method': 'sparse_momentum'}
-        else:
-            return {'learning_method': 'momentum'}
-
-    def __init__(self, momentum=None, sparse=False):
-        self.momentum = momentum
-        self.sparse = sparse
-
-
-class AdamOptimizer(BaseSGDOptimizer):
-    """
-    Adam optimizer.
-    For details, please refer to `Adam: A Method for Stochastic Optimization
-    `_
-
-    .. math::
-
-        m(w, t) & = \\beta_1 m(w, t-1) + (1 - \\beta_1) \\nabla Q_i(w) \\\\
-        v(w, t) & = \\beta_2 v(w, t-1) + (1 - \\beta_2)(\\nabla Q_i(w)) ^2 \\\\
-        w & = w - \\frac{\\eta m(w, t)}{\\sqrt{v(w,t) + \\epsilon}}
-
-    :param beta1: the :math:`\\beta_1` in the equation.
-    :type beta1: float
-    :param beta2: the :math:`\\beta_2` in the equation.
-    :type beta2: float
-    :param epsilon: the :math:`\\epsilon` in the equation. It is used to
-                    prevent division by zero.
-    :type epsilon: float
-    """
-
-    @property
-    def is_support_sparse(self):
-        return False
-
-    def __init__(self, beta1=0.9, beta2=0.999, epsilon=1e-8):
-        self.beta1 = beta1
-        self.beta2 = beta2
-        self.epsilon = epsilon
-
-    def to_setting_kwargs(self):
-        return {
-            'learning_method': 'adam',
-            'adam_beta1': self.beta1,
-            'adam_beta2': self.beta2,
-            'adam_epsilon': self.epsilon
-        }
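To make the Adam equations concrete, here is one update step in plain numpy. It is a hedged illustration of the docstring's formulas (which omit bias correction), not the C++ kernel Paddle actually dispatches to; all names are invented.

.. code-block:: python

    import numpy as np

    def adam_step(w, grad, m, v, eta=1e-3, beta1=0.9, beta2=0.999, eps=1e-8):
        m = beta1 * m + (1 - beta1) * grad        # m(w, t)
        v = beta2 * v + (1 - beta2) * grad ** 2   # v(w, t)
        # matches the docstring's form: w -= eta * m / sqrt(v + eps)
        w = w - eta * m / np.sqrt(v + eps)
        return w, m, v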
-
-
-class AdamaxOptimizer(BaseSGDOptimizer):
-    """
-    Adamax optimizer.
-
-    For details, please refer to `Adam: A Method for Stochastic Optimization
-    `_
-
-    .. math::
-
-        m_t & = \\beta_1 * m_{t-1} + (1-\\beta_1)* \\nabla Q_i(w) \\\\
-        u_t & = max(\\beta_2*u_{t-1}, abs(\\nabla Q_i(w))) \\\\
-        w_t & = w_{t-1} - (\\eta/(1-\\beta_1^t))*m_t/u_t
-
-    :param beta1: the :math:`\\beta_1` in the equation.
-    :type beta1: float
-    :param beta2: the :math:`\\beta_2` in the equation.
-    :type beta2: float
-    """
-
-    def __init__(self, beta1, beta2):
-        self.beta1 = beta1
-        self.beta2 = beta2
-
-    def to_setting_kwargs(self):
-        return {
-            'learning_method': 'adamax',
-            'adam_beta1': self.beta1,
-            'adam_beta2': self.beta2
-        }
-
-    @property
-    def is_support_sparse(self):
-        return False
-
-
-class AdaGradOptimizer(BaseSGDOptimizer):
-    """
-    AdaGrad (ADAptive GRAdient algorithm) optimizer.
-
-    For details, please refer to `Adaptive Subgradient Methods for
-    Online Learning and Stochastic Optimization
-    `_.
-
-    .. math::
-
-        G &= \\sum_{\\tau=1}^{t} g_{\\tau} g_{\\tau}^T \\\\
-        w & = w - \\eta diag(G)^{-\\frac{1}{2}} \\circ g
-    """
-
-    def to_setting_kwargs(self):
-        return {'learning_method': 'adagrad'}
-
-    def __init__(self):
-        pass
-
-
-class RMSPropOptimizer(BaseSGDOptimizer):
-    """
-    RMSProp (Root Mean Square Propagation) optimizer. For details, please
-    refer to this `slide `_.
-
-    The equations of this method are as follows:
-
-    .. math::
-
-        v(w, t) & = \\rho v(w, t-1) + (1 - \\rho)(\\nabla Q_{i}(w))^2 \\\\
-        w & = w - \\frac{\\eta} {\\sqrt{v(w,t) + \\epsilon}} \\nabla Q_{i}(w)
-
-    :param rho: the :math:`\\rho` in the equation. The forgetting factor.
-    :type rho: float
-    :param epsilon: the :math:`\\epsilon` in the equation.
-    :type epsilon: float
-    """
-
-    def to_setting_kwargs(self):
-        return {
-            'learning_method': 'rmsprop',
-            'ada_rou': self.rho,
-            'ada_epsilon': self.epsilon
-        }
-
-    def __init__(self, rho=0.95, epsilon=1e-6):
-        self.rho = rho
-        self.epsilon = epsilon
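The same exercise for RMSProp, under the same caveats (an illustrative numpy sketch of the equations, not Paddle's implementation):

.. code-block:: python

    import numpy as np

    def rmsprop_step(w, grad, v, eta=1e-3, rho=0.95, eps=1e-6):
        v = rho * v + (1 - rho) * grad ** 2    # running mean of squared grads
        w = w - eta * grad / np.sqrt(v + eps)  # per-dimension scaled step
        return w, v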
-
-
-class DecayedAdaGradOptimizer(BaseSGDOptimizer):
-    """
-    AdaGrad method with a decayed sum of gradients. The equations of this
-    method are as follows.
-
-    .. math::
-
-        E(g_t^2) &= \\rho * E(g_{t-1}^2) + (1-\\rho) * g^2 \\\\
-        learning\\_rate &= 1/sqrt( E(g_t^2) + \\epsilon )
-
-    :param rho: The :math:`\\rho` parameter in that equation.
-    :type rho: float
-    :param epsilon: The :math:`\\epsilon` parameter in that equation.
-    :type epsilon: float
-    """
-
-    def to_setting_kwargs(self):
-        return {
-            'learning_method': 'decayed_adagrad',
-            'ada_rou': self.rho,
-            'ada_epsilon': self.epsilon
-        }
-
-    def __init__(self, rho=0.95, epsilon=1e-6):
-        self.rho = rho
-        self.epsilon = epsilon
-
-
-class AdaDeltaOptimizer(BaseSGDOptimizer):
-    """
-    AdaDelta method. For details, please refer to
-    `ADADELTA: AN ADAPTIVE LEARNING RATE METHOD
-    `_.
-
-    .. math::
-
-        E(g_t^2) &= \\rho * E(g_{t-1}^2) + (1-\\rho) * g^2 \\\\
-        learning\\_rate &= sqrt( ( E(dx_{t-1}^2) + \\epsilon ) / ( \\
-                          E(g_t^2) + \\epsilon ) ) \\\\
-        E(dx_t^2) &= \\rho * E(dx_{t-1}^2) + (1-\\rho) * (-g*learning\\_rate)^2
-
-    :param rho: :math:`\\rho` in the equation.
-    :type rho: float
-    :param epsilon: :math:`\\epsilon` in the equation.
-    :type epsilon: float
-    """
-
-    def to_setting_kwargs(self):
-        return {
-            'learning_method': 'adadelta',
-            'ada_rou': self.rho,
-            'ada_epsilon': self.epsilon
-        }
-
-    def __init__(self, rho=0.95, epsilon=1e-6):
-        self.rho = rho
-        self.epsilon = epsilon
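And the AdaDelta recurrence, with the same hedges (a pure-numpy illustration of the docstring's equations; names invented):

.. code-block:: python

    import numpy as np

    def adadelta_step(w, grad, eg2, edx2, rho=0.95, eps=1e-6):
        eg2 = rho * eg2 + (1 - rho) * grad ** 2      # E(g_t^2)
        rate = np.sqrt((edx2 + eps) / (eg2 + eps))   # per-dimension rate
        dx = -grad * rate
        edx2 = rho * edx2 + (1 - rho) * dx ** 2      # E(dx_t^2)
        return w + dx, eg2, edx2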
-
-
-class BaseRegularization(Optimizer):
-    def __init__(self):
-        self.algorithm = ""
-        self.learning_method = ""
-
-    def to_setting_kwargs(self):
-        return {}
-
-
-class L2Regularization(BaseRegularization):
-    def __init__(self, rate):
-        super(L2Regularization, self).__init__()
-        self.decay_rate = rate
-
-    def to_setting_kwargs(self):
-        if self.algorithm == 'owlqn':
-            return {'l2weight': self.decay_rate}
-        else:
-            return dict()
-
-    def extra_settings(self):
-        if self.algorithm == 'sgd' or self.algorithm == 'async_sgd':
-            default_decay_rate(self.decay_rate)
-
-
-class ModelAverage(Optimizer):
-    def to_setting_kwargs(self):
-        return {
-            'average_window': self.average_window,
-            'max_average_window': self.max_average_window,
-            'do_average_in_cpu': self.do_average_in_cpu
-        }
-
-    def __init__(self,
-                 average_window,
-                 max_average_window=None,
-                 do_average_in_cpu=False):
-        self.average_window = average_window
-        self.max_average_window = max_average_window
-        self.do_average_in_cpu = do_average_in_cpu
-
-
-class GradientClippingThreshold(Optimizer):
-    def extra_settings(self):
-        default_gradient_clipping_threshold(self.threshold)
-
-    def __init__(self, threshold):
-        self.threshold = threshold
-
-    def to_setting_kwargs(self):
-        return dict()
-
-
-def __extends__(dict1, dict2):
-    for key in dict2:
-        assert key not in dict1
-        dict1[key] = dict2[key]
-    return dict1
-
-
-@wrap_param_default(
-    ['learning_method'], default_factory=lambda _: MomentumOptimizer())
-@wrap_param_default(
-    ['regularization'], default_factory=lambda _: BaseRegularization())
-def settings(batch_size,
-             learning_rate=1e-3,
-             learning_rate_decay_a=0.,
-             learning_rate_decay_b=0.,
-             learning_rate_schedule='poly',
-             learning_rate_args='',
-             async_lagged_grad_discard_ratio=1.5,
-             learning_method=None,
-             regularization=None,
-             is_async=False,
-             model_average=None,
-             gradient_clipping_threshold=None):
-    """
-    Set the optimization method, learning rate, batch size, and other training
-    settings. The currently supported algorithms are SGD and Async-SGD.
-
-    .. warning::
-
-        Note that the 'batch_size' in PaddlePaddle is not equal to the global
-        training batch size. It represents the batch size of a single training
-        process. If you use N processes to train one model, for example on
-        three GPU machines, the global batch size is N * 'batch_size'.
-
-    :param batch_size: batch size for one training process.
-    :type batch_size: int
-    :param learning_rate: learning rate for SGD.
-    :type learning_rate: float
-    :param learning_method: An extended gradient-descent optimization
-                            algorithm, such as momentum, adagrad, rmsprop,
-                            etc. Note that it should be an instance of a
-                            subclass of BaseSGDOptimizer.
-    :type learning_method: BaseSGDOptimizer
-    :param regularization: The regularization method.
-    :type regularization: BaseRegularization
-    :param is_async: Is Async-SGD or not. Default value is False.
-    :type is_async: bool
-    :param model_average: Model Average Settings.
-    :type model_average: ModelAverage
-    :param gradient_clipping_threshold: gradient clipping threshold. If a
-                                        gradient's value is larger than the
-                                        threshold, it will be clipped.
-    :type gradient_clipping_threshold: float
-    :param async_lagged_grad_discard_ratio: async SGD gradient commit control.
-        When more than async_lagged_grad_discard_ratio * num_gradient_servers
-        commits have passed, the current async SGD gradient is discarded.
-    :type async_lagged_grad_discard_ratio: float
-    """
-    if isinstance(regularization, BaseRegularization):
-        regularization = [regularization]
-
-    assert isinstance(learning_method, Optimizer)
-    if isinstance(learning_method, BaseSGDOptimizer):
-        algorithm = 'async_sgd' if is_async else 'sgd'
-    else:
-        algorithm = 'owlqn'
-
-    args = [
-        'batch_size', 'learning_rate', 'learning_rate_decay_a',
-        'learning_rate_decay_b', 'learning_rate_schedule', 'learning_rate_args',
-        'gradient_clipping_threshold', 'async_lagged_grad_discard_ratio'
-    ]
-    kwargs = dict()
-    kwargs['algorithm'] = algorithm
-    for arg in args:
-        kwargs[arg] = locals()[arg]
-
-    kwargs = __extends__(kwargs, learning_method.to_setting_kwargs())
-    learning_method.extra_settings()
-
-    for regular in regularization:
-        assert isinstance(regular, BaseRegularization)
-        regular.algorithm = algorithm
-        regular.learning_method = kwargs['learning_method']
-        kwargs = __extends__(kwargs, regular.to_setting_kwargs())
-        regular.extra_settings()
-
-    if gradient_clipping_threshold is not None:
-        gradient_clipping_threshold = GradientClippingThreshold(
-            threshold=gradient_clipping_threshold)
-
-    for each in [model_average, gradient_clipping_threshold]:
-        if each is not None:
-            assert isinstance(each, Optimizer)
-            each.algorithm = algorithm
-            each.learning_method = kwargs['learning_method']
-            kwargs = __extends__(kwargs, each.to_setting_kwargs())
-            each.extra_settings()
-
-    # Do Check?
-    Settings(**kwargs)
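A hedged example of how these pieces combine in a config; the values are invented, but the call pattern mirrors the test configs later in this diff:

.. code-block:: python

    from paddle.trainer_config_helpers import *

    settings(
        batch_size=128,
        learning_rate=1e-3,
        learning_method=AdamOptimizer(beta1=0.9, beta2=0.999),
        regularization=L2Regularization(rate=8e-4),
        model_average=ModelAverage(average_window=0.5),
        gradient_clipping_threshold=25)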
diff --git a/python/paddle/trainer_config_helpers/poolings.py b/python/paddle/trainer_config_helpers/poolings.py
deleted file mode 100644
index e0aeb311b3ae842aee337dbbf869e2f947d22bd9..0000000000000000000000000000000000000000
--- a/python/paddle/trainer_config_helpers/poolings.py
+++ /dev/null
@@ -1,148 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
-"""
-
-__all__ = [
-    "BasePoolingType", "MaxPooling", "AvgPooling", "MaxWithMaskPooling",
-    "CudnnMaxPooling", "CudnnAvgPooling", "CudnnAvgInclPadPooling",
-    "SumPooling", "SquareRootNPooling"
-]
-
-
-class BasePoolingType(object):
-    """
-    Base Pooling Type.
-    Note that these pooling types are used for sequence input, not for images.
-    Each PoolingType contains one parameter:
-
-    :param name: pooling layer type name used by paddle.
-    :type name: basestring
-    """
-
-    def __init__(self, name):
-        self.name = name
-
-
-class MaxPooling(BasePoolingType):
-    """
-    Max pooling.
-
-    Return the maximum value of each dimension over the sequence or time steps.
-
-    .. math::
-
-        max(samples\\_of\\_a\\_sequence)
-
-    :param output_max_index: True if the index of the sequence maximum should
-                             be output instead of the maximum value. None
-                             means use the default value in the proto.
-    :type output_max_index: bool|None
-    """
-
-    def __init__(self, output_max_index=None):
-        BasePoolingType.__init__(self, "max")
-        self.output_max_index = output_max_index
-
-
-class MaxWithMaskPooling(BasePoolingType):
-    """
-    MaxWithMask pooling.
-
-    Return not only the maximum value of each dimension over the sequence or
-    time steps, but also the location indices of the maximum values found.
-    """
-
-    def __init__(self):
-        BasePoolingType.__init__(self, "max-pool-with-mask")
-
-
-class CudnnMaxPooling(BasePoolingType):
-    """
-    Cudnn max pooling, supported on GPU only. Return the maximum value in the
-    pooling window.
-    """
-
-    def __init__(self):
-        BasePoolingType.__init__(self, "cudnn-max-pool")
-
-
-class CudnnAvgPooling(BasePoolingType):
-    """
-    Cudnn average pooling, supported on GPU only. Return the average value in
-    the pooling window.
-    """
-
-    def __init__(self):
-        BasePoolingType.__init__(self, "cudnn-avg-pool")
-
-
-class CudnnAvgInclPadPooling(BasePoolingType):
-    """
-    Cudnn average pooling, supported on GPU only. Return the average value in
-    the pooling window, taking the padding cells into account.
-    """
-
-    def __init__(self):
-        BasePoolingType.__init__(self, "cudnn-avg-incl-pad-pool")
-
-
-class AvgPooling(BasePoolingType):
-    """
-    Average pooling.
-
-    Return the average value of each dimension over the sequence or time steps.
-
-    .. math::
-
-        sum(samples\\_of\\_a\\_sequence)/sample\\_num
-    """
-    STRATEGY_AVG = "average"
-    STRATEGY_SUM = "sum"
-    STRATEGY_SQROOTN = "squarerootn"
-
-    def __init__(self, strategy=STRATEGY_AVG):
-        BasePoolingType.__init__(self, "average")
-        self.strategy = strategy
-
-
-class SumPooling(AvgPooling):
-    """
-    Sum pooling.
-
-    Return the sum of each dimension's values over the sequence or time steps.
-
-    .. math::
-
-        sum(samples\\_of\\_a\\_sequence)
-    """
-
-    def __init__(self):
-        AvgPooling.__init__(self, AvgPooling.STRATEGY_SUM)
-
-
-class SquareRootNPooling(AvgPooling):
-    """
-    Square Root N Pooling.
-
-    Return the sum of each dimension's values over the sequence or time steps,
-    divided by the square root of the sequence length.
-
-    .. math::
-
-        sum(samples\\_of\\_a\\_sequence)/sqrt(sample\\_num)
-    """
-
-    def __init__(self):
-        AvgPooling.__init__(self, AvgPooling.STRATEGY_SQROOTN)
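These types are passed to sequence-pooling layers. A small hypothetical usage, loosely mirroring the test_sequence_pooling config named in the file list below (layer names invented):

.. code-block:: python

    from paddle.trainer_config_helpers import *

    settings(batch_size=128, learning_rate=1e-4)

    seq = data_layer(name='seq', size=30)

    # One pooled vector per sequence, under different reduction rules.
    max_pooled = pooling_layer(input=seq, pooling_type=MaxPooling())
    avg_pooled = pooling_layer(input=seq, pooling_type=AvgPooling())
    sum_pooled = pooling_layer(input=seq, pooling_type=SumPooling())

    outputs(max_pooled, avg_pooled, sum_pooled)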
diff --git a/python/paddle/trainer_config_helpers/tests/CMakeLists.txt b/python/paddle/trainer_config_helpers/tests/CMakeLists.txt
deleted file mode 100644
index 30e0b9906c406d846d4b086a1a1c89587394afea..0000000000000000000000000000000000000000
--- a/python/paddle/trainer_config_helpers/tests/CMakeLists.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-#################### test_config_parser #########################
-add_test(NAME layers_test
-  COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_BINARY_DIR}/python/
-    ${PYTHON_EXECUTABLE} ${PADDLE_SOURCE_DIR}/python/paddle/trainer_config_helpers/tests/layers_test.py
-  WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/python/paddle)
-
-add_test(NAME test_reset_hook
-  COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_BINARY_DIR}/python/
-    ${PYTHON_EXECUTABLE} ${PADDLE_SOURCE_DIR}/python/paddle/trainer_config_helpers/tests/test_reset_hook.py
-  WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/python/paddle)
-
-add_paddle_exe(protobuf_equal ProtobufEqualMain.cpp)
-add_test(NAME test_layerHelpers
-  COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_BINARY_DIR}/python/
-    ${PADDLE_BINARY_DIR}/python/paddle/trainer_config_helpers/tests/configs/run_tests.sh ${PYTHON_EXECUTABLE}
-    ${CMAKE_CURRENT_BINARY_DIR}/protobuf_equal
-)
diff --git a/python/paddle/trainer_config_helpers/tests/ProtobufEqualMain.cpp b/python/paddle/trainer_config_helpers/tests/ProtobufEqualMain.cpp
deleted file mode 100644
index 7b10e0b7a605f92b142aa11ac39911f0993b077f..0000000000000000000000000000000000000000
--- a/python/paddle/trainer_config_helpers/tests/ProtobufEqualMain.cpp
+++ /dev/null
@@ -1,59 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-#include <google/protobuf/text_format.h>
-#include <google/protobuf/util/message_differencer.h>
-#include <fstream>
-#include <memory>
-#include "TrainerConfig.pb.h"
-
-bool loadPb(google::protobuf::Message* conf, const std::string& filename) {
-  std::ifstream fin;
-  fin.open(filename.c_str());
-  if (fin.is_open()) {
-    std::string str((std::istreambuf_iterator<char>(fin)),
-                    std::istreambuf_iterator<char>());
-    bool ok = google::protobuf::TextFormat::ParseFromString(str, conf);
-    fin.close();
-    return ok;
-  } else {
-    return false;
-  }
-}
-
-int main(int argc, char** argv) {
-  std::unique_ptr<google::protobuf::Message> config1;
-  std::unique_ptr<google::protobuf::Message> config2;
-  if (argc == 3) {
-    config1.reset(new paddle::ModelConfig());
-    config2.reset(new paddle::ModelConfig());
-  } else if (argc == 4) {
-    config1.reset(new paddle::TrainerConfig());
-    config2.reset(new paddle::TrainerConfig());
-  }
-  if (!config1 || !config2) {
-    return 1;
-  } else if (!loadPb(config1.get(), argv[1])) {
-    return 2;
-  } else if (!loadPb(config2.get(), argv[2])) {
-    return 3;
-  } else {
-    if (google::protobuf::util::MessageDifferencer::ApproximatelyEquals(
-            *config1, *config2)) {
-      return 0;
-    } else {
-      return 4;
-    }
-  }
-}
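The same check can be prototyped in Python with the protobuf runtime. A rough, hypothetical equivalent of the tool above: google.protobuf's text_format module is real, but the generated-module import path and the exact exit codes are illustrative assumptions.

.. code-block:: python

    import sys

    from google.protobuf import text_format

    # Illustrative import; the generated module name depends on the build.
    from paddle.proto import TrainerConfig_pb2


    def load_pb(message, filename):
        with open(filename) as f:
            text_format.Merge(f.read(), message)
        return message


    if __name__ == '__main__':
        a = load_pb(TrainerConfig_pb2.ModelConfig(), sys.argv[1])
        b = load_pb(TrainerConfig_pb2.ModelConfig(), sys.argv[2])
        # Exit 0 when equal, mirroring protobuf_equal's convention.
        sys.exit(0 if a == b else 4)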
diff --git a/python/paddle/trainer_config_helpers/tests/configs/.gitignore b/python/paddle/trainer_config_helpers/tests/configs/.gitignore
deleted file mode 100644
index c654bd41b0b4dd0cb510943540b660b4e4a147d9..0000000000000000000000000000000000000000
--- a/python/paddle/trainer_config_helpers/tests/configs/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-protostr/*.unittest
diff --git a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh
deleted file mode 100755
index 10c941f707498ec45e79bed9d3f8054eea19887d..0000000000000000000000000000000000000000
--- a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh
+++ /dev/null
@@ -1,17 +0,0 @@
-#!/bin/bash
-export configs=(test_repeat_layer test_fc layer_activations projections test_print_layer
-test_sequence_pooling test_lstmemory_layer test_grumemory_layer
-last_first_seq test_expand_layer test_ntm_layers test_hsigmoid
-img_layers img_trans_layers util_layers simple_rnn_layers unused_layers test_cost_layers
-test_rnn_group shared_fc shared_lstm shared_gru test_cost_layers_with_weight
-test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops
-test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer
-test_prelu_layer test_row_conv test_detection_output_layer test_multibox_loss_layer
-test_recursive_topology test_gated_unit_layer test_clip_layer test_row_l2_norm_layer
-test_kmax_seq_socre_layer test_sub_nested_seq_select_layer test_scale_shift_layer
-test_seq_slice_layer test_cross_entropy_over_beam test_roi_pool_layer test_pooling3D_layer
-test_conv3d_layer test_deconv3d_layer test_BatchNorm3D test_resize_layer
-test_scale_sub_region_layer test_dot_prod_layer test_l2_distance_layer
-test_factorization_machine)
-
-export whole_configs=(test_split_datasource)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/generate_protostr.sh b/python/paddle/trainer_config_helpers/tests/configs/generate_protostr.sh
deleted file mode 100755
index 44a75a60cc78e85f85d111a911999b7812db0f49..0000000000000000000000000000000000000000
--- a/python/paddle/trainer_config_helpers/tests/configs/generate_protostr.sh
+++ /dev/null
@@ -1,27 +0,0 @@
-#!/bin/bash
-
-set -e
-cd `dirname $0`
-
-protostr=$PWD/protostr
-. file_list.sh
-
-for conf in ${configs[*]}
-do
-    echo "Generating " $conf
-    $1 -m paddle.utils.dump_config $conf.py > $protostr/$conf.protostr.unittest
-    if [ ! -f "$protostr/$conf.protostr" ]; then
-        cp $protostr/$conf.protostr.unittest $protostr/$conf.protostr
-    fi
-    cat ${conf}.py |$1 test_config_parser_for_non_file_config.py > $protostr/$conf.protostr.non_file_config.unittest
-done
-
-for conf in ${whole_configs[*]}
-do
-    echo "Generating " $conf
-    $1 -m paddle.utils.dump_config $conf.py "" --whole > $protostr/$conf.protostr.unittest
-    if [ ! -f "$protostr/$conf.protostr" ]; then
-        cp $protostr/$conf.protostr.unittest $protostr/$conf.protostr
-    fi
-    cat ${conf}.py |$1 test_config_parser_for_non_file_config.py --whole > $protostr/$conf.protostr.non_file_config.unittest
-done
diff --git a/python/paddle/trainer_config_helpers/tests/configs/img_layers.py b/python/paddle/trainer_config_helpers/tests/configs/img_layers.py
deleted file mode 100644
index 767b6454242e40bbd53fb6e1a7d4c8bb5281d327..0000000000000000000000000000000000000000
--- a/python/paddle/trainer_config_helpers/tests/configs/img_layers.py
+++ /dev/null
@@ -1,38 +0,0 @@
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from paddle.trainer_config_helpers import *
-
-settings(learning_rate=1e-3, batch_size=1000)
-
-img = data_layer(name='image', size=256 * 256)
-
-# parse_conv in config_parse.py is not strictly accurate when filter_size
-# is not square, so a square filter_size is set here.
-img_conv = img_conv_layer(
-    input=img,
-    num_channels=1,
-    num_filters=64,
-    filter_size=(32, 32),
-    padding=(1, 1),
-    dilation=(1, 1),
-    stride=(1, 1),
-    act=LinearActivation())
-img_bn = batch_norm_layer(input=img_conv, act=ReluActivation())
-
-img_norm = img_cmrnorm_layer(input=img_bn, size=32)
-
-img_pool = img_pool_layer(input=img_conv, pool_size=32, pool_type=MaxPooling())
-
-outputs(img_pool, img_norm)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/img_trans_layers.py b/python/paddle/trainer_config_helpers/tests/configs/img_trans_layers.py
deleted file mode 100644
index e17c8fa7c0ae9349db89448f405c71256d50e268..0000000000000000000000000000000000000000
--- a/python/paddle/trainer_config_helpers/tests/configs/img_trans_layers.py
+++ /dev/null
@@ -1,38 +0,0 @@
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from paddle.trainer_config_helpers import *
-
-settings(learning_rate=1e-3, batch_size=1000)
-
-img = data_layer(name='image', size=227 * 227)
-
-# parse_conv in config_parse.py is not strictly accurate when filter_size
-# is not square, so a square filter_size is set here.
-img_conv = img_conv_layer(
-    input=img,
-    num_channels=1,
-    num_filters=64,
-    filter_size=(32, 32),
-    padding=(1, 1),
-    stride=(1, 1),
-    act=LinearActivation(),
-    trans=True)
-img_bn = batch_norm_layer(input=img_conv, act=ReluActivation())
-
-img_norm = img_cmrnorm_layer(input=img_bn, size=32)
-
-img_pool = img_pool_layer(input=img_conv, pool_size=32, pool_type=MaxPooling())
-
-outputs(img_pool, img_norm)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/last_first_seq.py b/python/paddle/trainer_config_helpers/tests/configs/last_first_seq.py
deleted file mode 100644
index 5b6d2627e431ac9e2d988a43a7541e177fd8ac19..0000000000000000000000000000000000000000
--- a/python/paddle/trainer_config_helpers/tests/configs/last_first_seq.py
+++ /dev/null
@@ -1,35 +0,0 @@
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from paddle.trainer_config_helpers import *
-
-settings(batch_size=1000, learning_rate=1e-5)
-
-din = data_layer(name='data', size=30)
-
-seq_op = [first_seq, last_seq]
-
-agg_level = [AggregateLevel.TO_SEQUENCE, AggregateLevel.TO_NO_SEQUENCE]
-
-opts = []
-
-for op in seq_op:
-    for al in agg_level:
-        opts.append(op(input=din, agg_level=al))
-
-for op in seq_op:
-    opts.append(
-        op(input=din, agg_level=AggregateLevel.TO_NO_SEQUENCE, stride=5))
-
-outputs(opts)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/layer_activations.py b/python/paddle/trainer_config_helpers/tests/configs/layer_activations.py
deleted file mode 100644
index ac1f7e02c098a26ea4acecb618db6d79d2ab7134..0000000000000000000000000000000000000000
--- a/python/paddle/trainer_config_helpers/tests/configs/layer_activations.py
+++ /dev/null
@@ -1,34 +0,0 @@
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-'''
-Test all activations.
-''' - -from paddle.trainer_config_helpers import * - -settings(learning_rate=1e-4, batch_size=1000) - -din = data_layer(name='input', size=100) - -acts = [ - TanhActivation, SigmoidActivation, SoftmaxActivation, IdentityActivation, - LinearActivation, ExpActivation, ReluActivation, BReluActivation, - SoftReluActivation, STanhActivation, AbsActivation, SquareActivation -] - -outputs([ - fc_layer( - input=din, size=100, act=act(), name="layer_%d" % i) - for i, act in enumerate(acts) -]) diff --git a/python/paddle/trainer_config_helpers/tests/configs/math_ops.py b/python/paddle/trainer_config_helpers/tests/configs/math_ops.py deleted file mode 100644 index 29dc634fb3992ba51c06672a3e6047e44176c29b..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/math_ops.py +++ /dev/null @@ -1,42 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -settings(batch_size=1000, learning_rate=1e-5) - -x = data_layer(name='data', size=100) -x = layer_math.exp(x) -x = layer_math.sqrt(x) -x = layer_math.reciprocal(x) -x = layer_math.log(x) -x = layer_math.abs(x) -x = layer_math.sigmoid(x) -x = layer_math.tanh(x) -x = layer_math.square(x) -x = layer_math.relu(x) -y = 1 + x -y = y + 1 -y = x + y -y = y - x -y = y - 2 -y = 2 - y -y = 2 * y -y = y * 3 -z = data_layer(name='data_2', size=1) -y = y * z -y = z * y -y = y + z -y = z + y -outputs(y) diff --git a/python/paddle/trainer_config_helpers/tests/configs/projections.py b/python/paddle/trainer_config_helpers/tests/configs/projections.py deleted file mode 100644 index 3b7a196d1c12aa5fe597dcbe3c4663525be2c85c..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/projections.py +++ /dev/null @@ -1,80 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -''' -Test mixed layer, projections and operators. 
-''' -from paddle.trainer_config_helpers import * - -settings(batch_size=1000, learning_rate=1e-4) - -din = data_layer(name='test', size=100) - -din = embedding_layer(input=din, size=256) - -with mixed_layer(size=100) as m1: - m1 += full_matrix_projection(input=din) - -with mixed_layer(size=100) as m2: - m2 += table_projection(input=m1) - -with mixed_layer(size=100) as m3: - m3 += identity_projection(input=m2) - -with mixed_layer(size=100) as m4: - m4 += dotmul_projection(input=m3) - -with mixed_layer() as m5: - m5 += context_projection(input=m4, context_len=3) - -with mixed_layer() as m6: - m6 += dotmul_operator(a=m3, b=m4) - m6 += scaling_projection(m3) - -img = data_layer(name='img', size=32 * 32) -flt = data_layer(name='filter', size=3 * 3 * 1 * 64) - -with mixed_layer() as m7: - m7 += conv_operator( - img=img, filter=flt, num_filters=64, num_channels=1, filter_size=3) - m7 += conv_projection(img, filter_size=3, num_filters=64, num_channels=1) - -with mixed_layer() as m8: - m8 += conv_operator( - img=img, - filter=flt, - num_filters=64, - num_channels=1, - filter_size=3, - stride=2, - padding=1, - trans=True) - m8 += conv_projection( - img, - filter_size=3, - num_filters=64, - num_channels=1, - stride=2, - padding=1, - trans=True) -end = mixed_layer( - input=[ - full_matrix_projection(input=m5), - trans_full_matrix_projection(input=m6), - full_matrix_projection(input=m7), full_matrix_projection(input=m8) - ], - size=100, - layer_attr=ExtraAttr( - drop_rate=0.5, error_clipping_threshold=40)) - -outputs(end) diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/img_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/img_layers.protostr deleted file mode 100644 index 3e0f957648879d4350d662b336c953273bac1378..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/img_layers.protostr +++ /dev/null @@ -1,193 +0,0 @@ -type: "nn" -layers { - name: "image" - type: "data" - size: 65536 - active_type: "" -} -layers { - name: "__conv_0__" - type: "exconv" - size: 3297856 - active_type: "" - inputs { - input_layer_name: "image" - input_parameter_name: "___conv_0__.w0" - conv_conf { - filter_size: 32 - channels: 1 - stride: 1 - padding: 1 - groups: 1 - filter_channels: 1 - output_x: 227 - img_size: 256 - caffe_mode: true - filter_size_y: 32 - padding_y: 1 - stride_y: 1 - output_y: 227 - img_size_y: 256 - dilation: 1 - dilation_y: 1 - } - } - bias_parameter_name: "___conv_0__.wbias" - num_filters: 64 - shared_biases: true - height: 227 - width: 227 -} -layers { - name: "__batch_norm_0__" - type: "batch_norm" - size: 3297856 - active_type: "relu" - inputs { - input_layer_name: "__conv_0__" - input_parameter_name: "___batch_norm_0__.w0" - image_conf { - channels: 64 - img_size: 227 - img_size_y: 227 - } - } - inputs { - input_layer_name: "__conv_0__" - input_parameter_name: "___batch_norm_0__.w1" - } - inputs { - input_layer_name: "__conv_0__" - input_parameter_name: "___batch_norm_0__.w2" - } - bias_parameter_name: "___batch_norm_0__.wbias" - moving_average_fraction: 0.9 - height: 227 - width: 227 - depth: 1 - epsilon: 1e-05 -} -layers { - name: "__crmnorm_0__" - type: "norm" - size: 3297856 - active_type: "" - inputs { - input_layer_name: "__batch_norm_0__" - norm_conf { - norm_type: "cmrnorm-projection" - channels: 64 - size: 32 - scale: 0.0004 - pow: 0.75 - output_x: 227 - img_size: 227 - blocked: false - output_y: 227 - img_size_y: 227 - } - } - height: 227 - width: 227 -} -layers { - name: 
"__pool_0__" - type: "pool" - size: 2458624 - active_type: "" - inputs { - input_layer_name: "__conv_0__" - pool_conf { - pool_type: "max-projection" - channels: 64 - size_x: 32 - stride: 1 - output_x: 196 - img_size: 227 - padding: 0 - size_y: 32 - stride_y: 1 - output_y: 196 - img_size_y: 227 - padding_y: 0 - } - } - height: 196 - width: 196 -} -parameters { - name: "___conv_0__.w0" - size: 65536 - initial_mean: 0.0 - initial_std: 0.0441941738242 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___conv_0__.wbias" - size: 64 - initial_mean: 0.0 - initial_std: 0.0 - dims: 64 - dims: 1 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___batch_norm_0__.w0" - size: 64 - initial_mean: 1.0 - initial_std: 0.0 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___batch_norm_0__.w1" - size: 64 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 64 - initial_strategy: 0 - initial_smart: false - is_static: true - is_shared: true -} -parameters { - name: "___batch_norm_0__.w2" - size: 64 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 64 - initial_strategy: 0 - initial_smart: false - is_static: true - is_shared: true -} -parameters { - name: "___batch_norm_0__.wbias" - size: 64 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 64 - initial_strategy: 0 - initial_smart: false -} -input_layer_names: "image" -output_layer_names: "__pool_0__" -output_layer_names: "__crmnorm_0__" -sub_models { - name: "root" - layer_names: "image" - layer_names: "__conv_0__" - layer_names: "__batch_norm_0__" - layer_names: "__crmnorm_0__" - layer_names: "__pool_0__" - input_layer_names: "image" - output_layer_names: "__pool_0__" - output_layer_names: "__crmnorm_0__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/img_trans_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/img_trans_layers.protostr deleted file mode 100644 index a18a4652e14c0cfc4dbca87e67d31aa663ee756b..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/img_trans_layers.protostr +++ /dev/null @@ -1,193 +0,0 @@ -type: "nn" -layers { - name: "image" - type: "data" - size: 51529 - active_type: "" -} -layers { - name: "__conv_0__" - type: "exconvt" - size: 4194304 - active_type: "" - inputs { - input_layer_name: "image" - input_parameter_name: "___conv_0__.w0" - conv_conf { - filter_size: 32 - channels: 1 - stride: 1 - padding: 1 - groups: 1 - filter_channels: 64 - output_x: 227 - img_size: 256 - caffe_mode: true - filter_size_y: 32 - padding_y: 1 - stride_y: 1 - output_y: 227 - img_size_y: 256 - dilation: 1 - dilation_y: 1 - } - } - bias_parameter_name: "___conv_0__.wbias" - num_filters: 64 - shared_biases: true - height: 256 - width: 256 -} -layers { - name: "__batch_norm_0__" - type: "batch_norm" - size: 4194304 - active_type: "relu" - inputs { - input_layer_name: "__conv_0__" - input_parameter_name: "___batch_norm_0__.w0" - image_conf { - channels: 64 - img_size: 256 - img_size_y: 256 - } - } - inputs { - input_layer_name: "__conv_0__" - input_parameter_name: "___batch_norm_0__.w1" - } - inputs { - input_layer_name: "__conv_0__" - input_parameter_name: "___batch_norm_0__.w2" - } - bias_parameter_name: "___batch_norm_0__.wbias" - moving_average_fraction: 0.9 - height: 256 - width: 256 - depth: 1 - epsilon: 1e-05 -} -layers { - name: "__crmnorm_0__" - type: "norm" - size: 4194304 - active_type: "" - inputs { - input_layer_name: 
"__batch_norm_0__" - norm_conf { - norm_type: "cmrnorm-projection" - channels: 64 - size: 32 - scale: 0.0004 - pow: 0.75 - output_x: 256 - img_size: 256 - blocked: false - output_y: 256 - img_size_y: 256 - } - } - height: 256 - width: 256 -} -layers { - name: "__pool_0__" - type: "pool" - size: 3240000 - active_type: "" - inputs { - input_layer_name: "__conv_0__" - pool_conf { - pool_type: "max-projection" - channels: 64 - size_x: 32 - stride: 1 - output_x: 225 - img_size: 256 - padding: 0 - size_y: 32 - stride_y: 1 - output_y: 225 - img_size_y: 256 - padding_y: 0 - } - } - height: 225 - width: 225 -} -parameters { - name: "___conv_0__.w0" - size: 65536 - initial_mean: 0.0 - initial_std: 0.0441941738242 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___conv_0__.wbias" - size: 64 - initial_mean: 0.0 - initial_std: 0.0 - dims: 64 - dims: 1 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___batch_norm_0__.w0" - size: 64 - initial_mean: 1.0 - initial_std: 0.0 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___batch_norm_0__.w1" - size: 64 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 64 - initial_strategy: 0 - initial_smart: false - is_static: true - is_shared: true -} -parameters { - name: "___batch_norm_0__.w2" - size: 64 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 64 - initial_strategy: 0 - initial_smart: false - is_static: true - is_shared: true -} -parameters { - name: "___batch_norm_0__.wbias" - size: 64 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 64 - initial_strategy: 0 - initial_smart: false -} -input_layer_names: "image" -output_layer_names: "__pool_0__" -output_layer_names: "__crmnorm_0__" -sub_models { - name: "root" - layer_names: "image" - layer_names: "__conv_0__" - layer_names: "__batch_norm_0__" - layer_names: "__crmnorm_0__" - layer_names: "__pool_0__" - input_layer_names: "image" - output_layer_names: "__pool_0__" - output_layer_names: "__crmnorm_0__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/last_first_seq.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/last_first_seq.protostr deleted file mode 100644 index fee0f8e462bfd211e6aa7698ebfeaf0a19428a62..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/last_first_seq.protostr +++ /dev/null @@ -1,102 +0,0 @@ -type: "nn" -layers { - name: "data" - type: "data" - size: 30 - active_type: "" -} -layers { - name: "__first_seq_0__" - type: "seqlastins" - size: 30 - active_type: "" - inputs { - input_layer_name: "data" - } - select_first: true - trans_type: "seq" - seq_pool_stride: -1 -} -layers { - name: "__first_seq_1__" - type: "seqlastins" - size: 30 - active_type: "" - inputs { - input_layer_name: "data" - } - select_first: true - trans_type: "non-seq" - seq_pool_stride: -1 -} -layers { - name: "__last_seq_0__" - type: "seqlastins" - size: 30 - active_type: "" - inputs { - input_layer_name: "data" - } - trans_type: "seq" - seq_pool_stride: -1 -} -layers { - name: "__last_seq_1__" - type: "seqlastins" - size: 30 - active_type: "" - inputs { - input_layer_name: "data" - } - trans_type: "non-seq" - seq_pool_stride: -1 -} -layers { - name: "__first_seq_2__" - type: "seqlastins" - size: 30 - active_type: "" - inputs { - input_layer_name: "data" - } - select_first: true - trans_type: "non-seq" - seq_pool_stride: 5 -} -layers { - name: "__last_seq_2__" - type: "seqlastins" - size: 30 - 
active_type: "" - inputs { - input_layer_name: "data" - } - trans_type: "non-seq" - seq_pool_stride: 5 -} -input_layer_names: "data" -output_layer_names: "__first_seq_0__" -output_layer_names: "__first_seq_1__" -output_layer_names: "__last_seq_0__" -output_layer_names: "__last_seq_1__" -output_layer_names: "__first_seq_2__" -output_layer_names: "__last_seq_2__" -sub_models { - name: "root" - layer_names: "data" - layer_names: "__first_seq_0__" - layer_names: "__first_seq_1__" - layer_names: "__last_seq_0__" - layer_names: "__last_seq_1__" - layer_names: "__first_seq_2__" - layer_names: "__last_seq_2__" - input_layer_names: "data" - output_layer_names: "__first_seq_0__" - output_layer_names: "__first_seq_1__" - output_layer_names: "__last_seq_0__" - output_layer_names: "__last_seq_1__" - output_layer_names: "__first_seq_2__" - output_layer_names: "__last_seq_2__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/layer_activations.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/layer_activations.protostr deleted file mode 100644 index ecf39e4d32167d4e838c43929cc4e7a87ff421a8..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/layer_activations.protostr +++ /dev/null @@ -1,423 +0,0 @@ -type: "nn" -layers { - name: "input" - type: "data" - size: 100 - active_type: "" -} -layers { - name: "layer_0" - type: "fc" - size: 100 - active_type: "tanh" - inputs { - input_layer_name: "input" - input_parameter_name: "_layer_0.w0" - } - bias_parameter_name: "_layer_0.wbias" -} -layers { - name: "layer_1" - type: "fc" - size: 100 - active_type: "sigmoid" - inputs { - input_layer_name: "input" - input_parameter_name: "_layer_1.w0" - } - bias_parameter_name: "_layer_1.wbias" -} -layers { - name: "layer_2" - type: "fc" - size: 100 - active_type: "softmax" - inputs { - input_layer_name: "input" - input_parameter_name: "_layer_2.w0" - } - bias_parameter_name: "_layer_2.wbias" -} -layers { - name: "layer_3" - type: "fc" - size: 100 - active_type: "" - inputs { - input_layer_name: "input" - input_parameter_name: "_layer_3.w0" - } - bias_parameter_name: "_layer_3.wbias" -} -layers { - name: "layer_4" - type: "fc" - size: 100 - active_type: "" - inputs { - input_layer_name: "input" - input_parameter_name: "_layer_4.w0" - } - bias_parameter_name: "_layer_4.wbias" -} -layers { - name: "layer_5" - type: "fc" - size: 100 - active_type: "exponential" - inputs { - input_layer_name: "input" - input_parameter_name: "_layer_5.w0" - } - bias_parameter_name: "_layer_5.wbias" -} -layers { - name: "layer_6" - type: "fc" - size: 100 - active_type: "relu" - inputs { - input_layer_name: "input" - input_parameter_name: "_layer_6.w0" - } - bias_parameter_name: "_layer_6.wbias" -} -layers { - name: "layer_7" - type: "fc" - size: 100 - active_type: "brelu" - inputs { - input_layer_name: "input" - input_parameter_name: "_layer_7.w0" - } - bias_parameter_name: "_layer_7.wbias" -} -layers { - name: "layer_8" - type: "fc" - size: 100 - active_type: "softrelu" - inputs { - input_layer_name: "input" - input_parameter_name: "_layer_8.w0" - } - bias_parameter_name: "_layer_8.wbias" -} -layers { - name: "layer_9" - type: "fc" - size: 100 - active_type: "stanh" - inputs { - input_layer_name: "input" - input_parameter_name: "_layer_9.w0" - } - bias_parameter_name: "_layer_9.wbias" -} -layers { - name: "layer_10" - type: "fc" - size: 100 - active_type: "abs" - inputs { - input_layer_name: "input" - 
input_parameter_name: "_layer_10.w0" - } - bias_parameter_name: "_layer_10.wbias" -} -layers { - name: "layer_11" - type: "fc" - size: 100 - active_type: "square" - inputs { - input_layer_name: "input" - input_parameter_name: "_layer_11.w0" - } - bias_parameter_name: "_layer_11.wbias" -} -parameters { - name: "_layer_0.w0" - size: 10000 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 100 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "_layer_0.wbias" - size: 100 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 100 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "_layer_1.w0" - size: 10000 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 100 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "_layer_1.wbias" - size: 100 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 100 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "_layer_2.w0" - size: 10000 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 100 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "_layer_2.wbias" - size: 100 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 100 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "_layer_3.w0" - size: 10000 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 100 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "_layer_3.wbias" - size: 100 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 100 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "_layer_4.w0" - size: 10000 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 100 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "_layer_4.wbias" - size: 100 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 100 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "_layer_5.w0" - size: 10000 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 100 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "_layer_5.wbias" - size: 100 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 100 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "_layer_6.w0" - size: 10000 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 100 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "_layer_6.wbias" - size: 100 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 100 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "_layer_7.w0" - size: 10000 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 100 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "_layer_7.wbias" - size: 100 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 100 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "_layer_8.w0" - size: 10000 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 100 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "_layer_8.wbias" - size: 100 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 100 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "_layer_9.w0" - size: 10000 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 100 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "_layer_9.wbias" - size: 100 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 100 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "_layer_10.w0" - size: 10000 - 
initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 100 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "_layer_10.wbias" - size: 100 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 100 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "_layer_11.w0" - size: 10000 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 100 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "_layer_11.wbias" - size: 100 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 100 - initial_strategy: 0 - initial_smart: false -} -input_layer_names: "input" -output_layer_names: "layer_0" -output_layer_names: "layer_1" -output_layer_names: "layer_2" -output_layer_names: "layer_3" -output_layer_names: "layer_4" -output_layer_names: "layer_5" -output_layer_names: "layer_6" -output_layer_names: "layer_7" -output_layer_names: "layer_8" -output_layer_names: "layer_9" -output_layer_names: "layer_10" -output_layer_names: "layer_11" -sub_models { - name: "root" - layer_names: "input" - layer_names: "layer_0" - layer_names: "layer_1" - layer_names: "layer_2" - layer_names: "layer_3" - layer_names: "layer_4" - layer_names: "layer_5" - layer_names: "layer_6" - layer_names: "layer_7" - layer_names: "layer_8" - layer_names: "layer_9" - layer_names: "layer_10" - layer_names: "layer_11" - input_layer_names: "input" - output_layer_names: "layer_0" - output_layer_names: "layer_1" - output_layer_names: "layer_2" - output_layer_names: "layer_3" - output_layer_names: "layer_4" - output_layer_names: "layer_5" - output_layer_names: "layer_6" - output_layer_names: "layer_7" - output_layer_names: "layer_8" - output_layer_names: "layer_9" - output_layer_names: "layer_10" - output_layer_names: "layer_11" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/math_ops.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/math_ops.protostr deleted file mode 100644 index 582207741ab76370d9c5c09598c3f7a81f013b73..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/math_ops.protostr +++ /dev/null @@ -1,413 +0,0 @@ -type: "nn" -layers { - name: "data" - type: "data" - size: 100 - active_type: "" -} -layers { - name: "__exp_0__" - type: "mixed" - size: 100 - active_type: "exponential" - inputs { - input_layer_name: "data" - proj_conf { - type: "identity" - name: "___exp_0__.w0" - input_size: 100 - output_size: 100 - } - } -} -layers { - name: "__sqrt_0__" - type: "mixed" - size: 100 - active_type: "sqrt" - inputs { - input_layer_name: "__exp_0__" - proj_conf { - type: "identity" - name: "___sqrt_0__.w0" - input_size: 100 - output_size: 100 - } - } -} -layers { - name: "__reciprocal_0__" - type: "mixed" - size: 100 - active_type: "reciprocal" - inputs { - input_layer_name: "__sqrt_0__" - proj_conf { - type: "identity" - name: "___reciprocal_0__.w0" - input_size: 100 - output_size: 100 - } - } -} -layers { - name: "__log_0__" - type: "mixed" - size: 100 - active_type: "log" - inputs { - input_layer_name: "__reciprocal_0__" - proj_conf { - type: "identity" - name: "___log_0__.w0" - input_size: 100 - output_size: 100 - } - } -} -layers { - name: "__abs_0__" - type: "mixed" - size: 100 - active_type: "abs" - inputs { - input_layer_name: "__log_0__" - proj_conf { - type: "identity" - name: "___abs_0__.w0" - input_size: 100 - output_size: 100 - } - } -} -layers { - name: "__sigmoid_0__" - type: "mixed" - size: 100 - active_type: "sigmoid" - inputs { 
- input_layer_name: "__abs_0__" - proj_conf { - type: "identity" - name: "___sigmoid_0__.w0" - input_size: 100 - output_size: 100 - } - } -} -layers { - name: "__tanh_0__" - type: "mixed" - size: 100 - active_type: "tanh" - inputs { - input_layer_name: "__sigmoid_0__" - proj_conf { - type: "identity" - name: "___tanh_0__.w0" - input_size: 100 - output_size: 100 - } - } -} -layers { - name: "__square_0__" - type: "mixed" - size: 100 - active_type: "square" - inputs { - input_layer_name: "__tanh_0__" - proj_conf { - type: "identity" - name: "___square_0__.w0" - input_size: 100 - output_size: 100 - } - } -} -layers { - name: "__relu_0__" - type: "mixed" - size: 100 - active_type: "relu" - inputs { - input_layer_name: "__square_0__" - proj_conf { - type: "identity" - name: "___relu_0__.w0" - input_size: 100 - output_size: 100 - } - } -} -layers { - name: "__slope_intercept_layer_0__" - type: "slope_intercept" - size: 100 - active_type: "" - inputs { - input_layer_name: "__relu_0__" - } - slope: 1.0 - intercept: 1 -} -layers { - name: "__slope_intercept_layer_1__" - type: "slope_intercept" - size: 100 - active_type: "" - inputs { - input_layer_name: "__slope_intercept_layer_0__" - } - slope: 1.0 - intercept: 1 -} -layers { - name: "__mixed_0__" - type: "mixed" - size: 100 - active_type: "" - inputs { - input_layer_name: "__relu_0__" - proj_conf { - type: "identity" - name: "___mixed_0__.w0" - input_size: 100 - output_size: 100 - } - } - inputs { - input_layer_name: "__slope_intercept_layer_1__" - proj_conf { - type: "identity" - name: "___mixed_0__.w1" - input_size: 100 - output_size: 100 - } - } -} -layers { - name: "__slope_intercept_layer_2__" - type: "slope_intercept" - size: 100 - active_type: "" - inputs { - input_layer_name: "__relu_0__" - } - slope: -1.0 - intercept: 0.0 -} -layers { - name: "__mixed_1__" - type: "mixed" - size: 100 - active_type: "" - inputs { - input_layer_name: "__mixed_0__" - proj_conf { - type: "identity" - name: "___mixed_1__.w0" - input_size: 100 - output_size: 100 - } - } - inputs { - input_layer_name: "__slope_intercept_layer_2__" - proj_conf { - type: "identity" - name: "___mixed_1__.w1" - input_size: 100 - output_size: 100 - } - } -} -layers { - name: "__slope_intercept_layer_3__" - type: "slope_intercept" - size: 100 - active_type: "" - inputs { - input_layer_name: "__mixed_1__" - } - slope: 1.0 - intercept: -2 -} -layers { - name: "__slope_intercept_layer_4__" - type: "slope_intercept" - size: 100 - active_type: "" - inputs { - input_layer_name: "__slope_intercept_layer_3__" - } - slope: -1.0 - intercept: 0.0 -} -layers { - name: "__slope_intercept_layer_5__" - type: "slope_intercept" - size: 100 - active_type: "" - inputs { - input_layer_name: "__slope_intercept_layer_4__" - } - slope: 1.0 - intercept: 2 -} -layers { - name: "__slope_intercept_layer_6__" - type: "slope_intercept" - size: 100 - active_type: "" - inputs { - input_layer_name: "__slope_intercept_layer_5__" - } - slope: 2 - intercept: 0.0 -} -layers { - name: "__slope_intercept_layer_7__" - type: "slope_intercept" - size: 100 - active_type: "" - inputs { - input_layer_name: "__slope_intercept_layer_6__" - } - slope: 3 - intercept: 0.0 -} -layers { - name: "data_2" - type: "data" - size: 1 - active_type: "" -} -layers { - name: "__scaling_layer_0__" - type: "scaling" - size: 100 - active_type: "" - inputs { - input_layer_name: "data_2" - } - inputs { - input_layer_name: "__slope_intercept_layer_7__" - } -} -layers { - name: "__scaling_layer_1__" - type: "scaling" - size: 100 - active_type: "" - 
inputs { - input_layer_name: "data_2" - } - inputs { - input_layer_name: "__scaling_layer_0__" - } -} -layers { - name: "__repeat_layer_0__" - type: "featmap_expand" - size: 100 - active_type: "" - inputs { - input_layer_name: "data_2" - } - num_filters: 100 -} -layers { - name: "__mixed_2__" - type: "mixed" - size: 100 - active_type: "" - inputs { - input_layer_name: "__scaling_layer_1__" - proj_conf { - type: "identity" - name: "___mixed_2__.w0" - input_size: 100 - output_size: 100 - } - } - inputs { - input_layer_name: "__repeat_layer_0__" - proj_conf { - type: "identity" - name: "___mixed_2__.w1" - input_size: 100 - output_size: 100 - } - } -} -layers { - name: "__repeat_layer_1__" - type: "featmap_expand" - size: 100 - active_type: "" - inputs { - input_layer_name: "data_2" - } - num_filters: 100 -} -layers { - name: "__mixed_3__" - type: "mixed" - size: 100 - active_type: "" - inputs { - input_layer_name: "__mixed_2__" - proj_conf { - type: "identity" - name: "___mixed_3__.w0" - input_size: 100 - output_size: 100 - } - } - inputs { - input_layer_name: "__repeat_layer_1__" - proj_conf { - type: "identity" - name: "___mixed_3__.w1" - input_size: 100 - output_size: 100 - } - } -} -input_layer_names: "data_2" -input_layer_names: "data" -output_layer_names: "__mixed_3__" -sub_models { - name: "root" - layer_names: "data" - layer_names: "__exp_0__" - layer_names: "__sqrt_0__" - layer_names: "__reciprocal_0__" - layer_names: "__log_0__" - layer_names: "__abs_0__" - layer_names: "__sigmoid_0__" - layer_names: "__tanh_0__" - layer_names: "__square_0__" - layer_names: "__relu_0__" - layer_names: "__slope_intercept_layer_0__" - layer_names: "__slope_intercept_layer_1__" - layer_names: "__mixed_0__" - layer_names: "__slope_intercept_layer_2__" - layer_names: "__mixed_1__" - layer_names: "__slope_intercept_layer_3__" - layer_names: "__slope_intercept_layer_4__" - layer_names: "__slope_intercept_layer_5__" - layer_names: "__slope_intercept_layer_6__" - layer_names: "__slope_intercept_layer_7__" - layer_names: "data_2" - layer_names: "__scaling_layer_0__" - layer_names: "__scaling_layer_1__" - layer_names: "__repeat_layer_0__" - layer_names: "__mixed_2__" - layer_names: "__repeat_layer_1__" - layer_names: "__mixed_3__" - input_layer_names: "data_2" - input_layer_names: "data" - output_layer_names: "__mixed_3__" - is_recurrent_layer_group: false -} diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/projections.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/projections.protostr deleted file mode 100644 index d8bd7b9dfb71a392d0dc53872a0d72f47530530f..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/projections.protostr +++ /dev/null @@ -1,466 +0,0 @@ -type: "nn" -layers { - name: "test" - type: "data" - size: 100 - active_type: "" -} -layers { - name: "__embedding_0__" - type: "mixed" - size: 256 - active_type: "" - inputs { - input_layer_name: "test" - input_parameter_name: "___embedding_0__.w0" - proj_conf { - type: "table" - name: "___embedding_0__.w0" - input_size: 100 - output_size: 256 - } - } -} -layers { - name: "__mixed_0__" - type: "mixed" - size: 100 - active_type: "" - inputs { - input_layer_name: "__embedding_0__" - input_parameter_name: "___mixed_0__.w0" - proj_conf { - type: "fc" - name: "___mixed_0__.w0" - input_size: 256 - output_size: 100 - } - } -} -layers { - name: "__mixed_1__" - type: "mixed" - size: 100 - active_type: "" - inputs { - input_layer_name: "__mixed_0__" - 
input_parameter_name: "___mixed_1__.w0" - proj_conf { - type: "table" - name: "___mixed_1__.w0" - input_size: 100 - output_size: 100 - } - } -} -layers { - name: "__mixed_2__" - type: "mixed" - size: 100 - active_type: "" - inputs { - input_layer_name: "__mixed_1__" - proj_conf { - type: "identity" - name: "___mixed_2__.w0" - input_size: 100 - output_size: 100 - } - } -} -layers { - name: "__mixed_3__" - type: "mixed" - size: 100 - active_type: "" - inputs { - input_layer_name: "__mixed_2__" - input_parameter_name: "___mixed_3__.w0" - proj_conf { - type: "dot_mul" - name: "___mixed_3__.w0" - input_size: 100 - output_size: 100 - } - } -} -layers { - name: "__mixed_4__" - type: "mixed" - size: 300 - active_type: "" - inputs { - input_layer_name: "__mixed_3__" - input_parameter_name: "___mixed_4__.w0" - proj_conf { - type: "context" - name: "___mixed_4__.w0" - input_size: 100 - output_size: 300 - context_start: -1 - context_length: 3 - trainable_padding: true - } - } -} -layers { - name: "__mixed_5__" - type: "mixed" - size: 100 - active_type: "" - inputs { - input_layer_name: "__mixed_2__" - } - inputs { - input_layer_name: "__mixed_2__" - input_parameter_name: "___mixed_5__.w1" - proj_conf { - type: "scaling" - name: "___mixed_5__.w1" - input_size: 100 - output_size: 100 - } - } - inputs { - input_layer_name: "__mixed_3__" - } - operator_confs { - type: "dot_mul" - input_indices: 0 - input_indices: 2 - input_sizes: 100 - input_sizes: 100 - output_size: 100 - dotmul_scale: 1 - } -} -layers { - name: "img" - type: "data" - size: 1024 - active_type: "" -} -layers { - name: "filter" - type: "data" - size: 576 - active_type: "" -} -layers { - name: "__mixed_6__" - type: "mixed" - size: 57600 - active_type: "" - inputs { - input_layer_name: "img" - } - inputs { - input_layer_name: "img" - input_parameter_name: "___mixed_6__.w1" - proj_conf { - type: "conv" - name: "___mixed_6__.w1" - input_size: 1024 - output_size: 57600 - conv_conf { - filter_size: 3 - channels: 1 - stride: 1 - padding: 0 - groups: 1 - filter_channels: 1 - output_x: 30 - img_size: 32 - caffe_mode: true - filter_size_y: 3 - padding_y: 0 - stride_y: 1 - output_y: 30 - img_size_y: 32 - } - num_filters: 64 - } - } - inputs { - input_layer_name: "filter" - } - operator_confs { - type: "conv" - input_indices: 0 - input_indices: 2 - input_sizes: 1024 - input_sizes: 576 - output_size: 57600 - conv_conf { - filter_size: 3 - channels: 1 - stride: 1 - padding: 0 - groups: 1 - filter_channels: 1 - output_x: 30 - img_size: 32 - caffe_mode: true - filter_size_y: 3 - padding_y: 0 - stride_y: 1 - output_y: 30 - img_size_y: 32 - } - num_filters: 64 - } -} -layers { - name: "__mixed_7__" - type: "mixed" - size: 254016 - active_type: "" - inputs { - input_layer_name: "img" - } - inputs { - input_layer_name: "img" - input_parameter_name: "___mixed_7__.w1" - proj_conf { - type: "convt" - name: "___mixed_7__.w1" - input_size: 1024 - output_size: 254016 - conv_conf { - filter_size: 3 - channels: 1 - stride: 2 - padding: 1 - groups: 1 - filter_channels: 64 - output_x: 32 - img_size: 63 - caffe_mode: true - filter_size_y: 3 - padding_y: 1 - stride_y: 2 - output_y: 32 - img_size_y: 63 - } - num_filters: 64 - } - } - inputs { - input_layer_name: "filter" - } - operator_confs { - type: "convt" - input_indices: 0 - input_indices: 2 - input_sizes: 1024 - input_sizes: 576 - output_size: 254016 - conv_conf { - filter_size: 3 - channels: 1 - stride: 2 - padding: 1 - groups: 1 - filter_channels: 64 - output_x: 32 - img_size: 63 - caffe_mode: true - 
filter_size_y: 3 - padding_y: 1 - stride_y: 2 - output_y: 32 - img_size_y: 63 - } - num_filters: 64 - } -} -layers { - name: "__mixed_8__" - type: "mixed" - size: 100 - active_type: "" - inputs { - input_layer_name: "__mixed_4__" - input_parameter_name: "___mixed_8__.w0" - proj_conf { - type: "fc" - name: "___mixed_8__.w0" - input_size: 300 - output_size: 100 - } - } - inputs { - input_layer_name: "__mixed_5__" - input_parameter_name: "___mixed_8__.w1" - proj_conf { - type: "trans_fc" - name: "___mixed_8__.w1" - input_size: 100 - output_size: 100 - } - } - inputs { - input_layer_name: "__mixed_6__" - input_parameter_name: "___mixed_8__.w2" - proj_conf { - type: "fc" - name: "___mixed_8__.w2" - input_size: 57600 - output_size: 100 - } - } - inputs { - input_layer_name: "__mixed_7__" - input_parameter_name: "___mixed_8__.w3" - proj_conf { - type: "fc" - name: "___mixed_8__.w3" - input_size: 254016 - output_size: 100 - } - } - drop_rate: 0.5 - error_clipping_threshold: 40.0 -} -parameters { - name: "___embedding_0__.w0" - size: 25600 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 256 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___mixed_0__.w0" - size: 25600 - initial_mean: 0.0 - initial_std: 0.0625 - dims: 256 - dims: 100 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___mixed_1__.w0" - size: 10000 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 100 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___mixed_3__.w0" - size: 100 - initial_mean: 0.0 - initial_std: 1.0 - dims: 1 - dims: 100 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___mixed_4__.w0" - size: 200 - initial_mean: 0.0 - initial_std: 0.0 - dims: 2 - dims: 100 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___mixed_5__.w1" - size: 1 - initial_mean: 0.0 - initial_std: 1.0 - dims: 1 - dims: 1 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___mixed_6__.w1" - size: 576 - initial_mean: 0.0 - initial_std: 0.471404520791 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___mixed_7__.w1" - size: 576 - initial_mean: 0.0 - initial_std: 0.471404520791 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___mixed_8__.w0" - size: 30000 - initial_mean: 0.0 - initial_std: 0.057735026919 - dims: 300 - dims: 100 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___mixed_8__.w1" - size: 10000 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 100 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___mixed_8__.w2" - size: 5760000 - initial_mean: 0.0 - initial_std: 0.00416666666667 - dims: 57600 - dims: 100 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___mixed_8__.w3" - size: 25401600 - initial_mean: 0.0 - initial_std: 0.00198412698413 - dims: 254016 - dims: 100 - initial_strategy: 0 - initial_smart: true -} -input_layer_names: "test" -input_layer_names: "img" -input_layer_names: "filter" -output_layer_names: "__mixed_8__" -sub_models { - name: "root" - layer_names: "test" - layer_names: "__embedding_0__" - layer_names: "__mixed_0__" - layer_names: "__mixed_1__" - layer_names: "__mixed_2__" - layer_names: "__mixed_3__" - layer_names: "__mixed_4__" - layer_names: "__mixed_5__" - layer_names: "img" - layer_names: "filter" - layer_names: "__mixed_6__" - layer_names: "__mixed_7__" - layer_names: "__mixed_8__" - input_layer_names: "test" - input_layer_names: "img" - input_layer_names: 
"filter" - output_layer_names: "__mixed_8__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_fc.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_fc.protostr deleted file mode 100644 index 3e8633b0798318bfc50988dbd329256629d5176c..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_fc.protostr +++ /dev/null @@ -1,125 +0,0 @@ -type: "nn" -layers { - name: "feature_a" - type: "data" - size: 200 - active_type: "" -} -layers { - name: "feature_b" - type: "data" - size: 200 - active_type: "" -} -layers { - name: "__fc_layer_0__" - type: "fc" - size: 200 - active_type: "tanh" - inputs { - input_layer_name: "feature_a" - input_parameter_name: "fc_param" - } - bias_parameter_name: "bias_param" -} -layers { - name: "__fc_layer_1__" - type: "fc" - size: 200 - active_type: "tanh" - inputs { - input_layer_name: "feature_b" - input_parameter_name: "fc_param" - } - bias_parameter_name: "bias_param" -} -layers { - name: "__fc_layer_2__" - type: "fc" - size: 10 - active_type: "softmax" - inputs { - input_layer_name: "__fc_layer_0__" - input_parameter_name: "softmax_param" - } - inputs { - input_layer_name: "__fc_layer_1__" - input_parameter_name: "softmax_param" - } -} -layers { - name: "label" - type: "data" - size: 10 - active_type: "" -} -layers { - name: "__cost_0__" - type: "multi-class-cross-entropy" - size: 1 - active_type: "" - inputs { - input_layer_name: "__fc_layer_2__" - } - inputs { - input_layer_name: "label" - } - coeff: 1.0 -} -parameters { - name: "fc_param" - size: 40000 - initial_mean: 0.0 - initial_std: 1.0 - dims: 200 - dims: 200 - initial_strategy: 1 - initial_smart: false -} -parameters { - name: "bias_param" - size: 200 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 200 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "softmax_param" - size: 2000 - initial_mean: 0.0 - initial_std: 1.0 - dims: 200 - dims: 10 - initial_strategy: 1 - initial_smart: false -} -input_layer_names: "feature_a" -input_layer_names: "feature_b" -input_layer_names: "label" -output_layer_names: "__cost_0__" -evaluators { - name: "classification_error_evaluator" - type: "classification_error" - input_layers: "__fc_layer_2__" - input_layers: "label" -} -sub_models { - name: "root" - layer_names: "feature_a" - layer_names: "feature_b" - layer_names: "__fc_layer_0__" - layer_names: "__fc_layer_1__" - layer_names: "__fc_layer_2__" - layer_names: "label" - layer_names: "__cost_0__" - input_layer_names: "feature_a" - input_layer_names: "feature_b" - input_layer_names: "label" - output_layer_names: "__cost_0__" - evaluator_names: "classification_error_evaluator" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_gru.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_gru.protostr deleted file mode 100644 index 7254deb368963914fd1fff7925b6aeedbed59318..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_gru.protostr +++ /dev/null @@ -1,289 +0,0 @@ -type: "recurrent_nn" -layers { - name: "data_a" - type: "data" - size: 100 - active_type: "" -} -layers { - name: "data_b" - type: "data" - size: 100 - active_type: "" -} -layers { - name: "__simple_gru_0___transform" - type: "mixed" - size: 600 - active_type: "" - inputs { - input_layer_name: "data_a" - input_parameter_name: 
"mixed_param" - proj_conf { - type: "fc" - name: "___simple_gru_0___transform.w0" - input_size: 100 - output_size: 600 - } - } -} -layers { - name: "__simple_gru_0___recurrent_group" - type: "recurrent_layer_group" - active_type: "" -} -layers { - name: "__simple_gru_0___transform@__simple_gru_0___recurrent_group" - type: "scatter_agent" - size: 600 - active_type: "" -} -layers { - name: "__simple_gru_0__+delay1@__simple_gru_0___recurrent_group" - type: "agent" - size: 200 - active_type: "" -} -layers { - name: "__simple_gru_0__@__simple_gru_0___recurrent_group" - type: "gru_step" - size: 200 - active_type: "tanh" - inputs { - input_layer_name: "__simple_gru_0___transform@__simple_gru_0___recurrent_group" - input_parameter_name: "gru_param" - } - inputs { - input_layer_name: "__simple_gru_0__+delay1@__simple_gru_0___recurrent_group" - } - bias_parameter_name: "gru_bias" - active_gate_type: "sigmoid" -} -layers { - name: "__simple_gru_0__" - type: "gather_agent" - size: 200 - active_type: "" -} -layers { - name: "__simple_gru_1___transform" - type: "mixed" - size: 600 - active_type: "" - inputs { - input_layer_name: "data_b" - input_parameter_name: "mixed_param" - proj_conf { - type: "fc" - name: "___simple_gru_1___transform.w0" - input_size: 100 - output_size: 600 - } - } -} -layers { - name: "__simple_gru_1___recurrent_group" - type: "recurrent_layer_group" - active_type: "" -} -layers { - name: "__simple_gru_1___transform@__simple_gru_1___recurrent_group" - type: "scatter_agent" - size: 600 - active_type: "" -} -layers { - name: "__simple_gru_1__+delay1@__simple_gru_1___recurrent_group" - type: "agent" - size: 200 - active_type: "" -} -layers { - name: "__simple_gru_1__@__simple_gru_1___recurrent_group" - type: "gru_step" - size: 200 - active_type: "tanh" - inputs { - input_layer_name: "__simple_gru_1___transform@__simple_gru_1___recurrent_group" - input_parameter_name: "gru_param" - } - inputs { - input_layer_name: "__simple_gru_1__+delay1@__simple_gru_1___recurrent_group" - } - bias_parameter_name: "gru_bias" - active_gate_type: "sigmoid" -} -layers { - name: "__simple_gru_1__" - type: "gather_agent" - size: 200 - active_type: "" -} -layers { - name: "__last_seq_0__" - type: "seqlastins" - size: 200 - active_type: "" - inputs { - input_layer_name: "__simple_gru_0__" - } - trans_type: "non-seq" - seq_pool_stride: -1 -} -layers { - name: "__last_seq_1__" - type: "seqlastins" - size: 200 - active_type: "" - inputs { - input_layer_name: "__simple_gru_1__" - } - trans_type: "non-seq" - seq_pool_stride: -1 -} -layers { - name: "__fc_layer_0__" - type: "fc" - size: 10 - active_type: "softmax" - inputs { - input_layer_name: "__last_seq_0__" - input_parameter_name: "softmax_param" - } - inputs { - input_layer_name: "__last_seq_1__" - input_parameter_name: "softmax_param" - } -} -layers { - name: "label" - type: "data" - size: 10 - active_type: "" -} -layers { - name: "__cost_0__" - type: "multi-class-cross-entropy" - size: 1 - active_type: "" - inputs { - input_layer_name: "__fc_layer_0__" - } - inputs { - input_layer_name: "label" - } - coeff: 1.0 -} -parameters { - name: "mixed_param" - size: 60000 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 600 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "gru_param" - size: 120000 - initial_mean: 0.0 - initial_std: 0.0707106781187 - dims: 200 - dims: 600 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "gru_bias" - size: 600 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 600 - 
initial_strategy: 0 - initial_smart: false -} -parameters { - name: "softmax_param" - size: 2000 - initial_mean: 0.0 - initial_std: 0.0707106781187 - dims: 200 - dims: 10 - initial_strategy: 0 - initial_smart: true -} -input_layer_names: "data_a" -input_layer_names: "data_b" -input_layer_names: "label" -output_layer_names: "__cost_0__" -evaluators { - name: "classification_error_evaluator" - type: "classification_error" - input_layers: "__fc_layer_0__" - input_layers: "label" -} -sub_models { - name: "root" - layer_names: "data_a" - layer_names: "data_b" - layer_names: "__simple_gru_0___transform" - layer_names: "__simple_gru_0___recurrent_group" - layer_names: "__simple_gru_0__" - layer_names: "__simple_gru_1___transform" - layer_names: "__simple_gru_1___recurrent_group" - layer_names: "__simple_gru_1__" - layer_names: "__last_seq_0__" - layer_names: "__last_seq_1__" - layer_names: "__fc_layer_0__" - layer_names: "label" - layer_names: "__cost_0__" - input_layer_names: "data_a" - input_layer_names: "data_b" - input_layer_names: "label" - output_layer_names: "__cost_0__" - evaluator_names: "classification_error_evaluator" - is_recurrent_layer_group: false -} -sub_models { - name: "__simple_gru_0___recurrent_group" - layer_names: "__simple_gru_0___transform@__simple_gru_0___recurrent_group" - layer_names: "__simple_gru_0__+delay1@__simple_gru_0___recurrent_group" - layer_names: "__simple_gru_0__@__simple_gru_0___recurrent_group" - is_recurrent_layer_group: true - reversed: false - memories { - layer_name: "__simple_gru_0__@__simple_gru_0___recurrent_group" - link_name: "__simple_gru_0__+delay1@__simple_gru_0___recurrent_group" - } - in_links { - layer_name: "__simple_gru_0___transform" - link_name: "__simple_gru_0___transform@__simple_gru_0___recurrent_group" - } - out_links { - layer_name: "__simple_gru_0__@__simple_gru_0___recurrent_group" - link_name: "__simple_gru_0__" - } -} -sub_models { - name: "__simple_gru_1___recurrent_group" - layer_names: "__simple_gru_1___transform@__simple_gru_1___recurrent_group" - layer_names: "__simple_gru_1__+delay1@__simple_gru_1___recurrent_group" - layer_names: "__simple_gru_1__@__simple_gru_1___recurrent_group" - is_recurrent_layer_group: true - reversed: false - memories { - layer_name: "__simple_gru_1__@__simple_gru_1___recurrent_group" - link_name: "__simple_gru_1__+delay1@__simple_gru_1___recurrent_group" - } - in_links { - layer_name: "__simple_gru_1___transform" - link_name: "__simple_gru_1___transform@__simple_gru_1___recurrent_group" - } - out_links { - layer_name: "__simple_gru_1__@__simple_gru_1___recurrent_group" - link_name: "__simple_gru_1__" - } -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_lstm.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_lstm.protostr deleted file mode 100644 index 75cf2312032e187dafc66199e933d3ad0fa33050..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_lstm.protostr +++ /dev/null @@ -1,385 +0,0 @@ -type: "recurrent_nn" -layers { - name: "data_a" - type: "data" - size: 100 - active_type: "" -} -layers { - name: "data_b" - type: "data" - size: 100 - active_type: "" -} -layers { - name: "__mixed_0__" - type: "mixed" - size: 400 - active_type: "" - inputs { - input_layer_name: "data_a" - input_parameter_name: "mixed_param" - proj_conf { - type: "fc" - name: "___mixed_0__.w0" - input_size: 100 - output_size: 400 - } - } -} -layers { - name: "__mixed_1__" - type: "mixed" - size: 400 - 
active_type: "" - inputs { - input_layer_name: "data_b" - input_parameter_name: "mixed_param" - proj_conf { - type: "fc" - name: "___mixed_1__.w0" - input_size: 100 - output_size: 400 - } - } -} -layers { - name: "__lstm_group_0___recurrent_group" - type: "recurrent_layer_group" - active_type: "" -} -layers { - name: "__mixed_0__@__lstm_group_0___recurrent_group" - type: "scatter_agent" - size: 400 - active_type: "" -} -layers { - name: "__lstm_group_0__+delay1@__lstm_group_0___recurrent_group" - type: "agent" - size: 100 - active_type: "" -} -layers { - name: "__lstm_group_0___state+delay1@__lstm_group_0___recurrent_group" - type: "agent" - size: 100 - active_type: "" -} -layers { - name: "__lstm_group_0___input_recurrent@__lstm_group_0___recurrent_group" - type: "mixed" - size: 400 - active_type: "" - inputs { - input_layer_name: "__mixed_0__@__lstm_group_0___recurrent_group" - proj_conf { - type: "identity" - name: "___lstm_group_0___input_recurrent.w0" - input_size: 400 - output_size: 400 - } - } - inputs { - input_layer_name: "__lstm_group_0__+delay1@__lstm_group_0___recurrent_group" - input_parameter_name: "lstm_param" - proj_conf { - type: "fc" - name: "___lstm_group_0___input_recurrent.w1" - input_size: 100 - output_size: 400 - } - } -} -layers { - name: "__lstm_group_0__@__lstm_group_0___recurrent_group" - type: "lstm_step" - size: 100 - active_type: "tanh" - inputs { - input_layer_name: "__lstm_group_0___input_recurrent@__lstm_group_0___recurrent_group" - } - inputs { - input_layer_name: "__lstm_group_0___state+delay1@__lstm_group_0___recurrent_group" - } - bias_parameter_name: "lstm_bias" - active_gate_type: "sigmoid" - active_state_type: "tanh" -} -layers { - name: "__lstm_group_0___state@__lstm_group_0___recurrent_group" - type: "get_output" - size: 100 - active_type: "" - inputs { - input_layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group" - input_layer_argument: "state" - } -} -layers { - name: "__lstm_group_0__" - type: "gather_agent" - size: 100 - active_type: "" -} -layers { - name: "__lstm_group_1___recurrent_group" - type: "recurrent_layer_group" - active_type: "" -} -layers { - name: "__mixed_1__@__lstm_group_1___recurrent_group" - type: "scatter_agent" - size: 400 - active_type: "" -} -layers { - name: "__lstm_group_1__+delay1@__lstm_group_1___recurrent_group" - type: "agent" - size: 100 - active_type: "" -} -layers { - name: "__lstm_group_1___state+delay1@__lstm_group_1___recurrent_group" - type: "agent" - size: 100 - active_type: "" -} -layers { - name: "__lstm_group_1___input_recurrent@__lstm_group_1___recurrent_group" - type: "mixed" - size: 400 - active_type: "" - inputs { - input_layer_name: "__mixed_1__@__lstm_group_1___recurrent_group" - proj_conf { - type: "identity" - name: "___lstm_group_1___input_recurrent.w0" - input_size: 400 - output_size: 400 - } - } - inputs { - input_layer_name: "__lstm_group_1__+delay1@__lstm_group_1___recurrent_group" - input_parameter_name: "lstm_param" - proj_conf { - type: "fc" - name: "___lstm_group_1___input_recurrent.w1" - input_size: 100 - output_size: 400 - } - } -} -layers { - name: "__lstm_group_1__@__lstm_group_1___recurrent_group" - type: "lstm_step" - size: 100 - active_type: "tanh" - inputs { - input_layer_name: "__lstm_group_1___input_recurrent@__lstm_group_1___recurrent_group" - } - inputs { - input_layer_name: "__lstm_group_1___state+delay1@__lstm_group_1___recurrent_group" - } - bias_parameter_name: "lstm_bias" - active_gate_type: "sigmoid" - active_state_type: "tanh" -} -layers { - name: 
"__lstm_group_1___state@__lstm_group_1___recurrent_group" - type: "get_output" - size: 100 - active_type: "" - inputs { - input_layer_name: "__lstm_group_1__@__lstm_group_1___recurrent_group" - input_layer_argument: "state" - } -} -layers { - name: "__lstm_group_1__" - type: "gather_agent" - size: 100 - active_type: "" -} -layers { - name: "__last_seq_0__" - type: "seqlastins" - size: 100 - active_type: "" - inputs { - input_layer_name: "__lstm_group_0__" - } - trans_type: "non-seq" - seq_pool_stride: -1 -} -layers { - name: "__last_seq_1__" - type: "seqlastins" - size: 100 - active_type: "" - inputs { - input_layer_name: "__lstm_group_1__" - } - trans_type: "non-seq" - seq_pool_stride: -1 -} -layers { - name: "__fc_layer_0__" - type: "fc" - size: 10 - active_type: "softmax" - inputs { - input_layer_name: "__last_seq_0__" - input_parameter_name: "softmax_param" - } - inputs { - input_layer_name: "__last_seq_1__" - input_parameter_name: "softmax_param" - } -} -layers { - name: "label" - type: "data" - size: 10 - active_type: "" -} -layers { - name: "__cost_0__" - type: "multi-class-cross-entropy" - size: 1 - active_type: "" - inputs { - input_layer_name: "__fc_layer_0__" - } - inputs { - input_layer_name: "label" - } - coeff: 1.0 -} -parameters { - name: "mixed_param" - size: 40000 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 400 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "lstm_param" - size: 40000 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 400 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "lstm_bias" - size: 300 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 300 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "softmax_param" - size: 1000 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 10 - initial_strategy: 0 - initial_smart: true -} -input_layer_names: "data_a" -input_layer_names: "data_b" -input_layer_names: "label" -output_layer_names: "__cost_0__" -evaluators { - name: "classification_error_evaluator" - type: "classification_error" - input_layers: "__fc_layer_0__" - input_layers: "label" -} -sub_models { - name: "root" - layer_names: "data_a" - layer_names: "data_b" - layer_names: "__mixed_0__" - layer_names: "__mixed_1__" - layer_names: "__lstm_group_0___recurrent_group" - layer_names: "__lstm_group_0__" - layer_names: "__lstm_group_1___recurrent_group" - layer_names: "__lstm_group_1__" - layer_names: "__last_seq_0__" - layer_names: "__last_seq_1__" - layer_names: "__fc_layer_0__" - layer_names: "label" - layer_names: "__cost_0__" - input_layer_names: "data_a" - input_layer_names: "data_b" - input_layer_names: "label" - output_layer_names: "__cost_0__" - evaluator_names: "classification_error_evaluator" - is_recurrent_layer_group: false -} -sub_models { - name: "__lstm_group_0___recurrent_group" - layer_names: "__mixed_0__@__lstm_group_0___recurrent_group" - layer_names: "__lstm_group_0__+delay1@__lstm_group_0___recurrent_group" - layer_names: "__lstm_group_0___state+delay1@__lstm_group_0___recurrent_group" - layer_names: "__lstm_group_0___input_recurrent@__lstm_group_0___recurrent_group" - layer_names: "__lstm_group_0__@__lstm_group_0___recurrent_group" - layer_names: "__lstm_group_0___state@__lstm_group_0___recurrent_group" - is_recurrent_layer_group: true - reversed: false - memories { - layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group" - link_name: "__lstm_group_0__+delay1@__lstm_group_0___recurrent_group" - } - memories { - layer_name: 
"__lstm_group_0___state@__lstm_group_0___recurrent_group" - link_name: "__lstm_group_0___state+delay1@__lstm_group_0___recurrent_group" - } - in_links { - layer_name: "__mixed_0__" - link_name: "__mixed_0__@__lstm_group_0___recurrent_group" - } - out_links { - layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group" - link_name: "__lstm_group_0__" - } -} -sub_models { - name: "__lstm_group_1___recurrent_group" - layer_names: "__mixed_1__@__lstm_group_1___recurrent_group" - layer_names: "__lstm_group_1__+delay1@__lstm_group_1___recurrent_group" - layer_names: "__lstm_group_1___state+delay1@__lstm_group_1___recurrent_group" - layer_names: "__lstm_group_1___input_recurrent@__lstm_group_1___recurrent_group" - layer_names: "__lstm_group_1__@__lstm_group_1___recurrent_group" - layer_names: "__lstm_group_1___state@__lstm_group_1___recurrent_group" - is_recurrent_layer_group: true - reversed: false - memories { - layer_name: "__lstm_group_1__@__lstm_group_1___recurrent_group" - link_name: "__lstm_group_1__+delay1@__lstm_group_1___recurrent_group" - } - memories { - layer_name: "__lstm_group_1___state@__lstm_group_1___recurrent_group" - link_name: "__lstm_group_1___state+delay1@__lstm_group_1___recurrent_group" - } - in_links { - layer_name: "__mixed_1__" - link_name: "__mixed_1__@__lstm_group_1___recurrent_group" - } - out_links { - layer_name: "__lstm_group_1__@__lstm_group_1___recurrent_group" - link_name: "__lstm_group_1__" - } -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/simple_rnn_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/simple_rnn_layers.protostr deleted file mode 100644 index 0d51f70ee01b913051f7d20547f68a22663200a0..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/simple_rnn_layers.protostr +++ /dev/null @@ -1,424 +0,0 @@ -type: "nn" -layers { - name: "data" - type: "data" - size: 200 - active_type: "" -} -layers { - name: "__fc_layer_0__" - type: "fc" - size: 200 - active_type: "sigmoid" - inputs { - input_layer_name: "data" - input_parameter_name: "___fc_layer_0__.w0" - } - bias_parameter_name: "___fc_layer_0__.wbias" -} -layers { - name: "__recurrent_layer_0__" - type: "recurrent" - size: 200 - active_type: "sigmoid" - inputs { - input_layer_name: "__fc_layer_0__" - input_parameter_name: "___recurrent_layer_0__.w0" - } - bias_parameter_name: "___recurrent_layer_0__.wbias" - reversed: false -} -layers { - name: "__recurrent_layer_1__" - type: "recurrent" - size: 200 - active_type: "sigmoid" - inputs { - input_layer_name: "__fc_layer_0__" - input_parameter_name: "___recurrent_layer_1__.w0" - } - bias_parameter_name: "___recurrent_layer_1__.wbias" - reversed: true -} -layers { - name: "__fc_layer_1__" - type: "fc" - size: 800 - active_type: "" - inputs { - input_layer_name: "__fc_layer_0__" - input_parameter_name: "___fc_layer_1__.w0" - } -} -layers { - name: "__lstmemory_0__" - type: "lstmemory" - size: 200 - active_type: "sigmoid" - inputs { - input_layer_name: "__fc_layer_1__" - input_parameter_name: "___lstmemory_0__.w0" - } - bias_parameter_name: "___lstmemory_0__.wbias" - reversed: false - active_gate_type: "sigmoid" - active_state_type: "tanh" -} -layers { - name: "__fc_layer_2__" - type: "fc" - size: 800 - active_type: "" - inputs { - input_layer_name: "__fc_layer_0__" - input_parameter_name: "___fc_layer_2__.w0" - } -} -layers { - name: "__lstmemory_1__" - type: "lstmemory" - size: 200 - active_type: "sigmoid" - inputs { - input_layer_name: 
"__fc_layer_2__" - input_parameter_name: "___lstmemory_1__.w0" - } - bias_parameter_name: "___lstmemory_1__.wbias" - reversed: true - active_gate_type: "sigmoid" - active_state_type: "tanh" -} -layers { - name: "__fc_layer_3__" - type: "fc" - size: 600 - active_type: "" - inputs { - input_layer_name: "__fc_layer_0__" - input_parameter_name: "___fc_layer_3__.w0" - } -} -layers { - name: "__gru_0__" - type: "gated_recurrent" - size: 200 - active_type: "sigmoid" - inputs { - input_layer_name: "__fc_layer_3__" - input_parameter_name: "___gru_0__.w0" - } - bias_parameter_name: "___gru_0__.wbias" - reversed: false - active_gate_type: "sigmoid" -} -layers { - name: "__fc_layer_4__" - type: "fc" - size: 600 - active_type: "" - inputs { - input_layer_name: "__fc_layer_0__" - input_parameter_name: "___fc_layer_4__.w0" - } -} -layers { - name: "__gru_1__" - type: "gated_recurrent" - size: 200 - active_type: "sigmoid" - inputs { - input_layer_name: "__fc_layer_4__" - input_parameter_name: "___gru_1__.w0" - } - bias_parameter_name: "___gru_1__.wbias" - reversed: true - active_gate_type: "sigmoid" -} -layers { - name: "__last_seq_0__" - type: "seqlastins" - size: 200 - active_type: "" - inputs { - input_layer_name: "__recurrent_layer_0__" - } - trans_type: "non-seq" - seq_pool_stride: -1 -} -layers { - name: "__first_seq_0__" - type: "seqlastins" - size: 200 - active_type: "" - inputs { - input_layer_name: "__recurrent_layer_1__" - } - select_first: true - trans_type: "non-seq" - seq_pool_stride: -1 -} -layers { - name: "__last_seq_1__" - type: "seqlastins" - size: 200 - active_type: "" - inputs { - input_layer_name: "__lstmemory_0__" - } - trans_type: "non-seq" - seq_pool_stride: -1 -} -layers { - name: "__first_seq_1__" - type: "seqlastins" - size: 200 - active_type: "" - inputs { - input_layer_name: "__lstmemory_1__" - } - select_first: true - trans_type: "non-seq" - seq_pool_stride: -1 -} -layers { - name: "__last_seq_2__" - type: "seqlastins" - size: 200 - active_type: "" - inputs { - input_layer_name: "__gru_0__" - } - trans_type: "non-seq" - seq_pool_stride: -1 -} -layers { - name: "__first_seq_2__" - type: "seqlastins" - size: 200 - active_type: "" - inputs { - input_layer_name: "__gru_1__" - } - select_first: true - trans_type: "non-seq" - seq_pool_stride: -1 -} -parameters { - name: "___fc_layer_0__.w0" - size: 40000 - initial_mean: 0.0 - initial_std: 0.0707106781187 - dims: 200 - dims: 200 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___fc_layer_0__.wbias" - size: 200 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 200 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___recurrent_layer_0__.w0" - size: 40000 - initial_mean: 0.0 - initial_std: 0.0707106781187 - dims: 200 - dims: 200 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___recurrent_layer_0__.wbias" - size: 200 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 200 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___recurrent_layer_1__.w0" - size: 40000 - initial_mean: 0.0 - initial_std: 0.0707106781187 - dims: 200 - dims: 200 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___recurrent_layer_1__.wbias" - size: 200 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 200 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___fc_layer_1__.w0" - size: 160000 - initial_mean: 0.0 - initial_std: 0.0707106781187 - dims: 200 - dims: 800 - initial_strategy: 0 - initial_smart: true -} 
-parameters { - name: "___lstmemory_0__.w0" - size: 160000 - initial_mean: 0.0 - initial_std: 0.0707106781187 - dims: 200 - dims: 200 - dims: 4 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___lstmemory_0__.wbias" - size: 1400 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 1400 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___fc_layer_2__.w0" - size: 160000 - initial_mean: 0.0 - initial_std: 0.0707106781187 - dims: 200 - dims: 800 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___lstmemory_1__.w0" - size: 160000 - initial_mean: 0.0 - initial_std: 0.0707106781187 - dims: 200 - dims: 200 - dims: 4 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___lstmemory_1__.wbias" - size: 1400 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 1400 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___fc_layer_3__.w0" - size: 120000 - initial_mean: 0.0 - initial_std: 0.0707106781187 - dims: 200 - dims: 600 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___gru_0__.w0" - size: 120000 - initial_mean: 0.0 - initial_std: 0.0707106781187 - dims: 200 - dims: 600 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___gru_0__.wbias" - size: 600 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 600 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___fc_layer_4__.w0" - size: 120000 - initial_mean: 0.0 - initial_std: 0.0707106781187 - dims: 200 - dims: 600 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___gru_1__.w0" - size: 120000 - initial_mean: 0.0 - initial_std: 0.0707106781187 - dims: 200 - dims: 600 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___gru_1__.wbias" - size: 600 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 600 - initial_strategy: 0 - initial_smart: false -} -input_layer_names: "data" -output_layer_names: "__last_seq_0__" -output_layer_names: "__first_seq_0__" -output_layer_names: "__last_seq_1__" -output_layer_names: "__first_seq_1__" -output_layer_names: "__last_seq_2__" -output_layer_names: "__first_seq_2__" -sub_models { - name: "root" - layer_names: "data" - layer_names: "__fc_layer_0__" - layer_names: "__recurrent_layer_0__" - layer_names: "__recurrent_layer_1__" - layer_names: "__fc_layer_1__" - layer_names: "__lstmemory_0__" - layer_names: "__fc_layer_2__" - layer_names: "__lstmemory_1__" - layer_names: "__fc_layer_3__" - layer_names: "__gru_0__" - layer_names: "__fc_layer_4__" - layer_names: "__gru_1__" - layer_names: "__last_seq_0__" - layer_names: "__first_seq_0__" - layer_names: "__last_seq_1__" - layer_names: "__first_seq_1__" - layer_names: "__last_seq_2__" - layer_names: "__first_seq_2__" - input_layer_names: "data" - output_layer_names: "__last_seq_0__" - output_layer_names: "__first_seq_0__" - output_layer_names: "__last_seq_1__" - output_layer_names: "__first_seq_1__" - output_layer_names: "__last_seq_2__" - output_layer_names: "__first_seq_2__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_BatchNorm3D.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_BatchNorm3D.protostr deleted file mode 100644 index 9b69ae4a3b3cbcc7c0c69a2d5b3728e2f0204f33..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_BatchNorm3D.protostr +++ /dev/null @@ -1,93 +0,0 @@ -type: "nn" -layers { - 
name: "data3D" - type: "data" - size: 360 - active_type: "" - height: 6 - width: 20 - depth: 3 -} -layers { - name: "__batch_norm_0__" - type: "batch_norm" - size: 360 - active_type: "relu" - inputs { - input_layer_name: "data3D" - input_parameter_name: "___batch_norm_0__.w0" - image_conf { - channels: 1 - img_size: 20 - img_size_y: 6 - img_size_z: 3 - } - } - inputs { - input_layer_name: "data3D" - input_parameter_name: "___batch_norm_0__.w1" - } - inputs { - input_layer_name: "data3D" - input_parameter_name: "___batch_norm_0__.w2" - } - bias_parameter_name: "___batch_norm_0__.wbias" - moving_average_fraction: 0.9 - height: 6 - width: 20 - depth: 3 - epsilon: 1e-05 -} -parameters { - name: "___batch_norm_0__.w0" - size: 1 - initial_mean: 1.0 - initial_std: 0.0 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___batch_norm_0__.w1" - size: 1 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 1 - initial_strategy: 0 - initial_smart: false - is_static: true - is_shared: true -} -parameters { - name: "___batch_norm_0__.w2" - size: 1 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 1 - initial_strategy: 0 - initial_smart: false - is_static: true - is_shared: true -} -parameters { - name: "___batch_norm_0__.wbias" - size: 1 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 1 - initial_strategy: 0 - initial_smart: false -} -input_layer_names: "data3D" -output_layer_names: "__batch_norm_0__" -sub_models { - name: "root" - layer_names: "data3D" - layer_names: "__batch_norm_0__" - input_layer_names: "data3D" - output_layer_names: "__batch_norm_0__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_bi_grumemory.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_bi_grumemory.protostr deleted file mode 100644 index 8a1399efad0ff339e35f69400ac654a4787a6018..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_bi_grumemory.protostr +++ /dev/null @@ -1,155 +0,0 @@ -type: "nn" -layers { - name: "data" - type: "data" - size: 120 - active_type: "" -} -layers { - name: "__bidirectional_gru_0___fw_transform" - type: "mixed" - size: 120 - active_type: "" - inputs { - input_layer_name: "data" - input_parameter_name: "___bidirectional_gru_0___fw_transform.w0" - proj_conf { - type: "fc" - name: "___bidirectional_gru_0___fw_transform.w0" - input_size: 120 - output_size: 120 - } - } -} -layers { - name: "__bidirectional_gru_0___fw" - type: "gated_recurrent" - size: 40 - active_type: "tanh" - inputs { - input_layer_name: "__bidirectional_gru_0___fw_transform" - input_parameter_name: "___bidirectional_gru_0___fw.w0" - } - bias_parameter_name: "___bidirectional_gru_0___fw.wbias" - reversed: false - active_gate_type: "sigmoid" -} -layers { - name: "__bidirectional_gru_0___bw_transform" - type: "mixed" - size: 120 - active_type: "" - inputs { - input_layer_name: "data" - input_parameter_name: "___bidirectional_gru_0___bw_transform.w0" - proj_conf { - type: "fc" - name: "___bidirectional_gru_0___bw_transform.w0" - input_size: 120 - output_size: 120 - } - } -} -layers { - name: "__bidirectional_gru_0___bw" - type: "gated_recurrent" - size: 40 - active_type: "tanh" - inputs { - input_layer_name: "__bidirectional_gru_0___bw_transform" - input_parameter_name: "___bidirectional_gru_0___bw.w0" - } - bias_parameter_name: "___bidirectional_gru_0___bw.wbias" - reversed: true - active_gate_type: "sigmoid" -} -layers { - name: 
"__bidirectional_gru_0__" - type: "concat" - size: 80 - active_type: "" - inputs { - input_layer_name: "__bidirectional_gru_0___fw" - } - inputs { - input_layer_name: "__bidirectional_gru_0___bw" - } - height: 0 - width: 0 - depth: 1 -} -parameters { - name: "___bidirectional_gru_0___fw_transform.w0" - size: 14400 - initial_mean: 0.0 - initial_std: 0.0912870929175 - dims: 120 - dims: 120 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___bidirectional_gru_0___fw.w0" - size: 4800 - initial_mean: 0.0 - initial_std: 0.158113883008 - dims: 40 - dims: 120 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___bidirectional_gru_0___fw.wbias" - size: 120 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 120 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___bidirectional_gru_0___bw_transform.w0" - size: 14400 - initial_mean: 0.0 - initial_std: 0.0912870929175 - dims: 120 - dims: 120 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___bidirectional_gru_0___bw.w0" - size: 4800 - initial_mean: 0.0 - initial_std: 0.158113883008 - dims: 40 - dims: 120 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___bidirectional_gru_0___bw.wbias" - size: 120 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 120 - initial_strategy: 0 - initial_smart: false -} -input_layer_names: "data" -output_layer_names: "__bidirectional_gru_0__" -sub_models { - name: "root" - layer_names: "data" - layer_names: "__bidirectional_gru_0___fw_transform" - layer_names: "__bidirectional_gru_0___fw" - layer_names: "__bidirectional_gru_0___bw_transform" - layer_names: "__bidirectional_gru_0___bw" - layer_names: "__bidirectional_gru_0__" - input_layer_names: "data" - output_layer_names: "__bidirectional_gru_0__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_bilinear_interp.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_bilinear_interp.protostr deleted file mode 100644 index 25ec6323751fae5778657945a765d8ca162ee2c4..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_bilinear_interp.protostr +++ /dev/null @@ -1,137 +0,0 @@ -type: "nn" -layers { - name: "data" - type: "data" - size: 2304 - active_type: "" -} -layers { - name: "__conv_0__" - type: "exconv" - size: 36864 - active_type: "" - inputs { - input_layer_name: "data" - input_parameter_name: "___conv_0__.w0" - conv_conf { - filter_size: 3 - channels: 1 - stride: 1 - padding: 1 - groups: 1 - filter_channels: 1 - output_x: 48 - img_size: 48 - caffe_mode: true - filter_size_y: 3 - padding_y: 1 - stride_y: 1 - output_y: 48 - img_size_y: 48 - dilation: 1 - dilation_y: 1 - } - } - bias_parameter_name: "___conv_0__.wbias" - num_filters: 16 - shared_biases: true - height: 48 - width: 48 -} -layers { - name: "__bilinear_interp_layer_0__" - type: "bilinear_interp" - size: 65536 - active_type: "" - inputs { - input_layer_name: "__conv_0__" - bilinear_interp_conf { - image_conf { - channels: 16 - img_size: 48 - img_size_y: 48 - } - out_size_x: 64 - out_size_y: 64 - } - } - height: 64 - width: 64 -} -layers { - name: "__pool_0__" - type: "pool" - size: 16384 - active_type: "" - inputs { - input_layer_name: "__bilinear_interp_layer_0__" - pool_conf { - pool_type: "max-projection" - channels: 16 - size_x: 2 - stride: 2 - output_x: 32 - img_size: 64 - padding: 0 - size_y: 2 - stride_y: 2 - output_y: 32 - img_size_y: 64 - 
padding_y: 0 - } - } - height: 32 - width: 32 -} -layers { - name: "__fc_layer_0__" - type: "fc" - size: 384 - active_type: "tanh" - inputs { - input_layer_name: "__pool_0__" - input_parameter_name: "___fc_layer_0__.w0" - } -} -parameters { - name: "___conv_0__.w0" - size: 144 - initial_mean: 0.0 - initial_std: 0.471404520791 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___conv_0__.wbias" - size: 16 - initial_mean: 0.0 - initial_std: 0.0 - dims: 16 - dims: 1 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___fc_layer_0__.w0" - size: 6291456 - initial_mean: 0.0 - initial_std: 0.0078125 - dims: 16384 - dims: 384 - initial_strategy: 0 - initial_smart: true -} -input_layer_names: "data" -output_layer_names: "__fc_layer_0__" -sub_models { - name: "root" - layer_names: "data" - layer_names: "__conv_0__" - layer_names: "__bilinear_interp_layer_0__" - layer_names: "__pool_0__" - layer_names: "__fc_layer_0__" - input_layer_names: "data" - output_layer_names: "__fc_layer_0__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_clip_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_clip_layer.protostr deleted file mode 100644 index 4b9578a0c050ef74f186485fec3f6c1f7a0f0814..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_clip_layer.protostr +++ /dev/null @@ -1,31 +0,0 @@ -type: "nn" -layers { - name: "input" - type: "data" - size: 300 - active_type: "" -} -layers { - name: "__clip_0__" - type: "clip" - size: 300 - active_type: "" - inputs { - input_layer_name: "input" - clip_conf { - min: -10 - max: 10 - } - } -} -input_layer_names: "input" -output_layer_names: "__clip_0__" -sub_models { - name: "root" - layer_names: "input" - layer_names: "__clip_0__" - input_layer_names: "input" - output_layer_names: "__clip_0__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_conv3d_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_conv3d_layer.protostr deleted file mode 100644 index 9fe2bc29d3cd06231b67102e28f7a49c28306958..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_conv3d_layer.protostr +++ /dev/null @@ -1,132 +0,0 @@ -type: "nn" -layers { - name: "data" - type: "data" - size: 36288 - active_type: "" - height: 48 - width: 42 - depth: 6 -} -layers { - name: "conv3d_1" - type: "conv3d" - size: 24192 - active_type: "" - inputs { - input_layer_name: "data" - input_parameter_name: "_conv3d_1.w0" - conv_conf { - filter_size: 3 - channels: 3 - stride: 2 - padding: 1 - groups: 1 - filter_channels: 3 - output_x: 21 - img_size: 42 - caffe_mode: true - filter_size_y: 3 - padding_y: 1 - stride_y: 2 - output_y: 24 - img_size_y: 48 - filter_size_z: 3 - padding_z: 1 - stride_z: 2 - output_z: 3 - img_size_z: 6 - } - } - bias_parameter_name: "_conv3d_1.wbias" - num_filters: 16 - shared_biases: true - height: 24 - width: 21 - depth: 3 -} -layers { - name: "conv3d_2" - type: "conv3d" - size: 24192 - active_type: "" - inputs { - input_layer_name: "data" - input_parameter_name: "_conv3d_2.w0" - conv_conf { - filter_size: 3 - channels: 3 - stride: 2 - padding: 1 - groups: 1 - filter_channels: 3 - output_x: 21 - img_size: 42 - caffe_mode: true - filter_size_y: 3 - padding_y: 1 - stride_y: 2 - output_y: 24 - img_size_y: 48 - filter_size_z: 3 - padding_z: 1 - 
stride_z: 2 - output_z: 3 - img_size_z: 6 - } - } - bias_parameter_name: "_conv3d_2.wbias" - num_filters: 16 - shared_biases: true - height: 24 - width: 21 - depth: 3 -} -parameters { - name: "_conv3d_1.w0" - size: 1296 - initial_mean: 0.0 - initial_std: 0.272165526976 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "_conv3d_1.wbias" - size: 16 - initial_mean: 0.0 - initial_std: 0.0 - dims: 16 - dims: 1 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "_conv3d_2.w0" - size: 1296 - initial_mean: 0.0 - initial_std: 0.272165526976 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "_conv3d_2.wbias" - size: 16 - initial_mean: 0.0 - initial_std: 0.0 - dims: 16 - dims: 1 - initial_strategy: 0 - initial_smart: false -} -input_layer_names: "data" -output_layer_names: "conv3d_2" -sub_models { - name: "root" - layer_names: "data" - layer_names: "conv3d_1" - layer_names: "conv3d_2" - input_layer_names: "data" - output_layer_names: "conv3d_2" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers.protostr deleted file mode 100644 index 55ab464ddf88f55bfb7b93ec0a189d4e53633468..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers.protostr +++ /dev/null @@ -1,375 +0,0 @@ -type: "nn" -layers { - name: "input" - type: "data" - size: 200 - active_type: "" -} -layers { - name: "labels" - type: "data" - size: 5000 - active_type: "" -} -layers { - name: "probs" - type: "data" - size: 10 - active_type: "" -} -layers { - name: "xe-label" - type: "data" - size: 10 - active_type: "" -} -layers { - name: "__fc_layer_0__" - type: "fc" - size: 4 - active_type: "tanh" - inputs { - input_layer_name: "input" - input_parameter_name: "___fc_layer_0__.w0" - } - bias_parameter_name: "___fc_layer_0__.wbias" -} -layers { - name: "__ctc_layer_0__" - type: "ctc" - size: 5001 - active_type: "" - inputs { - input_layer_name: "input" - } - inputs { - input_layer_name: "labels" - } - norm_by_times: false -} -layers { - name: "__warp_ctc_layer_0__" - type: "warp_ctc" - size: 5001 - active_type: "" - inputs { - input_layer_name: "input" - } - inputs { - input_layer_name: "labels" - } - norm_by_times: false - blank: 0 -} -layers { - name: "crf_label" - type: "data" - size: 4 - active_type: "" -} -layers { - name: "__crf_layer_0__" - type: "crf" - size: 4 - active_type: "" - inputs { - input_layer_name: "__fc_layer_0__" - input_parameter_name: "___crf_layer_0__.w0" - } - inputs { - input_layer_name: "crf_label" - } - coeff: 1.0 -} -layers { - name: "left" - type: "data" - size: 1 - active_type: "" -} -layers { - name: "right" - type: "data" - size: 1 - active_type: "" -} -layers { - name: "label" - type: "data" - size: 1 - active_type: "" -} -layers { - name: "__rank_cost_0__" - type: "rank-cost" - size: 1 - active_type: "" - inputs { - input_layer_name: "left" - } - inputs { - input_layer_name: "right" - } - inputs { - input_layer_name: "label" - } - coeff: 1.0 -} -layers { - name: "list_feature" - type: "data" - size: 100 - active_type: "" -} -layers { - name: "list_scores" - type: "data" - size: 1 - active_type: "" -} -layers { - name: "__lambda_cost_0__" - type: "lambda_cost" - size: 1 - active_type: "" - inputs { - input_layer_name: "list_feature" - } - inputs { - input_layer_name: "list_scores" - } - NDCG_num: 5 - max_sort_size: -1 -} 
-layers { - name: "__cross_entropy_0__" - type: "multi-class-cross-entropy" - size: 1 - active_type: "" - inputs { - input_layer_name: "probs" - } - inputs { - input_layer_name: "xe-label" - } - coeff: 1.0 -} -layers { - name: "__cross_entropy_with_selfnorm_0__" - type: "multi_class_cross_entropy_with_selfnorm" - active_type: "" - inputs { - input_layer_name: "probs" - } - inputs { - input_layer_name: "xe-label" - } - softmax_selfnorm_alpha: 0.1 - coeff: 1.0 -} -layers { - name: "__huber_regression_cost_0__" - type: "huber_regression" - size: 1 - active_type: "" - inputs { - input_layer_name: "input" - } - inputs { - input_layer_name: "labels" - } - coeff: 1.0 - delta: 1.0 -} -layers { - name: "huber_probs" - type: "data" - size: 1 - active_type: "" -} -layers { - name: "huber_label" - type: "data" - size: 1 - active_type: "" -} -layers { - name: "__huber_classification_cost_0__" - type: "huber_classification" - size: 1 - active_type: "" - inputs { - input_layer_name: "huber_probs" - } - inputs { - input_layer_name: "huber_label" - } - coeff: 1.0 -} -layers { - name: "__multi_binary_label_cross_entropy_0__" - type: "multi_binary_label_cross_entropy" - size: 1 - active_type: "" - inputs { - input_layer_name: "probs" - } - inputs { - input_layer_name: "xe-label" - } - coeff: 1.0 -} -layers { - name: "__sum_cost_0__" - type: "sum_cost" - size: 1 - active_type: "" - inputs { - input_layer_name: "__fc_layer_0__" - } - coeff: 1.0 -} -layers { - name: "__nce_layer_0__" - type: "nce" - size: 1 - active_type: "sigmoid" - inputs { - input_layer_name: "__fc_layer_0__" - input_parameter_name: "___nce_layer_0__.w0" - } - inputs { - input_layer_name: "labels" - } - bias_parameter_name: "___nce_layer_0__.wbias" - num_classes: 5000 - num_neg_samples: 10 -} -parameters { - name: "___fc_layer_0__.w0" - size: 800 - initial_mean: 0.0 - initial_std: 0.0707106781187 - dims: 200 - dims: 4 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___fc_layer_0__.wbias" - size: 4 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 4 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___crf_layer_0__.w0" - size: 24 - initial_mean: 0.0 - initial_std: 0.408248290464 - dims: 6 - dims: 4 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___nce_layer_0__.w0" - size: 20000 - initial_mean: 0.0 - initial_std: 0.0141421356237 - dims: 5000 - dims: 4 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___nce_layer_0__.wbias" - size: 5000 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 5000 - initial_strategy: 0 - initial_smart: false -} -input_layer_names: "input" -input_layer_names: "labels" -input_layer_names: "crf_label" -input_layer_names: "left" -input_layer_names: "right" -input_layer_names: "label" -input_layer_names: "list_feature" -input_layer_names: "list_scores" -input_layer_names: "probs" -input_layer_names: "xe-label" -input_layer_names: "huber_probs" -input_layer_names: "huber_label" -output_layer_names: "__ctc_layer_0__" -output_layer_names: "__warp_ctc_layer_0__" -output_layer_names: "__crf_layer_0__" -output_layer_names: "__rank_cost_0__" -output_layer_names: "__lambda_cost_0__" -output_layer_names: "__cross_entropy_0__" -output_layer_names: "__cross_entropy_with_selfnorm_0__" -output_layer_names: "__huber_regression_cost_0__" -output_layer_names: "__huber_classification_cost_0__" -output_layer_names: "__multi_binary_label_cross_entropy_0__" -output_layer_names: "__sum_cost_0__" -output_layer_names: "__nce_layer_0__" 
-sub_models { - name: "root" - layer_names: "input" - layer_names: "labels" - layer_names: "probs" - layer_names: "xe-label" - layer_names: "__fc_layer_0__" - layer_names: "__ctc_layer_0__" - layer_names: "__warp_ctc_layer_0__" - layer_names: "crf_label" - layer_names: "__crf_layer_0__" - layer_names: "left" - layer_names: "right" - layer_names: "label" - layer_names: "__rank_cost_0__" - layer_names: "list_feature" - layer_names: "list_scores" - layer_names: "__lambda_cost_0__" - layer_names: "__cross_entropy_0__" - layer_names: "__cross_entropy_with_selfnorm_0__" - layer_names: "__huber_regression_cost_0__" - layer_names: "huber_probs" - layer_names: "huber_label" - layer_names: "__huber_classification_cost_0__" - layer_names: "__multi_binary_label_cross_entropy_0__" - layer_names: "__sum_cost_0__" - layer_names: "__nce_layer_0__" - input_layer_names: "input" - input_layer_names: "labels" - input_layer_names: "crf_label" - input_layer_names: "left" - input_layer_names: "right" - input_layer_names: "label" - input_layer_names: "list_feature" - input_layer_names: "list_scores" - input_layer_names: "probs" - input_layer_names: "xe-label" - input_layer_names: "huber_probs" - input_layer_names: "huber_label" - output_layer_names: "__ctc_layer_0__" - output_layer_names: "__warp_ctc_layer_0__" - output_layer_names: "__crf_layer_0__" - output_layer_names: "__rank_cost_0__" - output_layer_names: "__lambda_cost_0__" - output_layer_names: "__cross_entropy_0__" - output_layer_names: "__cross_entropy_with_selfnorm_0__" - output_layer_names: "__huber_regression_cost_0__" - output_layer_names: "__huber_classification_cost_0__" - output_layer_names: "__multi_binary_label_cross_entropy_0__" - output_layer_names: "__sum_cost_0__" - output_layer_names: "__nce_layer_0__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers_with_weight.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers_with_weight.protostr deleted file mode 100644 index cec8a73db66f6091ec971527b3a42aa9e08154eb..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers_with_weight.protostr +++ /dev/null @@ -1,162 +0,0 @@ -type: "nn" -layers { - name: "input" - type: "data" - size: 300 - active_type: "" -} -layers { - name: "label" - type: "data" - size: 1 - active_type: "" -} -layers { - name: "weight" - type: "data" - size: 1 - active_type: "" -} -layers { - name: "__fc_layer_0__" - type: "fc" - size: 10 - active_type: "softmax" - inputs { - input_layer_name: "input" - input_parameter_name: "___fc_layer_0__.w0" - } - bias_parameter_name: "___fc_layer_0__.wbias" -} -layers { - name: "__cost_0__" - type: "multi-class-cross-entropy" - size: 1 - active_type: "" - inputs { - input_layer_name: "__fc_layer_0__" - } - inputs { - input_layer_name: "label" - } - inputs { - input_layer_name: "weight" - } - coeff: 1.0 -} -layers { - name: "__square_error_cost_0__" - type: "square_error" - size: 1 - active_type: "" - inputs { - input_layer_name: "__fc_layer_0__" - } - inputs { - input_layer_name: "label" - } - inputs { - input_layer_name: "weight" - } - coeff: 1.0 -} -layers { - name: "multi_class_label" - type: "data" - size: 500 - active_type: "" -} -layers { - name: "__nce_layer_0__" - type: "nce" - size: 1 - active_type: "sigmoid" - inputs { - input_layer_name: "__fc_layer_0__" - input_parameter_name: "___nce_layer_0__.w0" - } - inputs { - input_layer_name: 
"multi_class_label" - } - inputs { - input_layer_name: "weight" - } - bias_parameter_name: "___nce_layer_0__.wbias" - num_classes: 500 - num_neg_samples: 10 -} -parameters { - name: "___fc_layer_0__.w0" - size: 3000 - initial_mean: 0.0 - initial_std: 0.057735026919 - dims: 300 - dims: 10 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___fc_layer_0__.wbias" - size: 10 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 10 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___nce_layer_0__.w0" - size: 5000 - initial_mean: 0.0 - initial_std: 0.04472135955 - dims: 500 - dims: 10 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___nce_layer_0__.wbias" - size: 500 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 500 - initial_strategy: 0 - initial_smart: false -} -input_layer_names: "input" -input_layer_names: "label" -input_layer_names: "weight" -input_layer_names: "multi_class_label" -output_layer_names: "__cost_0__" -output_layer_names: "__square_error_cost_0__" -output_layer_names: "__nce_layer_0__" -evaluators { - name: "classification_error_evaluator" - type: "classification_error" - input_layers: "__fc_layer_0__" - input_layers: "label" - input_layers: "weight" -} -sub_models { - name: "root" - layer_names: "input" - layer_names: "label" - layer_names: "weight" - layer_names: "__fc_layer_0__" - layer_names: "__cost_0__" - layer_names: "__square_error_cost_0__" - layer_names: "multi_class_label" - layer_names: "__nce_layer_0__" - input_layer_names: "input" - input_layer_names: "label" - input_layer_names: "weight" - input_layer_names: "multi_class_label" - output_layer_names: "__cost_0__" - output_layer_names: "__square_error_cost_0__" - output_layer_names: "__nce_layer_0__" - evaluator_names: "classification_error_evaluator" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cross_entropy_over_beam.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cross_entropy_over_beam.protostr deleted file mode 100644 index a602569697e91b11b8d421ac359c2e523a00fa98..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cross_entropy_over_beam.protostr +++ /dev/null @@ -1,207 +0,0 @@ -type: "nn" -layers { - name: "sentence_states" - type: "data" - size: 32 - active_type: "" -} -layers { - name: "sentence_scores" - type: "data" - size: 1 - active_type: "" -} -layers { - name: "__kmax_seq_score_layer_0__" - type: "kmax_seq_score" - active_type: "" - inputs { - input_layer_name: "sentence_scores" - } - beam_size: 5 -} -layers { - name: "__sub_nested_seq_layer_0__" - type: "sub_nested_seq" - size: 32 - active_type: "" - inputs { - input_layer_name: "sentence_states" - } - inputs { - input_layer_name: "__kmax_seq_score_layer_0__" - } -} -layers { - name: "__fc_layer_0__" - type: "fc" - size: 1 - active_type: "" - inputs { - input_layer_name: "__sub_nested_seq_layer_0__" - input_parameter_name: "___fc_layer_0__.w0" - } - bias_parameter_name: "___fc_layer_0__.wbias" -} -layers { - name: "__kmax_seq_score_layer_1__" - type: "kmax_seq_score" - active_type: "" - inputs { - input_layer_name: "sentence_scores" - } - beam_size: 5 -} -layers { - name: "__seq_slice_layer_0__" - type: "seq_slice" - size: 32 - active_type: "" - inputs { - input_layer_name: "__sub_nested_seq_layer_0__" - } - inputs { - input_layer_name: "__kmax_seq_score_layer_1__" - } - select_first: true -} -layers { - name: 
"__fc_layer_1__" - type: "fc" - size: 1 - active_type: "" - inputs { - input_layer_name: "__seq_slice_layer_0__" - input_parameter_name: "___fc_layer_1__.w0" - } - bias_parameter_name: "___fc_layer_1__.wbias" -} -layers { - name: "__kmax_seq_score_layer_2__" - type: "kmax_seq_score" - active_type: "" - inputs { - input_layer_name: "__fc_layer_1__" - } - beam_size: 5 -} -layers { - name: "sentences_ids" - type: "data" - size: 1 - active_type: "" -} -layers { - name: "start_ids" - type: "data" - size: 1 - active_type: "" -} -layers { - name: "end_ids" - type: "data" - size: 1 - active_type: "" -} -layers { - name: "__cross_entropy_over_beam_0__" - type: "cross_entropy_over_beam" - active_type: "" - inputs { - input_layer_name: "sentence_scores" - } - inputs { - input_layer_name: "__kmax_seq_score_layer_0__" - } - inputs { - input_layer_name: "sentences_ids" - } - inputs { - input_layer_name: "__fc_layer_0__" - } - inputs { - input_layer_name: "__kmax_seq_score_layer_1__" - } - inputs { - input_layer_name: "start_ids" - } - inputs { - input_layer_name: "__fc_layer_1__" - } - inputs { - input_layer_name: "__kmax_seq_score_layer_2__" - } - inputs { - input_layer_name: "end_ids" - } -} -parameters { - name: "___fc_layer_0__.w0" - size: 32 - initial_mean: 0.0 - initial_std: 0.176776695297 - dims: 32 - dims: 1 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___fc_layer_0__.wbias" - size: 1 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 1 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___fc_layer_1__.w0" - size: 32 - initial_mean: 0.0 - initial_std: 0.176776695297 - dims: 32 - dims: 1 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___fc_layer_1__.wbias" - size: 1 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 1 - initial_strategy: 0 - initial_smart: false -} -input_layer_names: "sentence_scores" -input_layer_names: "sentences_ids" -input_layer_names: "sentence_states" -input_layer_names: "start_ids" -input_layer_names: "end_ids" -output_layer_names: "__cross_entropy_over_beam_0__" -sub_models { - name: "root" - layer_names: "sentence_states" - layer_names: "sentence_scores" - layer_names: "__kmax_seq_score_layer_0__" - layer_names: "__sub_nested_seq_layer_0__" - layer_names: "__fc_layer_0__" - layer_names: "__kmax_seq_score_layer_1__" - layer_names: "__seq_slice_layer_0__" - layer_names: "__fc_layer_1__" - layer_names: "__kmax_seq_score_layer_2__" - layer_names: "sentences_ids" - layer_names: "start_ids" - layer_names: "end_ids" - layer_names: "__cross_entropy_over_beam_0__" - input_layer_names: "sentence_scores" - input_layer_names: "sentences_ids" - input_layer_names: "sentence_states" - input_layer_names: "start_ids" - input_layer_names: "end_ids" - output_layer_names: "__cross_entropy_over_beam_0__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_deconv3d_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_deconv3d_layer.protostr deleted file mode 100644 index 7bf409731cbf8d5d98341b03c7c09d91fa8328d9..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_deconv3d_layer.protostr +++ /dev/null @@ -1,132 +0,0 @@ -type: "nn" -layers { - name: "data" - type: "data" - size: 36288 - active_type: "" - height: 48 - width: 42 - depth: 6 -} -layers { - name: "deconv3d_1" - type: "deconv3d" - size: 1387760 - active_type: "" - inputs { - input_layer_name: "data" - 
input_parameter_name: "_deconv3d_1.w0" - conv_conf { - filter_size: 3 - channels: 3 - stride: 2 - padding: 1 - groups: 1 - filter_channels: 16 - output_x: 42 - img_size: 83 - caffe_mode: true - filter_size_y: 3 - padding_y: 1 - stride_y: 2 - output_y: 48 - img_size_y: 95 - filter_size_z: 3 - padding_z: 1 - stride_z: 2 - output_z: 6 - img_size_z: 11 - } - } - bias_parameter_name: "_deconv3d_1.wbias" - num_filters: 16 - shared_biases: true - height: 95 - width: 83 - depth: 11 -} -layers { - name: "deconv3d_2" - type: "deconv3d" - size: 1387760 - active_type: "" - inputs { - input_layer_name: "data" - input_parameter_name: "_deconv3d_2.w0" - conv_conf { - filter_size: 3 - channels: 3 - stride: 2 - padding: 1 - groups: 1 - filter_channels: 16 - output_x: 42 - img_size: 83 - caffe_mode: true - filter_size_y: 3 - padding_y: 1 - stride_y: 2 - output_y: 48 - img_size_y: 95 - filter_size_z: 3 - padding_z: 1 - stride_z: 2 - output_z: 6 - img_size_z: 11 - } - } - bias_parameter_name: "_deconv3d_2.wbias" - num_filters: 16 - shared_biases: true - height: 95 - width: 83 - depth: 11 -} -parameters { - name: "_deconv3d_1.w0" - size: 6912 - initial_mean: 0.0 - initial_std: 0.272165526976 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "_deconv3d_1.wbias" - size: 16 - initial_mean: 0.0 - initial_std: 0.0 - dims: 16 - dims: 1 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "_deconv3d_2.w0" - size: 6912 - initial_mean: 0.0 - initial_std: 0.272165526976 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "_deconv3d_2.wbias" - size: 16 - initial_mean: 0.0 - initial_std: 0.0 - dims: 16 - dims: 1 - initial_strategy: 0 - initial_smart: false -} -input_layer_names: "data" -output_layer_names: "deconv3d_2" -sub_models { - name: "root" - layer_names: "data" - layer_names: "deconv3d_1" - layer_names: "deconv3d_2" - input_layer_names: "data" - output_layer_names: "deconv3d_2" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_detection_output_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_detection_output_layer.protostr deleted file mode 100644 index 6690f9852a31b1909df7df99720db639eb2a564d..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_detection_output_layer.protostr +++ /dev/null @@ -1,66 +0,0 @@ -type: "nn" -layers { - name: "input_loc" - type: "data" - size: 16 - active_type: "" - height: 16 - width: 1 -} -layers { - name: "input_conf" - type: "data" - size: 8 - active_type: "" - height: 1 - width: 8 -} -layers { - name: "priorbox" - type: "data" - size: 32 - active_type: "" - height: 4 - width: 8 -} -layers { - name: "test_detection_output" - type: "detection_output" - size: 1400 - active_type: "" - inputs { - input_layer_name: "priorbox" - detection_output_conf { - num_classes: 21 - nms_threshold: 0.45 - nms_top_k: 400 - background_id: 0 - input_num: 1 - keep_top_k: 200 - confidence_threshold: 0.01 - } - } - inputs { - input_layer_name: "input_loc" - } - inputs { - input_layer_name: "input_conf" - } -} -input_layer_names: "priorbox" -input_layer_names: "input_loc" -input_layer_names: "input_conf" -output_layer_names: "test_detection_output" -sub_models { - name: "root" - layer_names: "input_loc" - layer_names: "input_conf" - layer_names: "priorbox" - layer_names: "test_detection_output" - input_layer_names: "priorbox" - input_layer_names: "input_loc" - input_layer_names: "input_conf" - 
output_layer_names: "test_detection_output" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_dot_prod_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_dot_prod_layer.protostr deleted file mode 100644 index f1530c382c3d81a82592af2c43c06eb4278e2b4a..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_dot_prod_layer.protostr +++ /dev/null @@ -1,38 +0,0 @@ -type: "nn" -layers { - name: "vector1" - type: "data" - size: 10 - active_type: "" -} -layers { - name: "vector2" - type: "data" - size: 10 - active_type: "" -} -layers { - name: "__dot_prod_layer_0__" - type: "dot_prod" - size: 1 - active_type: "" - inputs { - input_layer_name: "vector1" - } - inputs { - input_layer_name: "vector2" - } -} -input_layer_names: "vector1" -input_layer_names: "vector2" -output_layer_names: "__dot_prod_layer_0__" -sub_models { - name: "root" - layer_names: "vector1" - layer_names: "vector2" - layer_names: "__dot_prod_layer_0__" - input_layer_names: "vector1" - input_layer_names: "vector2" - output_layer_names: "__dot_prod_layer_0__" - is_recurrent_layer_group: false -} diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_expand_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_expand_layer.protostr deleted file mode 100644 index f4b36052264bc41b4c06826c3b3c1428c103add7..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_expand_layer.protostr +++ /dev/null @@ -1,56 +0,0 @@ -type: "nn" -layers { - name: "data" - type: "data" - size: 30 - active_type: "" -} -layers { - name: "data_seq" - type: "data" - size: 30 - active_type: "" -} -layers { - name: "__expand_layer_0__" - type: "expand" - size: 30 - active_type: "" - inputs { - input_layer_name: "data" - } - inputs { - input_layer_name: "data_seq" - } - trans_type: "seq" -} -layers { - name: "__expand_layer_1__" - type: "expand" - size: 30 - active_type: "" - inputs { - input_layer_name: "data" - } - inputs { - input_layer_name: "data_seq" - } - trans_type: "non-seq" -} -input_layer_names: "data" -input_layer_names: "data_seq" -output_layer_names: "__expand_layer_0__" -output_layer_names: "__expand_layer_1__" -sub_models { - name: "root" - layer_names: "data" - layer_names: "data_seq" - layer_names: "__expand_layer_0__" - layer_names: "__expand_layer_1__" - input_layer_names: "data" - input_layer_names: "data_seq" - output_layer_names: "__expand_layer_0__" - output_layer_names: "__expand_layer_1__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_factorization_machine.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_factorization_machine.protostr deleted file mode 100644 index 4f3002b19942ed58970bfd64e5978c1601273992..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_factorization_machine.protostr +++ /dev/null @@ -1,39 +0,0 @@ -type: "nn" -layers { - name: "data" - type: "data" - size: 1024 - active_type: "" -} -layers { - name: "__factorization_machine_0__" - type: "factorization_machine" - size: 1 - active_type: "" - inputs { - input_layer_name: "data" - input_parameter_name: "___factorization_machine_0__.w0" - } - factor_size: 10 -} -parameters { - name: "___factorization_machine_0__.w0" - size: 10240 - initial_mean: 0.0 - 
initial_std: 0.03125 - dims: 1024 - dims: 10 - initial_strategy: 0 - initial_smart: true -} -input_layer_names: "data" -output_layer_names: "__factorization_machine_0__" -sub_models { - name: "root" - layer_names: "data" - layer_names: "__factorization_machine_0__" - input_layer_names: "data" - output_layer_names: "__factorization_machine_0__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_fc.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_fc.protostr deleted file mode 100644 index 8151898832ded3796fb8c56b201d5ebfca3ce6cb..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_fc.protostr +++ /dev/null @@ -1,98 +0,0 @@ -type: "nn" -layers { - name: "data" - type: "data" - size: 100 - active_type: "" -} -layers { - name: "__trans_layer_0__" - type: "trans" - size: 100 - active_type: "" - inputs { - input_layer_name: "data" - } -} -layers { - name: "__fc_layer_0__" - type: "fc" - size: 100 - active_type: "tanh" - inputs { - input_layer_name: "__trans_layer_0__" - input_parameter_name: "___fc_layer_0__.w0" - } -} -layers { - name: "mask" - type: "data" - size: 100 - active_type: "" -} -layers { - name: "__selective_fc_layer_0__" - type: "selective_fc" - size: 100 - active_type: "sigmoid" - inputs { - input_layer_name: "data" - input_parameter_name: "___selective_fc_layer_0__.w0" - } - inputs { - input_layer_name: "mask" - } - bias_parameter_name: "___selective_fc_layer_0__.wbias" - selective_fc_pass_generation: false - has_selected_colums: true - selective_fc_full_mul_ratio: 0.02 -} -parameters { - name: "___fc_layer_0__.w0" - size: 10000 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 100 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___selective_fc_layer_0__.w0" - size: 10000 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 100 - initial_strategy: 0 - initial_smart: true - is_sparse: false -} -parameters { - name: "___selective_fc_layer_0__.wbias" - size: 100 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 100 - initial_strategy: 0 - initial_smart: false -} -input_layer_names: "data" -input_layer_names: "mask" -output_layer_names: "__fc_layer_0__" -output_layer_names: "__selective_fc_layer_0__" -sub_models { - name: "root" - layer_names: "data" - layer_names: "__trans_layer_0__" - layer_names: "__fc_layer_0__" - layer_names: "mask" - layer_names: "__selective_fc_layer_0__" - input_layer_names: "data" - input_layer_names: "mask" - output_layer_names: "__fc_layer_0__" - output_layer_names: "__selective_fc_layer_0__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_gated_unit_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_gated_unit_layer.protostr deleted file mode 100644 index f1e4d894a5fb0040f48bdb5a751c3f0d956c23bb..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_gated_unit_layer.protostr +++ /dev/null @@ -1,106 +0,0 @@ -type: "nn" -layers { - name: "input" - type: "data" - size: 256 - active_type: "" -} -layers { - name: "__gated_unit_layer_0___input_proj" - type: "fc" - size: 512 - active_type: "tanh" - inputs { - input_layer_name: "input" - input_parameter_name: "___gated_unit_layer_0___input_proj.w0" - } - bias_parameter_name: "___gated_unit_layer_0___input_proj.wbias" - error_clipping_threshold: 100.0 -} -layers { - 
name: "__gated_unit_layer_0___gate" - type: "fc" - size: 512 - active_type: "sigmoid" - inputs { - input_layer_name: "input" - input_parameter_name: "___gated_unit_layer_0___gate.w0" - } - bias_parameter_name: "___gated_unit_layer_0___gate.wbias" - error_clipping_threshold: 100.0 -} -layers { - name: "__gated_unit_layer_0___gated_act" - type: "mixed" - size: 512 - active_type: "" - inputs { - input_layer_name: "__gated_unit_layer_0___input_proj" - } - inputs { - input_layer_name: "__gated_unit_layer_0___gate" - } - error_clipping_threshold: 100.0 - operator_confs { - type: "dot_mul" - input_indices: 0 - input_indices: 1 - input_sizes: 512 - input_sizes: 512 - output_size: 512 - dotmul_scale: 1 - } -} -parameters { - name: "___gated_unit_layer_0___input_proj.w0" - size: 131072 - initial_mean: 0.0 - initial_std: 0.0001 - dims: 256 - dims: 512 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___gated_unit_layer_0___input_proj.wbias" - size: 512 - initial_mean: 0.0 - initial_std: 1 - dims: 1 - dims: 512 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___gated_unit_layer_0___gate.w0" - size: 131072 - initial_mean: 0.0 - initial_std: 0.0001 - dims: 256 - dims: 512 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___gated_unit_layer_0___gate.wbias" - size: 512 - initial_mean: 0.0 - initial_std: 1 - dims: 1 - dims: 512 - initial_strategy: 0 - initial_smart: false -} -input_layer_names: "input" -output_layer_names: "__gated_unit_layer_0___gated_act" -sub_models { - name: "root" - layer_names: "input" - layer_names: "__gated_unit_layer_0___input_proj" - layer_names: "__gated_unit_layer_0___gate" - layer_names: "__gated_unit_layer_0___gated_act" - input_layer_names: "input" - output_layer_names: "__gated_unit_layer_0___gated_act" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_grumemory_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_grumemory_layer.protostr deleted file mode 100644 index 2c19b2fd120e7c01ee9aa088f674a74498540a3c..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_grumemory_layer.protostr +++ /dev/null @@ -1,51 +0,0 @@ -type: "nn" -layers { - name: "data" - type: "data" - size: 120 - active_type: "" -} -layers { - name: "__gru_0__" - type: "gated_recurrent" - size: 40 - active_type: "sigmoid" - inputs { - input_layer_name: "data" - input_parameter_name: "___gru_0__.w0" - } - bias_parameter_name: "___gru_0__.wbias" - reversed: true - active_gate_type: "tanh" -} -parameters { - name: "___gru_0__.w0" - size: 4800 - initial_mean: 0.0 - initial_std: 0.158113883008 - dims: 40 - dims: 120 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___gru_0__.wbias" - size: 120 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 120 - initial_strategy: 0 - initial_smart: false -} -input_layer_names: "data" -output_layer_names: "__gru_0__" -sub_models { - name: "root" - layer_names: "data" - layer_names: "__gru_0__" - input_layer_names: "data" - output_layer_names: "__gru_0__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_hsigmoid.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_hsigmoid.protostr deleted file mode 100644 index e81fcb13c4c6ee8e76036d71d47fdaac9cd3d716..0000000000000000000000000000000000000000 --- 
a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_hsigmoid.protostr +++ /dev/null @@ -1,62 +0,0 @@ -type: "nn" -layers { - name: "data" - type: "data" - size: 100 - active_type: "" -} -layers { - name: "label" - type: "data" - size: 10 - active_type: "" -} -layers { - name: "__hsigmoid_0__" - type: "hsigmoid" - size: 1 - active_type: "" - inputs { - input_layer_name: "data" - input_parameter_name: "___hsigmoid_0__.w0" - } - inputs { - input_layer_name: "label" - } - bias_parameter_name: "___hsigmoid_0__.wbias" - num_classes: 10 -} -parameters { - name: "___hsigmoid_0__.w0" - size: 900 - initial_mean: 0.0 - initial_std: 0.333333333333 - dims: 9 - dims: 100 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___hsigmoid_0__.wbias" - size: 9 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 9 - initial_strategy: 0 - initial_smart: false -} -input_layer_names: "data" -input_layer_names: "label" -output_layer_names: "__hsigmoid_0__" -sub_models { - name: "root" - layer_names: "data" - layer_names: "label" - layer_names: "__hsigmoid_0__" - input_layer_names: "data" - input_layer_names: "label" - output_layer_names: "__hsigmoid_0__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_kmax_seq_socre_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_kmax_seq_socre_layer.protostr deleted file mode 100644 index f93d368c8687573db80106b9cc4defa56a881e46..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_kmax_seq_socre_layer.protostr +++ /dev/null @@ -1,59 +0,0 @@ -type: "nn" -layers { - name: "input_seq" - type: "data" - size: 128 - active_type: "" -} -layers { - name: "__fc_layer_0__" - type: "fc" - size: 1 - active_type: "exponential" - inputs { - input_layer_name: "input_seq" - input_parameter_name: "___fc_layer_0__.w0" - } - bias_parameter_name: "___fc_layer_0__.wbias" -} -layers { - name: "__kmax_seq_score_layer_0__" - type: "kmax_seq_score" - active_type: "" - inputs { - input_layer_name: "__fc_layer_0__" - } - beam_size: 5 -} -parameters { - name: "___fc_layer_0__.w0" - size: 128 - initial_mean: 0.0 - initial_std: 0.0883883476483 - dims: 128 - dims: 1 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___fc_layer_0__.wbias" - size: 1 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 1 - initial_strategy: 0 - initial_smart: false -} -input_layer_names: "input_seq" -output_layer_names: "__kmax_seq_score_layer_0__" -sub_models { - name: "root" - layer_names: "input_seq" - layer_names: "__fc_layer_0__" - layer_names: "__kmax_seq_score_layer_0__" - input_layer_names: "input_seq" - output_layer_names: "__kmax_seq_score_layer_0__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_l2_distance_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_l2_distance_layer.protostr deleted file mode 100644 index 9ba33689edc893c2169a73679a04a6f51cfc83a8..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_l2_distance_layer.protostr +++ /dev/null @@ -1,39 +0,0 @@ -type: "nn" -layers { - name: "x" - type: "data" - size: 128 - active_type: "" -} -layers { - name: "y" - type: "data" - size: 128 - active_type: "" -} -layers { - name: "__l2_distance_layer_0__" - type: "l2_distance" - size: 1 - active_type: "" - inputs { - input_layer_name: "x" - 
} - inputs { - input_layer_name: "y" - } -} -input_layer_names: "x" -input_layer_names: "y" -output_layer_names: "__l2_distance_layer_0__" -sub_models { - name: "root" - layer_names: "x" - layer_names: "y" - layer_names: "__l2_distance_layer_0__" - input_layer_names: "x" - input_layer_names: "y" - output_layer_names: "__l2_distance_layer_0__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_lstmemory_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_lstmemory_layer.protostr deleted file mode 100644 index 76a4afab82c59196564128cb9cb8d72ba2a7b101..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_lstmemory_layer.protostr +++ /dev/null @@ -1,53 +0,0 @@ -type: "nn" -layers { - name: "data" - type: "data" - size: 128 - active_type: "" -} -layers { - name: "__lstmemory_0__" - type: "lstmemory" - size: 32 - active_type: "tanh" - inputs { - input_layer_name: "data" - input_parameter_name: "___lstmemory_0__.w0" - } - bias_parameter_name: "___lstmemory_0__.wbias" - reversed: true - active_gate_type: "tanh" - active_state_type: "tanh" -} -parameters { - name: "___lstmemory_0__.w0" - size: 4096 - initial_mean: 0.0 - initial_std: 0.176776695297 - dims: 32 - dims: 32 - dims: 4 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___lstmemory_0__.wbias" - size: 224 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 224 - initial_strategy: 0 - initial_smart: false -} -input_layer_names: "data" -output_layer_names: "__lstmemory_0__" -sub_models { - name: "root" - layer_names: "data" - layer_names: "__lstmemory_0__" - input_layer_names: "data" - output_layer_names: "__lstmemory_0__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_maxout.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_maxout.protostr deleted file mode 100644 index 39dc4871469785fbe667e43f1f0fb9da7a19e2d2..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_maxout.protostr +++ /dev/null @@ -1,233 +0,0 @@ -type: "nn" -layers { - name: "data" - type: "data" - size: 2304 - active_type: "" - height: 48 - width: 48 -} -layers { - name: "__conv_0__" - type: "exconv" - size: 36864 - active_type: "" - inputs { - input_layer_name: "data" - input_parameter_name: "___conv_0__.w0" - conv_conf { - filter_size: 3 - channels: 1 - stride: 1 - padding: 1 - groups: 1 - filter_channels: 1 - output_x: 48 - img_size: 48 - caffe_mode: true - filter_size_y: 3 - padding_y: 1 - stride_y: 1 - output_y: 48 - img_size_y: 48 - dilation: 1 - dilation_y: 1 - } - } - bias_parameter_name: "___conv_0__.wbias" - num_filters: 16 - shared_biases: true - height: 48 - width: 48 -} -layers { - name: "__maxout_layer_0__" - type: "maxout" - size: 18432 - active_type: "" - inputs { - input_layer_name: "__conv_0__" - maxout_conf { - image_conf { - channels: 16 - img_size: 48 - img_size_y: 48 - } - groups: 2 - } - } - height: 48 - width: 48 -} -layers { - name: "__pool_0__" - type: "pool" - size: 4608 - active_type: "" - inputs { - input_layer_name: "__maxout_layer_0__" - pool_conf { - pool_type: "max-projection" - channels: 8 - size_x: 2 - stride: 2 - output_x: 24 - img_size: 48 - padding: 0 - size_y: 2 - stride_y: 2 - output_y: 24 - img_size_y: 48 - padding_y: 0 - } - } - height: 24 - width: 24 -} -layers { - name: "__conv_1__" - type: "exconv" - 
size: 73728 - active_type: "" - inputs { - input_layer_name: "__pool_0__" - input_parameter_name: "___conv_1__.w0" - conv_conf { - filter_size: 3 - channels: 8 - stride: 1 - padding: 1 - groups: 1 - filter_channels: 8 - output_x: 24 - img_size: 24 - caffe_mode: true - filter_size_y: 3 - padding_y: 1 - stride_y: 1 - output_y: 24 - img_size_y: 24 - dilation: 1 - dilation_y: 1 - } - } - bias_parameter_name: "___conv_1__.wbias" - num_filters: 128 - shared_biases: true - height: 24 - width: 24 -} -layers { - name: "__maxout_layer_1__" - type: "maxout" - size: 18432 - active_type: "" - inputs { - input_layer_name: "__conv_1__" - maxout_conf { - image_conf { - channels: 128 - img_size: 24 - img_size_y: 24 - } - groups: 4 - } - } - height: 24 - width: 24 -} -layers { - name: "__block_expand_layer_0__" - type: "blockexpand" - size: 192 - active_type: "" - inputs { - input_layer_name: "__maxout_layer_1__" - block_expand_conf { - channels: 32 - stride_x: 1 - stride_y: 1 - padding_x: 0 - padding_y: 0 - block_x: 1 - block_y: 6 - output_x: 0 - output_y: 0 - img_size_x: 0 - img_size_y: 0 - } - } -} -layers { - name: "__fc_layer_0__" - type: "fc" - size: 384 - active_type: "tanh" - inputs { - input_layer_name: "__block_expand_layer_0__" - input_parameter_name: "___fc_layer_0__.w0" - } -} -parameters { - name: "___conv_0__.w0" - size: 144 - initial_mean: 0.0 - initial_std: 0.471404520791 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___conv_0__.wbias" - size: 16 - initial_mean: 0.0 - initial_std: 0.0 - dims: 16 - dims: 1 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___conv_1__.w0" - size: 9216 - initial_mean: 0.0 - initial_std: 0.166666666667 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___conv_1__.wbias" - size: 128 - initial_mean: 0.0 - initial_std: 0.0 - dims: 128 - dims: 1 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___fc_layer_0__.w0" - size: 73728 - initial_mean: 0.0 - initial_std: 0.0721687836487 - dims: 192 - dims: 384 - initial_strategy: 0 - initial_smart: true -} -input_layer_names: "data" -output_layer_names: "__fc_layer_0__" -sub_models { - name: "root" - layer_names: "data" - layer_names: "__conv_0__" - layer_names: "__maxout_layer_0__" - layer_names: "__pool_0__" - layer_names: "__conv_1__" - layer_names: "__maxout_layer_1__" - layer_names: "__block_expand_layer_0__" - layer_names: "__fc_layer_0__" - input_layer_names: "data" - output_layer_names: "__fc_layer_0__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_multibox_loss_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_multibox_loss_layer.protostr deleted file mode 100644 index 0ba84dcc6db6b7025a98b2698312f5fc9e0ed634..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_multibox_loss_layer.protostr +++ /dev/null @@ -1,79 +0,0 @@ -type: "nn" -layers { - name: "input_loc" - type: "data" - size: 16 - active_type: "" - height: 16 - width: 1 -} -layers { - name: "input_conf" - type: "data" - size: 8 - active_type: "" - height: 1 - width: 8 -} -layers { - name: "priorbox" - type: "data" - size: 32 - active_type: "" - height: 4 - width: 8 -} -layers { - name: "label" - type: "data" - size: 24 - active_type: "" - height: 4 - width: 6 -} -layers { - name: "test_multibox_loss" - type: "multibox_loss" - size: 1 - active_type: "" - inputs { - input_layer_name: "priorbox" - 
multibox_loss_conf { - num_classes: 21 - overlap_threshold: 0.5 - neg_pos_ratio: 3.0 - neg_overlap: 0.5 - background_id: 0 - input_num: 1 - } - } - inputs { - input_layer_name: "label" - } - inputs { - input_layer_name: "input_loc" - } - inputs { - input_layer_name: "input_conf" - } -} -input_layer_names: "priorbox" -input_layer_names: "label" -input_layer_names: "input_loc" -input_layer_names: "input_conf" -output_layer_names: "test_multibox_loss" -sub_models { - name: "root" - layer_names: "input_loc" - layer_names: "input_conf" - layer_names: "priorbox" - layer_names: "label" - layer_names: "test_multibox_loss" - input_layer_names: "priorbox" - input_layer_names: "label" - input_layer_names: "input_loc" - input_layer_names: "input_conf" - output_layer_names: "test_multibox_loss" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_multiplex_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_multiplex_layer.protostr deleted file mode 100644 index 379842ba8d32fa7cdad448dd86559c7d02f58e0a..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_multiplex_layer.protostr +++ /dev/null @@ -1,63 +0,0 @@ -type: "nn" -layers { - name: "index" - type: "data" - size: 1 - active_type: "" -} -layers { - name: "data1" - type: "data" - size: 30 - active_type: "" -} -layers { - name: "data2" - type: "data" - size: 30 - active_type: "" -} -layers { - name: "data3" - type: "data" - size: 30 - active_type: "" -} -layers { - name: "__multiplex_layer_0__" - type: "multiplex" - size: 30 - active_type: "" - inputs { - input_layer_name: "index" - } - inputs { - input_layer_name: "data1" - } - inputs { - input_layer_name: "data2" - } - inputs { - input_layer_name: "data3" - } -} -input_layer_names: "index" -input_layer_names: "data1" -input_layer_names: "data2" -input_layer_names: "data3" -output_layer_names: "__multiplex_layer_0__" -sub_models { - name: "root" - layer_names: "index" - layer_names: "data1" - layer_names: "data2" - layer_names: "data3" - layer_names: "__multiplex_layer_0__" - input_layer_names: "index" - input_layer_names: "data1" - input_layer_names: "data2" - input_layer_names: "data3" - output_layer_names: "__multiplex_layer_0__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_ntm_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_ntm_layers.protostr deleted file mode 100644 index c1bfdf1b19c61d096c25af061c6fbb3bbfc50265..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_ntm_layers.protostr +++ /dev/null @@ -1,225 +0,0 @@ -type: "nn" -layers { - name: "w" - type: "data" - size: 1 - active_type: "" -} -layers { - name: "a" - type: "data" - size: 100 - active_type: "" -} -layers { - name: "b" - type: "data" - size: 100 - active_type: "" -} -layers { - name: "c" - type: "data" - size: 200 - active_type: "" -} -layers { - name: "d" - type: "data" - size: 31 - active_type: "" -} -layers { - name: "__interpolation_layer_0__" - type: "interpolation" - size: 100 - active_type: "" - inputs { - input_layer_name: "w" - } - inputs { - input_layer_name: "a" - } - inputs { - input_layer_name: "b" - } -} -layers { - name: "__power_layer_0__" - type: "power" - size: 100 - active_type: "" - inputs { - input_layer_name: "w" - } - inputs { - input_layer_name: "a" - } -} -layers { - name: 
"__scaling_layer_0__" - type: "scaling" - size: 100 - active_type: "" - inputs { - input_layer_name: "w" - } - inputs { - input_layer_name: "a" - } -} -layers { - name: "__cos_sim_0__" - type: "cos" - size: 1 - active_type: "" - inputs { - input_layer_name: "a" - } - inputs { - input_layer_name: "b" - } - cos_scale: 1 -} -layers { - name: "__cos_sim_1__" - type: "cos_vm" - size: 2 - active_type: "" - inputs { - input_layer_name: "a" - } - inputs { - input_layer_name: "c" - } - cos_scale: 1 -} -layers { - name: "__sum_to_one_norm_layer_0__" - type: "sum_to_one_norm" - size: 100 - active_type: "" - inputs { - input_layer_name: "a" - } -} -layers { - name: "__conv_shift_layer_0__" - type: "conv_shift" - size: 100 - active_type: "" - inputs { - input_layer_name: "a" - } - inputs { - input_layer_name: "d" - } -} -layers { - name: "__tensor_layer_0__" - type: "tensor" - size: 1000 - active_type: "" - inputs { - input_layer_name: "a" - input_parameter_name: "___tensor_layer_0__.w0" - } - inputs { - input_layer_name: "b" - } - bias_parameter_name: "___tensor_layer_0__.wbias" -} -layers { - name: "__slope_intercept_layer_0__" - type: "slope_intercept" - size: 100 - active_type: "" - inputs { - input_layer_name: "a" - } - slope: 0.7 - intercept: 0.9 -} -layers { - name: "__linear_comb_layer_0__" - type: "convex_comb" - size: 2 - active_type: "" - inputs { - input_layer_name: "b" - } - inputs { - input_layer_name: "c" - } -} -parameters { - name: "___tensor_layer_0__.w0" - size: 10000000 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 100 - dims: 1000 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___tensor_layer_0__.wbias" - size: 1000 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 1000 - initial_strategy: 0 - initial_smart: false -} -input_layer_names: "w" -input_layer_names: "a" -input_layer_names: "b" -input_layer_names: "c" -input_layer_names: "d" -output_layer_names: "__interpolation_layer_0__" -output_layer_names: "__power_layer_0__" -output_layer_names: "__scaling_layer_0__" -output_layer_names: "__cos_sim_0__" -output_layer_names: "__cos_sim_1__" -output_layer_names: "__sum_to_one_norm_layer_0__" -output_layer_names: "__conv_shift_layer_0__" -output_layer_names: "__tensor_layer_0__" -output_layer_names: "__slope_intercept_layer_0__" -output_layer_names: "__linear_comb_layer_0__" -sub_models { - name: "root" - layer_names: "w" - layer_names: "a" - layer_names: "b" - layer_names: "c" - layer_names: "d" - layer_names: "__interpolation_layer_0__" - layer_names: "__power_layer_0__" - layer_names: "__scaling_layer_0__" - layer_names: "__cos_sim_0__" - layer_names: "__cos_sim_1__" - layer_names: "__sum_to_one_norm_layer_0__" - layer_names: "__conv_shift_layer_0__" - layer_names: "__tensor_layer_0__" - layer_names: "__slope_intercept_layer_0__" - layer_names: "__linear_comb_layer_0__" - input_layer_names: "w" - input_layer_names: "a" - input_layer_names: "b" - input_layer_names: "c" - input_layer_names: "d" - output_layer_names: "__interpolation_layer_0__" - output_layer_names: "__power_layer_0__" - output_layer_names: "__scaling_layer_0__" - output_layer_names: "__cos_sim_0__" - output_layer_names: "__cos_sim_1__" - output_layer_names: "__sum_to_one_norm_layer_0__" - output_layer_names: "__conv_shift_layer_0__" - output_layer_names: "__tensor_layer_0__" - output_layer_names: "__slope_intercept_layer_0__" - output_layer_names: "__linear_comb_layer_0__" - is_recurrent_layer_group: false -} - diff --git 
a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_pad.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_pad.protostr deleted file mode 100644 index d5d6d31a17b84d8ddb4e555caca804f2f6c50992..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_pad.protostr +++ /dev/null @@ -1,122 +0,0 @@ -type: "nn" -layers { - name: "data" - type: "data" - size: 2016 - active_type: "" - height: 48 - width: 42 -} -layers { - name: "__conv_0__" - type: "exconv" - size: 32256 - active_type: "" - inputs { - input_layer_name: "data" - input_parameter_name: "___conv_0__.w0" - conv_conf { - filter_size: 3 - channels: 1 - stride: 1 - padding: 1 - groups: 1 - filter_channels: 1 - output_x: 42 - img_size: 42 - caffe_mode: true - filter_size_y: 3 - padding_y: 1 - stride_y: 1 - output_y: 48 - img_size_y: 48 - dilation: 1 - dilation_y: 1 - } - } - bias_parameter_name: "___conv_0__.wbias" - num_filters: 16 - shared_biases: true - height: 48 - width: 42 -} -layers { - name: "__pool_0__" - type: "pool" - size: 8064 - active_type: "" - inputs { - input_layer_name: "__conv_0__" - pool_conf { - pool_type: "max-projection" - channels: 16 - size_x: 2 - stride: 2 - output_x: 21 - img_size: 42 - padding: 0 - size_y: 2 - stride_y: 2 - output_y: 24 - img_size_y: 48 - padding_y: 0 - } - } - height: 24 - width: 21 -} -layers { - name: "__pad_0__" - type: "pad" - size: 14175 - active_type: "" - inputs { - input_layer_name: "__pool_0__" - pad_conf { - image_conf { - channels: 16 - img_size: 21 - img_size_y: 24 - } - pad_c: 2 - pad_c: 3 - pad_h: 1 - pad_h: 2 - pad_w: 3 - pad_w: 1 - } - } - height: 27 - width: 25 -} -parameters { - name: "___conv_0__.w0" - size: 144 - initial_mean: 0.0 - initial_std: 0.471404520791 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___conv_0__.wbias" - size: 16 - initial_mean: 0.0 - initial_std: 0.0 - dims: 16 - dims: 1 - initial_strategy: 0 - initial_smart: false -} -input_layer_names: "data" -output_layer_names: "__pad_0__" -sub_models { - name: "root" - layer_names: "data" - layer_names: "__conv_0__" - layer_names: "__pool_0__" - layer_names: "__pad_0__" - input_layer_names: "data" - output_layer_names: "__pad_0__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_pooling3D_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_pooling3D_layer.protostr deleted file mode 100644 index 8eb98593f6f692a445cf5088e101e9da3763b41d..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_pooling3D_layer.protostr +++ /dev/null @@ -1,123 +0,0 @@ -type: "nn" -layers { - name: "data_2d" - type: "data" - size: 6000 - active_type: "" - height: 20 - width: 10 -} -layers { - name: "pool___2d" - type: "pool" - size: 840 - active_type: "" - inputs { - input_layer_name: "data_2d" - pool_conf { - pool_type: "avg-projection" - channels: 30 - size_x: 5 - stride: 3 - output_x: 4 - img_size: 10 - padding: 1 - size_y: 5 - stride_y: 3 - output_y: 7 - img_size_y: 20 - padding_y: 1 - } - } - height: 7 - width: 4 -} -layers { - name: "data_3d_1" - type: "data" - size: 60000 - active_type: "" - height: 20 - width: 10 - depth: 10 -} -layers { - name: "pool_3d_1" - type: "pool3d" - size: 3360 - active_type: "" - inputs { - input_layer_name: "data_3d_1" - pool_conf { - pool_type: "avg-projection" - channels: 30 - size_x: 5 - stride: 3 - output_x: 4 - img_size: 10 - 
padding: 1 - size_y: 5 - stride_y: 3 - output_y: 7 - img_size_y: 20 - padding_y: 1 - size_z: 5 - stride_z: 3 - output_z: 4 - img_size_z: 10 - padding_z: 1 - } - } - height: 7 - width: 4 - depth: 4 -} -layers { - name: "pool_3d_2" - type: "pool3d" - size: 3360 - active_type: "" - inputs { - input_layer_name: "data_3d_1" - pool_conf { - pool_type: "max-projection" - channels: 30 - size_x: 5 - stride: 3 - output_x: 4 - img_size: 10 - padding: 1 - size_y: 5 - stride_y: 3 - output_y: 7 - img_size_y: 20 - padding_y: 1 - size_z: 5 - stride_z: 3 - output_z: 4 - img_size_z: 10 - padding_z: 1 - } - } - height: 7 - width: 4 - depth: 4 -} -input_layer_names: "data_2d" -output_layer_names: "pool___2d" -output_layer_names: "pool_3d_1" -output_layer_names: "pool_3d_2" -sub_models { - name: "root" - layer_names: "data_2d" - layer_names: "pool___2d" - layer_names: "data_3d_1" - layer_names: "pool_3d_1" - layer_names: "pool_3d_2" - input_layer_names: "data_2d" - output_layer_names: "pool___2d" - output_layer_names: "pool_3d_1" - output_layer_names: "pool_3d_2" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_prelu_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_prelu_layer.protostr deleted file mode 100644 index 63fb38c6508675d379f577b965ea17ad4c3b4942..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_prelu_layer.protostr +++ /dev/null @@ -1,144 +0,0 @@ -type: "nn" -layers { - name: "input" - type: "data" - size: 300 - active_type: "" - height: 10 - width: 10 -} -layers { - name: "__prelu_layer_0__" - type: "prelu" - size: 300 - active_type: "" - inputs { - input_layer_name: "input" - input_parameter_name: "___prelu_layer_0__.w0" - } - partial_sum: 1 - height: 10 - width: 10 - depth: 1 -} -layers { - name: "__prelu_layer_1__" - type: "prelu" - size: 300 - active_type: "" - inputs { - input_layer_name: "input" - input_parameter_name: "___prelu_layer_1__.w0" - } - partial_sum: 1 - height: 10 - width: 10 - depth: 1 -} -layers { - name: "__prelu_layer_2__" - type: "prelu" - size: 300 - active_type: "" - inputs { - input_layer_name: "input" - input_parameter_name: "___prelu_layer_2__.w0" - } - partial_sum: 5 - height: 10 - width: 10 - depth: 1 -} -layers { - name: "__prelu_layer_3__" - type: "prelu" - size: 300 - active_type: "" - inputs { - input_layer_name: "input" - input_parameter_name: "___prelu_layer_3__.w0" - } - partial_sum: 300 - height: 10 - width: 10 - depth: 1 -} -layers { - name: "__prelu_layer_4__" - type: "prelu" - size: 300 - active_type: "" - inputs { - input_layer_name: "input" - input_parameter_name: "___prelu_layer_4__.w0" - } - partial_sum: 100 - height: 10 - width: 10 - depth: 1 -} -parameters { - name: "___prelu_layer_0__.w0" - size: 300 - initial_mean: 0.25 - initial_std: 0.0 - dims: 1 - dims: 300 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___prelu_layer_1__.w0" - size: 300 - initial_mean: 0.25 - initial_std: 0.0 - dims: 1 - dims: 300 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___prelu_layer_2__.w0" - size: 60 - initial_mean: 0.25 - initial_std: 0.0 - dims: 1 - dims: 60 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___prelu_layer_3__.w0" - size: 1 - initial_mean: 0.25 - initial_std: 0.0 - dims: 1 - dims: 1 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___prelu_layer_4__.w0" - size: 3 - initial_mean: 0.25 - initial_std: 0.0 
- dims: 1 - dims: 3 - initial_strategy: 0 - initial_smart: false -} -input_layer_names: "input" -output_layer_names: "__prelu_layer_4__" -sub_models { - name: "root" - layer_names: "input" - layer_names: "__prelu_layer_0__" - layer_names: "__prelu_layer_1__" - layer_names: "__prelu_layer_2__" - layer_names: "__prelu_layer_3__" - layer_names: "__prelu_layer_4__" - input_layer_names: "input" - output_layer_names: "__prelu_layer_4__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_print_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_print_layer.protostr deleted file mode 100644 index f4cc492dfb9b5a8c04f6f41cfab017fc613e2a66..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_print_layer.protostr +++ /dev/null @@ -1,27 +0,0 @@ -type: "nn" -layers { - name: "input" - type: "data" - size: 100 - active_type: "" -} -layers { - name: "__print_0__" - type: "print" - active_type: "" - inputs { - input_layer_name: "input" - } - user_arg: "layer=input %s" -} -input_layer_names: "input" -output_layer_names: "input" -sub_models { - name: "root" - layer_names: "input" - layer_names: "__print_0__" - input_layer_names: "input" - output_layer_names: "input" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_recursive_topology.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_recursive_topology.protostr deleted file mode 100644 index 046037936a6d85f54095c65f206e468aa69065d7..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_recursive_topology.protostr +++ /dev/null @@ -1,593 +0,0 @@ -type: "nn" -layers { - name: "data" - type: "data" - size: 100 - active_type: "" -} -layers { - name: "__addto_0__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "data" - } - inputs { - input_layer_name: "data" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_1__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_0__" - } - inputs { - input_layer_name: "__addto_0__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_2__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_1__" - } - inputs { - input_layer_name: "__addto_1__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_3__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_2__" - } - inputs { - input_layer_name: "__addto_2__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_4__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_3__" - } - inputs { - input_layer_name: "__addto_3__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_5__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_4__" - } - inputs { - input_layer_name: "__addto_4__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_6__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_5__" - } - inputs { - input_layer_name: "__addto_5__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_7__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_6__" - } - inputs { - input_layer_name: "__addto_6__" - } - height: 
0 - width: 0 - depth: 1 -} -layers { - name: "__addto_8__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_7__" - } - inputs { - input_layer_name: "__addto_7__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_9__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_8__" - } - inputs { - input_layer_name: "__addto_8__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_10__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_9__" - } - inputs { - input_layer_name: "__addto_9__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_11__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_10__" - } - inputs { - input_layer_name: "__addto_10__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_12__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_11__" - } - inputs { - input_layer_name: "__addto_11__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_13__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_12__" - } - inputs { - input_layer_name: "__addto_12__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_14__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_13__" - } - inputs { - input_layer_name: "__addto_13__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_15__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_14__" - } - inputs { - input_layer_name: "__addto_14__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_16__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_15__" - } - inputs { - input_layer_name: "__addto_15__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_17__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_16__" - } - inputs { - input_layer_name: "__addto_16__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_18__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_17__" - } - inputs { - input_layer_name: "__addto_17__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_19__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_18__" - } - inputs { - input_layer_name: "__addto_18__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_20__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_19__" - } - inputs { - input_layer_name: "__addto_19__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_21__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_20__" - } - inputs { - input_layer_name: "__addto_20__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_22__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_21__" - } - inputs { - input_layer_name: "__addto_21__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_23__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_22__" - } - inputs { - input_layer_name: "__addto_22__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_24__" - type: "addto" - size: 100 - active_type: "" 
- inputs { - input_layer_name: "__addto_23__" - } - inputs { - input_layer_name: "__addto_23__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_25__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_24__" - } - inputs { - input_layer_name: "__addto_24__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_26__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_25__" - } - inputs { - input_layer_name: "__addto_25__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_27__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_26__" - } - inputs { - input_layer_name: "__addto_26__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_28__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_27__" - } - inputs { - input_layer_name: "__addto_27__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_29__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_28__" - } - inputs { - input_layer_name: "__addto_28__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_30__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_29__" - } - inputs { - input_layer_name: "__addto_29__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_31__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_30__" - } - inputs { - input_layer_name: "__addto_30__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__fc_layer_0__" - type: "fc" - size: 32 - active_type: "relu" - inputs { - input_layer_name: "__addto_31__" - input_parameter_name: "___fc_layer_0__.w0" - } - bias_parameter_name: "___fc_layer_0__.wbias" -} -layers { - name: "__fc_layer_1__" - type: "fc" - size: 10 - active_type: "softmax" - inputs { - input_layer_name: "__fc_layer_0__" - input_parameter_name: "___fc_layer_1__.w0" - } - bias_parameter_name: "___fc_layer_1__.wbias" -} -parameters { - name: "___fc_layer_0__.w0" - size: 3200 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 32 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___fc_layer_0__.wbias" - size: 32 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 32 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___fc_layer_1__.w0" - size: 320 - initial_mean: 0.0 - initial_std: 0.176776695297 - dims: 32 - dims: 10 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___fc_layer_1__.wbias" - size: 10 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 10 - initial_strategy: 0 - initial_smart: false -} -input_layer_names: "data" -output_layer_names: "__fc_layer_1__" -sub_models { - name: "root" - layer_names: "data" - layer_names: "__addto_0__" - layer_names: "__addto_1__" - layer_names: "__addto_2__" - layer_names: "__addto_3__" - layer_names: "__addto_4__" - layer_names: "__addto_5__" - layer_names: "__addto_6__" - layer_names: "__addto_7__" - layer_names: "__addto_8__" - layer_names: "__addto_9__" - layer_names: "__addto_10__" - layer_names: "__addto_11__" - layer_names: "__addto_12__" - layer_names: "__addto_13__" - layer_names: "__addto_14__" - layer_names: "__addto_15__" - layer_names: "__addto_16__" - layer_names: "__addto_17__" - layer_names: "__addto_18__" - layer_names: "__addto_19__" - layer_names: "__addto_20__" - layer_names: "__addto_21__" - 
layer_names: "__addto_22__" - layer_names: "__addto_23__" - layer_names: "__addto_24__" - layer_names: "__addto_25__" - layer_names: "__addto_26__" - layer_names: "__addto_27__" - layer_names: "__addto_28__" - layer_names: "__addto_29__" - layer_names: "__addto_30__" - layer_names: "__addto_31__" - layer_names: "__fc_layer_0__" - layer_names: "__fc_layer_1__" - input_layer_names: "data" - output_layer_names: "__fc_layer_1__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_repeat_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_repeat_layer.protostr deleted file mode 100644 index e012386ff9515947d40ddddb6804de08207e1154..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_repeat_layer.protostr +++ /dev/null @@ -1,42 +0,0 @@ -type: "nn" -layers { - name: "data" - type: "data" - size: 30 - active_type: "" -} -layers { - name: "__repeat_layer_0__" - type: "featmap_expand" - size: 300 - active_type: "" - inputs { - input_layer_name: "data" - } - num_filters: 10 -} -layers { - name: "__repeat_layer_1__" - type: "featmap_expand" - size: 300 - active_type: "tanh" - inputs { - input_layer_name: "data" - } - num_filters: 10 - user_arg: "as_col_vec" -} -input_layer_names: "data" -output_layer_names: "__repeat_layer_0__" -output_layer_names: "__repeat_layer_1__" -sub_models { - name: "root" - layer_names: "data" - layer_names: "__repeat_layer_0__" - layer_names: "__repeat_layer_1__" - input_layer_names: "data" - output_layer_names: "__repeat_layer_0__" - output_layer_names: "__repeat_layer_1__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_resize_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_resize_layer.protostr deleted file mode 100644 index 9399252b23d0ec0cce918196bf4077a51e757eaf..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_resize_layer.protostr +++ /dev/null @@ -1,27 +0,0 @@ -type: "nn" -layers { - name: "input" - type: "data" - size: 300 - active_type: "" -} -layers { - name: "__resize_0__" - type: "resize" - size: 150 - active_type: "" - inputs { - input_layer_name: "input" - } -} -input_layer_names: "input" -output_layer_names: "__resize_0__" -sub_models { - name: "root" - layer_names: "input" - layer_names: "__resize_0__" - input_layer_names: "input" - output_layer_names: "__resize_0__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr deleted file mode 100644 index 711785be37dbe7f2decc161d1b8e1ead62927b20..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr +++ /dev/null @@ -1,738 +0,0 @@ -type: "recurrent_nn" -layers { - name: "seq_input" - type: "data" - size: 100 - active_type: "" -} -layers { - name: "sub_seq_input" - type: "data" - size: 100 - active_type: "" -} -layers { - name: "label" - type: "data" - size: 1 - active_type: "" -} -layers { - name: "__mixed_0__" - type: "mixed" - size: 400 - active_type: "" - inputs { - input_layer_name: "seq_input" - input_parameter_name: "___mixed_0__.w0" - proj_conf { - type: "fc" - name: "___mixed_0__.w0" - input_size: 100 - output_size: 400 - } - } -} -layers { - name: 
"__mixed_1__" - type: "mixed" - size: 300 - active_type: "" - inputs { - input_layer_name: "seq_input" - input_parameter_name: "___mixed_1__.w0" - proj_conf { - type: "fc" - name: "___mixed_1__.w0" - input_size: 100 - output_size: 300 - } - } -} -layers { - name: "__recurrent_group_0__" - type: "recurrent_layer_group" - active_type: "" -} -layers { - name: "seq_input@__recurrent_group_0__" - type: "scatter_agent" - size: 100 - active_type: "" -} -layers { - name: "rnn_forward+delay1@__recurrent_group_0__" - type: "agent" - size: 200 - active_type: "" -} -layers { - name: "rnn_forward@__recurrent_group_0__" - type: "fc" - size: 200 - active_type: "tanh" - inputs { - input_layer_name: "seq_input@__recurrent_group_0__" - input_parameter_name: "_rnn_forward@__recurrent_group_0__.w0" - } - inputs { - input_layer_name: "rnn_forward+delay1@__recurrent_group_0__" - input_parameter_name: "_rnn_forward@__recurrent_group_0__.w1" - } - bias_parameter_name: "_rnn_forward@__recurrent_group_0__.wbias" -} -layers { - name: "rnn_forward" - type: "gather_agent" - size: 200 - active_type: "" -} -layers { - name: "__last_seq_0__" - type: "seqlastins" - size: 200 - active_type: "" - inputs { - input_layer_name: "rnn_forward" - } - trans_type: "non-seq" - seq_pool_stride: -1 -} -layers { - name: "__recurrent_group_1__" - type: "recurrent_layer_group" - active_type: "" -} -layers { - name: "seq_input@__recurrent_group_1__" - type: "scatter_agent" - size: 100 - active_type: "" -} -layers { - name: "rnn_back+delay1@__recurrent_group_1__" - type: "agent" - size: 200 - active_type: "" -} -layers { - name: "rnn_back@__recurrent_group_1__" - type: "fc" - size: 200 - active_type: "tanh" - inputs { - input_layer_name: "seq_input@__recurrent_group_1__" - input_parameter_name: "_rnn_back@__recurrent_group_1__.w0" - } - inputs { - input_layer_name: "rnn_back+delay1@__recurrent_group_1__" - input_parameter_name: "_rnn_back@__recurrent_group_1__.w1" - } - bias_parameter_name: "_rnn_back@__recurrent_group_1__.wbias" -} -layers { - name: "rnn_back" - type: "gather_agent" - size: 200 - active_type: "" -} -layers { - name: "__first_seq_0__" - type: "seqlastins" - size: 200 - active_type: "" - inputs { - input_layer_name: "rnn_back" - } - select_first: true - trans_type: "non-seq" - seq_pool_stride: -1 -} -layers { - name: "__recurrent_group_2__" - type: "recurrent_layer_group" - active_type: "" -} -layers { - name: "sub_seq_input@__recurrent_group_2__" - type: "scatter_agent" - size: 100 - active_type: "" -} -layers { - name: "rnn_subseq_forward+delay1@__recurrent_group_2__" - type: "agent" - size: 200 - active_type: "" -} -layers { - name: "rnn_subseq_forward@__recurrent_group_2__" - type: "fc" - size: 200 - active_type: "tanh" - inputs { - input_layer_name: "sub_seq_input@__recurrent_group_2__" - input_parameter_name: "_rnn_subseq_forward@__recurrent_group_2__.w0" - } - inputs { - input_layer_name: "rnn_subseq_forward+delay1@__recurrent_group_2__" - input_parameter_name: "_rnn_subseq_forward@__recurrent_group_2__.w1" - } - bias_parameter_name: "_rnn_subseq_forward@__recurrent_group_2__.wbias" -} -layers { - name: "rnn_subseq_forward" - type: "gather_agent" - size: 200 - active_type: "" -} -layers { - name: "__last_seq_1__" - type: "seqlastins" - size: 200 - active_type: "" - inputs { - input_layer_name: "rnn_subseq_forward" - } - trans_type: "non-seq" - seq_pool_stride: -1 -} -layers { - name: "__lstm_group_0___recurrent_group" - type: "recurrent_layer_group" - active_type: "" -} -layers { - name: 
"__mixed_0__@__lstm_group_0___recurrent_group" - type: "scatter_agent" - size: 400 - active_type: "" -} -layers { - name: "__lstm_group_0__+delay1@__lstm_group_0___recurrent_group" - type: "agent" - size: 100 - active_type: "" -} -layers { - name: "__lstm_group_0___state+delay1@__lstm_group_0___recurrent_group" - type: "agent" - size: 100 - active_type: "" -} -layers { - name: "__lstm_group_0___input_recurrent@__lstm_group_0___recurrent_group" - type: "mixed" - size: 400 - active_type: "" - inputs { - input_layer_name: "__mixed_0__@__lstm_group_0___recurrent_group" - proj_conf { - type: "identity" - name: "___lstm_group_0___input_recurrent.w0" - input_size: 400 - output_size: 400 - } - } - inputs { - input_layer_name: "__lstm_group_0__+delay1@__lstm_group_0___recurrent_group" - input_parameter_name: "___lstm_group_0___input_recurrent@__lstm_group_0___recurrent_group.w1" - proj_conf { - type: "fc" - name: "___lstm_group_0___input_recurrent.w1" - input_size: 100 - output_size: 400 - } - } -} -layers { - name: "__lstm_group_0__@__lstm_group_0___recurrent_group" - type: "lstm_step" - size: 100 - active_type: "tanh" - inputs { - input_layer_name: "__lstm_group_0___input_recurrent@__lstm_group_0___recurrent_group" - } - inputs { - input_layer_name: "__lstm_group_0___state+delay1@__lstm_group_0___recurrent_group" - } - bias_parameter_name: "___lstm_group_0__@__lstm_group_0___recurrent_group.wbias" - active_gate_type: "sigmoid" - active_state_type: "tanh" -} -layers { - name: "__lstm_group_0___state@__lstm_group_0___recurrent_group" - type: "get_output" - size: 100 - active_type: "" - inputs { - input_layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group" - input_layer_argument: "state" - } -} -layers { - name: "__lstm_group_0__" - type: "gather_agent" - size: 100 - active_type: "" -} -layers { - name: "__last_seq_2__" - type: "seqlastins" - size: 100 - active_type: "" - inputs { - input_layer_name: "__lstm_group_0__" - } - trans_type: "non-seq" - seq_pool_stride: -1 -} -layers { - name: "__gru_group_0___recurrent_group" - type: "recurrent_layer_group" - active_type: "" -} -layers { - name: "__mixed_1__@__gru_group_0___recurrent_group" - type: "scatter_agent" - size: 300 - active_type: "" -} -layers { - name: "__gru_group_0__+delay1@__gru_group_0___recurrent_group" - type: "agent" - size: 100 - active_type: "" -} -layers { - name: "__gru_group_0__@__gru_group_0___recurrent_group" - type: "gru_step" - size: 100 - active_type: "tanh" - inputs { - input_layer_name: "__mixed_1__@__gru_group_0___recurrent_group" - input_parameter_name: "___gru_group_0__@__gru_group_0___recurrent_group.w0" - } - inputs { - input_layer_name: "__gru_group_0__+delay1@__gru_group_0___recurrent_group" - } - bias_parameter_name: "___gru_group_0__@__gru_group_0___recurrent_group.wbias" - active_gate_type: "sigmoid" -} -layers { - name: "__gru_group_0__" - type: "gather_agent" - size: 100 - active_type: "" -} -layers { - name: "__last_seq_3__" - type: "seqlastins" - size: 100 - active_type: "" - inputs { - input_layer_name: "__gru_group_0__" - } - trans_type: "non-seq" - seq_pool_stride: -1 -} -layers { - name: "__recurrent_group_3__" - type: "recurrent_layer_group" - active_type: "" -} -layers { - name: "seq_input@__recurrent_group_3__" - type: "scatter_agent" - size: 100 - active_type: "" -} -layers { - name: "__memory_6__@__recurrent_group_3__" - type: "agent" - size: 200 - active_type: "" -} -layers { - name: "__fc_layer_0__@__recurrent_group_3__" - type: "fc" - size: 200 - active_type: "tanh" - inputs { - 
input_layer_name: "seq_input@__recurrent_group_3__" - input_parameter_name: "___fc_layer_0__@__recurrent_group_3__.w0" - } - inputs { - input_layer_name: "__memory_6__@__recurrent_group_3__" - input_parameter_name: "___fc_layer_0__@__recurrent_group_3__.w1" - } - bias_parameter_name: "___fc_layer_0__@__recurrent_group_3__.wbias" -} -layers { - name: "__fc_layer_0__" - type: "gather_agent" - size: 200 - active_type: "" -} -layers { - name: "__last_seq_4__" - type: "seqlastins" - size: 200 - active_type: "" - inputs { - input_layer_name: "__fc_layer_0__" - } - trans_type: "non-seq" - seq_pool_stride: -1 -} -parameters { - name: "___mixed_0__.w0" - size: 40000 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 400 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___mixed_1__.w0" - size: 30000 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 300 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "_rnn_forward@__recurrent_group_0__.w0" - size: 20000 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 200 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "_rnn_forward@__recurrent_group_0__.w1" - size: 40000 - initial_mean: 0.0 - initial_std: 0.0707106781187 - dims: 200 - dims: 200 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "_rnn_forward@__recurrent_group_0__.wbias" - size: 200 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 200 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "_rnn_back@__recurrent_group_1__.w0" - size: 20000 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 200 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "_rnn_back@__recurrent_group_1__.w1" - size: 40000 - initial_mean: 0.0 - initial_std: 0.0707106781187 - dims: 200 - dims: 200 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "_rnn_back@__recurrent_group_1__.wbias" - size: 200 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 200 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "_rnn_subseq_forward@__recurrent_group_2__.w0" - size: 20000 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 200 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "_rnn_subseq_forward@__recurrent_group_2__.w1" - size: 40000 - initial_mean: 0.0 - initial_std: 0.0707106781187 - dims: 200 - dims: 200 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "_rnn_subseq_forward@__recurrent_group_2__.wbias" - size: 200 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 200 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___lstm_group_0___input_recurrent@__lstm_group_0___recurrent_group.w1" - size: 40000 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 400 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___lstm_group_0__@__lstm_group_0___recurrent_group.wbias" - size: 300 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 300 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___gru_group_0__@__gru_group_0___recurrent_group.w0" - size: 30000 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 300 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___gru_group_0__@__gru_group_0___recurrent_group.wbias" - size: 300 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 300 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___fc_layer_0__@__recurrent_group_3__.w0" - size: 
20000 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 200 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___fc_layer_0__@__recurrent_group_3__.w1" - size: 40000 - initial_mean: 0.0 - initial_std: 0.0707106781187 - dims: 200 - dims: 200 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___fc_layer_0__@__recurrent_group_3__.wbias" - size: 200 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 200 - initial_strategy: 0 - initial_smart: false -} -input_layer_names: "seq_input" -input_layer_names: "sub_seq_input" -output_layer_names: "__last_seq_0__" -output_layer_names: "__first_seq_0__" -output_layer_names: "__last_seq_1__" -output_layer_names: "__last_seq_2__" -output_layer_names: "__last_seq_3__" -output_layer_names: "__last_seq_4__" -sub_models { - name: "root" - layer_names: "seq_input" - layer_names: "sub_seq_input" - layer_names: "label" - layer_names: "__mixed_0__" - layer_names: "__mixed_1__" - layer_names: "__recurrent_group_0__" - layer_names: "rnn_forward" - layer_names: "__last_seq_0__" - layer_names: "__recurrent_group_1__" - layer_names: "rnn_back" - layer_names: "__first_seq_0__" - layer_names: "__recurrent_group_2__" - layer_names: "rnn_subseq_forward" - layer_names: "__last_seq_1__" - layer_names: "__lstm_group_0___recurrent_group" - layer_names: "__lstm_group_0__" - layer_names: "__last_seq_2__" - layer_names: "__gru_group_0___recurrent_group" - layer_names: "__gru_group_0__" - layer_names: "__last_seq_3__" - layer_names: "__recurrent_group_3__" - layer_names: "__fc_layer_0__" - layer_names: "__last_seq_4__" - input_layer_names: "seq_input" - input_layer_names: "sub_seq_input" - output_layer_names: "__last_seq_0__" - output_layer_names: "__first_seq_0__" - output_layer_names: "__last_seq_1__" - output_layer_names: "__last_seq_2__" - output_layer_names: "__last_seq_3__" - output_layer_names: "__last_seq_4__" - is_recurrent_layer_group: false -} -sub_models { - name: "__recurrent_group_0__" - layer_names: "seq_input@__recurrent_group_0__" - layer_names: "rnn_forward+delay1@__recurrent_group_0__" - layer_names: "rnn_forward@__recurrent_group_0__" - is_recurrent_layer_group: true - reversed: false - memories { - layer_name: "rnn_forward@__recurrent_group_0__" - link_name: "rnn_forward+delay1@__recurrent_group_0__" - } - in_links { - layer_name: "seq_input" - link_name: "seq_input@__recurrent_group_0__" - } - out_links { - layer_name: "rnn_forward@__recurrent_group_0__" - link_name: "rnn_forward" - } -} -sub_models { - name: "__recurrent_group_1__" - layer_names: "seq_input@__recurrent_group_1__" - layer_names: "rnn_back+delay1@__recurrent_group_1__" - layer_names: "rnn_back@__recurrent_group_1__" - is_recurrent_layer_group: true - reversed: true - memories { - layer_name: "rnn_back@__recurrent_group_1__" - link_name: "rnn_back+delay1@__recurrent_group_1__" - } - in_links { - layer_name: "seq_input" - link_name: "seq_input@__recurrent_group_1__" - } - out_links { - layer_name: "rnn_back@__recurrent_group_1__" - link_name: "rnn_back" - } -} -sub_models { - name: "__recurrent_group_2__" - layer_names: "sub_seq_input@__recurrent_group_2__" - layer_names: "rnn_subseq_forward+delay1@__recurrent_group_2__" - layer_names: "rnn_subseq_forward@__recurrent_group_2__" - is_recurrent_layer_group: true - reversed: false - memories { - layer_name: "rnn_subseq_forward@__recurrent_group_2__" - link_name: "rnn_subseq_forward+delay1@__recurrent_group_2__" - } - in_links { - layer_name: "sub_seq_input" - link_name: 
"sub_seq_input@__recurrent_group_2__" - } - out_links { - layer_name: "rnn_subseq_forward@__recurrent_group_2__" - link_name: "rnn_subseq_forward" - } -} -sub_models { - name: "__lstm_group_0___recurrent_group" - layer_names: "__mixed_0__@__lstm_group_0___recurrent_group" - layer_names: "__lstm_group_0__+delay1@__lstm_group_0___recurrent_group" - layer_names: "__lstm_group_0___state+delay1@__lstm_group_0___recurrent_group" - layer_names: "__lstm_group_0___input_recurrent@__lstm_group_0___recurrent_group" - layer_names: "__lstm_group_0__@__lstm_group_0___recurrent_group" - layer_names: "__lstm_group_0___state@__lstm_group_0___recurrent_group" - is_recurrent_layer_group: true - reversed: false - memories { - layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group" - link_name: "__lstm_group_0__+delay1@__lstm_group_0___recurrent_group" - } - memories { - layer_name: "__lstm_group_0___state@__lstm_group_0___recurrent_group" - link_name: "__lstm_group_0___state+delay1@__lstm_group_0___recurrent_group" - } - in_links { - layer_name: "__mixed_0__" - link_name: "__mixed_0__@__lstm_group_0___recurrent_group" - } - out_links { - layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group" - link_name: "__lstm_group_0__" - } -} -sub_models { - name: "__gru_group_0___recurrent_group" - layer_names: "__mixed_1__@__gru_group_0___recurrent_group" - layer_names: "__gru_group_0__+delay1@__gru_group_0___recurrent_group" - layer_names: "__gru_group_0__@__gru_group_0___recurrent_group" - is_recurrent_layer_group: true - reversed: false - memories { - layer_name: "__gru_group_0__@__gru_group_0___recurrent_group" - link_name: "__gru_group_0__+delay1@__gru_group_0___recurrent_group" - } - in_links { - layer_name: "__mixed_1__" - link_name: "__mixed_1__@__gru_group_0___recurrent_group" - } - out_links { - layer_name: "__gru_group_0__@__gru_group_0___recurrent_group" - link_name: "__gru_group_0__" - } -} -sub_models { - name: "__recurrent_group_3__" - layer_names: "seq_input@__recurrent_group_3__" - layer_names: "__memory_6__@__recurrent_group_3__" - layer_names: "__fc_layer_0__@__recurrent_group_3__" - is_recurrent_layer_group: true - reversed: false - memories { - layer_name: "__fc_layer_0__@__recurrent_group_3__" - link_name: "__memory_6__@__recurrent_group_3__" - } - in_links { - layer_name: "seq_input" - link_name: "seq_input@__recurrent_group_3__" - } - out_links { - layer_name: "__fc_layer_0__@__recurrent_group_3__" - link_name: "__fc_layer_0__" - } -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_roi_pool_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_roi_pool_layer.protostr deleted file mode 100644 index 0ec88aa998cce91be4d0ca5430ad49aa4dc6aa63..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_roi_pool_layer.protostr +++ /dev/null @@ -1,100 +0,0 @@ -type: "nn" -layers { - name: "data" - type: "data" - size: 588 - active_type: "" - height: 14 - width: 14 -} -layers { - name: "rois" - type: "data" - size: 10 - active_type: "" -} -layers { - name: "__conv_0__" - type: "exconv" - size: 3136 - active_type: "" - inputs { - input_layer_name: "data" - input_parameter_name: "___conv_0__.w0" - conv_conf { - filter_size: 3 - channels: 3 - stride: 1 - padding: 1 - groups: 1 - filter_channels: 3 - output_x: 14 - img_size: 14 - caffe_mode: true - filter_size_y: 3 - padding_y: 1 - stride_y: 1 - output_y: 14 - img_size_y: 14 - dilation: 1 - dilation_y: 1 - } - } - 
bias_parameter_name: "___conv_0__.wbias" - num_filters: 16 - shared_biases: true - height: 14 - width: 14 -} -layers { - name: "__roi_pool_0__" - type: "roi_pool" - size: 784 - active_type: "" - inputs { - input_layer_name: "__conv_0__" - roi_pool_conf { - pooled_width: 7 - pooled_height: 7 - spatial_scale: 0.0625 - } - } - inputs { - input_layer_name: "rois" - } - height: 7 - width: 7 -} -parameters { - name: "___conv_0__.w0" - size: 432 - initial_mean: 0.0 - initial_std: 0.272165526976 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___conv_0__.wbias" - size: 16 - initial_mean: 0.0 - initial_std: 0.0 - dims: 16 - dims: 1 - initial_strategy: 0 - initial_smart: false -} -input_layer_names: "data" -input_layer_names: "rois" -output_layer_names: "__roi_pool_0__" -sub_models { - name: "root" - layer_names: "data" - layer_names: "rois" - layer_names: "__conv_0__" - layer_names: "__roi_pool_0__" - input_layer_names: "data" - input_layer_names: "rois" - output_layer_names: "__roi_pool_0__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_row_conv.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_row_conv.protostr deleted file mode 100644 index 19c9f16574ca6fb3a9e9dbfb2d1f52024e604239..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_row_conv.protostr +++ /dev/null @@ -1,41 +0,0 @@ -type: "nn" -layers { - name: "data" - type: "data" - size: 2560 - active_type: "" -} -layers { - name: "__row_conv_layer_0__" - type: "row_conv" - size: 2560 - active_type: "relu" - inputs { - input_layer_name: "data" - input_parameter_name: "___row_conv_layer_0__.w0" - row_conv_conf { - context_length: 19 - } - } -} -parameters { - name: "___row_conv_layer_0__.w0" - size: 48640 - initial_mean: 0.0 - initial_std: 0.229415733871 - dims: 19 - dims: 2560 - initial_strategy: 0 - initial_smart: true -} -input_layer_names: "data" -output_layer_names: "__row_conv_layer_0__" -sub_models { - name: "root" - layer_names: "data" - layer_names: "__row_conv_layer_0__" - input_layer_names: "data" - output_layer_names: "__row_conv_layer_0__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_row_l2_norm_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_row_l2_norm_layer.protostr deleted file mode 100644 index c2786ff55c7023d856d739face5e747cc5fee870..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_row_l2_norm_layer.protostr +++ /dev/null @@ -1,27 +0,0 @@ -type: "nn" -layers { - name: "input" - type: "data" - size: 300 - active_type: "" -} -layers { - name: "__row_l2_norm_layer_0__" - type: "row_l2_norm" - size: 300 - active_type: "" - inputs { - input_layer_name: "input" - } -} -input_layer_names: "input" -output_layer_names: "__row_l2_norm_layer_0__" -sub_models { - name: "root" - layer_names: "input" - layer_names: "__row_l2_norm_layer_0__" - input_layer_names: "input" - output_layer_names: "__row_l2_norm_layer_0__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_scale_shift_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_scale_shift_layer.protostr deleted file mode 100644 index 35ade126a2586a8e3eee6f0ac3c7e49523c8f5c5..0000000000000000000000000000000000000000 --- 
a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_scale_shift_layer.protostr +++ /dev/null @@ -1,72 +0,0 @@ -type: "nn" -layers { - name: "data" - type: "data" - size: 100 - active_type: "" -} -layers { - name: "__scale_shift_0__" - type: "scale_shift" - size: 100 - active_type: "" - inputs { - input_layer_name: "data" - input_parameter_name: "___scale_shift_0__.w0" - } -} -layers { - name: "__scale_shift_1__" - type: "scale_shift" - size: 100 - active_type: "" - inputs { - input_layer_name: "data" - input_parameter_name: "___scale_shift_1__.w0" - } - bias_parameter_name: "___scale_shift_1__.wbias" -} -parameters { - name: "___scale_shift_0__.w0" - size: 1 - initial_mean: 0.0 - initial_std: 1.0 - dims: 1 - dims: 1 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___scale_shift_1__.w0" - size: 1 - initial_mean: 0.0 - initial_std: 1.0 - dims: 1 - dims: 1 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___scale_shift_1__.wbias" - size: 1 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 1 - initial_strategy: 0 - initial_smart: false -} -input_layer_names: "data" -output_layer_names: "__scale_shift_0__" -output_layer_names: "__scale_shift_1__" -sub_models { - name: "root" - layer_names: "data" - layer_names: "__scale_shift_0__" - layer_names: "__scale_shift_1__" - input_layer_names: "data" - output_layer_names: "__scale_shift_0__" - output_layer_names: "__scale_shift_1__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_scale_sub_region_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_scale_sub_region_layer.protostr deleted file mode 100644 index d20133a10ec605654bd3744297673068a77020b8..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_scale_sub_region_layer.protostr +++ /dev/null @@ -1,51 +0,0 @@ -type: "nn" -layers { - name: "data" - type: "data" - size: 2016 - active_type: "" - height: 48 - width: 42 -} -layers { - name: "indices" - type: "data" - size: 6 - active_type: "" -} -layers { - name: "__scale_sub_region_0__" - type: "scale_sub_region" - size: 2016 - active_type: "" - inputs { - input_layer_name: "data" - scale_sub_region_conf { - image_conf { - channels: 1 - img_size: 42 - img_size_y: 48 - } - value: 0.0 - } - } - inputs { - input_layer_name: "indices" - } - height: 48 - width: 42 -} -input_layer_names: "data" -input_layer_names: "indices" -output_layer_names: "__scale_sub_region_0__" -sub_models { - name: "root" - layer_names: "data" - layer_names: "indices" - layer_names: "__scale_sub_region_0__" - input_layer_names: "data" - input_layer_names: "indices" - output_layer_names: "__scale_sub_region_0__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_concat_reshape.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_concat_reshape.protostr deleted file mode 100644 index 9d1b41c9d5586235984771d610f5df40a8754522..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_concat_reshape.protostr +++ /dev/null @@ -1,51 +0,0 @@ -type: "nn" -layers { - name: "data1" - type: "data" - size: 30 - active_type: "" -} -layers { - name: "data2" - type: "data" - size: 30 - active_type: "" -} -layers { - name: "__seqconcat_0__" - type: "seqconcat" - size: 30 - active_type: "" - inputs { - input_layer_name: "data1" 
- } - inputs { - input_layer_name: "data2" - } -} -layers { - name: "__seqreshape_0__" - type: "seqreshape" - size: 5 - active_type: "" - inputs { - input_layer_name: "data1" - } -} -input_layer_names: "data1" -input_layer_names: "data2" -output_layer_names: "__seqconcat_0__" -output_layer_names: "__seqreshape_0__" -sub_models { - name: "root" - layer_names: "data1" - layer_names: "data2" - layer_names: "__seqconcat_0__" - layer_names: "__seqreshape_0__" - input_layer_names: "data1" - input_layer_names: "data2" - output_layer_names: "__seqconcat_0__" - output_layer_names: "__seqreshape_0__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_slice_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_slice_layer.protostr deleted file mode 100644 index 5b73d614fe862e74c8dc5c24a776c0020334224c..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_slice_layer.protostr +++ /dev/null @@ -1,79 +0,0 @@ -type: "nn" -layers { - name: "word" - type: "data" - size: 128 - active_type: "" -} -layers { - name: "starts" - type: "data" - size: 5 - active_type: "" -} -layers { - name: "ends" - type: "data" - size: 5 - active_type: "" -} -layers { - name: "__seq_slice_layer_0__" - type: "seq_slice" - size: 128 - active_type: "" - inputs { - input_layer_name: "word" - } - inputs { - input_layer_name: "starts" - } - inputs { - input_layer_name: "ends" - } -} -layers { - name: "__seq_slice_layer_1__" - type: "seq_slice" - size: 128 - active_type: "" - inputs { - input_layer_name: "word" - } - inputs { - input_layer_name: "starts" - } - select_first: true -} -layers { - name: "__seq_slice_layer_2__" - type: "seq_slice" - size: 128 - active_type: "" - inputs { - input_layer_name: "word" - } - inputs { - input_layer_name: "ends" - } - select_first: false -} -input_layer_names: "word" -output_layer_names: "__seq_slice_layer_0__" -output_layer_names: "__seq_slice_layer_1__" -output_layer_names: "__seq_slice_layer_2__" -sub_models { - name: "root" - layer_names: "word" - layer_names: "starts" - layer_names: "ends" - layer_names: "__seq_slice_layer_0__" - layer_names: "__seq_slice_layer_1__" - layer_names: "__seq_slice_layer_2__" - input_layer_names: "word" - output_layer_names: "__seq_slice_layer_0__" - output_layer_names: "__seq_slice_layer_1__" - output_layer_names: "__seq_slice_layer_2__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_sequence_pooling.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_sequence_pooling.protostr deleted file mode 100644 index 8989561df04a60c906c06432fd857227a3814194..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_sequence_pooling.protostr +++ /dev/null @@ -1,162 +0,0 @@ -type: "nn" -layers { - name: "dat_in" - type: "data" - size: 100 - active_type: "" -} -layers { - name: "__seq_pooling_0__" - type: "max" - size: 100 - active_type: "" - inputs { - input_layer_name: "dat_in" - } - trans_type: "seq" - seq_pool_stride: -1 -} -layers { - name: "__seq_pooling_1__" - type: "max" - size: 100 - active_type: "" - inputs { - input_layer_name: "dat_in" - } - trans_type: "non-seq" - seq_pool_stride: -1 -} -layers { - name: "__seq_pooling_2__" - type: "average" - size: 100 - active_type: "" - inputs { - input_layer_name: "dat_in" - } - average_strategy: "average" - 
trans_type: "seq" - seq_pool_stride: -1 -} -layers { - name: "__seq_pooling_3__" - type: "average" - size: 100 - active_type: "" - inputs { - input_layer_name: "dat_in" - } - average_strategy: "average" - trans_type: "non-seq" - seq_pool_stride: -1 -} -layers { - name: "__seq_pooling_4__" - type: "average" - size: 100 - active_type: "" - inputs { - input_layer_name: "dat_in" - } - average_strategy: "sum" - trans_type: "seq" - seq_pool_stride: -1 -} -layers { - name: "__seq_pooling_5__" - type: "average" - size: 100 - active_type: "" - inputs { - input_layer_name: "dat_in" - } - average_strategy: "sum" - trans_type: "non-seq" - seq_pool_stride: -1 -} -layers { - name: "__seq_pooling_6__" - type: "max" - size: 100 - active_type: "" - inputs { - input_layer_name: "dat_in" - } - trans_type: "non-seq" - seq_pool_stride: 5 -} -layers { - name: "__seq_pooling_7__" - type: "average" - size: 100 - active_type: "" - inputs { - input_layer_name: "dat_in" - } - average_strategy: "average" - trans_type: "non-seq" - seq_pool_stride: 5 -} -layers { - name: "__seq_pooling_8__" - type: "average" - size: 100 - active_type: "" - inputs { - input_layer_name: "dat_in" - } - average_strategy: "sum" - trans_type: "non-seq" - seq_pool_stride: 5 -} -layers { - name: "__seq_pooling_9__" - type: "max" - size: 100 - active_type: "" - inputs { - input_layer_name: "dat_in" - } - output_max_index: true - trans_type: "non-seq" - seq_pool_stride: -1 -} -input_layer_names: "dat_in" -output_layer_names: "__seq_pooling_0__" -output_layer_names: "__seq_pooling_1__" -output_layer_names: "__seq_pooling_2__" -output_layer_names: "__seq_pooling_3__" -output_layer_names: "__seq_pooling_4__" -output_layer_names: "__seq_pooling_5__" -output_layer_names: "__seq_pooling_6__" -output_layer_names: "__seq_pooling_7__" -output_layer_names: "__seq_pooling_8__" -output_layer_names: "__seq_pooling_9__" -sub_models { - name: "root" - layer_names: "dat_in" - layer_names: "__seq_pooling_0__" - layer_names: "__seq_pooling_1__" - layer_names: "__seq_pooling_2__" - layer_names: "__seq_pooling_3__" - layer_names: "__seq_pooling_4__" - layer_names: "__seq_pooling_5__" - layer_names: "__seq_pooling_6__" - layer_names: "__seq_pooling_7__" - layer_names: "__seq_pooling_8__" - layer_names: "__seq_pooling_9__" - input_layer_names: "dat_in" - output_layer_names: "__seq_pooling_0__" - output_layer_names: "__seq_pooling_1__" - output_layer_names: "__seq_pooling_2__" - output_layer_names: "__seq_pooling_3__" - output_layer_names: "__seq_pooling_4__" - output_layer_names: "__seq_pooling_5__" - output_layer_names: "__seq_pooling_6__" - output_layer_names: "__seq_pooling_7__" - output_layer_names: "__seq_pooling_8__" - output_layer_names: "__seq_pooling_9__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_smooth_l1.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_smooth_l1.protostr deleted file mode 100644 index 4aa041ea2e173a6cc2ab21e3c9ea703601929cde..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_smooth_l1.protostr +++ /dev/null @@ -1,40 +0,0 @@ -type: "nn" -layers { - name: "input" - type: "data" - size: 300 - active_type: "" -} -layers { - name: "label" - type: "data" - size: 300 - active_type: "" -} -layers { - name: "__smooth_l1_cost_0__" - type: "smooth_l1" - size: 1 - active_type: "" - inputs { - input_layer_name: "input" - } - inputs { - input_layer_name: "label" - } - coeff: 1.0 -} 
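The `__smooth_l1_cost_0__` layer above pairs a 300-dimensional input with a 300-dimensional label; a config along these lines (a sketch using the v1 `smooth_l1_cost` helper, not a file from this diff) would produce it:

    from paddle.trainer_config_helpers import *

    # Sizes mirror the fixture above; smooth_l1_cost emits a
    # "smooth_l1" cost layer with coeff 1.0 by default.
    data = data_layer(name='input', size=300)
    label = data_layer(name='label', size=300)
    outputs(smooth_l1_cost(input=data, label=label))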
-input_layer_names: "input" -input_layer_names: "label" -output_layer_names: "__smooth_l1_cost_0__" -sub_models { - name: "root" - layer_names: "input" - layer_names: "label" - layer_names: "__smooth_l1_cost_0__" - input_layer_names: "input" - input_layer_names: "label" - output_layer_names: "__smooth_l1_cost_0__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_split_datasource.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_split_datasource.protostr deleted file mode 100644 index 569b0b945a762e8b596e197adc06df64e33311af..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_split_datasource.protostr +++ /dev/null @@ -1,72 +0,0 @@ -model_config { - type: "nn" - layers { - name: "a" - type: "data" - size: 10 - active_type: "" - } - input_layer_names: "a" - output_layer_names: "a" - sub_models { - name: "root" - layer_names: "a" - input_layer_names: "a" - output_layer_names: "a" - is_recurrent_layer_group: false - } -} -data_config { - type: "py2" - files: "train.list" - async_load_data: false - for_test: false - load_data_module: "a" - load_data_object: "c" - load_data_args: "" - data_ratio: 1 - is_main_data: true - usage_ratio: 1.0 -} -opt_config { - batch_size: 1000 - algorithm: "sgd" - learning_rate: 0.001 - learning_rate_decay_a: 0.0 - learning_rate_decay_b: 0.0 - l1weight: 0.1 - l2weight: 0.0 - c1: 0.0001 - backoff: 0.5 - owlqn_steps: 10 - max_backoff: 5 - l2weight_zero_iter: 0 - average_window: 0 - learning_method: "momentum" - ada_epsilon: 1e-06 - do_average_in_cpu: false - ada_rou: 0.95 - learning_rate_schedule: "poly" - delta_add_rate: 1.0 - shrink_parameter_value: 0 - adam_beta1: 0.9 - adam_beta2: 0.999 - adam_epsilon: 1e-08 - learning_rate_args: "" - async_lagged_grad_discard_ratio: 1.5 -} -test_data_config { - type: "py2" - files: "test.list" - async_load_data: false - for_test: true - load_data_module: "b" - load_data_object: "d" - load_data_args: "" - data_ratio: 1 - is_main_data: true - usage_ratio: 1.0 -} -save_dir: "./output/model" -start_pass: 0 - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_spp_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_spp_layer.protostr deleted file mode 100644 index ca1b2d8cffd6b472dfe40feeeb762e169bc853c7..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_spp_layer.protostr +++ /dev/null @@ -1,40 +0,0 @@ -type: "nn" -layers { - name: "data" - type: "data" - size: 3200 - active_type: "" - height: 20 - width: 10 -} -layers { - name: "__spp_0__" - type: "spp" - size: 80 - active_type: "" - inputs { - input_layer_name: "data" - spp_conf { - image_conf { - channels: 16 - img_size: 10 - img_size_y: 20 - } - pool_type: "max-projection" - pyramid_height: 2 - } - } - height: 1 - width: 5 -} -input_layer_names: "data" -output_layer_names: "__spp_0__" -sub_models { - name: "root" - layer_names: "data" - layer_names: "__spp_0__" - input_layer_names: "data" - output_layer_names: "__spp_0__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_sub_nested_seq_select_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_sub_nested_seq_select_layer.protostr deleted file mode 100644 index 4b906b113e3c0569d5576127e100d097e4923436..0000000000000000000000000000000000000000 --- 
a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_sub_nested_seq_select_layer.protostr +++ /dev/null @@ -1,37 +0,0 @@ -type: "nn" -layers { - name: "input_seq" - type: "data" - size: 300 - active_type: "" -} -layers { - name: "input" - type: "data" - size: 5 - active_type: "" -} -layers { - name: "__sub_nested_seq_layer_0__" - type: "sub_nested_seq" - size: 300 - active_type: "" - inputs { - input_layer_name: "input_seq" - } - inputs { - input_layer_name: "input" - } -} -input_layer_names: "input_seq" -output_layer_names: "__sub_nested_seq_layer_0__" -sub_models { - name: "root" - layer_names: "input_seq" - layer_names: "input" - layer_names: "__sub_nested_seq_layer_0__" - input_layer_names: "input_seq" - output_layer_names: "__sub_nested_seq_layer_0__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/unused_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/unused_layers.protostr deleted file mode 100644 index 89ed28406e553ba93bec8c86879a85f0a5c1caa1..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/unused_layers.protostr +++ /dev/null @@ -1,27 +0,0 @@ -type: "nn" -layers { - name: "probs" - type: "data" - size: 100 - active_type: "" -} -layers { - name: "__sampling_id_layer_0__" - type: "sampling_id" - size: 100 - active_type: "" - inputs { - input_layer_name: "probs" - } -} -input_layer_names: "probs" -output_layer_names: "__sampling_id_layer_0__" -sub_models { - name: "root" - layer_names: "probs" - layer_names: "__sampling_id_layer_0__" - input_layer_names: "probs" - output_layer_names: "__sampling_id_layer_0__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/util_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/util_layers.protostr deleted file mode 100644 index 7a2f3eab38808a031c27cf7ab9d6273952e389eb..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/util_layers.protostr +++ /dev/null @@ -1,87 +0,0 @@ -type: "nn" -layers { - name: "a" - type: "data" - size: 10 - active_type: "" -} -layers { - name: "b" - type: "data" - size: 10 - active_type: "" -} -layers { - name: "__addto_0__" - type: "addto" - size: 10 - active_type: "" - inputs { - input_layer_name: "a" - } - inputs { - input_layer_name: "b" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__concat_0__" - type: "concat" - size: 20 - active_type: "" - inputs { - input_layer_name: "a" - } - inputs { - input_layer_name: "b" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__concat_1__" - type: "concat2" - size: 20 - active_type: "" - inputs { - input_layer_name: "a" - proj_conf { - type: "identity" - name: "___concat_1__.w0" - input_size: 10 - output_size: 10 - } - } - inputs { - input_layer_name: "b" - proj_conf { - type: "identity" - name: "___concat_1__.w1" - input_size: 10 - output_size: 10 - } - } -} -input_layer_names: "a" -input_layer_names: "b" -output_layer_names: "__addto_0__" -output_layer_names: "__concat_0__" -output_layer_names: "__concat_1__" -sub_models { - name: "root" - layer_names: "a" - layer_names: "b" - layer_names: "__addto_0__" - layer_names: "__concat_0__" - layer_names: "__concat_1__" - input_layer_names: "a" - input_layer_names: "b" - output_layer_names: "__addto_0__" - output_layer_names: "__concat_0__" - output_layer_names: "__concat_1__" - is_recurrent_layer_group: 
false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/run_tests.sh b/python/paddle/trainer_config_helpers/tests/configs/run_tests.sh deleted file mode 100755 index c8a3b190b19148ddb701020f5be55c4c29a17079..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/run_tests.sh +++ /dev/null @@ -1,44 +0,0 @@ -#!/bin/bash -cd `dirname $0` - -set -e -PYTHON_EXEC=$1 -COMPARE_PROTO_UTIL=$2 - -protostr=`dirname $0`/protostr - -files=`ls $protostr | grep -v "unittest"` - -./generate_protostr.sh ${PYTHON_EXEC} - -. ./file_list.sh - -if [ -z ${COMPARE_PROTO_UTIL} ]; then - for file in $files - do - base_protostr=$protostr/$file - new_protostr=$protostr/$file.unittest - diff $base_protostr $new_protostr -u - diff $protostr/$file $protostr/$file.non_file_config.unittest -u - done -else - for file in ${configs[*]} - do - if ! ${COMPARE_PROTO_UTIL} $protostr/$file.protostr $protostr/$file.protostr.unittest; then - diff $protostr/$file.protostr $protostr/$file.protostr.unittest -u - fi - if ! ${COMPARE_PROTO_UTIL} $protostr/$file.protostr $protostr/$file.protostr.non_file_config.unittest; then - diff $protostr/$file.protostr $protostr/$file.protostr.non_file_config.unittest -u - fi - done - - for file in ${whole_configs[*]} - do - if ! ${COMPARE_PROTO_UTIL} $protostr/$file.protostr $protostr/$file.protostr.unittest --whole; then - diff $protostr/$file.protostr $protostr/$file.protostr.unittest -u - fi - if ! ${COMPARE_PROTO_UTIL} $protostr/$file.protostr $protostr/$file.protostr.non_file_config.unittest --whole; then - diff $protostr/$file.protostr $protostr/$file.protostr.non_file_config.unittest -u - fi - done -fi diff --git a/python/paddle/trainer_config_helpers/tests/configs/shared_fc.py b/python/paddle/trainer_config_helpers/tests/configs/shared_fc.py deleted file mode 100644 index 3229252a2f4f5083041fd04c5fb9e8400453e601..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/shared_fc.py +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
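The `shared_fc.py` config whose body follows ties two fc layers to one weight matrix by reusing a single named `ParamAttr`; stripped to its essentials (initialization arguments omitted), the sharing pattern is:

    from paddle.trainer_config_helpers import *

    # Both layers name the same parameter, so they train one shared
    # 200x200 weight matrix.
    shared = ParamAttr(name='fc_param')
    a = data_layer(name='feature_a', size=200)
    b = data_layer(name='feature_b', size=200)
    hidden_a = fc_layer(input=a, size=200, param_attr=shared)
    hidden_b = fc_layer(input=b, size=200, param_attr=shared)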
- -from paddle.trainer_config_helpers import * - -settings(learning_rate=1e-4, batch_size=1000) - -a = data_layer(name='feature_a', size=200) -b = data_layer(name='feature_b', size=200) - -fc_param = ParamAttr(name='fc_param', initial_max=1.0, initial_min=-1.0) -bias_param = ParamAttr(name='bias_param', initial_mean=0.0, initial_std=0.0) - -softmax_param = ParamAttr( - name='softmax_param', initial_max=1.0, initial_min=-1.0) - -hidden_a = fc_layer( - input=a, size=200, param_attr=fc_param, bias_attr=bias_param) -hidden_b = fc_layer( - input=b, size=200, param_attr=fc_param, bias_attr=bias_param) - -predict = fc_layer( - input=[hidden_a, hidden_b], - param_attr=[softmax_param, softmax_param], - bias_attr=False, - size=10, - act=SoftmaxActivation()) - -outputs( - classification_cost( - input=predict, label=data_layer( - name='label', size=10))) diff --git a/python/paddle/trainer_config_helpers/tests/configs/shared_gru.py b/python/paddle/trainer_config_helpers/tests/configs/shared_gru.py deleted file mode 100644 index dff561fdf7818b8bb0684dbf9260fbe8badf363d..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/shared_gru.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -settings(learning_rate=1e-4, batch_size=1000) - -data_1 = data_layer(name='data_a', size=100) -data_2 = data_layer(name='data_b', size=100) - -mixed_param = ParamAttr(name='mixed_param') - -gru_param = ParamAttr(name='gru_param') -gru_bias = ParamAttr(name='gru_bias', initial_mean=0., initial_std=0.) - -gru1 = simple_gru( - input=data_1, - size=200, - mixed_param_attr=mixed_param, - mixed_bias_param_attr=False, - gru_bias_attr=gru_bias, - gru_param_attr=gru_param) - -gru2 = simple_gru( - input=data_2, - size=200, - mixed_param_attr=mixed_param, - mixed_bias_param_attr=False, - gru_bias_attr=gru_bias, - gru_param_attr=gru_param) - -softmax_param = ParamAttr(name='softmax_param') - -predict = fc_layer( - input=[last_seq(input=gru1), last_seq(input=gru2)], - size=10, - param_attr=[softmax_param, softmax_param], - bias_attr=False, - act=SoftmaxActivation()) -outputs( - classification_cost( - input=predict, label=data_layer( - name='label', size=10))) diff --git a/python/paddle/trainer_config_helpers/tests/configs/shared_lstm.py b/python/paddle/trainer_config_helpers/tests/configs/shared_lstm.py deleted file mode 100644 index 97ef2d07ae88fbb8a9cf9ec9f7997e05514bd46f..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/shared_lstm.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -settings(learning_rate=1e-4, batch_size=1000) - -data_1 = data_layer(name='data_a', size=100) -data_2 = data_layer(name='data_b', size=100) - -mixed_param = ParamAttr(name='mixed_param') - -with mixed_layer(size=400, bias_attr=False) as m1: - m1 += full_matrix_projection(input=data_1, param_attr=mixed_param) - -with mixed_layer(size=400, bias_attr=False) as m2: - m2 += full_matrix_projection(input=data_2, param_attr=mixed_param) - -lstm_param = ParamAttr(name='lstm_param') -lstm_bias = ParamAttr(name='lstm_bias', initial_mean=0., initial_std=0.) - -lstm1 = lstmemory_group( - input=m1, - param_attr=lstm_param, - lstm_bias_attr=lstm_bias, - input_proj_bias_attr=False) - -lstm2 = lstmemory_group( - input=m2, - param_attr=lstm_param, - lstm_bias_attr=lstm_bias, - input_proj_bias_attr=False) - -softmax_param = ParamAttr(name='softmax_param') - -predict = fc_layer( - input=[last_seq(input=lstm1), last_seq(input=lstm2)], - size=10, - param_attr=[softmax_param, softmax_param], - bias_attr=False, - act=SoftmaxActivation()) -outputs( - classification_cost( - input=predict, label=data_layer( - name='label', size=10))) diff --git a/python/paddle/trainer_config_helpers/tests/configs/simple_rnn_layers.py b/python/paddle/trainer_config_helpers/tests/configs/simple_rnn_layers.py deleted file mode 100644 index f882efcba21112231a4c09627fd58ad88833038c..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/simple_rnn_layers.py +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from paddle.trainer_config_helpers import * - -settings(batch_size=1000, learning_rate=1e-4) - -din = data_layer(name='data', size=200) - -hidden = fc_layer(input=din, size=200, act=SigmoidActivation()) - -rnn = recurrent_layer(input=hidden, act=SigmoidActivation()) - -rnn2 = recurrent_layer(input=hidden, act=SigmoidActivation(), reverse=True) - -lstm1_param = fc_layer( - input=hidden, size=200 * 4, act=LinearActivation(), bias_attr=False) - -lstm1 = lstmemory(input=lstm1_param, act=SigmoidActivation()) - -lstm2_param = fc_layer( - input=hidden, size=200 * 4, act=LinearActivation(), bias_attr=False) - -lstm2 = lstmemory(input=lstm2_param, act=SigmoidActivation(), reverse=True) - -gru1_param = fc_layer( - input=hidden, size=200 * 3, act=LinearActivation(), bias_attr=False) -gru1 = grumemory(input=gru1_param, act=SigmoidActivation()) - -gru2_param = fc_layer( - input=hidden, size=200 * 3, act=LinearActivation(), bias_attr=False) -gru2 = grumemory(input=gru2_param, act=SigmoidActivation(), reverse=True) - -outputs( - last_seq(input=rnn), - first_seq(input=rnn2), - last_seq(input=lstm1), - first_seq(input=lstm2), - last_seq(input=gru1), - first_seq(gru2)) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_BatchNorm3D.py b/python/paddle/trainer_config_helpers/tests/configs/test_BatchNorm3D.py deleted file mode 100644 index 169038deb1915a1899b46b48d6b661045d3c0a52..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_BatchNorm3D.py +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -settings(batch_size=1000, learning_rate=1e-4) - -#data = data_layer(name='data', size=180, width=30, height=6) -#batchNorm = batch_norm_layer(data, num_channels=1) -#outputs(batchNorm) - -data3D = data_layer(name='data3D', size=120 * 3, width=20, height=6, depth=3) -batchNorm3D = batch_norm_layer(data3D, num_channels=1, img3D=True) -outputs(batchNorm3D) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_bi_grumemory.py b/python/paddle/trainer_config_helpers/tests/configs/test_bi_grumemory.py deleted file mode 100644 index d29e4e5c4d6356c9ac8726efb92adcf15b191f67..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_bi_grumemory.py +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -settings(batch_size=1000, learning_rate=1e-4) - -din = data_layer(name='data', size=120) - -outputs(bidirectional_gru(input=din, size=40, return_seq=True)) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_bilinear_interp.py b/python/paddle/trainer_config_helpers/tests/configs/test_bilinear_interp.py deleted file mode 100644 index 5e724ba7d1730efa2ee26910abd68b89a4f1c737..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_bilinear_interp.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -settings(batch_size=1000, learning_rate=1e-5) - -data = data_layer(name='data', size=2304) - -conv = img_conv_layer( - input=data, - filter_size=3, - num_channels=1, - num_filters=16, - padding=1, - act=LinearActivation(), - bias_attr=True) - -bilinear = bilinear_interp_layer(input=conv, out_size_x=64, out_size_y=64) - -pool = img_pool_layer( - input=bilinear, - num_channels=16, - pool_size=2, - stride=2, - pool_type=MaxPooling()) - -fc = fc_layer(input=pool, size=384, bias_attr=False) - -outputs(fc) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_clip_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_clip_layer.py deleted file mode 100644 index 95a1192bfae396fd96df9c8678020c089b4f7d41..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_clip_layer.py +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -data = data_layer(name='input', size=300) -clip = clip_layer(input=data, min=-10, max=10) - -outputs(clip) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_config_parser_for_non_file_config.py b/python/paddle/trainer_config_helpers/tests/configs/test_config_parser_for_non_file_config.py deleted file mode 100644 index 9b791a0222dd60e9ae2fca8b2798cddd13ed1d1c..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_config_parser_for_non_file_config.py +++ /dev/null @@ -1,51 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) 2016 PaddlePaddle Authors. 
All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import sys
-import re
-import getopt
-
-
-def main(print_whole_config, globals, locals):
-    '''
-    Read a trainer config script from stdin, wrap it in a configs()
-    function, and print the parsed config.
-    '''
-    cmdstr = """from paddle.trainer.config_parser import parse_config\n"""
-    importstr = ""
-    functionstr = ""
-
-    for line in sys.stdin:
-        if re.match("^import", line) or re.match("^from.*import", line):
-            importstr = importstr + line
-        else:
-            functionstr = functionstr + " " + line
-
-    cmdstr = cmdstr + importstr + """def configs():\n""" + functionstr
-    #cmdstr = cmdstr + """def configs():\n""" + importstr + functionstr
-    if print_whole_config:
-        cmdstr = cmdstr + """print parse_config(configs, "")"""
-    else:
-        cmdstr = cmdstr + """print parse_config(configs, "").model_config"""
-
-    exec (cmdstr, globals, locals)
-
-
-if __name__ == '__main__':
-    whole = False
-    opts, args = getopt.getopt(sys.argv[1:], "", ["whole"])
-    for op, value in opts:
-        if op == "--whole":
-            whole = True
-    main(whole, globals(), locals())
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_conv3d_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_conv3d_layer.py
deleted file mode 100644
index f9966e399e73c112c02fa1cab02050aea362b36e..0000000000000000000000000000000000000000
--- a/python/paddle/trainer_config_helpers/tests/configs/test_conv3d_layer.py
+++ /dev/null
@@ -1,63 +0,0 @@
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
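The helper above assembles a parse_config(configs, "") call as a string and execs it; the same entry point can be used directly, and it is assumed here that it also accepts a config file path in place of a callable. A sketch (file name hypothetical):

    from paddle.trainer.config_parser import parse_config

    # Second argument is the config_arg_str, empty in the tests above.
    proto = parse_config('test_fc.py', '')
    print proto.model_config  # layers, parameters, input/output layer names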
- -from paddle.trainer_config_helpers import * - -settings(batch_size=1000, learning_rate=1e-5) - -num_channels = 3 -filter_size = 3 -filter_size_y = 3 -filter_size_z = 3 -stride = 2 -stride_y = 2 -stride_z = 2 -padding = 1 -padding_y = 1 -padding_z = 1 -groups = 1 - -data = data_layer( - name='data', size=12096 * num_channels, height=48, width=42, depth=6) -# first -conv3d_1 = img_conv3d_layer( - input=data, - name='conv3d_1', - num_filters=16, - num_channels=num_channels, - filter_size=filter_size, - stride=stride, - padding=padding, - groups=groups, - bias_attr=True, - shared_biases=True, - trans=False, - layer_type="conv3d", - act=LinearActivation()) -# second -conv3d_2 = img_conv3d_layer( - input=data, - name='conv3d_2', - num_filters=16, - num_channels=num_channels, - filter_size=[filter_size, filter_size_y, filter_size_z], - stride=[stride, stride_y, stride_z], - padding=[padding, padding_y, padding_z], - groups=groups, - bias_attr=True, - shared_biases=True, - trans=False, - layer_type="conv3d", - act=LinearActivation()) -outputs(conv3d_2) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.py b/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.py deleted file mode 100644 index 351694fd55caea9a402cdd5dcab94ebde147d814..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.py +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
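For parser tests like the one below it is enough to feed raw data_layer outputs (e.g. probs) straight into the cost layers; in a real network the probabilities would come out of a trainable softmax layer. A minimal sketch of that usual wiring, with hypothetical names and sizes:

    from paddle.trainer_config_helpers import *

    settings(learning_rate=1e-4, batch_size=1000)

    feat = data_layer(name='feat', size=200)
    lbl = data_layer(name='lbl', size=10)

    # Probabilities produced by the model rather than read from a data source.
    prob = fc_layer(input=feat, size=10, act=SoftmaxActivation())

    outputs(cross_entropy(input=prob, label=lbl))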
- -from paddle.trainer_config_helpers import * - -settings(learning_rate=1e-4, batch_size=1000) - -seq_in = data_layer(name='input', size=200) -labels = data_layer(name='labels', size=5000) - -probs = data_layer(name='probs', size=10) -xe_label = data_layer(name='xe-label', size=10) - -hidden = fc_layer(input=seq_in, size=4) -outputs( - ctc_layer( - input=seq_in, label=labels), - warp_ctc_layer( - input=seq_in, label=labels, blank=0), - crf_layer( - input=hidden, label=data_layer( - name='crf_label', size=4)), - rank_cost( - left=data_layer( - name='left', size=1), - right=data_layer( - name='right', size=1), - label=data_layer( - name='label', size=1)), - lambda_cost( - input=data_layer( - name='list_feature', size=100), - score=data_layer( - name='list_scores', size=1)), - cross_entropy( - input=probs, label=xe_label), - cross_entropy_with_selfnorm( - input=probs, label=xe_label), - huber_regression_cost( - input=seq_in, label=labels), - huber_classification_cost( - input=data_layer( - name='huber_probs', size=1), - label=data_layer( - name='huber_label', size=1)), - multi_binary_label_cross_entropy( - input=probs, label=xe_label), - sum_cost(input=hidden), - nce_layer( - input=hidden, label=labels)) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers_with_weight.py b/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers_with_weight.py deleted file mode 100644 index 8cbcf5de0a3e3ae25a58bb60befff6072594f555..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers_with_weight.py +++ /dev/null @@ -1,33 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -settings(learning_rate=1e-4, batch_size=1000) - -data = data_layer(name='input', size=300) -lbl = data_layer(name='label', size=1) -wt = data_layer(name='weight', size=1) -fc = fc_layer(input=data, size=10, act=SoftmaxActivation()) - -outputs( - classification_cost( - input=fc, label=lbl, weight=wt), - square_error_cost( - input=fc, label=lbl, weight=wt), - nce_layer( - input=fc, - label=data_layer( - name='multi_class_label', size=500), - weight=wt)) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_crop.py b/python/paddle/trainer_config_helpers/tests/configs/test_crop.py deleted file mode 100644 index b4ffff252bb50c518875c03d4e00af9aa9040fba..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_crop.py +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from paddle.trainer_config_helpers import *
-
-settings(batch_size=1000, learning_rate=1e-5)
-
-data = data_layer(name='data', size=2016, height=48, width=42)
-reference_data = data_layer(name='reference', size=768, height=16, width=16)
-
-conv = img_conv_layer(
-    input=data,
-    filter_size=3,
-    num_channels=1,
-    num_filters=16,
-    padding=1,
-    act=LinearActivation(),
-    bias_attr=True)
-
-pool = img_pool_layer(input=conv, pool_size=2, stride=2, pool_type=MaxPooling())
-
-crop = crop_layer(input=[pool, reference_data], axis=2)
-
-outputs(crop)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_cross_entropy_over_beam.py b/python/paddle/trainer_config_helpers/tests/configs/test_cross_entropy_over_beam.py
deleted file mode 100644
index 4a5bdf1181dc4538418a8b89b41a1ff713e423c8..0000000000000000000000000000000000000000
--- a/python/paddle/trainer_config_helpers/tests/configs/test_cross_entropy_over_beam.py
+++ /dev/null
@@ -1,45 +0,0 @@
-#!/usr/bin/env python
-#coding=utf-8
-
-from paddle.trainer_config_helpers import *
-beam_size = 5
-
-# the first beam expansion.
-sentence_states = data_layer(name="sentence_states", size=32)
-sentence_scores = data_layer(name="sentence_scores", size=1)
-topk_sentence_ids = kmax_seq_score_layer(
-    input=sentence_scores, beam_size=beam_size)
-
-# the second beam expansion.
-topk_sen = sub_nested_seq_layer(
-    input=sentence_states, selected_indices=topk_sentence_ids)
-start_pos_scores = fc_layer(input=topk_sen, size=1, act=LinearActivation())
-topk_start_pos_ids = kmax_seq_score_layer(
-    input=start_pos_scores, beam_size=beam_size)
-
-# the final beam expansion.
-topk_start_spans = seq_slice_layer(
-    input=topk_sen, starts=topk_start_pos_ids, ends=None)
-end_pos_scores = fc_layer(
-    input=topk_start_spans, size=1, act=LinearActivation())
-topk_end_pos_ids = kmax_seq_score_layer(
-    input=end_pos_scores, beam_size=beam_size)
-
-# define the cost
-sentence_idx = data_layer(name="sentences_ids", size=1)
-start_idx = data_layer(name="start_ids", size=1)
-end_idx = data_layer(name="end_ids", size=1)
-cost = cross_entropy_over_beam(input=[
-    BeamInput(
-        candidate_scores=sentence_scores,
-        selected_candidates=topk_sentence_ids,
-        gold=sentence_idx), BeamInput(
-            candidate_scores=start_pos_scores,
-            selected_candidates=topk_start_pos_ids,
-            gold=start_idx), BeamInput(
-                candidate_scores=end_pos_scores,
-                selected_candidates=topk_end_pos_ids,
-                gold=end_idx)
-])
-
-outputs(cost)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_deconv3d_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_deconv3d_layer.py
deleted file mode 100644
index 08e701c7a8ddae48df2ac01565a5ce2120a7d592..0000000000000000000000000000000000000000
--- a/python/paddle/trainer_config_helpers/tests/configs/test_deconv3d_layer.py
+++ /dev/null
@@ -1,64 +0,0 @@
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -settings(batch_size=1000, learning_rate=1e-5) - -num_channels = 3 -filter_size = 3 -filter_size_y = 3 -filter_size_z = 3 -stride = 2 -stride_y = 2 -stride_z = 2 -padding = 1 -padding_y = 1 -padding_z = 1 -groups = 1 - -data = data_layer( - name='data', size=12096 * num_channels, height=48, width=42, depth=6) - -# first -deconv3d_1 = img_conv3d_layer( - input=data, - name='deconv3d_1', - num_filters=16, - num_channels=num_channels, - filter_size=filter_size, - stride=stride, - padding=padding, - groups=groups, - bias_attr=True, - shared_biases=True, - trans=True, - layer_type="deconv3d", - act=LinearActivation()) -# second -deconv3d_2 = img_conv3d_layer( - input=data, - name='deconv3d_2', - num_filters=16, - num_channels=num_channels, - filter_size=[filter_size, filter_size_y, filter_size_z], - stride=[stride, stride_y, stride_z], - padding=[padding, padding_y, padding_z], - groups=groups, - bias_attr=True, - shared_biases=True, - trans=True, - layer_type="deconv3d", - act=LinearActivation()) -outputs(deconv3d_2) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_detection_output_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_detection_output_layer.py deleted file mode 100644 index 4ecd1c2b7e0e6c9fd894c6fa268b1496fbf4fd9e..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_detection_output_layer.py +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -settings(batch_size=1000, learning_rate=1e-5) - -input_loc = data_layer(name='input_loc', size=16, height=16, width=1) - -input_conf = data_layer(name='input_conf', size=8, height=1, width=8) - -priorbox = data_layer(name='priorbox', size=32, height=4, width=8) - -detout = detection_output_layer( - input_loc=input_loc, - input_conf=input_conf, - priorbox=priorbox, - num_classes=21, - nms_threshold=0.45, - nms_top_k=400, - keep_top_k=200, - confidence_threshold=0.01, - background_id=0, - name='test_detection_output') - -outputs(detout) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_dot_prod_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_dot_prod_layer.py deleted file mode 100644 index 9b444bc2c02bdc80022a9229fbfd11c009bf740a..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_dot_prod_layer.py +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. 
All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -vec1 = data_layer(name='vector1', size=10) -vec2 = data_layer(name='vector2', size=10) -dot_product = dot_prod_layer(input1=vec1, input2=vec2) - -outputs(dot_product) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_expand_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_expand_layer.py deleted file mode 100644 index 85101d2b927d5cee112e6ea14b54a9a8fae8ac79..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_expand_layer.py +++ /dev/null @@ -1,28 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -settings(batch_size=1000, learning_rate=1e-5) - -din = data_layer(name='data', size=30) -data_seq = data_layer(name='data_seq', size=30) - -outputs( - expand_layer( - input=din, expand_as=data_seq, expand_level=ExpandLevel.FROM_SEQUENCE), - expand_layer( - input=din, - expand_as=data_seq, - expand_level=ExpandLevel.FROM_NO_SEQUENCE)) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_factorization_machine.py b/python/paddle/trainer_config_helpers/tests/configs/test_factorization_machine.py deleted file mode 100644 index 48ac46c5bb654f18889297da3d249b2449e30078..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_factorization_machine.py +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
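factorization_machine below is understood to emit only the second-order term of a factorization machine, i.e. the sum over i < j of <v_i, v_j> * x_i * x_j, with factor_size latent dimensions per feature and (by assumption) a single scalar output. Under that reading, a full FM-style predictor would add a first-order linear term; a sketch with hypothetical names:

    from paddle.trainer_config_helpers import *

    x = data_layer(name='x', size=1024)

    # Pairwise-interaction term (assumed to be a single scalar score).
    second_order = factorization_machine(input=x, factor_size=10)
    # First-order linear term, also a single scalar.
    first_order = fc_layer(input=x, size=1, act=LinearActivation())

    predict = addto_layer(input=[first_order, second_order], act=SigmoidActivation())
    outputs(predict)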
- -from paddle.trainer_config_helpers import * - -data = data_layer(name='data', size=1024) - -fm = factorization_machine(input=data, factor_size=10) - -outputs(fm) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_fc.py b/python/paddle/trainer_config_helpers/tests/configs/test_fc.py deleted file mode 100644 index f1e454d21129c92312f6b2b065112cb1019d3fd1..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_fc.py +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -settings(batch_size=1000, learning_rate=1e-5) - -din = data_layer(name='data', size=100) - -trans = trans_layer(input=din) - -hidden = fc_layer(input=trans, size=100, bias_attr=False) - -mask = data_layer(name='mask', size=100) - -hidden_sel = selective_fc_layer( - input=din, select=mask, size=100, act=SigmoidActivation()) - -outputs(hidden, hidden_sel) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_gated_unit_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_gated_unit_layer.py deleted file mode 100644 index afc3e9207c5427ea123cf67fd0a154e15949dcdf..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_gated_unit_layer.py +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -data = data_layer(name='input', size=256) -glu = gated_unit_layer( - size=512, - input=data, - act=TanhActivation(), - gate_attr=ExtraLayerAttribute(error_clipping_threshold=100.0), - gate_param_attr=ParamAttr(initial_std=1e-4), - gate_bias_attr=ParamAttr(initial_std=1), - inproj_attr=ExtraLayerAttribute(error_clipping_threshold=100.0), - inproj_param_attr=ParamAttr(initial_std=1e-4), - inproj_bias_attr=ParamAttr(initial_std=1), - layer_attr=ExtraLayerAttribute(error_clipping_threshold=100.0)) - -outputs(glu) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_grumemory_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_grumemory_layer.py deleted file mode 100644 index ac9902d08c60a0d9f0aa4af91d4b91e458944f21..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_grumemory_layer.py +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. 
All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -settings(batch_size=1000, learning_rate=1e-4) - -din = data_layer(name='data', size=120) - -outputs( - grumemory( - input=din, - size=40, - reverse=True, - gate_act=TanhActivation(), - act=SigmoidActivation())) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_hsigmoid.py b/python/paddle/trainer_config_helpers/tests/configs/test_hsigmoid.py deleted file mode 100644 index da781c149b86563469ae308d748f1a5b63c2327f..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_hsigmoid.py +++ /dev/null @@ -1,22 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -settings(learning_rate=1e-4, batch_size=1000) - -din = data_layer(name='data', size=100) -label = data_layer(name='label', size=10) - -outputs(hsigmoid(input=din, label=label, num_classes=10)) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_kmax_seq_socre_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_kmax_seq_socre_layer.py deleted file mode 100644 index 171da10f75dae03eed7e110d0efd07d6a18e1ecf..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_kmax_seq_socre_layer.py +++ /dev/null @@ -1,9 +0,0 @@ -#!/usr/bin/env python -#coding=utf-8 -from paddle.trainer_config_helpers import * - -data = data_layer(name="input_seq", size=128) -scores = fc_layer(input=data, size=1, act=ExpActivation()) -kmax_seq_id = kmax_seq_score_layer(input=scores, beam_size=5) - -outputs(kmax_seq_id) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_l2_distance_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_l2_distance_layer.py deleted file mode 100644 index 42c9b5deea78d218826aab7a0892e113fe1c1b23..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_l2_distance_layer.py +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -outputs( - l2_distance_layer( - x=data_layer( - name='x', size=128), y=data_layer( - name='y', size=128))) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_lstmemory_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_lstmemory_layer.py deleted file mode 100644 index 26eeea5461f6ab95707170bf9a2d593ce2031e7a..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_lstmemory_layer.py +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -settings(batch_size=1000, learning_rate=1e-5) - -din = data_layer(name='data', size=128) - -outputs( - lstmemory( - input=din, - reverse=True, - gate_act=TanhActivation(), - act=TanhActivation(), - size=32)) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_maxout.py b/python/paddle/trainer_config_helpers/tests/configs/test_maxout.py deleted file mode 100644 index 2cd41a306a74ce60f943d381eeba647a00b6780a..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_maxout.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
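The maxout config below depends on some channel arithmetic: maxout_layer takes an element-wise max across `groups` slices of the channel dimension, so the output channel count is the input count divided by groups. Spelled out for the sizes used below:

    conv_channels = 16
    pool_channels = conv_channels // 2      # groups=2, matches num_channels=8 below
    conv2_channels = 128
    block_channels = conv2_channels // 4    # groups=4, matches num_channels=32 below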
- -from paddle.trainer_config_helpers import * - -settings(batch_size=1000, learning_rate=1e-5) - -data = data_layer(name='data', size=2304, height=48, width=48) - -conv = img_conv_layer( - input=data, - filter_size=3, - num_channels=1, - num_filters=16, - padding=1, - act=LinearActivation(), - bias_attr=True) - -maxout = maxout_layer(input=conv, num_channels=16, groups=2) - -pool = img_pool_layer( - input=maxout, num_channels=8, pool_size=2, stride=2, pool_type=MaxPooling()) - -conv2 = img_conv_layer( - input=pool, - filter_size=3, - num_channels=8, - num_filters=128, - padding=1, - act=LinearActivation(), - bias_attr=True) - -maxout2 = maxout_layer(input=conv2, num_channels=128, groups=4) - -block = block_expand_layer( - input=maxout2, - num_channels=32, - stride_x=1, - stride_y=1, - block_x=1, - block_y=6) - -fc = fc_layer(input=block, size=384, bias_attr=False) - -outputs(fc) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_multibox_loss_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_multibox_loss_layer.py deleted file mode 100644 index b4fd9052c410a776d2192fe9bebf88bd82976565..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_multibox_loss_layer.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -settings(batch_size=1000, learning_rate=1e-5) - -input_loc = data_layer(name='input_loc', size=16, height=16, width=1) - -input_conf = data_layer(name='input_conf', size=8, height=1, width=8) - -priorbox = data_layer(name='priorbox', size=32, height=4, width=8) - -label = data_layer(name='label', size=24, height=4, width=6) - -multibox_loss = multibox_loss_layer( - input_loc=input_loc, - input_conf=input_conf, - priorbox=priorbox, - label=label, - num_classes=21, - overlap_threshold=0.5, - neg_pos_ratio=3.0, - neg_overlap=0.5, - background_id=0, - name='test_multibox_loss') - -outputs(multibox_loss) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_multiplex_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_multiplex_layer.py deleted file mode 100644 index bfba07be869184cb497d422a48b450983f6e2945..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_multiplex_layer.py +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -settings(batch_size=1000, learning_rate=1e-5) - -index = data_layer(name='index', size=1) -din1 = data_layer(name='data1', size=30) -din2 = data_layer(name='data2', size=30) -din3 = data_layer(name='data3', size=30) - -dout = multiplex_layer([index, din1, din2, din3]) - -outputs(dout) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_ntm_layers.py b/python/paddle/trainer_config_helpers/tests/configs/test_ntm_layers.py deleted file mode 100644 index 891894172c5d87a29f6bf095f07a2a1ecf5e97bc..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_ntm_layers.py +++ /dev/null @@ -1,44 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -settings(batch_size=1000, learning_rate=1e-5) - -weight = data_layer(name='w', size=1) -a = data_layer(name='a', size=100) -b = data_layer(name='b', size=100) -c = data_layer(name='c', size=200) -d = data_layer(name='d', size=31) - -outputs( - interpolation_layer( - input=[a, b], weight=weight), - power_layer( - input=a, weight=weight), - scaling_layer( - input=a, weight=weight), - cos_sim( - a=a, b=b), - cos_sim( - a=a, b=c, size=2), - sum_to_one_norm_layer(input=a), - conv_shift_layer( - a=a, b=d), - tensor_layer( - a=a, b=b, size=1000), - slope_intercept_layer( - input=a, slope=0.7, intercept=0.9), - linear_comb_layer( - weights=b, vectors=c)) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_pad.py b/python/paddle/trainer_config_helpers/tests/configs/test_pad.py deleted file mode 100644 index c5825c82e5bf4d00baf9342040da62a2aae17ac6..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_pad.py +++ /dev/null @@ -1,34 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
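In the pad config below, each pad list is assumed to read [pad_before, pad_after] along its axis, so pad_layer grows every padded dimension by the sum of its two entries. The resulting shape, spelled out:

    channels = 16 + 2 + 3   # pad_c over the conv's 16 filters
    height = 24 + 1 + 2     # pad_h over 48 / 2 after pooling
    width = 21 + 3 + 1      # pad_w over 42 / 2 after pooling
    print channels * height * width  # total size of the `pad` layer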
- -from paddle.trainer_config_helpers import * - -settings(batch_size=1000, learning_rate=1e-5) - -data = data_layer(name='data', size=2016, height=48, width=42) - -conv = img_conv_layer( - input=data, - filter_size=3, - num_channels=1, - num_filters=16, - padding=1, - act=LinearActivation(), - bias_attr=True) - -pool = img_pool_layer(input=conv, pool_size=2, stride=2, pool_type=MaxPooling()) - -pad = pad_layer(input=pool, pad_c=[2, 3], pad_h=[1, 2], pad_w=[3, 1]) - -outputs(pad) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_pooling3D_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_pooling3D_layer.py deleted file mode 100644 index 5ff52c195a4798b2fcf730814217ae44e5c133a4..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_pooling3D_layer.py +++ /dev/null @@ -1,52 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -settings(batch_size=100, learning_rate=1e-5) - -data_2d = data_layer(name='data_2d', size=6000, height=20, width=10) - -pool_2d = img_pool_layer( - name="pool___2d", - input=data_2d, - num_channels=30, - pool_size=5, - stride=3, - padding=1, - pool_type=AvgPooling()) -outputs(pool_2d) - -data_3d = data_layer( - name='data_3d_1', size=60000, depth=10, height=20, width=10) - -pool_3d_1 = img_pool3d_layer( - name="pool_3d_1", - input=data_3d, - num_channels=30, - pool_size=5, - stride=3, - padding=1, - pool_type=AvgPooling()) -outputs(pool_3d_1) - -pool_3d_2 = img_pool3d_layer( - name="pool_3d_2", - input=data_3d, - num_channels=30, - pool_size=[5, 5, 5], - stride=[3, 3, 3], - padding=[1, 1, 1], - pool_type=MaxPooling()) -outputs(pool_3d_2) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_prelu_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_prelu_layer.py deleted file mode 100644 index d803a0d13d5000e457078d6763599ed66b8ac898..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_prelu_layer.py +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from paddle.trainer_config_helpers import * - -data = data_layer(name='input', size=300, height=10, width=10) -prelu = prelu_layer(input=data, num_channels=3) -prelu = prelu_layer(input=data, partial_sum=1, num_channels=3) -prelu = prelu_layer(input=data, partial_sum=5, num_channels=3) -prelu = prelu_layer(input=data, channel_shared=True, num_channels=3) -prelu = prelu_layer(input=data, channel_shared=False, num_channels=3) - -outputs(prelu) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_print_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_print_layer.py deleted file mode 100644 index ca1f5a45724bb56d86f0093eeb8821f2077b9963..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_print_layer.py +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -settings(learning_rate=1e-4, batch_size=1000) - -din = data_layer(name='input', size=100) - -print_layer(input=din) - -outputs(din) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_recursive_topology.py b/python/paddle/trainer_config_helpers/tests/configs/test_recursive_topology.py deleted file mode 100644 index d44870d804f906996dacc6cc7f09bd480b70c86b..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_recursive_topology.py +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -settings(batch_size=1000, learning_rate=1e-5) - -din = data_layer(name='data', size=100) - -enc = din -for i in range(32): - enc = addto_layer([enc, enc]) - -pred = fc_layer( - input=fc_layer( - input=enc, size=32, act=ReluActivation()), - size=10, - act=SoftmaxActivation()) -outputs(pred) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_repeat_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_repeat_layer.py deleted file mode 100644 index ee90e830df11ad6f9017ee8c143c82882a713881..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_repeat_layer.py +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -settings(batch_size=1000, learning_rate=1e-5) - -din = data_layer(name='data', size=30) - -outputs( - repeat_layer( - input=din, num_repeats=10, as_row_vector=True), - repeat_layer( - input=din, num_repeats=10, act=TanhActivation(), as_row_vector=False)) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_resize_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_resize_layer.py deleted file mode 100644 index 4aa81919dfd2492053b0733eaa6e24b683ec8602..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_resize_layer.py +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -data = data_layer(name='input', size=300) -resized = resize_layer(input=data, size=150) - -outputs(resized) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_rnn_group.py b/python/paddle/trainer_config_helpers/tests/configs/test_rnn_group.py deleted file mode 100644 index 3824ef59953cf9209864590b7ba3f66959202c16..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_rnn_group.py +++ /dev/null @@ -1,62 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
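The recurrent-group config below relies on the memory contract: inside a step function, memory(name=N, size=S) reads the previous time step's output of the layer named N, so naming the step's output layer the same (or calling set_input explicitly, as the no-name variant below does) is what closes the recurrence. A self-contained sketch:

    from paddle.trainer_config_helpers import *

    settings(learning_rate=1e-4, batch_size=1000)

    seq = data_layer(name='seq', size=100)

    def step(x):
        prev = memory(name='rnn_state', size=200)  # previous step's 'rnn_state'
        return fc_layer(input=[x, prev], size=200, name='rnn_state')

    outputs(last_seq(input=recurrent_group(step=step, input=seq)))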
- -from paddle.trainer_config_helpers import * - -settings(learning_rate=1e-4, batch_size=1000) - -seq = data_layer(name='seq_input', size=100) -sub_seq = data_layer(name='sub_seq_input', size=100) -lbl = data_layer(name='label', size=1) - - -def generate_rnn_simple(name): - def rnn_simple(s): - m = memory(name=name, size=200) - fc = fc_layer(input=[s, m], size=200, name=name) - return fc - - return rnn_simple - - -def generate_rnn_simple_no_name(): - def rnn_simple(s): - m = memory(name=None, size=200) - fc = fc_layer(input=[s, m], size=200) - m.set_input(fc) - return fc - - return rnn_simple - - -with mixed_layer() as lstm_param: # test lstm unit, rnn group - lstm_param += full_matrix_projection(input=seq, size=100 * 4) - -with mixed_layer() as gru_param: - gru_param += full_matrix_projection(input=seq, size=100 * 3) - -outputs( - last_seq(input=recurrent_group( - step=generate_rnn_simple('rnn_forward'), input=seq)), - first_seq(input=recurrent_group( - step=generate_rnn_simple('rnn_back'), input=seq, reverse=True)), - last_seq(input=recurrent_group( - step=generate_rnn_simple('rnn_subseq_forward'), - input=SubsequenceInput(input=sub_seq))), - last_seq(input=lstmemory_group( - input=lstm_param, size=100)), - last_seq(input=gru_group( - input=gru_param, size=100)), - last_seq(input=recurrent_group( - step=generate_rnn_simple_no_name(), input=seq)), ) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_roi_pool_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_roi_pool_layer.py deleted file mode 100644 index 6929d106c643a5cc73fbb7a70973101940904fdd..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_roi_pool_layer.py +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -data = data_layer(name='data', size=3 * 14 * 14, height=14, width=14) - -rois = data_layer(name='rois', size=10) - -conv = img_conv_layer( - input=data, - filter_size=3, - num_channels=3, - num_filters=16, - padding=1, - act=LinearActivation(), - bias_attr=True) - -roi_pool = roi_pool_layer( - input=conv, - rois=rois, - pooled_width=7, - pooled_height=7, - spatial_scale=1. / 16) - -outputs(roi_pool) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_row_conv.py b/python/paddle/trainer_config_helpers/tests/configs/test_row_conv.py deleted file mode 100644 index 6381a26fe8417a276846d557ba17b6b0c8f02605..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_row_conv.py +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -settings(batch_size=1000, learning_rate=1e-5) - -data = data_layer(name='data', size=2560) - -row_conv = row_conv_layer(input=data, context_len=19, act=ReluActivation()) - -outputs(row_conv) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_row_l2_norm_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_row_l2_norm_layer.py deleted file mode 100644 index 3c17d2ccfd602d34d4bc0daf37932270f872577b..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_row_l2_norm_layer.py +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -data = data_layer(name='input', size=300) -row_l2_norm = row_l2_norm_layer(input=data) - -outputs(row_l2_norm) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_scale_shift_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_scale_shift_layer.py deleted file mode 100644 index ae8a25ba94de3ac927a898b7587bd2299c6b6af0..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_scale_shift_layer.py +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -data = data_layer(name='data', size=100) - -scale = scale_shift_layer(input=data, bias_attr=False) - -scale_shift = scale_shift_layer(input=data) - -outputs(scale, scale_shift) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_scale_sub_region_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_scale_sub_region_layer.py deleted file mode 100644 index e4f7120bcceb98ab6f9ec6dda682c8ce16a6e3c5..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_scale_sub_region_layer.py +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -settings(batch_size=1000, learning_rate=1e-5) - -data = data_layer(name='data', size=2016, height=48, width=42) -indices = data_layer(name='indices', size=6) - -scale_sub_region = scale_sub_region_layer( - input=data, indices=indices, value=0.0) - -outputs(scale_sub_region) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_seq_concat_reshape.py b/python/paddle/trainer_config_helpers/tests/configs/test_seq_concat_reshape.py deleted file mode 100644 index a6be069e7e28c6fed6ddb4eef7e6f8254aaa8722..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_seq_concat_reshape.py +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -settings(batch_size=1000, learning_rate=1e-5) - -din1 = data_layer(name='data1', size=30) -din2 = data_layer(name='data2', size=30) - -opts = [] -opts.append(seq_concat_layer(a=din1, b=din2)) -opts.append(seq_reshape_layer(input=din1, reshape_size=5)) - -outputs(opts) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_seq_slice_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_seq_slice_layer.py deleted file mode 100644 index 510ad3220893fddac278ba691307d00d57e440a3..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_seq_slice_layer.py +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env python -#coding=utf-8 -from paddle.trainer_config_helpers import * - -input_seq = data_layer("word", size=128) -starts = data_layer("starts", size=5) -ends = data_layer("ends", size=5) - -seq_slice1 = seq_slice_layer(input=input_seq, starts=starts, ends=ends) -seq_slice2 = seq_slice_layer(input=input_seq, starts=starts, ends=None) -seq_slice3 = seq_slice_layer(input=input_seq, starts=None, ends=ends) - -outputs(seq_slice1, seq_slice2, seq_slice3) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_sequence_pooling.py b/python/paddle/trainer_config_helpers/tests/configs/test_sequence_pooling.py deleted file mode 100644 index 7b951a4cd79f7295dd9c259e0d0aa27edd9e1f15..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_sequence_pooling.py +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -settings(learning_rate=1e-4, batch_size=1000) - -din = data_layer(name='dat_in', size=100) - -POOL_TYPE = [MaxPooling, AvgPooling, SumPooling] - -AGG_LEVEL = [AggregateLevel.TO_SEQUENCE, AggregateLevel.TO_NO_SEQUENCE] - -opts = [] - -for pt in POOL_TYPE: - for al in AGG_LEVEL: - opts.append(pooling_layer(input=din, agg_level=al, pooling_type=pt())) - -for pt in POOL_TYPE: - opts.append( - pooling_layer( - input=din, - agg_level=AggregateLevel.TO_NO_SEQUENCE, - pooling_type=pt(), - stride=5)) - -opts.append( - pooling_layer( - input=din, pooling_type=MaxPooling(output_max_index=True))) - -outputs(opts) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_smooth_l1.py b/python/paddle/trainer_config_helpers/tests/configs/test_smooth_l1.py deleted file mode 100644 index 32a4e6f6d08b2f9a33a29ef98a2a60cd967a62e3..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_smooth_l1.py +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -data = data_layer(name='input', size=300) -lbl = data_layer(name='label', size=300) -smooth_l1 = smooth_l1_cost(input=data, label=lbl) - -outputs(smooth_l1) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_split_datasource.py b/python/paddle/trainer_config_helpers/tests/configs/test_split_datasource.py deleted file mode 100644 index ea68b5493ee9a83baf729a5cc32bc1c7a53b1b23..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_split_datasource.py +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
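-# This config exercises define_py_data_sources2 with separate provider
-# modules ("a" for train, "b" for test) and provider objects ("c", "d").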
-
-from paddle.trainer_config_helpers import *
-
-define_py_data_sources2(
-    train_list="train.list",
-    test_list="test.list",
-    module=["a", "b"],
-    obj=("c", "d"))
-settings(learning_rate=1e-3, batch_size=1000)
-
-outputs(data_layer(name="a", size=10))
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_spp_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_spp_layer.py
deleted file mode 100644
index 0e692d4b62c8744397e182ed6ac86785f882bc51..0000000000000000000000000000000000000000
--- a/python/paddle/trainer_config_helpers/tests/configs/test_spp_layer.py
+++ /dev/null
@@ -1,24 +0,0 @@
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from paddle.trainer_config_helpers import *
-
-settings(batch_size=100, learning_rate=1e-5)
-
-data = data_layer(name='data', size=3200, height=20, width=10)
-
-spp = spp_layer(
-    input=data, pyramid_height=2, num_channels=16, pool_type=MaxPooling())
-
-outputs(spp)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_sub_nested_seq_select_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_sub_nested_seq_select_layer.py
deleted file mode 100644
index 6d1c3175ba9801d69f3f9cb9e754858253192270..0000000000000000000000000000000000000000
--- a/python/paddle/trainer_config_helpers/tests/configs/test_sub_nested_seq_select_layer.py
+++ /dev/null
@@ -1,11 +0,0 @@
-#!/usr/bin/env python
-#coding=utf-8
-from paddle.trainer_config_helpers import *
-
-beam_size = 5
-
-data = data_layer(name='input_seq', size=300)
-selected_ids = data_layer(name='input', size=beam_size)
-sub_nest_seq = sub_nested_seq_layer(input=data, selected_indices=selected_ids)
-
-outputs(sub_nest_seq)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/unused_layers.py b/python/paddle/trainer_config_helpers/tests/configs/unused_layers.py
deleted file mode 100644
index 8878e73fff6cdab9135bb34fe1e8d482944a28cd..0000000000000000000000000000000000000000
--- a/python/paddle/trainer_config_helpers/tests/configs/unused_layers.py
+++ /dev/null
@@ -1,25 +0,0 @@
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from paddle.trainer_config_helpers import *
-settings(batch_size=1000, learning_rate=1e-4)
-
-probs = data_layer(name='probs', size=100)
-
-outputs(
-    sampling_id_layer(input=probs),  # It seems this layer does not support training
-
-    # It seems this layer is not correct and should be rewritten.
- # block_expand_layer(input=probs, channel=1, block_x=1, block_y=3), -) diff --git a/python/paddle/trainer_config_helpers/tests/configs/util_layers.py b/python/paddle/trainer_config_helpers/tests/configs/util_layers.py deleted file mode 100644 index da134f100b984711dfb186735764c5d96472b28d..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/util_layers.py +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -settings(learning_rate=1e-4, batch_size=1000) - -a = data_layer(name='a', size=10) -b = data_layer(name='b', size=10) - -result = addto_layer(input=[a, b]) -concat1 = concat_layer(input=[a, b]) -concat2 = concat_layer( - input=[identity_projection(input=a), identity_projection(input=b)]) - -outputs(result, concat1, concat2) diff --git a/python/paddle/trainer_config_helpers/tests/layers_test.py b/python/paddle/trainer_config_helpers/tests/layers_test.py deleted file mode 100644 index b3dd8f8fc784754e749240e1b895b11ef6aba438..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/layers_test.py +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer.config_parser import parse_config_and_serialize - -if __name__ == '__main__': - parse_config_and_serialize( - 'trainer_config_helpers/tests/layers_test_config.py', '') -# layers_test_config.py diff --git a/python/paddle/trainer_config_helpers/tests/layers_test_config.py b/python/paddle/trainer_config_helpers/tests/layers_test_config.py deleted file mode 100644 index e6cd35ee761d1acd0b5c1943554c7ea1de6a13f5..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/layers_test_config.py +++ /dev/null @@ -1,86 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -num_classes = 5 - -x = data_layer(name="input1", size=3) -y = data_layer(name="input2", size=5) - -z = out_prod_layer(input1=x, input2=y) - -x1 = fc_layer(input=x, size=5) -y1 = fc_layer(input=y, size=5) - -z1 = mixed_layer( - act=LinearActivation(), - input=[ - conv_operator( - img=x1, - filter=y1, - filter_size=1, - num_filters=5, - num_channels=5, - stride=1) - ]) - -assert z1.size > 0 - -y2 = fc_layer(input=y, size=15) -z2 = rotate_layer(input=y2, height=5, width=3) - -cos1 = cos_sim(a=x1, b=y1) -cos3 = cos_sim(a=x1, b=y2, size=3) - -linear_comb = linear_comb_layer(weights=x1, vectors=y2, size=3) - -out = fc_layer( - input=[cos1, cos3, linear_comb, z, z1, z2], - size=num_classes, - act=SoftmaxActivation()) - -print_layer(input=[out]) - -outputs(classification_cost(out, data_layer(name="label", size=num_classes))) - -dotmul = mixed_layer( - input=[dotmul_operator( - a=x1, b=x1), dotmul_projection(input=y1)]) - -proj_with_attr_init = mixed_layer( - input=full_matrix_projection( - input=y1, - param_attr=ParamAttr( - learning_rate=0, initial_mean=0, initial_std=0)), - bias_attr=ParamAttr( - initial_mean=0, initial_std=0, learning_rate=0), - act=LinearActivation(), - size=5, - name='proj_with_attr_init') - -# for ctc -tmp = fc_layer( - input=[x1, dotmul, proj_with_attr_init], - size=num_classes + 1, - act=SoftmaxActivation()) -ctc = ctc_layer(input=tmp, label=y, size=num_classes + 1) -ctc_eval = ctc_error_evaluator(input=tmp, label=y) - -settings( - batch_size=10, - learning_rate=2e-3, - learning_method=AdamOptimizer(), - regularization=L2Regularization(8e-4), - gradient_clipping_threshold=25) diff --git a/python/paddle/trainer_config_helpers/tests/test_reset_hook.py b/python/paddle/trainer_config_helpers/tests/test_reset_hook.py deleted file mode 100644 index 4d7542c35b2b6293ce9653154d670e9c79e0ce91..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/test_reset_hook.py +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest -from paddle.trainer.config_parser import parse_config - - -class TestParse(unittest.TestCase): - def test_parse(self): - a = parse_config('trainer_config_helpers/tests/layers_test_config.py', - '') - b = parse_config('trainer_config_helpers/tests/layers_test_config.py', - '') - self.assertEqual(a, b) - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/trainer_config_helpers/utils.py b/python/paddle/trainer_config_helpers/utils.py deleted file mode 100644 index fe6e9cd53cc821d2b6dbdabb7130567e22f8000f..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/utils.py +++ /dev/null @@ -1,33 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. 
All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from paddle.trainer.config_parser import logger
-import functools
-
-__all__ = ['deprecated']
-
-
-def deprecated(instead):
-    def __impl__(func):
-        @functools.wraps(func)
-        def __wrapper__(*args, **kwargs):
-            logger.warning("The interface %s is deprecated "
-                           "and will be removed soon. Please use %s instead." %
-                           (func.__name__, instead))
-
-            return func(*args, **kwargs)
-
-        return __wrapper__
-
-    return __impl__
diff --git a/python/paddle/v2/__init__.py b/python/paddle/v2/__init__.py
deleted file mode 100644
index df710c33d0c0ca16d358dac1eb42327e9cd4c7ae..0000000000000000000000000000000000000000
--- a/python/paddle/v2/__init__.py
+++ /dev/null
@@ -1,156 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import os
-import optimizer
-import layer
-import activation
-import parameters
-import trainer
-import event
-import data_type
-import topology
-import networks
-import evaluator
-from . import dataset
-from . import reader
-from . import plot
-import attr
-import op
-import pooling
-import inference
-import minibatch
-import image
-import paddle.trainer.config_parser as cp
-
-__all__ = [
-    'default_startup_program',
-    'default_main_program',
-    'optimizer',
-    'layer',
-    'activation',
-    'parameters',
-    'init',
-    'trainer',
-    'event',
-    'data_type',
-    'attr',
-    'pooling',
-    'dataset',
-    'reader',
-    'topology',
-    'networks',
-    'infer',
-    'plot',
-    'evaluator',
-    'image',
-    'master',
-]
-
-cp.begin_parse()
-
-
-def set_env_vars(trainer_count):
-    '''Auto-set CPU environment variables if they have not been set before.
-    For MKL:
-      export KMP_AFFINITY, OMP_DYNAMIC according to the Hyper Threading status.
-      export OMP_NUM_THREADS, MKL_NUM_THREADS according to trainer_count.
-    For OpenBLAS:
-      export OPENBLAS_NUM_THREADS, OPENBLAS_MAIN_FREE according to trainer_count.
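-
-    For example (an illustrative scenario): with 4 physical cores, 8 logical
-    processors and trainer_count=2 on an MKL build, this exports OMP_DYNAMIC
-    and KMP_AFFINITY for Hyper Threading and sets OMP_NUM_THREADS=4 and
-    MKL_NUM_THREADS=4, unless those variables are already set.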
-    '''
-    import platform, paddle
-    if not platform.system() in ['Linux', 'Darwin']:
-        return
-
-    def set_env(key, value):
-        '''If the key has not been set in the environment, set it with value.'''
-        assert isinstance(key, str)
-        assert isinstance(value, str)
-        envset = os.environ.get(key)
-        if envset is None:
-            os.environ[key] = value
-
-    def num_physical_cores():
-        '''Get the number of physical cores'''
-        if platform.system() == "Linux":
-            num_sockets = int(
-                os.popen("grep 'physical id' /proc/cpuinfo | sort -u | wc -l")
-                .read())
-            num_cores_per_socket = int(
-                os.popen("grep 'core id' /proc/cpuinfo | sort -u | wc -l")
-                .read())
-            return num_sockets * num_cores_per_socket
-        else:
-            cmds = {"Darwin": "sysctl -n hw.physicalcpu"}
-            return int(os.popen(cmds.get(platform.system(), "expr 1")).read())
-
-    def num_logical_processors():
-        '''Get the number of logical processors'''
-        cmds = {
-            "Linux": "grep \"processor\" /proc/cpuinfo|sort -u|wc -l",
-            "Darwin": "sysctl -n hw.logicalcpu"
-        }
-        return int(os.popen(cmds.get(platform.system(), "expr 1")).read())
-
-    num_cores = num_physical_cores()
-    num_processors = num_logical_processors()
-    if paddle.version.mkl() == 'ON':
-        if num_processors > num_cores:  # Hyper Threading is enabled
-            set_env("OMP_DYNAMIC", "true")
-            set_env("KMP_AFFINITY", "granularity=fine,compact,1,0")
-        else:
-            set_env("OMP_DYNAMIC", "false")
-            set_env("KMP_AFFINITY", "granularity=fine,compact,0,0")
-    threads = num_processors / trainer_count
-    threads = 1 if threads < 1 else threads
-    if paddle.version.mkl() == 'ON':
-        set_env("OMP_NUM_THREADS", str(threads))
-        set_env("MKL_NUM_THREADS", str(threads))
-    else:
-        set_env("OPENBLAS_NUM_THREADS", str(threads))
-        if threads > 1:
-            set_env("OPENBLAS_MAIN_FREE", '1')
-
-
-def init(**kwargs):
-    import py_paddle.swig_paddle as api
-    args = []
-    args_dict = {}
-    # NOTE: append arguments if they are in ENV
-    for ek, ev in os.environ.iteritems():
-        if ek.startswith("PADDLE_INIT_"):
-            args_dict[ek.replace("PADDLE_INIT_", "").lower()] = str(ev)
-
-    args_dict.update(kwargs)
-    # NOTE: arguments from kwargs overwrite those taken from ENV
-    for key in args_dict.keys():
-        args.append('--%s=%s' % (key, str(args_dict[key])))
-
-    set_env_vars(kwargs.get('trainer_count', 1))
-
-    if 'use_gpu' in kwargs:
-        cp.g_command_config_args['use_gpu'] = kwargs['use_gpu']
-    if 'use_mkldnn' in kwargs:
-        cp.g_command_config_args['use_mkldnn'] = kwargs['use_mkldnn']
-    if 'use_mkl_packed' in kwargs:
-        cp.g_command_config_args['use_mkl_packed'] = kwargs['use_mkl_packed']
-    assert 'parallel_nn' not in kwargs, ("currently 'parallel_nn' is not "
-                                         "supported in v2 APIs.")
-
-    api.initPaddle(*args)
-
-
-infer = inference.infer
-batch = minibatch.batch
diff --git a/python/paddle/v2/activation.py b/python/paddle/v2/activation.py
deleted file mode 100644
index 21261a178203b633ca6cf59a5fc89edc24a868b9..0000000000000000000000000000000000000000
--- a/python/paddle/v2/activation.py
+++ /dev/null
@@ -1,26 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import paddle.trainer_config_helpers.activations
-import copy
-
-__all__ = []
-
-suffix = 'Activation'
-for act in paddle.trainer_config_helpers.activations.__all__:
-    new_name = act[:-len(suffix)]
-    globals()[new_name] = copy.copy(
-        getattr(paddle.trainer_config_helpers.activations, act))
-    globals()[new_name].__name__ = new_name
-    __all__.append(new_name)
diff --git a/python/paddle/v2/attr.py b/python/paddle/v2/attr.py
deleted file mode 100644
index 5d23894d735c463d469f842b875ecbec1dbaf476..0000000000000000000000000000000000000000
--- a/python/paddle/v2/attr.py
+++ /dev/null
@@ -1,29 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import paddle.trainer_config_helpers.attrs
-
-__all__ = [
-    "Param",
-    "Extra",
-    "Hook",
-]
-
-Param = paddle.trainer_config_helpers.attrs.ParameterAttribute
-Extra = paddle.trainer_config_helpers.attrs.ExtraLayerAttribute
-Hook = paddle.trainer_config_helpers.attrs.HookAttribute
-
-for each in paddle.trainer_config_helpers.attrs.__all__:
-    globals()[each] = getattr(paddle.trainer_config_helpers.attrs, each)
-    __all__.append(each)
diff --git a/python/paddle/v2/config_base.py b/python/paddle/v2/config_base.py
deleted file mode 100644
index d9613e001ac784c9fbee3cd182bdd78354c540a7..0000000000000000000000000000000000000000
--- a/python/paddle/v2/config_base.py
+++ /dev/null
@@ -1,68 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import collections
-import re
-import paddle.trainer_config_helpers as conf_helps
-
-__layer_map__ = {}
-
-
-def __map_docstr__(doc, name):
-    if doc is None:
-        return doc
-
-    assert isinstance(doc, basestring)
-
-    # replace LayerOutput with paddle.v2.config_base.Layer
-    doc = doc.replace("LayerOutput", "paddle.v2.config_base.Layer")
-
-    doc = doc.replace('ParameterAttribute', 'paddle.v2.attr.ParameterAttribute')
-
-    doc = re.sub(r'ExtraLayerAttribute[^\s]?', 'paddle.v2.attr.ExtraAttribute',
-                 doc)
-
-    # xxx_layer to xxx
-    doc = re.sub(r"(?P<name>[a-z]+)_layer", r"\g<name>", doc)
-
-    # XxxxActivation to paddle.v2.activation.Xxxx
-    doc = re.sub(r"(?P<name>[A-Z][a-zA-Z]+)Activation",
-                 r"paddle.v2.activation.\g<name>", doc)
-
-    # xxx_evaluator to paddle.v2.evaluator.xxx
-    doc = re.sub(r"(?P<name>[a-z]+)_evaluator", r"evaluator.\g<name>", doc)
-
-    # TODO(yuyang18): Add more rules if needed.
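-    # For example (an illustrative fragment, not taken from a real
-    # docstring), the rules above rewrite
-    #     "fc_layer returns LayerOutput with TanhActivation"
-    # into
-    #     "fc returns paddle.v2.config_base.Layer with paddle.v2.activation.Tanh"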
-    return doc
-
-
-def __convert_to_v2__(f, name, module):
-    def wrapped(*args, **xargs):
-        out = f(*args, **xargs)
-        outs = out
-        if not isinstance(out, collections.Sequence):
-            outs = [out]
-        for l in outs:
-            if isinstance(l, conf_helps.LayerOutput):
-                __layer_map__[l.full_name] = l
-        return out
-
-    wrapped.__doc__ = __map_docstr__(f.__doc__, name)
-    wrapped.__name__ = name
-    wrapped.__module__ = module
-
-    return wrapped
-
-
-Layer = conf_helps.LayerOutput
diff --git a/python/paddle/v2/data_feeder.py b/python/paddle/v2/data_feeder.py
deleted file mode 100644
index 98dfb85a0ea57050bf8dd8d46fca9574801d8eb3..0000000000000000000000000000000000000000
--- a/python/paddle/v2/data_feeder.py
+++ /dev/null
@@ -1,133 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from py_paddle import DataProviderConverter
-import collections
-import paddle.trainer.PyDataProvider2 as pydp2
-
-__all__ = ['DataFeeder']
-
-
-def default_feeding_map(data_types):
-    reader_dict = dict()
-    for i, tp in enumerate(data_types):
-        reader_dict[tp[0]] = i
-    return reader_dict
-
-
-class DataFeeder(DataProviderConverter):
-    """
-    DataFeeder converts the data returned by paddle.reader into a data structure
-    of Arguments which is defined in the API. The paddle.reader usually returns
-    a list of mini-batch data entries. Each data entry in the list is one sample.
-    Each sample is a list or a tuple with one feature or multiple features.
-    DataFeeder converts these mini-batch data entries into Arguments in order
-    to feed them to the C++ interface.
-
-    A simple usage example is shown below:
-
-    ..  code-block:: python
-
-        feeding = ['image', 'label']
-        data_types = enumerate_data_types_of_data_layers(topology)
-        feeder = DataFeeder(data_types=data_types, feeding=feeding)
-
-        minibatch_data = [([1.0, 2.0, 3.0, ...], 5)]
-
-        arg = feeder(minibatch_data)
-
-
-    If mini-batch data and data layers are not mapped one to one, we can pass
-    a dictionary as the feeding parameter to represent the mapping
-    relationship.
-
-
-    ..  code-block:: python
-
-        data_types = [('image', paddle.data_type.dense_vector(784)),
-                      ('label', paddle.data_type.integer_value(10))]
-        feeding = {'image':0, 'label':1}
-        feeder = DataFeeder(data_types=data_types, feeding=feeding)
-        minibatch_data = [
-            ( [1.0,2.0,3.0,4.0], 5, [6,7,8] ),  # first sample
-            ( [1.0,2.0,3.0,4.0], 5, [6,7,8] )   # second sample
-        ]
-        # or minibatch_data = [
-        #     [ [1.0,2.0,3.0,4.0], 5, [6,7,8] ],  # first sample
-        #     [ [1.0,2.0,3.0,4.0], 5, [6,7,8] ]   # second sample
-        # ]
-        arg = feeder.convert(minibatch_data)
-
-    ..  note::
-
-        This module is for internal use only. Users should use the `reader`
-        interface.
-
-
-
-    :param data_types: A list to specify data name and type. Each item is
-                       a tuple of (data_name, data_type).
-
-    :type data_types: list
-    :param feeding: A dictionary or a sequence to specify the position of each
-                    data in the input data.
-    :type feeding: dict|collections.Sequence|None
-    """
-
-    def __init__(self, data_types, feeding=None):
-        self.input_names = []
-        input_types = []
-        if feeding is None:
-            feeding = default_feeding_map(data_types)
-        elif isinstance(feeding, collections.Sequence):
-            feed_list = feeding
-            feeding = dict()
-            for i, name in enumerate(feed_list):
-                feeding[name] = i
-        elif not isinstance(feeding, dict):
-            raise TypeError("Feeding should be dict or sequence or None.")
-
-        self.feeding = feeding
-        for each in data_types:
-            self.input_names.append(each[0])
-            if not isinstance(each[1], pydp2.InputType):
-                raise TypeError("second item in each data_type should be an "
-                                "InputType")
-            input_types.append(each[1])
-        DataProviderConverter.__init__(self, input_types)
-
-    def __len__(self):
-        return len(self.input_names)
-
-    def convert(self, dat, argument=None):
-        """
-        :param dat: A list of mini-batch data. Each sample is a list or tuple
-                    of one feature or multiple features.
-
-        :type dat: list
-        :param argument: An Arguments object that contains this mini-batch data
-                         with one or multiple features. The Arguments definition
-                         is in the API.
-        :type argument: py_paddle.swig_paddle.Arguments
-        """
-
-        def reorder_data(data):
-            retv = []
-            for each in data:
-                reorder = []
-                for name in self.input_names:
-                    reorder.append(each[self.feeding[name]])
-                retv.append(reorder)
-            return retv
-
-        return DataProviderConverter.convert(self, reorder_data(dat), argument)
diff --git a/python/paddle/v2/data_type.py b/python/paddle/v2/data_type.py
deleted file mode 100644
index 226997465f2ec97c6224b248427739592e9694df..0000000000000000000000000000000000000000
--- a/python/paddle/v2/data_type.py
+++ /dev/null
@@ -1,27 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import paddle.trainer.PyDataProvider2 as pydp2
-
-import_list = [
-    nm for nm in dir(pydp2)
-    if '_' in nm and nm[0] != '_' and ('value' in nm or 'vector' in nm or
-                                       'array' in nm)
-]
-import_list.extend(['InputType'])
-
-for nm in import_list:
-    globals()[nm] = getattr(pydp2, nm)
-
-__all__ = import_list
diff --git a/python/paddle/v2/dataset/__init__.py b/python/paddle/v2/dataset/__init__.py
deleted file mode 100644
index 38056fe0a9496bcb5de76634bbab267e324dc2a4..0000000000000000000000000000000000000000
--- a/python/paddle/v2/dataset/__init__.py
+++ /dev/null
@@ -1,46 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-""" -Dataset package. -""" - -import mnist -import imikolov -import imdb -import cifar -import movielens -import conll05 -import uci_housing -import sentiment -import wmt14 -import wmt16 -import mq2007 -import flowers -import voc2012 - -__all__ = [ - 'mnist', - 'imikolov', - 'imdb', - 'cifar', - 'movielens', - 'conll05', - 'sentiment', - 'uci_housing', - 'wmt14', - 'wmt16', - 'mq2007', - 'flowers', - 'voc2012', -] diff --git a/python/paddle/v2/dataset/cifar.py b/python/paddle/v2/dataset/cifar.py deleted file mode 100644 index 662655c836dbc54bd6187dcd3dac7354d6c8ecd1..0000000000000000000000000000000000000000 --- a/python/paddle/v2/dataset/cifar.py +++ /dev/null @@ -1,148 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -CIFAR dataset. - -This module will download dataset from -https://www.cs.toronto.edu/~kriz/cifar.html and parse train/test set into -paddle reader creators. - -The CIFAR-10 dataset consists of 60000 32x32 colour images in 10 classes, -with 6000 images per class. There are 50000 training images and 10000 test -images. - -The CIFAR-100 dataset is just like the CIFAR-10, except it has 100 classes -containing 600 images each. There are 500 training images and 100 testing -images per class. - -""" - -import cPickle -import itertools -import numpy -import paddle.v2.dataset.common -import tarfile - -__all__ = ['train100', 'test100', 'train10', 'test10', 'convert'] - -URL_PREFIX = 'https://www.cs.toronto.edu/~kriz/' -CIFAR10_URL = URL_PREFIX + 'cifar-10-python.tar.gz' -CIFAR10_MD5 = 'c58f30108f718f92721af3b95e74349a' -CIFAR100_URL = URL_PREFIX + 'cifar-100-python.tar.gz' -CIFAR100_MD5 = 'eb9058c3a382ffc7106e4002c42a8d85' - - -def reader_creator(filename, sub_name, cycle=False): - def read_batch(batch): - data = batch['data'] - labels = batch.get('labels', batch.get('fine_labels', None)) - assert labels is not None - for sample, label in itertools.izip(data, labels): - yield (sample / 255.0).astype(numpy.float32), int(label) - - def reader(): - with tarfile.open(filename, mode='r') as f: - names = (each_item.name for each_item in f - if sub_name in each_item.name) - - while True: - for name in names: - batch = cPickle.load(f.extractfile(name)) - for item in read_batch(batch): - yield item - if not cycle: - break - - return reader - - -def train100(): - """ - CIFAR-100 training set creator. - - It returns a reader creator, each sample in the reader is image pixels in - [0, 1] and label in [0, 99]. - - :return: Training reader creator - :rtype: callable - """ - return reader_creator( - paddle.v2.dataset.common.download(CIFAR100_URL, 'cifar', CIFAR100_MD5), - 'train') - - -def test100(): - """ - CIFAR-100 test set creator. - - It returns a reader creator, each sample in the reader is image pixels in - [0, 1] and label in [0, 9]. - - :return: Test reader creator. 
-    :rtype: callable
-    """
-    return reader_creator(
-        paddle.v2.dataset.common.download(CIFAR100_URL, 'cifar', CIFAR100_MD5),
-        'test')
-
-
-def train10(cycle=False):
-    """
-    CIFAR-10 training set creator.
-
-    It returns a reader creator, each sample in the reader is image pixels in
-    [0, 1] and label in [0, 9].
-
-    :param cycle: whether to cycle through the dataset
-    :type cycle: bool
-    :return: Training reader creator
-    :rtype: callable
-    """
-    return reader_creator(
-        paddle.v2.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5),
-        'data_batch',
-        cycle=cycle)
-
-
-def test10(cycle=False):
-    """
-    CIFAR-10 test set creator.
-
-    It returns a reader creator, each sample in the reader is image pixels in
-    [0, 1] and label in [0, 9].
-
-    :param cycle: whether to cycle through the dataset
-    :type cycle: bool
-    :return: Test reader creator.
-    :rtype: callable
-    """
-    return reader_creator(
-        paddle.v2.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5),
-        'test_batch',
-        cycle=cycle)
-
-
-def fetch():
-    paddle.v2.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5)
-    paddle.v2.dataset.common.download(CIFAR100_URL, 'cifar', CIFAR100_MD5)
-
-
-def convert(path):
-    """
-    Converts dataset to recordio format
-    """
-    paddle.v2.dataset.common.convert(path, train100(), 1000, "cifar_train100")
-    paddle.v2.dataset.common.convert(path, test100(), 1000, "cifar_test100")
-    paddle.v2.dataset.common.convert(path, train10(), 1000, "cifar_train10")
-    paddle.v2.dataset.common.convert(path, test10(), 1000, "cifar_test10")
diff --git a/python/paddle/v2/dataset/common.py b/python/paddle/v2/dataset/common.py
deleted file mode 100644
index c6ff09a1d1e3ca56877e986c3ed3ae9ecd0a7316..0000000000000000000000000000000000000000
--- a/python/paddle/v2/dataset/common.py
+++ /dev/null
@@ -1,236 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import requests
-import hashlib
-import os
-import errno
-import shutil
-import sys
-import importlib
-import paddle.v2.dataset
-import cPickle
-import glob
-import cPickle as pickle
-
-__all__ = [
-    'DATA_HOME',
-    'download',
-    'md5file',
-    'split',
-    'cluster_files_reader',
-    'convert',
-]
-
-DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset')
-
-
-# When running unit tests, there could be multiple processes trying to
-# create the DATA_HOME directory simultaneously, so we cannot use an if
-# condition to check for the existence of the directory; instead, we use
-# the filesystem as the synchronization mechanism by catching returned
-# errors.
-def must_mkdirs(path):
-    try:
-        os.makedirs(path)
-    except OSError as exc:
-        if exc.errno != errno.EEXIST:
-            raise
-        pass
-
-
-must_mkdirs(DATA_HOME)
-
-
-def md5file(fname):
-    hash_md5 = hashlib.md5()
-    f = open(fname, "rb")
-    for chunk in iter(lambda: f.read(4096), b""):
-        hash_md5.update(chunk)
-    f.close()
-    return hash_md5.hexdigest()
-
-
-def download(url, module_name, md5sum, save_name=None):
-    dirname = os.path.join(DATA_HOME, module_name)
-    if not os.path.exists(dirname):
-        os.makedirs(dirname)
-
-    filename = os.path.join(dirname,
-                            url.split('/')[-1]
-                            if save_name is None else save_name)
-
-    retry = 0
-    retry_limit = 3
-    while not (os.path.exists(filename) and md5file(filename) == md5sum):
-        if os.path.exists(filename):
-            print "file md5", md5file(filename), md5sum
-        if retry < retry_limit:
-            retry += 1
-        else:
-            raise RuntimeError("Cannot download {0} within retry limit {1}".
-                               format(url, retry_limit))
-        print "Cache file %s not found, downloading %s" % (filename, url)
-        r = requests.get(url, stream=True)
-        total_length = r.headers.get('content-length')
-
-        if total_length is None:
-            with open(filename, 'wb') as f:
-                shutil.copyfileobj(r.raw, f)
-        else:
-            with open(filename, 'wb') as f:
-                dl = 0
-                total_length = int(total_length)
-                for data in r.iter_content(chunk_size=4096):
-                    dl += len(data)
-                    f.write(data)
-                    done = int(50 * dl / total_length)
-                    sys.stdout.write("\r[%s%s]" % ('=' * done,
-                                                   ' ' * (50 - done)))
-                    sys.stdout.flush()
-
-    return filename
-
-
-def fetch_all():
-    for module_name in filter(lambda x: not x.startswith("__"),
-                              dir(paddle.v2.dataset)):
-        if "fetch" in dir(
-                importlib.import_module("paddle.v2.dataset.%s" % module_name)):
-            getattr(
-                importlib.import_module("paddle.v2.dataset.%s" % module_name),
-                "fetch")()
-
-
-def fetch_all_recordio(path):
-    for module_name in filter(lambda x: not x.startswith("__"),
-                              dir(paddle.v2.dataset)):
-        if "convert" in dir(
-                importlib.import_module("paddle.v2.dataset.%s" % module_name)) and \
-                not module_name == "common":
-            ds_path = os.path.join(path, module_name)
-            must_mkdirs(ds_path)
-            getattr(
-                importlib.import_module("paddle.v2.dataset.%s" % module_name),
-                "convert")(ds_path)
-
-
-def split(reader, line_count, suffix="%05d.pickle", dumper=cPickle.dump):
-    """
-    You can call the function as:
-
-    split(paddle.v2.dataset.cifar.train10(), line_count=1000,
-          suffix="imikolov-train-%05d.pickle")
-
-    The output files are:
-
-    |-imikolov-train-00000.pickle
-    |-imikolov-train-00001.pickle
-    |- ...
-    |-imikolov-train-00480.pickle
-
-    :param reader: is a reader creator
-    :param line_count: line count for each file
-    :param suffix: the suffix for the output files; it should contain "%d",
-        which is replaced by the id of each file. Default is "%05d.pickle"
-    :param dumper: is a callable function that dumps an object to a file; it
-        will be called as dumper(obj, f), where obj is the object to be
-        dumped and f is a file object. Default is cPickle.dump.
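-
-    The generated pieces can later be consumed across trainers with
-    cluster_files_reader below; a minimal sketch (the pattern and the
-    counts are illustrative):
-
-        reader = cluster_files_reader("imikolov-train-*.pickle",
-                                      trainer_count=4, trainer_id=0)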
- """ - if not callable(dumper): - raise TypeError("dumper should be callable.") - lines = [] - indx_f = 0 - for i, d in enumerate(reader()): - lines.append(d) - if i >= line_count and i % line_count == 0: - with open(suffix % indx_f, "w") as f: - dumper(lines, f) - lines = [] - indx_f += 1 - if lines: - with open(suffix % indx_f, "w") as f: - dumper(lines, f) - - -def cluster_files_reader(files_pattern, - trainer_count, - trainer_id, - loader=cPickle.load): - """ - Create a reader that yield element from the given files, select - a file set according trainer count and trainer_id - - :param files_pattern: the files which generating by split(...) - :param trainer_count: total trainer count - :param trainer_id: the trainer rank id - :param loader: is a callable function that load object from file, this - function will be called as loader(f) and f is a file object. - Default is cPickle.load - """ - - def reader(): - if not callable(loader): - raise TypeError("loader should be callable.") - file_list = glob.glob(files_pattern) - file_list.sort() - my_file_list = [] - for idx, fn in enumerate(file_list): - if idx % trainer_count == trainer_id: - print "append file: %s" % fn - my_file_list.append(fn) - for fn in my_file_list: - with open(fn, "r") as f: - lines = loader(f) - for line in lines: - yield line - - return reader - - -def convert(output_path, reader, line_count, name_prefix): - import recordio - """ - Convert data from reader to recordio format files. - - :param output_path: directory in which output files will be saved. - :param reader: a data reader, from which the convert program will read - data instances. - :param name_prefix: the name prefix of generated files. - :param max_lines_to_shuffle: the max lines numbers to shuffle before - writing. - """ - - assert line_count >= 1 - indx_f = 0 - - def write_data(indx_f, lines): - filename = "%s/%s-%05d" % (output_path, name_prefix, indx_f) - writer = recordio.writer(filename) - for l in lines: - # FIXME(Yancey1989): - # dumps with protocol: pickle.HIGHEST_PROTOCOL - writer.write(cPickle.dumps(l)) - writer.close() - - lines = [] - for i, d in enumerate(reader()): - lines.append(d) - if i % line_count == 0 and i >= line_count: - write_data(indx_f, lines) - lines = [] - indx_f += 1 - continue - - write_data(indx_f, lines) diff --git a/python/paddle/v2/dataset/conll05.py b/python/paddle/v2/dataset/conll05.py deleted file mode 100644 index 8312900dc43fdd64cc1a205ab846b6f1deaecf5d..0000000000000000000000000000000000000000 --- a/python/paddle/v2/dataset/conll05.py +++ /dev/null @@ -1,257 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Conll05 dataset. -Paddle semantic role labeling Book and demo use this dataset as an example. -Because Conll05 is not free in public, the default downloaded URL is test set -of Conll05 (which is public). Users can change URL and MD5 to their Conll -dataset. 
-A pre-trained word vector model based on the Wikipedia corpus is used to
-initialize the SRL model.
-"""
-
-import tarfile
-import gzip
-import itertools
-import paddle.v2.dataset.common
-
-__all__ = ['test', 'get_dict', 'get_embedding', 'convert']
-
-DATA_URL = 'http://www.cs.upc.edu/~srlconll/conll05st-tests.tar.gz'
-DATA_MD5 = '387719152ae52d60422c016e92a742fc'
-WORDDICT_URL = 'http://paddlemodels.bj.bcebos.com/conll05st%2FwordDict.txt'
-WORDDICT_MD5 = 'ea7fb7d4c75cc6254716f0177a506baa'
-VERBDICT_URL = 'http://paddlemodels.bj.bcebos.com/conll05st%2FverbDict.txt'
-VERBDICT_MD5 = '0d2977293bbb6cbefab5b0f97db1e77c'
-TRGDICT_URL = 'http://paddlemodels.bj.bcebos.com/conll05st%2FtargetDict.txt'
-TRGDICT_MD5 = 'd8c7f03ceb5fc2e5a0fa7503a4353751'
-EMB_URL = 'http://paddlemodels.bj.bcebos.com/conll05st%2Femb'
-EMB_MD5 = 'bf436eb0faa1f6f9103017f8be57cdb7'
-
-UNK_IDX = 0
-
-
-def load_label_dict(filename):
-    d = dict()
-    tag_dict = set()
-    with open(filename, 'r') as f:
-        for i, line in enumerate(f):
-            line = line.strip()
-            if line.startswith("B-"):
-                tag_dict.add(line[2:])
-            elif line.startswith("I-"):
-                tag_dict.add(line[2:])
-        index = 0
-        for tag in tag_dict:
-            d["B-" + tag] = index
-            index += 1
-            d["I-" + tag] = index
-            index += 1
-        d["O"] = index
-    return d
-
-
-def load_dict(filename):
-    d = dict()
-    with open(filename, 'r') as f:
-        for i, line in enumerate(f):
-            d[line.strip()] = i
-    return d
-
-
-def corpus_reader(data_path, words_name, props_name):
-    """
-    Read one corpus. It returns an iterator. Each element of
-    this iterator is a tuple including sentence and labels. The sentence
-    consists of a list of word IDs. The labels include a list of label IDs.
-    :return: an iterator of data.
-    :rtype: iterator
-    """
-
-    def reader():
-        tf = tarfile.open(data_path)
-        wf = tf.extractfile(words_name)
-        pf = tf.extractfile(props_name)
-        with gzip.GzipFile(fileobj=wf) as words_file, gzip.GzipFile(
-                fileobj=pf) as props_file:
-            sentences = []
-            labels = []
-            one_seg = []
-            for word, label in itertools.izip(words_file, props_file):
-                word = word.strip()
-                label = label.strip().split()
-
-                if len(label) == 0:  # end of sentence
-                    for i in xrange(len(one_seg[0])):
-                        a_kind_label = [x[i] for x in one_seg]
-                        labels.append(a_kind_label)
-
-                    if len(labels) >= 1:
-                        verb_list = []
-                        for x in labels[0]:
-                            if x != '-':
-                                verb_list.append(x)
-
-                        for i, lbl in enumerate(labels[1:]):
-                            cur_tag = 'O'
-                            is_in_bracket = False
-                            lbl_seq = []
-                            verb_word = ''
-                            for l in lbl:
-                                if l == '*' and is_in_bracket == False:
-                                    lbl_seq.append('O')
-                                elif l == '*' and is_in_bracket == True:
-                                    lbl_seq.append('I-' + cur_tag)
-                                elif l == '*)':
-                                    lbl_seq.append('I-' + cur_tag)
-                                    is_in_bracket = False
-                                elif l.find('(') != -1 and l.find(')') != -1:
-                                    cur_tag = l[1:l.find('*')]
-                                    lbl_seq.append('B-' + cur_tag)
-                                    is_in_bracket = False
-                                elif l.find('(') != -1 and l.find(')') == -1:
-                                    cur_tag = l[1:l.find('*')]
-                                    lbl_seq.append('B-' + cur_tag)
-                                    is_in_bracket = True
-                                else:
-                                    raise RuntimeError('Unexpected label: %s' %
-                                                       l)
-
-                            yield sentences, verb_list[i], lbl_seq
-
-                    sentences = []
-                    labels = []
-                    one_seg = []
-                else:
-                    sentences.append(word)
-                    one_seg.append(label)
-
-        pf.close()
-        wf.close()
-        tf.close()
-
    return reader
-
-
-def reader_creator(corpus_reader,
-                   word_dict=None,
-                   predicate_dict=None,
-                   label_dict=None):
-    def reader():
-        for sentence, predicate, labels in corpus_reader():
-
-            sen_len = len(sentence)
-
-            verb_index = labels.index('B-V')
-            mark = [0] * len(labels)
-            if verb_index > 0:
-                mark[verb_index - 1] = 1
-                ctx_n1 = sentence[verb_index - 1]
-            else:
-                ctx_n1 = 'bos'
-
-            if verb_index > 1:
-                mark[verb_index - 2] = 1
-                ctx_n2 = sentence[verb_index - 2]
-            else:
-                ctx_n2 = 'bos'
-
-            mark[verb_index] = 1
-            ctx_0 = sentence[verb_index]
-
-            if verb_index < len(labels) - 1:
-                mark[verb_index + 1] = 1
-                ctx_p1 = sentence[verb_index + 1]
-            else:
-                ctx_p1 = 'eos'
-
-            if verb_index < len(labels) - 2:
-                mark[verb_index + 2] = 1
-                ctx_p2 = sentence[verb_index + 2]
-            else:
-                ctx_p2 = 'eos'
-
-            word_idx = [word_dict.get(w, UNK_IDX) for w in sentence]
-
-            ctx_n2_idx = [word_dict.get(ctx_n2, UNK_IDX)] * sen_len
-            ctx_n1_idx = [word_dict.get(ctx_n1, UNK_IDX)] * sen_len
-            ctx_0_idx = [word_dict.get(ctx_0, UNK_IDX)] * sen_len
-            ctx_p1_idx = [word_dict.get(ctx_p1, UNK_IDX)] * sen_len
-            ctx_p2_idx = [word_dict.get(ctx_p2, UNK_IDX)] * sen_len
-
-            pred_idx = [predicate_dict.get(predicate)] * sen_len
-            label_idx = [label_dict.get(w) for w in labels]
-
-            yield word_idx, ctx_n2_idx, ctx_n1_idx, \
-              ctx_0_idx, ctx_p1_idx, ctx_p2_idx, pred_idx, mark, label_idx
-
-    return reader
-
-
-def get_dict():
-    """
-    Get the word, verb and label dictionaries of the Conll05 dataset.
-    """
-    word_dict = load_dict(
-        paddle.v2.dataset.common.download(WORDDICT_URL, 'conll05st',
-                                          WORDDICT_MD5))
-    verb_dict = load_dict(
-        paddle.v2.dataset.common.download(VERBDICT_URL, 'conll05st',
-                                          VERBDICT_MD5))
-    label_dict = load_label_dict(
-        paddle.v2.dataset.common.download(TRGDICT_URL, 'conll05st',
-                                          TRGDICT_MD5))
-    return word_dict, verb_dict, label_dict
-
-
-def get_embedding():
-    """
-    Get the trained word vector based on the Wikipedia corpus.
-    """
-    return paddle.v2.dataset.common.download(EMB_URL, 'conll05st', EMB_MD5)
-
-
-def test():
-    """
-    Conll05 test set creator.
-
-    Because the training dataset is not free, the test dataset is used for
-    training. It returns a reader creator, each sample in the reader is nine
-    features, including sentence sequence, predicate, predicate context,
-    predicate context flag and tagged sequence.
-
-    :return: Training reader creator
-    :rtype: callable
-    """
-    word_dict, verb_dict, label_dict = get_dict()
-    reader = corpus_reader(
-        paddle.v2.dataset.common.download(DATA_URL, 'conll05st', DATA_MD5),
-        words_name='conll05st-release/test.wsj/words/test.wsj.words.gz',
-        props_name='conll05st-release/test.wsj/props/test.wsj.props.gz')
-    return reader_creator(reader, word_dict, verb_dict, label_dict)
-
-
-def fetch():
-    paddle.v2.dataset.common.download(WORDDICT_URL, 'conll05st', WORDDICT_MD5)
-    paddle.v2.dataset.common.download(VERBDICT_URL, 'conll05st', VERBDICT_MD5)
-    paddle.v2.dataset.common.download(TRGDICT_URL, 'conll05st', TRGDICT_MD5)
-    paddle.v2.dataset.common.download(EMB_URL, 'conll05st', EMB_MD5)
-    paddle.v2.dataset.common.download(DATA_URL, 'conll05st', DATA_MD5)
-
-
-def convert(path):
-    """
-    Converts dataset to recordio format
-    """
-    paddle.v2.dataset.common.convert(path, test(), 1000, "conll05_train")
-    paddle.v2.dataset.common.convert(path, test(), 1000, "conll05_test")
diff --git a/python/paddle/v2/dataset/flowers.py b/python/paddle/v2/dataset/flowers.py
deleted file mode 100644
index db12076d54064781bd1060947497622b14783768..0000000000000000000000000000000000000000
--- a/python/paddle/v2/dataset/flowers.py
+++ /dev/null
@@ -1,218 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
-This module will download dataset from
-http://www.robots.ox.ac.uk/~vgg/data/flowers/102/index.html
-and parse train/test set into paddle reader creators.
-
-This set contains images of flowers belonging to 102 different categories.
-The images were acquired by searching the web and taking pictures. There are a
-minimum of 40 images for each category.
-
-The database was used in:
-
-Nilsback, M-E. and Zisserman, A. Automated flower classification over a large
-number of classes. Proceedings of the Indian Conference on Computer Vision,
-Graphics and Image Processing (2008)
-http://www.robots.ox.ac.uk/~vgg/publications/papers/nilsback08.{pdf,ps.gz}.
-
-"""
-import cPickle
-import itertools
-import functools
-from common import download
-import tarfile
-import scipy.io as scio
-from paddle.v2.image import *
-from paddle.v2.reader import *
-import os
-import numpy as np
-from multiprocessing import cpu_count
-__all__ = ['train', 'test', 'valid']
-
-DATA_URL = 'http://www.robots.ox.ac.uk/~vgg/data/flowers/102/102flowers.tgz'
-LABEL_URL = 'http://www.robots.ox.ac.uk/~vgg/data/flowers/102/imagelabels.mat'
-SETID_URL = 'http://www.robots.ox.ac.uk/~vgg/data/flowers/102/setid.mat'
-DATA_MD5 = '33bfc11892f1e405ca193ae9a9f2a118'
-LABEL_MD5 = 'e0620be6f572b9609742df49c70aed4d'
-SETID_MD5 = 'a5357ecc9cb78c4bef273ce3793fc85c'
-# In the official 'readme', tstid is the flag of test data
-# and trnid is the flag of train data. But there is more test data than
-# train data, so we exchange the train data and the test data.
-TRAIN_FLAG = 'tstid'
-TEST_FLAG = 'trnid'
-VALID_FLAG = 'valid'
-
-
-def default_mapper(is_train, sample):
-    '''
-    map image bytes data to the type needed by the model input layer
-    '''
-    img, label = sample
-    img = load_image_bytes(img)
-    img = simple_transform(
-        img, 256, 224, is_train, mean=[103.94, 116.78, 123.68])
-    return img.flatten().astype('float32'), label
-
-
-train_mapper = functools.partial(default_mapper, True)
-test_mapper = functools.partial(default_mapper, False)
-
-
-def reader_creator(data_file,
-                   label_file,
-                   setid_file,
-                   dataset_name,
-                   mapper,
-                   buffered_size=1024,
-                   use_xmap=True,
-                   cycle=False):
-    '''
-    1. read images from tar file and
-       merge images into batch files in 102flowers.tgz_batch/
-    2.
 get a reader to read sample from batch file
-
-    :param data_file: downloaded data file
-    :type data_file: string
-    :param label_file: downloaded label file
-    :type label_file: string
-    :param setid_file: downloaded setid file containing information
-                       about how to split dataset
-    :type setid_file: string
-    :param dataset_name: data set name (tstid|trnid|valid)
-    :type dataset_name: string
-    :param mapper: a function to map image bytes data to the type
-                   needed by the model input layer
-    :type mapper: callable
-    :param buffered_size: the size of buffer used to process images
-    :type buffered_size: int
-    :param cycle: whether to cycle through the dataset
-    :type cycle: bool
-    :return: data reader
-    :rtype: callable
-    '''
-    labels = scio.loadmat(label_file)['labels'][0]
-    indexes = scio.loadmat(setid_file)[dataset_name][0]
-    img2label = {}
-    for i in indexes:
-        img = "jpg/image_%05d.jpg" % i
-        img2label[img] = labels[i - 1]
-    file_list = batch_images_from_tar(data_file, dataset_name, img2label)
-
-    def reader():
-        while True:
-            for file in open(file_list):
-                file = file.strip()
-                batch = None
-                with open(file, 'r') as f:
-                    batch = cPickle.load(f)
-                data = batch['data']
-                labels = batch['label']
-                for sample, label in itertools.izip(data, batch['label']):
-                    yield sample, int(label) - 1
-            if not cycle:
-                break
-
-    if use_xmap:
-        cpu_num = int(os.environ.get('CPU_NUM', cpu_count()))
-        return xmap_readers(mapper, reader, cpu_num, buffered_size)
-    else:
-        return map_readers(mapper, reader)
-
-
-def train(mapper=train_mapper, buffered_size=1024, use_xmap=True, cycle=False):
-    '''
-    Create flowers training set reader.
-    It returns a reader, each sample in the reader is
-    image pixels in [0, 1] and label in [1, 102]
-    translated from original color image by steps:
-    1. resize to 256*256
-    2. random crop to 224*224
-    3. flatten
-    :param mapper: a function to map sample.
-    :type mapper: callable
-    :param buffered_size: the size of buffer used to process images
-    :type buffered_size: int
-    :param cycle: whether to cycle through the dataset
-    :type cycle: bool
-    :return: train data reader
-    :rtype: callable
-    '''
-    return reader_creator(
-        download(DATA_URL, 'flowers', DATA_MD5),
-        download(LABEL_URL, 'flowers', LABEL_MD5),
-        download(SETID_URL, 'flowers', SETID_MD5),
-        TRAIN_FLAG,
-        mapper,
-        buffered_size,
-        use_xmap,
-        cycle=cycle)
-
-
-def test(mapper=test_mapper, buffered_size=1024, use_xmap=True, cycle=False):
-    '''
-    Create flowers test set reader.
-    It returns a reader, each sample in the reader is
-    image pixels in [0, 1] and label in [1, 102]
-    translated from original color image by steps:
-    1. resize to 256*256
-    2. center crop to 224*224
-    3. flatten
-    :param mapper: a function to map sample.
-    :type mapper: callable
-    :param buffered_size: the size of buffer used to process images
-    :type buffered_size: int
-    :param cycle: whether to cycle through the dataset
-    :type cycle: bool
-    :return: test data reader
-    :rtype: callable
-    '''
-    return reader_creator(
-        download(DATA_URL, 'flowers', DATA_MD5),
-        download(LABEL_URL, 'flowers', LABEL_MD5),
-        download(SETID_URL, 'flowers', SETID_MD5),
-        TEST_FLAG,
-        mapper,
-        buffered_size,
-        use_xmap,
-        cycle=cycle)
-
-
-def valid(mapper=test_mapper, buffered_size=1024, use_xmap=True):
-    '''
-    Create flowers validation set reader.
-    It returns a reader, each sample in the reader is
-    image pixels in [0, 1] and label in [1, 102]
-    translated from original color image by steps:
-    1. resize to 256*256
-    2. center crop to 224*224
-    3.
flatten - :param mapper: a function to map sample. - :type mapper: callable - :param buffered_size: the size of buffer used to process images - :type buffered_size: int - :return: test data reader - :rtype: callable - ''' - return reader_creator( - download(DATA_URL, 'flowers', DATA_MD5), - download(LABEL_URL, 'flowers', LABEL_MD5), - download(SETID_URL, 'flowers', SETID_MD5), VALID_FLAG, mapper, - buffered_size, use_xmap) - - -def fetch(): - download(DATA_URL, 'flowers', DATA_MD5) - download(LABEL_URL, 'flowers', LABEL_MD5) - download(SETID_URL, 'flowers', SETID_MD5) diff --git a/python/paddle/v2/dataset/imdb.py b/python/paddle/v2/dataset/imdb.py deleted file mode 100644 index 00c2a3b9928d1ca5f3e8cd5e87ba7ad4108e9dad..0000000000000000000000000000000000000000 --- a/python/paddle/v2/dataset/imdb.py +++ /dev/null @@ -1,148 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -IMDB dataset. - -This module downloads IMDB dataset from -http://ai.stanford.edu/%7Eamaas/data/sentiment/. This dataset contains a set -of 25,000 highly polar movie reviews for training, and 25,000 for testing. -Besides, this module also provides API for building dictionary. -""" - -import paddle.v2.dataset.common -import collections -import tarfile -import re -import string - -__all__ = ['build_dict', 'train', 'test', 'convert'] - -URL = 'http://ai.stanford.edu/%7Eamaas/data/sentiment/aclImdb_v1.tar.gz' -MD5 = '7c2ac02c03563afcf9b574c7e56c153a' - - -def tokenize(pattern): - """ - Read files that match the given pattern. Tokenize and yield each file. - """ - - with tarfile.open(paddle.v2.dataset.common.download(URL, 'imdb', - MD5)) as tarf: - # Note that we should use tarfile.next(), which does - # sequential access of member files, other than - # tarfile.extractfile, which does random access and might - # destroy hard disks. - tf = tarf.next() - while tf != None: - if bool(pattern.match(tf.name)): - # newline and punctuations removal and ad-hoc tokenization. - yield tarf.extractfile(tf).read().rstrip("\n\r").translate( - None, string.punctuation).lower().split() - tf = tarf.next() - - -def build_dict(pattern, cutoff): - """ - Build a word dictionary from the corpus. Keys of the dictionary are words, - and values are zero-based IDs of these words. - """ - word_freq = collections.defaultdict(int) - for doc in tokenize(pattern): - for word in doc: - word_freq[word] += 1 - - # Not sure if we should prune less-frequent words here. 
-    word_freq = filter(lambda x: x[1] > cutoff, word_freq.items())
-
-    dictionary = sorted(word_freq, key=lambda x: (-x[1], x[0]))
-    words, _ = list(zip(*dictionary))
-    word_idx = dict(zip(words, xrange(len(words))))
-    word_idx['<unk>'] = len(words)
-    return word_idx
-
-
-def reader_creator(pos_pattern, neg_pattern, word_idx):
-    UNK = word_idx['<unk>']
-    INS = []
-
-    def load(pattern, out, label):
-        for doc in tokenize(pattern):
-            out.append(([word_idx.get(w, UNK) for w in doc], label))
-
-    load(pos_pattern, INS, 0)
-    load(neg_pattern, INS, 1)
-
-    def reader():
-        for doc, label in INS:
-            yield doc, label
-
-    return reader
-
-
-def train(word_idx):
-    """
-    IMDB training set creator.
-
-    It returns a reader creator, each sample in the reader is a zero-based ID
-    sequence and label in [0, 1].
-
-    :param word_idx: word dictionary
-    :type word_idx: dict
-    :return: Training reader creator
-    :rtype: callable
-    """
-    return reader_creator(
-        re.compile("aclImdb/train/pos/.*\.txt$"),
-        re.compile("aclImdb/train/neg/.*\.txt$"), word_idx)
-
-
-def test(word_idx):
-    """
-    IMDB test set creator.
-
-    It returns a reader creator, each sample in the reader is a zero-based ID
-    sequence and label in [0, 1].
-
-    :param word_idx: word dictionary
-    :type word_idx: dict
-    :return: Test reader creator
-    :rtype: callable
-    """
-    return reader_creator(
-        re.compile("aclImdb/test/pos/.*\.txt$"),
-        re.compile("aclImdb/test/neg/.*\.txt$"), word_idx)
-
-
-def word_dict(cutoff=150):
-    """
-    Build a word dictionary from the corpus.
-
-    :return: Word dictionary
-    :rtype: dict
-    """
-    return build_dict(
-        re.compile("aclImdb/((train)|(test))/((pos)|(neg))/.*\.txt$"), cutoff)
-
-
-def fetch():
-    paddle.v2.dataset.common.download(URL, 'imdb', MD5)
-
-
-def convert(path):
-    """
-    Converts dataset to recordio format
-    """
-    w = word_dict()
-    paddle.v2.dataset.common.convert(path, lambda: train(w), 1000, "imdb_train")
-    paddle.v2.dataset.common.convert(path, lambda: test(w), 1000, "imdb_test")
diff --git a/python/paddle/v2/dataset/imikolov.py b/python/paddle/v2/dataset/imikolov.py
deleted file mode 100644
index 617c722c4165cdfed9e650fc968d623ef6ed4391..0000000000000000000000000000000000000000
--- a/python/paddle/v2/dataset/imikolov.py
+++ /dev/null
@@ -1,161 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
-imikolov's simple dataset.
-
-This module will download dataset from
-http://www.fit.vutbr.cz/~imikolov/rnnlm/ and parse training set and test set
-into paddle reader creators.
-"""
-import paddle.v2.dataset.common
-import collections
-import tarfile
-
-__all__ = ['train', 'test', 'build_dict', 'convert']
-
-URL = 'http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz'
-MD5 = '30177ea32e27c525793142b6bf2c8e2d'
-
-
-class DataType(object):
-    NGRAM = 1
-    SEQ = 2
-
-
-def word_count(f, word_freq=None):
-    if word_freq is None:
-        word_freq = collections.defaultdict(int)
-
-    for l in f:
-        for w in l.strip().split():
-            word_freq[w] += 1
-        word_freq['<s>'] += 1
-        word_freq['<e>'] += 1
-
-    return word_freq
-
-
-def build_dict(min_word_freq=50):
-    """
-    Build a word dictionary from the corpus. Keys of the dictionary are words,
-    and values are zero-based IDs of these words.
-    """
-    train_filename = './simple-examples/data/ptb.train.txt'
-    test_filename = './simple-examples/data/ptb.valid.txt'
-    with tarfile.open(
-            paddle.v2.dataset.common.download(
-                paddle.v2.dataset.imikolov.URL, 'imikolov',
-                paddle.v2.dataset.imikolov.MD5)) as tf:
-        trainf = tf.extractfile(train_filename)
-        testf = tf.extractfile(test_filename)
-        word_freq = word_count(testf, word_count(trainf))
-        if '<unk>' in word_freq:
-            # remove <unk> for now, since we will set it as last index
-            del word_freq['<unk>']
-
-        word_freq = filter(lambda x: x[1] > min_word_freq, word_freq.items())
-
-        word_freq_sorted = sorted(word_freq, key=lambda x: (-x[1], x[0]))
-        words, _ = list(zip(*word_freq_sorted))
-        word_idx = dict(zip(words, xrange(len(words))))
-        word_idx['<unk>'] = len(words)
-
-    return word_idx
-
-
-def reader_creator(filename, word_idx, n, data_type):
-    def reader():
-        with tarfile.open(
-                paddle.v2.dataset.common.download(
-                    paddle.v2.dataset.imikolov.URL, 'imikolov',
-                    paddle.v2.dataset.imikolov.MD5)) as tf:
-            f = tf.extractfile(filename)
-
-            UNK = word_idx['<unk>']
-            for l in f:
-                if DataType.NGRAM == data_type:
-                    assert n > -1, 'Invalid gram length'
-                    l = ['<s>'] + l.strip().split() + ['<e>']
-                    if len(l) >= n:
-                        l = [word_idx.get(w, UNK) for w in l]
-                        for i in range(n, len(l) + 1):
-                            yield tuple(l[i - n:i])
-                elif DataType.SEQ == data_type:
-                    l = l.strip().split()
-                    l = [word_idx.get(w, UNK) for w in l]
-                    src_seq = [word_idx['<s>']] + l
-                    trg_seq = l + [word_idx['<e>']]
-                    if n > 0 and len(src_seq) > n: continue
-                    yield src_seq, trg_seq
-                else:
-                    assert False, 'Unknown data type'
-
-    return reader
-
-
-def train(word_idx, n, data_type=DataType.NGRAM):
-    """
-    imikolov training set creator.
-
-    It returns a reader creator, each sample in the reader is a word ID
-    tuple.
-
-    :param word_idx: word dictionary
-    :type word_idx: dict
-    :param n: sliding window size if type is ngram, otherwise max length of sequence
-    :type n: int
-    :param data_type: data type (ngram or sequence)
-    :type data_type: member variable of DataType (NGRAM or SEQ)
-    :return: Training reader creator
-    :rtype: callable
-    """
-    return reader_creator('./simple-examples/data/ptb.train.txt', word_idx, n,
-                          data_type)
-
-
-def test(word_idx, n, data_type=DataType.NGRAM):
-    """
-    imikolov test set creator.
-
-    It returns a reader creator, each sample in the reader is a word ID
-    tuple. 
- - :param word_idx: word dictionary - :type word_idx: dict - :param n: sliding window size if type is ngram, otherwise max length of sequence - :type n: int - :param data_type: data type (ngram or sequence) - :type data_type: member variable of DataType (NGRAM or SEQ) - :return: Test reader creator - :rtype: callable - """ - return reader_creator('./simple-examples/data/ptb.valid.txt', word_idx, n, - data_type) - - -def fetch(): - paddle.v2.dataset.common.download(URL, "imikolov", MD5) - - -def convert(path): - """ - Converts dataset to recordio format - """ - N = 5 - word_dict = build_dict() - paddle.v2.dataset.common.convert(path, - train(word_dict, N), 1000, - "imikolov_train") - paddle.v2.dataset.common.convert(path, - test(word_dict, N), 1000, "imikolov_test") diff --git a/python/paddle/v2/dataset/mnist.py b/python/paddle/v2/dataset/mnist.py deleted file mode 100644 index 026cf501cfb35ab3fe35d24f52d3c271565482ef..0000000000000000000000000000000000000000 --- a/python/paddle/v2/dataset/mnist.py +++ /dev/null @@ -1,129 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -MNIST dataset. - -This module will download dataset from http://yann.lecun.com/exdb/mnist/ and -parse training set and test set into paddle reader creators. -""" -import paddle.v2.dataset.common -import subprocess -import numpy -import platform -__all__ = ['train', 'test', 'convert'] - -URL_PREFIX = 'http://yann.lecun.com/exdb/mnist/' -TEST_IMAGE_URL = URL_PREFIX + 't10k-images-idx3-ubyte.gz' -TEST_IMAGE_MD5 = '9fb629c4189551a2d022fa330f9573f3' -TEST_LABEL_URL = URL_PREFIX + 't10k-labels-idx1-ubyte.gz' -TEST_LABEL_MD5 = 'ec29112dd5afa0611ce80d1b7f02629c' -TRAIN_IMAGE_URL = URL_PREFIX + 'train-images-idx3-ubyte.gz' -TRAIN_IMAGE_MD5 = 'f68b3c2dcbeaaa9fbdd348bbdeb94873' -TRAIN_LABEL_URL = URL_PREFIX + 'train-labels-idx1-ubyte.gz' -TRAIN_LABEL_MD5 = 'd53e105ee54ea40749a09fcbcd1e9432' - - -def reader_creator(image_filename, label_filename, buffer_size): - def reader(): - if platform.system() == 'Darwin': - zcat_cmd = 'gzcat' - elif platform.system() == 'Linux': - zcat_cmd = 'zcat' - else: - raise NotImplementedError() - - # According to http://stackoverflow.com/a/38061619/724872, we - # cannot use standard package gzip here. - m = subprocess.Popen([zcat_cmd, image_filename], stdout=subprocess.PIPE) - m.stdout.read(16) # skip some magic bytes - - l = subprocess.Popen([zcat_cmd, label_filename], stdout=subprocess.PIPE) - l.stdout.read(8) # skip some magic bytes - - try: # reader could be break. - while True: - labels = numpy.fromfile( - l.stdout, 'ubyte', count=buffer_size).astype("int") - - if labels.size != buffer_size: - break # numpy.fromfile returns empty slice after EOF. 
-
-                images = numpy.fromfile(
-                    m.stdout, 'ubyte', count=buffer_size * 28 * 28).reshape(
-                        (buffer_size, 28 * 28)).astype('float32')
-
-                images = images / 255.0 * 2.0 - 1.0
-
-                for i in xrange(buffer_size):
-                    yield images[i, :], int(labels[i])
-        finally:
-            try:
-                m.terminate()
-            except:
-                pass
-            try:
-                l.terminate()
-            except:
-                pass
-
-    return reader
-
-
-def train():
-    """
-    MNIST training set creator.
-
-    It returns a reader creator, each sample in the reader is image pixels in
-    [0, 1] and label in [0, 9].
-
-    :return: Training reader creator
-    :rtype: callable
-    """
-    return reader_creator(
-        paddle.v2.dataset.common.download(TRAIN_IMAGE_URL, 'mnist',
-                                          TRAIN_IMAGE_MD5),
-        paddle.v2.dataset.common.download(TRAIN_LABEL_URL, 'mnist',
-                                          TRAIN_LABEL_MD5), 100)
-
-
-def test():
-    """
-    MNIST test set creator.
-
-    It returns a reader creator, each sample in the reader is image pixels in
-    [0, 1] and label in [0, 9].
-
-    :return: Test reader creator.
-    :rtype: callable
-    """
-    return reader_creator(
-        paddle.v2.dataset.common.download(TEST_IMAGE_URL, 'mnist',
-                                          TEST_IMAGE_MD5),
-        paddle.v2.dataset.common.download(TEST_LABEL_URL, 'mnist',
-                                          TEST_LABEL_MD5), 100)
-
-
-def fetch():
-    paddle.v2.dataset.common.download(TRAIN_IMAGE_URL, 'mnist', TRAIN_IMAGE_MD5)
-    paddle.v2.dataset.common.download(TRAIN_LABEL_URL, 'mnist', TRAIN_LABEL_MD5)
-    paddle.v2.dataset.common.download(TEST_IMAGE_URL, 'mnist', TEST_IMAGE_MD5)
-    paddle.v2.dataset.common.download(TEST_LABEL_URL, 'mnist', TEST_LABEL_MD5)
-
-
-def convert(path):
-    """
-    Converts dataset to recordio format
-    """
-    paddle.v2.dataset.common.convert(path, train(), 1000, "mnist_train")
-    paddle.v2.dataset.common.convert(path, test(), 1000, "mnist_test")
diff --git a/python/paddle/v2/dataset/movielens.py b/python/paddle/v2/dataset/movielens.py
deleted file mode 100644
index 5b61a9420af1bb81e1d826f8a7b69f34c306d382..0000000000000000000000000000000000000000
--- a/python/paddle/v2/dataset/movielens.py
+++ /dev/null
@@ -1,262 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
-Movielens 1-M dataset.
-
-Movielens 1-M dataset contains 1 million ratings from 6000 users on 4000
-movies, which was collected by GroupLens Research. This module will download
-Movielens 1-M dataset from
-http://files.grouplens.org/datasets/movielens/ml-1m.zip and parse training
-set and test set into paddle reader creators.
-
-"""
-
-import zipfile
-import paddle.v2.dataset.common
-import re
-import random
-import functools
-
-__all__ = [
-    'train', 'test', 'get_movie_title_dict', 'max_movie_id', 'max_user_id',
-    'age_table', 'movie_categories', 'max_job_id', 'user_info', 'movie_info',
-    'convert'
-]
-
-age_table = [1, 18, 25, 35, 45, 50, 56]
-
-URL = 'http://files.grouplens.org/datasets/movielens/ml-1m.zip'
-MD5 = 'c4d9eecfca2ab87c1945afe126590906'
-
-
-class MovieInfo(object):
-    """
-    Movie id, title and categories information are stored in MovieInfo.
-    """
-
-    def __init__(self, index, categories, title):
-        self.index = int(index)
-        self.categories = categories
-        self.title = title
-
-    def value(self):
-        """
-        Get information from a movie.
-        """
-        return [
-            self.index, [CATEGORIES_DICT[c] for c in self.categories],
-            [MOVIE_TITLE_DICT[w.lower()] for w in self.title.split()]
-        ]
-
-    def __str__(self):
-        return "<MovieInfo id(%d), title(%s), categories(%s)>" % (
-            self.index, self.title, self.categories)
-
-    def __repr__(self):
-        return self.__str__()
-
-
-class UserInfo(object):
-    """
-    User id, gender, age, and job information are stored in UserInfo.
-    """
-
-    def __init__(self, index, gender, age, job_id):
-        self.index = int(index)
-        self.is_male = gender == 'M'
-        self.age = age_table.index(int(age))
-        self.job_id = int(job_id)
-
-    def value(self):
-        """
-        Get information from a user.
-        """
-        return [self.index, 0 if self.is_male else 1, self.age, self.job_id]
-
-    def __str__(self):
-        return "<UserInfo id(%d), gender(%s), age(%d), job(%d)>" % (
-            self.index, "M"
-            if self.is_male else "F", age_table[self.age], self.job_id)
-
-    def __repr__(self):
-        return str(self)
-
-
-MOVIE_INFO = None
-MOVIE_TITLE_DICT = None
-CATEGORIES_DICT = None
-USER_INFO = None
-
-
-def __initialize_meta_info__():
-    fn = paddle.v2.dataset.common.download(URL, "movielens", MD5)
-    global MOVIE_INFO
-    if MOVIE_INFO is None:
-        pattern = re.compile(r'^(.*)\((\d+)\)$')
-        with zipfile.ZipFile(file=fn) as package:
-            for info in package.infolist():
-                assert isinstance(info, zipfile.ZipInfo)
-            MOVIE_INFO = dict()
-            title_word_set = set()
-            categories_set = set()
-            with package.open('ml-1m/movies.dat') as movie_file:
-                for i, line in enumerate(movie_file):
-                    movie_id, title, categories = line.strip().split('::')
-                    categories = categories.split('|')
-                    for c in categories:
-                        categories_set.add(c)
-                    title = pattern.match(title).group(1)
-                    MOVIE_INFO[int(movie_id)] = MovieInfo(
-                        index=movie_id, categories=categories, title=title)
-                    for w in title.split():
-                        title_word_set.add(w.lower())
-
-            global MOVIE_TITLE_DICT
-            MOVIE_TITLE_DICT = dict()
-            for i, w in enumerate(title_word_set):
-                MOVIE_TITLE_DICT[w] = i
-
-            global CATEGORIES_DICT
-            CATEGORIES_DICT = dict()
-            for i, c in enumerate(categories_set):
-                CATEGORIES_DICT[c] = i
-
-            global USER_INFO
-            USER_INFO = dict()
-            with package.open('ml-1m/users.dat') as user_file:
-                for line in user_file:
-                    uid, gender, age, job, _ = line.strip().split("::")
-                    USER_INFO[int(uid)] = UserInfo(
-                        index=uid, gender=gender, age=age, job_id=job)
-    return fn
-
-
-def __reader__(rand_seed=0, test_ratio=0.1, is_test=False):
-    fn = __initialize_meta_info__()
-    rand = random.Random(x=rand_seed)
-    with zipfile.ZipFile(file=fn) as package:
-        with package.open('ml-1m/ratings.dat') as rating:
-            for line in rating:
-                if (rand.random() < test_ratio) == is_test:
-                    uid, mov_id, rating, _ = line.strip().split("::")
-                    uid = int(uid)
-                    mov_id = int(mov_id)
-                    rating = float(rating) * 2 - 5.0
-
-                    mov = MOVIE_INFO[mov_id]
-                    usr = USER_INFO[uid]
-                    yield usr.value() + mov.value() + [[rating]]
-
-
-def __reader_creator__(**kwargs):
-    return lambda: __reader__(**kwargs)
-
-
-train = functools.partial(__reader_creator__, is_test=False)
-test = functools.partial(__reader_creator__, is_test=True)
-
-
-def get_movie_title_dict():
-    """
-    Get movie title dictionary.
-    """
-    __initialize_meta_info__()
-    return MOVIE_TITLE_DICT
-
-
-def __max_index_info__(a, b):
-    if a.index > b.index:
-        return a
-    else:
-        return b
-
-
-def max_movie_id():
-    """
-    Get the maximum value of movie id. 
- """ - __initialize_meta_info__() - return reduce(__max_index_info__, MOVIE_INFO.viewvalues()).index - - -def max_user_id(): - """ - Get the maximum value of user id. - """ - __initialize_meta_info__() - return reduce(__max_index_info__, USER_INFO.viewvalues()).index - - -def __max_job_id_impl__(a, b): - if a.job_id > b.job_id: - return a - else: - return b - - -def max_job_id(): - """ - Get the maximum value of job id. - """ - __initialize_meta_info__() - return reduce(__max_job_id_impl__, USER_INFO.viewvalues()).job_id - - -def movie_categories(): - """ - Get movie categoriges dictionary. - """ - __initialize_meta_info__() - return CATEGORIES_DICT - - -def user_info(): - """ - Get user info dictionary. - """ - __initialize_meta_info__() - return USER_INFO - - -def movie_info(): - """ - Get movie info dictionary. - """ - __initialize_meta_info__() - return MOVIE_INFO - - -def unittest(): - for train_count, _ in enumerate(train()()): - pass - for test_count, _ in enumerate(test()()): - pass - - print train_count, test_count - - -def fetch(): - paddle.v2.dataset.common.download(URL, "movielens", MD5) - - -def convert(path): - """ - Converts dataset to recordio format - """ - paddle.v2.dataset.common.convert(path, train(), 1000, "movielens_train") - paddle.v2.dataset.common.convert(path, test(), 1000, "movielens_test") - - -if __name__ == '__main__': - unittest() diff --git a/python/paddle/v2/dataset/mq2007.py b/python/paddle/v2/dataset/mq2007.py deleted file mode 100644 index d3b3dd524c34be660c5f2d4fc5ce2fa0420efbc1..0000000000000000000000000000000000000000 --- a/python/paddle/v2/dataset/mq2007.py +++ /dev/null @@ -1,333 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -MQ2007 dataset - -MQ2007 is a query set from Million Query track of TREC 2007. There are about 1700 queries in it with labeled documents. In MQ2007, the 5-fold cross -validation strategy is adopted and the 5-fold partitions are included in the package. In each fold, there are three subsets for learning: training set, -validation set and testing set. - -MQ2007 dataset from website -http://research.microsoft.com/en-us/um/beijing/projects/letor/LETOR4.0/Data/MQ2007.rar and parse training set and test set into paddle reader creators - -""" - -import os -import functools -import rarfile -from common import download -import numpy as np - -# URL = "http://research.microsoft.com/en-us/um/beijing/projects/letor/LETOR4.0/Data/MQ2007.rar" -URL = "http://www.bigdatalab.ac.cn/benchmark/upload/download_source/7b6dbbe2-842c-11e4-a536-bcaec51b9163_MQ2007.rar" -MD5 = "7be1640ae95c6408dab0ae7207bdc706" - - -def __initialize_meta_info__(): - """ - download and extract the MQ2007 dataset - """ - fn = fetch() - rar = rarfile.RarFile(fn) - dirpath = os.path.dirname(fn) - rar.extractall(path=dirpath) - return dirpath - - -class Query(object): - """ - queries used for learning to rank algorithms. 
It is created from relevance scores and query-document feature vectors
-
-    Parameters:
-    ----------
-    query_id : int
-        query_id in dataset, mapping from query to relevance documents
-    relevance_score : int
-        relevance score of query and document pair
-    feature_vector : array, dense feature
-        feature in vector format
-    description : string
-        comment section in query doc pair data
-    """
-
-    def __init__(self,
-                 query_id=-1,
-                 relevance_score=-1,
-                 feature_vector=None,
-                 description=""):
-        self.query_id = query_id
-        self.relevance_score = relevance_score
-        if feature_vector is None:
-            self.feature_vector = []
-        else:
-            self.feature_vector = feature_vector
-        self.description = description
-
-    def __str__(self):
-        string = "%s %s %s" % (str(self.relevance_score), str(self.query_id),
-                               " ".join(str(f) for f in self.feature_vector))
-        return string
-
-    # @classmethod
-    def _parse_(self, text):
-        """
-        parse line into Query
-        """
-        comment_position = text.find('#')
-        line = text[:comment_position].strip()
-        self.description = text[comment_position + 1:].strip()
-        parts = line.split()
-        if len(parts) != 48:
-            sys.stdout.write("expect 48 space split parts, get %d" %
-                             (len(parts)))
-            return None
-        # format : 0 qid:10 1:0.000272 2:0.000000 ....
-        self.relevance_score = int(parts[0])
-        self.query_id = int(parts[1].split(':')[1])
-        for p in parts[2:]:
-            pair = p.split(':')
-            self.feature_vector.append(float(pair[1]))
-        return self
-
-
-class QueryList(object):
-    """
-    group query into list, every item in list is a Query
-    """
-
-    def __init__(self, querylist=None):
-        self.query_id = -1
-        if querylist is None:
-            self.querylist = []
-        else:
-            self.querylist = querylist
-            for query in self.querylist:
-                if self.query_id == -1:
-                    self.query_id = query.query_id
-                else:
-                    if self.query_id != query.query_id:
-                        raise ValueError("query in list must be same query_id")
-
-    def __iter__(self):
-        for query in self.querylist:
-            yield query
-
-    def __len__(self):
-        return len(self.querylist)
-
-    def __getitem__(self, i):
-        return self.querylist[i]
-
-    def _correct_ranking_(self):
-        if self.querylist is None:
-            return
-        self.querylist.sort(key=lambda x: x.relevance_score, reverse=True)
-
-    def _add_query(self, query):
-        if self.query_id == -1:
-            self.query_id = query.query_id
-        else:
-            if self.query_id != query.query_id:
-                raise ValueError("query in list must be same query_id")
-        self.querylist.append(query)
-
-
-def gen_plain_txt(querylist):
-    """
-    gen plain text in list for other usage
-
-    Parameters:
-    ----------
-    querylist : querylist, one query matches many document pairs in list, see QueryList
-
-    return :
-    ------
-    query_id : np.array, shape=(samples_num, )
-    label : np.array, shape=(samples_num, )
-    querylist : np.array, shape=(samples_num, feature_dimension)
-    """
-    if not isinstance(querylist, QueryList):
-        querylist = QueryList(querylist)
-    querylist._correct_ranking_()
-    for query in querylist:
-        yield querylist.query_id, query.relevance_score, np.array(
-            query.feature_vector)
-
-
-def gen_point(querylist):
-    """
-    gen item in list for point-wise learning to rank algorithm
-
-    Parameters:
-    ----------
-    querylist : querylist, one query matches many document pairs in list, see QueryList
-
-    return :
-    ------
-    label : np.array, shape=(samples_num, )
-    querylist : np.array, shape=(samples_num, feature_dimension)
-    """
-    if not isinstance(querylist, QueryList):
-        querylist = QueryList(querylist)
-    querylist._correct_ranking_()
-    for query in querylist:
-        yield query.relevance_score, np.array(query.feature_vector)
-
-
-def gen_pair(querylist, partial_order="full"):
-    """
-    gen pair for pair-wise learning to rank algorithm
-
-    Parameters:
-    ----------
-    querylist : querylist, one query matches many document pairs in list, see QueryList
-    partial_order : "full" or "neighbour"
-        there is redundancy in all possible pair combinations, which can be
-        simplified: gen pairs for neighbour items or for the full partial order pairs
-
-    return :
-    ------
-    label : np.array, shape=(1)
-    query_left : np.array, shape=(1, feature_dimension)
-    query_right : same as left
-    """
-    if not isinstance(querylist, QueryList):
-        querylist = QueryList(querylist)
-    querylist._correct_ranking_()
-    labels = []
-    docpairs = []
-
-    # C(n,2)
-    for i in range(len(querylist)):
-        query_left = querylist[i]
-        for j in range(i + 1, len(querylist)):
-            query_right = querylist[j]
-            if query_left.relevance_score > query_right.relevance_score:
-                labels.append([1])
-                docpairs.append([
-                    np.array(query_left.feature_vector),
-                    np.array(query_right.feature_vector)
-                ])
-            elif query_left.relevance_score < query_right.relevance_score:
-                labels.append([1])
-                docpairs.append([
-                    np.array(query_right.feature_vector),
-                    np.array(query_left.feature_vector)
-                ])
-    for label, pair in zip(labels, docpairs):
-        yield np.array(label), pair[0], pair[1]
-
-
-def gen_list(querylist):
-    """
-    gen item in list for list-wise learning to rank algorithm
-
-    Parameters:
-    ----------
-    querylist : querylist, one query matches many document pairs in list, see QueryList
-
-    return :
-    ------
-    label : np.array, shape=(samples_num, )
-    querylist : np.array, shape=(samples_num, feature_dimension)
-    """
-    if not isinstance(querylist, QueryList):
-        querylist = QueryList(querylist)
-    querylist._correct_ranking_()
-    relevance_score_list = [[query.relevance_score] for query in querylist]
-    feature_vector_list = [query.feature_vector for query in querylist]
-    yield np.array(relevance_score_list), np.array(feature_vector_list)
-
-
-def query_filter(querylists):
-    """
-    filter out query lists in which every document has relevance score 0;
-    labels 0, 1 and 2 are the relevance scores of a document with a query
-
-    parameters :
-        querylists : QueryList list
-
-    return :
-        querylists : QueryList list
-    """
-    filter_query = []
-    for querylist in querylists:
-        relevance_score_list = [query.relevance_score for query in querylist]
-        if sum(relevance_score_list) != .0:
-            filter_query.append(querylist)
-    return filter_query
-
-
-def load_from_text(filepath, shuffle=False, fill_missing=-1):
-    """
-    parse data file into queries
-    """
-    prev_query_id = -1
-    querylists = []
-    querylist = None
-    fn = __initialize_meta_info__()
-    with open(os.path.join(fn, filepath)) as f:
-        for line in f:
-            query = Query()
-            query = query._parse_(line)
-            if query == None:
-                continue
-            if query.query_id != prev_query_id:
-                if querylist is not None:
-                    querylists.append(querylist)
-                querylist = QueryList()
-                prev_query_id = query.query_id
-            querylist._add_query(query)
-        if querylist is not None:
-            querylists.append(querylist)
-    return querylists
-
-
-def __reader__(filepath, format="pairwise", shuffle=False, fill_missing=-1):
-    """
-    Parameters
-    ----------
-    filepath : string
-    fill_missing : fill the missing value. 
default in MQ2007 is -1
-
-    Returns
-    ------
-    yield
-        label query_left, query_right  # format = "pairwise"
-        label querylist  # format = "listwise"
-    """
-    querylists = query_filter(
-        load_from_text(
-            filepath, shuffle=shuffle, fill_missing=fill_missing))
-    for querylist in querylists:
-        if format == "plain_txt":
-            yield next(gen_plain_txt(querylist))
-        elif format == "pointwise":
-            yield next(gen_point(querylist))
-        elif format == "pairwise":
-            for pair in gen_pair(querylist):
-                yield pair
-        elif format == "listwise":
-            yield next(gen_list(querylist))
-
-
-train = functools.partial(__reader__, filepath="MQ2007/MQ2007/Fold1/train.txt")
-test = functools.partial(__reader__, filepath="MQ2007/MQ2007/Fold1/test.txt")
-
-
-def fetch():
-    return download(URL, "MQ2007", MD5)
-
-
-if __name__ == "__main__":
-    fetch()
-    mytest = functools.partial(
-        __reader__, filepath="MQ2007/MQ2007/Fold1/sample", format="listwise")
-    for label, query in mytest():
-        print label, query
diff --git a/python/paddle/v2/dataset/sentiment.py b/python/paddle/v2/dataset/sentiment.py
deleted file mode 100644
index b0b9757c1a75d215cf8945b5cedbb1239fd43af7..0000000000000000000000000000000000000000
--- a/python/paddle/v2/dataset/sentiment.py
+++ /dev/null
@@ -1,141 +0,0 @@
-# /usr/bin/env python
-# -*- coding:utf-8 -*-
-
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
-This script fetches and preprocesses the movie_reviews data set provided by
-NLTK.
-
-TODO(yuyang18): Complete dataset.
-"""
-
-import collections
-from itertools import chain
-
-import nltk
-from nltk.corpus import movie_reviews
-
-import paddle.v2.dataset.common
-
-__all__ = ['train', 'test', 'get_word_dict', 'convert']
-NUM_TRAINING_INSTANCES = 1600
-NUM_TOTAL_INSTANCES = 2000
-
-
-def download_data_if_not_yet():
-    """
-    Download the data set if it has not been downloaded yet.
-    """
-    try:
-        # make sure that nltk can find the data
-        if paddle.v2.dataset.common.DATA_HOME not in nltk.data.path:
-            nltk.data.path.append(paddle.v2.dataset.common.DATA_HOME)
-        movie_reviews.categories()
-    except LookupError:
-        print "Downloading movie_reviews data set, please wait....."
-        nltk.download(
-            'movie_reviews', download_dir=paddle.v2.dataset.common.DATA_HOME)
-        print "Download data set success....."
-        print "Path is " + nltk.data.find('corpora/movie_reviews').path
-
-
-def get_word_dict():
-    """
-    Sort the words by the frequency with which they occur in the samples.
-    :return:
-        words_freq_sorted
-    """
-    words_freq_sorted = list()
-    word_freq_dict = collections.defaultdict(int)
-    download_data_if_not_yet()
-
-    for category in movie_reviews.categories():
-        for field in movie_reviews.fileids(category):
-            for words in movie_reviews.words(field):
-                word_freq_dict[words] += 1
-    words_sort_list = word_freq_dict.items()
-    words_sort_list.sort(cmp=lambda a, b: b[1] - a[1])
-    for index, word in enumerate(words_sort_list):
-        words_freq_sorted.append((word[0], index))
-    return words_freq_sorted
-
-
-def sort_files():
-    """
-    Sort the sample files so that negative and positive samples are read
-    alternately.
-    :return:
-        files_list
-    """
-    files_list = list()
-    neg_file_list = movie_reviews.fileids('neg')
-    pos_file_list = movie_reviews.fileids('pos')
-    files_list = list(chain.from_iterable(zip(neg_file_list, pos_file_list)))
-    return files_list
-
-
-def load_sentiment_data():
-    """
-    Load the data set.
-    :return:
-        data_set
-    """
-    data_set = list()
-    download_data_if_not_yet()
-    words_ids = dict(get_word_dict())
-    for sample_file in sort_files():
-        words_list = list()
-        category = 0 if 'neg' in sample_file else 1
-        for word in movie_reviews.words(sample_file):
-            words_list.append(words_ids[word.lower()])
-        data_set.append((words_list, category))
-    return data_set
-
-
-def reader_creator(data):
-    """
-    Reader creator, generate an iterator for the data set.
-    :param data:
-        train data set or test data set
-    """
-    for each in data:
-        yield each[0], each[1]
-
-
-def train():
-    """
-    Default training set reader creator
-    """
-    data_set = load_sentiment_data()
-    return reader_creator(data_set[0:NUM_TRAINING_INSTANCES])
-
-
-def test():
-    """
-    Default test set reader creator
-    """
-    data_set = load_sentiment_data()
-    return reader_creator(data_set[NUM_TRAINING_INSTANCES:])
-
-
-def fetch():
-    nltk.download(
-        'movie_reviews', download_dir=paddle.v2.dataset.common.DATA_HOME)
-
-
-def convert(path):
-    """
-    Converts dataset to recordio format
-    """
-    paddle.v2.dataset.common.convert(path, train, 1000, "sentiment_train")
-    paddle.v2.dataset.common.convert(path, test, 1000, "sentiment_test")
diff --git a/python/paddle/v2/dataset/tests/cifar_test.py b/python/paddle/v2/dataset/tests/cifar_test.py
deleted file mode 100644
index e0e18229da7818be5752ee592e094a00da286ad9..0000000000000000000000000000000000000000
--- a/python/paddle/v2/dataset/tests/cifar_test.py
+++ /dev/null
@@ -1,56 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
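[Editor's note] The sentiment module above exposes `train()` and `test()` as plain generators over `(word_id_list, label)` samples. As a rough illustration of how a caller might consume them, here is a minimal sketch against the deleted `paddle.v2.dataset.sentiment` API; the `iter_minibatches` helper is hypothetical, not part of the module:

import paddle.v2.dataset.sentiment as st

def iter_minibatches(sample_iter, batch_size=32):
    # Group (word_ids, label) samples into small batches; purely illustrative.
    batch = []
    for sample in sample_iter:
        batch.append(sample)
        if len(batch) == batch_size:
            yield batch
            batch = []
    if batch:
        yield batch

for batch in iter_minibatches(st.train()):
    pass  # feed `batch` to a model here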
- -import paddle.v2.dataset.cifar -import unittest - - -class TestCIFAR(unittest.TestCase): - def check_reader(self, reader): - sum = 0 - label = 0 - for l in reader(): - self.assertEqual(l[0].size, 3072) - if l[1] > label: - label = l[1] - sum += 1 - return sum, label - - def test_test10(self): - instances, max_label_value = self.check_reader( - paddle.v2.dataset.cifar.test10()) - self.assertEqual(instances, 10000) - self.assertEqual(max_label_value, 9) - - def test_train10(self): - instances, max_label_value = self.check_reader( - paddle.v2.dataset.cifar.train10()) - self.assertEqual(instances, 50000) - self.assertEqual(max_label_value, 9) - - def test_test100(self): - instances, max_label_value = self.check_reader( - paddle.v2.dataset.cifar.test100()) - self.assertEqual(instances, 10000) - self.assertEqual(max_label_value, 99) - - def test_train100(self): - instances, max_label_value = self.check_reader( - paddle.v2.dataset.cifar.train100()) - self.assertEqual(instances, 50000) - self.assertEqual(max_label_value, 99) - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/v2/dataset/tests/common_test.py b/python/paddle/v2/dataset/tests/common_test.py deleted file mode 100644 index cfa194eba38ea70311c4deeac2635dc0a0103576..0000000000000000000000000000000000000000 --- a/python/paddle/v2/dataset/tests/common_test.py +++ /dev/null @@ -1,94 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
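[Editor's note] The `test_md5file` case in common_test.py below pins down what `paddle.v2.dataset.common.md5file` is expected to return: the hex MD5 digest of a file's contents. A minimal sketch of such a helper, assuming chunked reading (an illustration, not the deleted module's actual implementation):

import hashlib

def md5file(fname):
    # Hash the file in fixed-size chunks so large downloads fit in memory.
    hash_md5 = hashlib.md5()
    with open(fname, "rb") as f:
        for chunk in iter(lambda: f.read(4096), b""):
            hash_md5.update(chunk)
    return hash_md5.hexdigest()

# On a file containing "Hello\n" this yields
# '09f7e02f1290be211da707a266f153b3', matching the test below.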
-
-import paddle.v2.dataset.common
-import unittest
-import tempfile
-import glob
-import recordio
-
-
-class TestCommon(unittest.TestCase):
-    def test_md5file(self):
-        _, temp_path = tempfile.mkstemp()
-        with open(temp_path, 'w') as f:
-            f.write("Hello\n")
-        self.assertEqual('09f7e02f1290be211da707a266f153b3',
-                         paddle.v2.dataset.common.md5file(temp_path))
-
-    def test_download(self):
-        yi_avatar = 'https://avatars0.githubusercontent.com/u/1548775?v=3&s=460'
-        self.assertEqual(
-            paddle.v2.dataset.common.DATA_HOME + '/test/1548775?v=3&s=460',
-            paddle.v2.dataset.common.download(
-                yi_avatar, 'test', 'f75287202d6622414c706c36c16f8e0d'))
-
-    def test_split(self):
-        def test_reader():
-            def reader():
-                for x in xrange(10):
-                    yield x
-
-            return reader
-
-        _, temp_path = tempfile.mkstemp()
-        paddle.v2.dataset.common.split(
-            test_reader(), 4, suffix=temp_path + '/test-%05d.pickle')
-        files = glob.glob(temp_path + '/test-*.pickle')
-        self.assertEqual(len(files), 3)
-
-    def test_cluster_file_reader(self):
-        _, temp_path = tempfile.mkstemp()
-        for x in xrange(5):
-            with open(temp_path + '/%05d.test' % x, 'w') as f:
-                f.write('%d\n' % x)
-        reader = paddle.v2.dataset.common.cluster_files_reader(
-            temp_path + '/*.test', 5, 0)
-        for idx, e in enumerate(reader()):
-            self.assertEqual(e, str("0"))
-
-    def test_convert(self):
-        record_num = 10
-        num_shards = 4
-
-        def test_reader():
-            def reader():
-                for x in xrange(record_num):
-                    yield x
-
-            return reader
-
-        path = tempfile.mkdtemp()
-        paddle.v2.dataset.common.convert(path,
-                                         test_reader(), num_shards,
-                                         'random_images')
-
-        files = glob.glob(path + '/random_images-*')
-        self.assertEqual(len(files), num_shards)
-
-        recs = []
-        for i in range(0, num_shards):
-            n = "%s/random_images-%05d-of-%05d" % (path, i, num_shards - 1)
-            r = recordio.reader(n)
-            while True:
-                d = r.read()
-                if d is None:
-                    break
-                recs.append(d)
-
-        recs.sort()
-        self.assertEqual(len(recs), record_num)
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/python/paddle/v2/dataset/tests/flowers_test.py b/python/paddle/v2/dataset/tests/flowers_test.py
deleted file mode 100644
index a8ae9a07acc22eb9d3c0cc5ebb07f8f11ed21233..0000000000000000000000000000000000000000
--- a/python/paddle/v2/dataset/tests/flowers_test.py
+++ /dev/null
@@ -1,51 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
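[Editor's note] `test_split` above expects `common.split` to spill a reader's output into numbered pickle shards (10 records at 4 per shard give 3 files). A rough sketch of that behaviour, assuming pickle-based serialization (illustrative only; the deleted common.py is the authoritative version, and its real signature may differ):

import pickle

def split(reader, line_count, suffix="%05d.pickle"):
    # Write every `line_count` records from `reader` into one pickle shard.
    buf, index = [], 0
    for record in reader():
        buf.append(record)
        if len(buf) == line_count:
            with open(suffix % index, "wb") as f:
                pickle.dump(buf, f)
            buf, index = [], index + 1
    if buf:  # flush the final, possibly short, shard
        with open(suffix % index, "wb") as f:
            pickle.dump(buf, f)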
- -import paddle.v2.dataset.flowers -import unittest - - -class TestFlowers(unittest.TestCase): - def check_reader(self, reader): - sum = 0 - label = 0 - size = 224 * 224 * 3 - for l in reader(): - self.assertEqual(l[0].size, size) - if l[1] > label: - label = l[1] - sum += 1 - return sum, label - - def test_train(self): - instances, max_label_value = self.check_reader( - paddle.v2.dataset.flowers.train()) - self.assertEqual(instances, 6149) - self.assertEqual(max_label_value, 102) - - def test_test(self): - instances, max_label_value = self.check_reader( - paddle.v2.dataset.flowers.test()) - self.assertEqual(instances, 1020) - self.assertEqual(max_label_value, 102) - - def test_valid(self): - instances, max_label_value = self.check_reader( - paddle.v2.dataset.flowers.valid()) - self.assertEqual(instances, 1020) - self.assertEqual(max_label_value, 102) - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/v2/dataset/tests/imdb_test.py b/python/paddle/v2/dataset/tests/imdb_test.py deleted file mode 100644 index c4d82f26895d77d05c6e936bd636b1239e1a0cd8..0000000000000000000000000000000000000000 --- a/python/paddle/v2/dataset/tests/imdb_test.py +++ /dev/null @@ -1,57 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import paddle.v2.dataset.imdb -import unittest -import re - -TRAIN_POS_PATTERN = re.compile("aclImdb/train/pos/.*\.txt$") -TRAIN_NEG_PATTERN = re.compile("aclImdb/train/neg/.*\.txt$") -TRAIN_PATTERN = re.compile("aclImdb/train/.*\.txt$") - -TEST_POS_PATTERN = re.compile("aclImdb/test/pos/.*\.txt$") -TEST_NEG_PATTERN = re.compile("aclImdb/test/neg/.*\.txt$") -TEST_PATTERN = re.compile("aclImdb/test/.*\.txt$") - - -class TestIMDB(unittest.TestCase): - word_idx = None - - def test_build_dict(self): - if self.word_idx == None: - self.word_idx = paddle.v2.dataset.imdb.build_dict(TRAIN_PATTERN, - 150) - - self.assertEqual(len(self.word_idx), 7036) - - def check_dataset(self, dataset, expected_size): - if self.word_idx == None: - self.word_idx = paddle.v2.dataset.imdb.build_dict(TRAIN_PATTERN, - 150) - - sum = 0 - for l in dataset(self.word_idx): - self.assertEqual(l[1], sum % 2) - sum += 1 - self.assertEqual(sum, expected_size) - - def test_train(self): - self.check_dataset(paddle.v2.dataset.imdb.train, 25000) - - def test_test(self): - self.check_dataset(paddle.v2.dataset.imdb.test, 25000) - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/v2/dataset/tests/imikolov_test.py b/python/paddle/v2/dataset/tests/imikolov_test.py deleted file mode 100644 index 714a75d6f1ff31697eec2d893d350a726d6390fe..0000000000000000000000000000000000000000 --- a/python/paddle/v2/dataset/tests/imikolov_test.py +++ /dev/null @@ -1,67 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import paddle.v2.dataset.imikolov
-import unittest
-
-WORD_DICT = paddle.v2.dataset.imikolov.build_dict()
-
-
-class TestMikolov(unittest.TestCase):
-    def check_reader(self, reader, n):
-        for l in reader():
-            self.assertEqual(len(l), n)
-
-    def test_train(self):
-        n = 5
-        self.check_reader(paddle.v2.dataset.imikolov.train(WORD_DICT, n), n)
-
-        first_line = 'aer banknote berlitz calloway centrust cluett fromstein '\
-                     'gitano guterman hydro-quebec ipo kia memotec mlx nahb punts '\
-                     'rake regatta rubens sim snack-food ssangyong swapo wachter'
-        first_line = [
-            WORD_DICT.get(ch, WORD_DICT['<unk>'])
-            for ch in first_line.split(' ')
-        ]
-        for l in paddle.v2.dataset.imikolov.train(
-                WORD_DICT, n=-1,
-                data_type=paddle.v2.dataset.imikolov.DataType.SEQ)():
-            read_line = l[0][1:]
-            break
-        self.assertEqual(first_line, read_line)
-
-    def test_test(self):
-        n = 5
-        self.check_reader(paddle.v2.dataset.imikolov.test(WORD_DICT, n), n)
-
-        first_line = 'consumers may want to move their telephones a little '\
-                     'closer to the tv set'
-        first_line = [
-            WORD_DICT.get(ch, WORD_DICT['<unk>'])
-            for ch in first_line.split(' ')
-        ]
-        for l in paddle.v2.dataset.imikolov.test(
-                WORD_DICT, n=-1,
-                data_type=paddle.v2.dataset.imikolov.DataType.SEQ)():
-            read_line = l[0][1:]
-            break
-        self.assertEqual(first_line, read_line)
-
-    def test_total(self):
-        _, idx = zip(*WORD_DICT.items())
-        self.assertEqual(sorted(idx)[-1], len(WORD_DICT) - 1)
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/python/paddle/v2/dataset/tests/mnist_test.py b/python/paddle/v2/dataset/tests/mnist_test.py
deleted file mode 100644
index 1d344cac3e7483a351033570fbec75a4d19f4a55..0000000000000000000000000000000000000000
--- a/python/paddle/v2/dataset/tests/mnist_test.py
+++ /dev/null
@@ -1,44 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
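[Editor's note] The SEQ/NGRAM behaviour exercised above comes from imikolov's reader_creator earlier in this diff: for NGRAM data it pads each sentence with `<s>`/`<e>` and slides a window of width n. A standalone sketch of that windowing step, with a hypothetical `ngrams` helper and a toy dictionary for illustration:

def ngrams(words, word_idx, n):
    # Pad the sentence, map words to IDs, then slide a width-n window.
    unk = word_idx['<unk>']
    l = ['<s>'] + words + ['<e>']
    if len(l) < n:
        return
    ids = [word_idx.get(w, unk) for w in l]
    for i in range(n, len(ids) + 1):
        yield tuple(ids[i - n:i])

word_idx = {'<s>': 0, '<e>': 1, 'the': 2, 'cat': 3, '<unk>': 4}
print(list(ngrams(['the', 'cat'], word_idx, 3)))
# [(0, 2, 3), (2, 3, 1)]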
-
-import paddle.v2.dataset.mnist
-import unittest
-
-
-class TestMNIST(unittest.TestCase):
-    def check_reader(self, reader):
-        sum = 0
-        label = 0
-        for l in reader():
-            self.assertEqual(l[0].size, 784)
-            if l[1] > label:
-                label = l[1]
-            sum += 1
-        return sum, label
-
-    def test_train(self):
-        instances, max_label_value = self.check_reader(
-            paddle.v2.dataset.mnist.train())
-        self.assertEqual(instances, 60000)
-        self.assertEqual(max_label_value, 9)
-
-    def test_test(self):
-        instances, max_label_value = self.check_reader(
-            paddle.v2.dataset.mnist.test())
-        self.assertEqual(instances, 10000)
-        self.assertEqual(max_label_value, 9)
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/python/paddle/v2/dataset/tests/mq2007_test.py b/python/paddle/v2/dataset/tests/mq2007_test.py
deleted file mode 100644
index 59847b6c18eadb12123cae824e8bce1051a69d4c..0000000000000000000000000000000000000000
--- a/python/paddle/v2/dataset/tests/mq2007_test.py
+++ /dev/null
@@ -1,33 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import paddle.v2.dataset.mq2007
-import unittest
-
-
-class TestMQ2007(unittest.TestCase):
-    def test_pairwise(self):
-        for label, query_left, query_right in paddle.v2.dataset.mq2007.test(
-                format="pairwise"):
-            self.assertEqual(query_left.shape, (46, ))
-            self.assertEqual(query_right.shape, (46, ))
-
-    def test_listwise(self):
-        for label_array, query_array in paddle.v2.dataset.mq2007.test(
-                format="listwise"):
-            self.assertEqual(len(label_array), len(query_array))
-
-
-if __name__ == "__main__":
-    unittest.main()
diff --git a/python/paddle/v2/dataset/tests/test_sentiment.py b/python/paddle/v2/dataset/tests/test_sentiment.py
deleted file mode 100644
index 407405290734609059c1767600748d530e8a13a6..0000000000000000000000000000000000000000
--- a/python/paddle/v2/dataset/tests/test_sentiment.py
+++ /dev/null
@@ -1,55 +0,0 @@
-# /usr/bin/env python
-# -*- coding:utf-8 -*-
-
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
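[Editor's note] The pairwise shape checks above follow from `gen_pair` in mq2007.py earlier in this diff: every pair of documents with unequal relevance yields label 1 with the more relevant feature vector on the left. A self-contained sketch of that pairing rule, using plain `(score, vector)` tuples instead of the module's Query objects (illustrative only):

import numpy as np

def pairs(scored_docs):
    # scored_docs: list of (relevance_score, feature_vector) tuples.
    for i in range(len(scored_docs)):
        for j in range(i + 1, len(scored_docs)):
            (s_l, v_l), (s_r, v_r) = scored_docs[i], scored_docs[j]
            if s_l == s_r:
                continue  # equally relevant docs give no preference pair
            # The more relevant document always goes on the left, label is 1.
            left, right = (v_l, v_r) if s_l > s_r else (v_r, v_l)
            yield np.array([1]), np.array(left), np.array(right)

docs = [(2, [0.1, 0.2]), (0, [0.3, 0.1]), (1, [0.0, 0.5])]
for label, left, right in pairs(docs):
    print(label, left, right)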
-
-import unittest
-import nltk
-import paddle.v2.dataset.sentiment as st
-from nltk.corpus import movie_reviews
-
-
-class TestSentimentMethods(unittest.TestCase):
-    def test_get_word_dict(self):
-        word_dict = st.get_word_dict()[0:10]
-        test_word_list = [(u',', 0), (u'the', 1), (u'.', 2), (u'a', 3),
-                          (u'and', 4), (u'of', 5), (u'to', 6), (u"'", 7),
-                          (u'is', 8), (u'in', 9)]
-        for idx, each in enumerate(word_dict):
-            self.assertEqual(each, test_word_list[idx])
-        self.assertTrue("/root/.cache/paddle/dataset" in nltk.data.path)
-
-    def test_sort_files(self):
-        last_label = ''
-        for sample_file in st.sort_files():
-            current_label = sample_file.split("/")[0]
-            self.assertNotEqual(current_label, last_label)
-            last_label = current_label
-
-    def test_data_set(self):
-        data_set = st.load_sentiment_data()
-        last_label = -1
-        for each in st.test():
-            self.assertNotEqual(each[1], last_label)
-            last_label = each[1]
-        self.assertEqual(len(data_set), st.NUM_TOTAL_INSTANCES)
-        self.assertEqual(len(list(st.train())), st.NUM_TRAINING_INSTANCES)
-        self.assertEqual(
-            len(list(st.test())),
-            (st.NUM_TOTAL_INSTANCES - st.NUM_TRAINING_INSTANCES))
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/python/paddle/v2/dataset/tests/voc2012_test.py b/python/paddle/v2/dataset/tests/voc2012_test.py
deleted file mode 100644
index 31e72ebf5eac0508d12783f9ceaa6eef0fa6d353..0000000000000000000000000000000000000000
--- a/python/paddle/v2/dataset/tests/voc2012_test.py
+++ /dev/null
@@ -1,42 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import paddle.v2.dataset.voc2012
-import unittest
-
-
-class TestVOC(unittest.TestCase):
-    def check_reader(self, reader):
-        sum = 0
-        label = 0
-        for l in reader():
-            self.assertEqual(l[0].size, 3 * l[1].size)
-            sum += 1
-        return sum
-
-    def test_train(self):
-        count = self.check_reader(paddle.v2.dataset.voc2012.train())
-        self.assertEqual(count, 2913)
-
-    def test_test(self):
-        count = self.check_reader(paddle.v2.dataset.voc2012.test())
-        self.assertEqual(count, 1464)
-
-    def test_val(self):
-        count = self.check_reader(paddle.v2.dataset.voc2012.val())
-        self.assertEqual(count, 1449)
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/python/paddle/v2/dataset/tests/wmt16_test.py b/python/paddle/v2/dataset/tests/wmt16_test.py
deleted file mode 100644
index cef6c3216e7de8d9785a063976e63f88d90b24df..0000000000000000000000000000000000000000
--- a/python/paddle/v2/dataset/tests/wmt16_test.py
+++ /dev/null
@@ -1,66 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import paddle.v2.dataset.wmt16
-import unittest
-
-
-class TestWMT16(unittest.TestCase):
-    def checkout_one_sample(self, sample):
-        # train data has 3 fields: source language word indices,
-        # target language word indices, and target next word indices.
-        self.assertEqual(len(sample), 3)
-
-        # test start mark and end mark in source word indices.
-        self.assertEqual(sample[0][0], 0)
-        self.assertEqual(sample[0][-1], 1)
-
-        # test start mark in target word indices
-        self.assertEqual(sample[1][0], 0)
-
-        # test end mark in target next word indices
-        self.assertEqual(sample[2][-1], 1)
-
-    def test_train(self):
-        for idx, sample in enumerate(
-                paddle.v2.dataset.wmt16.train(
-                    src_dict_size=100000, trg_dict_size=100000)()):
-            if idx >= 10: break
-            self.checkout_one_sample(sample)
-
-    def test_test(self):
-        for idx, sample in enumerate(
-                paddle.v2.dataset.wmt16.test(
-                    src_dict_size=1000, trg_dict_size=1000)()):
-            if idx >= 10: break
-            self.checkout_one_sample(sample)
-
-    def test_val(self):
-        for idx, sample in enumerate(
-                paddle.v2.dataset.wmt16.validation(
-                    src_dict_size=1000, trg_dict_size=1000)()):
-            if idx >= 10: break
-            self.checkout_one_sample(sample)
-
-    def test_get_dict(self):
-        dict_size = 1000
-        word_dict = paddle.v2.dataset.wmt16.get_dict("en", dict_size, True)
-        self.assertEqual(len(word_dict), dict_size)
-        self.assertEqual(word_dict[0], "<s>")
-        self.assertEqual(word_dict[1], "<e>")
-        self.assertEqual(word_dict[2], "<unk>")
-
-
-if __name__ == "__main__":
-    unittest.main()
diff --git a/python/paddle/v2/dataset/uci_housing.py b/python/paddle/v2/dataset/uci_housing.py
deleted file mode 100644
index f10bf7e42a1ead09b3eba0d61e55701215e4360f..0000000000000000000000000000000000000000
--- a/python/paddle/v2/dataset/uci_housing.py
+++ /dev/null
@@ -1,134 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
-UCI Housing dataset.
-
-This module will download dataset from
-https://archive.ics.uci.edu/ml/machine-learning-databases/housing/ and
-parse training set and test set into paddle reader creators. 
-""" - -import numpy as np -import os -import paddle.v2.dataset.common -from paddle.v2.parameters import Parameters - -__all__ = ['train', 'test'] - -URL = 'https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data' -MD5 = 'd4accdce7a25600298819f8e28e8d593' -feature_names = [ - 'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', - 'PTRATIO', 'B', 'LSTAT', 'convert' -] - -UCI_TRAIN_DATA = None -UCI_TEST_DATA = None -URL_MODEL = 'https://github.com/PaddlePaddle/book/raw/develop/01.fit_a_line/fit_a_line.tar' -MD5_MODEL = '52fc3da8ef3937822fcdd87ee05c0c9b' - - -def feature_range(maximums, minimums): - import matplotlib - matplotlib.use('Agg') - import matplotlib.pyplot as plt - fig, ax = plt.subplots() - feature_num = len(maximums) - ax.bar(range(feature_num), maximums - minimums, color='r', align='center') - ax.set_title('feature scale') - plt.xticks(range(feature_num), feature_names) - plt.xlim([-1, feature_num]) - fig.set_figheight(6) - fig.set_figwidth(10) - if not os.path.exists('./image'): - os.makedirs('./image') - fig.savefig('image/ranges.png', dpi=48) - plt.close(fig) - - -def load_data(filename, feature_num=14, ratio=0.8): - global UCI_TRAIN_DATA, UCI_TEST_DATA - if UCI_TRAIN_DATA is not None and UCI_TEST_DATA is not None: - return - - data = np.fromfile(filename, sep=' ') - data = data.reshape(data.shape[0] / feature_num, feature_num) - maximums, minimums, avgs = data.max(axis=0), data.min(axis=0), data.sum( - axis=0) / data.shape[0] - feature_range(maximums[:-1], minimums[:-1]) - for i in xrange(feature_num - 1): - data[:, i] = (data[:, i] - avgs[i]) / (maximums[i] - minimums[i]) - offset = int(data.shape[0] * ratio) - UCI_TRAIN_DATA = data[:offset] - UCI_TEST_DATA = data[offset:] - - -def train(): - """ - UCI_HOUSING training set creator. - - It returns a reader creator, each sample in the reader is features after - normalization and price number. - - :return: Training reader creator - :rtype: callable - """ - global UCI_TRAIN_DATA - load_data(paddle.v2.dataset.common.download(URL, 'uci_housing', MD5)) - - def reader(): - for d in UCI_TRAIN_DATA: - yield d[:-1], d[-1:] - - return reader - - -def test(): - """ - UCI_HOUSING test set creator. - - It returns a reader creator, each sample in the reader is features after - normalization and price number. - - :return: Test reader creator - :rtype: callable - """ - global UCI_TEST_DATA - load_data(paddle.v2.dataset.common.download(URL, 'uci_housing', MD5)) - - def reader(): - for d in UCI_TEST_DATA: - yield d[:-1], d[-1:] - - return reader - - -def model(): - tar_file = paddle.v2.dataset.common.download(URL_MODEL, 'fit_a_line.tar', - MD5_MODEL) - with open(tar_file, 'r') as f: - parameters = Parameters.from_tar(f) - return parameters - - -def fetch(): - paddle.v2.dataset.common.download(URL, 'uci_housing', MD5) - - -def convert(path): - """ - Converts dataset to recordio format - """ - paddle.v2.dataset.common.convert(path, train(), 1000, "uci_housing_train") - paddle.v2.dataset.common.convert(path, test(), 1000, "uci_houseing_test") diff --git a/python/paddle/v2/dataset/voc2012.py b/python/paddle/v2/dataset/voc2012.py deleted file mode 100644 index 617e212d67fbe37f9d9663e9c83c62045411fa77..0000000000000000000000000000000000000000 --- a/python/paddle/v2/dataset/voc2012.py +++ /dev/null @@ -1,85 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. 
All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Image dataset for segmentation. -The 2012 dataset contains images from 2008-2011 for which additional -segmentations have been prepared. As in previous years the assignment -to training/test sets has been maintained. The total number of images -with segmentation has been increased from 7,062 to 9,993. -""" - -import tarfile -import io -import numpy as np -from paddle.v2.dataset.common import download -from paddle.v2.image import * -from PIL import Image - -__all__ = ['train', 'test', 'val'] - -VOC_URL = 'http://host.robots.ox.ac.uk/pascal/VOC/voc2012/\ -VOCtrainval_11-May-2012.tar' - -VOC_MD5 = '6cd6e144f989b92b3379bac3b3de84fd' -SET_FILE = 'VOCdevkit/VOC2012/ImageSets/Segmentation/{}.txt' -DATA_FILE = 'VOCdevkit/VOC2012/JPEGImages/{}.jpg' -LABEL_FILE = 'VOCdevkit/VOC2012/SegmentationClass/{}.png' - -CACHE_DIR = 'voc2012' - - -def reader_creator(filename, sub_name): - - tarobject = tarfile.open(filename) - name2mem = {} - for ele in tarobject.getmembers(): - name2mem[ele.name] = ele - - def reader(): - set_file = SET_FILE.format(sub_name) - sets = tarobject.extractfile(name2mem[set_file]) - for line in sets: - line = line.strip() - data_file = DATA_FILE.format(line) - label_file = LABEL_FILE.format(line) - data = tarobject.extractfile(name2mem[data_file]).read() - label = tarobject.extractfile(name2mem[label_file]).read() - data = Image.open(io.BytesIO(data)) - label = Image.open(io.BytesIO(label)) - data = np.array(data) - label = np.array(label) - yield data, label - - return reader - - -def train(): - """ - Create a train dataset reader containing 2913 images in HWC order. - """ - return reader_creator(download(VOC_URL, CACHE_DIR, VOC_MD5), 'trainval') - - -def test(): - """ - Create a test dataset reader containing 1464 images in HWC order. - """ - return reader_creator(download(VOC_URL, CACHE_DIR, VOC_MD5), 'train') - - -def val(): - """ - Create a val dataset reader containing 1449 images in HWC order. - """ - return reader_creator(download(VOC_URL, CACHE_DIR, VOC_MD5), 'val') diff --git a/python/paddle/v2/dataset/wmt14.py b/python/paddle/v2/dataset/wmt14.py deleted file mode 100644 index b9e602f324ad9bf43416b420c6d5697050a5c802..0000000000000000000000000000000000000000 --- a/python/paddle/v2/dataset/wmt14.py +++ /dev/null @@ -1,181 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -WMT14 dataset. 
-The original WMT14 dataset is too large to distribute, so a small subset is
-provided instead. This module will download the dataset from
-http://paddlemodels.bj.bcebos.com/wmt/wmt14.tgz and
-parse the training set and test set into paddle reader creators.
-
-"""
-import tarfile
-import gzip
-
-import paddle.v2.dataset.common
-from paddle.v2.parameters import Parameters
-
-__all__ = [
-    'train',
-    'test',
-    'get_dict',
-    'convert',
-]
-
-URL_DEV_TEST = ('http://www-lium.univ-lemans.fr/~schwenk/'
-                'cslm_joint_paper/data/dev+test.tgz')
-MD5_DEV_TEST = '7d7897317ddd8ba0ae5c5fa7248d3ff5'
-# this is a small set of data for test. The original data is too large and
-# will be added later.
-URL_TRAIN = ('http://paddlemodels.bj.bcebos.com/wmt/wmt14.tgz')
-MD5_TRAIN = '0791583d57d5beb693b9414c5b36798c'
-# BLEU of this trained model is 26.92
-URL_MODEL = 'http://paddlemodels.bj.bcebos.com/wmt%2Fwmt14.tgz'
-MD5_MODEL = '0cb4a5366189b6acba876491c8724fa3'
-
-START = "<s>"
-END = "<e>"
-UNK = "<unk>"
-UNK_IDX = 2
-
-
-def __read_to_dict(tar_file, dict_size):
-    def __to_dict(fd, size):
-        out_dict = dict()
-        for line_count, line in enumerate(fd):
-            if line_count < size:
-                out_dict[line.strip()] = line_count
-            else:
-                break
-        return out_dict
-
-    with tarfile.open(tar_file, mode='r') as f:
-        names = [
-            each_item.name for each_item in f
-            if each_item.name.endswith("src.dict")
-        ]
-        assert len(names) == 1
-        src_dict = __to_dict(f.extractfile(names[0]), dict_size)
-        names = [
-            each_item.name for each_item in f
-            if each_item.name.endswith("trg.dict")
-        ]
-        assert len(names) == 1
-        trg_dict = __to_dict(f.extractfile(names[0]), dict_size)
-        return src_dict, trg_dict
-
-
-def reader_creator(tar_file, file_name, dict_size):
-    def reader():
-        src_dict, trg_dict = __read_to_dict(tar_file, dict_size)
-        with tarfile.open(tar_file, mode='r') as f:
-            names = [
-                each_item.name for each_item in f
-                if each_item.name.endswith(file_name)
-            ]
-            for name in names:
-                for line in f.extractfile(name):
-                    line_split = line.strip().split('\t')
-                    if len(line_split) != 2:
-                        continue
-                    src_seq = line_split[0]  # one source sequence
-                    src_words = src_seq.split()
-                    src_ids = [
-                        src_dict.get(w, UNK_IDX)
-                        for w in [START] + src_words + [END]
-                    ]
-
-                    trg_seq = line_split[1]  # one target sequence
-                    trg_words = trg_seq.split()
-                    trg_ids = [trg_dict.get(w, UNK_IDX) for w in trg_words]
-
-                    # remove sequences whose length > 80 in training mode
-                    if len(src_ids) > 80 or len(trg_ids) > 80:
-                        continue
-                    trg_ids_next = trg_ids + [trg_dict[END]]
-                    trg_ids = [trg_dict[START]] + trg_ids
-
-                    yield src_ids, trg_ids, trg_ids_next
-
-    return reader
-
-
-def train(dict_size):
-    """
-    WMT14 training set creator.
-
-    It returns a reader creator; each sample in the reader is a source
-    language word ID sequence, a target language word ID sequence and a
-    next-word ID sequence.
-
-    :return: Training reader creator
-    :rtype: callable
-    """
-    return reader_creator(
-        paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN),
-        'train/train', dict_size)
-
-
-def test(dict_size):
-    """
-    WMT14 test set creator.
-
-    It returns a reader creator; each sample in the reader is a source
-    language word ID sequence, a target language word ID sequence and a
-    next-word ID sequence.
- - :return: Test reader creator - :rtype: callable - """ - return reader_creator( - paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN), - 'test/test', dict_size) - - -def gen(dict_size): - return reader_creator( - paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN), - 'gen/gen', dict_size) - - -def model(): - tar_file = paddle.v2.dataset.common.download(URL_MODEL, 'wmt14', MD5_MODEL) - with gzip.open(tar_file, 'r') as f: - parameters = Parameters.from_tar(f) - return parameters - - -def get_dict(dict_size, reverse=True): - # if reverse = False, return dict = {'a':'001', 'b':'002', ...} - # else reverse = true, return dict = {'001':'a', '002':'b', ...} - tar_file = paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN) - src_dict, trg_dict = __read_to_dict(tar_file, dict_size) - if reverse: - src_dict = {v: k for k, v in src_dict.items()} - trg_dict = {v: k for k, v in trg_dict.items()} - return src_dict, trg_dict - - -def fetch(): - paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN) - paddle.v2.dataset.common.download(URL_MODEL, 'wmt14', MD5_MODEL) - - -def convert(path): - """ - Converts dataset to recordio format - """ - dict_size = 30000 - paddle.v2.dataset.common.convert(path, - train(dict_size), 1000, "wmt14_train") - paddle.v2.dataset.common.convert(path, test(dict_size), 1000, "wmt14_test") diff --git a/python/paddle/v2/dataset/wmt16.py b/python/paddle/v2/dataset/wmt16.py deleted file mode 100644 index 5793002091ba3eabc32dcc156e5bb8eb512d8dfb..0000000000000000000000000000000000000000 --- a/python/paddle/v2/dataset/wmt16.py +++ /dev/null @@ -1,352 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -ACL2016 Multimodal Machine Translation. Please see this website for more -details: http://www.statmt.org/wmt16/multimodal-task.html#task1 - -If you use the dataset created for your task, please cite the following paper: -Multi30K: Multilingual English-German Image Descriptions. - -@article{elliott-EtAl:2016:VL16, - author = {{Elliott}, D. and {Frank}, S. and {Sima"an}, K. 
and {Specia}, L.},
-  title     = {Multi30K: Multilingual English-German Image Descriptions},
-  booktitle = {Proceedings of the 6th Workshop on Vision and Language},
-  year      = {2016},
-  pages     = {70--74}
-}
-"""
-
-import os
-import tarfile
-import gzip
-from collections import defaultdict
-
-import paddle.v2.dataset.common
-
-__all__ = [
-    "train",
-    "test",
-    "validation",
-    "convert",
-    "fetch",
-    "get_dict",
-]
-
-DATA_URL = ("http://cloud.dlnel.org/filepub/"
-            "?uuid=46a0808e-ddd8-427c-bacd-0dbc6d045fed")
-DATA_MD5 = "0c38be43600334966403524a40dcd81e"
-
-TOTAL_EN_WORDS = 11250
-TOTAL_DE_WORDS = 19220
-
-START_MARK = "<s>"
-END_MARK = "<e>"
-UNK_MARK = "<unk>"
-
-
-def __build_dict(tar_file, dict_size, save_path, lang):
-    word_dict = defaultdict(int)
-    with tarfile.open(tar_file, mode="r") as f:
-        for line in f.extractfile("wmt16/train"):
-            line_split = line.strip().split("\t")
-            if len(line_split) != 2: continue
-            sen = line_split[0] if lang == "en" else line_split[1]
-            for w in sen.split():
-                word_dict[w] += 1
-
-    with open(save_path, "w") as fout:
-        fout.write("%s\n%s\n%s\n" % (START_MARK, END_MARK, UNK_MARK))
-        for idx, word in enumerate(
-                sorted(
-                    word_dict.iteritems(), key=lambda x: x[1], reverse=True)):
-            if idx + 3 == dict_size: break
-            fout.write(word[0].encode('utf-8'))
-            fout.write('\n')
-
-
-def __load_dict(tar_file, dict_size, lang, reverse=False):
-    dict_path = os.path.join(paddle.v2.dataset.common.DATA_HOME,
-                             "wmt16/%s_%d.dict" % (lang, dict_size))
-    if not os.path.exists(dict_path) or (
-            len(open(dict_path, "r").readlines()) != dict_size):
-        __build_dict(tar_file, dict_size, dict_path, lang)
-
-    word_dict = {}
-    with open(dict_path, "r") as fdict:
-        for idx, line in enumerate(fdict):
-            if reverse:
-                word_dict[idx] = line.strip()
-            else:
-                word_dict[line.strip()] = idx
-    return word_dict
-
-
-def __get_dict_size(src_dict_size, trg_dict_size, src_lang):
-    src_dict_size = min(src_dict_size, (TOTAL_EN_WORDS if src_lang == "en" else
-                                        TOTAL_DE_WORDS))
-    trg_dict_size = min(trg_dict_size, (TOTAL_DE_WORDS if src_lang == "en" else
-                                        TOTAL_EN_WORDS))
-    return src_dict_size, trg_dict_size
-
-
-def reader_creator(tar_file, file_name, src_dict_size, trg_dict_size,
-                   src_lang):
-    def reader():
-        src_dict = __load_dict(tar_file, src_dict_size, src_lang)
-        trg_dict = __load_dict(tar_file, trg_dict_size,
-                               ("de" if src_lang == "en" else "en"))
-
-        # the indices for the start mark, end mark, and unk token are the
-        # same in the source and target languages, so the source language
-        # dictionary is used to determine them.
-        start_id = src_dict[START_MARK]
-        end_id = src_dict[END_MARK]
-        unk_id = src_dict[UNK_MARK]
-
-        src_col = 0 if src_lang == "en" else 1
-        trg_col = 1 - src_col
-
-        with tarfile.open(tar_file, mode="r") as f:
-            for line in f.extractfile(file_name):
-                line_split = line.strip().split("\t")
-                if len(line_split) != 2:
-                    continue
-                src_words = line_split[src_col].split()
-                src_ids = [start_id] + [
-                    src_dict.get(w, unk_id) for w in src_words
-                ] + [end_id]
-
-                trg_words = line_split[trg_col].split()
-                trg_ids = [trg_dict.get(w, unk_id) for w in trg_words]
-
-                trg_ids_next = trg_ids + [end_id]
-                trg_ids = [start_id] + trg_ids
-
-                yield src_ids, trg_ids, trg_ids_next
-
-    return reader
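Concretely, each sample the reader yields is a triple of parallel index sequences; the target sequence is shifted by one position to provide next-word supervision. A sketch with made-up indices (only the positions of `<s>` = 0 and `<e>` = 1 are guaranteed by construction of the dictionaries):

.. code-block:: python

    # hypothetical en -> de sample
    src_ids      = [0, 57, 312, 891, 1]   # <s> + source words + <e>
    trg_ids      = [0, 44, 208, 673]      # <s> + target words
    trg_ids_next = [44, 208, 673, 1]      # target words + <e>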
-
-
-def train(src_dict_size, trg_dict_size, src_lang="en"):
-    """
-    WMT16 train set reader.
-
-    This function returns the reader for the train data. Each sample the
-    reader returns is made up of three fields: the source language word index
-    sequence, the target language word index sequence and the next-word index
-    sequence.
-
-    NOTE:
-    The original link for the training data is:
-    http://www.quest.dcs.shef.ac.uk/wmt16_files_mmt/training.tar.gz
-
-    paddle.dataset.wmt16 provides a tokenized version of the original dataset,
-    produced with Moses' tokenization script:
-    https://github.com/moses-smt/mosesdecoder/blob/master/scripts/tokenizer/tokenizer.perl
-
-    Args:
-        src_dict_size(int): Size of the source language dictionary. Three
-                            special tokens will be added into the dictionary:
-                            <s> for the start mark, <e> for the end mark, and
-                            <unk> for unknown words.
-        trg_dict_size(int): Size of the target language dictionary. The same
-                            three special tokens will be added as well.
-        src_lang(string): A string indicating which language is the source
-                          language. Available options are: "en" for English
-                          and "de" for German.
-
-    Returns:
-        callable: The train reader.
-    """
-
-    if src_lang not in ["en", "de"]:
-        raise ValueError("Invalid source language type. Only support: "
-                         "en (for English); de (for German).")
-    src_dict_size, trg_dict_size = __get_dict_size(src_dict_size,
-                                                   trg_dict_size, src_lang)
-
-    return reader_creator(
-        tar_file=paddle.v2.dataset.common.download(DATA_URL, "wmt16", DATA_MD5,
-                                                   "wmt16.tar.gz"),
-        file_name="wmt16/train",
-        src_dict_size=src_dict_size,
-        trg_dict_size=trg_dict_size,
-        src_lang=src_lang)
-
-
-def test(src_dict_size, trg_dict_size, src_lang="en"):
-    """
-    WMT16 test set reader.
-
-    This function returns the reader for the test data. Each sample the
-    reader returns is made up of three fields: the source language word index
-    sequence, the target language word index sequence and the next-word index
-    sequence.
-
-    NOTE:
-    The original link for the test data is:
-    http://www.quest.dcs.shef.ac.uk/wmt16_files_mmt/mmt16_task1_test.tar.gz
-
-    paddle.dataset.wmt16 provides a tokenized version of the original dataset,
-    produced with Moses' tokenization script:
-    https://github.com/moses-smt/mosesdecoder/blob/master/scripts/tokenizer/tokenizer.perl
-
-    Args:
-        src_dict_size(int): Size of the source language dictionary. Three
-                            special tokens will be added into the dictionary:
-                            <s> for the start mark, <e> for the end mark, and
-                            <unk> for unknown words.
-        trg_dict_size(int): Size of the target language dictionary. The same
-                            three special tokens will be added as well.
-        src_lang(string): A string indicating which language is the source
-                          language. Available options are: "en" for English
-                          and "de" for German.
-
-    Returns:
-        callable: The test reader.
-    """
-
-    if src_lang not in ["en", "de"]:
-        raise ValueError("Invalid source language type. Only support: "
-                         "en (for English); de (for German).")
-
-    src_dict_size, trg_dict_size = __get_dict_size(src_dict_size,
-                                                   trg_dict_size, src_lang)
-
-    return reader_creator(
-        tar_file=paddle.v2.dataset.common.download(DATA_URL, "wmt16", DATA_MD5,
-                                                   "wmt16.tar.gz"),
-        file_name="wmt16/test",
-        src_dict_size=src_dict_size,
-        trg_dict_size=trg_dict_size,
-        src_lang=src_lang)
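A sketch of how these reader creators are typically wired into training (the dictionary sizes and batch size are illustrative; assumes the download succeeds):

.. code-block:: python

    import paddle.v2 as paddle

    train_reader = paddle.batch(
        paddle.v2.dataset.wmt16.train(
            src_dict_size=30000, trg_dict_size=30000, src_lang="en"),
        batch_size=64)

    for batch in train_reader():
        # batch is a list of (src_ids, trg_ids, trg_ids_next) tuples
        pass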
-
-
-def validation(src_dict_size, trg_dict_size, src_lang="en"):
-    """
-    WMT16 validation set reader.
-
-    This function returns the reader for the validation data. Each sample the
-    reader returns is made up of three fields: the source language word index
-    sequence, the target language word index sequence and the next-word index
-    sequence.
-
-    NOTE:
-    The original link for the validation data is:
-    http://www.quest.dcs.shef.ac.uk/wmt16_files_mmt/validation.tar.gz
-
-    paddle.dataset.wmt16 provides a tokenized version of the original dataset,
-    produced with Moses' tokenization script:
-    https://github.com/moses-smt/mosesdecoder/blob/master/scripts/tokenizer/tokenizer.perl
-
-    Args:
-        src_dict_size(int): Size of the source language dictionary. Three
-                            special tokens will be added into the dictionary:
-                            <s> for the start mark, <e> for the end mark, and
-                            <unk> for unknown words.
-        trg_dict_size(int): Size of the target language dictionary. The same
-                            three special tokens will be added as well.
-        src_lang(string): A string indicating which language is the source
-                          language. Available options are: "en" for English
-                          and "de" for German.
-
-    Returns:
-        callable: The validation reader.
-    """
-    if src_lang not in ["en", "de"]:
-        raise ValueError("Invalid source language type. Only support: "
-                         "en (for English); de (for German).")
-    src_dict_size, trg_dict_size = __get_dict_size(src_dict_size,
-                                                   trg_dict_size, src_lang)
-
-    return reader_creator(
-        tar_file=paddle.v2.dataset.common.download(DATA_URL, "wmt16", DATA_MD5,
-                                                   "wmt16.tar.gz"),
-        file_name="wmt16/val",
-        src_dict_size=src_dict_size,
-        trg_dict_size=trg_dict_size,
-        src_lang=src_lang)
-
-
-def get_dict(lang, dict_size, reverse=False):
-    """
-    Return the word dictionary for the specified language.
-
-    Args:
-        lang(string): A string indicating which language the dictionary is
-                      for. Available options are: "en" for English and "de"
-                      for German.
-        dict_size(int): Size of the specified language dictionary.
-        reverse(bool): If reverse is set to False, the returned python
-                       dictionary will use the word as key and the index as
-                       value. If reverse is set to True, the returned python
-                       dictionary will use the index as key and the word as
-                       value.
-
-    Returns:
-        dict: The word dictionary for the specified language.
-    """
-
-    if lang == "en":
-        dict_size = min(dict_size, TOTAL_EN_WORDS)
-    else:
-        dict_size = min(dict_size, TOTAL_DE_WORDS)
-
-    dict_path = os.path.join(paddle.v2.dataset.common.DATA_HOME,
-                             "wmt16/%s_%d.dict" % (lang, dict_size))
-    assert os.path.exists(dict_path), (
-        "Word dictionary does not exist. "
-        "Please invoke paddle.dataset.wmt16.train/test/validation first "
-        "to build the dictionary.")
-    tar_file = os.path.join(paddle.v2.dataset.common.DATA_HOME, "wmt16.tar.gz")
-    return __load_dict(tar_file, dict_size, lang, reverse)
-
-
-def fetch():
-    """Download the entire dataset.
-    """
-    paddle.v2.dataset.common.download(DATA_URL, "wmt16", DATA_MD5,
-                                      "wmt16.tar.gz")
-
-
-def convert(path, src_dict_size, trg_dict_size, src_lang):
-    """Converts dataset to recordio format.
-    """
-
-    paddle.v2.dataset.common.convert(
-        path,
-        train(
-            src_dict_size=src_dict_size,
-            trg_dict_size=trg_dict_size,
-            src_lang=src_lang),
-        1000,
-        "wmt16_train")
-    paddle.v2.dataset.common.convert(
-        path,
-        test(
-            src_dict_size=src_dict_size,
-            trg_dict_size=trg_dict_size,
-            src_lang=src_lang),
-        1000,
-        "wmt16_test")
-    paddle.v2.dataset.common.convert(
-        path,
-        validation(
-            src_dict_size=src_dict_size,
-            trg_dict_size=trg_dict_size,
-            src_lang=src_lang),
-        1000,
-        "wmt16_validation")
diff --git a/python/paddle/v2/evaluator.py b/python/paddle/v2/evaluator.py
deleted file mode 100644
index eaaadbe53bc776ffde800edb9bd6b313ad026627..0000000000000000000000000000000000000000
--- a/python/paddle/v2/evaluator.py
+++ /dev/null
@@ -1,36 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors.
All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import paddle.trainer_config_helpers.evaluators as evs -from config_base import __convert_to_v2__ -import inspect - -__all__ = [] - - -def initialize(): - def convert_to_new_name(nm): - return nm[:-len("_evaluator")] - - for __ev_name__ in filter(lambda x: x.endswith('_evaluator'), evs.__all__): - __ev__ = getattr(evs, __ev_name__) - __new_name__ = convert_to_new_name(__ev_name__) - - globals()[__new_name__] = __convert_to_v2__(__ev__, __new_name__, - __name__) - globals()[__new_name__].__name__ = __new_name__ - __all__.append(__new_name__) - - -initialize() diff --git a/python/paddle/v2/event.py b/python/paddle/v2/event.py deleted file mode 100644 index c11aa121c196497494f88de78d75f0f9dc072ba0..0000000000000000000000000000000000000000 --- a/python/paddle/v2/event.py +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Testing and training events. - -There are: - -* TestResult -* BeginIteration -* EndIteration -* BeginPass -* EndPass -""" -__all__ = [ - 'EndIteration', 'BeginIteration', 'BeginPass', 'EndPass', 'TestResult', - 'EndForwardBackward' -] - - -class WithMetric(object): - def __init__(self, evaluator): - import py_paddle.swig_paddle as api - if not isinstance(evaluator, api.Evaluator): - raise TypeError("Evaluator should be api.Evaluator type") - self.__evaluator__ = evaluator - - @property - def metrics(self): - names = self.__evaluator__.getNames() - retv = dict() - for each_name in names: - val = self.__evaluator__.getValue(each_name) - retv[each_name] = val - return retv - - -class TestResult(WithMetric): - """ - Result that trainer.test return. - """ - - def __init__(self, evaluator, cost): - super(TestResult, self).__init__(evaluator) - self.cost = cost - - -class BeginPass(object): - """ - Event On One Pass Training Start. - """ - - def __init__(self, pass_id): - self.pass_id = pass_id - - -class EndPass(WithMetric): - """ - Event On One Pass Training Complete. - To get the output of a specific layer, add "event.gm.getLayerOutputs('predict_layer')" - in your event_handler call back - """ - - def __init__(self, pass_id, evaluator, gm): - self.pass_id = pass_id - self.gm = gm - WithMetric.__init__(self, evaluator) - - -class BeginIteration(object): - """ - Event On One Batch Training Start. 
- """ - - def __init__(self, pass_id, batch_id): - self.pass_id = pass_id - self.batch_id = batch_id - - -class EndForwardBackward(object): - """ - Event On One Batch ForwardBackward Complete. - """ - - def __init__(self, pass_id, batch_id, gm): - self.pass_id = pass_id - self.batch_id = batch_id - self.gm = gm - - -class EndIteration(WithMetric): - """ - Event On One Batch Training Complete. - To get the output of a specific layer, add "event.gm.getLayerOutputs('predict_layer')" - in your event_handler call back - """ - - def __init__(self, pass_id, batch_id, cost, evaluator, gm): - self.pass_id = pass_id - self.batch_id = batch_id - self.cost = cost - self.gm = gm - WithMetric.__init__(self, evaluator) diff --git a/python/paddle/v2/image.py b/python/paddle/v2/image.py deleted file mode 100644 index 08d8bd68f9b7eb703c15f7cb5ad1300969db5713..0000000000000000000000000000000000000000 --- a/python/paddle/v2/image.py +++ /dev/null @@ -1,380 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -This file contains some common interfaces for image preprocess. -Many users are confused about the image layout. We introduce -the image layout as follows. - -- CHW Layout - - - The abbreviations: C=channel, H=Height, W=Width - - The default layout of image opened by cv2 or PIL is HWC. - PaddlePaddle only supports the CHW layout. And CHW is simply - a transpose of HWC. It must transpose the input image. - -- Color format: RGB or BGR - - OpenCV use BGR color format. PIL use RGB color format. Both - formats can be used for training. Noted that, the format should - be keep consistent between the training and inference peroid. -""" -import numpy as np -try: - import cv2 -except ImportError: - cv2 = None -import os -import tarfile -import cPickle - -__all__ = [ - "load_image_bytes", "load_image", "resize_short", "to_chw", "center_crop", - "random_crop", "left_right_flip", "simple_transform", "load_and_transform", - "batch_images_from_tar" -] - - -def batch_images_from_tar(data_file, - dataset_name, - img2label, - num_per_batch=1024): - """ - Read images from tar file and batch them into batch file. 
- - :param data_file: path of image tar file - :type data_file: string - :param dataset_name: 'train','test' or 'valid' - :type dataset_name: string - :param img2label: a dic with image file name as key - and image's label as value - :type img2label: dic - :param num_per_batch: image number per batch file - :type num_per_batch: int - :return: path of list file containing paths of batch file - :rtype: string - """ - batch_dir = data_file + "_batch" - out_path = "%s/%s" % (batch_dir, dataset_name) - meta_file = "%s/%s.txt" % (batch_dir, dataset_name) - - if os.path.exists(out_path): - return meta_file - else: - os.makedirs(out_path) - - tf = tarfile.open(data_file) - mems = tf.getmembers() - data = [] - labels = [] - file_id = 0 - for mem in mems: - if mem.name in img2label: - data.append(tf.extractfile(mem).read()) - labels.append(img2label[mem.name]) - if len(data) == num_per_batch: - output = {} - output['label'] = labels - output['data'] = data - cPickle.dump( - output, - open('%s/batch_%d' % (out_path, file_id), 'w'), - protocol=cPickle.HIGHEST_PROTOCOL) - file_id += 1 - data = [] - labels = [] - if len(data) > 0: - output = {} - output['label'] = labels - output['data'] = data - cPickle.dump( - output, - open('%s/batch_%d' % (out_path, file_id), 'w'), - protocol=cPickle.HIGHEST_PROTOCOL) - - with open(meta_file, 'a') as meta: - for file in os.listdir(out_path): - meta.write(os.path.abspath("%s/%s" % (out_path, file)) + "\n") - return meta_file - - -def load_image_bytes(bytes, is_color=True): - """ - Load an color or gray image from bytes array. - - Example usage: - - .. code-block:: python - - with open('cat.jpg') as f: - im = load_image_bytes(f.read()) - - :param bytes: the input image bytes array. - :type bytes: str - :param is_color: If set is_color True, it will load and - return a color image. Otherwise, it will - load and return a gray image. - :type is_color: bool - """ - flag = 1 if is_color else 0 - file_bytes = np.asarray(bytearray(bytes), dtype=np.uint8) - img = cv2.imdecode(file_bytes, flag) - return img - - -def load_image(file, is_color=True): - """ - Load an color or gray image from the file path. - - Example usage: - - .. code-block:: python - - im = load_image('cat.jpg') - - :param file: the input image path. - :type file: string - :param is_color: If set is_color True, it will load and - return a color image. Otherwise, it will - load and return a gray image. - :type is_color: bool - """ - # cv2.IMAGE_COLOR for OpenCV3 - # cv2.CV_LOAD_IMAGE_COLOR for older OpenCV Version - # cv2.IMAGE_GRAYSCALE for OpenCV3 - # cv2.CV_LOAD_IMAGE_GRAYSCALE for older OpenCV Version - # Here, use constant 1 and 0 - # 1: COLOR, 0: GRAYSCALE - flag = 1 if is_color else 0 - im = cv2.imread(file, flag) - return im - - -def resize_short(im, size): - """ - Resize an image so that the length of shorter edge is size. - - Example usage: - - .. code-block:: python - - im = load_image('cat.jpg') - im = resize_short(im, 256) - - :param im: the input image with HWC layout. - :type im: ndarray - :param size: the shorter edge size of image after resizing. - :type size: int - """ - h, w = im.shape[:2] - h_new, w_new = size, size - if h > w: - h_new = size * h / w - else: - w_new = size * w / h - im = cv2.resize(im, (w_new, h_new), interpolation=cv2.INTER_CUBIC) - return im - - -def to_chw(im, order=(2, 0, 1)): - """ - Transpose the input image order. The image layout is HWC format - opened by cv2 or PIL. Transpose the input image to CHW layout - according the order (2,0,1). - - Example usage: - - .. 
code-block:: python - - im = load_image('cat.jpg') - im = resize_short(im, 256) - im = to_chw(im) - - :param im: the input image with HWC layout. - :type im: ndarray - :param order: the transposed order. - :type order: tuple|list - """ - assert len(im.shape) == len(order) - im = im.transpose(order) - return im - - -def center_crop(im, size, is_color=True): - """ - Crop the center of image with size. - - Example usage: - - .. code-block:: python - - im = center_crop(im, 224) - - :param im: the input image with HWC layout. - :type im: ndarray - :param size: the cropping size. - :type size: int - :param is_color: whether the image is color or not. - :type is_color: bool - """ - h, w = im.shape[:2] - h_start = (h - size) / 2 - w_start = (w - size) / 2 - h_end, w_end = h_start + size, w_start + size - if is_color: - im = im[h_start:h_end, w_start:w_end, :] - else: - im = im[h_start:h_end, w_start:w_end] - return im - - -def random_crop(im, size, is_color=True): - """ - Randomly crop input image with size. - - Example usage: - - .. code-block:: python - - im = random_crop(im, 224) - - :param im: the input image with HWC layout. - :type im: ndarray - :param size: the cropping size. - :type size: int - :param is_color: whether the image is color or not. - :type is_color: bool - """ - h, w = im.shape[:2] - h_start = np.random.randint(0, h - size + 1) - w_start = np.random.randint(0, w - size + 1) - h_end, w_end = h_start + size, w_start + size - if is_color: - im = im[h_start:h_end, w_start:w_end, :] - else: - im = im[h_start:h_end, w_start:w_end] - return im - - -def left_right_flip(im, is_color=True): - """ - Flip an image along the horizontal direction. - Return the flipped image. - - Example usage: - - .. code-block:: python - - im = left_right_flip(im) - - :param im: input image with HWC layout or HW layout for gray image - :type im: ndarray - :param is_color: whether input image is color or not - :type is_color: bool - """ - if len(im.shape) == 3 and is_color: - return im[:, ::-1, :] - else: - return im[:, ::-1] - - -def simple_transform(im, - resize_size, - crop_size, - is_train, - is_color=True, - mean=None): - """ - Simply data argumentation for training. These operations include - resizing, croping and flipping. - - Example usage: - - .. code-block:: python - - im = simple_transform(im, 256, 224, True) - - :param im: The input image with HWC layout. - :type im: ndarray - :param resize_size: The shorter edge length of the resized image. - :type resize_size: int - :param crop_size: The cropping size. - :type crop_size: int - :param is_train: Whether it is training or not. - :type is_train: bool - :param is_color: whether the image is color or not. - :type is_color: bool - :param mean: the mean values, which can be element-wise mean values or - mean values per channel. 
-    :type mean: numpy array | list
-    """
-    im = resize_short(im, resize_size)
-    if is_train:
-        im = random_crop(im, crop_size, is_color=is_color)
-        if np.random.randint(2) == 0:
-            im = left_right_flip(im, is_color)
-    else:
-        im = center_crop(im, crop_size, is_color=is_color)
-    if len(im.shape) == 3:
-        im = to_chw(im)
-
-    im = im.astype('float32')
-    if mean is not None:
-        mean = np.array(mean, dtype=np.float32)
-        # mean may be one value per channel
-        if mean.ndim == 1 and is_color:
-            mean = mean[:, np.newaxis, np.newaxis]
-        elif mean.ndim != 1:
-            # elementwise mean: its shape must match the transformed image
-            assert mean.shape == im.shape
-        im -= mean
-
-    return im
-
-
-def load_and_transform(filename,
-                       resize_size,
-                       crop_size,
-                       is_train,
-                       is_color=True,
-                       mean=None):
-    """
-    Load an image from the input file `filename` and transform it for
-    data augmentation. Please refer to the `simple_transform` interface
-    for the transform operations.
-
-    Example usage:
-
-    .. code-block:: python
-
-        im = load_and_transform('cat.jpg', 256, 224, True)
-
-    :param filename: The file name of the input image.
-    :type filename: string
-    :param resize_size: The shorter edge length of the resized image.
-    :type resize_size: int
-    :param crop_size: The cropping size.
-    :type crop_size: int
-    :param is_train: Whether it is training or not.
-    :type is_train: bool
-    :param is_color: whether the image is color or not.
-    :type is_color: bool
-    :param mean: the mean values, which can be element-wise mean values or
-                 mean values per channel.
-    :type mean: numpy array | list
-    """
-    im = load_image(filename, is_color)
-    im = simple_transform(im, resize_size, crop_size, is_train, is_color, mean)
-    return im
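A sketch of a typical train-time preprocessing call (the file name and the per-channel mean values are illustrative; the mean is given in BGR order because `load_image` uses OpenCV):

.. code-block:: python

    # resize to 256 on the short edge, random-crop to 224, random flip,
    # transpose to CHW, then subtract a per-channel mean
    im = load_and_transform('cat.jpg', 256, 224, True,
                            mean=[103.94, 116.78, 123.68])
    assert im.shape == (3, 224, 224)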
diff --git a/python/paddle/v2/inference.py b/python/paddle/v2/inference.py
deleted file mode 100644
index 28ee042282a08be32c13d91312fd97b211277522..0000000000000000000000000000000000000000
--- a/python/paddle/v2/inference.py
+++ /dev/null
@@ -1,172 +0,0 @@
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import numpy
-import collections
-import topology
-import paddle
-import cPickle
-
-__all__ = ['infer', 'Inference']
-
-
-class Inference(object):
-    """
-    Inference combines a neural network output and parameters together
-    to do inference.
-
-    .. code-block:: python
-
-        inferer = Inference(output_layer=prediction, parameters=parameters)
-        for data_batch in batches:
-            print inferer.infer(data_batch)
-
-
-    :param output_layer: The neural network to run inference on.
-    :type output_layer: paddle.v2.config_base.Layer or the sequence
-                        of paddle.v2.config_base.Layer
-    :param parameters: The parameters dictionary.
-    :type parameters: paddle.v2.parameters.Parameters
-    """
-
-    def __init__(self, parameters, output_layer=None, fileobj=None):
-        import py_paddle.swig_paddle as api
-
-        if output_layer is not None:
-            topo = topology.Topology(output_layer)
-            gm = api.GradientMachine.createFromConfigProto(
-                topo.proto(), api.CREATE_MODE_TESTING, [api.PARAMETER_VALUE])
-            self.__data_types__ = topo.data_type()
-        elif fileobj is not None:
-            tmp = cPickle.load(fileobj)
-            gm = api.GradientMachine.createByConfigProtoStr(
-                tmp['protobin'], api.CREATE_MODE_TESTING,
-                [api.PARAMETER_VALUE])
-            self.__data_types__ = tmp['data_type']
-        else:
-            raise ValueError("Either output_layer or fileobj must be set")
-
-        for param in gm.getParameters():
-            val = param.getBuf(api.PARAMETER_VALUE)
-            name = param.getName()
-            assert isinstance(val, api.Vector)
-            val.copyFromNumpyArray(parameters.get(name).flatten())
-            # the setValueUpdated function is called in the randomize, zeroMem
-            # and load functions in paddle/legacy/parameter/Parameter.cpp. But
-            # in inference mode setValueUpdated is never called, so the
-            # parameter would not be dispatched in MultiGradientMachine for
-            # multi-GPU. Therefore setValueUpdated is called here, though it
-            # would be better to call this function in one place.
-            param.setValueUpdated()
-        self.__gradient_machine__ = gm
-
-    def iter_infer(self, input, feeding=None):
-        from data_feeder import DataFeeder
-        feeder = DataFeeder(self.__data_types__, feeding)
-        batch_size = len(input)
-
-        def __reader_impl__():
-            for each_sample in input:
-                yield each_sample
-
-        reader = paddle.batch(__reader_impl__, batch_size=batch_size)
-
-        self.__gradient_machine__.start()
-        for data_batch in reader():
-            yield self.__gradient_machine__.forwardTest(feeder(data_batch))
-        self.__gradient_machine__.finish()
-
-    def iter_infer_field(self, field, **kwargs):
-        if not isinstance(field, list) and not isinstance(field, tuple):
-            field = [field]
-
-        for result in self.iter_infer(**kwargs):
-            for each_result in result:
-                item = [each_result[each_field] for each_field in field]
-                yield item
-
-    def infer(self, input, field='value', flatten_result=True, **kwargs):
-        """
-        Run inference on a batch of input data.
-        :param input: input data batch. Should be a python iterable object.
-        :param field: output field.
-        """
-        retv = None
-        kwargs['input'] = input
-        for result in self.iter_infer_field(field=field, **kwargs):
-            if retv is None:
-                retv = [[] for i in xrange(len(result))]
-            for i, item in enumerate(result):
-                retv[i].append(item)
-
-        if retv is None:
-            return []
-
-        if flatten_result:
-            retv = [numpy.concatenate(out) for out in retv]
-
-        if len(retv) == 1:
-            return retv[0]
-        else:
-            return retv
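Besides passing output_layer, the constructor also accepts a file object holding a pickled topology (the 'protobin' plus 'data_type' dict unpickled above). A sketch, assuming such a file was produced earlier, for example by paddle.v2.topology.Topology.serialize_for_inference (the file name is illustrative):

.. code-block:: python

    with open('inference_topology.pkl', 'rb') as f:
        inferer = Inference(parameters=parameters, fileobj=f)
    probs = inferer.infer(input=data_batch)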
-
-
-def infer(output_layer, parameters, input, feeding=None, field='value'):
-    """
-    Infer a neural network by the given neural network output and parameters.
-    The user should pass either a batch of input data or a reader method.
-
-    Example usage for a single output_layer:
-
-    .. code-block:: python
-
-        result = paddle.infer(output_layer=prediction,
-                              parameters=parameters,
-                              input=SomeData)
-        print result
-
-    Example usage for multiple output_layers and fields:
-
-    .. code-block:: python

-        result = paddle.infer(output_layer=[prediction1, prediction2],
-                              parameters=parameters,
-                              input=SomeData,
-                              field=['id', 'value'])
-        print result
-
-    :param output_layer: output of the neural network that would be inferred
-    :type output_layer: paddle.v2.config_base.Layer or a list of
-                        paddle.v2.config_base.Layer
-    :param parameters: parameters of the neural network.
-    :type parameters: paddle.v2.parameters.Parameters
-    :param input: input data batch. Should be a python iterable object, and
-                  each element is the data batch.
-    :type input: collections.Iterable
-    :param feeding: Reader dictionary. By default it is generated from the
-                    input value.
-    :param field: The prediction field. It should be in [`value`, `id`,
-                  `prob`]. `value` and `prob` mean returning the prediction
-                  probabilities, `id` means returning the prediction labels.
-                  Default is `value`. Note that `prob` is only used when
-                  output_layer is beam_search or max_id.
-    :type field: str
-    :return: The prediction result. If there are multiple output_layers and
-             fields, the return order is output_layer1.field1,
-             output_layer2.field1, ..., output_layer1.field2,
-             output_layer2.field2 ...
-    :rtype: numpy.ndarray
-    """
-
-    inferer = Inference(output_layer=output_layer, parameters=parameters)
-    return inferer.infer(field=field, input=input, feeding=feeding)
diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py
deleted file mode 100644
index a188a03eb3698c972de92c9807f1bdb71a249330..0000000000000000000000000000000000000000
--- a/python/paddle/v2/layer.py
+++ /dev/null
@@ -1,326 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
-`paddle.v2.layer` is a part of the model config packages in paddle.v2. In API
-v2, we want to make Paddle a plain Python package. The model config package
-defines how to configure a neural network topology in Paddle Python code.
-
-The primary usage is shown below.
-
-.. code-block:: python
-
-    import paddle
-
-    img = paddle.layer.data(name='img',
-                            type=paddle.data_type.dense_vector(784))
-    hidden = paddle.layer.fc(input=img, size=200)
-    prediction = paddle.layer.fc(input=hidden, size=10,
-                                 act=paddle.activation.Softmax())
-
-    # use prediction instance where needed.
- parameters = paddle.parameters.create(cost) -""" -import collections -import copy -import re -import paddle.trainer_config_helpers.layers as v1_layers -import paddle.trainer.config_parser as cp -from paddle.proto.ModelConfig_pb2 import ModelConfig, SubModelConfig -from config_base import __convert_to_v2__ -import config_base - -__all__ = ['data', 'parse_network'] - - -def __need_to_keep__(name): - return name in [ - 'StaticInput', 'SubsequenceInput', 'GeneratedInput', 'LayerType', - 'layer_support', 'BaseGeneratedInput' - ] - - -def __need_to_wrap__(name): - return name not in ['AggregateLevel', 'ExpandLevel', 'BaseGeneratedInput'] - - -def __convert_name__(inname): - if __need_to_keep__(inname): - return inname - if inname == 'maxid_layer': - return 'max_id' - elif inname.endswith('memory') or inname.endswith( - '_seq') or inname.endswith('_sim') or inname == 'hsigmoid': - return inname - elif inname in [ - 'cross_entropy', 'multi_binary_label_cross_entropy', - 'cross_entropy_with_selfnorm' - ]: - return inname + "_cost" - elif inname.endswith('_cost'): - return inname - elif inname.endswith("_layer"): - return inname[:-len("_layer")] - else: - return inname - - -for name in v1_layers.__all__: - obj = getattr(v1_layers, name) - new_name = __convert_name__(name) - if callable(obj) and __need_to_wrap__(name): - globals()[new_name] = __convert_to_v2__(obj, new_name, __name__) - else: - globals()[new_name] = obj - __all__.append(new_name) - - -def __data_layer__(name, type, **kwargs): - l = v1_layers.data_layer(name, type.dim, **kwargs) - l.data_type = type - return l - - -def __map_data_docstr__(doc): - doc = re.sub(r'(data = [^\)]+)\).*', - "data = paddle.layer.data(name=\"input\", " - "type=paddle.data_type.dense_vector(1000))", doc) - - doc = re.sub(r':param size:.*', ':param type: Data type of this data layer', - doc) - doc = re.sub(r':type size:.*', ":type size: paddle.v2.data_type.InputType", - doc) - return doc - - -__data_layer__.__doc__ = __map_data_docstr__(v1_layers.data_layer.__doc__) - -data = __convert_to_v2__(__data_layer__, 'name', __name__) - - -def __get_used_layers__(output_layers): - layer_names = set() - parents = {} - - def add_parent(child, parent): - if child in parents: - parents[child].append(parent) - else: - parents[child] = [parent] - - def add_additional_parents(): - for sub_model in cp.g_config.model_config.sub_models: - if sub_model.name == 'root': - continue - for link in sub_model.in_links: - add_parent(link.link_name, link.layer_name) - add_parent(sub_model.name, link.layer_name) - for link in sub_model.out_links: - add_parent(link.link_name, link.layer_name) - add_parent(link.link_name, sub_model.name) - for mem in sub_model.memories: - if mem.boot_layer_name: - add_parent(mem.layer_name, mem.boot_layer_name) - add_parent(mem.link_name, mem.layer_name) - - if sub_model.HasField('generator'): - # according to the implementation of text generation - # in recurrent layer group, the generated word must be - # the first out link - add_parent(sub_model.out_links[0].layer_name, - sub_model.generator.eos_layer_name) - - def dfs_travel(layer_name): - if layer_name in layer_names: - return - layer_names.add(layer_name) - layer = cp.g_layer_map[layer_name] - - for inp in layer.inputs: - dfs_travel(inp.input_layer_name) - if layer.name in parents: - for p in parents[layer.name]: - dfs_travel(p) - - add_additional_parents() - - for layer in output_layers: - dfs_travel(layer.full_name) - - # print layer needs to be specially handled because no other - # layer depends 
on it. It is used to print the result of some - # layers when running the model for debug purpose. So we explicitly - # add a print layer to the topolty if its input is in the toplogy. - for layer in cp.g_config.model_config.layers: - if layer.type == 'print': - used = True - for inp in layer.inputs: - if inp.input_layer_name not in layer_names: - used = False - break - if used: - layer_names.add(layer.name) - - return layer_names - - -def __get_used_parameters__(layer_names, sub_models): - parameter_names = set() - for name in layer_names: - l = cp.g_layer_map[name] - for inp in l.inputs: - if inp.input_parameter_name: - parameter_names.add(inp.input_parameter_name) - if l.bias_parameter_name: - parameter_names.add(l.bias_parameter_name) - - for sub_model in sub_models: - for mem in sub_model.memories: - if mem.HasField("boot_bias_parameter_name"): - parameter_names.add(mem.boot_bias_parameter_name) - - return parameter_names - - -def __get_used_submodels__(layer_names): - submodel_names = set() - for submodel in cp.g_config.model_config.sub_models: - if submodel.name in layer_names: - submodel_names.add(submodel.name) - return submodel_names - - -def __get_submodel_data_out_links__(): - data_links = set() - for submodel in cp.g_config.model_config.sub_models: - for link in submodel.out_links: - if cp.g_layer_map[link.link_name].type == 'data': - data_links.add(link.link_name) - return data_links - - -def __get_used_evaluators__(layer_names): - evaluator_names = set() - for e in cp.g_config.model_config.evaluators: - used = True - for name in e.input_layers: - if name not in layer_names: - used = False - break - if used: - evaluator_names.add(e.name) - return evaluator_names - - -def __trim_submodel__(old_submodel, layer_names, input_layer_names, - output_layer_names, evaluator_names): - - submodel = SubModelConfig() - submodel.name = old_submodel.name - submodel.layer_names.extend( - filter(lambda x: x in layer_names, old_submodel.layer_names)) - submodel.input_layer_names.extend( - filter(lambda x: x in input_layer_names, submodel.layer_names)) - submodel.output_layer_names.extend( - filter(lambda x: x in output_layer_names, submodel.layer_names)) - submodel.evaluator_names.extend( - filter(lambda x: x in evaluator_names, old_submodel.evaluator_names)) - - submodel.is_recurrent_layer_group = old_submodel.is_recurrent_layer_group - submodel.reversed = old_submodel.reversed - - submodel.memories.extend( - filter(lambda x: x.link_name in layer_names, old_submodel.memories)) - target_inlinkid = (old_submodel.target_inlinkid - if old_submodel.HasField('target_inlinkid') else -1) - in_links = [] - for i, link in enumerate(old_submodel.in_links): - if link.link_name in layer_names or i == target_inlinkid: - in_links.append(link) - if i == target_inlinkid: - target_inlinkid = len(in_links) - 1 - submodel.in_links.extend(in_links) - - submodel.out_links.extend( - filter(lambda x: x.link_name in layer_names, old_submodel.out_links)) - if old_submodel.HasField('generator'): - submodel.generator.CopyFrom(old_submodel.generator) - - if old_submodel.HasField('target_inlinkid'): - submodel.target_inlinkid = target_inlinkid - return submodel - - -def parse_network(output_layers, extra_layers=None): - if not isinstance(output_layers, collections.Sequence): - output_layers = [output_layers] - if extra_layers is not None: - if not isinstance(extra_layers, collections.Sequence): - extra_layers = [extra_layers] - else: - extra_layers = [] - - layer_names = __get_used_layers__(list(output_layers) + 
list(extra_layers)) - submodel_names = __get_used_submodels__(layer_names) - submodel_names.add('root') - evaluator_names = __get_used_evaluators__(layer_names) - data_out_links = __get_submodel_data_out_links__() - input_layer_names = set() - output_layer_names = set() - - model_config = ModelConfig() - model_config.type = cp.g_config.model_config.type - - for layer in output_layers: - model_config.output_layer_names.append(layer.full_name) - output_layer_names.add(layer.full_name) - - for l in cp.g_config.model_config.layers: - if l.name not in layer_names: - continue - model_config.layers.extend([l]) - if l.type == 'data': - if l.name in data_out_links: - """ - In text generation, the outlink to save the generated word - indices is a data_layer defined in recurrent_group. This - data_layer is sure to be the output of the network in text - generation task, so this statement excludes such a special - data_layer from being inputs of the network, otherwise an error - will occur during data feeding. - """ - continue - model_config.input_layer_names.append(l.name) - input_layer_names.add(l.name) - - for e in cp.g_config.model_config.evaluators: - if e.name in evaluator_names: - model_config.evaluators.extend([e]) - - for s in cp.g_config.model_config.sub_models: - if s.name in submodel_names: - s = __trim_submodel__(s, layer_names, input_layer_names, - output_layer_names, evaluator_names) - model_config.sub_models.extend([s]) - - parameter_names = __get_used_parameters__(layer_names, - model_config.sub_models) - - for p in cp.g_config.model_config.parameters: - if p.name in parameter_names: - model_config.parameters.extend([p]) - - return model_config - - -def get_layer(name): - return config_base.__layer_map__.get(name) diff --git a/python/paddle/v2/master/.gitignore b/python/paddle/v2/master/.gitignore deleted file mode 100644 index a3ac6e1a33e74631136fc95574532284db7cd7cd..0000000000000000000000000000000000000000 --- a/python/paddle/v2/master/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -*.whl -*.so -*.pyc diff --git a/python/paddle/v2/master/__init__.py b/python/paddle/v2/master/__init__.py deleted file mode 100644 index efaeeabfa2652a16c510453ba672e7d0fe9debc9..0000000000000000000000000000000000000000 --- a/python/paddle/v2/master/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from client import * - -__all__ = ['client'] diff --git a/python/paddle/v2/master/client.py b/python/paddle/v2/master/client.py deleted file mode 100644 index d62e7cc28ef0f15a594949afa038bc3e8f6deccc..0000000000000000000000000000000000000000 --- a/python/paddle/v2/master/client.py +++ /dev/null @@ -1,95 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import ctypes
-import os
-
-__lib__ = None
-
-
-def get_c_lib():
-    global __lib__
-    if __lib__ is None:
-        path = os.path.join(os.path.dirname(__file__), "libpaddle_master.so")
-        __lib__ = ctypes.cdll.LoadLibrary(path)
-    return __lib__
-
-
-class client(object):
-    """
-    client is a client to the master server.
-    """
-
-    def __init__(self, etcd_endpoints, timeout_sec, buf_size=0):
-        self.c = get_c_lib().paddle_new_etcd_master_client(
-            etcd_endpoints, timeout_sec, buf_size)
-
-    def request_save_model(self, trainer_id, block_ms):
-        """request to save model
-
-        Conventionally the 0-th trainer saves the model. But in
-        distributed training, any trainer could be killed. This
-        function asks the master server whether the trainer should
-        proceed with saving the model.
-
-        :param trainer_id: trainer id.
-        :param block_ms: number of milliseconds for which other
-            save-model requests will be blocked if this save-model
-            request succeeded.
-
-        Returns:
-            int: 1 if the save-model request is approved, 0 if the
-            request is rejected because another trainer is saving the
-            model, -1 if an error happened.
-
-        """
-        return get_c_lib().paddle_request_save_model(self.c, trainer_id,
-                                                     block_ms)
-
-    def release(self):
-        get_c_lib().paddle_release_master_client(self.c)
-        self.c = None
-
-    def set_dataset(self, paths):
-        holder_type = ctypes.c_char_p * len(paths)
-        holder = holder_type()
-        for idx, path in enumerate(paths):
-            c_ptr = ctypes.c_char_p(path)
-            holder[idx] = c_ptr
-        get_c_lib().paddle_set_dataset(self.c, holder, len(paths))
-
-    def next_record(self):
-        """gets the next record for training
-
-        Returns:
-            string: the record.
-            int: error code, 0 if successful, < 0 otherwise.
-        """
-        p = ctypes.c_char_p()
-        ret = ctypes.pointer(p)
-        size = get_c_lib().paddle_next_record(self.c, ret)
-        if size < 0:
-            # Error
-            return None, size
-
-        if size == 0:
-            # Empty record
-            return "", 0
-
-        record = ret.contents.value[:size]
-        # Memory created from C should be freed.
-        get_c_lib().mem_free(ret.contents)
-        return record, 0
-
-    def paddle_start_get_records(self, pass_id):
-        get_c_lib().paddle_start_get_records(self.c, pass_id)
diff --git a/python/paddle/v2/minibatch.py b/python/paddle/v2/minibatch.py
deleted file mode 100644
index 3c6a53db3c2287e8ef5931a06ca5dad455665ee0..0000000000000000000000000000000000000000
--- a/python/paddle/v2/minibatch.py
+++ /dev/null
@@ -1,43 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-__all__ = ['batch']
-
-
-def batch(reader, batch_size, drop_last=True):
-    """
-    Create a batched reader.
-
-    :param reader: the data reader to read from.
-    :type reader: callable
-    :param batch_size: size of each mini-batch
-    :type batch_size: int
-    :param drop_last: whether to drop the last batch if its size is less
-                      than batch_size.
-    :type drop_last: bool
-    :return: the batched reader.
-    :rtype: callable
-    """
-
-    def batch_reader():
-        r = reader()
-        b = []
-        for instance in r:
-            b.append(instance)
-            if len(b) == batch_size:
-                yield b
-                b = []
-        if not drop_last and len(b) != 0:
-            yield b
-
-    return batch_reader
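The batching behavior in a nutshell (the toy reader below is illustrative):

.. code-block:: python

    def reader():
        for i in range(10):
            yield i

    # drop_last=True (the default) drops the short trailing batch:
    # [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
    print list(batch(reader, 3)())
    # drop_last=False keeps it: [..., [9]]
    print list(batch(reader, 3, drop_last=False)())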
diff --git a/python/paddle/v2/networks.py b/python/paddle/v2/networks.py
deleted file mode 100644
index 8ae9f3b202d8c101b051c38d5850b03f54217a95..0000000000000000000000000000000000000000
--- a/python/paddle/v2/networks.py
+++ /dev/null
@@ -1,33 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import paddle.trainer_config_helpers.networks as conf_nw
-import inspect
-from config_base import __convert_to_v2__
-
-__all__ = []
-
-
-def __initialize__():
-    for each_subnetwork in conf_nw.__all__:
-        if each_subnetwork in ['inputs', 'outputs']:
-            continue
-        func = getattr(conf_nw, each_subnetwork)
-        globals()[each_subnetwork] = func
-        globals()[each_subnetwork].__name__ = each_subnetwork
-        global __all__
-        __all__.append(each_subnetwork)
-
-
-__initialize__()
diff --git a/python/paddle/v2/op.py b/python/paddle/v2/op.py
deleted file mode 100644
index 03f3b9b9ef273613cb60c0530005e0984f904ded..0000000000000000000000000000000000000000
--- a/python/paddle/v2/op.py
+++ /dev/null
@@ -1,120 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
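The `__initialize__` loop in networks.py above re-exports every v1 network helper under paddle.v2.networks. A sketch of the intended call style (the layer name and sizes are illustrative; simple_lstm is one of the helpers defined in paddle.trainer_config_helpers.networks):

.. code-block:: python

    import paddle.v2 as paddle

    seq = paddle.layer.data(
        name='seq', type=paddle.data_type.dense_vector_sequence(128))
    lstm_out = paddle.networks.simple_lstm(input=seq, size=256)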
- -import layer -import activation as act -from config_base import Layer -from paddle.trainer_config_helpers.attrs import is_compatible_with -from paddle.trainer_config_helpers.default_decorators import wrap_name_default - -__all__ = [] - - -def __register_unary_math_op__(op_name, act): - def op(input, name=None): - return layer.mixed( - input=[layer.identity_projection(input=input)], name=name, act=act) - - op = wrap_name_default(op_name)(op) - op.__doc__ = type(act).__doc__ - globals()[op_name] = op - __all__.append(op_name) - - -__register_unary_math_op__('exp', act.Exp()) -__register_unary_math_op__('log', act.Log()) -__register_unary_math_op__('abs', act.Abs()) -__register_unary_math_op__('sigmoid', act.Sigmoid()) -__register_unary_math_op__('tanh', act.Tanh()) -__register_unary_math_op__('square', act.Square()) -__register_unary_math_op__('relu', act.Relu()) -__register_unary_math_op__('sqrt', act.Sqrt()) -__register_unary_math_op__('reciprocal', act.Reciprocal()) -__register_unary_math_op__('softmax', act.Softmax()) - - -def __add__(layeroutput, other): - if is_compatible_with(other, float): - return layer.slope_intercept(input=layeroutput, intercept=other) - if not isinstance(other, Layer): - raise TypeError("Layer can only be added with" - " another Layer or a number") - if layeroutput.size == other.size: - return layer.mixed(input=[ - layer.identity_projection(input=layeroutput), - layer.identity_projection(input=other) - ]) - if other.size != 1 and layeroutput.size != 1: - raise TypeError("Two Layers can be added only if they have equal size" - " or one of their sizes is 1. sizes are %s and %s" % - (layeroutput.size, other.size)) - elif layeroutput.size == 1: - tmp = layeroutput - layeroutput = other - other = tmp - other = layer.repeat(other, layeroutput.size) - return layer.mixed(input=[ - layer.identity_projection(input=layeroutput), - layer.identity_projection(input=other) - ]) - - -Layer.__radd__ = __add__ -Layer.__add__ = __add__ - - -def __neg__(layeroutput): - return layer.slope_intercept(input=layeroutput, slope=-1.0) - - -Layer.__neg__ = __neg__ - - -def __sub__(layeroutput, other): - if is_compatible_with(other, float): - return layer.slope_intercept(input=layeroutput, intercept=-other) - if not isinstance(other, Layer): - raise TypeError("Layer can only be subtracted by" - " another Layer or a number") - return __add__(layeroutput, -other) - - -Layer.__sub__ = __sub__ - - -def __rsub__(layeroutput, other): - neg = layer.slope_intercept(input=layeroutput, slope=-1.0) - return __add__(neg, other) - - -Layer.__rsub__ = __rsub__ - - -def __mul__(layeroutput, other): - if is_compatible_with(other, float): - return layer.slope_intercept(input=layeroutput, slope=other) - if not isinstance(other, Layer): - raise TypeError("Layer can only be multiplied with" - " another Layer or a number") - elif layeroutput.size == 1: - return layer.scaling(input=other, weight=layeroutput) - elif other.size == 1: - return layer.scaling(input=layeroutput, weight=other) - else: - raise TypeError("At least one of the operands of '*' must be a number" - " or a Layer with size=1") - - -Layer.__mul__ = __mul__ -Layer.__rmul__ = __mul__ diff --git a/python/paddle/v2/optimizer.py b/python/paddle/v2/optimizer.py deleted file mode 100644 index caef5f484e2d629f2298ced457e89ff93a536311..0000000000000000000000000000000000000000 --- a/python/paddle/v2/optimizer.py +++ /dev/null @@ -1,297 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors.
All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import paddle.trainer_config_helpers.config_parser_utils as config_parser_utils -import paddle.trainer_config_helpers.optimizers as v1_optimizers -from paddle.proto.OptimizerConfig_pb2 import OptimizerConfig - -__all__ = [ - 'Momentum', 'Adam', 'Adamax', 'AdaGrad', 'DecayedAdaGrad', 'AdaDelta', - 'RMSProp', 'ModelAverage', 'L2Regularization' -] - - -class Optimizer(object): - def __init__(self, **kwargs): - import py_paddle.swig_paddle as swig_api - if 'batch_size' in kwargs: - del kwargs['batch_size'] # not important for python library. - - def __impl__(): - v1_optimizers.settings(batch_size=1, **kwargs) - - self.__opt_conf_proto__ = config_parser_utils.parse_optimizer_config( - __impl__) - self.__opt_conf__ = swig_api.OptimizationConfig.createFromProto( - self.__opt_conf_proto__) - - def enable_types(self): - """ - Get enable_types for each optimizer. - enable_types = [value, gradient, momentum, etc.] - For each optimizer (SGD, Adam), the GradientMachine should enable - different buffers. - """ - import py_paddle.swig_paddle as swig_api - tmp = swig_api.ParameterOptimizer.create(self.__opt_conf__) - assert isinstance(tmp, swig_api.ParameterOptimizer) - return tmp.getParameterTypes() - - def __create_local_updater__(self): - import py_paddle.swig_paddle as swig_api - return swig_api.ParameterUpdater.createLocalUpdater(self.__opt_conf__) - - def __create_remote_updater__(self, pass_num, use_sparse_updater): - import py_paddle.swig_paddle as swig_api - return swig_api.ParameterUpdater.createRemoteUpdater( - self.__opt_conf__, pass_num, use_sparse_updater) - - def __create_new_remote_updater__(self, pserver_spec, use_etcd): - import py_paddle.swig_paddle as swig_api - return swig_api.ParameterUpdater.createNewRemoteUpdater( - self.__opt_conf__, pserver_spec, use_etcd) - - def create_updater(self, is_local, num_passes, use_sparse_updater, - pserver_spec, use_etcd): - """ - Create the proper parameter_updater according to the configuration. - :param is_local: create a local or remote parameter updater - :param num_passes: the remote parameter updater will use this to - configure the parameter server. - :param use_sparse_updater: when using a remote updater, if some - parameter is sparse, the updater should do some extra things: - - .. code-block:: python - - if use_sparse_remote_updater: - gradient_machine.prefetch(in_args) - parameter_updater.getParametersRemote() - - :param pserver_spec: pserver location, e.g. localhost:3000; if etcd is - used, pserver_spec should be the etcd endpoints, e.g. http://localhost:2379 - :return: parameter_updater - """ - if is_local: - parameter_updater = self.__create_local_updater__() - else: - if pserver_spec is None: - parameter_updater = self.__create_remote_updater__( - num_passes, use_sparse_updater) - else: - parameter_updater = self.__create_new_remote_updater__( - pserver_spec, use_etcd) - return parameter_updater - - -class Momentum(Optimizer): - """ - Momentum Optimizer.
- - When sparse=False, the momentum update formula is as follows: - - .. math:: - - v_{t} &= k * v_{t-1} - \\gamma_t (g_{t} + \\lambda w_{t-1}) \\\\ - w_{t} &= w_{t-1} + v_{t} \\\\ - - where :math:`k` is the momentum, :math:`\\lambda` is the decay rate, - :math:`\\gamma_t` is the learning rate at the t'th iteration, - :math:`w_{t}` is the weight at the t'th iteration, - and :math:`v_{t}` is the history momentum variable. - - When sparse=True, the update scheme: - - .. math:: - - \\alpha_t &= \\alpha_{t-1} / k \\\\ - \\beta_t &= \\beta_{t-1} / (1 + \\lambda \\gamma_t) \\\\ - u_t &= u_{t-1} - \\alpha_t \\gamma_t g_t \\\\ - v_t &= v_{t-1} + \\tau_{t-1} \\alpha_t \\gamma_t g_t \\\\ - \\tau_t &= \\tau_{t-1} + \\beta_t / \\alpha_t - - where :math:`k` is the momentum, :math:`\\lambda` is the decay rate, - :math:`\\gamma_t` is the learning rate at the t'th iteration. - - :param momentum: the momentum factor. - :type momentum: float - :param sparse: with sparse support or not, False by default. - :type sparse: bool - """ - - def __init__(self, momentum=None, sparse=False, **kwargs): - learning_method = v1_optimizers.MomentumOptimizer( - momentum=momentum, sparse=sparse) - super(Momentum, self).__init__( - learning_method=learning_method, **kwargs) - - -class Adam(Optimizer): - """ - Adam optimizer. - For details, please refer to `Adam: A Method for Stochastic Optimization - `_ - - .. math:: - - m(w, t) & = \\beta_1 m(w, t-1) + (1 - \\beta_1) \\nabla Q_i(w) \\\\ - v(w, t) & = \\beta_2 v(w, t-1) + (1 - \\beta_2)(\\nabla Q_i(w)) ^2 \\\\ - w & = w - \\frac{\\eta m(w, t)}{\\sqrt{v(w,t) + \\epsilon}} - - :param beta1: the :math:`\\beta_1` in the equation. - :type beta1: float - :param beta2: the :math:`\\beta_2` in the equation. - :type beta2: float - :param epsilon: the :math:`\\epsilon` in the equation. It is used to - prevent division by zero. - :type epsilon: float - """ - - def __init__(self, beta1=0.9, beta2=0.999, epsilon=1e-8, **kwargs): - learning_method = v1_optimizers.AdamOptimizer( - beta1=beta1, beta2=beta2, epsilon=epsilon) - super(Adam, self).__init__(learning_method=learning_method, **kwargs) - - -class Adamax(Optimizer): - """ - Adamax optimizer. - - For details, please refer to `Adam: A Method for Stochastic Optimization - `_ - - .. math:: - - m_t & = \\beta_1 * m_{t-1} + (1-\\beta_1)* \\nabla Q_i(w) \\\\ - u_t & = max(\\beta_2*u_{t-1}, abs(\\nabla Q_i(w))) \\\\ - w_t & = w_{t-1} - (\\eta/(1-\\beta_1^t))*m_t/u_t - - :param beta1: the :math:`\\beta_1` in the equation. - :type beta1: float - :param beta2: the :math:`\\beta_2` in the equation. - :type beta2: float - """ - - def __init__(self, beta1=0.9, beta2=0.999, **kwargs): - learning_method = v1_optimizers.AdamaxOptimizer( - beta1=beta1, beta2=beta2) - super(Adamax, self).__init__(learning_method=learning_method, **kwargs) - - -class AdaGrad(Optimizer): - """ - AdaGrad (ADAptive GRAdient algorithm) optimizer. - - For details, please refer to `Adaptive Subgradient Methods for - Online Learning and Stochastic Optimization - `_. - - .. math:: - - G &= \\sum_{\\tau=1}^{t} g_{\\tau} g_{\\tau}^T \\\\ - w & = w - \\eta diag(G)^{-\\frac{1}{2}} \\circ g - """ - - def __init__(self, **kwargs): - learning_method = v1_optimizers.AdaGradOptimizer() - super(AdaGrad, self).__init__(learning_method=learning_method, **kwargs) - - -class DecayedAdaGrad(Optimizer): - """ - AdaGrad method with decayed sum gradients. The equations of this method - are as follows. - - ..
math:: - - E(g_t^2) &= \\rho * E(g_{t-1}^2) + (1-\\rho) * g^2 \\\\ - learning\\_rate &= 1/sqrt( E(g_t^2) + \\epsilon ) - - :param rho: The :math:`\\rho` parameter in that equation - :type rho: float - :param epsilon: The :math:`\\epsilon` parameter in that equation. - :type epsilon: float - """ - - def __init__(self, rho=0.95, epsilon=1e-06, **kwargs): - learning_method = v1_optimizers.DecayedAdaGradOptimizer( - rho=rho, epsilon=epsilon) - super(DecayedAdaGrad, self).__init__( - learning_method=learning_method, **kwargs) - - -class AdaDelta(Optimizer): - """ - AdaDelta method. For details of AdaDelta, please refer to - `ADADELTA: AN ADAPTIVE LEARNING RATE METHOD - `_. - - .. math:: - - E(g_t^2) &= \\rho * E(g_{t-1}^2) + (1-\\rho) * g^2 \\\\ - learning\\_rate &= sqrt( ( E(dx_{t-1}^2) + \\epsilon ) / ( \\ - E(g_t^2) + \\epsilon ) ) \\\\ - E(dx_t^2) &= \\rho * E(dx_{t-1}^2) + (1-\\rho) * (-g*learning\\_rate)^2 - - :param rho: :math:`\\rho` in the equation - :type rho: float - :param epsilon: :math:`\\epsilon` in the equation - :type epsilon: float - """ - - def __init__(self, rho=0.95, epsilon=1e-06, **kwargs): - learning_method = v1_optimizers.AdaDeltaOptimizer( - rho=rho, epsilon=epsilon) - super(AdaDelta, self).__init__( - learning_method=learning_method, **kwargs) - - -class RMSProp(Optimizer): - """ - RMSProp (Root Mean Square Propagation) optimizer. For details, please - refer to this `slide `_. - - The equations of this method are as follows: - - .. math:: - - v(w, t) & = \\rho v(w, t-1) + (1 - \\rho)(\\nabla Q_{i}(w))^2 \\\\ - w & = w - \\frac{\\eta} {\\sqrt{v(w,t) + \\epsilon}} \\nabla Q_{i}(w) - - :param rho: the :math:`\\rho` in the equation. The forgetting factor. - :type rho: float - :param epsilon: the :math:`\\epsilon` in the equation. - :type epsilon: float - """ - - def __init__(self, rho=0.95, epsilon=1e-6, **kwargs): - learning_method = v1_optimizers.RMSPropOptimizer( - rho=rho, epsilon=epsilon) - super(RMSProp, self).__init__(learning_method=learning_method, **kwargs) - - -ModelAverage = v1_optimizers.ModelAverage -L2Regularization = v1_optimizers.L2Regularization - -if __name__ == '__main__': - import py_paddle.swig_paddle as swig_api - swig_api.initPaddle('--use_gpu=false') - for opt in [ - Momentum(), Adam(), Adamax(), AdaGrad(), DecayedAdaGrad(), - AdaDelta(), RMSProp(), Adam( - model_average=ModelAverage(average_window=0.5), - regularization=L2Regularization(rate=0.5), - gradient_clipping_threshold=25) - ]: - print opt, opt.enable_types() diff --git a/python/paddle/v2/parameters.py b/python/paddle/v2/parameters.py deleted file mode 100644 index 7b7d1a1d1672802e0e91a857100604758683224e..0000000000000000000000000000000000000000 --- a/python/paddle/v2/parameters.py +++ /dev/null @@ -1,441 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License.
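To make the Adam formulas above concrete, here is a minimal NumPy sketch of one documented update step. It mirrors the docstring math only (no bias correction, as written there) and is not the deleted C++ implementation; all names in it are illustrative.

.. code-block:: python

    import numpy as np


    def adam_step(w, grad, m, v, eta=0.001, beta1=0.9, beta2=0.999,
                  epsilon=1e-8):
        # First and second moment estimates, then the weight update,
        # exactly as in the Adam docstring above.
        m = beta1 * m + (1 - beta1) * grad
        v = beta2 * v + (1 - beta2) * grad ** 2
        w = w - eta * m / np.sqrt(v + epsilon)
        return w, m, v


    w = np.zeros(3)
    m = np.zeros(3)
    v = np.zeros(3)
    for _ in range(100):
        grad = 2.0 * (w - 0.5)  # gradient of sum((w - 0.5)^2)
        w, m, v = adam_step(w, grad, m, v)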
- -import numpy as np -from paddle.proto.ParameterConfig_pb2 import ParameterConfig -from collections import OrderedDict -import paddle.trainer.config_parser as cp -import struct -import tarfile -import cStringIO -from topology import Topology - -__all__ = ['Parameters', 'create'] - - -def create(layers): - """ - Create a parameter pool from a topology. - - :param layers: - :return: - """ - topology = Topology(layers) - pool = Parameters() - initializers = cp.g_parameter_initializer_map - for param in topology.proto().parameters: - pool.__append_config__(param) - if param.name in initializers: - pool[param.name] = initializers[param.name](param.name) - return pool - - -class Parameters(object): - """ - `Parameters` manages all the learnable parameters in a neural network. - It stores parameters' information in an OrderedDict. The key is - the name of a parameter, and the value is a parameter's configuration (in - protobuf format), such as initialization mean and std, its size, whether it - is a static parameter, and so on. - - :param __param_conf__: store the configurations of learnable parameters in - the network in an OrderedDict. Parameters are added one by one into the - dict by following their creation order in the network: parameters of - the previous layers in a network are created first. You can visit the - parameters from bottom to top by iterating over this dict. - :type __param_conf__: OrderedDict - :param __gradient_machines__: all of the parameters in a neural network are - appended to a PaddlePaddle gradient machine, which is used internally to - copy parameter values between the C++ and Python ends. - :type __gradient_machines__: list - :param __tmp_params__: a dict to store dummy parameters if no - __gradient_machines__ is appended to `Parameters`. - :type __tmp_params__: dict - - Basic usage is - - .. code-block:: python - - data = paddle.layers.data(...) - ... - out = paddle.layers.fc(...) - - parameters = paddle.parameters.create(out) - - parameter_names = parameters.names() - fc_mat = parameters.get('fc') - print fc_mat - """ - - def __init__(self): - self.__param_conf__ = OrderedDict() - self.__gradient_machines__ = [] - self.__tmp_params__ = dict() - - def __append_config__(self, param_conf): - """ - Append a parameter configuration. It is used to initialize Parameters - and should be invoked only in paddle.parameters.create. - - :param param_conf: The parameter configuration in protobuf - :type param_conf: ParameterConfig - :return: Nothing - """ - - if not isinstance(param_conf, ParameterConfig): - raise ValueError("param_conf must be paddle.proto.ParameterConfig") - - if param_conf.name in self.__param_conf__: - raise ValueError("duplicated parameter %s" % param_conf.name) - - self.__param_conf__[param_conf.name] = param_conf - - def update_param_conf(self, model_config): - for p in model_config.parameters: - self.__param_conf__[p.name] = p - - def keys(self): - """ - Keys are the names of the parameters. - - :return: list of parameter names - :rtype: list - """ - return self.__param_conf__.keys() - - def names(self): - """ - Names of the parameters. - - :return: list of parameter names - :rtype: list - """ - return self.keys() - - def has_key(self, key): - """ - has_key returns True if there is a parameter whose name equals key. - - :param key: Parameter name - :type key: basestring - :return: True if contains such a key - """ - return key in self.__param_conf__.keys() - - def __iter__(self): - """ - Return an iterator over parameter names. It is used by the `for` loop - or the `in` operator. - - ..
code-block:: python - - parameters = paddle.parameters.create(...) - if "fc_param" in parameters: - print 'OK' - :return: an iterator over parameter names - :rtype: iterator - """ - return iter(self.__param_conf__) - - def __getter_inner(self, key, param_type): - import py_paddle.swig_paddle as api - shape = self.get_shape(key) - - if len(self.__gradient_machines__) == 0: - # create a new parameter in python numpy. - if key in self.__tmp_params__: - return self.__tmp_params__[key] - else: - return np.ndarray(shape=shape, dtype=np.float32) - else: - for each_gradient_machine in self.__gradient_machines__: - param = __get_parameter_in_gradient_machine__( - each_gradient_machine, key) - # to simplify the implementation for now, we always copy - # from C++ - assert isinstance(param, api.Parameter) - val = param.getBuf(param_type) - assert isinstance(val, api.Vector) - val = val.copyToNumpyArray() - return val - # else continue - - raise RuntimeError("Unexpected branch") - - def __getitem__(self, key): - """ - Get parameter by parameter name. It uses Python dict syntax. - - :note: It will always copy the parameter from the C++ side. - :param key: Parameter name - :type key: basestring - :return: parameter value - :rtype: np.ndarray - """ - import py_paddle.swig_paddle as api - return self.__getter_inner(key, api.PARAMETER_VALUE) - - def get_shape(self, key): - """ - Get the shape of the parameter. - - :param key: parameter name - :type key: basestring - :return: parameter's shape - :rtype: tuple - """ - if not isinstance(key, basestring): - raise ValueError("parameter name should be string") - if not self.has_key(key): - raise ValueError("No such parameter %s" % key) - conf = self.__param_conf__[key] - dims = conf.dims if conf.dims else (1, conf.size) - return tuple(map(int, dims)) - - def __setitem__(self, key, value): - """ - Set parameter by parameter name & value. It uses Python dict syntax. - - :note: It will always copy the parameter to the C++ side. - :param key: Parameter name - :type key: basestring - :param value: Parameter matrix. - :type value: np.ndarray - :return: Nothing - """ - - if not isinstance(value, np.ndarray): - raise ValueError("Must return ndarray") - value = value.astype(dtype=np.float32) - shape = self.get_shape(key) - if value.shape != shape: - raise ValueError("Value shape mismatch, expect %s, got %s" % - (shape, value.shape)) - - if len(self.__gradient_machines__) == 0: - self.__tmp_params__[key] = value - else: - for each_gradient_machine in self.__gradient_machines__: - __copy_parameter_to_gradient_machine__(each_gradient_machine, - key, value) - - def get(self, parameter_name): - """ - Get parameter by parameter name. - - :note: It will always copy the parameter from the C++ side. - :param parameter_name: parameter name - :type parameter_name: basestring - :return: The parameter matrix. - :rtype: np.ndarray - """ - return self.__getitem__(key=parameter_name) - - def get_grad(self, key): - """ - Get gradient by parameter name. - - :note: It will always copy the parameter from the C++ side. - :param key: parameter name - :type key: basestring - :return: The gradient matrix. - :rtype: np.ndarray - """ - import py_paddle.swig_paddle as api - if self.__param_conf__[key].is_static: - return np.zeros(self.__param_conf__[key].size, dtype=np.float32) - - return self.__getter_inner(key, api.PARAMETER_GRADIENT) - - def set(self, parameter_name, value): - """ - Set parameter by parameter name & matrix.
- - :param parameter_name: parameter name - :type parameter_name: basestring - :param value: parameter matrix - :type value: np.ndarray - :return: Nothing. - """ - self.__setitem__(key=parameter_name, value=value) - - def append_gradient_machine(self, gradient_machine): - """ - Append a gradient machine to the parameters. This method is used - internally in Trainer.train. - - :param gradient_machine: PaddlePaddle C++ GradientMachine object. - :type gradient_machine: api.GradientMachine - :return: - """ - import py_paddle.swig_paddle as api - if not isinstance(gradient_machine, api.GradientMachine): - raise ValueError("gradient_machine should be api.GradientMachine") - - if len(self.__tmp_params__) != 0: - for name, val in self.__tmp_params__.iteritems(): - try: - __copy_parameter_to_gradient_machine__(gradient_machine, - name, val) - except ValueError: - # If no such parameter in gradient machine, then don't copy - pass - - self.__gradient_machines__.append(gradient_machine) - - def serialize(self, name, f): - """ - - :param name: - :param f: - :type f: file - :return: - """ - param = self.get(name) - size = reduce(lambda a, b: a * b, param.shape) - f.write(struct.pack("IIQ", 0, 4, size)) - param = param.astype(np.float32) - s = param.tostring() - wrote_size = 0 - buf = buffer(s, wrote_size, 65535) - while buf: # f.write crashes with big data blobs. - f.write(buf) - wrote_size += 65535 - buf = buffer(s, wrote_size, 65535) - - def deserialize(self, name, f): - """ - - :param name: - :param f: - :type f: file - :return: - """ - f.read(16) # header - arr = np.frombuffer(f.read(), dtype=np.float32) - self.set(name, arr.reshape(self.get_shape(name))) - - def to_tar(self, f): - """ - Save parameters to a tar file. - - WARNING: You should use `paddle.v2.trainer.SGD.save_parameter_to_tar(f)` - to save parameters most of the time. Otherwise, some settings such - as model average will not take effect. - - :param f: - :type f: file - :return: - """ - tar = tarfile.TarFile(fileobj=f, mode='w') - for nm in self.names(): - buf = cStringIO.StringIO() - self.serialize(nm, buf) - tarinfo = tarfile.TarInfo(name=nm) - buf.seek(0) - tarinfo.size = len(buf.getvalue()) - tar.addfile(tarinfo, buf) - - conf = self.__param_conf__[nm] - confStr = conf.SerializeToString() - tarinfo = tarfile.TarInfo(name="%s.protobuf" % nm) - tarinfo.size = len(confStr) - buf = cStringIO.StringIO(confStr) - buf.seek(0) - tar.addfile(tarinfo, fileobj=buf) - - @staticmethod - def from_tar(f): - """ - Create a `Parameters` object from the given file. The returned - `Parameters` only contains the parameters in this file. It assumes - the parameters in the file are the same as the ones defined in the - network, so it can be used, for example, for inference. - - :param f: the initialized model file. - :type f: tar file - :return: A Parameters object. - :rtype: Parameters. - """ - params = Parameters() - tar = tarfile.TarFile(fileobj=f, mode='r') - for finfo in tar: - assert isinstance(finfo, tarfile.TarInfo) - if finfo.name.endswith('.protobuf'): - f = tar.extractfile(finfo) - conf = ParameterConfig() - conf.ParseFromString(f.read()) - params.__append_config__(conf) - - for param_name in params.names(): - f = tar.extractfile(param_name) - params.deserialize(param_name, f) - return params - - def init_from_tar(self, f, exclude_params=[]): - """ - Different from `from_tar`, this interface can be used to - initialize part of the network parameters from another saved model. - - :param f: the initialized model file.
- :type f: tar file - :param exclude_params: the names of parameters that should - not be initialized from the model file. - :type exclude_params: list of strings - :return: Nothing. - """ - - tar_param = Parameters.from_tar(f) - for pname in tar_param.names(): - if pname in self.names() and pname not in exclude_params: - self.set(pname, tar_param.get(pname)) - - -def __get_parameter_in_gradient_machine__(gradient_machine, name): - """ - - :param gradient_machine: - :type gradient_machine: api.GradientMachine - :param name: - :return: - :rtype: api.Parameter - """ - params = filter(lambda p: p.getName() == name, - gradient_machine.getParameters()) - - if len(params) == 0: - raise ValueError("No such parameter") - elif len(params) > 1: - raise ValueError("Unexpected branch") - else: - return params[0] - - -def __copy_parameter_to_gradient_machine__(gradient_machine, name, arr): - """ - Copy a python ndarray into the gradient machine. - - :param gradient_machine: - :type gradient_machine: api.GradientMachine - :param name: - :param arr: - :type arr: np.ndarray - :return: - :rtype: api.Parameter - """ - import py_paddle.swig_paddle as api - param = __get_parameter_in_gradient_machine__(gradient_machine, name) - vec = param.getBuf(api.PARAMETER_VALUE) - assert isinstance(vec, api.Vector) - vec.copyFromNumpyArray(arr.flatten()) diff --git a/python/paddle/v2/plot/__init__.py b/python/paddle/v2/plot/__init__.py deleted file mode 100644 index acd3013db4e6a57cd1b269266bea82a31e928397..0000000000000000000000000000000000000000 --- a/python/paddle/v2/plot/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from plot import Ploter - -__all__ = ['Ploter'] diff --git a/python/paddle/v2/plot/plot.py b/python/paddle/v2/plot/plot.py deleted file mode 100644 index c18e63dd5f60481ba804738a6a9238dfea35d9f3..0000000000000000000000000000000000000000 --- a/python/paddle/v2/plot/plot.py +++ /dev/null @@ -1,82 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
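Before moving on to the plotting utilities below, here is a standalone sketch of the on-disk record format used by Parameters.serialize/deserialize above: a 16-byte (I, I, Q) header holding a version field, the 4-byte element width, and the element count, followed by raw float32 data. This is an illustration in pure NumPy, not the deleted code itself.

.. code-block:: python

    import struct
    from io import BytesIO

    import numpy as np

    param = np.arange(6, dtype=np.float32)

    # Write: header (version=0, element width=4 bytes, count), then raw data.
    f = BytesIO()
    f.write(struct.pack("IIQ", 0, 4, int(param.size)))
    f.write(param.tobytes())

    # Read it back the way deserialize does: parse the 16-byte header,
    # then reinterpret the remaining bytes as float32.
    f.seek(0)
    version, width, count = struct.unpack("IIQ", f.read(16))
    restored = np.frombuffer(f.read(), dtype=np.float32)
    assert count == restored.size and (restored == param).all()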
- -import os - - -class PlotData(object): - def __init__(self): - self.step = [] - self.value = [] - - def append(self, step, value): - self.step.append(step) - self.value.append(value) - - def reset(self): - self.step = [] - self.value = [] - - -class Ploter(object): - def __init__(self, *args): - self.__args__ = args - self.__plot_data__ = {} - for title in args: - self.__plot_data__[title] = PlotData() - # demos in notebooks use Ploter to plot figures, but when we convert - # the ipynb to a py file for testing, importing matplotlib would make - # the script crash. So we can use `export DISABLE_PLOT=True` to skip - # importing these libs - self.__disable_plot__ = os.environ.get("DISABLE_PLOT") - if not self.__plot_is_disabled__(): - import matplotlib.pyplot as plt - from IPython import display - self.plt = plt - self.display = display - - def __plot_is_disabled__(self): - return self.__disable_plot__ == "True" - - def append(self, title, step, value): - assert isinstance(title, basestring) - assert self.__plot_data__.has_key(title) - data = self.__plot_data__[title] - assert isinstance(data, PlotData) - data.append(step, value) - - def plot(self, path=None): - if self.__plot_is_disabled__(): - return - - titles = [] - for title in self.__args__: - data = self.__plot_data__[title] - assert isinstance(data, PlotData) - if len(data.step) > 0: - titles.append(title) - self.plt.plot(data.step, data.value) - self.plt.legend(titles, loc='upper left') - if path is None: - self.display.clear_output(wait=True) - self.display.display(self.plt.gcf()) - else: - self.plt.savefig(path) - self.plt.gcf().clear() - - def reset(self): - for key in self.__plot_data__: - data = self.__plot_data__[key] - assert isinstance(data, PlotData) - data.reset() diff --git a/python/paddle/v2/plot/tests/CMakeLists.txt b/python/paddle/v2/plot/tests/CMakeLists.txt deleted file mode 100644 index 4b6c1c80969182ccf6e0189b18bade8758bbbc30..0000000000000000000000000000000000000000 --- a/python/paddle/v2/plot/tests/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 @@ -if (NOT APPLE) - # The Mac OS X backend will not be able to function correctly if Python is - # not installed as a framework. - py_test(test_ploter SRCS test_ploter.py) -endif() diff --git a/python/paddle/v2/plot/tests/__init__.py b/python/paddle/v2/plot/tests/__init__.py deleted file mode 100644 index d1abfc08f19505a9010e924e34074e5bc3cc0571..0000000000000000000000000000000000000000 --- a/python/paddle/v2/plot/tests/__init__.py +++ /dev/null @@ -1,16 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import test_ploter - -__all__ = ['test_ploter.py'] diff --git a/python/paddle/v2/plot/tests/test_ploter.py b/python/paddle/v2/plot/tests/test_ploter.py deleted file mode 100644 index a75f853ed933dfce651faf758f71feca7cd8d328..0000000000000000000000000000000000000000 --- a/python/paddle/v2/plot/tests/test_ploter.py +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors.
All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest - -from paddle.v2.plot import Ploter - - -class TestCommon(unittest.TestCase): - def test_append(self): - title1 = "title1" - title2 = "title2" - plot_test = Ploter(title1, title2) - plot_test.append(title1, 1, 2) - plot_test.append(title1, 2, 5) - plot_test.append(title2, 3, 4) - self.assertEqual(plot_test.__plot_data__[title1].step, [1, 2]) - self.assertEqual(plot_test.__plot_data__[title1].value, [2, 5]) - self.assertEqual(plot_test.__plot_data__[title2].step, [3]) - self.assertEqual(plot_test.__plot_data__[title2].value, [4]) - plot_test.reset() - self.assertEqual(plot_test.__plot_data__[title1].step, []) - self.assertEqual(plot_test.__plot_data__[title1].value, []) - self.assertEqual(plot_test.__plot_data__[title2].step, []) - self.assertEqual(plot_test.__plot_data__[title2].value, []) - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/v2/pooling.py b/python/paddle/v2/pooling.py deleted file mode 100644 index 4881c27d1d6d3d926f12aab096f377164debf1ef..0000000000000000000000000000000000000000 --- a/python/paddle/v2/pooling.py +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import paddle.trainer_config_helpers.poolings -import copy - -__all__ = [] -suffix = 'Pooling' - -for name in paddle.trainer_config_helpers.poolings.__all__: - new_name = name[:-len(suffix)] - globals()[new_name] = copy.copy( - getattr(paddle.trainer_config_helpers.poolings, name)) - globals()[new_name].__name__ = new_name - __all__.append(new_name) diff --git a/python/paddle/v2/reader/__init__.py b/python/paddle/v2/reader/__init__.py deleted file mode 100644 index 12efdc4a0fec83fed57bdcbf687aaec69d13ba91..0000000000000000000000000000000000000000 --- a/python/paddle/v2/reader/__init__.py +++ /dev/null @@ -1,74 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -""" -At training and testing time, PaddlePaddle programs need to read data. To ease -the work of writing data reading code, we define the following: - -- A *reader* is a function that reads data (from file, network, random number - generator, etc.) and yields data items. -- A *reader creator* is a function that returns a reader function. -- A *reader decorator* is a function, which accepts one or more readers, and - returns a reader. -- A *batch reader* is a function that reads data (from *reader*, file, network, - random number generator, etc.) and yields a batch of data items. - -##################### -Data Reader Interface -##################### - -Indeed, a *data reader* doesn't have to be a function that reads and yields data -items. It can be any function with no parameters that creates an iterable -(anything that can be used in :code:`for x in iterable`)\: - -.. code-block:: python - - iterable = data_reader() - -Each element produced by the iterable should be a **single** entry of data, -**not** a mini-batch. An entry of data could be a single item, or a tuple of -items. -Items should be of a `supported type `_ (e.g., a numpy 1d -array of float32, an int, a list of ints) - -An example implementation for a single-item data reader creator: - -.. code-block:: python - - def reader_creator_random_image(width, height): - def reader(): - while True: - yield numpy.random.uniform(-1, 1, size=width*height) - return reader - -An example implementation for a multiple-item data reader creator: - -.. code-block:: python - - def reader_creator_random_image_and_label(width, height, label): - def reader(): - while True: - yield numpy.random.uniform(-1, 1, size=width*height), label - return reader - - -TODO(yuyang18): Should we add whole design doc here? -""" - -import decorator -from decorator import * - -import creator - -__all__ = decorator.__all__ + ['creator'] diff --git a/python/paddle/v2/reader/creator.py b/python/paddle/v2/reader/creator.py deleted file mode 100644 index fda5246d74f598200b439774a25e80ec3e504077..0000000000000000000000000000000000000000 --- a/python/paddle/v2/reader/creator.py +++ /dev/null @@ -1,130 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -The creator package contains some simple reader creators, which can be -used in user programs. -""" - -__all__ = ['np_array', 'text_file', 'recordio', 'cloud_reader'] - - -def np_array(x): - """ - Creates a reader that yields elements of x if it is a numpy vector, - rows of x if it is a numpy matrix, or, in general, any sub-hyperplane - indexed by the highest dimension. - - :param x: the numpy array to create reader from. - :returns: data reader created from x. - """ - - def reader(): - if x.ndim < 1: - yield x - - for e in x: - yield e - - return reader - - -def text_file(path): - """ - Creates a data reader that outputs text line by line from the given text file.
- Trailing new line ('\\\\n') of each line will be removed. - - :path: path of the text file. - :returns: data reader of text file - """ - - def reader(): - f = open(path, "r") - for l in f: - yield l.rstrip('\n') - f.close() - - return reader - - -def recordio(paths, buf_size=100): - """ - Creates a data reader from the given RecordIO file paths, separated by - ","; glob patterns are supported. - :path: path of recordio files, can be a string or a string list. - :returns: data reader of recordio files. - """ - - import recordio as rec - import paddle.v2.reader.decorator as dec - import cPickle as pickle - - def reader(): - if isinstance(paths, basestring): - path = paths - else: - path = ",".join(paths) - f = rec.reader(path) - while True: - r = f.read() - if r is None: - break - yield pickle.loads(r) - f.close() - - return dec.buffered(reader, buf_size) - - -pass_num = 0 - - -def cloud_reader(paths, etcd_endpoints, timeout_sec=5, buf_size=64): - """ - Create a data reader that yields records one by one from - the paths: - :paths: path of recordio files, can be a string or a string list. - :etcd_endpoints: the endpoints for the etcd cluster. - :returns: data reader of recordio files. - - .. code-block:: python - from paddle.v2.reader.creator import cloud_reader - etcd_endpoints = "http://127.0.0.1:2379" - trainer.train( - reader=cloud_reader(["/work/dataset/uci_housing/uci_housing*"], etcd_endpoints), - ) - """ - import os - import cPickle as pickle - import paddle.v2.master as master - c = master.client(etcd_endpoints, timeout_sec, buf_size) - - if isinstance(paths, basestring): - path = [paths] - else: - path = paths - c.set_dataset(path) - - def reader(): - global pass_num - c.paddle_start_get_records(pass_num) - pass_num += 1 - - while True: - r, e = c.next_record() - if not r: - if e != -2: - print "get record error: ", e - break - yield pickle.loads(r) - - return reader diff --git a/python/paddle/v2/reader/decorator.py b/python/paddle/v2/reader/decorator.py deleted file mode 100644 index 44a6e344630bb35d28ee29078bf8727053a24bef..0000000000000000000000000000000000000000 --- a/python/paddle/v2/reader/decorator.py +++ /dev/null @@ -1,405 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -__all__ = [ - 'map_readers', 'buffered', 'compose', 'chain', 'shuffle', - 'ComposeNotAligned', 'firstn', 'xmap_readers', 'PipeReader' -] - -from threading import Thread -import subprocess - -from Queue import Queue -import itertools -import random -import zlib - - -def map_readers(func, *readers): - """ - Creates a data reader that outputs the return value of a function, using - the outputs of the given data readers as its arguments. - - :param func: function to use. The type of func should be (Sample) => Sample - :type: callable - :param readers: readers whose outputs will be used as arguments of func. - :return: the created data reader.
- :rtype: callable - """ - - def reader(): - rs = [] - for r in readers: - rs.append(r()) - for e in itertools.imap(func, *rs): - yield e - - return reader - - -def shuffle(reader, buf_size): - """ - Creates a data reader whose data output is shuffled. - - Output from the iterator created by the original reader will be - buffered into a shuffle buffer and then shuffled. The size of the shuffle - buffer is determined by the argument buf_size. - - :param reader: the original reader whose output will be shuffled. - :type reader: callable - :param buf_size: shuffle buffer size. - :type buf_size: int - - :return: the new reader whose output is shuffled. - :rtype: callable - """ - - def data_reader(): - buf = [] - for e in reader(): - buf.append(e) - if len(buf) >= buf_size: - random.shuffle(buf) - for b in buf: - yield b - buf = [] - - if len(buf) > 0: - random.shuffle(buf) - for b in buf: - yield b - - return data_reader - - -def chain(*readers): - """ - Creates a data reader whose output is the outputs of the input data - readers chained together. - - If the input readers output the following data entries: - [0, 0, 0] - [1, 1, 1] - [2, 2, 2] - The chained reader will output: - [0, 0, 0, 1, 1, 1, 2, 2, 2] - - :param readers: input readers. - :return: the new data reader. - :rtype: callable - """ - - def reader(): - rs = [] - for r in readers: - rs.append(r()) - - for e in itertools.chain(*rs): - yield e - - return reader - - -class ComposeNotAligned(ValueError): - pass - - -def compose(*readers, **kwargs): - """ - Creates a data reader whose output is the combination of the input readers. - - If the input readers output the following data entries: - (1, 2) 3 (4, 5) - The composed reader will output: - (1, 2, 3, 4, 5) - - :param readers: readers that will be composed together. - :param check_alignment: if True, will check if input readers are aligned - correctly. If False, will not check alignment and trailing outputs - will be discarded. Defaults to True. - :type check_alignment: bool - - :return: the new data reader. - - :raises ComposeNotAligned: outputs of readers are not aligned. - Will not raise when check_alignment is set to False. - """ - check_alignment = kwargs.pop('check_alignment', True) - - def make_tuple(x): - if isinstance(x, tuple): - return x - else: - return (x, ) - - def reader(): - rs = [] - for r in readers: - rs.append(r()) - if not check_alignment: - for outputs in itertools.izip(*rs): - yield sum(map(make_tuple, outputs), ()) - else: - for outputs in itertools.izip_longest(*rs): - for o in outputs: - if o is None: - # None will not be present if compose is aligned - raise ComposeNotAligned( - "outputs of readers are not aligned.") - yield sum(map(make_tuple, outputs), ()) - - return reader - - -def buffered(reader, size): - """ - Creates a buffered data reader. - - The buffered data reader will read and save data entries into a - buffer. Reading from the buffered data reader will proceed as long - as the buffer is not empty. - - :param reader: the data reader to read from. - :type reader: callable - :param size: max buffer size. - :type size: int - - :returns: the buffered data reader.
- """ - - class EndSignal(): - pass - - end = EndSignal() - - def read_worker(r, q): - for d in r: - q.put(d) - q.put(end) - - def data_reader(): - r = reader() - q = Queue(maxsize=size) - t = Thread( - target=read_worker, args=( - r, - q, )) - t.daemon = True - t.start() - e = q.get() - while e != end: - yield e - e = q.get() - - return data_reader - - -def firstn(reader, n): - """ - Limit the max number of samples that reader could return. - - :param reader: the data reader to read from. - :type reader: callable - :param n: the max number of samples that return. - :type n: int - :return: the decorated reader. - :rtype: callable - """ - - # TODO(yuyang18): Check if just drop the reader, could clean the opened - # resource or not? - - def firstn_reader(): - for i, item in enumerate(reader()): - if i == n: - break - yield item - - return firstn_reader - - -class XmapEndSignal(): - pass - - -def xmap_readers(mapper, reader, process_num, buffer_size, order=False): - """ - Use multiprocess to map samples from reader by a mapper defined by user. - And this function contains a buffered decorator. - :param mapper: a function to map sample. - :type mapper: callable - :param reader: the data reader to read from - :type reader: callable - :param process_num: process number to handle original sample - :type process_num: int - :param buffer_size: max buffer size - :type buffer_size: int - :param order: keep the order of reader - :type order: bool - :return: the decarated reader - :rtype: callable - """ - end = XmapEndSignal() - - # define a worker to read samples from reader to in_queue - def read_worker(reader, in_queue): - for i in reader(): - in_queue.put(i) - in_queue.put(end) - - # define a worker to read samples from reader to in_queue with order flag - def order_read_worker(reader, in_queue): - in_order = 0 - for i in reader(): - in_queue.put((in_order, i)) - in_order += 1 - in_queue.put(end) - - # define a worker to handle samples from in_queue by mapper - # and put mapped samples into out_queue - def handle_worker(in_queue, out_queue, mapper): - sample = in_queue.get() - while not isinstance(sample, XmapEndSignal): - r = mapper(sample) - out_queue.put(r) - sample = in_queue.get() - in_queue.put(end) - out_queue.put(end) - - # define a worker to handle samples from in_queue by mapper - # and put mapped samples into out_queue by order - def order_handle_worker(in_queue, out_queue, mapper, out_order): - ins = in_queue.get() - while not isinstance(ins, XmapEndSignal): - order, sample = ins - r = mapper(sample) - while order != out_order[0]: - pass - out_queue.put(r) - out_order[0] += 1 - ins = in_queue.get() - in_queue.put(end) - out_queue.put(end) - - def xreader(): - in_queue = Queue(buffer_size) - out_queue = Queue(buffer_size) - out_order = [0] - # start a read worker in a thread - target = order_read_worker if order else read_worker - t = Thread(target=target, args=(reader, in_queue)) - t.daemon = True - t.start() - # start several handle_workers - target = order_handle_worker if order else handle_worker - args = (in_queue, out_queue, mapper, out_order) if order else ( - in_queue, out_queue, mapper) - workers = [] - for i in xrange(process_num): - worker = Thread(target=target, args=args) - worker.daemon = True - workers.append(worker) - for w in workers: - w.start() - - sample = out_queue.get() - while not isinstance(sample, XmapEndSignal): - yield sample - sample = out_queue.get() - finish = 1 - while finish < process_num: - sample = out_queue.get() - if isinstance(sample, XmapEndSignal): 
- finish += 1 - else: - yield sample - - return xreader - - -def _buf2lines(buf, line_break="\n"): - # FIXME: line_break should be automatically configured. - lines = buf.split(line_break) - return lines[:-1], lines[-1] - - -class PipeReader: - """ - PipeReader reads data as a stream from a command, takes its - stdout into a pipe buffer, and redirects it to the parser to - parse, then yields data in your desired format. - - You can use a standard Linux command or call another program - to read data, from HDFS, Ceph, a URL, AWS S3, etc.: - - .. code-block:: python - cmd = "hadoop fs -cat /path/to/some/file" - cmd = "cat sample_file.tar.gz" - cmd = "curl http://someurl" - cmd = "python print_s3_bucket.py" - - An example: - - .. code-block:: python - - def example_reader(): - for f in myfiles: - pr = PipeReader("cat %s"%f) - for l in pr.get_line(): - sample = l.split(" ") - yield sample - """ - - def __init__(self, command, bufsize=8192, file_type="plain"): - if not isinstance(command, str): - raise TypeError("command must be a string") - if file_type == "gzip": - self.dec = zlib.decompressobj( - 32 + zlib.MAX_WBITS) # offset 32 to skip the header - self.file_type = file_type - self.bufsize = bufsize - self.process = subprocess.Popen( - command.split(" "), bufsize=bufsize, stdout=subprocess.PIPE) - - def get_line(self, cut_lines=True, line_break="\n"): - """ - :param cut_lines: cut the buffer into lines - :type cut_lines: bool - :param line_break: line break of the file, like \n or \r - :type line_break: string - - :return: one line or a buffer of bytes - :rtype: string - """ - remained = "" - while True: - buff = self.process.stdout.read(self.bufsize) - if buff: - if self.file_type == "gzip": - decomp_buff = self.dec.decompress(buff) - elif self.file_type == "plain": - decomp_buff = buff - else: - raise TypeError("file_type %s is not allowed" % - self.file_type) - - if cut_lines: - lines, remained = _buf2lines(''.join( - [remained, decomp_buff]), line_break) - for line in lines: - yield line - else: - yield decomp_buff - else: - break diff --git a/python/paddle/v2/reader/tests/CMakeLists.txt b/python/paddle/v2/reader/tests/CMakeLists.txt deleted file mode 100644 index 107d5912e1567e0c8721987a281272c7feb51e63..0000000000000000000000000000000000000000 --- a/python/paddle/v2/reader/tests/CMakeLists.txt +++ /dev/null @@ -1,2 +0,0 @@ -py_test(creator_test SRCS creator_test.py) -py_test(decorator_test SRCS decorator_test.py) diff --git a/python/paddle/v2/reader/tests/__init__.py b/python/paddle/v2/reader/tests/__init__.py deleted file mode 100644 index eca2dce114b069bf9b455d77ce670d73b5047fd2..0000000000000000000000000000000000000000 --- a/python/paddle/v2/reader/tests/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License.
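As a usage sketch of the decorators defined above (assuming a v2-era installation; the raw reader here is a hypothetical stand-in), the pieces compose freely because every decorator both consumes and returns a zero-argument reader creator:

.. code-block:: python

    import paddle.v2.reader as reader


    def raw_reader():
        # hypothetical instance-level reader
        for i in range(100):
            yield i


    # prefetch in a background thread, shuffle within a 16-entry buffer,
    # then cap the stream at 32 samples
    pipeline = reader.firstn(
        reader.shuffle(
            reader.buffered(raw_reader, size=8), buf_size=16), n=32)

    samples = list(pipeline())  # at most 32 shuffled integers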
diff --git a/python/paddle/v2/reader/tests/creator_test.py b/python/paddle/v2/reader/tests/creator_test.py deleted file mode 100644 index 7fe374e663607607cd0839eb6ca9c70c4d15eef8..0000000000000000000000000000000000000000 --- a/python/paddle/v2/reader/tests/creator_test.py +++ /dev/null @@ -1,74 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import unittest -import numpy as np -import paddle.v2.reader.creator - - -class TestNumpyArray(unittest.TestCase): - def test_numpy_array(self): - l = [[1, 2, 3], [4, 5, 6]] - x = np.array(l, np.int32) - reader = paddle.v2.reader.creator.np_array(x) - for idx, e in enumerate(reader()): - self.assertItemsEqual(e, l[idx]) - - -class TestTextFile(unittest.TestCase): - def test_text_file(self): - path = os.path.join(os.path.dirname(__file__), "test_data_creator.txt") - reader = paddle.v2.reader.creator.text_file(path) - for idx, e in enumerate(reader()): - self.assertEqual(e, str(idx * 2) + " " + str(idx * 2 + 1)) - - -class TestRecordIO(unittest.TestCase): - def do_test(self, path): - reader = paddle.v2.reader.creator.recordio(path) - idx = 0 - for e in reader(): - if idx == 0: - self.assertEqual(e, (1, 2, 3)) - elif idx == 1: - self.assertEqual(e, (4, 5, 6)) - idx += 1 - self.assertEqual(idx, 2) - - def test_recordIO(self): - self.do_test( - os.path.join( - os.path.dirname(__file__), "test_reader_recordio.dat")) - self.do_test([ - os.path.join( - os.path.dirname(__file__), "test_reader_recordio.dat") - ]) - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/v2/reader/tests/decorator_test.py b/python/paddle/v2/reader/tests/decorator_test.py deleted file mode 100644 index 6b680e39f3fb299a14e7d8162470996d1d16b83d..0000000000000000000000000000000000000000 --- a/python/paddle/v2/reader/tests/decorator_test.py +++ /dev/null @@ -1,178 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and -# limitations under the License. - -import time -import unittest - -import paddle.v2.reader - - -def reader_creator_10(dur): - def reader(): - for i in range(10): - # this sleep also helps testing paddle.v2.reader.buffered - time.sleep(dur) - yield i - - return reader - - -class TestMap(unittest.TestCase): - def test_map(self): - d = {"h": 0, "i": 1} - - def tokenize(x): - return d[x] - - def read(): - yield "h" - yield "i" - - r = paddle.v2.reader.map_readers(tokenize, read) - for i, e in enumerate(r()): - self.assertEqual(e, i) - - -class TestBuffered(unittest.TestCase): - def test_read(self): - for size in range(20): - b = paddle.v2.reader.buffered(reader_creator_10(0), size) - c = 0 - for i in b(): - self.assertEqual(i, c) - c += 1 - self.assertEqual(c, 10) - - def test_buffering(self): - # reads have a 30ms delay. - b = paddle.v2.reader.buffered(reader_creator_10(0.03), 10) - last_time = time.time() - for idx, i in enumerate(b()): - elapsed_time = time.time() - last_time - if i == 0: - time.sleep(0.3) - else: - # read time should be short, meaning already buffered. - self.assertLess(elapsed_time, 0.05) - last_time = time.time() - - -class TestCompose(unittest.TestCase): - def test_compose(self): - reader = paddle.v2.reader.compose( - reader_creator_10(0), reader_creator_10(0)) - for idx, e in enumerate(reader()): - self.assertEqual(e, (idx, idx)) - - def test_compose_not_aligned(self): - total = 0 - reader = paddle.v2.reader.compose( - paddle.v2.reader.chain(reader_creator_10(0), reader_creator_10(0)), - reader_creator_10(0)) - with self.assertRaises(paddle.v2.reader.ComposeNotAligned): - for e in reader(): - total += 1 - # expecting 10, not 20 - self.assertEqual(total, 10) - - def test_compose_not_aligned_no_check(self): - total = 0 - reader = paddle.v2.reader.compose( - paddle.v2.reader.chain(reader_creator_10(0), reader_creator_10(0)), - reader_creator_10(0), - check_alignment=False) - for e in reader(): - total += 1 - # expecting 10, not 20 - self.assertEqual(total, 10) - - -class TestChain(unittest.TestCase): - def test_chain(self): - c = paddle.v2.reader.chain(reader_creator_10(0), reader_creator_10(0)) - idx = 0 - for e in c(): - self.assertEqual(e, idx % 10) - idx += 1 - self.assertEqual(idx, 20) - - -class TestShuffle(unittest.TestCase): - def test_shuffle(self): - case = [(0, True), (1, True), (10, False), (100, False)] - a = reader_creator_10(0) - for size, checkEq in case: - s = paddle.v2.reader.shuffle(a, size) - total = 0 - for idx, e in enumerate(s()): - if checkEq: - self.assertEqual(idx, e) - total += 1 - self.assertEqual(total, 10) - - -class TestXmap(unittest.TestCase): - def test_xmap(self): - def mapper(x): - return (x + 1) - - orders = (True, False) - thread_nums = (1, 2, 4, 8, 16) - buffered_size = (1, 2, 4, 8, 16) - for order in orders: - for tNum in thread_nums: - for size in buffered_size: - reader = paddle.v2.reader.xmap_readers(mapper, - reader_creator_10(0), - tNum, size, order) - for n in xrange(3): - result = [] - for i in reader(): - result.append(i) - if not order: - result.sort() - for idx, e in enumerate(result): - self.assertEqual(e, mapper(idx)) - - -class TestPipeReader(unittest.TestCase): - def test_pipe_reader(self): - def example_reader(myfiles): - for f in myfiles: - pr = paddle.v2.reader.PipeReader("cat %s" % f, bufsize=128) - for l in pr.get_line(): - yield l - - import tempfile - - records = [str(i) for i in xrange(5)] - temp = tempfile.NamedTemporaryFile() - try: - with
open(temp.name, 'w') as f: - for r in records: - f.write('%s\n' % r) - - result = [] - for r in example_reader([temp.name]): - result.append(r) - - for idx, e in enumerate(records): - self.assertEqual(e, result[idx]) - finally: - # delete the temporary file - temp.close() - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/v2/reader/tests/test_data_creator.txt b/python/paddle/v2/reader/tests/test_data_creator.txt deleted file mode 100644 index a2a8d47d43868d369083808497697da79e620e31..0000000000000000000000000000000000000000 --- a/python/paddle/v2/reader/tests/test_data_creator.txt +++ /dev/null @@ -1,3 +0,0 @@ -0 1 -2 3 -4 5 diff --git a/python/paddle/v2/reader/tests/test_reader_recordio.dat b/python/paddle/v2/reader/tests/test_reader_recordio.dat deleted file mode 100644 index a99a35bb829e066c4845d0b85b96cd1eb3a12491..0000000000000000000000000000000000000000 Binary files a/python/paddle/v2/reader/tests/test_reader_recordio.dat and /dev/null differ diff --git a/python/paddle/v2/reader/tests/test_recordio_creator.dat b/python/paddle/v2/reader/tests/test_recordio_creator.dat deleted file mode 100644 index 17aa89b6796184407e83246d3f342a55a66b4a69..0000000000000000000000000000000000000000 Binary files a/python/paddle/v2/reader/tests/test_recordio_creator.dat and /dev/null differ diff --git a/python/paddle/v2/tests/CMakeLists.txt b/python/paddle/v2/tests/CMakeLists.txt deleted file mode 100644 index b4333ed530ce464095ec38d72706949cc464fbe4..0000000000000000000000000000000000000000 --- a/python/paddle/v2/tests/CMakeLists.txt +++ /dev/null @@ -1,8 +0,0 @@ -py_test(test_op SRCS test_op.py) -py_test(test_image SRCS test_image.py) -py_test(test_layer SRCS test_layer.py) -py_test(test_topology SRCS test_topology.py) -py_test(test_rnn_layer SRCS test_rnn_layer.py) -py_test(test_parameters SRCS test_parameters.py) -py_test(test_data_feeder SRCS test_data_feeder.py) -py_test(test_paramconf_order SRCS test_paramconf_order.py) diff --git a/python/paddle/v2/tests/cat.jpg b/python/paddle/v2/tests/cat.jpg deleted file mode 100644 index bc1fbbd371216b9904b522ed302700c79d2e4876..0000000000000000000000000000000000000000 Binary files a/python/paddle/v2/tests/cat.jpg and /dev/null differ diff --git a/python/paddle/v2/tests/test_data_feeder.py b/python/paddle/v2/tests/test_data_feeder.py deleted file mode 100644 index 63905c04cf737d0f1d226a4a5a27777351dbf5a3..0000000000000000000000000000000000000000 --- a/python/paddle/v2/tests/test_data_feeder.py +++ /dev/null @@ -1,267 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
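The creator and decorator tests above all revolve around one contract: a "reader creator" is a zero-argument function that returns a fresh generator over samples, and decorators such as map_readers, buffered, compose, and chain consume reader creators and return new ones. Below is a minimal standalone sketch of that contract in plain Python; it is not the paddle.v2.reader implementation, and counting_creator/chain_creators are hypothetical names.

def counting_creator(n):
    # Reader creator: each call returns a fresh generator, so the same
    # data can be re-read on every training pass.
    def reader():
        for i in range(n):
            yield i
    return reader

def chain_creators(*creators):
    # Decorator: takes reader creators, returns a new reader creator
    # that yields each underlying reader's items in sequence.
    def reader():
        for creator in creators:
            for item in creator():
                yield item
    return reader

r = chain_creators(counting_creator(10), counting_creator(10))
assert list(r()) == list(range(10)) + list(range(10))  # mirrors TestChain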
- -import unittest - -import py_paddle.swig_paddle as api -import numpy as np - -from paddle.v2 import data_type -from paddle.v2.data_feeder import DataFeeder - - -class DataFeederTest(unittest.TestCase): - def dense_reader(self, size): - data = np.random.random(size) - return data - - def sparse_binary_reader(self, high, size_limit, non_empty=False): - num = np.random.randint(size_limit) # num could be 0 - while non_empty and num == 0: - num = np.random.randint(size_limit) - return np.random.randint(high, size=num).tolist() - - def test_dense(self): - def compare(input): - feeder = DataFeeder([('image', data_type.dense_vector(784))], - {'image': 0}) - arg = feeder(input) - output = arg.getSlotValue(0).copyToNumpyMat() - input = np.array(input, dtype='float32') - self.assertAlmostEqual(input.all(), output.all()) - - # test numpy array - batch_size = 32 - dim = 784 - data = [] - for i in xrange(batch_size): - each_sample = [] - each_sample.append(self.dense_reader(dim)) - data.append(each_sample) - compare(data) - - # each feature is a list - data = [] - for i in xrange(batch_size): - each_sample = [] - each_sample.append(self.dense_reader(dim).tolist()) - data.append(each_sample) - compare(data) - - # test tuple - data = [] - for i in xrange(batch_size): - each_sample = (self.dense_reader(dim).tolist(), ) - data.append(each_sample) - compare(data) - - def test_sparse_binary(self): - dim = 10000 - batch_size = 32 - data = [] - for i in xrange(batch_size): - each_sample = [] - each_sample.append(self.sparse_binary_reader(dim, 50)) - data.append(each_sample) - feeder = DataFeeder([('input', data_type.sparse_binary_vector(dim))], - {'input': 0}) - arg = feeder(data) - output = arg.getSlotValue(0) - assert isinstance(output, api.Matrix) - for i in xrange(batch_size): - self.assertEqual(output.getSparseRowCols(i), data[i][0]) - - def test_sparse(self): - dim = 10000 - batch_size = 32 - v = [] - w = [] - data = [] - for dat in xrange(batch_size): - each_sample = [] - a = self.sparse_binary_reader(dim, 40, non_empty=True) - b = self.dense_reader(len(a)).tolist() - v.append(a) - w.append(np.array(b, dtype="float32")) - each_sample.append(zip(a, b)) - data.append(each_sample) - - feeder = DataFeeder([('input', data_type.sparse_float_vector(dim))], - {'input': 0}) - arg = feeder(data) - output = arg.getSlotValue(0) - assert isinstance(output, api.Matrix) - for i in xrange(batch_size): - self.assertEqual(output.getSparseRowCols(i), v[i]) - cols_value = output.getSparseRowColsVal(i) - value = [val[1] for val in cols_value] - value = np.array(value, dtype="float32") - self.assertAlmostEqual(value.all(), w[i].all()) - - def test_integer(self): - value_range = 100 - batch_size = 32 - index = [] - for i in xrange(batch_size): - each_sample = [] - each_sample.append(np.random.randint(value_range)) - index.append(each_sample) - feeder = DataFeeder([('input', data_type.integer_value(value_range))], - {'input': 0}) - arg = feeder(index) - output = arg.getSlotIds(0).copyToNumpyArray() - index = np.array(index, dtype='int') - self.assertEqual(output.all(), index.flatten().all()) - - def test_integer_sequence(self): - value_range = 10000 - batch_size = 32 - start = [0] - data = [] - for i in xrange(batch_size): - each_sample = [] - each_sample.append( - self.sparse_binary_reader( - value_range, 30, non_empty=True)) - data.append(each_sample) - start.append(len(each_sample[0]) + start[-1]) - feeder = DataFeeder( - [('input', data_type.integer_value_sequence(value_range))], - {'input': 0}) - arg = feeder(data) - 
output_data = arg.getSlotIds(0).copyToNumpyArray() - output_start = arg.getSlotSequenceStartPositions(0).copyToNumpyArray() - - index = [] - for dat in data: - index.extend(x for x in dat[0]) # only one feature, so dat[0] - index = np.array(index, dtype='int') - start = np.array(start, dtype='int') - self.assertEqual(output_data.all(), index.all()) - self.assertEqual(output_start.all(), start.all()) - - def test_multiple_features(self): - batch_size = 2 - data = [] - for i in xrange(batch_size): - each_sample = [] - each_sample.append(np.random.randint(10)) - each_sample.append( - self.sparse_binary_reader( - 20000, 40, non_empty=True)) - each_sample.append(self.dense_reader(100)) - data.append(each_sample) - - # test multiple features - data_types = [('fea0', data_type.dense_vector(100)), - ('fea1', data_type.sparse_binary_vector(20000)), - ('fea2', data_type.integer_value(10))] - feeder = DataFeeder(data_types, {'fea0': 2, 'fea1': 1, 'fea2': 0}) - arg = feeder(data) - output_dense = arg.getSlotValue(0).copyToNumpyMat() - output_sparse = arg.getSlotValue(1) - output_index = arg.getSlotIds(2).copyToNumpyArray() - for i in xrange(batch_size): - self.assertEqual(output_dense[i].all(), data[i][2].all()) - self.assertEqual(output_sparse.getSparseRowCols(i), data[i][1]) - self.assertEqual(output_index[i], data[i][0]) - - # reader returns 3 features, but only 2 of them are used - data_types = [('fea0', data_type.dense_vector(100)), - ('fea2', data_type.integer_value(10))] - feeder = DataFeeder(data_types, {'fea0': 2, 'fea2': 0}) - arg = feeder(data) - output_dense = arg.getSlotValue(0).copyToNumpyMat() - output_index = arg.getSlotIds(1).copyToNumpyArray() - for i in xrange(batch_size): - self.assertEqual(output_dense[i].all(), data[i][2].all()) - self.assertEqual(output_index[i], data[i][0]) - - # reader returns 3 features, one is duplicate data - data_types = [('fea0', data_type.dense_vector(100)), - ('fea1', data_type.sparse_binary_vector(20000)), - ('fea2', data_type.integer_value(10)), - ('fea3', data_type.dense_vector(100))] - feeder = DataFeeder(data_types, - {'fea0': 2, - 'fea1': 1, - 'fea2': 0, - 'fea3': 2}) - arg = feeder(data) - fea0 = arg.getSlotValue(0).copyToNumpyMat() - fea1 = arg.getSlotValue(1) - fea2 = arg.getSlotIds(2).copyToNumpyArray() - fea3 = arg.getSlotValue(3).copyToNumpyMat() - for i in xrange(batch_size): - self.assertEqual(fea0[i].all(), data[i][2].all()) - self.assertEqual(fea1.getSparseRowCols(i), data[i][1]) - self.assertEqual(fea2[i], data[i][0]) - self.assertEqual(fea3[i].all(), data[i][2].all()) - - def test_multiple_features_tuple(self): - batch_size = 2 - data = [] - for i in xrange(batch_size): - a = np.random.randint(10) - b = self.sparse_binary_reader(20000, 40, non_empty=True) - c = self.dense_reader(100) - each_sample = (a, b, c) - data.append(each_sample) - - # test multiple features - data_types = [('fea0', data_type.dense_vector(100)), - ('fea1', data_type.sparse_binary_vector(20000)), - ('fea2', data_type.integer_value(10))] - feeder = DataFeeder(data_types, {'fea0': 2, 'fea1': 1, 'fea2': 0}) - arg = feeder(data) - out_dense = arg.getSlotValue(0).copyToNumpyMat() - out_sparse = arg.getSlotValue(1) - out_index = arg.getSlotIds(2).copyToNumpyArray() - for i in xrange(batch_size): - self.assertEqual(out_dense[i].all(), data[i][2].all()) - self.assertEqual(out_sparse.getSparseRowCols(i), data[i][1]) - self.assertEqual(out_index[i], data[i][0]) - - def test_dense_set_shape(self): - # test 2-D data - def gen_data(batch_size, shape): - data = [] - for i in
xrange(batch_size): - each_sample = [] - each_sample.append(np.random.random(shape)) - data.append(each_sample) - return data - - feeder = DataFeeder([('image', data_type.dense_array(2352))], - {'image': 0}) - arg = feeder(gen_data(32, (3, 28, 28))) - h = arg.getSlotFrameHeight(0) - w = arg.getSlotFrameWidth(0) - self.assertEqual(h, 28) - self.assertEqual(w, 28) - - arg = feeder(gen_data(32, (3, 30, 32))) - h = arg.getSlotFrameHeight(0) - w = arg.getSlotFrameWidth(0) - self.assertEqual(h, 30) - self.assertEqual(w, 32) - - -if __name__ == '__main__': - api.initPaddle("--use_gpu=0") - suite = unittest.TestLoader().loadTestsFromTestCase(DataFeederTest) - unittest.TextTestRunner().run(suite) - if api.isGpuVersion(): - api.setUseGpu(True) - unittest.main() diff --git a/python/paddle/v2/tests/test_image.py b/python/paddle/v2/tests/test_image.py deleted file mode 100644 index c78bbdc40a25878b21ba7e678afedf9d8f0a87cf..0000000000000000000000000000000000000000 --- a/python/paddle/v2/tests/test_image.py +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest -import numpy as np - -import paddle.v2.image as image - - -class Image(unittest.TestCase): - def test_resize_flip_chw(self): - # resize - im = image.load_image('cat.jpg') - im = image.resize_short(im, 256) - self.assertEqual(256, min(im.shape[:2])) - self.assertEqual(3, im.shape[2]) - - # flip - im = image.left_right_flip(im) - im2 = np.flip(im, 1) - self.assertEqual(im.all(), im2.all()) - - # to_chw - h, w, c = im.shape - im = image.to_chw(im) - self.assertEqual(c, im.shape[0]) - self.assertEqual(h, im.shape[1]) - self.assertEqual(w, im.shape[2]) - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/v2/tests/test_layer.py b/python/paddle/v2/tests/test_layer.py deleted file mode 100644 index b169a0f38ee61cd8f69a51e836d5cc3d42a9f850..0000000000000000000000000000000000000000 --- a/python/paddle/v2/tests/test_layer.py +++ /dev/null @@ -1,290 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
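The DataFeeder tests above lean on one convention worth spelling out: the feeding dict maps an input name to the column of each raw sample that holds that input's value, so a reader may emit more columns than the network consumes, and in any order. A rough re-implementation of just that reordering step follows; reorder_batch is a hypothetical helper, not the real DataFeeder.

def reorder_batch(batch, data_names, feeding):
    # For each sample, pick the columns named by `data_names`, using
    # `feeding` to locate each name's position in the raw sample.
    return [[sample[feeding[name]] for name in data_names]
            for sample in batch]

# A sample laid out as (index, sparse cols, dense vec), as in the tests:
batch = [(7, [3, 11], [0.5, 0.25])]
print(reorder_batch(batch, ['fea0', 'fea2'], {'fea0': 2, 'fea2': 0}))
# -> [[[0.5, 0.25], 7]]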
- -import unittest - -import paddle.v2.activation as activation -import paddle.v2.attr as attr -import paddle.v2.data_type as data_type -import paddle.v2.layer as layer -import paddle.v2.pooling as pooling -import paddle.v2.networks as networks -import paddle.v2.evaluator as evaluator - -pixel = layer.data(name='pixel', type=data_type.dense_vector(128)) -label = layer.data(name='label', type=data_type.integer_value(10)) -weight = layer.data(name='weight', type=data_type.dense_vector(1)) -combine_weight = layer.data( - name='weight_combine', type=data_type.dense_vector(10)) -score = layer.data(name='score', type=data_type.dense_vector(1)) - -hidden = layer.fc(input=pixel, - size=100, - act=activation.Sigmoid(), - param_attr=attr.Param(name='hidden')) -inference = layer.fc(input=hidden, size=10, act=activation.Softmax()) -conv = layer.img_conv( - input=pixel, - filter_size=1, - filter_size_y=1, - num_channels=8, - num_filters=16, - act=activation.Linear()) - - -class ImageLayerTest(unittest.TestCase): - def test_conv_layer(self): - conv_shift = layer.conv_shift(a=pixel, b=score) - print layer.parse_network(conv, conv_shift) - - def test_pooling_layer(self): - maxpool = layer.img_pool( - input=conv, - pool_size=2, - num_channels=16, - padding=1, - pool_type=pooling.Max()) - spp = layer.spp(input=conv, - pyramid_height=2, - num_channels=16, - pool_type=pooling.Max()) - maxout = layer.maxout(input=conv, num_channels=16, groups=4) - print layer.parse_network([maxpool, spp, maxout]) - - def test_norm_layer(self): - norm1 = layer.img_cmrnorm(input=conv, size=5) - norm2 = layer.batch_norm(input=conv) - norm3 = layer.sum_to_one_norm(input=conv) - print layer.parse_network([norm1, norm2, norm3]) - - -class AggregateLayerTest(unittest.TestCase): - def test_aggregate_layer(self): - pool = layer.pooling( - input=pixel, - pooling_type=pooling.Avg(), - agg_level=layer.AggregateLevel.TO_SEQUENCE) - last_seq = layer.last_seq(input=pixel) - first_seq = layer.first_seq(input=pixel) - concat = layer.concat(input=[last_seq, first_seq]) - seq_concat = layer.seq_concat(a=last_seq, b=first_seq) - print layer.parse_network( - [pool, last_seq, first_seq, concat, seq_concat]) - - -class MathLayerTest(unittest.TestCase): - def test_math_layer(self): - addto = layer.addto(input=[pixel, pixel]) - linear_comb = layer.linear_comb( - weights=combine_weight, vectors=hidden, size=10) - interpolation = layer.interpolation( - input=[hidden, hidden], weight=score) - bilinear = layer.bilinear_interp(input=conv, out_size_x=4, out_size_y=4) - power = layer.power(input=pixel, weight=score) - scaling = layer.scaling(input=pixel, weight=score) - slope = layer.slope_intercept(input=pixel) - tensor = layer.tensor(a=pixel, b=pixel, size=1000) - cos_sim = layer.cos_sim(a=pixel, b=pixel) - trans = layer.trans(input=tensor) - print layer.parse_network([ - addto, linear_comb, interpolation, power, scaling, slope, tensor, - cos_sim, trans - ]) - - -class ReshapeLayerTest(unittest.TestCase): - def test_reshape_layer(self): - block_expand = layer.block_expand( - input=conv, num_channels=4, stride_x=1, block_x=1) - expand = layer.expand( - input=weight, - expand_as=pixel, - expand_level=layer.ExpandLevel.FROM_NO_SEQUENCE) - repeat = layer.repeat(input=pixel, num_repeats=4) - reshape = layer.seq_reshape(input=pixel, reshape_size=4) - rotate = layer.rotate(input=pixel, height=16, width=49) - print layer.parse_network( - [block_expand, expand, repeat, reshape, rotate]) - - -class RecurrentLayerTest(unittest.TestCase): - def 
test_recurrent_layer(self): - word = layer.data(name='word', type=data_type.integer_value(12)) - recurrent = layer.recurrent(input=word) - lstm = layer.lstmemory(input=word) - gru = layer.grumemory(input=word) - print layer.parse_network([recurrent, lstm, gru]) - - -class CostLayerTest(unittest.TestCase): - def test_cost_layer(self): - cost1 = layer.classification_cost(input=inference, label=label) - cost2 = layer.classification_cost( - input=inference, label=label, weight=weight) - cost3 = layer.cross_entropy_cost(input=inference, label=label) - cost4 = layer.cross_entropy_with_selfnorm_cost( - input=inference, label=label) - cost5 = layer.square_error_cost(input=inference, label=label) - cost6 = layer.square_error_cost( - input=inference, label=label, weight=weight) - cost7 = layer.multi_binary_label_cross_entropy_cost( - input=inference, label=label) - cost8 = layer.rank_cost(left=score, right=score, label=score) - cost9 = layer.lambda_cost(input=inference, score=score) - cost10 = layer.sum_cost(input=inference) - cost11 = layer.huber_regression_cost(input=score, label=label) - cost12 = layer.huber_classification_cost(input=score, label=label) - - print layer.parse_network([cost1, cost2]) - print layer.parse_network([cost3, cost4]) - print layer.parse_network([cost5, cost6]) - print layer.parse_network([cost7, cost8, cost9, cost10, cost11, cost12]) - - crf = layer.crf(input=inference, label=label) - crf_decoding = layer.crf_decoding(input=inference, size=3) - ctc = layer.ctc(input=inference, label=label) - warp_ctc = layer.warp_ctc(input=pixel, label=label) - nce = layer.nce(input=inference, label=label, num_classes=3) - hsigmoid = layer.hsigmoid(input=inference, label=label, num_classes=3) - - print layer.parse_network( - [crf, crf_decoding, ctc, warp_ctc, nce, hsigmoid]) - - -class OtherLayerTest(unittest.TestCase): - def test_sampling_layer(self): - maxid = layer.max_id(input=inference) - sampling_id = layer.sampling_id(input=inference) - eos = layer.eos(input=maxid, eos_id=5) - layer.printer(maxid) - print layer.parse_network([maxid, sampling_id, eos]) - - def test_slicing_joining_layer(self): - pad = layer.pad(input=conv, pad_c=[2, 3], pad_h=[1, 2], pad_w=[3, 1]) - print layer.parse_network(pad) - - -class ProjOpTest(unittest.TestCase): - def test_projection(self): - input = layer.data(name='data2', type=data_type.dense_vector(784)) - word = layer.data( - name='word2', type=data_type.integer_value_sequence(10000)) - fc0 = layer.fc(input=input, size=100, act=activation.Sigmoid()) - fc1 = layer.fc(input=input, size=200, act=activation.Sigmoid()) - mixed0 = layer.mixed( - size=256, - input=[ - layer.full_matrix_projection(input=fc0), - layer.full_matrix_projection(input=fc1) - ]) - with layer.mixed(size=200) as mixed1: - mixed1 += layer.full_matrix_projection(input=fc0) - mixed1 += layer.identity_projection(input=fc1) - - table = layer.table_projection(input=word) - emb0 = layer.mixed(size=512, input=table) - with layer.mixed(size=512) as emb1: - emb1 += table - - scale = layer.scaling_projection(input=fc0) - scale0 = layer.mixed(size=100, input=scale) - with layer.mixed(size=100) as scale1: - scale1 += scale - - dotmul = layer.dotmul_projection(input=fc0) - dotmul0 = layer.mixed(size=100, input=dotmul) - with layer.mixed(size=100) as dotmul1: - dotmul1 += dotmul - - context = layer.context_projection(input=fc0, context_len=5) - context0 = layer.mixed(size=500, input=context) - with layer.mixed(size=500) as context1: - context1 += context - - conv = layer.conv_projection( - 
input=input, - filter_size=1, - num_channels=1, - num_filters=128, - stride=1, - padding=0) - conv0 = layer.mixed(input=conv, bias_attr=True) - with layer.mixed(bias_attr=True) as conv1: - conv1 += conv - - print layer.parse_network(mixed0) - print layer.parse_network(mixed1) - print layer.parse_network(emb0) - print layer.parse_network(emb1) - print layer.parse_network(scale0) - print layer.parse_network(scale1) - print layer.parse_network(dotmul0) - print layer.parse_network(dotmul1) - print layer.parse_network(conv0) - print layer.parse_network(conv1) - - def test_operator(self): - ipt0 = layer.data(name='data1', type=data_type.dense_vector(784)) - ipt1 = layer.data(name='word1', type=data_type.dense_vector(128)) - fc0 = layer.fc(input=ipt0, size=100, act=activation.Sigmoid()) - fc1 = layer.fc(input=ipt0, size=100, act=activation.Sigmoid()) - - dotmul_op = layer.dotmul_operator(a=fc0, b=fc1) - dotmul0 = layer.mixed(input=dotmul_op) - with layer.mixed() as dotmul1: - dotmul1 += dotmul_op - - conv = layer.conv_operator( - img=ipt0, - filter=ipt1, - filter_size=1, - num_channels=1, - num_filters=128, - stride=1, - padding=0) - conv0 = layer.mixed(input=conv) - with layer.mixed() as conv1: - conv1 += conv - - print layer.parse_network(dotmul0) - print layer.parse_network(dotmul1) - print layer.parse_network(conv0) - print layer.parse_network(conv1) - - -class NetworkTests(unittest.TestCase): - def test_vgg(self): - img = layer.data(name='pixel1', type=data_type.dense_vector(784)) - vgg_out = networks.small_vgg( - input_image=img, num_channels=1, num_classes=2) - print layer.parse_network(vgg_out) - - -class EvaluatorTest(unittest.TestCase): - def test_evaluator(self): - img = layer.data(name='pixel2', type=data_type.dense_vector(784)) - output = layer.fc(input=img, - size=10, - act=activation.Softmax(), - name='fc_here') - lbl = layer.data(name='label2', type=data_type.integer_value(10)) - cost = layer.cross_entropy_cost(input=output, label=lbl) - - evaluator.classification_error(input=output, label=lbl) - print layer.parse_network(cost) - print layer.parse_network(output) - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/v2/tests/test_op.py b/python/paddle/v2/tests/test_op.py deleted file mode 100644 index 15d5aef5111c2c2d5970fc21c54be4ca66aaba90..0000000000000000000000000000000000000000 --- a/python/paddle/v2/tests/test_op.py +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
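test_layer.py above exercises most of the v2 configuration surface; stripped to its core, a network was declared layer by layer and then compiled with parse_network. The sketch below is assembled only from calls that appear in those tests; since this diff deletes the v2 API, it documents historical usage rather than a current interface, and the '_demo' names are invented.

import paddle.v2.activation as activation
import paddle.v2.data_type as data_type
import paddle.v2.layer as layer

# Declaring layers only builds a graph description; nothing runs yet.
pixel = layer.data(name='pixel_demo', type=data_type.dense_vector(784))
label = layer.data(name='label_demo', type=data_type.integer_value(10))
hidden = layer.fc(input=pixel, size=100, act=activation.Sigmoid())
inference = layer.fc(input=hidden, size=10, act=activation.Softmax())
cost = layer.classification_cost(input=inference, label=label)

# parse_network walks back from the given outputs and emits the
# serialized model config, as the tests' print statements do.
print(layer.parse_network(cost))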
- -import unittest - -import paddle.v2.data_type as data_type -import paddle.v2.layer as layer -import paddle.v2.op as op - - -class OpTest(unittest.TestCase): - def test_op(self): - x = layer.data(name='data', type=data_type.dense_vector(128)) - x = op.exp(x) - x = op.sqrt(x) - x = op.reciprocal(x) - x = op.log(x) - x = op.abs(x) - x = op.sigmoid(x) - x = op.tanh(x) - x = op.square(x) - x = op.relu(x) - y = 1 + x - y = y + 1 - y = x + y - y = y - x - y = y - 2 - y = 2 - y - y = 2 * y - y = y * 3 - z = layer.data(name='data_2', type=data_type.dense_vector(1)) - y = y * z - y = z * y - y = y + z - y = z + y - print layer.parse_network(y) - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/v2/tests/test_paramconf_order.py b/python/paddle/v2/tests/test_paramconf_order.py deleted file mode 100644 index 264442be182ea69c95b39b3bdb4c389d52eff66e..0000000000000000000000000000000000000000 --- a/python/paddle/v2/tests/test_paramconf_order.py +++ /dev/null @@ -1,99 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Copyright PaddlePaddle contributors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License.
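test_op.py above shows that expressions like 1 + x and y * z are valid on layers; the mechanism is ordinary Python operator overloading on the layer wrapper class. Here is a toy illustration of the trick, independent of Paddle; the Node class and its names are invented for the example.

class Node(object):
    # Stand-in for a layer object that records the graph symbolically.
    def __init__(self, name):
        self.name = name

    def __add__(self, other):
        if not isinstance(other, Node):
            other = Node(repr(other))  # promote constants to nodes
        return Node('add(%s, %s)' % (self.name, other.name))

    # `1 + x` falls back to x.__radd__(1); reusing __add__ works
    # because addition is symmetric.
    __radd__ = __add__

x = Node('data')
print((1 + x).name)   # add(data, 1)
print((x + x).name)   # add(data, data)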
-import unittest -import math -import paddle.v2 as paddle - - -def wordemb(inlayer): - wordemb = paddle.layer.table_projection( - input=inlayer, - size=5, - param_attr=paddle.attr.Param( - name="_proj", initial_std=0.001, learning_rate=1, l2_rate=0)) - return wordemb - - -def train(): - word_dict = paddle.dataset.imikolov.build_dict() - dict_size = len(word_dict) - # Every layer takes integer value of range [0, dict_size) - firstword = paddle.layer.data( - name="firstw", type=paddle.data_type.integer_value(dict_size)) - secondword = paddle.layer.data( - name="secondw", type=paddle.data_type.integer_value(dict_size)) - thirdword = paddle.layer.data( - name="thirdw", type=paddle.data_type.integer_value(dict_size)) - fourthword = paddle.layer.data( - name="fourthw", type=paddle.data_type.integer_value(dict_size)) - nextword = paddle.layer.data( - name="fifthw", type=paddle.data_type.integer_value(dict_size)) - - Efirst = wordemb(firstword) - Esecond = wordemb(secondword) - Ethird = wordemb(thirdword) - Efourth = wordemb(fourthword) - - contextemb = paddle.layer.concat(input=[Efirst, Esecond, Ethird, Efourth]) - hidden1 = paddle.layer.fc(name="fc1", - input=contextemb, - size=128, - act=paddle.activation.Sigmoid(), - layer_attr=paddle.attr.Extra(drop_rate=0.5), - bias_attr=paddle.attr.Param(learning_rate=2), - param_attr=paddle.attr.Param( - initial_std=1. / math.sqrt(5 * 8), - learning_rate=1, - l2_rate=6e-4)) - predictword = paddle.layer.fc(input=hidden1, - size=dict_size, - bias_attr=paddle.attr.Param(learning_rate=2), - act=paddle.activation.Softmax()) - - return paddle.layer.classification_cost(input=predictword, label=nextword) - - -class TestParamConfOrder(unittest.TestCase): - def test_param_conf_order(self): - paddle.init() - cost = train() - parameters = paddle.parameters.create(cost) - adagrad = paddle.optimizer.AdaGrad( - learning_rate=3e-3, - regularization=paddle.optimizer.L2Regularization(rate=8e-4)) - - trainer = paddle.trainer.SGD(cost, parameters, adagrad) - for p in trainer.get_topology_proto().parameters: - if p.name == "_fc1.w0": - self.assertEqual(p.decay_rate, 6e-4) - else: - self.assertEqual(p.decay_rate, 8e-4) - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/v2/tests/test_parameters.py b/python/paddle/v2/tests/test_parameters.py deleted file mode 100644 index 3bfd9348a61033de910d7f6f0867660d1d36f7fb..0000000000000000000000000000000000000000 --- a/python/paddle/v2/tests/test_parameters.py +++ /dev/null @@ -1,143 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest -import sys - -try: - import py_paddle - - del py_paddle -except ImportError: - print >> sys.stderr, "It seems swig of Paddle is not installed, this " \ - "unittest will not be run." 
- sys.exit(0) - -import paddle.v2.parameters as parameters -import paddle.v2.data_type as data_type -import paddle.v2.layer as layer -from paddle.v2.attr import ParamAttr -from paddle.proto.ParameterConfig_pb2 import ParameterConfig -import random -import cStringIO -import numpy - - -def __rand_param_config__(name, psize=None): - conf = ParameterConfig() - conf.name = name - size = 1 - if psize is None: - for i in xrange(2): - dim = random.randint(1, 1000) - conf.dims.append(dim) - size *= dim - else: - size = psize - conf.size = size - assert conf.IsInitialized() - return conf - - -class TestParameters(unittest.TestCase): - def test_serialization(self): - params = parameters.Parameters() - params.__append_config__(__rand_param_config__("param_0")) - params.__append_config__(__rand_param_config__("param_1")) - - for name in params.names(): - param = params.get(name) - param[:] = numpy.random.uniform( - -1.0, 1.0, size=params.get_shape(name)) - params.set(name, param) - - tmp_file = cStringIO.StringIO() - params.to_tar(tmp_file) - tmp_file.seek(0) - params_dup = parameters.Parameters.from_tar(tmp_file) - - self.assertEqual(params_dup.names(), params.names()) - - for name in params.names(): - self.assertEqual(params.get_shape(name), params_dup.get_shape(name)) - p0 = params.get(name) - p1 = params_dup.get(name) - self.assertTrue(numpy.isclose(p0, p1).all()) - - def test_initializer(self): - def initializer(name): - assert name == "fc.w" - mat = numpy.ones((3, 2), dtype=numpy.float32) - mat[1, 1] = 2 - return mat - - x = layer.data(name="x", type=data_type.dense_vector(3)) - y = layer.fc(x, - size=2, - bias_attr=False, - param_attr=ParamAttr( - name="fc.w", initializer=initializer)) - params = parameters.create(y) - val = params["fc.w"] - assert val.shape == (3, 2) - expected = numpy.array([[1, 1], [1, 2], [1, 1]], numpy.float32) - assert numpy.logical_and.reduce(numpy.reshape(val == expected, 6)) - - def test_init_from_tar(self): - def get_param(names, size): - p = parameters.Parameters() - for k, v in zip(names, size): - p.__append_config__(__rand_param_config__(k, v)) - for name in p.names(): - param = p.get(name) - param[:] = numpy.random.uniform( - -1.0, 1.0, size=p.get_shape(name)) - p.set(name, param) - return p - - def get_params(): - name1 = ['param_0', 'param_1'] - size1 = [128, 256] - p1 = get_param(name1, size1) - file1 = cStringIO.StringIO() - p1.to_tar(file1) - file1.seek(0) - - name2 = ['param_0', 'param_1', 'param_2'] - size2 = [128, 256, 288] - p2 = get_param(name2, size2) - file2 = cStringIO.StringIO() - p2.to_tar(file2) - file2.seek(0) - return p1, file1, p2, file2 - - p1, file1, p2, file2 = get_params() - p2.init_from_tar(file1) - for name in p1.names(): - self.assertEqual(p1.get_shape(name), p2.get_shape(name)) - v1 = p1.get(name) - v2 = p2.get(name) - self.assertTrue(numpy.isclose(v1, v2).all()) - - p1, file1, p2, file2 = get_params() - p1.init_from_tar(file2) - for name in p1.names(): - self.assertEqual(p1.get_shape(name), p2.get_shape(name)) - v1 = p1.get(name) - v2 = p2.get(name) - self.assertTrue(numpy.isclose(v1, v2).all()) - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/v2/tests/test_rnn_layer.py b/python/paddle/v2/tests/test_rnn_layer.py deleted file mode 100644 index 6ad07167dce13089b2081ee12119d67453b23873..0000000000000000000000000000000000000000 --- a/python/paddle/v2/tests/test_rnn_layer.py +++ /dev/null @@ -1,166 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import difflib -import unittest - -import paddle.trainer_config_helpers as conf_helps -import paddle.v2.activation as activation -import paddle.v2.data_type as data_type -import paddle.v2.layer as layer -from paddle.trainer_config_helpers.config_parser_utils import \ - parse_network_config as parse_network -from paddle.trainer_config_helpers.config_parser_utils import \ - reset_parser - - -class RNNTest(unittest.TestCase): - def test_simple_rnn(self): - dict_dim = 10 - word_dim = 8 - hidden_dim = 8 - - def parse_old_rnn(): - reset_parser() - - def step(y): - mem = conf_helps.memory(name="rnn_state", size=hidden_dim) - out = conf_helps.fc_layer( - input=[y, mem], - size=hidden_dim, - act=activation.Tanh(), - bias_attr=True, - name="rnn_state") - return out - - def test(): - data = conf_helps.data_layer(name="word", size=dict_dim) - embd = conf_helps.embedding_layer(input=data, size=word_dim) - conf_helps.recurrent_group( - name="rnn", step=step, input=embd, reverse=True) - - return str(parse_network(test)) - - def parse_new_rnn(): - reset_parser() - - def new_step(y): - mem = layer.memory(name="rnn_state", size=hidden_dim) - out = layer.fc(input=[y, mem], - size=hidden_dim, - act=activation.Tanh(), - bias_attr=True, - name="rnn_state") - return out - - data = layer.data( - name="word", type=data_type.integer_value(dict_dim)) - embd = layer.embedding(input=data, size=word_dim) - rnn_layer = layer.recurrent_group( - name="rnn", step=new_step, input=embd, reverse=True) - return str(layer.parse_network(rnn_layer)) - - diff = difflib.unified_diff(parse_old_rnn().splitlines(1), - parse_new_rnn().splitlines(1)) - print ''.join(diff) - - def test_sequence_rnn_multi_input(self): - dict_dim = 10 - word_dim = 8 - hidden_dim = 8 - label_dim = 3 - - def parse_old_rnn(): - reset_parser() - - def test(): - data = conf_helps.data_layer(name="word", size=dict_dim) - label = conf_helps.data_layer(name="label", size=label_dim) - emb = conf_helps.embedding_layer(input=data, size=word_dim) - boot_layer = conf_helps.data_layer(name="boot", size=10) - boot_layer = conf_helps.fc_layer( - name='boot_fc', input=boot_layer, size=10) - - def step(y, wid): - z = conf_helps.embedding_layer(input=wid, size=word_dim) - mem = conf_helps.memory( - name="rnn_state", - size=hidden_dim, - boot_layer=boot_layer) - out = conf_helps.fc_layer( - input=[y, z, mem], - size=hidden_dim, - act=conf_helps.TanhActivation(), - bias_attr=True, - name="rnn_state") - return out - - out = conf_helps.recurrent_group( - name="rnn", step=step, input=[emb, data]) - - rep = conf_helps.last_seq(input=out) - prob = conf_helps.fc_layer( - size=label_dim, - input=rep, - act=conf_helps.SoftmaxActivation(), - bias_attr=True) - - conf_helps.outputs( - conf_helps.classification_cost( - input=prob, label=label)) - - return str(parse_network(test)) - - def parse_new_rnn(): - reset_parser() - data = layer.data( - name="word", type=data_type.dense_vector(dict_dim)) - label = layer.data( - name="label", 
type=data_type.dense_vector(label_dim)) - emb = layer.embedding(input=data, size=word_dim) - boot_layer = layer.data( - name="boot", type=data_type.dense_vector(10)) - boot_layer = layer.fc(name='boot_fc', input=boot_layer, size=10) - - def step(y, wid): - z = layer.embedding(input=wid, size=word_dim) - mem = layer.memory( - name="rnn_state", size=hidden_dim, boot_layer=boot_layer) - out = layer.fc(input=[y, z, mem], - size=hidden_dim, - act=activation.Tanh(), - bias_attr=True, - name="rnn_state") - return out - - out = layer.recurrent_group( - name="rnn", step=step, input=[emb, data]) - - rep = layer.last_seq(input=out) - prob = layer.fc(size=label_dim, - input=rep, - act=activation.Softmax(), - bias_attr=True) - - cost = layer.classification_cost(input=prob, label=label) - - return str(layer.parse_network(cost)) - - diff = difflib.unified_diff(parse_old_rnn().splitlines(1), - parse_new_rnn().splitlines(1)) - print ''.join(diff) - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/v2/tests/test_topology.py b/python/paddle/v2/tests/test_topology.py deleted file mode 100644 index bacd28ddb7b61fcc396d61292a7cfcc87487e2e4..0000000000000000000000000000000000000000 --- a/python/paddle/v2/tests/test_topology.py +++ /dev/null @@ -1,85 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
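Both RNN tests above compare the old trainer_config_helpers output against the v2 output by diffing the two serialized configs instead of asserting equality, which keeps any mismatch readable. The same difflib pattern in isolation is sketched below; the config strings here are fabricated stand-ins.

import difflib

old_conf = "layers { name: 'rnn_state' size: 8 }\n"
new_conf = "layers { name: 'rnn_state' size: 16 }\n"

# splitlines(1) keeps the trailing newlines, which unified_diff expects;
# an empty result means the two configs are identical.
diff = difflib.unified_diff(old_conf.splitlines(1), new_conf.splitlines(1))
print(''.join(diff))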
- -import unittest -import paddle.v2.layer as layer -import paddle.v2.topology as topology -import paddle.v2.data_type as data_type -import paddle.trainer_config_helpers as conf_helps -import paddle.trainer.PyDataProvider2 as pydp2 - - -class TestTopology(unittest.TestCase): - def test_data_type(self): - pixel = layer.data(name='pixel', type=data_type.dense_vector(784)) - label = layer.data(name='label', type=data_type.integer_value(10)) - hidden = layer.fc(input=pixel, - size=100, - act=conf_helps.SigmoidActivation()) - inference = layer.fc(input=hidden, - size=10, - act=conf_helps.SoftmaxActivation()) - cost = layer.classification_cost(input=inference, label=label) - topo = topology.Topology(cost) - data_types = topo.data_type() - self.assertEqual(len(data_types), 2) - pixel_data_type = filter(lambda type: type[0] == "pixel", data_types) - self.assertEqual(len(pixel_data_type), 1) - pixel_data_type = pixel_data_type[0] - self.assertEqual(pixel_data_type[1].type, pydp2.DataType.Dense) - self.assertEqual(pixel_data_type[1].dim, 784) - - label_data_type = filter(lambda type: type[0] == "label", data_types) - self.assertEqual(len(label_data_type), 1) - label_data_type = label_data_type[0] - self.assertEqual(label_data_type[1].type, pydp2.DataType.Index) - self.assertEqual(label_data_type[1].dim, 10) - - def test_get_layer(self): - pixel = layer.data(name='pixel2', type=data_type.dense_vector(784)) - label = layer.data(name='label2', type=data_type.integer_value(10)) - hidden = layer.fc(input=pixel, - size=100, - act=conf_helps.SigmoidActivation()) - inference = layer.fc(input=hidden, - size=10, - act=conf_helps.SoftmaxActivation()) - cost = layer.classification_cost(input=inference, label=label) - topo = topology.Topology(cost) - pixel_layer = topo.get_layer("pixel2") - label_layer = topo.get_layer("label2") - self.assertEqual(pixel_layer, pixel) - self.assertEqual(label_layer, label) - - def test_parse(self): - pixel = layer.data(name='pixel3', type=data_type.dense_vector(784)) - label = layer.data(name='label3', type=data_type.integer_value(10)) - hidden = layer.fc(input=pixel, - size=100, - act=conf_helps.SigmoidActivation()) - inference = layer.fc(input=hidden, - size=10, - act=conf_helps.SoftmaxActivation()) - maxid = layer.max_id(input=inference) - cost1 = layer.classification_cost(input=inference, label=label) - cost2 = layer.cross_entropy_cost(input=inference, label=label) - - topology.Topology(cost2).proto() - topology.Topology([cost1]).proto() - topology.Topology([cost1, cost2]).proto() - topology.Topology([inference, maxid]).proto() - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/v2/topology.py b/python/paddle/v2/topology.py deleted file mode 100644 index 923ccecb0bf1236b4a3768fdc07dc3027e2863b7..0000000000000000000000000000000000000000 --- a/python/paddle/v2/topology.py +++ /dev/null @@ -1,145 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
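test_topology.py above treats Topology as the bridge between a configured network and everything downstream: data_type() recovers the (name, input type) pairs a DataFeeder needs, and proto() yields the ModelConfig. A compact usage sketch built from those same calls follows; it relies on the v2 API deleted by this diff, and the '_demo' layer names are invented.

import paddle.v2.activation as activation
import paddle.v2.data_type as data_type
import paddle.v2.layer as layer
import paddle.v2.topology as topology

pixel = layer.data(name='pixel_topo_demo', type=data_type.dense_vector(784))
label = layer.data(name='label_topo_demo', type=data_type.integer_value(10))
inference = layer.fc(input=pixel, size=10, act=activation.Softmax())
cost = layer.classification_cost(input=inference, label=label)

topo = topology.Topology(cost)
# e.g. [('pixel_topo_demo', dense_vector(784)),
#       ('label_topo_demo', integer_value(10))]
print(topo.data_type())
assert topo.get_layer('pixel_topo_demo') is not None
topo.proto()  # the underlying ModelConfig protobuf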
- -import collections - -from paddle.proto.ModelConfig_pb2 import ModelConfig -import paddle.trainer_config_helpers as conf_helps -import layer as v2_layer -import config_base -import cPickle -from paddle.trainer import config_parser as cp - -__all__ = ['Topology'] - - -class Topology(object): - """ - Topology is used to store the information about all layers - and network configs. - """ - - def __init__(self, layers, extra_layers=None): - def __check__(layers): - if not isinstance(layers, collections.Sequence): - layers = [layers] - for layer in layers: - __check_layer_type__(layer) - return layers - - layers = __check__(layers) - self.layers = layers - if extra_layers is not None: - extra_layers = __check__(extra_layers) - - self.__model_config__ = v2_layer.parse_network( - layers, extra_layers=extra_layers) - - if extra_layers is not None: - self.layers.extend(extra_layers) - - assert isinstance(self.__model_config__, ModelConfig) - - def update_from_default(self): - # HACK(typhoonzero): update ParameterConfig(proto) in case of - # optimizers are defined after layers, or between layers. - # Must be called from trainer.__init__() - for parameter in self.__model_config__.parameters: - if parameter.momentum == 0.0 and cp.g_default_momentum: - parameter.momentum = cp.g_default_momentum - if parameter.decay_rate == 0.0 and cp.g_default_decay_rate: - parameter.decay_rate = cp.g_default_decay_rate - if parameter.initial_mean == 0.0: - parameter.initial_mean = cp.g_default_initial_mean - if parameter.initial_std == 0.01: - parameter.initial_std = cp.g_default_initial_std - if parameter.initial_strategy == 0: - parameter.initial_strategy = cp.g_default_initial_strategy - if parameter.initial_smart == False: - parameter.initial_smart = cp.g_default_initial_smart - if parameter.num_batches_regularization == 1 and \ - cp.g_default_num_batches_regularization: - parameter.num_batches_regularization = \ - cp.g_default_num_batches_regularization - if parameter.gradient_clipping_threshold == 0.0 and \ - cp.g_default_gradient_clipping_threshold: - parameter.gradient_clipping_threshold = \ - cp.g_default_gradient_clipping_threshold - if parameter.device == -1 and cp.g_default_device: - parameter.device = cp.g_default_device - # FIXME(typhoonzero): ignored: update_hooks, g_default_compact_func - - def use_sparse_updater(self): - """ - Check whether any parameter requires sparse updates. - :return: - """ - use_sparse = False - for parameter in self.__model_config__.parameters: - if parameter.sparse_update or parameter.sparse_remote_update: - use_sparse = True - break - return use_sparse - - def proto(self): - return self.__model_config__ - - def get_layer(self, name): - """ - Get the v2.Layer class instance by layer name. - :param name: - :return: - """ - return v2_layer.get_layer(name) - - def data_layers(self): - """ - Get all data layers. - :return: - """ - data_layers = {} - for layer in self.proto().layers: - l = v2_layer.get_layer(layer.name) - if l and l.layer_type == conf_helps.LayerType.DATA: - data_layers[layer.name] = l - return data_layers - - def data_type(self): - """ - Get data_type from proto, such as: - [('image', dense_vector(768)), ('label', integer_value(10))] - """ - data_layers = self.data_layers() - - return [(nm, data_layers[nm].data_type) - for nm in self.proto().input_layer_names] - - def get_layer_proto(self, name): - for layer in self.__model_config__.layers: - if layer.name == name: - return layer - return None - - def serialize_for_inference(self, stream): - protobin =
self.proto().SerializeToString() - data_type = self.data_type() - cPickle.dump({ - 'protobin': protobin, - 'data_type': data_type - }, stream, cPickle.HIGHEST_PROTOCOL) - - -def __check_layer_type__(layer): - if not isinstance(layer, config_base.Layer): - raise ValueError('layer should have type paddle.v2.config_base.Layer') diff --git a/python/paddle/v2/trainer.py b/python/paddle/v2/trainer.py deleted file mode 100644 index 5d98d5b6db57006b1e78c95eb1d6c7de7346e0e4..0000000000000000000000000000000000000000 --- a/python/paddle/v2/trainer.py +++ /dev/null @@ -1,258 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Module Trainer -""" -import collections -from topology import Topology -from . import event as v2_event -from . import optimizer as v2_optimizer -from . import parameters as v2_parameters - -__all__ = ['SGD'] - - -def default_event_handler(event): - """ - Default event handler. It will print some logs and save the model. - - TODO(yuyang18): Complete it! - :param event: - :return: - """ - pass - - -class SGD(object): - """ - Simple SGD Trainer. - SGD Trainer combines data reader, network topology and update_equation together - to train/test a neural network. - - :param cost: Target cost that the neural network should optimize. - :type cost: paddle.v2.config_base.Layer - :param parameters: The parameters dictionary. - :type parameters: paddle.v2.parameters.Parameters - :param update_equation: The optimizer object. - :type update_equation: paddle.v2.optimizer.Optimizer - :param extra_layers: Some layers in the neural network graph are not - in the path of the cost layer. - :type extra_layers: paddle.v2.config_base.Layer - :param is_local: Whether to train locally - :type is_local: bool - :param pserver_spec: comma-separated string of pserver locations, - e.g. 127.10.0.10:3000,127.10.0.11:3000; - this parameter is only used for fault - tolerant mode cluster training. - :type pserver_spec: string - :param use_etcd: Whether to use an etcd pserver. - :type use_etcd: bool - """ - - def __init__(self, - cost, - parameters, - update_equation, - extra_layers=None, - is_local=True, - pserver_spec=None, - use_etcd=True): - - if not isinstance(parameters, v2_parameters.Parameters): - raise TypeError('parameters should be ' - 'paddle.v2.parameters.Parameters') - - if not isinstance(update_equation, v2_optimizer.Optimizer): - raise TypeError("update equation parameter must be " - "paddle.v2.optimizer.Optimizer") - import py_paddle.swig_paddle as api - topology = Topology(cost, extra_layers=extra_layers) - # HACK(typhoonzero): update ParameterConfig(proto) in case of optimizers - # are defined after layers, or between layers.
- topology.update_from_default() - parameters.update_param_conf(topology.proto()) - - self.__optimizer__ = update_equation - self.__topology__ = topology - self.__parameters__ = parameters - self.__topology_in_proto__ = topology.proto() - self.__is_local__ = is_local - self.__pserver_spec__ = pserver_spec - self.__use_etcd__ = use_etcd - - self.__use_sparse_updater__ = self.__topology__.use_sparse_updater() - # In local mode, disable sparse_remote_update. - if is_local: - for param in self.__topology_in_proto__.parameters: - if param.sparse_remote_update: - param.sparse_remote_update = False - - self.__gm_create_mode__ = api.CREATE_MODE_NORMAL if not \ - self.__use_sparse_updater__ else api.CREATE_MODE_SGD_SPARSE_CPU_TRAINING - self.__data_types__ = topology.data_type() - gm = api.GradientMachine.createFromConfigProto( - self.__topology_in_proto__, self.__gm_create_mode__, - self.__optimizer__.enable_types()) - assert isinstance(gm, api.GradientMachine) - self.__gradient_machine__ = gm - self.__gradient_machine__.randParameters() - self.__parameters__.append_gradient_machine(gm) - self.__parameter_updater__ = None - - def get_topology_proto(self): - return self.__topology_in_proto__ - - def __use_remote_sparse_updater__(self): - return self.__use_sparse_updater__ and not self.__is_local__ - - def __prepare_parameter__(self, in_args): - """ - Prepare parameters before forward/backward. - When using the remote sparse updater, parameters should be - fetched from the parameter server according to the input arguments. - :param in_args: input arguments of this batch. - :return: - """ - if self.__use_remote_sparse_updater__(): - self.__gradient_machine__.prefetch(in_args) - self.__parameter_updater__.getParametersRemote() - - def save_parameter_to_tar(self, f): - self.__parameter_updater__.catchUpWith() - self.__parameter_updater__.apply() - self.__parameter_updater__.getParametersRemote(True, True) - self.__parameters__.to_tar(f) - self.__parameter_updater__.restore() - - def train(self, reader, num_passes=1, event_handler=None, feeding=None): - """ - Training method. Will train num_passes passes over the input data. - - :param reader: A reader that reads and yields data items. Usually we use a - batched reader to do mini-batch training. - :type reader: collections.Iterable - :param num_passes: The total train passes. - :param event_handler: Event handler. A method that will be invoked when an - event occurs. - :type event_handler: (BaseEvent) => None - :param feeding: Feeding is a map from neural network input names to the - array indices that the reader returns.
- :type feeding: dict|list - :return: - """ - import py_paddle.swig_paddle as api - from data_feeder import DataFeeder - if event_handler is None: - event_handler = default_event_handler - __check_train_args__(**locals()) - - self.__parameter_updater__ = self.__optimizer__.create_updater( - self.__is_local__, num_passes, self.__use_sparse_updater__, - self.__pserver_spec__, self.__use_etcd__) - self.__parameter_updater__.init(self.__gradient_machine__) - - self.__gradient_machine__.start() - batch_evaluator = self.__gradient_machine__.makeEvaluator() - assert isinstance(batch_evaluator, api.Evaluator) - pass_evaluator = self.__gradient_machine__.makeEvaluator() - assert isinstance(pass_evaluator, api.Evaluator) - out_args = api.Arguments.createArguments(0) - feeder = DataFeeder(self.__data_types__, feeding) - for pass_id in xrange(num_passes): - event_handler(v2_event.BeginPass(pass_id)) - pass_evaluator.start() - self.__parameter_updater__.startPass() - for batch_id, data_batch in enumerate(reader()): - batch_evaluator.start() - event_handler( - v2_event.BeginIteration( - pass_id=pass_id, batch_id=batch_id)) - pass_type = self.__parameter_updater__.startBatch( - len(data_batch)) - in_args = feeder(data_batch) - self.__prepare_parameter__(in_args) - self.__gradient_machine__.forwardBackward(in_args, out_args, - pass_type) - self.__gradient_machine__.eval(pass_evaluator) - self.__gradient_machine__.eval(batch_evaluator) - event_handler( - v2_event.EndForwardBackward( - pass_id=pass_id, - batch_id=batch_id, - gm=self.__gradient_machine__)) - for each_param in self.__gradient_machine__.getNonStaticParameters( - ): - self.__parameter_updater__.update(each_param) - cost_sum = out_args.sum() - cost = cost_sum / len(data_batch) - self.__parameter_updater__.finishBatch(cost) - batch_evaluator.finish() - event_handler( - v2_event.EndIteration( - pass_id=pass_id, - batch_id=batch_id, - cost=cost, - evaluator=batch_evaluator, - gm=self.__gradient_machine__)) - - self.__parameter_updater__.finishPass() - pass_evaluator.finish() - event_handler( - v2_event.EndPass( - pass_id, - evaluator=pass_evaluator, - gm=self.__gradient_machine__)) - self.__gradient_machine__.finish() - - def test(self, reader, feeding=None): - """ - Testing method. Will test input data. - - :param reader: A batch reader that reads and yields data items; - it should be created by paddle.v2.batch. - :type reader: collections.Iterable - :param feeding: Feeding is a map from neural network input names to the - array indices that the reader returns.
- :type feeding: dict - :return: - """ - import py_paddle.swig_paddle as api - from data_feeder import DataFeeder - feeder = DataFeeder(self.__data_types__, feeding) - evaluator = self.__gradient_machine__.makeEvaluator() - out_args = api.Arguments.createArguments(0) - evaluator.start() - total_cost = 0 - num_samples = 0.0 - for data_batch in reader(): - num_samples += len(data_batch) - in_args = feeder(data_batch) - self.__prepare_parameter__(in_args) - self.__gradient_machine__.forward(in_args, out_args, api.PASS_TEST) - total_cost += out_args.sum() - self.__gradient_machine__.eval(evaluator) - - evaluator.finish() - return v2_event.TestResult( - evaluator=evaluator, cost=total_cost / num_samples) - - -def __check_train_args__(reader, event_handler, **kwargs): - """ - Check train function's argument types - """ - if not callable(reader) or not isinstance(reader(), collections.Iterator): - raise TypeError('train_data_reader should be a function, ' - 'which can return an iterator') - if not callable(event_handler): - raise TypeError('event handler should be a function')
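Taken together, trainer.py above defines the whole v2 training loop: build a cost, create parameters, pick an optimizer, and hand all three to SGD, whose train() drives the reader and fires events. A closing sketch of that flow is given below, assembled from APIs visible in this diff (SGD, paddle.parameters.create, paddle.optimizer.AdaGrad, the event classes); the synthetic reader and the '_sgd_demo' layer names are invented, and paddle.batch is assumed to be the v2 mini-batch wrapper the docstrings refer to.

import paddle.v2 as paddle

def synthetic_reader():
    # Item-level reader: one (input vector, label) pair per yield.
    for i in range(100):
        yield [0.0] * 784, i % 10

def event_handler(event):
    if isinstance(event, paddle.event.EndIteration):
        print("pass %d, batch %d, cost %f" %
              (event.pass_id, event.batch_id, event.cost))

paddle.init(use_gpu=False, trainer_count=1)
pixel = paddle.layer.data(name='pixel_sgd_demo',
                          type=paddle.data_type.dense_vector(784))
label = paddle.layer.data(name='label_sgd_demo',
                          type=paddle.data_type.integer_value(10))
inference = paddle.layer.fc(input=pixel, size=10,
                            act=paddle.activation.Softmax())
cost = paddle.layer.classification_cost(input=inference, label=label)

parameters = paddle.parameters.create(cost)
optimizer = paddle.optimizer.AdaGrad(learning_rate=3e-3)
trainer = paddle.trainer.SGD(cost=cost,
                             parameters=parameters,
                             update_equation=optimizer)
trainer.train(
    reader=paddle.batch(synthetic_reader, batch_size=32),
    num_passes=2,
    event_handler=event_handler,
    feeding={'pixel_sgd_demo': 0, 'label_sgd_demo': 1})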