# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from paddle.trainer.config_parser import * __all__ = [ 'ParamAttr', 'ExtraAttr', 'ParameterAttribute', 'ExtraLayerAttribute' ] def convert_and_compare(x, Type): """ Convert x to be the same type as Type and then convert back to check whether there is a loss of information :param x: object to be checked :param Type: target type to check x over """ return type(x)(Type(x)) == x def is_compatible_with(x, Type): """ Check if x has a type compatible with Type :param x: object to be checked :param Type: target type to check x over """ if type(x) == Type: return True try: if float == Type or int == Type: # avoid those types that can be converted to float/int but not very # meaningful and could potentially lead to error # i.e., str and bool typed value should not be used for initializing float/int variable if not isinstance(x, str) and not isinstance(x, bool): return convert_and_compare(x, Type) elif bool == Type: # should not use string type to initialize bool variable if not isinstance(x, str): return convert_and_compare(x, Type) else: return False except: return False class ParameterAttribute(object): """ Parameter Attributes object. To fine-tuning network training process, user can set attribute to control training details, such as l1,l2 rate / learning rate / how to init param. NOTE: IT IS A HIGH LEVEL USER INTERFACE. :param is_static: True if this parameter will be fixed while training. :type is_static: bool :param initial_std: Gauss Random initialization standard deviation. None if not using Gauss Random initialize parameter. :type initial_std: float or None :param initial_mean: Gauss Random initialization mean. None if not using Gauss Random initialize parameter. :type initial_mean: float or None :param initial_max: Uniform initialization max value. :type initial_max: float or None :param initial_min: Uniform initialization min value. :type initial_min: float or None :param l1_rate: the l1 regularization factor :type l1_rate: float or None :param l2_rate: the l2 regularization factor :type l2_rate: float or None :param learning_rate: The parameter learning rate. None means 1. The learning rate when optimize is LEARNING_RATE = GLOBAL_LEARNING_RATE * PARAMETER_LEARNING_RATE * SCHEDULER_FACTOR. :type learning_rate: float or None :param momentum: The parameter momentum. None means use global value. :type momentum: float or None :param gradient_clipping_threshold: gradient clipping threshold. If gradient value larger than some value, will be clipped. :type gradient_clipping_threshold: float :param sparse_update: Enable sparse update for this parameter. It will enable both local and remote sparse update. :type sparse_update: bool """ def __init__(self, name=None, is_static=False, initial_std=None, initial_mean=None, initial_max=None, initial_min=None, l1_rate=None, l2_rate=None, learning_rate=None, momentum=None, gradient_clipping_threshold=None, sparse_update=False): # initialize strategy. if is_static: self.attr = {'is_static': True} elif initial_std is None and initial_mean is None and initial_max \ is None and initial_min is None: self.attr = {'initial_smart': True} elif is_compatible_with(initial_std, float) or \ is_compatible_with(initial_mean, float): self.attr = dict() if initial_std is not None: self.attr['initial_std'] = initial_std if initial_mean is not None: self.attr['initial_mean'] = initial_mean self.attr['initial_strategy'] = 0 # Gauss Random elif is_compatible_with(initial_max, float) and \ is_compatible_with(initial_min, float): initial_max = initial_max initial_min = initial_min assert initial_min < initial_max initial_mean = (initial_max + initial_min) / 2 initial_std = initial_mean - initial_min self.attr = dict() self.attr['initial_mean'] = initial_mean self.attr['initial_std'] = initial_std self.attr['initial_strategy'] = 1 # Uniform Random else: raise RuntimeError("Unexpected branch.") if not is_static and is_compatible_with(l1_rate, float): self.attr['decay_rate_l1'] = l1_rate if not is_static and is_compatible_with(l2_rate, float): self.attr['decay_rate'] = l2_rate if not is_static and is_compatible_with(learning_rate, float): self.attr['learning_rate'] = learning_rate if not is_static and is_compatible_with(momentum, float): self.attr['momentum'] = momentum if name is not None: self.attr['parameter_name'] = name if sparse_update: self.attr['sparse_update'] = True self.attr['sparse_remote_update'] = True if gradient_clipping_threshold is not None and \ is_compatible_with(gradient_clipping_threshold, float): self.attr['gradient_clipping_threshold'] = \ gradient_clipping_threshold def set_default_parameter_name(self, name): """ Set default parameter name. If parameter not set, then will use default parameter name. :param name: default parameter name. :type name: basestring """ if 'parameter_name' not in self.attr: self.attr['parameter_name'] = name @staticmethod def to_bias(bias_attr): if isinstance(bias_attr, ParameterAttribute): return Bias(**bias_attr.attr) else: return False class ExtraLayerAttribute(object): """ Some high level layer attributes config. You can set all attributes here, but some layer doesn't support all attributes. If you set an attribute to a layer that not support this attribute, paddle will print an error and core. :param error_clipping_threshold: Error clipping threshold. :type error_clipping_threshold: float :param drop_rate: Dropout rate. Dropout will create a mask on layer output. The dropout rate is the zero rate of this mask. The details of what dropout is please refer to `here `_. :type drop_rate: float :param device: device ID of layer. device=-1, use CPU. device>=0, use GPU. The details allocation in parallel_nn please refer to `here `_. :type device: int """ def __init__(self, error_clipping_threshold=None, drop_rate=None, device=None): self.attr = dict() if isinstance(error_clipping_threshold, float): assert error_clipping_threshold > 0 self.attr["error_clipping_threshold"] = error_clipping_threshold if isinstance(drop_rate, float): assert drop_rate > 0 self.attr["drop_rate"] = drop_rate if isinstance(device, int): self.attr["device"] = device def check(self, layer_name): for key in self.attr: if not hasattr(self, 'can_%s' % key) or \ not getattr(self, 'can_%s' % key): raise NotImplementedError("Layer %s cannot support %s" % (layer_name, key)) @staticmethod def to_kwargs(attr): if attr is None: return dict() else: return attr.attr ParamAttr = ParameterAttribute ExtraAttr = ExtraLayerAttribute