diff --git a/example/resnet101_imagenet2012/train.py b/example/resnet101_imagenet2012/train.py
index f8eb2fae94ab4e7c4939319b256338a1f72def56..4cb1959b14be7655b0ad70a36a2e0b7392dc8879 100755
--- a/example/resnet101_imagenet2012/train.py
+++ b/example/resnet101_imagenet2012/train.py
@@ -14,7 +14,6 @@
 # ============================================================================
 """train_imagenet."""
 import os
-import math
 import argparse
 import random
 import numpy as np
@@ -34,7 +33,6 @@
 from mindspore.communication.management import init
 import mindspore.nn as nn
 import mindspore.common.initializer as weight_init
 from crossentropy import CrossEntropy
-from var_init import default_recurisive_init, KaimingNormal
 random.seed(1)
 np.random.seed(1)
@@ -71,11 +69,13 @@ if __name__ == '__main__':
     epoch_size = config.epoch_size
     net = resnet101(class_num=config.class_num)
     # weight init
-    default_recurisive_init(net)
     for _, cell in net.cells_and_names():
         if isinstance(cell, nn.Conv2d):
-            cell.weight.default_input = weight_init.initializer(KaimingNormal(a=math.sqrt(5),
-                                                                mode='fan_out', nonlinearity='relu'),
+            cell.weight.default_input = weight_init.initializer(weight_init.XavierUniform(),
+                                                                cell.weight.default_input.shape(),
+                                                                cell.weight.default_input.dtype())
+        if isinstance(cell, nn.Dense):
+            cell.weight.default_input = weight_init.initializer(weight_init.TruncatedNormal(),
                                                                 cell.weight.default_input.shape(),
                                                                 cell.weight.default_input.dtype())
     if not config.label_smooth:
diff --git a/example/resnet101_imagenet2012/var_init.py b/example/resnet101_imagenet2012/var_init.py
deleted file mode 100755
index 34d8664a4923a0822152c16f8dea357718c0c210..0000000000000000000000000000000000000000
--- a/example/resnet101_imagenet2012/var_init.py
+++ /dev/null
@@ -1,192 +0,0 @@
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-"""weight initial"""
-import math
-import numpy as np
-from mindspore.common import initializer as init
-import mindspore.nn as nn
-from mindspore import Tensor
-
-def calculate_gain(nonlinearity, param=None):
-    r"""Return the recommended gain value for the given nonlinearity function.
-    The values are as follows:
-    ================= ====================================================
-    nonlinearity      gain
-    ================= ====================================================
-    Linear / Identity :math:`1`
-    Conv{1,2,3}D      :math:`1`
-    Sigmoid           :math:`1`
-    Tanh              :math:`\frac{5}{3}`
-    ReLU              :math:`\sqrt{2}`
-    Leaky Relu        :math:`\sqrt{\frac{2}{1 + \text{negative\_slope}^2}}`
-    ================= ====================================================
-    Args:
-        nonlinearity: the non-linear function (`nn.functional` name)
-        param: optional parameter for the non-linear function
-    """
-    linear_fns = ['linear', 'conv1d', 'conv2d', 'conv3d', 'conv_transpose1d', 'conv_transpose2d', 'conv_transpose3d']
-    gain = 0
-    if nonlinearity in linear_fns or nonlinearity == 'sigmoid':
-        gain = 1
-    elif nonlinearity == 'tanh':
-        gain = 5.0 / 3
-    elif nonlinearity == 'relu':
-        gain = math.sqrt(2.0)
-    elif nonlinearity == 'leaky_relu':
-        if param is None:
-            negative_slope = 0.01
-        elif not isinstance(param, bool) and isinstance(param, int) or isinstance(param, float):
-            # True/False are instances of int, hence check above
-            negative_slope = param
-        else:
-            raise ValueError("negative_slope {} not a valid number".format(param))
-        gain = math.sqrt(2.0 / (1 + negative_slope ** 2))
-    else:
-        raise ValueError("Unsupported nonlinearity {}".format(nonlinearity))
-    return gain
-
-def _calculate_correct_fan(array, mode):
-    mode = mode.lower()
-    valid_modes = ['fan_in', 'fan_out']
-    if mode not in valid_modes:
-        raise ValueError("Mode {} not supported, please use one of {}".format(mode, valid_modes))
-    fan_in, fan_out = _calculate_fan_in_and_fan_out(array)
-    return fan_in if mode == 'fan_in' else fan_out
-
-def kaiming_uniform_(array, a=0, mode='fan_in', nonlinearity='leaky_relu'):
-    r"""Fills the input `Tensor` with values according to the method
-    described in `Delving deep into rectifiers: Surpassing human-level
-    performance on ImageNet classification` - He, K. et al. (2015), using a
-    uniform distribution. The resulting tensor will have values sampled from
-    :math:`\mathcal{U}(-\text{bound}, \text{bound})` where
-    .. math::
-        \text{bound} = \text{gain} \times \sqrt{\frac{3}{\text{fan\_mode}}}
-    Also known as He initialization.
-
-    Args:
-        array: an n-dimensional `tensor`
-        a: the negative slope of the rectifier used after this layer (only
-            used with ``'leaky_relu'``)
-        mode: either ``'fan_in'`` (default) or ``'fan_out'``. Choosing ``'fan_in'``
-            preserves the magnitude of the variance of the weights in the
-            forward pass. Choosing ``'fan_out'`` preserves the magnitudes in the
-            backwards pass.
-        nonlinearity: the non-linear function (`nn.functional` name),
-            recommended to use only with ``'relu'`` or ``'leaky_relu'`` (default).
-    """
-    fan = _calculate_correct_fan(array, mode)
-    gain = calculate_gain(nonlinearity, a)
-    std = gain / math.sqrt(fan)
-    bound = math.sqrt(3.0) * std  # Calculate uniform bounds from standard deviation
-    return np.random.uniform(-bound, bound, array.shape)
-
-def kaiming_normal_(array, a=0, mode='fan_in', nonlinearity='leaky_relu'):
-    r"""Fills the input `Tensor` with values according to the method
-    described in `Delving deep into rectifiers: Surpassing human-level
-    performance on ImageNet classification` - He, K. et al. (2015), using a
-    normal distribution. The resulting tensor will have values sampled from
-    :math:`\mathcal{N}(0, \text{std}^2)` where
-    .. math::
-        \text{std} = \frac{\text{gain}}{\sqrt{\text{fan\_mode}}}
-    Also known as He initialization.
-
-    Args:
-        array: an n-dimensional `tensor`
-        a: the negative slope of the rectifier used after this layer (only
-            used with ``'leaky_relu'``)
-        mode: either ``'fan_in'`` (default) or ``'fan_out'``. Choosing ``'fan_in'``
-            preserves the magnitude of the variance of the weights in the
-            forward pass. Choosing ``'fan_out'`` preserves the magnitudes in the
-            backwards pass.
-        nonlinearity: the non-linear function (`nn.functional` name),
-            recommended to use only with ``'relu'`` or ``'leaky_relu'`` (default).
-    """
-    fan = _calculate_correct_fan(array, mode)
-    gain = calculate_gain(nonlinearity, a)
-    std = gain / math.sqrt(fan)
-    return np.random.normal(0, std, array.shape)
-
-def _calculate_fan_in_and_fan_out(array):
-    """calculate the fan_in and fan_out for input array"""
-    dimensions = len(array.shape)
-    if dimensions < 2:
-        raise ValueError("Fan in and fan out can not be computed for array with fewer than 2 dimensions")
-    num_input_fmaps = array.shape[1]
-    num_output_fmaps = array.shape[0]
-    receptive_field_size = 1
-    if dimensions > 2:
-        receptive_field_size = array[0][0].size
-    fan_in = num_input_fmaps * receptive_field_size
-    fan_out = num_output_fmaps * receptive_field_size
-    return fan_in, fan_out
-
-def assignment(arr, num):
-    """Assign the value of num to arr"""
-    if arr.shape == ():
-        arr = arr.reshape((1))
-        arr[:] = num
-        arr = arr.reshape(())
-    else:
-        if isinstance(num, np.ndarray):
-            arr[:] = num[:]
-        else:
-            arr[:] = num
-    return arr
-
-class KaimingUniform(init.Initializer):
-    def __init__(self, a=0, mode='fan_in', nonlinearity='leaky_relu'):
-        super(KaimingUniform, self).__init__()
-        self.a = a
-        self.mode = mode
-        self.nonlinearity = nonlinearity
-    def _initialize(self, arr):
-        tmp = kaiming_uniform_(arr, self.a, self.mode, self.nonlinearity)
-        assignment(arr, tmp)
-
-class KaimingNormal(init.Initializer):
-    def __init__(self, a=0, mode='fan_in', nonlinearity='leaky_relu'):
-        super(KaimingNormal, self).__init__()
-        self.a = a
-        self.mode = mode
-        self.nonlinearity = nonlinearity
-    def _initialize(self, arr):
-        tmp = kaiming_normal_(arr, self.a, self.mode, self.nonlinearity)
-        assignment(arr, tmp)
-
-def default_recurisive_init(custom_cell):
-    """weight init for conv2d and dense"""
-    for _, cell in custom_cell.cells_and_names():
-        if isinstance(cell, nn.Conv2d):
-            cell.weight.default_input = init.initializer(KaimingUniform(a=math.sqrt(5)),
-                                                         cell.weight.default_input.shape(),
-                                                         cell.weight.default_input.dtype())
-            if cell.bias is not None:
-                fan_in, _ = _calculate_fan_in_and_fan_out(cell.weight.default_input.asnumpy())
-                bound = 1 / math.sqrt(fan_in)
-                cell.bias.default_input = Tensor(np.random.uniform(-bound, bound,
-                                                                   cell.bias.default_input.shape()),
-                                                 cell.bias.default_input.dtype())
-        elif isinstance(cell, nn.Dense):
-            cell.weight.default_input = init.initializer(KaimingUniform(a=math.sqrt(5)),
-                                                         cell.weight.default_input.shape(),
-                                                         cell.weight.default_input.dtype())
-            if cell.bias is not None:
-                fan_in, _ = _calculate_fan_in_and_fan_out(cell.weight.default_input.asnumpy())
-                bound = 1 / math.sqrt(fan_in)
-                cell.bias.default_input = Tensor(np.random.uniform(-bound, bound,
-                                                                   cell.bias.default_input.shape()),
-                                                 cell.bias.default_input.dtype())
-        elif isinstance(cell, (nn.BatchNorm2d, nn.BatchNorm1d)):
-            pass
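
Note: before this patch, train.py called default_recurisive_init(net) and then overwrote Conv2d weights with KaimingNormal(a=math.sqrt(5), mode='fan_out', nonlinearity='relu'). It now uses the built-in weight_init.XavierUniform() for Conv2d and weight_init.TruncatedNormal() for Dense, which is why var_init.py can be deleted. For intuition, here is a minimal NumPy sketch of the two replacement distributions under their standard definitions; the weight shape and the sigma=0.01 default for TruncatedNormal are assumptions for illustration, not values taken from this diff:

    import math
    import numpy as np

    shape = (64, 3, 7, 7)   # hypothetical conv weight (out_c, in_c, kh, kw)
    fan_in = shape[1] * shape[2] * shape[3]
    fan_out = shape[0] * shape[2] * shape[3]

    # Xavier/Glorot uniform: U(-bound, bound) with bound = sqrt(6 / (fan_in + fan_out))
    bound = math.sqrt(6.0 / (fan_in + fan_out))
    conv_w = np.random.uniform(-bound, bound, shape)

    # Truncated normal: draw N(0, sigma^2) and redraw anything beyond 2 * sigma
    sigma = 0.01            # assumed default; the diff calls TruncatedNormal() bare
    dense_w = np.random.normal(0, sigma, (1000, 2048))
    oob = np.abs(dense_w) > 2 * sigma
    while oob.any():
        dense_w[oob] = np.random.normal(0, sigma, oob.sum())
        oob = np.abs(dense_w) > 2 * sigma

Unlike the removed Kaiming scheme, Xavier balances fan_in and fan_out, and the truncated normal ignores fan entirely, so the Dense layer starts with much smaller weights.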
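
The deleted helpers implement He et al. (2015) initialization: for a conv weight, fan_in = in_channels * kernel_h * kernel_w, fan_out = out_channels * kernel_h * kernel_w, and kaiming_normal_ samples N(0, std^2) with std = gain / sqrt(fan). With nonlinearity='relu', calculate_gain returns sqrt(2) and ignores the a=math.sqrt(5) argument train.py used to pass; `a` only matters for 'leaky_relu'. A short self-contained check of those formulas, using the same hypothetical shape as above:

    import math
    import numpy as np

    shape = (64, 3, 7, 7)                      # hypothetical conv weight
    receptive_field_size = shape[2] * shape[3]
    fan_in = shape[1] * receptive_field_size   # 3 * 49 = 147
    fan_out = shape[0] * receptive_field_size  # 64 * 49 = 3136

    # KaimingNormal(mode='fan_out', nonlinearity='relu'), as train.py used it:
    gain = math.sqrt(2.0)                      # calculate_gain('relu')
    std = gain / math.sqrt(fan_out)            # ~0.0253

    samples = np.random.normal(0, std, shape)
    assert abs(samples.std() - std) < 1e-3     # loose statistical check

The kaiming_uniform_ variant instead samples U(-bound, bound) with bound = sqrt(3) * std, which yields the same standard deviation.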