diff --git a/demo/darts/model.py b/demo/darts/model.py
index c0ceb198ea129aab10e3faf65a688991cb019c9f..a313337bf681e71d46008c72f9e05b3a0c8c901f 100644
--- a/demo/darts/model.py
+++ b/demo/darts/model.py
@@ -20,7 +20,8 @@ import numpy as np
import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import ConstantInitializer, MSRAInitializer
-from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear
+from paddle.nn import Conv2D
+from paddle.fluid.dygraph.nn import Pool2D, BatchNorm, Linear
from paddle.fluid.dygraph.base import to_variable
from genotypes import PRIMITIVES
from genotypes import Genotype
diff --git a/demo/darts/operations.py b/demo/darts/operations.py
index cf362c1e00049ccdb732340b53316dd5ce7a3192..f63696f7ff7f652c80d3c517dc3b292077ce09a0 100644
--- a/demo/darts/operations.py
+++ b/demo/darts/operations.py
@@ -13,7 +13,8 @@
# limitations under the License.
import paddle.fluid as fluid
-from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm
+from paddle.nn import Conv2D
+from paddle.fluid.dygraph.nn import Pool2D, BatchNorm
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import ConstantInitializer, MSRAInitializer
@@ -58,10 +59,8 @@ OPS = {
def bn_param_config(affine=False):
- gama = ParamAttr(
- initializer=ConstantInitializer(value=1), trainable=affine)
- beta = ParamAttr(
- initializer=ConstantInitializer(value=0), trainable=affine)
+ gama = ParamAttr(initializer=ConstantInitializer(value=1), trainable=affine)
+ beta = ParamAttr(initializer=ConstantInitializer(value=0), trainable=affine)
return gama, beta
@@ -107,8 +106,7 @@ class FactorizedReduce(fluid.dygraph.Layer):
param_attr=fluid.ParamAttr(initializer=MSRAInitializer()),
bias_attr=False)
gama, beta = bn_param_config(affine)
- self.bn = BatchNorm(
- num_channels=c_out, param_attr=gama, bias_attr=beta)
+ self.bn = BatchNorm(num_channels=c_out, param_attr=gama, bias_attr=beta)
def forward(self, x):
x = fluid.layers.relu(x)
@@ -140,8 +138,7 @@ class SepConv(fluid.dygraph.Layer):
param_attr=fluid.ParamAttr(initializer=MSRAInitializer()),
bias_attr=False)
gama, beta = bn_param_config(affine)
- self.bn1 = BatchNorm(
- num_channels=c_in, param_attr=gama, bias_attr=beta)
+ self.bn1 = BatchNorm(num_channels=c_in, param_attr=gama, bias_attr=beta)
self.conv3 = Conv2D(
num_channels=c_in,
num_filters=c_in,
@@ -257,8 +254,7 @@ class ReLUConvBN(fluid.dygraph.Layer):
param_attr=fluid.ParamAttr(initializer=MSRAInitializer()),
bias_attr=False)
gama, beta = bn_param_config(affine)
- self.bn = BatchNorm(
- num_channels=c_out, param_attr=gama, bias_attr=beta)
+ self.bn = BatchNorm(num_channels=c_out, param_attr=gama, bias_attr=beta)
def forward(self, x):
x = fluid.layers.relu(x)
diff --git a/demo/one_shot/train.py b/demo/one_shot/train.py
index 5e8267ff66f37f5b24807eb86c3bdad7182de2b7..7885470156c3cbe776311717d3e2da31eddf0ec2 100644
--- a/demo/one_shot/train.py
+++ b/demo/one_shot/train.py
@@ -21,7 +21,8 @@ import os
import paddle
import paddle.fluid as fluid
from paddle.fluid.optimizer import AdamOptimizer
-from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear
+from paddle.nn import Conv2D
+from paddle.fluid.dygraph.nn import Pool2D, Linear
from paddle.fluid.dygraph.base import to_variable
from paddleslim.nas.one_shot import SuperMnasnet
@@ -142,8 +143,7 @@ def train_mnist(args, model, tokens=None):
epoch_num = args.epoch
BATCH_SIZE = 64
- adam = AdamOptimizer(
- learning_rate=0.001, parameter_list=model.parameters())
+ adam = AdamOptimizer(learning_rate=0.001, parameter_list=model.parameters())
train_reader = paddle.fluid.io.batch(
paddle.dataset.mnist.train(), batch_size=BATCH_SIZE, drop_last=True)
@@ -187,8 +187,7 @@ def train_mnist(args, model, tokens=None):
print("Loss at epoch {} , acc is: {}".format(epoch, test_acc))
save_parameters = (not args.use_data_parallel) or (
- args.use_data_parallel and
- fluid.dygraph.parallel.Env().local_rank == 0)
+ args.use_data_parallel and fluid.dygraph.parallel.Env().local_rank == 0)
if save_parameters:
fluid.save_dygraph(model.state_dict(), "save_temp")
print("checkpoint saved")
diff --git a/paddleslim/models/dygraph/mobilenet.py b/paddleslim/models/dygraph/mobilenet.py
index 16f0aef39ef59697f71b50bde15eb5eb3778e522..f1dca13851eb5276ddafd92d9f3b544f9d686d8a 100755
--- a/paddleslim/models/dygraph/mobilenet.py
+++ b/paddleslim/models/dygraph/mobilenet.py
@@ -24,7 +24,8 @@ import paddle.fluid as fluid
from paddle.fluid.initializer import MSRA
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.layer_helper import LayerHelper
-from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear
+from paddle.nn import Conv2D
+from paddle.fluid.dygraph.nn import Pool2D, BatchNorm, Linear
from paddle.fluid.dygraph.base import to_variable
from paddle.fluid import framework
diff --git a/paddleslim/models/dygraph/resnet.py b/paddleslim/models/dygraph/resnet.py
index a33f6f56decfdba28f93282eb62adbdb185ede4a..7b9feae920ae1681570b0ca31d21b5da4d80f8b5 100644
--- a/paddleslim/models/dygraph/resnet.py
+++ b/paddleslim/models/dygraph/resnet.py
@@ -15,7 +15,8 @@
import paddle
import paddle.fluid as fluid
from paddle.fluid.layer_helper import LayerHelper
-from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear
+from paddle.nn import Conv2D
+from paddle.fluid.dygraph.nn import Pool2D, BatchNorm, Linear
class ConvBNLayer(fluid.dygraph.Layer):
@@ -114,11 +115,7 @@ class ResNet(fluid.dygraph.Layer):
num_filters = [64, 128, 256, 512]
self.conv = ConvBNLayer(
- num_channels=3,
- num_filters=64,
- filter_size=7,
- stride=1,
- act='relu')
+ num_channels=3, num_filters=64, filter_size=7, stride=1, act='relu')
self.pool2d_max = Pool2D(
pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')
diff --git a/paddleslim/nas/darts/search_space/conv_bert/model/bert.py b/paddleslim/nas/darts/search_space/conv_bert/model/bert.py
index 38de45e6d88058800b4880d7d2354f5a2b5605f9..87d584bc41f0cdb5e9c65030cae0ff6d209b509f 100755
--- a/paddleslim/nas/darts/search_space/conv_bert/model/bert.py
+++ b/paddleslim/nas/darts/search_space/conv_bert/model/bert.py
@@ -23,8 +23,10 @@ import json
import numpy as np
import paddle
import paddle.fluid as fluid
-from paddle.fluid.dygraph import Embedding, LayerNorm, Linear, to_variable, Layer, guard
-from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear
+from paddle.nn import Conv2D
+from paddle.fluid.dygraph import Embedding, LayerNorm, Linear, Layer
+from paddle.fluid.dygraph import Pool2D, BatchNorm
+from paddle.fluid.dygraph import to_variable, guard
from paddle.fluid import ParamAttr
from paddle.fluid.initializer import MSRA
from .transformer_encoder import EncoderLayer
diff --git a/paddleslim/nas/darts/search_space/conv_bert/model/transformer_encoder.py b/paddleslim/nas/darts/search_space/conv_bert/model/transformer_encoder.py
index 86a5db277b48c36aadf3c651189408d1c8162db4..ab99b96c5ef8d250296f9fae6f71142ad9eeeb5e 100755
--- a/paddleslim/nas/darts/search_space/conv_bert/model/transformer_encoder.py
+++ b/paddleslim/nas/darts/search_space/conv_bert/model/transformer_encoder.py
@@ -22,8 +22,9 @@ from collections.abc import Iterable
import paddle
import paddle.fluid as fluid
+from paddle.nn import Conv2D
from paddle.fluid.dygraph import Embedding, LayerNorm, Linear
-from paddle.fluid.dygraph import Conv2D, BatchNorm, Pool2D
+from paddle.fluid.dygraph import BatchNorm, Pool2D
from paddle.fluid.dygraph import Layer
from paddle.fluid.dygraph import to_variable
from paddle.fluid.initializer import NormalInitializer
diff --git a/paddleslim/nas/ofa/__init__.py b/paddleslim/nas/ofa/__init__.py
index 21e19995aedd48d1a048aad0dca86d54b2275a38..a4a565ed283d2354f675ff3460e4d82a93c92b5a 100644
--- a/paddleslim/nas/ofa/__init__.py
+++ b/paddleslim/nas/ofa/__init__.py
@@ -16,10 +16,4 @@ from .ofa import OFA, RunConfig, DistillConfig
from .convert_super import supernet
from .utils.special_config import *
from .get_sub_model import *
-
-from .utils.utils import get_paddle_version
-pd_ver = get_paddle_version()
-if pd_ver == 185:
- from .layers_old import *
-else:
- from .layers import *
+from .layers import *
diff --git a/paddleslim/nas/ofa/convert_super.py b/paddleslim/nas/ofa/convert_super.py
index 580f6b656f18ebb519adcc1d9ef6790858d2d266..c6d47ed85a710dd49ee417165ca8a1c20de8fcaf 100644
--- a/paddleslim/nas/ofa/convert_super.py
+++ b/paddleslim/nas/ofa/convert_super.py
@@ -18,24 +18,15 @@ import logging
import numbers
import paddle
from ...common import get_logger
+import paddle.nn as nn
+from paddle.nn import Conv2D, Conv2DTranspose, Linear, LayerNorm, Embedding, SyncBatchNorm
+from paddle import ParamAttr
from .utils.utils import get_paddle_version
pd_ver = get_paddle_version()
-if pd_ver == 185:
- import paddle.fluid.dygraph.nn as nn
- from paddle.fluid.dygraph.nn import Conv2D, Conv2DTranspose, Linear, LayerNorm, Embedding
- from paddle.fluid import ParamAttr
- from .layers_old import *
- from . import layers_old as layers
- Layer = paddle.fluid.dygraph.Layer
-else:
- import paddle.nn as nn
- from paddle.nn import Conv2D, Conv2DTranspose, Linear, LayerNorm, Embedding, SyncBatchNorm
- from paddle import ParamAttr
- from .layers import *
- from . import layers
- Layer = paddle.nn.Layer
+from .layers import *
+from . import layers
+from paddle.nn import Layer
from .layers_base import Block
-from . import layers_old
_logger = get_logger(__name__, level=logging.INFO)
__all__ = ['supernet', 'Convert']
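With the Paddle-1.8 branch removed, `convert_super` now imports the `paddle.nn` layer set unconditionally. A minimal usage sketch for the exported `supernet`/`Convert` pair (illustrative, not part of the patch; the candidate values are assumptions based on PaddleSlim's documented OFA workflow):

```python
import paddle.nn as nn
from paddleslim.nas.ofa import OFA, supernet
from paddleslim.nas.ofa.convert_super import Convert

model = nn.Sequential(
    nn.Conv2D(3, 8, 3, padding=1), nn.ReLU(), nn.Conv2D(8, 8, 3, padding=1))

# Convert rewrites the plain paddle.nn layers into the Super* layers from
# paddleslim.nas.ofa.layers; OFA then drives sub-network sampling and training.
sp_config = supernet(kernel_size=(3, 5, 7), expand_ratio=[1, 2, 4])
super_model = Convert(sp_config).convert(model)
ofa_model = OFA(super_model)
```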
diff --git a/paddleslim/nas/ofa/layers.py b/paddleslim/nas/ofa/layers.py
index b79cbc45e1474a7fdc401449652925c121f1c1b2..717255eef9896e300a1a2cc111a099f4debbd842 100644
--- a/paddleslim/nas/ofa/layers.py
+++ b/paddleslim/nas/ofa/layers.py
@@ -994,9 +994,9 @@ class SuperBatchNorm2D(nn.BatchNorm2D):
if in_dygraph_mode():
if feature_dim != self._mean.shape[0]:
batch_norm_out, t1, t2, t3, t4, _ = _C_ops.batch_norm(
- input, weight, bias, mean, variance, self._momentum,
- self._epsilon, self._data_format, not self.training,
- self._use_global_stats, trainable_statistics, False, False)
+ input, mean, variance, weight, bias, not self.training,
+ self._momentum, self._epsilon, self._data_format,
+ self._use_global_stats, trainable_statistics)
self._mean[:feature_dim].set_value(mean)
self._variance[:feature_dim].set_value(variance)
mean_out[:feature_dim].set_value(mean_out_tmp)
@@ -1004,9 +1004,9 @@ class SuperBatchNorm2D(nn.BatchNorm2D):
return batch_norm_out
else:
batch_norm_out, t1, t2, t3, t4, _ = _C_ops.batch_norm(
- input, weight, bias, mean, variance, self._momentum,
- self._epsilon, self._data_format, not self.training,
- self._use_global_stats, trainable_statistics, False)
+ input, mean, variance, weight, bias, not self.training,
+ self._momentum, self._epsilon, self._data_format,
+ self._use_global_stats, trainable_statistics)
return batch_norm_out
elif _in_legacy_dygraph():
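The `_C_ops.batch_norm` arguments above follow the Paddle 2.x eager signature shown in the added lines (mean/variance before weight/bias, with `is_test` after them). A short usage sketch of the layer this code path serves (illustrative, not part of the patch; it assumes `SuperBatchNorm2D` is exported from `paddleslim.nas.ofa.layers` and that eager mode is active):

```python
import paddle
from paddleslim.nas.ofa.layers import SuperBatchNorm2D

bn = SuperBatchNorm2D(8)          # statistics allocated for the full 8 channels
x = paddle.rand([2, 4, 16, 16])   # a sampled sub-network may pass only 4 channels
y = bn(x)                         # running statistics and affine parameters are
                                  # sliced to 4 channels before _C_ops.batch_norm
```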
diff --git a/paddleslim/nas/ofa/layers_old.py b/paddleslim/nas/ofa/layers_old.py
deleted file mode 100644
index bf7839e451132d85aa06678fc72709f79e81b1c0..0000000000000000000000000000000000000000
--- a/paddleslim/nas/ofa/layers_old.py
+++ /dev/null
@@ -1,1140 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-### NOTE: the API of this file is based on Paddle1.8, the API in layers.py is based on Paddle2.0
-
-import numpy as np
-import logging
-import paddle.fluid as fluid
-import paddle.fluid.core as core
-import paddle.fluid.dygraph_utils as dygraph_utils
-from paddle.fluid.data_feeder import check_variable_and_dtype
-from paddle.fluid.framework import _varbase_creator, in_dygraph_mode, _in_legacy_dygraph, _non_static_mode
-from paddle import _C_ops, _legacy_C_ops
-from paddle.fluid.data_feeder import check_variable_and_dtype
-from paddle.fluid.dygraph.layer_object_helper import LayerObjectHelper
-from paddle.fluid.dygraph.nn import InstanceNorm, Conv2D, Conv2DTranspose, BatchNorm
-
-from ...common import get_logger
-from .utils.utils import compute_start_end, get_same_padding, convert_to_list
-from .layers_base import *
-
-__all__ = [
- 'SuperConv2D', 'SuperConv2DTranspose', 'SuperSeparableConv2D',
- 'SuperBatchNorm', 'SuperLinear', 'SuperInstanceNorm', 'SuperGroupConv2D',
- 'SuperDepthwiseConv2D', 'SuperGroupConv2DTranspose',
- 'SuperDepthwiseConv2DTranspose', 'SuperLayerNorm', 'SuperEmbedding'
-]
-
-_logger = get_logger(__name__, level=logging.INFO)
-
-### TODO: if task is elastic width, need to add re_organize_middle_weight in 1x1 conv in MBBlock
-
-
-class SuperConv2D(fluid.dygraph.Conv2D):
- """
- This interface is used to construct a callable object of the ``SuperConv2D`` class.
- The difference between ```SuperConv2D``` and ```Conv2D``` is: ```SuperConv2D``` need
- to feed a config dictionary with the format of {'channel', num_of_channel} represents
- the channels of the outputs, used to change the first dimension of weight and bias,
- only train the first channels of the weight and bias.
-
- Note: the channel in config need to less than first defined.
-
- The super convolution2D layer calculates the output based on the input, filter
- and strides, paddings, dilations, groups parameters. Input and
- Output are in NCHW format, where N is batch size, C is the number of
- the feature map, H is the height of the feature map, and W is the width of the feature map.
- Filter's shape is [MCHW] , where M is the number of output feature map,
- C is the number of input feature map, H is the height of the filter,
- and W is the width of the filter. If the groups is greater than 1,
- C will equal the number of input feature map divided by the groups.
- Please refer to UFLDL's `convolution
- `_
- for more details.
- If bias attribution and activation type are provided, bias is added to the
- output of the convolution, and the corresponding activation function is
- applied to the final result.
- For each input :math:`X`, the equation is:
- .. math::
- Out = \\sigma (W \\ast X + b)
- Where:
- * :math:`X`: Input value, a ``Tensor`` with NCHW format.
- * :math:`W`: Filter value, a ``Tensor`` with shape [MCHW] .
- * :math:`\\ast`: Convolution operation.
- * :math:`b`: Bias value, a 2-D ``Tensor`` with shape [M, 1].
- * :math:`\\sigma`: Activation function.
- * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different.
-
- Example:
- - Input:
- Input shape: :math:`(N, C_{in}, H_{in}, W_{in})`
- Filter shape: :math:`(C_{out}, C_{in}, H_f, W_f)`
- - Output:
- Output shape: :math:`(N, C_{out}, H_{out}, W_{out})`
- Where
- .. math::
- H_{out}&= \\frac{(H_{in} + 2 * paddings[0] - (dilations[0] * (H_f - 1) + 1))}{strides[0]} + 1 \\\\
- W_{out}&= \\frac{(W_{in} + 2 * paddings[1] - (dilations[1] * (W_f - 1) + 1))}{strides[1]} + 1
- Parameters:
- num_channels(int): The number of channels in the input image.
- num_filters(int): The number of filter. It is as same as the output
- feature map.
- filter_size (int or tuple): The filter size. If filter_size is a tuple,
- it must contain two integers, (filter_size_H, filter_size_W).
- Otherwise, the filter will be a square.
- candidate_config(dict, optional): Dictionary descripts candidate config of this layer,
- such as {'kernel_size': (3, 5, 7), 'channel': (4, 6, 8)}, means the kernel size of
- this layer can be choose from (3, 5, 7), the key of candidate_config
- only can be 'kernel_size', 'channel' and 'expand_ratio', 'channel' and 'expand_ratio'
- CANNOT be set at the same time. Default: None.
- transform_kernel(bool, optional): Whether to use transform matrix to transform a large filter
- to a small filter. Default: False.
- stride (int or tuple, optional): The stride size. If stride is a tuple, it must
- contain two integers, (stride_H, stride_W). Otherwise, the
- stride_H = stride_W = stride. Default: 1.
- padding (int or tuple, optional): The padding size. If padding is a tuple, it must
- contain two integers, (padding_H, padding_W). Otherwise, the
- padding_H = padding_W = padding. Default: 0.
- dilation (int or tuple, optional): The dilation size. If dilation is a tuple, it must
- contain two integers, (dilation_H, dilation_W). Otherwise, the
- dilation_H = dilation_W = dilation. Default: 1.
- groups (int, optional): The groups number of the Conv2d Layer. According to grouped
- convolution in Alex Krizhevsky's Deep CNN paper: when group=2,
- the first half of the filters is only connected to the first half
- of the input channels, while the second half of the filters is only
- connected to the second half of the input channels. Default: 1.
- param_attr (ParamAttr, optional): The parameter attribute for learnable weights(Parameter)
- of conv2d. If it is set to None or one attribute of ParamAttr, conv2d
- will create ParamAttr as param_attr. If the Initializer of the param_attr
- is not set, the parameter is initialized with :math:`Normal(0.0, std)`,
- and the :math:`std` is :math:`(\\frac{2.0 }{filter\\_elem\\_num})^{0.5}`. Default: None.
- bias_attr (ParamAttr or bool, optional): The attribute for the bias of conv2d.
- If it is set to False, no bias will be added to the output units.
- If it is set to None or one attribute of ParamAttr, conv2d
- will create ParamAttr as bias_attr. If the Initializer of the bias_attr
- is not set, the bias is initialized zero. Default: None.
- use_cudnn (bool, optional): Use cudnn kernel or not, it is valid only when the cudnn
- library is installed. Default: True.
- act (str, optional): Activation type, if it is set to None, activation is not appended.
- Default: None.
- dtype (str, optional): Data type, it can be "float32" or "float64". Default: "float32".
- Attribute:
- **weight** (Parameter): the learnable weights of filter of this layer.
- **bias** (Parameter or None): the learnable bias of this layer.
- Returns:
- None
-
- Raises:
- ValueError: if ``use_cudnn`` is not a bool value.
- Examples:
- .. code-block:: python
- from paddle.fluid.dygraph.base import to_variable
- import paddle.fluid as fluid
- from paddleslim.core.layers import SuperConv2D
- import numpy as np
- data = np.random.uniform(-1, 1, [10, 3, 32, 32]).astype('float32')
- with fluid.dygraph.guard():
- super_conv2d = SuperConv2D(3, 10, 3)
- config = {'channel': 5}
- data = to_variable(data)
- conv = super_conv2d(data, config)
-
- """
-
- ### NOTE: filter_size, num_channels and num_filters must be the max of candidate to define a largest network.
- def __init__(self,
- num_channels,
- num_filters,
- filter_size,
- candidate_config={},
- transform_kernel=False,
- stride=1,
- dilation=1,
- padding=0,
- groups=None,
- param_attr=None,
- bias_attr=None,
- use_cudnn=True,
- act=None,
- dtype='float32'):
- ### NOTE: padding always is 0, add padding in forward because of kernel size is uncertain
- super(SuperConv2D, self).__init__(
- num_channels, num_filters, filter_size, stride, padding, dilation,
- groups, param_attr, bias_attr, use_cudnn, act, dtype)
-
- if isinstance(self._filter_size, int):
- self._filter_size = convert_to_list(self._filter_size, 2)
-
- self.candidate_config = candidate_config
- if len(candidate_config.items()) != 0:
- for k, v in candidate_config.items():
- candidate_config[k] = list(set(v))
-
- self.ks_set = candidate_config[
- 'kernel_size'] if 'kernel_size' in candidate_config else None
-
- self.expand_ratio = candidate_config[
- 'expand_ratio'] if 'expand_ratio' in candidate_config else None
- self.channel = candidate_config[
- 'channel'] if 'channel' in candidate_config else None
- self.base_channel = self._num_filters
- if self.expand_ratio != None:
- self.base_channel = int(self._num_filters / max(self.expand_ratio))
-
- self.transform_kernel = transform_kernel
- if self.ks_set != None:
- self.ks_set.sort()
- if self.transform_kernel != False:
- scale_param = dict()
- ### create parameter to transform kernel
- for i in range(len(self.ks_set) - 1):
- ks_small = self.ks_set[i]
- ks_large = self.ks_set[i + 1]
- param_name = '%dto%d_matrix' % (ks_large, ks_small)
- ks_t = ks_small**2
- scale_param[param_name] = self.create_parameter(
- attr=fluid.ParamAttr(
- name=self._full_name + param_name,
- initializer=fluid.initializer.NumpyArrayInitializer(
- np.eye(ks_t))),
- shape=(ks_t, ks_t),
- dtype=self._dtype)
-
- for name, param in scale_param.items():
- setattr(self, name, param)
-
- def get_active_filter(self, in_nc, out_nc, kernel_size):
- ### Unsupport for asymmetric kernels
- if self._filter_size[0] != self._filter_size[1]:
- return self.weight[:out_nc, :in_nc, :, :]
- start, end = compute_start_end(self._filter_size[0], kernel_size)
- ### if NOT transform kernel, intercept a center filter with kernel_size from largest filter
- filters = self.weight[:out_nc, :in_nc, start:end, start:end]
- if self.transform_kernel != False and kernel_size < self._filter_size[
- 0]:
- ### if transform kernel, then use matrix to transform
- start_filter = self.weight[:out_nc, :in_nc, :, :]
- for i in range(len(self.ks_set) - 1, 0, -1):
- src_ks = self.ks_set[i]
- if src_ks <= kernel_size:
- break
- target_ks = self.ks_set[i - 1]
- start, end = compute_start_end(src_ks, target_ks)
- _input_filter = start_filter[:, :, start:end, start:end]
- _input_filter = fluid.layers.reshape(
- _input_filter,
- shape=[(_input_filter.shape[0] * _input_filter.shape[1]),
- -1])
- _tmp_filter = _varbase_creator(dtype=_input_filter.dtype)
- if _non_static_mode():
- _legacy_C_ops.matmul(_input_filter,
- self.__getattr__('%dto%d_matrix' %
- (src_ks, target_ks)),
- _tmp_filter, 'transpose_X', False,
- 'transpose_Y', False, "alpha", 1)
-
- _tmp_filter = fluid.layers.reshape(
- _tmp_filter,
- shape=[
- filters.shape[0], filters.shape[1], target_ks, target_ks
- ])
- start_filter = _tmp_filter
- filters = start_filter
- return filters
-
- def get_groups_in_out_nc(self, in_nc, out_nc):
- if self._groups == 1 or self._groups == None:
- ### standard conv
- return self._groups, in_nc, out_nc
- elif self._groups == self._num_channels:
- ### depthwise convolution
- if in_nc != out_nc:
- _logger.debug(
- "input channel and output channel in depthwise conv is different, change output channel to input channel! origin channel:(in_nc {}, out_nc {}): ".
- format(in_nc, out_nc))
- groups = in_nc
- out_nc = in_nc
- return groups, in_nc, out_nc
- else:
- ### groups convolution
- ### conv: weight: (Cout, Cin/G, Kh, Kw)
- groups = self._groups
- in_nc = int(in_nc // groups)
- return groups, in_nc, out_nc
-
- def forward(self, input, kernel_size=None, expand_ratio=None, channel=None):
- self.cur_config = {
- 'kernel_size': kernel_size,
- 'expand_ratio': expand_ratio,
- 'channel': channel
- }
- in_nc = int(input.shape[1])
- assert (
- expand_ratio == None or channel == None
- ), "expand_ratio and channel CANNOT be NOT None at the same time."
- if expand_ratio != None:
- out_nc = int(expand_ratio * self.base_channel)
- elif channel != None:
- out_nc = int(channel)
- else:
- out_nc = self._num_filters
- ks = int(self._filter_size[0]) if kernel_size == None else int(
- kernel_size)
-
- if kernel_size is not None and self._filter_size[
- 0] != self._filter_size[1]:
- _logger.error("Searching for asymmetric kernels is NOT supported")
-
- groups, weight_in_nc, weight_out_nc = self.get_groups_in_out_nc(in_nc,
- out_nc)
-
- weight = self.get_active_filter(weight_in_nc, weight_out_nc, ks)
-
- if kernel_size != None or 'kernel_size' in self.candidate_config.keys():
- padding = convert_to_list(get_same_padding(ks), 2)
- else:
- padding = self._padding
-
- if self._l_type == 'conv2d':
- attrs = ('strides', self._stride, 'paddings', padding, 'dilations',
- self._dilation, 'groups', groups
- if groups else 1, 'use_cudnn', self._use_cudnn)
- if in_dygraph_mode():
- out = _C_ops.conv2d(
- input, weight, self._stride, padding, "EXPLICIT", groups
- if groups else 1, self._dilation, "NCHW", False, -1, False)
- elif _in_legacy_dygraph():
- out = _legacy_C_ops.conv2d(input, weight, *attrs)
- elif self._l_type == 'depthwise_conv2d':
- attrs = ('strides', self._stride, 'paddings', padding, 'dilations',
- self._dilation, 'groups', groups
- if groups else self._groups, 'use_cudnn', self._use_cudnn)
- out = core.ops.depthwise_conv2d(input, weight, *attrs)
- else:
- raise ValueError("conv type error")
-
- pre_bias = out
- out_nc = int(pre_bias.shape[1])
- if self.bias is not None:
- bias = self.bias[:out_nc]
- pre_act = dygraph_utils._append_bias_in_dygraph(pre_bias, bias, 1)
- else:
- pre_act = pre_bias
-
- return dygraph_utils._append_activation_in_dygraph(pre_act, self._act)
-
-
-class SuperGroupConv2D(SuperConv2D):
- def get_groups_in_out_nc(self, in_nc, out_nc):
- ### groups convolution
- ### conv: weight: (Cout, Cin/G, Kh, Kw)
- groups = self._groups
- in_nc = int(in_nc // groups)
- return groups, in_nc, out_nc
-
-
-class SuperDepthwiseConv2D(SuperConv2D):
- ### depthwise convolution
- def get_groups_in_out_nc(self, in_nc, out_nc):
- if in_nc != out_nc:
- _logger.debug(
- "input channel and output channel in depthwise conv is different, change output channel to input channel! origin channel:(in_nc {}, out_nc {}): ".
- format(in_nc, out_nc))
- groups = in_nc
- out_nc = in_nc
- return groups, in_nc, out_nc
-
-
-class SuperConv2DTranspose(fluid.dygraph.Conv2DTranspose):
- """
- This interface is used to construct a callable object of the ``SuperConv2DTranspose``
- class.
- The difference between ```SuperConv2DTranspose``` and ```Conv2DTranspose``` is:
- ```SuperConv2DTranspose``` need to feed a config dictionary with the format of
- {'channel', num_of_channel} represents the channels of the outputs, used to change
- the first dimension of weight and bias, only train the first channels of the weight
- and bias.
-
- Note: the channel in config need to less than first defined.
-
- The super convolution2D transpose layer calculates the output based on the input,
- filter, and dilations, strides, paddings. Input and output
- are in NCHW format. Where N is batch size, C is the number of feature map,
- H is the height of the feature map, and W is the width of the feature map.
- Filter's shape is [MCHW] , where M is the number of input feature map,
- C is the number of output feature map, H is the height of the filter,
- and W is the width of the filter. If the groups is greater than 1,
- C will equal the number of input feature map divided by the groups.
- If bias attribution and activation type are provided, bias is added to
- the output of the convolution, and the corresponding activation function
- is applied to the final result.
- The details of convolution transpose layer, please refer to the following explanation and references
- `conv2dtranspose `_ .
- For each input :math:`X`, the equation is:
- .. math::
- Out = \\sigma (W \\ast X + b)
- Where:
- * :math:`X`: Input value, a ``Tensor`` with NCHW format.
- * :math:`W`: Filter value, a ``Tensor`` with shape [MCHW] .
- * :math:`\\ast`: Convolution operation.
- * :math:`b`: Bias value, a 2-D ``Tensor`` with shape [M, 1].
- * :math:`\\sigma`: Activation function.
- * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different.
- Example:
- - Input:
- Input shape: :math:`(N, C_{in}, H_{in}, W_{in})`
- Filter shape: :math:`(C_{in}, C_{out}, H_f, W_f)`
- - Output:
- Output shape: :math:`(N, C_{out}, H_{out}, W_{out})`
- Where
- .. math::
- H^\\prime_{out} &= (H_{in} - 1) * strides[0] - 2 * paddings[0] + dilations[0] * (H_f - 1) + 1 \\\\
- W^\\prime_{out} &= (W_{in} - 1) * strides[1] - 2 * paddings[1] + dilations[1] * (W_f - 1) + 1 \\\\
- H_{out} &\\in [ H^\\prime_{out}, H^\\prime_{out} + strides[0] ) \\\\
- W_{out} &\\in [ W^\\prime_{out}, W^\\prime_{out} + strides[1] )
- Parameters:
- num_channels(int): The number of channels in the input image.
- num_filters(int): The number of the filter. It is as same as the output
- feature map.
- filter_size(int or tuple): The filter size. If filter_size is a tuple,
- it must contain two integers, (filter_size_H, filter_size_W).
- Otherwise, the filter will be a square.
- candidate_config(dict, optional): Dictionary descripts candidate config of this layer,
- such as {'kernel_size': (3, 5, 7), 'channel': (4, 6, 8)}, means the kernel size of
- this layer can be choose from (3, 5, 7), the key of candidate_config
- only can be 'kernel_size', 'channel' and 'expand_ratio', 'channel' and 'expand_ratio'
- CANNOT be set at the same time. Default: None.
- transform_kernel(bool, optional): Whether to use transform matrix to transform a large filter
- to a small filter. Default: False.
- output_size(int or tuple, optional): The output image size. If output size is a
- tuple, it must contain two integers, (image_H, image_W). None if use
- filter_size, padding, and stride to calculate output_size.
- if output_size and filter_size are specified at the same time, They
- should follow the formula above. Default: None.
- padding(int or tuple, optional): The padding size. If padding is a tuple, it must
- contain two integers, (padding_H, padding_W). Otherwise, the
- padding_H = padding_W = padding. Default: 0.
- stride(int or tuple, optional): The stride size. If stride is a tuple, it must
- contain two integers, (stride_H, stride_W). Otherwise, the
- stride_H = stride_W = stride. Default: 1.
- dilation(int or tuple, optional): The dilation size. If dilation is a tuple, it must
- contain two integers, (dilation_H, dilation_W). Otherwise, the
- dilation_H = dilation_W = dilation. Default: 1.
- groups(int, optional): The groups number of the Conv2d transpose layer. Inspired by
- grouped convolution in Alex Krizhevsky's Deep CNN paper, in which
- when group=2, the first half of the filters is only connected to the
- first half of the input channels, while the second half of the
- filters is only connected to the second half of the input channels.
- Default: 1.
- param_attr (ParamAttr, optional): The parameter attribute for learnable weights(Parameter)
- of conv2d_transpose. If it is set to None or one attribute of ParamAttr, conv2d_transpose
- will create ParamAttr as param_attr. If the Initializer of the param_attr
- is not set, the parameter is initialized with Xavier. Default: None.
- bias_attr (ParamAttr or bool, optional): The attribute for the bias of conv2d_transpose.
- If it is set to False, no bias will be added to the output units.
- If it is set to None or one attribute of ParamAttr, conv2d_transpose
- will create ParamAttr as bias_attr. If the Initializer of the bias_attr
- is not set, the bias is initialized zero. Default: None.
- use_cudnn(bool, optional): Use cudnn kernel or not, it is valid only when the cudnn
- library is installed. Default: True.
- act (str, optional): Activation type, if it is set to None, activation is not appended.
- Default: None.
- dtype (str, optional): Data type, it can be "float32" or "float64". Default: "float32".
- Attribute:
- **weight** (Parameter): the learnable weights of filters of this layer.
- **bias** (Parameter or None): the learnable bias of this layer.
- Returns:
- None
- Examples:
- .. code-block:: python
- import paddle.fluid as fluid
- from paddleslim.core.layers import SuperConv2DTranspose
- import numpy as np
- with fluid.dygraph.guard():
- data = np.random.random((3, 32, 32, 5)).astype('float32')
-                config = {'channel': 5}
- super_convtranspose = SuperConv2DTranspose(num_channels=32, num_filters=10, filter_size=3)
- ret = super_convtranspose(fluid.dygraph.base.to_variable(data), config)
- """
-
- def __init__(self,
- num_channels,
- num_filters,
- filter_size,
- output_size=None,
- candidate_config={},
- transform_kernel=False,
- stride=1,
- dilation=1,
- padding=0,
- groups=None,
- param_attr=None,
- bias_attr=None,
- use_cudnn=True,
- act=None,
- dtype='float32'):
- super(SuperConv2DTranspose, self).__init__(
- num_channels, num_filters, filter_size, output_size, padding,
- stride, dilation, groups, param_attr, bias_attr, use_cudnn, act,
- dtype)
- self.candidate_config = candidate_config
- if len(self.candidate_config.items()) != 0:
- for k, v in candidate_config.items():
- candidate_config[k] = list(set(v))
- self.ks_set = candidate_config[
- 'kernel_size'] if 'kernel_size' in candidate_config else None
-
- if isinstance(self._filter_size, int):
- self._filter_size = convert_to_list(self._filter_size, 2)
-
- self.expand_ratio = candidate_config[
- 'expand_ratio'] if 'expand_ratio' in candidate_config else None
- self.channel = candidate_config[
- 'channel'] if 'channel' in candidate_config else None
- self.base_channel = self._num_filters
- if self.expand_ratio:
- self.base_channel = int(self._num_filters / max(self.expand_ratio))
-
- self.transform_kernel = transform_kernel
- if self.ks_set != None:
- self.ks_set.sort()
- if self.transform_kernel != False:
- scale_param = dict()
- ### create parameter to transform kernel
- for i in range(len(self.ks_set) - 1):
- ks_small = self.ks_set[i]
- ks_large = self.ks_set[i + 1]
- param_name = '%dto%d_matrix' % (ks_large, ks_small)
- ks_t = ks_small**2
- scale_param[param_name] = self.create_parameter(
- attr=fluid.ParamAttr(
- name=self._full_name + param_name,
- initializer=fluid.initializer.NumpyArrayInitializer(
- np.eye(ks_t))),
- shape=(ks_t, ks_t),
- dtype=self._dtype)
-
- for name, param in scale_param.items():
- setattr(self, name, param)
-
- def get_active_filter(self, in_nc, out_nc, kernel_size):
- ### Unsupport for asymmetric kernels
- if self._filter_size[0] != self._filter_size[1]:
- return self.weight[:out_nc, :in_nc, :, :]
- start, end = compute_start_end(self._filter_size[0], kernel_size)
- filters = self.weight[:in_nc, :out_nc, start:end, start:end]
- if self.transform_kernel != False and kernel_size < self._filter_size[
- 0]:
- start_filter = self.weight[:in_nc, :out_nc, :, :]
- for i in range(len(self.ks_set) - 1, 0, -1):
- src_ks = self.ks_set[i]
- if src_ks <= kernel_size:
- break
- target_ks = self.ks_set[i - 1]
- start, end = compute_start_end(src_ks, target_ks)
- _input_filter = start_filter[:, :, start:end, start:end]
- _input_filter = fluid.layers.reshape(
- _input_filter,
- shape=[(_input_filter.shape[0] * _input_filter.shape[1]),
- -1])
- _tmp_filter = _varbase_creator(dtype=_input_filter.dtype)
- if _non_static_mode():
- _legacy_C_ops.matmul(_input_filter,
- self.__getattr__('%dto%d_matrix' %
- (src_ks, target_ks)),
- _tmp_filter, 'transpose_X', False,
- 'transpose_Y', False, "alpha", 1)
-
- _tmp_filter = fluid.layers.reshape(
- _tmp_filter,
- shape=[
- filters.shape[0], filters.shape[1], target_ks, target_ks
- ])
- start_filter = _tmp_filter
- filters = start_filter
- return filters
-
- def get_groups_in_out_nc(self, in_nc, out_nc):
- if self._groups == 1 or self._groups == None:
- ### standard conv
- return self._groups, in_nc, out_nc
- elif self._groups == self._num_channels:
- ### depthwise convolution
- if in_nc != out_nc:
- _logger.debug(
- "input channel and output channel in depthwise conv is different, change output channel to input channel! origin channel:(in_nc {}, out_nc {}): ".
- format(in_nc, out_nc))
- groups = in_nc
- out_nc = in_nc
- return groups, in_nc, out_nc
- else:
- ### groups convolution
- ### groups conv transpose: weight: (Cin, Cout/G, Kh, Kw)
- groups = self._groups
- out_nc = int(out_nc // groups)
- return groups, in_nc, out_nc
-
- def forward(self, input, kernel_size=None, expand_ratio=None, channel=None):
- self.cur_config = {
- 'kernel_size': kernel_size,
- 'expand_ratio': expand_ratio,
- 'channel': channel
- }
- in_nc = int(input.shape[1])
- assert (
- expand_ratio == None or channel == None
- ), "expand_ratio and channel CANNOT be NOT None at the same time."
- if expand_ratio != None:
- out_nc = int(expand_ratio * self.base_channel)
- elif channel != None:
- out_nc = int(channel)
- else:
- out_nc = self._num_filters
-
- ks = int(self._filter_size[0]) if kernel_size == None else int(
- kernel_size)
-
- if kernel_size is not None and self._filter_size[
- 0] != self._filter_size[1]:
- _logger.error("Searching for asymmetric kernels is NOT supported")
-
- groups, weight_in_nc, weight_out_nc = self.get_groups_in_out_nc(in_nc,
- out_nc)
-
- weight = self.get_active_filter(weight_in_nc, weight_out_nc, ks)
- if kernel_size != None or 'kernel_size' in self.candidate_config.keys():
- padding = convert_to_list(get_same_padding(ks), 2)
- else:
- padding = self._padding
-
- if _non_static_mode():
- op = getattr(_legacy_C_ops, self._op_type)
- out = op(input, weight, 'output_size', self._output_size, 'strides',
- self._stride, 'paddings', padding, 'dilations',
- self._dilation, 'groups', groups, 'use_cudnn',
- self._use_cudnn)
-
- pre_bias = out
- out_nc = int(pre_bias.shape[1])
- if self.bias is not None:
- bias = self.bias[:out_nc]
- pre_act = dygraph_utils._append_bias_in_dygraph(pre_bias, bias, 1)
- else:
- pre_act = pre_bias
-
- return dygraph_utils._append_activation_in_dygraph(
- pre_act, act=self._act)
-
-
-class SuperGroupConv2DTranspose(SuperConv2DTranspose):
- def get_groups_in_out_nc(self, in_nc, out_nc):
- ### groups convolution
- ### groups conv transpose: weight: (Cin, Cout/G, Kh, Kw)
- groups = self._groups
- out_nc = int(out_nc // groups)
- return groups, in_nc, out_nc
-
-
-class SuperDepthwiseConv2DTranspose(SuperConv2DTranspose):
- def get_groups_in_out_nc(self, in_nc, out_nc):
- if in_nc != out_nc:
- _logger.debug(
- "input channel and output channel in depthwise conv transpose is different, change output channel to input channel! origin channel:(in_nc {}, out_nc {}): ".
- format(in_nc, out_nc))
- groups = in_nc
- out_nc = in_nc
- return groups, in_nc, out_nc
-
-
-### NOTE: only search channel, write for GAN-compression, maybe change to SuperDepthwiseConv and SuperConv after.
-class SuperSeparableConv2D(fluid.dygraph.Layer):
- """
- This interface is used to construct a callable object of the ``SuperSeparableConv2D``
- class.
- The difference between ```SuperSeparableConv2D``` and ```SeparableConv2D``` is:
- ```SuperSeparableConv2D``` need to feed a config dictionary with the format of
- {'channel', num_of_channel} represents the channels of the first conv's outputs and
- the second conv's inputs, used to change the first dimension of weight and bias,
- only train the first channels of the weight and bias.
-
- The architecture of super separable convolution2D op is [Conv2D, norm layer(may be BatchNorm
- or InstanceNorm), Conv2D]. The first conv is depthwise conv, the filter number is input channel
- multiply scale_factor, the group is equal to the number of input channel. The second conv
- is standard conv, which filter size and stride size are 1.
-
- Parameters:
- num_channels(int): The number of channels in the input image.
- num_filters(int): The number of the second conv's filter. It is as same as the output
- feature map.
- filter_size(int or tuple): The first conv's filter size. If filter_size is a tuple,
- it must contain two integers, (filter_size_H, filter_size_W).
- Otherwise, the filter will be a square.
- padding(int or tuple, optional): The first conv's padding size. If padding is a tuple,
- it must contain two integers, (padding_H, padding_W). Otherwise, the
- padding_H = padding_W = padding. Default: 0.
- stride(int or tuple, optional): The first conv's stride size. If stride is a tuple,
- it must contain two integers, (stride_H, stride_W). Otherwise, the
- stride_H = stride_W = stride. Default: 1.
- dilation(int or tuple, optional): The first conv's dilation size. If dilation is a tuple,
- it must contain two integers, (dilation_H, dilation_W). Otherwise, the
- dilation_H = dilation_W = dilation. Default: 1.
- norm_layer(class): The normalization layer between two convolution. Default: InstanceNorm.
- bias_attr (ParamAttr or bool, optional): The attribute for the bias of convolution.
- If it is set to False, no bias will be added to the output units.
- If it is set to None or one attribute of ParamAttr, convolution
- will create ParamAttr as bias_attr. If the Initializer of the bias_attr
- is not set, the bias is initialized zero. Default: None.
- scale_factor(float): The scale factor of the first conv's output channel. Default: 1.
- use_cudnn(bool, optional): Use cudnn kernel or not, it is valid only when the cudnn
- library is installed. Default: True.
- Returns:
- None
- """
-
- def __init__(self,
- num_channels,
- num_filters,
- filter_size,
- candidate_config={},
- stride=1,
- padding=0,
- dilation=1,
- norm_layer=InstanceNorm,
- bias_attr=None,
- scale_factor=1,
- use_cudnn=False):
- super(SuperSeparableConv2D, self).__init__()
- self.conv = fluid.dygraph.LayerList([
- fluid.dygraph.nn.Conv2D(
- num_channels=num_channels,
- num_filters=num_channels * scale_factor,
- filter_size=filter_size,
- stride=stride,
- padding=padding,
- use_cudnn=False,
- groups=num_channels,
- bias_attr=bias_attr)
- ])
-
- self.conv.extend([norm_layer(num_channels * scale_factor)])
-
- self.conv.extend([
- fluid.dygraph.nn.Conv2D(
- num_channels=num_channels * scale_factor,
- num_filters=num_filters,
- filter_size=1,
- stride=1,
- use_cudnn=use_cudnn,
- bias_attr=bias_attr)
- ])
-
- self.candidate_config = candidate_config
- self.expand_ratio = candidate_config[
- 'expand_ratio'] if 'expand_ratio' in candidate_config else None
- self.base_output_dim = self.conv[0]._num_filters
- if self.expand_ratio != None:
- self.base_output_dim = int(self.conv[0]._num_filters /
- max(self.expand_ratio))
-
- def forward(self, input, expand_ratio=None, channel=None):
- self.cur_config = {'expand_ratio': expand_ratio, 'channel': channel}
- in_nc = int(input.shape[1])
- assert (
- expand_ratio == None or channel == None
- ), "expand_ratio and channel CANNOT be NOT None at the same time."
- if expand_ratio != None:
- out_nc = int(expand_ratio * self.base_output_dim)
- elif channel != None:
- out_nc = int(channel)
- else:
- out_nc = self.conv[0]._num_filters
-
- weight = self.conv[0].weight[:in_nc]
- ### conv1
- if self.conv[0]._l_type == 'conv2d':
- if in_dygraph_mode():
- out = _C_ops.conv2d(input, weight, self.conv[0]._stride,
- self.conv[0]._padding, "EXPLICIT", in_nc,
- self.conv[0]._dilation, "NCHW", False, -1,
- False)
-
- elif _in_legacy_dygraph():
- attrs = ('strides', self.conv[0]._stride, 'paddings',
- self.conv[0]._padding, 'dilations',
- self.conv[0]._dilation, 'groups', in_nc, 'use_cudnn',
- self.conv[0]._use_cudnn)
- out = _legacy_C_ops.conv2d(input, weight, *attrs)
-
- elif self.conv[0]._l_type == 'depthwise_conv2d':
- if in_dygraph_mode():
- out = _C_ops.depthwise_conv2d(
- input, weight, self.conv[0]._stride, self.conv[0]._padding,
- "EXPLICIT", in_nc, self.conv[0]._dilation, "NCHW", False,
- -1, False, False, self.conv[0]._use_cudnn)
-
- elif _in_legacy_dygraph():
- attrs = ('strides', self.conv[0]._stride, 'paddings',
- self.conv[0]._padding, 'dilations',
- self.conv[0]._dilation, 'groups', in_nc, 'use_cudnn',
- self.conv[0]._use_cudnn)
-
- out = _legacy_C_ops.depthwise_conv2d(input, weight, *attrs)
- else:
- raise ValueError("conv type error")
-
- pre_bias = out
- if self.conv[0].bias is not None:
- bias = self.conv[0].bias[:in_nc]
- pre_act = dygraph_utils._append_bias_in_dygraph(pre_bias, bias, 1)
- else:
- pre_act = pre_bias
-
- conv0_out = dygraph_utils._append_activation_in_dygraph(
- pre_act, self.conv[0]._act)
-
- norm_out = self.conv[1](conv0_out)
-
- weight = self.conv[2].weight[:out_nc, :in_nc, :, :]
-
- if self.conv[2]._l_type == 'conv2d':
- if in_dygraph_mode():
- out = _C_ops.conv2d(
- input, weight, self.conv[2]._stride, self.conv[2]._padding,
- "EXPLICIT", self.conv[2]._groups if self.conv[2]._groups
- else 1, self.conv[2]._dilation, "NCHW", False, -1, False)
-
- elif _in_legacy_dygraph():
- attrs = ('strides', self.conv[2]._stride, 'paddings',
- self.conv[2]._padding, 'dilations',
- self.conv[2]._dilation, 'groups', self.conv[2]._groups
- if self.conv[2]._groups else 1, 'use_cudnn',
- self.conv[2]._use_cudnn)
- out = _legacy_C_ops.conv2d(norm_out, weight, *attrs)
- elif self.conv[2]._l_type == 'depthwise_conv2d':
- attrs = ('strides', self.conv[2]._stride, 'paddings',
- self.conv[2]._padding, 'dilations', self.conv[2]._dilation,
- 'groups', self.conv[2]._groups, 'use_cudnn',
- self.conv[2]._use_cudnn)
- out = core.ops.depthwise_conv2d(norm_out, weight, *attrs)
- else:
- raise ValueError("conv type error")
-
- pre_bias = out
- if self.conv[2].bias is not None:
- bias = self.conv[2].bias[:out_nc]
- pre_act = dygraph_utils._append_bias_in_dygraph(pre_bias, bias, 1)
- else:
- pre_act = pre_bias
-
- conv1_out = dygraph_utils._append_activation_in_dygraph(
- pre_act, self.conv[2]._act)
-
- return conv1_out
-
-
-class SuperLinear(fluid.dygraph.Linear):
- """
- """
-
- def __init__(self,
- input_dim,
- output_dim,
- candidate_config={},
- param_attr=None,
- bias_attr=None,
- act=None,
- dtype="float32"):
- super(SuperLinear, self).__init__(input_dim, output_dim, param_attr,
- bias_attr, act, dtype)
- self._param_attr = param_attr
- self._bias_attr = bias_attr
- self.output_dim = output_dim
- self.candidate_config = candidate_config
- self.expand_ratio = candidate_config[
- 'expand_ratio'] if 'expand_ratio' in candidate_config else None
- self.base_output_dim = self.output_dim
- if self.expand_ratio != None:
- self.base_output_dim = int(self.output_dim / max(self.expand_ratio))
-
- def forward(self, input, expand_ratio=None, channel=None):
- self.cur_config = {'expand_ratio': expand_ratio, 'channel': channel}
- ### weight: (Cin, Cout)
- in_nc = int(input.shape[-1])
- assert (
- expand_ratio == None or channel == None
- ), "expand_ratio and channel CANNOT be NOT None at the same time."
- if expand_ratio != None:
- out_nc = int(expand_ratio * self.base_output_dim)
- elif channel != None:
- out_nc = int(channel)
- else:
- out_nc = self.output_dim
-
- weight = self.weight[:in_nc, :out_nc]
- if self._bias_attr != False:
- bias = self.bias[:out_nc]
- use_bias = True
-
- pre_bias = _varbase_creator(dtype=input.dtype)
- if _non_static_mode():
- _legacy_C_ops.matmul(input, weight, pre_bias, 'transpose_X', False,
- 'transpose_Y', False, "alpha", 1)
-
- if self._bias_attr != False:
- pre_act = dygraph_utils._append_bias_in_dygraph(
- pre_bias, bias, axis=len(input.shape) - 1)
- else:
- pre_act = pre_bias
-
- return dygraph_utils._append_activation_in_dygraph(pre_act, self._act)
-
-
-class SuperBatchNorm(fluid.dygraph.BatchNorm):
- """
- add comment
- """
-
- def __init__(self,
- num_channels,
- act=None,
- is_test=False,
- momentum=0.9,
- epsilon=1e-05,
- param_attr=None,
- bias_attr=None,
- dtype='float32',
- data_layout='NCHW',
- in_place=False,
- moving_mean_name=None,
- moving_variance_name=None,
- do_model_average_for_mean_and_var=True,
- use_global_stats=False,
- trainable_statistics=False):
- super(SuperBatchNorm, self).__init__(
- num_channels, act, is_test, momentum, epsilon, param_attr,
- bias_attr, dtype, data_layout, in_place, moving_mean_name,
- moving_variance_name, do_model_average_for_mean_and_var,
- use_global_stats, trainable_statistics)
-
- def forward(self, input):
- feature_dim = int(input.shape[1])
-
- weight = self.weight[:feature_dim]
- bias = self.bias[:feature_dim]
- mean = self._mean[:feature_dim]
- variance = self._variance[:feature_dim]
-
- mean_out = self._mean
- variance_out = self._variance
- mean_out_tmp = mean
- variance_out_tmp = variance
-
- attrs = ("momentum", self._momentum, "epsilon", self._epsilon,
- "is_test", not self.training, "data_layout", self._data_layout,
- "use_mkldnn", False, "fuse_with_relu", self._fuse_with_relu,
- "use_global_stats", self._use_global_stats,
- 'trainable_statistics', self._trainable_statistics)
-
- if in_dygraph_mode():
- if feature_dim != self._mean.shape[0]:
- batch_norm_out, t1, t2, t3, t4, _ = _C_ops.batch_norm(
- input, weight, bias, mean, variance, self._momentum,
- self._epsilon, self._data_layout, not self.training,
- self._use_global_stats, self._trainable_statistics, False)
- self._mean[:feature_dim] = mean
- self._variance[:feature_dim] = variance
- mean_out[:feature_dim] = mean_out_tmp
- variance_out[:feature_dim] = variance_out_tmp
- else:
- batch_norm_out, t1, t2, t3, t4, _ = _C_ops.batch_norm(
- input, weight, bias, mean, variance, self._momentum,
- self._epsilon, self._data_layout, not self.training,
- self._use_global_stats, self._trainable_statistics, False)
- return batch_norm_out
-
- elif _in_legacy_dygraph():
- if feature_dim != self._mean.shape[0]:
- batch_norm_out, t1, t2, t3, t4, _ = _legacy_C_ops.batch_norm(
- input, weight, bias, mean, variance, None, mean_out_tmp,
- variance_out_tmp, *attrs)
- self._mean[:feature_dim].set_value(mean)
- self._variance[:feature_dim].set_value(variance)
- mean_out[:feature_dim].set_value(mean_out_tmp)
- variance_out[:feature_dim].set_value(variance_out_tmp)
- else:
- batch_norm_out, t1, t2, t3, t4, _ = _legacy_C_ops.batch_norm(
- input, weight, bias, self._mean, self._variance, None,
- mean_out, variance_out, *attrs)
- return batch_norm_out
-
- else:
- check_variable_and_dtype(
- input, 'input', ['float16', 'float32', 'float64'], 'BatchNorm')
-
- # for static need dict
- attrs = {
- "momentum": self._momentum,
- "epsilon": self._epsilon,
- "is_test": not self.training,
- "data_layout": self._data_layout,
- "use_mkldnn": False,
- "fuse_with_relu": False,
- "use_global_stats": self._use_global_stats,
- "trainable_statistics": self._trainable_statistics,
- }
-
- inputs = {
- "X": [input],
- "Scale": [weight],
- "Bias": [bias],
- "Mean": [mean],
- "Variance": [variance]
- }
-
- helper = LayerObjectHelper('batch_norm')
-
- param_dtype = input.dtype if input.dtype != 'float16' else 'float32'
- saved_mean = helper.create_variable_for_type_inference(
- dtype=param_dtype, stop_gradient=True)
- saved_variance = helper.create_variable_for_type_inference(
- dtype=param_dtype, stop_gradient=True)
- batch_norm_out = helper.create_variable_for_type_inference(
- input.dtype)
-
- outputs = {
- "Y": [batch_norm_out],
- "MeanOut": [mean],
- "VarianceOut": [variance],
- "SavedMean": [saved_mean],
- "SavedVariance": [saved_variance]
- }
-
- if self.training or self._trainable_statistics:
- # reserve_space is only used for training.
- reserve_space = helper.create_variable_for_type_inference(
- dtype=input.dtype, stop_gradient=True)
- outputs["ReserveSpace"] = [reserve_space]
-
- helper.append_op(
- type="batch_norm", inputs=inputs, outputs=outputs, attrs=attrs)
-
- return batch_norm_out
-
-
-class SuperInstanceNorm(fluid.dygraph.InstanceNorm):
- """
- """
-
- def __init__(self,
- num_channels,
- epsilon=1e-05,
- param_attr=None,
- bias_attr=None,
- dtype='float32'):
- super(SuperInstanceNorm, self).__init__(num_channels, epsilon,
- param_attr, bias_attr, dtype)
-
- def forward(self, input):
- feature_dim = int(input.shape[1])
-
- if self._param_attr == False and self._bias_attr == False:
- scale = None
- bias = None
- else:
- scale = self.scale[:feature_dim]
- bias = self.bias[:feature_dim]
-
- if in_dygraph_mode():
- out = _C_ops.instance_norm(input, scale, bias, self._epsilon)
- return out
- if _in_legacy_dygraph():
- out, _, _ = _legacy_C_ops.instance_norm(input, scale, bias,
- 'epsilon', self._epsilon)
- return out
-
-
-class SuperLayerNorm(fluid.dygraph.LayerNorm):
- def __init__(self,
- normalized_shape,
- scale=True,
- shift=True,
- epsilon=1e-05,
- param_attr=None,
- bias_attr=None,
- act=None,
- dtype='float32'):
- super(SuperLayerNorm,
- self).__init__(normalized_shape, scale, shift, epsilon,
- param_attr, bias_attr, act, dtype)
-
- def forward(self, input):
- input_shape = list(input.shape)
- input_ndim = len(input_shape)
- normalized_ndim = len(self._normalized_shape)
- self._begin_norm_axis = input_ndim - normalized_ndim
-
- ### TODO(ceci3): fix if normalized_shape is not a single number
- feature_dim = int(input.shape[-1])
- weight = self.weight[:feature_dim]
- bias = self.bias[:feature_dim]
- if in_dygraph_mode():
- pre_act, _, _, = _C_ops.layer_norm(input, weight, bias,
- self._epsilon,
- self._begin_norm_axis, False)
- elif _in_legacy_dygraph():
- pre_act, _, _ = _legacy_C_ops.layer_norm(
- input, weight, bias, 'epsilon', self._epsilon,
- 'begin_norm_axis', self._begin_norm_axis)
- return pre_act
-
-
-class SuperEmbedding(fluid.dygraph.Embedding):
- def __init__(self,
- size,
- candidate_config={},
- is_sparse=False,
- is_distributed=False,
- padding_idx=None,
- param_attr=None,
- dtype='float32'):
- super(SuperEmbedding, self).__init__(size, is_sparse, is_distributed,
- padding_idx, param_attr, dtype)
- self.candidate_config = candidate_config
- self.expand_ratio = candidate_config[
- 'expand_ratio'] if 'expand_ratio' in candidate_config else None
- self.base_output_dim = self._size[-1]
- if self.expand_ratio != None:
- self.base_output_dim = int(self._size[-1] / max(self.expand_ratio))
-
- def forward(self, input, expand_ratio=None, channel=None):
- assert (
- expand_ratio == None or channel == None
- ), "expand_ratio and channel CANNOT be NOT None at the same time."
- if expand_ratio != None:
- out_nc = int(expand_ratio * self.base_output_dim)
- elif channel != None:
- out_nc = int(channel)
- else:
- out_nc = self._size[-1]
-
- weight = self.weight[:, :out_nc]
- if in_dygraph_mode():
- return _C_ops.embedding(input, weight, self._padding_idx,
- self._is_sparse)
- elif _in_legacy_dygraph():
- return _legacy_C_ops.lookup_table_v2(
- weight, input, 'is_sparse', self._is_sparse, 'is_distributed',
- self._is_distributed, 'remote_prefetch', self._remote_prefetch,
- 'padding_idx', self._padding_idx)
diff --git a/paddleslim/nas/ofa/ofa.py b/paddleslim/nas/ofa/ofa.py
index b0a02fbad850264ccd5a7686236b93c74ff190ce..17075590fef126b50144b979211cda4384c36cf0 100644
--- a/paddleslim/nas/ofa/ofa.py
+++ b/paddleslim/nas/ofa/ofa.py
@@ -18,15 +18,8 @@ from collections import namedtuple
import paddle
import paddle.fluid as fluid
from .utils.utils import get_paddle_version, remove_model_fn, build_input
-pd_ver = get_paddle_version()
-if pd_ver == 185:
- from .layers_old import SuperConv2D, SuperLinear
- Layer = paddle.fluid.dygraph.Layer
- DataParallel = paddle.fluid.dygraph.DataParallel
-else:
- from .layers import SuperConv2D, SuperLinear
- Layer = paddle.nn.Layer
- DataParallel = paddle.DataParallel
+from .layers import SuperConv2D, SuperLinear
+from paddle.nn import Layer
from .layers_base import BaseBlock, Block
from .utils.utils import search_idx
from ...common import get_logger
@@ -98,7 +91,7 @@ class OFABase(Layer):
key2name = dict()
elastic_task = set()
model_to_traverse = self.model._layers if isinstance(
- self.model, DataParallel) else self.model
+ self.model, paddle.DataParallel) else self.model
for name, sublayer in model_to_traverse.named_sublayers():
if isinstance(sublayer, BaseBlock):
sublayer.set_supernet(self)
@@ -291,7 +284,7 @@ class OFA(OFABase):
# if mapping layer is NOT None, add hook and compute distill loss about mapping layers.
mapping_layers = getattr(self.distill_config, 'mapping_layers', None)
if mapping_layers != None:
- if isinstance(self.model, DataParallel):
+ if isinstance(self.model, paddle.DataParallel):
for idx, name in enumerate(mapping_layers):
if name[:7] != '_layers':
mapping_layers[idx] = '_layers.' + name
@@ -602,7 +595,7 @@ class OFA(OFABase):
origin_model = self.model
origin_model = origin_model._layers if isinstance(
- origin_model, DataParallel) else origin_model
+ origin_model, paddle.DataParallel) else origin_model
_logger.info("Start to get pruned params, please wait...")
pruned_param, pruned_groups = self._get_model_pruned_weight()
@@ -697,13 +690,13 @@ class OFA(OFABase):
### find shortcut block using static model
model_to_traverse = self.model._layers if isinstance(
- self.model, DataParallel) else self.model
+ self.model, paddle.DataParallel) else self.model
_st_prog = dygraph2program(
model_to_traverse, inputs=input_shapes, dtypes=input_dtypes)
else:
model_to_traverse = self.model._layers if isinstance(
- self.model, DataParallel) else self.model
+ self.model, paddle.DataParallel) else self.model
model_to_traverse.eval()
_st_prog = dygraph2program(model_to_traverse, inputs=input_spec)
diff --git a/paddleslim/nas/one_shot/super_mnasnet.py b/paddleslim/nas/one_shot/super_mnasnet.py
index 169d1050ba18043fcf8221a3cb3c52773dec7f44..2a186c7c3d1b972ae55142b5796c87c6401aa587 100644
--- a/paddleslim/nas/one_shot/super_mnasnet.py
+++ b/paddleslim/nas/one_shot/super_mnasnet.py
@@ -23,7 +23,7 @@ class DConvBlock(fluid.dygraph.Layer):
self.stride = stride
self.flops = 0
self.flops_calculated = False
- self.expand = fluid.dygraph.Conv2D(
+ self.expand = paddle.nn.Conv2D(
in_channels,
num_filters=in_channels * expansion,
filter_size=1,
@@ -34,7 +34,7 @@ class DConvBlock(fluid.dygraph.Layer):
self.expand_bn = fluid.dygraph.BatchNorm(
num_channels=in_channels * expansion, act='relu6')
- self.dconv = fluid.dygraph.Conv2D(
+ self.dconv = paddle.nn.Conv2D(
in_channels * expansion,
num_filters=in_channels * expansion,
filter_size=kernel_size,
@@ -47,7 +47,7 @@ class DConvBlock(fluid.dygraph.Layer):
self.dconv_bn = fluid.dygraph.BatchNorm(
num_channels=in_channels * expansion, act='relu6')
- self.project = fluid.dygraph.Conv2D(
+ self.project = paddle.nn.Conv2D(
in_channels * expansion,
num_filters=channels,
filter_size=1,
@@ -58,7 +58,7 @@ class DConvBlock(fluid.dygraph.Layer):
self.project_bn = fluid.dygraph.BatchNorm(
num_channels=channels, act=None)
- self.shortcut = fluid.dygraph.Conv2D(
+ self.shortcut = paddle.nn.Conv2D(
in_channels,
num_filters=channels,
filter_size=1,
@@ -135,9 +135,9 @@ class AuxiliaryHead(fluid.dygraph.Layer):
self.pool1 = fluid.dygraph.Pool2D(
5, 'avg', pool_stride=3, pool_padding=0)
- self.conv1 = fluid.dygraph.Conv2D(128, 1, bias_attr=False)
+ self.conv1 = paddle.nn.Conv2D(128, 1, bias_attr=False)
self.bn1 = fluid.dygraph.BatchNorm(128, act='relu6')
- self.conv2 = fluid.dygraph.Conv2D(768, 2, bias_attr=False)
+ self.conv2 = paddle.nn.Conv2D(768, 2, bias_attr=False)
self.bn2 = fluid.dygraph.BatchNorm(768, act='relu6')
self.classifier = fluid.dygraph.FC(num_classes, act='softmax')
self.layer_helper = LayerHelper(self.full_name(), act='relu6')
@@ -167,10 +167,10 @@ class SuperMnasnet(OneShotSuperNet):
self.repeat_times = repeat_times
self.flops_calculated = False
self.last_tokens = None
- self._conv = fluid.dygraph.Conv2D(
+ self._conv = paddle.nn.Conv2D(
input_channels, 32, 3, 1, 1, act=None, bias_attr=False)
self._bn = fluid.dygraph.BatchNorm(32, act='relu6')
- self._sep_conv = fluid.dygraph.Conv2D(
+ self._sep_conv = paddle.nn.Conv2D(
32,
32,
3,
@@ -181,11 +181,11 @@ class SuperMnasnet(OneShotSuperNet):
use_cudnn=False,
bias_attr=False)
self._sep_conv_bn = fluid.dygraph.BatchNorm(32, act='relu6')
- self._sep_project = fluid.dygraph.Conv2D(
+ self._sep_project = paddle.nn.Conv2D(
32, 16, 1, 1, 0, act=None, bias_attr=False)
self._sep_project_bn = fluid.dygraph.BatchNorm(16, act='relu6')
- self._final_conv = fluid.dygraph.Conv2D(
+ self._final_conv = paddle.nn.Conv2D(
320, out_channels, 1, 1, 0, act=None, bias_attr=False)
self._final_bn = fluid.dygraph.BatchNorm(out_channels, act='relu6')
self.stride = stride
diff --git a/tests/test_ofa_layers_old.py b/tests/test_ofa_layers_old.py
deleted file mode 100644
index 4d66019f2d63b3350c671f243b51b337c6811f1f..0000000000000000000000000000000000000000
--- a/tests/test_ofa_layers_old.py
+++ /dev/null
@@ -1,171 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import sys
-sys.path.append("../")
-import numpy as np
-import unittest
-import paddle
-import paddle.nn as nn
-from paddleslim.nas import ofa
-from paddleslim.nas.ofa import OFA
-from paddleslim.nas.ofa.layers_old import *
-
-
-class ModelCase1(nn.Layer):
- def __init__(self):
- super(ModelCase1, self).__init__()
- models = [SuperConv2D(3, 4, 3, bias_attr=False)]
- models += [
- SuperConv2D(
- 4,
- 4,
- 7,
- candidate_config={
- 'expand_ratio': (0.5, 1.0),
- 'kernel_size': (3, 5, 7)
- },
- transform_kernel=True)
- ]
- models += [SuperConv2D(4, 4, 3, groups=4)]
- models += [SuperConv2D(4, 4, 3, groups=2)]
- models += [SuperBatchNorm(4)]
- models += [SuperConv2DTranspose(4, 4, 3, bias_attr=False)]
- models += [
- SuperConv2DTranspose(
- 4,
- 4,
- 7,
- candidate_config={
- 'expand_ratio': (0.5, 1.0),
- 'kernel_size': (3, 5, 7)
- },
- transform_kernel=True)
- ]
- models += [SuperConv2DTranspose(4, 4, 3, groups=4)]
- models += [SuperInstanceNorm(4)]
- models += [nn.Conv2DTranspose(4, 4, 3, groups=2)]
- models += [SuperConv2DTranspose(4, 4, 3, groups=2)]
- models += [
- SuperSeparableConv2D(
- 4,
- 4,
- 1,
- padding=1,
- bias_attr=False,
- candidate_config={'expand_ratio': (0.5, 1.0)}),
- ]
- models += [
- SuperSeparableConv2D(
- 4, 4, 1, padding=1, candidate_config={'channel': (2, 4)}),
- ]
- self.models = paddle.nn.Sequential(*models)
-
- def forward(self, inputs):
- return self.models(inputs)
-
-
-class ModelCase2(nn.Layer):
- def __init__(self):
- super(ModelCase2, self).__init__()
- models = [
- SuperEmbedding(
- size=(64, 64), candidate_config={'expand_ratio': (0.5, 1.0)})
- ]
- models += [
- SuperLinear(
- 64, 64, candidate_config={'expand_ratio': (0.5, 1.0)})
- ]
- models += [SuperLayerNorm(64)]
- models += [SuperLinear(64, 64, candidate_config={'channel': (32, 64)})]
- models += [
- SuperLinear(
- 64, 64, bias_attr=False,
- candidate_config={'channel': (32, 64)})
- ]
- self.models = paddle.nn.Sequential(*models)
-
- def forward(self, inputs):
- return self.models(inputs)
-
-
-class ModelCase3(nn.Layer):
- def __init__(self):
- super(ModelCase3, self).__init__()
- self.conv1 = SuperConv2D(
- 3,
- 4,
- 7,
- candidate_config={'kernel_size': (3, 5, 7)},
- transform_kernel=True)
- self.conv2 = SuperConv2DTranspose(
- 4,
- 4,
- 7,
- candidate_config={'kernel_size': (3, 5, 7)},
- transform_kernel=True)
-
- def forward(self, inputs):
- inputs = self.conv1(inputs, kernel_size=3)
- inputs = self.conv2(inputs, kernel_size=3)
- return inputs
-
-
-class ModelCase4(nn.Layer):
- def __init__(self):
- super(ModelCase4, self).__init__()
- models = [SuperBatchNorm(4)]
- self.models = paddle.nn.Sequential(*models)
-
- def forward(self, inputs):
- return self.models(inputs)
-
-
-class TestCase(unittest.TestCase):
- def setUp(self):
- self.model = ModelCase1()
- data_np = np.random.random((1, 3, 64, 64)).astype(np.float32)
- self.data = paddle.to_tensor(data_np)
-
- def test_ofa(self):
- ofa_model = OFA(self.model)
- out = self.model(self.data)
-
-
-class TestCase2(TestCase):
- def setUp(self):
- self.model = ModelCase2()
- data_np = np.random.random((64, 64)).astype(np.int64)
- self.data = paddle.to_tensor(data_np)
-
-
-class TestCase3(TestCase):
- def setUp(self):
- self.model = ModelCase3()
- data_np = np.random.random((1, 3, 64, 64)).astype(np.float32)
- self.data = paddle.to_tensor(data_np)
-
-
-class TestCase4(TestCase):
- def setUp(self):
- self.model = ModelCase4()
- data_np = np.random.random((1, 3, 64, 64)).astype(np.float32)
- self.data = paddle.to_tensor(data_np)
-
- def test_ofa(self):
- out = self.model(self.data)
-
-
-if __name__ == '__main__':
- unittest.main()
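With `layers_old.py` and its test removed, coverage for the Super layers lives with the retained `paddleslim.nas.ofa.layers` module. A minimal sketch of the equivalent check against that module (illustrative only; class names such as `SuperBatchNorm2D` follow the Paddle-2.x layer set shown in the `layers.py` hunks above):

```python
import numpy as np
import paddle
import paddle.nn as nn
from paddleslim.nas.ofa import OFA
from paddleslim.nas.ofa.layers import SuperConv2D, SuperBatchNorm2D

class TinyCase(nn.Layer):
    def __init__(self):
        super(TinyCase, self).__init__()
        self.models = nn.Sequential(
            SuperConv2D(3, 4, 3, candidate_config={'expand_ratio': (0.5, 1.0)}),
            SuperBatchNorm2D(4))

    def forward(self, inputs):
        return self.models(inputs)

model = TinyCase()
ofa_model = OFA(model)  # wraps the supernet, mirroring the removed test cases
data = paddle.to_tensor(np.random.random((1, 3, 64, 64)).astype('float32'))
out = model(data)
```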