fix ofa transform kernel when kernel size is even (#693)

* fix when kernel size is even
* fix name
* fix weight_op

fix ofa transform kernel when kernel size is even (#693)
* fix when kernel size is even
* fix name
* fix weight_op
0b2df6ec · ceci3 · GitHub · 44114b96 · 0b2df6ec · 0b2df6ec
4 changed files
--- a/paddleslim/nas/ofa/get_sub_model.py
+++ b/paddleslim/nas/ofa/get_sub_model.py
@@ -19,7 +19,9 @@ from .layers_base import BaseBlock
 __all__ = ['get_prune_params_config', 'prune_params', 'check_search_space']
-WEIGHT_OP = ['conv2d', 'conv3d', 'conv1d', 'linear', 'embedding']
+WEIGHT_OP = [
+    'conv2d', 'linear', 'embedding', 'conv2d_transpose', 'depthwise_conv2d'
+]
 CONV_TYPES = [
    'conv2d', 'conv3d', 'conv1d', 'superconv2d', 'supergroupconv2d',
    'superdepthwiseconv2d'
@@ -95,6 +97,7 @@ def prune_params(model, param_config, super_model_sd=None):
                name = l_name + '.' + p_name
                super_t_value = super_model_sd[name].value().get_tensor()
                super_value = np.array(super_t_value).astype("float32")
+                super_model_sd.pop(name)
            if param.name in param_config.keys():
                if len(param_config[param.name]) > 1:
@@ -137,6 +140,11 @@ def prune_params(model, param_config, super_model_sd=None):
            if param.trainable:
                param.clear_gradient()
+    ### initialize params which are not in sublayers, such as persistable inputs created by create_parameter
+    if super_model_sd != None and len(super_model_sd) != 0:
+        for k, v in super_model_sd.items():
+            setattr(model, k, v)
 def _find_weight_ops(op, graph, weights):
    """ Find the vars come from operators with weight.

--- a/paddleslim/nas/ofa/utils/utils.py
+++ b/paddleslim/nas/ofa/utils/utils.py
@@ -65,6 +65,9 @@ def remove_model_fn(model, state_dict):
    for name, param in model.state_dict().items():
        keys.append(name)
    for name, param in state_dict.items():
+        if len(name.split('.')) <= 2:
+            new_dict[name] = param
+            continue
        if name.split('.')[-2] == 'fn':
            tmp_n = name.split('.')[:-2] + [name.split('.')[-1]]
            tmp_n = '.'.join(tmp_n)
@@ -81,7 +84,10 @@ def compute_start_end(kernel_size, sub_kernel_size):
    center = kernel_size // 2
    sub_center = sub_kernel_size // 2
    start = center - sub_center
-    end = center + sub_center + 1
+    if sub_kernel_size % 2 == 0:
+        end = center + sub_center
+    else:
+        end = center + sub_center + 1
    assert end - start == sub_kernel_size
    return start, end

--- a/tests/test_ofa.py
+++ b/tests/test_ofa.py
@@ -31,7 +31,7 @@ class ModelConv(nn.Layer):
    def __init__(self):
        super(ModelConv, self).__init__()
        with supernet(
-                kernel_size=(3, 5, 7),
+                kernel_size=(3, 4, 5, 7),
                channel=((4, 8, 12), (8, 12, 16), (8, 12, 16),
                         (8, 12, 16))) as ofa_super:
            models = []

--- a/tests/test_ofa_v2.py
+++ b/tests/test_ofa_v2.py
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import sys
+sys.path.append("../")
+import numpy as np
+import unittest
+import paddle
+import paddle.nn as nn
+from paddle.nn import ReLU
+from paddleslim.nas import ofa
+from paddleslim.nas.ofa import OFA, RunConfig, DistillConfig
+from paddleslim.nas.ofa.convert_super import supernet
+from paddleslim.nas.ofa.convert_super import Convert, supernet
+class ModelV1(nn.Layer):
+    def __init__(self, name=''):
+        super(ModelV1, self).__init__()
+        self.model = nn.Sequential(nn.Conv2D(3, 12, 16), nn.ReLU())
+        self.cls = self.create_parameter(
+            attr=paddle.ParamAttr(
+                name=name + 'cls',
+                initializer=nn.initializer.Assign(
+                    paddle.zeros(shape=(2, 12, 17, 17)))),
+            shape=(2, 12, 17, 17))
+    def forward(self, inputs):
+        return self.cls + self.model(inputs)
+class TestOFAV2(unittest.TestCase):
+    def setUp(self):
+        model = ModelV1()
+        sp_net_config = supernet(expand_ratio=[0.25, 0.5, 1.0])
+        self.model = Convert(sp_net_config).convert(model)
+        self.images = paddle.randn(shape=[2, 3, 32, 32], dtype='float32')
+    def test_ofa(self):
+        self.ofa_model = OFA(self.model)
+        self.ofa_model.set_epoch(0)
+        self.ofa_model.set_task('expand_ratio')
+        out, _ = self.ofa_model(self.images)
+        print(self.ofa_model.get_current_config)
+class TestOFAV2Export(unittest.TestCase):
+    def setUp(self):
+        model = ModelV1(name='export')
+        sp_net_config = supernet(expand_ratio=[0.25, 0.5, 1.0])
+        self.model = Convert(sp_net_config).convert(model)
+        self.images = paddle.randn(shape=[2, 3, 32, 32], dtype='float32')
+        self.ofa_model = OFA(self.model)
+    def test_export(self):
+        origin_model = ModelV1(name='origin')
+        net_config = {'model.0': {}}
+        self.ofa_model.export(
+            net_config,
+            input_shapes=[1, 3, 32, 32],
+            input_dtypes=['float32'],
+            origin_model=origin_model)
+if __name__ == '__main__':
+    unittest.main()