# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
sys.path.append("../")
import numpy as np
import unittest
import paddle
import paddle.nn as nn
from paddle.nn import ReLU
from paddleslim.nas import ofa
from paddleslim.nas.ofa import OFA, RunConfig, DistillConfig
from paddleslim.nas.ofa.convert_super import supernet
from paddleslim.nas.ofa.layers_new import Block, SuperSeparableConv2D


class ModelConv(nn.Layer):
    def __init__(self):
        super(ModelConv, self).__init__()
        with supernet(
                kernel_size=(3, 5, 7),
                channel=((4, 8, 12), (8, 12, 16), (8, 12, 16),
                         (8, 12, 16))) as ofa_super:
            models = []
            models += [nn.Conv2D(3, 4, 3, padding=1)]
            models += [nn.InstanceNorm2D(4)]
            models += [ReLU()]
            models += [nn.Conv2D(4, 4, 3, groups=4)]
            models += [nn.InstanceNorm2D(4)]
            models += [ReLU()]
            models += [nn.Conv2DTranspose(4, 4, 3, groups=4, padding=1)]
            models += [nn.BatchNorm2D(4)]
            models += [ReLU()]
            models += [nn.Conv2D(4, 3, 3)]
            models += [ReLU()]
            models = ofa_super.convert(models)

        models += [
            Block(
                SuperSeparableConv2D(
                    3, 6, 1, padding=1, candidate_config={'channel': (3, 6)}),
                fixed=True)
        ]

        with supernet(
                kernel_size=(3, 5, 7), expand_ratio=(1, 2, 4)) as ofa_super:
            models1 = []
            models1 += [nn.Conv2D(6, 4, 3)]
            models1 += [nn.BatchNorm2D(4)]
            models1 += [ReLU()]
            models1 += [nn.Conv2D(4, 4, 3, groups=2)]
            models1 += [nn.InstanceNorm2D(4)]
            models1 += [ReLU()]
            models1 += [nn.Conv2DTranspose(4, 4, 3, groups=2)]
            models1 += [nn.BatchNorm2D(4)]
            models1 += [ReLU()]
            models1 += [nn.Conv2DTranspose(4, 4, 3)]
            models1 += [nn.BatchNorm2D(4)]
            models1 += [ReLU()]
            models1 += [nn.Conv2DTranspose(4, 4, 1)]
            models1 += [nn.BatchNorm2D(4)]
            models1 += [ReLU()]
            models1 = ofa_super.convert(models1)

        models += models1
        self.models = paddle.nn.Sequential(*models)

    def forward(self, inputs, depth=None):
        if depth is not None:
            assert isinstance(depth, int)
            assert depth <= len(self.models)
        else:
            depth = len(self.models)
        for idx in range(depth):
            layer = self.models[idx]
            inputs = layer(inputs)
        return inputs


class ModelConv2(nn.Layer):
    def __init__(self):
        super(ModelConv2, self).__init__()
        with supernet(expand_ratio=(1, 2, 4)) as ofa_super:
            models = []
            models += [
                nn.Conv2DTranspose(
                    4, 4, 3, weight_attr=paddle.ParamAttr(name='conv1_w'))
            ]
            models += [
                nn.BatchNorm2D(
                    4,
                    weight_attr=paddle.ParamAttr(name='bn1_w'),
                    bias_attr=paddle.ParamAttr(name='bn1_b'))
            ]
            models += [ReLU()]
            models += [nn.Conv2D(4, 4, 3)]
            models += [nn.BatchNorm2D(4)]
            models += [ReLU()]
            models = ofa_super.convert(models)

        with supernet(channel=((4, 6, 8), (4, 6, 8))) as ofa_super:
            models1 = []
            models1 += [nn.Conv2DTranspose(4, 4, 3)]
            models1 += [nn.BatchNorm2D(4)]
            models1 += [ReLU()]
            models1 += [nn.Conv2DTranspose(4, 4, 3)]
            models1 += [nn.BatchNorm2D(4)]
            models1 += [ReLU()]
            models1 = ofa_super.convert(models1)
        models += models1

        with supernet(kernel_size=(3, 5, 7)) as ofa_super:
            models2 = []
            models2 += [nn.Conv2D(4, 4, 3)]
            models2 += [nn.BatchNorm2D(4)]
            models2 += [ReLU()]
            models2 += [nn.Conv2DTranspose(4, 4, 3)]
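
# The Linear-based supernets below mirror the Conv variants above:
# ModelLinear searches over expand_ratio, ModelLinear1 over per-layer
# output channels, and ModelLinear2 builds a pass-through supernet
# (expand_ratio=None) with explicitly named parameters via ParamAttr.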
            models2 += [nn.BatchNorm2D(4)]
            models2 += [ReLU()]
            models2 += [nn.Conv2D(4, 4, 3)]
            models2 += [nn.BatchNorm2D(4)]
            models2 += [ReLU()]
            models2 = ofa_super.convert(models2)
        models += models2

        self.models = paddle.nn.Sequential(*models)


class ModelLinear(nn.Layer):
    def __init__(self):
        super(ModelLinear, self).__init__()
        with supernet(expand_ratio=(1, 2, 4)) as ofa_super:
            models = []
            models += [nn.Embedding(num_embeddings=64, embedding_dim=64)]
            models += [nn.Linear(64, 128)]
            models += [nn.LayerNorm(128)]
            models += [nn.Linear(128, 256)]
            models = ofa_super.convert(models)

        with supernet(expand_ratio=(1, 2, 4)) as ofa_super:
            models1 = []
            models1 += [nn.Linear(256, 256)]
            models1 = ofa_super.convert(models1)

        models += models1
        self.models = paddle.nn.Sequential(*models)

    def forward(self, inputs, depth=None):
        if depth is not None:
            assert isinstance(depth, int)
            assert depth < len(self.models)
        else:
            depth = len(self.models)
        for idx in range(depth):
            layer = self.models[idx]
            inputs = layer(inputs)
        return inputs


class ModelLinear1(nn.Layer):
    def __init__(self):
        super(ModelLinear1, self).__init__()
        with supernet(channel=((64, 128, 256), (64, 128, 256),
                               (64, 128, 256))) as ofa_super:
            models = []
            models += [nn.Embedding(num_embeddings=64, embedding_dim=64)]
            models += [nn.Linear(64, 128)]
            models += [nn.LayerNorm(128)]
            models += [nn.Linear(128, 256)]
            models = ofa_super.convert(models)

        with supernet(channel=((64, 128, 256), )) as ofa_super:
            models1 = []
            models1 += [nn.Linear(256, 256)]
            models1 = ofa_super.convert(models1)

        models += models1
        self.models = paddle.nn.Sequential(*models)

    def forward(self, inputs, depth=None):
        if depth is not None:
            assert isinstance(depth, int)
            assert depth < len(self.models)
        else:
            depth = len(self.models)
        for idx in range(depth):
            layer = self.models[idx]
            inputs = layer(inputs)
        return inputs


class ModelLinear2(nn.Layer):
    def __init__(self):
        super(ModelLinear2, self).__init__()
        with supernet(expand_ratio=None) as ofa_super:
            models = []
            models += [
                nn.Embedding(
                    num_embeddings=64,
                    embedding_dim=64,
                    weight_attr=paddle.ParamAttr(name='emb'))
            ]
            models += [
                nn.Linear(
                    64,
                    128,
                    weight_attr=paddle.ParamAttr(name='fc1_w'),
                    bias_attr=paddle.ParamAttr(name='fc1_b'))
            ]
            models += [
                nn.LayerNorm(
                    128,
                    weight_attr=paddle.ParamAttr(name='ln1_w'),
                    bias_attr=paddle.ParamAttr(name='ln1_b'))
            ]
            models += [nn.Linear(128, 256)]
            models = ofa_super.convert(models)

        self.models = paddle.nn.Sequential(*models)

    def forward(self, inputs, depth=None):
        if depth is not None:
            assert isinstance(depth, int)
            assert depth < len(self.models)
        else:
            depth = len(self.models)
        for idx in range(depth):
            layer = self.models[idx]
            inputs = layer(inputs)
        return inputs
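
# TestOFA drives a full progressive training loop over the Conv supernet:
# each phase in `n_epochs` trains with its own learning rate, every step
# samples a sub-network by calling `ofa_model(self.data)`, and when
# `mapping_layers` is configured a distillation loss against the teacher's
# mapped layer ('models.0.fn') is added. The TestOFACase* subclasses reuse
# this loop with the Linear supernets and their own run/distill configs.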
class TestOFA(unittest.TestCase):
    def setUp(self):
        self.init_model_and_data()
        self.init_config()

    def init_model_and_data(self):
        self.model = ModelConv()
        self.teacher_model = ModelConv()
        data_np = np.random.random((1, 3, 10, 10)).astype(np.float32)
        label_np = np.random.random((1)).astype(np.float32)
        self.data = paddle.to_tensor(data_np)

    def init_config(self):
        default_run_config = {
            'train_batch_size': 1,
            'n_epochs': [[1], [2, 3], [4, 5]],
            'init_learning_rate': [[0.001], [0.003, 0.001], [0.003, 0.001]],
            'dynamic_batch_size': [1, 1, 1],
            'total_images': 1,
            'elastic_depth': (5, 15, 24)
        }
        self.run_config = RunConfig(**default_run_config)

        default_distill_config = {
            'lambda_distill': 0.01,
            'teacher_model': self.teacher_model,
            'mapping_layers': ['models.0.fn'],
            'mapping_op': 'conv2d'
        }
        self.distill_config = DistillConfig(**default_distill_config)
        self.elastic_order = ['kernel_size', 'width', 'depth']

    def test_ofa(self):
        ofa_model = OFA(self.model,
                        self.run_config,
                        distill_config=self.distill_config,
                        elastic_order=self.elastic_order)

        start_epoch = 0
        for idx in range(len(self.run_config.n_epochs)):
            cur_idx = self.run_config.n_epochs[idx]
            for ph_idx in range(len(cur_idx)):
                cur_lr = self.run_config.init_learning_rate[idx][ph_idx]
                adam = paddle.optimizer.Adam(
                    learning_rate=cur_lr,
                    parameters=(
                        ofa_model.parameters() + ofa_model.netAs_param))
                for epoch_id in range(start_epoch,
                                      self.run_config.n_epochs[idx][ph_idx]):
                    if epoch_id == 0:
                        ofa_model.set_epoch(epoch_id)
                    for model_no in range(
                            self.run_config.dynamic_batch_size[idx]):
                        output, _ = ofa_model(self.data)
                        loss = paddle.mean(output)
                        if self.distill_config.mapping_layers is not None:
                            dis_loss = ofa_model.calc_distill_loss()
                            loss += dis_loss
                            dis_loss = dis_loss.numpy()[0]
                        else:
                            dis_loss = 0
                        print('epoch: {}, loss: {}, distill loss: {}'.format(
                            epoch_id, loss.numpy()[0], dis_loss))
                        loss.backward()
                        adam.minimize(loss)
                        adam.clear_gradients()
                start_epoch = self.run_config.n_epochs[idx][ph_idx]


class TestOFACase1(TestOFA):
    def init_model_and_data(self):
        self.model = ModelLinear()
        self.teacher_model = ModelLinear()
        data_np = np.random.random((3, 64)).astype(np.int64)
        self.data = paddle.to_tensor(data_np)

    def init_config(self):
        default_run_config = {
            'train_batch_size': 1,
            'n_epochs': [[2, 5]],
            'init_learning_rate': [[0.003, 0.001]],
            'dynamic_batch_size': [1],
            'total_images': 1,
        }
        self.run_config = RunConfig(**default_run_config)

        default_distill_config = {
            'lambda_distill': 0.01,
            'teacher_model': self.teacher_model,
            'mapping_op': 'linear',
            'mapping_layers': ['models.3.fn'],
        }
        self.distill_config = DistillConfig(**default_distill_config)
        self.elastic_order = None


class TestOFACase2(TestOFA):
    def init_model_and_data(self):
        self.model = ModelLinear1()
        self.teacher_model = ModelLinear1()
        data_np = np.random.random((3, 64)).astype(np.int64)
        self.data = paddle.to_tensor(data_np)

    def init_config(self):
        default_run_config = {
            'train_batch_size': 1,
            'n_epochs': [[2, 5]],
            'init_learning_rate': [[0.003, 0.001]],
            'dynamic_batch_size': [1],
            'total_images': 1,
        }
        self.run_config = RunConfig(**default_run_config)

        default_distill_config = {
            'teacher_model': self.teacher_model,
            'mapping_layers': ['models.3.fn'],
        }
        self.distill_config = DistillConfig(**default_distill_config)
        self.elastic_order = None


class TestOFACase3(unittest.TestCase):
    def test_ofa(self):
        self.model = ModelLinear2()
        ofa_model = OFA(self.model)
        ofa_model.set_net_config({'expand_ratio': None})


class TestOFACase4(unittest.TestCase):
    def test_ofa(self):
        self.model = ModelConv2()


if __name__ == '__main__':
    unittest.main()