From b38e4f2840ebc4ee0195ab8de789bf5b8d54ef37 Mon Sep 17 00:00:00 2001 From: LielinJiang <50691816+LielinJiang@users.noreply.github.com> Date: Fri, 25 Sep 2020 22:08:40 +0800 Subject: [PATCH] Refine vision models (#27476) * refine vision models --- python/paddle/hapi/callbacks.py | 10 +- python/paddle/hapi/model.py | 60 +-- python/paddle/metric/metrics.py | 9 +- python/paddle/tests/CMakeLists.txt | 4 - .../paddle/tests/dist_hapi_mnist_dynamic.py | 2 +- python/paddle/tests/dist_hapi_mnist_static.py | 2 +- python/paddle/tests/test_model.py | 57 ++- python/paddle/tests/test_pretrained_model.py | 45 ++- python/paddle/tests/test_vision_models.py | 2 +- python/paddle/vision/models/lenet.py | 30 +- python/paddle/vision/models/mobilenetv1.py | 189 ++++------ python/paddle/vision/models/mobilenetv2.py | 347 ++++++++---------- python/paddle/vision/models/resnet.py | 337 ++++++++--------- python/paddle/vision/models/vgg.py | 73 ++-- 14 files changed, 539 insertions(+), 628 deletions(-) diff --git a/python/paddle/hapi/callbacks.py b/python/paddle/hapi/callbacks.py index 7ed571fa9c6..69b7fedd72e 100644 --- a/python/paddle/hapi/callbacks.py +++ b/python/paddle/hapi/callbacks.py @@ -301,10 +301,11 @@ class ProgBarLogger(Callback): train_dataset = paddle.vision.datasets.MNIST(mode='train') - model = paddle.Model(paddle.vision.LeNet(classifier_activation=None), + lenet = paddle.vision.LeNet() + model = paddle.Model(lenet, inputs, labels) - optim = paddle.optimizer.Adam(0.001) + optim = paddle.optimizer.Adam(0.001, parameters=lenet.parameters()) model.prepare(optimizer=optim, loss=paddle.nn.CrossEntropyLoss(), metrics=paddle.metric.Accuracy()) @@ -436,10 +437,11 @@ class ModelCheckpoint(Callback): train_dataset = paddle.vision.datasets.MNIST(mode='train') - model = paddle.Model(paddle.vision.LeNet(classifier_activation=None), + lenet = paddle.vision.LeNet() + model = paddle.Model(lenet, inputs, labels) - optim = paddle.optimizer.Adam(0.001) + optim = paddle.optimizer.Adam(0.001, parameters=lenet.parameters()) model.prepare(optimizer=optim, loss=paddle.nn.CrossEntropyLoss(), metrics=paddle.metric.Accuracy()) diff --git a/python/paddle/hapi/model.py b/python/paddle/hapi/model.py index 53928ebed1b..1bfe8f07a2f 100644 --- a/python/paddle/hapi/model.py +++ b/python/paddle/hapi/model.py @@ -814,10 +814,9 @@ class Model(object): from paddle.static import InputSpec device = paddle.set_device('cpu') # or 'gpu' - # if use static graph, do not set - paddle.disable_static(device) net = nn.Sequential( + nn.Flatten(1), nn.Linear(784, 200), nn.Tanh(), nn.Linear(200, 10)) @@ -833,7 +832,7 @@ class Model(object): paddle.nn.CrossEntropyLoss(), paddle.metric.Accuracy()) - data = paddle.vision.datasets.MNIST(mode='train', chw_format=False) + data = paddle.vision.datasets.MNIST(mode='train') model.fit(data, epochs=2, batch_size=32, verbose=1) """ @@ -850,7 +849,8 @@ class Model(object): if not isinstance(inputs, (list, dict, Input)): raise TypeError( - "'inputs' must be list or dict, and couldn't be None.") + "'inputs' must be list or dict in static graph mode") + self._inputs = self._verify_spec(inputs, True) self._labels = self._verify_spec(labels) @@ -885,7 +885,6 @@ class Model(object): from paddle.static import InputSpec device = paddle.set_device('cpu') # or 'gpu' - paddle.disable_static(device) net = nn.Sequential( nn.Linear(784, 200), @@ -930,7 +929,6 @@ class Model(object): from paddle.static import InputSpec device = paddle.set_device('cpu') # or 'gpu' - paddle.disable_static(device) net = nn.Sequential( 
nn.Linear(784, 200), @@ -970,9 +968,12 @@ class Model(object): import numpy as np import paddle import paddle.nn as nn + from paddle.static import InputSpec device = paddle.set_device('cpu') # or 'gpu' - paddle.disable_static(device) + + input = InputSpec([None, 784], 'float32', 'x') + label = InputSpec([None, 1], 'int64', 'label') net = nn.Sequential( nn.Linear(784, 200), @@ -980,7 +981,7 @@ class Model(object): nn.Linear(200, 10), nn.Softmax()) - model = paddle.Model(net) + model = paddle.Model(net, input, label) model.prepare() data = np.random.random(size=(4,784)).astype(np.float32) out = model.test_batch([data]) @@ -1026,6 +1027,7 @@ class Model(object): def __init__(self): super(Mnist, self).__init__() self.net = nn.Sequential( + nn.Flatten(1), nn.Linear(784, 200), nn.Tanh(), nn.Linear(200, 10), @@ -1045,7 +1047,7 @@ class Model(object): optim = paddle.optimizer.SGD(learning_rate=1e-3, parameters=model.parameters()) model.prepare(optim, paddle.nn.CrossEntropyLoss()) - data = paddle.vision.datasets.MNIST(mode='train', chw_format=False) + data = paddle.vision.datasets.MNIST(mode='train') model.fit(data, epochs=1, batch_size=32, verbose=0) model.save('checkpoint/test') # save for training model.save('inference_model', False) # save for inference @@ -1092,15 +1094,18 @@ class Model(object): import paddle import paddle.nn as nn - + from paddle.static import InputSpec + device = paddle.set_device('cpu') - paddle.disable_static(device) + + input = InputSpec([None, 784], 'float32', 'x') model = paddle.Model(nn.Sequential( nn.Linear(784, 200), nn.Tanh(), nn.Linear(200, 10), - nn.Softmax())) + nn.Softmax()), input) + model.save('checkpoint/test') model.load('checkpoint/test') """ @@ -1165,13 +1170,15 @@ class Model(object): import paddle import paddle.nn as nn + from paddle.static import InputSpec - paddle.disable_static() - + input = InputSpec([None, 784], 'float32', 'x') + model = paddle.Model(nn.Sequential( nn.Linear(784, 200), nn.Tanh(), - nn.Linear(200, 10))) + nn.Linear(200, 10)), input) + params = model.parameters() """ return self._adapter.parameters() @@ -1313,7 +1320,7 @@ class Model(object): label = InputSpec([None, 1], 'int64', 'label') model = paddle.Model( - paddle.vision.models.LeNet(classifier_activation=None), + paddle.vision.models.LeNet(), input, label) optim = paddle.optimizer.Adam( learning_rate=0.001, parameters=model.parameters()) @@ -1350,7 +1357,7 @@ class Model(object): label = InputSpec([None, 1], 'int64', 'label') model = paddle.Model( - paddle.vision.models.LeNet(classifier_activation=None), input, label) + paddle.vision.models.LeNet(), input, label) optim = paddle.optimizer.Adam( learning_rate=0.001, parameters=model.parameters()) model.prepare( @@ -1483,7 +1490,7 @@ class Model(object): # imperative mode paddle.disable_static() - model = paddle.Model(paddle.vision.models.LeNet()) + model = paddle.Model(paddle.vision.models.LeNet(), input, label) model.prepare(metrics=paddle.metric.Accuracy()) result = model.evaluate(val_dataset, batch_size=64) print(result) @@ -1580,19 +1587,20 @@ class Model(object): test_dataset = MnistDataset(mode='test', return_label=False) - # declarative mode + # imperative mode input = InputSpec([-1, 1, 28, 28], 'float32', 'image') model = paddle.Model(paddle.vision.models.LeNet(), input) model.prepare() - result = model.predict(test_dataset, batch_size=64) print(len(result[0]), result[0][0].shape) - # imperative mode + # declarative mode device = paddle.set_device('cpu') - paddle.disable_static(device) - model = 
paddle.Model(paddle.vision.models.LeNet()) + paddle.enable_static() + input = InputSpec([-1, 1, 28, 28], 'float32', 'image') + model = paddle.Model(paddle.vision.models.LeNet(), input) model.prepare() + result = model.predict(test_dataset, batch_size=64) print(len(result[0]), result[0][0].shape) """ @@ -1832,15 +1840,11 @@ class Model(object): import paddle from paddle.static import InputSpec - - dynamic = True - device = paddle.set_device('cpu') - paddle.disable_static(device) if dynamic else None input = InputSpec([None, 1, 28, 28], 'float32', 'image') label = InputSpec([None, 1], 'int64', 'label') - model = paddle.Model(paddle.vision.LeNet(classifier_activation=None), + model = paddle.Model(paddle.vision.LeNet(), input, label) optim = paddle.optimizer.Adam( learning_rate=0.001, parameters=model.parameters()) diff --git a/python/paddle/metric/metrics.py b/python/paddle/metric/metrics.py index 1cd65171ff0..f4a9b8c01d0 100644 --- a/python/paddle/metric/metrics.py +++ b/python/paddle/metric/metrics.py @@ -182,7 +182,6 @@ class Accuracy(Metric): import numpy as np import paddle - paddle.disable_static() x = paddle.to_tensor(np.array([ [0.1, 0.2, 0.3, 0.4], [0.1, 0.4, 0.3, 0.2], @@ -202,11 +201,13 @@ class Accuracy(Metric): .. code-block:: python import paddle - - paddle.disable_static() + from paddle.static import InputSpec + + input = InputSpec([None, 1, 28, 28], 'float32', 'image') + label = InputSpec([None, 1], 'int64', 'label') train_dataset = paddle.vision.datasets.MNIST(mode='train') - model = paddle.Model(paddle.vision.LeNet(classifier_activation=None)) + model = paddle.Model(paddle.vision.LeNet(), input, label) optim = paddle.optimizer.Adam( learning_rate=0.001, parameters=model.parameters()) model.prepare( diff --git a/python/paddle/tests/CMakeLists.txt b/python/paddle/tests/CMakeLists.txt index 6fb73b08c11..e1bc65a5d15 100644 --- a/python/paddle/tests/CMakeLists.txt +++ b/python/paddle/tests/CMakeLists.txt @@ -8,10 +8,6 @@ foreach(TEST_OP ${DIST_TEST_OPS}) list(REMOVE_ITEM TEST_OPS ${TEST_OP}) endforeach() -# disable test_pretrained_model and test_vision_models -list(REMOVE_ITEM TEST_OPS test_pretrained_model) -list(REMOVE_ITEM TEST_OPS test_vision_models) - foreach(src ${TEST_OPS}) py_test(${src} SRCS ${src}.py) endforeach() diff --git a/python/paddle/tests/dist_hapi_mnist_dynamic.py b/python/paddle/tests/dist_hapi_mnist_dynamic.py index 13d966bf38f..46d02789402 100644 --- a/python/paddle/tests/dist_hapi_mnist_dynamic.py +++ b/python/paddle/tests/dist_hapi_mnist_dynamic.py @@ -68,7 +68,7 @@ class TestDistTraning(unittest.TestCase): inputs = [Input(im_shape, 'float32', 'image')] labels = [Input([None, 1], 'int64', 'label')] - model = Model(LeNet(classifier_activation=None), inputs, labels) + model = Model(LeNet(), inputs, labels) optim = fluid.optimizer.Momentum( learning_rate=0.001, momentum=.9, parameter_list=model.parameters()) model.prepare(optim, CrossEntropyLoss(), Accuracy()) diff --git a/python/paddle/tests/dist_hapi_mnist_static.py b/python/paddle/tests/dist_hapi_mnist_static.py index 9d8e5f3652c..eab34a6dafb 100644 --- a/python/paddle/tests/dist_hapi_mnist_static.py +++ b/python/paddle/tests/dist_hapi_mnist_static.py @@ -67,7 +67,7 @@ class TestDistTraning(unittest.TestCase): inputs = [Input(im_shape, 'float32', 'image')] labels = [Input([None, 1], 'int64', 'label')] - model = Model(LeNet(classifier_activation=None), inputs, labels) + model = Model(LeNet(), inputs, labels) optim = fluid.optimizer.Momentum( learning_rate=0.001, momentum=.9, 
parameter_list=model.parameters()) model.prepare(optim, CrossEntropyLoss(), Accuracy()) diff --git a/python/paddle/tests/test_model.py b/python/paddle/tests/test_model.py index c89cbbbfbda..5a3d837407b 100644 --- a/python/paddle/tests/test_model.py +++ b/python/paddle/tests/test_model.py @@ -40,7 +40,7 @@ from paddle.fluid.dygraph.dygraph_to_static.program_translator import ProgramTra class LeNetDygraph(paddle.nn.Layer): - def __init__(self, num_classes=10, classifier_activation=None): + def __init__(self, num_classes=10): super(LeNetDygraph, self).__init__() self.num_classes = num_classes self.features = Sequential( @@ -55,8 +55,7 @@ class LeNetDygraph(paddle.nn.Layer): if num_classes > 0: self.fc = Sequential( - Linear(400, 120), Linear(120, 84), Linear(84, 10), - Softmax()) #Todo: accept any activation + Linear(400, 120), Linear(120, 84), Linear(84, 10)) def forward(self, inputs): x = self.features(inputs) @@ -67,6 +66,34 @@ class LeNetDygraph(paddle.nn.Layer): return x +class LeNetDeclarative(fluid.dygraph.Layer): + def __init__(self, num_classes=10): + super(LeNetDeclarative, self).__init__() + self.num_classes = num_classes + self.features = Sequential( + Conv2d( + 1, 6, 3, stride=1, padding=1), + ReLU(), + Pool2D(2, 'max', 2), + Conv2d( + 6, 16, 5, stride=1, padding=0), + ReLU(), + Pool2D(2, 'max', 2)) + + if num_classes > 0: + self.fc = Sequential( + Linear(400, 120), Linear(120, 84), Linear(84, 10)) + + @declarative + def forward(self, inputs): + x = self.features(inputs) + + if self.num_classes > 0: + x = fluid.layers.flatten(x, 1) + x = self.fc(x) + return x + + class MnistDataset(MNIST): def __init__(self, mode, return_label=True, sample_num=None): super(MnistDataset, self).__init__(mode=mode) @@ -198,7 +225,7 @@ class TestModel(unittest.TestCase): paddle.manual_seed(seed) paddle.framework.random._manual_program_seed(seed) - net = LeNet(classifier_activation=None) + net = LeNet() optim_new = fluid.optimizer.Adam( learning_rate=0.001, parameter_list=net.parameters()) model = Model(net, inputs=self.inputs, labels=self.labels) @@ -287,14 +314,12 @@ class TestModel(unittest.TestCase): class MyModel(paddle.nn.Layer): - def __init__(self, classifier_activation='softmax'): + def __init__(self): super(MyModel, self).__init__() self._fc = Linear(20, 10) - self._act = Softmax() #Todo: accept any activation def forward(self, x): y = self._fc(x) - y = self._act(y) return y @@ -311,7 +336,7 @@ class TestModelFunction(unittest.TestCase): def get_expect(): fluid.enable_dygraph(fluid.CPUPlace()) self.set_seed() - m = MyModel(classifier_activation=None) + m = MyModel() optim = fluid.optimizer.SGD(learning_rate=0.001, parameter_list=m.parameters()) m.train() @@ -330,7 +355,7 @@ class TestModelFunction(unittest.TestCase): fluid.enable_dygraph(device) if dynamic else None self.set_seed() - net = MyModel(classifier_activation=None) + net = MyModel() optim2 = fluid.optimizer.SGD(learning_rate=0.001, parameter_list=net.parameters()) @@ -374,7 +399,7 @@ class TestModelFunction(unittest.TestCase): for dynamic in [True, False]: device = paddle.set_device('cpu') fluid.enable_dygraph(device) if dynamic else None - net = MyModel(classifier_activation=None) + net = MyModel() inputs = [InputSpec([None, 20], 'float32', 'x')] labels = [InputSpec([None, 1], 'int64', 'label')] optim = fluid.optimizer.SGD(learning_rate=0.001, @@ -417,7 +442,7 @@ class TestModelFunction(unittest.TestCase): fluid.enable_dygraph(device) inputs = [InputSpec([None, 20], 'float32', 'x')] labels = [InputSpec([None, 1], 'int64', 
'label')] - model = Model(MyModel(classifier_activation=None), inputs, labels) + model = Model(MyModel(), inputs, labels) optim = fluid.optimizer.SGD(learning_rate=0.001, parameter_list=model.parameters()) model.prepare(optimizer=optim, loss=CrossEntropyLoss(reduction="sum")) @@ -426,7 +451,7 @@ class TestModelFunction(unittest.TestCase): inputs = [InputSpec([None, 20], 'float32', 'x')] labels = [InputSpec([None, 1], 'int64', 'label')] - model = Model(MyModel(classifier_activation=None), inputs, labels) + model = Model(MyModel(), inputs, labels) optim = fluid.optimizer.SGD(learning_rate=0.001, parameter_list=model.parameters()) model.prepare(optimizer=optim, loss=CrossEntropyLoss(reduction="sum")) @@ -436,7 +461,7 @@ class TestModelFunction(unittest.TestCase): def test_static_save_dynamic_load(self): path = tempfile.mkdtemp() - net = MyModel(classifier_activation=None) + net = MyModel() inputs = [InputSpec([None, 20], 'float32', 'x')] labels = [InputSpec([None, 1], 'int64', 'label')] optim = fluid.optimizer.SGD(learning_rate=0.001, @@ -448,7 +473,7 @@ class TestModelFunction(unittest.TestCase): device = paddle.set_device('cpu') fluid.enable_dygraph(device) #if dynamic else None - net = MyModel(classifier_activation=None) + net = MyModel() inputs = [InputSpec([None, 20], 'float32', 'x')] labels = [InputSpec([None, 1], 'int64', 'label')] optim = fluid.optimizer.SGD(learning_rate=0.001, @@ -557,7 +582,7 @@ class TestModelFunction(unittest.TestCase): class TestRaiseError(unittest.TestCase): def test_input_without_name(self): - net = MyModel(classifier_activation=None) + net = MyModel() inputs = [InputSpec([None, 10], 'float32')] labels = [InputSpec([None, 1], 'int64', 'label')] @@ -567,7 +592,7 @@ class TestRaiseError(unittest.TestCase): def test_input_without_input_spec(self): for dynamic in [True, False]: paddle.disable_static() if dynamic else None - net = MyModel(classifier_activation=None) + net = MyModel() with self.assertRaises(TypeError): model = Model(net) paddle.enable_static() diff --git a/python/paddle/tests/test_pretrained_model.py b/python/paddle/tests/test_pretrained_model.py index 641147d39e9..bf9c2a2ae06 100644 --- a/python/paddle/tests/test_pretrained_model.py +++ b/python/paddle/tests/test_pretrained_model.py @@ -13,6 +13,8 @@ # limitations under the License. 
import unittest +import tempfile +import shutil import numpy as np import paddle @@ -23,27 +25,36 @@ import paddle.vision.models as models # test the predicted resutls of static graph and dynamic graph are equal # when used pretrained model class TestPretrainedModel(unittest.TestCase): - def infer(self, x, arch, dygraph=True): - if dygraph: - paddle.disable_static() - - net = models.__dict__[arch](pretrained=True, classifier_activation=None) - inputs = [InputSpec([None, 3, 224, 224], 'float32', 'image')] - model = paddle.Model(network=net, inputs=inputs) - model.prepare() - res = model.test_batch(x) - - if dygraph: - paddle.enable_static() - return res + def infer(self, arch): + path = tempfile.mkdtemp() + x = np.array(np.random.random((2, 3, 224, 224)), dtype=np.float32) + res = {} + for dygraph in [True, False]: + if not dygraph: + paddle.enable_static() + + net = models.__dict__[arch]() + inputs = [InputSpec([None, 3, 224, 224], 'float32', 'image')] + model = paddle.Model(network=net, inputs=inputs) + model.prepare() + + if dygraph: + model.save(path) + res['dygraph'] = model.test_batch(x) + else: + model.load(path) + res['static'] = model.test_batch(x) + + if not dygraph: + paddle.disable_static() + + shutil.rmtree(path) + np.testing.assert_allclose(res['dygraph'], res['static']) def test_models(self): arches = ['mobilenet_v1', 'mobilenet_v2', 'resnet18'] for arch in arches: - x = np.array(np.random.random((2, 3, 224, 224)), dtype=np.float32) - y_dygraph = self.infer(x, arch) - y_static = self.infer(x, arch, dygraph=False) - np.testing.assert_allclose(y_dygraph, y_static) + self.infer(arch) if __name__ == '__main__': diff --git a/python/paddle/tests/test_vision_models.py b/python/paddle/tests/test_vision_models.py index 44f9ab53901..6489b02615b 100644 --- a/python/paddle/tests/test_vision_models.py +++ b/python/paddle/tests/test_vision_models.py @@ -36,7 +36,7 @@ class TestVisonModels(unittest.TestCase): model.test_batch(x) def test_mobilenetv2_pretrained(self): - self.models_infer('mobilenet_v2', pretrained=True) + self.models_infer('mobilenet_v2', pretrained=False) def test_mobilenetv1(self): self.models_infer('mobilenet_v1') diff --git a/python/paddle/vision/models/lenet.py b/python/paddle/vision/models/lenet.py index c2d4be7cda1..b30d5992f9a 100644 --- a/python/paddle/vision/models/lenet.py +++ b/python/paddle/vision/models/lenet.py @@ -12,20 +12,19 @@ #See the License for the specific language governing permissions and #limitations under the License. -import paddle.fluid as fluid -from paddle.nn import Conv2d, Pool2D, Linear, ReLU, Sequential, Softmax +import paddle +import paddle.nn as nn __all__ = ['LeNet'] -class LeNet(fluid.dygraph.Layer): +class LeNet(nn.Layer): """LeNet model from `"LeCun Y, Bottou L, Bengio Y, et al. Gradient-based learning applied to document recognition[J]. Proceedings of the IEEE, 1998, 86(11): 2278-2324.`_ Args: num_classes (int): output dim of last fc layer. If num_classes <=0, last fc layer will not be defined. Default: 10. - classifier_activation (str): activation for the last fc layer. Default: 'softmax'. Examples: .. 
code-block:: python @@ -35,28 +34,27 @@ class LeNet(fluid.dygraph.Layer): model = LeNet() """ - def __init__(self, num_classes=10, classifier_activation='softmax'): + def __init__(self, num_classes=10): super(LeNet, self).__init__() self.num_classes = num_classes - self.features = Sequential( - Conv2d( + self.features = nn.Sequential( + nn.Conv2d( 1, 6, 3, stride=1, padding=1), - ReLU(), - Pool2D(2, 'max', 2), - Conv2d( + nn.ReLU(), + nn.MaxPool2d(2, 2), + nn.Conv2d( 6, 16, 5, stride=1, padding=0), - ReLU(), - Pool2D(2, 'max', 2)) + nn.ReLU(), + nn.MaxPool2d(2, 2)) if num_classes > 0: - self.fc = Sequential( - Linear(400, 120), Linear(120, 84), Linear(84, 10), - Softmax()) #Todo: accept any activation + self.fc = nn.Sequential( + nn.Linear(400, 120), nn.Linear(120, 84), nn.Linear(84, 10)) def forward(self, inputs): x = self.features(inputs) if self.num_classes > 0: - x = fluid.layers.flatten(x, 1) + x = paddle.flatten(x, 1) x = self.fc(x) return x diff --git a/python/paddle/vision/models/mobilenetv1.py b/python/paddle/vision/models/mobilenetv1.py index 10defbf593d..39654122e3b 100644 --- a/python/paddle/vision/models/mobilenetv1.py +++ b/python/paddle/vision/models/mobilenetv1.py @@ -12,10 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.fluid as fluid -from paddle.fluid.initializer import MSRA -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear +import paddle +import paddle.nn as nn from paddle.utils.download import get_weights_path_from_url @@ -24,85 +22,66 @@ __all__ = ['MobileNetV1', 'mobilenet_v1'] model_urls = { 'mobilenetv1_1.0': ('https://paddle-hapi.bj.bcebos.com/models/mobilenet_v1_x1.0.pdparams', - 'bf0d25cb0bed1114d9dac9384ce2b4a6') + '42a154c2f26f86e7457d6daded114e8c') } -class ConvBNLayer(fluid.dygraph.Layer): +class ConvBNLayer(nn.Layer): def __init__(self, - num_channels, - filter_size, - num_filters, + in_channels, + out_channels, + kernel_size, stride, padding, - channels=None, - num_groups=1, - act='relu', - use_cudnn=True, - name=None): + num_groups=1): super(ConvBNLayer, self).__init__() - self._conv = Conv2D( - num_channels=num_channels, - num_filters=num_filters, - filter_size=filter_size, + self._conv = nn.Conv2d( + in_channels, + out_channels, + kernel_size, stride=stride, padding=padding, groups=num_groups, - act=None, - use_cudnn=use_cudnn, - param_attr=ParamAttr( - initializer=MSRA(), name=self.full_name() + "_weights"), bias_attr=False) - self._batch_norm = BatchNorm( - num_filters, - act=act, - param_attr=ParamAttr(name=self.full_name() + "_bn" + "_scale"), - bias_attr=ParamAttr(name=self.full_name() + "_bn" + "_offset"), - moving_mean_name=self.full_name() + "_bn" + '_mean', - moving_variance_name=self.full_name() + "_bn" + '_variance') + self._norm_layer = nn.BatchNorm2d(out_channels) + self._act = nn.ReLU() - def forward(self, inputs): - y = self._conv(inputs) - y = self._batch_norm(y) - return y + def forward(self, x): + x = self._conv(x) + x = self._norm_layer(x) + x = self._act(x) + return x -class DepthwiseSeparable(fluid.dygraph.Layer): - def __init__(self, - num_channels, - num_filters1, - num_filters2, - num_groups, - stride, - scale, - name=None): +class DepthwiseSeparable(nn.Layer): + def __init__(self, in_channels, out_channels1, out_channels2, num_groups, + stride, scale): super(DepthwiseSeparable, self).__init__() self._depthwise_conv = ConvBNLayer( - num_channels=num_channels, - 
num_filters=int(num_filters1 * scale), - filter_size=3, + in_channels, + int(out_channels1 * scale), + kernel_size=3, stride=stride, padding=1, - num_groups=int(num_groups * scale), - use_cudnn=False) + num_groups=int(num_groups * scale)) self._pointwise_conv = ConvBNLayer( - num_channels=int(num_filters1 * scale), - filter_size=1, - num_filters=int(num_filters2 * scale), + int(out_channels1 * scale), + int(out_channels2 * scale), + kernel_size=1, stride=1, padding=0) - def forward(self, inputs): - y = self._depthwise_conv(inputs) - y = self._pointwise_conv(y) - return y + def forward(self, x): + x = self._depthwise_conv(x) + x = self._pointwise_conv(x) + return x -class MobileNetV1(fluid.dygraph.Layer): +class MobileNetV1(nn.Layer): """MobileNetV1 model from `"MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications" `_. @@ -111,7 +90,6 @@ class MobileNetV1(fluid.dygraph.Layer): num_classes (int): output dim of last fc layer. If num_classes <=0, last fc layer will not be defined. Default: 1000. with_pool (bool): use pool before the last fc layer or not. Default: True. - classifier_activation (str): activation for the last fc layer. Default: 'softmax'. Examples: .. code-block:: python @@ -121,11 +99,7 @@ class MobileNetV1(fluid.dygraph.Layer): model = MobileNetV1() """ - def __init__(self, - scale=1.0, - num_classes=1000, - with_pool=True, - classifier_activation='softmax'): + def __init__(self, scale=1.0, num_classes=1000, with_pool=True): super(MobileNetV1, self).__init__() self.scale = scale self.dwsl = [] @@ -133,18 +107,17 @@ class MobileNetV1(fluid.dygraph.Layer): self.with_pool = with_pool self.conv1 = ConvBNLayer( - num_channels=3, - filter_size=3, - channels=3, - num_filters=int(32 * scale), + in_channels=3, + out_channels=int(32 * scale), + kernel_size=3, stride=2, padding=1) dws21 = self.add_sublayer( sublayer=DepthwiseSeparable( - num_channels=int(32 * scale), - num_filters1=32, - num_filters2=64, + in_channels=int(32 * scale), + out_channels1=32, + out_channels2=64, num_groups=32, stride=1, scale=scale), @@ -153,9 +126,9 @@ class MobileNetV1(fluid.dygraph.Layer): dws22 = self.add_sublayer( sublayer=DepthwiseSeparable( - num_channels=int(64 * scale), - num_filters1=64, - num_filters2=128, + in_channels=int(64 * scale), + out_channels1=64, + out_channels2=128, num_groups=64, stride=2, scale=scale), @@ -164,9 +137,9 @@ class MobileNetV1(fluid.dygraph.Layer): dws31 = self.add_sublayer( sublayer=DepthwiseSeparable( - num_channels=int(128 * scale), - num_filters1=128, - num_filters2=128, + in_channels=int(128 * scale), + out_channels1=128, + out_channels2=128, num_groups=128, stride=1, scale=scale), @@ -175,9 +148,9 @@ class MobileNetV1(fluid.dygraph.Layer): dws32 = self.add_sublayer( sublayer=DepthwiseSeparable( - num_channels=int(128 * scale), - num_filters1=128, - num_filters2=256, + in_channels=int(128 * scale), + out_channels1=128, + out_channels2=256, num_groups=128, stride=2, scale=scale), @@ -186,9 +159,9 @@ class MobileNetV1(fluid.dygraph.Layer): dws41 = self.add_sublayer( sublayer=DepthwiseSeparable( - num_channels=int(256 * scale), - num_filters1=256, - num_filters2=256, + in_channels=int(256 * scale), + out_channels1=256, + out_channels2=256, num_groups=256, stride=1, scale=scale), @@ -197,9 +170,9 @@ class MobileNetV1(fluid.dygraph.Layer): dws42 = self.add_sublayer( sublayer=DepthwiseSeparable( - num_channels=int(256 * scale), - num_filters1=256, - num_filters2=512, + in_channels=int(256 * scale), + out_channels1=256, + out_channels2=512, 
num_groups=256, stride=2, scale=scale), @@ -209,9 +182,9 @@ class MobileNetV1(fluid.dygraph.Layer): for i in range(5): tmp = self.add_sublayer( sublayer=DepthwiseSeparable( - num_channels=int(512 * scale), - num_filters1=512, - num_filters2=512, + in_channels=int(512 * scale), + out_channels1=512, + out_channels2=512, num_groups=512, stride=1, scale=scale), @@ -220,9 +193,9 @@ class MobileNetV1(fluid.dygraph.Layer): dws56 = self.add_sublayer( sublayer=DepthwiseSeparable( - num_channels=int(512 * scale), - num_filters1=512, - num_filters2=1024, + in_channels=int(512 * scale), + out_channels1=512, + out_channels2=1024, num_groups=512, stride=2, scale=scale), @@ -231,9 +204,9 @@ class MobileNetV1(fluid.dygraph.Layer): dws6 = self.add_sublayer( sublayer=DepthwiseSeparable( - num_channels=int(1024 * scale), - num_filters1=1024, - num_filters2=1024, + in_channels=int(1024 * scale), + out_channels1=1024, + out_channels2=1024, num_groups=1024, stride=1, scale=scale), @@ -241,29 +214,23 @@ class MobileNetV1(fluid.dygraph.Layer): self.dwsl.append(dws6) if with_pool: - self.pool2d_avg = Pool2D(pool_type='avg', global_pooling=True) - - if num_classes > -1: - self.out = Linear( - int(1024 * scale), - num_classes, - act=classifier_activation, - param_attr=ParamAttr( - initializer=MSRA(), name=self.full_name() + "fc7_weights"), - bias_attr=ParamAttr(name="fc7_offset")) - - def forward(self, inputs): - y = self.conv1(inputs) + self.pool2d_avg = nn.AdaptiveAvgPool2d(1) + + if num_classes > 0: + self.fc = nn.Linear(int(1024 * scale), num_classes) + + def forward(self, x): + x = self.conv1(x) for dws in self.dwsl: - y = dws(y) + x = dws(x) if self.with_pool: - y = self.pool2d_avg(y) + x = self.pool2d_avg(x) if self.num_classes > 0: - y = fluid.layers.reshape(y, shape=[-1, 1024]) - y = self.out(y) - return y + x = paddle.flatten(x, 1) + x = self.fc(x) + return x def _mobilenet(arch, pretrained=False, **kwargs): @@ -275,7 +242,7 @@ def _mobilenet(arch, pretrained=False, **kwargs): model_urls[arch][1]) assert weight_path.endswith( '.pdparams'), "suffix of weight must be .pdparams" - param, _ = fluid.load_dygraph(weight_path) + param, _ = paddle.load(weight_path) model.load_dict(param) return model diff --git a/python/paddle/vision/models/mobilenetv2.py b/python/paddle/vision/models/mobilenetv2.py index c08fb88f8bd..bab8b7b2b1b 100644 --- a/python/paddle/vision/models/mobilenetv2.py +++ b/python/paddle/vision/models/mobilenetv2.py @@ -14,9 +14,9 @@ import numpy as np import paddle -import paddle.fluid as fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear + +import paddle.nn as nn +import paddle.nn.functional as F from paddle.utils.download import get_weights_path_from_url @@ -25,221 +25,166 @@ __all__ = ['MobileNetV2', 'mobilenet_v2'] model_urls = { 'mobilenetv2_1.0': ('https://paddle-hapi.bj.bcebos.com/models/mobilenet_v2_x1.0.pdparams', - '8ff74f291f72533f2a7956a4efff9d88') + '0340af0a901346c8d46f4529882fb63d') } -class ConvBNLayer(fluid.dygraph.Layer): - def __init__(self, - num_channels, - filter_size, - num_filters, - stride, - padding, - channels=None, - num_groups=1, - use_cudnn=True): - super(ConvBNLayer, self).__init__() - - tmp_param = ParamAttr(name=self.full_name() + "_weights") - self._conv = Conv2D( - num_channels=num_channels, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=padding, - groups=num_groups, - act=None, - use_cudnn=use_cudnn, - param_attr=tmp_param, - bias_attr=False) - - 
self._batch_norm = BatchNorm( - num_filters, - param_attr=ParamAttr(name=self.full_name() + "_bn" + "_scale"), - bias_attr=ParamAttr(name=self.full_name() + "_bn" + "_offset"), - moving_mean_name=self.full_name() + "_bn" + '_mean', - moving_variance_name=self.full_name() + "_bn" + '_variance') - - def forward(self, inputs, if_act=True): - y = self._conv(inputs) - y = self._batch_norm(y) - if if_act: - y = fluid.layers.relu6(y) - return y - - -class InvertedResidualUnit(fluid.dygraph.Layer): - def __init__( - self, - num_channels, - num_in_filter, - num_filters, - stride, - filter_size, - padding, - expansion_factor, ): - super(InvertedResidualUnit, self).__init__() - num_expfilter = int(round(num_in_filter * expansion_factor)) - self._expand_conv = ConvBNLayer( - num_channels=num_channels, - num_filters=num_expfilter, - filter_size=1, - stride=1, - padding=0, - num_groups=1) - - self._bottleneck_conv = ConvBNLayer( - num_channels=num_expfilter, - num_filters=num_expfilter, - filter_size=filter_size, - stride=stride, - padding=padding, - num_groups=num_expfilter, - use_cudnn=False) - - self._linear_conv = ConvBNLayer( - num_channels=num_expfilter, - num_filters=num_filters, - filter_size=1, - stride=1, - padding=0, - num_groups=1) - - def forward(self, inputs, ifshortcut): - y = self._expand_conv(inputs, if_act=True) - y = self._bottleneck_conv(y, if_act=True) - y = self._linear_conv(y, if_act=False) - if ifshortcut: - y = fluid.layers.elementwise_add(inputs, y) - return y - - -class InvresiBlocks(fluid.dygraph.Layer): - def __init__(self, in_c, t, c, n, s): - super(InvresiBlocks, self).__init__() - - self._first_block = InvertedResidualUnit( - num_channels=in_c, - num_in_filter=in_c, - num_filters=c, - stride=s, - filter_size=3, - padding=1, - expansion_factor=t) - - self._inv_blocks = [] - for i in range(1, n): - tmp = self.add_sublayer( - sublayer=InvertedResidualUnit( - num_channels=c, - num_in_filter=c, - num_filters=c, - stride=1, - filter_size=3, - padding=1, - expansion_factor=t), - name=self.full_name() + "_" + str(i + 1)) - self._inv_blocks.append(tmp) - - def forward(self, inputs): - y = self._first_block(inputs, ifshortcut=False) - for inv_block in self._inv_blocks: - y = inv_block(y, ifshortcut=True) - return y - - -class MobileNetV2(fluid.dygraph.Layer): - """MobileNetV2 model from - `"MobileNetV2: Inverted Residuals and Linear Bottlenecks" `_. - - Args: - scale (float): scale of channels in each layer. Default: 1.0. - num_classes (int): output dim of last fc layer. If num_classes <=0, last fc layer - will not be defined. Default: 1000. - with_pool (bool): use pool before the last fc layer or not. Default: True. - classifier_activation (str): activation for the last fc layer. Default: 'softmax'. - - Examples: - .. 
code-block:: python +def _make_divisible(v, divisor, min_value=None): + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) - from paddle.vision.models import MobileNetV2 + if new_v < 0.9 * v: + new_v += divisor + return new_v - model = MobileNetV2() - """ +class ConvBNReLU(nn.Sequential): + def __init__(self, + in_planes, + out_planes, + kernel_size=3, + stride=1, + groups=1, + norm_layer=nn.BatchNorm2d): + padding = (kernel_size - 1) // 2 + + super(ConvBNReLU, self).__init__( + nn.Conv2d( + in_planes, + out_planes, + kernel_size, + stride, + padding, + groups=groups, + bias_attr=False), + norm_layer(out_planes), + nn.ReLU6()) + + +class InvertedResidual(nn.Layer): def __init__(self, - scale=1.0, - num_classes=1000, - with_pool=True, - classifier_activation='softmax'): + inp, + oup, + stride, + expand_ratio, + norm_layer=nn.BatchNorm2d): + super(InvertedResidual, self).__init__() + self.stride = stride + assert stride in [1, 2] + + hidden_dim = int(round(inp * expand_ratio)) + self.use_res_connect = self.stride == 1 and inp == oup + + layers = [] + if expand_ratio != 1: + layers.append( + ConvBNReLU( + inp, hidden_dim, kernel_size=1, norm_layer=norm_layer)) + layers.extend([ + ConvBNReLU( + hidden_dim, + hidden_dim, + stride=stride, + groups=hidden_dim, + norm_layer=norm_layer), + nn.Conv2d( + hidden_dim, oup, 1, 1, 0, bias_attr=False), + norm_layer(oup), + ]) + self.conv = nn.Sequential(*layers) + + def forward(self, x): + if self.use_res_connect: + return x + self.conv(x) + else: + return self.conv(x) + + +class MobileNetV2(nn.Layer): + def __init__(self, scale=1.0, num_classes=1000, with_pool=True): + """MobileNetV2 model from + `"MobileNetV2: Inverted Residuals and Linear Bottlenecks" `_. + + Args: + scale (float): scale of channels in each layer. Default: 1.0. + num_classes (int): output dim of last fc layer. If num_classes <=0, last fc layer + will not be defined. Default: 1000. + with_pool (bool): use pool before the last fc layer or not. Default: True. + + Examples: + .. 
code-block:: python + + from paddle.vision.models import MobileNetV2 + + model = MobileNetV2() + """ super(MobileNetV2, self).__init__() - self.scale = scale self.num_classes = num_classes self.with_pool = with_pool + input_channel = 32 + last_channel = 1280 + + block = InvertedResidual + round_nearest = 8 + norm_layer = nn.BatchNorm2d + inverted_residual_setting = [ + [1, 16, 1, 1], + [6, 24, 2, 2], + [6, 32, 3, 2], + [6, 64, 4, 2], + [6, 96, 3, 1], + [6, 160, 3, 2], + [6, 320, 1, 1], + ] - bottleneck_params_list = [ - (1, 16, 1, 1), - (6, 24, 2, 2), - (6, 32, 3, 2), - (6, 64, 4, 2), - (6, 96, 3, 1), - (6, 160, 3, 2), - (6, 320, 1, 1), + input_channel = _make_divisible(input_channel * scale, round_nearest) + self.last_channel = _make_divisible(last_channel * max(1.0, scale), + round_nearest) + features = [ + ConvBNReLU( + 3, input_channel, stride=2, norm_layer=norm_layer) ] - self._conv1 = ConvBNLayer( - num_channels=3, - num_filters=int(32 * scale), - filter_size=3, - stride=2, - padding=1) - - self._invl = [] - i = 1 - in_c = int(32 * scale) - for layer_setting in bottleneck_params_list: - t, c, n, s = layer_setting - i += 1 - tmp = self.add_sublayer( - sublayer=InvresiBlocks( - in_c=in_c, t=t, c=int(c * scale), n=n, s=s), - name='conv' + str(i)) - self._invl.append(tmp) - in_c = int(c * scale) - - self._out_c = int(1280 * scale) if scale > 1.0 else 1280 - self._conv9 = ConvBNLayer( - num_channels=in_c, - num_filters=self._out_c, - filter_size=1, - stride=1, - padding=0) + for t, c, n, s in inverted_residual_setting: + output_channel = _make_divisible(c * scale, round_nearest) + for i in range(n): + stride = s if i == 0 else 1 + features.append( + block( + input_channel, + output_channel, + stride, + expand_ratio=t, + norm_layer=norm_layer)) + input_channel = output_channel + + features.append( + ConvBNReLU( + input_channel, + self.last_channel, + kernel_size=1, + norm_layer=norm_layer)) + + self.features = nn.Sequential(*features) if with_pool: - self._pool2d_avg = Pool2D(pool_type='avg', global_pooling=True) - - if num_classes > 0: - tmp_param = ParamAttr(name=self.full_name() + "fc10_weights") - self._fc = Linear( - self._out_c, - num_classes, - act=classifier_activation, - param_attr=tmp_param, - bias_attr=ParamAttr(name="fc10_offset")) - - def forward(self, inputs): - y = self._conv1(inputs, if_act=True) - for inv in self._invl: - y = inv(y) - y = self._conv9(y, if_act=True) + self.pool2d_avg = nn.AdaptiveAvgPool2d(1) + + if self.num_classes > 0: + self.classifier = nn.Sequential( + nn.Dropout(0.2), nn.Linear(self.last_channel, num_classes)) + + def forward(self, x): + x = self.features(x) if self.with_pool: - y = self._pool2d_avg(y) + x = self.pool2d_avg(x) + if self.num_classes > 0: - y = fluid.layers.reshape(y, shape=[-1, self._out_c]) - y = self._fc(y) - return y + x = paddle.flatten(x, 1) + x = self.classifier(x) + return x def _mobilenet(arch, pretrained=False, **kwargs): @@ -251,7 +196,7 @@ def _mobilenet(arch, pretrained=False, **kwargs): model_urls[arch][1]) assert weight_path.endswith( '.pdparams'), "suffix of weight must be .pdparams" - param, _ = fluid.load_dygraph(weight_path) + param, _ = paddle.load(weight_path) model.load_dict(param) return model diff --git a/python/paddle/vision/models/resnet.py b/python/paddle/vision/models/resnet.py index da0c3e9eb3f..f9e00aefd6b 100644 --- a/python/paddle/vision/models/resnet.py +++ b/python/paddle/vision/models/resnet.py @@ -15,11 +15,8 @@ from __future__ import division from __future__ import print_function -import math 
-import paddle.fluid as fluid - -from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear -from paddle.fluid.dygraph.container import Sequential +import paddle +import paddle.nn as nn from paddle.utils.download import get_weights_path_from_url @@ -29,143 +26,129 @@ __all__ = [ model_urls = { 'resnet18': ('https://paddle-hapi.bj.bcebos.com/models/resnet18.pdparams', - '0ba53eea9bc970962d0ef96f7b94057e'), + 'cf548f46534aa3560945be4b95cd11c4'), 'resnet34': ('https://paddle-hapi.bj.bcebos.com/models/resnet34.pdparams', - '46bc9f7c3dd2e55b7866285bee91eff3'), + '8d2275cf8706028345f78ac0e1d31969'), 'resnet50': ('https://paddle-hapi.bj.bcebos.com/models/resnet50.pdparams', - '5ce890a9ad386df17cf7fe2313dca0a1'), + 'ca6f485ee1ab0492d38f323885b0ad80'), 'resnet101': ('https://paddle-hapi.bj.bcebos.com/models/resnet101.pdparams', - 'fb07a451df331e4b0bb861ed97c3a9b9'), + '02f35f034ca3858e1e54d4036443c92d'), 'resnet152': ('https://paddle-hapi.bj.bcebos.com/models/resnet152.pdparams', - 'f9c700f26d3644bb76ad2226ed5f5713'), + '7ad16a2f1e7333859ff986138630fd7a'), } -class ConvBNLayer(fluid.dygraph.Layer): +class BasicBlock(nn.Layer): + expansion = 1 + def __init__(self, - num_channels, - num_filters, - filter_size, + inplanes, + planes, stride=1, + downsample=None, groups=1, - act=None): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2D( - num_channels=num_channels, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - act=None, - bias_attr=False) - - self._batch_norm = BatchNorm(num_filters, act=act) - - def forward(self, inputs): - x = self._conv(inputs) - x = self._batch_norm(x) - - return x - - -class BasicBlock(fluid.dygraph.Layer): - """residual block of resnet18 and resnet34 - """ - expansion = 1 - - def __init__(self, num_channels, num_filters, stride, shortcut=True): + base_width=64, + dilation=1, + norm_layer=None): super(BasicBlock, self).__init__() + if norm_layer is None: + norm_layer = nn.BatchNorm2d - self.conv0 = ConvBNLayer( - num_channels=num_channels, - num_filters=num_filters, - filter_size=3, - act='relu') - self.conv1 = ConvBNLayer( - num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - stride=stride, - act='relu') + if dilation > 1: + raise NotImplementedError( + "Dilation > 1 not supported in BasicBlock") - if not shortcut: - self.short = ConvBNLayer( - num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - stride=stride) + self.conv1 = nn.Conv2d( + inplanes, planes, 3, padding=1, stride=stride, bias_attr=False) + self.bn1 = norm_layer(planes) + self.relu = nn.ReLU() + self.conv2 = nn.Conv2d(planes, planes, 3, padding=1, bias_attr=False) + self.bn2 = norm_layer(planes) + self.downsample = downsample + self.stride = stride - self.shortcut = shortcut + def forward(self, x): + identity = x - def forward(self, inputs): - y = self.conv0(inputs) - conv1 = self.conv1(y) + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) - if self.shortcut: - short = inputs - else: - short = self.short(inputs) + out = self.conv2(out) + out = self.bn2(out) - y = short + conv1 + if self.downsample is not None: + identity = self.downsample(x) - return fluid.layers.relu(y) + out += identity + out = self.relu(out) + return out -class BottleneckBlock(fluid.dygraph.Layer): - """residual block of resnet50, resnet101 amd resnet152 - """ + +class BottleneckBlock(nn.Layer): expansion = 4 - def __init__(self, num_channels, num_filters, stride, shortcut=True): + def 
__init__(self, + inplanes, + planes, + stride=1, + downsample=None, + groups=1, + base_width=64, + dilation=1, + norm_layer=None): super(BottleneckBlock, self).__init__() - - self.conv0 = ConvBNLayer( - num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - act='relu') - self.conv1 = ConvBNLayer( - num_channels=num_filters, - num_filters=num_filters, - filter_size=3, + if norm_layer is None: + norm_layer = nn.BatchNorm2d + width = int(planes * (base_width / 64.)) * groups + + self.conv1 = nn.Conv2d(inplanes, width, 1, bias_attr=False) + self.bn1 = norm_layer(width) + + self.conv2 = nn.Conv2d( + width, + width, + 3, + padding=dilation, stride=stride, - act='relu') - self.conv2 = ConvBNLayer( - num_channels=num_filters, - num_filters=num_filters * self.expansion, - filter_size=1, - act=None) + groups=groups, + dilation=dilation, + bias_attr=False) + self.bn2 = norm_layer(width) - if not shortcut: - self.short = ConvBNLayer( - num_channels=num_channels, - num_filters=num_filters * self.expansion, - filter_size=1, - stride=stride) + self.conv3 = nn.Conv2d( + width, planes * self.expansion, 1, bias_attr=False) + self.bn3 = norm_layer(planes * self.expansion) + self.relu = nn.ReLU() + self.downsample = downsample + self.stride = stride - self.shortcut = shortcut + def forward(self, x): + identity = x - self._num_channels_out = num_filters * self.expansion + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) - def forward(self, inputs): - x = self.conv0(inputs) - conv1 = self.conv1(x) - conv2 = self.conv2(conv1) + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) - if self.shortcut: - short = inputs - else: - short = self.short(inputs) + out = self.conv3(out) + out = self.bn3(out) - x = fluid.layers.elementwise_add(x=short, y=conv2) + if self.downsample is not None: + identity = self.downsample(x) - return fluid.layers.relu(x) + out += identity + out = self.relu(out) + return out -class ResNet(fluid.dygraph.Layer): + +class ResNet(nn.Layer): """ResNet model from `"Deep Residual Learning for Image Recognition" `_ @@ -175,7 +158,6 @@ class ResNet(fluid.dygraph.Layer): num_classes (int): output dim of last fc layer. If num_classes <=0, last fc layer will not be defined. Default: 1000. with_pool (bool): use pool before the last fc layer or not. Default: True. - classifier_activation (str): activation for the last fc layer. Default: 'softmax'. Examples: .. 
code-block:: python @@ -189,82 +171,87 @@ class ResNet(fluid.dygraph.Layer): """ - def __init__(self, - Block, - depth=50, - num_classes=1000, - with_pool=True, - classifier_activation='softmax'): + def __init__(self, block, depth, num_classes=1000, with_pool=True): super(ResNet, self).__init__() - - self.num_classes = num_classes - self.with_pool = with_pool - - layer_config = { + layer_cfg = { 18: [2, 2, 2, 2], 34: [3, 4, 6, 3], 50: [3, 4, 6, 3], 101: [3, 4, 23, 3], - 152: [3, 8, 36, 3], + 152: [3, 8, 36, 3] } - assert depth in layer_config.keys(), \ - "supported depth are {} but input layer is {}".format( - layer_config.keys(), depth) - - layers = layer_config[depth] - - in_channels = 64 - out_channels = [64, 128, 256, 512] - - self.conv = ConvBNLayer( - num_channels=3, num_filters=64, filter_size=7, stride=2, act='relu') - self.pool = Pool2D( - pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') - - self.layers = [] - for idx, num_blocks in enumerate(layers): - blocks = [] - shortcut = False - for b in range(num_blocks): - if b == 1: - in_channels = out_channels[idx] * Block.expansion - block = Block( - num_channels=in_channels, - num_filters=out_channels[idx], - stride=2 if b == 0 and idx != 0 else 1, - shortcut=shortcut) - blocks.append(block) - shortcut = True - layer = self.add_sublayer("layer_{}".format(idx), - Sequential(*blocks)) - self.layers.append(layer) + layers = layer_cfg[depth] + self.num_classes = num_classes + self.with_pool = with_pool + self._norm_layer = nn.BatchNorm2d + + self.inplanes = 64 + self.dilation = 1 + self.conv1 = nn.Conv2d( + 3, + self.inplanes, + kernel_size=7, + stride=2, + padding=3, + bias_attr=False) + self.bn1 = self._norm_layer(self.inplanes) + self.relu = nn.ReLU() + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + self.layer1 = self._make_layer(block, 64, layers[0]) + self.layer2 = self._make_layer(block, 128, layers[1], stride=2) + self.layer3 = self._make_layer(block, 256, layers[2], stride=2) + self.layer4 = self._make_layer(block, 512, layers[3], stride=2) if with_pool: - self.global_pool = Pool2D( - pool_size=7, pool_type='avg', global_pooling=True) + self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) if num_classes > 0: - stdv = 1.0 / math.sqrt(out_channels[-1] * Block.expansion * 1.0) - self.fc_input_dim = out_channels[-1] * Block.expansion * 1 * 1 - self.fc = Linear( - self.fc_input_dim, - num_classes, - act=classifier_activation, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv))) - - def forward(self, inputs): - x = self.conv(inputs) - x = self.pool(x) - for layer in self.layers: - x = layer(x) - - if self.with_pool: - x = self.global_pool(x) - - if self.num_classes > -1: - x = fluid.layers.reshape(x, shape=[-1, self.fc_input_dim]) + self.fc = nn.Linear(512 * block.expansion, num_classes) + + def _make_layer(self, block, planes, blocks, stride=1, dilate=False): + norm_layer = self._norm_layer + downsample = None + previous_dilation = self.dilation + if dilate: + self.dilation *= stride + stride = 1 + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d( + self.inplanes, + planes * block.expansion, + 1, + stride=stride, + bias_attr=False), + norm_layer(planes * block.expansion), ) + + layers = [] + layers.append( + block(self.inplanes, planes, stride, downsample, 1, 64, + previous_dilation, norm_layer)) + self.inplanes = planes * block.expansion + for _ in range(1, blocks): + layers.append(block(self.inplanes, planes, 
norm_layer=norm_layer)) + + return nn.Sequential(*layers) + + def forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + + if self.with_pool > 0: + x = self.avgpool(x) + + if self.num_classes > 0: + x = paddle.flatten(x, 1) x = self.fc(x) + return x @@ -277,7 +264,7 @@ def _resnet(arch, Block, depth, pretrained, **kwargs): model_urls[arch][1]) assert weight_path.endswith( '.pdparams'), "suffix of weight must be .pdparams" - param, _ = fluid.load_dygraph(weight_path) + param, _ = paddle.load(weight_path) model.set_dict(param) return model diff --git a/python/paddle/vision/models/vgg.py b/python/paddle/vision/models/vgg.py index 8bfacda2476..d11845b6616 100644 --- a/python/paddle/vision/models/vgg.py +++ b/python/paddle/vision/models/vgg.py @@ -12,9 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.fluid as fluid -from paddle.nn import Conv2d, Pool2D, BatchNorm, Linear, ReLU, Softmax -from paddle.fluid.dygraph.container import Sequential +import paddle +import paddle.nn as nn from paddle.utils.download import get_weights_path_from_url @@ -28,39 +27,18 @@ __all__ = [ model_urls = { 'vgg16': ('https://paddle-hapi.bj.bcebos.com/models/vgg16.pdparams', - 'c788f453a3b999063e8da043456281ee') + '89bbffc0f87d260be9b8cdc169c991c4') } -class Classifier(fluid.dygraph.Layer): - def __init__(self, num_classes, classifier_activation='softmax'): - super(Classifier, self).__init__() - self.linear1 = Linear(512 * 7 * 7, 4096) - self.linear2 = Linear(4096, 4096) - self.linear3 = Linear(4096, num_classes) - self.act = Softmax() #Todo: accept any activation - - def forward(self, x): - x = self.linear1(x) - x = fluid.layers.relu(x) - x = fluid.layers.dropout(x, 0.5) - x = self.linear2(x) - x = fluid.layers.relu(x) - x = fluid.layers.dropout(x, 0.5) - x = self.linear3(x) - out = self.act(x) - return out - - -class VGG(fluid.dygraph.Layer): +class VGG(nn.Layer): """VGG model from `"Very Deep Convolutional Networks For Large-Scale Image Recognition" `_ Args: - features (fluid.dygraph.Layer): vgg features create by function make_layers. + features (nn.Layer): vgg features create by function make_layers. num_classes (int): output dim of last fc layer. If num_classes <=0, last fc layer will not be defined. Default: 1000. - classifier_activation (str): activation for the last fc layer. Default: 'softmax'. Examples: .. 
code-block:: python @@ -76,44 +54,41 @@ class VGG(fluid.dygraph.Layer): """ - def __init__(self, - features, - num_classes=1000, - classifier_activation='softmax'): + def __init__(self, features, num_classes=1000): super(VGG, self).__init__() self.features = features - self.num_classes = num_classes - - if num_classes > 0: - classifier = Classifier(num_classes, classifier_activation) - self.classifier = self.add_sublayer("classifier", - Sequential(classifier)) + self.avgpool = nn.AdaptiveAvgPool2d((7, 7)) + self.classifier = nn.Sequential( + nn.Linear(512 * 7 * 7, 4096), + nn.ReLU(), + nn.Dropout(), + nn.Linear(4096, 4096), + nn.ReLU(), + nn.Dropout(), + nn.Linear(4096, num_classes), ) def forward(self, x): x = self.features(x) - - if self.num_classes > 0: - x = fluid.layers.flatten(x, 1) - x = self.classifier(x) + x = self.avgpool(x) + x = paddle.flatten(x, 1) + x = self.classifier(x) return x def make_layers(cfg, batch_norm=False): layers = [] in_channels = 3 - for v in cfg: if v == 'M': - layers += [Pool2D(pool_size=2, pool_stride=2)] + layers += [nn.MaxPool2d(kernel_size=2, stride=2)] else: + conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1) if batch_norm: - conv2d = Conv2d(in_channels, v, kernel_size=3, padding=1) - layers += [conv2d, BatchNorm(v), ReLU()] + layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU()] else: - conv2d = Conv2d(in_channels, v, kernel_size=3, padding=1) - layers += [conv2d, ReLU()] + layers += [conv2d, nn.ReLU()] in_channels = v - return Sequential(*layers) + return nn.Sequential(*layers) cfgs = { @@ -144,7 +119,7 @@ def _vgg(arch, cfg, batch_norm, pretrained, **kwargs): model_urls[arch][1]) assert weight_path.endswith( '.pdparams'), "suffix of weight must be .pdparams" - param, _ = fluid.load_dygraph(weight_path) + param, _ = paddle.load(weight_path) model.load_dict(param) return model -- GitLab
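Taken together, this patch drops the `classifier_activation` argument everywhere: the vision models now emit raw logits, training code pairs them with `paddle.nn.CrossEntropyLoss` (which applies softmax internally), and the network's parameters are passed to the optimizer explicitly. A minimal end-to-end sketch of the post-patch workflow, mirroring the docstring examples above (assumes a Paddle build containing this change; epochs, batch size, and learning rate are illustrative only):

.. code-block:: python

    import paddle
    from paddle.static import InputSpec

    # Static-friendly input/label specs, as required by paddle.Model here
    input = InputSpec([None, 1, 28, 28], 'float32', 'image')
    label = InputSpec([None, 1], 'int64', 'label')

    # No classifier_activation argument anymore; LeNet returns logits
    lenet = paddle.vision.LeNet()
    model = paddle.Model(lenet, input, label)

    # Parameters are now handed to the optimizer explicitly
    optim = paddle.optimizer.Adam(0.001, parameters=lenet.parameters())
    model.prepare(optim,
                  paddle.nn.CrossEntropyLoss(),  # softmax applied inside the loss
                  paddle.metric.Accuracy())

    train_dataset = paddle.vision.datasets.MNIST(mode='train')
    model.fit(train_dataset, epochs=2, batch_size=32, verbose=1)

Pretrained weights are requested the same way as before (e.g. `mobilenet_v2(pretrained=True)`), but the `_mobilenet`/`_resnet`/`_vgg` helpers above now load them with `paddle.load` instead of `fluid.load_dygraph`, against the updated checkpoint checksums in `model_urls`.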