diff --git a/PaddleSlim/docs/images/usage/ConvertToInt8Pass.png b/PaddleSlim/docs/images/usage/ConvertToInt8Pass.png new file mode 100644 index 0000000000000000000000000000000000000000..8b5849819c0bc8e592dc8f864d8945330df85ab1 Binary files /dev/null and b/PaddleSlim/docs/images/usage/ConvertToInt8Pass.png differ diff --git a/PaddleSlim/docs/images/usage/FreezePass.png b/PaddleSlim/docs/images/usage/FreezePass.png new file mode 100644 index 0000000000000000000000000000000000000000..acd2b0a890a8af85bec6eecdb22e47ad386a178c Binary files /dev/null and b/PaddleSlim/docs/images/usage/FreezePass.png differ diff --git a/PaddleSlim/docs/images/usage/TransformForMobilePass.png b/PaddleSlim/docs/images/usage/TransformForMobilePass.png new file mode 100644 index 0000000000000000000000000000000000000000..4104cacc67af0be1c7bc152696e2ae544127aace Binary files /dev/null and b/PaddleSlim/docs/images/usage/TransformForMobilePass.png differ diff --git a/PaddleSlim/docs/images/usage/TransformPass.png b/PaddleSlim/docs/images/usage/TransformPass.png new file mode 100644 index 0000000000000000000000000000000000000000..f29ab62753e0e6ddf28d0c1dda7139705fc24b18 Binary files /dev/null and b/PaddleSlim/docs/images/usage/TransformPass.png differ diff --git a/PaddleSlim/docs/usage.md b/PaddleSlim/docs/usage.md index 5b584021517f8628a549cb685c14c26d1c42f2cb..48069bc30a078317e3d6d4e7298f1f7251691722 100644 --- a/PaddleSlim/docs/usage.md +++ b/PaddleSlim/docs/usage.md @@ -215,6 +215,10 @@ compress_pass: ### 2.1 量化训练 +**用户须知:** 现阶段的量化训练主要针对卷积层(包括二维卷积和Depthwise卷积)以及全连接层进行量化。卷积层和全连接层在PaddlePaddle框架中对应算子包括`conv2d`、`depthwise_conv2d`和`mul`等。量化训练会对所有的`conv2d`、`depthwise_conv2d`和`mul`进行量化操作,且要求它们的输入中必须包括激活和参数两部分。 + +#### 2.1.1 基于High-Level API的量化训练 + >注意:多个压缩策略组合使用时,量化训练策略必须放在最后。 ``` @@ -279,6 +283,11 @@ strategies: - **save_in_nodes:** variable名称列表。在保存量化后模型的时候,需要根据save_in_nodes对eval programg 网络进行前向遍历剪枝。默认为eval_feed_list内指定的variable的名称列表。 - **save_out_nodes:** 
varibale名称列表。在保存量化后模型的时候,需要根据save_out_nodes对eval programg 网络进行回溯剪枝。默认为eval_fetch_list内指定的variable的名称列表。
+
+#### 2.1.2 基于Low-Level API的量化训练
+
+量化训练High-Level API是对Low-Level API的高层次封装,这使得用户仅需编写少量的代码和配置文件即可进行量化训练。然而,封装必然会带来使用灵活性的降低。因此,若用户在进行量化训练时需要更多的灵活性,可参考 [量化训练Low-Level API使用示例](../quant_low_level_api/README.md) 。
+
 ### 2.2 卷积核剪切
 
 该策略通过减少指定卷积层中卷积核的数量,达到缩减模型大小和计算复杂度的目的。根据选取剪切比例的策略的不同,又细分为以下两个方式:
diff --git a/PaddleSlim/models/__init__.py b/PaddleSlim/models/__init__.py
index 458020712dfedac220586a8a31852f5163ad407f..2141eb9a881317f2b901c820ce0b764ebde3491a 100644
--- a/PaddleSlim/models/__init__.py
+++ b/PaddleSlim/models/__init__.py
@@ -1,2 +1,3 @@
 from .mobilenet import MobileNet
 from .resnet import ResNet50, ResNet101, ResNet152
+from .googlenet import GoogleNet
diff --git a/PaddleSlim/models/googlenet.py b/PaddleSlim/models/googlenet.py
new file mode 100644
index 0000000000000000000000000000000000000000..bd9040c53e61a48d9f5bff6683bec961d3f95583
--- /dev/null
+++ b/PaddleSlim/models/googlenet.py
@@ -0,0 +1,259 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import paddle
+import paddle.fluid as fluid
+from paddle.fluid.param_attr import ParamAttr
+
+__all__ = ['GoogleNet']
+
+# Default training configuration, exposed as ``GoogleNet().params``.
+# NOTE(review): with "piecewise_decay" the four "steps" values look like the
+# learning rates used between the three "epochs" boundaries -- confirm against
+# the training script that consumes this dict.
+train_parameters = {
+    "input_size": [3, 224, 224],
+    "input_mean": [0.485, 0.456, 0.406],
+    "input_std": [0.229, 0.224, 0.225],
+    "learning_strategy": {
+        "name": "piecewise_decay",
+        "batch_size": 256,
+        "epochs": [30, 70, 100],
+        "steps": [0.1, 0.01, 0.001, 0.0001]
+    }
+}
+
+
+class GoogleNet(object):
+    """GoogLeNet-style classifier built from ``paddle.fluid`` layers.
+
+    ``net`` assembles the graph and returns the main classifier output
+    together with two auxiliary classifier outputs.
+    """
+
+    def __init__(self):
+        self.params = train_parameters
+
+    def conv_layer(self,
+                   input,
+                   num_filters,
+                   filter_size,
+                   stride=1,
+                   groups=1,
+                   act=None,
+                   name=None):
+        """Add a bias-free ``conv2d`` padded by ``(filter_size - 1) // 2``.
+
+        The weights use the initializer from ``xavier``, sized by the channel
+        count read from ``input.shape[1]``.
+
+        Args:
+            input: Input variable; ``shape[1]`` is read as the channel count.
+            num_filters: Number of output channels.
+            filter_size: Integer kernel edge length.
+            stride: Convolution stride.
+            groups: Number of convolution groups.
+            act: Activation forwarded to ``conv2d``, or ``None``.
+            name: Layer name and prefix of the weight parameter name.
+                ``None`` leaves naming to Paddle.
+
+        Returns:
+            The output variable of the convolution.
+        """
+        return fluid.layers.conv2d(
+            input=input,
+            num_filters=num_filters,
+            filter_size=filter_size,
+            stride=stride,
+            padding=(filter_size - 1) // 2,
+            groups=groups,
+            act=act,
+            # Reuse xavier() rather than duplicating its initializer maths.
+            param_attr=self.xavier(input.shape[1], filter_size, name),
+            bias_attr=False,
+            name=name)
+
+    def xavier(self, channels, filter_size, name):
+        """Return a ``ParamAttr`` with a uniform weight initializer.
+
+        Values are drawn from ``[-stdv, stdv]`` with
+        ``stdv = sqrt(3 / (filter_size ** 2 * channels))``.
+
+        Args:
+            channels: Number of input channels (or input features).
+            filter_size: Kernel edge length; callers pass 1 for fc layers.
+            name: Layer name; the parameter is named ``name + "_weights"``.
+                ``None`` leaves the parameter name to Paddle.
+        """
+        stdv = (3.0 / (filter_size**2 * channels))**0.5
+        # Only build a parameter name when a layer name was supplied;
+        # ``None + "_weights"`` would raise TypeError.
+        weights_name = None if name is None else name + "_weights"
+        return ParamAttr(
+            initializer=fluid.initializer.Uniform(-stdv, stdv),
+            name=weights_name)
+
+    def inception(self,
+                  input,
+                  channels,
+                  filter1,
+                  filter3R,
+                  filter3,
+                  filter5R,
+                  filter5,
+                  proj,
+                  name=None):
+        """Build one inception module and return its ReLU-activated output.
+
+        Four branches read ``input``: a 1x1 conv, a 1x1 reduce followed by a
+        3x3 conv, a 1x1 reduce followed by a 5x5 conv, and a 3x3 max pool
+        followed by a 1x1 projection. The branch outputs are concatenated on
+        axis 1 and passed through ReLU.
+
+        Args:
+            input: Input variable.
+            channels: Unused; kept so existing positional calls still work.
+            filter1: Output channels of the 1x1 branch.
+            filter3R: Output channels of the reduce before the 3x3 conv.
+            filter3: Output channels of the 3x3 conv.
+            filter5R: Output channels of the reduce before the 5x5 conv.
+            filter5: Output channels of the 5x5 conv.
+            proj: Output channels of the pool projection.
+            name: Module name; layers are named
+                ``"inception_" + name + suffix``. ``None`` leaves all names
+                to Paddle.
+        """
+
+        def scoped(suffix):
+            # Without a module name, fall back to Paddle's automatic naming
+            # instead of failing on ``str + None``.
+            if name is None:
+                return None
+            return "inception_" + name + suffix
+
+        conv1 = self.conv_layer(
+            input=input, num_filters=filter1, filter_size=1,
+            name=scoped("_1x1"))
+        conv3r = self.conv_layer(
+            input=input, num_filters=filter3R, filter_size=1,
+            name=scoped("_3x3_reduce"))
+        conv3 = self.conv_layer(
+            input=conv3r, num_filters=filter3, filter_size=3,
+            name=scoped("_3x3"))
+        conv5r = self.conv_layer(
+            input=input, num_filters=filter5R, filter_size=1,
+            name=scoped("_5x5_reduce"))
+        conv5 = self.conv_layer(
+            input=conv5r, num_filters=filter5, filter_size=5,
+            name=scoped("_5x5"))
+        pool = fluid.layers.pool2d(
+            input=input,
+            pool_size=3,
+            pool_stride=1,
+            pool_padding=1,
+            pool_type='max')
+        # No explicit initializer is set for the projection weights, unlike
+        # the conv_layer() branches above.
+        convprj = fluid.layers.conv2d(
+            input=pool,
+            filter_size=1,
+            num_filters=proj,
+            stride=1,
+            padding=0,
+            name=scoped("_3x3_proj"),
+            param_attr=ParamAttr(name=scoped("_3x3_proj_weights")),
+            bias_attr=False)
+        cat = fluid.layers.concat(input=[conv1, conv3, conv5, convprj], axis=1)
+        return fluid.layers.relu(cat)
+
+    def _aux_classifier(self, input, index, class_dim):
+        """Build auxiliary softmax classifier ``index`` on top of ``input``.
+
+        Layers and parameters are named ``conv_o<index>``, ``fc_o<index>``
+        and ``out<index>``.
+        """
+        tag = "o%d" % index
+        out_name = "out%d" % index
+        pool = fluid.layers.pool2d(
+            input=input, pool_size=5, pool_type='avg', pool_stride=3)
+        conv = self.conv_layer(
+            input=pool, num_filters=128, filter_size=1, name="conv_" + tag)
+        fc = fluid.layers.fc(input=conv,
+                             size=1024,
+                             act='relu',
+                             param_attr=self.xavier(2048, 1, "fc_" + tag),
+                             name="fc_" + tag,
+                             bias_attr=ParamAttr(name="fc_" + tag + "_offset"))
+        dropout = fluid.layers.dropout(x=fc, dropout_prob=0.7)
+        return fluid.layers.fc(input=dropout,
+                               size=class_dim,
+                               act='softmax',
+                               param_attr=self.xavier(1024, 1, out_name),
+                               name=out_name,
+                               bias_attr=ParamAttr(name=out_name + "_offset"))
+
+    def net(self, input, class_dim=1000):
+        """Assemble the whole network on top of ``input``.
+
+        Args:
+            input: Image batch variable. NOTE(review): the closing 7x7
+                average pool suggests the 224x224 ``input_size`` from
+                ``params`` is expected -- confirm.
+            class_dim: Number of output classes.
+
+        Returns:
+            Tuple ``(out, out1, out2)`` of softmax outputs: the main
+            classifier, then the auxiliary classifiers fed from ``ince4a``
+            and ``ince4d``.
+        """
+        conv = self.conv_layer(
+            input=input, num_filters=64, filter_size=7, stride=2, name="conv1")
+        pool = fluid.layers.pool2d(
+            input=conv, pool_size=3, pool_type='max', pool_stride=2)
+
+        conv = self.conv_layer(
+            input=pool, num_filters=64, filter_size=1, name="conv2_1x1")
+        conv = self.conv_layer(
+            input=conv, num_filters=192, filter_size=3, name="conv2_3x3")
+        pool = fluid.layers.pool2d(
+            input=conv, pool_size=3, pool_type='max', pool_stride=2)
+
+        ince3a = self.inception(pool, 192, 64, 96, 128, 16, 32, 32, "ince3a")
+        ince3b = self.inception(ince3a, 256, 128, 128, 192, 32, 96, 64,
+                                "ince3b")
+        pool3 = fluid.layers.pool2d(
+            input=ince3b, pool_size=3, pool_type='max', pool_stride=2)
+
+        ince4a = self.inception(pool3, 480, 192, 96, 208, 16, 48, 64, "ince4a")
+        ince4b = self.inception(ince4a, 512, 160, 112, 224, 24, 64, 64,
+                                "ince4b")
+        ince4c = self.inception(ince4b, 512, 128, 128, 256, 24, 64, 64,
+                                "ince4c")
+        ince4d = self.inception(ince4c, 512, 112, 144, 288, 32, 64, 64,
+                                "ince4d")
+        ince4e = self.inception(ince4d, 528, 256, 160, 320, 32, 128, 128,
+                                "ince4e")
+        pool4 = fluid.layers.pool2d(
+            input=ince4e, pool_size=3, pool_type='max', pool_stride=2)
+
+        ince5a = self.inception(pool4, 832, 256, 160, 320, 32, 128, 128,
+                                "ince5a")
+        ince5b = self.inception(ince5a, 832, 384, 192, 384, 48, 128, 128,
+                                "ince5b")
+        pool5 = fluid.layers.pool2d(
+            input=ince5b, pool_size=7, pool_type='avg', pool_stride=7)
+        dropout = fluid.layers.dropout(x=pool5, dropout_prob=0.4)
+        out = fluid.layers.fc(input=dropout,
+                              size=class_dim,
+                              act='softmax',
+                              param_attr=self.xavier(1024, 1, "out"),
+                              name="out",
+                              bias_attr=ParamAttr(name="out_offset"))
+
+        # Auxiliary heads are created after the main head, in the original
+        # order, so layer creation order in the program is unchanged.
+        out1 = self._aux_classifier(ince4a, 1, class_dim)
+        out2 = self._aux_classifier(ince4d, 2, class_dim)
+
+        # last fc layer is "out"
+        return out, out1, out2
diff --git a/PaddleSlim/quant_low_level_api/README.md b/PaddleSlim/quant_low_level_api/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..377bca8030e8e25429778c6fa288c31848087d55
--- /dev/null
+++ b/PaddleSlim/quant_low_level_api/README.md
@@ -0,0 +1,160 @@
+
+
+图1:应用QuantizationTransformPass后的结果
+
+
+图2:应用QuantizationFreezePass后的结果
+
+
+图3:应用ConvertToInt8Pass后的结果
+
+
+图4:应用TransformForMobilePass后的结果
+