diff --git a/algo/algo/index.html b/algo/algo/index.html
index 220048d91ebcbb0b58e6332ffa6e7287bbfdacc7..ac7f16636385953f97943e5e62ac11309e1f2a5a 100644
--- a/algo/algo/index.html
+++ b/algo/algo/index.html
@@ -125,6 +125,9 @@
Algorithm principles

Example:

Click AIStudio to run the following example code.

import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
from paddleslim.prune import Pruner

def conv_bn_layer(input,
                  num_filters,
                  filter_size,
                  name,
                  stride=1,
                  groups=1,
                  act=None):
    conv = fluid.layers.conv2d(
        input=input,
        num_filters=num_filters,
        filter_size=filter_size,
        stride=stride,
        padding=(filter_size - 1) // 2,
        groups=groups,
        act=None,
        param_attr=ParamAttr(name=name + "_weights"),
        bias_attr=False,
        name=name + "_out")
    bn_name = name + "_bn"
    return fluid.layers.batch_norm(
        input=conv,
        act=act,
        name=bn_name + '_output',
        param_attr=ParamAttr(name=bn_name + '_scale'),
        bias_attr=ParamAttr(bn_name + '_offset'),
        moving_mean_name=bn_name + '_mean',
        moving_variance_name=bn_name + '_variance', )

main_program = fluid.Program()
startup_program = fluid.Program()
#   X       X              O       X              O
# conv1-->conv2-->sum1-->conv3-->conv4-->sum2-->conv5-->conv6
#     |            ^ |                    ^
#     |____________| |____________________|
#
# X: prune output channels
# O: prune input channels
with fluid.program_guard(main_program, startup_program):
    input = fluid.data(name="image", shape=[None, 3, 16, 16])
    conv1 = conv_bn_layer(input, 8, 3, "conv1")
    conv2 = conv_bn_layer(conv1, 8, 3, "conv2")
    sum1 = conv1 + conv2
    conv3 = conv_bn_layer(sum1, 8, 3, "conv3")
    conv4 = conv_bn_layer(conv3, 8, 3, "conv4")
    sum2 = conv4 + sum1
    conv5 = conv_bn_layer(sum2, 8, 3, "conv5")
    conv6 = conv_bn_layer(conv5, 8, 3, "conv6")

place = fluid.CPUPlace()
exe = fluid.Executor(place)
scope = fluid.Scope()
exe.run(startup_program, scope=scope)
pruner = Pruner()
main_program, _, _ = pruner.prune(
    main_program,
    scope,
    params=["conv4_weights"],
    ratios=[0.5],
    place=place,
    lazy=False,
    only_graph=False,
    param_backup=False,
    param_shape_backup=False)

for param in main_program.global_block().all_parameters():
    if "weights" in param.name:
        print("param name: {}; param shape: {}".format(param.name, param.shape))
 


sensitivity#

paddleslim.prune.sensitivity(program, place, param_names, eval_func, sensitivities_file=None, pruned_ratios=None) [source]

Computes the sensitivity of each convolution layer in the network. The sensitivity of a convolution layer is gathered as follows: prune different ratios of the layer's output channels one at a time, and measure the accuracy loss on the test set after each pruning. Once the sensitivity information is available, a pruning ratio for each convolution layer can be chosen by inspection or by other means.
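Conceptually, the statistics described above amount to the following loop. This is a simplified sketch for illustration only, not the library's implementation; the relative-loss formula and the backup/restore handling are assumptions:

from paddleslim.prune import Pruner

def sensitivity_sketch(program, scope, place, param_names, eval_func, pruned_ratios):
    baseline = eval_func(program)  # accuracy of the unpruned network
    sensitivities = {}
    for name in param_names:
        sensitivities[name] = {}
        for ratio in pruned_ratios:
            # Prune `ratio` of this layer's output channels; param_backup=True keeps
            # the original weights so they can be restored before the next trial.
            pruned_program, backup, _ = Pruner().prune(
                program, scope, params=[name], ratios=[ratio],
                place=place, param_backup=True)
            acc = eval_func(pruned_program)
            # Record the accuracy loss for this (layer, ratio) pair.
            sensitivities[name][ratio] = (baseline - acc) / baseline
    return sensitivities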

@@ -339,15 +339,15 @@

  • program(paddle.fluid.Program) - The target network to be evaluated. For more about Program, see the introduction to the Program concept.

  • place(paddle.fluid.Place) - The device on which the parameters to be analyzed are located; it can be CUDAPlace or CPUPlace. See the introduction to the Place concept.

  • param_names(list) - List of names of the convolution-layer parameters to be analyzed. The names of all parameters in a model can be listed as follows:

    for block in program.blocks:
        for param in block.all_parameters():
            print("param: {}; shape: {}".format(param.name, param.shape))
     
      @@ -365,116 +365,116 @@
  • sensitivities(dict) - A dict holding the sensitivity information, in the following format:
{"weight_0":
   {0.1: 0.22,
    0.2: 0.33
   },
 "weight_1":
   {0.1: 0.21,
    0.2: 0.4
   }
}
       

Here weight_0 is the name of a convolution-layer parameter. Within sensitivities['weight_0'], each key is a pruning ratio and the corresponding value is the resulting accuracy loss; for example, sensitivities['weight_0'][0.1] == 0.22 means that pruning weight_0 by a ratio of 0.1 causes an accuracy loss of 0.22.

Example:

Click AIStudio to run the following example code.

import paddle
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
from paddleslim.prune import sensitivity
import paddle.dataset.mnist as reader

def conv_bn_layer(input,
                  num_filters,
                  filter_size,
                  name,
                  stride=1,
                  groups=1,
                  act=None):
    conv = fluid.layers.conv2d(
        input=input,
        num_filters=num_filters,
        filter_size=filter_size,
        stride=stride,
        padding=(filter_size - 1) // 2,
        groups=groups,
        act=None,
        param_attr=ParamAttr(name=name + "_weights"),
        bias_attr=False,
        name=name + "_out")
    bn_name = name + "_bn"
    return fluid.layers.batch_norm(
        input=conv,
        act=act,
        name=bn_name + '_output',
        param_attr=ParamAttr(name=bn_name + '_scale'),
        bias_attr=ParamAttr(bn_name + '_offset'),
        moving_mean_name=bn_name + '_mean',
        moving_variance_name=bn_name + '_variance', )

main_program = fluid.Program()
startup_program = fluid.Program()
#   X       X              O       X              O
# conv1-->conv2-->sum1-->conv3-->conv4-->sum2-->conv5-->conv6
#     |            ^ |                    ^
#     |____________| |____________________|
#
# X: prune output channels
# O: prune input channels
image_shape = [1, 28, 28]
with fluid.program_guard(main_program, startup_program):
    image = fluid.data(name='image', shape=[None] + image_shape, dtype='float32')
    label = fluid.data(name='label', shape=[None, 1], dtype='int64')
    conv1 = conv_bn_layer(image, 8, 3, "conv1")
    conv2 = conv_bn_layer(conv1, 8, 3, "conv2")
    sum1 = conv1 + conv2
    conv3 = conv_bn_layer(sum1, 8, 3, "conv3")
    conv4 = conv_bn_layer(conv3, 8, 3, "conv4")
    sum2 = conv4 + sum1
    conv5 = conv_bn_layer(sum2, 8, 3, "conv5")
    conv6 = conv_bn_layer(conv5, 8, 3, "conv6")
    out = fluid.layers.fc(conv6, size=10, act="softmax")
    acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)

place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(startup_program)

val_reader = paddle.batch(reader.test(), batch_size=128)
val_feeder = fluid.DataFeeder([image, label], place, program=main_program)

def eval_func(program):
    acc_top1_ns = []
    for data in val_reader():
        acc_top1_n = exe.run(program,
                             feed=val_feeder.feed(data),
                             fetch_list=[acc_top1.name])
        acc_top1_ns.append(np.mean(acc_top1_n))
    return np.mean(acc_top1_ns)

param_names = []
for param in main_program.global_block().all_parameters():
    if "weights" in param.name:
        param_names.append(param.name)
sensitivities = sensitivity(main_program,
                            place,
                            param_names,
                            eval_func,
                            sensitivities_file="./sensitive.data",
                            pruned_ratios=[0.1, 0.2, 0.3])
print(sensitivities)
       

      merge_sensitive#

paddleslim.prune.merge_sensitive(sensitivities) [source]

Merges multiple pieces of sensitivity information into one.

@@ -487,22 +487,22 @@
  • sensitivities(dict) - The merged sensitivity information, in the following format:
{"weight_0":
   {0.1: 0.22,
    0.2: 0.33
   },
 "weight_1":
   {0.1: 0.21,
    0.2: 0.4
   }
}
       

Here weight_0 is the name of a convolution-layer parameter. Within sensitivities['weight_0'], each key is a pruning ratio and the corresponding value is the resulting accuracy loss.

Example:
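A minimal sketch of one plausible usage, assuming merge_sensitive accepts a list of sensitivity dicts in the format shown above (for example, sensitivities collected on different shards of the evaluation data):

from paddleslim.prune import merge_sensitive

# Two partial sensitivity results in the documented format {param_name: {ratio: acc_loss}}.
sens_0 = {"conv4_weights": {0.1: 0.22, 0.2: 0.33}}
sens_1 = {"conv4_weights": {0.3: 0.41}, "conv5_weights": {0.1: 0.10}}

# Merge them into a single dict keyed by parameter name.
sensitivities = merge_sensitive([sens_0, sens_1])
print(sensitivities)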

      load_sensitivities#

paddleslim.prune.load_sensitivities(sensitivities_file) [source]

Loads sensitivity information from a file.

@@ -518,7 +518,7 @@

Example:
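A minimal sketch, reusing the sensitivities_file written by the sensitivity example above:

from paddleslim.prune import load_sensitivities

# Load the sensitivity information saved by sensitivity(..., sensitivities_file="./sensitive.data").
sensitivities = load_sensitivities("./sensitive.data")
for param_name, ratio_to_loss in sensitivities.items():
    print(param_name, ratio_to_loss)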

      get_ratios_by_loss#

paddleslim.prune.get_ratios_by_loss(sensitivities, loss) [source]

Computes a set of pruning ratios from sensitivity information and an accuracy-loss threshold. For a parameter w, the chosen pruning ratio is the largest ratio that keeps the accuracy loss below loss.
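A minimal sketch of combining this with the sensitivity data produced above; the return value is assumed to be a dict mapping parameter names to pruning ratios:

from paddleslim.prune import get_ratios_by_loss, load_sensitivities

sensitivities = load_sensitivities("./sensitive.data")
# For each parameter, pick the largest pruning ratio whose accuracy loss stays below 3%.
ratios = get_ratios_by_loss(sensitivities, 0.03)
print(ratios)  # assumed format, e.g. {"conv4_weights": 0.2, ...}
# The resulting names and ratios can then be passed to Pruner().prune(params=..., ratios=...)
# as shown in the pruning example.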

diff --git a/api/quantization_api/index.html b/api/quantization_api/index.html
index 8e7d1707364f12f0eb7c8bde4120fa55eba4c838..87d88f93a84917f84685111c5f6f698cb9aa5b81 100644
--- a/api/quantization_api/index.html
+++ b/api/quantization_api/index.html
@@ -172,7 +172,7 @@
Quantization
@@ -184,29 +184,50 @@

Quantization configuration#

Quantization parameters are configured through a dict.

-quant_config_default = {
-    'weight_quantize_type': 'abs_max',
-    'activation_quantize_type': 'abs_max',
-    'weight_bits': 8,
-    'activation_bits': 8,
-    # ops of name_scope in not_quant_pattern list, will not be quantized
-    'not_quant_pattern': ['skip_quant'],
-    # ops of type in quantize_op_types, will be quantized
-    'quantize_op_types': ['conv2d', 'depthwise_conv2d', 'mul', 'elementwise_add', 'pool2d'],
-    # data type after quantization, such as 'uint8', 'int8', etc. default is 'int8'
-    'dtype': 'int8',
-    # window size for 'range_abs_max' quantization. default is 10000
-    'window_size': 10000,
-    # The decay coefficient of moving average, default is 0.9
-    'moving_rate': 0.9,
+TENSORRT_OP_TYPES = [
+    'mul', 'conv2d', 'pool2d', 'depthwise_conv2d', 'elementwise_add',
+    'leaky_relu'
+]
+TRANSFORM_PASS_OP_TYPES = ['conv2d', 'depthwise_conv2d', 'mul']
+
+QUANT_DEQUANT_PASS_OP_TYPES = [
+    "pool2d", "elementwise_add", "concat", "softmax", "argmax", "transpose",
+    "equal", "gather", "greater_equal", "greater_than", "less_equal",
+    "less_than", "mean", "not_equal", "reshape", "reshape2",
+    "bilinear_interp", "nearest_interp", "trilinear_interp", "slice",
+    "squeeze", "elementwise_sub", "relu", "relu6", "leaky_relu", "tanh", "swish"
+]
+
+_quant_config_default = {
+    # weight quantize type, default is 'channel_wise_abs_max'
+    'weight_quantize_type': 'channel_wise_abs_max',
+    # activation quantize type, default is 'moving_average_abs_max'
+    'activation_quantize_type': 'moving_average_abs_max',
+    # weight quantize bit num, default is 8
+    'weight_bits': 8,
+    # activation quantize bit num, default is 8
+    'activation_bits': 8,
+    # ops of name_scope in not_quant_pattern list, will not be quantized
+    'not_quant_pattern': ['skip_quant'],
+    # ops of type in quantize_op_types, will be quantized
+    'quantize_op_types': ['conv2d', 'depthwise_conv2d', 'mul'],
+    # data type after quantization, such as 'uint8', 'int8', etc. default is 'int8'
+    'dtype': 'int8',
+    # window size for 'range_abs_max' quantization. default is 10000
+    'window_size': 10000,
+    # The decay coefficient of moving average, default is 0.9
+    'moving_rate': 0.9,
+    # if True, 'quantize_op_types' will be TENSORRT_OP_TYPES
+    'for_tensorrt': False,
+    # if True, 'quantize_op_types' will be TRANSFORM_PASS_OP_TYPES + QUANT_DEQUANT_PASS_OP_TYPES
+    'is_full_quantize': False
 }
       

Parameters:

-  • weight_quantize_type(str) - Weight quantization type. One of 'abs_max', 'channel_wise_abs_max', 'range_abs_max', 'moving_average_abs_max'. Default: 'abs_max'.
-  • activation_quantize_type(str) - Activation quantization type. One of 'abs_max', 'range_abs_max', 'moving_average_abs_max'. Default: 'abs_max'.
+  • weight_quantize_type(str) - Weight quantization type. One of 'abs_max', 'channel_wise_abs_max', 'range_abs_max', 'moving_average_abs_max'. If the quantized model will be loaded by TensorRT for inference, use 'channel_wise_abs_max'. Default: 'channel_wise_abs_max'.
+  • activation_quantize_type(str) - Activation quantization type. One of 'abs_max', 'range_abs_max', 'moving_average_abs_max'. If the quantized model will be loaded by TensorRT for inference, use 'range_abs_max' or 'moving_average_abs_max'. Default: 'moving_average_abs_max'.
  • weight_bits(int) - Number of bits for weight quantization. Default: 8; 8 is recommended.
  • activation_bits(int) - Number of bits for activation quantization. Default: 8; 8 is recommended.
  • not_quant_pattern(str | list[str]) - Any op whose name_scope contains one of the strings in 'not_quant_pattern' is not quantized. See fluid.name_scope for how to set name scopes.
@@ -214,6 +235,14 @@
  • dtype(int8) - Data type of the quantized parameters. Default: int8; currently only int8 is supported.
  • window_size(int) - Window size for 'range_abs_max' quantization. Default: 10000.
  • moving_rate(int) - Decay coefficient for 'moving_average_abs_max' quantization. Default: 0.9.
+  • for_tensorrt(bool) - Whether the quantized model will be run with TensorRT. If True, the quantized op types are TENSORRT_OP_TYPES. Default: False.
+  • is_full_quantize(bool) - Whether to quantize all supported op types. Default: False.

Notes

+  • Currently the only ops that Paddle-Lite accelerates with int8 kernels are ['conv2d', 'depthwise_conv2d', 'mul']; int8 kernels for other ops will be supported over time.
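As an illustration of the keys above, a user-defined config only needs to list the entries that differ from _quant_config_default; unspecified keys keep their default values. The commented call at the end is only a sketch of the typical consumer of such a config (see the quant_aware section below for the actual interface):

import paddleslim.quant as quant

# Override only the entries that differ from _quant_config_default.
config = {
    'weight_quantize_type': 'channel_wise_abs_max',
    'activation_quantize_type': 'moving_average_abs_max',
    'not_quant_pattern': ['skip_quant'],
    'quantize_op_types': ['conv2d', 'depthwise_conv2d', 'mul'],
}

# Typical usage (sketch; train_program and place come from the surrounding training code):
# quant_program = quant.quant_aware(train_program, place, config, for_test=False)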

      quant_aware#

      @@ -237,13 +266,13 @@

Notes

  • This interface changes the structure of the program and may add persistable variables, so take care to load model parameters with the matching program.
  • Under the hood this interface goes through a fluid.Program -> fluid.framework.IrGraph -> fluid.Program conversion. fluid.framework.IrGraph has no notion of Parameter; variables are only distinguished as persistable or not persistable. Therefore, save and load parameters with the fluid.io.save_persistables and fluid.io.load_persistables interfaces (see the sketch after this list).
  • Because this interface adds ops to the program according to its structure and the quantization config, some Paddle strategies that speed up training by fusing ops cannot be used. The following are known to require False when quantization is used: fuse_all_reduce_ops, sync_batch_norm.
  • Any Variable in the given program that is not connected to any op will be optimized away during quantization.
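A minimal sketch of the save_persistables and build-strategy points above; exe, loss and quant_program are assumed to come from the surrounding quantization-aware training code:

import paddle.fluid as fluid

# 1. Save/load parameters of the quantized program with the persistables API.
# fluid.io.save_persistables(exe, "./quant_checkpoint", main_program=quant_program)
# fluid.io.load_persistables(exe, "./quant_checkpoint", main_program=quant_program)

# 2. Disable the fuse strategies that are incompatible with quantization when
#    building a parallel/compiled program.
build_strategy = fluid.BuildStrategy()
build_strategy.fuse_all_reduce_ops = False
build_strategy.sync_batch_norm = False
# compiled_program = fluid.CompiledProgram(quant_program).with_data_parallel(
#     loss_name=loss.name, build_strategy=build_strategy)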

    convert#

paddleslim.quant.convert(program, place, config, scope=None, save_int8=False) [source]
    @@ -266,10 +295,10 @@

Notes

Because this interface deletes and modifies ops and Variables, it can only be called after training has finished. To convert an intermediate model from training, load the corresponding parameters first and then call this interface.

Code example

#encoding=utf8
     import paddle.fluid as fluid
     import paddleslim.quant as quant
     
    @@ -311,7 +340,7 @@
     

For more detailed usage, see the quantization-aware training demo.

    quant_post#

-paddleslim.quant.quant_post(executor, model_dir, quantize_model_path, sample_generator, model_filename=None, params_filename=None, batch_size=16, batch_nums=None, scope=None, algo='KL', quantizable_op_type=["conv2d", "depthwise_conv2d", "mul"]) [source]
+paddleslim.quant.quant_post(executor, model_dir, quantize_model_path, sample_generator, model_filename=None, params_filename=None, batch_size=16, batch_nums=None, scope=None, algo='KL', quantizable_op_type=["conv2d", "depthwise_conv2d", "mul"], is_full_quantize=False, is_use_cache_file=False, cache_dir="./temp_post_training") [source]

Quantizes the model saved under ${model_dir}, using data produced by sample_generator to calibrate the quantization parameters.

    @@ -329,18 +358,24 @@
  • scope(fluid.Scope, optional) - The Scope used to look up and write Variables. If None, fluid.global_scope() is used. Default: None.
  • algo(str) - Name of the algorithm used during quantization, either 'KL' or 'direct'. This parameter only affects the quantization of activations, because weights are always quantized with 'channel_wise_abs_max'. With 'direct', the scale is the maximum absolute activation value observed on the calibration data; with 'KL', the scale is computed using KL divergence. Default: 'KL'.
  • quantizable_op_type(list[str]) - List of op types to be quantized. Default: ["conv2d", "depthwise_conv2d", "mul"].
+  • is_full_quantize(bool) - Whether to quantize all supported op types. If False, only the op types listed in 'quantizable_op_type' are quantized.
+  • is_use_cache_file(bool) - Whether to store intermediate results on disk. If False, intermediate results are kept in memory.
+  • cache_dir(str) - If 'is_use_cache_file' is True, intermediate results are stored under this path.
Returns:

None.

Notes

-  • Because this interface collects all of the activation values of the calibration data, do not use too many calibration images. Computing the 'KL' divergence is also fairly time-consuming.
+  • Because this interface collects all of the activation values of the calibration data, set 'is_use_cache_file' to True when there are many calibration images, so that intermediate results are stored on disk. In addition, computing the 'KL' divergence is fairly time-consuming.
+  • Currently the only ops that Paddle-Lite accelerates with int8 kernels are ['conv2d', 'depthwise_conv2d', 'mul']; int8 kernels for other ops will be supported over time.

Code example

Note: this example cannot be run as-is, because it needs to load the model saved under ${model_dir}.

import paddle.fluid as fluid
     import paddle.dataset.mnist as reader
     from paddleslim.quant import quant_post
     val_reader = reader.train()
    @@ -383,7 +418,7 @@
     

Return type

fluid.Program

Code example

import paddle.fluid as fluid
     import paddleslim.quant as quant
     
     train_program = fluid.Program()
diff --git a/api/single_distiller_api/index.html b/api/single_distiller_api/index.html
index 334c738d15c0c23c78fd4050a96f012ca220c810..11bb3de84331c403ca527ddb20718897d178e2d2 100644
--- a/api/single_distiller_api/index.html
+++ b/api/single_distiller_api/index.html
@@ -172,7 +172,7 @@
         
Knowledge distillation
@@ -184,9 +184,9 @@

    merge#

paddleslim.dist.merge(teacher_program, student_program, data_name_map, place, scope=fluid.global_scope(), name_prefix='teacher_') [source]

-merge fuses two paddle programs (teacher_program and student_program) into a single program and returns the fused program. In the fused program, distillation loss functions can be added between suitable teacher and student feature maps, so that the teacher model's dark knowledge guides the learning of the student model.
+merge fuses teacher_program into student_program. In the fused program, distillation loss functions can be added between suitable teacher and student feature maps, so that the teacher model's dark knowledge guides the learning of the student model.

Parameters:

@@ -198,13 +198,13 @@
  • scope(Scope) - The variable scope used by the programs. If not specified, the default global scope is used. Default: fluid.global_scope().
  • name_prefix(str) - The name prefix that merge adds to all of the teacher's Variables. Default: 'teacher_'.

-Returns: the program obtained by merging student_program and teacher_program
+Returns:

    Note

data_name_map maps teacher_var names to student_var names. If the mapping is written the other way around, merge may not work correctly.

Usage example:

import paddle.fluid as fluid
     import paddleslim.dist as dist
     student_program = fluid.Program()
     with fluid.program_guard(student_program):
    @@ -220,7 +220,7 @@
     data_name_map = {'y':'x'}
     USE_GPU = False
     place = fluid.CUDAPlace(0) if USE_GPU else fluid.CPUPlace()
    -main_program = dist.merge(teacher_program, student_program,
    +dist.merge(teacher_program, student_program,
                               data_name_map, place)
     
    @@ -241,7 +241,7 @@

Returns: the fsp_loss computed from teacher_var1, teacher_var2, student_var1, and student_var2.

Usage example:

import paddle.fluid as fluid
     import paddleslim.dist as dist
     student_program = fluid.Program()
     with fluid.program_guard(student_program):
    @@ -257,8 +257,8 @@
     data_name_map = {'y':'x'}
     USE_GPU = False
     place = fluid.CUDAPlace(0) if USE_GPU else fluid.CPUPlace()
    -main_program = merge(teacher_program, student_program, data_name_map, place)
    -with fluid.program_guard(main_program):
+dist.merge(teacher_program, student_program, data_name_map, place)
+with fluid.program_guard(student_program):
    distillation_loss = dist.fsp_loss('teacher_t1.tmp_1', 'teacher_t2.tmp_1',
                                      's1.tmp_1', 's2.tmp_1', student_program)
     
    @@ -272,13 +272,13 @@

Parameters:

  • teacher_var_name(str): Name of the teacher_var.
  • student_var_name(str): Name of the student_var.
  • program(Program): The fluid program used for distillation training. Default: fluid.default_main_program().

Returns: the l2_loss computed from teacher_var and student_var.

Usage example:

import paddle.fluid as fluid
     import paddleslim.dist as dist
     student_program = fluid.Program()
     with fluid.program_guard(student_program):
    @@ -294,8 +294,8 @@
     data_name_map = {'y':'x'}
     USE_GPU = False
     place = fluid.CUDAPlace(0) if USE_GPU else fluid.CPUPlace()
    -main_program = merge(teacher_program, student_program, data_name_map, place)
    -with fluid.program_guard(main_program):
+dist.merge(teacher_program, student_program, data_name_map, place)
+with fluid.program_guard(student_program):
    distillation_loss = dist.l2_loss('teacher_t2.tmp_1', 's2.tmp_1',
                                     student_program)
     
    @@ -309,15 +309,15 @@

Parameters:

  • teacher_var_name(str): Name of the teacher_var.
  • student_var_name(str): Name of the student_var.
  • program(Program): The fluid program used for distillation training. Default: fluid.default_main_program().
  • teacher_temperature(float): Temperature used when softening teacher_var; the higher the temperature, the smoother the resulting feature map.
  • student_temperature(float): Temperature used when softening student_var; the higher the temperature, the smoother the resulting feature map.

Returns: the soft_label_loss computed from teacher_var and student_var.

Usage example:

import paddle.fluid as fluid
     import paddleslim.dist as dist
     student_program = fluid.Program()
     with fluid.program_guard(student_program):
    @@ -333,8 +333,8 @@
     data_name_map = {'y':'x'}
     USE_GPU = False
     place = fluid.CUDAPlace(0) if USE_GPU else fluid.CPUPlace()
    -main_program = merge(teacher_program, student_program, data_name_map, place)
    -with fluid.program_guard(main_program):
+dist.merge(teacher_program, student_program, data_name_map, place)
+with fluid.program_guard(student_program):
    distillation_loss = dist.soft_label_loss('teacher_t2.tmp_1',
                                             's2.tmp_1', student_program, 1., 1.)
     
    @@ -348,13 +348,13 @@

Parameters:

Returns: the loss computed by the user-defined loss function.

Usage example:

import paddle.fluid as fluid
     import paddleslim.dist as dist
     student_program = fluid.Program()
     with fluid.program_guard(student_program):
    @@ -370,13 +370,13 @@
     data_name_map = {'y':'x'}
     USE_GPU = False
     place = fluid.CUDAPlace(0) if USE_GPU else fluid.CPUPlace()
    -main_program = merge(teacher_program, student_program, data_name_map, place)
+dist.merge(teacher_program, student_program, data_name_map, place)
     def adaptation_loss(t_var, s_var):
         teacher_channel = t_var.shape[1]
         s_hint = fluid.layers.conv2d(s_var, teacher_channel, 1)
         hint_loss = fluid.layers.reduce_mean(fluid.layers.square(s_hint - t_var))
         return hint_loss
    -with fluid.program_guard(main_program):
    +with fluid.program_guard(student_program):
    distillation_loss = dist.loss(student_program, adaptation_loss,
            t_var='teacher_t2.tmp_1', s_var='s2.tmp_1')
     
diff --git a/index.html b/index.html
index 50fd8ff300c708a82f23e82fcbccc4d40e25c726..f006c752332664dcc875cb52a8d1e649f5c0b0c9 100644
--- a/index.html
+++ b/index.html
@@ -168,7 +168,7 @@
• Home
@@ -211,15 +211,15 @@
• Install the develop version
git clone https://github.com/PaddlePaddle/PaddleSlim.git
cd PaddleSlim
python setup.py install
     
• Install the latest official release
pip install paddleslim -i https://pypi.org/simple
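To confirm that either install works, a quick import check like the following can be used (a sketch; the import paths match those used in the API examples above):

import paddleslim
from paddleslim.prune import Pruner
import paddleslim.quant as quant
import paddleslim.dist as dist
print("PaddleSlim imported successfully")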