<!DOCTYPE html>
<!--[if IE 8]><html class="no-js lt-ie9" lang="zh-CN" > <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="zh-CN" > <!--<![endif]-->
<head>
  <meta charset="utf-8">
  <meta http-equiv="X-UA-Compatible" content="IE=edge">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  
  
  <link rel="shortcut icon" href="../../img/favicon.ico">
  <title>量化 - PaddleSlim Docs</title>
  <link href='https://fonts.googleapis.com/css?family=Lato:400,700|Roboto+Slab:400,700|Inconsolata:400,700' rel='stylesheet' type='text/css'>

  <link rel="stylesheet" href="../../css/theme.css" type="text/css" />
  <link rel="stylesheet" href="../../css/theme_extra.css" type="text/css" />
  <link rel="stylesheet" href="//cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/github.min.css">
  
  <script>
    // Current page data
    var mkdocs_page_name = "\u91cf\u5316";
    var mkdocs_page_input_path = "api/quantization_api.md";
    var mkdocs_page_url = null;
  </script>
  
  <script src="../../js/jquery-2.1.1.min.js" defer></script>
  <script src="../../js/modernizr-2.8.3.min.js" defer></script>
  <script src="//cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/highlight.min.js"></script>
  <script>hljs.initHighlightingOnLoad();</script> 
  
</head>

<body class="wy-body-for-nav" role="document">

  <div class="wy-grid-for-nav">

    
    <nav data-toggle="wy-nav-shift" class="wy-nav-side stickynav">
      <div class="wy-side-nav-search">
        <a href="../.." class="icon icon-home"> PaddleSlim Docs</a>
        <div role="search">
  <form id ="rtd-search-form" class="wy-form" action="../../search.html" method="get">
    <input type="text" name="q" placeholder="Search docs" title="Type search term here" />
  </form>
</div>
      </div>

      <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
	<ul class="current">
	  
          
            <li class="toctree-l1">
		
    <a class="" href="../..">Home</a>
	    </li>
          
            <li class="toctree-l1">
		
    <span class="caption-text">教程</span>
    <ul class="subnav">
                <li class="">
                    
    <a class="" href="../../tutorials/quant_post_demo/">离线量化</a>
                </li>
                <li class="">
                    
    <a class="" href="../../tutorials/quant_aware_demo/">量化训练</a>
                </li>
                <li class="">
                    
    <a class="" href="../../tutorials/quant_embedding_demo/">Embedding量化</a>
                </li>
                <li class="">
                    
    <a class="" href="../../tutorials/nas_demo/">SA搜索</a>
                </li>
76 77 78 79
                <li class="">
                    
    <a class="" href="../../tutorials/distillation_demo/">知识蒸馏</a>
                </li>
80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108
    </ul>
	    </li>
          
            <li class="toctree-l1">
		
    <span class="caption-text">API</span>
    <ul class="subnav">
                <li class=" current">
                    
    <a class="current" href="./">量化</a>
    <ul class="subnav">
            
    <li class="toctree-l3"><a href="#paddleslimquant-api">paddleslim.quant API文档</a></li>
    
        <ul>
        
            <li><a class="toctree-l4" href="#api">量化训练API</a></li>
        
            <li><a class="toctree-l4" href="#api_1">离线量化API</a></li>
        
            <li><a class="toctree-l4" href="#embeddingapi">Embedding量化API</a></li>
        
        </ul>
    

    </ul>
                </li>
                <li class="">
                    
109
    <a class="" href="../prune_api/">剪枝与敏感度</a>
110 111 112
                </li>
                <li class="">
                    
113
    <a class="" href="../analysis_api/">模型分析</a>
114 115 116
                </li>
                <li class="">
                    
117
    <a class="" href="../single_distiller_api/">知识蒸馏</a>
118 119 120 121 122 123 124 125 126
                </li>
                <li class="">
                    
    <a class="" href="../nas_api/">SA搜索</a>
                </li>
                <li class="">
                    
    <a class="" href="../search_space/">搜索空间</a>
                </li>
127 128 129 130
                <li class="">
                    
    <a class="" href="../../table_latency/">硬件延时评估表</a>
                </li>
131 132 133
    </ul>
	    </li>
          
134 135 136 137 138
            <li class="toctree-l1">
		
    <a class="" href="../../algo/algo/">算法原理</a>
	    </li>
          
139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
        </ul>
      </div>
      &nbsp;
    </nav>

    <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">

      
      <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
        <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
        <a href="../..">PaddleSlim Docs</a>
      </nav>

      
      <div class="wy-nav-content">
        <div class="rst-content">
          <div role="navigation" aria-label="breadcrumbs navigation">
  <ul class="wy-breadcrumbs">
    <li><a href="../..">Docs</a> &raquo;</li>
    
      
        
          <li>API &raquo;</li>
        
      
    
    <li>量化</li>
    <li class="wy-breadcrumbs-aside">
      
        <a href="https://github.com/PaddlePaddle/PaddleSlim/edit/master/docs/api/quantization_api.md"
          class="icon icon-github"> Edit on GitHub</a>
      
    </li>
  </ul>
  <hr/>
</div>
          <div role="main">
            <div class="section">
              
                <h1 id="paddleslimquant-api">paddleslim.quant API文档<a class="headerlink" href="#paddleslimquant-api" title="Permanent link">#</a></h1>
<h2 id="api">量化训练API<a class="headerlink" href="#api" title="Permanent link">#</a></h2>
<h3 id="_1">量化配置<a class="headerlink" href="#_1" title="Permanent link">#</a></h3>
181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200
<p><table class="codehilitetable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span> 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20</pre></div></td><td class="code"><div class="codehilite"><pre><span></span><span class="nv">quant_config_default</span> <span class="o">=</span> {
    <span class="s1">&#39;</span><span class="s">weight_quantize_type</span><span class="s1">&#39;</span>: <span class="s1">&#39;</span><span class="s">abs_max</span><span class="s1">&#39;</span>,
    <span class="s1">&#39;</span><span class="s">activation_quantize_type</span><span class="s1">&#39;</span>: <span class="s1">&#39;</span><span class="s">abs_max</span><span class="s1">&#39;</span>,
    <span class="s1">&#39;</span><span class="s">weight_bits</span><span class="s1">&#39;</span>: <span class="mi">8</span>,
    <span class="s1">&#39;</span><span class="s">activation_bits</span><span class="s1">&#39;</span>: <span class="mi">8</span>,
    # <span class="nv">ops</span> <span class="nv">of</span> <span class="nv">name_scope</span> <span class="nv">in</span> <span class="nv">not_quant_pattern</span> <span class="nv">list</span>, <span class="nv">will</span> <span class="nv">not</span> <span class="nv">be</span> <span class="nv">quantized</span>
    <span class="s1">&#39;</span><span class="s">not_quant_pattern</span><span class="s1">&#39;</span>: [<span class="s1">&#39;</span><span class="s">skip_quant</span><span class="s1">&#39;</span>],
    # <span class="nv">ops</span> <span class="nv">of</span> <span class="nv">type</span> <span class="nv">in</span> <span class="nv">quantize_op_types</span>, <span class="nv">will</span> <span class="nv">be</span> <span class="nv">quantized</span>
    <span class="s1">&#39;</span><span class="s">quantize_op_types</span><span class="s1">&#39;</span>:
    [<span class="s1">&#39;</span><span class="s">conv2d</span><span class="s1">&#39;</span>, <span class="s1">&#39;</span><span class="s">depthwise_conv2d</span><span class="s1">&#39;</span>, <span class="s1">&#39;</span><span class="s">mul</span><span class="s1">&#39;</span>, <span class="s1">&#39;</span><span class="s">elementwise_add</span><span class="s1">&#39;</span>, <span class="s1">&#39;</span><span class="s">pool2d</span><span class="s1">&#39;</span>],
    # <span class="nv">data</span> <span class="nv">type</span> <span class="nv">after</span> <span class="nv">quantization</span>, <span class="nv">such</span> <span class="nv">as</span> <span class="s1">&#39;</span><span class="s">uint8</span><span class="s1">&#39;</span>, <span class="s1">&#39;</span><span class="s">int8</span><span class="s1">&#39;</span>, <span class="nv">etc</span>. <span class="nv">default</span> <span class="nv">is</span> <span class="s1">&#39;</span><span class="s">int8</span><span class="s1">&#39;</span>
    <span class="s1">&#39;</span><span class="s">dtype</span><span class="s1">&#39;</span>: <span class="s1">&#39;</span><span class="s">int8</span><span class="s1">&#39;</span>,
    # <span class="nv">window</span> <span class="nv">size</span> <span class="k">for</span> <span class="s1">&#39;</span><span class="s">range_abs_max</span><span class="s1">&#39;</span> <span class="nv">quantization</span>. <span class="nv">default</span> <span class="nv">is</span> <span class="mi">10000</span>
    <span class="s1">&#39;</span><span class="s">window_size</span><span class="s1">&#39;</span>: <span class="mi">10000</span>,
    # <span class="nv">The</span> <span class="nv">decay</span> <span class="nv">coefficient</span> <span class="nv">of</span> <span class="nv">moving</span> <span class="nv">average</span>, <span class="nv">default</span> <span class="nv">is</span> <span class="mi">0</span>.<span class="mi">9</span>
    <span class="s1">&#39;</span><span class="s">moving_rate</span><span class="s1">&#39;</span>: <span class="mi">0</span>.<span class="mi">9</span>,
    # <span class="k">if</span> <span class="nv">set</span> <span class="nv">quant_weight_only</span> <span class="nv">True</span>, <span class="k">then</span> <span class="nv">only</span> <span class="nv">quantize</span> <span class="nv">parameters</span> <span class="nv">of</span> <span class="nv">layers</span> <span class="nv">which</span> <span class="nv">need</span> <span class="nv">to</span> <span class="nv">be</span> <span class="nv">quantized</span>,
    # <span class="nv">and</span> <span class="nv">activations</span> <span class="nv">will</span> <span class="nv">not</span> <span class="nv">be</span> <span class="nv">quantized</span>.
    <span class="s1">&#39;</span><span class="s">quant_weight_only</span><span class="s1">&#39;</span>: <span class="nv">False</span>
}
</pre></div>
221 222
</td></tr></table>
设置量化训练需要的配置。</p>
223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275
<p><strong>参数:</strong></p>
<ul>
<li><strong>weight_quantize_type(str)</strong> - 参数量化方式。可选<code>'abs_max'</code>,  <code>'channel_wise_abs_max'</code>, <code>'range_abs_max'</code>, <code>'moving_average_abs_max'</code>。 默认<code>'abs_max'</code></li>
<li><strong>activation_quantize_type(str)</strong> - 激活量化方式,可选<code>'abs_max'</code>, <code>'range_abs_max'</code>, <code>'moving_average_abs_max'</code>,默认<code>'abs_max'</code></li>
<li><strong>weight_bits(int)</strong> - 参数量化bit数,默认8, 推荐设为8。</li>
<li><strong>activation_bits(int)</strong> -  激活量化bit数,默认8, 推荐设为8。</li>
<li><strong>not_quant_pattern(str or list[str])</strong> - 所有<code>name_scope</code>包含<code>'not_quant_pattern'</code>字符串的<code>op</code>,都不量化, 设置方式请参考<code>fluid.name_scope()</code></li>
<li><strong>quantize_op_types(list[str])</strong> -  需要进行量化的<code>op</code>类型,目前支持<code>'conv2d', 'depthwise_conv2d', 'mul'</code></li>
<li><strong>dtype(str)</strong> - 量化后的参数类型,默认 <code>int8</code>, 目前仅支持<code>int8</code></li>
<li><strong>window_size(int)</strong> -  <code>'range_abs_max'</code>量化方式的<code>window size</code>,默认10000。</li>
<li><strong>moving_rate(float)</strong> - <code>'moving_average_abs_max'</code>量化方式的衰减系数,默认 0.9。</li>
<li><strong>quant_weight_only(bool)</strong> - 是否只量化参数,如果设为<code>True</code>,则激活不进行量化,默认<code>False</code>。目前暂不支持设置为<code>True</code>。 设置为<code>True</code>时,只量化参数,这种方式不能减少显存占用和加速,只能用来减少带宽。</li>
</ul>
<h3 id="paddleslimquantquant_awareprogram-place-config-scopenone-for_testfalse">paddleslim.quant.quant_aware(program, place, config, scope=None, for_test=False)<a class="headerlink" href="#paddleslimquantquant_awareprogram-place-config-scopenone-for_testfalse" title="Permanent link">#</a></h3>
<p>在<code>program</code>中加入量化和反量化<code>op</code>, 用于量化训练。</p>
<p><strong>参数:</strong></p>
<ul>
<li><strong>program (fluid.Program)</strong> -  传入训练或测试<code>program</code></li>
<li><strong>place(fluid.CPUPlace or fluid.CUDAPlace)</strong> -  该参数表示<code>Executor</code>执行所在的设备。</li>
<li><strong>config(dict)</strong> -  量化配置表。</li>
<li><strong>scope(fluid.Scope, optional)</strong> -  传入用于存储<code>Variable</code>的<code>scope</code>,需要传入<code>program</code>所使用的<code>scope</code>,一般情况下,是<code>fluid.global_scope()</code>。设置为<code>None</code>时将使用<code>fluid.global_scope()</code>,默认值为<code>None</code></li>
<li><strong>for_test(bool)</strong> -  如果<code>program</code>参数是一个测试<code>program</code>,<code>for_test</code>应设为<code>True</code>,否则设为<code>False</code></li>
</ul>
<p><strong>返回</strong></p>
<p>含有量化和反量化<code>operator</code>的<code>program</code></p>
<p><strong>返回类型</strong></p>
<ul>
<li>当<code>for_test=False</code>,返回类型为<code>fluid.CompiledProgram</code>, <strong>注意,此返回值不能用于保存参数</strong></li>
<li>当<code>for_test=True</code>,返回类型为<code>fluid.Program</code></li>
</ul>
<p><strong>注意事项</strong></p>
<ul>
<li>此接口会改变<code>program</code>结构,并且可能增加一些<code>persistable</code>的变量,所以加载模型参数时请注意和相应的<code>program</code>对应。</li>
<li>此接口底层经历了<code>fluid.Program</code>-&gt; <code>fluid.framework.IrGraph</code>-&gt;<code>fluid.Program</code>的转变,在<code>fluid.framework.IrGraph</code>中没有<code>Parameter</code>的概念,<code>Variable</code>只有<code>persistable</code>和<code>not persistable</code>的区别,所以在保存和加载参数时,请使用<code>fluid.io.save_persistables</code>和<code>fluid.io.load_persistables</code>接口。</li>
<li>由于此接口会根据<code>program</code>的结构和量化配置来对<code>program</code>添加op,所以<code>Paddle</code>中一些通过<code>fuse op</code>来加速训练的策略不能使用。已知以下策略在使用量化时必须设为<code>False</code>: <code>fuse_all_reduce_ops, sync_batch_norm</code></li>
<li>如果传入的<code>program</code>中存在和任何op都没有连接的<code>Variable</code>,则会在量化的过程中被优化掉。</li>
</ul>
<h3 id="paddleslimquantconvertprogram-place-config-scopenone-save_int8false">paddleslim.quant.convert(program, place, config, scope=None, save_int8=False)<a class="headerlink" href="#paddleslimquantconvertprogram-place-config-scopenone-save_int8false" title="Permanent link">#</a></h3>
<p>把训练好的量化<code>program</code>,转换为可用于保存<code>inference model</code>的<code>program</code></p>
<p><strong>参数:</strong></p>
<ul>
<li><strong>program (fluid.Program)</strong> -  传入测试<code>program</code></li>
<li><strong>place(fluid.CPUPlace or fluid.CUDAPlace)</strong> - 该参数表示<code>Executor</code>执行所在的设备。</li>
<li><strong>config(dict)</strong> -  量化配置表。</li>
<li><strong>scope(fluid.Scope)</strong> - 传入用于存储<code>Variable</code>的<code>scope</code>,需要传入<code>program</code>所使用的<code>scope</code>,一般情况下,是<code>fluid.global_scope()</code>。设置为<code>None</code>时将使用<code>fluid.global_scope()</code>,默认值为<code>None</code></li>
<li><strong>save_int8(bool)</strong> -  是否需要返回参数为<code>int8</code>的<code>program</code>。该功能目前只能用于确认模型大小。默认值为<code>False</code></li>
</ul>
<p><strong>返回</strong></p>
<ul>
<li><strong>program (fluid.Program)</strong> - freezed program,可用于保存inference model,参数为<code>float32</code>类型,但其数值范围可用int8表示。</li>
<li><strong>int8_program (fluid.Program)</strong> - freezed program,可用于保存inference model,参数为<code>int8</code>类型。当<code>save_int8</code>为<code>False</code>时,不返回该值。</li>
</ul>
<p><strong>注意事项</strong></p>
<p>因为该接口会对<code>op</code>和<code>Variable</code>做相应的删除和修改,所以此接口只能在训练完成之后调用。如果想转化训练的中间模型,可加载相应的参数之后再使用此接口。</p>
<p><strong>代码示例</strong></p>
276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312
<table class="codehilitetable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span> 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37</pre></div></td><td class="code"><div class="codehilite"><pre><span></span><span class="c1">#encoding=utf8</span>
<span class="kn">import</span> <span class="nn">paddle.fluid</span> <span class="kn">as</span> <span class="nn">fluid</span>
<span class="kn">import</span> <span class="nn">paddleslim.quant</span> <span class="kn">as</span> <span class="nn">quant</span>


<span class="n">train_program</span> <span class="o">=</span> <span class="n">fluid</span><span class="o">.</span><span class="n">Program</span><span class="p">()</span>

<span class="k">with</span> <span class="n">fluid</span><span class="o">.</span><span class="n">program_guard</span><span class="p">(</span><span class="n">train_program</span><span class="p">):</span>
    <span class="n">image</span> <span class="o">=</span> <span class="n">fluid</span><span class="o">.</span><span class="n">data</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="s1">&#39;x&#39;</span><span class="p">,</span> <span class="n">shape</span><span class="o">=</span><span class="p">[</span><span class="bp">None</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">28</span><span class="p">,</span> <span class="mi">28</span><span class="p">])</span>
    <span class="n">label</span> <span class="o">=</span> <span class="n">fluid</span><span class="o">.</span><span class="n">data</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="s1">&#39;label&#39;</span><span class="p">,</span> <span class="n">shape</span><span class="o">=</span><span class="p">[</span><span class="bp">None</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="n">dtype</span><span class="o">=</span><span class="s1">&#39;int64&#39;</span><span class="p">)</span>
    <span class="n">conv</span> <span class="o">=</span> <span class="n">fluid</span><span class="o">.</span><span class="n">layers</span><span class="o">.</span><span class="n">conv2d</span><span class="p">(</span><span class="n">image</span><span class="p">,</span> <span class="mi">32</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
    <span class="n">feat</span> <span class="o">=</span> <span class="n">fluid</span><span class="o">.</span><span class="n">layers</span><span class="o">.</span><span class="n">fc</span><span class="p">(</span><span class="n">conv</span><span class="p">,</span> <span class="mi">10</span><span class="p">,</span> <span class="n">act</span><span class="o">=</span><span class="s1">&#39;softmax&#39;</span><span class="p">)</span>
    <span class="n">cost</span> <span class="o">=</span> <span class="n">fluid</span><span class="o">.</span><span class="n">layers</span><span class="o">.</span><span class="n">cross_entropy</span><span class="p">(</span><span class="nb">input</span><span class="o">=</span><span class="n">feat</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="n">label</span><span class="p">)</span>
    <span class="n">avg_cost</span> <span class="o">=</span> <span class="n">fluid</span><span class="o">.</span><span class="n">layers</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">x</span><span class="o">=</span><span class="n">cost</span><span class="p">)</span>

<span class="n">use_gpu</span> <span class="o">=</span> <span class="bp">True</span>
<span class="n">place</span> <span class="o">=</span> <span class="n">fluid</span><span class="o">.</span><span class="n">CUDAPlace</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span> <span class="k">if</span> <span class="n">use_gpu</span> <span class="k">else</span> <span class="n">fluid</span><span class="o">.</span><span class="n">CPUPlace</span><span class="p">()</span>
<span class="n">exe</span> <span class="o">=</span> <span class="n">fluid</span><span class="o">.</span><span class="n">Executor</span><span class="p">(</span><span class="n">place</span><span class="p">)</span>
<span class="n">exe</span><span class="o">.</span><span class="n">run</span><span class="p">(</span><span class="n">fluid</span><span class="o">.</span><span class="n">default_startup_program</span><span class="p">())</span>
<span class="n">eval_program</span> <span class="o">=</span> <span class="n">train_program</span><span class="o">.</span><span class="n">clone</span><span class="p">(</span><span class="n">for_test</span><span class="o">=</span><span class="bp">True</span><span class="p">)</span>
<span class="c1">#配置</span>
<span class="n">config</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;weight_quantize_type&#39;</span><span class="p">:</span> <span class="s1">&#39;abs_max&#39;</span><span class="p">,</span>
        <span class="s1">&#39;activation_quantize_type&#39;</span><span class="p">:</span> <span class="s1">&#39;moving_average_abs_max&#39;</span><span class="p">}</span>
<span class="n">build_strategy</span> <span class="o">=</span> <span class="n">fluid</span><span class="o">.</span><span class="n">BuildStrategy</span><span class="p">()</span>
<span class="n">exec_strategy</span> <span class="o">=</span> <span class="n">fluid</span><span class="o">.</span><span class="n">ExecutionStrategy</span><span class="p">()</span>
<span class="c1">#调用api</span>
<span class="n">quant_train_program</span> <span class="o">=</span> <span class="n">quant</span><span class="o">.</span><span class="n">quant_aware</span><span class="p">(</span><span class="n">train_program</span><span class="p">,</span> <span class="n">place</span><span class="p">,</span> <span class="n">config</span><span class="p">,</span> <span class="n">for_test</span><span class="o">=</span><span class="bp">False</span><span class="p">)</span>
<span class="n">quant_eval_program</span> <span class="o">=</span> <span class="n">quant</span><span class="o">.</span><span class="n">quant_aware</span><span class="p">(</span><span class="n">eval_program</span><span class="p">,</span> <span class="n">place</span><span class="p">,</span> <span class="n">config</span><span class="p">,</span> <span class="n">for_test</span><span class="o">=</span><span class="bp">True</span><span class="p">)</span>
<span class="c1">#关闭策略</span>
<span class="n">build_strategy</span><span class="o">.</span><span class="n">fuse_all_reduce_ops</span> <span class="o">=</span> <span class="bp">False</span>
<span class="n">build_strategy</span><span class="o">.</span><span class="n">sync_batch_norm</span> <span class="o">=</span> <span class="bp">False</span>
<span class="n">quant_train_program</span> <span class="o">=</span> <span class="n">quant_train_program</span><span class="o">.</span><span class="n">with_data_parallel</span><span class="p">(</span>
    <span class="n">loss_name</span><span class="o">=</span><span class="n">avg_cost</span><span class="o">.</span><span class="n">name</span><span class="p">,</span>
    <span class="n">build_strategy</span><span class="o">=</span><span class="n">build_strategy</span><span class="p">,</span>
    <span class="n">exec_strategy</span><span class="o">=</span><span class="n">exec_strategy</span><span class="p">)</span>

<span class="n">inference_prog</span> <span class="o">=</span> <span class="n">quant</span><span class="o">.</span><span class="n">convert</span><span class="p">(</span><span class="n">quant_eval_program</span><span class="p">,</span> <span class="n">place</span><span class="p">,</span> <span class="n">config</span><span class="p">)</span>
</pre></div>
350
</td></tr></table>
351 352 353

<p>更详细的用法请参考 <a href='../../demo/quant/quant_aware/README.md'>量化训练demo</a></p>
<h2 id="api_1">离线量化API<a class="headerlink" href="#api_1" title="Permanent link">#</a></h2>
354 355 356 357 358 359 360 361 362 363 364
<p><table class="codehilitetable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span> 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11</pre></div></td><td class="code"><div class="codehilite"><pre><span></span><span class="n">paddleslim</span><span class="p">.</span><span class="n">quant</span><span class="p">.</span><span class="n">quant_post</span><span class="p">(</span><span class="n">executor</span><span class="p">,</span>
           <span class="n">model_dir</span><span class="p">,</span>
           <span class="n">quantize_model_path</span><span class="p">,</span>
           <span class="n">sample_generator</span><span class="p">,</span>
           <span class="n">model_filename</span><span class="o">=</span><span class="k">None</span><span class="p">,</span>
           <span class="n">params_filename</span><span class="o">=</span><span class="k">None</span><span class="p">,</span>
           <span class="n">batch_size</span><span class="o">=</span><span class="mi">16</span><span class="p">,</span>
           <span class="n">batch_nums</span><span class="o">=</span><span class="k">None</span><span class="p">,</span>
           <span class="k">scope</span><span class="o">=</span><span class="k">None</span><span class="p">,</span>
           <span class="n">algo</span><span class="o">=</span><span class="s1">&#39;KL&#39;</span><span class="p">,</span>
           <span class="n">quantizable_op_type</span><span class="o">=</span><span class="p">[</span><span class="ss">&quot;conv2d&quot;</span><span class="p">,</span> <span class="ss">&quot;depthwise_conv2d&quot;</span><span class="p">,</span> <span class="ss">&quot;mul&quot;</span><span class="p">])</span>
</pre></div>
376 377
</td></tr></table>
对保存在<code>${model_dir}</code>下的模型进行量化,使用<code>sample_generator</code>的数据进行参数校正。</p>
<p><strong>参数:</strong></p>
<ul>
<li><strong>executor (fluid.Executor)</strong> - 执行模型的executor,可以在cpu或者gpu上执行。</li>
<li><strong>model_dir(str)</strong> - 需要量化的模型所在的文件夹。</li>
<li><strong>quantize_model_path(str)</strong> - 保存量化后的模型的路径</li>
<li><strong>sample_generator(python generator)</strong> - 读取数据样本,每次返回一个样本。</li>
<li><strong>model_filename(str, optional)</strong> - 模型文件名,如果需要量化的模型的参数存在一个文件中,则需要设置<code>model_filename</code>为模型文件的名称,否则设置为<code>None</code>即可。默认值是<code>None</code></li>
<li><strong>params_filename(str)</strong> - 参数文件名,如果需要量化的模型的参数存在一个文件中,则需要设置<code>params_filename</code>为参数文件的名称,否则设置为<code>None</code>即可。默认值是<code>None</code></li>
<li><strong>batch_size(int)</strong> - 每个batch的图片数量。默认值为16 。</li>
<li><strong>batch_nums(int, optional)</strong> - 迭代次数。如果设置为<code>None</code>,则会一直运行到<code>sample_generator</code> 迭代结束, 否则,迭代次数为<code>batch_nums</code>, 也就是说参与对<code>Scale</code>进行校正的样本个数为 <code>'batch_nums' * 'batch_size'</code>.</li>
<li><strong>scope(fluid.Scope, optional)</strong> - 用来获取和写入<code>Variable</code>, 如果设置为<code>None</code>,则使用<code>fluid.global_scope()</code>. 默认值是<code>None</code>.</li>
<li><strong>algo(str)</strong> - 量化时使用的算法名称,可为<code>'KL'</code>或者<code>'direct'</code>。该参数仅针对激活值的量化,因为参数值的量化使用的方式为<code>'channel_wise_abs_max'</code>. 当<code>algo</code> 设置为<code>'direct'</code>时,使用校正数据的激活值的绝对值的最大值当作<code>Scale</code>值,当设置为<code>'KL'</code>时,则使用<code>KL</code>散度的方法来计算<code>Scale</code>值。默认值为<code>'KL'</code></li>
<li><strong>quantizable_op_type(list[str])</strong> -  需要量化的<code>op</code>类型列表。默认值为<code>["conv2d", "depthwise_conv2d", "mul"]</code></li>
</ul>
<p><strong>返回</strong></p>
<p>无。</p>
<p><strong>注意事项</strong></p>
<p>因为该接口会收集校正数据的所有的激活值,所以使用的校正图片不能太多。<code>'KL'</code>散度的计算也比较耗时。</p>
<p><strong>代码示例</strong></p>
<blockquote>
<p>注: 此示例不能直接运行,因为需要加载<code>${model_dir}</code>下的模型。</p>
</blockquote>
398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414
<p><table class="codehilitetable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span> 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17</pre></div></td><td class="code"><div class="codehilite"><pre><span></span><span class="kn">import</span> <span class="nn">paddle.fluid</span> <span class="kn">as</span> <span class="nn">fluid</span>
<span class="kn">import</span> <span class="nn">paddle.dataset.mnist</span> <span class="kn">as</span> <span class="nn">reader</span>
<span class="kn">from</span> <span class="nn">paddleslim.quant</span> <span class="kn">import</span> <span class="n">quant_post</span>
<span class="n">val_reader</span> <span class="o">=</span> <span class="n">reader</span><span class="o">.</span><span class="n">train</span><span class="p">()</span>
<span class="n">use_gpu</span> <span class="o">=</span> <span class="bp">True</span>
<span class="n">place</span> <span class="o">=</span> <span class="n">fluid</span><span class="o">.</span><span class="n">CUDAPlace</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span> <span class="k">if</span> <span class="n">use_gpu</span> <span class="k">else</span> <span class="n">fluid</span><span class="o">.</span><span class="n">CPUPlace</span><span class="p">()</span>

<span class="n">exe</span> <span class="o">=</span> <span class="n">fluid</span><span class="o">.</span><span class="n">Executor</span><span class="p">(</span><span class="n">place</span><span class="p">)</span>
<span class="n">quant_post</span><span class="p">(</span>
        <span class="n">executor</span><span class="o">=</span><span class="n">exe</span><span class="p">,</span>
        <span class="n">model_dir</span><span class="o">=</span><span class="s1">&#39;./model_path&#39;</span><span class="p">,</span>
        <span class="n">quantize_model_path</span><span class="o">=</span><span class="s1">&#39;./save_path&#39;</span><span class="p">,</span>
        <span class="n">sample_generator</span><span class="o">=</span><span class="n">val_reader</span><span class="p">,</span>
        <span class="n">model_filename</span><span class="o">=</span><span class="s1">&#39;__model__&#39;</span><span class="p">,</span>
        <span class="n">params_filename</span><span class="o">=</span><span class="s1">&#39;__params__&#39;</span><span class="p">,</span>
        <span class="n">batch_size</span><span class="o">=</span><span class="mi">16</span><span class="p">,</span>
        <span class="n">batch_nums</span><span class="o">=</span><span class="mi">10</span><span class="p">)</span>
</pre></div>
432 433
</td></tr></table>
更详细的用法请参考 <a href='../../demo/quant/quant_post/README.md'>离线量化demo</a></p>
434
<h2 id="embeddingapi">Embedding量化API<a class="headerlink" href="#embeddingapi" title="Permanent link">#</a></h2>
435
<p><table class="codehilitetable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span>1</pre></div></td><td class="code"><div class="codehilite"><pre><span></span><span class="n">paddleslim</span><span class="p">.</span><span class="n">quant</span><span class="p">.</span><span class="n">quant_embedding</span><span class="p">(</span><span class="n">program</span><span class="p">,</span> <span class="n">place</span><span class="p">,</span> <span class="n">config</span><span class="p">,</span> <span class="k">scope</span><span class="o">=</span><span class="k">None</span><span class="p">)</span>
</pre></div>
</td></tr></table>
对<code>Embedding</code>参数进行量化。</p>
<p><strong>参数:</strong></p>
<ul>
<li><strong>program(fluid.Program)</strong> - 需要量化的program</li>
<li><strong>scope(fluid.Scope, optional)</strong> - 用来获取和写入<code>Variable</code>, 如果设置为<code>None</code>,则使用<code>fluid.global_scope()</code>.</li>
<li><strong>place(fluid.CPUPlace or fluid.CUDAPlace)</strong> - 运行program的设备</li>
<li><strong>config(dict)</strong> - 定义量化的配置。可以配置的参数有:
<ul>
<li><code>'params_name'</code> (str, required): 需要进行量化的参数名称,此参数必须设置。</li>
<li><code>'quantize_type'</code> (str, optional): 量化的类型,目前支持的类型是<code>'abs_max'</code>, 待支持的类型有 <code>'log', 'product_quantization'</code>。 默认值是<code>'abs_max'</code>.</li>
<li><code>'quantize_bits'</code>(int, optional): 量化的<code>bit</code>数,目前支持的<code>bit</code>数为8。默认值是8.</li>
<li><code>'dtype'</code>(str, optional): 量化之后的数据类型, 目前支持的是<code>'int8'</code>. 默认值是<code>'int8'</code>.</li>
<li><code>'threshold'</code>(float, optional): 量化之前将根据此阈值对需要量化的参数值进行<code>clip</code>. 如果不设置,则跳过<code>clip</code>过程直接量化。</li>
</ul>
</li>
</ul>
<p><strong>返回</strong></p>
<p>量化之后的program</p>
<p><strong>返回类型</strong></p>
<p><code>fluid.Program</code></p>
<p><strong>代码示例</strong>
<table class="codehilitetable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span> 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22</pre></div></td><td class="code"><div class="codehilite"><pre><span></span><span class="kn">import</span> <span class="nn">paddle.fluid</span> <span class="kn">as</span> <span class="nn">fluid</span>
<span class="kn">import</span> <span class="nn">paddleslim.quant</span> <span class="kn">as</span> <span class="nn">quant</span>

<span class="n">train_program</span> <span class="o">=</span> <span class="n">fluid</span><span class="o">.</span><span class="n">Program</span><span class="p">()</span>
<span class="k">with</span> <span class="n">fluid</span><span class="o">.</span><span class="n">program_guard</span><span class="p">(</span><span class="n">train_program</span><span class="p">):</span>
    <span class="n">input_word</span> <span class="o">=</span> <span class="n">fluid</span><span class="o">.</span><span class="n">data</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="s2">&quot;input_word&quot;</span><span class="p">,</span> <span class="n">shape</span><span class="o">=</span><span class="p">[</span><span class="bp">None</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="n">dtype</span><span class="o">=</span><span class="s1">&#39;int64&#39;</span><span class="p">)</span>
    <span class="n">input_emb</span> <span class="o">=</span> <span class="n">fluid</span><span class="o">.</span><span class="n">embedding</span><span class="p">(</span>
        <span class="nb">input</span><span class="o">=</span><span class="n">input_word</span><span class="p">,</span>
        <span class="n">is_sparse</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span>
        <span class="n">size</span><span class="o">=</span><span class="p">[</span><span class="mi">100</span><span class="p">,</span> <span class="mi">128</span><span class="p">],</span>
        <span class="n">param_attr</span><span class="o">=</span><span class="n">fluid</span><span class="o">.</span><span class="n">ParamAttr</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="s1">&#39;emb&#39;</span><span class="p">,</span>
        <span class="n">initializer</span><span class="o">=</span><span class="n">fluid</span><span class="o">.</span><span class="n">initializer</span><span class="o">.</span><span class="n">Uniform</span><span class="p">(</span><span class="o">-</span><span class="mf">0.005</span><span class="p">,</span> <span class="mf">0.005</span><span class="p">)))</span>

<span class="n">infer_program</span> <span class="o">=</span> <span class="n">train_program</span><span class="o">.</span><span class="n">clone</span><span class="p">(</span><span class="n">for_test</span><span class="o">=</span><span class="bp">True</span><span class="p">)</span>

<span class="n">use_gpu</span> <span class="o">=</span> <span class="bp">True</span>
<span class="n">place</span> <span class="o">=</span> <span class="n">fluid</span><span class="o">.</span><span class="n">CUDAPlace</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span> <span class="k">if</span> <span class="n">use_gpu</span> <span class="k">else</span> <span class="n">fluid</span><span class="o">.</span><span class="n">CPUPlace</span><span class="p">()</span>
<span class="n">exe</span> <span class="o">=</span> <span class="n">fluid</span><span class="o">.</span><span class="n">Executor</span><span class="p">(</span><span class="n">place</span><span class="p">)</span>
<span class="n">exe</span><span class="o">.</span><span class="n">run</span><span class="p">(</span><span class="n">fluid</span><span class="o">.</span><span class="n">default_startup_program</span><span class="p">())</span>

<span class="n">config</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;params_name&#39;</span><span class="p">:</span> <span class="s1">&#39;emb&#39;</span><span class="p">,</span> <span class="s1">&#39;quantize_type&#39;</span><span class="p">:</span> <span class="s1">&#39;abs_max&#39;</span><span class="p">}</span>
<span class="n">quant_program</span> <span class="o">=</span> <span class="n">quant</span><span class="o">.</span><span class="n">quant_embedding</span><span class="p">(</span><span class="n">infer_program</span><span class="p">,</span> <span class="n">place</span><span class="p">,</span> <span class="n">config</span><span class="p">)</span>
</pre></div>
</td></tr></table></p>
<p>更详细的用法请参考 <a href='../../demo/quant/quant_embedding/README.md'>Embedding量化demo</a></p>
              
            </div>
          </div>
          <!-- Page footer: next/previous page buttons followed by the theme attribution. -->
          <footer>

            <!-- "Next" comes first in source order; the float-right class positions it visually. -->
            <div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">

              <a href="../prune_api/" class="btn btn-neutral float-right" title="剪枝与敏感度">Next <span class="icon icon-circle-arrow-right" aria-hidden="true"></span></a>

              <a href="../../tutorials/distillation_demo/" class="btn btn-neutral" title="知识蒸馏"><span class="icon icon-circle-arrow-left" aria-hidden="true"></span> Previous</a>

            </div>

            <hr>

            <div role="contentinfo">
              <!-- Copyright etc -->
            </div>

            Built with <a href="https://www.mkdocs.org">MkDocs</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
          </footer>
      
        </div>
      </div>

    </section>

  </div>

  <!-- Bottom bar: repository link plus previous/next page shortcuts.
       The data-toggle hook is kept as-is; presumably the theme script binds to it. -->
  <div class="rst-versions" role="note" aria-label="versions" style="cursor: pointer">
    <span class="rst-current-version" data-toggle="rst-current-version">

      <a href="https://github.com/PaddlePaddle/PaddleSlim/" class="fa fa-github" style="float: left; color: #fcfcfc"> GitHub</a>

      <span><a href="../../tutorials/distillation_demo/" style="color: #fcfcfc;">&laquo; Previous</a></span>

      <span style="margin-left: 15px"><a href="../prune_api/" style="color: #fcfcfc">Next &raquo;</a></span>

    </span>
  </div>
    <!-- Inline script runs during parsing, so base_url exists before any deferred script below executes
         (presumably theme.js / search/main.js read it; confirm against those files). -->
    <script>var base_url = '../..';</script>
    <script src="../../js/theme.js" defer></script>
    <!-- Deferred scripts execute in document order: the local MathJax config is listed before the CDN library. -->
    <script src="../../mathjax-config.js" defer></script>
    <script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/MathJax.js?config=TeX-AMS-MML_HTMLorMML" defer></script>
    <script src="../../search/main.js" defer></script>

</body>
</html>