

<!DOCTYPE html>
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
  <meta charset="utf-8">
  
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  
  <title>Memory Optimization &mdash; PaddlePaddle  文档</title>
  

  
  

  

  
  
    

  

  
  
    <link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
  

  
  
        <link rel="index" title="索引"
              href="../genindex.html"/>
        <link rel="search" title="搜索" href="../search.html"/>
    <link rel="top" title="PaddlePaddle  文档" href="../index.html"/> 

  <link rel="stylesheet" href="https://cdn.jsdelivr.net/perfect-scrollbar/0.6.14/css/perfect-scrollbar.min.css" type="text/css" />
  <link rel="stylesheet" href="../_static/css/override.css" type="text/css" />
  <script>
  var _hmt = _hmt || [];
  (function() {
    var hm = document.createElement("script");
    hm.src = "//hm.baidu.com/hm.js?b9a314ab40d04d805655aab1deee08ba";
    var s = document.getElementsByTagName("script")[0]; 
    s.parentNode.insertBefore(hm, s);
  })();
  </script>

  

  
  <script src="../_static/js/modernizr.min.js"></script>

</head>

<body class="wy-body-for-nav" role="document">

  
  <header class="site-header">
    <div class="site-logo">
      <a href="/"><img src="../_static/images/PP_w.png"></a>
    </div>
    <div class="site-nav-links">
      <div class="site-menu">
        <a class="fork-on-github" href="https://github.com/PaddlePaddle/Paddle" target="_blank"><i class="fa fa-github"></i>Fork me on Github</a>
        <div class="language-switcher dropdown">
          <a type="button" data-toggle="dropdown">
            <span>English</span>
            <i class="fa fa-angle-up"></i>
            <i class="fa fa-angle-down"></i>
          </a>
          <ul class="dropdown-menu">
            <li><a href="/doc_cn">中文</a></li>
            <li><a href="/doc">English</a></li>
          </ul>
        </div>
        <ul class="site-page-links">
          <li><a href="/">Home</a></li>
        </ul>
      </div>
      <div class="doc-module">
        
        <ul>
<li class="toctree-l1"><a class="reference internal" href="../getstarted/index_cn.html">新手入门</a></li>
<li class="toctree-l1"><a class="reference internal" href="../build_and_install/index_cn.html">安装与编译</a></li>
<li class="toctree-l1"><a class="reference internal" href="../howto/index_cn.html">进阶使用</a></li>
<li class="toctree-l1"><a class="reference internal" href="../dev/index_cn.html">开发标准</a></li>
<li class="toctree-l1"><a class="reference internal" href="../faq/index_cn.html">FAQ</a></li>
</ul>

        
<div role="search">
  <form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
    <input type="text" name="q" placeholder="Search docs" />
    <input type="hidden" name="check_keywords" value="yes" />
    <input type="hidden" name="area" value="default" />
  </form>
</div>        
      </div>
    </div>
  </header>
  
  <div class="main-content-wrap">

    
    <nav class="doc-menu-vertical" role="navigation">
        
          
          <ul>
<li class="toctree-l1"><a class="reference internal" href="../getstarted/index_cn.html">新手入门</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../getstarted/quickstart_cn.html">快速开始</a></li>
<li class="toctree-l2"><a class="reference internal" href="../getstarted/concepts/use_concepts_cn.html">基本使用概念</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../build_and_install/index_cn.html">安装与编译</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../build_and_install/pip_install_cn.html">使用pip安装</a></li>
<li class="toctree-l2"><a class="reference internal" href="../build_and_install/docker_install_cn.html">使用Docker安装运行</a></li>
<li class="toctree-l2"><a class="reference internal" href="../build_and_install/build_cn.html">用Docker编译和测试PaddlePaddle</a></li>
<li class="toctree-l2"><a class="reference internal" href="../build_and_install/build_from_source_cn.html">从源码编译</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../howto/index_cn.html">进阶使用</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../howto/cmd_parameter/index_cn.html">命令行参数设置</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../howto/cmd_parameter/use_case_cn.html">使用案例</a></li>
<li class="toctree-l3"><a class="reference internal" href="../howto/cmd_parameter/arguments_cn.html">参数概述</a></li>
<li class="toctree-l3"><a class="reference internal" href="../howto/cmd_parameter/detail_introduction_cn.html">细节描述</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../howto/cluster/index_cn.html">分布式训练</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../howto/cluster/preparations_cn.html">环境准备</a></li>
<li class="toctree-l3"><a class="reference internal" href="../howto/cluster/cmd_argument_cn.html">启动参数说明</a></li>
<li class="toctree-l3"><a class="reference internal" href="../howto/cluster/multi_cluster/index_cn.html">在不同集群中运行</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../howto/cluster/multi_cluster/fabric_cn.html">使用fabric启动集群训练</a></li>
<li class="toctree-l4"><a class="reference internal" href="../howto/cluster/multi_cluster/openmpi_cn.html">在OpenMPI集群中提交训练作业</a></li>
<li class="toctree-l4"><a class="reference internal" href="../howto/cluster/multi_cluster/k8s_cn.html">Kubernetes单机训练</a></li>
<li class="toctree-l4"><a class="reference internal" href="../howto/cluster/multi_cluster/k8s_distributed_cn.html">Kubernetes分布式训练</a></li>
<li class="toctree-l4"><a class="reference internal" href="../howto/cluster/multi_cluster/k8s_aws_cn.html">Distributed PaddlePaddle Training on AWS with Kubernetes</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../howto/capi/index_cn.html">C-API预测库</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../howto/capi/compile_paddle_lib_cn.html">安装与编译C-API预测库</a></li>
<li class="toctree-l3"><a class="reference internal" href="../howto/capi/organization_of_the_inputs_cn.html">输入/输出数据组织</a></li>
<li class="toctree-l3"><a class="reference internal" href="../howto/capi/workflow_of_capi_cn.html">C-API使用流程</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../howto/rnn/index_cn.html">RNN模型</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../howto/rnn/rnn_config_cn.html">RNN配置</a></li>
<li class="toctree-l3"><a class="reference internal" href="../howto/rnn/recurrent_group_cn.html">Recurrent Group教程</a></li>
<li class="toctree-l3"><a class="reference internal" href="../howto/rnn/hierarchical_layer_cn.html">支持双层序列作为输入的Layer</a></li>
<li class="toctree-l3"><a class="reference internal" href="../howto/rnn/hrnn_rnn_api_compare_cn.html">单双层RNN API对比介绍</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../howto/optimization/gpu_profiling_cn.html">GPU性能调优</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../dev/index_cn.html">开发标准</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../dev/contribute_to_paddle_cn.html">如何贡献代码</a></li>
<li class="toctree-l2"><a class="reference internal" href="../dev/write_docs_cn.html">如何贡献文档</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../faq/index_cn.html">FAQ</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../faq/build_and_install/index_cn.html">编译安装与单元测试</a></li>
<li class="toctree-l2"><a class="reference internal" href="../faq/model/index_cn.html">模型配置</a></li>
<li class="toctree-l2"><a class="reference internal" href="../faq/parameter/index_cn.html">参数设置</a></li>
<li class="toctree-l2"><a class="reference internal" href="../faq/local/index_cn.html">本地训练与预测</a></li>
<li class="toctree-l2"><a class="reference internal" href="../faq/cluster/index_cn.html">集群训练与预测</a></li>
</ul>
</li>
</ul>

        
    </nav>
    
    <section class="doc-content-wrap">

      

 







<div role="navigation" aria-label="breadcrumbs navigation">
  <ul class="wy-breadcrumbs">
      
    <li>Memory Optimization</li>
  </ul>
</div>
      
      <div class="wy-nav-content" id="doc-content">
        <div class="rst-content">
          <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
           <div itemprop="articleBody">
            
  <div class="section" id="memory-optimization">
<span id="memory-optimization"></span><h1>Memory Optimization<a class="headerlink" href="#memory-optimization" title="永久链接至标题"></a></h1>
<div class="section" id="problem">
<span id="problem"></span><h2>Problem<a class="headerlink" href="#problem" title="永久链接至标题"></a></h2>
<p>In a lecture, Andrew Ng attributes the recent success of AI to a combination of the following:</p>
<ul class="simple">
<li>Availability of Big Data</li>
<li>Supercomputing power to process this Big Data over very large neural networks</li>
<li>Modern algorithms</li>
</ul>
<p>The following graph shows the details:</p>
<p><img alt="" src="../_images/deep_learning.png" /></p>
<p>Larger models usually bring better performance. However, GPU memory is limited; for example, a GTX TITAN X has only 12GB of memory. To train complex and large models, we have to take care of memory usage. Besides training, memory optimization is also necessary for online and mobile inference.</p>
</div>
<div class="section" id="solution">
<span id="solution"></span><h2>Solution<a class="headerlink" href="#solution" title="永久链接至标题"></a></h2>
<div class="section" id="basic-strategy">
<span id="basic-strategy"></span><h3>Basic Strategy<a class="headerlink" href="#basic-strategy" title="永久链接至标题"></a></h3>
<p>There are some basic strategies to improve memory usage, including in-place operations and memory sharing.</p>
<div class="section" id="in-place-operation">
<span id="in-place-operation"></span><h4>In-place Operation<a class="headerlink" href="#in-place-operation" title="永久链接至标题"></a></h4>
<p>In a relu activation operator:</p>
<p>$y = \max(x, 0)$</p>
<p>If the variable x is not used by any other operator, we can perform the operation in place; in other words, variable y and variable x will share the same memory block. For this pair of variables, the in-place operation immediately saves 50% of the memory they would otherwise occupy.</p>
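<p>A small NumPy sketch (an illustration only, not PaddlePaddle API) makes the saving concrete: the out-of-place version allocates a second buffer for y, while the in-place version writes the result back into the buffer of x.</p>
<div class="highlight-python"><div class="highlight"><pre>import numpy as np

x = np.random.randn(1024, 1024).astype("float32")   # a 4 MB activation

# Out-of-place relu: a second 4 MB buffer is allocated for y.
y = np.maximum(x, 0)

# In-place relu: if x is not needed by any later operator,
# the result can overwrite x and no extra buffer is needed.
np.maximum(x, 0, out=x)
</pre></div>
</div>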
</div>
<div class="section" id="memory-sharing">
<span id="memory-sharing"></span><h4>Memory Sharing<a class="headerlink" href="#memory-sharing" title="永久链接至标题"></a></h4>
<p>Not all operators support in-place operations. Memory sharing is a more general strategy.</p>
<p>The following is an example:</p>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="n">a</span> <span class="o">=</span> <span class="n">op1</span><span class="p">(</span><span class="n">b</span><span class="p">,</span> <span class="n">c</span><span class="p">);</span>
<span class="n">d</span> <span class="o">=</span> <span class="n">op2</span><span class="p">(</span><span class="n">a</span><span class="p">)</span>
<span class="n">e</span> <span class="o">=</span> <span class="n">op3</span><span class="p">(</span><span class="n">d</span><span class="p">,</span> <span class="n">f</span><span class="p">)</span>
</pre></div>
</div>
<p>In this case, variable a is no longer used after op2, and op2 does not support in-place operation. After op2 finishes, we can return the memory of variable a to a memory pool. Variable e can then reuse the memory of variable a from the pool.</p>
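<p>A minimal sketch of the idea with a hypothetical pool interface (the names MemoryPool, release and acquire are illustrative, not the actual implementation):</p>
<div class="highlight-python"><div class="highlight"><pre>from collections import defaultdict

class MemoryPool(object):
    """Toy pool: free blocks grouped by size (hypothetical interface)."""
    def __init__(self):
        self._free = defaultdict(list)

    def release(self, block):
        self._free[len(block)].append(block)

    def acquire(self, size):
        blocks = self._free[size]
        return blocks.pop() if blocks else bytearray(size)

pool = MemoryPool()
block_a = bytearray(4096)      # memory backing variable a
# ... op2 runs; afterwards a is no longer live ...
pool.release(block_a)
block_e = pool.acquire(4096)   # variable e reuses the block of a
assert block_e is block_a
</pre></div>
</div>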
</div>
</div>
<div class="section" id="live-variable-analysis">
<span id="live-variable-analysis"></span><h3>Live Variable Analysis<a class="headerlink" href="#live-variable-analysis" title="永久链接至标题"></a></h3>
<p>Having some basic strategies is not enough. The prerequisite of memory optimization is to know whether a variable is still &#8220;live&#8221; after an operation.</p>
<p>In our design, the neural network topology is defined as a program. Luckily, <a class="reference external" href="https://en.wikipedia.org/wiki/Live_variable_analysis">live variable analysis</a> is a classic compiler problem whose techniques are used in many stages, such as register allocation.</p>
<p>In compilers, the front end translates programs into an intermediate language with an unbounded number of temporary variables. This program must run on a machine with a bounded number of registers. Two temporary variables a and b can fit into the same register if a and b are never &#8220;in use&#8221; at the same time. Thus, many temporary variables can fit in a few registers; if they don&#8217;t all fit, the excess temporary variables can be kept in memory.</p>
<p>Therefore, the compiler needs to analyze the intermediate-representation program to determine which temporary variables are in use at the same time. We say a variable is &#8220;live&#8221; if it holds a value that may be needed in the future, so this analysis is called liveness analysis.</p>
<p>We can learn these techniques from compilers. Live variable analysis has two main stages:</p>
<ul class="simple">
<li>construct a control flow graph</li>
<li>solve the dataflow equations</li>
</ul>
<div class="section" id="control-flow-graph">
<span id="control-flow-graph"></span><h4>Control Flow Graph<a class="headerlink" href="#control-flow-graph" title="永久链接至标题"></a></h4>
<p>To perform analysis on a program, it is often useful to make a control flow graph. A <a class="reference external" href="https://en.wikipedia.org/wiki/Control_flow_graph">control flow graph</a> (CFG) in computer science is a representation, using graph notation, of all paths that might be traversed through a program during its execution. Each statement in the program is a node in the flow graph; if statement x can be followed by statement y, there is an edge from x to y.</p>
<p>The following is the flow graph for a simple loop.</p>
<p><img alt="" src="../_images/control_flow_graph.png" /></p>
</div>
<div class="section" id="dataflow-analysis">
<span id="dataflow-analysis"></span><h4>Dataflow Analysis<a class="headerlink" href="#dataflow-analysis" title="永久链接至标题"></a></h4>
<p>Liveness of a variable &#8220;flows&#8221; along the edges of the control flow graph; determining the live range of each variable is an example of a dataflow problem. <a class="reference external" href="https://en.wikipedia.org/wiki/Data-flow_analysis">Dataflow analysis</a> is a technique for gathering information about the possible set of values calculated at various points in a computer program.</p>
<p>A simple way to perform data-flow analysis of programs is to set up dataflow equations for each node of the control flow graph and solve them by repeatedly calculating the output from the input locally at each node until the whole system stabilizes.</p>
<ul class="simple">
<li>Flow Graph Terminology</li>
</ul>
<p>A flow graph node has out-edges that lead to successor nodes, and in-edges that come from predecessor nodes. The set <em>pred[n]</em> is the set of all predecessors of node n, and <em>succ[n]</em> is the set of successors.
In the control flow graph above, the out-edges of node 5 are 5 &#8211;&gt; 6 and 5 &#8211;&gt; 2, so <em>succ[5]</em> = {2, 6}. The in-edges of 2 are 5 &#8211;&gt; 2 and 1 &#8211;&gt; 2, so <em>pred[2]</em> = {1, 5}.</p>
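<p>These sets are easy to compute from an edge list. The sketch below uses the loop example: the edges (5, 2), (5, 6) and (1, 2) are stated above, and the remaining straight-line edges between consecutive nodes are assumed from the figure.</p>
<div class="highlight-python"><div class="highlight"><pre>from collections import defaultdict

edges = [(1, 2), (2, 3), (3, 4), (4, 5), (5, 2), (5, 6)]

succ = defaultdict(set)
pred = defaultdict(set)
for src, dst in edges:
    succ[src].add(dst)
    pred[dst].add(src)

assert succ[5] == {2, 6}
assert pred[2] == {1, 5}
</pre></div>
</div>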
<ul class="simple">
<li>Uses and Defs</li>
</ul>
<p>An assignment to a variable or temporary defines that variable. An occurrence of a variable on the right-hand side of an assignment (or in another expression) uses the variable. We define the <em>def</em> of a variable as the set of graph nodes that define it, or the <em>def</em> of a graph node as the set of variables it defines; and similarly for the <em>use</em> of a variable or graph node. In the control flow graph above, <em>def(3)</em> = {c} and <em>use(3)</em> = {b, c}.</p>
<ul class="simple">
<li>Liveness</li>
</ul>
<p>A variable is <em>live</em> on an edge if there is a directed path from that edge to a <em>use</em> of the variable that does not go through any <em>def</em>. A variable is <em>live-in</em> at a node if it is live on any of the in-edges of that node; it is <em>live-out</em> at a node if it is live on any of the out-edges of the node.</p>
<p>The calculation of liveness can be solved by iteration until a fixed point is reached. The following are the recursive dataflow equations:</p>
<p><img alt="" src="../_images/dataflow_equations.png" /></p>
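<p>These equations can be solved with a straightforward iterative sketch. The code below uses the standard liveness equations (live_in[n] = use[n] ∪ (live_out[n] − def[n]), and live_out[n] is the union of live_in[s] over the successors s of n) and applies them to the three-operator program from the Memory Sharing section; the use/def sets are read off that snippet, and the result matches the one listed under &#8220;Make dataflow analysis&#8221; below.</p>
<div class="highlight-python"><div class="highlight"><pre>def liveness(nodes, succ, use, defs):
    """Iterate the dataflow equations until a fixed point is reached."""
    live_in = {n: set() for n in nodes}
    live_out = {n: set() for n in nodes}
    changed = True
    while changed:
        changed = False
        for n in reversed(nodes):          # backward order converges faster
            out_n = set().union(*[live_in[s] for s in succ.get(n, [])])
            in_n = use.get(n, set()) | (out_n - defs.get(n, set()))
            if (in_n, out_n) != (live_in[n], live_out[n]):
                live_in[n], live_out[n] = in_n, out_n
                changed = True
    return live_in, live_out

# a = op1(b, c); d = op2(a); e = op3(d, f)
nodes = ["op1", "op2", "op3"]
succ = {"op1": ["op2"], "op2": ["op3"]}
use = {"op1": {"b", "c"}, "op2": {"a"}, "op3": {"d", "f"}}
defs = {"op1": {"a"}, "op2": {"d"}, "op3": {"e"}}

live_in, live_out = liveness(nodes, succ, use, defs)
# live_in["op1"] == {"b", "c", "f"},  live_out["op3"] == set()
</pre></div>
</div>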
</div>
</div>
<div class="section" id="memory-optimization-transpiler">
<span id="memory-optimization-transpiler"></span><h3>Memory optimization transpiler<a class="headerlink" href="#memory-optimization-transpiler" title="永久链接至标题"></a></h3>
<p>Finally, we combine the basic strategies and the liveness analysis techniques learned from compilers to implement our memory optimization transpiler.</p>
<div class="section" id="add-in-place-attribute">
<span id="add-in-place-attribute"></span><h4>add in-place attribute<a class="headerlink" href="#add-in-place-attribute" title="永久链接至标题"></a></h4>
<p>In-place is a built-in attribute of an operator. Since we treat in-place and other operators differently, we have to add an in-place attribute for every operator.</p>
</div>
<div class="section" id="contruct-control-flow-graph">
<span id="contruct-control-flow-graph"></span><h4>contruct control flow graph<a class="headerlink" href="#contruct-control-flow-graph" title="永久链接至标题"></a></h4>
<p>Following is the ProgramDesc protobuf of <a class="reference external" href="https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/fluid/tests/book/test_machine_translation.py">machine translation</a> example.</p>
<ul class="simple">
<li>Block0:</li>
</ul>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="n">lookup_table</span>
<span class="n">mul</span>
<span class="o">...</span>
<span class="k">while</span><span class="p">(</span><span class="n">sub</span><span class="o">-</span><span class="n">block</span> <span class="n">idx</span> <span class="mi">1</span><span class="p">)</span>
<span class="o">...</span>
<span class="n">array_to_lod_tensor</span>
<span class="n">cross_entropy</span>
<span class="o">...</span>
<span class="n">while_grad</span><span class="p">(</span><span class="n">sub</span><span class="o">-</span><span class="n">block</span> <span class="n">idx</span> <span class="mi">2</span><span class="p">)</span>
<span class="n">read_from_array</span>
<span class="n">array_to_lod_tensor</span>
<span class="o">...</span>
</pre></div>
</div>
<ul class="simple">
<li>Block1</li>
</ul>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="n">read_from_array</span>
<span class="n">read_from_array</span>
<span class="o">...</span>
<span class="n">write_to_array</span>
<span class="n">increment</span>
<span class="n">write_to_array</span>
<span class="n">less_than</span>
</pre></div>
</div>
<ul class="simple">
<li>Block2</li>
</ul>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="n">read_from_array</span>
<span class="n">increment</span>
<span class="o">...</span>
<span class="n">write_to_array</span>
<span class="n">write_to_array</span>
</pre></div>
</div>
<p>We can traverse all the operators and variables in the ProgramDesc to build a control flow graph.</p>
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="k">class</span> <span class="nc">ControlFlowGraph</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
    <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">Program</span><span class="p">):</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">_sucessors</span> <span class="o">=</span> <span class="n">defaultdict</span><span class="p">(</span><span class="nb">set</span><span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">_presucessors</span> <span class="o">=</span> <span class="n">defaultdict</span><span class="p">(</span><span class="nb">set</span><span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">_uses</span> <span class="o">=</span> <span class="n">defaultdict</span><span class="p">(</span><span class="nb">set</span><span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">_defs</span> <span class="o">=</span> <span class="n">defaultdict</span><span class="p">(</span><span class="nb">set</span><span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">_live_in</span> <span class="o">=</span> <span class="n">defaultdict</span><span class="p">(</span><span class="nb">set</span><span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">_live_out</span> <span class="o">=</span> <span class="n">defaultdict</span><span class="p">(</span><span class="nb">set</span><span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">_program</span> <span class="o">=</span> <span class="n">Program</span>
    
    <span class="k">def</span> <span class="nf">build</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
        <span class="k">pass</span>
    
    <span class="k">def</span> <span class="nf">dataflow_analysis</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
        <span class="k">pass</span>
        
    <span class="k">def</span> <span class="nf">memory_optimization</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
        <span class="k">pass</span>
        
    <span class="k">def</span> <span class="nf">get_program</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
        <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_program</span>
</pre></div>
</div>
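<p>The build() method is left as a stub above. The sketch below shows the bookkeeping it needs to do, under the simplifying assumption that operators are given as (input_names, output_names) pairs in execution order; the real transpiler reads them from the ProgramDesc, and the while/while_grad sub-blocks add extra control-flow edges that this straight-line sketch ignores.</p>
<div class="highlight-python"><div class="highlight"><pre>def build_graph(graph, ops):
    """Fill the use/def and successor/predecessor sets of a ControlFlowGraph.

    `ops` is assumed to be a list of (input_names, output_names) pairs in
    execution order; sub-block branches are ignored in this sketch.
    """
    for i, (inputs, outputs) in enumerate(ops):
        graph._uses[i].update(inputs)
        graph._defs[i].update(outputs)
        if i:   # straight-line edge from the previous operator
            graph._successors[i - 1].add(i)
            graph._predecessors[i].add(i - 1)
</pre></div>
</div>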
</div>
<div class="section" id="make-dataflow-analysis">
<span id="make-dataflow-analysis"></span><h4>Make dataflow analysis<a class="headerlink" href="#make-dataflow-analysis" title="永久链接至标题"></a></h4>
<p>We follow the approach used in compilers and solve the dataflow equations to obtain the liveness of every variable. If the live-in of an operator node differs from its live-out, we can perform memory sharing.</p>
<p>For example:</p>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="n">a</span> <span class="o">=</span> <span class="n">op1</span><span class="p">(</span><span class="n">b</span><span class="p">,</span> <span class="n">c</span><span class="p">);</span>
<span class="n">d</span> <span class="o">=</span> <span class="n">op2</span><span class="p">(</span><span class="n">a</span><span class="p">)</span>
<span class="n">e</span> <span class="o">=</span> <span class="n">op3</span><span class="p">(</span><span class="n">d</span><span class="p">,</span> <span class="n">f</span><span class="p">)</span>
</pre></div>
</div>
<p>The dataflow analysis result is:</p>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="n">live_in</span><span class="p">(</span><span class="n">op1</span><span class="p">)</span> <span class="o">=</span> <span class="p">{</span><span class="n">b</span><span class="p">,</span> <span class="n">c</span><span class="p">,</span> <span class="n">f</span><span class="p">}</span>
<span class="n">live_out</span><span class="p">(</span><span class="n">op1</span><span class="p">)</span> <span class="o">=</span> <span class="p">{</span><span class="n">a</span><span class="p">,</span> <span class="n">f</span><span class="p">}</span>

<span class="n">live_in</span><span class="p">(</span><span class="n">op2</span><span class="p">)</span> <span class="o">=</span> <span class="p">{</span><span class="n">a</span><span class="p">,</span> <span class="n">f</span><span class="p">}</span>
<span class="n">live_out</span><span class="p">(</span><span class="n">op2</span><span class="p">)</span> <span class="o">=</span> <span class="p">{</span><span class="n">d</span><span class="p">,</span> <span class="n">f</span><span class="p">}</span>

<span class="n">live_in</span><span class="p">(</span><span class="n">op3</span><span class="p">)</span> <span class="o">=</span> <span class="p">{</span><span class="n">d</span><span class="p">,</span> <span class="n">f</span><span class="p">}</span>
<span class="n">live_out</span><span class="p">(</span><span class="n">op3</span><span class="p">)</span> <span class="o">=</span> <span class="p">{}</span>
</pre></div>
</div>
<p>After op1, variables b and c can be released to the memory pool; after op2, variable a can be released; after op3, variables d and f can be released.</p>
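<p>In other words, the variables that can be returned to the memory pool after an operator are those in its live-in set but not in its live-out set:</p>
<div class="highlight-python"><div class="highlight"><pre>live_in = {"op1": {"b", "c", "f"}, "op2": {"a", "f"}, "op3": {"d", "f"}}
live_out = {"op1": {"a", "f"}, "op2": {"d", "f"}, "op3": set()}

# Variables that die at each operator and can be released to the pool.
released = {op: live_in[op] - live_out[op] for op in live_in}
# released == {"op1": {"b", "c"}, "op2": {"a"}, "op3": {"d", "f"}}
</pre></div>
</div>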
</div>
<div class="section" id="memory-sharing-policy">
<span id="memory-sharing-policy"></span><h4>memory sharing policy<a class="headerlink" href="#memory-sharing-policy" title="永久链接至标题"></a></h4>
<p>A memory pool is maintained during the memory optimization stage. Each operator node is scanned to determine whether memory optimization can be applied. If an operator satisfies the requirement, the following policy is used to handle its input (i) and output (o) variables:</p>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="k">if</span> <span class="n">op</span><span class="o">.</span><span class="n">support_inplace</span><span class="p">():</span>
    <span class="n">i</span> <span class="o">--&gt;</span> <span class="n">pool</span>
    <span class="n">pool</span> <span class="o">--&gt;</span> <span class="n">o</span>
<span class="k">else</span><span class="p">:</span>
    <span class="n">pool</span> <span class="o">--&gt;</span> <span class="n">o</span>
    <span class="n">i</span> <span class="o">--&gt;</span> <span class="n">pool</span>
</pre></div>
</div>
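<p>Rendered as Python, the policy could look like the hedged sketch below; the pool, op and variable objects are hypothetical stand-ins, and the arrows above mean moving a block into or out of the pool.</p>
<div class="highlight-python"><div class="highlight"><pre>def share_memory(op, pool, in_var, out_var):
    """Hedged sketch of the policy above (pool/op/variable API is hypothetical).

    Assumes in_var is no longer live after op. For an in-place operator the
    input block is released first, so the output can acquire exactly that
    block; otherwise the output is served before the still-needed input
    block is released.
    """
    if op.support_inplace():
        pool.release(in_var.block)
        out_var.block = pool.acquire(out_var.size)
    else:
        out_var.block = pool.acquire(out_var.size)
        pool.release(in_var.block)
</pre></div>
</div>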
</div>
</div>
</div>
<div class="section" id="reference">
<span id="reference"></span><h2>Reference<a class="headerlink" href="#reference" title="永久链接至标题"></a></h2>
<ul class="simple">
<li><a class="reference external" href="https://manavsehgal.com/lecture-notes-from-artificial-intelligence-is-the-new-electricity-by-andrew-ng-4712dcbf26e5">Lecture Notes From Artificial Intelligence Is The New Electricity By Andrew Ng</a></li>
<li>Modern Compiler Implementation in ML, by Andrew W. Appel</li>
<li><a class="reference external" href="https://mxnet.incubator.apache.org/architecture/note_memory.html">Optimizing Memory Consumption in Deep learning</a></li>
</ul>
</div>
</div>


           </div>
          </div>
          <footer>
  

  <hr/>

  <div role="contentinfo">
    <p>
        &copy; Copyright 2016, PaddlePaddle developers.

    </p>
  </div>
  Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>. 

</footer>

        </div>
      </div>

    </section>

  </div>
  


  

    <script type="text/javascript">
        var DOCUMENTATION_OPTIONS = {
            URL_ROOT:'../',
            VERSION:'',
            COLLAPSE_INDEX:false,
            FILE_SUFFIX:'.html',
            HAS_SOURCE:  true,
            SOURCELINK_SUFFIX: ".txt",
        };
    </script>
      <script type="text/javascript" src="../_static/jquery.js"></script>
      <script type="text/javascript" src="../_static/underscore.js"></script>
      <script type="text/javascript" src="../_static/doctools.js"></script>
      <script type="text/javascript" src="../_static/translations.js"></script>
      <script type="text/javascript" src="https://cdn.bootcss.com/mathjax/2.7.0/MathJax.js"></script>
       
  

  
  
    <script type="text/javascript" src="../_static/js/theme.js"></script>
  
  
  <script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/js/bootstrap.min.js" integrity="sha384-Tc5IQib027qvyjSMfHjOMaLkfuWVxZxUPnCJA7l2mCWNIpG9mGCD8wGNIcPD7Txa" crossorigin="anonymous"></script>
  <script src="https://cdn.jsdelivr.net/perfect-scrollbar/0.6.14/js/perfect-scrollbar.jquery.min.js"></script>
  <script src="../_static/js/paddle_doc_init.js"></script> 

</body>
</html>