new_layer_en.html 60.8 KB
Newer Older
1 2


3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
<!DOCTYPE html>
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
  <meta charset="utf-8">
  
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  
  <title>Write New Layers &mdash; PaddlePaddle  documentation</title>
  

  
  

  

  
  
    
Y
Yu Yang 已提交
22

23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67
  

  
  
    <link rel="stylesheet" href="../../_static/css/theme.css" type="text/css" />
  

  
  
        <link rel="index" title="Index"
              href="../../genindex.html"/>
        <link rel="search" title="Search" href="../../search.html"/>
    <link rel="top" title="PaddlePaddle  documentation" href="../../index.html"/>
        <link rel="up" title="HOW TO" href="../index_en.html"/>
        <link rel="next" title="Contribute Code" href="contribute_to_paddle_en.html"/>
        <link rel="prev" title="Distributed PaddlePaddle Training on AWS with Kubernetes" href="../usage/k8s/k8s_aws_en.html"/> 

  <link rel="stylesheet" href="https://cdn.jsdelivr.net/perfect-scrollbar/0.6.14/css/perfect-scrollbar.min.css" type="text/css" />
  <link rel="stylesheet" href="../../_static/css/override.css" type="text/css" />
  <script>
  // Keep any existing _hmt array, otherwise start with an empty one
  // (presumably the queue read by hm.js; confirm against the vendor docs).
  var _hmt = _hmt || [];
  // Build a script element pointing at hm.js and insert it ahead of the
  // first script element currently in the document.
  (function () {
    var firstScript = document.getElementsByTagName("script")[0];
    var loader = document.createElement("script");
    loader.src = "//hm.baidu.com/hm.js?b9a314ab40d04d805655aab1deee08ba";
    firstScript.parentNode.insertBefore(loader, firstScript);
  })();
  </script>

  

  
  <script src="../../_static/js/modernizr.min.js"></script>

</head>

<body class="wy-body-for-nav" role="document">

  
  <header class="site-header">
    <div class="site-logo">
      <a href="/"><img src="../../_static/images/PP_w.png"></a>
    </div>
    <div class="site-nav-links">
      <div class="site-menu">
68
        <a class="fork-on-github" href="https://github.com/PaddlePaddle/Paddle" target="_blank"><i class="fa fa-github"></i>Fork me on Github</a>
69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89
        <div class="language-switcher dropdown">
          <a type="button" data-toggle="dropdown">
            <span>English</span>
            <i class="fa fa-angle-up"></i>
            <i class="fa fa-angle-down"></i>
          </a>
          <ul class="dropdown-menu">
            <li><a href="/doc_cn">中文</a></li>
            <li><a href="/doc">English</a></li>
          </ul>
        </div>
        <ul class="site-page-links">
          <li><a href="/">Home</a></li>
        </ul>
      </div>
      <div class="doc-module">
        
        <ul class="current">
<li class="toctree-l1"><a class="reference internal" href="../../getstarted/index_en.html">GET STARTED</a></li>
<li class="toctree-l1 current"><a class="reference internal" href="../index_en.html">HOW TO</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../api/index_en.html">API</a></li>
90
<li class="toctree-l1"><a class="reference internal" href="../../mobile/index_en.html">MOBILE</a></li>
91 92 93 94 95 96 97 98 99 100 101 102 103 104 105
</ul>

        
<div role="search">
  <form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
    <input type="text" name="q" placeholder="Search docs" />
    <input type="hidden" name="check_keywords" value="yes" />
    <input type="hidden" name="area" value="default" />
  </form>
</div>        
      </div>
    </div>
  </header>
  
  <div class="main-content-wrap">
Y
Yu Yang 已提交
106 107

    
108 109 110 111 112 113
    <nav class="doc-menu-vertical" role="navigation">
        
          
          <ul class="current">
<li class="toctree-l1"><a class="reference internal" href="../../getstarted/index_en.html">GET STARTED</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../../getstarted/build_and_install/index_en.html">Install and Build</a><ul>
114 115 116 117
<li class="toctree-l3"><a class="reference internal" href="../../getstarted/build_and_install/pip_install_en.html">Install Using pip</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../getstarted/build_and_install/docker_install_en.html">Run in Docker Containers</a></li>
<li class="toctree-l3"><a class="reference internal" href="build_en.html">Build using Docker</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../getstarted/build_and_install/build_from_source_en.html">Build from Sources</a></li>
118 119 120 121 122 123 124 125 126 127 128
</ul>
</li>
</ul>
</li>
<li class="toctree-l1 current"><a class="reference internal" href="../index_en.html">HOW TO</a><ul class="current">
<li class="toctree-l2"><a class="reference internal" href="../usage/cmd_parameter/index_en.html">Set Command-line Parameters</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../usage/cmd_parameter/use_case_en.html">Use Case</a></li>
<li class="toctree-l3"><a class="reference internal" href="../usage/cmd_parameter/arguments_en.html">Argument Outline</a></li>
<li class="toctree-l3"><a class="reference internal" href="../usage/cmd_parameter/detail_introduction_en.html">Detail Description</a></li>
</ul>
</li>
129
<li class="toctree-l2"><a class="reference internal" href="../usage/cluster/cluster_train_en.html">PaddlePaddle Distributed Training</a></li>
130 131 132 133
<li class="toctree-l2"><a class="reference internal" href="../usage/k8s/k8s_en.html">Paddle On Kubernetes</a></li>
<li class="toctree-l2"><a class="reference internal" href="../usage/k8s/k8s_aws_en.html">Distributed PaddlePaddle Training on AWS with Kubernetes</a></li>
<li class="toctree-l2 current"><a class="current reference internal" href="#">Write New Layers</a></li>
<li class="toctree-l2"><a class="reference internal" href="contribute_to_paddle_en.html">Contribute Code</a></li>
134 135 136 137 138
<li class="toctree-l2"><a class="reference internal" href="write_docs_en.html">Contribute Documentation</a></li>
<li class="toctree-l2"><a class="reference internal" href="../deep_model/rnn/index_en.html">RNN Models</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../deep_model/rnn/rnn_config_en.html">RNN Configuration</a></li>
</ul>
</li>
139 140 141 142 143 144 145
<li class="toctree-l2"><a class="reference internal" href="../optimization/gpu_profiling_en.html">Tune GPU Performance</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../api/index_en.html">API</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../../api/v2/model_configs.html">Model Configuration</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../api/v2/config/activation.html">Activation</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../api/v2/config/layer.html">Layers</a></li>
146
<li class="toctree-l3"><a class="reference internal" href="../../api/v2/config/evaluators.html">Evaluators</a></li>
147 148 149 150 151 152
<li class="toctree-l3"><a class="reference internal" href="../../api/v2/config/optimizer.html">Optimizer</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../api/v2/config/pooling.html">Pooling</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../api/v2/config/networks.html">Networks</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../api/v2/config/attr.html">Parameter Attribute</a></li>
</ul>
</li>
153 154 155 156 157 158
<li class="toctree-l2"><a class="reference internal" href="../../api/v2/data.html">Data Reader Interface and DataSets</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../api/v2/data/data_reader.html">Data Reader Interface</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../api/v2/data/image.html">Image Interface</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../api/v2/data/dataset.html">Dataset</a></li>
</ul>
</li>
159
<li class="toctree-l2"><a class="reference internal" href="../../api/v2/run_logic.html">Training and Inference</a></li>
160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177
<li class="toctree-l2"><a class="reference internal" href="../../api/v2/fluid.html">Fluid</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../api/v2/fluid/layers.html">Layers</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../api/v2/fluid/data_feeder.html">DataFeeder</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../api/v2/fluid/executor.html">Executor</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../api/v2/fluid/initializer.html">Initializer</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../api/v2/fluid/evaluator.html">Evaluator</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../api/v2/fluid/nets.html">Nets</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../api/v2/fluid/optimizer.html">Optimizer</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../api/v2/fluid/param_attr.html">ParamAttr</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../api/v2/fluid/profiler.html">Profiler</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../api/v2/fluid/regularizer.html">Regularizer</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../mobile/index_en.html">MOBILE</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../../mobile/cross_compiling_for_android_en.html">Build PaddlePaddle for Android</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../mobile/cross_compiling_for_raspberry_en.html">Build PaddlePaddle for Raspberry Pi</a></li>
178 179 180 181 182 183
</ul>
</li>
</ul>

        
    </nav>
Y
Yu Yang 已提交
184
    
185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209
    <section class="doc-content-wrap">

      

 







<div role="navigation" aria-label="breadcrumbs navigation">
  <ul class="wy-breadcrumbs">
      
        <li><a href="../index_en.html">HOW TO</a> > </li>
      
    <li>Write New Layers</li>
  </ul>
</div>
      
      <div class="wy-nav-content" id="doc-content">
        <div class="rst-content">
          <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
           <div itemprop="articleBody">
Y
Yu Yang 已提交
210
            
211 212
  <div class="section" id="write-new-layers">
<h1>Write New Layers<a class="headerlink" href="#write-new-layers" title="Permalink to this headline"></a></h1>
Y
Yu Yang 已提交
213 214 215 216
<p>This tutorial will guide you to write customized layers in PaddlePaddle. We will utilize fully connected layer as an example to guide you through the following steps for writing a new layer.</p>
<ul class="simple">
<li>Derive equations for the forward and backward part of the layer.</li>
<li>Implement C++ class for the layer.</li>
Y
Yu Yang 已提交
217 218
<li>Write gradient check unit test to make sure the gradients are correctly computed.</li>
<li>Implement Python wrapper for the layer.</li>
Y
Yu Yang 已提交
219 220
</ul>
<div class="section" id="derive-equations">
221
<h2>Derive Equations<a class="headerlink" href="#derive-equations" title="Permalink to this headline"></a></h2>
Y
Yu Yang 已提交
222
<p>First we need to derive equations of the <em>forward</em> and <em>backward</em> part of the layer. The forward part computes the output given an input. The backward part computes the gradients of the input and the parameters given the gradients of the output.</p>
Y
Yu Yang 已提交
223 224
<p>The illustration of a fully connected layer is shown in the following figure. In a fully connected layer, all output nodes are connected to all the input nodes.</p>
<a class="reference internal image-reference" href="../../_images/FullyConnected.jpg"><img alt="../../_images/FullyConnected.jpg" class="align-center" src="../../_images/FullyConnected.jpg" style="width: 391.2px; height: 175.2px;" /></a>
Y
Yu Yang 已提交
225
<p>The <em>forward part</em> of a layer transforms an input into the corresponding output.
Y
Yu Yang 已提交
226 227 228 229 230 231 232 233
A fully connected layer takes a dense input vector <span class="math">\(x\)</span> with dimension <span class="math">\(D_i\)</span>. It uses a transformation matrix <span class="math">\(W\)</span> with size <span class="math">\(D_i \times D_o\)</span> to project <span class="math">\(x\)</span> into a <span class="math">\(D_o\)</span> dimensional vector, and adds a bias vector <span class="math">\(b\)</span> with dimension <span class="math">\(D_o\)</span> to the vector.</p>
<div class="math">
\[y = f(W^T x + b)\]</div>
<p>where <span class="math">\(f(.)\)</span> is a nonlinear <em>activation</em> function, such as sigmoid, tanh, and ReLU.</p>
<p>The transformation matrix <span class="math">\(W\)</span> and bias vector <span class="math">\(b\)</span> are the <em>parameters</em> of the layer. The <em>parameters</em> of a layer are learned during training in the <em>backward pass</em>. The backward pass computes the gradients of the output function with respect to all parameters and inputs. The optimizer can use chain rule to compute the gradients of the loss function with respect to each parameter.</p>
<p>Suppose our loss function is <span class="math">\(c(y)\)</span>, then</p>
<div class="math">
\[\frac{\partial c(y)}{\partial x} = \frac{\partial c(y)}{\partial y} \frac{\partial y}{\partial x}\]</div>
234
<p>Suppose <span class="math">\(z = W^T x + b\)</span>, then</p>
Y
Yu Yang 已提交
235 236 237 238 239 240 241 242
<div class="math">
\[\frac{\partial y}{\partial z} = \frac{\partial f(z)}{\partial z}\]</div>
<p>This derivative can be automatically computed by our base layer class.</p>
<p>Then, for fully connected layer, we need to compute:</p>
<div class="math">
\[\frac{\partial z}{\partial x} = W, \frac{\partial z_j}{\partial W_{ij}} = x_i, \frac{\partial z}{\partial b} = \mathbf 1\]</div>
<p>where <span class="math">\(\mathbf 1\)</span> is an all one vector, <span class="math">\(W_{ij}\)</span> is the number at the i-th row and j-th column of the matrix <span class="math">\(W\)</span>, <span class="math">\(z_j\)</span> is the j-th component of the vector <span class="math">\(z\)</span>, and <span class="math">\(x_i\)</span> is the i-th component of the vector <span class="math">\(x\)</span>.</p>
<p>Finally we can use the chain rule to calculate <span class="math">\(\frac{\partial c(y)}{\partial x}\)</span>, and <span class="math">\(\frac{\partial c(y)}{\partial W}\)</span>. The details of the computation will be given in the next section.</p>
Y
Yu Yang 已提交
243 244
</div>
<div class="section" id="implement-c-class">
245
<h2>Implement C++ Class<a class="headerlink" href="#implement-c-class" title="Permalink to this headline"></a></h2>
Y
Yu Yang 已提交
246
<p>The C++ class of the layer implements the initialization, forward, and backward part of the layer. The fully connected layer is at <code class="code docutils literal"><span class="pre">paddle/gserver/layers/FullyConnectedLayer.h</span></code> and <code class="code docutils literal"><span class="pre">paddle/gserver/layers/FullyConnectedLayer.cpp</span></code>. We list simplified version of the code below.</p>
247
<p>It needs to derive from the base class <code class="code docutils literal"><span class="pre">paddle::Layer</span></code>, and it needs to override the following functions:</p>
Y
Yu Yang 已提交
248 249
<ul class="simple">
<li>constructor and destructor.</li>
Y
Yu Yang 已提交
250 251 252 253
<li><code class="code docutils literal"><span class="pre">init</span></code> function. It is used to initialize the parameters and settings.</li>
<li><code class="code docutils literal"><span class="pre">forward</span></code>. It implements the forward part of the layer.</li>
<li><code class="code docutils literal"><span class="pre">backward</span></code>. It implements the backward part of the layer.</li>
<li><code class="code docutils literal"><span class="pre">prefetch</span></code>. It is used to determine which rows of the corresponding parameter matrix to prefetch from the parameter server. You do not need to override this function if your layer does not need remote sparse update (most layers do not need to support remote sparse update).</li>
Y
Yu Yang 已提交
254 255
</ul>
<p>The header file is listed below:</p>
Y
Yu Yang 已提交
256
<div class="highlight-c++"><div class="highlight"><pre><span></span><span class="k">namespace</span> <span class="n">paddle</span> <span class="p">{</span>
Y
Yu Yang 已提交
257 258 259 260 261 262 263 264
<span class="cm">/**</span>
<span class="cm"> * A layer has full connections to all neurons in the previous layer.</span>
<span class="cm"> * It computes an inner product with a set of learned weights, and</span>
<span class="cm"> * (optionally) adds biases.</span>
<span class="cm"> *</span>
<span class="cm"> * The config file api is fc_layer.</span>
<span class="cm"> */</span>

Y
Yu Yang 已提交
265 266
<span class="k">class</span> <span class="nc">FullyConnectedLayer</span> <span class="o">:</span> <span class="k">public</span> <span class="n">Layer</span> <span class="p">{</span>
<span class="k">protected</span><span class="o">:</span>
Y
Yu Yang 已提交
267 268 269
  <span class="n">WeightList</span> <span class="n">weights_</span><span class="p">;</span>
  <span class="n">std</span><span class="o">::</span><span class="n">unique_ptr</span><span class="o">&lt;</span><span class="n">Weight</span><span class="o">&gt;</span> <span class="n">biases_</span><span class="p">;</span>

Y
Yu Yang 已提交
270 271
<span class="k">public</span><span class="o">:</span>
  <span class="k">explicit</span> <span class="n">FullyConnectedLayer</span><span class="p">(</span><span class="k">const</span> <span class="n">LayerConfig</span><span class="o">&amp;</span> <span class="n">config</span><span class="p">)</span>
Y
Yu Yang 已提交
272 273 274 275 276 277 278 279 280
      <span class="o">:</span> <span class="n">Layer</span><span class="p">(</span><span class="n">config</span><span class="p">)</span> <span class="p">{}</span>
  <span class="o">~</span><span class="n">FullyConnectedLayer</span><span class="p">()</span> <span class="p">{}</span>

  <span class="kt">bool</span> <span class="n">init</span><span class="p">(</span><span class="k">const</span> <span class="n">LayerMap</span><span class="o">&amp;</span> <span class="n">layerMap</span><span class="p">,</span> <span class="k">const</span> <span class="n">ParameterMap</span><span class="o">&amp;</span> <span class="n">parameterMap</span><span class="p">);</span>

  <span class="n">Weight</span><span class="o">&amp;</span> <span class="n">getWeight</span><span class="p">(</span><span class="kt">int</span> <span class="n">idx</span><span class="p">)</span> <span class="p">{</span> <span class="k">return</span> <span class="o">*</span><span class="n">weights_</span><span class="p">[</span><span class="n">idx</span><span class="p">];</span> <span class="p">}</span>

  <span class="kt">void</span> <span class="n">prefetch</span><span class="p">();</span>
  <span class="kt">void</span> <span class="nf">forward</span><span class="p">(</span><span class="n">PassType</span> <span class="n">passType</span><span class="p">);</span>
Y
Yu Yang 已提交
281
  <span class="kt">void</span> <span class="nf">backward</span><span class="p">(</span><span class="k">const</span> <span class="n">UpdateCallback</span><span class="o">&amp;</span> <span class="n">callback</span> <span class="o">=</span> <span class="k">nullptr</span><span class="p">);</span>
Y
Yu Yang 已提交
282 283 284 285
<span class="p">};</span>
<span class="p">}</span>  <span class="c1">// namespace paddle</span>
</pre></div>
</div>
Y
Yu Yang 已提交
286
<p>It defines the parameters as class variables. We use the <code class="code docutils literal"><span class="pre">Weight</span></code> class as an abstraction of parameters. It supports multi-thread update. This class will be described in detail in the implementations.</p>
Y
Yu Yang 已提交
287
<ul class="simple">
Y
Yu Yang 已提交
288 289
<li><code class="code docutils literal"><span class="pre">weights_</span></code> is a list of weights for the transformation matrices. The current implementation can have more than one input. Thus, it has a list of weights. Each weight corresponds to one input.</li>
<li><code class="code docutils literal"><span class="pre">biases_</span></code> is a weight for the bias vector.</li>
Y
Yu Yang 已提交
290
</ul>
Y
Yu Yang 已提交
291 292
<p>The fully connected layer does not have layer configuration hyper-parameters. If there are some layer hyper-parameters, a common practice is to store it in <code class="code docutils literal"><span class="pre">LayerConfig&amp;</span> <span class="pre">config</span></code>, and put it into a class variable in the constructor.</p>
<p>The following code snippet implements the <code class="code docutils literal"><span class="pre">init</span></code> function.</p>
Y
Yu Yang 已提交
293
<ul class="simple">
Y
Yu Yang 已提交
294 295
<li>First, every <code class="code docutils literal"><span class="pre">init</span></code> function must call the <code class="code docutils literal"><span class="pre">init</span></code> function of the base class <code class="code docutils literal"><span class="pre">Layer::init(layerMap,</span> <span class="pre">parameterMap);</span></code>. This statement will initialize the required variables and connections for each layer.</li>
<li>Then it initializes all the weight matrices <span class="math">\(W\)</span>. The current implementation can have more than one input. Thus, it has a list of weights.</li>
Y
Yu Yang 已提交
296 297
<li>Finally, it initializes the bias.</li>
</ul>
Y
Yu Yang 已提交
298
<div class="highlight-c++"><div class="highlight"><pre><span></span><span class="kt">bool</span> <span class="n">FullyConnectedLayer</span><span class="o">::</span><span class="n">init</span><span class="p">(</span><span class="k">const</span> <span class="n">LayerMap</span><span class="o">&amp;</span> <span class="n">layerMap</span><span class="p">,</span>
Y
Yu Yang 已提交
299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315
                               <span class="k">const</span> <span class="n">ParameterMap</span><span class="o">&amp;</span> <span class="n">parameterMap</span><span class="p">)</span> <span class="p">{</span>
  <span class="cm">/* Initialize the basic parent class */</span>
  <span class="n">Layer</span><span class="o">::</span><span class="n">init</span><span class="p">(</span><span class="n">layerMap</span><span class="p">,</span> <span class="n">parameterMap</span><span class="p">);</span>

  <span class="cm">/* initialize the weightList */</span>
  <span class="n">CHECK</span><span class="p">(</span><span class="n">inputLayers_</span><span class="p">.</span><span class="n">size</span><span class="p">()</span> <span class="o">==</span> <span class="n">parameters_</span><span class="p">.</span><span class="n">size</span><span class="p">());</span>
  <span class="k">for</span> <span class="p">(</span><span class="kt">size_t</span> <span class="n">i</span> <span class="o">=</span> <span class="mi">0</span><span class="p">;</span> <span class="n">i</span> <span class="o">&lt;</span> <span class="n">inputLayers_</span><span class="p">.</span><span class="n">size</span><span class="p">();</span> <span class="n">i</span><span class="o">++</span><span class="p">)</span> <span class="p">{</span>
    <span class="c1">// Option the parameters</span>
    <span class="kt">size_t</span> <span class="n">height</span> <span class="o">=</span> <span class="n">inputLayers_</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">-&gt;</span><span class="n">getSize</span><span class="p">();</span>
    <span class="kt">size_t</span> <span class="n">width</span> <span class="o">=</span> <span class="n">getSize</span><span class="p">();</span>

    <span class="c1">// create a new weight</span>
    <span class="k">if</span> <span class="p">(</span><span class="n">parameters_</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">-&gt;</span><span class="n">isSparse</span><span class="p">())</span> <span class="p">{</span>
      <span class="n">CHECK_LE</span><span class="p">(</span><span class="n">parameters_</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">-&gt;</span><span class="n">getSize</span><span class="p">(),</span> <span class="n">width</span> <span class="o">*</span> <span class="n">height</span><span class="p">);</span>
    <span class="p">}</span> <span class="k">else</span> <span class="p">{</span>
      <span class="n">CHECK_EQ</span><span class="p">(</span><span class="n">parameters_</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">-&gt;</span><span class="n">getSize</span><span class="p">(),</span> <span class="n">width</span> <span class="o">*</span> <span class="n">height</span><span class="p">);</span>
    <span class="p">}</span>
Y
Yu Yang 已提交
316
    <span class="n">Weight</span><span class="o">*</span> <span class="n">w</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Weight</span><span class="p">(</span><span class="n">height</span><span class="p">,</span> <span class="n">width</span><span class="p">,</span> <span class="n">parameters_</span><span class="p">[</span><span class="n">i</span><span class="p">]);</span>
Y
Yu Yang 已提交
317 318 319 320 321 322 323

    <span class="c1">// append the new weight to the list</span>
    <span class="n">weights_</span><span class="p">.</span><span class="n">emplace_back</span><span class="p">(</span><span class="n">w</span><span class="p">);</span>
  <span class="p">}</span>

  <span class="cm">/* initialize biases_ */</span>
  <span class="k">if</span> <span class="p">(</span><span class="n">biasParameter_</span><span class="p">.</span><span class="n">get</span><span class="p">()</span> <span class="o">!=</span> <span class="nb">NULL</span><span class="p">)</span> <span class="p">{</span>
Y
Yu Yang 已提交
324
    <span class="n">biases_</span> <span class="o">=</span> <span class="n">std</span><span class="o">::</span><span class="n">unique_ptr</span><span class="o">&lt;</span><span class="n">Weight</span><span class="o">&gt;</span><span class="p">(</span><span class="k">new</span> <span class="n">Weight</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="n">getSize</span><span class="p">(),</span> <span class="n">biasParameter_</span><span class="p">));</span>
Y
Yu Yang 已提交
325 326 327 328 329 330 331 332
  <span class="p">}</span>

  <span class="k">return</span> <span class="nb">true</span><span class="p">;</span>
<span class="p">}</span>
</pre></div>
</div>
<p>The implementation of the forward part has the following steps.</p>
<ul class="simple">
Y
Yu Yang 已提交
333 334 335 336
<li>Every layer must call <code class="code docutils literal"><span class="pre">Layer::forward(passType);</span></code> at the beginning of its <code class="code docutils literal"><span class="pre">forward</span></code> function.</li>
<li>Then it allocates memory for the output using <code class="code docutils literal"><span class="pre">reserveOutput(batchSize,</span> <span class="pre">size);</span></code>. This step is necessary because we support the batches to have different batch sizes. <code class="code docutils literal"><span class="pre">reserveOutput</span></code> will change the size of the output accordingly. For the sake of efficiency, we will allocate new memory if we want to expand the matrix, but we will reuse the existing memory block if we want to shrink the matrix.</li>
<li>Then it computes <span class="math">\(\sum_i W_i x + b\)</span> using Matrix operations. <code class="code docutils literal"><span class="pre">getInput(i).value</span></code> retrieves the matrix of the i-th input. Each input is a <span class="math">\(batchSize \times dim\)</span> matrix, where each row represents a single input in a batch. For a complete list of supported matrix operations, please refer to <code class="code docutils literal"><span class="pre">paddle/math/Matrix.h</span></code> and <code class="code docutils literal"><span class="pre">paddle/math/BaseMatrix.h</span></code>.</li>
<li>Finally it applies the activation function using <code class="code docutils literal"><span class="pre">forwardActivation();</span></code>. It will automatically apply the corresponding activation function specified in the network configuration.</li>
Y
Yu Yang 已提交
337
</ul>
Y
Yu Yang 已提交
338
<div class="highlight-c++"><div class="highlight"><pre><span></span><span class="kt">void</span> <span class="n">FullyConnectedLayer</span><span class="o">::</span><span class="n">forward</span><span class="p">(</span><span class="n">PassType</span> <span class="n">passType</span><span class="p">)</span> <span class="p">{</span>
Y
Yu Yang 已提交
339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372
  <span class="n">Layer</span><span class="o">::</span><span class="n">forward</span><span class="p">(</span><span class="n">passType</span><span class="p">);</span>

  <span class="cm">/* malloc memory for the output_ if necessary */</span>
  <span class="kt">int</span> <span class="n">batchSize</span> <span class="o">=</span> <span class="n">getInput</span><span class="p">(</span><span class="mi">0</span><span class="p">).</span><span class="n">getBatchSize</span><span class="p">();</span>
  <span class="kt">int</span> <span class="n">size</span> <span class="o">=</span> <span class="n">getSize</span><span class="p">();</span>

  <span class="p">{</span>
    <span class="c1">// Settup the size of the output.</span>
    <span class="n">reserveOutput</span><span class="p">(</span><span class="n">batchSize</span><span class="p">,</span> <span class="n">size</span><span class="p">);</span>
  <span class="p">}</span>

  <span class="n">MatrixPtr</span> <span class="n">outV</span> <span class="o">=</span> <span class="n">getOutputValue</span><span class="p">();</span>

  <span class="c1">// Apply the the transformation matrix to each input.</span>
  <span class="k">for</span> <span class="p">(</span><span class="kt">size_t</span> <span class="n">i</span> <span class="o">=</span> <span class="mi">0</span><span class="p">;</span> <span class="n">i</span> <span class="o">!=</span> <span class="n">inputLayers_</span><span class="p">.</span><span class="n">size</span><span class="p">();</span> <span class="o">++</span><span class="n">i</span><span class="p">)</span> <span class="p">{</span>
    <span class="k">auto</span> <span class="n">input</span> <span class="o">=</span> <span class="n">getInput</span><span class="p">(</span><span class="n">i</span><span class="p">);</span>
    <span class="n">CHECK</span><span class="p">(</span><span class="n">input</span><span class="p">.</span><span class="n">value</span><span class="p">)</span> <span class="o">&lt;&lt;</span> <span class="s">&quot;The input of &#39;fc&#39; layer must be matrix&quot;</span><span class="p">;</span>
    <span class="n">i</span> <span class="o">==</span> <span class="mi">0</span> <span class="o">?</span> <span class="n">outV</span><span class="o">-&gt;</span><span class="n">mul</span><span class="p">(</span><span class="n">input</span><span class="p">.</span><span class="n">value</span><span class="p">,</span> <span class="n">weights_</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">-&gt;</span><span class="n">getW</span><span class="p">(),</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">0</span><span class="p">)</span>
           <span class="o">:</span> <span class="n">outV</span><span class="o">-&gt;</span><span class="n">mul</span><span class="p">(</span><span class="n">input</span><span class="p">.</span><span class="n">value</span><span class="p">,</span> <span class="n">weights_</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">-&gt;</span><span class="n">getW</span><span class="p">(),</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">);</span>
  <span class="p">}</span>

  <span class="cm">/* add the bias-vector */</span>
  <span class="k">if</span> <span class="p">(</span><span class="n">biases_</span><span class="p">.</span><span class="n">get</span><span class="p">()</span> <span class="o">!=</span> <span class="nb">NULL</span><span class="p">)</span> <span class="p">{</span>
    <span class="n">outV</span><span class="o">-&gt;</span><span class="n">addBias</span><span class="p">(</span><span class="o">*</span><span class="p">(</span><span class="n">biases_</span><span class="o">-&gt;</span><span class="n">getW</span><span class="p">()),</span> <span class="mi">1</span><span class="p">);</span>
  <span class="p">}</span>

  <span class="cm">/* activation */</span> <span class="p">{</span>
    <span class="n">forwardActivation</span><span class="p">();</span>
  <span class="p">}</span>
<span class="p">}</span>
</pre></div>
</div>
<p>The implementation of the backward part has the following steps.</p>
<ul class="simple">
Y
Yu Yang 已提交
373 374 375
<li><code class="code docutils literal"><span class="pre">backwardActivation()</span></code> computes the gradients of the activation. The gradients will be multiplied in place into the gradients of the output, which can be retrieved using <code class="code docutils literal"><span class="pre">getOutputGrad()</span></code>.</li>
<li>Compute the gradients of the bias. Notice that we can use <code class="code docutils literal"><span class="pre">biases_-&gt;getWGrad()</span></code> to get the gradient matrix of the corresponding parameter. After the gradient of one parameter is updated, it <strong>MUST</strong> call <code class="code docutils literal"><span class="pre">getParameterPtr()-&gt;incUpdate(callback);</span></code>. This is utilized for parameter updates over multiple threads or multiple machines.</li>
<li>Then it computes the gradients of the transformation matrices and inputs, and it calls <code class="code docutils literal"><span class="pre">incUpdate</span></code> for the corresponding parameter. This gives the framework the chance to know whether it has gathered all the gradients for one parameter so that it can do some overlapping work (e.g., network communication).</li>
Y
Yu Yang 已提交
376
</ul>
Y
Yu Yang 已提交
377
<div class="highlight-c++"><div class="highlight"><pre><span></span><span class="kt">void</span> <span class="n">FullyConnectedLayer</span><span class="o">::</span><span class="n">backward</span><span class="p">(</span><span class="k">const</span> <span class="n">UpdateCallback</span><span class="o">&amp;</span> <span class="n">callback</span><span class="p">)</span> <span class="p">{</span>
Y
Yu Yang 已提交
378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414
  <span class="cm">/* Do derivation for activations.*/</span> <span class="p">{</span>
    <span class="n">backwardActivation</span><span class="p">();</span>
  <span class="p">}</span>

  <span class="k">if</span> <span class="p">(</span><span class="n">biases_</span> <span class="o">&amp;&amp;</span> <span class="n">biases_</span><span class="o">-&gt;</span><span class="n">getWGrad</span><span class="p">())</span> <span class="p">{</span>
    <span class="n">biases_</span><span class="o">-&gt;</span><span class="n">getWGrad</span><span class="p">()</span><span class="o">-&gt;</span><span class="n">collectBias</span><span class="p">(</span><span class="o">*</span><span class="n">getOutputGrad</span><span class="p">(),</span> <span class="mi">1</span><span class="p">);</span>

    <span class="n">biases_</span><span class="o">-&gt;</span><span class="n">getParameterPtr</span><span class="p">()</span><span class="o">-&gt;</span><span class="n">incUpdate</span><span class="p">(</span><span class="n">callback</span><span class="p">);</span>
  <span class="p">}</span>

  <span class="kt">bool</span> <span class="n">syncFlag</span> <span class="o">=</span> <span class="n">hl_get_sync_flag</span><span class="p">();</span>

  <span class="k">for</span> <span class="p">(</span><span class="kt">size_t</span> <span class="n">i</span> <span class="o">=</span> <span class="mi">0</span><span class="p">;</span> <span class="n">i</span> <span class="o">!=</span> <span class="n">inputLayers_</span><span class="p">.</span><span class="n">size</span><span class="p">();</span> <span class="o">++</span><span class="n">i</span><span class="p">)</span> <span class="p">{</span>
    <span class="cm">/* Calculate the W-gradient for the current layer */</span>
    <span class="k">if</span> <span class="p">(</span><span class="n">weights_</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">-&gt;</span><span class="n">getWGrad</span><span class="p">())</span> <span class="p">{</span>
      <span class="n">MatrixPtr</span> <span class="n">input_T</span> <span class="o">=</span> <span class="n">getInputValue</span><span class="p">(</span><span class="n">i</span><span class="p">)</span><span class="o">-&gt;</span><span class="n">getTranspose</span><span class="p">();</span>
      <span class="n">MatrixPtr</span> <span class="n">oGrad</span> <span class="o">=</span> <span class="n">getOutputGrad</span><span class="p">();</span>
      <span class="p">{</span>
        <span class="n">weights_</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">-&gt;</span><span class="n">getWGrad</span><span class="p">()</span><span class="o">-&gt;</span><span class="n">mul</span><span class="p">(</span><span class="n">input_T</span><span class="p">,</span> <span class="n">oGrad</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">);</span>
      <span class="p">}</span>
    <span class="p">}</span>


    <span class="cm">/* Calculate the input layers error */</span>
    <span class="n">MatrixPtr</span> <span class="n">preGrad</span> <span class="o">=</span> <span class="n">getInputGrad</span><span class="p">(</span><span class="n">i</span><span class="p">);</span>
    <span class="k">if</span> <span class="p">(</span><span class="nb">NULL</span> <span class="o">!=</span> <span class="n">preGrad</span><span class="p">)</span> <span class="p">{</span>
      <span class="n">MatrixPtr</span> <span class="n">weights_T</span> <span class="o">=</span> <span class="n">weights_</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">-&gt;</span><span class="n">getW</span><span class="p">()</span><span class="o">-&gt;</span><span class="n">getTranspose</span><span class="p">();</span>
      <span class="n">preGrad</span><span class="o">-&gt;</span><span class="n">mul</span><span class="p">(</span><span class="n">getOutputGrad</span><span class="p">(),</span> <span class="n">weights_T</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">);</span>
    <span class="p">}</span>

    <span class="p">{</span>
      <span class="n">weights_</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">-&gt;</span><span class="n">getParameterPtr</span><span class="p">()</span><span class="o">-&gt;</span><span class="n">incUpdate</span><span class="p">(</span><span class="n">callback</span><span class="p">);</span>
    <span class="p">}</span>
  <span class="p">}</span>
<span class="p">}</span>
</pre></div>
</div>
Y
Yu Yang 已提交
415
<p>The <code class="code docutils literal"><span class="pre">prefetch</span></code> function specifies the rows that need to be fetched from the parameter server during training. It is only useful for remote sparse training. In remote sparse training, the full parameter matrix is stored in a distributed manner on the parameter server. When the layer uses a batch for training, only a subset of locations of the input is non-zero in this batch. Thus, this layer only needs the rows of the transformation matrix corresponding to the locations of these non-zero entries. The <code class="code docutils literal"><span class="pre">prefetch</span></code> function specifies the ids of these rows.</p>
Y
Yu Yang 已提交
416
<p>Most of the layers do not need the remote sparse training function. You do not need to override this function in this case.</p>
Y
Yu Yang 已提交
417
<div class="highlight-c++"><div class="highlight"><pre><span></span><span class="kt">void</span> <span class="n">FullyConnectedLayer</span><span class="o">::</span><span class="n">prefetch</span><span class="p">()</span> <span class="p">{</span>
Y
Yu Yang 已提交
418 419
  <span class="k">for</span> <span class="p">(</span><span class="kt">size_t</span> <span class="n">i</span> <span class="o">=</span> <span class="mi">0</span><span class="p">;</span> <span class="n">i</span> <span class="o">!=</span> <span class="n">inputLayers_</span><span class="p">.</span><span class="n">size</span><span class="p">();</span> <span class="o">++</span><span class="n">i</span><span class="p">)</span> <span class="p">{</span>
    <span class="k">auto</span><span class="o">*</span> <span class="n">sparseParam</span> <span class="o">=</span>
Y
Yu Yang 已提交
420
        <span class="k">dynamic_cast</span><span class="o">&lt;</span><span class="n">SparsePrefetchRowCpuMatrix</span><span class="o">*&gt;</span><span class="p">(</span><span class="n">weights_</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">-&gt;</span><span class="n">getW</span><span class="p">().</span><span class="n">get</span><span class="p">());</span>
Y
Yu Yang 已提交
421 422 423 424 425 426 427 428
    <span class="k">if</span> <span class="p">(</span><span class="n">sparseParam</span><span class="p">)</span> <span class="p">{</span>
      <span class="n">MatrixPtr</span> <span class="n">input</span> <span class="o">=</span> <span class="n">getInputValue</span><span class="p">(</span><span class="n">i</span><span class="p">);</span>
      <span class="n">sparseParam</span><span class="o">-&gt;</span><span class="n">addRows</span><span class="p">(</span><span class="n">input</span><span class="p">);</span>
    <span class="p">}</span>
  <span class="p">}</span>
<span class="p">}</span>
</pre></div>
</div>
Y
Yu Yang 已提交
429 430
<p>Finally, you can use <code class="code docutils literal"><span class="pre">REGISTER_LAYER(fc,</span> <span class="pre">FullyConnectedLayer);</span></code> to register the layer. <code class="code docutils literal"><span class="pre">fc</span></code> is the identifier of the layer, and <code class="code docutils literal"><span class="pre">FullyConnectedLayer</span></code> is the class name of the layer.</p>
<div class="highlight-c++"><div class="highlight"><pre><span></span><span class="k">namespace</span> <span class="n">paddle</span> <span class="p">{</span>
Y
Yu Yang 已提交
431 432 433 434
<span class="n">REGISTER_LAYER</span><span class="p">(</span><span class="n">fc</span><span class="p">,</span> <span class="n">FullyConnectedLayer</span><span class="p">);</span>
<span class="p">}</span>
</pre></div>
</div>
Y
Yu Yang 已提交
435 436 437
<p>If the <code class="code docutils literal"><span class="pre">cpp</span></code> file is put into <code class="code docutils literal"><span class="pre">paddle/gserver/layers</span></code>, it will be automatically added to the compilation list.</p>
</div>
<div class="section" id="write-gradient-check-unit-test">
438
<h2>Write Gradient Check Unit Test<a class="headerlink" href="#write-gradient-check-unit-test" title="Permalink to this headline"></a></h2>
Y
Yu Yang 已提交
439 440
<p>An easy way to verify the correctness of a new layer&#8217;s implementation is to write a gradient check unit test. A gradient check unit test utilizes the finite difference method to verify the gradient of a layer. It modifies the input with a small perturbation <span class="math">\(\Delta x\)</span> and observes the change of the output <span class="math">\(\Delta y\)</span>; the gradient can then be computed as <span class="math">\(\frac{\Delta y}{\Delta x }\)</span>. This gradient can be compared with the gradient computed by the <code class="code docutils literal"><span class="pre">backward</span></code> function of the layer to ensure the correctness of the gradient computation. Notice that the gradient check only tests the correctness of the gradient computation; it does not necessarily guarantee the correctness of the implementation of the <code class="code docutils literal"><span class="pre">forward</span></code> and <code class="code docutils literal"><span class="pre">backward</span></code> functions. You need to write more sophisticated unit tests to make sure your layer is implemented correctly.</p>
<p>All the gradient check unit tests are located in <code class="code docutils literal"><span class="pre">paddle/gserver/tests/test_LayerGrad.cpp</span></code>. You are recommended to put your test into a new test file if you are planning to write a new layer. The gradient check unit test of the fully connected layer is listed below. It has the following steps.</p>
441
<ul class="simple">
Y
Yu Yang 已提交
442 443
<li><dl class="first docutils">
<dt>Create layer configuration. A layer configuration can include the following attributes:</dt>
444
<dd><ul class="first last">
Y
Yu Yang 已提交
445 446 447 448 449 450 451 452 453 454 455 456 457 458
<li>size of the bias parameter. (4096 in our example)</li>
<li>type of the layer. (fc in our example)</li>
<li>size of the layer. (4096 in our example)</li>
<li>activation type. (softmax in our example)</li>
<li>dropout rate. (0.1 in our example)</li>
</ul>
</dd>
</dl>
</li>
<li><dl class="first docutils">
<dt>configure the input of the layer. In our example, we have only one input.</dt>
<dd><ul class="first last">
<li><dl class="first docutils">
<dt>type of the input (<code class="code docutils literal"><span class="pre">INPUT_DATA</span></code> in our example). It can be one of the following types:</dt>
459
<dd><ul class="first last">
Y
Yu Yang 已提交
460 461 462 463 464 465 466 467 468 469 470 471
<li><code class="code docutils literal"><span class="pre">INPUT_DATA</span></code>: dense vector.</li>
<li><code class="code docutils literal"><span class="pre">INPUT_LABEL</span></code>: integer.</li>
<li><code class="code docutils literal"><span class="pre">INPUT_DATA_TARGET</span></code>: dense vector, but it is not used to compute gradients.</li>
<li><code class="code docutils literal"><span class="pre">INPUT_SEQUENCE_DATA</span></code>: dense vector with sequence information.</li>
<li><code class="code docutils literal"><span class="pre">INPUT_HASSUB_SEQUENCE_DATA</span></code>: dense vector with both sequence and sub-sequence information.</li>
<li><code class="code docutils literal"><span class="pre">INPUT_SEQUENCE_LABEL</span></code>: integer with sequence information.</li>
<li><code class="code docutils literal"><span class="pre">INPUT_SPARSE_NON_VALUE_DATA</span></code>: 0-1 sparse data.</li>
<li><code class="code docutils literal"><span class="pre">INPUT_SPARSE_FLOAT_VALUE_DATA</span></code>: float sparse data.</li>
</ul>
</dd>
</dl>
</li>
472 473 474 475
<li>name of the input. (<code class="code docutils literal"><span class="pre">layer_0</span></code> in our example)</li>
<li>size of the input. (8192 in our example)</li>
<li>number of non-zeros, only useful for sparse inputs.</li>
<li>format of sparse data, only useful for sparse inputs.</li>
Y
Yu Yang 已提交
476 477 478 479
</ul>
</dd>
</dl>
</li>
480
<li>each input needs to call <code class="code docutils literal"><span class="pre">config.layerConfig.add_inputs();</span></code> once.</li>
Y
Yu Yang 已提交
481 482
<li><dl class="first docutils">
<dt>call <code class="code docutils literal"><span class="pre">testLayerGrad</span></code> to perform gradient checks. It has the following arguments.</dt>
483
<dd><ul class="first last">
Y
Yu Yang 已提交
484
<li>layer and input configurations. (<code class="code docutils literal"><span class="pre">config</span></code> in our example)</li>
485
<li>type of the layer. (<code class="code docutils literal"><span class="pre">fc</span></code> in our example)</li>
Y
Yu Yang 已提交
486 487 488 489 490 491 492 493 494 495 496 497 498 499
<li>batch size of the gradient check. (100 in our example)</li>
<li>whether the input is transposed. Most layers need to set it to <code class="code docutils literal"><span class="pre">false</span></code>. (<code class="code docutils literal"><span class="pre">false</span></code> in our example)</li>
<li>whether to use weights. Some layers or activations perform normalization so that the sum of their output is a constant. For example, the sum of output of a softmax activation is one. In this case, we cannot correctly compute the gradients using regular gradient check techniques. A weighted sum of the output, which is not a constant, is utilized to compute the gradients. (<code class="code docutils literal"><span class="pre">true</span></code> in our example, because the activation of a fully connected layer can be softmax)</li>
</ul>
</dd>
</dl>
</li>
</ul>
<div class="highlight-c++"><div class="highlight"><pre><span></span><span class="kt">void</span> <span class="nf">testFcLayer</span><span class="p">(</span><span class="n">string</span> <span class="n">format</span><span class="p">,</span> <span class="kt">size_t</span> <span class="n">nnz</span><span class="p">)</span> <span class="p">{</span>
  <span class="c1">// Create layer configuration.</span>
  <span class="n">TestConfig</span> <span class="n">config</span><span class="p">;</span>
  <span class="n">config</span><span class="p">.</span><span class="n">biasSize</span> <span class="o">=</span> <span class="mi">4096</span><span class="p">;</span>
  <span class="n">config</span><span class="p">.</span><span class="n">layerConfig</span><span class="p">.</span><span class="n">set_type</span><span class="p">(</span><span class="s">&quot;fc&quot;</span><span class="p">);</span>
  <span class="n">config</span><span class="p">.</span><span class="n">layerConfig</span><span class="p">.</span><span class="n">set_size</span><span class="p">(</span><span class="mi">4096</span><span class="p">);</span>
500
  <span class="n">config</span><span class="p">.</span><span class="n">layerConfig</span><span class="p">.</span><span class="n">set_active_type</span><span class="p">(</span><span class="s">&quot;softmax&quot;</span><span class="p">);</span>
Y
Yu Yang 已提交
501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524
  <span class="n">config</span><span class="p">.</span><span class="n">layerConfig</span><span class="p">.</span><span class="n">set_drop_rate</span><span class="p">(</span><span class="mf">0.1</span><span class="p">);</span>
  <span class="c1">// Setup inputs.</span>
  <span class="n">config</span><span class="p">.</span><span class="n">inputDefs</span><span class="p">.</span><span class="n">push_back</span><span class="p">(</span>
      <span class="p">{</span><span class="n">INPUT_DATA</span><span class="p">,</span> <span class="s">&quot;layer_0&quot;</span><span class="p">,</span> <span class="mi">8192</span><span class="p">,</span> <span class="n">nnz</span><span class="p">,</span> <span class="n">ParaSparse</span><span class="p">(</span><span class="n">format</span><span class="p">)});</span>
    <span class="n">config</span><span class="p">.</span><span class="n">layerConfig</span><span class="p">.</span><span class="n">add_inputs</span><span class="p">();</span>
  <span class="n">LOG</span><span class="p">(</span><span class="n">INFO</span><span class="p">)</span> <span class="o">&lt;&lt;</span> <span class="n">config</span><span class="p">.</span><span class="n">inputDefs</span><span class="p">[</span><span class="mi">0</span><span class="p">].</span><span class="n">sparse</span><span class="p">.</span><span class="n">sparse</span> <span class="o">&lt;&lt;</span> <span class="s">&quot; &quot;</span>
            <span class="o">&lt;&lt;</span> <span class="n">config</span><span class="p">.</span><span class="n">inputDefs</span><span class="p">[</span><span class="mi">0</span><span class="p">].</span><span class="n">sparse</span><span class="p">.</span><span class="n">format</span><span class="p">;</span>
  <span class="k">for</span> <span class="p">(</span><span class="k">auto</span> <span class="nl">useGpu</span> <span class="p">:</span> <span class="p">{</span><span class="nb">false</span><span class="p">,</span> <span class="nb">true</span><span class="p">})</span> <span class="p">{</span>
    <span class="n">testLayerGrad</span><span class="p">(</span><span class="n">config</span><span class="p">,</span> <span class="s">&quot;fc&quot;</span><span class="p">,</span> <span class="mi">100</span><span class="p">,</span> <span class="cm">/* trans */</span> <span class="nb">false</span><span class="p">,</span> <span class="n">useGpu</span><span class="p">,</span>
                  <span class="cm">/* weight */</span> <span class="nb">true</span><span class="p">);</span>
  <span class="p">}</span>
<span class="p">}</span>
</pre></div>
</div>
<p>If you are creating a new file for the test, such as <code class="code docutils literal"><span class="pre">paddle/gserver/tests/test_FCGrad.cpp</span></code>, you need to add the file to <code class="code docutils literal"><span class="pre">paddle/gserver/tests/CMakeLists.txt</span></code>. An example is given below. All the unit tests will run when you execute the command <code class="code docutils literal"><span class="pre">make</span> <span class="pre">tests</span></code>. Notice that some layers might need high accuracy for the gradient check unit tests to work well. In that case, you need to set <code class="code docutils literal"><span class="pre">WITH_DOUBLE</span></code> to <cite>ON</cite> when configuring cmake.</p>
<div class="highlight-bash"><div class="highlight"><pre><span></span>add_unittest_without_exec<span class="o">(</span>test_FCGrad
    test_FCGrad.cpp
    LayerGradUtil.cpp
    TestUtil.cpp<span class="o">)</span>

add_test<span class="o">(</span>NAME test_FCGrad
    COMMAND test_FCGrad<span class="o">)</span>
</pre></div>
</div>
Y
Yu Yang 已提交
525 526
</div>
<div class="section" id="implement-python-wrapper">
527
<h2>Implement Python Wrapper<a class="headerlink" href="#implement-python-wrapper" title="Permalink to this headline"></a></h2>
Y
Yu Yang 已提交
528
<p>Implementing Python wrapper allows us to use the added layer in configuration files. All the Python wrappers are in file <code class="code docutils literal"><span class="pre">python/paddle/trainer/config_parser.py</span></code>. An example of the Python wrapper for fully connected layer is listed below. It has the following steps:</p>
529 530
<ul class="simple">
<li>Use <code class="code docutils literal"><span class="pre">&#64;config_layer('fc')</span></code> as the decorator for the Python wrapper class. <code class="code docutils literal"><span class="pre">fc</span></code> is the identifier of the layer.</li>
Y
Yu Yang 已提交
531 532
<li><dl class="first docutils">
<dt>Implement the <code class="code docutils literal"><span class="pre">__init__</span></code> constructor function.</dt>
533
<dd><ul class="first last">
Y
Yu Yang 已提交
534
<li>It first calls the <code class="code docutils literal"><span class="pre">super(FCLayer,</span> <span class="pre">self).__init__(name,</span> <span class="pre">'fc',</span> <span class="pre">size,</span> <span class="pre">inputs=inputs,</span> <span class="pre">**xargs)</span></code> base constructor function. <code class="code docutils literal"><span class="pre">FCLayer</span></code> is the Python wrapper class name, and <code class="code docutils literal"><span class="pre">fc</span></code> is the layer identifier name. They must be correct in order for the wrapper to work.</li>
Y
Yu Yang 已提交
535 536
<li>Then it computes the size and format (whether sparse) of each transformation matrix as well as the size of the bias parameter.</li>
</ul>
Y
Yu Yang 已提交
537 538
</dd>
</dl>
Y
Yu Yang 已提交
539 540 541 542
</li>
</ul>
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="nd">@config_layer</span><span class="p">(</span><span class="s1">&#39;fc&#39;</span><span class="p">)</span>
<span class="k">class</span> <span class="nc">FCLayer</span><span class="p">(</span><span class="n">LayerBase</span><span class="p">):</span>
543
    <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span>
Y
Yu Yang 已提交
544 545 546 547 548 549
            <span class="bp">self</span><span class="p">,</span>
            <span class="n">name</span><span class="p">,</span>
            <span class="n">size</span><span class="p">,</span>
            <span class="n">inputs</span><span class="p">,</span>
            <span class="n">bias</span><span class="o">=</span><span class="bp">True</span><span class="p">,</span>
            <span class="o">**</span><span class="n">xargs</span><span class="p">):</span>
550
        <span class="nb">super</span><span class="p">(</span><span class="n">FCLayer</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="s1">&#39;fc&#39;</span><span class="p">,</span> <span class="n">size</span><span class="p">,</span> <span class="n">inputs</span><span class="o">=</span><span class="n">inputs</span><span class="p">,</span> <span class="o">**</span><span class="n">xargs</span><span class="p">)</span>
Y
Yu Yang 已提交
551 552 553 554 555 556 557 558 559 560 561 562 563 564
        <span class="k">for</span> <span class="n">input_index</span> <span class="ow">in</span> <span class="nb">xrange</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">inputs</span><span class="p">)):</span>
            <span class="n">input_layer</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_input_layer</span><span class="p">(</span><span class="n">input_index</span><span class="p">)</span>
            <span class="n">psize</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">config</span><span class="o">.</span><span class="n">size</span> <span class="o">*</span> <span class="n">input_layer</span><span class="o">.</span><span class="n">size</span>
            <span class="n">dims</span> <span class="o">=</span> <span class="p">[</span><span class="n">input_layer</span><span class="o">.</span><span class="n">size</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">config</span><span class="o">.</span><span class="n">size</span><span class="p">]</span>
            <span class="n">format</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">inputs</span><span class="p">[</span><span class="n">input_index</span><span class="p">]</span><span class="o">.</span><span class="n">format</span>
            <span class="n">sparse</span> <span class="o">=</span> <span class="n">format</span> <span class="o">==</span> <span class="s2">&quot;csr&quot;</span> <span class="ow">or</span> <span class="n">format</span> <span class="o">==</span> <span class="s2">&quot;csc&quot;</span>
            <span class="k">if</span> <span class="n">sparse</span><span class="p">:</span>
                <span class="n">psize</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">inputs</span><span class="p">[</span><span class="n">input_index</span><span class="p">]</span><span class="o">.</span><span class="n">nnz</span>
            <span class="bp">self</span><span class="o">.</span><span class="n">create_input_parameter</span><span class="p">(</span><span class="n">input_index</span><span class="p">,</span> <span class="n">psize</span><span class="p">,</span> <span class="n">dims</span><span class="p">,</span> <span class="n">sparse</span><span class="p">,</span> <span class="n">format</span><span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">create_bias_parameter</span><span class="p">(</span><span class="n">bias</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">config</span><span class="o">.</span><span class="n">size</span><span class="p">)</span>
</pre></div>
</div>
<p>In network configuration, the layer can be specified using the following code snippet. The arguments of this class are:</p>
<ul class="simple">
Y
Yu Yang 已提交
565 566 567 568 569
<li><code class="code docutils literal"><span class="pre">name</span></code> is the name identifier of the layer instance.</li>
<li><code class="code docutils literal"><span class="pre">type</span></code> is the type of the layer, specified using layer identifier.</li>
<li><code class="code docutils literal"><span class="pre">size</span></code> is the output size of the layer.</li>
<li><code class="code docutils literal"><span class="pre">bias</span></code> specifies whether this layer instance has bias.</li>
<li><code class="code docutils literal"><span class="pre">inputs</span></code> specifies a list of layer instance names as inputs.</li>
Y
Yu Yang 已提交
570 571 572 573 574 575 576 577 578 579
</ul>
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="n">Layer</span><span class="p">(</span>
    <span class="n">name</span> <span class="o">=</span> <span class="s2">&quot;fc1&quot;</span><span class="p">,</span>
    <span class="nb">type</span> <span class="o">=</span> <span class="s2">&quot;fc&quot;</span><span class="p">,</span>
    <span class="n">size</span> <span class="o">=</span> <span class="mi">64</span><span class="p">,</span>
    <span class="n">bias</span> <span class="o">=</span> <span class="bp">True</span><span class="p">,</span>
    <span class="n">inputs</span> <span class="o">=</span> <span class="p">[</span><span class="n">Input</span><span class="p">(</span><span class="s2">&quot;pool3&quot;</span><span class="p">)]</span>
<span class="p">)</span>
</pre></div>
</div>
Y
Yu Yang 已提交
580
<p>You are also recommended to implement a helper for the Python wrapper, which makes it easier to write models. You can refer to <code class="code docutils literal"><span class="pre">python/paddle/trainer_config_helpers/layers.py</span></code> for examples.</p>
581
</div>
Y
Yu Yang 已提交
582 583 584
</div>


585
           </div>
Y
Yu Yang 已提交
586
          </div>
587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609
          <footer>
  
    <div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
      
        <a href="contribute_to_paddle_en.html" class="btn btn-neutral float-right" title="Contribute Code" accesskey="n">Next <span class="fa fa-arrow-circle-right"></span></a>
      
      
        <a href="../usage/k8s/k8s_aws_en.html" class="btn btn-neutral" title="Distributed PaddlePaddle Training on AWS with Kubernetes" accesskey="p"><span class="fa fa-arrow-circle-left"></span> Previous</a>
      
    </div>
  

  <hr/>

  <div role="contentinfo">
    <p>
        &copy; Copyright 2016, PaddlePaddle developers.

    </p>
  </div>
  Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>. 

</footer>
Y
Yu Yang 已提交
610 611 612

        </div>
      </div>
613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648

    </section>

  </div>
  


  

    <script type="text/javascript">
        var DOCUMENTATION_OPTIONS = {
            URL_ROOT:'../../',
            VERSION:'',
            COLLAPSE_INDEX:false,
            FILE_SUFFIX:'.html',
            HAS_SOURCE:  true,
            SOURCELINK_SUFFIX: ".txt",
        };
    </script>
      <script type="text/javascript" src="../../_static/jquery.js"></script>
      <script type="text/javascript" src="../../_static/underscore.js"></script>
      <script type="text/javascript" src="../../_static/doctools.js"></script>
      <script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
       
  

  
  
    <script type="text/javascript" src="../../_static/js/theme.js"></script>
  
  
  <script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/js/bootstrap.min.js" integrity="sha384-Tc5IQib027qvyjSMfHjOMaLkfuWVxZxUPnCJA7l2mCWNIpG9mGCD8wGNIcPD7Txa" crossorigin="anonymous"></script>
  <script src="https://cdn.jsdelivr.net/perfect-scrollbar/0.6.14/js/perfect-scrollbar.jquery.min.js"></script>
  <script src="../../_static/js/paddle_doc_init.js"></script> 

</body>
Y
Yu Yang 已提交
649
</html>