  <div class="section" id="rnn">
<h1>RNN配置<a class="headerlink" href="#rnn" title="永久链接至标题"></a></h1>
<p>本教程将指导你如何在 PaddlePaddle
中配置循环神经网络(RNN)。PaddlePaddle
高度支持灵活和高效的循环神经网络配置。 在本教程中,您将了解如何:</p>
<ul class="simple">
<li>配置循环神经网络架构。</li>
<li>使用学习完成的循环神经网络模型生成序列。</li>
</ul>
We will use a vanilla recurrent neural network and a sequence-to-sequence model to guide you through these steps. The code of the sequence-to-sequence model can be found at [book/08.machine_translation](https://github.com/PaddlePaddle/book/tree/develop/08.machine_translation). The script that provides the wmt14 data is [python/paddle/v2/dataset/wmt14.py](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/dataset/wmt14.py).
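For orientation, here is a minimal sketch of how that dataset module can be consumed. The dictionary size is a placeholder, and the exact per-sample layout (source ids, target ids, next-target ids) is an assumption based on common paddle.v2 dataset conventions, not something this tutorial states:

```python
import paddle.v2 as paddle

dict_size = 30000  # hypothetical vocabulary size

# Each sample is assumed to be a tuple:
# (source word ids, target word ids, next-target word ids).
train_reader = paddle.dataset.wmt14.train(dict_size)
src_ids, trg_ids, trg_ids_next = next(train_reader())
print(len(src_ids), len(trg_ids), len(trg_ids_next))
```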
## Configuring the Recurrent Neural Network Architecture

### Simple Gated Recurrent Neural Network

A recurrent neural network processes a sequence step by step over time. The following is an example of an LSTM architecture:

![bi_lstm](../../_images/bi_lstm.jpg)
Generally speaking, a recurrent network performs the following operations from $t=1$ to $t=T$, or in reverse from $t=T$ to $t=1$:

$$x_{t+1} = f_x(x_t), \qquad y_t = f_y(x_t)$$
where $f_x(\cdot)$ is called the **step function** (the function executed at a single time step), and $f_y(\cdot)$ is called the **output function**. In a vanilla recurrent neural network, both the step function and the output function are very simple. However, PaddlePaddle can implement complex network configurations by modifying these two functions. We will use the sequence-to-sequence model to demonstrate how to configure a complex recurrent neural network model. In this section, we use a simple vanilla recurrent neural network as an example of configuring a simple recurrent network with `recurrent_group`. Note that if you only need a plain RNN, GRU, or LSTM, `grumemory` and `lstmemory` are recommended, because they are computationally more efficient than `recurrent_group`.
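For instance, a minimal sketch of the convenient wrappers around those efficient primitives, using the same `paddle.networks` helpers that appear in the encoder later in this tutorial (the vocabulary and layer sizes here are arbitrary placeholders):

```python
import paddle.v2 as paddle

words = paddle.layer.data(
    name='word', type=paddle.data_type.integer_value_sequence(10000))
emb = paddle.layer.embedding(input=words, size=64)

# Efficient built-in recurrent networks, preferred over recurrent_group
# whenever a plain GRU or LSTM suffices; these helpers wrap grumemory
# and lstmemory internally.
gru = paddle.networks.simple_gru(input=emb, size=128)
lstm = paddle.networks.simple_lstm(input=emb, size=128)
```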
For a vanilla RNN, the **step function** at each time step is:

$$x_{t+1} = W_x x_t + W_i I_t + b$$

where $x_t$ is the RNN state, $I_t$ is the input, $W_x$ and $W_i$ are the transformation matrices for the RNN state and the input, respectively, and $b$ is the bias. Its **output function** simply takes $x_t$ as the output.
`recurrent_group` is the most important tool for constructing a recurrent neural network. It defines the **step function**, the **output function**, and the inputs of the recurrent network. Note that the `step` argument of this function needs to implement both the step function and the output function:
<div class="code python highlight-default"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">simple_rnn</span><span class="p">(</span><span class="nb">input</span><span class="p">,</span>
               <span class="n">size</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
               <span class="n">name</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
               <span class="n">reverse</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span>
               <span class="n">rnn_bias_attr</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
               <span class="n">act</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
               <span class="n">rnn_layer_attr</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
    <span class="k">def</span> <span class="nf">__rnn_step__</span><span class="p">(</span><span class="n">ipt</span><span class="p">):</span>
289 290 291 292 293 294 295 296
       <span class="n">out_mem</span> <span class="o">=</span> <span class="n">paddle</span><span class="o">.</span><span class="n">layer</span><span class="o">.</span><span class="n">memory</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="n">name</span><span class="p">,</span> <span class="n">size</span><span class="o">=</span><span class="n">size</span><span class="p">)</span>
       <span class="n">rnn_out</span> <span class="o">=</span> <span class="n">paddle</span><span class="o">.</span><span class="n">layer</span><span class="o">.</span><span class="n">mixed</span><span class="p">(</span><span class="nb">input</span> <span class="o">=</span> <span class="p">[</span><span class="n">paddle</span><span class="o">.</span><span class="n">layer</span><span class="o">.</span><span class="n">full_matrix_projection</span><span class="p">(</span><span class="nb">input</span><span class="o">=</span><span class="n">ipt</span><span class="p">),</span>
                                             <span class="n">paddle</span><span class="o">.</span><span class="n">layer</span><span class="o">.</span><span class="n">full_matrix_projection</span><span class="p">(</span><span class="nb">input</span><span class="o">=</span><span class="n">out_mem</span><span class="p">)],</span>
                                    <span class="n">name</span> <span class="o">=</span> <span class="n">name</span><span class="p">,</span>
                                    <span class="n">bias_attr</span> <span class="o">=</span> <span class="n">rnn_bias_attr</span><span class="p">,</span>
                                    <span class="n">act</span> <span class="o">=</span> <span class="n">act</span><span class="p">,</span>
                                    <span class="n">layer_attr</span> <span class="o">=</span> <span class="n">rnn_layer_attr</span><span class="p">,</span>
                                    <span class="n">size</span> <span class="o">=</span> <span class="n">size</span><span class="p">)</span>
297
       <span class="k">return</span> <span class="n">rnn_out</span>
298 299 300 301
    <span class="k">return</span> <span class="n">paddle</span><span class="o">.</span><span class="n">layer</span><span class="o">.</span><span class="n">recurrent_group</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="s1">&#39;</span><span class="si">%s</span><span class="s1">_recurrent_group&#39;</span> <span class="o">%</span> <span class="n">name</span><span class="p">,</span>
                                        <span class="n">step</span><span class="o">=</span><span class="n">__rnn_step__</span><span class="p">,</span>
                                        <span class="n">reverse</span><span class="o">=</span><span class="n">reverse</span><span class="p">,</span>
                                        <span class="nb">input</span><span class="o">=</span><span class="nb">input</span><span class="p">)</span>
302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319
</pre></div>
</div>
PaddlePaddle implements the step function with a "Memory" (memory module). **Memory** is the most important concept when constructing recurrent neural networks in PaddlePaddle. A Memory is a state that is used recurrently in the step function, as in $x_{t+1} = f_x(x_t)$. A Memory has an **output** and an **input**: the output of the Memory at the current time step is used as the input of the Memory at the next time step. A Memory can also have a **boot layer**, whose output is used as the initial value of the Memory. In our example, the output of the gated recurrent unit is used as the output Memory. Note that the `rnn_out` layer has the same name as `out_mem`; this means that the output of `rnn_out` ($x_{t+1}$) is used as the **output** of the `out_mem` Memory.
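A minimal sketch of this name-matching convention, assembled from the same layer calls this tutorial uses; the sizes, layer names, and the data-layer boot value are placeholders of my own (a real boot layer would usually be computed inside the network, as `decoder_boot` is below):

```python
import paddle.v2 as paddle

size = 128  # hypothetical state size

words = paddle.layer.data(
    name='word', type=paddle.data_type.integer_value_sequence(10000))
emb = paddle.layer.embedding(input=words, size=size)

# Boot layer: its output provides the memory's value at the first time step.
boot = paddle.layer.data(
    name='init_state', type=paddle.data_type.dense_vector(size))

def step(ipt):
    # At step t, the memory returns the output of the layer sharing its
    # name ('state') from step t-1; at t=0 it returns boot's output.
    mem = paddle.layer.memory(name='state', size=size, boot_layer=boot)
    return paddle.layer.mixed(
        name='state',
        size=size,
        input=[paddle.layer.full_matrix_projection(input=ipt),
               paddle.layer.full_matrix_projection(input=mem)])

rnn = paddle.layer.recurrent_group(name='boot_rnn', step=step, input=emb)
```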
A Memory can also be a sequence. In this case, at each time step we have a sequence as the state of the recurrent network, which is useful when constructing very complex recurrent networks. Other advanced features include defining multiple Memories, and using subsequences to define hierarchical recurrent network architectures.

We return `rnn_out` at the end of the function. This means that the output of the `rnn_out` layer is used as the **output** function of the gated recurrent neural network.
### Sequence to Sequence Model with Attention

We will use the sequence to sequence model with attention as an example to demonstrate how to configure a complex recurrent neural network model. An illustration of the model is shown below:

![encoder-decoder-attention-model](../../_images/encoder-decoder-attention-model.png)
In this model, the source sequence $S = \{s_1, \dots, s_T\}$ is encoded with a bidirectional gated recurrent neural network. The hidden states of the bidirectional gated recurrent network $H_S = \{H_1, \dots, H_T\}$ are called *encoder vectors*. The decoder is a gated recurrent neural network. When decoding each $y_t$, the gated recurrent network generates a series of weights $W_S^t = \{W_1^t, \dots, W_T^t\}$, which are used to compute a weighted sum of the encoder vectors. The weighted sum is then used to generate $y_t$.
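As a concrete picture of what the decoder computes at one step, the NumPy sketch below produces softmax-normalized weights and the weighted sum of encoder vectors. It uses a plain dot-product score for brevity, whereas the `simple_attention` helper used later computes its scores with learned projections; all sizes are arbitrary:

```python
import numpy as np

rng = np.random.default_rng(0)
T, hidden = 6, 8                   # source length and vector width (arbitrary)
H = rng.normal(size=(T, hidden))   # encoder vectors H_1 ... H_T
s = rng.normal(size=hidden)        # decoder state while generating y_t

scores = H @ s                     # one score per source position
weights = np.exp(scores - scores.max())
weights /= weights.sum()           # W_1^t ... W_T^t, sums to 1
context = weights @ H              # weighted sum used to generate y_t
print(weights.round(3), context.shape)
```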
The encoder part of the model is listed below. It calls `grumemory` to represent the gated recurrent neural network. This is the recommended way of using a recurrent neural network when the architecture is simple, because it is faster than `recurrent_group`. We have implemented most of the commonly used recurrent neural network architectures; refer to api_trainer_config_helpers_layers for more details.

We also project the encoder vectors into a `decoder_size`-dimensional space, take the first instance of the backward recurrent network, and project it into a `decoder_size`-dimensional space:
<div class="code python highlight-default"><div class="highlight"><pre><span></span><span class="c1"># 定义源语句的数据层</span>
334 335 336
<span class="n">src_word_id</span> <span class="o">=</span> <span class="n">paddle</span><span class="o">.</span><span class="n">layer</span><span class="o">.</span><span class="n">data</span><span class="p">(</span>
    <span class="n">name</span><span class="o">=</span><span class="s1">&#39;source_language_word&#39;</span><span class="p">,</span>
    <span class="nb">type</span><span class="o">=</span><span class="n">paddle</span><span class="o">.</span><span class="n">data_type</span><span class="o">.</span><span class="n">integer_value_sequence</span><span class="p">(</span><span class="n">source_dict_dim</span><span class="p">))</span>
337
<span class="c1"># 计算每个词的词向量</span>
338
<span class="n">src_embedding</span> <span class="o">=</span> <span class="n">paddle</span><span class="o">.</span><span class="n">layer</span><span class="o">.</span><span class="n">embedding</span><span class="p">(</span>
339 340
    <span class="nb">input</span><span class="o">=</span><span class="n">src_word_id</span><span class="p">,</span>
    <span class="n">size</span><span class="o">=</span><span class="n">word_vector_dim</span><span class="p">,</span>
341
    <span class="n">param_attr</span><span class="o">=</span><span class="n">paddle</span><span class="o">.</span><span class="n">attr</span><span class="o">.</span><span class="n">ParamAttr</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="s1">&#39;_source_language_embedding&#39;</span><span class="p">))</span>
342
<span class="c1"># 应用前向循环神经网络</span>
343 344
<span class="n">src_forward</span> <span class="o">=</span> <span class="n">paddle</span><span class="o">.</span><span class="n">networks</span><span class="o">.</span><span class="n">simple_gru</span><span class="p">(</span>
    <span class="nb">input</span><span class="o">=</span><span class="n">src_embedding</span><span class="p">,</span> <span class="n">size</span><span class="o">=</span><span class="n">encoder_size</span><span class="p">)</span>
345
<span class="c1"># 应用反向递归神经网络(reverse=True表示反向循环神经网络)</span>
346 347
<span class="n">src_backward</span> <span class="o">=</span> <span class="n">paddle</span><span class="o">.</span><span class="n">networks</span><span class="o">.</span><span class="n">simple_gru</span><span class="p">(</span>
    <span class="nb">input</span><span class="o">=</span><span class="n">src_embedding</span><span class="p">,</span> <span class="n">size</span><span class="o">=</span><span class="n">encoder_size</span><span class="p">,</span> <span class="n">reverse</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
348
<span class="c1"># 将循环神经网络的前向和反向部分混合在一起</span>
349
<span class="n">encoded_vector</span> <span class="o">=</span> <span class="n">paddle</span><span class="o">.</span><span class="n">layer</span><span class="o">.</span><span class="n">concat</span><span class="p">(</span><span class="nb">input</span><span class="o">=</span><span class="p">[</span><span class="n">src_forward</span><span class="p">,</span> <span class="n">src_backward</span><span class="p">])</span>
350 351

<span class="c1"># 投射编码向量到 decoder_size</span>
352 353 354
<span class="n">encoded_proj</span> <span class="o">=</span> <span class="n">paddle</span><span class="o">.</span><span class="n">layer</span><span class="o">.</span><span class="n">mixed</span><span class="p">(</span>
    <span class="n">size</span><span class="o">=</span><span class="n">decoder_size</span><span class="p">,</span>
    <span class="nb">input</span><span class="o">=</span><span class="n">paddle</span><span class="o">.</span><span class="n">layer</span><span class="o">.</span><span class="n">full_matrix_projection</span><span class="p">(</span><span class="n">encoded_vector</span><span class="p">))</span>
355 356

<span class="c1"># 计算反向RNN的第一个实例</span>
357
<span class="n">backward_first</span> <span class="o">=</span> <span class="n">paddle</span><span class="o">.</span><span class="n">layer</span><span class="o">.</span><span class="n">first_seq</span><span class="p">(</span><span class="nb">input</span><span class="o">=</span><span class="n">src_backward</span><span class="p">)</span>
358 359

<span class="c1"># 投射反向RNN的第一个实例到 decoder size</span>
360 361 362 363
<span class="n">decoder_boot</span> <span class="o">=</span> <span class="n">paddle</span><span class="o">.</span><span class="n">layer</span><span class="o">.</span><span class="n">mixed</span><span class="p">(</span>
   <span class="n">size</span><span class="o">=</span><span class="n">decoder_size</span><span class="p">,</span>
   <span class="n">act</span><span class="o">=</span><span class="n">paddle</span><span class="o">.</span><span class="n">activation</span><span class="o">.</span><span class="n">Tanh</span><span class="p">(),</span>
   <span class="nb">input</span><span class="o">=</span><span class="n">paddle</span><span class="o">.</span><span class="n">layer</span><span class="o">.</span><span class="n">full_matrix_projection</span><span class="p">(</span><span class="n">backward_first</span><span class="p">))</span>
364 365 366 367
</pre></div>
</div>
The decoder uses `recurrent_group` to define the recurrent neural network. The step and output functions are defined in `gru_decoder_with_attention`:
<div class="code python highlight-default"><div class="highlight"><pre><span></span><span class="n">group_input1</span> <span class="o">=</span> <span class="n">paddle</span><span class="o">.</span><span class="n">layer</span><span class="o">.</span><span class="n">StaticInput</span><span class="p">(</span><span class="nb">input</span><span class="o">=</span><span class="n">encoded_vector</span><span class="p">,</span> <span class="n">is_seq</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="n">group_input2</span> <span class="o">=</span> <span class="n">paddle</span><span class="o">.</span><span class="n">layer</span><span class="o">.</span><span class="n">StaticInput</span><span class="p">(</span><span class="nb">input</span><span class="o">=</span><span class="n">encoded_proj</span><span class="p">,</span> <span class="n">is_seq</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="n">group_inputs</span> <span class="o">=</span> <span class="p">[</span><span class="n">group_input1</span><span class="p">,</span> <span class="n">group_input2</span><span class="p">]</span>
<span class="n">trg_embedding</span> <span class="o">=</span> <span class="n">paddle</span><span class="o">.</span><span class="n">layer</span><span class="o">.</span><span class="n">embedding</span><span class="p">(</span>
        <span class="nb">input</span><span class="o">=</span><span class="n">paddle</span><span class="o">.</span><span class="n">layer</span><span class="o">.</span><span class="n">data</span><span class="p">(</span>
            <span class="n">name</span><span class="o">=</span><span class="s1">&#39;target_language_word&#39;</span><span class="p">,</span>
            <span class="nb">type</span><span class="o">=</span><span class="n">paddle</span><span class="o">.</span><span class="n">data_type</span><span class="o">.</span><span class="n">integer_value_sequence</span><span class="p">(</span><span class="n">target_dict_dim</span><span class="p">)),</span>
        <span class="n">size</span><span class="o">=</span><span class="n">word_vector_dim</span><span class="p">,</span>
        <span class="n">param_attr</span><span class="o">=</span><span class="n">paddle</span><span class="o">.</span><span class="n">attr</span><span class="o">.</span><span class="n">ParamAttr</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="s1">&#39;_target_language_embedding&#39;</span><span class="p">))</span>
    <span class="n">group_inputs</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">trg_embedding</span><span class="p">)</span>
378 379 380 381 382 383 384 385
<span class="n">group_inputs</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">trg_embedding</span><span class="p">)</span>

<span class="c1"># 对于配备有注意力机制的解码器,在训练中,</span>
<span class="c1"># 目标向量(groudtruth)是数据输入,</span>
<span class="c1"># 而源序列的编码向量可以被无边界的memory访问</span>
<span class="c1"># StaticInput 意味着不同时间步的输入都是相同的值,</span>
<span class="c1"># 否则它以一个序列输入,不同时间步的输入是不同的。</span>
<span class="c1"># 所有输入序列应该有相同的长度。</span>
386 387 388 389
<span class="n">decoder</span> <span class="o">=</span> <span class="n">paddle</span><span class="o">.</span><span class="n">layer</span><span class="o">.</span><span class="n">recurrent_group</span><span class="p">(</span>
        <span class="n">name</span><span class="o">=</span><span class="n">decoder_group_name</span><span class="p">,</span>
        <span class="n">step</span><span class="o">=</span><span class="n">gru_decoder_with_attention</span><span class="p">,</span>
        <span class="nb">input</span><span class="o">=</span><span class="n">group_inputs</span><span class="p">)</span>
390 391 392 393 394 395 396 397
</pre></div>
</div>
The implementation of the step function is listed below. First, it defines the **Memory** of the decoder network. Then it defines the attention, the gated recurrent unit step function, and the output function:
<div class="code python highlight-default"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">gru_decoder_with_attention</span><span class="p">(</span><span class="n">enc_vec</span><span class="p">,</span> <span class="n">enc_proj</span><span class="p">,</span> <span class="n">current_word</span><span class="p">):</span>
    <span class="c1"># 定义解码器的Memory</span>
    <span class="c1"># Memory的输出定义在 gru_step 内</span>
    <span class="c1"># 注意 gru_step 应该与它的Memory名字相同</span>
398 399
    <span class="n">decoder_mem</span> <span class="o">=</span> <span class="n">paddle</span><span class="o">.</span><span class="n">layer</span><span class="o">.</span><span class="n">memory</span><span class="p">(</span>
        <span class="n">name</span><span class="o">=</span><span class="s1">&#39;gru_decoder&#39;</span><span class="p">,</span> <span class="n">size</span><span class="o">=</span><span class="n">decoder_size</span><span class="p">,</span> <span class="n">boot_layer</span><span class="o">=</span><span class="n">decoder_boot</span><span class="p">)</span>
400
    <span class="c1"># 计算 attention 加权编码向量</span>
401 402 403 404
    <span class="n">context</span> <span class="o">=</span> <span class="n">paddle</span><span class="o">.</span><span class="n">networks</span><span class="o">.</span><span class="n">simple_attention</span><span class="p">(</span>
        <span class="n">encoded_sequence</span><span class="o">=</span><span class="n">enc_vec</span><span class="p">,</span>
        <span class="n">encoded_proj</span><span class="o">=</span><span class="n">enc_proj</span><span class="p">,</span>
        <span class="n">decoder_state</span><span class="o">=</span><span class="n">decoder_mem</span><span class="p">)</span>
405
    <span class="c1"># 混合当前词向量和attention加权编码向量</span>
406 407 408 409 410 411
     <span class="n">decoder_inputs</span> <span class="o">=</span> <span class="n">paddle</span><span class="o">.</span><span class="n">layer</span><span class="o">.</span><span class="n">mixed</span><span class="p">(</span>
        <span class="n">size</span><span class="o">=</span><span class="n">decoder_size</span> <span class="o">*</span> <span class="mi">3</span><span class="p">,</span>
        <span class="nb">input</span><span class="o">=</span><span class="p">[</span>
            <span class="n">paddle</span><span class="o">.</span><span class="n">layer</span><span class="o">.</span><span class="n">full_matrix_projection</span><span class="p">(</span><span class="nb">input</span><span class="o">=</span><span class="n">context</span><span class="p">),</span>
            <span class="n">paddle</span><span class="o">.</span><span class="n">layer</span><span class="o">.</span><span class="n">full_matrix_projection</span><span class="p">(</span><span class="nb">input</span><span class="o">=</span><span class="n">current_word</span><span class="p">)</span>
        <span class="p">])</span>
412
    <span class="c1"># 定义门控循环单元循环神经网络单步函数</span>
413 414 415 416 417
     <span class="n">gru_step</span> <span class="o">=</span> <span class="n">paddle</span><span class="o">.</span><span class="n">layer</span><span class="o">.</span><span class="n">gru_step</span><span class="p">(</span>
        <span class="n">name</span><span class="o">=</span><span class="s1">&#39;gru_decoder&#39;</span><span class="p">,</span>
        <span class="nb">input</span><span class="o">=</span><span class="n">decoder_inputs</span><span class="p">,</span>
        <span class="n">output_mem</span><span class="o">=</span><span class="n">decoder_mem</span><span class="p">,</span>
        <span class="n">size</span><span class="o">=</span><span class="n">decoder_size</span><span class="p">)</span>
418
    <span class="c1"># 定义输出函数</span>
419 420 421 422 423
     <span class="n">out</span> <span class="o">=</span> <span class="n">paddle</span><span class="o">.</span><span class="n">layer</span><span class="o">.</span><span class="n">mixed</span><span class="p">(</span>
        <span class="n">size</span><span class="o">=</span><span class="n">target_dict_dim</span><span class="p">,</span>
        <span class="n">bias_attr</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span>
        <span class="n">act</span><span class="o">=</span><span class="n">paddle</span><span class="o">.</span><span class="n">activation</span><span class="o">.</span><span class="n">Softmax</span><span class="p">(),</span>
        <span class="nb">input</span><span class="o">=</span><span class="n">paddle</span><span class="o">.</span><span class="n">layer</span><span class="o">.</span><span class="n">full_matrix_projection</span><span class="p">(</span><span class="nb">input</span><span class="o">=</span><span class="n">gru_step</span><span class="p">))</span>
424 425 426 427 428
    <span class="k">return</span> <span class="n">out</span>
</pre></div>
</div>
</div>
</div>
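For training, the decoder output above still has to be wired to a cost in the full configuration. A minimal sketch following the pattern of book/08.machine_translation/train.py; treat the exact label-layer name as belonging to that example rather than to this tutorial:

```python
# The ground-truth next word at every time step of the target sequence.
lbl = paddle.layer.data(
    name='target_language_next_word',
    type=paddle.data_type.integer_value_sequence(target_dict_dim))

# Cross-entropy classification cost between the decoder's softmax
# output and the ground-truth next word.
cost = paddle.layer.classification_cost(input=decoder, label=lbl)
```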
## Generating Sequences
After training the model, we can use it to generate sequences. A common practice is to use **beam search** to generate sequences. The following code snippet defines a beam search algorithm. Note that the `beam_search` function assumes that the output function of `step` returns the softmax-normalized probability vector of the next token. We made the following changes to the model:

- Use `GeneratedInput` for `trg_embedding`. `GeneratedInput` takes the embedding of the word generated at the previous time step as the input at the current time step.
- Use the `beam_search` function. This function needs to set:
  - `bos_id`: the begin token. Every sentence starts with the begin token.
  - `eos_id`: the end token. Every sentence ends with the end token.
  - `beam_size`: the beam size used in the beam search algorithm.
  - `max_length`: the maximum length of the generated sequences.

The code is listed below:
```python
group_input1 = paddle.layer.StaticInput(input=encoded_vector, is_seq=True)
group_input2 = paddle.layer.StaticInput(input=encoded_proj, is_seq=True)
group_inputs = [group_input1, group_input2]

# In generation, the decoder predicts the next target word based on
# the encoded source sequence and the last generated target word.
# The encoded source sequence (the encoder's output) must be specified
# by StaticInput, which is a read-only memory.
# Here, GeneratedInput automatically fetches the last generated word,
# initialized by a start mark such as <s>.
trg_embedding = paddle.layer.GeneratedInput(
        size=target_dict_dim,
        embedding_name='_target_language_embedding',
        embedding_size=word_vector_dim)
group_inputs.append(trg_embedding)

beam_gen = paddle.layer.beam_search(
        name=decoder_group_name,
        step=gru_decoder_with_attention,
        input=group_inputs,
        bos_id=0,  # Begin token.
        eos_id=1,  # End-of-sentence token.
        beam_size=beam_size,
        max_length=max_length)

# This snippet sits inside a network-definition function.
return beam_gen
```
Note that this generation technique is only useful for decoder-like generation processes. If you are working on sequence tagging tasks, please refer to [book/06.understand_sentiment](https://github.com/PaddlePaddle/book/tree/develop/06.understand_sentiment) for more details.

The complete configuration file is at [book/08.machine_translation/train.py](https://github.com/PaddlePaddle/book/blob/develop/08.machine_translation/train.py).