rnn_config_cn.html 41.5 KB
Newer Older
1 2


3 4


5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
<!DOCTYPE html>
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
  <meta charset="utf-8">
  
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  
  <title>RNN配置 &mdash; PaddlePaddle  文档</title>
  

  
  

  

  
  
    

  

  
  
29
    <link rel="stylesheet" href="../../_static/css/theme.css" type="text/css" />
30 31 32
  

  
33

34 35
  
        <link rel="index" title="索引"
36 37 38
              href="../../genindex.html"/>
        <link rel="search" title="搜索" href="../../search.html"/>
    <link rel="top" title="PaddlePaddle  文档" href="../../index.html"/>
39
        <link rel="up" title="RNN模型" href="index_cn.html"/>
40
        <link rel="next" title="Recurrent Group教程" href="recurrent_group_cn.html"/>
41
        <link rel="prev" title="RNN模型" href="index_cn.html"/> 
42 43 44 45 46 47 48 49 50 51
<script>
// Analytics loader: injects the Baidu hm.js tracking script ahead of the
// first <script> element in the document so it is fetched without being
// written into the markup as a static tag.
// NOTE(review): _hmt is presumably the command queue consumed by hm.js;
// it is created here only if not already defined — confirm against vendor docs.
var _hmt = _hmt || [];
(function() {
  var hm = document.createElement("script");
  // Use an explicit https: scheme. A protocol-relative URL ("//host/...")
  // resolves against the page's own scheme, so it breaks when the built
  // docs are opened from disk (file:) and downgrades to http: on
  // plain-HTTP mirrors.
  hm.src = "https://hm.baidu.com/hm.js?b9a314ab40d04d805655aab1deee08ba";
  // First script element in document order; this inline script is itself
  // in the document, so at least one always exists.
  var s = document.getElementsByTagName("script")[0];
  s.parentNode.insertBefore(hm, s);
})();
</script>

52 53

  
54
  <script src="../../_static/js/modernizr.min.js"></script>
55 56 57 58 59

</head>

<body class="wy-body-for-nav" role="document">

60 61 62 63 64 65 66 67 68 69 70 71 72
  <div class="wy-grid-for-nav">

    
    <nav data-toggle="wy-nav-shift" class="wy-nav-side">
      <div class="wy-side-scroll">
        <div class="wy-side-nav-search">
          

          
            <a href="../../index_cn.html" class="icon icon-home"> PaddlePaddle
          

          
73 74
          </a>

75 76 77 78 79 80
          
            
            
          

          
81
<div role="search">
82
  <form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
83 84 85 86
    <input type="text" name="q" placeholder="Search docs" />
    <input type="hidden" name="check_keywords" value="yes" />
    <input type="hidden" name="area" value="default" />
  </form>
87
</div>
88 89

          
90 91 92 93
        </div>

        <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
          
94 95 96 97 98 99 100 101 102 103 104 105 106 107
<nav class="doc-menu-vertical" role="navigation">

<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="../../getstarted/index_cn.html">新手入门</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../../getstarted/quickstart_cn.html">快速开始</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../getstarted/concepts/use_concepts_cn.html">基本使用概念</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../build_and_install/index_cn.html">安装与编译</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../../build_and_install/pip_install_cn.html">使用pip安装</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../build_and_install/docker_install_cn.html">使用Docker安装运行</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../build_and_install/build_from_source_cn.html">从源码编译</a></li>
</ul>
</li>
108
<li class="toctree-l1 current"><a class="reference internal" href="../index_cn.html">进阶使用</a><ul class="current">
109 110 111 112 113 114 115 116 117 118 119 120 121 122 123
<li class="toctree-l2"><a class="reference internal" href="../cmd_parameter/index_cn.html">命令行参数设置</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../cmd_parameter/use_case_cn.html">使用案例</a></li>
<li class="toctree-l3"><a class="reference internal" href="../cmd_parameter/arguments_cn.html">参数概述</a></li>
<li class="toctree-l3"><a class="reference internal" href="../cmd_parameter/detail_introduction_cn.html">细节描述</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../cluster/index_cn.html">分布式训练</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../cluster/preparations_cn.html">环境准备</a></li>
<li class="toctree-l3"><a class="reference internal" href="../cluster/cmd_argument_cn.html">启动参数说明</a></li>
<li class="toctree-l3"><a class="reference internal" href="../cluster/multi_cluster/index_cn.html">在不同集群中运行</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../cluster/multi_cluster/k8s_cn.html">Kubernetes单机训练</a></li>
<li class="toctree-l4"><a class="reference internal" href="../cluster/multi_cluster/k8s_distributed_cn.html">Kubernetes分布式训练</a></li>
<li class="toctree-l4"><a class="reference internal" href="../cluster/multi_cluster/openmpi_cn.html">在OpenMPI集群中启动训练</a></li>
<li class="toctree-l4"><a class="reference internal" href="../cluster/multi_cluster/fabric_cn.html">使用fabric启动集群训练</a></li>
<li class="toctree-l4"><a class="reference internal" href="../cluster/multi_cluster/k8s_aws_cn.html">Kubernetes on AWS</a></li>
124 125
</ul>
</li>
126 127 128 129 130 131 132 133 134 135
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../capi/index_cn.html">C-API预测库</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../capi/compile_paddle_lib_cn.html">安装与编译C-API预测库</a></li>
<li class="toctree-l3"><a class="reference internal" href="../capi/organization_of_the_inputs_cn.html">输入/输出数据组织</a></li>
<li class="toctree-l3"><a class="reference internal" href="../capi/workflow_of_capi_cn.html">C-API使用流程</a></li>
</ul>
</li>
<li class="toctree-l2 current"><a class="reference internal" href="index_cn.html">RNN模型</a><ul class="current">
<li class="toctree-l3 current"><a class="current reference internal" href="#">RNN配置</a></li>
136 137 138 139 140
<li class="toctree-l3"><a class="reference internal" href="recurrent_group_cn.html">Recurrent Group教程</a></li>
<li class="toctree-l3"><a class="reference internal" href="hierarchical_layer_cn.html">支持双层序列作为输入的Layer</a></li>
<li class="toctree-l3"><a class="reference internal" href="hrnn_rnn_api_compare_cn.html">单双层RNN API对比介绍</a></li>
</ul>
</li>
141 142 143
<li class="toctree-l2"><a class="reference internal" href="../optimization/gpu_profiling_cn.html">GPU性能调优</a></li>
</ul>
</li>
144 145 146
<li class="toctree-l1"><a class="reference internal" href="../../dev/index_cn.html">开发标准</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../../dev/contribute_to_paddle_cn.html">如何贡献代码</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../dev/write_docs_cn.html">如何贡献文档</a></li>
147
<li class="toctree-l2"><a class="reference internal" href="../../dev/new_layer_cn.html">如何实现新的网络层</a></li>
148 149 150 151 152 153 154 155 156 157
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../faq/index_cn.html">FAQ</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../../faq/build_and_install/index_cn.html">编译安装与单元测试</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../faq/model/index_cn.html">模型配置</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../faq/parameter/index_cn.html">参数设置</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../faq/local/index_cn.html">本地训练与预测</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../faq/cluster/index_cn.html">集群训练与预测</a></li>
</ul>
</li>
158 159
</ul>

160 161
</nav>

162 163
        </div>
      </div>
164 165
    </nav>

166
    <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
167

168 169 170 171 172
      
      <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
        <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
        <a href="../../index_cn.html">PaddlePaddle</a>
      </nav>
173 174


175 176 177 178
      
      <div class="wy-nav-content">
        <div class="rst-content">
          
179

180
 
181 182 183 184 185



<div role="navigation" aria-label="breadcrumbs navigation">
  <ul class="wy-breadcrumbs">
186
    <li><a href="../../index_cn.html">Docs</a> &raquo;</li>
187
      
188
          <li><a href="../index_cn.html">进阶使用</a> &raquo;</li>
189
      
190
          <li><a href="index_cn.html">RNN模型</a> &raquo;</li>
191
      
192
    <li>RNN配置</li>
193 194 195 196 197 198 199
      <li class="wy-breadcrumbs-aside">
        
          
            <a href="../../_sources/howto/rnn/rnn_config_cn.rst.txt" rel="nofollow"> View page source</a>
          
        
      </li>
200
  </ul>
201
  <hr/>
202 203 204 205 206 207 208 209 210 211 212 213 214 215 216
</div>
          <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
           <div itemprop="articleBody">
            
  <div class="section" id="rnn">
<h1>RNN配置<a class="headerlink" href="#rnn" title="永久链接至标题"></a></h1>
<p>本教程将指导你如何在 PaddlePaddle
中配置循环神经网络(RNN)。PaddlePaddle
高度支持灵活和高效的循环神经网络配置。 在本教程中,您将了解如何:</p>
<ul class="simple">
<li>配置循环神经网络架构。</li>
<li>使用学习完成的循环神经网络模型生成序列。</li>
</ul>
<p>我们将使用 vanilla 循环神经网络和 sequence to sequence
模型来指导你完成这些步骤。sequence to sequence
217 218
模型的代码可以在 <a class="reference external" href="https://github.com/PaddlePaddle/book/tree/develop/08.machine_translation">book/08.machine_translation</a> 找到。
wmt14数据的提供文件在 <a class="reference external" href="https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/dataset/wmt14.py">python/paddle/v2/dataset/wmt14.py</a></p>
219
<div class="section" id="id1">
220
<h2>配置循环神经网络架构<a class="headerlink" href="#id1" title="永久链接至标题"></a></h2>
221 222 223
<div class="section" id="gated-recurrent-neural-network">
<h3>简单门控循环神经网络(Gated Recurrent Neural Network)<a class="headerlink" href="#gated-recurrent-neural-network" title="永久链接至标题"></a></h3>
<p>循环神经网络在每个时间步骤顺序地处理序列。下面列出了 LSTM 的架构的示例。</p>
224
<img alt="双向 LSTM 的架构示例" class="align-center" src="../../_images/bi_lstm.jpg" />
225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251
<p>一般来说,循环网络从 <span class="math">\(t=1\)</span> 到 <span class="math">\(t=T\)</span> 或者反向地从 <span class="math">\(t=T\)</span> 到 <span class="math">\(t=1\)</span> 执行以下操作。</p>
<div class="math">
\[x_{t+1} = f_x(x_t), y_t = f_y(x_t)\]</div>
<p>其中 <span class="math">\(f_x(.)\)</span> 称为<strong>单步函数</strong>(即单时间步执行的函数,step
function),而 <span class="math">\(f_y(.)\)</span> 称为<strong>输出函数</strong>。在 vanilla
循环神经网络中,单步函数和输出函数都非常简单。然而,PaddlePaddle
可以通过修改这两个函数来实现复杂的网络配置。我们将使用 sequence to
sequence
模型演示如何配置复杂的循环神经网络模型。在本节中,我们将使用简单的
vanilla
循环神经网络作为使用<code class="docutils literal"><span class="pre">recurrent_group</span></code>配置简单循环神经网络的例子。
注意,如果你只需要使用简单的RNN,GRU或LSTM,那么推荐使用<code class="docutils literal"><span class="pre">grumemory</span></code> 和 <code class="docutils literal"><span class="pre">lstmemory</span></code>,因为它们的计算效率比<code class="docutils literal"><span class="pre">recurrent_group</span></code>更高。</p>
<p>对于 vanilla RNN,在每个时间步长,<strong>单步函数</strong>为:</p>
<div class="math">
\[x_{t+1} = W_x x_t + W_i I_t + b\]</div>
<p>其中 <span class="math">\(x_t\)</span> 是RNN状态,并且 <span class="math">\(I_t\)</span> 是输入,<span class="math">\(W_x\)</span>
和 <span class="math">\(W_i\)</span> 分别是RNN状态和输入的变换矩阵。<span class="math">\(b\)</span> 是偏差。它的<strong>输出函数</strong>只需要 <span class="math">\(x_t\)</span> 作为输出。</p>
<p><code class="docutils literal"><span class="pre">recurrent_group</span></code>是构建循环神经网络的最重要的工具。
它定义了<strong>单步函数</strong>、<strong>输出函数</strong>和循环神经网络的输入。注意,这个函数的<code class="docutils literal"><span class="pre">step</span></code>参数需要实现<code class="docutils literal"><span class="pre">step</span> <span class="pre">function</span></code>(单步函数)和<code class="docutils literal"><span class="pre">output</span> <span class="pre">function</span></code>(输出函数):</p>
<div class="code python highlight-default"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">simple_rnn</span><span class="p">(</span><span class="nb">input</span><span class="p">,</span>
               <span class="n">size</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
               <span class="n">name</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
               <span class="n">reverse</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span>
               <span class="n">rnn_bias_attr</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
               <span class="n">act</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
               <span class="n">rnn_layer_attr</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
    <span class="k">def</span> <span class="nf">__rnn_step__</span><span class="p">(</span><span class="n">ipt</span><span class="p">):</span>
252 253 254 255 256 257 258 259
       <span class="n">out_mem</span> <span class="o">=</span> <span class="n">paddle</span><span class="o">.</span><span class="n">layer</span><span class="o">.</span><span class="n">memory</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="n">name</span><span class="p">,</span> <span class="n">size</span><span class="o">=</span><span class="n">size</span><span class="p">)</span>
       <span class="n">rnn_out</span> <span class="o">=</span> <span class="n">paddle</span><span class="o">.</span><span class="n">layer</span><span class="o">.</span><span class="n">mixed</span><span class="p">(</span><span class="nb">input</span> <span class="o">=</span> <span class="p">[</span><span class="n">paddle</span><span class="o">.</span><span class="n">layer</span><span class="o">.</span><span class="n">full_matrix_projection</span><span class="p">(</span><span class="nb">input</span><span class="o">=</span><span class="n">ipt</span><span class="p">),</span>
                                             <span class="n">paddle</span><span class="o">.</span><span class="n">layer</span><span class="o">.</span><span class="n">full_matrix_projection</span><span class="p">(</span><span class="nb">input</span><span class="o">=</span><span class="n">out_mem</span><span class="p">)],</span>
                                    <span class="n">name</span> <span class="o">=</span> <span class="n">name</span><span class="p">,</span>
                                    <span class="n">bias_attr</span> <span class="o">=</span> <span class="n">rnn_bias_attr</span><span class="p">,</span>
                                    <span class="n">act</span> <span class="o">=</span> <span class="n">act</span><span class="p">,</span>
                                    <span class="n">layer_attr</span> <span class="o">=</span> <span class="n">rnn_layer_attr</span><span class="p">,</span>
                                    <span class="n">size</span> <span class="o">=</span> <span class="n">size</span><span class="p">)</span>
260
       <span class="k">return</span> <span class="n">rnn_out</span>
261 262 263 264
    <span class="k">return</span> <span class="n">paddle</span><span class="o">.</span><span class="n">layer</span><span class="o">.</span><span class="n">recurrent_group</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="s1">&#39;</span><span class="si">%s</span><span class="s1">_recurrent_group&#39;</span> <span class="o">%</span> <span class="n">name</span><span class="p">,</span>
                                        <span class="n">step</span><span class="o">=</span><span class="n">__rnn_step__</span><span class="p">,</span>
                                        <span class="n">reverse</span><span class="o">=</span><span class="n">reverse</span><span class="p">,</span>
                                        <span class="nb">input</span><span class="o">=</span><span class="nb">input</span><span class="p">)</span>
265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282
</pre></div>
</div>
<p>PaddlePaddle
使用“Memory”(记忆模块)实现单步函数。<strong>Memory</strong>是在PaddlePaddle中构造循环神经网络时最重要的概念。
Memory是在单步函数中循环使用的状态,例如 <span class="math">\(x_{t+1} = f_x(x_t)\)</span>。
一个Memory包含<strong>输出</strong>和<strong>输入</strong>。当前时间步处的Memory的输出作为下一时间步Memory的输入。Memory也可以具有<strong>boot
layer(引导层)</strong>,其输出被用作Memory的初始值。
在我们的例子中,门控循环单元的输出被用作输出Memory。请注意,<code class="docutils literal"><span class="pre">rnn_out</span></code>层的名称与<code class="docutils literal"><span class="pre">out_mem</span></code>的名称相同。这意味着<code class="docutils literal"><span class="pre">rnn_out</span></code>
(<span class="math">\(x_{t+1}\)</span>)的输出被用作<code class="docutils literal"><span class="pre">out_mem</span></code>Memory的<strong>输出</strong>。</p>
<p>Memory也可以是序列。在这种情况下,在每个时间步中,我们有一个序列作为循环神经网络的状态。这在构造非常复杂的循环神经网络时是有用的。
其他高级功能包括定义多个Memory,以及使用子序列来定义分级循环神经网络架构。</p>
<p>我们在函数的结尾返回<code class="docutils literal"><span class="pre">rnn_out</span></code>。 这意味着 <code class="docutils literal"><span class="pre">rnn_out</span></code>
层的输出被用作门控循环神经网络的<strong>输出</strong>函数。</p>
</div>
<div class="section" id="sequence-to-sequence-model-with-attention">
<h3>Sequence to Sequence Model with Attention<a class="headerlink" href="#sequence-to-sequence-model-with-attention" title="永久链接至标题"></a></h3>
<p>我们将使用 sequence to sequence model with attention
作为例子演示如何配置复杂的循环神经网络模型。该模型的说明如下图所示。</p>
283
<img alt="带注意力机制的编码器-解码器(sequence to sequence)模型示意图" class="align-center" src="../../_images/encoder-decoder-attention-model.png" />
284 285 286 287 288 289 290 291
<p>在这个模型中,源序列 <span class="math">\(S = \{s_1, \dots, s_T\}\)</span>
用双向门控循环神经网络编码。双向门控循环神经网络的隐藏状态
<span class="math">\(H_S = \{H_1, \dots, H_T\}\)</span> 被称为
<em>编码向量</em>。解码器是门控循环神经网络。当解读每一个 <span class="math">\(y_t\)</span> 时,
这个门控循环神经网络生成一系列权重  <span class="math">\(W_S^t = \{W_1^t, \dots, W_T^t\}\)</span> ,
用于计算编码向量的加权和。加权和用来生成 <span class="math">\(y_t\)</span>。</p>
<p>模型的编码器部分如下所示。它调用<code class="docutils literal"><span class="pre">grumemory</span></code>来表示门控循环神经网络。如果网络架构简单,那么这是推荐的循环神经网络使用方法,因为它比
<code class="docutils literal"><span class="pre">recurrent_group</span></code>
292
更快。我们已经实现了大多数常用的循环神经网络架构,可以参考 <span class="xref std std-ref">api_trainer_config_helpers_layers</span> 了解更多细节。</p>
293 294 295 296
<p>我们还将编码向量投射到 <code class="docutils literal"><span class="pre">decoder_size</span></code>
维空间。这通过获得反向循环网络的第一个实例,并将其投射到
<code class="docutils literal"><span class="pre">decoder_size</span></code> 维空间完成:</p>
<div class="code python highlight-default"><div class="highlight"><pre><span></span><span class="c1"># 定义源语句的数据层</span>
297 298 299
<span class="n">src_word_id</span> <span class="o">=</span> <span class="n">paddle</span><span class="o">.</span><span class="n">layer</span><span class="o">.</span><span class="n">data</span><span class="p">(</span>
    <span class="n">name</span><span class="o">=</span><span class="s1">&#39;source_language_word&#39;</span><span class="p">,</span>
    <span class="nb">type</span><span class="o">=</span><span class="n">paddle</span><span class="o">.</span><span class="n">data_type</span><span class="o">.</span><span class="n">integer_value_sequence</span><span class="p">(</span><span class="n">source_dict_dim</span><span class="p">))</span>
300
<span class="c1"># 计算每个词的词向量</span>
301
<span class="n">src_embedding</span> <span class="o">=</span> <span class="n">paddle</span><span class="o">.</span><span class="n">layer</span><span class="o">.</span><span class="n">embedding</span><span class="p">(</span>
302 303
    <span class="nb">input</span><span class="o">=</span><span class="n">src_word_id</span><span class="p">,</span>
    <span class="n">size</span><span class="o">=</span><span class="n">word_vector_dim</span><span class="p">,</span>
304
    <span class="n">param_attr</span><span class="o">=</span><span class="n">paddle</span><span class="o">.</span><span class="n">attr</span><span class="o">.</span><span class="n">ParamAttr</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="s1">&#39;_source_language_embedding&#39;</span><span class="p">))</span>
305
<span class="c1"># 应用前向循环神经网络</span>
306 307
<span class="n">src_forward</span> <span class="o">=</span> <span class="n">paddle</span><span class="o">.</span><span class="n">networks</span><span class="o">.</span><span class="n">simple_gru</span><span class="p">(</span>
    <span class="nb">input</span><span class="o">=</span><span class="n">src_embedding</span><span class="p">,</span> <span class="n">size</span><span class="o">=</span><span class="n">encoder_size</span><span class="p">)</span>
308
<span class="c1"># 应用反向递归神经网络(reverse=True表示反向循环神经网络)</span>
309 310
<span class="n">src_backward</span> <span class="o">=</span> <span class="n">paddle</span><span class="o">.</span><span class="n">networks</span><span class="o">.</span><span class="n">simple_gru</span><span class="p">(</span>
    <span class="nb">input</span><span class="o">=</span><span class="n">src_embedding</span><span class="p">,</span> <span class="n">size</span><span class="o">=</span><span class="n">encoder_size</span><span class="p">,</span> <span class="n">reverse</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
311
<span class="c1"># 将循环神经网络的前向和反向部分混合在一起</span>
312
<span class="n">encoded_vector</span> <span class="o">=</span> <span class="n">paddle</span><span class="o">.</span><span class="n">layer</span><span class="o">.</span><span class="n">concat</span><span class="p">(</span><span class="nb">input</span><span class="o">=</span><span class="p">[</span><span class="n">src_forward</span><span class="p">,</span> <span class="n">src_backward</span><span class="p">])</span>
313 314

<span class="c1"># 投射编码向量到 decoder_size</span>
315 316 317
<span class="n">encoded_proj</span> <span class="o">=</span> <span class="n">paddle</span><span class="o">.</span><span class="n">layer</span><span class="o">.</span><span class="n">mixed</span><span class="p">(</span>
    <span class="n">size</span><span class="o">=</span><span class="n">decoder_size</span><span class="p">,</span>
    <span class="nb">input</span><span class="o">=</span><span class="n">paddle</span><span class="o">.</span><span class="n">layer</span><span class="o">.</span><span class="n">full_matrix_projection</span><span class="p">(</span><span class="n">encoded_vector</span><span class="p">))</span>
318 319

<span class="c1"># 计算反向RNN的第一个实例</span>
320
<span class="n">backward_first</span> <span class="o">=</span> <span class="n">paddle</span><span class="o">.</span><span class="n">layer</span><span class="o">.</span><span class="n">first_seq</span><span class="p">(</span><span class="nb">input</span><span class="o">=</span><span class="n">src_backward</span><span class="p">)</span>
321 322

<span class="c1"># 投射反向RNN的第一个实例到 decoder size</span>
323 324 325 326
<span class="n">decoder_boot</span> <span class="o">=</span> <span class="n">paddle</span><span class="o">.</span><span class="n">layer</span><span class="o">.</span><span class="n">mixed</span><span class="p">(</span>
   <span class="n">size</span><span class="o">=</span><span class="n">decoder_size</span><span class="p">,</span>
   <span class="n">act</span><span class="o">=</span><span class="n">paddle</span><span class="o">.</span><span class="n">activation</span><span class="o">.</span><span class="n">Tanh</span><span class="p">(),</span>
   <span class="nb">input</span><span class="o">=</span><span class="n">paddle</span><span class="o">.</span><span class="n">layer</span><span class="o">.</span><span class="n">full_matrix_projection</span><span class="p">(</span><span class="n">backward_first</span><span class="p">))</span>
327 328 329 330
</pre></div>
</div>
<p>解码器使用 <code class="docutils literal"><span class="pre">recurrent_group</span></code> 来定义循环神经网络。单步函数和输出函数在
<code class="docutils literal"><span class="pre">gru_decoder_with_attention</span></code> 中定义:</p>
331 332 333 334 335 336 337 338 339 340
<div class="code python highlight-default"><div class="highlight"><pre><span></span><span class="n">group_input1</span> <span class="o">=</span> <span class="n">paddle</span><span class="o">.</span><span class="n">layer</span><span class="o">.</span><span class="n">StaticInput</span><span class="p">(</span><span class="nb">input</span><span class="o">=</span><span class="n">encoded_vector</span><span class="p">,</span> <span class="n">is_seq</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="n">group_input2</span> <span class="o">=</span> <span class="n">paddle</span><span class="o">.</span><span class="n">layer</span><span class="o">.</span><span class="n">StaticInput</span><span class="p">(</span><span class="nb">input</span><span class="o">=</span><span class="n">encoded_proj</span><span class="p">,</span> <span class="n">is_seq</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="n">group_inputs</span> <span class="o">=</span> <span class="p">[</span><span class="n">group_input1</span><span class="p">,</span> <span class="n">group_input2</span><span class="p">]</span>
<span class="n">trg_embedding</span> <span class="o">=</span> <span class="n">paddle</span><span class="o">.</span><span class="n">layer</span><span class="o">.</span><span class="n">embedding</span><span class="p">(</span>
        <span class="nb">input</span><span class="o">=</span><span class="n">paddle</span><span class="o">.</span><span class="n">layer</span><span class="o">.</span><span class="n">data</span><span class="p">(</span>
            <span class="n">name</span><span class="o">=</span><span class="s1">&#39;target_language_word&#39;</span><span class="p">,</span>
            <span class="nb">type</span><span class="o">=</span><span class="n">paddle</span><span class="o">.</span><span class="n">data_type</span><span class="o">.</span><span class="n">integer_value_sequence</span><span class="p">(</span><span class="n">target_dict_dim</span><span class="p">)),</span>
        <span class="n">size</span><span class="o">=</span><span class="n">word_vector_dim</span><span class="p">,</span>
        <span class="n">param_attr</span><span class="o">=</span><span class="n">paddle</span><span class="o">.</span><span class="n">attr</span><span class="o">.</span><span class="n">ParamAttr</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="s1">&#39;_target_language_embedding&#39;</span><span class="p">))</span>
    <span class="n">group_inputs</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">trg_embedding</span><span class="p">)</span>
341 342 343 344 345 346 347 348
<span class="n">group_inputs</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">trg_embedding</span><span class="p">)</span>

<span class="c1"># 对于配备有注意力机制的解码器,在训练中,</span>
<span class="c1"># 目标向量(groundtruth)是数据输入,</span>
<span class="c1"># 而源序列的编码向量可以被无边界的memory访问</span>
<span class="c1"># StaticInput 意味着不同时间步的输入都是相同的值,</span>
<span class="c1"># 否则它以一个序列输入,不同时间步的输入是不同的。</span>
<span class="c1"># 所有输入序列应该有相同的长度。</span>
349 350 351 352
<span class="n">decoder</span> <span class="o">=</span> <span class="n">paddle</span><span class="o">.</span><span class="n">layer</span><span class="o">.</span><span class="n">recurrent_group</span><span class="p">(</span>
        <span class="n">name</span><span class="o">=</span><span class="n">decoder_group_name</span><span class="p">,</span>
        <span class="n">step</span><span class="o">=</span><span class="n">gru_decoder_with_attention</span><span class="p">,</span>
        <span class="nb">input</span><span class="o">=</span><span class="n">group_inputs</span><span class="p">)</span>
353 354 355 356 357 358 359 360
</pre></div>
</div>
<p>单步函数的实现如下所示。首先,它定义解码网络的<strong>Memory</strong>。然后定义
attention,门控循环单元单步函数和输出函数:</p>
<div class="code python highlight-default"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">gru_decoder_with_attention</span><span class="p">(</span><span class="n">enc_vec</span><span class="p">,</span> <span class="n">enc_proj</span><span class="p">,</span> <span class="n">current_word</span><span class="p">):</span>
    <span class="c1"># 定义解码器的Memory</span>
    <span class="c1"># Memory的输出定义在 gru_step 内</span>
    <span class="c1"># 注意 gru_step 应该与它的Memory名字相同</span>
361 362
    <span class="n">decoder_mem</span> <span class="o">=</span> <span class="n">paddle</span><span class="o">.</span><span class="n">layer</span><span class="o">.</span><span class="n">memory</span><span class="p">(</span>
        <span class="n">name</span><span class="o">=</span><span class="s1">&#39;gru_decoder&#39;</span><span class="p">,</span> <span class="n">size</span><span class="o">=</span><span class="n">decoder_size</span><span class="p">,</span> <span class="n">boot_layer</span><span class="o">=</span><span class="n">decoder_boot</span><span class="p">)</span>
363
    <span class="c1"># 计算 attention 加权编码向量</span>
364 365 366 367
    <span class="n">context</span> <span class="o">=</span> <span class="n">paddle</span><span class="o">.</span><span class="n">networks</span><span class="o">.</span><span class="n">simple_attention</span><span class="p">(</span>
        <span class="n">encoded_sequence</span><span class="o">=</span><span class="n">enc_vec</span><span class="p">,</span>
        <span class="n">encoded_proj</span><span class="o">=</span><span class="n">enc_proj</span><span class="p">,</span>
        <span class="n">decoder_state</span><span class="o">=</span><span class="n">decoder_mem</span><span class="p">)</span>
368
    <span class="c1"># 混合当前词向量和attention加权编码向量</span>
369 370 371 372 373 374
    <span class="n">decoder_inputs</span> <span class="o">=</span> <span class="n">paddle</span><span class="o">.</span><span class="n">layer</span><span class="o">.</span><span class="n">mixed</span><span class="p">(</span>
        <span class="n">size</span><span class="o">=</span><span class="n">decoder_size</span> <span class="o">*</span> <span class="mi">3</span><span class="p">,</span>
        <span class="nb">input</span><span class="o">=</span><span class="p">[</span>
            <span class="n">paddle</span><span class="o">.</span><span class="n">layer</span><span class="o">.</span><span class="n">full_matrix_projection</span><span class="p">(</span><span class="nb">input</span><span class="o">=</span><span class="n">context</span><span class="p">),</span>
            <span class="n">paddle</span><span class="o">.</span><span class="n">layer</span><span class="o">.</span><span class="n">full_matrix_projection</span><span class="p">(</span><span class="nb">input</span><span class="o">=</span><span class="n">current_word</span><span class="p">)</span>
        <span class="p">])</span>
375
    <span class="c1"># 定义门控循环单元循环神经网络单步函数</span>
376 377 378 379 380
    <span class="n">gru_step</span> <span class="o">=</span> <span class="n">paddle</span><span class="o">.</span><span class="n">layer</span><span class="o">.</span><span class="n">gru_step</span><span class="p">(</span>
        <span class="n">name</span><span class="o">=</span><span class="s1">&#39;gru_decoder&#39;</span><span class="p">,</span>
        <span class="nb">input</span><span class="o">=</span><span class="n">decoder_inputs</span><span class="p">,</span>
        <span class="n">output_mem</span><span class="o">=</span><span class="n">decoder_mem</span><span class="p">,</span>
        <span class="n">size</span><span class="o">=</span><span class="n">decoder_size</span><span class="p">)</span>
381
    <span class="c1"># 定义输出函数</span>
382 383 384 385 386
    <span class="n">out</span> <span class="o">=</span> <span class="n">paddle</span><span class="o">.</span><span class="n">layer</span><span class="o">.</span><span class="n">mixed</span><span class="p">(</span>
        <span class="n">size</span><span class="o">=</span><span class="n">target_dict_dim</span><span class="p">,</span>
        <span class="n">bias_attr</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span>
        <span class="n">act</span><span class="o">=</span><span class="n">paddle</span><span class="o">.</span><span class="n">activation</span><span class="o">.</span><span class="n">Softmax</span><span class="p">(),</span>
        <span class="nb">input</span><span class="o">=</span><span class="n">paddle</span><span class="o">.</span><span class="n">layer</span><span class="o">.</span><span class="n">full_matrix_projection</span><span class="p">(</span><span class="nb">input</span><span class="o">=</span><span class="n">gru_step</span><span class="p">))</span>
    <span class="k">return</span> <span class="n">out</span>
</pre></div>
</div>
</div>
</div>
<div class="section" id="id2">
<h2>生成序列<a class="headerlink" href="#id2" title="永久链接至标题"></a></h2>
<p>训练模型后,我们可以使用它来生成序列。通常的做法是使用<strong>beam search</strong>
生成序列。以下代码片段定义 beam search 算法。注意,<code class="docutils literal"><span class="pre">beam_search</span></code>
函数假设 <code class="docutils literal"><span class="pre">step</span></code> 的输出函数返回的是下一个时刻输出词的 softmax
归一化概率向量。我们对模型进行了以下更改。</p>
<ul class="simple">
<li>使用 <code class="docutils literal"><span class="pre">GeneratedInput</span></code> 来表示 trg_embedding。 <code class="docutils literal"><span class="pre">GeneratedInput</span></code>
将上一时间步所生成的词的向量来作为当前时间步的输入。</li>
<li>使用 <code class="docutils literal"><span class="pre">beam_search</span></code> 函数。这个函数需要设置:<ul>
<li><code class="docutils literal"><span class="pre">bos_id</span></code>: 开始标记。每个句子都以开始标记开头。</li>
<li><code class="docutils literal"><span class="pre">eos_id</span></code>: 结束标记。每个句子都以结束标记结尾。</li>
<li><code class="docutils literal"><span class="pre">beam_size</span></code>: beam search 算法中的beam大小。</li>
<li><code class="docutils literal"><span class="pre">max_length</span></code>: 生成序列的最大长度。</li>
</ul>
</li>
</ul>
<p>代码如下:</p>
<div class="code python highlight-default"><div class="highlight"><pre><span></span><span class="n">group_input1</span> <span class="o">=</span> <span class="n">paddle</span><span class="o">.</span><span class="n">layer</span><span class="o">.</span><span class="n">StaticInput</span><span class="p">(</span><span class="nb">input</span><span class="o">=</span><span class="n">encoded_vector</span><span class="p">,</span> <span class="n">is_seq</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="n">group_input2</span> <span class="o">=</span> <span class="n">paddle</span><span class="o">.</span><span class="n">layer</span><span class="o">.</span><span class="n">StaticInput</span><span class="p">(</span><span class="nb">input</span><span class="o">=</span><span class="n">encoded_proj</span><span class="p">,</span> <span class="n">is_seq</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="n">group_inputs</span> <span class="o">=</span> <span class="p">[</span><span class="n">group_input1</span><span class="p">,</span> <span class="n">group_input2</span><span class="p">]</span>
<span class="c1"># 在生成时,解码器基于编码源序列和最后生成的目标词预测下一目标词。</span>
<span class="c1"># 编码源序列(编码器输出)必须由只读Memory的 StaticInput 指定。</span>
<span class="c1"># 这里, GeneratedInputs 自动获取上一个生成的词,并在最开始初始化为起始词,如 &lt;s&gt;</span>
<span class="n">trg_embedding</span> <span class="o">=</span> <span class="n">paddle</span><span class="o">.</span><span class="n">layer</span><span class="o">.</span><span class="n">GeneratedInput</span><span class="p">(</span>
        <span class="n">size</span><span class="o">=</span><span class="n">target_dict_dim</span><span class="p">,</span>
        <span class="n">embedding_name</span><span class="o">=</span><span class="s1">&#39;_target_language_embedding&#39;</span><span class="p">,</span>
        <span class="n">embedding_size</span><span class="o">=</span><span class="n">word_vector_dim</span><span class="p">)</span>
<span class="n">group_inputs</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">trg_embedding</span><span class="p">)</span>
<span class="n">beam_gen</span> <span class="o">=</span> <span class="n">paddle</span><span class="o">.</span><span class="n">layer</span><span class="o">.</span><span class="n">beam_search</span><span class="p">(</span>
        <span class="n">name</span><span class="o">=</span><span class="n">decoder_group_name</span><span class="p">,</span>
        <span class="n">step</span><span class="o">=</span><span class="n">gru_decoder_with_attention</span><span class="p">,</span>
        <span class="nb">input</span><span class="o">=</span><span class="n">group_inputs</span><span class="p">,</span>
        <span class="n">bos_id</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="c1"># Beginning token.</span>
        <span class="n">eos_id</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="c1"># End of sentence token.</span>
        <span class="n">beam_size</span><span class="o">=</span><span class="n">beam_size</span><span class="p">,</span>
        <span class="n">max_length</span><span class="o">=</span><span class="n">max_length</span><span class="p">)</span>

<span class="k">return</span> <span class="n">beam_gen</span>
</pre></div>
</div>
<p>注意,这种生成技术只用于类似解码器的生成过程。如果你正在处理序列标记任务,请参阅 <a class="reference external" href="https://github.com/PaddlePaddle/book/tree/develop/06.understand_sentiment">book/06.understand_sentiment</a> 了解更多详细信息。</p>
<p>完整的配置文件在 <a class="reference external" href="https://github.com/PaddlePaddle/book/blob/develop/08.machine_translation/train.py">book/08.machine_translation/train.py</a></p>
</div>
</div>


           </div>
          </div>
          <footer>
  
    <div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
      
        <a href="recurrent_group_cn.html" class="btn btn-neutral float-right" title="Recurrent Group教程" accesskey="n">Next <span class="fa fa-arrow-circle-right"></span></a>
      
      
        <a href="index_cn.html" class="btn btn-neutral" title="RNN模型" accesskey="p"><span class="fa fa-arrow-circle-left"></span> Previous</a>
      
    </div>
  

  <hr/>

  <div role="contentinfo">
    <p>
        &copy; Copyright 2016, PaddlePaddle developers.

    </p>
  </div>
  Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>. 

</footer>

        </div>
      </div>

    </section>

  </div>
  


  

    <script type="text/javascript">
        // Global page-level options object. Presumably consumed by the Sphinx
        // scripts included after this block (e.g. doctools.js) -- confirm.
        // The bare numeric lines that were interleaved with the properties
        // have been removed: they made the object literal a syntax error, so
        // DOCUMENTATION_OPTIONS was never defined.
        var DOCUMENTATION_OPTIONS = {
            URL_ROOT:'../../',
            VERSION:'',
            COLLAPSE_INDEX:false,
            FILE_SUFFIX:'.html',
            HAS_SOURCE:  true
        };
    </script>
      <script type="text/javascript" src="../../_static/jquery.js"></script>
      <script type="text/javascript" src="../../_static/underscore.js"></script>
      <script type="text/javascript" src="../../_static/doctools.js"></script>
      <script type="text/javascript" src="../../_static/translations.js"></script>
      <script type="text/javascript" src="https://cdn.bootcss.com/mathjax/2.7.0/MathJax.js"></script>

  

  
  
    <script type="text/javascript" src="../../_static/js/theme.js"></script>
  

  
  
  <script type="text/javascript">
      // Register a jQuery document-ready handler that switches on the
      // theme's sticky navigation through SphinxRtdTheme.StickyNav.enable().
      jQuery(document).ready(function () {
          SphinxRtdTheme.StickyNav.enable();
      });
  </script>
   

</body>
</html>