Deploy to GitHub Pages: c80af6ff

aa798ba1 · Travis CI · 175f359a · aa798ba1 · aa798ba1 · aa798ba1
25 changed files
--- a/develop/doc/api/v2/fluid/evaluator.html
+++ b/develop/doc/api/v2/fluid/evaluator.html
@@ -220,75 +220,6 @@
 <h1>Evaluator<a class="headerlink" href="#evaluator" title="Permalink to this headline">¶</a></h1>
 <div class="section" id="id1">
 <h2>Evaluator<a class="headerlink" href="#id1" title="Permalink to this headline">¶</a></h2>
-<dl class="class">
-<dt>
-<em class="property">class </em><code class="descclassname">paddle.v2.fluid.evaluator.</code><code class="descname">Evaluator</code><span class="sig-paren">(</span><em>name</em>, <em>**kwargs</em><span class="sig-paren">)</span></dt>
-<dd><p>Base Class for all evaluators</p>
-<table class="docutils field-list" frame="void" rules="none">
-<col class="field-name" />
-<col class="field-body" />
-<tbody valign="top">
-<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
-<li><strong>name</strong> (<em>str</em>) &#8211; The name of the evaluator, such as &#8220;accuracy&#8221;. Used to generate
-temporary variable names.</li>
-<li><strong>main_program</strong> (<em>Program</em><em>, </em><em>optional</em>) &#8211; The evaluator should be added to this
-main_program. Default default_main_program()</li>
-<li><strong>startup_program</strong> (<em>Program</em><em>, </em><em>optional</em>) &#8211; The parameter should be added to this
-startup_program. Default default_startup_program()</li>
-</ul>
-</td>
-</tr>
-</tbody>
-</table>
-<dl class="attribute">
-<dt id="paddle.v2.fluid.evaluator.Evaluator.states">
-<code class="descname">states</code><a class="headerlink" href="#paddle.v2.fluid.evaluator.Evaluator.states" title="Permalink to this definition">¶</a></dt>
-<dd><p><em>list</em> &#8211; The list of state variables. states will be reset to zero
-when <cite>reset</cite> is invoked.</p>
-</dd></dl>
-
-<dl class="attribute">
-<dt id="paddle.v2.fluid.evaluator.Evaluator.metrics">
-<code class="descname">metrics</code><a class="headerlink" href="#paddle.v2.fluid.evaluator.Evaluator.metrics" title="Permalink to this definition">¶</a></dt>
-<dd><p><em>list</em> &#8211; The list of metrics variables. They will be calculated
-every mini-batch.</p>
-</dd></dl>
-
-<dl class="method">
-<dt>
-<code class="descname">reset</code><span class="sig-paren">(</span><em>executor</em>, <em>reset_program=None</em><span class="sig-paren">)</span></dt>
-<dd><p>Reset metric states at the beginning of each pass or user-specified batch.</p>
-</dd></dl>
-
-<dl class="method">
-<dt>
-<code class="descname">eval</code><span class="sig-paren">(</span><em>executor</em>, <em>eval_program=None</em><span class="sig-paren">)</span></dt>
-<dd><p>Evaluate the statistics merged by multiple mini-batches.</p>
-</dd></dl>
-
-<dl class="method">
-<dt>
-<code class="descname">create_state</code><span class="sig-paren">(</span><em>suffix</em>, <em>dtype</em>, <em>shape</em><span class="sig-paren">)</span></dt>
-<dd><p>Create state variable.</p>
-<p>NOTE: It is not a public API.</p>
-<table class="docutils field-list" frame="void" rules="none">
-<col class="field-name" />
-<col class="field-body" />
-<tbody valign="top">
-<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
-<li><strong>suffix</strong> (<em>str</em>) &#8211; the state suffix.</li>
-<li><strong>dtype</strong> (<em>str|core.DataType</em>) &#8211; the state data type</li>
-<li><strong>shape</strong> (<em>tuple|list</em>) &#8211; the shape of state</li>
-</ul>
-</td>
-</tr>
-</tbody>
-</table>
-<p>Returns: State variable</p>
-</dd></dl>
-
-</dd></dl>
-
 </div>
 </div>


--- a/develop/doc/api/v2/fluid/initializer.html
+++ b/develop/doc/api/v2/fluid/initializer.html
@@ -220,90 +220,21 @@
 <h1>Initializer<a class="headerlink" href="#initializer" title="Permalink to this headline">¶</a></h1>
 <div class="section" id="id1">
 <h2>Initializer<a class="headerlink" href="#id1" title="Permalink to this headline">¶</a></h2>
-<dl class="class">
-<dt>
-<em class="property">class </em><code class="descclassname">paddle.v2.fluid.initializer.</code><code class="descname">Initializer</code></dt>
-<dd><p>Base class for variable initializers</p>
-<p>Defines the common interface of variable initializers.
-They add operations to the init program that are used
-to initialize variables. Users should not use this class
-directly, but need to use one of its implementations.</p>
-</dd></dl>
-
 </div>
 <div class="section" id="constantinitializer">
 <h2>ConstantInitializer<a class="headerlink" href="#constantinitializer" title="Permalink to this headline">¶</a></h2>
-<dl class="class">
-<dt>
-<em class="property">class </em><code class="descclassname">paddle.v2.fluid.initializer.</code><code class="descname">ConstantInitializer</code><span class="sig-paren">(</span><em>value=0.0</em><span class="sig-paren">)</span></dt>
-<dd><p>Implements the constant initializer</p>
-</dd></dl>
-
 </div>
 <div class="section" id="uniforminitializer">
 <h2>UniformInitializer<a class="headerlink" href="#uniforminitializer" title="Permalink to this headline">¶</a></h2>
-<dl class="class">
-<dt>
-<em class="property">class </em><code class="descclassname">paddle.v2.fluid.initializer.</code><code class="descname">UniformInitializer</code><span class="sig-paren">(</span><em>low=-1.0</em>, <em>high=1.0</em>, <em>seed=0</em><span class="sig-paren">)</span></dt>
-<dd><p>Implements the random uniform distribution initializer</p>
-</dd></dl>
-
 </div>
 <div class="section" id="normalinitializer">
 <h2>NormalInitializer<a class="headerlink" href="#normalinitializer" title="Permalink to this headline">¶</a></h2>
-<dl class="class">
-<dt>
-<em class="property">class </em><code class="descclassname">paddle.v2.fluid.initializer.</code><code class="descname">NormalInitializer</code><span class="sig-paren">(</span><em>loc=0.0</em>, <em>scale=1.0</em>, <em>seed=0</em><span class="sig-paren">)</span></dt>
-<dd><p>Implements the random Normal (Gaussian) distribution initializer</p>
-</dd></dl>
-
 </div>
 <div class="section" id="xavierinitializer">
 <h2>XavierInitializer<a class="headerlink" href="#xavierinitializer" title="Permalink to this headline">¶</a></h2>
-<dl class="class">
-<dt>
-<em class="property">class </em><code class="descclassname">paddle.v2.fluid.initializer.</code><code class="descname">XavierInitializer</code><span class="sig-paren">(</span><em>uniform=True</em>, <em>fan_in=None</em>, <em>fan_out=None</em>, <em>seed=0</em><span class="sig-paren">)</span></dt>
-<dd><p>Implements the Xavier initializer</p>
-<p>This class implements the Xavier weight initializer from the paper
-Understanding the difficulty of training deep feedforward neural
-networks[1] by Xavier Glorot and Yoshua Bengio.</p>
-<p>This initializer is designed to keep the scale of the gradients
-approximately same in all the layers. In case of Uniform distribution,
-the range is [-x, x], where x = sqrt(6 / (fan_in + fan_out)).
-In case of Normal distribution, the mean is 0 and the standard deviation
-is sqrt(2/ (fan_in + fan_out)).</p>
-<p class="rubric">References</p>
-<dl class="docutils">
-<dt>[1] Understanding the difficulty of training deep feedforward neural</dt>
-<dd>networks. International conference on artificial intelligence and
-statistics.
-(<a class="reference external" href="http://proceedings.mlr.press/v9/glorot10a.html">http://proceedings.mlr.press/v9/glorot10a.html</a>)</dd>
-</dl>
-</dd></dl>
-
 </div>
 <div class="section" id="msrainitializer">
 <h2>MSRAInitializer<a class="headerlink" href="#msrainitializer" title="Permalink to this headline">¶</a></h2>
-<dl class="class">
-<dt>
-<em class="property">class </em><code class="descclassname">paddle.v2.fluid.initializer.</code><code class="descname">MSRAInitializer</code><span class="sig-paren">(</span><em>uniform=True</em>, <em>fan_in=None</em>, <em>seed=0</em><span class="sig-paren">)</span></dt>
-<dd><p>Implements the MSRA initializer a.k.a. Kaiming Initializer</p>
-<p>This class implements the weight initialization from the paper
-Delving Deep into Rectifiers: Surpassing Human-Level Performance on
-ImageNet Classification[1] by Kaiming He, Xiangyu Zhang, Shaoqing Ren
-and Jian Sun. This is a robust initialization method that particularly
-considers the rectifier nonlinearities. In case of Uniform distribution,
-the range is [-x, x], where x = sqrt(6 / fan_in). In case of Normal
-distribution, the mean is 0 and the standard deviation
-is sqrt(2/ fan_in).</p>
-<p class="rubric">References</p>
-<dl class="docutils">
-<dt>[1] Delving Deep into Rectifiers: Surpassing Human-Level Performance</dt>
-<dd>on ImageNet Classification
-(<a class="reference external" href="https://arxiv.org/abs/1502.01852">https://arxiv.org/abs/1502.01852</a>)</dd>
-</dl>
-</dd></dl>
-
 </div>
 </div>


--- a/develop/doc/api/v2/fluid/io.html
+++ b/develop/doc/api/v2/fluid/io.html
@@ -220,23 +220,6 @@
 <h1>IO<a class="headerlink" href="#io" title="Permalink to this headline">¶</a></h1>
 <div class="section" id="is-parameter">
 <h2>is_parameter<a class="headerlink" href="#is-parameter" title="Permalink to this headline">¶</a></h2>
-<dl class="function">
-<dt>
-<code class="descclassname">paddle.v2.fluid.io.</code><code class="descname">is_parameter</code><span class="sig-paren">(</span><em>var</em><span class="sig-paren">)</span></dt>
-<dd><p>Check whether the variable is a Parameter.</p>
-<p>This function checks whether the input variable is a Parameter.</p>
-<table class="docutils field-list" frame="void" rules="none">
-<col class="field-name" />
-<col class="field-body" />
-<tbody valign="top">
-<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>var</strong> &#8211; The input variable.</td>
-</tr>
-<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">boolean result whether the variable is a Parameter.</td>
-</tr>
-</tbody>
-</table>
-</dd></dl>
-
 </div>
 </div>


--- a/develop/doc/api/v2/fluid/layers.html
+++ b/develop/doc/api/v2/fluid/layers.html
--- a/develop/doc/api/v2/fluid/nets.html
+++ b/develop/doc/api/v2/fluid/nets.html
@@ -220,118 +220,18 @@
 <h1>Nets<a class="headerlink" href="#nets" title="Permalink to this headline">¶</a></h1>
 <div class="section" id="simple-img-conv-pool">
 <h2>simple_img_conv_pool<a class="headerlink" href="#simple-img-conv-pool" title="Permalink to this headline">¶</a></h2>
-<dl class="function">
-<dt>
-<code class="descclassname">paddle.v2.fluid.nets.</code><code class="descname">simple_img_conv_pool</code><span class="sig-paren">(</span><em>input</em>, <em>num_filters</em>, <em>filter_size</em>, <em>pool_size</em>, <em>pool_stride</em>, <em>act</em>, <em>param_attr=None</em>, <em>pool_type='max'</em>, <em>use_cudnn=True</em><span class="sig-paren">)</span></dt>
-<dd></dd></dl>
-
 </div>
 <div class="section" id="img-conv-group">
 <h2>img_conv_group<a class="headerlink" href="#img-conv-group" title="Permalink to this headline">¶</a></h2>
-<dl class="function">
-<dt>
-<code class="descclassname">paddle.v2.fluid.nets.</code><code class="descname">img_conv_group</code><span class="sig-paren">(</span><em>input</em>, <em>conv_num_filter</em>, <em>pool_size</em>, <em>conv_padding=1</em>, <em>conv_filter_size=3</em>, <em>conv_act=None</em>, <em>param_attr=None</em>, <em>conv_with_batchnorm=False</em>, <em>conv_batchnorm_drop_rate=None</em>, <em>pool_stride=1</em>, <em>pool_type=None</em>, <em>use_cudnn=True</em><span class="sig-paren">)</span></dt>
-<dd><p>Image Convolution Group, Used for vgg net.</p>
-</dd></dl>
-
 </div>
 <div class="section" id="sequence-conv-pool">
 <h2>sequence_conv_pool<a class="headerlink" href="#sequence-conv-pool" title="Permalink to this headline">¶</a></h2>
-<dl class="function">
-<dt>
-<code class="descclassname">paddle.v2.fluid.nets.</code><code class="descname">sequence_conv_pool</code><span class="sig-paren">(</span><em>input</em>, <em>num_filters</em>, <em>filter_size</em>, <em>param_attr=None</em>, <em>act='sigmoid'</em>, <em>pool_type='max'</em><span class="sig-paren">)</span></dt>
-<dd></dd></dl>
-
 </div>
 <div class="section" id="glu">
 <h2>glu<a class="headerlink" href="#glu" title="Permalink to this headline">¶</a></h2>
-<dl class="function">
-<dt>
-<code class="descclassname">paddle.v2.fluid.nets.</code><code class="descname">glu</code><span class="sig-paren">(</span><em>input</em>, <em>dim=-1</em><span class="sig-paren">)</span></dt>
-<dd><p>The gated linear unit composed by split, sigmoid activation and elementwise
-multiplication. Specifically, Split the input into two equal sized parts
-<span class="math">\(a\)</span> and <span class="math">\(b\)</span> along the given dimension and then compute as
-following:</p>
-<blockquote>
-<div><div class="math">
-\[{GLU}(a, b)= a \otimes \sigma(b)\]</div>
-</div></blockquote>
-<p>Refer to <a class="reference external" href="https://arxiv.org/pdf/1612.08083.pdf">Language Modeling with Gated Convolutional Networks</a>.</p>
-<table class="docutils field-list" frame="void" rules="none">
-<col class="field-name" />
-<col class="field-body" />
-<tbody valign="top">
-<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
-<li><strong>input</strong> (<em>Variable</em>) &#8211; The input variable which is a Tensor or LoDTensor.</li>
-<li><strong>dim</strong> (<em>int</em>) &#8211; The dimension along which to split. If <span class="math">\(dim &lt; 0\)</span>, the
-dimension to split along is <span class="math">\(rank(input) + dim\)</span>.</li>
-</ul>
-</td>
-</tr>
-<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">The Tensor variable with half the size of input.</p>
-</td>
-</tr>
-<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">Variable</p>
-</td>
-</tr>
-</tbody>
-</table>
-<p class="rubric">Examples</p>
-<div class="highlight-python"><div class="highlight"><pre><span></span><span class="c1"># x is a Tensor variable with shape [3, 6, 9]</span>
-<span class="n">fluid</span><span class="o">.</span><span class="n">nets</span><span class="o">.</span><span class="n">glu</span><span class="p">(</span><span class="nb">input</span><span class="o">=</span><span class="n">x</span><span class="p">,</span> <span class="n">dim</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>  <span class="c1"># shape of output: [3, 3, 9]</span>
-</pre></div>
-</div>
-</dd></dl>
-
 </div>
 <div class="section" id="dot-product-attention">
 <h2>dot_product_attention<a class="headerlink" href="#dot-product-attention" title="Permalink to this headline">¶</a></h2>
-<dl class="function">
-<dt>
-<code class="descclassname">paddle.v2.fluid.nets.</code><code class="descname">dot_product_attention</code><span class="sig-paren">(</span><em>querys</em>, <em>keys</em>, <em>values</em><span class="sig-paren">)</span></dt>
-<dd><p>The dot-product attention.</p>
-<p>Attention mechanism can be seen as mapping a query and a set of key-value
-pairs to an output. The output is computed as a weighted sum of the values,
-where the weight assigned to each value is computed by a compatibility
-function (dot-product here) of the query with the corresponding key.</p>
-<p>The dot-product attention can be implemented through (batch) matrix
-multiplication as follows:</p>
-<blockquote>
-<div><div class="math">
-\[Attention(Q, K, V)= softmax(QK^\mathrm{T})V\]</div>
-</div></blockquote>
-<p>Refer to <a class="reference external" href="https://arxiv.org/pdf/1706.03762.pdf">Attention Is All You Need</a>.</p>
-<p>Note that batch data containing sequences with different lengths is not
-supported by this because of the (batch) matrix multiplication.</p>
-<table class="docutils field-list" frame="void" rules="none">
-<col class="field-name" />
-<col class="field-body" />
-<tbody valign="top">
-<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
-<li><strong>querys</strong> (<em>Variable</em>) &#8211; The input variable which is a Tensor or LoDTensor.</li>
-<li><strong>keys</strong> (<em>Variable</em>) &#8211; The input variable which is a Tensor or LoDTensor.</li>
-<li><strong>values</strong> (<em>Variable</em>) &#8211; The input variable which is a Tensor or LoDTensor.</li>
-</ul>
-</td>
-</tr>
-<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">The Tensor variables representing the output and attention scores.</p>
-</td>
-</tr>
-<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">tuple</p>
-</td>
-</tr>
-</tbody>
-</table>
-<p class="rubric">Examples</p>
-<div class="highlight-python"><div class="highlight"><pre><span></span><span class="c1"># Suppose q, k, v are tensor variables with the following shape:</span>
-<span class="c1"># q: [3, 5, 9], k: [3, 6, 9], v: [3, 6, 10]</span>
-<span class="n">out</span><span class="p">,</span> <span class="n">attn_scores</span> <span class="o">=</span> <span class="n">fluid</span><span class="o">.</span><span class="n">nets</span><span class="o">.</span><span class="n">dot_product_attention</span><span class="p">(</span><span class="n">q</span><span class="p">,</span> <span class="n">k</span><span class="p">,</span> <span class="n">v</span><span class="p">)</span>
-<span class="n">out</span><span class="o">.</span><span class="n">shape</span>  <span class="c1"># [3, 5, 10]</span>
-<span class="n">attn_scores</span><span class="o">.</span><span class="n">shape</span>  <span class="c1"># [3, 5, 6]</span>
-</pre></div>
-</div>
-</dd></dl>
-
 </div>
 </div>


--- a/develop/doc/api/v2/fluid/optimizer.html
+++ b/develop/doc/api/v2/fluid/optimizer.html
@@ -220,105 +220,24 @@
 <h1>Optimizer<a class="headerlink" href="#optimizer" title="Permalink to this headline">¶</a></h1>
 <div class="section" id="id1">
 <h2>Optimizer<a class="headerlink" href="#id1" title="Permalink to this headline">¶</a></h2>
-<dl class="class">
-<dt>
-<em class="property">class </em><code class="descclassname">paddle.v2.fluid.optimizer.</code><code class="descname">Optimizer</code><span class="sig-paren">(</span><em>global_step=None</em>, <em>regularization=None</em><span class="sig-paren">)</span></dt>
-<dd><p>Optimizer Base class.</p>
-<p>Define the common interface of an optimizer.
-User should not use this class directly,
-but need to use one of its implementations.</p>
-<dl class="method">
-<dt>
-<code class="descname">create_optimization_pass</code><span class="sig-paren">(</span><em>parameters_and_grads</em>, <em>loss</em>, <em>startup_program=None</em><span class="sig-paren">)</span></dt>
-<dd><p>Add optimization operators to update gradients to variables.</p>
-<table class="docutils field-list" frame="void" rules="none">
-<col class="field-name" />
-<col class="field-body" />
-<tbody valign="top">
-<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
-<li><strong>loss</strong> &#8211; the target that this optimization is for.</li>
-<li><strong>parameters_and_grads</strong> &#8211; a list of (variable, gradient) pairs to update.</li>
-</ul>
-</td>
-</tr>
-<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">a list of operators that will complete one step of
-optimization. This will include parameter update ops, global step
-update ops and any other custom ops required by subclasses to manage
-their internal state.
-:param startup_program:</p>
-</td>
-</tr>
-<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">return_op_list</p>
-</td>
-</tr>
-</tbody>
-</table>
-</dd></dl>
-
-<dl class="method">
-<dt>
-<code class="descname">minimize</code><span class="sig-paren">(</span><em>loss</em>, <em>startup_program=None</em>, <em>parameter_list=None</em>, <em>no_grad_set=None</em><span class="sig-paren">)</span></dt>
-<dd><p>Add operations to minimize <cite>loss</cite> by updating <cite>parameter_list</cite>.</p>
-<p>This method combines interface <cite>append_backward()</cite> and
-<cite>create_optimization_pass()</cite> into one.</p>
-</dd></dl>
-
-</dd></dl>
-
 </div>
 <div class="section" id="sgdoptimizer">
 <h2>SGDOptimizer<a class="headerlink" href="#sgdoptimizer" title="Permalink to this headline">¶</a></h2>
-<dl class="class">
-<dt>
-<em class="property">class </em><code class="descclassname">paddle.v2.fluid.optimizer.</code><code class="descname">SGDOptimizer</code><span class="sig-paren">(</span><em>learning_rate</em>, <em>**kwargs</em><span class="sig-paren">)</span></dt>
-<dd><p>Simple SGD optimizer without any state.</p>
-</dd></dl>
-
 </div>
 <div class="section" id="momentumoptimizer">
 <h2>MomentumOptimizer<a class="headerlink" href="#momentumoptimizer" title="Permalink to this headline">¶</a></h2>
-<dl class="class">
-<dt>
-<em class="property">class </em><code class="descclassname">paddle.v2.fluid.optimizer.</code><code class="descname">MomentumOptimizer</code><span class="sig-paren">(</span><em>learning_rate</em>, <em>momentum</em>, <em>use_nesterov=False</em>, <em>**kwargs</em><span class="sig-paren">)</span></dt>
-<dd><p>Simple Momentum optimizer with velocity state</p>
-</dd></dl>
-
 </div>
 <div class="section" id="adagradoptimizer">
 <h2>AdagradOptimizer<a class="headerlink" href="#adagradoptimizer" title="Permalink to this headline">¶</a></h2>
-<dl class="class">
-<dt>
-<em class="property">class </em><code class="descclassname">paddle.v2.fluid.optimizer.</code><code class="descname">AdagradOptimizer</code><span class="sig-paren">(</span><em>learning_rate</em>, <em>epsilon=1e-06</em>, <em>**kwargs</em><span class="sig-paren">)</span></dt>
-<dd><p>Simple Adagrad optimizer with moment state</p>
-</dd></dl>
-
 </div>
 <div class="section" id="adamoptimizer">
 <h2>AdamOptimizer<a class="headerlink" href="#adamoptimizer" title="Permalink to this headline">¶</a></h2>
-<dl class="class">
-<dt>
-<em class="property">class </em><code class="descclassname">paddle.v2.fluid.optimizer.</code><code class="descname">AdamOptimizer</code><span class="sig-paren">(</span><em>learning_rate=0.001</em>, <em>beta1=0.9</em>, <em>beta2=0.999</em>, <em>epsilon=1e-08</em>, <em>**kwargs</em><span class="sig-paren">)</span></dt>
-<dd><p>Implements the Adam Optimizer</p>
-</dd></dl>
-
 </div>
 <div class="section" id="adamaxoptimizer">
 <h2>AdamaxOptimizer<a class="headerlink" href="#adamaxoptimizer" title="Permalink to this headline">¶</a></h2>
-<dl class="class">
-<dt>
-<em class="property">class </em><code class="descclassname">paddle.v2.fluid.optimizer.</code><code class="descname">AdamaxOptimizer</code><span class="sig-paren">(</span><em>learning_rate=0.001</em>, <em>beta1=0.9</em>, <em>beta2=0.999</em>, <em>epsilon=1e-08</em>, <em>**kwargs</em><span class="sig-paren">)</span></dt>
-<dd><p>Implements the Adamax Optimizer</p>
-</dd></dl>
-
 </div>
 <div class="section" id="decayedadagradoptimizer">
 <h2>DecayedAdagradOptimizer<a class="headerlink" href="#decayedadagradoptimizer" title="Permalink to this headline">¶</a></h2>
-<dl class="class">
-<dt>
-<em class="property">class </em><code class="descclassname">paddle.v2.fluid.optimizer.</code><code class="descname">DecayedAdagradOptimizer</code><span class="sig-paren">(</span><em>learning_rate</em>, <em>decay=0.95</em>, <em>epsilon=1e-06</em>, <em>**kwargs</em><span class="sig-paren">)</span></dt>
-<dd><p>Simple Decayed Adagrad optimizer with moment state</p>
-</dd></dl>
-
 </div>
 </div>


--- a/develop/doc/api/v2/fluid/profiler.html
+++ b/develop/doc/api/v2/fluid/profiler.html
@@ -220,35 +220,6 @@
 <h1>Profiler<a class="headerlink" href="#profiler" title="Permalink to this headline">¶</a></h1>
 <div class="section" id="id1">
 <h2>Profiler<a class="headerlink" href="#id1" title="Permalink to this headline">¶</a></h2>
-<dl class="function">
-<dt>
-<code class="descclassname">paddle.v2.fluid.profiler.</code><code class="descname">cuda_profiler</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwds</em><span class="sig-paren">)</span></dt>
-<dd><p>The CUDA profiler.
-This function is used to profile a CUDA program through the CUDA runtime application
-programming interface. The profiling result will be written into
-<cite>output_file</cite> with Key-Value pair format or Comma separated values format.
-The user can set the output mode by <cite>output_mode</cite> argument and set the
-counters/options for profiling by <cite>config</cite> argument. The default config
-is [&#8216;gpustarttimestamp&#8217;, &#8216;gpuendtimestamp&#8217;, &#8216;gridsize3d&#8217;,
-&#8216;threadblocksize&#8217;, &#8216;streamid&#8217;, &#8216;enableonstart 0&#8217;, &#8216;conckerneltrace&#8217;].</p>
-<table class="docutils field-list" frame="void" rules="none">
-<col class="field-name" />
-<col class="field-body" />
-<tbody valign="top">
-<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
-<li><strong>output_file</strong> (<em>string</em>) &#8211; The output file name, the result will be
-written into this file.</li>
-<li><strong>output_mode</strong> (<em>string</em>) &#8211; The output mode has Key-Value pair format and
-Comma separated values format. It should be &#8216;kvp&#8217; or &#8216;csv&#8217;.</li>
-<li><strong>config</strong> (<em>list of string</em>) &#8211; The profiler options and counters can refer
-to &#8220;Compute Command Line Profiler User Guide&#8221;.</li>
-</ul>
-</td>
-</tr>
-</tbody>
-</table>
-</dd></dl>
-
 </div>
 </div>


--- a/develop/doc/api/v2/fluid/regularizer.html
+++ b/develop/doc/api/v2/fluid/regularizer.html
@@ -220,36 +220,12 @@
 <h1>Regularizer<a class="headerlink" href="#regularizer" title="Permalink to this headline">¶</a></h1>
 <div class="section" id="weightdecayregularizer">
 <h2>WeightDecayRegularizer<a class="headerlink" href="#weightdecayregularizer" title="Permalink to this headline">¶</a></h2>
-<dl class="class">
-<dt>
-<em class="property">class </em><code class="descclassname">paddle.v2.fluid.regularizer.</code><code class="descname">WeightDecayRegularizer</code></dt>
-<dd><p>Base class for weight decay regularizers</p>
-<p>Defines the common interface of weight-decay regularizers.
-Weight-decay regularizers are added only during the backward
-pass for faster regularization. They add operations to the network
-that correspond to gradient of the regularization function.
-Users should not use this class directly, but need to use one
-of its implementations</p>
-</dd></dl>
-
 </div>
 <div class="section" id="l2decayregularizer">
 <h2>L2DecayRegularizer<a class="headerlink" href="#l2decayregularizer" title="Permalink to this headline">¶</a></h2>
-<dl class="class">
-<dt>
-<em class="property">class </em><code class="descclassname">paddle.v2.fluid.regularizer.</code><code class="descname">L2DecayRegularizer</code><span class="sig-paren">(</span><em>regularization_coeff=0.0</em><span class="sig-paren">)</span></dt>
-<dd><p>Implements the L2 Weight Decay Regularization</p>
-</dd></dl>
-
 </div>
-<div class="section" id="module-paddle.v2.fluid.regularizer">
-<span id="l1decayregularizer"></span><h2>L1DecayRegularizer<a class="headerlink" href="#module-paddle.v2.fluid.regularizer" title="Permalink to this headline">¶</a></h2>
-<dl class="class">
-<dt id="paddle.v2.fluid.regularizer.L1DecayRegularizer">
-<em class="property">class </em><code class="descclassname">paddle.v2.fluid.regularizer.</code><code class="descname">L1DecayRegularizer</code><span class="sig-paren">(</span><em>regularization_coeff=0.0</em><span class="sig-paren">)</span><a class="headerlink" href="#paddle.v2.fluid.regularizer.L1DecayRegularizer" title="Permalink to this definition">¶</a></dt>
-<dd><p>Implements the L1 Weight Decay Regularization</p>
-</dd></dl>
-
+<div class="section" id="l1decayregularizer">
+<h2>L1DecayRegularizer<a class="headerlink" href="#l1decayregularizer" title="Permalink to this headline">¶</a></h2>
 </div>
 </div>


--- a/develop/doc/genindex.html
+++ b/develop/doc/genindex.html
@@ -217,7 +217,6 @@
 <a href="#B"><strong>B</strong></a>
 | <a href="#C"><strong>C</strong></a>
 | <a href="#L"><strong>L</strong></a>
- | <a href="#M"><strong>M</strong></a>
 | <a href="#P"><strong>P</strong></a>
 | <a href="#R"><strong>R</strong></a>
 | <a href="#S"><strong>S</strong></a>
@@ -243,14 +242,12 @@
 <h2 id="L">L</h2>
 <table style="width: 100%" class="indextable genindextable"><tr>
  <td style="width: 33%; vertical-align: top;"><ul>
-      <li><a href="api/v2/fluid/regularizer.html#paddle.v2.fluid.regularizer.L1DecayRegularizer">L1DecayRegularizer (class in paddle.v2.fluid.regularizer)</a>
-</li>
      <li><a href="api/v2/data/image.html#paddle.v2.image.left_right_flip">left_right_flip() (in module paddle.v2.image)</a>
 </li>
-  </ul></td>
-  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="api/v2/data/image.html#paddle.v2.image.load_and_transform">load_and_transform() (in module paddle.v2.image)</a>
 </li>
+  </ul></td>
+  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="api/v2/data/image.html#paddle.v2.image.load_image">load_image() (in module paddle.v2.image)</a>
 </li>
      <li><a href="api/v2/data/image.html#paddle.v2.image.load_image_bytes">load_image_bytes() (in module paddle.v2.image)</a>
@@ -258,23 +255,13 @@
  </ul></td>
 </tr></table>

-<h2 id="M">M</h2>
-<table style="width: 100%" class="indextable genindextable"><tr>
-  <td style="width: 33%; vertical-align: top;"><ul>
-      <li><a href="api/v2/fluid/evaluator.html#paddle.v2.fluid.evaluator.Evaluator.metrics">metrics (paddle.v2.fluid.evaluator.Evaluator attribute)</a>
-</li>
-  </ul></td>
-</tr></table>
-
 <h2 id="P">P</h2>
 <table style="width: 100%" class="indextable genindextable"><tr>
  <td style="width: 33%; vertical-align: top;"><ul>
-      <li><a href="api/v2/fluid/regularizer.html#module-paddle.v2.fluid.regularizer">paddle.v2.fluid.regularizer (module)</a>
+      <li><a href="api/v2/data/image.html#module-paddle.v2.image">paddle.v2.image (module)</a>
 </li>
  </ul></td>
  <td style="width: 33%; vertical-align: top;"><ul>
-      <li><a href="api/v2/data/image.html#module-paddle.v2.image">paddle.v2.image (module)</a>
-</li>
      <li><a href="api/v1/data_provider/pydataprovider2_en.html#paddle.trainer.PyDataProvider2.provider">provider() (in module paddle.trainer.PyDataProvider2)</a>
 </li>
  </ul></td>
@@ -296,10 +283,6 @@
 <table style="width: 100%" class="indextable genindextable"><tr>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="api/v2/data/image.html#paddle.v2.image.simple_transform">simple_transform() (in module paddle.v2.image)</a>
-</li>
-  </ul></td>
-  <td style="width: 33%; vertical-align: top;"><ul>
-      <li><a href="api/v2/fluid/evaluator.html#paddle.v2.fluid.evaluator.Evaluator.states">states (paddle.v2.fluid.evaluator.Evaluator attribute)</a>
 </li>
  </ul></td>
 </tr></table>

--- a/develop/doc/objects.inv
+++ b/develop/doc/objects.inv
--- a/develop/doc/operators.json
+++ b/develop/doc/operators.json
@@ -1140,24 +1140,6 @@
   "intermediate" : 0
 } ], 
 "attrs" : [  ] 
-},{
- "type" : "log",
- "comment" : "\nLog Activation Operator.\n\n$out = \\ln(x)$\n\nNatural logarithm of x.\n\n",
- "inputs" : [ 
- { 
-   "name" : "X",
-   "comment" : "Input of Log operator",
-   "duplicable" : 0,
-   "intermediate" : 0
- } ], 
- "outputs" : [ 
- { 
-   "name" : "Out",
-   "comment" : "Output of Log operator",
-   "duplicable" : 0,
-   "intermediate" : 0
- } ], 
- "attrs" : [  ] 
 },{
 "type" : "softmax",
 "comment" : "\nSoftmax Operator.\n\nThe input of the softmax operator is a 2-D tensor with shape N x K (N is the\nbatch_size, K is the dimension of input feature). The output tensor has the\nsame shape as the input tensor.\n\nFor each row of the input tensor, the softmax operator squashes the\nK-dimensional vector of arbitrary real values to a K-dimensional vector of real\nvalues in the range [0, 1] that add up to 1.\nIt computes the exponential of the given dimension and the sum of exponential\nvalues of all the other dimensions in the K-dimensional vector input.\nThen the ratio of the exponential of the given dimension and the sum of\nexponential values of all the other dimensions is the output of the softmax\noperator.\n\nFor each row $i$ and each column $j$ in Input(X), we have:\n    $$Out[i, j] = \\frac{\\exp(X[i, j])}{\\sum_j(exp(X[i, j])}$$\n\n",
@@ -1546,6 +1528,34 @@
   "comment" : "(float, default 0.0) L2 regularization strength.",
   "generated" : 0
 } ] 
+},{
+ "type" : "rank_loss",
+ "comment" : "\nRankLoss Operator.\n\nRankLoss operator for RankNet\n(http://icml.cc/2015/wp-content/uploads/2015/06/icml_ranking.pdf). \nRankNet is a pairwise ranking model with\none training sample consisting of a pair of doc A and B, and the label P\nindicating that A is ranked higher than B or not:\n\nP = {0, 1} or {0, 0.5, 1}, where 0.5 means no information about the rank of\nthe input pair.\n\nThe RankLoss operator takes three inputs: Left (o_i), Right (o_j) and Label\n(P_{i,j}), which represent the output score of RankNet for the two docs and \nthe label respectively, and yields the rank loss C_{i,j} using the following \nequation:\n\n$$\n  C_{i,j} = -\\tilde{P_{ij}} * o_{i,j} + \\log(1 + e^{o_{i,j}}) \\\\\n  o_{i,j} =  o_i - o_j  \\\\\n  \\tilde{P_{i,j}} = \\left \\{0, 0.5, 1 \\right \\} \\ or \\ \\left \\{0, 1 \\right \\}\n$$\n\nThe operator can take batch inputs with size batch_size (batch_size >= 1).\n\n",
+ "inputs" : [ 
+ { 
+   "name" : "Label",
+   "comment" : "(2-D Tensor with shape [batch_size x 1]) The label indicating A ranked higher than B or not.",
+   "duplicable" : 0,
+   "intermediate" : 0
+ }, { 
+   "name" : "Left",
+   "comment" : "(2-D Tensor with shape [batch_size x 1]) The output of RankNet for doc A.",
+   "duplicable" : 0,
+   "intermediate" : 0
+ }, { 
+   "name" : "Right",
+   "comment" : "(2-D Tensor with shape [batch_size x 1]) The output of RankNet for doc B.",
+   "duplicable" : 0,
+   "intermediate" : 0
+ } ], 
+ "outputs" : [ 
+ { 
+   "name" : "Out",
+   "comment" : "(2-D Tensor with shape [batch_size x 1]) The output loss of RankLoss operator.",
+   "duplicable" : 0,
+   "intermediate" : 0
+ } ], 
+ "attrs" : [  ] 
 },{
 "type" : "reciprocal",
 "comment" : "\nReciprocal Activation Operator.\n\n$$out = \\frac{1}{x}$$\n\n",
@@ -2389,7 +2399,13 @@
   "duplicable" : 0,
   "intermediate" : 0
 } ], 
- "attrs" : [  ] 
+ "attrs" : [ 
+ { 
+   "name" : "axis",
+   "type" : "int",
+   "comment" : "(int, default -1). The start dimension index for broadcasting Y onto X.",
+   "generated" : 0
+ } ] 
 },{
 "type" : "sequence_pool",
 "comment" : "\nSequence Pool Operator.\n\nThe SequencePoolOp pools features of all time-steps of each instance.\nIt supports six pooling types:\n1. AVERAGE: $$Out[i] = \\frac{\\sum_i X_i}{N}$$\n2. SUM:     $$Out[i] = \\sum_jX_{ij}$$\n3. SQRT:    $$Out[i] = \\frac{\\sum_jX_{ij}}{\\sqrt{len(X_i)}}$$\n4. LAST:    Out[i] = last instance in i-th sequence X[i]\n5. FIRST:   Out[i] = first instance in i-th sequence X[i]\n6. MAX:     $$Out[i] = max(X_i)$$\n\nThe following example explains how this works:\nFor a mini-batch of 3 variable-length sentences,\ncontaining 2, 3, and 2 time-steps:\n\nAssume X is a [7,M,N] LoDTensor, and X->lod()[0] = [0, 2, 5, 7], 7=2+3+2.\nBesides, for the sake of simplicity, we assume M=1 and N=1,\nand the value of X = [[1, 3], [2, 4, 6], [5, 1]].\n\nThus, Out is a [3,1,1] Tensor without LoD infomation.\nAnd for different pooltype, the value of Out is as follows:\n\n- AVERAGE: [2, 4, 3], where 2=(1+3)/2, 4=(2+4+6)/3, 3=(5+1)/2\n- SUM: [4, 12, 6], where 4=1+3, 12=2+4+6, 6=5+1\n- SQRT: [2.82, 6.93, 4.24], where 2.82=(1+3)/sqrt(2),\n           6.93=(2+4+6)/sqrt(3), 4.24=(5+1)/sqrt(2)\n- MAX: [3, 6, 5], where 3=max(1,3), 6=max(2,4,6), 5=max(5,1)\n- LAST: [3, 6, 1], where 3=last(1,3), 6=last(2,4,6), 1=last(5,1)\n- FIRST: [1, 2, 5], where 1=first(1,3), 2=first(2,4,6), 5=first(5,1)\n\n    ",
@@ -3197,57 +3213,6 @@
   "comment" : "Hyper parameter in huber loss.",
   "generated" : 0
 } ] 
-},{
- "type" : "rank_loss",
- "comment" : "\nRankLoss Operator.\n\nRankLoss operator for RankNet\n(http://icml.cc/2015/wp-content/uploads/2015/06/icml_ranking.pdf). \nRankNet is a pairwise ranking model with\none training sample consisting of a pair of doc A and B, and the label P\nindicating that A is ranked higher than B or not:\n\nP = {0, 1} or {0, 0.5, 1}, where 0.5 means no information about the rank of\nthe input pair.\n\nThe RankLoss operator takes three inputs: Left (o_i), Right (o_j) and Label\n(P_{i,j}), which represent the output score of RankNet for the two docs and \nthe label respectively, and yields the rank loss C_{i,j} using the following \nequation:\n\n$$\n  C_{i,j} = -\\tilde{P_{ij}} * o_{i,j} + \\log(1 + e^{o_{i,j}}) \\\\\n  o_{i,j} =  o_i - o_j  \\\\\n  \\tilde{P_{i,j}} = \\left \\{0, 0.5, 1 \\right \\} \\ or \\ \\left \\{0, 1 \\right \\}\n$$\n\nThe operator can take batch inputs with size batch_size (batch_size >= 1).\n\n",
- "inputs" : [ 
- { 
-   "name" : "Label",
-   "comment" : "(2-D Tensor with shape [batch_size x 1]) The label indicating A ranked higher than B or not.",
-   "duplicable" : 0,
-   "intermediate" : 0
- }, { 
-   "name" : "Left",
-   "comment" : "(2-D Tensor with shape [batch_size x 1]) The output of RankNet for doc A.",
-   "duplicable" : 0,
-   "intermediate" : 0
- }, { 
-   "name" : "Right",
-   "comment" : "(2-D Tensor with shape [batch_size x 1]) The output of RankNet for doc B.",
-   "duplicable" : 0,
-   "intermediate" : 0
- } ], 
- "outputs" : [ 
- { 
-   "name" : "Out",
-   "comment" : "(2-D Tensor with shape [batch_size x 1]) The output loss of RankLoss operator.",
-   "duplicable" : 0,
-   "intermediate" : 0
- } ], 
- "attrs" : [  ] 
-},{
- "type" : "greater_than",
- "comment" : "greater_than Operator\n\nIt operates element-wise on X and Y, and returns the Out. Each of them is a\nN-dim tensor. X and Y could be any type.  The each element of the Out tensor is\ncalculated by Out = X > Y\n",
- "inputs" : [ 
- { 
-   "name" : "X",
-   "comment" : "(LoDTensor) the left hand operand of greater_than operator",
-   "duplicable" : 0,
-   "intermediate" : 0
- }, { 
-   "name" : "Y",
-   "comment" : "(LoDTensor) the right hand operand of greater_than operator",
-   "duplicable" : 0,
-   "intermediate" : 0
- } ], 
- "outputs" : [ 
- { 
-   "name" : "Out",
-   "comment" : "(LoDTensor) n-dim bool tensor. Each element is Out = X > Y",
-   "duplicable" : 0,
-   "intermediate" : 0
- } ], 
- "attrs" : [  ] 
 },{
 "type" : "sequence_softmax",
 "comment" : "\nSequence Softmax Operator.\n\nSequenceSoftmaxOp computes the softmax activation among all time-steps for each\nsequence. The dimension of each time-step should be 1. Thus, the shape of\ninput Tensor can be either [N, 1] or [N], where N is the sum of the length\nof all sequences.\n\nThe algorithm works as follows:\n\n    for i-th sequence in a mini-batch:\n\n$$\nOut(X[lod[i]:lod[i+1]], :) = \\\n\\frac{\\exp(X[lod[i]:lod[i+1], :])} \\\n{\\sum(\\exp(X[lod[i]:lod[i+1], :]))}\n$$\n\nFor example, for a mini-batch of 3 sequences with variable-length,\neach containing 2, 3, 2 time-steps, the lod of which is [0, 2, 5, 7],\nthen softmax will be computed among X[0:2, :], X[2:5, :], X[5:7, :]\nand N turns out to be 7.\n\n",
@@ -4515,29 +4480,6 @@
   "comment" : "(int) the specific lod level to split.",
   "generated" : 0
 } ] 
-},{
- "type" : "greater_equal",
- "comment" : "greater_equal Operator\n\nIt operates element-wise on X and Y, and returns the Out. Each of them is a\nN-dim tensor. X and Y could be any type.  The each element of the Out tensor is\ncalculated by Out = X >= Y\n",
- "inputs" : [ 
- { 
-   "name" : "X",
-   "comment" : "(LoDTensor) the left hand operand of greater_equal operator",
-   "duplicable" : 0,
-   "intermediate" : 0
- }, { 
-   "name" : "Y",
-   "comment" : "(LoDTensor) the right hand operand of greater_equal operator",
-   "duplicable" : 0,
-   "intermediate" : 0
- } ], 
- "outputs" : [ 
- { 
-   "name" : "Out",
-   "comment" : "(LoDTensor) n-dim bool tensor. Each element is Out = X >= Y",
-   "duplicable" : 0,
-   "intermediate" : 0
- } ], 
- "attrs" : [  ] 
 },{
 "type" : "crop",
 "comment" : "\nCrop Operator.\n\nCrop input into output, as specified by offsets and shape.\n\nThere are two ways to set shape:\n1. reference input: crop input X into the same shape as reference input.\n                    The dimension of reference input should\n                    be the same as the dimension of input X.\n2. shape list: crop input X into the shape described by a list<int>.\n               The size of shape list should be the same as\n               the dimension size of input X.\n\nThe input should be a k-D tensor(k > 0 and k < 7). As an example:\n\nCase 1:\nGiven\n\n    X = [[0, 1, 2, 0, 0]\n         [0, 3, 4, 0, 0]\n         [0, 0, 0, 0, 0]],\n\nand\n\n    offsets = [0, 1],\n\nand\n\n    shape = [2, 2],\n\nwe get:\n\n    Out = [[1, 2],\n           [3, 4]].\n\n\nCase 2:\nGiven\n\n    X = [[0, 1, 2, 5, 0]\n         [0, 3, 4, 6, 0]\n         [0, 0, 0, 0, 0]],\n\nand\n\n    offsets = [0, 1],\n\nand\n\n    Y = [[0, 0, 0]\n         [0, 0, 0]],\n\nwe get:\n\n    Out = [[1, 2, 5],\n           [3, 4, 6]].\n",
@@ -4750,7 +4692,13 @@
   "duplicable" : 0,
   "intermediate" : 0
 } ], 
- "attrs" : [  ] 
+ "attrs" : [ 
+ { 
+   "name" : "axis",
+   "type" : "int",
+   "comment" : "(int, default -1). The start dimension index for broadcasting Y onto X.",
+   "generated" : 0
+ } ] 
 },{
 "type" : "equal",
 "comment" : "equal Operator\n\nIt operates element-wise on X and Y, and returns the Out. Each of them is a\nN-dim tensor. X and Y could be any type.  The each element of the Out tensor is\ncalculated by Out = X == Y\n",
@@ -4773,7 +4721,13 @@
   "duplicable" : 0,
   "intermediate" : 0
 } ], 
- "attrs" : [  ] 
+ "attrs" : [ 
+ { 
+   "name" : "axis",
+   "type" : "int",
+   "comment" : "(int, default -1). The start dimension index for broadcasting Y onto X.",
+   "generated" : 0
+ } ] 
 },{
 "type" : "gather",
 "comment" : "\nGather Operator.\n\n$Out = X[Index]$\n\nOut is obtained by gathering entries of the outer-most dimension \nof X indexed by Index and concatenate them together.\n\nExample:\n\nX = [[1, 2],\n     [3, 4],\n     [5, 6]]\n\nIndex = [[1, 2]]\n\nThen:\n\nOut = [[3, 4],\n       [5, 6]]\n\n",
@@ -5359,6 +5313,24 @@
   "comment" : "(float, default 1.0e-6) Constant for numerical stability",
   "generated" : 0
 } ] 
+},{
+ "type" : "log",
+ "comment" : "\nLog Activation Operator.\n\n$out = \\ln(x)$\n\nNatural logarithm of x.\n\n",
+ "inputs" : [ 
+ { 
+   "name" : "X",
+   "comment" : "Input of Log operator",
+   "duplicable" : 0,
+   "intermediate" : 0
+ } ], 
+ "outputs" : [ 
+ { 
+   "name" : "Out",
+   "comment" : "Output of Log operator",
+   "duplicable" : 0,
+   "intermediate" : 0
+ } ], 
+ "attrs" : [  ] 
 },{
 "type" : "nce",
 "comment" : "\nCompute and return the noise-contrastive estimation training loss.\nSee [Noise-contrastive estimation: A new estimation principle for unnormalized statistical models](http://www.jmlr.org/proceedings/papers/v9/gutmann10a/gutmann10a.pdf).\nBy default this operator uses a uniform distribution for sampling.\n",

--- a/develop/doc/py-modindex.html
+++ b/develop/doc/py-modindex.html
@@ -229,11 +229,6 @@
       <td>
       <code class="xref">paddle</code></td><td>
       <em></em></td></tr>
-     <tr class="cg-1">
-       <td></td>
-       <td>&#160;&#160;&#160;
-       <a href="api/v2/fluid/regularizer.html#module-paddle.v2.fluid.regularizer"><code class="xref">paddle.v2.fluid.regularizer</code></a></td><td>
-       <em></em></td></tr>
     <tr class="cg-1">
       <td></td>
       <td>&#160;&#160;&#160;

--- a/develop/doc/searchindex.js
+++ b/develop/doc/searchindex.js
--- a/develop/doc_cn/api/v2/fluid/evaluator.html
+++ b/develop/doc_cn/api/v2/fluid/evaluator.html
@@ -239,75 +239,6 @@
 <h1>Evaluator<a class="headerlink" href="#evaluator" title="永久链接至标题">¶</a></h1>
 <div class="section" id="id1">
 <h2>Evaluator<a class="headerlink" href="#id1" title="永久链接至标题">¶</a></h2>
-<dl class="class">
-<dt>
-<em class="property">class </em><code class="descclassname">paddle.v2.fluid.evaluator.</code><code class="descname">Evaluator</code><span class="sig-paren">(</span><em>name</em>, <em>**kwargs</em><span class="sig-paren">)</span></dt>
-<dd><p>Base Class for all evaluators</p>
-<table class="docutils field-list" frame="void" rules="none">
-<col class="field-name" />
-<col class="field-body" />
-<tbody valign="top">
-<tr class="field-odd field"><th class="field-name">参数:</th><td class="field-body"><ul class="first last simple">
-<li><strong>name</strong> (<em>str</em>) &#8211; The name of the evaluator, such as &#8220;accuracy&#8221;. Used to generate
-temporary variable names.</li>
-<li><strong>main_program</strong> (<em>Program</em><em>, </em><em>optional</em>) &#8211; The evaluator should be added to this
-main_program. Default default_main_program()</li>
-<li><strong>startup_program</strong> (<em>Program</em><em>, </em><em>optional</em>) &#8211; The parameter should be added to this
-startup_program. Default default_startup_program()</li>
-</ul>
-</td>
-</tr>
-</tbody>
-</table>
-<dl class="attribute">
-<dt id="paddle.v2.fluid.evaluator.Evaluator.states">
-<code class="descname">states</code><a class="headerlink" href="#paddle.v2.fluid.evaluator.Evaluator.states" title="永久链接至目标">¶</a></dt>
-<dd><p><em>list</em> &#8211; The list of state variables. states will be reset to zero
-when <cite>reset</cite> is invoked.</p>
-</dd></dl>
-
-<dl class="attribute">
-<dt id="paddle.v2.fluid.evaluator.Evaluator.metrics">
-<code class="descname">metrics</code><a class="headerlink" href="#paddle.v2.fluid.evaluator.Evaluator.metrics" title="永久链接至目标">¶</a></dt>
-<dd><p><em>list</em> &#8211; The list of metrics variables. They will be calculated
-every mini-batch.</p>
-</dd></dl>
-
-<dl class="method">
-<dt>
-<code class="descname">reset</code><span class="sig-paren">(</span><em>executor</em>, <em>reset_program=None</em><span class="sig-paren">)</span></dt>
-<dd><p>Reset metric states at the beginning of each pass/user-specified batch.</p>
-</dd></dl>
-
-<dl class="method">
-<dt>
-<code class="descname">eval</code><span class="sig-paren">(</span><em>executor</em>, <em>eval_program=None</em><span class="sig-paren">)</span></dt>
-<dd><p>Evaluate the statistics merged by multiple mini-batches.</p>
-</dd></dl>
-
-<dl class="method">
-<dt>
-<code class="descname">create_state</code><span class="sig-paren">(</span><em>suffix</em>, <em>dtype</em>, <em>shape</em><span class="sig-paren">)</span></dt>
-<dd><p>Create state variable.</p>
-<p>NOTE: It is not a public API.</p>
-<table class="docutils field-list" frame="void" rules="none">
-<col class="field-name" />
-<col class="field-body" />
-<tbody valign="top">
-<tr class="field-odd field"><th class="field-name">参数:</th><td class="field-body"><ul class="first last simple">
-<li><strong>suffix</strong> (<em>str</em>) &#8211; the state suffix.</li>
-<li><strong>dtype</strong> (<em>str|core.DataType</em>) &#8211; the state data type</li>
-<li><strong>shape</strong> (<em>tuple|list</em>) &#8211; the shape of state</li>
-</ul>
-</td>
-</tr>
-</tbody>
-</table>
-<p>Returns: State variable</p>
-</dd></dl>
-
-</dd></dl>
-
 </div>
 </div>


--- a/develop/doc_cn/api/v2/fluid/initializer.html
+++ b/develop/doc_cn/api/v2/fluid/initializer.html
@@ -239,90 +239,21 @@
 <h1>Initializer<a class="headerlink" href="#initializer" title="永久链接至标题">¶</a></h1>
 <div class="section" id="id1">
 <h2>Initializer<a class="headerlink" href="#id1" title="永久链接至标题">¶</a></h2>
-<dl class="class">
-<dt>
-<em class="property">class </em><code class="descclassname">paddle.v2.fluid.initializer.</code><code class="descname">Initializer</code></dt>
-<dd><p>Base class for variable initializers</p>
-<p>Defines the common interface of variable initializers.
-They add operations to the init program that are used
-to initialize variables. Users should not use this class
-directly, but need to use one of its implementations.</p>
-</dd></dl>
-
 </div>
 <div class="section" id="constantinitializer">
 <h2>ConstantInitializer<a class="headerlink" href="#constantinitializer" title="永久链接至标题">¶</a></h2>
-<dl class="class">
-<dt>
-<em class="property">class </em><code class="descclassname">paddle.v2.fluid.initializer.</code><code class="descname">ConstantInitializer</code><span class="sig-paren">(</span><em>value=0.0</em><span class="sig-paren">)</span></dt>
-<dd><p>Implements the constant initializer</p>
-</dd></dl>
-
 </div>
 <div class="section" id="uniforminitializer">
 <h2>UniformInitializer<a class="headerlink" href="#uniforminitializer" title="永久链接至标题">¶</a></h2>
-<dl class="class">
-<dt>
-<em class="property">class </em><code class="descclassname">paddle.v2.fluid.initializer.</code><code class="descname">UniformInitializer</code><span class="sig-paren">(</span><em>low=-1.0</em>, <em>high=1.0</em>, <em>seed=0</em><span class="sig-paren">)</span></dt>
-<dd><p>Implements the random uniform distribution initializer</p>
-</dd></dl>
-
 </div>
 <div class="section" id="normalinitializer">
 <h2>NormalInitializer<a class="headerlink" href="#normalinitializer" title="永久链接至标题">¶</a></h2>
-<dl class="class">
-<dt>
-<em class="property">class </em><code class="descclassname">paddle.v2.fluid.initializer.</code><code class="descname">NormalInitializer</code><span class="sig-paren">(</span><em>loc=0.0</em>, <em>scale=1.0</em>, <em>seed=0</em><span class="sig-paren">)</span></dt>
-<dd><p>Implements the  random Normal(Gaussian) distribution initializer</p>
-</dd></dl>
-
 </div>
 <div class="section" id="xavierinitializer">
 <h2>XavierInitializer<a class="headerlink" href="#xavierinitializer" title="永久链接至标题">¶</a></h2>
-<dl class="class">
-<dt>
-<em class="property">class </em><code class="descclassname">paddle.v2.fluid.initializer.</code><code class="descname">XavierInitializer</code><span class="sig-paren">(</span><em>uniform=True</em>, <em>fan_in=None</em>, <em>fan_out=None</em>, <em>seed=0</em><span class="sig-paren">)</span></dt>
-<dd><p>Implements the Xavier initializer</p>
-<p>This class implements the Xavier weight initializer from the paper
-Understanding the difficulty of training deep feedforward neural
-networks[1] by Xavier Glorot and Yoshua Bengio.</p>
-<p>This initializer is designed to keep the scale of the gradients
-approximately same in all the layers. In case of Uniform distribution,
-the range is [-x, x], where x = sqrt(6 / (fan_in + fan_out)).
-In case of Normal distribution, the mean is 0 and the standard deviation
-is sqrt(2/ (fan_in + fan_out)).</p>
-<p class="rubric">References</p>
-<dl class="docutils">
-<dt>[1] Understanding the difficulty of training deep feedforward neural</dt>
-<dd>networks. International conference on artificial intelligence and
-statistics.
-(<a class="reference external" href="http://proceedings.mlr.press/v9/glorot10a.html">http://proceedings.mlr.press/v9/glorot10a.html</a>)</dd>
-</dl>
-</dd></dl>
-
 </div>
 <div class="section" id="msrainitializer">
 <h2>MSRAInitializer<a class="headerlink" href="#msrainitializer" title="永久链接至标题">¶</a></h2>
-<dl class="class">
-<dt>
-<em class="property">class </em><code class="descclassname">paddle.v2.fluid.initializer.</code><code class="descname">MSRAInitializer</code><span class="sig-paren">(</span><em>uniform=True</em>, <em>fan_in=None</em>, <em>seed=0</em><span class="sig-paren">)</span></dt>
-<dd><p>Implements the MSRA initializer a.k.a. Kaiming Initializer</p>
-<p>This class implements the weight initialization from the paper
-Delving Deep into Rectifiers: Surpassing Human-Level Performance on
-ImageNet Classification[1] by Kaiming He, Xiangyu Zhang, Shaoqing Ren
-and Jian Sun. This is a robust initialization method that particularly
-considers the rectifier nonlinearities. In case of Uniform distribution,
-the range is [-x, x], where x = sqrt(6 / fan_in). In case of Normal
-distribution, the mean is 0 and the standard deviation
-is sqrt(2/ fan_in).</p>
-<p class="rubric">References</p>
-<dl class="docutils">
-<dt>[1] Delving Deep into Rectifiers: Surpassing Human-Level Performance</dt>
-<dd>on ImageNet Classification
-(<a class="reference external" href="https://arxiv.org/abs/1502.01852">https://arxiv.org/abs/1502.01852</a>)</dd>
-</dl>
-</dd></dl>
-
 </div>
 </div>


--- a/develop/doc_cn/api/v2/fluid/io.html
+++ b/develop/doc_cn/api/v2/fluid/io.html
@@ -239,23 +239,6 @@
 <h1>IO<a class="headerlink" href="#io" title="永久链接至标题">¶</a></h1>
 <div class="section" id="is-parameter">
 <h2>is_parameter<a class="headerlink" href="#is-parameter" title="永久链接至标题">¶</a></h2>
-<dl class="function">
-<dt>
-<code class="descclassname">paddle.v2.fluid.io.</code><code class="descname">is_parameter</code><span class="sig-paren">(</span><em>var</em><span class="sig-paren">)</span></dt>
-<dd><p>Check whether the variable is a Parameter.</p>
-<p>This function checks whether the input variable is a Parameter.</p>
-<table class="docutils field-list" frame="void" rules="none">
-<col class="field-name" />
-<col class="field-body" />
-<tbody valign="top">
-<tr class="field-odd field"><th class="field-name">参数:</th><td class="field-body"><strong>var</strong> &#8211; The input variable.</td>
-</tr>
-<tr class="field-even field"><th class="field-name">返回:</th><td class="field-body">boolean result whether the variable is a Parameter.</td>
-</tr>
-</tbody>
-</table>
-</dd></dl>
-
 </div>
 </div>


--- a/develop/doc_cn/api/v2/fluid/layers.html
+++ b/develop/doc_cn/api/v2/fluid/layers.html
--- a/develop/doc_cn/api/v2/fluid/nets.html
+++ b/develop/doc_cn/api/v2/fluid/nets.html
@@ -239,118 +239,18 @@
 <h1>Nets<a class="headerlink" href="#nets" title="永久链接至标题">¶</a></h1>
 <div class="section" id="simple-img-conv-pool">
 <h2>simple_img_conv_pool<a class="headerlink" href="#simple-img-conv-pool" title="永久链接至标题">¶</a></h2>
-<dl class="function">
-<dt>
-<code class="descclassname">paddle.v2.fluid.nets.</code><code class="descname">simple_img_conv_pool</code><span class="sig-paren">(</span><em>input</em>, <em>num_filters</em>, <em>filter_size</em>, <em>pool_size</em>, <em>pool_stride</em>, <em>act</em>, <em>param_attr=None</em>, <em>pool_type='max'</em>, <em>use_cudnn=True</em><span class="sig-paren">)</span></dt>
-<dd></dd></dl>
-
 </div>
 <div class="section" id="img-conv-group">
 <h2>img_conv_group<a class="headerlink" href="#img-conv-group" title="永久链接至标题">¶</a></h2>
-<dl class="function">
-<dt>
-<code class="descclassname">paddle.v2.fluid.nets.</code><code class="descname">img_conv_group</code><span class="sig-paren">(</span><em>input</em>, <em>conv_num_filter</em>, <em>pool_size</em>, <em>conv_padding=1</em>, <em>conv_filter_size=3</em>, <em>conv_act=None</em>, <em>param_attr=None</em>, <em>conv_with_batchnorm=False</em>, <em>conv_batchnorm_drop_rate=None</em>, <em>pool_stride=1</em>, <em>pool_type=None</em>, <em>use_cudnn=True</em><span class="sig-paren">)</span></dt>
-<dd><p>Image Convolution Group, Used for vgg net.</p>
-</dd></dl>
-
 </div>
 <div class="section" id="sequence-conv-pool">
 <h2>sequence_conv_pool<a class="headerlink" href="#sequence-conv-pool" title="永久链接至标题">¶</a></h2>
-<dl class="function">
-<dt>
-<code class="descclassname">paddle.v2.fluid.nets.</code><code class="descname">sequence_conv_pool</code><span class="sig-paren">(</span><em>input</em>, <em>num_filters</em>, <em>filter_size</em>, <em>param_attr=None</em>, <em>act='sigmoid'</em>, <em>pool_type='max'</em><span class="sig-paren">)</span></dt>
-<dd></dd></dl>
-
 </div>
 <div class="section" id="glu">
 <h2>glu<a class="headerlink" href="#glu" title="永久链接至标题">¶</a></h2>
-<dl class="function">
-<dt>
-<code class="descclassname">paddle.v2.fluid.nets.</code><code class="descname">glu</code><span class="sig-paren">(</span><em>input</em>, <em>dim=-1</em><span class="sig-paren">)</span></dt>
-<dd><p>The gated linear unit composed by split, sigmoid activation and elementwise
-multiplication. Specifically, Split the input into two equal sized parts
-<span class="math">\(a\)</span> and <span class="math">\(b\)</span> along the given dimension and then compute as
-following:</p>
-<blockquote>
-<div><div class="math">
-\[{GLU}(a, b)= a \otimes \sigma(b)\]</div>
-</div></blockquote>
-<p>Refer to <a class="reference external" href="https://arxiv.org/pdf/1612.08083.pdf">Language Modeling with Gated Convolutional Networks</a>.</p>
-<table class="docutils field-list" frame="void" rules="none">
-<col class="field-name" />
-<col class="field-body" />
-<tbody valign="top">
-<tr class="field-odd field"><th class="field-name">参数:</th><td class="field-body"><ul class="first simple">
-<li><strong>input</strong> (<em>Variable</em>) &#8211; The input variable which is a Tensor or LoDTensor.</li>
-<li><strong>dim</strong> (<em>int</em>) &#8211; The dimension along which to split. If <span class="math">\(dim &lt; 0\)</span>, the
-dimension to split along is <span class="math">\(rank(input) + dim\)</span>.</li>
-</ul>
-</td>
-</tr>
-<tr class="field-even field"><th class="field-name">返回:</th><td class="field-body"><p class="first">The Tensor variable with half the size of input.</p>
-</td>
-</tr>
-<tr class="field-odd field"><th class="field-name">返回类型:</th><td class="field-body"><p class="first last">Variable</p>
-</td>
-</tr>
-</tbody>
-</table>
-<p class="rubric">Examples</p>
-<div class="highlight-python"><div class="highlight"><pre><span></span><span class="c1"># x is a Tensor variable with shape [3, 6, 9]</span>
-<span class="n">fluid</span><span class="o">.</span><span class="n">nets</span><span class="o">.</span><span class="n">glu</span><span class="p">(</span><span class="nb">input</span><span class="o">=</span><span class="n">x</span><span class="p">,</span> <span class="n">dim</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>  <span class="c1"># shape of output: [3, 3, 9]</span>
-</pre></div>
-</div>
-</dd></dl>
-
 </div>
 <div class="section" id="dot-product-attention">
 <h2>dot_product_attention<a class="headerlink" href="#dot-product-attention" title="永久链接至标题">¶</a></h2>
-<dl class="function">
-<dt>
-<code class="descclassname">paddle.v2.fluid.nets.</code><code class="descname">dot_product_attention</code><span class="sig-paren">(</span><em>querys</em>, <em>keys</em>, <em>values</em><span class="sig-paren">)</span></dt>
-<dd><p>The dot-product attention.</p>
-<p>Attention mechanism can be seen as mapping a query and a set of key-value
-pairs to an output. The output is computed as a weighted sum of the values,
-where the weight assigned to each value is computed by a compatibility
-function (dot-product here) of the query with the corresponding key.</p>
-<p>The dot-product attention can be implemented through (batch) matrix
-multiplication as follows:</p>
-<blockquote>
-<div><div class="math">
-\[Attention(Q, K, V)= softmax(QK^\mathrm{T})V\]</div>
-</div></blockquote>
-<p>Refer to <a class="reference external" href="https://arxiv.org/pdf/1706.03762.pdf">Attention Is All You Need</a>.</p>
-<p>Note that batch data containing sequences with different lengths is not
-supported by this because of the (batch) matrix multiplication.</p>
-<table class="docutils field-list" frame="void" rules="none">
-<col class="field-name" />
-<col class="field-body" />
-<tbody valign="top">
-<tr class="field-odd field"><th class="field-name">参数:</th><td class="field-body"><ul class="first simple">
-<li><strong>query</strong> (<em>Variable</em>) &#8211; The input variable which is a Tensor or LoDTensor.</li>
-<li><strong>key</strong> (<em>Variable</em>) &#8211; The input variable which is a Tensor or LoDTensor.</li>
-<li><strong>value</strong> (<em>Variable</em>) &#8211; The input variable which is a Tensor or LoDTensor.</li>
-</ul>
-</td>
-</tr>
-<tr class="field-even field"><th class="field-name">返回:</th><td class="field-body"><p class="first">The Tensor variables representing the output and attention scores.</p>
-</td>
-</tr>
-<tr class="field-odd field"><th class="field-name">返回类型:</th><td class="field-body"><p class="first last">tuple</p>
-</td>
-</tr>
-</tbody>
-</table>
-<p class="rubric">Examples</p>
-<div class="highlight-python"><div class="highlight"><pre><span></span><span class="c1"># Suppose q, k, v are tensor variables with the following shape:</span>
-<span class="c1"># q: [3, 5, 9], k: [3, 6, 9], v: [3, 6, 10]</span>
-<span class="n">out</span><span class="p">,</span> <span class="n">attn_scores</span> <span class="o">=</span> <span class="n">fluid</span><span class="o">.</span><span class="n">nets</span><span class="o">.</span><span class="n">dot_product_attention</span><span class="p">(</span><span class="n">q</span><span class="p">,</span> <span class="n">k</span><span class="p">,</span> <span class="n">v</span><span class="p">)</span>
-<span class="n">out</span><span class="o">.</span><span class="n">shape</span>  <span class="c1"># [3, 5, 10]</span>
-<span class="n">attn_scores</span><span class="o">.</span><span class="n">shape</span>  <span class="c1"># [3, 5, 6]</span>
-</pre></div>
-</div>
-</dd></dl>
-
 </div>
 </div>


--- a/develop/doc_cn/api/v2/fluid/optimizer.html
+++ b/develop/doc_cn/api/v2/fluid/optimizer.html
@@ -239,105 +239,24 @@
 <h1>Optimizer<a class="headerlink" href="#optimizer" title="永久链接至标题">¶</a></h1>
 <div class="section" id="id1">
 <h2>Optimizer<a class="headerlink" href="#id1" title="永久链接至标题">¶</a></h2>
-<dl class="class">
-<dt>
-<em class="property">class </em><code class="descclassname">paddle.v2.fluid.optimizer.</code><code class="descname">Optimizer</code><span class="sig-paren">(</span><em>global_step=None</em>, <em>regularization=None</em><span class="sig-paren">)</span></dt>
-<dd><p>Optimizer Base class.</p>
-<p>Defines the common interface of an optimizer.
-Users should not use this class directly,
-but need to use one of its implementations.</p>
-<dl class="method">
-<dt>
-<code class="descname">create_optimization_pass</code><span class="sig-paren">(</span><em>parameters_and_grads</em>, <em>loss</em>, <em>startup_program=None</em><span class="sig-paren">)</span></dt>
-<dd><p>Add optimization operators to update gradients to variables.</p>
-<table class="docutils field-list" frame="void" rules="none">
-<col class="field-name" />
-<col class="field-body" />
-<tbody valign="top">
-<tr class="field-odd field"><th class="field-name">参数:</th><td class="field-body"><ul class="first simple">
-<li><strong>loss</strong> &#8211; the target that this optimization is for.</li>
-<li><strong>parameters_and_grads</strong> &#8211; a list of (variable, gradient) pair to update.</li>
-</ul>
-</td>
-</tr>
-<tr class="field-even field"><th class="field-name">返回:</th><td class="field-body"><p class="first">a list of operators that will complete one step of
-optimization. This will include parameter update ops, global step
-update ops and any other custom ops required by subclasses to manage
-their internal state.
-:param startup_program:</p>
-</td>
-</tr>
-<tr class="field-odd field"><th class="field-name">返回类型:</th><td class="field-body"><p class="first last">return_op_list</p>
-</td>
-</tr>
-</tbody>
-</table>
-</dd></dl>
-
-<dl class="method">
-<dt>
-<code class="descname">minimize</code><span class="sig-paren">(</span><em>loss</em>, <em>startup_program=None</em>, <em>parameter_list=None</em>, <em>no_grad_set=None</em><span class="sig-paren">)</span></dt>
-<dd><p>Add operations to minimize <cite>loss</cite> by updating <cite>parameter_list</cite>.</p>
-<p>This method combines interface <cite>append_backward()</cite> and
-<cite>create_optimization_pass()</cite> into one.</p>
-</dd></dl>
-
-</dd></dl>
-
 </div>
 <div class="section" id="sgdoptimizer">
 <h2>SGDOptimizer<a class="headerlink" href="#sgdoptimizer" title="永久链接至标题">¶</a></h2>
-<dl class="class">
-<dt>
-<em class="property">class </em><code class="descclassname">paddle.v2.fluid.optimizer.</code><code class="descname">SGDOptimizer</code><span class="sig-paren">(</span><em>learning_rate</em>, <em>**kwargs</em><span class="sig-paren">)</span></dt>
-<dd><p>Simple SGD optimizer without any state.</p>
-</dd></dl>
-
 </div>
 <div class="section" id="momentumoptimizer">
 <h2>MomentumOptimizer<a class="headerlink" href="#momentumoptimizer" title="永久链接至标题">¶</a></h2>
-<dl class="class">
-<dt>
-<em class="property">class </em><code class="descclassname">paddle.v2.fluid.optimizer.</code><code class="descname">MomentumOptimizer</code><span class="sig-paren">(</span><em>learning_rate</em>, <em>momentum</em>, <em>use_nesterov=False</em>, <em>**kwargs</em><span class="sig-paren">)</span></dt>
-<dd><p>Simple Momentum optimizer with velocity state</p>
-</dd></dl>
-
 </div>
 <div class="section" id="adagradoptimizer">
 <h2>AdagradOptimizer<a class="headerlink" href="#adagradoptimizer" title="永久链接至标题">¶</a></h2>
-<dl class="class">
-<dt>
-<em class="property">class </em><code class="descclassname">paddle.v2.fluid.optimizer.</code><code class="descname">AdagradOptimizer</code><span class="sig-paren">(</span><em>learning_rate</em>, <em>epsilon=1e-06</em>, <em>**kwargs</em><span class="sig-paren">)</span></dt>
-<dd><p>Simple Adagrad optimizer with moment state</p>
-</dd></dl>
-
 </div>
 <div class="section" id="adamoptimizer">
 <h2>AdamOptimizer<a class="headerlink" href="#adamoptimizer" title="永久链接至标题">¶</a></h2>
-<dl class="class">
-<dt>
-<em class="property">class </em><code class="descclassname">paddle.v2.fluid.optimizer.</code><code class="descname">AdamOptimizer</code><span class="sig-paren">(</span><em>learning_rate=0.001</em>, <em>beta1=0.9</em>, <em>beta2=0.999</em>, <em>epsilon=1e-08</em>, <em>**kwargs</em><span class="sig-paren">)</span></dt>
-<dd><p>Implements the Adam Optimizer</p>
-</dd></dl>
-
 </div>
 <div class="section" id="adamaxoptimizer">
 <h2>AdamaxOptimizer<a class="headerlink" href="#adamaxoptimizer" title="永久链接至标题">¶</a></h2>
-<dl class="class">
-<dt>
-<em class="property">class </em><code class="descclassname">paddle.v2.fluid.optimizer.</code><code class="descname">AdamaxOptimizer</code><span class="sig-paren">(</span><em>learning_rate=0.001</em>, <em>beta1=0.9</em>, <em>beta2=0.999</em>, <em>epsilon=1e-08</em>, <em>**kwargs</em><span class="sig-paren">)</span></dt>
-<dd><p>Implements the Adamax Optimizer</p>
-</dd></dl>
-
 </div>
 <div class="section" id="decayedadagradoptimizer">
 <h2>DecayedAdagradOptimizer<a class="headerlink" href="#decayedadagradoptimizer" title="永久链接至标题">¶</a></h2>
-<dl class="class">
-<dt>
-<em class="property">class </em><code class="descclassname">paddle.v2.fluid.optimizer.</code><code class="descname">DecayedAdagradOptimizer</code><span class="sig-paren">(</span><em>learning_rate</em>, <em>decay=0.95</em>, <em>epsilon=1e-06</em>, <em>**kwargs</em><span class="sig-paren">)</span></dt>
-<dd><p>Simple Decayed Adagrad optimizer with moment state</p>
-</dd></dl>
-
 </div>
 </div>


--- a/develop/doc_cn/api/v2/fluid/profiler.html
+++ b/develop/doc_cn/api/v2/fluid/profiler.html
@@ -239,35 +239,6 @@
 <h1>Profiler<a class="headerlink" href="#profiler" title="永久链接至标题">¶</a></h1>
 <div class="section" id="id1">
 <h2>Profiler<a class="headerlink" href="#id1" title="永久链接至标题">¶</a></h2>
-<dl class="function">
-<dt>
-<code class="descclassname">paddle.v2.fluid.profiler.</code><code class="descname">cuda_profiler</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwds</em><span class="sig-paren">)</span></dt>
-<dd><p>The CUDA profiler.
-This fuctions is used to profile CUDA program by CUDA runtime application
-programming interface. The profiling result will be written into
-<cite>output_file</cite> with Key-Value pair format or Comma separated values format.
-The user can set the output mode by <cite>output_mode</cite> argument and set the
-counters/options for profiling by <cite>config</cite> argument. The default config
-is [&#8216;gpustarttimestamp&#8217;, &#8216;gpustarttimestamp&#8217;, &#8216;gridsize3d&#8217;,
-&#8216;threadblocksize&#8217;, &#8216;streamid&#8217;, &#8216;enableonstart 0&#8217;, &#8216;conckerneltrace&#8217;].</p>
-<table class="docutils field-list" frame="void" rules="none">
-<col class="field-name" />
-<col class="field-body" />
-<tbody valign="top">
-<tr class="field-odd field"><th class="field-name">参数:</th><td class="field-body"><ul class="first last simple">
-<li><strong>output_file</strong> (<em>string</em>) &#8211; The output file name, the result will be
-written into this file.</li>
-<li><strong>output_mode</strong> (<em>string</em>) &#8211; The output mode has Key-Value pair format and
-Comma separated values format. It should be &#8216;kvp&#8217; or &#8216;csv&#8217;.</li>
-<li><strong>config</strong> (<em>list of string</em>) &#8211; The profiler options and counters can refer
-to &#8220;Compute Command Line Profiler User Guide&#8221;.</li>
-</ul>
-</td>
-</tr>
-</tbody>
-</table>
-</dd></dl>
-
 </div>
 </div>


--- a/develop/doc_cn/api/v2/fluid/regularizer.html
+++ b/develop/doc_cn/api/v2/fluid/regularizer.html
@@ -239,36 +239,12 @@
 <h1>Regularizer<a class="headerlink" href="#regularizer" title="永久链接至标题">¶</a></h1>
 <div class="section" id="weightdecayregularizer">
 <h2>WeightDecayRegularizer<a class="headerlink" href="#weightdecayregularizer" title="永久链接至标题">¶</a></h2>
-<dl class="class">
-<dt>
-<em class="property">class </em><code class="descclassname">paddle.v2.fluid.regularizer.</code><code class="descname">WeightDecayRegularizer</code></dt>
-<dd><p>Base class for weight decay regularizers</p>
-<p>Defines the common interface of weight-decay regularizers.
-Weight-decay regularizers are added only during the backward
-pass for faster regularization. They add operations to the network
-that correspond to gradient of the regularization function.
-Users should not use this class directly, but need to use one
-of its implementations</p>
-</dd></dl>
-
 </div>
 <div class="section" id="l2decayregularizer">
 <h2>L2DecayRegularizer<a class="headerlink" href="#l2decayregularizer" title="永久链接至标题">¶</a></h2>
-<dl class="class">
-<dt>
-<em class="property">class </em><code class="descclassname">paddle.v2.fluid.regularizer.</code><code class="descname">L2DecayRegularizer</code><span class="sig-paren">(</span><em>regularization_coeff=0.0</em><span class="sig-paren">)</span></dt>
-<dd><p>Implements the L2 Weight Decay Regularization</p>
-</dd></dl>
-
 </div>
-<div class="section" id="module-paddle.v2.fluid.regularizer">
-<span id="l1decayregularizer"></span><h2>L1DecayRegularizer<a class="headerlink" href="#module-paddle.v2.fluid.regularizer" title="永久链接至标题">¶</a></h2>
-<dl class="class">
-<dt id="paddle.v2.fluid.regularizer.L1DecayRegularizer">
-<em class="property">class </em><code class="descclassname">paddle.v2.fluid.regularizer.</code><code class="descname">L1DecayRegularizer</code><span class="sig-paren">(</span><em>regularization_coeff=0.0</em><span class="sig-paren">)</span><a class="headerlink" href="#paddle.v2.fluid.regularizer.L1DecayRegularizer" title="永久链接至目标">¶</a></dt>
-<dd><p>Implements the L1 Weight Decay Regularization</p>
-</dd></dl>
-
+<div class="section" id="l1decayregularizer">
+<h2>L1DecayRegularizer<a class="headerlink" href="#l1decayregularizer" title="永久链接至标题">¶</a></h2>
 </div>
 </div>


--- a/develop/doc_cn/genindex.html
+++ b/develop/doc_cn/genindex.html
@@ -236,7 +236,6 @@
 <a href="#B"><strong>B</strong></a>
 | <a href="#C"><strong>C</strong></a>
 | <a href="#L"><strong>L</strong></a>
- | <a href="#M"><strong>M</strong></a>
 | <a href="#P"><strong>P</strong></a>
 | <a href="#R"><strong>R</strong></a>
 | <a href="#S"><strong>S</strong></a>
@@ -262,14 +261,12 @@
 <h2 id="L">L</h2>
 <table style="width: 100%" class="indextable genindextable"><tr>
  <td style="width: 33%; vertical-align: top;"><ul>
-      <li><a href="api/v2/fluid/regularizer.html#paddle.v2.fluid.regularizer.L1DecayRegularizer">L1DecayRegularizer (paddle.v2.fluid.regularizer 中的类)</a>
-</li>
      <li><a href="api/v2/data/image.html#paddle.v2.image.left_right_flip">left_right_flip() (在 paddle.v2.image 模块中)</a>
 </li>
-  </ul></td>
-  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="api/v2/data/image.html#paddle.v2.image.load_and_transform">load_and_transform() (在 paddle.v2.image 模块中)</a>
 </li>
+  </ul></td>
+  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="api/v2/data/image.html#paddle.v2.image.load_image">load_image() (在 paddle.v2.image 模块中)</a>
 </li>
      <li><a href="api/v2/data/image.html#paddle.v2.image.load_image_bytes">load_image_bytes() (在 paddle.v2.image 模块中)</a>
@@ -277,20 +274,8 @@
  </ul></td>
 </tr></table>

-<h2 id="M">M</h2>
-<table style="width: 100%" class="indextable genindextable"><tr>
-  <td style="width: 33%; vertical-align: top;"><ul>
-      <li><a href="api/v2/fluid/evaluator.html#paddle.v2.fluid.evaluator.Evaluator.metrics">metrics (paddle.v2.fluid.evaluator.Evaluator 属性)</a>
-</li>
-  </ul></td>
-</tr></table>
-
 <h2 id="P">P</h2>
 <table style="width: 100%" class="indextable genindextable"><tr>
-  <td style="width: 33%; vertical-align: top;"><ul>
-      <li><a href="api/v2/fluid/regularizer.html#module-paddle.v2.fluid.regularizer">paddle.v2.fluid.regularizer (模块)</a>
-</li>
-  </ul></td>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="api/v2/data/image.html#module-paddle.v2.image">paddle.v2.image (模块)</a>
 </li>
@@ -313,10 +298,6 @@
 <table style="width: 100%" class="indextable genindextable"><tr>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="api/v2/data/image.html#paddle.v2.image.simple_transform">simple_transform() (在 paddle.v2.image 模块中)</a>
-</li>
-  </ul></td>
-  <td style="width: 33%; vertical-align: top;"><ul>
-      <li><a href="api/v2/fluid/evaluator.html#paddle.v2.fluid.evaluator.Evaluator.states">states (paddle.v2.fluid.evaluator.Evaluator 属性)</a>
 </li>
  </ul></td>
 </tr></table>

--- a/develop/doc_cn/objects.inv
+++ b/develop/doc_cn/objects.inv
--- a/develop/doc_cn/py-modindex.html
+++ b/develop/doc_cn/py-modindex.html
@@ -248,11 +248,6 @@
       <td>
       <code class="xref">paddle</code></td><td>
       <em></em></td></tr>
-     <tr class="cg-1">
-       <td></td>
-       <td>&#160;&#160;&#160;
-       <a href="api/v2/fluid/regularizer.html#module-paddle.v2.fluid.regularizer"><code class="xref">paddle.v2.fluid.regularizer</code></a></td><td>
-       <em></em></td></tr>
     <tr class="cg-1">
       <td></td>
       <td>&#160;&#160;&#160;

--- a/develop/doc_cn/searchindex.js
+++ b/develop/doc_cn/searchindex.js