From d499f9489992cccf779441e5e7ca899b412931b2 Mon Sep 17 00:00:00 2001
From: Aurelius84 <liujiezhangbupt@gmail.com>
Date: Tue, 24 Sep 2019 16:42:57 +0800
Subject: [PATCH] add fluid.embedding api zh doc (#1184)

* add fluid.embedding cn doc test=document_preview

* fix example code test=document_preview

* refine ref link

* fix param True style

* add fluid.one_hot zh doc

* refine code style

* fix dtype description

* add one_hot_cn.rst in fluid_cn

* add default value

* support LoDTensor

* modify embedding doc

* refine param_attr

* rm warning

* fix data

* add padding data update doc

* add np.dtype
---
 doc/fluid/api_cn/fluid_cn.rst              |  2 +
 doc/fluid/api_cn/fluid_cn/embedding_cn.rst | 81 +++++++++++++++++++++
 doc/fluid/api_cn/fluid_cn/one_hot_cn.rst   | 84 ++++++++++++++++++++++
 3 files changed, 167 insertions(+)
 create mode 100644 doc/fluid/api_cn/fluid_cn/embedding_cn.rst
 create mode 100644 doc/fluid/api_cn/fluid_cn/one_hot_cn.rst

diff --git a/doc/fluid/api_cn/fluid_cn.rst b/doc/fluid/api_cn/fluid_cn.rst
index c4e8d5d4e..1e266d34a 100644
--- a/doc/fluid/api_cn/fluid_cn.rst
+++ b/doc/fluid/api_cn/fluid_cn.rst
@@ -24,6 +24,7 @@ fluid
     fluid_cn/default_startup_program_cn.rst
     fluid_cn/DistributeTranspiler_cn.rst
     fluid_cn/DistributeTranspilerConfig_cn.rst
+    fluid_cn/embedding_cn.rst
     fluid_cn/ExecutionStrategy_cn.rst
     fluid_cn/Executor_cn.rst
     fluid_cn/global_scope_cn.rst
@@ -33,6 +34,7 @@ fluid
     fluid_cn/LoDTensorArray_cn.rst
     fluid_cn/memory_optimize_cn.rst
     fluid_cn/name_scope_cn.rst
+    fluid_cn/one_hot_cn.rst
     fluid_cn/ParallelExecutor_cn.rst
     fluid_cn/ParamAttr_cn.rst
     fluid_cn/Program_cn.rst
diff --git a/doc/fluid/api_cn/fluid_cn/embedding_cn.rst b/doc/fluid/api_cn/fluid_cn/embedding_cn.rst
new file mode 100644
index 000000000..bee1a40e2
--- /dev/null
+++ b/doc/fluid/api_cn/fluid_cn/embedding_cn.rst
@@ -0,0 +1,81 @@
+.. _cn_api_fluid_embedding:
+
+embedding
+-------------------------------
+
+.. py:function:: paddle.fluid.embedding(input, size, is_sparse=False, is_distributed=False, padding_idx=None, param_attr=None, dtype='float32')
+
+该OP根据input中的id信息从embedding矩阵中查询对应embedding信息，函数会根据输入的size (vocab_size, emb_size)和dtype自动构造一个二维embedding矩阵。
+
+输出的Tensor的shape是在输入Tensor shape的最后一维后面添加了emb_size的维度。
+
+注：input中的id必须满足 ``0 <= id < size[0]``，否则程序会抛异常退出。
+
+
+::
+
+    Case 1:
+
+    input是Tensor, 且padding_idx = -1
+        input.data = [[1, 3], [2, 4], [4, 127]]
+        input.shape = [3, 2]
+    若size = [128, 16]
+    输出为Tensor:
+        out.shape = [3, 2, 16]
+        out.data = [[[0.129435295, 0.244512452, ..., 0.436322452],
+                     [0.345421456, 0.524563927, ..., 0.144534654]],
+
+                    [[0.345249859, 0.124939536, ..., 0.194353745],
+                     [0.945345345, 0.435394634, ..., 0.435345365]],
+
+                    [[0.945345345, 0.435394634, ..., 0.435345365],
+                     [0.0,         0.0,         ..., 0.0        ]]]  # padding data
+    输入的padding_idx小于0，则自动转换为padding_idx = -1 + 128 = 127, 对于输入id为127的词，进行padding处理。
+    
+    Case 2:
+
+    input是lod level 为1的LoDTensor, 且padding_idx = 0
+        input.lod = [[2, 3]]
+        input.data = [[1], [3], [2], [4], [0]]
+        input.shape = [5, 1]
+    若size = [128, 16]
+    输出为LoDTensor:
+        out.lod = [[2, 3]]
+        out.shape = [5, 1, 16]
+        out.data = [[[0.129435295, 0.244512452, ..., 0.436322452]],
+                    [[0.345421456, 0.524563927, ..., 0.144534654]],
+                    [[0.345249859, 0.124939536, ..., 0.194353745]],
+                    [[0.945345345, 0.435394634, ..., 0.435345365]],
+                    [[0.0,         0.0,         ..., 0.0        ]]]  # padding data
+    输入的padding_idx = 0，则对于输入id为0的词，进行padding处理。
+
+
+参数：
+    - **input** (Variable) - 存储id信息，数据类型必须为：int64。
+    - **size** (tuple|list) - embedding矩阵的维度。必须包含两个元素，第一个元素为vocab_size（词表大小），第二个为emb_size（embedding层维度）。
+    - **is_sparse** (bool) - 是否使用稀疏的更新方式，这个参数只会影响反向的梯度更新的性能，sparse更新速度更快。但某些optimizer不支持sparse更新，比如Adadelta，此时is_sparse必须为False。默认为False。
+    - **is_distributed** (bool) - 是否使用分布式的方式存储embedding矩阵，仅在多机分布式cpu训练中使用。默认为False。
+    - **padding_idx** (int|long|None) - padding_idx需在区间[-vocab_size, vocab_size)内，否则不生效。padding_idx<0时，padding_idx会被改成 vocab_size + padding_idx。input中等于padding_idx的id对应的embedding信息会被设置为0，且这部分填充数据在训练时将不会被更新。如果为None，则不作处理。默认为None。
+    - **param_attr** (ParamAttr) - 指定权重参数属性的对象。默认值为None，表示使用默认的权重参数属性。具体用法请参见 :ref:`cn_api_fluid_ParamAttr` 。
+    - **dtype** (str|np.dtype|core.VarDesc.VarType) - 输出Tensor或LoDTensor的数据类型，数据类型必须为：float32，float64，默认为float32。
+
+返回：input映射后得到的embedding Tensor或LoDTensor，数据类型和dtype定义的类型一致。
+
+返回类型：Variable
+
+**代码示例**:
+
+.. code-block:: python
+
+    import paddle.fluid as fluid
+    data = fluid.layers.data(name='sequence', shape=[1], dtype='int64', lod_level=1)
+    emb = fluid.embedding(input=data, size=[128, 64])
+
+
+
+
+
+
+
+
+
diff --git a/doc/fluid/api_cn/fluid_cn/one_hot_cn.rst b/doc/fluid/api_cn/fluid_cn/one_hot_cn.rst
new file mode 100644
index 000000000..a090c8466
--- /dev/null
+++ b/doc/fluid/api_cn/fluid_cn/one_hot_cn.rst
@@ -0,0 +1,84 @@
+.. _cn_api_fluid_one_hot:
+
+one_hot
+-------------------------------
+
+.. py:function:: paddle.fluid.one_hot(input, depth, allow_out_of_range=False)
+
+该OP将输入（input）中的每个词id转换为一个one-hot向量，其长度为字典大小（depth），该词id对应的向量维度上的值为1，其余维度的值为0。
+
+输出的Tensor（或LoDTensor）的shape是在输入shape的最后一维后面添加了depth的维度。
+
+- 示例1（allow_out_of_range=False）：
+
+.. code-block:: text
+
+  输入：
+    X.shape = [4]
+    X.data = [1, 1, 3, 0]
+    depth = 4
+
+  输出：
+    Out.shape = [4, 4]
+    Out.data = [[0., 1., 0., 0.],
+                [0., 1., 0., 0.],
+                [0., 0., 0., 1.],
+                [1., 0., 0., 0.]]
+
+- 示例2 （allow_out_of_range=True）：
+
+.. code-block:: text
+
+  输入：
+    X.shape = [4]
+    X.data = [1, 1, 5, 0]
+    depth = 4
+    allow_out_of_range=True
+
+  输出：
+    Out.shape = [4, 4]
+    Out.data = [[0., 1., 0., 0.],
+                [0., 1., 0., 0.], 
+                [0., 0., 0., 0.], # 该位置的id是5，超出了[0, depth)范围，因此对应的向量全部填成0
+                [1., 0., 0., 0.]]
+
+- 示例3 （allow_out_of_range=False）：
+
+.. code-block:: text
+  
+  输入：
+    X.shape = [4]
+    X.data = [1, 1, 5, 0]
+    depth = 4
+    allow_out_of_range=False
+
+  输出：抛出 Illegal value 的异常
+    X中下标为2的元素的值是5，超出了[0, depth)范围，而allow_out_of_range=False表示不允许越界，因此抛异常。
+
+
+参数：
+    - **input** (Variable) - 维度为 :math:`[N_1, ..., N_n]` 的多维Tensor或LoDTensor，维度至少1维。数据类型为int32或int64。
+    - **depth** (int) - 字典大小
+    - **allow_out_of_range** (bool) - 指明input中所包含的id值是否可以大于等于depth值。当id大于等于depth时，如果 ``allow_out_of_range`` 为False，则会抛出 ``Illegal value`` 的异常；如果设置为True，该id对应的向量为0向量。默认值为False。
+
+返回：转换后的one_hot Tensor或LoDTensor，数据类型为float32。
+
+返回类型：Variable
+
+**代码示例**：
+
+.. code-block:: python
+
+    import paddle.fluid as fluid
+    # 该代码对应上述第一个示例，其中输入label的shape是[4]，输出one_hot_label的shape是[4, 4]
+    label = fluid.layers.data(name="label", shape=[4], append_batch_size=False, dtype="int64")
+    one_hot_label = fluid.one_hot(input=label, depth=4)
+
+
+
+
+
+
+
+
+
-- 
GitLab