From 597dd92e71647fd608a8d40877bca8c0673b5037 Mon Sep 17 00:00:00 2001
From: minqiyang <minqiyang@baidu.com>
Date: Sun, 28 Oct 2018 20:38:58 +0800
Subject: [PATCH] Polish the doc of hash op

test=develop
---
 python/paddle/fluid/layers/nn.py | 62 ++++++++++++++++++++++++++------
 1 file changed, 51 insertions(+), 11 deletions(-)

diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index 99f1a9111..3aaea684c 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -7499,19 +7499,59 @@ def affine_channel(x, scale=None, bias=None, data_layout='NCHW', name=None):
 
 def hash(input, hash_size, num_hash=1, name=None):
     """
-    hash the input
-     Args:
-        input (Variable): The input variable which is a one-hot word.
-        hash_size (int): The space size for hash algorithm.
+    Hash the input to an integer whose value is less than the given hash size.
+
+    The hash algorithm used is xxHash, which is implemented here:
+    https://github.com/Cyan4973/xxHash/tree/v0.6.5
+
+    A simple example is shown below:
+
+    .. code-block:: text
+
+        Given:
+
+        # shape [2, 2]
+        input.data = [
+            [[1], [2]],
+            [[3], [4]],
+        ]
+
+        input.lod = [[0, 2]]
+
+        hash_size = 10000
+
+        num_hash = 4
+
+        Then:
+
+        The hash op takes all the numbers in the input's 2nd dimension as the
+        hash algorithm's input each time. Each input is hashed 4 times, giving
+        an array of length 4. Each value in the array ranges from 0 to 9999.
+
+        # shape [2, 4]
+        output.data = [
+            [[9662], [9217], [1129], [8487]],
+            [[8310], [1327], [1654], [4567]],
+        ]
+
+        output.lod = [[0, 2]]
+
+    Args:
+        input (Variable): The input variable which is a one-hot word. The
+            dimensions of the input variable must be 2.
+        hash_size (int): The space size for hash algorithm. The output values
+            will be in the range :math:`[0, hash_size - 1]`.
         num_hash (int): The times of hash, default 1.
         name (str, default None): The name of this layer.
-     Returns:
-        Variable: The hash result variable which is a LoDTensor.
-     Examples:
-        .. code-block:: python
-            word_dict = paddle.dataset.imdb.word_dict()
-            x = fluid.layers.data(shape[1], dtype='int32', lod_level=1)
-            out = fluid.layers.hash(input=x, len(word_dict))
+
+    Returns:
+       Variable: The hash result variable which is a LoDTensor.
+
+    Examples:
+       .. code-block:: python
+           word_dict = paddle.dataset.imdb.word_dict()
+           x = fluid.layers.data(name="x", shape=[1], dtype='int32', lod_level=1)
+           out = fluid.layers.hash(input=x, num_hash=4, hash_size=1000)
     """
     helper = LayerHelper('hash', **locals())
     out = helper.create_variable_for_type_inference(
-- 
GitLab