diff --git a/paddle/fluid/operators/index_sample_op.cc b/paddle/fluid/operators/index_sample_op.cc
index 139954b0e5e8b71a26a7d87df9ed30ba4fa39ada..2d97797cfec21ed50f0999fa13f8bb1ae9618b71 100644
--- a/paddle/fluid/operators/index_sample_op.cc
+++ b/paddle/fluid/operators/index_sample_op.cc
@@ -142,13 +142,14 @@ REGISTER_OPERATOR(index_sample, ops::IndexSampleOp, ops::IndexSampleOpMaker,
 REGISTER_OPERATOR(index_sample_grad, ops::IndexSampleGradOp,
                   ops::IndexSampleGradNoNeedBufferVarInferer);
 REGISTER_OP_CPU_KERNEL(
-    index_sample, ops::IndexSampleKernel<platform::CPUDeviceContext, float>,
-    ops::IndexSampleKernel<platform::CPUDeviceContext, double>,
-    ops::IndexSampleKernel<platform::CPUDeviceContext, int>,
-    ops::IndexSampleKernel<platform::CPUDeviceContext, int64_t>);
+    index_sample,
+    ops::IndexSampleKernel<paddle::platform::CPUDeviceContext, float>,
+    ops::IndexSampleKernel<paddle::platform::CPUDeviceContext, double>,
+    ops::IndexSampleKernel<paddle::platform::CPUDeviceContext, int>,
+    ops::IndexSampleKernel<paddle::platform::CPUDeviceContext, int64_t>);
 REGISTER_OP_CPU_KERNEL(
     index_sample_grad,
-    ops::IndexSampleGradKernel<platform::CPUDeviceContext, float>,
-    ops::IndexSampleGradKernel<platform::CPUDeviceContext, double>,
-    ops::IndexSampleGradKernel<platform::CPUDeviceContext, int>,
-    ops::IndexSampleGradKernel<platform::CPUDeviceContext, int64_t>);
+    ops::IndexSampleGradKernel<paddle::platform::CPUDeviceContext, float>,
+    ops::IndexSampleGradKernel<paddle::platform::CPUDeviceContext, double>,
+    ops::IndexSampleGradKernel<paddle::platform::CPUDeviceContext, int>,
+    ops::IndexSampleGradKernel<paddle::platform::CPUDeviceContext, int64_t>);
diff --git a/paddle/fluid/operators/index_sample_op.cu b/paddle/fluid/operators/index_sample_op.cu
new file mode 100644
index 0000000000000000000000000000000000000000..1dc7a128edc472f277caad7b3ee842616ad17501
--- /dev/null
+++ b/paddle/fluid/operators/index_sample_op.cu
@@ -0,0 +1,29 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/operators/index_sample_op.h"
+
+namespace ops = paddle::operators;
+REGISTER_OP_CUDA_KERNEL(
+    index_sample,
+    ops::IndexSampleKernel<paddle::platform::CUDADeviceContext, float>,
+    ops::IndexSampleKernel<paddle::platform::CUDADeviceContext, double>,
+    ops::IndexSampleKernel<paddle::platform::CUDADeviceContext, int>,
+    ops::IndexSampleKernel<paddle::platform::CUDADeviceContext, int64_t>);
+REGISTER_OP_CUDA_KERNEL(
+    index_sample_grad,
+    ops::IndexSampleGradKernel<paddle::platform::CUDADeviceContext, float>,
+    ops::IndexSampleGradKernel<paddle::platform::CUDADeviceContext, double>,
+    ops::IndexSampleGradKernel<paddle::platform::CUDADeviceContext, int>,
+    ops::IndexSampleGradKernel<paddle::platform::CUDADeviceContext, int64_t>);
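Note: with the CUDA registrations above, paddle.index_sample and its gradient can run on the GPU as well as the CPU. A minimal sketch of exercising the GPU path in dygraph mode follows; it is illustrative only, not part of the patch, and assumes a CUDA build of Paddle with this change applied.

    import numpy as np
    import paddle
    import paddle.fluid as fluid

    # Run index_sample on the GPU via the newly registered CUDA kernel
    # (illustrative check, assumes a CUDA device is available).
    data = np.arange(12).reshape(3, 4).astype('float32')       # [[0..3], [4..7], [8..11]]
    index = np.array([[0, 3], [1, 2], [0, 0]]).astype('int32')

    with fluid.dygraph.guard(fluid.CUDAPlace(0)):
        x = fluid.dygraph.to_variable(data)
        idx = fluid.dygraph.to_variable(index)
        out = paddle.index_sample(x, idx)
        print(out.numpy())
        # [[0. 3.]
        #  [5. 6.]
        #  [8. 8.]]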
diff --git a/paddle/fluid/operators/index_sample_op.h b/paddle/fluid/operators/index_sample_op.h
index 9618ad5cb67be05ebc709167eb82f29100d737a6..6ba197d5c66f25ad2563d3a01ed71f14df9f2175 100644
--- a/paddle/fluid/operators/index_sample_op.h
+++ b/paddle/fluid/operators/index_sample_op.h
@@ -41,39 +41,41 @@ void IndexSampleInner(const framework::ExecutionContext &context,
   auto value_length = input_dims[1];
   auto index_length = index_dims[1];
   int index_ids_num = index.numel();
 
-  auto *input_data = input.data<T>();
-  auto *index_data = index.data<IndexT>();
-  std::vector<T> res{};
+  std::vector<T> input_vec;
+  std::vector<IndexT> index_vec;
+  TensorToVector(input, context.device_context(), &input_vec);
+  TensorToVector(index, context.device_context(), &index_vec);
+
+  std::vector<T> res(index_ids_num);
   for (int i = 0; i < index_ids_num; i++) {
     int b = floor(i / index_length);
     PADDLE_ENFORCE_GE(
-        index_data[i], 0,
+        index_vec[i], 0,
         platform::errors::InvalidArgument(
             "Variable value (index) of OP(index_sample) "
             "expected >= 0 and < %ld, but got %ld. Please check input "
             "value.",
-            value_length, index_data[i]));
+            value_length, index_vec[i]));
     PADDLE_ENFORCE_LT(
-        index_data[i], value_length,
+        index_vec[i], value_length,
         platform::errors::InvalidArgument(
             "Variable value (index) of OP(index_sample) "
             "expected >= 0 and < %ld, but got %ld. Please check input "
             "value.",
-            value_length, index_data[i]));
+            value_length, index_vec[i]));
 
-    int v_i = b * value_length + static_cast<int>(index_data[i]);
-    T v = input_data[v_i];
+    int v_i = b * value_length + static_cast<int>(index_vec[i]);
+    T v = input_vec[v_i];
     VLOG(4) << "Index Sample: batch = " << b << " index = " << v_i
             << " value = " << v;
-    res.push_back(v);
+    res[i] = v;
   }
 
   auto ddim = framework::make_ddim({batch_size, index_length});
+  output->mutable_data<T>(context.GetPlace());
+  framework::TensorFromVector(res, context.device_context(), output);
   output->Resize(ddim);
-  T *out_data = output->mutable_data<T>(context.GetPlace());
-
-  memcpy(out_data, &res[0], sizeof(T) * index_ids_num);
 }
 
 template <typename DeviceContext, typename T>
@@ -113,39 +115,42 @@ template <typename T, typename IndexT = int>
 void IndexSampleGradInner(const framework::ExecutionContext &context,
                           const LoDTensor &out_grad, const LoDTensor &index,
                           LoDTensor *x_grad) {
+  std::vector<T> out_grad_vec;
+  std::vector<IndexT> index_vec;
+  TensorToVector(out_grad, context.device_context(), &out_grad_vec);
+  TensorToVector(index, context.device_context(), &index_vec);
+
   auto index_dims = index.dims();
   auto x_grad_dims = x_grad->dims();
 
-  int batch_size = x_grad_dims[0];
   auto value_length = x_grad_dims[1];
   auto index_length = index_dims[1];
   int index_ids_num = index.numel();
 
-  T *x_grad_data = x_grad->mutable_data<T>(context.GetPlace());
-  auto *out_grad_data = out_grad.data<T>();
-  auto *index_data = index.data<IndexT>();
-
-  memset(x_grad_data, 0, batch_size * value_length * sizeof(T));
+  std::vector<T> x_grad_vec(x_grad->numel(), 0);
 
   for (int i = 0; i < index_ids_num; i++) {
     int b = floor(i / index_length);
     PADDLE_ENFORCE_GE(
-        index_data[i], 0,
+        index_vec[i], 0,
         platform::errors::InvalidArgument(
             "Variable value (index) of OP(index_sample_grad) "
             "expected >= 0 and < %ld, but got %ld. Please check input "
             "value.",
-            value_length, index_data[i]));
+            value_length, index_vec[i]));
     PADDLE_ENFORCE_LT(
-        index_data[i], value_length,
+        index_vec[i], value_length,
         platform::errors::InvalidArgument(
             "Variable value (index) of OP(index_sample_grad) "
             "expected >= 0 and < %ld, but got %ld. Please check input "
             "value.",
-            value_length, index_data[i]));
-    int v_i = b * value_length + static_cast<int>(index_data[i]);
-    x_grad_data[v_i] += out_grad_data[i];
+            value_length, index_vec[i]));
+    int v_i = b * value_length + static_cast<int>(index_vec[i]);
+    x_grad_vec[v_i] += out_grad_vec[i];
   }
+  x_grad->mutable_data<T>(context.GetPlace());
+  framework::TensorFromVector(x_grad_vec, context.device_context(), x_grad);
+  x_grad->Resize(x_grad_dims);
 }
 
 template <typename DeviceContext, typename T>
diff --git a/python/paddle/fluid/tests/unittests/test_index_sample_op.py b/python/paddle/fluid/tests/unittests/test_index_sample_op.py
index e3fc219cdf5b05729ae9c1e5e269a0e037745f68..bd71ca0c1c9e795a529fb12cab5c12a7478c9ba4 100644
--- a/python/paddle/fluid/tests/unittests/test_index_sample_op.py
+++ b/python/paddle/fluid/tests/unittests/test_index_sample_op.py
@@ -32,6 +32,7 @@ class TestIndexSampleOp(OpTest):
         for i in range(self.index_shape[0]):
             for j in indexnp[i]:
                 index_array.append(xnp[i, j])
+        index_array = np.array(index_array).astype(self.x_type)
         out = np.reshape(index_array, self.index_shape)
         self.outputs = {'Out': out}
 
diff --git a/python/paddle/tensor/search.py b/python/paddle/tensor/search.py
index de02c412b9f5cbc2de359a52c3978ff2bea6a449..b27b223a26f2d0989d31417bf2827155e2dcbe85 100644
--- a/python/paddle/tensor/search.py
+++ b/python/paddle/tensor/search.py
@@ -17,7 +17,7 @@ from ..fluid.layer_helper import LayerHelper
 from ..fluid.data_feeder import check_variable_and_dtype, check_type, check_dtype
 from ..fluid import core, layers
 
-# TODO: define searching & indexing functions of a tensor  
+# TODO: define searching & indexing functions of a tensor
 __all__ = [
     'argmax',
     #       'argmin',
@@ -132,7 +132,7 @@ def index_select(input, index, dim=0):
     the entries in `index` which is a Tensor. The returned tensor has the same number
     of dimensions as the original `input` tensor. The dim-th dimension has the same
     size as the length of `index`; other dimensions have the same size as in the `input` tensor.
-    
+
     Args:
         input (Variable): The input tensor variable.
         index (Variable): The 1-D tensor containing the indices to index.
@@ -140,7 +140,7 @@ def index_select(input, index, dim=0):
 
     Returns:
         Variable: A Tensor with same data type as `input`.
-    
+
     Examples:
         .. code-block:: python
             import paddle
@@ -196,7 +196,7 @@ def nonzero(input, as_tuple=False):
     as_tuple is False, we can get a output tensor with shape [z, n], where `z` is the number of all
     non-zero elements in the `input` tensor. If as_tuple is True, we can get a 1-D tensor tuple of
     length `n`, and the shape of each 1-D tensor is [z, 1].
-    
+
     Args:
         inputs (Variable): The input tensor variable.
         as_tuple (bool): Return type, Tensor or tuple of Tensor.
@@ -365,13 +365,13 @@ def where(condition, x, y, name=None):
     Return a tensor of elements selected from either $x$ or $y$, depending on $condition$.
 
     .. math::
-      
+
       out_i =
      \\begin{cases}
       x_i, \quad \\text{if} \\ condition_i \\ is \\ True \\\\
       y_i, \quad \\text{if} \\ condition_i \\ is \\ False \\\\
       \\end{cases}
-      
+
 
     Args:
         condition(Variable): The condition to choose x or y.
@@ -475,21 +475,48 @@ def index_sample(x, index):
             import paddle.fluid as fluid
             import numpy as np
 
-            # create x value
-            x_shape = (2, 5)
-            x_type = "float64"
-            x_np = np.random.random(x_shape).astype(x_type)
-
-            # create index value
-            index_shape = (2, 3)
-            index_type = "int32"
-            index_np = np.random.randint(low=0,
-                                         high=x_shape[1],
-                                         size=index_shape).astype(index_type)
-
-            x = fluid.data(name='x', shape=[-1, 5], dtype='float64')
-            index = fluid.data(name='index', shape=[-1, 3], dtype='int32')
-            output = paddle.index_sample(x=x, index=index)
+            data = np.array([[1.0, 2.0, 3.0, 4.0],
+                             [5.0, 6.0, 7.0, 8.0],
+                             [9.0, 10.0, 11.0, 12.0]]).astype('float32')
+
+            data_index = np.array([[0, 1, 2],
+                                   [1, 2, 3],
+                                   [0, 0, 0]]).astype('int32')
+
+            target_data = np.array([[100, 200, 300, 400],
+                                    [500, 600, 700, 800],
+                                    [900, 1000, 1100, 1200]]).astype('int32')
+
+            with fluid.dygraph.guard():
+                x = fluid.dygraph.to_variable(data)
+                index = fluid.dygraph.to_variable(data_index)
+                target = fluid.dygraph.to_variable(target_data)
+
+                out_z1 = paddle.index_sample(x, index)
+                print(out_z1.numpy())
+                #[[1. 2. 3.]
+                # [6. 7. 8.]
+                # [9. 9. 9.]]
+
+                # Use the index of the maximum value by topk op
+                # get the value of the element of the corresponding index in other tensors
+                top_value, top_index = fluid.layers.topk(x, k=2)
+                out_z2 = paddle.index_sample(target, top_index)
+                print(top_value.numpy())
+                #[[ 4. 3.]
+                # [ 8. 7.]
+                # [12. 11.]]
+
+                print(top_index.numpy())
+                #[[3 2]
+                # [3 2]
+                # [3 2]]
+
+                print(out_z2.numpy())
+                #[[ 400 300]
+                # [ 800 700]
+                # [1200 1100]]
+
     """
     helper = LayerHelper("index_sample", **locals())
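Note: for reference, the per-row gather in IndexSampleInner and the scatter-add in IndexSampleGradInner (index_sample_op.h above) correspond to the NumPy sketch below. The helper names are illustrative only and are not part of the patch.

    import numpy as np

    def index_sample_ref(x, index):
        # Per-row gather: out[b, j] = x[b, index[b, j]], mirroring
        # v_i = b * value_length + index_vec[i] in IndexSampleInner.
        batch_size, index_length = index.shape
        out = np.empty((batch_size, index_length), dtype=x.dtype)
        for b in range(batch_size):
            for j in range(index_length):
                out[b, j] = x[b, index[b, j]]
        return out

    def index_sample_grad_ref(x_shape, index, out_grad):
        # Scatter-add of out_grad back into x positions, mirroring
        # x_grad_vec[v_i] += out_grad_vec[i]; duplicate indices accumulate.
        x_grad = np.zeros(x_shape, dtype=out_grad.dtype)
        batch_size, index_length = index.shape
        for b in range(batch_size):
            for j in range(index_length):
                x_grad[b, index[b, j]] += out_grad[b, j]
        return x_grad

    x = np.array([[1., 2., 3., 4.], [5., 6., 7., 8.]])
    idx = np.array([[0, 2], [3, 3]])
    print(index_sample_ref(x, idx))      # [[1. 3.]
                                         #  [8. 8.]]
    print(index_sample_grad_ref(x.shape, idx, np.ones((2, 2))))
    # [[1. 0. 1. 0.]
    #  [0. 0. 0. 2.]]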