diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc
index 7b40a5977a0ab7c5014e0bd1a8dc21d79c65e269..19986c728e8a1c16bf94b27654612d3e926648d6 100644
--- a/paddle/fluid/framework/operator.cc
+++ b/paddle/fluid/framework/operator.cc
@@ -1336,12 +1336,6 @@ Scope* OperatorWithKernel::PrepareData(
         continue;
       }
 
-      auto out_var_names = OutputVars(true);
-      if (std::find(out_var_names.begin(), out_var_names.end(), var_name) !=
-          out_var_names.end()) {
-        transfered_inplace_vars->emplace_back(var_name);
-      }
-
       VLOG(3) << "Transform Variable " << var_name << " from "
               << kernel_type_for_var << " to " << expected_kernel_key;
 
@@ -1383,13 +1377,33 @@ Scope* OperatorWithKernel::PrepareData(
       if (enable_cache_runtime_context_) {
         pre_scope_ = nullptr;
       }
+
+      // Create a new var with the same name in the transfer scope
       auto* trans_var = new_scope->Var(var_name);
       input_vars[i] = trans_var;
+
+      // Find whether inplace exists between input and output.
+      // If inplace exists, set the newly created var as the inplaced output,
+      // and record its name in transfered_inplace_vars.
+      for (auto& pair : Outputs()) {
+        for (size_t j = 0; j < pair.second.size(); ++j) {
+          if (pair.second[j] == var_name) {
+            VLOG(4) << "Found inplace between input(" << var_name_item.first
+                    << ") and output(" << pair.first
+                    << "), the variable name is " << var_name;
+            ctx->outputs[pair.first][j] = trans_var;
+            transfered_inplace_vars->emplace_back(var_name);
+          }
+        }
+      }
+
+      // Do transfer
       Tensor out;
       TransformData(expected_kernel_key, kernel_type_for_var, *tensor_in, &out);
       SetTensorToVariable(*var, out, trans_var);
     }
   }
+
   // If pre_scope = &scope, it means that scope is cached and the op is not in
   // while block. If new_scope = nullptr, it means that for each input of this
   // Op, there is no need to do PrepareData. So PrepareData could be skipped at
diff --git a/python/paddle/fluid/tests/unittests/test_increment.py b/python/paddle/fluid/tests/unittests/test_increment.py
index e8cc7c8cf18192f5171e34b3d44a74f13e3782e5..38f6a546071b0471522b97ca8ffb5ace14ffbed6 100755
--- a/python/paddle/fluid/tests/unittests/test_increment.py
+++ b/python/paddle/fluid/tests/unittests/test_increment.py
@@ -40,5 +40,19 @@ class TestIncrement(unittest.TestCase):
         self.assertEqual((output.numpy() == expected_result).all(), True)
 
 
+class TestInplaceApiWithDataTransform(unittest.TestCase):
+    def test_increment(self):
+        if fluid.core.is_compiled_with_cuda():
+            paddle.enable_static()
+            with paddle.fluid.device_guard("gpu:0"):
+                x = paddle.fluid.layers.fill_constant([1], "float32", 0)
+            with paddle.fluid.device_guard("cpu"):
+                x = paddle.increment(x)
+            exe = paddle.static.Executor(paddle.CUDAPlace(0))
+            a, = exe.run(paddle.static.default_main_program(), fetch_list=[x])
+            paddle.disable_static()
+            self.assertEqual(a[0], 1)
+
+
 if __name__ == "__main__":
     unittest.main()
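
For reference, below is a minimal standalone sketch of the inplace scan that the operator.cc hunk adds. It is not Paddle code: the container shape mirrors the std::map<std::string, std::vector<std::string>> layout that Outputs() returns, and the op signature (an output slot "Out" reusing an input variable "x") is a made-up example of an inplace pair.

// Standalone illustration under assumed shapes, not the real framework types:
// scan every output slot for a variable that shares the transformed input's
// name; each hit is the point where PrepareData repoints the output to the
// freshly created transfer variable and records the name in
// transfered_inplace_vars.
#include <iostream>
#include <map>
#include <string>
#include <vector>

int main() {
  // Hypothetical op: output "Out" reuses the input variable "x" inplace.
  std::map<std::string, std::vector<std::string>> outputs = {{"Out", {"x"}}};
  const std::string var_name = "x";  // the input being data-transformed
  std::vector<std::string> transfered_inplace_vars;

  for (auto& pair : outputs) {
    for (size_t j = 0; j < pair.second.size(); ++j) {
      if (pair.second[j] == var_name) {
        // In operator.cc this is where ctx->outputs[pair.first][j] is
        // redirected to the transfer variable (trans_var).
        transfered_inplace_vars.emplace_back(var_name);
        std::cout << "inplace pair: " << pair.first << "[" << j << "] <- "
                  << var_name << "\n";
      }
    }
  }
  return 0;
}

The new unit test exercises exactly this path: fill_constant places x on the GPU, while the inplace paddle.increment runs under a CPU device guard, so PrepareData must transfer the tensor and then keep input and output pointing at the same transferred variable for the result to read 1.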