diff --git a/lite/pylite/test/test_network_device.py b/lite/pylite/test/test_network_device.py
index b668993f43a5294ff8e76b9dc4c6222c6b8d8f9c..648199e7c363100a823adef3df8bc69ef85b2d21 100644
--- a/lite/pylite/test/test_network_device.py
+++ b/lite/pylite/test/test_network_device.py
@@ -140,7 +140,8 @@ class TestNetwork(TestShuffleNetCuda):
         network.load(self.model_path)
 
         input_tensor = network.get_io_tensor("data")
-        assert input_tensor.device_type == LiteDeviceType.LITE_CPU
+        # The device type is CUDA, but by default the memory is pinned host memory, which does not reside on the CUDA device.
+        assert input_tensor.device_type == LiteDeviceType.LITE_CUDA
 
         self.do_forward(network)
 
diff --git a/lite/src/mge/tensor_impl.cpp b/lite/src/mge/tensor_impl.cpp
index 30203932bc2bf16bda024b1a2ced9acbfdb9bc04..4dbe093b4eeff89cd605c94b35d072cfc32c034b 100644
--- a/lite/src/mge/tensor_impl.cpp
+++ b/lite/src/mge/tensor_impl.cpp
@@ -102,7 +102,7 @@ TensorImplDft::TensorImplDft(
 
 LiteDeviceType TensorImplDft::get_device_type() const {
     if (is_host()) {
-        return LiteDeviceType::LITE_CPU;
+        return get_device_from_locator(m_host_tensor->comp_node().locator());
     } else {
         return get_device_from_locator(m_dev_tensor->comp_node().locator());
     }
diff --git a/lite/test/test_tensor.cpp b/lite/test/test_tensor.cpp
index f312b37ae79aef5098f66d90dd8c592e740a17df..ab5d06ea9a7d6bfe62f554987177aae17bcf37a8 100644
--- a/lite/test/test_tensor.cpp
+++ b/lite/test/test_tensor.cpp
@@ -571,6 +571,17 @@ TEST(TestTensor, ConcatDevice) {
     check(1);
     check(2);
 }
+
+TEST(TestTensor, CudaOutputDevice) {
+    Layout layout{{1, 4}, 2};
+    bool is_pinned_host = true;
+    Tensor tensor(LiteDeviceType::LITE_CUDA, layout, is_pinned_host);
+    // If is_pinned_host is true, when calling update_from_implement() the device type
+    // should always be updated with
+    // get_device_from_locator(m_host_tensor->comp_node().locator()).
+    tensor.update_from_implement();
+    ASSERT_EQ(tensor.get_device_type(), LiteDeviceType::LITE_CUDA);
+}
 
 #endif
 #endif
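
For context, a minimal usage sketch of the contract this change establishes, assuming the public lite Tensor API (the Tensor constructor, Layout, and get_device_type() appear in the diff above; the "lite/tensor.h" include path is an assumption, not confirmed by this diff): a tensor created with is_pinned_host=true is backed by page-locked host memory but logically belongs to the CUDA device, so get_device_type() now reports LITE_CUDA rather than LITE_CPU.

#include "lite/tensor.h"  // assumed public header for lite::Tensor

using namespace lite;

int main() {
    Layout layout{{1, 4}, 2};
    // Pinned-host CUDA tensor: the buffer lives in page-locked host RAM,
    // but the tensor is associated with the CUDA device.
    Tensor tensor(LiteDeviceType::LITE_CUDA, layout, /*is_pinned_host=*/true);
    // Before this change, get_device_type() returned LITE_CPU for any
    // host-side tensor; it now derives the type from the host tensor's
    // comp node locator, so a pinned-host CUDA tensor reports LITE_CUDA.
    return tensor.get_device_type() == LiteDeviceType::LITE_CUDA ? 0 : 1;
}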