diff --git a/paddle/fluid/platform/device/gpu/gpu_launch_config.h b/paddle/fluid/platform/device/gpu/gpu_launch_config.h
index d07ef73a49e7991d43d056da7d41eb83792a402b..cb0173fd6d911d7f0cc95f1d2dcfd168b73e768e 100644
--- a/paddle/fluid/platform/device/gpu/gpu_launch_config.h
+++ b/paddle/fluid/platform/device/gpu/gpu_launch_config.h
@@ -63,8 +63,9 @@ static inline int RoundToPowerOfTwo(int n) {
 #ifdef WITH_NV_JETSON
 // The number of threads cannot be assigned 1024 in some cases when the device
 // is nano or tx2 .
-inline void ChangeThreadNum(const platform::CUDADeviceContext& context,
-                            int* num_thread, int alternative_num_thread = 512) {
+template <typename CUDADeviceContext>
+inline void ChangeThreadNum(const CUDADeviceContext& context, int* num_thread,
+                            int alternative_num_thread = 512) {
   if (context.GetComputeCapability() == 53 ||
       context.GetComputeCapability() == 62) {
     *num_thread = alternative_num_thread;