diff --git a/paddle/fluid/operators/elementwise/elementwise_op_function.h b/paddle/fluid/operators/elementwise/elementwise_op_function.h
index c69baadb3c22e47cacd65d2eefea15da804b7fac..32e49cf3996f120d2e2a8f909883e0c46f7b1352 100644
--- a/paddle/fluid/operators/elementwise/elementwise_op_function.h
+++ b/paddle/fluid/operators/elementwise/elementwise_op_function.h
@@ -39,7 +39,11 @@ limitations under the License. */
 #include "paddle/fluid/platform/cuda_device_function.h"
 #include "paddle/fluid/platform/cuda_primitives.h"
 
+#ifdef __HIPCC__
+constexpr int ELEMWISE_MAX_BLOCK_DIM = 256;
+#else
 constexpr int ELEMWISE_MAX_BLOCK_DIM = 1024;
+#endif
 #define BLOCK_X 32
 #define BLOCK_Y 32
 #endif
diff --git a/paddle/fluid/operators/group_norm_op.cu b/paddle/fluid/operators/group_norm_op.cu
index 45d97723a3e21044daf1609b749a22ae08efad39..18a248f55314f7814c3363fd45181501f0aa8b48 100644
--- a/paddle/fluid/operators/group_norm_op.cu
+++ b/paddle/fluid/operators/group_norm_op.cu
@@ -174,7 +174,11 @@ class GroupNormKernel
     int imsize = (data_layout == DataLayout::kNCHW ? x_dims[2] * x_dims[3]
                                                    : x_dims[1] * x_dims[2]);
+#ifdef __HIPCC__
+    int block_size = std::max(std::min(256, imsize), 64);
+#else
     int block_size = std::min(1024, imsize);
+#endif
     dim3 grid(group_size, groups, x_dims[0]);
     dim3 threads(block_size, 1, 1);
     GroupNormForwardGetMeanAndVar<T><<<grid, threads, 0, dev_ctx.stream()>>>(
@@ -348,7 +352,11 @@ class GroupNormGradKernel
     int imsize = (data_layout == DataLayout::kNCHW ? x_dims[2] * x_dims[3]
                                                    : x_dims[1] * x_dims[2]);
+#ifdef __HIPCC__
+    int block_size = std::max(std::min(256, imsize), 64);
+#else
     int block_size = std::min(1024, imsize);
+#endif
     dim3 grid(group_size, groups, x_dims[0]);
     dim3 threads(block_size, 1, 1);
     int flags =
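Note: the HIP build caps these launches at 256 threads per block instead of CUDA's 1024, and the group_norm launch is additionally floored at 64 threads, a likely fit for AMD's 64-lane wavefronts. A minimal Python sketch of the clamp, with a hypothetical image size:

    # hypothetical value; mirrors the C++ block-size logic above
    imsize = 7                              # H * W of one feature map
    block_size = max(min(256, imsize), 64)  # HIP: clamp into [64, 256]
    assert block_size == 64                 # CUDA would use min(1024, 7) == 7
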
But " "received size of kernel is %d, size of strides is %d.", kernel.size(), strides.size())); - PADDLE_ENFORCE_CUDA_SUCCESS(dynload::miopenSet2dPoolingDescriptor( - desc_, GetPoolingMode(mode), kernel[0], kernel[1], pads[0], pads[1], - strides[0], strides[1])); + PADDLE_ENFORCE_CUDA_SUCCESS(dynload::miopenSetNdPoolingDescriptor( + desc_, GetPoolingMode(mode), kernel.size(), + const_cast(kernel.data()), const_cast(pads.data()), + const_cast(strides.data()))); return desc_; } diff --git a/python/paddle/fluid/tests/unittests/test_bilinear_tensor_product_op.py b/python/paddle/fluid/tests/unittests/test_bilinear_tensor_product_op.py index ba9db2c104f18c3af969570e4e6c93e2d14c5c60..60e9d0a26b380d78e7b1566b2aa01a8be958fbcf 100644 --- a/python/paddle/fluid/tests/unittests/test_bilinear_tensor_product_op.py +++ b/python/paddle/fluid/tests/unittests/test_bilinear_tensor_product_op.py @@ -42,11 +42,12 @@ class TestBilinearTensorProductOp(OpTest): size0 = 5 size1 = 4 size2 = 5 - a = np.random.random((batch_size, size0)).astype("float64") - b = np.random.random((batch_size, size1)).astype("float64") - w = np.random.random((size2, size0, size1)).astype("float64") - bias = np.random.random((1, size2)).astype("float64") - output = np.zeros((batch_size, size2)).astype("float64") + dtype = "float32" if fluid.core.is_compiled_with_rocm() else "float64" + a = np.random.random((batch_size, size0)).astype(dtype) + b = np.random.random((batch_size, size1)).astype(dtype) + w = np.random.random((size2, size0, size1)).astype(dtype) + bias = np.random.random((1, size2)).astype(dtype) + output = np.zeros((batch_size, size2)).astype(dtype) for i in range(size2): w_i = w[i, :, :] output[:, i] = np.sum(np.matmul(a, w_i) * b, axis=1) diff --git a/python/paddle/fluid/tests/unittests/test_conv_nn_grad.py b/python/paddle/fluid/tests/unittests/test_conv_nn_grad.py index 7aa3d0d16862bd5ac49ab95817ea23d3cc0ef8a7..d5f49919bc951741e2d67e22a76633268ad8582e 100644 --- a/python/paddle/fluid/tests/unittests/test_conv_nn_grad.py +++ b/python/paddle/fluid/tests/unittests/test_conv_nn_grad.py @@ -30,7 +30,7 @@ class TestConvDoubleGradCheck(unittest.TestCase): def func(self, place): shape = [2, 4, 3, 3] eps = 0.005 - dtype = np.float64 + dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64 x = layers.data('x', shape, False, dtype) y = layers.conv2d(x, 2, 1, groups=1, bias_attr=False) x_arr = np.random.uniform(-1, 1, shape).astype(dtype) @@ -57,7 +57,7 @@ class TestConvDoubleGradCheck(unittest.TestCase): def func(self, place): shape = [2, 4, 3, 3] eps = 0.005 - dtype = np.float64 + dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64 x = layers.data('x', shape, False, dtype) y = layers.conv2d(x, 2, 1, bias_attr=False) x_arr = np.random.uniform(-1, 1, shape).astype(dtype) @@ -82,7 +82,7 @@ class TestConvDoubleGradCheckTest1(unittest.TestCase): def func(self, place): shape = [2, 3, 3, 3] eps = 0.005 - dtype = np.float64 + dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64 x = layers.data('x', shape, False, dtype) y = layers.conv2d(x, 2, 1, padding=1, bias_attr=False) x_arr = np.random.uniform(-1, 1, shape).astype(dtype) @@ -107,7 +107,7 @@ class TestConv3DDoubleGradCheck(unittest.TestCase): def func(self, place): shape = [2, 4, 3, 4, 2] eps = 0.005 - dtype = np.float64 + dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64 x = layers.data('x', shape, False, dtype) y = layers.conv3d(x, 2, 1, bias_attr=False) x_arr = np.random.uniform(-1, 1, 
diff --git a/python/paddle/fluid/tests/unittests/test_bilinear_tensor_product_op.py b/python/paddle/fluid/tests/unittests/test_bilinear_tensor_product_op.py
index ba9db2c104f18c3af969570e4e6c93e2d14c5c60..60e9d0a26b380d78e7b1566b2aa01a8be958fbcf 100644
--- a/python/paddle/fluid/tests/unittests/test_bilinear_tensor_product_op.py
+++ b/python/paddle/fluid/tests/unittests/test_bilinear_tensor_product_op.py
@@ -42,11 +42,12 @@ class TestBilinearTensorProductOp(OpTest):
         size0 = 5
         size1 = 4
         size2 = 5
-        a = np.random.random((batch_size, size0)).astype("float64")
-        b = np.random.random((batch_size, size1)).astype("float64")
-        w = np.random.random((size2, size0, size1)).astype("float64")
-        bias = np.random.random((1, size2)).astype("float64")
-        output = np.zeros((batch_size, size2)).astype("float64")
+        dtype = "float32" if fluid.core.is_compiled_with_rocm() else "float64"
+        a = np.random.random((batch_size, size0)).astype(dtype)
+        b = np.random.random((batch_size, size1)).astype(dtype)
+        w = np.random.random((size2, size0, size1)).astype(dtype)
+        bias = np.random.random((1, size2)).astype(dtype)
+        output = np.zeros((batch_size, size2)).astype(dtype)
         for i in range(size2):
             w_i = w[i, :, :]
             output[:, i] = np.sum(np.matmul(a, w_i) * b, axis=1)
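Note: the test changes from here on mostly follow a single pattern: compute in float32 when Paddle is built against ROCm, float64 otherwise, apparently because the MIOpen kernels these ops dispatch to lack double-precision variants. A minimal sketch of the recurring guard:

    import numpy as np
    import paddle.fluid as fluid

    # the dtype switch repeated throughout the tests below
    dtype = "float32" if fluid.core.is_compiled_with_rocm() else "float64"
    x = np.random.random((2, 4)).astype(dtype)  # inputs, weights, and expected
                                                # outputs all use the same dtype
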
diff --git a/python/paddle/fluid/tests/unittests/test_conv_nn_grad.py b/python/paddle/fluid/tests/unittests/test_conv_nn_grad.py
index 7aa3d0d16862bd5ac49ab95817ea23d3cc0ef8a7..d5f49919bc951741e2d67e22a76633268ad8582e 100644
--- a/python/paddle/fluid/tests/unittests/test_conv_nn_grad.py
+++ b/python/paddle/fluid/tests/unittests/test_conv_nn_grad.py
@@ -30,7 +30,7 @@ class TestConvDoubleGradCheck(unittest.TestCase):
     def func(self, place):
         shape = [2, 4, 3, 3]
         eps = 0.005
-        dtype = np.float64
+        dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64
         x = layers.data('x', shape, False, dtype)
         y = layers.conv2d(x, 2, 1, groups=1, bias_attr=False)
         x_arr = np.random.uniform(-1, 1, shape).astype(dtype)
@@ -57,7 +57,7 @@ class TestConvDoubleGradCheck(unittest.TestCase):
     def func(self, place):
         shape = [2, 4, 3, 3]
         eps = 0.005
-        dtype = np.float64
+        dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64
         x = layers.data('x', shape, False, dtype)
         y = layers.conv2d(x, 2, 1, bias_attr=False)
         x_arr = np.random.uniform(-1, 1, shape).astype(dtype)
@@ -82,7 +82,7 @@ class TestConvDoubleGradCheckTest1(unittest.TestCase):
     def func(self, place):
         shape = [2, 3, 3, 3]
         eps = 0.005
-        dtype = np.float64
+        dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64
         x = layers.data('x', shape, False, dtype)
         y = layers.conv2d(x, 2, 1, padding=1, bias_attr=False)
         x_arr = np.random.uniform(-1, 1, shape).astype(dtype)
@@ -107,7 +107,7 @@ class TestConv3DDoubleGradCheck(unittest.TestCase):
     def func(self, place):
         shape = [2, 4, 3, 4, 2]
         eps = 0.005
-        dtype = np.float64
+        dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64
         x = layers.data('x', shape, False, dtype)
         y = layers.conv3d(x, 2, 1, bias_attr=False)
         x_arr = np.random.uniform(-1, 1, shape).astype(dtype)
@@ -132,7 +132,7 @@ class TestConv3DDoubleGradCheckTest1(unittest.TestCase):
     def func(self, place):
         shape = [2, 4, 5, 3, 2]
         eps = 0.005
-        dtype = np.float64
+        dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64
         x = layers.data('x', shape, False, dtype)
         y = layers.conv3d(x, 2, 1, padding=1, bias_attr=False)
         x_arr = np.random.uniform(-1, 1, shape).astype(dtype)
@@ -157,7 +157,7 @@ class TestConv2DoubleGradCheck_AsyPadding(unittest.TestCase):
     def func(self, place):
         shape = [2, 2, 3, 3]
         eps = 0.005
-        dtype = np.float64
+        dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64
         x = layers.data('x', shape, False, dtype)
         y = layers.conv2d(
             input=x,
@@ -188,7 +188,7 @@ class TestConv2DoubleGradCheck_PaddingSAME(unittest.TestCase):
     def func(self, place):
         shape = [2, 2, 3, 3]
         eps = 0.005
-        dtype = np.float64
+        dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64
         x = layers.data('x', shape, False, dtype)
         y = layers.conv2d(
             input=x,
@@ -219,7 +219,7 @@ class TestConv2DoubleGradCheck_PaddingVALID(unittest.TestCase):
     def func(self, place):
         shape = [2, 2, 3, 3]
         eps = 0.005
-        dtype = np.float64
+        dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64
         x = layers.data('x', shape, False, dtype)
         y = layers.conv2d(
             input=x,
@@ -250,7 +250,7 @@ class TestConv2DoubleGradCheck_ChannelLast(unittest.TestCase):
     def func(self, place):
         shape = [2, 2, 3, 3]
         eps = 0.005
-        dtype = np.float64
+        dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64
         x = layers.data('x', shape, False, dtype)
         y = layers.conv2d(
             input=x,
@@ -283,7 +283,7 @@ class TestConv2DoubleGradCheck_ChannelLast_AsyPadding(unittest.TestCase):
     def func(self, place):
         shape = [2, 2, 3, 3]
         eps = 0.005
-        dtype = np.float64
+        dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64
         x = layers.data('x', shape, False, dtype)
         y = layers.conv2d(
             input=x,
@@ -316,7 +316,7 @@ class TestConv3DDoubleGradCheck_AsyPadding(unittest.TestCase):
     def func(self, place):
         shape = [2, 2, 2, 2, 2]
         eps = 0.005
-        dtype = np.float64
+        dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64
         x = layers.data('x', shape, False, dtype)
         y = layers.conv3d(
             input=x,
@@ -347,7 +347,7 @@ class TestConv3DoubleGradCheck_PaddingSAME(unittest.TestCase):
     def func(self, place):
         shape = [2, 2, 2, 2, 2]
         eps = 0.005
-        dtype = np.float64
+        dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64
         x = layers.data('x', shape, False, dtype)
         y = layers.conv3d(
             input=x,
@@ -379,7 +379,7 @@ class TestConv3DoubleGradCheck_PaddingVALID(unittest.TestCase):
     def func(self, place):
         shape = [2, 2, 3, 3, 2]
         eps = 0.005
-        dtype = np.float64
+        dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64
         x = layers.data('x', shape, False, dtype)
         y = layers.conv3d(
             input=x,
@@ -410,7 +410,7 @@ class TestConv3DDoubleGradCheck_ChannelLast(unittest.TestCase):
     def func(self, place):
         shape = [2, 2, 2, 2, 3]
         eps = 0.005
-        dtype = np.float64
+        dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64
         x = layers.data('x', shape, False, dtype)
         y = layers.conv3d(
             input=x,
@@ -443,7 +443,7 @@ class TestConv3DDoubleGradCheck_ChannelLast_AsyPadding(unittest.TestCase):
     def func(self, place):
         shape = [2, 2, 2, 2, 3]
         eps = 0.005
-        dtype = np.float64
+        dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64
         x = layers.data('x', shape, False, dtype)
         y = layers.conv3d(
             input=x,
@@ -476,7 +476,7 @@ class TestDepthWiseConvDoubleGradCheck(unittest.TestCase):
     def func(self, place):
         shape = [2, 4, 3, 3]
         eps = 0.005
-        dtype = np.float64
+        dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64
         x = layers.data('x', shape, False, dtype)
         # condition of depthwise conv:
diff --git a/python/paddle/fluid/tests/unittests/test_cross_entropy_loss.py b/python/paddle/fluid/tests/unittests/test_cross_entropy_loss.py
index 1a5e4b28355674010ba8f92b176d5cabca3e1a8d..ea44e23da249caaae2f209d0728d06b1494fbb4d 100644
--- a/python/paddle/fluid/tests/unittests/test_cross_entropy_loss.py
+++ b/python/paddle/fluid/tests/unittests/test_cross_entropy_loss.py
@@ -191,12 +191,16 @@ def cross_entropy_soft_2d(softmax,
 
 
 class CrossEntropyLoss(unittest.TestCase):
+    def setUp(self):
+        self.dtype = 'float32' if fluid.core.is_compiled_with_rocm(
+        ) else 'float64'
 
     ###test for deprecated softmax_with_cross_entropy
     def test_softmax_with_cross_entropy(self):
         self.numeric_stable_mode = False
         self.soft_label = True
-        self.dtype = np.float64
+        self.dtype = 'float32' if fluid.core.is_compiled_with_rocm(
+        ) else 'float64'
         self.axis = -1
         self.ignore_index = -100  #should not be changed
         self.N = 4
@@ -248,7 +252,8 @@ class CrossEntropyLoss(unittest.TestCase):
     def test_cross_entropy_loss_soft_1d(self):
         self.numeric_stable_mode = False
         self.soft_label = True
-        self.dtype = np.float64
+        self.dtype = 'float32' if fluid.core.is_compiled_with_rocm(
+        ) else 'float64'
         self.axis = -1
         self.ignore_index = -100  #should not be changed
         self.N = 4
@@ -296,9 +301,9 @@ class CrossEntropyLoss(unittest.TestCase):
         ) else fluid.CPUPlace()
         with fluid.program_guard(prog, startup_prog):
             input = fluid.data(
-                name='input', shape=[self.N, self.C], dtype='float64')
+                name='input', shape=[self.N, self.C], dtype=self.dtype)
             label = fluid.data(
-                name='label', shape=[self.N, self.C], dtype='float64')
+                name='label', shape=[self.N, self.C], dtype=self.dtype)
 
             cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
                 reduction=self.reduction, soft_label=True)
@@ -321,7 +326,8 @@ class CrossEntropyLoss(unittest.TestCase):
     def test_cross_entropy_loss_soft_1d_weight(self):
         self.numeric_stable_mode = False
         self.soft_label = True
-        self.dtype = np.float64
+        self.dtype = 'float32' if fluid.core.is_compiled_with_rocm(
+        ) else 'float64'
         self.axis = -1
         self.ignore_index = -100  #should not be changed
         self.N = 4
@@ -376,10 +382,10 @@ class CrossEntropyLoss(unittest.TestCase):
         ) else fluid.CPUPlace()
         with fluid.program_guard(prog, startup_prog):
             input = fluid.data(
-                name='input', shape=[self.N, self.C], dtype='float64')
+                name='input', shape=[self.N, self.C], dtype=self.dtype)
             label = fluid.data(
-                name='label', shape=[self.N, self.C], dtype='float64')
-            weight = fluid.data(name='weight', shape=[self.C], dtype='float64')
+                name='label', shape=[self.N, self.C], dtype=self.dtype)
+            weight = fluid.data(name='weight', shape=[self.C], dtype=self.dtype)
 
             cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
                 weight=weight, reduction=self.reduction, soft_label=True)
@@ -403,7 +409,8 @@ class CrossEntropyLoss(unittest.TestCase):
     def test_cross_entropy_loss_soft_1d_mean(self):
         self.numeric_stable_mode = False
         self.soft_label = True
-        self.dtype = np.float64
+        self.dtype = 'float32' if fluid.core.is_compiled_with_rocm(
+        ) else 'float64'
         self.axis = -1
         self.ignore_index = -100  #should not be changed
         self.N = 4
@@ -451,9 +458,9 @@ class CrossEntropyLoss(unittest.TestCase):
         ) else fluid.CPUPlace()
         with fluid.program_guard(prog, startup_prog):
             input = fluid.data(
-                name='input', shape=[self.N, self.C], dtype='float64')
+                name='input', shape=[self.N, self.C], dtype=self.dtype)
             label = fluid.data(
-                name='label', shape=[self.N, self.C], dtype='float64')
+                name='label', shape=[self.N, self.C], dtype=self.dtype)
 
             cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
                 reduction=self.reduction, soft_label=True)
@@ -475,7 +482,8 @@ class CrossEntropyLoss(unittest.TestCase):
     def test_cross_entropy_loss_soft_1d_weight_mean(self):
         self.numeric_stable_mode = False
         self.soft_label = True
-        self.dtype = np.float64
+        self.dtype = 'float32' if fluid.core.is_compiled_with_rocm(
+        ) else 'float64'
         self.axis = -1
         self.ignore_index = -100  #should not be changed
         self.N = 4
@@ -523,10 +531,10 @@ class CrossEntropyLoss(unittest.TestCase):
         ) else fluid.CPUPlace()
         with fluid.program_guard(prog, startup_prog):
             input = fluid.data(
-                name='input', shape=[self.N, self.C], dtype='float64')
+                name='input', shape=[self.N, self.C], dtype=self.dtype)
             label = fluid.data(
-                name='label', shape=[self.N, self.C], dtype='float64')
-            weight = fluid.data(name='weight', shape=[self.C], dtype='float64')
+                name='label', shape=[self.N, self.C], dtype=self.dtype)
+            weight = fluid.data(name='weight', shape=[self.C], dtype=self.dtype)
 
             cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
                 weight=weight, reduction=self.reduction, soft_label=True)
@@ -549,7 +557,8 @@ class CrossEntropyLoss(unittest.TestCase):
     def test_cross_entropy_loss_soft_2d(self):
         self.numeric_stable_mode = False
         self.soft_label = True
-        self.dtype = np.float64
+        self.dtype = 'float32' if fluid.core.is_compiled_with_rocm(
+        ) else 'float64'
         self.axis = -1
         self.ignore_index = -100  #should not be changed
         self.N = 3
@@ -604,11 +613,11 @@ class CrossEntropyLoss(unittest.TestCase):
             input = fluid.data(
                 name='input',
                 shape=[self.N, self.H, self.W, self.C],
-                dtype='float64')
+                dtype=self.dtype)
             label = fluid.data(
                 name='label',
                 shape=[self.N, self.H, self.W, self.C],
-                dtype='float64')
+                dtype=self.dtype)
 
             cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
                 reduction=self.reduction, soft_label=True)
@@ -631,7 +640,8 @@ class CrossEntropyLoss(unittest.TestCase):
     def test_cross_entropy_loss_soft_2d_weight_mean(self):
         self.numeric_stable_mode = False
         self.soft_label = True
-        self.dtype = np.float64
+        self.dtype = 'float32' if fluid.core.is_compiled_with_rocm(
+        ) else 'float64'
         self.axis = -1
         self.ignore_index = -100  #should not be changed
         self.N = 3
@@ -685,12 +695,12 @@ class CrossEntropyLoss(unittest.TestCase):
             input = fluid.data(
                 name='input',
                 shape=[self.N, self.H, self.W, self.C],
-                dtype='float64')
+                dtype=self.dtype)
             label = fluid.data(
                 name='label',
                 shape=[self.N, self.H, self.W, self.C],
-                dtype='float64')
-            weight = fluid.data(name='weight', shape=[self.C], dtype='float64')
+                dtype=self.dtype)
+            weight = fluid.data(name='weight', shape=[self.C], dtype=self.dtype)
 
             cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
                 weight=weight, reduction=self.reduction, soft_label=True)
@@ -713,7 +723,7 @@ class CrossEntropyLoss(unittest.TestCase):
     ###soft_label test end
 
     def test_cross_entropy_loss_1d_with_mean_ignore(self):
-        input_np = np.random.random([2, 4]).astype(np.float64)
+        input_np = np.random.random([2, 4]).astype(self.dtype)
         label_np = np.random.randint(0, 4, size=(2)).astype(np.int64)
         paddle.enable_static()
         prog = fluid.Program()
@@ -721,7 +731,7 @@ class CrossEntropyLoss(unittest.TestCase):
         place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
         ) else fluid.CPUPlace()
         with fluid.program_guard(prog, startup_prog):
-            input = fluid.data(name='input', shape=[2, 4], dtype='float64')
+            input = fluid.data(name='input', shape=[2, 4], dtype=self.dtype)
             label = fluid.data(name='label', shape=[2], dtype='int64')
             cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(ignore_index=0)
             ret = cross_entropy_loss(input, label)
@@ -752,20 +762,20 @@ class CrossEntropyLoss(unittest.TestCase):
     def test_cross_entropy_loss_1d_with_weight_mean_ignore(self):
         N = 100
         C = 200
-        input_np = np.random.random([N, C]).astype(np.float64)
+        input_np = np.random.random([N, C]).astype(self.dtype)
         label_np = np.random.randint(0, C, size=(N)).astype(np.int64)
-        weight_np = np.random.random([C]).astype(np.float64)
+        weight_np = np.random.random([C]).astype(self.dtype)
         paddle.enable_static()
         prog = fluid.Program()
         startup_prog = fluid.Program()
         place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
         ) else fluid.CPUPlace()
         with fluid.program_guard(prog, startup_prog):
-            input = fluid.data(name='input', shape=[N, C], dtype='float64')
+            input = fluid.data(name='input', shape=[N, C], dtype=self.dtype)
             label = fluid.data(name='label', shape=[N], dtype='int64')
             weight = fluid.data(
                 name='weight', shape=[C],
-                dtype='float64')  #weight for each class
+                dtype=self.dtype)  #weight for each class
             cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
                 weight=weight, ignore_index=0)
             ret = cross_entropy_loss(input, label)
@@ -798,20 +808,20 @@ class CrossEntropyLoss(unittest.TestCase):
         self.assertTrue(np.allclose(dy_ret_value, expected))
 
     def test_cross_entropy_loss_1d_with_weight_mean(self):
-        input_np = np.random.random([2, 4]).astype(np.float64)
+        input_np = np.random.random([2, 4]).astype(self.dtype)
         label_np = np.random.randint(0, 4, size=(2)).astype(np.int64)
-        weight_np = np.random.random([4]).astype(np.float64)  #shape:C
+        weight_np = np.random.random([4]).astype(self.dtype)  #shape:C
         paddle.enable_static()
         prog = fluid.Program()
         startup_prog = fluid.Program()
         place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
         ) else fluid.CPUPlace()
         with fluid.program_guard(prog, startup_prog):
-            input = fluid.data(name='input', shape=[2, 4], dtype='float64')
+            input = fluid.data(name='input', shape=[2, 4], dtype=self.dtype)
             label = fluid.data(name='label', shape=[2], dtype='int64')
             weight = fluid.data(
                 name='weight', shape=[4],
-                dtype='float64')  #weight for each class
+                dtype=self.dtype)  #weight for each class
             cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(weight=weight)
             ret = cross_entropy_loss(input, label)
@@ -842,18 +852,18 @@ class CrossEntropyLoss(unittest.TestCase):
         self.assertTrue(np.allclose(dy_ret_value, expected))
 
     def test_cross_entropy_loss_1d_with_weight_sum(self):
-        input_np = np.random.random([100, 200]).astype(np.float64)  #N,C
+        input_np = np.random.random([100, 200]).astype(self.dtype)  #N,C
         label_np = np.random.randint(0, 100, size=(100)).astype(np.int64)  #N,1
-        weight_np = np.random.random([200]).astype(np.float64)  #C
+        weight_np = np.random.random([200]).astype(self.dtype)  #C
         paddle.enable_static()
         prog = fluid.Program()
         startup_prog = fluid.Program()
         place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
         ) else fluid.CPUPlace()
         with fluid.program_guard(prog, startup_prog):
-            input = fluid.data(name='input', shape=[100, 200], dtype='float64')
+            input = fluid.data(name='input', shape=[100, 200], dtype=self.dtype)
             label = fluid.data(name='label', shape=[100], dtype='int64')
-            weight = fluid.data(name='weight', shape=[200], dtype='float64')
+            weight = fluid.data(name='weight', shape=[200], dtype=self.dtype)
             cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
                 weight=weight, reduction='sum')
             ret = cross_entropy_loss(input, label)
@@ -882,9 +892,9 @@ class CrossEntropyLoss(unittest.TestCase):
         self.assertTrue(np.allclose(dy_ret_value, expected))
 
     def test_cross_entropy_loss_1d_with_weight_none(self):
-        input_np = np.random.random([100, 200]).astype(np.float64)  #N,C
+        input_np = np.random.random([100, 200]).astype(self.dtype)  #N,C
         label_np = np.random.randint(0, 100, size=(100)).astype(np.int64)  #N,1
-        weight_np = np.random.random([200]).astype(np.float64)  #C
+        weight_np = np.random.random([200]).astype(self.dtype)  #C
 
         paddle.enable_static()
         prog = fluid.Program()
@@ -892,9 +902,9 @@ class CrossEntropyLoss(unittest.TestCase):
         place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
         ) else fluid.CPUPlace()
         with fluid.program_guard(prog, startup_prog):
-            input = fluid.data(name='input', shape=[100, 200], dtype='float64')
+            input = fluid.data(name='input', shape=[100, 200], dtype=self.dtype)
             label = fluid.data(name='label', shape=[100], dtype='int64')
-            weight = fluid.data(name='weight', shape=[200], dtype='float64')
+            weight = fluid.data(name='weight', shape=[200], dtype=self.dtype)
 
             cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
                 weight=weight, reduction='none')
@@ -926,18 +936,18 @@ class CrossEntropyLoss(unittest.TestCase):
         self.assertTrue(np.allclose(dy_ret_value, expected))
 
     def test_cross_entropy_loss_1d_with_weight_none_func(self):
-        input_np = np.random.random([100, 200]).astype(np.float64)  #N,C
+        input_np = np.random.random([100, 200]).astype(self.dtype)  #N,C
         label_np = np.random.randint(0, 100, size=(100)).astype(np.int64)  #N
-        weight_np = np.random.random([200]).astype(np.float64)  #C
+        weight_np = np.random.random([200]).astype(self.dtype)  #C
         paddle.enable_static()
         prog = fluid.Program()
         startup_prog = fluid.Program()
         place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
         ) else fluid.CPUPlace()
         with fluid.program_guard(prog, startup_prog):
-            input = fluid.data(name='input', shape=[100, 200], dtype='float64')
+            input = fluid.data(name='input', shape=[100, 200], dtype=self.dtype)
             label = fluid.data(name='label', shape=[100], dtype='int64')
-            weight = fluid.data(name='weight', shape=[200], dtype='float64')
+            weight = fluid.data(name='weight', shape=[200], dtype=self.dtype)
             ret = paddle.nn.functional.cross_entropy(
                 input, label, weight=weight, reduction='none')
@@ -967,18 +977,18 @@ class CrossEntropyLoss(unittest.TestCase):
         self.assertTrue(np.allclose(dy_ret_value, expected))
 
     def test_cross_entropy_loss_1d_mean(self):
-        input_np = np.random.random([100, 200]).astype(np.float64)  #N,C
+        input_np = np.random.random([100, 200]).astype(self.dtype)  #N,C
         label_np = np.random.randint(0, 100, size=(100)).astype(np.int64)  #N,1
-        weight_np = np.random.random([200]).astype(np.float64)  #C
+        weight_np = np.random.random([200]).astype(self.dtype)  #C
         paddle.enable_static()
         prog = fluid.Program()
         startup_prog = fluid.Program()
         place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
         ) else fluid.CPUPlace()
         with fluid.program_guard(prog, startup_prog):
-            input = fluid.data(name='input', shape=[100, 200], dtype='float64')
+            input = fluid.data(name='input', shape=[100, 200], dtype=self.dtype)
             label = fluid.data(name='label', shape=[100], dtype='int64')
-            weight = fluid.data(name='weight', shape=[100], dtype='float64')
+            weight = fluid.data(name='weight', shape=[100], dtype=self.dtype)
             cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss()
             ret = cross_entropy_loss(input, label)
             exe = fluid.Executor(place)
@@ -1000,7 +1010,7 @@ class CrossEntropyLoss(unittest.TestCase):
         self.assertTrue(np.allclose(dy_ret_value, expected))
 
     def test_cross_entropy_loss_1d_sum(self):
-        input_np = np.random.random([100, 200]).astype(np.float64)  #N,C
+        input_np = np.random.random([100, 200]).astype(self.dtype)  #N,C
         label_np = np.random.randint(0, 100, size=(100)).astype(np.int64)  #N,1
         paddle.enable_static()
         prog = fluid.Program()
@@ -1008,7 +1018,7 @@ class CrossEntropyLoss(unittest.TestCase):
         place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
         ) else fluid.CPUPlace()
         with fluid.program_guard(prog, startup_prog):
-            input = fluid.data(name='input', shape=[100, 200], dtype='float64')
+            input = fluid.data(name='input', shape=[100, 200], dtype=self.dtype)
             label = fluid.data(name='label', shape=[100], dtype='int64')
             cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
                 reduction='sum')
@@ -1033,7 +1043,7 @@ class CrossEntropyLoss(unittest.TestCase):
         self.assertTrue(np.allclose(dy_ret_value, expected))
 
     def test_cross_entropy_loss_1d_none(self):
-        input_np = np.random.random([100, 200]).astype(np.float64)  #N,C
+        input_np = np.random.random([100, 200]).astype(self.dtype)  #N,C
         label_np = np.random.randint(0, 100, size=(100)).astype(np.int64)  #N,1
         paddle.enable_static()
         prog = fluid.Program()
@@ -1041,7 +1051,7 @@ class CrossEntropyLoss(unittest.TestCase):
         place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
         ) else fluid.CPUPlace()
         with fluid.program_guard(prog, startup_prog):
-            input = fluid.data(name='input', shape=[100, 200], dtype='float64')
+            input = fluid.data(name='input', shape=[100, 200], dtype=self.dtype)
             label = fluid.data(name='label', shape=[100], dtype='int64')
             cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
                 reduction='none')
@@ -1068,10 +1078,10 @@ class CrossEntropyLoss(unittest.TestCase):
         self.assertTrue(np.allclose(dy_ret_value, expected))
 
     def test_cross_entropy_loss_2d_with_weight_none(self):
-        input_np = np.random.random(size=(2, 2, 2, 3)).astype(np.float64)  #NHWC
+        input_np = np.random.random(size=(2, 2, 2, 3)).astype(self.dtype)  #NHWC
         label_np = np.random.randint(
             0, 3, size=(2, 2, 2)).astype(np.int64)  #NHW1
-        weight_np = np.random.random(size=(3, )).astype(np.float64)  #C
+        weight_np = np.random.random(size=(3, )).astype(self.dtype)  #C
 
         paddle.enable_static()
         prog = fluid.Program()
@@ -1080,9 +1090,9 @@ class CrossEntropyLoss(unittest.TestCase):
         ) else fluid.CPUPlace()
         with fluid.program_guard(prog, startup_prog):
             input = fluid.data(
-                name='input', shape=[2, 2, 2, 3], dtype='float64')
+                name='input', shape=[2, 2, 2, 3], dtype=self.dtype)
             label = fluid.data(name='label', shape=[2, 2, 2], dtype='int64')
-            weight = fluid.data(name='weight', shape=[3], dtype='float64')
+            weight = fluid.data(name='weight', shape=[3], dtype=self.dtype)
             cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
                 weight=weight, reduction='none')
             ret = cross_entropy_loss(input, label)
@@ -1113,10 +1123,10 @@ class CrossEntropyLoss(unittest.TestCase):
         self.assertTrue(np.allclose(dy_ret_value, expected))
 
     def test_cross_entropy_loss_2d_with_weight_mean(self):
-        input_np = np.random.random(size=(2, 2, 2, 3)).astype(np.float64)  #NHWC
+        input_np = np.random.random(size=(2, 2, 2, 3)).astype(self.dtype)  #NHWC
         label_np = np.random.randint(
             0, 3, size=(2, 2, 2)).astype(np.int64)  #NHW
-        weight_np = np.random.random(size=(3, )).astype(np.float64)  #C
+        weight_np = np.random.random(size=(3, )).astype(self.dtype)  #C
         paddle.enable_static()
         prog = fluid.Program()
         startup_prog = fluid.Program()
@@ -1124,9 +1134,9 @@ class CrossEntropyLoss(unittest.TestCase):
         ) else fluid.CPUPlace()
         with fluid.program_guard(prog, startup_prog):
             input = fluid.data(
-                name='input', shape=[2, 2, 2, 3], dtype='float64')
+                name='input', shape=[2, 2, 2, 3], dtype=self.dtype)
             label = fluid.data(name='label', shape=[2, 2, 2], dtype='int64')
-            weight = fluid.data(name='weight', shape=[3], dtype='float64')
+            weight = fluid.data(name='weight', shape=[3], dtype=self.dtype)
             cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
                 weight=weight, reduction='mean')
             ret = cross_entropy_loss(input, label)
@@ -1155,10 +1165,10 @@ class CrossEntropyLoss(unittest.TestCase):
         self.assertTrue(np.allclose(dy_ret_value, expected))
 
     def test_cross_entropy_loss_2d_with_weight_sum(self):
-        input_np = np.random.random(size=(2, 2, 2, 3)).astype(np.float64)  #NHWC
+        input_np = np.random.random(size=(2, 2, 2, 3)).astype(self.dtype)  #NHWC
         label_np = np.random.randint(
             0, 3, size=(2, 2, 2)).astype(np.int64)  #NHW
-        weight_np = np.random.random(size=(3, )).astype(np.float64)  #C
+        weight_np = np.random.random(size=(3, )).astype(self.dtype)  #C
 
         paddle.enable_static()
         prog = fluid.Program()
@@ -1167,9 +1177,9 @@ class CrossEntropyLoss(unittest.TestCase):
         ) else fluid.CPUPlace()
         with fluid.program_guard(prog, startup_prog):
             input = fluid.data(
-                name='input', shape=[2, 2, 2, 3], dtype='float64')
+                name='input', shape=[2, 2, 2, 3], dtype=self.dtype)
             label = fluid.data(name='label', shape=[2, 2, 2], dtype='int64')
-            weight = fluid.data(name='weight', shape=[3], dtype='float64')
+            weight = fluid.data(name='weight', shape=[3], dtype=self.dtype)
             cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
                 weight=weight, reduction='sum')
             ret = cross_entropy_loss(input, label)
@@ -1198,7 +1208,7 @@ class CrossEntropyLoss(unittest.TestCase):
         self.assertTrue(np.allclose(dy_ret_value, expected))
 
     def test_cross_entropy_loss_2d_none(self):
-        input_np = np.random.random(size=(2, 2, 2, 3)).astype(np.float64)  #NHWC
+        input_np = np.random.random(size=(2, 2, 2, 3)).astype(self.dtype)  #NHWC
         label_np = np.random.randint(
             0, 3, size=(2, 2, 2)).astype(np.int64)  #NHW
         paddle.enable_static()
@@ -1208,7 +1218,7 @@ class CrossEntropyLoss(unittest.TestCase):
         ) else fluid.CPUPlace()
         with fluid.program_guard(prog, startup_prog):
             input = fluid.data(
-                name='input', shape=[2, 2, 2, 3], dtype='float64')
+                name='input', shape=[2, 2, 2, 3], dtype=self.dtype)
             label = fluid.data(name='label', shape=[2, 2, 2], dtype='int64')
             cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
                 reduction='none')
@@ -1237,7 +1247,7 @@ class CrossEntropyLoss(unittest.TestCase):
         self.assertTrue(np.allclose(dy_ret_value, expected))
 
     def test_cross_entropy_loss_2d_mean(self):
-        input_np = np.random.random(size=(2, 2, 2, 3)).astype(np.float64)  #NHWC
+        input_np = np.random.random(size=(2, 2, 2, 3)).astype(self.dtype)  #NHWC
         label_np = np.random.randint(
             0, 3, size=(2, 2, 2)).astype(np.int64)  #NHW
         paddle.enable_static()
@@ -1247,7 +1257,7 @@ class CrossEntropyLoss(unittest.TestCase):
         ) else fluid.CPUPlace()
         with fluid.program_guard(prog, startup_prog):
             input = fluid.data(
-                name='input', shape=[2, 2, 2, 3], dtype='float64')
+                name='input', shape=[2, 2, 2, 3], dtype=self.dtype)
             label = fluid.data(name='label', shape=[2, 2, 2], dtype='int64')
             cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
                 reduction='mean')
@@ -1276,7 +1286,7 @@ class CrossEntropyLoss(unittest.TestCase):
         self.assertTrue(np.allclose(dy_ret_value, expected))
 
     def test_cross_entropy_loss_2d_sum(self):
-        input_np = np.random.random(size=(2, 2, 2, 3)).astype(np.float64)  #NHWC
+        input_np = np.random.random(size=(2, 2, 2, 3)).astype(self.dtype)  #NHWC
         label_np = np.random.randint(
             0, 3, size=(2, 2, 2)).astype(np.int64)  #NHW
         paddle.enable_static()
@@ -1286,7 +1296,7 @@ class CrossEntropyLoss(unittest.TestCase):
         ) else fluid.CPUPlace()
         with fluid.program_guard(prog, startup_prog):
             input = fluid.data(
-                name='input', shape=[2, 2, 2, 3], dtype='float64')
+                name='input', shape=[2, 2, 2, 3], dtype=self.dtype)
             label = fluid.data(name='label', shape=[2, 2, 2], dtype='int64')
             cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
                 reduction='sum')
diff --git a/python/paddle/fluid/tests/unittests/test_expand_op.py b/python/paddle/fluid/tests/unittests/test_expand_op.py
index a325ffe1d0ef4636bad6329099c359bdf79d5dfc..edda6da655ddd9fe7af33fdf574c65f457327ec8 100644
--- a/python/paddle/fluid/tests/unittests/test_expand_op.py
+++ b/python/paddle/fluid/tests/unittests/test_expand_op.py
@@ -27,8 +27,10 @@ class TestExpandOpRank1(OpTest):
     def setUp(self):
         self.op_type = "expand"
         self.init_data()
+        self.dtype = "float32" if fluid.core.is_compiled_with_rocm(
+        ) else "float64"
 
-        self.inputs = {'X': np.random.random(self.ori_shape).astype("float64")}
+        self.inputs = {'X': np.random.random(self.ori_shape).astype(self.dtype)}
         self.attrs = {'expand_times': self.expand_times}
         output = np.tile(self.inputs['X'], self.expand_times)
         self.outputs = {'Out': output}
@@ -79,13 +81,16 @@ class TestExpandOpRank1_tensor_attr(OpTest):
     def setUp(self):
         self.op_type = "expand"
         self.init_data()
+        self.dtype = "float32" if fluid.core.is_compiled_with_rocm(
+        ) else "float64"
+
         expand_times_tensor = []
         for index, ele in enumerate(self.expand_times):
             expand_times_tensor.append(("x" + str(index), np.ones(
                 (1)).astype('int32') * ele))
 
         self.inputs = {
-            'X': np.random.random(self.ori_shape).astype("float64"),
+            'X': np.random.random(self.ori_shape).astype(self.dtype),
             'expand_times_tensor': expand_times_tensor,
         }
         self.attrs = {"expand_times": self.infer_expand_times}
@@ -123,9 +128,11 @@ class TestExpandOpRank1_tensor(OpTest):
     def setUp(self):
         self.op_type = "expand"
         self.init_data()
+        self.dtype = "float32" if fluid.core.is_compiled_with_rocm(
+        ) else "float64"
 
         self.inputs = {
-            'X': np.random.random(self.ori_shape).astype("float64"),
+            'X': np.random.random(self.ori_shape).astype(self.dtype),
             'ExpandTimes': np.array(self.expand_times).astype("int32"),
         }
         self.attrs = {}
diff --git a/python/paddle/fluid/tests/unittests/test_gpu_package_without_gpu_device.py b/python/paddle/fluid/tests/unittests/test_gpu_package_without_gpu_device.py
index d854372bbc6e7d713c823fac540bb2bd425c9f91..2b51bec9cb0e7d7087b1476220148014f9c39cae 100644
--- a/python/paddle/fluid/tests/unittests/test_gpu_package_without_gpu_device.py
+++ b/python/paddle/fluid/tests/unittests/test_gpu_package_without_gpu_device.py
@@ -26,7 +26,10 @@ from paddle.fluid import core
 class TestGPUPackagePaddle(unittest.TestCase):
     def test_import_paddle(self):
         if core.is_compiled_with_cuda():
-            os.environ['CUDA_VISIBLE_DEVICES'] = ''
+            if core.is_compiled_with_rocm():
+                os.environ['HIP_VISIBLE_DEVICES'] = ''
+            else:
+                os.environ['CUDA_VISIBLE_DEVICES'] = ''
             test_file = 'test_no_gpu_run_rand.py'
             with open(test_file, 'w') as wb:
                 cmd_test = """
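Note: ROCm selects devices through HIP_VISIBLE_DEVICES rather than CUDA_VISIBLE_DEVICES, so hiding every GPU from the child process needs a per-backend variable; the diff also implies is_compiled_with_cuda() reports True on ROCm builds, which is why the ROCm check nests inside it. Sketch:

    import os
    from paddle.fluid import core

    env_var = ('HIP_VISIBLE_DEVICES'
               if core.is_compiled_with_rocm() else 'CUDA_VISIBLE_DEVICES')
    os.environ[env_var] = ''  # the process now sees no GPU devices
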
diff --git a/python/paddle/fluid/tests/unittests/test_gru_rnn_op.py b/python/paddle/fluid/tests/unittests/test_gru_rnn_op.py
index eb1fed81cbee58dac1eba154455e1168861eddce..9f18ec9843d7a40c8e73663e56405ef4b864f25f 100644
--- a/python/paddle/fluid/tests/unittests/test_gru_rnn_op.py
+++ b/python/paddle/fluid/tests/unittests/test_gru_rnn_op.py
@@ -44,8 +44,9 @@ class TestGRUOp(OpTest):
     def setUp(self):
         self.op_type = "rnn"
-        self.dtype = "float64"
-        self.sequence_length = np.array(
+        self.dtype = "float32" if core.is_compiled_with_rocm() else "float64"
+        self.sequence_length = None if core.is_compiled_with_rocm(
+        ) else np.array(
             [12, 11, 10, 9, 8, 7, 6, 5], dtype=np.int32)
         self.num_layers = 1
         self.is_bidirec = False
@@ -83,6 +84,24 @@ class TestGRUOp(OpTest):
 
         output, last_hidden = rnn1(input, sequence_length=self.sequence_length)
 
+        if core.is_compiled_with_rocm():
+
+            def rocm_rnn_get_place():
+                places = [core.CUDAPlace(0)]
+                return places
+
+            self._get_places = rocm_rnn_get_place
+
+            if self.is_bidirec:
+                for i in range(0, len(flat_w), 4):
+                    flat_w[i + 1], flat_w[i + 2] = flat_w[i + 2], flat_w[i + 1]
+
+            for i in range(len(flat_w)):
+                w = np.split(flat_w[i][1], 3, 0)
+                w = [w[1], w[0], w[2]]
+                w = np.concatenate(w)
+                flat_w[i] = (flat_w[i][0], w)
+
         init_h = np.zeros((self.num_layers * self.direction_num, batch_size,
                            self.hidden_size)).astype(self.dtype)
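Note: the added block rewrites the reference GRU weights into the layout the ROCm kernel expects: each weight tensor stacks three gate blocks, and the first two are swapped, suggesting MIOpen orders the update/reset gates opposite to cuDNN; for bidirectional runs the paired weight tensors are also exchanged. Annotated sketch of the inner loop:

    # flat_w[i] is a (name, ndarray) pair; the array stacks 3 gate blocks
    w = np.split(flat_w[i][1], 3, 0)        # [g0, g1, g2] in the cuDNN order
    w = np.concatenate([w[1], w[0], w[2]])  # swap the first two blocks
    flat_w[i] = (flat_w[i][0], w)
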
diff --git a/python/paddle/fluid/tests/unittests/test_gru_unit_op.py b/python/paddle/fluid/tests/unittests/test_gru_unit_op.py
index a570e266072adc167f973cd4b22eda3ecb5e5abb..74afa7db2899b4405e11fc9eb8fbea2f383428cb 100644
--- a/python/paddle/fluid/tests/unittests/test_gru_unit_op.py
+++ b/python/paddle/fluid/tests/unittests/test_gru_unit_op.py
@@ -121,12 +121,12 @@ class TestGRUUnitOp(OpTest):
         self.op_type = 'gru_unit'
         self.inputs = {
             'Input': np.random.uniform(
-                -0.1, 0.1, (batch_size, frame_size * 3)).astype('float64'),
+                -0.1, 0.1, (batch_size, frame_size * 3)).astype(self.dtype),
             'HiddenPrev': np.random.uniform(
-                -0.1, 0.1, (batch_size, frame_size)).astype('float64'),
+                -0.1, 0.1, (batch_size, frame_size)).astype(self.dtype),
             'Weight': np.random.uniform(
                 -1. / math.sqrt(frame_size), 1. / math.sqrt(frame_size),
-                (frame_size, frame_size * 3)).astype('float64'),
+                (frame_size, frame_size * 3)).astype(self.dtype),
         }
         self.attrs = {
             'activation': GRUActivationType.tanh,
@@ -161,12 +161,14 @@ class TestGRUUnitOp(OpTest):
         else:
             h = u * c + (1 - u) * h_p
         self.outputs = {
-            'Gate': g.astype('float64'),
-            'ResetHiddenPrev': r_h_p.astype('float64'),
-            'Hidden': h.astype('float64')
+            'Gate': g.astype(self.dtype),
+            'ResetHiddenPrev': r_h_p.astype(self.dtype),
+            'Hidden': h.astype(self.dtype)
         }
 
     def setUp(self):
+        self.dtype = 'float32' if fluid.core.is_compiled_with_rocm(
+        ) else 'float64'
         self.set_inputs()
         self.set_outputs()
 
@@ -179,6 +181,8 @@ class TestGRUUnitOp(OpTest):
 
 class TestGRUUnitOpOriginMode(TestGRUUnitOp):
     def setUp(self):
+        self.dtype = 'float32' if fluid.core.is_compiled_with_rocm(
+        ) else 'float64'
         self.set_inputs(origin_mode=True)
         self.set_outputs(origin_mode=True)
 
@@ -189,7 +193,7 @@ class TestGRUUnitOpWithBias(TestGRUUnitOp):
         frame_size = self.frame_size
         super(TestGRUUnitOpWithBias, self).set_inputs()
         self.inputs['Bias'] = np.random.uniform(
-            -0.1, 0.1, (1, frame_size * 3)).astype('float64')
+            -0.1, 0.1, (1, frame_size * 3)).astype(self.dtype)
         self.attrs = {
             'activation': GRUActivationType.identity,
             'gate_activation': GRUActivationType.sigmoid,
@@ -207,6 +211,8 @@ class TestGRUUnitOpWithBias(TestGRUUnitOp):
 
 class TestGRUUnitOpWithBiasOriginMode(TestGRUUnitOpWithBias):
     def setUp(self):
+        self.dtype = 'float32' if fluid.core.is_compiled_with_rocm(
+        ) else 'float64'
         self.set_inputs(origin_mode=True)
         self.set_outputs(origin_mode=True)
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py b/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py
index cd019c920756f34e6081da2d98e990d1bb5abee5..36c4d67bf2d813a5910eca7b0d766eee9c2a0824 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py
@@ -190,10 +190,18 @@ class TestImperativeOptimizerBase(unittest.TestCase):
         for key, value in six.iteritems(static_param_init_value):
             self.assertTrue(np.allclose(value, dy_param_init_value[key]))
 
-        self.assertTrue(np.allclose(static_out, dy_out))
+        if core.is_compiled_with_rocm():
+            self.assertTrue(np.allclose(static_out, dy_out, atol=1e-3))
+        else:
+            self.assertTrue(np.allclose(static_out, dy_out))
 
         for key, value in six.iteritems(static_param_value):
-            self.assertTrue(np.allclose(value, dy_param_value[key]))
+            if core.is_compiled_with_rocm():
+                self.assertTrue(
+                    np.allclose(
+                        value, dy_param_value[key], atol=1e-3))
+            else:
+                self.assertTrue(np.allclose(value, dy_param_value[key]))
 
 
 class TestImperativeOptimizerPiecewiseDecay(TestImperativeOptimizerBase):
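Note: with ROCm running these models in float32, bit-for-bit agreement between the static-graph and dynamic-graph runs is no longer realistic, so the comparisons relax to atol=1e-3 on ROCm only; the float64 CUDA/CPU paths keep the default np.allclose tolerances. The same reasoning drives the looser max_relative_error and atol values in test_pool3d_op.py further down.
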
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_optimizer_v2.py b/python/paddle/fluid/tests/unittests/test_imperative_optimizer_v2.py
index e3d82888f6160d42db6b83562618bcc857b44dc0..eac627d1b5b07656f107dd6e9fa667bb9cbb0249 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_optimizer_v2.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_optimizer_v2.py
@@ -207,10 +207,18 @@ class TestImperativeOptimizerBase(unittest.TestCase):
         for key, value in six.iteritems(static_param_init_value):
             self.assertTrue(np.allclose(value, dy_param_init_value[key]))
 
-        self.assertTrue(np.allclose(static_out, dy_out))
+        if core.is_compiled_with_rocm():
+            self.assertTrue(np.allclose(static_out, dy_out, atol=1e-3))
+        else:
+            self.assertTrue(np.allclose(static_out, dy_out))
 
         for key, value in six.iteritems(static_param_value):
-            self.assertTrue(np.allclose(value, dy_param_value[key]))
+            if core.is_compiled_with_rocm():
+                self.assertTrue(
+                    np.allclose(
+                        value, dy_param_value[key], atol=1e-3))
+            else:
+                self.assertTrue(np.allclose(value, dy_param_value[key]))
 
 
 class TestImperativeOptimizerPiecewiseDecay(TestImperativeOptimizerBase):
diff --git a/python/paddle/fluid/tests/unittests/test_lstm_cudnn_op.py b/python/paddle/fluid/tests/unittests/test_lstm_cudnn_op.py
index 82443f8c5493b4d10d039f7cd602e7d49428f24e..372b8d0d4d2766cefb5b1e7aad35ff798d41df51 100644
--- a/python/paddle/fluid/tests/unittests/test_lstm_cudnn_op.py
+++ b/python/paddle/fluid/tests/unittests/test_lstm_cudnn_op.py
@@ -390,8 +390,10 @@ class TestCUDNNLstmOp(OpTest):
     def setUp(self):
         self.op_type = "cudnn_lstm"
-        self.dtype = np.float64
-        self.sequence_length = np.array([12, 11, 10, 9, 8], dtype=np.int32)
+        self.dtype = np.float32 if core.is_compiled_with_rocm() else np.float64
+        self.sequence_length = None if core.is_compiled_with_rocm(
+        ) else np.array(
+            [12, 11, 10, 9, 8], dtype=np.int32)
         self.num_layers = 1
         self.set_attrs()
 
@@ -447,6 +449,13 @@ class TestCUDNNLstmOp(OpTest):
                            hidden_size)).astype(self.dtype)
         state_out = np.ndarray((300)).astype("uint8")
 
+        if core.is_compiled_with_rocm():
+            for i in range(len(flat_w)):
+                w = np.split(flat_w[i][1], 4, 0)
+                w = [w[0], w[1], w[3], w[2]]
+                w = np.concatenate(w)
+                flat_w[i] = (flat_w[i][0], w)
+
         self.inputs = {
             'Input': input,
             'WeightList': flat_w,
@@ -454,6 +463,13 @@ class TestCUDNNLstmOp(OpTest):
             'InitC': init_c,
             'SequenceLength': self.sequence_length
         }
+        if self.sequence_length is None:
+            self.inputs = {
+                'Input': input,
+                'WeightList': flat_w,
+                'InitH': init_h,
+                'InitC': init_c,
+            }
         self.attrs = {
             'dropout_prob': 0.0,
             'is_bidirec': False,
@@ -474,8 +490,12 @@ class TestCUDNNLstmOp(OpTest):
 
     def test_output_with_place(self):
         place = core.CUDAPlace(0)
-        self.check_output_with_place(
-            place, no_check_set=['Reserve', 'StateOut'])
+        if core.is_compiled_with_rocm():
+            self.check_output_with_place(
+                place, atol=1e-5, no_check_set=['Reserve', 'StateOut'])
+        else:
+            self.check_output_with_place(
+                place, no_check_set=['Reserve', 'StateOut'])
 
     def test_grad_with_place(self):
         place = core.CUDAPlace(0)
@@ -496,14 +516,13 @@ class TestCUDNNlstmAPI(unittest.TestCase):
         hidden_size = 20
         dropout_prob = 0.0
         num_layers = 1
+        dtype = 'float32' if core.is_compiled_with_rocm() else 'float64'
         input = fluid.data(
-            name='input',
-            shape=[seq_len, batch_size, hidden_size],
-            dtype='float64')
+            name='input', shape=[seq_len, batch_size, hidden_size], dtype=dtype)
         init_h = layers.fill_constant([num_layers, batch_size, hidden_size],
-                                      'float64', 0.0)
+                                      dtype, 0.0)
         init_c = layers.fill_constant([num_layers, batch_size, hidden_size],
-                                      'float64', 0.0)
+                                      dtype, 0.0)
         rnn_out, last_h, last_c = layers.lstm(input, init_h, init_c, seq_len,
                                               hidden_size, num_layers,
                                               dropout_prob, False)
@@ -526,14 +545,13 @@ class TestCUDNNlstmAPI(unittest.TestCase):
         hidden_size = 20
         dropout_prob = 0.0
         num_layers = 2
+        dtype = 'float32' if core.is_compiled_with_rocm() else 'float64'
         input = fluid.data(
-            name='input',
-            shape=[seq_len, batch_size, hidden_size],
-            dtype='float64')
+            name='input', shape=[seq_len, batch_size, hidden_size], dtype=dtype)
         init_h = layers.fill_constant([num_layers, batch_size, hidden_size],
-                                      'float64', 0.0)
+                                      dtype, 0.0)
         init_c = layers.fill_constant([num_layers, batch_size, hidden_size],
-                                      'float64', 0.0)
+                                      dtype, 0.0)
         rnn_out, last_h, last_c = layers.lstm(input, init_h, init_c, seq_len,
                                               hidden_size, num_layers,
                                               dropout_prob, False, True)
@@ -541,7 +559,7 @@ class TestCUDNNlstmAPI(unittest.TestCase):
         exe.run(fluid.default_startup_program())
         input_i = np.random.uniform(
             low=-0.1, high=0.1, size=(seq_len, batch_size,
                                       hidden_size)).astype(dtype)
         out = exe.run(fluid.default_main_program(),
                       feed={'input': input_i},
                       fetch_list=[rnn_out, last_h, last_c, 'cudnn_lstm_0.w_0'])
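Note: mirroring the GRU test, the LSTM weights are re-stacked for the ROCm kernel by swapping the last two of the four gate blocks, suggesting the cell and output gates are ordered differently in MIOpen than in cuDNN; SequenceLength is also dropped from the inputs on ROCm, consistent with the sequence_length = None switch in setUp. Sketch of the reorder:

    w = np.split(flat_w[i][1], 4, 0)              # 4 stacked LSTM gate blocks
    w = np.concatenate([w[0], w[1], w[3], w[2]])  # swap the last two blocks
    flat_w[i] = (flat_w[i][0], w)
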
diff --git a/python/paddle/fluid/tests/unittests/test_pool3d_op.py b/python/paddle/fluid/tests/unittests/test_pool3d_op.py
index d618875835ffd8cbeba1bccbdfe7829a66c1ba76..40b9be9ee4f9bd69c763422fce850b0a3d6931d3 100644
--- a/python/paddle/fluid/tests/unittests/test_pool3d_op.py
+++ b/python/paddle/fluid/tests/unittests/test_pool3d_op.py
@@ -224,7 +224,7 @@ class TestPool3D_Op(OpTest):
     def setUp(self):
         self.op_type = "pool3d"
         self.init_kernel_type()
-        self.dtype = np.float64
+        self.dtype = np.float32 if core.is_compiled_with_rocm() else np.float64
         self.init_test_case()
         self.padding_algorithm = "EXPLICIT"
         self.init_paddings()
@@ -277,9 +277,16 @@ class TestPool3D_Op(OpTest):
             return
         if self.has_cudnn() and self.pool_type != "max":
             place = core.CUDAPlace(0)
-            self.check_grad_with_place(place, set(['X']), 'Out')
+            if core.is_compiled_with_rocm():
+                self.check_grad_with_place(
+                    place, set(['X']), 'Out', max_relative_error=1e-2)
+            else:
+                self.check_grad_with_place(place, set(['X']), 'Out')
         elif self.pool_type != "max":
-            self.check_grad(set(['X']), 'Out')
+            if core.is_compiled_with_rocm():
+                self.check_grad(set(['X']), 'Out', max_relative_error=1e-2)
+            else:
+                self.check_grad(set(['X']), 'Out')
 
     def init_data_format(self):
         self.data_format = "NCDHW"
@@ -400,7 +407,10 @@ def create_test_cudnn_fp16_class(parent):
             if core.is_compiled_with_cuda():
                 place = core.CUDAPlace(0)
                 if core.is_float16_supported(place):
-                    self.check_output_with_place(place, atol=1e-3)
+                    if core.is_compiled_with_rocm():
+                        self.check_output_with_place(place, atol=1e-2)
+                    else:
+                        self.check_output_with_place(place, atol=1e-3)
 
     cls_name = "{0}_{1}".format(parent.__name__, "CUDNNFp16Op")
     TestCUDNNFp16Case.__name__ = cls_name
diff --git a/python/paddle/nn/layer/rnn.py b/python/paddle/nn/layer/rnn.py
index a899f18f521e8e7265cba24086da9cb5cc8cc22f..0cefb89340a7c0f847f2c81e48aa473ecfccaa16 100644
--- a/python/paddle/nn/layer/rnn.py
+++ b/python/paddle/nn/layer/rnn.py
@@ -1053,7 +1053,8 @@ class RNNBase(LayerList):
             initial_states, paddle.fluid.framework.Variable) else initial_states
 
-        if self.could_use_cudnn:
+        if self.could_use_cudnn and (not fluid.core.is_compiled_with_rocm() or
+                                     sequence_length is None):
             # Add CPU kernel and dispatch in backend later
             return self._cudnn_impl(inputs, initial_states, sequence_length)
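Note: the rnn.py guard is the user-facing side of the sequence_length limitation seen in the tests: on ROCm the fused kernel is only dispatched when no sequence_length is supplied, and padded variable-length input falls back to the non-fused implementation. The condition reduces to:

    use_fused = self.could_use_cudnn and (
        not fluid.core.is_compiled_with_rocm() or sequence_length is None)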