[ROCM] fix test_rnn_op (#31735)

c9e1d9dc · ronnywang · GitHub · 1c67cf0c · c9e1d9dc · c9e1d9dc
3 changed file
--- a/paddle/fluid/operators/rnn_op.cu.cc
+++ b/paddle/fluid/operators/rnn_op.cu.cc
@@ -117,10 +117,11 @@ class RNNDescriptors {

 // ------------------- cudnn rnn descriptors ---------------------
 #ifdef PADDLE_WITH_HIP
-    PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::miopenSetRNNDescriptor(
-        rnn_desc_.desc(), hidden_size_, num_layers_, miopenRNNlinear,
+    PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::miopenSetRNNDescriptor_V2(
+        rnn_desc_.desc(), hidden_size_, num_layers_, dropout_desc_.desc(),
+        miopenRNNlinear,
        is_bidirec_ ? miopenRNNbidirection : miopenRNNunidirection, mode_,
-        miopenRNNNoBias, miopenRNNdefault, cudnn_type));
+        miopenRNNwithBias, miopenRNNdefault, cudnn_type));
 #elif CUDNN_VERSION >= 6000
    PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnSetRNNDescriptor_v6(
        handle, rnn_desc_.desc(), hidden_size_, num_layers_,

--- a/paddle/fluid/platform/dynload/miopen.h
+++ b/paddle/fluid/platform/dynload/miopen.h
@@ -125,6 +125,7 @@ extern void EnforceCUDNNLoaded(const char* fn_name);
  __macro(miopenCreateRNNDescriptor);                     \
  __macro(miopenDestroyRNNDescriptor);                    \
  __macro(miopenSetRNNDescriptor);                        \
+  __macro(miopenSetRNNDescriptor_V2);                     \
  __macro(miopenGetRNNParamsSize);                        \
  __macro(miopenGetRNNWorkspaceSize);                     \
  __macro(miopenGetRNNTrainingReserveSize);               \

--- a/python/paddle/fluid/tests/unittests/test_rnn_op.py
+++ b/python/paddle/fluid/tests/unittests/test_rnn_op.py
@@ -47,8 +47,10 @@ class TestRNNOp(OpTest):

    def setUp(self):
        self.op_type = "rnn"
-        self.dtype = np.float64
-        self.sequence_length = np.array([12, 11, 10, 9, 8], dtype=np.int32)
+        self.dtype = np.float32 if core.is_compiled_with_rocm() else np.float64
+        self.sequence_length = None if core.is_compiled_with_rocm(
+        ) else np.array(
+            [12, 11, 10, 9, 8], dtype=np.int32)
        self.num_layers = 1
        self.is_bidirec = False
        self.mode = "LSTM"
@@ -78,12 +80,31 @@ class TestRNNOp(OpTest):
            num_layers=self.num_layers,
            time_major=True,
            direction=direction,
-            dropout=self.dropout)
+            dropout=self.dropout,
+            dtype=self.dtype)

        flat_w = get_params_for_net(rnn1)
        output, (last_hidden, last_cell) = rnn1(
            input, sequence_length=self.sequence_length)

+        if core.is_compiled_with_rocm():
+
+            def rocm_rnn_get_place():
+                places = [core.CUDAPlace(0)]
+                return places
+
+            self._get_places = rocm_rnn_get_place
+
+            if self.is_bidirec:
+                for i in range(0, len(flat_w), 4):
+                    flat_w[i + 1], flat_w[i + 2] = flat_w[i + 2], flat_w[i + 1]
+
+            for i in range(len(flat_w)):
+                w = np.split(flat_w[i][1], 4, 0)
+                w = [w[0], w[1], w[3], w[2]]
+                w = np.concatenate(w)
+                flat_w[i] = (flat_w[i][0], w)
+
        init_h = np.zeros((self.num_layers * self.direction_num, batch_size,
                           hidden_size)).astype(self.dtype)
        init_c = np.zeros((self.num_layers * self.direction_num, batch_size,