diff --git a/paddle/fluid/operators/rnn_op.cu.cc b/paddle/fluid/operators/rnn_op.cu.cc
index ccf619a074ae28d6e4c4110267fa95f38b360a88..2be59c620441d6b3674b02373acc44e54751a50e 100644
--- a/paddle/fluid/operators/rnn_op.cu.cc
+++ b/paddle/fluid/operators/rnn_op.cu.cc
@@ -117,10 +117,11 @@ class RNNDescriptors {
 
     // ------------------- cudnn rnn descriptors ---------------------
 #ifdef PADDLE_WITH_HIP
-    PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::miopenSetRNNDescriptor(
-        rnn_desc_.desc(), hidden_size_, num_layers_, miopenRNNlinear,
+    PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::miopenSetRNNDescriptor_V2(
+        rnn_desc_.desc(), hidden_size_, num_layers_, dropout_desc_.desc(),
+        miopenRNNlinear,
         is_bidirec_ ? miopenRNNbidirection : miopenRNNunidirection, mode_,
-        miopenRNNNoBias, miopenRNNdefault, cudnn_type));
+        miopenRNNwithBias, miopenRNNdefault, cudnn_type));
 #elif CUDNN_VERSION >= 6000
     PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnSetRNNDescriptor_v6(
         handle, rnn_desc_.desc(), hidden_size_, num_layers_,
diff --git a/paddle/fluid/platform/dynload/miopen.h b/paddle/fluid/platform/dynload/miopen.h
index 43a3e1a1079d93183768178ac967c8f03eeb3c23..15de4c64e3e64569e231e5ad301453458645604b 100644
--- a/paddle/fluid/platform/dynload/miopen.h
+++ b/paddle/fluid/platform/dynload/miopen.h
@@ -125,6 +125,7 @@ extern void EnforceCUDNNLoaded(const char* fn_name);
   __macro(miopenCreateRNNDescriptor);          \
   __macro(miopenDestroyRNNDescriptor);         \
   __macro(miopenSetRNNDescriptor);             \
+  __macro(miopenSetRNNDescriptor_V2);          \
   __macro(miopenGetRNNParamsSize);             \
   __macro(miopenGetRNNWorkspaceSize);          \
   __macro(miopenGetRNNTrainingReserveSize);    \
diff --git a/python/paddle/fluid/tests/unittests/test_rnn_op.py b/python/paddle/fluid/tests/unittests/test_rnn_op.py
index 5ad2ffec98247b43f025ebc1e7bbfbb9598800d2..22e07b0bc48c0489ebee7e5b9faff3f859734f36 100644
--- a/python/paddle/fluid/tests/unittests/test_rnn_op.py
+++ b/python/paddle/fluid/tests/unittests/test_rnn_op.py
@@ -47,8 +47,10 @@ class TestRNNOp(OpTest):
 
     def setUp(self):
         self.op_type = "rnn"
-        self.dtype = np.float64
-        self.sequence_length = np.array([12, 11, 10, 9, 8], dtype=np.int32)
+        self.dtype = np.float32 if core.is_compiled_with_rocm() else np.float64
+        self.sequence_length = None if core.is_compiled_with_rocm(
+        ) else np.array(
+            [12, 11, 10, 9, 8], dtype=np.int32)
         self.num_layers = 1
         self.is_bidirec = False
         self.mode = "LSTM"
@@ -78,12 +80,31 @@ class TestRNNOp(OpTest):
             num_layers=self.num_layers,
             time_major=True,
             direction=direction,
-            dropout=self.dropout)
+            dropout=self.dropout,
+            dtype=self.dtype)
 
         flat_w = get_params_for_net(rnn1)
         output, (last_hidden, last_cell) = rnn1(
             input, sequence_length=self.sequence_length)
 
+        if core.is_compiled_with_rocm():
+
+            def rocm_rnn_get_place():
+                places = [core.CUDAPlace(0)]
+                return places
+
+            self._get_places = rocm_rnn_get_place
+
+            if self.is_bidirec:
+                for i in range(0, len(flat_w), 4):
+                    flat_w[i + 1], flat_w[i + 2] = flat_w[i + 2], flat_w[i + 1]
+
+            for i in range(len(flat_w)):
+                w = np.split(flat_w[i][1], 4, 0)
+                w = [w[0], w[1], w[3], w[2]]
+                w = np.concatenate(w)
+                flat_w[i] = (flat_w[i][0], w)
+
         init_h = np.zeros((self.num_layers * self.direction_num, batch_size,
                            hidden_size)).astype(self.dtype)
         init_c = np.zeros((self.num_layers * self.direction_num, batch_size,
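
Note on the test_rnn_op.py change above (commentary, not part of the diff): under
ROCm the reference weights produced by paddle.nn.LSTM are re-packed before the
comparison, apparently because the MIOpen-backed kernel packs the last two of
the four LSTM gate blocks in the opposite order from cuDNN. The sketch below
reproduces that per-parameter transformation with plain numpy; the name
reorder_lstm_gates and the concrete sizes are hypothetical, and only the
split / swap / concatenate step itself comes from the diff.

    # Sketch (assumed names) of the per-parameter gate re-ordering the test
    # applies under ROCm: split the packed parameter into four equal gate
    # blocks along axis 0 and swap the last two blocks.
    import numpy as np


    def reorder_lstm_gates(param):
        """Swap the third and fourth gate blocks of a packed LSTM parameter."""
        g0, g1, g2, g3 = np.split(param, 4, axis=0)
        return np.concatenate([g0, g1, g3, g2], axis=0)


    # Example with hidden_size = 2, input_size = 3 -> packed shape (4 * 2, 3).
    w = np.arange(24, dtype=np.float32).reshape(8, 3)
    w_rocm = reorder_lstm_gates(w)
    assert (w_rocm[:4] == w[:4]).all()    # first two gate blocks untouched
    assert (w_rocm[4:6] == w[6:8]).all()  # old fourth block is now third
    assert (w_rocm[6:8] == w[4:6]).all()  # old third block is now fourth

The is_bidirec branch likewise swaps the middle two entries of each group of
four flattened parameters, and the descriptor change requests miopenRNNwithBias
through miopenSetRNNDescriptor_V2 (which also takes an explicit dropout
descriptor), presumably so the bias terms participate on the HIP path as they
do on the cuDNN path.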