diff --git a/paddle/fluid/operators/layer_norm_op.cu b/paddle/fluid/operators/layer_norm_op.cu
index d0f7dca98af0f66dc81352908616f92376aa54ae..3656de3525d32cac814e4199089de56b40ea09d8 100644
--- a/paddle/fluid/operators/layer_norm_op.cu
+++ b/paddle/fluid/operators/layer_norm_op.cu
@@ -43,7 +43,11 @@ template <typename T>
 using LayerNormParamType = typename CudnnDataType<T>::BatchNormParamType;
 
 inline static int GetDesiredBlockDim(int block_dim) {
+#ifdef __HIPCC__
+  const int kMaxBlockDim = 256;
+#else
   const int kMaxBlockDim = 512;
+#endif
   return block_dim >= kMaxBlockDim
              ? kMaxBlockDim
              : (1 << (static_cast<int>(std::log2f(block_dim))));
@@ -698,8 +702,11 @@ static void LayerNormBackward(const T *x, const T *d_y, const U *scale,
                               const framework::ExecutionContext &ctx) {
   auto &dev_ctx = ctx.cuda_device_context();
   auto stream = dev_ctx.stream();
-
+#ifdef __HIPCC__
+  const int kMaxBlockDim = 256;
+#else
   const int kMaxBlockDim = 512;
+#endif
   const int kMaxBlockNum = 128;
   int gradient_flag = ((d_x != nullptr ? 1 : 0) << 2) |
                       ((d_scale != nullptr ? 1 : 0) << 1) |
diff --git a/paddle/fluid/operators/norm_op.cu b/paddle/fluid/operators/norm_op.cu
index 6b5c70c925843ee8002e4297c242f39b485a5fa3..4c1674ded1a44a4a7f0b0f4c3b8bca37c810ed4c 100644
--- a/paddle/fluid/operators/norm_op.cu
+++ b/paddle/fluid/operators/norm_op.cu
@@ -79,8 +79,11 @@ class NormCUDAKernel : public framework::OpKernel<T> {
     GetDims(xdim, axis, &pre, &n, &post);
 
     auto& dev_ctx = ctx.cuda_device_context();
-
+#ifdef __HIPCC__
+    const int block = 256;
+#else
     const int block = 512;
+#endif
     int max_threads = dev_ctx.GetMaxPhysicalThreadCount();
     const int max_blocks = std::max(max_threads / block, 1);
     int grid = std::min(max_blocks, pre * post);
@@ -146,7 +149,11 @@ class NormGradCUDAKernel : public framework::OpKernel<T> {
 
     auto& dev_ctx = ctx.cuda_device_context();
 
+#ifdef __HIPCC__
+    const int block = 256;
+#else
     const int block = 512;
+#endif
     int max_threads = dev_ctx.GetMaxPhysicalThreadCount();
     const int max_blocks = std::max(max_threads / block, 1);
     int grid = std::min(max_blocks, pre * post);
diff --git a/paddle/fluid/operators/p_norm_op.cu b/paddle/fluid/operators/p_norm_op.cu
index 918f0bb1e49d681743a5e831755e8d5d3a58b4b7..bd6694abdbf763db4f9d65809f22ad16f5457959 100644
--- a/paddle/fluid/operators/p_norm_op.cu
+++ b/paddle/fluid/operators/p_norm_op.cu
@@ -142,7 +142,12 @@ class PnormCUDAKernel : public framework::OpKernel<T> {
 
     auto& dev_ctx = ctx.cuda_device_context();
 
+#ifdef __HIPCC__
+    const int block = 256;
+#else
     const int block = 512;
+#endif
+
     int max_threads = dev_ctx.GetMaxPhysicalThreadCount();
     const int max_blocks = std::max(max_threads / block, 1);
     int grid = std::min(max_blocks, pre * post);
@@ -244,7 +249,12 @@ class PnormGradCUDAKernel : public framework::OpKernel<T> {
 
     auto& dev_ctx = ctx.cuda_device_context();
 
+#ifdef __HIPCC__
+    const int block = 256;
+#else
     const int block = 512;
+#endif
+
     int max_threads = dev_ctx.GetMaxPhysicalThreadCount();
     const int max_blocks = std::max(max_threads / block, 1);
     int grid = std::min(max_blocks, pre * post);
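Note on the kernel-side hunks above: all three files apply the same switch, capping the thread-block size at 256 under `__HIPCC__` (the ROCm/HIP compiler) while CUDA builds keep 512. Below is a minimal standalone sketch of the selection pattern from the `layer_norm_op.cu` hunk, depending only on the C++ standard library; any performance rationale (e.g. occupancy on AMD's 64-lane wavefronts) is our reading, not something the patch states.

```cpp
#include <cmath>

// Cap the block dimension per platform, then round an arbitrary request
// down to a power of two (mirrors the patched GetDesiredBlockDim).
inline int GetDesiredBlockDim(int block_dim) {
#ifdef __HIPCC__
  const int kMaxBlockDim = 256;  // ROCm/HIP build: smaller cap
#else
  const int kMaxBlockDim = 512;  // CUDA build: original cap
#endif
  return block_dim >= kMaxBlockDim
             ? kMaxBlockDim
             : (1 << static_cast<int>(std::log2f(block_dim)));
}
```

For example, `GetDesiredBlockDim(300)` yields 256 under both toolchains (300 rounds down to 2^8), while `GetDesiredBlockDim(700)` yields 512 under CUDA but is clamped to 256 under HIP.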
diff --git a/python/paddle/fluid/tests/unittests/sequence/test_sequence_softmax_op.py b/python/paddle/fluid/tests/unittests/sequence/test_sequence_softmax_op.py
index 92146820da17243f4345bc3576ee0de79d7b7215..cb92a68bde638d0fc9c1e7e76ef9e00788cd849a 100644
--- a/python/paddle/fluid/tests/unittests/sequence/test_sequence_softmax_op.py
+++ b/python/paddle/fluid/tests/unittests/sequence/test_sequence_softmax_op.py
@@ -28,10 +28,10 @@ class TestSequenceSoftmaxOp(OpTest):
         self.op_type = "sequence_softmax"
         self.use_cudnn = False
         self.init_op_type()
-
-        x = np.random.uniform(0.1, 1, (110, 1)).astype("float64")
+        self.dtype = "float32" if core.is_compiled_with_rocm() else "float64"
+        x = np.random.uniform(0.1, 1, (110, 1)).astype(self.dtype)
         self.init_lod()
-        out = np.zeros((110, 1)).astype("float64")
+        out = np.zeros((110, 1)).astype(self.dtype)
         offset = 0
         for i in range(len(self.lod[0])):
             if (self.lod[0][i] == 0):
diff --git a/python/paddle/fluid/tests/unittests/test_math_op_patch_var_base.py b/python/paddle/fluid/tests/unittests/test_math_op_patch_var_base.py
index e908f1a60a0028502bbacf3b0ad37c185d3f7311..4b097f6359f8862d128c568f4de0776c46190a4e 100644
--- a/python/paddle/fluid/tests/unittests/test_math_op_patch_var_base.py
+++ b/python/paddle/fluid/tests/unittests/test_math_op_patch_var_base.py
@@ -354,8 +354,11 @@ class TestMathOpPatchesVarBase(unittest.TestCase):
                               [1.30058, 1.0688717, 1.4928783],
                               [1.0958099, 1.3724753, 1.8926544]])
         d = d.matmul(d.t())
-        self.assertTrue(
-            np.array_equal(d.cholesky().numpy(), paddle.cholesky(d).numpy()))
+        # ROCM not support cholesky
+        if not fluid.core.is_compiled_with_rocm():
+            self.assertTrue(
+                np.array_equal(d.cholesky().numpy(), paddle.cholesky(d).numpy(
+                )))
 
         self.assertTrue(
             np.array_equal(x.is_empty().numpy(), paddle.is_empty(x).numpy()))
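For reference, the launch-configuration arithmetic that the `norm_op.cu` and `p_norm_op.cu` hunks adjust is sketched below; `max_physical_threads` stands in for the value Paddle obtains from `dev_ctx.GetMaxPhysicalThreadCount()`, and the function name `ComputeGrid` is ours, not Paddle's.

```cpp
#include <algorithm>

#ifdef __HIPCC__
constexpr int kBlock = 256;
#else
constexpr int kBlock = 512;
#endif

// Launch enough blocks to occupy the device once over, clamped to at
// least one block and at most one block per independent (pre * post) row.
inline int ComputeGrid(int max_physical_threads, int pre, int post) {
  const int max_blocks = std::max(max_physical_threads / kBlock, 1);
  return std::min(max_blocks, pre * post);
}
```

On a device reporting, say, 81,920 resident threads (a made-up figure), the CUDA build gets `max_blocks = 160` and the HIP build 320: halving the block size doubles the block budget, while the grid stays bounded by the number of rows. The test-side changes follow the same theme, guarding on `core.is_compiled_with_rocm()` to drop to float32 for `sequence_softmax` and to skip `cholesky`, which the patch notes is unsupported on ROCm.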