diff --git a/paddle/fluid/operators/affine_channel_op.cu b/paddle/fluid/operators/affine_channel_op.cu index cddc288c24c2bdbe12c553a6266f960acb6860f8..5fa1e18553bd53231691f3fda9de251d646f5b29 100644 --- a/paddle/fluid/operators/affine_channel_op.cu +++ b/paddle/fluid/operators/affine_channel_op.cu @@ -71,7 +71,11 @@ class AffineChannelCUDAKernel : public framework::OpKernel { const T* bias_d = bias->data(); T* y_d = y->data(); +#ifdef PADDLE_WITH_HIP + int block = 256; +#else int block = 1024; +#endif // PADDLE_WITH_HIP int grid = (num + block - 1) / block; int max_threads = dev_ctx.GetMaxPhysicalThreadCount(); @@ -153,7 +157,11 @@ class AffineChannelGradCUDAKernel : public framework::OpKernel { T* ds_d = dscale ? dscale->mutable_data(ctx.GetPlace()) : nullptr; T* db_d = dbias ? dbias->mutable_data(ctx.GetPlace()) : nullptr; +#ifdef PADDLE_WITH_HIP + const int block = 256; +#else const int block = 1024; +#endif // PADDLE_WITH_HIP int max_threads = dev_ctx.GetMaxPhysicalThreadCount(); const int max_blocks = std::max(max_threads / block, 1); int grid1 = (num + block - 1) / block; diff --git a/paddle/fluid/operators/lookup_table_op.cu b/paddle/fluid/operators/lookup_table_op.cu index 3e06e5caed31797b191dabc9846105d7a2480ad5..6985b9167571733a3116e2485cf81b3a217f536c 100644 --- a/paddle/fluid/operators/lookup_table_op.cu +++ b/paddle/fluid/operators/lookup_table_op.cu @@ -105,9 +105,24 @@ class LookupTableCUDAKernel : public framework::OpKernel { auto *table = table_t->data(); auto *output = output_t->mutable_data(context.GetPlace()); +#ifdef PADDLE_WITH_HIP + dim3 threads(64, 4); +#else dim3 threads(128, 8); +#endif // PADDLE_WITH_HIP dim3 grids(8, 1); - +#ifdef PADDLE_WITH_HIP + if (padding_idx == -1) + LookupTable< + T, 64, 4, 8, + false><<>>( + output, table, ids, N, K, D, padding_idx); + else + LookupTable< + T, 64, 4, 8, + true><<>>( + output, table, ids, N, K, D, padding_idx); +#else if (padding_idx == -1) LookupTable< T, 128, 8, 8, @@ -118,6 +133,7 @@ class LookupTableCUDAKernel : public framework::OpKernel { T, 128, 8, 8, true><<>>( output, table, ids, N, K, D, padding_idx); +#endif // PADDLE_WITH_HIP } }; @@ -185,10 +201,20 @@ class LookupTableGradCUDAKernel : public framework::OpKernel { auto t = framework::EigenVector::Flatten(*d_table_t); t.device(*dev_ctx.eigen_device()) = t.constant(static_cast(0)); +#ifdef PADDLE_WITH_HIP + dim3 threads(64, 4); +#else dim3 threads(128, 8); +#endif // PADDLE_WITH_HIP dim3 grids(8, 1); + +#ifdef PADDLE_WITH_HIP + LookupTableGrad<<>>( + d_table, d_output, ids, N, K, D); +#else LookupTableGrad<<>>( d_table, d_output, ids, N, K, D); +#endif // PADDLE_WITH_HIP } } }; diff --git a/python/paddle/fluid/tests/unittests/test_cholesky_op.py b/python/paddle/fluid/tests/unittests/test_cholesky_op.py index 93f62b20f2997447e453988709a0f26fe2e47181..633aa2cd613b690ccbc6f2a87e904d61b99f548a 100644 --- a/python/paddle/fluid/tests/unittests/test_cholesky_op.py +++ b/python/paddle/fluid/tests/unittests/test_cholesky_op.py @@ -58,7 +58,7 @@ class TestCholeskyOp(OpTest): def test_check_grad(self): places = [fluid.CPUPlace()] - if core.is_compiled_with_cuda(): + if core.is_compiled_with_cuda() and (not core.is_compiled_with_rocm()): places.append(fluid.CUDAPlace(0)) for p in places: self.func(p) @@ -92,7 +92,10 @@ class TestCholeskyOp2D(TestCholeskyOp): class TestDygraph(unittest.TestCase): def test_dygraph(self): - paddle.disable_static() + if core.is_compiled_with_rocm(): + paddle.disable_static(place=fluid.CPUPlace()) + else: + paddle.disable_static() a = np.random.rand(3, 3) a_t = np.transpose(a, [1, 0]) x_data = np.matmul(a, a_t) + 1e-03 @@ -103,7 +106,7 @@ class TestDygraph(unittest.TestCase): class TestCholeskySingularAPI(unittest.TestCase): def setUp(self): self.places = [fluid.CPUPlace()] - if core.is_compiled_with_cuda(): + if core.is_compiled_with_cuda() and (not core.is_compiled_with_rocm()): self.places.append(fluid.CUDAPlace(0)) def check_static_result(self, place, with_out=False): diff --git a/python/paddle/fluid/tests/unittests/test_compare_op.py b/python/paddle/fluid/tests/unittests/test_compare_op.py index 63a43432b4e555542f395a69819c385be0c090e4..fbf7384b86bc1c844dee09c5b439a523026044e5 100644 --- a/python/paddle/fluid/tests/unittests/test_compare_op.py +++ b/python/paddle/fluid/tests/unittests/test_compare_op.py @@ -61,6 +61,9 @@ def create_test_class(op_type, typename, callback): for _type_name in {'float32', 'float64', 'int32', 'int64'}: + if _type_name == 'float64' and core.is_compiled_with_rocm(): + _type_name = 'float32' + create_test_class('less_than', _type_name, lambda _a, _b: _a < _b) create_test_class('less_equal', _type_name, lambda _a, _b: _a <= _b) create_test_class('greater_than', _type_name, lambda _a, _b: _a > _b)