From fd9d6fdac285ba34f0e9a91c53bc07859ae2dd1c Mon Sep 17 00:00:00 2001 From: AshburnLee <1578034415@qq.com> Date: Wed, 20 Jan 2021 22:52:23 +0800 Subject: [PATCH] [cherry-pick]Add tf32 support for A100 tensor core acceleration for cuBLAS (#28732) (#30612) * Add tf32 support for A100 tensor core acceleration for cuBLAS (#28732) * Fixed an error * Fixed an error --- paddle/fluid/platform/cuda_helper.h | 7 ++- paddle/fluid/platform/device_context.h | 14 ++++- paddle/fluid/pybind/pybind.cc | 1 + .../fluid/tests/unittests/test_tf32_cublas.py | 57 +++++++++++++++++++ 4 files changed, 76 insertions(+), 3 deletions(-) create mode 100644 python/paddle/fluid/tests/unittests/test_tf32_cublas.py diff --git a/paddle/fluid/platform/cuda_helper.h b/paddle/fluid/platform/cuda_helper.h index 721d64d8914..2a1f0b9ac5c 100644 --- a/paddle/fluid/platform/cuda_helper.h +++ b/paddle/fluid/platform/cuda_helper.h @@ -84,8 +84,13 @@ class CublasHandleHolder { if (math_type == CUBLAS_TENSOR_OP_MATH) { PADDLE_RETRY_CUDA_SUCCESS( dynload::cublasSetMathMode(handle_, CUBLAS_TENSOR_OP_MATH)); +#if CUDA_VERSION >= 11000 + } else if (math_type == CUBLAS_TF32_TENSOR_OP_MATH) { + PADDLE_ENFORCE_CUDA_SUCCESS( + dynload::cublasSetMathMode(handle_, CUBLAS_TF32_TENSOR_OP_MATH)); +#endif // CUDA_VERSION >= 11000 } -#endif +#endif // CUDA_VERSION >= 9000 } ~CublasHandleHolder() PADDLE_MAY_THROW { diff --git a/paddle/fluid/platform/device_context.h b/paddle/fluid/platform/device_context.h index 68f901e8af7..a6612a5061f 100644 --- a/paddle/fluid/platform/device_context.h +++ b/paddle/fluid/platform/device_context.h @@ -198,7 +198,11 @@ class CUDAContext { /*! \brief Call cublas function safely. */ template inline void CublasCall(Callback&& callback) const { - cublas_handle_->Call(std::forward(callback)); + if (cublas_tf32_tensor_core_handle_) { + cublas_tf32_tensor_core_handle_->Call(std::forward(callback)); + } else { + cublas_handle_->Call(std::forward(callback)); + } } /*! \brief Check whether tensor core is supported */ @@ -225,7 +229,11 @@ class CUDAContext { #if CUDA_VERSION >= 9000 cublas_tensor_core_handle_.reset( new CublasHandleHolder(RawStream(), CUBLAS_TENSOR_OP_MATH)); -#endif +#if CUDA_VERSION >= 11000 + cublas_tf32_tensor_core_handle_.reset( + new CublasHandleHolder(RawStream(), CUBLAS_TF32_TENSOR_OP_MATH)); +#endif // CUDA_VERSION >= 11000 +#endif // CUDA_VERSION >= 9000 } } @@ -268,6 +276,7 @@ class CUDAContext { void DestoryCuBlasContext() { cublas_handle_.reset(); cublas_tensor_core_handle_.reset(); + cublas_tf32_tensor_core_handle_.reset(); } void DestoryCuSolverContext() { @@ -284,6 +293,7 @@ class CUDAContext { cudnnHandle_t cudnn_handle_; std::unique_ptr cublas_handle_; std::unique_ptr cublas_tensor_core_handle_; + std::unique_ptr cublas_tf32_tensor_core_handle_; cusolverDnHandle_t cusolver_dn_handle_; DISABLE_COPY_AND_ASSIGN(CUDAContext); }; diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index 0d365f2b3a5..72b3c9645ba 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -58,6 +58,7 @@ limitations under the License. */ #include "paddle/fluid/operators/py_func_op.h" #include "paddle/fluid/platform/cpu_helper.h" #include "paddle/fluid/platform/cpu_info.h" +#include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/dynload/dynamic_loader.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/init.h" diff --git a/python/paddle/fluid/tests/unittests/test_tf32_cublas.py b/python/paddle/fluid/tests/unittests/test_tf32_cublas.py new file mode 100644 index 00000000000..32d8c3dc322 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_tf32_cublas.py @@ -0,0 +1,57 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import six +import numpy as np +import paddle +import paddle.fluid as fluid +import paddle.fluid.core as core + + +class TestTF32Switch(unittest.TestCase): + def test_on_off(self): + if core.is_compiled_with_cuda(): + place = fluid.CUDAPlace(0) + self.assertTrue(core.get_cublas_switch()) # default + core.set_cublas_switch(False) + self.assertFalse(core.get_cublas_switch()) # turn off + core.set_cublas_switch(True) + self.assertTrue(core.get_cublas_switch()) # turn on + + core.set_cublas_switch(True) # restore the switch + else: + pass + + +class TestTF32OnMatmul(unittest.TestCase): + def test_dygraph_without_out(self): + if core.is_compiled_with_cuda(): + place = fluid.CUDAPlace(0) + core.set_cublas_switch(False) # turn off + with fluid.dygraph.guard(place): + input_array1 = np.random.rand(4, 12, 64, 88).astype("float32") + input_array2 = np.random.rand(4, 12, 88, 512).astype("float32") + data1 = paddle.to_tensor(input_array1) + data2 = paddle.to_tensor(input_array2) + out = paddle.matmul(data1, data2) + expected_result = np.matmul(input_array1, input_array2) + self.assertTrue(np.allclose(expected_result, out.numpy(), 1e-03)) + core.set_cublas_switch(True) # restore the switch + else: + pass + + +if __name__ == '__main__': + unittest.main() -- GitLab