Unverified commit ba415ee7, authored by HongyuJia, committed by GitHub

[CppExtension Cuda] Add cuda unit test for CppExtension (#52900) (#53269)

* [CppExtension Cuda] Add cuda unit test for CppExtension

* update extra_compile_args for CUDAExtension

* add debug info

* Add patch to fix CUDA12 compile error

* patch for all env

* add windows judgement

* Try to fix setup function not found error

* fix mix_relu_and_extension include file

* fix setup compile error

* remove useless debug comments

* add sleep, debug CI-build

* add space to disable cmake cache

* remove debug info

* add space to pass CI-build
Parent ac01ddda
@@ -21,6 +21,14 @@ set(PYBIND_TAG v2.10.3)
 set(PYBIND_INCLUDE_DIR ${THIRD_PARTY_PATH}/pybind/src/extern_pybind/include)
 include_directories(${PYBIND_INCLUDE_DIR})
 
+set(PYBIND_PATCH_COMMAND "")
+if(NOT WIN32)
+  file(TO_NATIVE_PATH ${PADDLE_SOURCE_DIR}/patches/pybind/cast.h.patch
+       native_dst)
+  set(PYBIND_PATCH_COMMAND patch -d ${PYBIND_INCLUDE_DIR}/pybind11 <
+      ${native_dst})
+endif()
+
 ExternalProject_Add(
   extern_pybind
   ${EXTERNAL_PROJECT_LOG_ARGS} ${SHALLOW_CLONE}
@@ -33,6 +41,7 @@ ExternalProject_Add(
   # third-party library version changes cannot be incorporated.
   # reference: https://cmake.org/cmake/help/latest/module/ExternalProject.html
   UPDATE_COMMAND ""
+  PATCH_COMMAND ${PYBIND_PATCH_COMMAND}
   CONFIGURE_COMMAND ""
   BUILD_COMMAND ""
   INSTALL_COMMAND ""
......
@@ -82,6 +82,7 @@ else()
   set(WARPCTC_CXX_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE})
   set(WARPCTC_CXX_FLAGS_DEBUG ${CMAKE_CXX_FLAGS_DEBUG})
 endif()
+
 ExternalProject_Add(
   extern_warpctc
   ${EXTERNAL_PROJECT_LOG_ARGS} ${SHALLOW_CLONE}
......
diff --git a/include/pybind11/cast.h b/include/pybind11/cast.h
index 3a404602..9054478c 100644
--- a/include/pybind11/cast.h
+++ b/include/pybind11/cast.h
@@ -42,7 +42,9 @@ using make_caster = type_caster<intrinsic_t<type>>;
// Shortcut for calling a caster's `cast_op_type` cast operator for casting a type_caster to a T
template <typename T>
typename make_caster<T>::template cast_op_type<T> cast_op(make_caster<T> &caster) {
- return caster.operator typename make_caster<T>::template cast_op_type<T>();
+ // https://github.com/pybind/pybind11/issues/4606 with CUDA 12
+ //return caster.operator typename make_caster<T>::template cast_op_type<T>();
+ return caster;
}
template <typename T>
typename make_caster<T>::template cast_op_type<typename std::add_rvalue_reference<T>::type>
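Editor's note (not part of the commit): the workaround relies on ordinary C++ conversion semantics. `return caster;` performs implicitly the same user-defined conversion that the removed line invoked explicitly, avoiding the dependent `operator typename ... template ...` spelling that nvcc 12 mis-parses (pybind/pybind11#4606). A minimal sketch with a hypothetical Caster type standing in for make_caster<T>:

// Hypothetical Caster type (not from pybind11), showing that an implicit
// conversion on return selects the same templated conversion operator as
// the explicit spelling rejected by the CUDA 12 front end.
struct Caster {
  int value = 7;
  template <typename T>
  operator T() { return static_cast<T>(value); }  // user-defined conversion
};

double explicit_call(Caster c) {
  return c.operator double();  // explicit call; the *dependent* variant of
                               // this spelling is what nvcc 12 fails to parse
}

double implicit_call(Caster c) {
  return c;  // implicit conversion invokes the same operator
}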
@@ -15,7 +15,9 @@
 import os
 from site import getsitepackages
 
-from paddle.utils.cpp_extension import CppExtension, setup
+from utils import extra_compile_args
+
+from paddle.utils.cpp_extension import CUDAExtension, setup
 
 paddle_includes = []
 for site_packages_path in getsitepackages():
@@ -30,10 +32,14 @@ paddle_includes.append(os.path.dirname(os.path.abspath(__file__)))
 
 setup(
     name='custom_cpp_extension',
-    ext_modules=CppExtension(
-        sources=["custom_extension.cc", "custom_sub.cc"],
+    ext_modules=CUDAExtension(
+        sources=[
+            "custom_extension.cc",
+            "custom_sub.cc",
+            "custom_relu_forward.cu",
+        ],
         include_dirs=paddle_includes,
-        extra_compile_args={'cc': ['-w', '-g']},
+        extra_compile_args=extra_compile_args,
         verbose=True,
     ),
 )
@@ -20,6 +20,8 @@
 
 paddle::Tensor custom_sub(paddle::Tensor x, paddle::Tensor y);
 
+paddle::Tensor relu_cuda_forward(const paddle::Tensor& x);
+
 paddle::Tensor custom_add(const paddle::Tensor& x, const paddle::Tensor& y) {
   return x.exp() + y.exp();
 }
@@ -46,6 +48,7 @@ PYBIND11_MODULE(custom_cpp_extension, m) {
   m.def("nullable_tensor", &nullable_tensor, "returned Tensor might be None");
   m.def(
       "optional_tensor", &optional_tensor, "returned Tensor might be optional");
+  m.def("relu_cuda_forward", &relu_cuda_forward, "relu(x)");
 
   py::class_<Power>(m, "Power")
       .def(py::init<int, int>())
......
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/extension.h"

#define CHECK_GPU_INPUT(x) PD_CHECK(x.is_gpu(), #x " must be a GPU Tensor.")

template <typename data_t>
__global__ void relu_cuda_forward_kernel(const data_t* x,
                                         data_t* y,
                                         int64_t num) {
  int64_t gid = blockIdx.x * blockDim.x + threadIdx.x;
  for (int64_t i = gid; i < num; i += blockDim.x * gridDim.x) {
    y[i] = x[i] > static_cast<data_t>(0.) ? x[i] : static_cast<data_t>(0.);
  }
}

paddle::Tensor relu_cuda_forward(const paddle::Tensor& x) {
  CHECK_GPU_INPUT(x);
  auto out = paddle::empty_like(x);

  PD_CHECK(x.place() == paddle::DefaultGPUPlace());
  int64_t numel = x.numel();
  int64_t block = 512;
  int64_t grid = (numel + block - 1) / block;
  PD_DISPATCH_FLOATING_AND_HALF_TYPES(
      x.type(), "relu_cuda_forward_kernel", ([&] {
        relu_cuda_forward_kernel<data_t><<<grid, block, 0, x.stream()>>>(
            x.data<data_t>(), out.data<data_t>(), numel);
      }));

  return out;
}
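Editor's note (not part of the commit): `grid = (numel + block - 1) / block` is the usual ceiling division for the launch configuration, and the grid-stride `for` loop in the kernel stays correct even if the grid were capped below that count. A tiny self-contained check of the arithmetic, using the same block = 512 and the 4x8 (numel = 32) tensor the unit tests below use:

// Sketch of the ceil-division used for the launch config above.
#include <cassert>
#include <cstdint>

int64_t ceil_div(int64_t numel, int64_t block) {
  return (numel + block - 1) / block;
}

int main() {
  assert(ceil_div(32, 512) == 1);   // 4x8 test tensor: one partly-filled block
  assert(ceil_div(512, 512) == 1);  // exact fit
  assert(ceil_div(513, 512) == 2);  // one extra element needs a second block
  return 0;
}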
@@ -21,7 +21,7 @@ from paddle.utils.cpp_extension import CppExtension, setup
 setup(
     name='mix_relu_extension',
     ext_modules=CppExtension(
-        sources=["mix_relu_and_extension.cc", "custom_sub.cc"],
+        sources=["mix_relu_and_extension.cc"],
         include_dirs=paddle_includes
         + [os.path.dirname(os.path.abspath(__file__))],
         extra_compile_args={'cc': ['-w', '-g']},
......
@@ -18,6 +18,7 @@ import unittest
 from site import getsitepackages
 
 import numpy as np
+from utils import check_output
 
 import paddle
 from paddle.utils.cpp_extension import load
@@ -27,7 +28,7 @@ if os.name == 'nt' or sys.platform.startswith('darwin'):
     sys.exit()
 
 # Compile and load cpp extension Just-In-Time.
-sources = ["custom_extension.cc", "custom_sub.cc"]
+sources = ["custom_extension.cc", "custom_sub.cc", "custom_relu_forward.cu"]
 paddle_includes = []
 for site_packages_path in getsitepackages():
     paddle_includes.append(
@@ -69,6 +70,8 @@ class TestCppExtensionJITInstall(unittest.TestCase):
         self._test_extension_class()
         self._test_nullable_tensor()
         self._test_optional_tensor()
+        if paddle.is_compiled_with_cuda():
+            self._test_cuda_relu()
 
     def _test_extension_function(self):
         for dtype in self.dtypes:
@@ -130,6 +133,14 @@ class TestCppExtensionJITInstall(unittest.TestCase):
                 err_msg=f'extension out: {x},\n numpy out: {x_np}',
             )
 
+    def _test_cuda_relu(self):
+        paddle.set_device('gpu')
+        x = np.random.uniform(-1, 1, [4, 8]).astype('float32')
+        x = paddle.to_tensor(x, dtype='float32')
+        out = custom_cpp_extension.relu_cuda_forward(x)
+        pd_out = paddle.nn.functional.relu(x)
+        check_output(out, pd_out, "out")
+
 
 if __name__ == '__main__':
     unittest.main()
@@ -18,6 +18,7 @@ import sys
 import unittest
 
 import numpy as np
+from utils import check_output
 
 import paddle
 from paddle import static
@@ -154,6 +155,8 @@ class TestCppExtensionSetupInstall(unittest.TestCase):
         self._test_static()
         self._test_dynamic()
         self._test_double_grad_dynamic()
+        if paddle.is_compiled_with_cuda():
+            self._test_cuda_relu()
 
     def _test_extension_function_plain(self):
         import custom_cpp_extension
@@ -314,6 +317,16 @@ class TestCppExtensionSetupInstall(unittest.TestCase):
             ),
         )
 
+    def _test_cuda_relu(self):
+        import custom_cpp_extension
+
+        paddle.set_device('gpu')
+        x = np.random.uniform(-1, 1, [4, 8]).astype('float32')
+        x = paddle.to_tensor(x, dtype='float32')
+        out = custom_cpp_extension.relu_cuda_forward(x)
+        pd_out = paddle.nn.functional.relu(x)
+        check_output(out, pd_out, "out")
+
 
 if __name__ == '__main__':
     if os.name == 'nt' or sys.platform.startswith('darwin'):
......
@@ -16,6 +16,8 @@ import os
 import sys
 from site import getsitepackages
 
+import numpy as np
+
 from paddle.utils.cpp_extension.extension_utils import IS_WINDOWS
 
 IS_MAC = sys.platform.startswith('darwin')
@@ -37,3 +39,43 @@ for site_packages_path in getsitepackages():
 extra_cc_args = ['-w', '-g'] if not IS_WINDOWS else ['/w']
 extra_nvcc_args = ['-O3']
 extra_compile_args = {'cc': extra_cc_args, 'nvcc': extra_nvcc_args}
+
+
+def check_output(out, pd_out, name):
+    if out is None and pd_out is None:
+        return
+    assert out is not None, "out value of " + name + " is None"
+    assert pd_out is not None, "pd_out value of " + name + " is None"
+    if isinstance(out, list) and isinstance(pd_out, list):
+        for idx in range(len(out)):
+            np.testing.assert_array_equal(
+                out[idx],
+                pd_out[idx],
+                err_msg='custom op {}: {},\n paddle api {}: {}'.format(
+                    name, out[idx], name, pd_out[idx]
+                ),
+            )
+    else:
+        np.testing.assert_array_equal(
+            out,
+            pd_out,
+            err_msg='custom op {}: {},\n paddle api {}: {}'.format(
+                name, out, name, pd_out
+            ),
+        )
+
+
+def check_output_allclose(out, pd_out, name, rtol=5e-5, atol=1e-2):
+    if out is None and pd_out is None:
+        return
+    assert out is not None, "out value of " + name + " is None"
+    assert pd_out is not None, "pd_out value of " + name + " is None"
+    np.testing.assert_allclose(
+        out,
+        pd_out,
+        rtol,
+        atol,
+        err_msg='custom op {}: {},\n paddle api {}: {}'.format(
+            name, out, name, pd_out
+        ),
+    )
@@ -180,6 +180,9 @@ def custom_write_stub(resource, pyfile):
     def __bootstrap__():
         assert os.path.exists(so_path)
 
+        # load custom op shared library with abs path
+        custom_ops = paddle.utils.cpp_extension.load_op_meta_info_and_register_op(so_path)
+
         if os.name == 'nt' or sys.platform.startswith('darwin'):
             # Cpp Extension only support Linux now
             mod = types.ModuleType(__name__)
@@ -193,10 +196,8 @@ def custom_write_stub(resource, pyfile):
         except ImportError:
             mod = types.ModuleType(__name__)
 
-        # load custom op shared library with abs path
-        custom_ops = paddle.utils.cpp_extension.load_op_meta_info_and_register_op(so_path)
-        for custom_ops in custom_ops:
-            setattr(mod, custom_ops, eval(custom_ops))
+        for custom_op in custom_ops:
+            setattr(mod, custom_op, eval(custom_op))
 
     __bootstrap__()
......