diff --git a/dnn/src/cuda/utils.cpp b/dnn/src/cuda/utils.cpp
index be8175d19711e42fae92d1115b645d78b69aa004..52335d612a13339fa5de7030688b7dde7e833fb3 100644
--- a/dnn/src/cuda/utils.cpp
+++ b/dnn/src/cuda/utils.cpp
@@ -6,7 +6,8 @@
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
  */
 #include "src/cuda/utils.cuh"
 #include "src/cuda/utils.h"
@@ -30,49 +31,48 @@ struct DevicePropRec {
 constexpr int MAX_NR_DEVICE = 32;
 DevicePropRec device_prop_rec[MAX_NR_DEVICE];
 
-const char *cublasGetErrorString(cublasStatus_t error) {
-    switch (error)
-    {
-        case CUBLAS_STATUS_SUCCESS:
-            return "CUBLAS_STATUS_SUCCESS";
-        case CUBLAS_STATUS_NOT_INITIALIZED:
-            return "CUBLAS_STATUS_NOT_INITIALIZED";
-        case CUBLAS_STATUS_ALLOC_FAILED:
-            return "CUBLAS_STATUS_ALLOC_FAILED";
-        case CUBLAS_STATUS_INVALID_VALUE:
-            return "CUBLAS_STATUS_INVALID_VALUE";
-        case CUBLAS_STATUS_ARCH_MISMATCH:
-            return "CUBLAS_STATUS_ARCH_MISMATCH";
-        case CUBLAS_STATUS_MAPPING_ERROR:
-            return "CUBLAS_STATUS_MAPPING_ERROR";
-        case CUBLAS_STATUS_EXECUTION_FAILED:
-            return "CUBLAS_STATUS_EXECUTION_FAILED";
-        case CUBLAS_STATUS_INTERNAL_ERROR:
-            return "CUBLAS_STATUS_INTERNAL_ERROR";
-        case CUBLAS_STATUS_LICENSE_ERROR:
-            return "CUBLAS_STATUS_LICENSE_ERROR";
-        case CUBLAS_STATUS_NOT_SUPPORTED:
-            return "CUBLAS_STATUS_NOT_SUPPORTED";
-    }
-    return "Unknown CUBLAS error";
+const char* cublasGetErrorString(cublasStatus_t error) {
+    switch (error) {
+        case CUBLAS_STATUS_SUCCESS:
+            return "CUBLAS_STATUS_SUCCESS";
+        case CUBLAS_STATUS_NOT_INITIALIZED:
+            return "CUBLAS_STATUS_NOT_INITIALIZED";
+        case CUBLAS_STATUS_ALLOC_FAILED:
+            return "CUBLAS_STATUS_ALLOC_FAILED";
+        case CUBLAS_STATUS_INVALID_VALUE:
+            return "CUBLAS_STATUS_INVALID_VALUE";
+        case CUBLAS_STATUS_ARCH_MISMATCH:
+            return "CUBLAS_STATUS_ARCH_MISMATCH";
+        case CUBLAS_STATUS_MAPPING_ERROR:
+            return "CUBLAS_STATUS_MAPPING_ERROR";
+        case CUBLAS_STATUS_EXECUTION_FAILED:
+            return "CUBLAS_STATUS_EXECUTION_FAILED";
+        case CUBLAS_STATUS_INTERNAL_ERROR:
+            return "CUBLAS_STATUS_INTERNAL_ERROR";
+        case CUBLAS_STATUS_LICENSE_ERROR:
+            return "CUBLAS_STATUS_LICENSE_ERROR";
+        case CUBLAS_STATUS_NOT_SUPPORTED:
+            return "CUBLAS_STATUS_NOT_SUPPORTED";
+    }
+    return "Unknown CUBLAS error";
 }
-} // anonymous namespace
+}  // anonymous namespace
 
-void cuda::__throw_cuda_error__(cudaError_t err, const char *msg) {
+void cuda::__throw_cuda_error__(cudaError_t err, const char* msg) {
     auto s = ssprintf("cuda error %s(%d) occurred; expr: %s",
-            cudaGetErrorString(err), int(err), msg);
+                      cudaGetErrorString(err), int(err), msg);
     megdnn_throw(s.c_str());
 }
 
-void cuda::__throw_cudnn_error__(cudnnStatus_t err, const char *msg) {
+void cuda::__throw_cudnn_error__(cudnnStatus_t err, const char* msg) {
     auto s = ssprintf("cudnn error %s(%d) occurred; expr: %s",
-            cudnnGetErrorString(err), int(err), msg);
+                      cudnnGetErrorString(err), int(err), msg);
     megdnn_throw(s.c_str());
 }
 
-void cuda::__throw_cublas_error__(cublasStatus_t err, const char *msg) {
+void cuda::__throw_cublas_error__(cublasStatus_t err, const char* msg) {
     auto s = ssprintf("cublas error %s(%d) occurred; expr: %s",
-            cublasGetErrorString(err), int(err), msg);
+                      cublasGetErrorString(err), int(err), msg);
     megdnn_throw(s.c_str());
 }
 
@@ -92,17 +92,17 @@ void cuda::__throw_cutlass_error__(cutlass::Status err,
                                    const char* msg) {
     megdnn_throw(s.c_str());
 }
 
-void cuda::report_error(const char *msg) {
+void cuda::report_error(const char* msg) {
     megdnn_throw(msg);
     MEGDNN_MARK_USED_VAR(msg);
 }
 
 uint32_t cuda::safe_size_in_kern(size_t size) {
     if (!size || size > Uint32Fastdiv::MAX_DIVIDEND) {
-        megdnn_throw(ssprintf(
-                "invalid size for element-wise kernel: %zu; "
-                "max supported size is %u",
-                size, Uint32Fastdiv::MAX_DIVIDEND));
+        megdnn_throw(
+                ssprintf("invalid size for element-wise kernel: %zu; "
+                         "max supported size is %u",
+                         size, Uint32Fastdiv::MAX_DIVIDEND));
     }
     return size;
 }
@@ -111,7 +111,7 @@ cudaDeviceProp cuda::current_device_prop() {
     int dev;
     cuda_check(cudaGetDevice(&dev));
     megdnn_assert(dev < MAX_NR_DEVICE, "device number too large: %d", dev);
-    auto &&rec = device_prop_rec[dev];
+    auto&& rec = device_prop_rec[dev];
     if (!rec.init) {
         std::lock_guard<std::mutex> lock(rec.mtx);
         if (!rec.init) {
@@ -137,6 +137,19 @@ size_t cuda::max_batch_x_channel_size() {
     return current_device_prop().maxGridSize[2];
 }
 
+uint32_t cuda::param_buffer_start_address() {
+    auto&& device_prop = current_device_prop();
+    int cap = 10 * device_prop.major + device_prop.minor;
+    // maxwell and pascal: 0x140
+    if (cap >= 50 && cap < 70)
+        return 0x140;
+    // volta ~ ampere: 0x160
+    else if (cap >= 70)
+        return 0x160;
+    megdnn_throw(
+            ssprintf("unsupported cuda compute capability %d", cap).c_str());
+}
+
 const char* cuda::current_device_arch_name() {
     auto&& device_prop = current_device_prop();
     int cap = 10 * device_prop.major + device_prop.minor;
@@ -155,4 +168,3 @@ const char* cuda::current_device_arch_name() {
 }
 
 // vim: syntax=cpp.doxygen
-
diff --git a/dnn/src/cuda/utils.h b/dnn/src/cuda/utils.h
index 542eb6bcd542ac8f03c22594876fd30af5904b8c..05fc028488fa94929174935122f9102e2db0f18c 100644
--- a/dnn/src/cuda/utils.h
+++ b/dnn/src/cuda/utils.h
@@ -6,7 +6,8 @@
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
  */
 
 #pragma once
@@ -24,19 +25,19 @@ namespace megdnn {
 namespace cuda {
 
-static inline HandleImpl *concrete_handle(Handle *handle) {
+static inline HandleImpl* concrete_handle(Handle* handle) {
     return static_cast<HandleImpl*>(handle);
 }
 
-static inline cudnnHandle_t cudnn_handle(Handle *handle) {
+static inline cudnnHandle_t cudnn_handle(Handle* handle) {
     return concrete_handle(handle)->cudnn_handle();
 }
 
-static inline cublasHandle_t cublas_handle(Handle *handle) {
+static inline cublasHandle_t cublas_handle(Handle* handle) {
     return concrete_handle(handle)->cublas_handle();
 }
 
-static inline cudaStream_t cuda_stream(Handle *handle) {
+static inline cudaStream_t cuda_stream(Handle* handle) {
     return concrete_handle(handle)->stream();
 }
 
@@ -44,9 +45,8 @@ static inline megcore::AsyncErrorInfo* async_error_info(Handle* handle) {
     return concrete_handle(handle)->megcore_context().error_info;
 }
 
-static inline void CUDART_CB callback_free(
-        cudaStream_t /* stream */, cudaError_t status, void *userData)
-{
+static inline void CUDART_CB callback_free(cudaStream_t /* stream */,
+                                           cudaError_t status, void* userData) {
     cuda_check(status);
     free(userData);
 }
@@ -64,9 +64,12 @@ bool is_compute_capability_equalto(int major, int minor);
 //! third demension
 size_t max_batch_x_channel_size();
 
+//! get param buffer start address at cmem[0]
+uint32_t param_buffer_start_address();
+
 const char* current_device_arch_name();
 
-} // namespace cuda
-} // namespace megdnn
+}  // namespace cuda
+}  // namespace megdnn
 
 // vim: syntax=cpp.doxygen
diff --git a/dnn/test/common/conv_bias.cpp b/dnn/test/common/conv_bias.cpp
index 030bf12ce2ee6e66042e2d1ed289d3adb9c237e9..14a1b26c7d87fbef46d378ca22b3c930571bbee4 100644
--- a/dnn/test/common/conv_bias.cpp
+++ b/dnn/test/common/conv_bias.cpp
@@ -6,7 +6,8 @@
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
  */
 #include "test/common/conv_bias.h"
 #include "megdnn/opr_param_defs.h"
@@ -413,7 +414,7 @@ std::vector<TestArg> get_int8_nchw44_args(size_t kernel_size, size_t pack_size,
     megdnn_assert(kernel_size > 0, "not support kernel_size");
     using NLMode = param::ConvBias::NonlineMode;
 
-    //// clang-format off
+    // clang-format off
     for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU}) {
     for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION}) {
     for (size_t b : {1,2}) {
@@ -795,7 +796,7 @@ void check_conv_bias(DType src_dtype, DType filter_dtype, DType bias_dtype,
         return z;
     };
     megdnn_assert(rng != nullptr && bias_rng != nullptr);
-    checker.set_rng(0, rng.get())
+    checker.set_rng(0, rng.get())
             .set_rng(1, rng.get())
             .set_rng(2, rng.get())
             .set_rng(3, rng.get());
@@ -1152,8 +1153,7 @@ void winograd_algo_extra_impl(const TensorNDArray& tensors, uint32_t m,
             handle->create_operator<WinogradFilterPreprocess>();
     winograd_preprocess_opr->param().output_block_size = m;
     winograd_preprocess_opr->param().format = format;
-    winograd_preprocess_opr->param().compute_mode =
-            param.compute_mode;
+    winograd_preprocess_opr->param().compute_mode = param.compute_mode;
     TensorLayout filter_transform_layout;
     winograd_preprocess_opr->deduce_layout(tensors[1].layout,
                                            filter_transform_layout);
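
For context, the new cuda::param_buffer_start_address() helper reports where the
CUDA kernel parameter buffer begins in constant bank 0 (cmem[0]): 0x140 on
Maxwell/Pascal (sm_50..sm_6x) and 0x160 on Volta and later (sm_70+), matching the
comments in the patch. Below is a minimal sketch of how a SASS-level tool might
use it to locate the k-th 4-byte parameter word. param_word_offset and the main
driver are hypothetical illustrations, not part of the patch, and the helper
needs a visible CUDA device because it queries the current device's compute
capability.

    #include <cstdint>
    #include <cstdio>

    #include "src/cuda/utils.h"

    using namespace megdnn;

    // cmem[0] byte offset of the word at `word_idx` inside the kernel
    // parameter buffer; parameters are packed from the start address, so
    // word k lives at start + 4 * k (assuming 4-byte words, as in SASS
    // operands like "c[0x0][0x160]")
    static uint32_t param_word_offset(uint32_t word_idx) {
        return cuda::param_buffer_start_address() + 4 * word_idx;
    }

    int main() {
        // on an sm_70+ device this prints c[0x0][0x160], c[0x0][0x164], ...
        for (uint32_t i = 0; i < 4; ++i)
            std::printf("param word %u -> c[0x0][0x%x]\n", (unsigned)i,
                        (unsigned)param_word_offset(i));
        return 0;
    }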