From 01063218f273931f6856777b7aa797109fedbbae Mon Sep 17 00:00:00 2001
From: Aurelius84
Date: Sat, 18 Sep 2021 13:37:34 +0800
Subject: [PATCH] split cuda_profiler into .h and .cc (#35821)

* split cuda_profiler into .h and .cc

* fix cmake

* remove inline
---
Review notes (placed after the "---" separator, so they are not part of the
commit message):

* cuda_profiler.h keeps only the declarations of CudaProfilerInit,
  CudaProfilerStart, CudaProfilerStop and, when _WIN32 is not defined,
  CudaNvtxRangePush / CudaNvtxRangePop. Their bodies move unchanged into the
  new cuda_profiler.cc.
* platform/CMakeLists.txt builds the new source as nv_library(cuda_profiler)
  inside IF(WITH_GPU) only; pybind/CMakeLists.txt appends cuda_profiler to
  PYBIND_DEPS under a matching WITH_GPU condition.
* NOTE(review): the new target declares "DEPS enforce" only, while
  cuda_profiler.cc also calls dynload::nvtxRangePushA and
  dynload::nvtxRangePop. Confirm that the library providing those symbols is
  reached through another dependency.
* NOTE(review): this copy was rebuilt from a whitespace-collapsed version of
  the patch. Hunk line counts were re-checked against the @@ headers and the
  diffstat; indentation inside lines and the blob hashes on the "index" lines
  were not verified against the repository.

 paddle/fluid/platform/CMakeLists.txt   |  1 +
 paddle/fluid/platform/cuda_profiler.cc | 45 ++++++++++++++++++++++++++
 paddle/fluid/platform/cuda_profiler.h  | 21 +++---------
 paddle/fluid/pybind/CMakeLists.txt     |  4 +++
 4 files changed, 55 insertions(+), 16 deletions(-)
 create mode 100644 paddle/fluid/platform/cuda_profiler.cc

diff --git a/paddle/fluid/platform/CMakeLists.txt b/paddle/fluid/platform/CMakeLists.txt
index d0e701b0235..2540170ed54 100644
--- a/paddle/fluid/platform/CMakeLists.txt
+++ b/paddle/fluid/platform/CMakeLists.txt
@@ -60,6 +60,7 @@ cc_test(cpu_info_test SRCS cpu_info_test.cc DEPS cpu_info)
 
 IF(WITH_GPU)
   nv_library(gpu_info SRCS gpu_info.cc DEPS gflags glog enforce monitor dynload_cuda)
+  nv_library(cuda_profiler SRCS cuda_profiler.cc DEPS enforce)
 ENDIF()
 IF(WITH_ROCM)
   hip_library(gpu_info SRCS gpu_info.cc DEPS gflags glog enforce monitor dynload_cuda)
diff --git a/paddle/fluid/platform/cuda_profiler.cc b/paddle/fluid/platform/cuda_profiler.cc
new file mode 100644
index 00000000000..998dd80dc5e
--- /dev/null
+++ b/paddle/fluid/platform/cuda_profiler.cc
@@ -0,0 +1,45 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/platform/cuda_profiler.h"
+
+namespace paddle {
+namespace platform {
+
+void CudaProfilerInit(std::string output_file, std::string output_mode,
+                      std::string config_file) {
+  PADDLE_ENFORCE(output_mode == "kvp" || output_mode == "csv",
+                 platform::errors::InvalidArgument(
+                     "Unsupported cuda profiler output mode, expect `kvp` or "
+                     "`csv`, but received `%s`.",
+                     output_mode));
+  cudaOutputMode_t mode = output_mode == "csv" ? cudaCSV : cudaKeyValuePair;
+  PADDLE_ENFORCE_CUDA_SUCCESS(
+      cudaProfilerInitialize(config_file.c_str(), output_file.c_str(), mode));
+}
+
+void CudaProfilerStart() { PADDLE_ENFORCE_CUDA_SUCCESS(cudaProfilerStart()); }
+
+void CudaProfilerStop() { PADDLE_ENFORCE_CUDA_SUCCESS(cudaProfilerStop()); }
+
+#ifndef _WIN32
+void CudaNvtxRangePush(std::string name) {
+  dynload::nvtxRangePushA(name.c_str());
+}
+
+void CudaNvtxRangePop() { dynload::nvtxRangePop(); }
+#endif
+
+}  // namespace platform
+}  // namespace paddle
diff --git a/paddle/fluid/platform/cuda_profiler.h b/paddle/fluid/platform/cuda_profiler.h
index 6edc141205a..5780b877d1a 100644
--- a/paddle/fluid/platform/cuda_profiler.h
+++ b/paddle/fluid/platform/cuda_profiler.h
@@ -24,27 +24,16 @@ namespace paddle {
 namespace platform {
 
 void CudaProfilerInit(std::string output_file, std::string output_mode,
-                      std::string config_file) {
-  PADDLE_ENFORCE(output_mode == "kvp" || output_mode == "csv",
-                 platform::errors::InvalidArgument(
-                     "Unsupported cuda profiler output mode, expect `kvp` or "
-                     "`csv`, but received `%s`.",
-                     output_mode));
-  cudaOutputMode_t mode = output_mode == "csv" ? cudaCSV : cudaKeyValuePair;
-  PADDLE_ENFORCE_CUDA_SUCCESS(
-      cudaProfilerInitialize(config_file.c_str(), output_file.c_str(), mode));
-}
+                      std::string config_file);
 
-void CudaProfilerStart() { PADDLE_ENFORCE_CUDA_SUCCESS(cudaProfilerStart()); }
+void CudaProfilerStart();
 
-void CudaProfilerStop() { PADDLE_ENFORCE_CUDA_SUCCESS(cudaProfilerStop()); }
+void CudaProfilerStop();
 
 #ifndef _WIN32
-void CudaNvtxRangePush(std::string name) {
-  dynload::nvtxRangePushA(name.c_str());
-}
+void CudaNvtxRangePush(std::string name);
 
-void CudaNvtxRangePop() { dynload::nvtxRangePop(); }
+void CudaNvtxRangePop();
 #endif
 
 }  // namespace platform
diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt
index 4ca46758838..b30e6c39f54 100644
--- a/paddle/fluid/pybind/CMakeLists.txt
+++ b/paddle/fluid/pybind/CMakeLists.txt
@@ -17,6 +17,10 @@ if (WITH_GPU OR WITH_ROCM)
   set(PYBIND_DEPS ${PYBIND_DEPS} cuda_device_guard)
 endif()
 
+if (WITH_GPU)
+  set(PYBIND_DEPS ${PYBIND_DEPS} cuda_profiler)
+endif()
+
 if (WITH_NCCL OR WITH_RCCL)
   set(PYBIND_DEPS ${PYBIND_DEPS} nccl_wrapper)
   set(PYBIND_DEPS ${PYBIND_DEPS} reducer)
-- 
GitLab