diff --git a/paddle/platform/cuda_profiler.h b/paddle/platform/cuda_profiler.h
new file mode 100644
index 0000000000000000000000000000000000000000..d3a6e597271f9c6e4b42f0da79f6452f02e76ddc
--- /dev/null
+++ b/paddle/platform/cuda_profiler.h
@@ -0,0 +1,70 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <cuda_profiler_api.h>
+
+#include <array>
+#include <cstdlib>
+#include <cstring>
+#include <fstream>
+#include <string>
+#include <vector>
+
+#include "paddle/platform/enforce.h"
+
+namespace paddle {
+namespace platform {
+
+// Counters and options written into the configuration file that is
+// consumed by the CUDA command-line profiler.
+static std::vector<std::string> kCudaProfileConfiguration = {
+    "gpustarttimestamp",
+    "gpuendtimestamp",
+    "gridsize3d",
+    "threadblocksize",
+    "dynsmemperblock",
+    "stasmemperblock",
+    "regperthread",
+    "memtransfersize",
+    "memtransferdir",
+    "memtransferhostmemtype",
+    "streamid",
+    "cacheconfigrequested",
+    "cacheconfigexecuted",
+    "countermodeaggregate",
+    "enableonstart 0",
+    "active_warps",
+    "active_cycles",
+};
+
+void CudaProfilerInit(std::string output_file, std::string output_mode) {
+  // Write the configuration above into a unique temporary file.
+  std::array<char, 128> buf;
+  std::string tmpl = "/tmp/cuda_profile_config.XXXXXX";
+  PADDLE_ENFORCE_LT(tmpl.size(), buf.size());
+  memcpy(buf.data(), tmpl.data(), tmpl.size() + 1);  // also copy the '\0'
+  auto result = mktemp(buf.data());
+  PADDLE_ENFORCE(strlen(result) != 0);
+  std::string config = result;
+
+  {
+    std::ofstream ofs(config, std::ios::out | std::ios::trunc);
+    PADDLE_ENFORCE(ofs.is_open(), "ofstream: ", ofs.rdstate());
+    for (const auto& line : kCudaProfileConfiguration) {
+      ofs << line << std::endl;
+    }
+  }
+
+  PADDLE_ENFORCE(output_mode == "key_value" || output_mode == "csv");
+  cudaOutputMode_t mode = output_mode == "csv" ? cudaCSV : cudaKeyValuePair;
+  PADDLE_ENFORCE(
+      cudaProfilerInitialize(config.c_str(), output_file.c_str(), mode));
+}
+
+void CudaProfilerStart() { PADDLE_ENFORCE(cudaProfilerStart()); }
+
+void CudaProfilerStop() { PADDLE_ENFORCE(cudaProfilerStop()); }
+
+}  // namespace platform
+}  // namespace paddle
diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc
index f55a1edce31ccf2498dcfcf0b30ba1012d7a7d1a..c16d3e0cbe01f90a5aa9a5d7a523cd4e282e4771 100644
--- a/paddle/pybind/pybind.cc
+++ b/paddle/pybind/pybind.cc
@@ -37,6 +37,7 @@ limitations under the License. */
 
 #ifdef PADDLE_WITH_CUDA
 #include "paddle/operators/nccl/nccl_gpu_common.h"
+#include "paddle/platform/cuda_profiler.h"
 #include "paddle/platform/gpu_info.h"
 #endif
 
@@ -460,6 +461,10 @@ All parameter, weight, gradient are variables in Paddle.
   m.def("op_support_gpu", OpSupportGPU);
 #ifdef PADDLE_WITH_CUDA
   m.def("get_cuda_device_count", platform::GetCUDADeviceCount);
+
+  m.def("nvprof_init", platform::CudaProfilerInit);
+  m.def("nvprof_start", platform::CudaProfilerStart);
+  m.def("nvprof_stop", platform::CudaProfilerStop);
 #endif
 
   return m.ptr();
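With the bindings registered above, the profiler can also be driven directly from Python through paddle.v2.fluid.core, independently of the wrapper module added below. A minimal sketch, assuming Paddle was built with PADDLE_WITH_CUDA so the three nvprof_* symbols exist; the output file name is a placeholder:

    import paddle.v2.fluid.core as core

    # Writes the counter configuration to a temporary file and initializes
    # the CUDA command-line profiler; the mode must be 'key_value' or 'csv'.
    core.nvprof_init("cuda_profiler_raw.txt", "csv")

    core.nvprof_start()
    # ... launch the GPU work to be profiled here ...
    core.nvprof_stop()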
diff --git a/python/paddle/v2/fluid/profiler.py b/python/paddle/v2/fluid/profiler.py
new file mode 100644
index 0000000000000000000000000000000000000000..b94ef67b48ec00a329e875fc671759fda6d925f6
--- /dev/null
+++ b/python/paddle/v2/fluid/profiler.py
@@ -0,0 +1,59 @@
+import paddle.v2.fluid.core as core
+
+
+def nvprof_init(output_file, output_mode=None):
+    """
+    Initializes the CUDA profiler.
+    This method must be called before nvprof_start.
+
+    :param output_file: The output file name.
+    :type output_file: string
+    :param output_mode: The output format, either key-value pairs or
+                        comma separated values. It should be 'key_value'
+                        or 'csv'. Defaults to 'csv'.
+    :type output_mode: string
+    """
+    if output_mode is None:
+        output_mode = 'csv'
+    if output_mode not in ['key_value', 'csv']:
+        raise ValueError("The output mode must be 'key_value' or 'csv'.")
+    core.nvprof_init(output_file, output_mode)
+
+
+def nvprof_start():
+    """
+    Enables profile collection by the active CUDA profiling tool.
+    """
+    core.nvprof_start()
+
+
+def nvprof_stop():
+    """
+    Disables profile collection.
+    """
+    core.nvprof_stop()
+
+
+class profiler(object):
+    def __init__(self, output_file, output_mode=None, enabled=True):
+        self.enabled = enabled
+        if not self.enabled:
+            return
+        self.entered = False
+        nvprof_init(output_file, output_mode)
+
+    def __enter__(self):
+        if not self.enabled:
+            return
+        if self.entered:
+            raise RuntimeError("The profiler traces are not reentrant")
+        self.entered = True
+        nvprof_start()
+        return self
+
+    def __exit__(self, exc_type, exc_value, tb):
+        if not self.enabled:
+            return False
+        nvprof_stop()
+        # Returning False lets any exception raised in the block propagate.
+        return False
diff --git a/python/paddle/v2/fluid/tests/test_profiler.py b/python/paddle/v2/fluid/tests/test_profiler.py
new file mode 100644
index 0000000000000000000000000000000000000000..7da7a28cf6e5cee0f5633e31703a9833963cade1
--- /dev/null
+++ b/python/paddle/v2/fluid/tests/test_profiler.py
@@ -0,0 +1,17 @@
+import numpy as np
+
+import paddle.v2.fluid.core as core
+import paddle.v2.fluid.framework as framework
+import paddle.v2.fluid.layers as layers
+import paddle.v2.fluid.profiler as profiler
+from paddle.v2.fluid.executor import Executor
+
+place = core.GPUPlace(0)
+exe = Executor(place)
+
+epoc = 8
+dshape = [4, 3, 28, 28]
+data = layers.data(name='data', shape=dshape, dtype='float32')
+conv = layers.conv2d(data, 20, 3, stride=[1, 1], padding=[1, 1])
+
+# Initialize the conv2d parameters before profiling the main program.
+exe.run(framework.default_startup_program(), feed={}, fetch_list=[])
+
+input = core.LoDTensor()
+with profiler.profiler("cuda_profiler.txt") as nvprof:
+    for i in range(epoc):
+        input.set(np.random.random(dshape).astype("float32"), place)
+        exe.run(framework.default_main_program(),
+                feed={'data': input},
+                fetch_list=[conv])
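Beyond the test above, the enabled flag on the context manager lets the same script run with profiling switched off. A small sketch under the same assumptions as the test (GPU build, GPUPlace(0) available); the output file name and the use_profiler switch are illustrative only:

    import numpy as np
    import paddle.v2.fluid.core as core
    import paddle.v2.fluid.profiler as profiler

    use_profiler = True  # flip to False to make the block below a no-op

    place = core.GPUPlace(0)
    tensor = core.LoDTensor()

    with profiler.profiler("conv_profile.txt",
                           output_mode='key_value',
                           enabled=use_profiler):
        # Any CUDA activity inside this block is recorded when profiling is
        # enabled; a host-to-device copy stands in for a real workload here.
        tensor.set(np.random.random([4, 3, 28, 28]).astype("float32"), place)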