diff --git a/paddle/platform/cuda_profiler.h b/paddle/platform/cuda_profiler.h index d3a6e597271f9c6e4b42f0da79f6452f02e76ddc..c096ce37c56d5d6c34d543dcd6889a560e44286c 100644 --- a/paddle/platform/cuda_profiler.h +++ b/paddle/platform/cuda_profiler.h @@ -14,33 +14,15 @@ limitations under the License. */ #pragma once #include +#include #include #include namespace paddle { namespace platform { -static std::vector kCudaProfileConfiguration = { - "gpustarttimestamp", - "gpuendtimestamp", - "gridsize3d", - "threadblocksize", - "dynsmemperblock", - "stasmemperblock", - "regperthread", - "memtransfersize", - "memtransferdir", - "memtransferhostmemtype", - "streamid", - "cacheconfigrequested", - "cacheconfigexecuted", - "countermodeaggregate", - "enableonstart 0", - "active_warps", - "active_cycles", -}; - -void CudaProfilerInit(std::string output_file, std::string output_mode) { +void CudaProfilerInit(std::string output_file, std::string output_mode, + std::vector config_flags) { std::array buf; std::string tmpl = "/tmp/cuda_profile_config.XXXXXX"; PADDLE_ENFORCE_LT(tmpl.size(), buf.size()); @@ -52,12 +34,12 @@ void CudaProfilerInit(std::string output_file, std::string output_mode) { { std::ofstream ofs(config, std::ios::out | std::ios::trunc); PADDLE_ENFORCE(ofs.is_open(), "ofstream: ", ofs.rdstate()); - for (const auto& line : kCudaProfileConfiguration) { + for (const auto& line : config_flags) { ofs << line << std::endl; } } - PADDLE_ENFORCE(output_mode == "key_value" || output_mode == "csv"); + PADDLE_ENFORCE(output_mode == "kvp" || output_mode == "csv"); cudaOutputMode_t mode = output_mode == "csv" ? 
cudaCSV : cudaKeyValuePair; PADDLE_ENFORCE( cudaProfilerInitialize(config.c_str(), output_file.c_str(), mode)); @@ -66,5 +48,6 @@ void CudaProfilerInit(std::string output_file, std::string output_mode) { void CudaProfilerStart() { PADDLE_ENFORCE(cudaProfilerStart()); } void CudaProfilerStop() { PADDLE_ENFORCE((cudaProfilerStop())); } -} -} + +} // namespace platform +} // namespace paddle diff --git a/python/paddle/v2/fluid/profiler.py b/python/paddle/v2/fluid/profiler.py index b94ef67b48ec00a329e875fc671759fda6d925f6..f31d6f0a617c42601c164603692d59f8d722c48b 100644 --- a/python/paddle/v2/fluid/profiler.py +++ b/python/paddle/v2/fluid/profiler.py @@ -1,7 +1,20 @@ import paddle.v2.fluid.core as core +import subprocess +__all__ = ['CudaProfiler'] -def nvporf_init(output_file, output_mode=None): +NV_FLAGS = [ + "gpustarttimestamp", + "gpuendtimestamp", + "gridsize3d", + "threadblocksize", + "streamid", + "enableonstart 0", + "conckerneltrace", +] + + +def nvporf_init(output_file, output_mode=None, flags=None): """ Initialize the CUDA profiler. This methods must be called before nvprof_start. @@ -10,14 +23,15 @@ def nvporf_init(output_file, output_mode=None): :type output_file: string :param output_mode: The output mode has Key-Value pair format and Comma separated values format. - It should be 'key-value' or 'csv'. + It should be 'kvp' or 'csv'. 
:type output_mode: string """ if output_mode is None: output_mode = 'csv' - if output_mode != 'key-value' or output_mode != 'csv': + if output_mode not in ['kvp', 'csv']: - raise ValueError("The output mode must be 'key-value' or 'csv'.") + raise ValueError("The output mode must be 'kvp' or 'csv'.") - core.nvprof_init(output_file, output_mode) + flags = NV_FLAGS if flags is None else flags + core.nvprof_init(output_file, output_mode, flags) def nvporf_start(): @@ -34,13 +48,14 @@ def nvporf_stop(): core.nvprof_stop() -class profiler(object): - def __init__(self, output_file, output_mode=None, enabled=True): +class CudaProfiler(object): + def __init__(self, output_file, output_mode=None, flags=None, enabled=True): self.enabled = enabled if not self.enabled: return self.entered = False - nvporf_init(output_file, output_mode) + self.out_file = output_file + nvporf_init(output_file, output_mode, flags) def __enter__(self): if not self.enabled: diff --git a/python/paddle/v2/fluid/tests/test_profiler.py b/python/paddle/v2/fluid/tests/test_profiler.py index 7da7a28cf6e5cee0f5633e31703a9833963cade1..1fec5c99bf76a7706a1ae529b4d12aa0dad4da57 100644 --- a/python/paddle/v2/fluid/tests/test_profiler.py +++ b/python/paddle/v2/fluid/tests/test_profiler.py @@ -1,17 +1,28 @@ +import unittest +import numpy as np +import paddle.v2.fluid as fluid import paddle.v2.fluid.profiler as profiler import paddle.v2.fluid.layers as layers -import numpy as np -place = core.GPUPlace(0) -exe = Executor(place) -epoc = 8 -dshape = [4, 3, 28, 28] -data = layers.data(name='data', shape=dshape, dtype='float32') -conv = layers.conv2d(data, 20, 3, stride=[1, 1], padding=[1, 1]) +class TestProfiler(unittest.TestCase): + def test_nvprof(self): + if not fluid.core.is_compile_gpu(): + return + epoc = 8 + dshape = [4, 3, 28, 28] + data = layers.data(name='data', shape=[3, 28, 28], dtype='float32') + conv = layers.conv2d(data, 20, 3, stride=[1, 1], padding=[1, 1]) + + place = fluid.GPUPlace(0) + exe = fluid.Executor(place) + exe.run(fluid.default_startup_program()) + + with 
profiler.CudaProfiler("cuda_profiler.txt", 'csv') as nvprof: + for i in range(epoc): + input = np.random.random(dshape).astype("float32") + exe.run(fluid.default_main_program(), feed={'data': input}) + -input = core.LoDTensor() -with profiler("cuda_profiler.txt") as nvprof: - for i in range(epoc): - input.set(np.random.random(dshape).astype("float32"), place) - exe.run(framework.default_main_program(), feed={'data': data}) +if __name__ == '__main__': + unittest.main()