提交 623f62a7 编写于 作者: D dangqingqing

Add CUDA profiler tools and expose them in Python.

上级 322d69f2
......@@ -14,33 +14,15 @@ limitations under the License. */
#pragma once
#include <cuda_profiler_api.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
namespace paddle {
namespace platform {
// Profiler option lines that CudaProfilerInit writes, one entry per line,
// into the configuration file whose path is handed to cudaProfilerInitialize.
// NOTE(review): the meaning of each option is defined by the CUDA profiler,
// not by this file -- confirm against the CUDA documentation before editing.
static std::vector<std::string> kCudaProfileConfiguration = {
"gpustarttimestamp",
"gpuendtimestamp",
"gridsize3d",
"threadblocksize",
"dynsmemperblock",
"stasmemperblock",
"regperthread",
"memtransfersize",
"memtransferdir",
"memtransferhostmemtype",
"streamid",
"cacheconfigrequested",
"cacheconfigexecuted",
"countermodeaggregate",
"enableonstart 0",
"active_warps",
"active_cycles",
};
void CudaProfilerInit(std::string output_file, std::string output_mode) {
void CudaProfilerInit(std::string output_file, std::string output_mode,
std::vector<std::string> config_flags) {
std::array<char, 128> buf;
std::string tmpl = "/tmp/cuda_profile_config.XXXXXX";
PADDLE_ENFORCE_LT(tmpl.size(), buf.size());
......@@ -52,12 +34,12 @@ void CudaProfilerInit(std::string output_file, std::string output_mode) {
{
std::ofstream ofs(config, std::ios::out | std::ios::trunc);
PADDLE_ENFORCE(ofs.is_open(), "ofstream: ", ofs.rdstate());
for (const auto& line : kCudaProfileConfiguration) {
for (const auto& line : config_flags) {
ofs << line << std::endl;
}
}
PADDLE_ENFORCE(output_mode == "key_value" || output_mode == "csv");
PADDLE_ENFORCE(output_mode == "kvp" || output_mode == "csv");
cudaOutputMode_t mode = output_mode == "csv" ? cudaCSV : cudaKeyValuePair;
PADDLE_ENFORCE(
cudaProfilerInitialize(config.c_str(), output_file.c_str(), mode));
......@@ -66,5 +48,6 @@ void CudaProfilerInit(std::string output_file, std::string output_mode) {
// Calls cudaProfilerStart() and hands its result to PADDLE_ENFORCE.
void CudaProfilerStart() {
  PADDLE_ENFORCE(cudaProfilerStart());
}
// Calls cudaProfilerStop() and hands its result to PADDLE_ENFORCE.
void CudaProfilerStop() {
  PADDLE_ENFORCE((cudaProfilerStop()));
}
}
}
} // namespace platform
} // namespace paddle
import paddle.v2.fluid.core as core
import subprocess
__all__ = ['CudaProfiler']
def nvporf_init(output_file, output_mode=None):
# Default profiler option lines. nvporf_init forwards this list to
# core.nvprof_init when the caller does not supply `flags`.
# NOTE(review): the meaning of each option is defined by the CUDA profiler,
# not by this module -- confirm against the CUDA documentation before editing.
NV_FLAGS = [
"gpustarttimestamp",
"gpuendtimestamp",
"gridsize3d",
"threadblocksize",
"streamid",
"enableonstart 0",
"conckerneltrace",
]
def nvporf_init(output_file, output_mode=None, flags=None):
    """
    Initialize the CUDA profiler.
    This method must be called before nvprof_start.

    :param output_file: Forwarded unchanged to core.nvprof_init as the
        profiler output file.
    :type output_file: string
    :param output_mode: The output mode has Key-Value pair format and
        Comma separated values format.
        It should be 'kvp' or 'csv'. None selects 'csv'. The legacy
        spelling 'kv' is still accepted and is forwarded as 'kvp'.
    :type output_mode: string
    :param flags: Profiler option lines forwarded to core.nvprof_init.
        None selects the module-level NV_FLAGS defaults.
    :type flags: list of strings
    :raises ValueError: if output_mode is not one of the accepted values.
    """
    if output_mode is None:
        output_mode = 'csv'
    elif output_mode == 'kv':
        # The native CudaProfilerInit in this change enforces "kvp" or "csv",
        # so the previously documented 'kv' has to be translated; otherwise it
        # would pass validation here and then fail inside the core call.
        # NOTE(review): assumes core.nvprof_init is bound to CudaProfilerInit.
        output_mode = 'kvp'
    if output_mode not in ('kvp', 'csv'):
        raise ValueError("The output mode must be 'kvp' or 'csv'.")
    flags = NV_FLAGS if flags is None else flags
    core.nvprof_init(output_file, output_mode, flags)
def nvporf_start():
......@@ -34,13 +48,14 @@ def nvporf_stop():
core.nvprof_stop()
class profiler(object):
def __init__(self, output_file, output_mode=None, enabled=True):
class CudaProfiler(object):
def __init__(self, output_file, output_mode=None, flags=None, enabled=True):
self.enabled = enabled
if not self.enabled:
return
self.entered = False
nvporf_init(output_file, output_mode)
self.out_file = output_file
nvporf_init(output_file, output_mode, flags)
def __enter__(self):
if not self.enabled:
......
import unittest
import numpy as np
import paddle.v2.fluid as fluid
import paddle.v2.fluid.profiler as profiler
import paddle.v2.fluid.layers as layers
import numpy as np
place = core.GPUPlace(0)
exe = Executor(place)
epoc = 8
dshape = [4, 3, 28, 28]
data = layers.data(name='data', shape=dshape, dtype='float32')
conv = layers.conv2d(data, 20, 3, stride=[1, 1], padding=[1, 1])
class TestProfiler(unittest.TestCase):
def test_nvprof(self):
if not fluid.core.is_compile_gpu():
return
epoc = 8
dshape = [4, 3, 28, 28]
data = layers.data(name='data', shape=[3, 28, 28], dtype='float32')
conv = layers.conv2d(data, 20, 3, stride=[1, 1], padding=[1, 1])
place = fluid.GPUPlace(0)
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
with profiler.CudaProfiler("cuda_profiler.txt", 'csv') as nvprof:
for i in range(epoc):
input = np.random.random(dshape).astype("float32")
exe.run(fluid.default_main_program(), feed={'data': input})
input = core.LoDTensor()
with profiler("cuda_profiler.txt") as nvprof:
for i in range(epoc):
input.set(np.random.random(dshape).astype("float32"), place)
exe.run(framework.default_main_program(), feed={'data': data})
if __name__ == '__main__':
unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册