提交 623f62a7 编写于 作者: D dangqingqing

Add CUDA profiler tools and expose them in Python.

上级 322d69f2
...@@ -14,33 +14,15 @@ limitations under the License. */ ...@@ -14,33 +14,15 @@ limitations under the License. */
#pragma once #pragma once
#include <cuda_profiler_api.h> #include <cuda_profiler_api.h>
#include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
namespace paddle { namespace paddle {
namespace platform { namespace platform {
static std::vector<std::string> kCudaProfileConfiguration = { void CudaProfilerInit(std::string output_file, std::string output_mode,
"gpustarttimestamp", std::vector<std::string> config_flags) {
"gpuendtimestamp",
"gridsize3d",
"threadblocksize",
"dynsmemperblock",
"stasmemperblock",
"regperthread",
"memtransfersize",
"memtransferdir",
"memtransferhostmemtype",
"streamid",
"cacheconfigrequested",
"cacheconfigexecuted",
"countermodeaggregate",
"enableonstart 0",
"active_warps",
"active_cycles",
};
void CudaProfilerInit(std::string output_file, std::string output_mode) {
std::array<char, 128> buf; std::array<char, 128> buf;
std::string tmpl = "/tmp/cuda_profile_config.XXXXXX"; std::string tmpl = "/tmp/cuda_profile_config.XXXXXX";
PADDLE_ENFORCE_LT(tmpl.size(), buf.size()); PADDLE_ENFORCE_LT(tmpl.size(), buf.size());
...@@ -52,12 +34,12 @@ void CudaProfilerInit(std::string output_file, std::string output_mode) { ...@@ -52,12 +34,12 @@ void CudaProfilerInit(std::string output_file, std::string output_mode) {
{ {
std::ofstream ofs(config, std::ios::out | std::ios::trunc); std::ofstream ofs(config, std::ios::out | std::ios::trunc);
PADDLE_ENFORCE(ofs.is_open(), "ofstream: ", ofs.rdstate()); PADDLE_ENFORCE(ofs.is_open(), "ofstream: ", ofs.rdstate());
for (const auto& line : kCudaProfileConfiguration) { for (const auto& line : config_flags) {
ofs << line << std::endl; ofs << line << std::endl;
} }
} }
PADDLE_ENFORCE(output_mode == "key_value" || output_mode == "csv"); PADDLE_ENFORCE(output_mode == "kvp" || output_mode == "csv");
cudaOutputMode_t mode = output_mode == "csv" ? cudaCSV : cudaKeyValuePair; cudaOutputMode_t mode = output_mode == "csv" ? cudaCSV : cudaKeyValuePair;
PADDLE_ENFORCE( PADDLE_ENFORCE(
cudaProfilerInitialize(config.c_str(), output_file.c_str(), mode)); cudaProfilerInitialize(config.c_str(), output_file.c_str(), mode));
...@@ -66,5 +48,6 @@ void CudaProfilerInit(std::string output_file, std::string output_mode) { ...@@ -66,5 +48,6 @@ void CudaProfilerInit(std::string output_file, std::string output_mode) {
void CudaProfilerStart() { PADDLE_ENFORCE(cudaProfilerStart()); } void CudaProfilerStart() { PADDLE_ENFORCE(cudaProfilerStart()); }
void CudaProfilerStop() { PADDLE_ENFORCE((cudaProfilerStop())); } void CudaProfilerStop() { PADDLE_ENFORCE((cudaProfilerStop())); }
}
} } // namespace platform
} // namespace paddle
import paddle.v2.fluid.core as core import paddle.v2.fluid.core as core
import subprocess
__all__ = ['CudaProfiler']
def nvporf_init(output_file, output_mode=None): NV_FLAGS = [
"gpustarttimestamp",
"gpuendtimestamp",
"gridsize3d",
"threadblocksize",
"streamid",
"enableonstart 0",
"conckerneltrace",
]
def nvporf_init(output_file, output_mode=None, flags=None):
""" """
Initialize the CUDA profiler. Initialize the CUDA profiler.
This method must be called before nvprof_start. This method must be called before nvprof_start.
...@@ -10,14 +23,15 @@ def nvporf_init(output_file, output_mode=None): ...@@ -10,14 +23,15 @@ def nvporf_init(output_file, output_mode=None):
:type output_file: string :type output_file: string
:param output_mode: The output mode has Key-Value pair format and :param output_mode: The output mode has Key-Value pair format and
Comma separated values format. Comma separated values format.
It should be 'key-value' or 'csv'. It should be 'kv' or 'csv'.
:type output_mode: string :type output_mode: string
""" """
if output_mode is None: if output_mode is None:
output_mode = 'csv' output_mode = 'csv'
if output_mode != 'key-value' or output_mode != 'csv': if output_mode not in ['kv', 'csv']:
raise ValueError("The output mode must be 'key-value' or 'csv'.") raise ValueError("The output mode must be 'key-value' or 'csv'.")
core.nvprof_init(output_file, output_mode) flags = NV_FLAGS if flags is None else flags
core.nvprof_init(output_file, output_mode, flags)
def nvporf_start(): def nvporf_start():
...@@ -34,13 +48,14 @@ def nvporf_stop(): ...@@ -34,13 +48,14 @@ def nvporf_stop():
core.nvprof_stop() core.nvprof_stop()
class profiler(object): class CudaProfiler(object):
def __init__(self, output_file, output_mode=None, enabled=True): def __init__(self, output_file, output_mode=None, flags=None, enabled=True):
self.enabled = enabled self.enabled = enabled
if not self.enabled: if not self.enabled:
return return
self.entered = False self.entered = False
nvporf_init(output_file, output_mode) self.out_file = output_file
nvporf_init(output_file, output_mode, flags)
def __enter__(self): def __enter__(self):
if not self.enabled: if not self.enabled:
......
import unittest
import numpy as np
import paddle.v2.fluid as fluid
import paddle.v2.fluid.profiler as profiler import paddle.v2.fluid.profiler as profiler
import paddle.v2.fluid.layers as layers import paddle.v2.fluid.layers as layers
import numpy as np
place = core.GPUPlace(0)
exe = Executor(place)
epoc = 8 class TestProfiler(unittest.TestCase):
dshape = [4, 3, 28, 28] def test_nvprof(self):
data = layers.data(name='data', shape=dshape, dtype='float32') if not fluid.core.is_compile_gpu():
conv = layers.conv2d(data, 20, 3, stride=[1, 1], padding=[1, 1]) return
epoc = 8
dshape = [4, 3, 28, 28]
data = layers.data(name='data', shape=[3, 28, 28], dtype='float32')
conv = layers.conv2d(data, 20, 3, stride=[1, 1], padding=[1, 1])
place = fluid.GPUPlace(0)
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
with profiler.CudaProfiler("cuda_profiler.txt", 'csv') as nvprof:
for i in range(epoc):
input = np.random.random(dshape).astype("float32")
exe.run(fluid.default_main_program(), feed={'data': input})
input = core.LoDTensor() if __name__ == '__main__':
with profiler("cuda_profiler.txt") as nvprof: unittest.main()
for i in range(epoc):
input.set(np.random.random(dshape).astype("float32"), place)
exe.run(framework.default_main_program(), feed={'data': data})
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册