提交 6cf2dcbc 编写于 作者: D dangqingqing

Add cuda profiler tools.

上级 1f6002ed
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <cuda_profiler_api.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <fstream>
#include <string>
#include <vector>
namespace paddle {
namespace platform {
// Lines of the profiler configuration file. CudaProfilerInit() writes every
// entry, one per line and in this order, to a temporary file and passes that
// file's path to cudaProfilerInitialize().
// NOTE(review): the entries look like option/counter names of the legacy CUDA
// command-line profiler; confirm their exact meaning against the CUDA docs.
static std::vector<std::string> kCudaProfileConfiguration = {
    "gpustarttimestamp",
    "gpuendtimestamp",
    "gridsize3d",
    "threadblocksize",
    "dynsmemperblock",
    "stasmemperblock",
    "regperthread",
    "memtransfersize",
    "memtransferdir",
    "memtransferhostmemtype",
    "streamid",
    "cacheconfigrequested",
    "cacheconfigexecuted",
    "countermodeaggregate",
    // presumably keeps collection off until CudaProfilerStart() is called --
    // TODO confirm
    "enableonstart 0",
    "active_warps",
    "active_cycles",
};
void CudaProfilerInit(std::string output_file, std::string output_mode) {
std::array<char, 128> buf;
std::string tmpl = "/tmp/cuda_profile_config.XXXXXX";
PADDLE_ENFORCE_LT(tmpl.size(), buf.size());
memcpy(buf.data(), tmpl.data(), tmpl.size());
auto result = mktemp(buf.data());
PADDLE_ENFORCE(strlen(result) != 0);
std::string config = result;
{
std::ofstream ofs(config, std::ios::out | std::ios::trunc);
PADDLE_ENFORCE(ofs.is_open(), "ofstream: ", ofs.rdstate());
for (const auto& line : kCudaProfileConfiguration) {
ofs << line << std::endl;
}
}
PADDLE_ENFORCE(output_mode == "key_value" || output_mode == "csv");
cudaOutputMode_t mode = output_mode == "csv" ? cudaCSV : cudaKeyValuePair;
PADDLE_ENFORCE(
cudaProfilerInitialize(config.c_str(), output_file.c_str(), mode));
}
// Calls cudaProfilerStart() and checks its result with PADDLE_ENFORCE.
// Declared inline because the definition lives in a header; a non-inline
// definition would be emitted by every translation unit that includes it and
// fail at link time.
inline void CudaProfilerStart() { PADDLE_ENFORCE(cudaProfilerStart()); }
// Calls cudaProfilerStop() and checks its result with PADDLE_ENFORCE.
// Declared inline because the definition lives in a header; a non-inline
// definition would be emitted by every translation unit that includes it and
// fail at link time.
inline void CudaProfilerStop() { PADDLE_ENFORCE(cudaProfilerStop()); }
}
}
......@@ -37,6 +37,7 @@ limitations under the License. */
#ifdef PADDLE_WITH_CUDA
#include "paddle/operators/nccl/nccl_gpu_common.h"
#include "paddle/platform/cuda_profiler.h"
#include "paddle/platform/gpu_info.h"
#endif
......@@ -460,6 +461,10 @@ All parameter, weight, gradient are variables in Paddle.
m.def("op_support_gpu", OpSupportGPU);
#ifdef PADDLE_WITH_CUDA
m.def("get_cuda_device_count", platform::GetCUDADeviceCount);
m.def("nvprof_init", platform::CudaProfilerInit);
m.def("nvprof_start", platform::CudaProfilerStart);
m.def("nvprof_stop", platform::CudaProfilerStop);
#endif
return m.ptr();
......
import paddle.v2.fluid.core as core
def nvporf_init(output_file, output_mode=None):
    """
    Initialize the CUDA profiler.
    This method must be called before nvporf_start.

    :param output_file: The output file name.
    :type output_file: string
    :param output_mode: The output format, either key-value pairs
        ('key-value') or comma separated values ('csv'). Defaults to 'csv'
        when None. The spelling 'key_value' is accepted as well.
    :type output_mode: string
    :raises ValueError: if output_mode is not one of the supported values.
    """
    if output_mode is None:
        output_mode = 'csv'
    # A membership test is required here: "!= a or != b" is true for every
    # value and would reject all modes, including the valid ones.
    if output_mode not in ('key-value', 'key_value', 'csv'):
        raise ValueError("The output mode must be 'key-value' or 'csv'.")
    # The native binding checks for the underscore spelling, so translate the
    # documented hyphenated name before crossing into C++.
    if output_mode == 'key-value':
        output_mode = 'key_value'
    core.nvprof_init(output_file, output_mode)
def nvporf_start():
    """
    Enables profiler collection by the active CUDA profiling tool.

    Thin wrapper that forwards to core.nvprof_start().
    """
    core.nvprof_start()
def nvporf_stop():
    """
    Disables profiler collection.

    Thin wrapper that forwards to core.nvprof_stop().
    """
    core.nvprof_stop()
class profiler(object):
    """
    Context manager that profiles the enclosed block with the CUDA profiler.

    Constructing the object initializes the profiler (nvporf_init); entering
    the ``with`` block starts collection (nvporf_start) and leaving it stops
    collection (nvporf_stop), whether or not the block raised.

    :param output_file: The output file name, forwarded to nvporf_init.
    :type output_file: string
    :param output_mode: The output mode, forwarded to nvporf_init.
    :type output_mode: string
    :param enabled: When False the object is a no-op: nothing is
        initialized, started or stopped.
    :type enabled: bool
    """

    def __init__(self, output_file, output_mode=None, enabled=True):
        self.enabled = enabled
        # Set unconditionally so the attribute exists on disabled instances.
        self.entered = False
        if not self.enabled:
            return
        nvporf_init(output_file, output_mode)

    def __enter__(self):
        """
        Start collection and return this object.

        :raises RuntimeError: if an enabled instance is entered a second time.
        """
        if not self.enabled:
            # Return self so "with ... as p" binds the same kind of object
            # whether or not profiling is enabled.
            return self
        if self.entered:
            raise RuntimeError("The profiler traces are not reentrant")
        self.entered = True
        nvporf_start()
        return self

    def __exit__(self, exc_type, exc_value, tb):
        """
        Stop collection; an exception raised inside the block propagates.
        """
        # Stop before the exception (if any) propagates; otherwise an error
        # inside the block would leave collection running.
        if self.enabled:
            nvporf_stop()
        # A false return value lets the interpreter re-raise the in-flight
        # exception with its original traceback.
        return False
import paddle.v2.fluid.profiler as profiler
import paddle.v2.fluid.layers as layers
import numpy as np
# Smoke test: run a small conv2d program several times on GPU 0 while the CUDA
# profiler is collecting.
place = core.GPUPlace(0)
exe = Executor(place)

epoc = 8
dshape = [4, 3, 28, 28]
# NOTE(review): dshape includes the batch dimension; confirm that
# layers.data expects the full shape here rather than the per-sample shape.
data = layers.data(name='data', shape=dshape, dtype='float32')
conv = layers.conv2d(data, 20, 3, stride=[1, 1], padding=[1, 1])

# Named input_tensor rather than "input" to avoid shadowing the builtin.
input_tensor = core.LoDTensor()
# "profiler" is bound to the module by the import above, so the context
# manager class has to be reached as an attribute of it.
with profiler.profiler("cuda_profiler.txt") as nvprof:
    for i in range(epoc):
        input_tensor.set(np.random.random(dshape).astype("float32"), place)
        # Feed the tensor that was just filled, not the layer variable.
        exe.run(framework.default_main_program(), feed={'data': input_tensor})
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册