未验证 提交 6299a90a 编写于 作者: W Wilber 提交者: GitHub

[CUDA] [NVTX] Lite add nvtx to support performance debug. (#3764)

上级 142ee7f2
......@@ -98,6 +98,7 @@ lite_option(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK "Enable light-weight framework" OF
lite_option(LITE_WITH_PROFILE "Enable profile mode in lite framework" OFF)
lite_option(LITE_WITH_PRECISION_PROFILE "Enable precision profile in profile mode ON in lite" OFF)
lite_option(LITE_WITH_LOG "Enable log printing or not." ON)
# NVTX range annotations for performance debugging; intended to be used
# together with LITE_WITH_CUDA.
lite_option(LITE_WITH_NVTX "Enable nvtx or not, please enable LITE_WITH_CUDA first." OFF)
lite_option(LITE_ON_TINY_PUBLISH "Publish tiny predictor lib." OFF)
lite_option(LITE_ON_MODEL_OPTIMIZE_TOOL "Build the model optimize tool" OFF)
# publish options
......
......@@ -265,3 +265,12 @@ endif(NOT WIN32)
mark_as_advanced(CUDA_BUILD_CUBIN CUDA_BUILD_EMULATION CUDA_VERBOSE_BUILD)
mark_as_advanced(CUDA_SDK_ROOT_DIR CUDA_SEPARABLE_COMPILATION)
# Define LITE_WITH_NVTX for the compiler only when the CUDA toolkit is newer
# than 10.0; otherwise warn and switch the option off.
if (LITE_WITH_NVTX)
  # Compare as a version and pass the variable by name: an unset or empty
  # CUDA_VERSION then evaluates to false instead of expanding to a malformed
  # `if( GREATER 10.0)` expression.
  if (CUDA_VERSION VERSION_GREATER 10.0)
    add_definitions("-DLITE_WITH_NVTX")
  else()
    message(WARNING "CUDA_VERSION should be larger than 10.0 to enable NVTX, force set LITE_WITH_NVTX OFF")
    set(LITE_WITH_NVTX OFF)
  endif()
endif()
......@@ -6,6 +6,8 @@ get_property(cuda_deps GLOBAL PROPERTY CUDA_MODULES)
nv_library(target_wrapper_cuda SRCS target_wrapper.cc DEPS ${cuda_deps})
nv_library(cuda_blas SRCS blas.cc DEPS ${cuda_deps})
# NVTX annotation wrapper. The source is named with its extension, like the
# other targets here, instead of relying on CMake guessing it.
nv_library(nvtx_wrapper SRCS nvtx_wrapper.cc DEPS ${cuda_deps})
lite_cc_library(cuda_context SRCS context.cc DEPS device_info)
add_subdirectory(math)
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifdef LITE_WITH_NVTX
#include "lite/backends/cuda/nvtx_wrapper.h"
#include <cuda.h>
#include <cuda_runtime.h>
namespace paddle {
namespace lite {
NVTXRangeAnnotation::NVTXRangeAnnotation(nvtxDomainHandle_t domain)
: domain_(domain), isGenerating_(false) {}
NVTXRangeAnnotation::NVTXRangeAnnotation(NVTXRangeAnnotation&& other)
: domain_(other.domain_), isGenerating_(other.isGenerating_) {
other.isGenerating_ = false;
}
// Pops the range from the domain if `generate` opened one on this object.
NVTXRangeAnnotation::~NVTXRangeAnnotation() {
  if (!isGenerating_) {
    return;
  }
  nvtxDomainRangePop(domain_);
}
// Opens a range on this annotation's domain, labelled with the registered
// string `stringHandle` and colored with `color` (passed to NVTX as ARGB).
// The range stays open until this object is destroyed.
void NVTXRangeAnnotation::generate(nvtxStringHandle_t stringHandle,
                                   Color color) {
  // The destructor pops exactly one range. If an earlier call already opened
  // one, close it first so that pushes and pops stay balanced.
  if (isGenerating_) {
    nvtxDomainRangePop(domain_);
    isGenerating_ = false;
  }

  // Value-initialize so every attribute field not set below is zero.
  nvtxEventAttributes_t attributes = nvtxEventAttributes_t();
  attributes.version = NVTX_VERSION;
  attributes.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
  attributes.colorType = NVTX_COLOR_ARGB;
  attributes.color = static_cast<decltype(attributes.color)>(color);
  attributes.messageType = NVTX_MESSAGE_TYPE_REGISTERED;
  attributes.message.registered = stringHandle;

  nvtxDomainRangePushEx(domain_, &attributes);
  isGenerating_ = true;
}
// Returns the shared annotator; it is constructed on first use with the
// domain name "Paddle-Lite".
const NVTXAnnotator& NVTXAnnotator::Global() {
  static const NVTXAnnotator instance{"Paddle-Lite"};
  return instance;
}
// Reports whether the annotator holds a non-null domain handle.
bool NVTXAnnotator::IsEnabled() const {
  const bool has_domain = (domain_ != nullptr);
  return has_domain;
}
// Creates a range annotation bound to this annotator's domain. The range is
// not started here; the caller starts it with `generate`.
NVTXRangeAnnotation NVTXAnnotator::AnnotateBlock() const {
  NVTXRangeAnnotation range(domain_);
  return range;
}
// Registers `str` with this annotator's domain and returns the handle that
// NVTX hands back for it.
nvtxStringHandle_t NVTXAnnotator::RegisterString(const char* str) const {
  nvtxStringHandle_t handle = nvtxDomainRegisterStringA(domain_, str);
  return handle;
}
NVTXAnnotator::NVTXAnnotator(const char* domainName)
: domain_(nvtxDomainCreateA(domainName)) {}
} // namespace lite
} // namespace paddle
#endif
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <cuda.h>
#include <cuda_runtime.h>
#include "nvtx3/nvToolsExt.h"
namespace paddle {
namespace lite {
// Colors for NVTX ranges. The value is handed to NVTX as an ARGB color, so
// the top byte is alpha and the low three bytes are RGB.
enum class Color : uint32_t {
  // Used for the range that spans one whole run of the program.
  Engine = 0xFF000000u | 0x00D2691Eu,
  // Used for the per-instruction ranges.
  Runner = 0xFF000000u | 0x00FFD700u,
};
// Scoped NVTX range. Construction only binds the object to a domain; the
// range is pushed when `generate` is called and popped again when the object
// is destroyed. Objects can be move-constructed but not copied.
class NVTXRangeAnnotation {
 public:
  explicit NVTXRangeAnnotation(nvtxDomainHandle_t domain);
  ~NVTXRangeAnnotation();

  // Copying is disabled: two copies would both try to pop the same range.
  NVTXRangeAnnotation(const NVTXRangeAnnotation&) = delete;
  NVTXRangeAnnotation& operator=(const NVTXRangeAnnotation&) = delete;

  // Moving hands the duty of popping the range over to the new object.
  NVTXRangeAnnotation(NVTXRangeAnnotation&& other);

  // Starts the range, labelled with `stringHandle` and drawn in `color`.
  void generate(nvtxStringHandle_t stringHandle, Color color);

 private:
  // Domain the range is pushed to and popped from.
  nvtxDomainHandle_t domain_;
  // True once `generate` has pushed a range that still has to be popped.
  bool isGenerating_;
};
// Entry point for NVTX annotations. It holds the handle of the NVTX domain
// created from the name given to its constructor, and hands out range
// annotations and registered strings bound to that domain.
class NVTXAnnotator {
 public:
  // Returns the single shared instance.
  static const NVTXAnnotator& Global();

  // Only the instance returned by Global() may exist, so copying is disabled;
  // otherwise callers could duplicate the shared instance by value.
  NVTXAnnotator(const NVTXAnnotator&) = delete;
  NVTXAnnotator& operator=(const NVTXAnnotator&) = delete;

  // Returns true when the domain handle is non-null.
  bool IsEnabled() const;
  // Returns a range annotation bound to this domain; the range itself is only
  // started by calling `generate` on the returned object.
  NVTXRangeAnnotation AnnotateBlock() const;
  // Registers the given string with this domain and returns its handle.
  nvtxStringHandle_t RegisterString(const char*) const;

 private:
  // Only a global instance of that object is allowed.
  // It can be accessed by calling the `NVTXAnnotator::Global()` function.
  explicit NVTXAnnotator(const char* domainName);

  nvtxDomainHandle_t domain_;
};
} // namespace lite
} // namespace paddle
......@@ -132,7 +132,8 @@ lite_cc_library(type_system SRCS type_system.cc DEPS tensor target_wrapper)
lite_cc_library(program SRCS program.cc
DEPS op kernel model_parser ${ops} ${cpp_wrapper}
PROFILE_DEPS lite_profiler)
PROFILE_DEPS lite_profiler
CUDA_DEPS nvtx_wrapper)
if (NOT LITE_ON_TINY_PUBLISH)
lite_cc_library(optimizer SRCS optimizer.cc DEPS mir_pass_manager model_parser program)
......
......@@ -151,10 +151,27 @@ void RuntimeProgram::Run() {
inst_precision_profiler.GetSummaryHeader();
#endif
#ifdef LITE_WITH_NVTX
const NVTXAnnotator& annotator = NVTXAnnotator::Global();
NVTXRangeAnnotation annotation_one_loop = annotator.AnnotateBlock();
if (annotator.IsEnabled()) {
annotation_one_loop.generate(register_layer_names_.back(),
lite::Color::Engine);
}
#endif
int idx = -1;
for (auto& inst : instructions_) {
++idx;
#ifndef LITE_WITH_FPGA
if (inst.is_feed_fetch_op()) continue;
#endif
#ifdef LITE_WITH_NVTX
NVTXRangeAnnotation annotation = annotator.AnnotateBlock();
nvtxStringHandle_t registered_name = register_layer_names_[idx];
if (annotator.IsEnabled()) {
annotation.generate(registered_name, lite::Color::Runner);
}
#endif
#ifdef LITE_WITH_CUDA
if (inst.need_sync()) {
inst.Sync();
......
......@@ -26,6 +26,9 @@
#ifdef LITE_WITH_PROFILE
#include "lite/core/profile/profiler.h"
#endif
#ifdef LITE_WITH_NVTX
#include "lite/backends/cuda/nvtx_wrapper.h"
#endif
namespace paddle {
namespace lite {
......@@ -174,6 +177,15 @@ class LITE_API RuntimeProgram {
}
#ifdef LITE_WITH_PROFILE
set_profiler();
#endif
#ifdef LITE_WITH_NVTX
const NVTXAnnotator& annotator = NVTXAnnotator::Global();
for (auto& inst : instructions_) {
NVTXRangeAnnotation annotation = annotator.AnnotateBlock();
register_layer_names_.push_back(annotator.RegisterString(
const_cast<paddle::lite::OpLite*>(inst.op())->Type().c_str()));
}
register_layer_names_.push_back(annotator.RegisterString("one_loop"));
#endif
}
~RuntimeProgram() {
......@@ -214,6 +226,9 @@ class LITE_API RuntimeProgram {
}
}
#endif
#ifdef LITE_WITH_NVTX
std::vector<nvtxStringHandle_t> register_layer_names_;
#endif
};
} // namespace lite
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册