diff --git a/paddle/fluid/platform/CMakeLists.txt b/paddle/fluid/platform/CMakeLists.txt index b7e84031e7b7e821d51caf405978d07995ea0e91..1838506c8931b2e1ff82adf6f277925dc9d53374 100644 --- a/paddle/fluid/platform/CMakeLists.txt +++ b/paddle/fluid/platform/CMakeLists.txt @@ -87,11 +87,11 @@ nv_test(transform_test SRCS transform_test.cu DEPS memory place device_context) cc_library(timer SRCS timer.cc) cc_test(timer_test SRCS timer_test.cc DEPS timer) -cc_library(device_tracer SRCS device_tracer.cc DEPS boost profiler_proto framework_proto device_context ${GPU_CTX_DEPS}) +cc_library(device_tracer SRCS device_tracer.cc DEPS boost profiler_proto framework_proto ${GPU_CTX_DEPS}) if(WITH_GPU) - nv_library(profiler SRCS profiler.cc profiler.cu DEPS device_context device_tracer) + nv_library(profiler SRCS profiler.cc profiler.cu DEPS device_tracer gpu_info enforce) else() - cc_library(profiler SRCS profiler.cc DEPS device_context device_tracer) + cc_library(profiler SRCS profiler.cc DEPS device_tracer enforce) endif() cc_test(profiler_test SRCS profiler_test.cc DEPS profiler) diff --git a/paddle/fluid/platform/device_tracer.h b/paddle/fluid/platform/device_tracer.h index 6ee2c36146215e278c87d22dd7b8fd5cb7e33865..d4418d836d66e329af8ed3f5ec05f49d47146b3e 100644 --- a/paddle/fluid/platform/device_tracer.h +++ b/paddle/fluid/platform/device_tracer.h @@ -17,6 +17,7 @@ limitations under the License. */ #include #include "paddle/fluid/platform/dynload/cupti.h" +#include "paddle/fluid/platform/event.h" #include "paddle/fluid/platform/port.h" #include "paddle/fluid/platform/profiler.pb.h" @@ -32,8 +33,6 @@ inline uint64_t PosixInNsec() { return 1000 * (static_cast(tv.tv_sec) * 1000000 + tv.tv_usec); } -class Event; - // DeviceTracer performs the following tasks: // 1. Register cuda callbacks for various events: kernel, memcpy, etc. // 2. Collect cuda statistics: start/end ts, memory, etc. diff --git a/paddle/fluid/platform/event.h b/paddle/fluid/platform/event.h new file mode 100644 index 0000000000000000000000000000000000000000..a4db23758b1c477114cd03dcd0e9f51296c575c6 --- /dev/null +++ b/paddle/fluid/platform/event.h @@ -0,0 +1,65 @@ +/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. + +licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include + +namespace paddle { +namespace platform { + +enum EventType { kMark, kPushRange, kPopRange }; + +class Event { + public: + // The DeviceContext is used to get the cuda stream. + // If CPU profiling mode, can pass nullptr. + Event(EventType type, std::string name, uint32_t thread_id); + + const EventType& type() const; + std::string name() const { return name_; } + uint32_t thread_id() const { return thread_id_; } + +#ifdef PADDLE_WITH_CUDA +#ifndef PADDLE_WITH_CUPTI + cudaEvent_t event() const { return event_; } + int device() const { return device_; } +#endif +#endif + + double CpuElapsedMs(const Event& e) const; + double CudaElapsedMs(const Event& e) const; + + private: + EventType type_; + std::string name_; + uint32_t thread_id_; + int64_t cpu_ns_; +#ifdef PADDLE_WITH_CUDA +#ifdef PADDLE_WITH_CUPTI + int64_t gpu_ns_ = 0; + + public: + void AddCudaElapsedTime(int64_t start_ns, int64_t end_ns) { + gpu_ns_ += end_ns - start_ns; + } + + private: +#else + cudaEvent_t event_ = nullptr; + int device_ = -1; +#endif +#endif +}; +} // namespace platform +} // namespace paddle diff --git a/paddle/fluid/platform/profiler.cu b/paddle/fluid/platform/profiler.cu index e115c554caf383bad29aa5d065ec0126427f8e78..aed276b16e95f954539d3fadac65309314ed34f1 100644 --- a/paddle/fluid/platform/profiler.cu +++ b/paddle/fluid/platform/profiler.cu @@ -12,9 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/platform/profiler.h" - #include +#include "paddle/fluid/platform/profiler.h" namespace paddle { namespace platform { @@ -22,26 +21,27 @@ namespace platform { __global__ void DummyKernel(int *a) { a[0] = 0; } static void ForEachDevice(std::function func) { - auto original_device = GetCurrentDeviceId(); - int count = GetCUDADeviceCount(); + auto original_device = platform::GetCurrentDeviceId(); + int count = platform::GetCUDADeviceCount(); for (int i = 0; i < count; i++) { - SetDeviceId(i); + platform::SetDeviceId(i); func(i); } - SetDeviceId(original_device); + platform::SetDeviceId(original_device); } void DummyKernelAndEvent() { for (int i = 0; i < 5; i++) { ForEachDevice([](int d) { - CUDADeviceContext *dev_ctx = new CUDADeviceContext(CUDAPlace(d)); + platform::SetDeviceId(d); + cudaStream_t stream; + PADDLE_ENFORCE(cudaStreamCreate(&stream)); Mark("_cuda_startup_"); int *ptr; PADDLE_ENFORCE(cudaMalloc(&ptr, sizeof(int))); - DummyKernel<<<1, 1, 0, dev_ctx->stream()>>>(ptr); - dev_ctx->Wait(); + DummyKernel<<<1, 1, 0, stream>>>(ptr); + PADDLE_ENFORCE(cudaStreamSynchronize(stream)); PADDLE_ENFORCE(cudaFree(ptr)); - delete dev_ctx; }); } } diff --git a/paddle/fluid/platform/profiler.h b/paddle/fluid/platform/profiler.h index 4057e5ea0564099667517eb2c54feb2ff13303d1..aec0ae34292d62905de0e1f459b2b6db4554ebb7 100644 --- a/paddle/fluid/platform/profiler.h +++ b/paddle/fluid/platform/profiler.h @@ -17,54 +17,13 @@ limitations under the License. */ #include #include #include -#include "paddle/fluid/platform/device_context.h" - -namespace paddle { -namespace platform { - -enum EventType { kMark, kPushRange, kPopRange }; - -class Event { - public: - // The DeviceContext is used to get the cuda stream. - // If CPU profiling mode, can pass nullptr. - Event(EventType type, std::string name, uint32_t thread_id); - - const EventType& type() const; - std::string name() const { return name_; } - uint32_t thread_id() const { return thread_id_; } - +#include "paddle/fluid/platform/enforce.h" +#include "paddle/fluid/platform/event.h" #ifdef PADDLE_WITH_CUDA -#ifndef PADDLE_WITH_CUPTI - cudaEvent_t event() const { return event_; } - int device() const { return device_; } -#endif +#include "paddle/fluid/platform/gpu_info.h" #endif - - double CpuElapsedMs(const Event& e) const; - double CudaElapsedMs(const Event& e) const; - - private: - EventType type_; - std::string name_; - uint32_t thread_id_; - int64_t cpu_ns_; -#ifdef PADDLE_WITH_CUDA -#ifdef PADDLE_WITH_CUPTI - int64_t gpu_ns_ = 0; - - public: - void AddCudaElapsedTime(int64_t start_ns, int64_t end_ns) { - gpu_ns_ += end_ns - start_ns; - } - - private: -#else - cudaEvent_t event_ = nullptr; - int device_ = -1; -#endif -#endif -}; +namespace paddle { +namespace platform { enum ProfilerState { kDisabled, // disabled state diff --git a/paddle/fluid/platform/profiler_test.cc b/paddle/fluid/platform/profiler_test.cc index 528fe03c67a282248c551525e899279b561ce5d2..a851488e72d27dfcbd04546d9b531d26257f611c 100644 --- a/paddle/fluid/platform/profiler_test.cc +++ b/paddle/fluid/platform/profiler_test.cc @@ -33,7 +33,6 @@ TEST(Event, CpuElapsedTime) { } TEST(RecordEvent, RecordEvent) { - using paddle::platform::DeviceContext; using paddle::platform::Event; using paddle::platform::EventType; using paddle::platform::RecordEvent;