diff --git a/paddle/fluid/platform/device_tracer.cc b/paddle/fluid/platform/device_tracer.cc index 8fa8dbd67c936439840cffa073b6fa6693dd31a1..dc1d751141187edb7738e42c41514614d4d399b0 100644 --- a/paddle/fluid/platform/device_tracer.cc +++ b/paddle/fluid/platform/device_tracer.cc @@ -189,6 +189,8 @@ void CUPTIAPI bufferCompleted(CUcontext ctx, uint32_t streamId, uint8_t *buffer, } } // namespace +#endif // PADDLE_WITH_CUPTI + class DeviceTracerImpl : public DeviceTracer { public: DeviceTracerImpl() : enabled_(false) {} @@ -244,6 +246,8 @@ class DeviceTracerImpl : public DeviceTracer { if (enabled_) { return; } + +#ifdef PADDLE_WITH_CUPTI EnableActivity(); // Register callbacks for buffer requests and completed by CUPTI. @@ -262,6 +266,7 @@ class DeviceTracerImpl : public DeviceTracer { dynload::cuptiEnableCallback(1, subscriber_, CUPTI_CB_DOMAIN_DRIVER_API, CUPTI_DRIVER_TRACE_CBID_cuLaunchKernel)); CUPTI_CALL(dynload::cuptiGetTimestamp(&start_ns_)); +#endif // PADDLE_WITH_CUPTI enabled_ = true; } @@ -313,16 +318,21 @@ class DeviceTracerImpl : public DeviceTracer { } void Disable() { +#ifdef PADDLE_WITH_CUPTI // flush might cause additional calls to DeviceTracker. dynload::cuptiActivityFlushAll(CUPTI_ACTIVITY_FLAG_FLUSH_FORCED); +#endif // PADDLE_WITH_CUPTI std::lock_guard l(trace_mu_); +#ifdef PADDLE_WITH_CUPTI DisableActivity(); dynload::cuptiUnsubscribe(subscriber_); CUPTI_CALL(dynload::cuptiGetTimestamp(&end_ns_)); +#endif // PADDLE_WITH_CUPTI enabled_ = false; } private: +#ifdef PADDLE_WITH_CUPTI static void CUPTIAPI ApiCallback(void *userdata, CUpti_CallbackDomain domain, CUpti_CallbackId cbid, const void *cbdata) { auto *cbInfo = reinterpret_cast(cbdata); @@ -340,7 +350,8 @@ class DeviceTracerImpl : public DeviceTracer { VLOG(1) << "Unhandled API Callback for " << domain << " " << cbid; } } - + CUpti_SubscriberHandle subscriber_; +#endif // PADDLE_WITH_CUPTI std::mutex trace_mu_; bool enabled_; uint64_t start_ns_; @@ -349,45 +360,9 @@ class DeviceTracerImpl : public DeviceTracer { std::vector mem_records_; std::vector cpu_records_; std::unordered_map correlations_; - CUpti_SubscriberHandle subscriber_; -}; - -#endif // PADDLE_WITH_CUPTI - -class DeviceTracerDummy : public DeviceTracer { - public: - DeviceTracerDummy() {} - - void AddAnnotation(uint64_t id, const std::string &anno) {} - - void AddCPURecords(const std::string &anno, uint64_t start_ns, - uint64_t end_ns, int64_t device_id, int64_t thread_id) {} - - void AddMemRecords(const std::string &name, uint64_t start_ns, - uint64_t end_ns, int64_t device_id, int64_t stream_id, - uint32_t correlation_id, uint64_t bytes) {} - - void AddKernelRecords(uint64_t start, uint64_t end, int64_t device_id, - int64_t stream_id, uint32_t correlation_id) {} - - bool IsEnabled() { return false; } - - void Enable() {} - - proto::Profile GenProfile(const std::string &profile_path) { - return proto::Profile(); - } - - void Disable() {} }; -void CreateTracer(DeviceTracer **t) { -#ifdef PADDLE_WITH_CUPTI - *t = new DeviceTracerImpl(); -#else - *t = new DeviceTracerDummy(); -#endif // PADDLE_WITH_CUPTI -} +void CreateTracer(DeviceTracer **t) { *t = new DeviceTracerImpl(); } DeviceTracer *GetDeviceTracer() { std::call_once(tracer_once_flag, CreateTracer, &tracer); diff --git a/paddle/fluid/platform/device_tracer.h b/paddle/fluid/platform/device_tracer.h index d2a571f4345b544ad5e74f4629c3967593d6d628..322996fb4f54d34ebbb034a6e1de420e9c532545 100644 --- a/paddle/fluid/platform/device_tracer.h +++ b/paddle/fluid/platform/device_tracer.h @@ -13,6 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once +#include +#include +#include // NOLINT #include #include "paddle/fluid/platform/dynload/cupti.h" @@ -25,6 +28,12 @@ namespace platform { // WARN: Under Development. Don't depend on it yet. ////////////////////// +inline uint64_t PosixInNsec() { + struct timeval tv; + gettimeofday(&tv, nullptr); + return 1000 * (static_cast(tv.tv_sec) * 1000000 + tv.tv_usec); +} + // DeviceTracer performs the following tasks: // 1. Register cuda callbacks for various events: kernel, memcpy, etc. // 2. Collect cuda statistics: start/end ts, memory, etc. diff --git a/paddle/fluid/platform/profiler.cc b/paddle/fluid/platform/profiler.cc index 7c8d8a5964fa5258bebaf2c8522886ae5886ab2c..d0286719b9ea1aa671294f519051ac1e269c4e93 100644 --- a/paddle/fluid/platform/profiler.cc +++ b/paddle/fluid/platform/profiler.cc @@ -15,7 +15,6 @@ limitations under the License. */ #include "paddle/fluid/platform/profiler.h" #include -#include #include #include #include @@ -97,12 +96,6 @@ inline uint64_t GetTimeInNsec() { .count(); } -inline uint64_t PosixInNsec() { - struct timeval tv; - gettimeofday(&tv, nullptr); - return 1000 * (static_cast(tv.tv_sec) * 1000000 + tv.tv_usec); -} - Event::Event(EventType type, std::string name, uint32_t thread_id, const DeviceContext* dev_ctx) : type_(type), name_(name), thread_id_(thread_id), has_cuda_(false) { diff --git a/python/paddle/fluid/profiler.py b/python/paddle/fluid/profiler.py index 60e9215457e2a7867d5d9ec69f65dd70bcba9745..01983a830351b018770e6358f604781ffaae5800 100644 --- a/python/paddle/fluid/profiler.py +++ b/python/paddle/fluid/profiler.py @@ -218,7 +218,7 @@ def stop_profiler(sorted_key=None, profile_path='/tmp/profile'): def profiler(state, sorted_key=None, profile_path='/tmp/profile'): """The profiler interface. Different from cuda_profiler, this profiler can be used to profile both CPU - and GPU program. By defalut, it records the CPU and GPU operator kernels, + and GPU program. By default, it records the CPU and GPU operator kernels, if you want to profile other program, you can refer the profiling tutorial to add more records in C++ code. @@ -231,7 +231,7 @@ def profiler(state, sorted_key=None, profile_path='/tmp/profile'): state (string) : The profiling state, which should be 'CPU' or 'GPU', telling the profiler to use CPU timer or GPU timer for profiling. Although users may have already specified the execution place - (CPUPlace/CUDAPlace) in the begining, for flexibility the profiler + (CPUPlace/CUDAPlace) in the beginning, for flexibility the profiler would not inherit this place. sorted_key (string) : If None, the profiling results will be printed in the order of first end time of events. Otherwise, the profiling