From 740c1b91441add9b2dbe7e007118d587f8f2f85c Mon Sep 17 00:00:00 2001 From: Yan Chunwei Date: Fri, 31 May 2019 16:02:49 +0800 Subject: [PATCH] init profiler for Lite (#17640) --- CMakeLists.txt | 1 + cmake/configure.cmake | 4 + paddle/fluid/lite/CMakeLists.txt | 23 +- paddle/fluid/lite/api/CMakeLists.txt | 31 +-- paddle/fluid/lite/core/CMakeLists.txt | 1 + paddle/fluid/lite/core/profile/CMakeLists.txt | 6 + .../fluid/lite/core/profile/basic_profiler.cc | 26 +++ .../fluid/lite/core/profile/basic_profiler.h | 201 ++++++++++++++++++ .../lite/core/profile/basic_profiler_test.cc | 46 ++++ paddle/fluid/lite/core/program.h | 19 +- paddle/fluid/lite/tools/build.sh | 8 +- 11 files changed, 342 insertions(+), 24 deletions(-) create mode 100644 paddle/fluid/lite/core/profile/CMakeLists.txt create mode 100644 paddle/fluid/lite/core/profile/basic_profiler.cc create mode 100644 paddle/fluid/lite/core/profile/basic_profiler.h create mode 100644 paddle/fluid/lite/core/profile/basic_profiler_test.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index 93fbc04ca1a..4ef4a4c351e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -141,6 +141,7 @@ option(LITE_WITH_CUDA "Enable CUDA in lite mode" OFF) option(LITE_WITH_X86 "Enable X86 in lite mode" ON) option(LITE_WITH_ARM "Enable ARM in lite mode" OFF) option(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK "Enable light-weight framework" OFF) +option(LITE_WITH_PROFILE "Enable profile mode in lite framework" OFF) set(THIRD_PARTY_PATH "${CMAKE_BINARY_DIR}/third_party" CACHE STRING diff --git a/cmake/configure.cmake b/cmake/configure.cmake index a0966d7005d..385a9572f58 100644 --- a/cmake/configure.cmake +++ b/cmake/configure.cmake @@ -176,6 +176,10 @@ if (LITE_WITH_ARM) add_definitions("-DLITE_WITH_ARM") endif() +if (LITE_WITH_PROFILE) + add_definitions("-DLITE_WITH_PROFILE") +endif() + if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) add_definitions("-DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK") endif() diff --git a/paddle/fluid/lite/CMakeLists.txt 
b/paddle/fluid/lite/CMakeLists.txt index 93c3d9167c3..028384cca07 100644 --- a/paddle/fluid/lite/CMakeLists.txt +++ b/paddle/fluid/lite/CMakeLists.txt @@ -7,6 +7,7 @@ message(STATUS "LIGHT_FRAMEWORK:\t${LITE_WITH_LIGHT_WEIGHT_FRAMEWORK}") message(STATUS "LITE_WITH_CUDA:\t${LITE_WITH_CUDA}") message(STATUS "LITE_WITH_X86:\t${LITE_WITH_X86}") message(STATUS "LITE_WITH_ARM:\t${LITE_WITH_ARM}") +message(STATUS "LITE_WITH_PROFILE:\t${LITE_WITH_PROFILE}") set(LITE_MODEL_DIR "${THIRD_PARTY_PATH}/install") @@ -33,7 +34,7 @@ endfunction() function (lite_deps TARGET) set(options "") set(oneValueArgs "") - set(multiValueArgs DEPS X86_DEPS CUDA_DEPS ARM_DEPS ARGS) + set(multiValueArgs DEPS X86_DEPS CUDA_DEPS ARM_DEPS PROFILE_DEPS ARGS) cmake_parse_arguments(lite_deps "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) set(deps ${lite_deps_DEPS}) @@ -56,6 +57,12 @@ function (lite_deps TARGET) endforeach(var) endif() + if(LITE_WITH_PROFILE) + foreach(var ${lite_deps_PROFILE_DEPS}) + set(deps ${deps} ${var}) + endforeach(var) + endif() + set(${TARGET} ${deps} PARENT_SCOPE) endfunction() @@ -63,7 +70,7 @@ endfunction() function(lite_cc_library TARGET) set(options "") set(oneValueArgs "") - set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS ARM_DEPS ARGS) + set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS ARM_DEPS PROFILE_DEPS ARGS) cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) set(deps "") @@ -71,7 +78,8 @@ function(lite_cc_library TARGET) DEPS ${args_DEPS} X86_DEPS ${args_X86_DEPS} CUDA_DEPS ${args_CUDA_DEPS} - ARM_DEPS ${args_ARM_DEPS}) + ARM_DEPS ${args_ARM_DEPS} + PROFILE_DEPS ${args_PROFILE_DEPS}) cc_library(${TARGET} SRCS ${args_SRCS} DEPS ${deps} ${args_DEPS}) endfunction() @@ -79,7 +87,7 @@ endfunction() function(lite_cc_binary TARGET) set(options "") set(oneValueArgs "") - set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS ARM_DEPS ARGS) + set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS ARM_DEPS PROFILE_DEPS ARGS) 
cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) set(deps "") @@ -87,7 +95,9 @@ function(lite_cc_binary TARGET) DEPS ${args_DEPS} X86_DEPS ${args_X86_DEPS} CUDA_DEPS ${args_CUDA_DEPS} - ARM_DEPS ${args_ARM_DEPS}) + ARM_DEPS ${args_ARM_DEPS} + PROFILE_DEPS ${args_PROFILE_DEPS} + ) cc_binary(${TARGET} SRCS ${args_SRCS} DEPS ${deps} ${args_DEPS}) endfunction() @@ -102,7 +112,7 @@ endfunction() function(lite_cc_test TARGET) set(options "") set(oneValueArgs "") - set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS ARM_DEPS ARGS) + set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS ARM_DEPS PROFILE_DEPS ARGS) cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) set(deps "") @@ -111,6 +121,7 @@ function(lite_cc_test TARGET) X86_DEPS ${args_X86_DEPS} CUDA_DEPS ${args_CUDA_DEPS} ARM_DEPS ${args_ARM_DEPS} + PROFILE_DEPS ${args_PROFILE_DEPS} ) _lite_cc_test(${TARGET} SRCS ${args_SRCS} DEPS ${deps} ARGS ${args_ARGS}) register_test_offline("${TARGET}") diff --git a/paddle/fluid/lite/api/CMakeLists.txt b/paddle/fluid/lite/api/CMakeLists.txt index 689eb658ae1..c079c387aee 100644 --- a/paddle/fluid/lite/api/CMakeLists.txt +++ b/paddle/fluid/lite/api/CMakeLists.txt @@ -25,21 +25,22 @@ set(LITE_URL "http://paddle-inference-dist.bj.bcebos.com" CACHE STRING "inferenc set(LITE_DEMO_INSTALL_DIR "${THIRD_PARTY_PATH}/inference_demo" CACHE STRING "A path setting inference demo download directories.") -if(NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) -lite_cc_test(test_cxx_api_lite SRCS cxx_api_test.cc - DEPS cxx_api_lite model_parser_lite target_wrapper_host - ${ops_lite} ${host_kernels} ${x86_kernels} - ARGS --model_dir=${LITE_MODEL_DIR}/lite_naive_model - --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL) -add_dependencies(test_cxx_api_lite extern_lite_download_lite_naive_model_tar_gz) -endif(NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) - -if(WITH_TESTING) -lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL} 
"lite_naive_model.tar.gz") -# add_dependencies(test_cxx_api_lite extern_lite_download_lite_naive_model_tar_gz) -endif(WITH_TESTING) - -# lite_cc_test(test_light_api SRCS light_api_test.cc DEPS light_api_lite ARGS --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL) +# lite_cc_test(test_cxx_api_lite SRCS cxx_api_test.cc +# DEPS cxx_api_lite model_parser_lite target_wrapper_host +# PROFILE_DEPS basic_profiler_lite +# ${ops_lite} ${host_kernels} ARGS --model_dir=${LITE_MODEL_DIR}/lite_naive_model +# --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL) + +if((NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) AND WITH_TESTING) + lite_cc_test(test_cxx_api_lite SRCS cxx_api_test.cc + DEPS cxx_api_lite model_parser_lite target_wrapper_host + ${ops_lite} ${host_kernels} ${x86_kernels} + PROFILE_DEPS basic_profiler_lite + ARGS --model_dir=${LITE_MODEL_DIR}/lite_naive_model + --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL) + lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL} "lite_naive_model.tar.gz") + add_dependencies(test_cxx_api_lite extern_lite_download_lite_naive_model_tar_gz) +endif() lite_cc_binary(cxx_api_lite_bin SRCS cxx_api_bin.cc DEPS diff --git a/paddle/fluid/lite/core/CMakeLists.txt b/paddle/fluid/lite/core/CMakeLists.txt index 4ccb2062ae1..e1a23fa31e6 100644 --- a/paddle/fluid/lite/core/CMakeLists.txt +++ b/paddle/fluid/lite/core/CMakeLists.txt @@ -33,6 +33,7 @@ cc_library(program_lite SRCS program.cc DEPS op_lite kernel_lite) cc_library(optimizer_lite SRCS optimizer.cc DEPS mir_pass_manager model_parser_lite program_lite) add_subdirectory(mir) +add_subdirectory(profile) # for mobile, unnecessary to compile the following testings. 
if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) diff --git a/paddle/fluid/lite/core/profile/CMakeLists.txt b/paddle/fluid/lite/core/profile/CMakeLists.txt new file mode 100644 index 00000000000..43731e8a414 --- /dev/null +++ b/paddle/fluid/lite/core/profile/CMakeLists.txt @@ -0,0 +1,6 @@ +if (NOT LITE_WITH_PROFILE) + return() +endif() + +lite_cc_library(basic_profiler_lite SRCS basic_profiler.cc) +lite_cc_test(test_basic_profiler SRCS basic_profiler_test.cc DEPS basic_profiler_lite) diff --git a/paddle/fluid/lite/core/profile/basic_profiler.cc b/paddle/fluid/lite/core/profile/basic_profiler.cc new file mode 100644 index 00000000000..86d5cd39ea9 --- /dev/null +++ b/paddle/fluid/lite/core/profile/basic_profiler.cc @@ -0,0 +1,26 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/lite/core/profile/basic_profiler.h" + +namespace paddle { +namespace lite { +namespace profile { + +const int BasicTimer::data_w = 10; +const int BasicTimer::name_w = 10; + +} // namespace profile +} // namespace lite +} // namespace paddle diff --git a/paddle/fluid/lite/core/profile/basic_profiler.h b/paddle/fluid/lite/core/profile/basic_profiler.h new file mode 100644 index 00000000000..c50aeab4af5 --- /dev/null +++ b/paddle/fluid/lite/core/profile/basic_profiler.h @@ -0,0 +1,201 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/* + * This file implements BasicProfile, a profiler that helps to profile the basic + * CPU execution. It can display the min, max, average latency of the execution + * of each kernel. + */ +#pragma once +#include +#include +#include +#include // NOLINT +#include +#include +#include +#include +#include +#include + +namespace paddle { +namespace lite { +namespace profile { + +/* Base class of all the profile records */ +template +class TimerBase { + public: + void Start() { self()->Start(); } + void Stop() { self()->Stop(); } + void Log(uint32_t x) { return self()->Log(x); } + std::string basic_repr() const { return const_self()->basic_repr(); } + + void SetId(int id) { self()->SetId(id); } + void SetKey(const std::string &key) { self()->SetKey(key); } + + int id() const { return const_self()->id(); } + + protected: + ChildT *self() { return reinterpret_cast(this); } + const ChildT *const_self() const { + return reinterpret_cast(this); + } +}; + +class BasicTimer : TimerBase { + uint64_t total_{}; + uint64_t count_{}; + uint32_t max_{std::numeric_limits::min()}; + uint32_t min_{std::numeric_limits::max()}; + int id_{-1}; + std::string key_; + std::chrono::time_point timer_{}; + + // TODO(Superjomn) make static + static const int name_w; + static const int data_w; + + public: + BasicTimer() = default; + BasicTimer(int id, const std::string &key) : id_(id), key_(key) {} + + void SetId(int id) { id_ = id; } +
void SetKey(const std::string &key) { key_ = key; } + void Start() { timer_ = std::chrono::high_resolution_clock::now(); } + void Stop() { + auto duration = std::chrono::duration_cast( + std::chrono::high_resolution_clock::now() - timer_); + Log(duration.count()); + } + + int count() const { return count_; } + + void Log(uint32_t timespan) { + total_ += timespan; + max_ = std::max(max_, timespan); + min_ = std::min(min_, timespan); + count_++; + } + + static std::string basic_repr_header() { + std::stringstream ss; + ss << std::setw(name_w) << "kernel" // + << std::setw(data_w) << "average" // + << std::setw(data_w) << "min" // + << std::setw(data_w) << "max" // + << std::setw(data_w) << "count"; + return ss.str(); + } + + std::string basic_repr() const { + std::stringstream ss; + ss << std::setw(name_w) << key() // + << std::setw(data_w) << ave() // + << std::setw(data_w) << min() // + << std::setw(data_w) << max() // + << std::setw(data_w) << count_; + return ss.str(); + } + + const std::string &key() const { return key_; } + + int id() const { + CHECK_GE(id_, 0) << "id is not inited"; + return id_; + } + + double ave() const { return total_ * 1. / count_; } + double max() const { return max_; } + double min() const { return min_; } + + // BasicRecord(const BasicRecord &) = delete; + void operator=(const BasicTimer &) = delete; +}; + +/* + * A basic profiler in which each record logs the total latency.
+ */ +template +class BasicProfiler { + public: + explicit BasicProfiler(const std::string &name) : name_(name) {} + using record_t = TimerT; + + static BasicProfiler &Global() { + static std::unique_ptr x(new BasicProfiler("[global]")); + return *x; + } + + record_t &NewRcd(const std::string &key) { + records_.emplace_back(); + records_.back().SetId(records_.size() - 1); + records_.back().SetKey(key); + return records_.back(); + } + + const record_t &record(int id) { + CHECK_LT(id, records_.size()); + CHECK_GE(id, 0); + return records_[id]; + } + + record_t *mutable_record(int id) { + CHECK_LT(id, records_.size()); + CHECK_GE(id, 0); + return &records_[id]; + } + + std::string basic_repr() const { + std::stringstream ss; + for (const auto &rcd : records_) { + ss << rcd.basic_repr() << "\n"; + } + return ss.str(); + } + + ~BasicProfiler() { + LOG(INFO) << "Profile dumps:"; + LOG(INFO) << "\n" + BasicTimer::basic_repr_header() + "\n" + basic_repr(); + } + + private: + std::string name_; + std::vector records_; +}; + +struct ProfileBlock { + explicit ProfileBlock(int id) : id_(id) { + BasicProfiler::Global().mutable_record(id_)->Start(); + } + + ~ProfileBlock() { + BasicProfiler::Global().mutable_record(id_)->Stop(); + } + + private: + int id_{}; +}; + +#define LITE_PROFILE_ONE(key__) \ + static int key__##__profiler_id = \ + ::paddle::lite::profile::BasicProfiler< \ + ::paddle::lite::profile::BasicTimer>::Global() \ + .NewRcd(#key__) \ + .id(); \ + ::paddle::lite::profile::ProfileBlock key__##profiler__(key__##__profiler_id); + +} // namespace profile +} // namespace lite +} // namespace paddle diff --git a/paddle/fluid/lite/core/profile/basic_profiler_test.cc b/paddle/fluid/lite/core/profile/basic_profiler_test.cc new file mode 100644 index 00000000000..0154e02ff65 --- /dev/null +++ b/paddle/fluid/lite/core/profile/basic_profiler_test.cc @@ -0,0 +1,46 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/lite/core/profile/basic_profiler.h" +#include +#include +#include // NOLINT +#include // NOLINT + +namespace paddle { +namespace lite { +namespace profile { + +TEST(basic_record, init) { + BasicTimer timer; + timer.SetKey("hello"); +} + +TEST(basic_profile, init) { + auto& rcd = BasicProfiler::Global().NewRcd("fc"); + for (int i = 11; i < 100; i++) { + rcd.Log(i); + } + + LOG(INFO) << BasicProfiler::Global().basic_repr(); +} + +TEST(basic_profile, real_latency) { + LITE_PROFILE_ONE(test0); + std::this_thread::sleep_for(std::chrono::milliseconds(1200)); +} + +} // namespace profile +} // namespace lite +} // namespace paddle diff --git a/paddle/fluid/lite/core/program.h b/paddle/fluid/lite/core/program.h index b25b6ae7d16..20b61da3573 100644 --- a/paddle/fluid/lite/core/program.h +++ b/paddle/fluid/lite/core/program.h @@ -22,6 +22,9 @@ #include "paddle/fluid/lite/core/mir/node.h" #include "paddle/fluid/lite/core/op_lite.h" #include "paddle/fluid/lite/core/op_registry.h" +#ifdef LITE_WITH_PROFILE +#include "paddle/fluid/lite/core/profile/basic_profiler.h" +#endif // LITE_WITH_PROFILE namespace paddle { namespace lite { @@ -103,9 +106,18 @@ struct Program { struct Instruct { Instruct(const std::shared_ptr& op, std::unique_ptr&& kernel) - : op_(op), kernel_(std::move(kernel)) {} + : op_(op), kernel_(std::move(kernel)) { +#ifdef LITE_WITH_PROFILE + profile_id_ = 
profile::BasicProfiler::Global() + .NewRcd(kernel_->SerializedKernelType()) + .id(); +#endif // LITE_WITH_PROFILE + } void Run() { +#ifdef LITE_WITH_PROFILE + profile::ProfileBlock x(profile_id_); +#endif // LITE_WITH_PROFILE CHECK(op_); CHECK(kernel_); if (first_epoch_) { @@ -128,6 +140,11 @@ struct Instruct { std::shared_ptr op_; std::unique_ptr kernel_; bool first_epoch_{true}; + +#ifdef LITE_WITH_PROFILE + // for profiler + int profile_id_{-1}; +#endif // LITE_WITH_PROFILE }; /* diff --git a/paddle/fluid/lite/tools/build.sh b/paddle/fluid/lite/tools/build.sh index a2b0ade09fa..e3a3fcc990c 100755 --- a/paddle/fluid/lite/tools/build.sh +++ b/paddle/fluid/lite/tools/build.sh @@ -8,6 +8,10 @@ function cmake_x86 { cmake .. -DWITH_GPU=OFF -DWITH_MKLDNN=OFF -DLITE_WITH_X86=ON ${common_flags} } +function cmake_x86_for_CI { + cmake .. -DWITH_GPU=OFF -DWITH_MKLDNN=OFF -DLITE_WITH_X86=ON ${common_flags} -DLITE_WITH_PROFILE=ON +} + function cmake_gpu { cmake .. " -DWITH_GPU=ON {common_flags} -DLITE_WITH_GPU=ON" } @@ -57,7 +61,7 @@ function cmake_arm { function build { file=$1 for _test in $(cat $file); do - make $_test -j8 + make $_test -j$(expr $(nproc) - 2) done } @@ -81,7 +85,7 @@ function build_test_server { mkdir -p ./build cd ./build export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/paddle/build/third_party/install/mklml/lib" - cmake_x86 + cmake_x86_for_CI build $TESTS_FILE test_lite $TESTS_FILE } -- GitLab