提交 c7621e9e 编写于 作者: Y Yan Chunwei 提交者: GitHub

init profiler for Lite (#17640)

上级 b5c410a2
......@@ -141,6 +141,7 @@ option(LITE_WITH_CUDA "Enable CUDA in lite mode" OFF)
option(LITE_WITH_X86 "Enable X86 in lite mode" ON)
option(LITE_WITH_ARM "Enable ARM in lite mode" OFF)
option(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK "Enable light-weight framework" OFF)
option(LITE_WITH_PROFILE "Enable profile mode in lite framework" OFF)
set(THIRD_PARTY_PATH "${CMAKE_BINARY_DIR}/third_party" CACHE STRING
......
......@@ -176,6 +176,10 @@ if (LITE_WITH_ARM)
add_definitions("-DLITE_WITH_ARM")
endif()
if (LITE_WITH_PROFILE)
add_definitions("-DLITE_WITH_PROFILE")
endif()
if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
add_definitions("-DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK")
endif()
......@@ -7,6 +7,7 @@ message(STATUS "LIGHT_FRAMEWORK:\t${LITE_WITH_LIGHT_WEIGHT_FRAMEWORK}")
message(STATUS "LITE_WITH_CUDA:\t${LITE_WITH_CUDA}")
message(STATUS "LITE_WITH_X86:\t${LITE_WITH_X86}")
message(STATUS "LITE_WITH_ARM:\t${LITE_WITH_ARM}")
message(STATUS "LITE_WITH_PROFILE:\t${LITE_WITH_PROFILE}")
set(LITE_MODEL_DIR "${THIRD_PARTY_PATH}/install")
......@@ -33,7 +34,7 @@ endfunction()
function (lite_deps TARGET)
set(options "")
set(oneValueArgs "")
set(multiValueArgs DEPS X86_DEPS CUDA_DEPS ARM_DEPS ARGS)
set(multiValueArgs DEPS X86_DEPS CUDA_DEPS ARM_DEPS PROFILE_DEPS ARGS)
cmake_parse_arguments(lite_deps "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
set(deps ${lite_deps_DEPS})
......@@ -56,6 +57,12 @@ function (lite_deps TARGET)
endforeach(var)
endif()
if(LITE_WITH_PROFILE)
foreach(var ${lite_deps_PROFILE_DEPS})
set(deps ${deps} ${var})
endforeach(var)
endif()
set(${TARGET} ${deps} PARENT_SCOPE)
endfunction()
......@@ -63,7 +70,7 @@ endfunction()
function(lite_cc_library TARGET)
set(options "")
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS ARM_DEPS ARGS)
set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS ARM_DEPS PROFILE_DEPS ARGS)
cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
set(deps "")
......@@ -71,7 +78,8 @@ function(lite_cc_library TARGET)
DEPS ${args_DEPS}
X86_DEPS ${args_X86_DEPS}
CUDA_DEPS ${args_CUDA_DEPS}
ARM_DEPS ${args_ARM_DEPS})
ARM_DEPS ${args_ARM_DEPS}
PROFILE_DEPS ${args_PROFILE_DEPS})
cc_library(${TARGET} SRCS ${args_SRCS} DEPS ${deps} ${args_DEPS})
endfunction()
......@@ -79,7 +87,7 @@ endfunction()
function(lite_cc_binary TARGET)
set(options "")
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS ARM_DEPS ARGS)
set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS ARM_DEPS PROFILE_DEPS ARGS)
cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
set(deps "")
......@@ -87,7 +95,9 @@ function(lite_cc_binary TARGET)
DEPS ${args_DEPS}
X86_DEPS ${args_X86_DEPS}
CUDA_DEPS ${args_CUDA_DEPS}
ARM_DEPS ${args_ARM_DEPS})
ARM_DEPS ${args_ARM_DEPS}
PROFILE_DEPS ${args_PROFILE_DEPS}
)
cc_binary(${TARGET} SRCS ${args_SRCS} DEPS ${deps} ${args_DEPS})
endfunction()
......@@ -102,7 +112,7 @@ endfunction()
function(lite_cc_test TARGET)
set(options "")
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS ARM_DEPS ARGS)
set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS ARM_DEPS PROFILE_DEPS ARGS)
cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
set(deps "")
......@@ -111,6 +121,7 @@ function(lite_cc_test TARGET)
X86_DEPS ${args_X86_DEPS}
CUDA_DEPS ${args_CUDA_DEPS}
ARM_DEPS ${args_ARM_DEPS}
PROFILE_DEPS ${args_PROFILE_DEPS}
)
_lite_cc_test(${TARGET} SRCS ${args_SRCS} DEPS ${deps} ARGS ${args_ARGS})
register_test_offline("${TARGET}")
......
......@@ -25,21 +25,22 @@ set(LITE_URL "http://paddle-inference-dist.bj.bcebos.com" CACHE STRING "inferenc
set(LITE_DEMO_INSTALL_DIR "${THIRD_PARTY_PATH}/inference_demo" CACHE STRING
"A path setting inference demo download directories.")
if(NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
lite_cc_test(test_cxx_api_lite SRCS cxx_api_test.cc
DEPS cxx_api_lite model_parser_lite target_wrapper_host
${ops_lite} ${host_kernels} ${x86_kernels}
ARGS --model_dir=${LITE_MODEL_DIR}/lite_naive_model
--optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL)
add_dependencies(test_cxx_api_lite extern_lite_download_lite_naive_model_tar_gz)
endif(NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
if(WITH_TESTING)
lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL} "lite_naive_model.tar.gz")
# add_dependencies(test_cxx_api_lite extern_lite_download_lite_naive_model_tar_gz)
endif(WITH_TESTING)
# lite_cc_test(test_light_api SRCS light_api_test.cc DEPS light_api_lite ARGS --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL)
# lite_cc_test(test_cxx_api_lite SRCS cxx_api_test.cc
# DEPS cxx_api_lite model_parser_lite target_wrapper_host
# PROFILE_DEPS basic_profiler_lite
# ${ops_lite} ${host_kernels} ARGS --model_dir=${LITE_MODEL_DIR}/lite_naive_model
# --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL)
if((NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) AND WITH_TESTING)
lite_cc_test(test_cxx_api_lite SRCS cxx_api_test.cc
DEPS cxx_api_lite model_parser_lite target_wrapper_host
${ops_lite} ${host_kernels} ${x86_kernels}
PROFILE_DEPS basic_profiler_lite
ARGS --model_dir=${LITE_MODEL_DIR}/lite_naive_model
--optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL)
lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL} "lite_naive_model.tar.gz")
add_dependencies(test_cxx_api_lite extern_lite_download_lite_naive_model_tar_gz)
endif()
lite_cc_binary(cxx_api_lite_bin SRCS cxx_api_bin.cc
DEPS
......
......@@ -33,6 +33,7 @@ cc_library(program_lite SRCS program.cc DEPS op_lite kernel_lite)
cc_library(optimizer_lite SRCS optimizer.cc DEPS mir_pass_manager model_parser_lite program_lite)
add_subdirectory(mir)
add_subdirectory(profile)
# for mobile, unnecessary to compile the following testings.
if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
......
# The profiler library and its unit test are only configured when the
# LITE_WITH_PROFILE option is switched on; otherwise this directory
# contributes no targets.
if(LITE_WITH_PROFILE)
  lite_cc_library(basic_profiler_lite
    SRCS basic_profiler.cc)

  lite_cc_test(test_basic_profiler
    SRCS basic_profiler_test.cc
    DEPS basic_profiler_lite)
endif()
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/lite/core/profile/basic_profiler.h"
namespace paddle {
namespace lite {
namespace profile {

// Out-of-class definitions of the column widths BasicTimer passes to
// std::setw when it formats its report.
// Width of the leading key column.
const int BasicTimer::name_w = 10;
// Width of each numeric column.
const int BasicTimer::data_w = 10;

}  // namespace profile
}  // namespace lite
}  // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/*
 * This file implements BasicProfiler, a profiler for basic CPU execution. It
 * reports the minimum, maximum, and average latency of each kernel's
 * execution, together with the number of times the kernel ran.
 */
#pragma once
#include <glog/logging.h>
#include <time.h>
#include <algorithm>
#include <chrono> // NOLINT
#include <iomanip>
#include <limits>
#include <memory>
#include <sstream>
#include <string>
#include <vector>
namespace paddle {
namespace lite {
namespace profile {
/*
 * Base class of all the profile records.
 *
 * CRTP-style forwarder: every public method simply calls the method with the
 * same name on ChildT. ChildT is therefore expected to provide Start, Stop,
 * Log, basic_repr, SetId, SetKey and id itself.
 */
template <typename ChildT>
class TimerBase {
 public:
  // Identity of the record.
  void SetId(int id) { self()->SetId(id); }
  void SetKey(const std::string &key) { self()->SetKey(key); }
  int id() const { return const_self()->id(); }

  // Timing interface.
  void Start() { self()->Start(); }
  void Stop() { self()->Stop(); }
  void Log(uint32_t x) { self()->Log(x); }

  // Textual summary produced by the concrete record.
  std::string basic_repr() const { return const_self()->basic_repr(); }

 protected:
  // View this object as the concrete record type (read-only flavour).
  const ChildT *const_self() const {
    return reinterpret_cast<const ChildT *>(this);
  }
  // View this object as the concrete record type.
  ChildT *self() { return reinterpret_cast<ChildT *>(this); }
};
/*
 * A record that accumulates latency samples for one key (for example one
 * kernel). Each sample updates the running total, the sample count and the
 * observed minimum / maximum; the record can render itself as one row of a
 * fixed-width text table.
 */
class BasicTimer : TimerBase<BasicTimer> {
  uint64_t total_{};  // Sum of every logged sample.
  uint64_t count_{};  // Number of logged samples.
  // Start max_/min_ at opposite extremes so the first sample replaces both.
  uint32_t max_{std::numeric_limits<uint32_t>::min()};
  uint32_t min_{std::numeric_limits<uint32_t>::max()};
  int id_{-1};  // -1 means "not assigned yet"; see id().
  std::string key_;
  // Time point captured by the most recent Start().
  std::chrono::time_point<std::chrono::high_resolution_clock> timer_{};

  // Column widths used by the table formatters below.
  // TODO(Superjomn) make static
  static const int name_w;
  static const int data_w;

 public:
  BasicTimer() = default;
  BasicTimer(int id, const std::string &key) : id_(id), key_(key) {}

  void SetId(int id) { id_ = id; }
  void SetKey(const std::string &key) { key_ = key; }

  // Remember the current time as the beginning of a measured interval.
  void Start() { timer_ = std::chrono::high_resolution_clock::now(); }

  // Log the time elapsed since the last Start(), in whole milliseconds.
  void Stop() {
    auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(
        std::chrono::high_resolution_clock::now() - timer_);
    // duration.count() is a wider signed integer; make the narrowing explicit.
    Log(static_cast<uint32_t>(duration.count()));
  }

  // Number of samples logged so far.
  int count() const { return static_cast<int>(count_); }

  // Add one sample to the statistics.
  void Log(uint32_t timespan) {
    total_ += timespan;
    max_ = std::max(max_, timespan);
    min_ = std::min(min_, timespan);
    count_++;
  }

  // Header row whose columns line up with basic_repr().
  static std::string basic_repr_header() {
    std::stringstream ss;
    ss << std::setw(name_w) << "kernel"   //
       << std::setw(data_w) << "average"  //
       << std::setw(data_w) << "min"      //
       << std::setw(data_w) << "max"      //
       << std::setw(data_w) << "count";
    return ss.str();
  }

  // One table row: key, average, min, max and sample count.
  std::string basic_repr() const {
    std::stringstream ss;
    ss << std::setw(name_w) << key()  //
       << std::setw(data_w) << ave()  //
       << std::setw(data_w) << min()  //
       << std::setw(data_w) << max()  //
       << std::setw(data_w) << count_;
    return ss.str();
  }

  const std::string &key() const { return key_; }

  // Returns the record id; aborts through CHECK if it was never assigned.
  int id() const {
    CHECK_GE(id_, 0) << "id is not inited";
    return id_;
  }

  // Mean of the logged samples. Returns 0 when nothing has been logged, so
  // an unused record does not print NaN from a division by zero.
  double ave() const { return count_ == 0 ? 0. : total_ * 1. / count_; }
  double max() const { return max_; }
  // Smallest logged sample. Returns 0 when nothing has been logged instead
  // of exposing the UINT32_MAX sentinel that min_ is initialised with.
  double min() const { return count_ == 0 ? 0. : min_; }

  // Copy construction stays available because BasicProfiler keeps records
  // in a std::vector; only copy assignment is disabled.
  void operator=(const BasicTimer &) = delete;
};
/*
 * A basic profiler, with each record logs the total latency.
 *
 * Records are owned by the profiler and addressed by the integer id handed
 * out in NewRcd(). When the profiler is destroyed it logs a table with one
 * row per record.
 *
 * TimerT must provide SetId, SetKey, basic_repr and a static
 * basic_repr_header (BasicTimer does).
 */
template <typename TimerT>
class BasicProfiler {
 public:
  explicit BasicProfiler(const std::string &name) : name_(name) {}
  using record_t = TimerT;

  // Lazily constructed profiler instance shared by every caller.
  static BasicProfiler &Global() {
    static std::unique_ptr<BasicProfiler> x(new BasicProfiler("[global]"));
    return *x;
  }

  // Append a new record labelled `key`; its id is its index in the list.
  // The returned reference points into a std::vector, so a later NewRcd()
  // may invalidate it. Keep the id and use record()/mutable_record() for
  // long-lived access.
  record_t &NewRcd(const std::string &key) {
    records_.emplace_back();
    records_.back().SetId(static_cast<int>(records_.size() - 1));
    records_.back().SetKey(key);
    return records_.back();
  }

  // Read-only access to the record with the given id; aborts through CHECK
  // when the id is out of range.
  const record_t &record(int id) {
    // Reject negative ids first, then compare as unsigned so the bound check
    // is not a mixed signed/unsigned comparison.
    CHECK_GE(id, 0);
    CHECK_LT(static_cast<size_t>(id), records_.size());
    return records_[id];
  }

  // Mutable access to the record with the given id; aborts through CHECK
  // when the id is out of range.
  record_t *mutable_record(int id) {
    CHECK_GE(id, 0);
    CHECK_LT(static_cast<size_t>(id), records_.size());
    return &records_[id];
  }

  // One line per record, in creation order.
  std::string basic_repr() const {
    std::stringstream ss;
    for (const auto &rcd : records_) {
      ss << rcd.basic_repr() << "\n";
    }
    return ss.str();
  }

  // Dump the collected statistics to the log on destruction.
  ~BasicProfiler() {
    LOG(INFO) << "Profile dumps:";
    // Take the header from the record type actually stored, so the columns
    // match the rows for any TimerT rather than only for BasicTimer.
    LOG(INFO) << "\n" + record_t::basic_repr_header() + "\n" + basic_repr();
  }

 private:
  std::string name_;
  std::vector<record_t> records_;
};
struct ProfileBlock {
explicit ProfileBlock(int id) : id_(id) {
BasicProfiler<BasicTimer>::Global().mutable_record(id_)->Start();
}
~ProfileBlock() {
BasicProfiler<BasicTimer>::Global().mutable_record(id_)->Stop();
}
private:
int id_{};
};
// Profiles the rest of the enclosing scope under the name `key__`.
//
// Expands to two declarations:
//   1. a function-local `static int` holding the id of a record created in
//      the global BasicProfiler<BasicTimer>; being static, the record is
//      created once per call site, with the stringified `key__` as its key;
//   2. a ProfileBlock local that starts that record immediately and stops it
//      when the enclosing scope is left.
//
// `key__` must be a valid identifier fragment because it is pasted into both
// variable names. The expansion already ends with a semicolon.
#define LITE_PROFILE_ONE(key__) \
static int key__##__profiler_id = \
::paddle::lite::profile::BasicProfiler< \
::paddle::lite::profile::BasicTimer>::Global() \
.NewRcd(#key__) \
.id(); \
::paddle::lite::profile::ProfileBlock key__##profiler__(key__##__profiler_id);
} // namespace profile
} // namespace lite
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/lite/core/profile/basic_profiler.h"
#include <glog/logging.h>
#include <gtest/gtest.h>
#include <chrono> // NOLINT
#include <thread> // NOLINT
namespace paddle {
namespace lite {
namespace profile {
// Smoke test: a default-constructed BasicTimer accepts a key.
TEST(basic_record, init) {
  BasicTimer record;
  const std::string label("hello");
  record.SetKey(label);
}
// Feeds the samples 11..99 into a fresh "fc" record of the global profiler
// and logs the resulting report.
TEST(basic_profile, init) {
  auto& profiler = BasicProfiler<BasicTimer>::Global();
  auto& fc_record = profiler.NewRcd("fc");

  int sample = 11;
  while (sample < 100) {
    fc_record.Log(sample);
    ++sample;
  }

  LOG(INFO) << profiler.basic_repr();
}
// Measures a real wall-clock interval: the profiled scope sleeps for 1200 ms
// before the ProfileBlock created by the macro goes out of scope.
TEST(basic_profile, real_latency) {
  LITE_PROFILE_ONE(test0);
  const std::chrono::milliseconds nap(1200);
  std::this_thread::sleep_for(nap);
}
} // namespace profile
} // namespace lite
} // namespace paddle
......@@ -22,6 +22,9 @@
#include "paddle/fluid/lite/core/mir/node.h"
#include "paddle/fluid/lite/core/op_lite.h"
#include "paddle/fluid/lite/core/op_registry.h"
#ifdef LITE_WITH_PROFILE
#include "paddle/fluid/lite/core/profile/basic_profiler.h"
#endif // LITE_WITH_PROFILE
namespace paddle {
namespace lite {
......@@ -103,9 +106,18 @@ struct Program {
struct Instruct {
Instruct(const std::shared_ptr<OpLite>& op,
std::unique_ptr<KernelBase>&& kernel)
: op_(op), kernel_(std::move(kernel)) {}
: op_(op), kernel_(std::move(kernel)) {
#ifdef LITE_WITH_PROFILE
profile_id_ = profile::BasicProfiler<profile::BasicTimer>::Global()
.NewRcd(kernel_->SerializedKernelType())
.id();
#endif // LITE_WITH_PROFILE
}
void Run() {
#ifdef LITE_WITH_PROFILE
profile::ProfileBlock x(profile_id_);
#endif // LITE_WITH_PROFILE
CHECK(op_);
CHECK(kernel_);
if (first_epoch_) {
......@@ -128,6 +140,11 @@ struct Instruct {
std::shared_ptr<OpLite> op_;
std::unique_ptr<KernelBase> kernel_;
bool first_epoch_{true};
#ifdef LITE_WITH_PROFILE
// for profiler
int profile_id_{-1};
#endif // LITE_WITH_PROFILE
};
/*
......
......@@ -8,6 +8,10 @@ function cmake_x86 {
cmake .. -DWITH_GPU=OFF -DWITH_MKLDNN=OFF -DLITE_WITH_X86=ON ${common_flags}
}
# Configure the x86 CPU-only build used by CI: same switches as the regular
# x86 configuration plus the Lite profiler.
function cmake_x86_for_CI {
    cmake .. \
        -DWITH_GPU=OFF \
        -DWITH_MKLDNN=OFF \
        -DLITE_WITH_X86=ON \
        ${common_flags} \
        -DLITE_WITH_PROFILE=ON
}
# Configure a GPU-enabled build.
#
# Each flag is passed as its own argument, and ${common_flags} is expanded
# unquoted so it splits into separate flags. Previously the whole flag list
# was one quoted word containing the literal text "{common_flags}", so cmake
# never received the individual options.
#
# NOTE(review): -DLITE_WITH_GPU is kept as written; confirm it is the intended
# option name (the top-level CMake options shown alongside use LITE_WITH_CUDA).
function cmake_gpu {
    cmake .. -DWITH_GPU=ON ${common_flags} -DLITE_WITH_GPU=ON
}
......@@ -57,7 +61,7 @@ function cmake_arm {
# Build every make target listed in the file given as $1 (whitespace-separated
# target names).
#
# The job count is the number of processors minus two, clamped to at least 1:
# on hosts with one or two processors the plain subtraction yields 0 or a
# negative number, which make rejects as a -j value.
function build {
    file=$1
    local jobs=$(( $(nproc) - 2 ))
    if [ "$jobs" -lt 1 ]; then
        jobs=1
    fi
    for _test in $(cat $file); do
        make $_test -j$jobs
    done
}
......@@ -81,7 +85,7 @@ function build_test_server {
mkdir -p ./build
cd ./build
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/paddle/build/third_party/install/mklml/lib"
cmake_x86
cmake_x86_for_CI
build $TESTS_FILE
test_lite $TESTS_FILE
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册