From e3fb95aee4dec56bc09aef498c8963852f2f126f Mon Sep 17 00:00:00 2001 From: Yuan Shuai Date: Sun, 1 Sep 2019 22:49:30 +0800 Subject: [PATCH] [ARM][CPU] Fix time counter of arm cpu profiler (#1925) * Fix timer of arm cpu profiler. test=develop * Fix un-added op in cmake.test=develop * fix cmake error * fix cmake error, test=develop * Fix pass sequence. test=develop * replace option with lite_option. test=develop * disable profile mode by default. test=develop * Fix error option name. test=develop --- CMakeLists.txt | 44 ++++++++++++----------- cmake/configure.cmake | 3 ++ cmake/lite_utils.cmake | 56 ++++++++++++++++++++++++++++++ lite/core/optimizer.h | 7 ++-- lite/core/profile/basic_profiler.h | 19 +++++++--- lite/core/program.cc | 4 ++- lite/operators/CMakeLists.txt | 2 ++ 7 files changed, 105 insertions(+), 30 deletions(-) create mode 100644 cmake/lite_utils.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index 8e1ee0100e..03275b1a8d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,8 +14,9 @@ cmake_minimum_required(VERSION 3.0) set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake") +include(lite_utils) -option(WITH_PADDLE_MOBILE "Use the paddle-mobile legacy build" OFF) +lite_option(WITH_PADDLE_MOBILE "Use the paddle-mobile legacy build" OFF) if (WITH_PADDLE_MOBILE) add_subdirectory(mobile) return() @@ -44,13 +45,13 @@ find_package(Threads REQUIRED) include(simd) ################################ Exposed Configurations ####################################### -option(WITH_DSO "Compile PaddlePaddle with dynamic linked CUDA" ON) -option(WITH_AVX "Compile PaddlePaddle with AVX intrinsics" ${AVX_FOUND}) -option(WITH_PYTHON "Compile PaddlePaddle with python interpreter" ON) -option(WITH_TESTING "Compile PaddlePaddle with unit testing" OFF) -option(WITH_MKL "Compile PaddlePaddle with MKL support." 
${AVX_FOUND}) -option(WITH_ARM_DOTPROD "Compile PaddlePaddle with ARM dot production" ON) -option(WITH_SYSTEM_BLAS "Use system blas library" OFF) +lite_option(WITH_DSO "Compile PaddlePaddle with dynamic linked CUDA" ON) +lite_option(WITH_AVX "Compile PaddlePaddle with AVX intrinsics" ON IF AVX_FOUND) # pass the variable name: an empty ${AVX_FOUND} expansion would drop the IF condition and force ON (same for WITH_MKL) +lite_option(WITH_PYTHON "Compile PaddlePaddle with python interpreter" ON) +lite_option(WITH_TESTING "Compile PaddlePaddle with unit testing" OFF) +lite_option(WITH_MKL "Compile PaddlePaddle with MKL support." ON IF AVX_FOUND) +lite_option(WITH_ARM_DOTPROD "Compile PaddlePaddle with ARM dot production" ON) +lite_option(WITH_SYSTEM_BLAS "Use system blas library" OFF) # TODO(Superjomn) Remove WITH_ANAKIN option if not needed latter. if(ANDROID OR IOS OR ARMLINUX) set(WITH_GPU OFF CACHE STRING @@ -68,20 +69,21 @@ if(ANDROID OR IOS OR ARMLINUX) endif() # for lite, both server and mobile framework. -option(LITE_WITH_JAVA "Enable Java JNI lib in lite mode" OFF) -option(LITE_WITH_CUDA "Enable CUDA in lite mode" OFF) -option(LITE_WITH_X86 "Enable X86 in lite mode" ON) -option(LITE_WITH_ARM "Enable ARM in lite mode" OFF) -option(LITE_WITH_NPU "Enable NPU in lite mode" OFF) -option(LITE_WITH_OPENMP "Enable OpenMP in lite framework" ON) -option(LITE_WITH_OPENCL "Enable OpenCL support in lite" OFF) -option(LITE_WITH_FPGA "Enable FPGA support in lite" OFF) -option(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK "Enable light-weight framework" OFF) -option(LITE_WITH_PROFILE "Enable profile mode in lite framework" OFF) -option(LITE_SHUTDOWN_LOG "Shutdown log system or not." OFF) -option(LITE_ON_TINY_PUBLISH "Publish tiny predictor lib." 
OFF) +lite_option(LITE_WITH_JAVA "Enable Java JNI lib in lite mode" OFF) +lite_option(LITE_WITH_CUDA "Enable CUDA in lite mode" OFF) +lite_option(LITE_WITH_X86 "Enable X86 in lite mode" ON) +lite_option(LITE_WITH_ARM "Enable ARM in lite mode" OFF) +lite_option(LITE_WITH_NPU "Enable NPU in lite mode" OFF) +lite_option(LITE_WITH_OPENMP "Enable OpenMP in lite framework" ON) +lite_option(LITE_WITH_OPENCL "Enable OpenCL support in lite" OFF) +lite_option(LITE_WITH_FPGA "Enable FPGA support in lite" OFF) +lite_option(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK "Enable light-weight framework" OFF) +lite_option(LITE_WITH_PROFILE "Enable profile mode in lite framework" OFF) +lite_option(LITE_WITH_PRECISION_PROFILE "Enable precision profile in profile mode ON in lite" OFF IF LITE_WITH_PROFILE) +lite_option(LITE_SHUTDOWN_LOG "Shutdown log system or not." OFF) +lite_option(LITE_ON_TINY_PUBLISH "Publish tiny predictor lib." OFF) # publish options -option(LITE_BUILD_EXTRA "Enable extra algorithm support in Lite, both kernels and operators" OFF) +lite_option(LITE_BUILD_EXTRA "Enable extra algorithm support in Lite, both kernels and operators" OFF) set(THIRD_PARTY_PATH "${CMAKE_BINARY_DIR}/third_party" CACHE STRING "A path setting third party libraries download & build directories.") diff --git a/cmake/configure.cmake b/cmake/configure.cmake index 8aa7f4a08b..b919c147c7 100644 --- a/cmake/configure.cmake +++ b/cmake/configure.cmake @@ -164,6 +164,9 @@ endif() if (LITE_WITH_PROFILE) add_definitions("-DLITE_WITH_PROFILE") + if (LITE_WITH_PRECISION_PROFILE) + add_definitions("-DLITE_WITH_PRECISION_PROFILE") + endif() endif() if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) diff --git a/cmake/lite_utils.cmake b/cmake/lite_utils.cmake new file mode 100644 index 0000000000..f07ea85936 --- /dev/null +++ b/cmake/lite_utils.cmake @@ -0,0 +1,56 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# ---------------------------------------------------------------------------- +# section: Provides a Paddle-Lite config option macro +# usage: lite_option(var "help string to describe the var" <ON|OFF, or a variable/expression to evaluate> [if or IF (condition)]) +# ---------------------------------------------------------------------------- +macro(lite_option variable description value) + set(__value ${value}) + set(__condition "") + set(__varname "__value") + foreach(arg ${ARGN}) + if(arg STREQUAL "IF" OR arg STREQUAL "if") # every argument after IF/if is collected into the condition + set(__varname "__condition") + else() + list(APPEND ${__varname} ${arg}) + endif() + endforeach() + unset(__varname) + if(__condition STREQUAL "") + set(__condition 2 GREATER 1) # no IF clause (or an empty one): use an always-true condition + endif() + + if(${__condition}) + if(__value MATCHES ";") # multi-token value: evaluate it as a boolean expression + if(${__value}) + option(${variable} "${description}" ON) + else() + option(${variable} "${description}" OFF) + endif() + elseif(DEFINED ${__value}) # value names an existing variable: use that variable's truthiness + if(${__value}) + option(${variable} "${description}" ON) + else() + option(${variable} "${description}" OFF) + endif() + else() + option(${variable} "${description}" ${__value}) + endif() + else() + unset(${variable} CACHE) # condition is false: remove any cached value of the option + endif() + unset(__condition) + unset(__value) +endmacro() diff --git a/lite/core/optimizer.h b/lite/core/optimizer.h index 12df2f94cb..f1b92e0610 100644 --- a/lite/core/optimizer.h +++ b/lite/core/optimizer.h @@ -54,14 +54,15 @@ class Optimizer { if (passes.empty()) { RunPasses(std::vector{ - {"lite_quant_dequant_fuse_pass", // - 
"lite_conv_bn_fuse_pass", // + {"lite_quant_dequant_fuse_pass", // + "lite_conv_elementwise_fuse_pass", // conv-elemwise-bn + "lite_conv_bn_fuse_pass", // + "lite_conv_elementwise_fuse_pass", // conv-bn-elemwise // This pass is disabled to force some opencl kernels selected for // final running, otherwise, they will be fused to ARM fusion // kernels, and the OpenCL devices will be discarded. // TODO(Superjomn) Refine the fusion related design to select fusion // kernels for devices automatically. - "lite_conv_elementwise_fuse_pass", // "lite_conv_activation_fuse_pass", // "lite_fc_fuse_pass", // "lite_shuffle_channel_fuse_pass", // diff --git a/lite/core/profile/basic_profiler.h b/lite/core/profile/basic_profiler.h index 4756322cb7..f55a5764a0 100644 --- a/lite/core/profile/basic_profiler.h +++ b/lite/core/profile/basic_profiler.h @@ -61,7 +61,7 @@ class BasicTimer : TimerBase { uint32_t min_{std::numeric_limits::max()}; int id_{-1}; std::string key_; - std::chrono::time_point timer_{}; + uint64_t timer_{};  // start time set by Start(): tick count since the system_clock epoch // TODO(Superjomn) make static static const int name_w; @@ -73,11 +73,20 @@ class BasicTimer : TimerBase { void SetId(int id) { id_ = id; } void SetKey(const std::string &key) { key_ = key; } - void Start() { timer_ = std::chrono::high_resolution_clock::now(); } + void Start() { + timer_ = static_cast( + std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count()); + } void Stop() { - auto duration = std::chrono::duration_cast( - std::chrono::high_resolution_clock::now() - timer_); - Log(duration.count()); + auto duration = static_cast< + uint64_t>( // timer unit: microseconds (1 second = 1e6 microseconds) + std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count() - // NOTE(review): system_clock is wall-clock time and can be adjusted; confirm whether steady_clock is usable here + timer_); + Log(duration); } int count() const { return count_; } diff --git a/lite/core/program.cc b/lite/core/program.cc index ebe2d3bd79..179cdf909a 100644 --- a/lite/core/program.cc +++ b/lite/core/program.cc @@ -118,8 +118,10 @@ 
void RuntimeProgram::Run() { inst.Run(); #ifdef LITE_WITH_PROFILE +#ifdef LITE_WITH_PRECISION_PROFILE LITE_PRECISION_PROFILE(inst) -#endif +#endif // LITE_WITH_PRECISION_PROFILE +#endif // LITE_WITH_PROFILE } } diff --git a/lite/operators/CMakeLists.txt b/lite/operators/CMakeLists.txt index c5cfedb847..4eb38caa3d 100644 --- a/lite/operators/CMakeLists.txt +++ b/lite/operators/CMakeLists.txt @@ -63,6 +63,8 @@ add_operator(reduce_mean_op basic SRCS reduce_mean_op.cc DEPS ${op_DEPS}) add_operator(stack_op basic SRCS stack_op.cc DEPS ${op_DEPS}) add_operator(cast_op_lite basic SRCS cast_op.cc DEPS ${op_DEPS}) add_operator(assign_op basic SRCS assign_op.cc DEPS ${op_DEPS}) +add_operator(flatten_op basic SRCS flatten_op.cc DEPS ${op_DEPS}) +add_operator(fake_quantize_range_abs_max_op basic SRCS fake_quantize_range_abs_max.cc DEPS ${op_DEPS}) # for OCR specific add_operator(while_op extra SRCS while_op.cc DEPS ${op_DEPS}) -- GitLab