From e3fb95aee4dec56bc09aef498c8963852f2f126f Mon Sep 17 00:00:00 2001
From: Yuan Shuai <ysh329@users.noreply.github.com>
Date: Sun, 1 Sep 2019 22:49:30 +0800
Subject: [PATCH] [ARM][CPU] Fix time counter of arm cpu profiler (#1925)

* Fix timer of arm cpu profiler. test=develop

* Fix un-added op in cmake. test=develop

* fix cmake error

* fix cmake error, test=develop

* Fix pass sequence. test=develop

* replace option with lite_option. test=develop

* disable profile mode by default. test=develop

* Fix error option name. test=develop
---
 CMakeLists.txt                     | 44 ++++++++++++-----------
 cmake/configure.cmake              |  3 ++
 cmake/lite_utils.cmake             | 56 ++++++++++++++++++++++++++++++
 lite/core/optimizer.h              |  7 ++--
 lite/core/profile/basic_profiler.h | 19 +++++++---
 lite/core/program.cc               |  4 ++-
 lite/operators/CMakeLists.txt      |  2 ++
 7 files changed, 105 insertions(+), 30 deletions(-)
 create mode 100644 cmake/lite_utils.cmake

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8e1ee0100e..03275b1a8d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -14,8 +14,9 @@
 
 cmake_minimum_required(VERSION 3.0)
 set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
+include(lite_utils)  # defines the lite_option() macro; must be included before its first use below
 
-option(WITH_PADDLE_MOBILE   "Use the paddle-mobile legacy build"    OFF)
+lite_option(WITH_PADDLE_MOBILE   "Use the paddle-mobile legacy build"    OFF)
 if (WITH_PADDLE_MOBILE)
     add_subdirectory(mobile)
     return()
@@ -44,13 +45,13 @@ find_package(Threads REQUIRED)
 include(simd)
 
 ################################ Exposed Configurations #######################################
-option(WITH_DSO         "Compile PaddlePaddle with dynamic linked CUDA" ON)
-option(WITH_AVX         "Compile PaddlePaddle with AVX intrinsics"      ${AVX_FOUND})
-option(WITH_PYTHON      "Compile PaddlePaddle with python interpreter"  ON)
-option(WITH_TESTING     "Compile PaddlePaddle with unit testing"        OFF)
-option(WITH_MKL         "Compile PaddlePaddle with MKL support."        ${AVX_FOUND})
-option(WITH_ARM_DOTPROD "Compile PaddlePaddle with ARM dot production"  ON)
-option(WITH_SYSTEM_BLAS   "Use system blas library"           OFF)
+lite_option(WITH_DSO         "Compile PaddlePaddle with dynamic linked CUDA" ON)
+lite_option(WITH_AVX         "Compile PaddlePaddle with AVX intrinsics"      ON IF AVX_FOUND)  # pass the variable name, not ${AVX_FOUND}: an empty expansion would drop the condition and force ON
+lite_option(WITH_PYTHON      "Compile PaddlePaddle with python interpreter"  ON)
+lite_option(WITH_TESTING     "Compile PaddlePaddle with unit testing"        OFF)
+lite_option(WITH_MKL         "Compile PaddlePaddle with MKL support."        ON IF AVX_FOUND)  # same as WITH_AVX: let if() evaluate AVX_FOUND by name
+lite_option(WITH_ARM_DOTPROD "Compile PaddlePaddle with ARM dot production"  ON)
+lite_option(WITH_SYSTEM_BLAS   "Use system blas library"           OFF)
 # TODO(Superjomn) Remove WITH_ANAKIN option if not needed latter.
 if(ANDROID OR IOS OR ARMLINUX)
     set(WITH_GPU OFF CACHE STRING
@@ -68,20 +69,21 @@ if(ANDROID OR IOS OR ARMLINUX)
 endif()
 
 # for lite, both server and mobile framework.
-option(LITE_WITH_JAVA "Enable Java JNI lib in lite mode" OFF)
-option(LITE_WITH_CUDA "Enable CUDA in lite mode" OFF)
-option(LITE_WITH_X86  "Enable X86 in lite mode"  ON)
-option(LITE_WITH_ARM  "Enable ARM in lite mode"  OFF)
-option(LITE_WITH_NPU  "Enable NPU in lite mode"  OFF)
-option(LITE_WITH_OPENMP "Enable OpenMP in lite framework" ON)
-option(LITE_WITH_OPENCL   "Enable OpenCL support in lite" OFF)
-option(LITE_WITH_FPGA   "Enable FPGA support in lite" OFF)
-option(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK  "Enable light-weight framework" OFF)
-option(LITE_WITH_PROFILE  "Enable profile mode in lite framework"  OFF)
-option(LITE_SHUTDOWN_LOG "Shutdown log system or not." OFF)
-option(LITE_ON_TINY_PUBLISH "Publish tiny predictor lib." OFF)
+lite_option(LITE_WITH_JAVA "Enable Java JNI lib in lite mode" OFF)
+lite_option(LITE_WITH_CUDA "Enable CUDA in lite mode" OFF)
+lite_option(LITE_WITH_X86  "Enable X86 in lite mode"  ON)
+lite_option(LITE_WITH_ARM  "Enable ARM in lite mode"  OFF)
+lite_option(LITE_WITH_NPU  "Enable NPU in lite mode"  OFF)
+lite_option(LITE_WITH_OPENMP "Enable OpenMP in lite framework" ON)
+lite_option(LITE_WITH_OPENCL   "Enable OpenCL support in lite" OFF)
+lite_option(LITE_WITH_FPGA   "Enable FPGA support in lite" OFF)
+lite_option(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK  "Enable light-weight framework" OFF)
+lite_option(LITE_WITH_PROFILE  "Enable profile mode in lite framework"  OFF)
+lite_option(LITE_WITH_PRECISION_PROFILE "Enable precision profile in profile mode ON in lite" OFF IF LITE_WITH_PROFILE)  # declared only when LITE_WITH_PROFILE is true; otherwise lite_option() removes it from the cache
+lite_option(LITE_SHUTDOWN_LOG "Shutdown log system or not." OFF)
+lite_option(LITE_ON_TINY_PUBLISH "Publish tiny predictor lib." OFF)
 # publish options
-option(LITE_BUILD_EXTRA "Enable extra algorithm support in Lite, both kernels and operators" OFF)
+lite_option(LITE_BUILD_EXTRA "Enable extra algorithm support in Lite, both kernels and operators" OFF)
 
 set(THIRD_PARTY_PATH "${CMAKE_BINARY_DIR}/third_party" CACHE STRING
         "A path setting third party libraries download & build directories.")
diff --git a/cmake/configure.cmake b/cmake/configure.cmake
index 8aa7f4a08b..b919c147c7 100644
--- a/cmake/configure.cmake
+++ b/cmake/configure.cmake
@@ -164,6 +164,9 @@ endif()
 
 if (LITE_WITH_PROFILE)
     add_definitions("-DLITE_WITH_PROFILE")
+    if (LITE_WITH_PRECISION_PROFILE)  # nested on purpose: only honoured while LITE_WITH_PROFILE is enabled
+        add_definitions("-DLITE_WITH_PRECISION_PROFILE")  # guards LITE_PRECISION_PROFILE(inst) in lite/core/program.cc
+    endif()
 endif()
 
 if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
diff --git a/cmake/lite_utils.cmake b/cmake/lite_utils.cmake
new file mode 100644
index 0000000000..f07ea85936
--- /dev/null
+++ b/cmake/lite_utils.cmake
@@ -0,0 +1,56 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# ----------------------------------------------------------------------------
+# section: Provides a Paddle-Lite config option macro that wraps CMake option()
+# usage:   lite_option(var "help string to describe the var" <default value> [IF|if <condition>])
+# ----------------------------------------------------------------------------
+macro(lite_option variable description value)
+    set(__value ${value})  # default value; extra arguments before IF are appended to it
+    set(__condition "")  # tokens that follow IF/if are collected here
+    set(__varname "__value")  # name of the list currently receiving extra arguments
+    foreach(arg ${ARGN})
+        if(arg STREQUAL "IF" OR arg STREQUAL "if")  # keyword: switch collection to the condition
+            set(__varname "__condition")
+        else()
+            list(APPEND ${__varname} ${arg})
+        endif()
+    endforeach()
+    unset(__varname)
+    if(__condition STREQUAL "")  # no IF given, or IF followed by nothing (e.g. an empty ${VAR} expansion)
+        set(__condition 2 GREATER 1)  # always-true placeholder, so the option is declared unconditionally
+    endif()
+
+    if(${__condition})  # the collected tokens are re-parsed by if() as an expression
+        if(__value MATCHES ";")  # multi-token default: evaluate it as a boolean expression
+            if(${__value})
+                option(${variable} "${description}" ON)
+            else()
+                option(${variable} "${description}" OFF)
+            endif()
+        elseif(DEFINED ${__value})  # default names a defined variable: use that variable's truth value
+            if(${__value})
+                option(${variable} "${description}" ON)
+            else()
+                option(${variable} "${description}" OFF)
+            endif()
+        else()  # plain literal default such as ON or OFF
+             option(${variable} "${description}" ${__value})
+        endif()
+    else()
+        unset(${variable} CACHE)  # condition is false: remove any cached value instead of declaring the option
+    endif()
+    unset(__condition)  # a macro runs in the caller's scope, so temporaries are removed explicitly
+    unset(__value)
+endmacro()
diff --git a/lite/core/optimizer.h b/lite/core/optimizer.h
index 12df2f94cb..f1b92e0610 100644
--- a/lite/core/optimizer.h
+++ b/lite/core/optimizer.h
@@ -54,14 +54,15 @@ class Optimizer {
 
     if (passes.empty()) {
       RunPasses(std::vector<std::string>{
-          {"lite_quant_dequant_fuse_pass",  //
-           "lite_conv_bn_fuse_pass",        //
+          {"lite_quant_dequant_fuse_pass",     //
+           "lite_conv_elementwise_fuse_pass",  // conv-elemwise-bn
+           "lite_conv_bn_fuse_pass",           //
+           "lite_conv_elementwise_fuse_pass",  // conv-bn-elemwise
            // This pass is disabled to force some opencl kernels selected for
            // final running, otherwise, they will be fused to ARM fusion
            // kernels, and the OpenCL devices will be discarded.
            // TODO(Superjomn) Refine the fusion related design to select fusion
            // kernels for devices automatically.
-           "lite_conv_elementwise_fuse_pass",             //
            "lite_conv_activation_fuse_pass",              //
            "lite_fc_fuse_pass",                           //
            "lite_shuffle_channel_fuse_pass",              //
diff --git a/lite/core/profile/basic_profiler.h b/lite/core/profile/basic_profiler.h
index 4756322cb7..f55a5764a0 100644
--- a/lite/core/profile/basic_profiler.h
+++ b/lite/core/profile/basic_profiler.h
@@ -61,7 +61,7 @@ class BasicTimer : TimerBase<BasicTimer> {
   uint32_t min_{std::numeric_limits<uint32_t>::max()};
   int id_{-1};
   std::string key_;
-  std::chrono::time_point<std::chrono::high_resolution_clock> timer_{};
+  uint64_t timer_{};
 
   // TODO(Superjomn) make static
   static const int name_w;
@@ -73,11 +73,20 @@ class BasicTimer : TimerBase<BasicTimer> {
 
   void SetId(int id) { id_ = id; }
   void SetKey(const std::string &key) { key_ = key; }
-  void Start() { timer_ = std::chrono::high_resolution_clock::now(); }
+  void Start() {
+    // Monotonic timestamp in microseconds; unaffected by wall-clock changes.
+    timer_ = static_cast<uint64_t>(
+        std::chrono::duration_cast<std::chrono::microseconds>(
+            std::chrono::steady_clock::now().time_since_epoch()).count());
+  }
   void Stop() {
-    auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(
-        std::chrono::high_resolution_clock::now() - timer_);
-    Log(duration.count());
+    // Elapsed time since Start() in microseconds (1 second = 1e6 microseconds).
+    auto duration = static_cast<uint64_t>(
+        std::chrono::duration_cast<std::chrono::microseconds>(
+            std::chrono::steady_clock::now().time_since_epoch())
+            .count() -
+        timer_);
+    Log(duration);
   }
 
   int count() const { return count_; }
diff --git a/lite/core/program.cc b/lite/core/program.cc
index ebe2d3bd79..179cdf909a 100644
--- a/lite/core/program.cc
+++ b/lite/core/program.cc
@@ -118,8 +118,10 @@ void RuntimeProgram::Run() {
 
     inst.Run();
 #ifdef LITE_WITH_PROFILE
+#ifdef LITE_WITH_PRECISION_PROFILE
     LITE_PRECISION_PROFILE(inst)
-#endif
+#endif  // LITE_WITH_PRECISION_PROFILE
+#endif  // LITE_WITH_PROFILE
   }
 }
 
diff --git a/lite/operators/CMakeLists.txt b/lite/operators/CMakeLists.txt
index c5cfedb847..4eb38caa3d 100644
--- a/lite/operators/CMakeLists.txt
+++ b/lite/operators/CMakeLists.txt
@@ -63,6 +63,8 @@ add_operator(reduce_mean_op basic SRCS reduce_mean_op.cc DEPS ${op_DEPS})
 add_operator(stack_op basic SRCS stack_op.cc DEPS ${op_DEPS})
 add_operator(cast_op_lite basic SRCS cast_op.cc DEPS ${op_DEPS})
 add_operator(assign_op basic SRCS assign_op.cc DEPS ${op_DEPS})
+add_operator(flatten_op basic SRCS flatten_op.cc DEPS ${op_DEPS})
+add_operator(fake_quantize_range_abs_max_op basic SRCS fake_quantize_range_abs_max.cc DEPS ${op_DEPS})
 
 # for OCR specific
 add_operator(while_op extra SRCS while_op.cc DEPS ${op_DEPS})
-- 
GitLab