[Lite] enable cross compile and run on mobile of lite (#17541)

* add cmake * update * fix proto pd * fix compile * tmp save * fix protobuf device version * fix protobuf and host compile * fix std c++11 support on android * change array to vector to fix ndk c++_static * fix rt and add dockerfile * fix android compile issue with latest merge * init arm kernels * enable run on arm * update format * update format * update format

[Lite] enable cross compile and run on mobile of lite (#17541)
* add cmake * update * fix proto pd * fix compile * tmp save * fix protobuf device version * fix protobuf and host compile * fix std c++11 support on android * change array to vector to fix ndk c++_static * fix rt and add dockerfile * fix android compile issue with latest merge * init arm kernels * enable run on arm * update format * update format * update format
310fd514 · tensor-tang · GitHub · e9f33320 · 310fd514 · 310fd514
51 changed file
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -19,6 +19,19 @@ set(PADDLE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})

 include(system)

+# Cross-compilation setup for the light-weight (mobile) framework. These
+# variables are set ahead of the project() call that follows, so they are
+# already in place when CMake configures the toolchain.
+if(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
+    cmake_minimum_required(VERSION 3.10)
+    # Only Android is supported as a cross-compilation target for now.
+    set(ANDROID TRUE)
+    # android.cmake supplies defaults for ANDROID_NDK, ANDROID_ARCH_ABI,
+    # ANDROID_API_LEVEL and ANDROID_STL_TYPE; host.cmake locates the host
+    # C/C++ compilers (HOST_C_COMPILER / HOST_CXX_COMPILER).
+    include(cross_compiling/android)
+    include(cross_compiling/host)
+    # Forward the ANDROID_* settings to CMake's own Android variables.
+    set(CMAKE_SYSTEM_NAME Android)
+    set(CMAKE_SYSTEM_VERSION ${ANDROID_API_LEVEL})
+    set(CMAKE_ANDROID_ARCH_ABI ${ANDROID_ARCH_ABI})
+    set(CMAKE_ANDROID_NDK ${ANDROID_NDK})
+    set(CMAKE_ANDROID_STL_TYPE ${ANDROID_STL_TYPE})
+endif()
+
 project(paddle CXX C)
 message(STATUS "CXX compiler: ${CMAKE_CXX_COMPILER}, version: "
        "${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}")
@@ -41,7 +54,9 @@ if(WIN32)
    set(CMAKE_EXE_LINKER_FLAGS  "${CMAKE_EXE_LINKER_FLAGS} ${PADDLE_LINK_FLAGS}")
 endif(WIN32)

-find_package(CUDA QUIET)
+# The light-weight (mobile) framework build skips the CUDA lookup entirely.
+if(NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
+    find_package(CUDA QUIET)
+endif()
 find_package(Git REQUIRED)
 find_package(Threads REQUIRED)

@@ -79,6 +94,30 @@ option(PY_VERSION       "Compile PaddlePaddle with python3 support"     ${PY_VER
 option(WITH_FAST_MATH   "Make use of fast math library, might affect the precision to some extent" ON)
 option(WITH_DGC   "Use DGC(Deep Gradient Compression) or not" ON)

+# Mobile targets (Android / iOS): force off every feature that is only
+# meaningful on a server build, and pick size-oriented default build types.
+# The options are boolean, so they are cached as BOOL to keep their cache
+# type consistent with their option() declarations.
+if(ANDROID OR IOS)
+    set(WITH_GPU OFF CACHE BOOL
+        "Disable GPU when cross-compiling for Android and iOS" FORCE)
+    set(WITH_DSO OFF CACHE BOOL
+        "Disable DSO when cross-compiling for Android and iOS" FORCE)
+    set(WITH_AVX OFF CACHE BOOL
+        "Disable AVX when cross-compiling for Android and iOS" FORCE)
+    set(WITH_PYTHON OFF CACHE BOOL
+        "Disable PYTHON when cross-compiling for Android and iOS" FORCE)
+    set(WITH_RDMA OFF CACHE BOOL
+        "Disable RDMA when cross-compiling for Android and iOS" FORCE)
+    set(WITH_MKL OFF CACHE BOOL
+        "Disable MKL when cross-compiling for Android and iOS" FORCE)
+
+    # Only provide defaults; a build type chosen by the user is respected.
+    if(NOT CMAKE_BUILD_TYPE)
+        set(CMAKE_BUILD_TYPE "Release" CACHE STRING
+            "Default use Release in android" FORCE)
+    endif()
+    if(NOT THIRD_PARTY_BUILD_TYPE)
+        set(THIRD_PARTY_BUILD_TYPE "MinSizeRel" CACHE STRING
+            "Default use MinSizeRel in android" FORCE)
+    endif()
+endif()
+
+# Non-mobile builds still need a third-party build type: the external projects
+# are configured with -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}, and the
+# previous unconditional "Release" default is removed further down in this
+# change. Keep "Release" as the fallback when nothing else selected a value.
+if(NOT THIRD_PARTY_BUILD_TYPE)
+    set(THIRD_PARTY_BUILD_TYPE Release)
+endif()
+
 # for lite, both server and mobile framework.
 option(WITH_LITE "Enable lite framework" OFF)
 option(LITE_WITH_CUDA "Enable CUDA in lite mode" OFF)
@@ -89,8 +128,6 @@ option(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK  "Enable light-weight framework" OFF)
 set(THIRD_PARTY_PATH "${CMAKE_BINARY_DIR}/third_party" CACHE STRING
        "A path setting third party libraries download & build directories.")

-set(THIRD_PARTY_BUILD_TYPE Release)
-
 # CMAKE_BUILD_TYPE
 if(NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE STRING
@@ -107,7 +144,7 @@ if (WITH_LITE AND LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
    include(external/gflags)    # download, build, install gflags
    include(external/glog)      # download, build, install glog
    include(external/gtest)     # download, build, install gtest
-    include(external/zlib)     # download, build, install gtest
+    #include(external/zlib)     # download, build, install zlib (disabled for the light-weight framework)
    include(external/protobuf)  # download, build, install protobuf
    include(external/eigen)     # download eigen3

@@ -115,7 +152,7 @@ if (WITH_LITE AND LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
    include(configure)          # add paddle env configuration

    add_definitions(-std=c++11)
-
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
    add_subdirectory(paddle)

    return()

--- a/Dockerfile.android
+++ b/Dockerfile.android
+# Build environment for cross-compiling the lite framework for Android:
+# Ubuntu 16.04 with host gcc/g++, ARM cross gcc, CMake 3.10 and NDK r17c.
+FROM ubuntu:16.04
+
+# Write an apt sources.list template, then substitute the mirror and release.
+RUN echo '\
+deb <mirror> <version> main restricted universe multiverse\n\
+deb <mirror> <version>-updates main restricted universe multiverse\n\
+deb <mirror> <version>-backports main restricted universe multiverse\n\
+deb <mirror> <version>-security main restricted universe multiverse\n'\
+> /etc/apt/sources.list
+# "-i -e" rather than "-ie": the latter is parsed as -i with backup suffix "e"
+# and leaves a stray /etc/apt/sources.liste behind.
+RUN sed -i -e 's|<mirror>|http://mirrors.tuna.tsinghua.edu.cn/ubuntu/|' /etc/apt/sources.list
+RUN sed -i -e 's|<version>|xenial|' /etc/apt/sources.list
+
+# Keep update, install and cleanup in a single layer: a cached "apt-get update"
+# layer would otherwise feed stale package indexes to a later install, and
+# cleaning up in a separate layer does not shrink the image.
+RUN apt-get update && apt-get upgrade -y && \
+    apt-get install -y --no-install-recommends \
+        curl \
+        unzip \
+        git \
+        make \
+        cmake-curses-gui \
+        python \
+        python-pip \
+        python-setuptools \
+        clang-format-5.0 \
+        graphviz \
+        g++-arm-linux-gnueabi \
+        gcc-arm-linux-gnueabi \
+        gcc \
+        g++ && \
+    apt-get autoremove -y && apt-get clean
+RUN ln -s clang-format-5.0 /usr/bin/clang-format
+RUN pip install -i https://pypi.tuna.tsinghua.edu.cn/simple --upgrade pip
+RUN pip install -i https://pypi.tuna.tsinghua.edu.cn/simple wheel
+RUN pip install -i https://pypi.tuna.tsinghua.edu.cn/simple pre-commit
+RUN cd /tmp && curl -O https://dl.google.com/android/repository/android-ndk-r17c-linux-x86_64.zip
+# Replace the distribution cmake/ccmake with CMake 3.10, keeping the
+# originals as *.bak.
+RUN curl -O https://mms-res.cdn.bcebos.com/cmake-3.10.3-Linux-x86_64.tar.gz && \
+        tar xzf cmake-3.10.3-Linux-x86_64.tar.gz && \
+        mv cmake-3.10.3-Linux-x86_64 /opt/cmake-3.10 && \
+        mv /usr/bin/cmake /usr/bin/cmake.bak && ln -s /opt/cmake-3.10/bin/cmake /usr/bin/cmake && \
+        mv /usr/bin/ccmake /usr/bin/ccmake.bak && ln -s /opt/cmake-3.10/bin/ccmake /usr/bin/ccmake
+RUN cd /opt && unzip /tmp/android-ndk-r17c-linux-x86_64.zip
+# cmake/cross_compiling/android.cmake falls back to NDK_ROOT when ANDROID_NDK
+# is not given.
+ENV NDK_ROOT /opt/android-ndk-r17c
--- a/cmake/cross_compiling/android.cmake
+++ b/cmake/cross_compiling/android.cmake
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Android cross-compilation defaults and validation. Does nothing unless the
+# including file has set ANDROID.
+if(NOT ANDROID)
+    return()
+endif()
+
+# NDK location: an explicit ANDROID_NDK wins; otherwise fall back to the
+# NDK_ROOT environment variable (read at configure time).
+if(NOT DEFINED ANDROID_NDK)
+    set(ANDROID_NDK $ENV{NDK_ROOT})
+endif()
+# Checked outside the fallback so that an explicitly empty ANDROID_NDK is
+# rejected as well.
+if(NOT ANDROID_NDK)
+    message(FATAL_ERROR "Must set ANDROID_NDK or env NDK_ROOT")
+endif()
+
+if(NOT DEFINED ANDROID_ARCH_ABI)
+    set(ANDROID_ARCH_ABI "arm64-v8a" CACHE STRING "Choose android platform")
+endif()
+
+if(NOT DEFINED ANDROID_API_LEVEL)
+    set(ANDROID_API_LEVEL "22")
+endif()
+
+if(NOT DEFINED ANDROID_STL_TYPE)
+    set(ANDROID_STL_TYPE "c++_static" CACHE STRING "stl type")
+endif()
+
+# Restrict the ABI to the known values and expose them as choices in
+# cmake-gui / ccmake.
+set(ANDROID_ARCH_ABI_LIST "arm64-v8a" "armeabi-v7a" "armeabi-v6" "armeabi"
+    "mips" "mips64" "x86" "x86_64")
+set_property(CACHE ANDROID_ARCH_ABI PROPERTY STRINGS ${ANDROID_ARCH_ABI_LIST})
+if (NOT ANDROID_ARCH_ABI IN_LIST ANDROID_ARCH_ABI_LIST)
+    message(FATAL_ERROR "ANDROID_ARCH_ABI must be in one of ${ANDROID_ARCH_ABI_LIST}")
+endif()
+
+# Actually request NEON for armeabi-v7a so that the status message below is
+# true; CMake's Android support reads CMAKE_ANDROID_ARM_NEON for this ABI.
+if("${ANDROID_ARCH_ABI}" STREQUAL "armeabi-v7a")
+    set(CMAKE_ANDROID_ARM_NEON ON)
+    message(STATUS "NEON is enabled on arm-v7a")
+endif()
+
+# Same validation for the STL flavour.
+set(ANDROID_STL_TYPE_LITS "gnustl_static" "c++_static")
+set_property(CACHE ANDROID_STL_TYPE PROPERTY STRINGS ${ANDROID_STL_TYPE_LITS})
+if (NOT ANDROID_STL_TYPE IN_LIST ANDROID_STL_TYPE_LITS)
+    message(FATAL_ERROR "ANDROID_STL_TYPE must be in one of ${ANDROID_STL_TYPE_LITS}")
+endif()
+
+set(ANDROID_PIE TRUE)
--- a/cmake/cross_compiling/host.cmake
+++ b/cmake/cross_compiling/host.cmake
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+# http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Locate the compilers of the build machine. They are needed while
+# cross-compiling for anything that must run on the host, e.g. the host build
+# of protobuf, which is configured with HOST_C_COMPILER / HOST_CXX_COMPILER.
+# The CC / CXX environment variables take precedence; otherwise gcc / g++ are
+# searched for.
+set(HOST_C_COMPILER $ENV{CC})
+set(HOST_CXX_COMPILER $ENV{CXX})
+
+if(NOT HOST_C_COMPILER)
+    # PATHS (not PATH) is the find_program keyword for extra search
+    # directories.
+    find_program(HOST_C_COMPILER NAMES gcc PATHS
+        /usr/bin
+        /usr/local/bin)
+endif()
+
+if(NOT HOST_CXX_COMPILER)
+    find_program(HOST_CXX_COMPILER NAMES g++ PATHS
+        /usr/bin
+        /usr/local/bin)
+endif()
+
+# The paths are quoted so that an empty value cannot break the if() syntax.
+if(NOT HOST_C_COMPILER OR NOT EXISTS "${HOST_C_COMPILER}")
+    message(FATAL_ERROR "Cannot find host C compiler. export CC=/path/to/cc")
+endif()
+
+if(NOT HOST_CXX_COMPILER OR NOT EXISTS "${HOST_CXX_COMPILER}")
+    message(FATAL_ERROR "Cannot find host CXX compiler. export CXX=/path/to/c++")
+endif()
+
+message(STATUS "Found host C compiler: " ${HOST_C_COMPILER})
+message(STATUS "Found host CXX compiler: " ${HOST_CXX_COMPILER})
+
--- a/cmake/external/gflags.cmake
+++ b/cmake/external/gflags.cmake
@@ -25,6 +25,24 @@ ENDIF(WIN32)

 INCLUDE_DIRECTORIES(${GFLAGS_INCLUDE_DIR})

+# Compiler and flag settings forwarded to the gflags external build through
+# the CMAKE_ARGS of ExternalProject_Add below.
+set(OPTIONAL_ARGS
+    "-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}"
+    "-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}"
+    "-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}"
+    "-DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}"
+    "-DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}"
+    "-DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}"
+    "-DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG}"
+    "-DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}")
+
+# For Android, additionally forward the cross-compilation settings.
+if(ANDROID)
+  list(APPEND OPTIONAL_ARGS
+       "-DCMAKE_SYSTEM_NAME=${CMAKE_SYSTEM_NAME}"
+       "-DCMAKE_SYSTEM_VERSION=${CMAKE_SYSTEM_VERSION}"
+       "-DCMAKE_ANDROID_ARCH_ABI=${CMAKE_ANDROID_ARCH_ABI}"
+       "-DCMAKE_ANDROID_NDK=${CMAKE_ANDROID_NDK}"
+       "-DCMAKE_ANDROID_STL_TYPE=${CMAKE_ANDROID_STL_TYPE}")
+endif()
+
+
 ExternalProject_Add(
    extern_gflags
    ${EXTERNAL_PROJECT_LOG_ARGS}
@@ -32,19 +50,12 @@ ExternalProject_Add(
    GIT_TAG         77592648e3f3be87d6c7123eb81cbad75f9aef5a
    PREFIX          ${GFLAGS_SOURCES_DIR}
    UPDATE_COMMAND  ""
-    CMAKE_ARGS      -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
-                    -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-                    -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
-                    -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}
-                    -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}
-                    -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
-                    -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG}
-                    -DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}
-                    -DBUILD_STATIC_LIBS=ON
+    CMAKE_ARGS      -DBUILD_STATIC_LIBS=ON
                    -DCMAKE_INSTALL_PREFIX=${GFLAGS_INSTALL_DIR}
                    -DCMAKE_POSITION_INDEPENDENT_CODE=ON
                    -DBUILD_TESTING=OFF
                    -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
+                    ${OPTIONAL_ARGS}
                    ${EXTERNAL_OPTIONAL_ARGS}
    CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GFLAGS_INSTALL_DIR}
                     -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON

--- a/cmake/external/glog.cmake
+++ b/cmake/external/glog.cmake
@@ -31,6 +31,24 @@ INCLUDE_DIRECTORIES(${GLOG_INCLUDE_DIR})
 SET(GLOG_REPOSITORY "https://github.com/google/glog.git")
 SET(GLOG_TAG "v0.3.5")

+# Compiler and flag settings forwarded to the glog external build through the
+# CMAKE_ARGS of ExternalProject_Add below.
+# NOTE(review): the CMAKE_ARGS this replaces passed ${GLOG_CMAKE_CXX_FLAGS}
+# for CMAKE_CXX_FLAGS; confirm that switching to ${CMAKE_CXX_FLAGS} is
+# intended.
+set(OPTIONAL_ARGS
+    "-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}"
+    "-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}"
+    "-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}"
+    "-DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}"
+    "-DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}"
+    "-DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}"
+    "-DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG}"
+    "-DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}")
+
+# For Android, additionally forward the cross-compilation settings.
+if(ANDROID)
+  list(APPEND OPTIONAL_ARGS
+       "-DCMAKE_SYSTEM_NAME=${CMAKE_SYSTEM_NAME}"
+       "-DCMAKE_SYSTEM_VERSION=${CMAKE_SYSTEM_VERSION}"
+       "-DCMAKE_ANDROID_ARCH_ABI=${CMAKE_ANDROID_ARCH_ABI}"
+       "-DCMAKE_ANDROID_NDK=${CMAKE_ANDROID_NDK}"
+       "-DCMAKE_ANDROID_STL_TYPE=${CMAKE_ANDROID_STL_TYPE}")
+endif()
+
+
 ExternalProject_Add(
    extern_glog
    ${EXTERNAL_PROJECT_LOG_ARGS}
@@ -39,14 +57,7 @@ ExternalProject_Add(
    GIT_TAG         ${GLOG_TAG}
    PREFIX          ${GLOG_SOURCES_DIR}
    UPDATE_COMMAND  ""
-    CMAKE_ARGS      -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
-                    -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-                    -DCMAKE_CXX_FLAGS=${GLOG_CMAKE_CXX_FLAGS}
-                    -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}
-                    -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}
-                    -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
-                    -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG}
-                    -DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}
+    CMAKE_ARGS      ${OPTIONAL_ARGS}
                    -DCMAKE_INSTALL_PREFIX=${GLOG_INSTALL_DIR}
                    -DCMAKE_INSTALL_LIBDIR=${GLOG_INSTALL_DIR}/lib
                    -DCMAKE_POSITION_INDEPENDENT_CODE=ON

--- a/cmake/external/protobuf.cmake
+++ b/cmake/external/protobuf.cmake
@@ -142,7 +142,6 @@ IF (WIN32)
 ENDIF(WIN32)

 if (NOT "${PROTOBUF_ROOT}" STREQUAL "")
-
    find_path(PROTOBUF_INCLUDE_DIR google/protobuf/message.h PATHS ${PROTOBUF_ROOT}/include NO_DEFAULT_PATH)
    find_library(PROTOBUF_LIBRARY protobuf libprotobuf.lib PATHS ${PROTOBUF_ROOT}/lib NO_DEFAULT_PATH)
    find_library(PROTOBUF_LITE_LIBRARY protobuf-lite libprotobuf-lite.lib PATHS ${PROTOBUF_ROOT}/lib NO_DEFAULT_PATH)
@@ -178,12 +177,28 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST)
        "${PROTOBUF_INSTALL_DIR}/bin/protoc${CMAKE_EXECUTABLE_SUFFIX}"
         PARENT_SCOPE)

+    SET(PROTOBUF_REPO "https://github.com/protocolbuffers/protobuf.git")
+    SET(PROTOBUF_TAG "9f75c5aa851cd877fb0d93ccc31b8567a6706546")
    SET(OPTIONAL_CACHE_ARGS "")
    SET(OPTIONAL_ARGS "")
+
    IF(BUILD_FOR_HOST)
-        SET(OPTIONAL_ARGS "-Dprotobuf_WITH_ZLIB=OFF")
-    ELSE()
        SET(OPTIONAL_ARGS
+            "-DCMAKE_C_COMPILER=${HOST_C_COMPILER}"
+            "-DCMAKE_CXX_COMPILER=${HOST_CXX_COMPILER}"
+            "-Dprotobuf_WITH_ZLIB=OFF"
+            "-DZLIB_ROOT:FILEPATH=${ZLIB_ROOT}")
+        SET(OPTIONAL_CACHE_ARGS "-DZLIB_ROOT:STRING=${ZLIB_ROOT}")
+    ELSE()
+        # Upstream protobuf fails to compile with the Android STL c++_static, so use a patched fork.
+        SET(PROTOBUF_REPO "https://github.com/tensor-tang/protobuf.git")
+        SET(PROTOBUF_TAG "mobile")
+        SET(OPTIONAL_ARGS "-Dprotobuf_WITH_ZLIB=OFF"
+            "-DCMAKE_SYSTEM_NAME=${CMAKE_SYSTEM_NAME}"
+            "-DCMAKE_SYSTEM_VERSION=${CMAKE_SYSTEM_VERSION}"
+            "-DCMAKE_ANDROID_ARCH_ABI=${CMAKE_ANDROID_ARCH_ABI}"
+            "-DCMAKE_ANDROID_NDK=${CMAKE_ANDROID_NDK}"
+            "-DCMAKE_ANDROID_STL_TYPE=${CMAKE_ANDROID_STL_TYPE}"
            "-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}"
            "-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}"
            "-DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}"
@@ -191,25 +206,18 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST)
            "-DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}"
            "-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}"
            "-DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}"
-            "-DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}"
-            "-Dprotobuf_WITH_ZLIB=ON"
-            "-DZLIB_ROOT:FILEPATH=${ZLIB_ROOT}"
-            ${EXTERNAL_OPTIONAL_ARGS})
-        SET(OPTIONAL_CACHE_ARGS "-DZLIB_ROOT:STRING=${ZLIB_ROOT}")
+            "-DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}")
    ENDIF()
    IF(WIN32)
        SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} "-DCMAKE_GENERATOR_PLATFORM=x64")
    ENDIF()

-    SET(PROTOBUF_REPO "https://github.com/protocolbuffers/protobuf.git")
-    SET(PROTOBUF_TAG "9f75c5aa851cd877fb0d93ccc31b8567a6706546")
-
    ExternalProject_Add(
        ${TARGET_NAME}
        ${EXTERNAL_PROJECT_LOG_ARGS}
        PREFIX          ${PROTOBUF_SOURCES_DIR}
        UPDATE_COMMAND  ""
-        DEPENDS         zlib
+        #DEPENDS         zlib
        GIT_REPOSITORY  ${PROTOBUF_REPO}
        GIT_TAG         ${PROTOBUF_TAG}
        CONFIGURE_COMMAND
@@ -233,6 +241,13 @@ ENDFUNCTION()

 SET(PROTOBUF_VERSION 3.1.0)

+# For the light-weight framework, additionally build protobuf with the host
+# compilers (BUILD_FOR_HOST = TRUE) and use the protoc from that build as
+# PROTOBUF_PROTOC_EXECUTABLE.
+IF(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
+    build_protobuf(protobuf_host TRUE)
+    LIST(APPEND external_project_dependencies protobuf_host)
+    SET(PROTOBUF_PROTOC_EXECUTABLE ${protobuf_host_PROTOC_EXECUTABLE}
+        CACHE FILEPATH "protobuf executable." FORCE)
+ENDIF()
+
 IF(NOT PROTOBUF_FOUND)
    build_protobuf(extern_protobuf FALSE)

@@ -245,7 +260,12 @@ IF(NOT PROTOBUF_FOUND)
    SET(PROTOBUF_PROTOC_LIBRARY ${extern_protobuf_PROTOC_LIBRARY}
        CACHE FILEPATH "protoc library." FORCE)

-    SET(PROTOBUF_PROTOC_EXECUTABLE ${extern_protobuf_PROTOC_EXECUTABLE}
-        CACHE FILEPATH "protobuf executable." FORCE)
-    PROMPT_PROTOBUF_LIB(extern_protobuf)
+    IF(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
+        PROMPT_PROTOBUF_LIB(protobuf_host extern_protobuf)
+    ELSE()
+        SET(PROTOBUF_PROTOC_EXECUTABLE ${extern_protobuf_PROTOC_EXECUTABLE}
+            CACHE FILEPATH "protobuf executable." FORCE)
+        PROMPT_PROTOBUF_LIB(extern_protobuf)
+    ENDIF()
+
 ENDIF(NOT PROTOBUF_FOUND)
--- a/cmake/generic.cmake
+++ b/cmake/generic.cmake
@@ -93,7 +93,10 @@ include_directories(${CMAKE_CURRENT_BINARY_DIR})
 if(NOT APPLE)
  find_package(Threads REQUIRED)
  link_libraries(${CMAKE_THREAD_LIBS_INIT})
-  set(CMAKE_CXX_LINK_EXECUTABLE "${CMAKE_CXX_LINK_EXECUTABLE} -pthread -ldl -lrt")
+  set(CMAKE_CXX_LINK_EXECUTABLE "${CMAKE_CXX_LINK_EXECUTABLE} -pthread -ldl")
+  if (NOT ANDROID)
+    set(CMAKE_CXX_LINK_EXECUTABLE "${CMAKE_CXX_LINK_EXECUTABLE} -lrt")
+  endif()
 endif(NOT APPLE)

 set_property(GLOBAL PROPERTY FLUID_MODULES "")

--- a/paddle/fluid/incubate/CMakeLists.txt
+++ b/paddle/fluid/incubate/CMakeLists.txt
-include_directories(lite)
\ No newline at end of file
+include_directories(lite)
--- a/paddle/fluid/inference/analysis/dot.h
+++ b/paddle/fluid/inference/analysis/dot.h
@@ -23,10 +23,10 @@
 #include <string>
 #include <unordered_map>
 #include <vector>
-#include "paddle/fluid/lite/utils/logging.h"
-#ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
+// #include "paddle/fluid/lite/utils/logging.h"
+// #ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
 #include <glog/logging.h>
-#endif
+// #endif

 namespace paddle {
 namespace inference {

--- a/paddle/fluid/lite/CMakeLists.txt
+++ b/paddle/fluid/lite/CMakeLists.txt
@@ -39,3 +39,4 @@ add_subdirectory(kernels)
 add_subdirectory(model_parser)
 add_subdirectory(utils)
 add_subdirectory(api)
+ 
--- a/paddle/fluid/lite/api/CMakeLists.txt
+++ b/paddle/fluid/lite/api/CMakeLists.txt
-set(cxx_api_lite_deps scope_lite optimizer_lite target_wrapper_host optimizer_lite model_parser_lite)
+set(cxx_api_lite_deps scope_lite optimizer_lite target_wrapper_host model_parser_lite)
 if(LITE_WITH_CUDA)
    set(cxx_api_lite_deps ${cxx_api_lite_deps} kernels_cuda)
    cc_library(cxx_api_lite_cuda SRCS cxx_api.cc DEPS ${cxx_api_lite_deps} target_wrapper_cuda)
@@ -17,7 +17,7 @@ endif()
 cc_library(light_api_lite SRCS light_api.cc DEPS ${light_api_deps} ${ops_lite} ${host_kernels})

 message(STATUS "get ops ${ops_lite}")
-message(STATUS "get kernels ${host_kernels}")
+message(STATUS "get kernels ${host_kernels} ${arm_kernels}")

 include(ExternalProject)
 set(LITE_URL "http://paddle-inference-dist.bj.bcebos.com" CACHE STRING "inference download url")
@@ -36,4 +36,11 @@ endif(WITH_TESTING)

 lite_cc_test(test_light_api SRCS light_api_test.cc DEPS light_api_lite ARGS --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL)

-cc_binary(cxx_api_lite_bin SRCS cxx_api_bin.cc DEPS cxx_api_lite model_parser_lite target_wrapper_host ${ops_lite} ${host_kernels})
+cc_binary(cxx_api_lite_bin SRCS cxx_api_bin.cc
+    DEPS 
+    cxx_api_lite
+    model_parser_lite 
+    target_wrapper_host 
+    mir_passes 
+    ${ops_lite} ${host_kernels} ${arm_kernels})
+ 
--- a/paddle/fluid/lite/api/cxx_api_bin.cc
+++ b/paddle/fluid/lite/api/cxx_api_bin.cc
@@ -13,28 +13,36 @@
 // limitations under the License.

 #include "paddle/fluid/lite/api/cxx_api.h"
+
+#ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
 #include "paddle/fluid/lite/core/mir/passes.h"
+#endif
+
 #include "paddle/fluid/lite/core/op_registry.h"

 namespace paddle {
 namespace lite {

 void Run(const char* model_dir) {
-  lite::Executor predictor;
-#ifndef LITE_WITH_CUDA
-  std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)}});
-#else
-  std::vector<Place> valid_places({
-      Place{TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kNCHW)},
-      Place{TARGET(kCUDA), PRECISION(kFloat), DATALAYOUT(kNCHW)},
-      Place{TARGET(kCUDA), PRECISION(kAny), DATALAYOUT(kNCHW)},
-      Place{TARGET(kHost), PRECISION(kAny), DATALAYOUT(kNCHW)},
-      Place{TARGET(kCUDA), PRECISION(kAny), DATALAYOUT(kAny)},
-      Place{TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny)},
-  });
-#endif
+  lite::ExecutorLite predictor;
+  // #ifndef LITE_WITH_CUDA
+  //   std::vector<Place> valid_places({Place{TARGET(kHost),
+  //   PRECISION(kFloat)}});
+  // #elif defined(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
+  // #else
+  //   std::vector<Place> valid_places({
+  //       Place{TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kNCHW)},
+  //       Place{TARGET(kCUDA), PRECISION(kFloat), DATALAYOUT(kNCHW)},
+  //       Place{TARGET(kCUDA), PRECISION(kAny), DATALAYOUT(kNCHW)},
+  //       Place{TARGET(kHost), PRECISION(kAny), DATALAYOUT(kNCHW)},
+  //       Place{TARGET(kCUDA), PRECISION(kAny), DATALAYOUT(kAny)},
+  //       Place{TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny)},
+  //   });
+  // #endif
+
+  std::vector<Place> valid_places({Place{TARGET(kARM), PRECISION(kFloat)}});

-  predictor.Build(model_dir, Place{TARGET(kCUDA), PRECISION(kFloat)},
+  predictor.Build(model_dir, Place{TARGET(kARM), PRECISION(kFloat)},
                  valid_places);

  auto* input_tensor = predictor.GetInput(0);
@@ -71,12 +79,12 @@ USE_LITE_OP(fc);
 USE_LITE_OP(scale);
 USE_LITE_OP(feed);
 USE_LITE_OP(fetch);
-USE_LITE_OP(io_copy);
-USE_LITE_KERNEL(fc, kHost, kFloat, kNCHW, def);
-USE_LITE_KERNEL(mul, kHost, kFloat, kNCHW, def);
-USE_LITE_KERNEL(scale, kHost, kFloat, kNCHW, def);
-USE_LITE_KERNEL(feed, kHost, kAny, kAny, def);
-USE_LITE_KERNEL(fetch, kHost, kAny, kAny, def);
+// USE_LITE_OP(io_copy);
+USE_LITE_KERNEL(fc, kARM, kFloat, kNCHW, def);
+USE_LITE_KERNEL(mul, kARM, kFloat, kNCHW, def);
+USE_LITE_KERNEL(scale, kARM, kFloat, kNCHW, def);
+USE_LITE_KERNEL(feed, kARM, kAny, kAny, def);
+USE_LITE_KERNEL(fetch, kARM, kAny, kAny, def);

 #ifdef LITE_WITH_CUDA
 USE_LITE_KERNEL(mul, kCUDA, kFloat, kNCHW, def);

--- a/paddle/fluid/lite/core/CMakeLists.txt
+++ b/paddle/fluid/lite/core/CMakeLists.txt
@@ -16,7 +16,7 @@ proto_library(framework_proto_lite SRCS framework.proto)

 cc_library(kernel_lite SRCS kernel.cc DEPS type_system target_wrapper_lite)
 cc_library(variable_lite SRCS variable.cc)
-cc_library(op_registry_lite SRCS op_registry.cc)
+cc_library(op_registry_lite SRCS op_registry.cc DEPS framework_proto_lite)
 cc_library(scope_lite SRCS scope.cc)
 cc_library(context_lite SRCS context.cc DEPS any_lite)
 cc_library(op_lite SRCS op_lite.cc DEPS scope_lite op_registry_lite compatible_pb_lite)
@@ -46,3 +46,4 @@ lite_cc_test(test_tensor_lite SRCS lite_tensor_test.cc DEPS lite_tensor)
 lite_cc_test(test_type_system SRCS type_system_test.cc DEPS type_system utils_lite)
 #lite_cc_test(test_optimizer_lite SRCS optimizer_test.cc DEPS mir_pass_manager program_fake_utils mir_passes optimizer_lite fc_op_lite)
 lite_cc_test(test_types_lite SRCS types_test.cc DEPS types_lite)
+ 
--- a/paddle/fluid/lite/core/mir/CMakeLists.txt
+++ b/paddle/fluid/lite/core/mir/CMakeLists.txt
-cc_library(mir_node SRCS node.cc)
+cc_library(mir_node SRCS node.cc DEPS framework_proto_lite)
 cc_library(mir_ssa_graph SRCS ssa_graph.cc DEPS mir_node)
 cc_library(mir_pass SRCS pass.cc DEPS mir_ssa_graph)
 cc_library(mir_pass_manager SRCS pass_manager.cc DEPS mir_pass mir_ssa_graph mir_passes)
@@ -48,3 +48,4 @@ if (LITE_WITH_CUDA)
 endif()
 cc_test(test_variable_place_infrence_pass SRCS variable_place_inference_pass_test.cc DEPS
        ${test_variable_place_infrence_pass_DEPS})
+ 
--- a/paddle/fluid/lite/core/mir/pass_registry.h
+++ b/paddle/fluid/lite/core/mir/pass_registry.h
@@ -15,7 +15,6 @@
 #pragma once

 #include <string>
-#include "paddle/fluid/lite/core/mir/pass.h"
 #include "paddle/fluid/lite/core/mir/pass_manager.h"

 namespace paddle {
@@ -32,6 +31,10 @@ class PassRegistry {
  bool Touch() const { return true; }
 };

+}  // namespace mir
+}  // namespace lite
+}  // namespace paddle
+
 #define REGISTER_MIR_PASS(name__, class__)                                \
  paddle::lite::mir::PassRegistry mir_pass_registry##name__(#name__,      \
                                                            new class__); \
@@ -43,7 +46,3 @@ class PassRegistry {
  extern bool mir_pass_registry##name__##_fake();              \
  static bool mir_pass_usage##name__ __attribute__((unused)) = \
      mir_pass_registry##name__##_fake();
-
-}  // namespace mir
-}  // namespace lite
-}  // namespace paddle
--- a/paddle/fluid/lite/core/naive_test_model.py
+++ b/paddle/fluid/lite/core/naive_test_model.py
+#   Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import numpy
 import sys, os
 import numpy as np
@@ -26,8 +40,6 @@ data_1 = np.array(numpy.random.random([100, 100]), dtype='float32')

 #fluid.default_main_program().desc.

-
-
 #prog = fluid.compiler.CompiledProgram(fluid.default_main_program())
 prog = fluid.default_main_program()

@@ -36,11 +48,9 @@ prog = fluid.default_main_program()
 with open('main_program.pb', 'wb') as f:
    f.write(prog.desc.serialize_to_string())

-
 #outs = exe.run(program=prog, feed={'a':data_1, }, fetch_list=[cost])

 sys.exit(0)
 fluid.io.save_inference_model("./model2", [a.name], [a1], exe)

 print(numpy.array(outs))
-
--- a/paddle/fluid/lite/core/op_lite.cc
+++ b/paddle/fluid/lite/core/op_lite.cc
@@ -71,7 +71,7 @@ bool OpLite::Run() {
 bool OpLite::Attach(const OpDesc &opdesc, lite::Scope *scope) {
  // valid_places_.clear();
  CHECK(scope != nullptr);
-  //CHECK(!op_info_.get());
+  // CHECK(!op_info_.get());
  scope_ = scope;
  op_info_.reset(new OpInfo);  // Force clean the out-of-date infomation.
  op_info_->Build(opdesc.ReadonlyProto());

--- a/paddle/fluid/lite/core/op_lite.h
+++ b/paddle/fluid/lite/core/op_lite.h
@@ -131,7 +131,6 @@ class OpLite : public Registry {
    return var->GetMutable<T>();
  }

-
 protected:
  lite::Scope *scope_{};
  std::unique_ptr<KernelBase> kernel_;

--- a/paddle/fluid/lite/core/op_registry.cc
+++ b/paddle/fluid/lite/core/op_registry.cc
@@ -59,6 +59,9 @@ std::list<std::unique_ptr<KernelBase>> KernelRegistry::Create(
    case TARGET(kCUDA): {
      CREATE_KERNEL(kCUDA);
    } break;
+    case TARGET(kARM): {
+      CREATE_KERNEL(kARM);
+    } break;
    default:
      CHECK(false) << "not supported kernel target " << TargetToStr(target);
  }
@@ -67,7 +70,10 @@ std::list<std::unique_ptr<KernelBase>> KernelRegistry::Create(
  return std::list<std::unique_ptr<KernelBase>>();
 }

-KernelRegistry::KernelRegistry() {
+KernelRegistry::KernelRegistry()
+    : registries_(static_cast<int>(TARGET(NUM)) *
+                  static_cast<int>(PRECISION(NUM)) *
+                  static_cast<int>(DATALAYOUT(NUM))) {
 #define INIT_FOR(target__, precision__, layout__)                            \
  registries_[KernelRegistry::GetKernelOffset<TARGET(target__),              \
                                              PRECISION(precision__),        \
@@ -79,10 +85,15 @@ KernelRegistry::KernelRegistry() {
  // Currently, just register 2 kernel targets.
  INIT_FOR(kCUDA, kFloat, kNCHW);
  INIT_FOR(kCUDA, kAny, kNCHW);
+  INIT_FOR(kCUDA, kAny, kAny);
+
  INIT_FOR(kHost, kFloat, kNCHW);
  INIT_FOR(kHost, kAny, kNCHW);
  INIT_FOR(kHost, kAny, kAny);
-  INIT_FOR(kCUDA, kAny, kAny);
+
+  INIT_FOR(kARM, kFloat, kNCHW);
+  INIT_FOR(kARM, kAny, kNCHW);
+  INIT_FOR(kARM, kAny, kAny);
 #undef INIT_FOR
 }


--- a/paddle/fluid/lite/core/op_registry.h
+++ b/paddle/fluid/lite/core/op_registry.h
@@ -19,6 +19,7 @@
 #include <string>
 #include <unordered_map>
 #include <utility>
+#include <vector>
 #include "paddle/fluid/lite/core/kernel.h"
 #include "paddle/fluid/lite/core/op_lite.h"
 #include "paddle/fluid/lite/core/target_wrapper.h"
@@ -75,7 +76,11 @@ class KernelRegistry final {
              KernelRegistryForTarget<TARGET(kHost), PRECISION(kAny),
                                      DATALAYOUT(kAny)> *,  //
              KernelRegistryForTarget<TARGET(kCUDA), PRECISION(kAny),
-                                      DATALAYOUT(kAny)> *  //
+                                      DATALAYOUT(kAny)> *,  //
+              KernelRegistryForTarget<TARGET(kARM), PRECISION(kAny),
+                                      DATALAYOUT(kAny)> *,  //
+              KernelRegistryForTarget<TARGET(kARM), PRECISION(kFloat),
+                                      DATALAYOUT(kNCHW)> *  //
              >;

  KernelRegistry();
@@ -92,8 +97,9 @@ class KernelRegistry final {
    using kernel_registor_t =
        KernelRegistryForTarget<Target, Precision, Layout>;
    auto &varient = registries_[GetKernelOffset<Target, Precision, Layout>()];
-    varient.template get<kernel_registor_t *>()->Register(name,
-                                                          std::move(creator));
+    auto *reg = varient.template get<kernel_registor_t *>();
+    CHECK(reg) << "Can not be empty of " << name;
+    reg->Register(name, std::move(creator));
  }

  template <TargetType Target, PrecisionType Precision = PRECISION(kFloat),
@@ -125,23 +131,20 @@ class KernelRegistry final {

  std::string DebugString() const {
    std::stringstream ss;
-
    ss << "KernelCreator<host, float>:" << std::endl;
-    ss << registries_[GetKernelOffset<TARGET(kHost), PRECISION(kFloat),
-                                      DATALAYOUT(kAny)>()]
-              .get<KernelRegistryForTarget<TARGET(kHost), PRECISION(kFloat),
-                                           DATALAYOUT(kNCHW)> *>()
-              ->DebugString();
-    ss << std::endl;
+    constexpr TargetType tgt = TARGET(kHost);
+    constexpr PrecisionType dt = PRECISION(kFloat);
+    constexpr DataLayoutType lt = DATALAYOUT(kNCHW);
+    constexpr DataLayoutType kany = DATALAYOUT(kAny);
+    using kernel_registor_t = KernelRegistryForTarget<tgt, dt, lt>;
+    auto *reg = registries_[GetKernelOffset<tgt, dt, kany>()]
+                    .template get<kernel_registor_t *>();
+    ss << reg->DebugString() << std::endl;
    return ss.str();
  }

 private:
-  mutable std::array<any_kernel_registor_t,
-                     static_cast<int>(TARGET(NUM)) *
-                         static_cast<int>(PRECISION(NUM)) *
-                         static_cast<int>(DATALAYOUT(NUM))>
-      registries_;
+  mutable std::vector<any_kernel_registor_t> registries_;
 };

 template <TargetType target, PrecisionType precision, DataLayoutType layout,

--- a/paddle/fluid/lite/core/optimizer.h
+++ b/paddle/fluid/lite/core/optimizer.h
@@ -46,6 +46,7 @@ class Optimizer {
    SpecifyKernelPickTactic(kernel_pick_factor);
    InitTargetTypeTransformPass();

+#ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
    if (passes.empty()) {
      RunPasses(std::vector<std::string>{{
          "static_kernel_pick_pass",        //
@@ -62,6 +63,7 @@ class Optimizer {
    } else {
      RunPasses(passes);
    }
+#endif
    exec_scope_ = program.exec_scope;
  }


--- a/paddle/fluid/lite/core/target_wrapper.h
+++ b/paddle/fluid/lite/core/target_wrapper.h
@@ -30,6 +30,7 @@ enum class TargetType : int {
  kHost,
  kX86,
  kCUDA,
+  kARM,
  kAny,  // any target
  NUM,   // number of fields.
 };

--- a/paddle/fluid/lite/cuda/CMakeLists.txt
+++ b/paddle/fluid/lite/cuda/CMakeLists.txt
@@ -4,3 +4,4 @@ endif()

 nv_library(target_wrapper_cuda SRCS target_wrapper.cc)
 nv_library(cuda_blas_lite SRCS blas.cc)
+ 
--- a/paddle/fluid/lite/host/CMakeLists.txt
+++ b/paddle/fluid/lite/host/CMakeLists.txt
 cc_library(target_wrapper_host SRCS target_wrapper.cc DEPS target_wrapper_lite)
+ 
--- a/paddle/fluid/lite/kernels/CMakeLists.txt
+++ b/paddle/fluid/lite/kernels/CMakeLists.txt
@@ -4,3 +4,4 @@ add_subdirectory(host)
 add_subdirectory(arm)
 add_subdirectory(cuda)
 add_subdirectory(x86)
+ 
--- a/paddle/fluid/lite/kernels/arm/CMakeLists.txt
+++ b/paddle/fluid/lite/kernels/arm/CMakeLists.txt
+# The ARM kernels are only built for the light-weight framework; leave this
+# directory untouched for every other configuration.
+if(NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
+    return()
+endif()
+
 message(STATUS "compile with lite ARM kernels")
+
+# Kernels that move tensors in and out of a program.
+cc_library(feed_compute_arm SRCS feed_compute.cc DEPS ${lite_kernel_deps})
+cc_library(fetch_compute_arm SRCS fetch_compute.cc DEPS ${lite_kernel_deps})
+
+# Compute kernels. The ones that include Eigen headers depend on eigen3.
+cc_library(fc_compute_arm SRCS fc_compute.cc DEPS ${lite_kernel_deps} eigen3)
+cc_library(mul_compute_arm SRCS mul_compute.cc DEPS ${lite_kernel_deps} eigen3)
+cc_library(scale_compute_arm SRCS scale_compute.cc DEPS ${lite_kernel_deps} eigen3)
+cc_library(relu_compute_arm SRCS relu_compute.cc DEPS ${lite_kernel_deps})
+
+# lite_cc_test(test_fc_compute_arm SRCS fc_compute_test.cc DEPS ${lite_kernel_deps} fc_compute_arm)
+
+# Publish the kernel target list through the cache so that other directories
+# can read it after this one has been processed.
+set(arm_kernels
+    feed_compute_arm
+    fetch_compute_arm
+    fc_compute_arm
+    relu_compute_arm
+    mul_compute_arm
+    scale_compute_arm
+    CACHE INTERNAL "arm kernels")
+ 
--- a/paddle/fluid/lite/kernels/arm/fc_compute.cc
+++ b/paddle/fluid/lite/kernels/arm/fc_compute.cc
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/lite/kernels/arm/fc_compute.h"
+#include <Eigen/Core>
+#include "paddle/fluid/lite/core/op_registry.h"
+#include "paddle/fluid/lite/core/type_system.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace arm {
+
+// Runs the fully-connected kernel: the input is viewed as a 2-D matrix and
+// Out = X * W^T (+ bias) is computed by fc_compute_eigen.
+// NOTE should use pure std C++ implementation.
+void FcCompute::Run() {
+  auto& param = this->Param<operators::FcParam>();
+
+  CHECK_GE(param.input->dims().size(), 2UL);
+  CHECK_EQ(param.output->dims().size(), 2UL);
+
+  // The first `in_num_col_dims` dimensions collapse into the row count and
+  // the remaining ones into the column count.
+  const int x_h = static_cast<int>(
+      param.input->dims().Slice(0, param.in_num_col_dims).production());
+  const int x_w = static_cast<int>(
+      param.input->dims()
+          .Slice(param.in_num_col_dims, param.input->dims().size())
+          .production());
+
+  // fc_compute_eigen tolerates a null bias pointer, so a missing bias tensor
+  // must not be dereferenced here.
+  const float* bias = param.bias ? param.bias->data<float>() : nullptr;
+
+  // fc_compute_eigen takes the width (columns) BEFORE the height (rows) and
+  // maps X as x_h rows by x_w columns, so the row count goes second.
+  fc_compute_eigen(param.input->data<float>(),            // x
+                   x_w,                                   // x_w
+                   x_h,                                   // x_h
+                   param.w->data<float>(),                // w
+                   static_cast<int>(param.w->dims()[1]),  // w_w
+                   static_cast<int>(param.w->dims()[0]),  // w_h
+                   bias,                                  // b
+                   param.output->mutable_data<float>());
+}
+
+// Reports the target this kernel is implemented for.
+TargetType FcCompute::target() const { return TARGET(kARM); }
+
+// Reports the numeric precision this kernel computes in.
+PrecisionType FcCompute::precision() const { return PRECISION(kFloat); }
+
+}  // namespace arm
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
+
+// Registers FcCompute for op "fc" with target kARM, precision kFloat and
+// layout kNCHW (last macro argument `def` is presumably the kernel alias --
+// confirm against the macro definition). All three inputs and the output are
+// declared as ARM tensors.
+REGISTER_LITE_KERNEL(fc, kARM, kFloat, kNCHW,
+                     paddle::lite::kernels::arm::FcCompute, def)
+    .BindInput("Input", {LiteType::GetTensorTy(TARGET(kARM))})
+    .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kARM))})
+    .BindInput("W", {LiteType::GetTensorTy(TARGET(kARM))})
+    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))})
+    .Finalize();
--- a/paddle/fluid/lite/kernels/arm/fc_compute.h
+++ b/paddle/fluid/lite/kernels/arm/fc_compute.h
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include <Eigen/Core>
+#include "paddle/fluid/lite/core/kernel.h"
+#include "paddle/fluid/lite/operators/fc_op.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace arm {
+
+// Fully-connected kernel declared for the ARM target with float precision.
+class FcCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
+ public:
+  // Parameter bundle read by Run().
+  using param_t = operators::FcParam;
+
+  // Executes the kernel; defined in fc_compute.cc.
+  void Run() override;
+
+  // Target and precision reported by this kernel; defined in fc_compute.cc.
+  TargetType target() const override;
+  PrecisionType precision() const override;
+
+  virtual ~FcCompute() = default;
+};
+
+// Eigen-backed fully-connected computation: out = x * w^T (+ b).
+//
+// All buffers are interpreted as row-major matrices:
+//   x   : x_h rows by x_w columns
+//   w   : w_h rows by w_w columns
+//   out : x_h rows by w_h columns
+//   b   : optional vector of w_h elements added to every output row; may be
+//         null, in which case no bias is applied.
+// Note that each width argument precedes the matching height argument.
+// NOTE(review): x_w is not checked against w_w here; callers appear to be
+// responsible for passing conformable shapes -- confirm.
+template <typename T>
+void fc_compute_eigen(const T* x, int x_w, int x_h,  //
+                      const T* w, int w_w, int w_h,  //
+                      const T* b,                    //
+                      T* out) {
+  using matrix_t =
+      Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
+
+  // Wrap the raw buffers without copying them.
+  Eigen::Map<const matrix_t> X(x, x_h, x_w);
+  Eigen::Map<const matrix_t> W(w, w_h, w_w);
+  Eigen::Map<matrix_t> Out(out, x_h, w_h);
+
+  Out = X * W.transpose();
+
+  if (b) {
+    // Broadcast the bias (as a row vector) across all rows of the output.
+    Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, 1>> B(b, w_h);
+    Out = Out.array().rowwise() + B.transpose().array();
+  }
+}
+
+// Returns the inner product of the first `dim` elements of `x` and `y`,
+// accumulated from the first element to the last. A non-positive `dim`
+// yields a value-initialized T.
+template <typename T>
+__attribute__((optimize("unroll-loops")))  //
+T dot(const T* x, const T* y, int dim) {
+  T acc{};
+  for (int remaining = dim; remaining > 0; --remaining, ++x, ++y) {
+    acc += (*x) * (*y);
+  }
+  return acc;
+}
+
+// Reference (loop-based) fully-connected computation: out = x * w^T (+ b).
+//
+// Buffers are row-major:
+//   x   : x_h rows by x_w columns
+//   w   : w_h rows by w_w columns (x_w must equal w_w)
+//   out : x_h rows by w_h columns
+//   b   : optional vector of w_h elements added to every output row; may be
+//         null, matching fc_compute_eigen.
+template <typename T>
+void fc_compute_naive(const T* x, int x_w, int x_h,  //
+                      const T* w, int w_w, int w_h,  //
+                      const T* b,                    //
+                      T* out) {
+  CHECK_EQ(x_w, w_w);
+  // out shape: (x_h, w_h). Every element is assigned below, so the buffer
+  // does not need to be zero-filled first.
+  for (int r = 0; r < x_h; r++) {
+    for (int c = 0; c < w_h; c++) {
+      T value = dot(&x[r * x_w], &w[c * w_w], w_w);
+      if (b) {
+        value += b[c];
+      }
+      out[r * w_h + c] = value;
+    }
+  }
+}
+
+}  // namespace arm
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
--- a/paddle/fluid/lite/kernels/arm/fc_compute_test.cc
+++ b/paddle/fluid/lite/kernels/arm/fc_compute_test.cc
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/lite/kernels/arm/fc_compute.h"
+#include <gtest/gtest.h>
+#include <vector>
+#include "paddle/fluid/lite/core/op_registry.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace arm {
+
+// Cross-checks the loop-based FC implementation against the Eigen one on a
+// batch of 2 inputs with 3 features, a 4x3 weight matrix and a 4-element
+// bias.
+TEST(fc_compute_naive, test) {
+  lite::Tensor x, w, b, out, out1;
+  const int batch_size = 2;
+  x.Resize({batch_size, 3});
+  w.Resize({4, 3});
+  b.Resize({1, 4});
+  out.Resize({batch_size, 4});
+  out1.Resize({batch_size, 4});
+
+  auto x_data = x.mutable_data<float>();
+  auto w_data = w.mutable_data<float>();
+  auto b_data = b.mutable_data<float>();
+  auto out_data = out.mutable_data<float>();
+  auto out_data1 = out1.mutable_data<float>();
+
+  for (int i = 0; i < product(x.dims()); i++) x_data[i] = i;
+  for (int i = 0; i < product(w.dims()); i++) w_data[i] = i;
+  for (int i = 0; i < product(b.dims()); i++) b_data[i] = i;
+
+  fc_compute_naive(x_data, 3, batch_size,  //
+                   w_data, 3, 4,           //
+                   b_data, out_data);
+  fc_compute_eigen(x_data, 3, batch_size,  //
+                   w_data, 3, 4,           //
+                   b_data, out_data1);
+
+  // Compare every output element, not only the first one.
+  for (int i = 0; i < product(out.dims()); i++) {
+    EXPECT_NEAR(out_data[i], out_data1[i], 1e-6);
+  }
+}
+
+// A default-constructed FcCompute must report float precision and the ARM
+// target.
+TEST(fc_arm, init) {
+  FcCompute fc;
+  ASSERT_EQ(fc.precision(), PRECISION(kFloat));
+  ASSERT_EQ(fc.target(), TARGET(kARM));
+}
+
+// Smoke test of the Eigen expression used by the FC kernel: a 10x20 input
+// multiplied by the transpose of a 20x20 weight gives a 10x20 output.
+TEST(fc_arm, algorithm) {
+  using matrix_t = Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic>;
+  using matrix_map_t = Eigen::Map<matrix_t>;
+
+  // dim 10, 20
+  std::vector<float> input(10 * 20);
+  std::vector<float> w(20 * 20);
+  std::vector<float> output(10 * 20);
+
+  Eigen::Map<const matrix_t> input_mat(input.data(), 10, 20);
+  Eigen::Map<const matrix_t> weight_mat(w.data(), 20, 20);
+  matrix_map_t output_mat(output.data(), 10, 20);
+
+  // (10x20) * (20x20)^T -> (10x20). The operands must be in this order for
+  // the inner dimensions to agree and for the result to fit `output_mat`.
+  output_mat = input_mat * weight_mat.transpose();
+}
+
+// Runs the FC kernel end to end on a {1, 10, 20} input flattened with
+// in_num_col_dims = 2, a 20x20 weight and a bias, and logs the result.
+TEST(fc_arm, compute) {
+  FcCompute fc;
+  operators::FcParam param;
+
+  lite::Tensor x;
+  lite::Tensor w;
+  lite::Tensor bias;
+  lite::Tensor output;
+
+  x.Resize(DDim(std::vector<int64_t>({1, 10, 20})));
+  w.Resize(DDim(std::vector<int64_t>({20, 20})));
+  // The kernel reads one bias value per weight row (w.dims()[0] == 20), so
+  // the bias must hold 20 elements; a shorter buffer is read out of bounds.
+  bias.Resize(DDim(std::vector<int64_t>({1, 20})));
+  output.Resize(DDim(std::vector<int64_t>({10, 20})));
+
+  auto* x_data = x.mutable_data<float>();
+  auto* w_data = w.mutable_data<float>();
+  auto* bias_data = bias.mutable_data<float>();
+  auto* output_data = output.mutable_data<float>();
+
+  for (int i = 0; i < 10 * 20; i++) x_data[i] = i;
+  for (int i = 0; i < 20 * 20; i++) w_data[i] = i;
+  for (int i = 0; i < 20; i++) bias_data[i] = i;
+  for (int i = 0; i < 10 * 20; i++) output_data[i] = 0;
+
+  param.in_num_col_dims = 2;
+  param.input = &x;
+  param.w = &w;
+  param.bias = &bias;
+  param.output = &output;
+  param.in_mat_dims = x.dims();
+
+  fc.SetParam(param);
+  fc.Run();
+
+  LOG(INFO) << "x";
+  for (int i = 0; i < 10 * 20; i++) LOG(INFO) << x_data[i];
+
+  LOG(INFO) << "output:";
+  for (int i = 0; i < 10 * 20; i++) LOG(INFO) << output.data<float>()[i];
+}
+
+// Looks up the "fc" kernel for (kARM, kFloat) in the global kernel registry
+// and expects the result of Create() to evaluate to true.
+TEST(fc, retrive_op) {
+  auto fc =
+      KernelRegistry::Global().Create<TARGET(kARM), PRECISION(kFloat)>("fc");
+  ASSERT_TRUE(fc);
+}
+
+}  // namespace arm
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
+
+// References the fc ARM kernel registered in fc_compute.cc; presumably this
+// keeps the registration linked into the test binary -- confirm against the
+// USE_LITE_KERNEL macro definition.
+USE_LITE_KERNEL(fc, kARM, kFloat, kNCHW, def);
--- a/paddle/fluid/lite/kernels/arm/feed_compute.cc
+++ b/paddle/fluid/lite/kernels/arm/feed_compute.cc
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/lite/core/op_registry.h"
+#include "paddle/fluid/lite/core/type_system.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace arm {
+
+// Feed kernel for the ARM target (any precision, any layout): exposes one
+// tensor of the feed list as the op output.
+class FeedCompute
+    : public KernelLite<TARGET(kARM), PRECISION(kAny), DATALAYOUT(kAny)> {
+ public:
+  using param_t = operators::FeedParam;
+
+  // Makes `out` share data with the feed-list entry selected by `col`.
+  void Run() override {
+    auto &param = Param<operators::FeedParam>();
+    LOG(INFO) << "feed_list.size: " << param.feed_list->size();
+    LOG(INFO) << "col " << param.col;
+    // Select the slot addressed by `col` (the fetch kernel indexes its list
+    // the same way) instead of always taking the first entry, and refuse an
+    // index that falls outside the list.
+    CHECK_GE(param.col, 0);
+    CHECK_LT(static_cast<size_t>(param.col), param.feed_list->size());
+    const lite::Tensor &feed_item = (*param.feed_list)[param.col];
+    param.out->ShareDataWith(feed_item);
+    LOG(INFO) << "FEED input " << feed_item << " col " << param.col;
+    LOG(INFO) << "FEED output " << *param.out;
+  }
+};
+
+}  // namespace arm
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
+
+// Registers FeedCompute for op "feed" with target kARM, precision kAny and
+// layout kAny; input "X" and output "Out" are declared as ARM tensors.
+REGISTER_LITE_KERNEL(feed, kARM, kAny, kAny,
+                     paddle::lite::kernels::arm::FeedCompute, def)
+    .BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))})
+    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))})
+    .Finalize();
--- a/paddle/fluid/lite/kernels/arm/fetch_compute.cc
+++ b/paddle/fluid/lite/kernels/arm/fetch_compute.cc
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/lite/core/op_registry.h"
+#include "paddle/fluid/lite/core/type_system.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace arm {
+
+// Fetch kernel for the ARM target (any precision, any layout): publishes the
+// op input into slot `col` of the fetch list.
+class FetchCompute
+    : public KernelLite<TARGET(kARM), PRECISION(kAny), DATALAYOUT(kAny)> {
+ public:
+  // The kernel reads FetchParam in Run(), so the alias must name that type.
+  using param_t = operators::FetchParam;
+
+  // Grows the fetch list when `col` is past its end, then makes the slot
+  // share data with the input tensor.
+  void Run() override {
+    auto& param = Param<operators::FetchParam>();
+    auto* fetch_list = param.fetch_list;
+    if (fetch_list->size() <= static_cast<size_t>(param.col)) {
+      fetch_list->resize(param.col + 1);
+    }
+
+    auto& dst = fetch_list->at(param.col);
+    dst.ShareDataWith(*param.input);
+  }
+};
+
+}  // namespace arm
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
+
+// Registers FetchCompute for op "fetch" with target kARM, precision kAny and
+// layout kAny. Input "X" and output "Out" are ARM tensors of any precision
+// and layout (the trailing -1 is presumably an "unspecified device" id --
+// confirm against LiteType::GetTensorTy).
+REGISTER_LITE_KERNEL(fetch, kARM, kAny, kAny,
+                     paddle::lite::kernels::arm::FetchCompute, def)
+    .BindInput("X", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kAny),
+                                           DATALAYOUT(kAny), -1)})
+    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kAny),
+                                              DATALAYOUT(kAny), -1)})
+    .Finalize();
--- a/paddle/fluid/lite/kernels/arm/mul_compute.cc
+++ b/paddle/fluid/lite/kernels/arm/mul_compute.cc
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <Eigen/Core>
+#include "paddle/fluid/lite/core/kernel.h"
+#include "paddle/fluid/lite/core/op_registry.h"
+#include "paddle/fluid/lite/core/types.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace arm {
+
+// Eigen-backed matrix product: out = x * y.
+// `x` is read as x_h rows by x_w columns, `y` as y_h rows by y_w columns and
+// `out` is written as x_h rows by y_w columns; all three are row-major.
+template <typename T>
+void mul_compute_eigen(const T* x, int x_h, int x_w, const T* y, int y_h,
+                       int y_w, T* out) {
+  using row_major_t =
+      Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
+  using const_view_t = Eigen::Map<const row_major_t>;
+
+  // Wrap the raw buffers without copying them.
+  const_view_t lhs(x, x_h, x_w);
+  const_view_t rhs(y, y_h, y_w);
+  Eigen::Map<row_major_t> result(out, x_h, y_w);
+
+  result = lhs * rhs;
+}
+
+// Matrix-multiply kernel for the ARM target with float precision.
+class MulCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
+ public:
+  using param_t = operators::MulParam;
+
+  // Flattens both operands to 2-D, using `x_num_col_dims` / `y_num_col_dims`
+  // as the split points between row and column dimensions, multiplies them
+  // and logs the operands and the result.
+  void Run() override {
+    auto& param = Param<operators::MulParam>();
+
+    // Leading dimensions collapse into rows, trailing ones into columns.
+    const int x_rows = static_cast<int>(
+        param.x->dims().Slice(0, param.x_num_col_dims).production());
+    const int x_cols = static_cast<int>(
+        param.x->dims()
+            .Slice(param.x_num_col_dims, param.x->dims().size())
+            .production());
+    const int y_rows = static_cast<int>(
+        param.y->dims().Slice(0, param.y_num_col_dims).production());
+    const int y_cols = static_cast<int>(
+        param.y->dims()
+            .Slice(param.y_num_col_dims, param.y->dims().size())
+            .production());
+
+    const float* x_data = param.x->data<float>();
+    const float* y_data = param.y->data<float>();
+    float* out_data = param.output->mutable_data<float>();
+    mul_compute_eigen(x_data, x_rows, x_cols,  //
+                      y_data, y_rows, y_cols,  //
+                      out_data);
+
+    LOG(INFO) << "MUL x " << *param.x;
+    LOG(INFO) << "MUL W " << *param.y;
+    LOG(INFO) << "MUL out " << *param.output;
+  }
+
+  virtual ~MulCompute() = default;
+};
+
+}  // namespace arm
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
+
+// Registers MulCompute for op "mul" with target kARM, precision kFloat and
+// layout kNCHW; inputs "X", "Y" and output "Out" are declared as ARM tensors.
+REGISTER_LITE_KERNEL(mul, kARM, kFloat, kNCHW,
+                     paddle::lite::kernels::arm::MulCompute, def)
+    .BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))})
+    .BindInput("Y", {LiteType::GetTensorTy(TARGET(kARM))})
+    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))})
+    .Finalize();
--- a/paddle/fluid/lite/kernels/arm/relu_compute.cc
+++ b/paddle/fluid/lite/kernels/arm/relu_compute.cc
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/lite/kernels/arm/relu_compute.h"
--- a/paddle/fluid/lite/kernels/arm/relu_compute.h
+++ b/paddle/fluid/lite/kernels/arm/relu_compute.h
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include <algorithm>
+#include "paddle/fluid/lite/core/kernel.h"
+#include "paddle/fluid/lite/core/op_registry.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace arm {
+
+// ReLU kernel for the ARM target with float precision.
+class ReluCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
+ public:
+  // Writes max(0, x) for every element of the input tensor into the output
+  // tensor; the element count is the product of the input dimensions.
+  void Run() override {
+    auto& param = Param<operators::ReluParam>();
+    const auto count = param.input->dims().production();
+    const float* src = param.input->data<float>();
+    float* dst = param.output->mutable_data<float>();
+    std::transform(src, src + count, dst,
+                   [](float value) { return std::max(0.f, value); });
+  }
+
+  TargetType target() const override { return TARGET(kARM); }
+  PrecisionType precision() const override { return PRECISION(kFloat); }
+};
+
+}  // namespace arm
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
+
+// Registers ReluCompute for op "relu" with target kARM, precision kFloat and
+// layout kNCHW. Unlike the other ARM kernels, no inputs or outputs are bound.
+// NOTE(review): this registration lives in a header; the other ARM kernels
+// register from their .cc file. Including this header from more than one
+// translation unit would repeat the registration -- confirm this is intended.
+REGISTER_LITE_KERNEL(relu, kARM, kFloat, kNCHW,
+                     paddle::lite::kernels::arm::ReluCompute, def)
+    .Finalize();
--- a/paddle/fluid/lite/kernels/arm/scale_compute.cc
+++ b/paddle/fluid/lite/kernels/arm/scale_compute.cc
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <Eigen/Core>
+#include "paddle/fluid/lite/core/kernel.h"
+#include "paddle/fluid/lite/core/op_registry.h"
+#include "paddle/fluid/lite/core/types.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace arm {
+
+// Element-wise affine transform over `size` elements.
+// With `bias_before` set the result is (x + bias) * scale, expressed as
+// x * scale + bias * scale; otherwise it is x * scale + bias.
+template <typename T>
+void scale_compute(const T* x, T* out, int size, float scale, float bias,
+                   bool bias_before) {
+  // Fold the scale into the bias once so the loop body is the same for both
+  // modes.
+  if (bias_before) {
+    bias = bias * scale;
+  }
+  const T* src = x;
+  T* dst = out;
+  for (int remaining = size; remaining > 0; --remaining, ++src, ++dst) {
+    *dst = (*src) * scale + bias;
+  }
+}
+
+// Scale kernel for the ARM target with float precision.
+class ScaleCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
+ public:
+  // The kernel reads ScaleParam in Run(), so the alias must name that type.
+  using param_t = operators::ScaleParam;
+
+  // Applies the scale and bias to every element of `x` and writes the result
+  // to `output`.
+  void Run() override {
+    auto& param = Param<operators::ScaleParam>();
+    // scale_compute's last argument means "add the bias BEFORE scaling",
+    // which is the opposite of `bias_after_scale`, hence the negation.
+    scale_compute(param.x->data<float>(), param.output->mutable_data<float>(),
+                  param.x->dims().production(), param.scale, param.bias,
+                  !param.bias_after_scale);
+  }
+
+  virtual ~ScaleCompute() = default;
+};
+
+}  // namespace arm
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
+
+// Registers ScaleCompute for op "scale" with target kARM, precision kFloat
+// and layout kNCHW; input "X" and output "Out" are declared as ARM tensors.
+REGISTER_LITE_KERNEL(scale, kARM, kFloat, kNCHW,
+                     paddle::lite::kernels::arm::ScaleCompute, def)
+    .BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))})
+    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))})
+    .Finalize();
--- a/paddle/fluid/lite/kernels/arm/use_kernels.h
+++ b/paddle/fluid/lite/kernels/arm/use_kernels.h
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include "paddle/fluid/lite/core/op_registry.h"
+
+// References the ARM kernels so that code including this header picks up
+// their registrations (presumably by forcing the registrar symbols to be
+// linked -- confirm against the USE_LITE_KERNEL macro definition).
+// NOTE(review): the relu ARM kernel is registered in relu_compute.h but is
+// not listed here -- confirm whether that omission is intentional.
+USE_LITE_KERNEL(fc, kARM, kFloat, kNCHW, def);
+USE_LITE_KERNEL(mul, kARM, kFloat, kNCHW, def);
+USE_LITE_KERNEL(scale, kARM, kFloat, kNCHW, def);
+USE_LITE_KERNEL(feed, kARM, kAny, kAny, def);
+USE_LITE_KERNEL(fetch, kARM, kAny, kAny, def);
--- a/paddle/fluid/lite/kernels/cuda/CMakeLists.txt
+++ b/paddle/fluid/lite/kernels/cuda/CMakeLists.txt
@@ -8,3 +8,4 @@ nv_library(mul_compute_cuda SRCS mul_compute.cc DEPS ${tensor_lite})
 cc_library(io_copy_compute_cuda SRCS io_copy_compute.cc DEPS ${tensor_lite})

 nv_library(kernels_cuda DEPS mul_compute_cuda io_copy_compute_cuda cuda_blas_lite)
+ 
--- a/paddle/fluid/lite/kernels/host/CMakeLists.txt
+++ b/paddle/fluid/lite/kernels/host/CMakeLists.txt
@@ -17,3 +17,4 @@ set(host_kernels
    )

 set(host_kernels "${host_kernels}" CACHE INTERNAL "host kernels")
+ 
--- a/paddle/fluid/lite/kernels/x86/CMakeLists.txt
+++ b/paddle/fluid/lite/kernels/x86/CMakeLists.txt
@@ -4,3 +4,4 @@ endif()

 cc_library(activation_compute SRCS activation_compute.cc DEPS ${lite_kernel_deps} activation_op)
 cc_library(elementwise_compute SRCS elementwise_compute.cc DEPS ${lite_kernel_deps} elementwise_op)
+ 
--- a/paddle/fluid/lite/kernels/x86/activation_compute.cc
+++ b/paddle/fluid/lite/kernels/x86/activation_compute.cc
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/lite/core/kernel.h"

--- a/paddle/fluid/lite/kernels/x86/elementwise_compute.cc
+++ b/paddle/fluid/lite/kernels/x86/elementwise_compute.cc
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/lite/core/kernel.h"

--- a/paddle/fluid/lite/model_parser/CMakeLists.txt
+++ b/paddle/fluid/lite/model_parser/CMakeLists.txt
@@ -9,7 +9,7 @@ endif(WITH_TESTING)


 if(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
-    cc_library(compatible_pb_lite SRCS compatible_pb.cc DEPS op_desc_lite var_desc_lite)
+    cc_library(compatible_pb_lite SRCS compatible_pb.cc DEPS op_desc_lite framework_proto_lite var_desc_lite)
 else()
    cc_library(compatible_pb_lite SRCS compatible_pb.cc DEPS framework_proto_lite proto_desc)
 endif(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
@@ -24,3 +24,4 @@ endif()
 cc_library(model_parser_lite SRCS model_parser.cc DEPS ${model_parser_deps})

 add_subdirectory(pb)
+ 
--- a/paddle/fluid/lite/model_parser/pb/CMakeLists.txt
+++ b/paddle/fluid/lite/model_parser/pb/CMakeLists.txt
 cc_library(var_desc_lite SRCS var_desc.cc DEPS framework_proto_lite)
 cc_library(op_desc_lite SRCS op_desc.cc DEPS framework_proto_lite)
+ 
--- a/paddle/fluid/lite/operators/CMakeLists.txt
+++ b/paddle/fluid/lite/operators/CMakeLists.txt
@@ -22,3 +22,4 @@ set(ops_lite
        PARENT_SCOPE)

 lite_cc_test(test_fc_op_lite SRCS fc_op_test.cc DEPS fc_op_lite fc_compute_host)
+ 
--- a/paddle/fluid/lite/operators/activation_ops.cc
+++ b/paddle/fluid/lite/operators/activation_ops.cc
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 #include "paddle/fluid/lite/core/op_lite.h"
 #include "paddle/fluid/lite/core/op_registry.h"


--- a/paddle/fluid/lite/operators/elementwise_ops.cc
+++ b/paddle/fluid/lite/operators/elementwise_ops.cc
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 #include "paddle/fluid/lite/core/op_lite.h"
 #include "paddle/fluid/lite/core/op_registry.h"


--- a/paddle/fluid/lite/utils/CMakeLists.txt
+++ b/paddle/fluid/lite/utils/CMakeLists.txt
-if(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
-    set(utils_DEPS)
-    lite_cc_test(test_logging_lite SRCS logging_test.cc)
-else()
-    set(utils_DEPS glog)
-endif()
+# if(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
+#     set(utils_DEPS)
+#     lite_cc_test(test_logging_lite SRCS logging_test.cc)
+# else()
+# endif()
+
+set(utils_DEPS glog)

 lite_cc_test(test_varient SRCS varient_test.cc DEPS utils_lite)
 cc_library(any_lite SRCS any.cc)
 cc_library(utils_lite SRCS cp_logging.cc DEPS ${utils_DEPS} any_lite)
+ 
--- a/paddle/fluid/lite/utils/all.h
+++ b/paddle/fluid/lite/utils/all.h
@@ -14,6 +14,7 @@

 #pragma once

+#include "paddle/fluid/lite/utils/any.h"
 #include "paddle/fluid/lite/utils/check.h"
 #include "paddle/fluid/lite/utils/cp_logging.h"
 #include "paddle/fluid/lite/utils/factory.h"
@@ -21,4 +22,3 @@
 #include "paddle/fluid/lite/utils/io.h"
 #include "paddle/fluid/lite/utils/macros.h"
 #include "paddle/fluid/lite/utils/varient.h"
-#include "paddle/fluid/lite/utils/any.h"
--- a/paddle/fluid/lite/utils/cp_logging.h
+++ b/paddle/fluid/lite/utils/cp_logging.h
@@ -13,8 +13,8 @@
 // limitations under the License.

 #pragma once
-#ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
-#include "paddle/fluid/lite/utils/logging.h"
-#else  // LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
+// #ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
+// #include "paddle/fluid/lite/utils/logging.h"
+// #else  // LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
 #include <glog/logging.h>
-#endif  // LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
+// #endif  // LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
--- a/paddle/fluid/lite/x86/CMakeLists.txt
+++ b/paddle/fluid/lite/x86/CMakeLists.txt
@@ -3,3 +3,4 @@ if (NOT LITE_WITH_X86)
 endif()

 cc_library(target_wrapper_x86 SRCS target_wrapper.cc)
+