diff --git a/CMakeLists.txt b/CMakeLists.txt index 1a0b0b368616059a9a7e4b75badd6327271f483e..25ef4531915e6f24582eb923afa694c497ded619 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,6 +19,19 @@ set(PADDLE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) include(system) +if(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) + cmake_minimum_required(VERSION 3.10) + # only android yet + set(ANDROID TRUE) + include(cross_compiling/android) + include(cross_compiling/host) + set(CMAKE_SYSTEM_NAME Android) + set(CMAKE_SYSTEM_VERSION ${ANDROID_API_LEVEL}) + set(CMAKE_ANDROID_ARCH_ABI ${ANDROID_ARCH_ABI}) + set(CMAKE_ANDROID_NDK ${ANDROID_NDK}) + set(CMAKE_ANDROID_STL_TYPE ${ANDROID_STL_TYPE}) +endif() + project(paddle CXX C) message(STATUS "CXX compiler: ${CMAKE_CXX_COMPILER}, version: " "${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}") @@ -41,7 +54,9 @@ if(WIN32) set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${PADDLE_LINK_FLAGS}") endif(WIN32) -find_package(CUDA QUIET) +if(NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) + find_package(CUDA QUIET) +endif() find_package(Git REQUIRED) find_package(Threads REQUIRED) @@ -79,6 +94,30 @@ option(PY_VERSION "Compile PaddlePaddle with python3 support" ${PY_VER option(WITH_FAST_MATH "Make use of fast math library, might affect the precision to some extent" ON) option(WITH_DGC "Use DGC(Deep Gradient Compression) or not" ON) +if(ANDROID OR IOS) + set(WITH_GPU OFF CACHE STRING + "Disable GPU when cross-compiling for Android and iOS" FORCE) + set(WITH_DSO OFF CACHE STRING + "Disable DSO when cross-compiling for Android and iOS" FORCE) + set(WITH_AVX OFF CACHE STRING + "Disable AVX when cross-compiling for Android and iOS" FORCE) + set(WITH_PYTHON OFF CACHE STRING + "Disable PYTHON when cross-compiling for Android and iOS" FORCE) + set(WITH_RDMA OFF CACHE STRING + "Disable RDMA when cross-compiling for Android and iOS" FORCE) + set(WITH_MKL OFF CACHE STRING + "Disable MKL when cross-compiling for Android and iOS" FORCE) + + if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE "Release" CACHE STRING + "Default use Release in android" FORCE) + endif() + if(NOT THIRD_PARTY_BUILD_TYPE) + set(THIRD_PARTY_BUILD_TYPE "MinSizeRel" CACHE STRING + "Default use MinSizeRel in android" FORCE) + endif() +endif() + # for lite, both server and mobile framework. 
option(WITH_LITE "Enable lite framework" OFF) option(LITE_WITH_CUDA "Enable CUDA in lite mode" OFF) @@ -89,8 +128,6 @@ option(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK "Enable light-weight framework" OFF) set(THIRD_PARTY_PATH "${CMAKE_BINARY_DIR}/third_party" CACHE STRING "A path setting third party libraries download & build directories.") -set(THIRD_PARTY_BUILD_TYPE Release) - # CMAKE_BUILD_TYPE if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE STRING @@ -107,7 +144,7 @@ if (WITH_LITE AND LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) include(external/gflags) # download, build, install gflags include(external/glog) # download, build, install glog include(external/gtest) # download, build, install gtest - include(external/zlib) # download, build, install gtest + #include(external/zlib) # download, build, install gtest include(external/protobuf) # download, build, install protobuf include(external/eigen) # download eigen3 @@ -115,7 +152,7 @@ if (WITH_LITE AND LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) include(configure) # add paddle env configuration add_definitions(-std=c++11) - + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") add_subdirectory(paddle) return() diff --git a/Dockerfile.android b/Dockerfile.android new file mode 100644 index 0000000000000000000000000000000000000000..185740df1b01ebef6828e16ef51db9afb1d2dd0e --- /dev/null +++ b/Dockerfile.android @@ -0,0 +1,40 @@ +FROM ubuntu:16.04 + +RUN echo '\ +deb <SOURCE> <VERSION> main restricted universe multiverse\n\ +deb <SOURCE> <VERSION>-updates main restricted universe multiverse\n\ +deb <SOURCE> <VERSION>-backports main restricted universe multiverse\n\ +deb <SOURCE> <VERSION>-security main restricted universe multiverse\n'\ +> /etc/apt/sources.list +RUN sed -ie 's|<SOURCE>|http://mirrors.tuna.tsinghua.edu.cn/ubuntu/|' /etc/apt/sources.list +RUN sed -ie 's|<VERSION>|xenial|' /etc/apt/sources.list + +RUN apt-get update && apt-get upgrade -y +RUN apt-get install -y --no-install-recommends \ + curl \ + unzip \ + git \ + make \ + cmake-curses-gui \ + python \ + python-pip \ + python-setuptools \ + clang-format-5.0 \ + graphviz \ + g++-arm-linux-gnueabi \ + gcc-arm-linux-gnueabi \ + gcc \ + g++ +RUN apt-get autoremove -y && apt-get clean +RUN ln -s clang-format-5.0 /usr/bin/clang-format +RUN pip install -i https://pypi.tuna.tsinghua.edu.cn/simple --upgrade pip +RUN pip install -i https://pypi.tuna.tsinghua.edu.cn/simple wheel +RUN pip install -i https://pypi.tuna.tsinghua.edu.cn/simple pre-commit +RUN cd /tmp && curl -O https://dl.google.com/android/repository/android-ndk-r17c-linux-x86_64.zip +RUN curl -O https://mms-res.cdn.bcebos.com/cmake-3.10.3-Linux-x86_64.tar.gz && \ tar xzf cmake-3.10.3-Linux-x86_64.tar.gz && \ mv cmake-3.10.3-Linux-x86_64 /opt/cmake-3.10 && \ mv /usr/bin/cmake /usr/bin/cmake.bak && ln -s /opt/cmake-3.10/bin/cmake /usr/bin/cmake && \ mv /usr/bin/ccmake /usr/bin/ccmake.bak && ln -s /opt/cmake-3.10/bin/ccmake /usr/bin/ccmake +RUN cd /opt && unzip /tmp/android-ndk-r17c-linux-x86_64.zip +ENV NDK_ROOT /opt/android-ndk-r17c diff --git a/cmake/cross_compiling/android.cmake b/cmake/cross_compiling/android.cmake new file mode 100644 index 0000000000000000000000000000000000000000..c36057544ab6503befde5642c38c75c25906b585 --- /dev/null +++ b/cmake/cross_compiling/android.cmake @@ -0,0 +1,55 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +if(NOT ANDROID) + return() +endif() + +if(NOT DEFINED ANDROID_NDK) + set(ANDROID_NDK $ENV{NDK_ROOT}) + if(NOT ANDROID_NDK) + message(FATAL_ERROR "Must set ANDROID_NDK or env NDK_ROOT") + endif() +endif() + +if(NOT DEFINED ANDROID_ARCH_ABI) + set(ANDROID_ARCH_ABI "arm64-v8a" CACHE STRING "Choose android platform") +endif() + +if(NOT DEFINED ANDROID_API_LEVEL) + set(ANDROID_API_LEVEL "22") +endif() + +if(NOT DEFINED ANDROID_STL_TYPE) + set(ANDROID_STL_TYPE "c++_static" CACHE STRING "stl type") +endif() + +set(ANDROID_ARCH_ABI_LIST "arm64-v8a" "armeabi-v7a" "armeabi-v6" "armeabi" + "mips" "mips64" "x86" "x86_64") +set_property(CACHE ANDROID_ARCH_ABI PROPERTY STRINGS ${ANDROID_ARCH_ABI_LIST}) +if (NOT ANDROID_ARCH_ABI IN_LIST ANDROID_ARCH_ABI_LIST) + message(FATAL_ERROR "ANDROID_ARCH_ABI must be in one of ${ANDROID_ARCH_ABI_LIST}") +endif() + +if(ANDROID_ARCH_ABI STREQUAL "armeabi-v7a") + message(STATUS "NEON is enabled on arm-v7a") +endif() + +set(ANDROID_STL_TYPE_LITS "gnustl_static" "c++_static") +set_property(CACHE ANDROID_STL_TYPE PROPERTY STRINGS ${ANDROID_STL_TYPE_LITS}) +if (NOT ANDROID_STL_TYPE IN_LIST ANDROID_STL_TYPE_LITS) + message(FATAL_ERROR "ANDROID_STL_TYPE must be in one of ${ANDROID_STL_TYPE_LITS}") +endif() + +set(ANDROID_PIE TRUE) diff --git a/cmake/cross_compiling/host.cmake b/cmake/cross_compiling/host.cmake new file mode 100644 index 0000000000000000000000000000000000000000..b65e45208d86024b5fa6d0cc1ec7bf8857427fa0 --- /dev/null +++ b/cmake/cross_compiling/host.cmake @@ -0,0 +1,40 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set(HOST_C_COMPILER $ENV{CC}) +set(HOST_CXX_COMPILER $ENV{CXX}) + +if(NOT HOST_C_COMPILER) + find_program(HOST_C_COMPILER NAMES gcc PATH + /usr/bin + /usr/local/bin) +endif() + +if(NOT HOST_CXX_COMPILER) + find_program(HOST_CXX_COMPILER NAMES g++ PATH + /usr/bin + /usr/local/bin) +endif() + +if(NOT HOST_C_COMPILER OR NOT EXISTS ${HOST_C_COMPILER}) + MESSAGE(FATAL_ERROR "Cannot find host C compiler. export CC=/path/to/cc") +ENDIF() + +if(NOT HOST_CXX_COMPILER OR NOT EXISTS ${HOST_CXX_COMPILER}) + MESSAGE(FATAL_ERROR "Cannot find host CXX compiler.
export CXX=/path/to/c++") +ENDIF() + +MESSAGE(STATUS "Found host C compiler: " ${HOST_C_COMPILER}) +MESSAGE(STATUS "Found host CXX compiler: " ${HOST_CXX_COMPILER}) + diff --git a/cmake/external/gflags.cmake b/cmake/external/gflags.cmake index 911920ed6212b87aa25ba9a1faf7696fbcb22587..42ce7c644f3e8ee51bb5fbce4391b9423ee22cf8 100644 --- a/cmake/external/gflags.cmake +++ b/cmake/external/gflags.cmake @@ -25,6 +25,24 @@ ENDIF(WIN32) INCLUDE_DIRECTORIES(${GFLAGS_INCLUDE_DIR}) +SET(OPTIONAL_ARGS "-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}" + "-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}" + "-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}" + "-DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}" + "-DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}" + "-DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}" + "-DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG}" + "-DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}") + +if(ANDROID) + SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} + "-DCMAKE_SYSTEM_NAME=${CMAKE_SYSTEM_NAME}" + "-DCMAKE_SYSTEM_VERSION=${CMAKE_SYSTEM_VERSION}" + "-DCMAKE_ANDROID_ARCH_ABI=${CMAKE_ANDROID_ARCH_ABI}" + "-DCMAKE_ANDROID_NDK=${CMAKE_ANDROID_NDK}" + "-DCMAKE_ANDROID_STL_TYPE=${CMAKE_ANDROID_STL_TYPE}") +endif() + ExternalProject_Add( extern_gflags ${EXTERNAL_PROJECT_LOG_ARGS} @@ -32,19 +50,12 @@ ExternalProject_Add( GIT_TAG 77592648e3f3be87d6c7123eb81cbad75f9aef5a PREFIX ${GFLAGS_SOURCES_DIR} UPDATE_COMMAND "" - CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} - -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} - -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} - -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE} - -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG} - -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} - -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG} - -DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE} - -DBUILD_STATIC_LIBS=ON + CMAKE_ARGS -DBUILD_STATIC_LIBS=ON -DCMAKE_INSTALL_PREFIX=${GFLAGS_INSTALL_DIR} -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DBUILD_TESTING=OFF -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} + ${OPTIONAL_ARGS} ${EXTERNAL_OPTIONAL_ARGS} CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GFLAGS_INSTALL_DIR} -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON diff --git a/cmake/external/glog.cmake b/cmake/external/glog.cmake index 7fa17ce6b7b106c47c486729d0136748c73176a7..9ac9b8326431addb503acc10d3188a5f8f4e48a5 100644 --- a/cmake/external/glog.cmake +++ b/cmake/external/glog.cmake @@ -31,6 +31,24 @@ INCLUDE_DIRECTORIES(${GLOG_INCLUDE_DIR}) SET(GLOG_REPOSITORY "https://github.com/google/glog.git") SET(GLOG_TAG "v0.3.5") +SET(OPTIONAL_ARGS "-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}" + "-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}" + "-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}" + "-DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}" + "-DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}" + "-DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}" + "-DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG}" + "-DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}") + +if(ANDROID) + SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} + "-DCMAKE_SYSTEM_NAME=${CMAKE_SYSTEM_NAME}" + "-DCMAKE_SYSTEM_VERSION=${CMAKE_SYSTEM_VERSION}" + "-DCMAKE_ANDROID_ARCH_ABI=${CMAKE_ANDROID_ARCH_ABI}" + "-DCMAKE_ANDROID_NDK=${CMAKE_ANDROID_NDK}" + "-DCMAKE_ANDROID_STL_TYPE=${CMAKE_ANDROID_STL_TYPE}") +endif() + ExternalProject_Add( extern_glog ${EXTERNAL_PROJECT_LOG_ARGS} @@ -39,14 +57,7 @@ ExternalProject_Add( GIT_TAG ${GLOG_TAG} PREFIX ${GLOG_SOURCES_DIR} UPDATE_COMMAND "" - CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} - -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} - -DCMAKE_CXX_FLAGS=${GLOG_CMAKE_CXX_FLAGS} -
-DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE} - -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG} - -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} - -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG} - -DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE} + CMAKE_ARGS ${OPTIONAL_ARGS} -DCMAKE_INSTALL_PREFIX=${GLOG_INSTALL_DIR} -DCMAKE_INSTALL_LIBDIR=${GLOG_INSTALL_DIR}/lib -DCMAKE_POSITION_INDEPENDENT_CODE=ON diff --git a/cmake/external/protobuf.cmake b/cmake/external/protobuf.cmake index 09eb437aede4364f8aa285d5296f21cd8460fca1..41cd1ebaf33a6ec7c61ee8c965eaa0bccbb618b8 100644 --- a/cmake/external/protobuf.cmake +++ b/cmake/external/protobuf.cmake @@ -142,7 +142,6 @@ IF (WIN32) ENDIF(WIN32) if (NOT "${PROTOBUF_ROOT}" STREQUAL "") - find_path(PROTOBUF_INCLUDE_DIR google/protobuf/message.h PATHS ${PROTOBUF_ROOT}/include NO_DEFAULT_PATH) find_library(PROTOBUF_LIBRARY protobuf libprotobuf.lib PATHS ${PROTOBUF_ROOT}/lib NO_DEFAULT_PATH) find_library(PROTOBUF_LITE_LIBRARY protobuf-lite libprotobuf-lite.lib PATHS ${PROTOBUF_ROOT}/lib NO_DEFAULT_PATH) @@ -178,12 +177,28 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST) "${PROTOBUF_INSTALL_DIR}/bin/protoc${CMAKE_EXECUTABLE_SUFFIX}" PARENT_SCOPE) + SET(PROTOBUF_REPO "https://github.com/protocolbuffers/protobuf.git") + SET(PROTOBUF_TAG "9f75c5aa851cd877fb0d93ccc31b8567a6706546") SET(OPTIONAL_CACHE_ARGS "") SET(OPTIONAL_ARGS "") + IF(BUILD_FOR_HOST) - SET(OPTIONAL_ARGS "-Dprotobuf_WITH_ZLIB=OFF") - ELSE() SET(OPTIONAL_ARGS + "-DCMAKE_C_COMPILER=${HOST_C_COMPILER}" + "-DCMAKE_CXX_COMPILER=${HOST_CXX_COMPILER}" + "-Dprotobuf_WITH_ZLIB=OFF" + "-DZLIB_ROOT:FILEPATH=${ZLIB_ROOT}") + SET(OPTIONAL_CACHE_ARGS "-DZLIB_ROOT:STRING=${ZLIB_ROOT}") + ELSE() + # protobuf have compile issue when use android stl c++_static + SET(PROTOBUF_REPO "https://github.com/tensor-tang/protobuf.git") + SET(PROTOBUF_TAG "mobile") + SET(OPTIONAL_ARGS "-Dprotobuf_WITH_ZLIB=OFF" + "-DCMAKE_SYSTEM_NAME=${CMAKE_SYSTEM_NAME}" + "-DCMAKE_SYSTEM_VERSION=${CMAKE_SYSTEM_VERSION}" + "-DCMAKE_ANDROID_ARCH_ABI=${CMAKE_ANDROID_ARCH_ABI}" + "-DCMAKE_ANDROID_NDK=${CMAKE_ANDROID_NDK}" + "-DCMAKE_ANDROID_STL_TYPE=${CMAKE_ANDROID_STL_TYPE}" "-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}" "-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}" "-DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}" @@ -191,25 +206,18 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST) "-DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}" "-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}" "-DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}" - "-DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}" - "-Dprotobuf_WITH_ZLIB=ON" - "-DZLIB_ROOT:FILEPATH=${ZLIB_ROOT}" - ${EXTERNAL_OPTIONAL_ARGS}) - SET(OPTIONAL_CACHE_ARGS "-DZLIB_ROOT:STRING=${ZLIB_ROOT}") + "-DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}") ENDIF() IF(WIN32) SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} "-DCMAKE_GENERATOR_PLATFORM=x64") ENDIF() - SET(PROTOBUF_REPO "https://github.com/protocolbuffers/protobuf.git") - SET(PROTOBUF_TAG "9f75c5aa851cd877fb0d93ccc31b8567a6706546") - ExternalProject_Add( ${TARGET_NAME} ${EXTERNAL_PROJECT_LOG_ARGS} PREFIX ${PROTOBUF_SOURCES_DIR} UPDATE_COMMAND "" - DEPENDS zlib + #DEPENDS zlib GIT_REPOSITORY ${PROTOBUF_REPO} GIT_TAG ${PROTOBUF_TAG} CONFIGURE_COMMAND @@ -233,6 +241,13 @@ ENDFUNCTION() SET(PROTOBUF_VERSION 3.1.0) +IF(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) + build_protobuf(protobuf_host TRUE) + LIST(APPEND external_project_dependencies protobuf_host) + SET(PROTOBUF_PROTOC_EXECUTABLE ${protobuf_host_PROTOC_EXECUTABLE} + CACHE FILEPATH "protobuf executable." 
FORCE) +ENDIF() + IF(NOT PROTOBUF_FOUND) build_protobuf(extern_protobuf FALSE) @@ -245,7 +260,12 @@ IF(NOT PROTOBUF_FOUND) SET(PROTOBUF_PROTOC_LIBRARY ${extern_protobuf_PROTOC_LIBRARY} CACHE FILEPATH "protoc library." FORCE) - SET(PROTOBUF_PROTOC_EXECUTABLE ${extern_protobuf_PROTOC_EXECUTABLE} - CACHE FILEPATH "protobuf executable." FORCE) - PROMPT_PROTOBUF_LIB(extern_protobuf) + IF(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) + PROMPT_PROTOBUF_LIB(protobuf_host extern_protobuf) + ELSE() + SET(PROTOBUF_PROTOC_EXECUTABLE ${extern_protobuf_PROTOC_EXECUTABLE} + CACHE FILEPATH "protobuf executable." FORCE) + PROMPT_PROTOBUF_LIB(extern_protobuf) + ENDIF() + ENDIF(NOT PROTOBUF_FOUND) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 99c078cf7db625124b3e76a0a340c335ff7fff2a..cccff7f0b42593d176ae4af4b7d41bebaa70b92a 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -93,7 +93,10 @@ include_directories(${CMAKE_CURRENT_BINARY_DIR}) if(NOT APPLE) find_package(Threads REQUIRED) link_libraries(${CMAKE_THREAD_LIBS_INIT}) - set(CMAKE_CXX_LINK_EXECUTABLE "${CMAKE_CXX_LINK_EXECUTABLE} -pthread -ldl -lrt") + set(CMAKE_CXX_LINK_EXECUTABLE "${CMAKE_CXX_LINK_EXECUTABLE} -pthread -ldl") + if (NOT ANDROID) + set(CMAKE_CXX_LINK_EXECUTABLE "${CMAKE_CXX_LINK_EXECUTABLE} -lrt") + endif() endif(NOT APPLE) set_property(GLOBAL PROPERTY FLUID_MODULES "") diff --git a/paddle/fluid/incubate/CMakeLists.txt b/paddle/fluid/incubate/CMakeLists.txt index a6ded5204921bef2100efb19c131055e3d92724a..552134ba6640b503c1ac9c63e9a1a67130123075 100644 --- a/paddle/fluid/incubate/CMakeLists.txt +++ b/paddle/fluid/incubate/CMakeLists.txt @@ -1 +1 @@ -include_directories(lite) \ No newline at end of file +include_directories(lite) diff --git a/paddle/fluid/inference/analysis/dot.h b/paddle/fluid/inference/analysis/dot.h index d1eef603be48ea047ca6ba63761f84c799699a5d..1cb790f18229003d86adad6cd69e2fa88c02549b 100644 --- a/paddle/fluid/inference/analysis/dot.h +++ b/paddle/fluid/inference/analysis/dot.h @@ -23,10 +23,10 @@ #include #include #include -#include "paddle/fluid/lite/utils/logging.h" -#ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK +// #include "paddle/fluid/lite/utils/logging.h" +// #ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK #include -#endif +// #endif namespace paddle { namespace inference { diff --git a/paddle/fluid/lite/CMakeLists.txt b/paddle/fluid/lite/CMakeLists.txt index 3b9a6756953aca32e48a9052d5cd691c4e9622b5..d465c5898184e132ceecd795b6ff8f0d7bc0a814 100644 --- a/paddle/fluid/lite/CMakeLists.txt +++ b/paddle/fluid/lite/CMakeLists.txt @@ -39,3 +39,4 @@ add_subdirectory(kernels) add_subdirectory(model_parser) add_subdirectory(utils) add_subdirectory(api) + diff --git a/paddle/fluid/lite/api/CMakeLists.txt b/paddle/fluid/lite/api/CMakeLists.txt index 1de5d09394d55c0a06c84394cf1be40fc3f4e0ce..bc145f9c6a6ebc17c555c7bd1eefd54703d83b38 100644 --- a/paddle/fluid/lite/api/CMakeLists.txt +++ b/paddle/fluid/lite/api/CMakeLists.txt @@ -1,4 +1,4 @@ -set(cxx_api_lite_deps scope_lite optimizer_lite target_wrapper_host optimizer_lite model_parser_lite) +set(cxx_api_lite_deps scope_lite optimizer_lite target_wrapper_host model_parser_lite) if(LITE_WITH_CUDA) set(cxx_api_lite_deps ${cxx_api_lite_deps} kernels_cuda) cc_library(cxx_api_lite_cuda SRCS cxx_api.cc DEPS ${cxx_api_lite_deps} target_wrapper_cuda) @@ -17,7 +17,7 @@ endif() cc_library(light_api_lite SRCS light_api.cc DEPS ${light_api_deps} ${ops_lite} ${host_kernels}) message(STATUS "get ops ${ops_lite}") -message(STATUS "get kernels ${host_kernels}") +message(STATUS "get kernels 
${host_kernels} ${arm_kernels}") include(ExternalProject) set(LITE_URL "http://paddle-inference-dist.bj.bcebos.com" CACHE STRING "inference download url") @@ -36,4 +36,11 @@ endif(WITH_TESTING) lite_cc_test(test_light_api SRCS light_api_test.cc DEPS light_api_lite ARGS --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL) -cc_binary(cxx_api_lite_bin SRCS cxx_api_bin.cc DEPS cxx_api_lite model_parser_lite target_wrapper_host ${ops_lite} ${host_kernels}) +cc_binary(cxx_api_lite_bin SRCS cxx_api_bin.cc + DEPS + cxx_api_lite + model_parser_lite + target_wrapper_host + mir_passes + ${ops_lite} ${host_kernels} ${arm_kernels}) + diff --git a/paddle/fluid/lite/api/cxx_api_bin.cc b/paddle/fluid/lite/api/cxx_api_bin.cc index f7e7426a45ae600c0edf712fa19e5190832ad212..7b38f072e2327145b38dc3c3a0a85301597ab68f 100644 --- a/paddle/fluid/lite/api/cxx_api_bin.cc +++ b/paddle/fluid/lite/api/cxx_api_bin.cc @@ -13,28 +13,36 @@ // limitations under the License. #include "paddle/fluid/lite/api/cxx_api.h" + +#ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK #include "paddle/fluid/lite/core/mir/passes.h" +#endif + #include "paddle/fluid/lite/core/op_registry.h" namespace paddle { namespace lite { void Run(const char* model_dir) { - lite::Executor predictor; -#ifndef LITE_WITH_CUDA - std::vector valid_places({Place{TARGET(kHost), PRECISION(kFloat)}}); -#else - std::vector valid_places({ - Place{TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kNCHW)}, - Place{TARGET(kCUDA), PRECISION(kFloat), DATALAYOUT(kNCHW)}, - Place{TARGET(kCUDA), PRECISION(kAny), DATALAYOUT(kNCHW)}, - Place{TARGET(kHost), PRECISION(kAny), DATALAYOUT(kNCHW)}, - Place{TARGET(kCUDA), PRECISION(kAny), DATALAYOUT(kAny)}, - Place{TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny)}, - }); -#endif + lite::ExecutorLite predictor; + // #ifndef LITE_WITH_CUDA + // std::vector valid_places({Place{TARGET(kHost), + // PRECISION(kFloat)}}); + // #elif defined(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) + // #else + // std::vector valid_places({ + // Place{TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kNCHW)}, + // Place{TARGET(kCUDA), PRECISION(kFloat), DATALAYOUT(kNCHW)}, + // Place{TARGET(kCUDA), PRECISION(kAny), DATALAYOUT(kNCHW)}, + // Place{TARGET(kHost), PRECISION(kAny), DATALAYOUT(kNCHW)}, + // Place{TARGET(kCUDA), PRECISION(kAny), DATALAYOUT(kAny)}, + // Place{TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny)}, + // }); + // #endif + + std::vector valid_places({Place{TARGET(kARM), PRECISION(kFloat)}}); - predictor.Build(model_dir, Place{TARGET(kCUDA), PRECISION(kFloat)}, + predictor.Build(model_dir, Place{TARGET(kARM), PRECISION(kFloat)}, valid_places); auto* input_tensor = predictor.GetInput(0); @@ -71,12 +79,12 @@ USE_LITE_OP(fc); USE_LITE_OP(scale); USE_LITE_OP(feed); USE_LITE_OP(fetch); -USE_LITE_OP(io_copy); -USE_LITE_KERNEL(fc, kHost, kFloat, kNCHW, def); -USE_LITE_KERNEL(mul, kHost, kFloat, kNCHW, def); -USE_LITE_KERNEL(scale, kHost, kFloat, kNCHW, def); -USE_LITE_KERNEL(feed, kHost, kAny, kAny, def); -USE_LITE_KERNEL(fetch, kHost, kAny, kAny, def); +// USE_LITE_OP(io_copy); +USE_LITE_KERNEL(fc, kARM, kFloat, kNCHW, def); +USE_LITE_KERNEL(mul, kARM, kFloat, kNCHW, def); +USE_LITE_KERNEL(scale, kARM, kFloat, kNCHW, def); +USE_LITE_KERNEL(feed, kARM, kAny, kAny, def); +USE_LITE_KERNEL(fetch, kARM, kAny, kAny, def); #ifdef LITE_WITH_CUDA USE_LITE_KERNEL(mul, kCUDA, kFloat, kNCHW, def); diff --git a/paddle/fluid/lite/core/CMakeLists.txt b/paddle/fluid/lite/core/CMakeLists.txt index 
bf40d198dee47f958313894f0130edcd51341715..8545fa0f40a9920e4b3541c71ef851904bf7583f 100644 --- a/paddle/fluid/lite/core/CMakeLists.txt +++ b/paddle/fluid/lite/core/CMakeLists.txt @@ -16,7 +16,7 @@ proto_library(framework_proto_lite SRCS framework.proto) cc_library(kernel_lite SRCS kernel.cc DEPS type_system target_wrapper_lite) cc_library(variable_lite SRCS variable.cc) -cc_library(op_registry_lite SRCS op_registry.cc) +cc_library(op_registry_lite SRCS op_registry.cc DEPS framework_proto_lite) cc_library(scope_lite SRCS scope.cc) cc_library(context_lite SRCS context.cc DEPS any_lite) cc_library(op_lite SRCS op_lite.cc DEPS scope_lite op_registry_lite compatible_pb_lite) @@ -46,3 +46,4 @@ lite_cc_test(test_tensor_lite SRCS lite_tensor_test.cc DEPS lite_tensor) lite_cc_test(test_type_system SRCS type_system_test.cc DEPS type_system utils_lite) #lite_cc_test(test_optimizer_lite SRCS optimizer_test.cc DEPS mir_pass_manager program_fake_utils mir_passes optimizer_lite fc_op_lite) lite_cc_test(test_types_lite SRCS types_test.cc DEPS types_lite) + diff --git a/paddle/fluid/lite/core/mir/CMakeLists.txt b/paddle/fluid/lite/core/mir/CMakeLists.txt index db31aeb58e16b129e7d35e4b1c5146ac9f420fe7..00b177fe6ba3bbb8d8cd2e74e1a187fbeb684dd1 100644 --- a/paddle/fluid/lite/core/mir/CMakeLists.txt +++ b/paddle/fluid/lite/core/mir/CMakeLists.txt @@ -1,4 +1,4 @@ -cc_library(mir_node SRCS node.cc) +cc_library(mir_node SRCS node.cc DEPS framework_proto_lite) cc_library(mir_ssa_graph SRCS ssa_graph.cc DEPS mir_node) cc_library(mir_pass SRCS pass.cc DEPS mir_ssa_graph) cc_library(mir_pass_manager SRCS pass_manager.cc DEPS mir_pass mir_ssa_graph mir_passes) @@ -48,3 +48,4 @@ if (LITE_WITH_CUDA) endif() cc_test(test_variable_place_infrence_pass SRCS variable_place_inference_pass_test.cc DEPS ${test_variable_place_infrence_pass_DEPS}) + diff --git a/paddle/fluid/lite/core/mir/pass_registry.h b/paddle/fluid/lite/core/mir/pass_registry.h index 5c213169b5242a89f7adb8957032f115d74a3752..0586845f3ceb6dfcb2db5bbe5db03422e493226a 100644 --- a/paddle/fluid/lite/core/mir/pass_registry.h +++ b/paddle/fluid/lite/core/mir/pass_registry.h @@ -15,7 +15,6 @@ #pragma once #include -#include "paddle/fluid/lite/core/mir/pass.h" #include "paddle/fluid/lite/core/mir/pass_manager.h" namespace paddle { @@ -32,6 +31,10 @@ class PassRegistry { bool Touch() const { return true; } }; +} // namespace mir +} // namespace lite +} // namespace paddle + #define REGISTER_MIR_PASS(name__, class__) \ paddle::lite::mir::PassRegistry mir_pass_registry##name__(#name__, \ new class__); \ @@ -43,7 +46,3 @@ class PassRegistry { extern bool mir_pass_registry##name__##_fake(); \ static bool mir_pass_usage##name__ __attribute__((unused)) = \ mir_pass_registry##name__##_fake(); - -} // namespace mir -} // namespace lite -} // namespace paddle diff --git a/paddle/fluid/lite/core/naive_test_model.py b/paddle/fluid/lite/core/naive_test_model.py index f4bbdefceca1434952ab9f5728abd5d1c4594902..832661e5ee86f2759acfeb4a6a410cce6050ad53 100644 --- a/paddle/fluid/lite/core/naive_test_model.py +++ b/paddle/fluid/lite/core/naive_test_model.py @@ -1,3 +1,17 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import numpy import sys, os import numpy as np @@ -26,8 +40,6 @@ data_1 = np.array(numpy.random.random([100, 100]), dtype='float32') #fluid.default_main_program().desc. - - #prog = fluid.compiler.CompiledProgram(fluid.default_main_program()) prog = fluid.default_main_program() @@ -36,11 +48,9 @@ prog = fluid.default_main_program() with open('main_program.pb', 'wb') as f: f.write(prog.desc.serialize_to_string()) - #outs = exe.run(program=prog, feed={'a':data_1, }, fetch_list=[cost]) sys.exit(0) fluid.io.save_inference_model("./model2", [a.name], [a1], exe) print(numpy.array(outs)) - diff --git a/paddle/fluid/lite/core/op_lite.cc b/paddle/fluid/lite/core/op_lite.cc index e54053026d90707e5c5e1161844d844bbc4fc6dd..47e0c441e75443f066a5f6f69e6876030d649f81 100644 --- a/paddle/fluid/lite/core/op_lite.cc +++ b/paddle/fluid/lite/core/op_lite.cc @@ -71,7 +71,7 @@ bool OpLite::Run() { bool OpLite::Attach(const OpDesc &opdesc, lite::Scope *scope) { // valid_places_.clear(); CHECK(scope != nullptr); - //CHECK(!op_info_.get()); + // CHECK(!op_info_.get()); scope_ = scope; op_info_.reset(new OpInfo); // Force clean the out-of-date infomation. op_info_->Build(opdesc.ReadonlyProto()); diff --git a/paddle/fluid/lite/core/op_lite.h b/paddle/fluid/lite/core/op_lite.h index 2f878905ca326ad532c3932555c6d02a84d48e88..8845760234d3d9b5f806abc852a7714ab2d167be 100644 --- a/paddle/fluid/lite/core/op_lite.h +++ b/paddle/fluid/lite/core/op_lite.h @@ -131,7 +131,6 @@ class OpLite : public Registry { return var->GetMutable(); } - protected: lite::Scope *scope_{}; std::unique_ptr kernel_; diff --git a/paddle/fluid/lite/core/op_registry.cc b/paddle/fluid/lite/core/op_registry.cc index 681cbdafcdeee33c50bbb60dbca88feaac832681..94d487d724b702e451d39155d7963d55a46ea4fe 100644 --- a/paddle/fluid/lite/core/op_registry.cc +++ b/paddle/fluid/lite/core/op_registry.cc @@ -59,6 +59,9 @@ std::list> KernelRegistry::Create( case TARGET(kCUDA): { CREATE_KERNEL(kCUDA); } break; + case TARGET(kARM): { + CREATE_KERNEL(kARM); + } break; default: CHECK(false) << "not supported kernel target " << TargetToStr(target); } @@ -67,7 +70,10 @@ std::list> KernelRegistry::Create( return std::list>(); } -KernelRegistry::KernelRegistry() { +KernelRegistry::KernelRegistry() + : registries_(static_cast(TARGET(NUM)) * + static_cast(PRECISION(NUM)) * + static_cast(DATALAYOUT(NUM))) { #define INIT_FOR(target__, precision__, layout__) \ registries_[KernelRegistry::GetKernelOffset #include #include +#include #include "paddle/fluid/lite/core/kernel.h" #include "paddle/fluid/lite/core/op_lite.h" #include "paddle/fluid/lite/core/target_wrapper.h" @@ -75,7 +76,11 @@ class KernelRegistry final { KernelRegistryForTarget *, // KernelRegistryForTarget * // + DATALAYOUT(kAny)> *, // + KernelRegistryForTarget *, // + KernelRegistryForTarget * // >; KernelRegistry(); @@ -92,8 +97,9 @@ class KernelRegistry final { using kernel_registor_t = KernelRegistryForTarget; auto &varient = registries_[GetKernelOffset()]; - varient.template get()->Register(name, - std::move(creator)); + auto *reg = varient.template get(); + CHECK(reg) << "Can not be empty of " << name; + 
reg->Register(name, std::move(creator)); } template :" << std::endl; - ss << registries_[GetKernelOffset()] - .get *>() - ->DebugString(); - ss << std::endl; + constexpr TargetType tgt = TARGET(kHost); + constexpr PrecisionType dt = PRECISION(kFloat); + constexpr DataLayoutType lt = DATALAYOUT(kNCHW); + constexpr DataLayoutType kany = DATALAYOUT(kAny); + using kernel_registor_t = KernelRegistryForTarget; + auto *reg = registries_[GetKernelOffset()] + .template get(); + ss << reg->DebugString() << std::endl; return ss.str(); } private: - mutable std::array(TARGET(NUM)) * - static_cast(PRECISION(NUM)) * - static_cast(DATALAYOUT(NUM))> - registries_; + mutable std::vector registries_; }; template {{ "static_kernel_pick_pass", // @@ -62,6 +63,7 @@ class Optimizer { } else { RunPasses(passes); } +#endif exec_scope_ = program.exec_scope; } diff --git a/paddle/fluid/lite/core/target_wrapper.h b/paddle/fluid/lite/core/target_wrapper.h index df21c406e83b589cb25c705128e2f79c42241764..1f0d1ecf140171e8734bea1323343a522011f423 100644 --- a/paddle/fluid/lite/core/target_wrapper.h +++ b/paddle/fluid/lite/core/target_wrapper.h @@ -30,6 +30,7 @@ enum class TargetType : int { kHost, kX86, kCUDA, + kARM, kAny, // any target NUM, // number of fields. }; diff --git a/paddle/fluid/lite/cuda/CMakeLists.txt b/paddle/fluid/lite/cuda/CMakeLists.txt index 505759c7d4afef95423ce3815912794ae28255b0..9889b8b1aa02b9f886bf45aaf9b997f0043c3278 100644 --- a/paddle/fluid/lite/cuda/CMakeLists.txt +++ b/paddle/fluid/lite/cuda/CMakeLists.txt @@ -4,3 +4,4 @@ endif() nv_library(target_wrapper_cuda SRCS target_wrapper.cc) nv_library(cuda_blas_lite SRCS blas.cc) + diff --git a/paddle/fluid/lite/host/CMakeLists.txt b/paddle/fluid/lite/host/CMakeLists.txt index efc29d0e830abdcc7db841a733868e07b4a08c4d..576c6e76c142c8c753181334cf0d9c767221744b 100644 --- a/paddle/fluid/lite/host/CMakeLists.txt +++ b/paddle/fluid/lite/host/CMakeLists.txt @@ -1 +1,2 @@ cc_library(target_wrapper_host SRCS target_wrapper.cc DEPS target_wrapper_lite) + diff --git a/paddle/fluid/lite/kernels/CMakeLists.txt b/paddle/fluid/lite/kernels/CMakeLists.txt index 877ac7e05e333d00a6527c151fdbb2f7f53d03dd..0708e7d9a04b318b37d586f58c984156000620a5 100644 --- a/paddle/fluid/lite/kernels/CMakeLists.txt +++ b/paddle/fluid/lite/kernels/CMakeLists.txt @@ -4,3 +4,4 @@ add_subdirectory(host) add_subdirectory(arm) add_subdirectory(cuda) add_subdirectory(x86) + diff --git a/paddle/fluid/lite/kernels/arm/CMakeLists.txt b/paddle/fluid/lite/kernels/arm/CMakeLists.txt index a7060dbd62367ddcdcb0ccc66c54a91750903136..116db446a0252557d0c4346ba346cd1a7e77a291 100644 --- a/paddle/fluid/lite/kernels/arm/CMakeLists.txt +++ b/paddle/fluid/lite/kernels/arm/CMakeLists.txt @@ -1 +1,27 @@ +if(NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) + return() +endif() + message(STATUS "compile with lite ARM kernels") + +cc_library(fc_compute_arm SRCS fc_compute.cc DEPS ${lite_kernel_deps} eigen3) +cc_library(relu_compute_arm SRCS relu_compute.cc DEPS ${lite_kernel_deps}) +cc_library(mul_compute_arm SRCS mul_compute.cc DEPS ${lite_kernel_deps} eigen3) +cc_library(scale_compute_arm SRCS scale_compute.cc DEPS ${lite_kernel_deps} eigen3) + +cc_library(feed_compute_arm SRCS feed_compute.cc DEPS ${lite_kernel_deps}) +cc_library(fetch_compute_arm SRCS fetch_compute.cc DEPS ${lite_kernel_deps}) + +# lite_cc_test(test_fc_compute_arm SRCS fc_compute_test.cc DEPS ${lite_kernel_deps} fc_compute_arm) + +set(arm_kernels + feed_compute_arm + fetch_compute_arm + fc_compute_arm + relu_compute_arm + mul_compute_arm + 
scale_compute_arm + ) + +set(arm_kernels "${arm_kernels}" CACHE INTERNAL "arm kernels") + diff --git a/paddle/fluid/lite/kernels/arm/fc_compute.cc b/paddle/fluid/lite/kernels/arm/fc_compute.cc new file mode 100644 index 0000000000000000000000000000000000000000..6b7060227d8d40b5b75276879fb9ce8e2abd7cdc --- /dev/null +++ b/paddle/fluid/lite/kernels/arm/fc_compute.cc @@ -0,0 +1,60 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/lite/kernels/arm/fc_compute.h" +#include +#include "paddle/fluid/lite/core/op_registry.h" +#include "paddle/fluid/lite/core/type_system.h" + +namespace paddle { +namespace lite { +namespace kernels { +namespace arm { + +// NOTE should use pure std C++ implementation. +void FcCompute::Run() { + auto& param = this->Param(); + + CHECK_GE(param.input->dims().size(), 2UL); + CHECK_EQ(param.output->dims().size(), 2UL); + + fc_compute_eigen( + param.input->data(), // x + param.input->dims().Slice(0, param.in_num_col_dims).production(), + param.input->dims() + .Slice(param.in_num_col_dims, param.input->dims().size()) + .production(), + param.w->data(), // w + param.w->dims()[1], // w_w + param.w->dims()[0], // w_h + param.bias->data(), // b + param.output->mutable_data()); +} + +TargetType FcCompute::target() const { return TARGET(kARM); } + +PrecisionType FcCompute::precision() const { return PRECISION(kFloat); } + +} // namespace arm +} // namespace kernels +} // namespace lite +} // namespace paddle + +REGISTER_LITE_KERNEL(fc, kARM, kFloat, kNCHW, + paddle::lite::kernels::arm::FcCompute, def) + .BindInput("Input", {LiteType::GetTensorTy(TARGET(kARM))}) + .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kARM))}) + .BindInput("W", {LiteType::GetTensorTy(TARGET(kARM))}) + .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))}) + .Finalize(); diff --git a/paddle/fluid/lite/kernels/arm/fc_compute.h b/paddle/fluid/lite/kernels/arm/fc_compute.h new file mode 100644 index 0000000000000000000000000000000000000000..36f3e0723124169905bba40fcd209a516dfd0dce --- /dev/null +++ b/paddle/fluid/lite/kernels/arm/fc_compute.h @@ -0,0 +1,86 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
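The FcCompute::Run added above flattens the input tensor around in_num_col_dims: the leading axes collapse into the row count of a 2-D view and the trailing axes into its column count. A minimal standalone sketch of that arithmetic in plain C++ (assuming an example shape of {1, 10, 20} and in_num_col_dims = 2, and avoiding the Lite DDim/Slice helpers):

    #include <cstdint>
    #include <functional>
    #include <iostream>
    #include <numeric>
    #include <vector>

    int main() {
      // Assumed example shape; stands in for param.input->dims() in FcCompute::Run.
      std::vector<int64_t> dims{1, 10, 20};
      int in_num_col_dims = 2;

      // dims().Slice(0, in_num_col_dims).production(): product of leading dims (batch rows).
      int64_t batch = std::accumulate(dims.begin(), dims.begin() + in_num_col_dims,
                                      int64_t{1}, std::multiplies<int64_t>());
      // dims().Slice(in_num_col_dims, dims.size()).production(): product of trailing dims (features).
      int64_t features = std::accumulate(dims.begin() + in_num_col_dims, dims.end(),
                                         int64_t{1}, std::multiplies<int64_t>());

      std::cout << "2-D view: " << batch << " x " << features << "\n";  // prints 10 x 20
      return 0;
    }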
+ +#pragma once +#include +#include "paddle/fluid/lite/core/kernel.h" +#include "paddle/fluid/lite/operators/fc_op.h" + +namespace paddle { +namespace lite { +namespace kernels { +namespace arm { + +class FcCompute : public KernelLite { + public: + using param_t = operators::FcParam; + + void Run() override; + + TargetType target() const override; + PrecisionType precision() const override; + + virtual ~FcCompute() = default; +}; + +template +void fc_compute_eigen(const T* x, int x_w, int x_h, // + const T* w, int w_w, int w_h, // + const T* b, // + T* out) { + using matrix_t = + Eigen::Matrix; + + Eigen::Map X(x, x_h, x_w); + Eigen::Map W(w, w_h, w_w); + Eigen::Map Out(out, x_h, w_h); + + Out = X * W.transpose(); + + if (b) { + Eigen::Map> B(b, w_h); + Out = Out.array().rowwise() + B.transpose().array(); + } +} + +template +__attribute__((optimize("unroll-loops"))) // +T dot(const T* x, const T* y, int dim) { + T out{}; + for (int i = 0; i < dim; i++) { + out += x[i] * y[i]; + } + return out; +} + +template +void fc_compute_naive(const T* x, int x_w, int x_h, // + const T* w, int w_w, int w_h, // + const T* b, // + T* out) { + CHECK_EQ(x_w, w_w); + // out shape: (x_h, w_w) + memset(out, 0, x_h * w_h * sizeof(T)); + + for (int r = 0; r < x_h; r++) { + for (int c = 0; c < w_h; c++) { + out[r * w_h + c] = dot(&x[r * x_w], &w[c * w_w], w_w) + b[c]; + } + } +} + +} // namespace arm +} // namespace kernels +} // namespace lite +} // namespace paddle diff --git a/paddle/fluid/lite/kernels/arm/fc_compute_test.cc b/paddle/fluid/lite/kernels/arm/fc_compute_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..5f5de8a89de9eed74716fe97c034903898801f4e --- /dev/null +++ b/paddle/fluid/lite/kernels/arm/fc_compute_test.cc @@ -0,0 +1,130 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
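fc_compute.h above backs the Eigen path with fc_compute_naive, a plain row-by-row dot product plus bias. A self-contained sketch of the same loop, re-implemented here for illustration with small assumed shapes instead of including the Lite headers:

    #include <cstring>
    #include <iostream>

    // Same layout as fc_compute_naive above: x is (x_h x x_w) row-major,
    // w is (w_h x w_w) row-major and used transposed, b has w_h entries,
    // out is (x_h x w_h).
    void fc_naive(const float* x, int x_w, int x_h, const float* w, int w_w,
                  int w_h, const float* b, float* out) {
      std::memset(out, 0, sizeof(float) * x_h * w_h);
      for (int r = 0; r < x_h; r++) {
        for (int c = 0; c < w_h; c++) {
          float dot = 0.f;
          for (int k = 0; k < w_w; k++) dot += x[r * x_w + k] * w[c * w_w + k];
          out[r * w_h + c] = dot + b[c];
        }
      }
    }

    int main() {
      // Assumed toy sizes matching the unit test: batch 2, 3 inputs, 4 outputs.
      const float x[2 * 3] = {0, 1, 2, 3, 4, 5};
      const float w[4 * 3] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
      const float b[4] = {0, 1, 2, 3};
      float out[2 * 4];
      fc_naive(x, 3, 2, w, 3, 4, b, out);
      for (float v : out) std::cout << v << " ";
      std::cout << "\n";
      return 0;
    }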
+ +#include "paddle/fluid/lite/kernels/arm/fc_compute.h" +#include +#include +#include "paddle/fluid/lite/core/op_registry.h" + +namespace paddle { +namespace lite { +namespace kernels { +namespace arm { + +TEST(fc_compute_naive, test) { + lite::Tensor x, w, b, out, out1; + const int batch_size = 2; + x.Resize({batch_size, 3}); + w.Resize({4, 3}); + b.Resize({1, 4}); + out.Resize({batch_size, 4}); + out1.Resize({batch_size, 4}); + + auto x_data = x.mutable_data(); + auto w_data = w.mutable_data(); + auto b_data = b.mutable_data(); + auto out_data = out.mutable_data(); + auto out_data1 = out1.mutable_data(); + + for (int i = 0; i < product(x.dims()); i++) x_data[i] = i; + for (int i = 0; i < product(w.dims()); i++) w_data[i] = i; + for (int i = 0; i < product(b.dims()); i++) b_data[i] = i; + + fc_compute_naive(x_data, 3, batch_size, // + w_data, 3, 4, // + b_data, out_data); + fc_compute_eigen(x_data, 3, batch_size, // + w_data, 3, 4, // + b_data, out_data1); + + for (int i = 0; i < product(out.dims()); i++) { + EXPECT_NEAR(out_data[0], out_data1[0], 1e-6); + } +} + +TEST(fc_arm, init) { + FcCompute fc; + ASSERT_EQ(fc.precision(), PRECISION(kFloat)); + ASSERT_EQ(fc.target(), TARGET(kARM)); +} + +TEST(fc_arm, algorithm) { + using matrix_t = Eigen::Matrix; + using matrix_map_t = Eigen::Map; + + // dim 10, 20 + std::vector input(10 * 20); + std::vector w(20 * 20); + std::vector output(10 * 20); + + Eigen::Map input_mat(input.data(), 10, 20); + Eigen::Map weight_mat(w.data(), 20, 20); + matrix_map_t output_mat(output.data(), 10, 20); + + output_mat = weight_mat.transpose() * input_mat; +} + +TEST(fc_arm, compute) { + FcCompute fc; + operators::FcParam param; + + lite::Tensor x; + lite::Tensor w; + lite::Tensor bias; + lite::Tensor output; + + x.Resize(DDim(std::vector({1, 10, 20}))); + w.Resize(DDim(std::vector({20, 20}))); + bias.Resize(DDim(std::vector({1, 10}))); + output.Resize(DDim(std::vector({10, 20}))); + + auto* x_data = x.mutable_data(); + auto* w_data = w.mutable_data(); + auto* bias_data = bias.mutable_data(); + auto* output_data = output.mutable_data(); + + for (int i = 0; i < 10 * 20; i++) x_data[i] = i; + for (int i = 0; i < 20 * 20; i++) w_data[i] = i; + for (int i = 0; i < 10; i++) bias_data[i] = i; + for (int i = 0; i < 10 * 20; i++) output_data[i] = 0; + + param.in_num_col_dims = 2; + param.input = &x; + param.w = &w; + param.bias = &bias; + param.output = &output; + param.in_mat_dims = x.dims(); + + fc.SetParam(param); + fc.Run(); + + LOG(INFO) << "x"; + for (int i = 0; i < 10 * 20; i++) LOG(INFO) << x_data[i]; + + LOG(INFO) << "output:"; + for (int i = 0; i < 10 * 20; i++) LOG(INFO) << output.data()[i]; +} + +TEST(fc, retrive_op) { + auto fc = + KernelRegistry::Global().Create("fc"); + ASSERT_TRUE(fc); +} + +} // namespace arm +} // namespace kernels +} // namespace lite +} // namespace paddle + +USE_LITE_KERNEL(fc, kARM, kFloat, kNCHW, def); diff --git a/paddle/fluid/lite/kernels/arm/feed_compute.cc b/paddle/fluid/lite/kernels/arm/feed_compute.cc new file mode 100644 index 0000000000000000000000000000000000000000..5e51dd5eeb48a90ae90f8b35dbeb9049fd5921c6 --- /dev/null +++ b/paddle/fluid/lite/kernels/arm/feed_compute.cc @@ -0,0 +1,48 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/lite/core/op_registry.h" +#include "paddle/fluid/lite/core/type_system.h" + +namespace paddle { +namespace lite { +namespace kernels { +namespace arm { + +class FeedCompute + : public KernelLite { + public: + using param_t = operators::FeedParam; + + void Run() override { + auto ¶m = Param(); + LOG(INFO) << "feed_list.size: " << param.feed_list->size(); + LOG(INFO) << "col " << param.col; + const lite::Tensor &feed_item = (*param.feed_list)[0]; + param.out->ShareDataWith(feed_item); + LOG(INFO) << "FEED input " << feed_item << " col " << param.col; + LOG(INFO) << "FEED output " << *param.out; + } +}; + +} // namespace arm +} // namespace kernels +} // namespace lite +} // namespace paddle + +REGISTER_LITE_KERNEL(feed, kARM, kAny, kAny, + paddle::lite::kernels::arm::FeedCompute, def) + .BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))}) + .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))}) + .Finalize(); diff --git a/paddle/fluid/lite/kernels/arm/fetch_compute.cc b/paddle/fluid/lite/kernels/arm/fetch_compute.cc new file mode 100644 index 0000000000000000000000000000000000000000..ca491ba92dcb53ad50f0bfc2873d5b82bd4a422a --- /dev/null +++ b/paddle/fluid/lite/kernels/arm/fetch_compute.cc @@ -0,0 +1,51 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/fluid/lite/core/op_registry.h" +#include "paddle/fluid/lite/core/type_system.h" + +namespace paddle { +namespace lite { +namespace kernels { +namespace arm { + +class FetchCompute + : public KernelLite { + public: + using param_t = operators::FeedParam; + + void Run() override { + auto& param = Param(); + auto* fetch_list = param.fetch_list; + if (fetch_list->size() <= static_cast(param.col)) { + fetch_list->resize(param.col + 1); + } + + auto& dst = fetch_list->at(param.col); + dst.ShareDataWith(*param.input); + } +}; + +} // namespace arm +} // namespace kernels +} // namespace lite +} // namespace paddle + +REGISTER_LITE_KERNEL(fetch, kARM, kAny, kAny, + paddle::lite::kernels::arm::FetchCompute, def) + .BindInput("X", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kAny), + DATALAYOUT(kAny), -1)}) + .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kAny), + DATALAYOUT(kAny), -1)}) + .Finalize(); diff --git a/paddle/fluid/lite/kernels/arm/mul_compute.cc b/paddle/fluid/lite/kernels/arm/mul_compute.cc new file mode 100644 index 0000000000000000000000000000000000000000..5e867f2dc3ed448fc251a1a1a645f9ed9bc651cb --- /dev/null +++ b/paddle/fluid/lite/kernels/arm/mul_compute.cc @@ -0,0 +1,80 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include "paddle/fluid/lite/core/kernel.h" +#include "paddle/fluid/lite/core/op_registry.h" +#include "paddle/fluid/lite/core/types.h" + +namespace paddle { +namespace lite { +namespace kernels { +namespace arm { + +template +void mul_compute_eigen(const T* x, int x_h, int x_w, const T* y, int y_h, + int y_w, T* out) { + using matrix_t = + Eigen::Matrix; + + Eigen::Map X(x, x_h, x_w); + Eigen::Map Y(y, y_h, y_w); + Eigen::Map Out(out, x_h, y_w); + + Out = X * Y; +} + +class MulCompute : public KernelLite { + public: + using param_t = operators::MulParam; + + void Run() override { + auto& param = Param(); + core::dim2 x_shape( + {static_cast( + param.x->dims().Slice(0, param.x_num_col_dims).production()), + static_cast( + param.x->dims() + .Slice(param.x_num_col_dims, param.x->dims().size()) + .production())}); + core::dim2 y_shape( + {static_cast( + param.y->dims().Slice(0, param.y_num_col_dims).production()), + static_cast( + param.y->dims() + .Slice(param.y_num_col_dims, param.y->dims().size()) + .production())}); + + mul_compute_eigen(param.x->data(), x_shape.x, x_shape.y, // + param.y->data(), y_shape.x, y_shape.y, // + param.output->mutable_data()); + LOG(INFO) << "MUL x " << *param.x; + LOG(INFO) << "MUL W " << *param.y; + LOG(INFO) << "MUL out " << *param.output; + } + + virtual ~MulCompute() = default; +}; + +} // namespace arm +} // namespace kernels +} // namespace lite +} // namespace paddle + +REGISTER_LITE_KERNEL(mul, kARM, kFloat, kNCHW, + paddle::lite::kernels::arm::MulCompute, def) + .BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))}) + .BindInput("Y", {LiteType::GetTensorTy(TARGET(kARM))}) + .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))}) + .Finalize(); diff --git a/paddle/fluid/lite/kernels/arm/relu_compute.cc b/paddle/fluid/lite/kernels/arm/relu_compute.cc new file mode 100644 index 0000000000000000000000000000000000000000..6e27e8ec669aa40406f4991089d09bc7521a0c95 --- /dev/null +++ b/paddle/fluid/lite/kernels/arm/relu_compute.cc @@ -0,0 +1,15 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/lite/kernels/arm/relu_compute.h" diff --git a/paddle/fluid/lite/kernels/arm/relu_compute.h b/paddle/fluid/lite/kernels/arm/relu_compute.h new file mode 100644 index 0000000000000000000000000000000000000000..29d17bf5918e112dfd065c9cc11910703ab5e92d --- /dev/null +++ b/paddle/fluid/lite/kernels/arm/relu_compute.h @@ -0,0 +1,48 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include +#include "paddle/fluid/lite/core/kernel.h" +#include "paddle/fluid/lite/core/op_registry.h" + +namespace paddle { +namespace lite { +namespace kernels { +namespace arm { + +class ReluCompute : public KernelLite { + public: + void Run() override { + auto& param = Param(); + auto n = param.input->dims().production(); + const float* input = param.input->data(); + float* output = param.output->mutable_data(); + for (int i = 0; i < n; i++) { + output[i] = std::max(0.f, input[i]); + } + } + + TargetType target() const override { return TARGET(kARM); } + PrecisionType precision() const override { return PRECISION(kFloat); } +}; + +} // namespace arm +} // namespace kernels +} // namespace lite +} // namespace paddle + +REGISTER_LITE_KERNEL(relu, kARM, kFloat, kNCHW, + paddle::lite::kernels::arm::ReluCompute, def) + .Finalize(); diff --git a/paddle/fluid/lite/kernels/arm/scale_compute.cc b/paddle/fluid/lite/kernels/arm/scale_compute.cc new file mode 100644 index 0000000000000000000000000000000000000000..f078318e42ac1e8eaeff752a17d35008d86b2d4a --- /dev/null +++ b/paddle/fluid/lite/kernels/arm/scale_compute.cc @@ -0,0 +1,57 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include "paddle/fluid/lite/core/kernel.h" +#include "paddle/fluid/lite/core/op_registry.h" +#include "paddle/fluid/lite/core/types.h" + +namespace paddle { +namespace lite { +namespace kernels { +namespace arm { + +template +void scale_compute(const T* x, T* out, int size, float scale, float bias, + bool bias_before) { + if (bias_before) bias *= scale; + for (int i = 0; i < size; i++) { + out[i] = x[i] * scale + bias; + } +} + +class ScaleCompute : public KernelLite { + public: + using param_t = operators::MulParam; + + void Run() override { + auto& param = Param(); + scale_compute(param.x->data(), param.output->mutable_data(), + param.x->dims().production(), param.scale, param.bias, + param.bias_after_scale); + } + + virtual ~ScaleCompute() = default; +}; + +} // namespace arm +} // namespace kernels +} // namespace lite +} // namespace paddle + +REGISTER_LITE_KERNEL(scale, kARM, kFloat, kNCHW, + paddle::lite::kernels::arm::ScaleCompute, def) + .BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))}) + .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))}) + .Finalize(); diff --git a/paddle/fluid/lite/kernels/arm/use_kernels.h b/paddle/fluid/lite/kernels/arm/use_kernels.h new file mode 100644 index 0000000000000000000000000000000000000000..af437bf8e4ad5a56d4f9575d609f4f47e16edb2a --- /dev/null +++ b/paddle/fluid/lite/kernels/arm/use_kernels.h @@ -0,0 +1,22 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
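The scale kernel above folds the bias into the scale factor when the bias is meant to be applied before scaling. A tiny standalone sketch of the two orderings selected by the bias_before flag, with illustrative values:

    #include <iostream>

    // Mirrors scale_compute above: with bias_before set, the bias is
    // pre-multiplied by scale, so the result equals (x + bias) * scale;
    // otherwise it is x * scale + bias.
    float scale_one(float x, float scale, float bias, bool bias_before) {
      if (bias_before) bias *= scale;
      return x * scale + bias;
    }

    int main() {
      const float x = 2.f, scale = 3.f, bias = 1.f;              // assumed toy values
      std::cout << scale_one(x, scale, bias, false) << "\n";     // 2*3 + 1 = 7
      std::cout << scale_one(x, scale, bias, true) << "\n";      // (2+1)*3 = 9
      return 0;
    }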
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "paddle/fluid/lite/core/op_registry.h" + +USE_LITE_KERNEL(fc, kARM, kFloat, kNCHW, def); +USE_LITE_KERNEL(mul, kARM, kFloat, kNCHW, def); +USE_LITE_KERNEL(scale, kARM, kFloat, kNCHW, def); +USE_LITE_KERNEL(feed, kARM, kAny, kAny, def); +USE_LITE_KERNEL(fetch, kARM, kAny, kAny, def); diff --git a/paddle/fluid/lite/kernels/cuda/CMakeLists.txt b/paddle/fluid/lite/kernels/cuda/CMakeLists.txt index 104fb79c7031457b362270f09fc0bf36da98ec09..f35f634a217fabd539c9b124c44bc6cdeb186dd6 100644 --- a/paddle/fluid/lite/kernels/cuda/CMakeLists.txt +++ b/paddle/fluid/lite/kernels/cuda/CMakeLists.txt @@ -8,3 +8,4 @@ nv_library(mul_compute_cuda SRCS mul_compute.cc DEPS ${tensor_lite}) cc_library(io_copy_compute_cuda SRCS io_copy_compute.cc DEPS ${tensor_lite}) nv_library(kernels_cuda DEPS mul_compute_cuda io_copy_compute_cuda cuda_blas_lite) + diff --git a/paddle/fluid/lite/kernels/host/CMakeLists.txt b/paddle/fluid/lite/kernels/host/CMakeLists.txt index 9bd2120457a9c896d1847876ba08aca29a7ef3db..81c82abbf6b4b37b4741733698c73bc158494d65 100644 --- a/paddle/fluid/lite/kernels/host/CMakeLists.txt +++ b/paddle/fluid/lite/kernels/host/CMakeLists.txt @@ -17,3 +17,4 @@ set(host_kernels ) set(host_kernels "${host_kernels}" CACHE INTERNAL "host kernels") + diff --git a/paddle/fluid/lite/kernels/x86/CMakeLists.txt b/paddle/fluid/lite/kernels/x86/CMakeLists.txt index 90e3d20a27e1616afe5b8298d38747e0d4fc098f..b4ac7c7f790eb893a4e6f7375f62de5a0046b333 100644 --- a/paddle/fluid/lite/kernels/x86/CMakeLists.txt +++ b/paddle/fluid/lite/kernels/x86/CMakeLists.txt @@ -4,3 +4,4 @@ endif() cc_library(activation_compute SRCS activation_compute.cc DEPS ${lite_kernel_deps} activation_op) cc_library(elementwise_compute SRCS elementwise_compute.cc DEPS ${lite_kernel_deps} elementwise_op) + diff --git a/paddle/fluid/lite/kernels/x86/activation_compute.cc b/paddle/fluid/lite/kernels/x86/activation_compute.cc index 4873a30ba4cc74da957aaef86afd8094a54ac437..3001a98da118f2107245c184ad04a9920660c8c6 100644 --- a/paddle/fluid/lite/kernels/x86/activation_compute.cc +++ b/paddle/fluid/lite/kernels/x86/activation_compute.cc @@ -1,3 +1,17 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
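use_kernels.h above exists because kernels and ops register themselves through static objects in their own translation units, so a binary has to reference them with USE_LITE_KERNEL and USE_LITE_OP or the linker may drop the registrations. A hypothetical main translation unit wired up the same way as cxx_api_bin.cc (illustrative only; the output-reading calls are not shown in this diff and are omitted):

    // Hypothetical main.cc for an ARM build, modeled on cxx_api_bin.cc above.
    #include <vector>
    #include "paddle/fluid/lite/api/cxx_api.h"
    #include "paddle/fluid/lite/core/op_registry.h"
    #include "paddle/fluid/lite/kernels/arm/use_kernels.h"  // pulls in the USE_LITE_KERNEL references

    // Ops still need their own references, as in cxx_api_bin.cc.
    USE_LITE_OP(mul);
    USE_LITE_OP(fc);
    USE_LITE_OP(scale);
    USE_LITE_OP(feed);
    USE_LITE_OP(fetch);

    int main(int argc, char** argv) {
      paddle::lite::ExecutorLite predictor;
      std::vector<paddle::lite::Place> valid_places(
          {paddle::lite::Place{TARGET(kARM), PRECISION(kFloat)}});
      // argv[1]: model directory, as in cxx_api_bin.cc.
      predictor.Build(argv[1],
                      paddle::lite::Place{TARGET(kARM), PRECISION(kFloat)},
                      valid_places);
      auto* input_tensor = predictor.GetInput(0);
      // ... resize and fill input_tensor, run the predictor, read the outputs.
      (void)input_tensor;
      return 0;
    }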
+ #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/lite/core/kernel.h" diff --git a/paddle/fluid/lite/kernels/x86/elementwise_compute.cc b/paddle/fluid/lite/kernels/x86/elementwise_compute.cc index e5fabd8732332203b626d8e507af1675595c400b..d4ead92e431e65013670ce81f207456cd3c3760a 100644 --- a/paddle/fluid/lite/kernels/x86/elementwise_compute.cc +++ b/paddle/fluid/lite/kernels/x86/elementwise_compute.cc @@ -1,3 +1,17 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/lite/core/kernel.h" diff --git a/paddle/fluid/lite/model_parser/CMakeLists.txt b/paddle/fluid/lite/model_parser/CMakeLists.txt index 95d67c32c51e761af70611b4bee64ca7090bebf9..eb725c33f11f331a2f7de6a2d979af632ecc3055 100644 --- a/paddle/fluid/lite/model_parser/CMakeLists.txt +++ b/paddle/fluid/lite/model_parser/CMakeLists.txt @@ -9,7 +9,7 @@ endif(WITH_TESTING) if(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) - cc_library(compatible_pb_lite SRCS compatible_pb.cc DEPS op_desc_lite var_desc_lite) + cc_library(compatible_pb_lite SRCS compatible_pb.cc DEPS op_desc_lite framework_proto_lite var_desc_lite) else() cc_library(compatible_pb_lite SRCS compatible_pb.cc DEPS framework_proto_lite proto_desc) endif(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) @@ -24,3 +24,4 @@ endif() cc_library(model_parser_lite SRCS model_parser.cc DEPS ${model_parser_deps}) add_subdirectory(pb) + diff --git a/paddle/fluid/lite/model_parser/pb/CMakeLists.txt b/paddle/fluid/lite/model_parser/pb/CMakeLists.txt index 22d88aeabf479e9c234cfa1e9660a6d2af9439b4..6910542f2a17f1ec5cdbe5f77203197ae3d57b89 100644 --- a/paddle/fluid/lite/model_parser/pb/CMakeLists.txt +++ b/paddle/fluid/lite/model_parser/pb/CMakeLists.txt @@ -1,2 +1,3 @@ cc_library(var_desc_lite SRCS var_desc.cc DEPS framework_proto_lite) cc_library(op_desc_lite SRCS op_desc.cc DEPS framework_proto_lite) + diff --git a/paddle/fluid/lite/operators/CMakeLists.txt b/paddle/fluid/lite/operators/CMakeLists.txt index 184acb8485d16830efc775f5d3f14ed7e0652364..2a0f000cb63302e7cc8ec0de61f74540cc14fec1 100644 --- a/paddle/fluid/lite/operators/CMakeLists.txt +++ b/paddle/fluid/lite/operators/CMakeLists.txt @@ -22,3 +22,4 @@ set(ops_lite PARENT_SCOPE) lite_cc_test(test_fc_op_lite SRCS fc_op_test.cc DEPS fc_op_lite fc_compute_host) + diff --git a/paddle/fluid/lite/operators/activation_ops.cc b/paddle/fluid/lite/operators/activation_ops.cc index 1e824e8580ef5c8ff4f968ab91ccaa0b1ccd990d..e92bc8e6ec268e276047df92714490f6df473af0 100644 --- a/paddle/fluid/lite/operators/activation_ops.cc +++ b/paddle/fluid/lite/operators/activation_ops.cc @@ -1,3 +1,17 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + #include "paddle/fluid/lite/core/op_lite.h" #include "paddle/fluid/lite/core/op_registry.h" diff --git a/paddle/fluid/lite/operators/elementwise_ops.cc b/paddle/fluid/lite/operators/elementwise_ops.cc index f4a22c6fcd8b5ea7b090351a1b9e01afb1c2b173..bba9209fa4db451144c45d205d331e25e0f48152 100644 --- a/paddle/fluid/lite/operators/elementwise_ops.cc +++ b/paddle/fluid/lite/operators/elementwise_ops.cc @@ -1,3 +1,17 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + #include "paddle/fluid/lite/core/op_lite.h" #include "paddle/fluid/lite/core/op_registry.h" diff --git a/paddle/fluid/lite/utils/CMakeLists.txt b/paddle/fluid/lite/utils/CMakeLists.txt index 1d299367d235b2b83ed68ce906932af1f6b2da4c..56cb895abd78ff98f38bf677c7aebece8725e7c2 100644 --- a/paddle/fluid/lite/utils/CMakeLists.txt +++ b/paddle/fluid/lite/utils/CMakeLists.txt @@ -1,10 +1,12 @@ -if(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) - set(utils_DEPS) - lite_cc_test(test_logging_lite SRCS logging_test.cc) -else() - set(utils_DEPS glog) -endif() +# if(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) +# set(utils_DEPS) +# lite_cc_test(test_logging_lite SRCS logging_test.cc) +# else() +# endif() + +set(utils_DEPS glog) lite_cc_test(test_varient SRCS varient_test.cc DEPS utils_lite) cc_library(any_lite SRCS any.cc) cc_library(utils_lite SRCS cp_logging.cc DEPS ${utils_DEPS} any_lite) + diff --git a/paddle/fluid/lite/utils/all.h b/paddle/fluid/lite/utils/all.h index 70e71ae3008acee5ccc5b485f7540d506187c472..7cc98a45201eb3e6646bbd057c4247c06d1e32b3 100644 --- a/paddle/fluid/lite/utils/all.h +++ b/paddle/fluid/lite/utils/all.h @@ -14,6 +14,7 @@ #pragma once +#include "paddle/fluid/lite/utils/any.h" #include "paddle/fluid/lite/utils/check.h" #include "paddle/fluid/lite/utils/cp_logging.h" #include "paddle/fluid/lite/utils/factory.h" @@ -21,4 +22,3 @@ #include "paddle/fluid/lite/utils/io.h" #include "paddle/fluid/lite/utils/macros.h" #include "paddle/fluid/lite/utils/varient.h" -#include "paddle/fluid/lite/utils/any.h" diff --git a/paddle/fluid/lite/utils/cp_logging.h b/paddle/fluid/lite/utils/cp_logging.h index d356b337abd5f46a0ca30eb9743ee5aa47d4f31e..e3c0f392533dca8178b544c038733393d2784c25 100644 --- a/paddle/fluid/lite/utils/cp_logging.h +++ b/paddle/fluid/lite/utils/cp_logging.h @@ -13,8 +13,8 @@ // limitations under the License. 
 #pragma once
-#ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
-#include "paddle/fluid/lite/utils/logging.h"
-#else // LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
+// #ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
+// #include "paddle/fluid/lite/utils/logging.h"
+// #else // LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
 #include <glog/logging.h>
-#endif // LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
+// #endif // LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
diff --git a/paddle/fluid/lite/x86/CMakeLists.txt b/paddle/fluid/lite/x86/CMakeLists.txt
index be772b921b4edc989e3ce25143bb88360fbb10b6..0347593e38af4af7cf2dd421801524bcb4d6d052 100644
--- a/paddle/fluid/lite/x86/CMakeLists.txt
+++ b/paddle/fluid/lite/x86/CMakeLists.txt
@@ -3,3 +3,4 @@ if (NOT LITE_WITH_X86)
 endif()
 
 cc_library(target_wrapper_x86 SRCS target_wrapper.cc)
+
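// ---------------------------------------------------------------------------
// Illustrative only, not part of the patch above: a minimal standalone sketch
// of the element-wise math the new ARM relu/scale kernels implement, written
// in plain C++ so it can be compiled and checked without the lite framework.
// All names below (relu_ref, scale_ref) are hypothetical helpers for this
// sketch, not APIs introduced by the diff.
#include <algorithm>
#include <cassert>
#include <vector>

// y[i] = max(0, x[i]) -- mirrors the loop in ReluCompute::Run().
static void relu_ref(const float* x, float* y, int n) {
  for (int i = 0; i < n; i++) y[i] = std::max(0.f, x[i]);
}

// out[i] = x[i] * scale + bias; when bias_before is true the bias is applied
// before scaling, which is equivalent to pre-multiplying it by scale -- the
// same rewrite used by scale_compute() in scale_compute.cc.
static void scale_ref(const float* x, float* out, int n, float scale,
                      float bias, bool bias_before) {
  if (bias_before) bias *= scale;
  for (int i = 0; i < n; i++) out[i] = x[i] * scale + bias;
}

int main() {
  std::vector<float> x{-1.f, 0.f, 2.f};
  std::vector<float> y(x.size());
  const int n = static_cast<int>(x.size());

  relu_ref(x.data(), y.data(), n);
  assert(y[0] == 0.f && y[1] == 0.f && y[2] == 2.f);

  scale_ref(x.data(), y.data(), n, /*scale=*/2.f, /*bias=*/1.f,
            /*bias_before=*/true);
  // (x + bias) * scale == x * scale + bias * scale, so x[2]=2 -> 6.
  assert(y[2] == 6.f);
  return 0;
}
// ---------------------------------------------------------------------------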