diff --git a/CMakeLists.txt b/CMakeLists.txt
index 59d6fcb07d27e1f3ab259e69d36708b775c1852a..f05e52ee447e06ba812ce5ac52e238dcebc9bbbc 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -49,6 +49,9 @@ set(THIRD_PARTY_BUILD_TYPE Release)
 option(WITH_AVX "Compile Paddle Serving with AVX intrinsics" OFF)
 option(WITH_MKL "Compile Paddle Serving with MKL support." OFF)
 option(WITH_GPU "Compile Paddle Serving with NVIDIA GPU" OFF)
+option(WITH_LITE "Compile Paddle Serving with Paddle Lite Engine" OFF)
+option(WITH_XPU "Compile Paddle Serving with Baidu Kunlun" OFF)
+option(WITH_PYTHON "Compile Paddle Serving with Python" ON)
 option(CLIENT "Compile Paddle Serving Client" OFF)
 option(SERVER "Compile Paddle Serving Server" OFF)
 option(APP "Compile Paddle Serving App package" OFF)
@@ -66,40 +69,40 @@ if (NOT DEFINED WITH_MKLDNN)
 endif()
 endif()
 
-if (SERVER)
-include(external/jsoncpp)
-#include(external/rocksdb)
-endif()
 if (SERVER OR CLIENT)
-include(external/snappy)
-include(external/leveldb)
-include(external/zlib)
-include(external/boost)
-include(external/protobuf)
-include(external/brpc)
-include(external/gflags)
-include(external/glog)
-include(external/pybind11)
-include(external/python)
-include(generic)
-include(flags)
+  include(external/snappy)
+  include(external/leveldb)
+  include(external/zlib)
+  include(external/boost)
+  include(external/protobuf)
+  include(external/brpc)
+  include(external/gflags)
+  include(external/glog)
+  if (WITH_PYTHON)
+    include(external/pybind11)
+    include(external/python)
+  endif()
+  include(generic)
+  include(flags)
 endif()
 
 if (APP)
-include(external/zlib)
-include(external/boost)
-include(external/protobuf)
-include(external/gflags)
-include(external/glog)
-include(external/pybind11)
-include(external/python)
-include(generic)
+  include(external/zlib)
+  include(external/boost)
+  include(external/protobuf)
+  include(external/gflags)
+  include(external/glog)
+  include(external/pybind11)
+  include(external/python)
+  include(generic)
 endif()
 
 if (SERVER)
-include(external/cudnn)
-include(paddlepaddle)
+  include(external/jsoncpp)
+  #include(external/rocksdb)
+  include(external/cudnn)
+  include(paddlepaddle)
 endif()
 
 message("paddle serving source dir: " ${PADDLE_SERVING_SOURCE_DIR})
@@ -125,26 +128,24 @@ set(EXTERNAL_LIBS
 )
 
 if(SERVER)
-if(WITH_MKLML)
-  list(APPEND EXTERNAL_LIBS ${MKLML_IOMP_LIB})
-endif()
-endif()
-
+  if(WITH_MKLML)
+    list(APPEND EXTERNAL_LIBS ${MKLML_IOMP_LIB})
+  endif()
 
-if(SERVER)
-if(WITH_MKLDNN)
-  list(APPEND EXTERNAL_LIBS ${MKLDNN_LIB})
-endif()
-endif()
+  if(WITH_MKLDNN)
+    list(APPEND EXTERNAL_LIBS ${MKLDNN_LIB})
+  endif()
 
-if (SERVER)
 list(APPEND EXTERNAL_LIBS paddlepaddle)
 endif()
 
+
 add_subdirectory(core)
 
 if(SERVER)
-add_subdirectory(paddle_inference)
+  add_subdirectory(paddle_inference)
 endif()
-add_subdirectory(python)
+if (WITH_PYTHON)
+  add_subdirectory(python)
+endif()
diff --git a/cmake/external/boost.cmake b/cmake/external/boost.cmake
index 117b8727f68b90c60ece896d5890d41ba04aac8e..0ab248f8c8a0bca9fa6f97f4520a5a9781c9b239 100644
--- a/cmake/external/boost.cmake
+++ b/cmake/external/boost.cmake
@@ -22,6 +22,7 @@ set(BOOST_PROJECT "extern_boost")
 # version of boost, say, 1.66.0, doesn't build on CentOS 6. We
 # checked that the devtools package of CentOS 6 installs boost 1.41.0.
 # So we use 1.41.0 here.
+set(BOOST_VER "1.74.0")
 set(BOOST_TAR "boost_1_74_0" CACHE STRING "" FORCE)
 set(BOOST_URL "http://paddlepaddledeps.cdn.bcebos.com/${BOOST_TAR}.tar.gz" CACHE STRING "" FORCE)
diff --git a/cmake/external/brpc.cmake b/cmake/external/brpc.cmake
index 42eae8d4512c013e5457c2aceaa93e6308a87b8e..9fe5e89cbc89edd2238653b6cf5aeda41184a8a6 100644
--- a/cmake/external/brpc.cmake
+++ b/cmake/external/brpc.cmake
@@ -38,13 +38,21 @@ INCLUDE_DIRECTORIES(${BRPC_INCLUDE_DIR})
 # Reference https://stackoverflow.com/questions/45414507/pass-a-list-of-prefix-paths-to-externalproject-add-in-cmake-args
 set(prefix_path "${THIRD_PARTY_PATH}/install/gflags|${THIRD_PARTY_PATH}/install/leveldb|${THIRD_PARTY_PATH}/install/snappy|${THIRD_PARTY_PATH}/install/gtest|${THIRD_PARTY_PATH}/install/protobuf|${THIRD_PARTY_PATH}/install/zlib|${THIRD_PARTY_PATH}/install/glog")
 
+if(WITH_LITE)
+  set(BRPC_REPO "https://github.com/zhangjun/incubator-brpc.git")
+  set(BRPC_TAG "master")
+else()
+  set(BRPC_REPO "https://github.com/wangjiawei04/brpc")
+  set(BRPC_TAG "6d79e0b17f25107c35b705ea58d888083f59ff47")
+endif()
+
 # If minimal .a is need, you can set WITH_DEBUG_SYMBOLS=OFF
 ExternalProject_Add(
     extern_brpc
     ${EXTERNAL_PROJECT_LOG_ARGS}
     # TODO(gongwb): change to de newst repo when they changed.
-    GIT_REPOSITORY "https://github.com/wangjiawei04/brpc"
-    GIT_TAG "serving-0.4.1"
+    GIT_REPOSITORY ${BRPC_REPO}
+    GIT_TAG ${BRPC_TAG}
     PREFIX ${BRPC_SOURCES_DIR}
     UPDATE_COMMAND ""
    CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
diff --git a/cmake/generic.cmake b/cmake/generic.cmake
index dd2fe4dc94e7213d6ad15d37f74ab1c6d41d660a..375a1f7d219ca7de34b6362f11c9ab30e75e5304 100644
--- a/cmake/generic.cmake
+++ b/cmake/generic.cmake
@@ -93,7 +93,11 @@ include_directories(${CMAKE_CURRENT_BINARY_DIR})
 if(NOT APPLE)
   find_package(Threads REQUIRED)
   link_libraries(${CMAKE_THREAD_LIBS_INIT})
-  set(CMAKE_CXX_LINK_EXECUTABLE "${CMAKE_CXX_LINK_EXECUTABLE} -pthread -ldl -lrt")
+  if(WITH_LITE OR WITH_XPU)
+    set(CMAKE_CXX_LINK_EXECUTABLE "${CMAKE_CXX_LINK_EXECUTABLE} -fopenmp -pthread -ldl -lrt")
+  else()
+    set(CMAKE_CXX_LINK_EXECUTABLE "${CMAKE_CXX_LINK_EXECUTABLE} -pthread -ldl -lrt")
+  endif()
 endif(NOT APPLE)
 
 set_property(GLOBAL PROPERTY FLUID_MODULES "")
diff --git a/cmake/paddlepaddle.cmake b/cmake/paddlepaddle.cmake
index ad95b3ef6db215fddf165d0718d46037749af31f..0e202d3b06537646e489510c781cf125e87e3e07 100644
--- a/cmake/paddlepaddle.cmake
+++ b/cmake/paddlepaddle.cmake
@@ -39,6 +39,12 @@ if (WITH_GPU)
   else()
     SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-gpu-cuda10-cudnn7-avx-mkl")
   endif()
+elseif (WITH_LITE)
+  if (WITH_XPU)
+    SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-arm-xpu")
+  else()
+    SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-arm")
+  endif()
 else()
   if (WITH_AVX)
     if (WITH_MKLML)
@@ -51,7 +57,12 @@ else()
   endif()
 endif()
 
-SET(PADDLE_LIB_PATH "http://paddle-inference-lib.bj.bcebos.com/${PADDLE_LIB_VERSION}/paddle_inference.tgz")
+if(WITH_LITE)
+  SET(PADDLE_LIB_PATH "http://paddle-serving.bj.bcebos.com/inferlib/${PADDLE_LIB_VERSION}/paddle_inference.tgz")
+else()
+  SET(PADDLE_LIB_PATH "http://paddle-inference-lib.bj.bcebos.com/${PADDLE_LIB_VERSION}/paddle_inference.tgz")
+endif()
+
 MESSAGE(STATUS "PADDLE_LIB_PATH=${PADDLE_LIB_PATH}")
 if (WITH_GPU OR WITH_MKLML)
     if (WITH_TRT)
@@ -117,11 +128,24 @@ ADD_LIBRARY(paddle_fluid SHARED IMPORTED GLOBAL)
 SET_PROPERTY(TARGET paddle_fluid PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/lib/libpaddle_fluid.so)
 
 if (WITH_TRT)
-ADD_LIBRARY(nvinfer SHARED IMPORTED GLOBAL)
-SET_PROPERTY(TARGET nvinfer PROPERTY
-             IMPORTED_LOCATION ${TENSORRT_ROOT}/lib/libnvinfer.so)
+  ADD_LIBRARY(nvinfer SHARED IMPORTED GLOBAL)
+  SET_PROPERTY(TARGET nvinfer PROPERTY IMPORTED_LOCATION ${TENSORRT_ROOT}/lib/libnvinfer.so)
+
+  ADD_LIBRARY(nvinfer_plugin SHARED IMPORTED GLOBAL)
+  SET_PROPERTY(TARGET nvinfer_plugin PROPERTY IMPORTED_LOCATION ${TENSORRT_ROOT}/lib/libnvinfer_plugin.so)
+endif()
 
-ADD_LIBRARY(nvinfer_plugin SHARED IMPORTED GLOBAL)
-SET_PROPERTY(TARGET nvinfer_plugin PROPERTY IMPORTED_LOCATION ${TENSORRT_ROOT}/lib/libnvinfer_plugin.so)
+if (WITH_LITE)
+  ADD_LIBRARY(paddle_api_full_bundled STATIC IMPORTED GLOBAL)
+  SET_PROPERTY(TARGET paddle_api_full_bundled PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/lite/cxx/lib/libpaddle_api_full_bundled.a)
+
+  if (WITH_XPU)
+    ADD_LIBRARY(xpuapi SHARED IMPORTED GLOBAL)
+    SET_PROPERTY(TARGET xpuapi PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/xpu/lib/libxpuapi.so)
+
+    ADD_LIBRARY(xpurt SHARED IMPORTED GLOBAL)
+    SET_PROPERTY(TARGET xpurt PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/xpu/lib/libxpurt.so)
+  endif()
 endif()
 
 ADD_LIBRARY(xxhash STATIC IMPORTED GLOBAL)
@@ -132,7 +156,14 @@ LIST(APPEND external_project_dependencies paddle)
 
 LIST(APPEND paddle_depend_libs xxhash)
 
+if(WITH_LITE)
+  LIST(APPEND paddle_depend_libs paddle_api_full_bundled)
+  if(WITH_XPU)
+    LIST(APPEND paddle_depend_libs xpuapi xpurt)
+  endif()
+endif()
+
 if(WITH_TRT)
-LIST(APPEND paddle_depend_libs
-    nvinfer nvinfer_plugin)
+  LIST(APPEND paddle_depend_libs
+      nvinfer nvinfer_plugin)
 endif()
diff --git a/core/configure/CMakeLists.txt b/core/configure/CMakeLists.txt
index 8476192dd33c8fdf2583c3c5fc48b8d3e0ba0b9e..8e2b62eb64549bbd2b60f6e744eca3245f884bac 100644
--- a/core/configure/CMakeLists.txt
+++ b/core/configure/CMakeLists.txt
@@ -27,6 +27,8 @@ install(FILES ${inc}
         DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/include/configure)
 endif()
 
+if (WITH_PYTHON)
+
 py_proto_compile(general_model_config_py_proto SRCS proto/general_model_config.proto)
 add_custom_target(general_model_config_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
 add_dependencies(general_model_config_py_proto general_model_config_py_proto_init)
@@ -70,7 +72,7 @@ if (SERVER)
 py_proto_compile(server_config_py_proto SRCS proto/server_configure.proto)
 add_custom_target(server_config_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
 add_dependencies(server_config_py_proto server_config_py_proto_init)
-if (NOT WITH_GPU)
+if (NOT WITH_GPU AND NOT WITH_LITE)
 add_custom_command(TARGET server_config_py_proto POST_BUILD
         COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto
         COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto
@@ -114,3 +116,5 @@ add_custom_command(TARGET multi_lang_general_model_service_py_proto POST_BUILD
         WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
 endif()
 endif()
+
+endif()
diff --git a/core/configure/proto/server_configure.proto b/core/configure/proto/server_configure.proto
index c008ee857bb7c69672e399ce44b2420d5db7fb3c..ea03d44f2cf3ff42b3b603ff9ddca7127fe8c15a 100644
--- a/core/configure/proto/server_configure.proto
+++ b/core/configure/proto/server_configure.proto
@@ -45,6 +45,8 @@ message EngineDesc {
   optional bool force_update_static_cache = 15;
   optional bool enable_ir_optimization = 16;
   optional bool use_trt = 17;
+  optional bool use_lite = 18;
+  optional bool use_xpu = 19;
 };
 
 // model_toolkit conf
diff --git a/core/general-server/CMakeLists.txt b/core/general-server/CMakeLists.txt
index aa1b7badc9140301d84bdbd94b3324b52176e837..be6c3477551cb71c3499f6a6c713dd44600b7d58 100644
--- a/core/general-server/CMakeLists.txt
+++ b/core/general-server/CMakeLists.txt
@@ -6,6 +6,11 @@ add_dependencies(serving pdcodegen fluid_cpu_engine pdserving paddle_fluid cube-
 if (WITH_GPU)
   add_dependencies(serving fluid_gpu_engine)
 endif()
+
+if (WITH_LITE)
+  add_dependencies(serving fluid_arm_engine)
+endif()
+
 target_include_directories(serving PUBLIC
         ${CMAKE_CURRENT_BINARY_DIR}/../../core/predictor
 )
@@ -15,6 +20,11 @@ if(WITH_GPU)
                           -Wl,--no-whole-archive)
 endif()
 
+if(WITH_LITE)
+  target_link_libraries(serving -Wl,--whole-archive fluid_arm_engine
+                        -Wl,--no-whole-archive)
+endif()
+
 target_link_libraries(serving -Wl,--whole-archive fluid_cpu_engine
                       -Wl,--no-whole-archive)
diff --git a/core/predictor/framework/infer.h b/core/predictor/framework/infer.h
index 8a6b6707b26474200b57769908e91055b5479a41..ba0c18e06c298553af10836fd488c6cffcd92226 100644
--- a/core/predictor/framework/infer.h
+++ b/core/predictor/framework/infer.h
@@ -38,6 +38,8 @@ class InferEngineCreationParams {
     _static_optimization = false;
     _force_update_static_cache = false;
     _use_trt = false;
+    _use_lite = false;
+    _use_xpu = false;
   }
 
   void set_path(const std::string& path) { _path = path; }
@@ -52,6 +54,10 @@ class InferEngineCreationParams {
   void set_use_trt(bool use_trt) { _use_trt = use_trt; }
 
+  void set_use_lite(bool use_lite) { _use_lite = use_lite; }
+
+  void set_use_xpu(bool use_xpu) { _use_xpu = use_xpu; }
+
   bool enable_memory_optimization() const {
     return _enable_memory_optimization;
   }
@@ -60,6 +66,10 @@ class InferEngineCreationParams {
   bool use_trt() const { return _use_trt; }
 
+  bool use_lite() const { return _use_lite; }
+
+  bool use_xpu() const { return _use_xpu; }
+
   void set_static_optimization(bool static_optimization = false) {
     _static_optimization = static_optimization;
   }
@@ -79,6 +89,9 @@ class InferEngineCreationParams {
         << "model_path = " << _path << ", "
         << "enable_memory_optimization = " << _enable_memory_optimization
         << ", "
+        << "enable_tensorrt = " << _use_trt << ", "
+        << "enable_lite = " << _use_lite << ", "
+        << "enable_xpu = " << _use_xpu << ", "
         << "enable_ir_optimization = " << _enable_ir_optimization << ", "
         << "static_optimization = " << _static_optimization << ", "
         << "force_update_static_cache = " << _force_update_static_cache;
@@ -91,6 +104,8 @@ class InferEngineCreationParams {
   bool _static_optimization;
   bool _force_update_static_cache;
   bool _use_trt;
+  bool _use_lite;
+  bool _use_xpu;
 };
 
 class InferEngine {
@@ -179,6 +194,14 @@ class ReloadableInferEngine : public InferEngine {
       _infer_engine_params.set_use_trt(conf.use_trt());
     }
 
+    if (conf.has_use_lite()) {
+      _infer_engine_params.set_use_lite(conf.use_lite());
+    }
+
+    if (conf.has_use_xpu()) {
+      _infer_engine_params.set_use_xpu(conf.use_xpu());
+    }
+
     if (!check_need_reload() || load(_infer_engine_params) != 0) {
       LOG(ERROR) << "Failed load model_data_path" << _model_data_path;
       return -1;
diff --git a/paddle_inference/CMakeLists.txt b/paddle_inference/CMakeLists.txt
index dcc49b0c21ce97411a17f645f1de5bcad5f5dc73..4d41f87fbeffb26cf9fc0135f92499c080325e2f 100644
--- a/paddle_inference/CMakeLists.txt
+++ b/paddle_inference/CMakeLists.txt
@@ -13,8 +13,13 @@
 # limitations under the License
 
 if (NOT CLIENT_ONLY)
-add_subdirectory(inferencer-fluid-cpu)
-if (WITH_GPU)
-add_subdirectory(inferencer-fluid-gpu)
-endif()
+  add_subdirectory(inferencer-fluid-cpu)
+
+  if (WITH_GPU)
+    add_subdirectory(inferencer-fluid-gpu)
+  endif()
+
+  if (WITH_LITE)
+    add_subdirectory(inferencer-fluid-arm)
+  endif()
 endif()
diff --git a/paddle_inference/inferencer-fluid-arm/CMakeLists.txt b/paddle_inference/inferencer-fluid-arm/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..cf415d9e039e84ddef964c5a84fc79b5970ed41f
--- /dev/null
+++ b/paddle_inference/inferencer-fluid-arm/CMakeLists.txt
@@ -0,0 +1,10 @@
+FILE(GLOB fluid_arm_engine_srcs ${CMAKE_CURRENT_LIST_DIR}/src/*.cpp)
+add_library(fluid_arm_engine ${fluid_arm_engine_srcs})
+target_include_directories(fluid_arm_engine PUBLIC
+    ${CMAKE_BINARY_DIR}/Paddle/fluid_install_dir/)
+add_dependencies(fluid_arm_engine pdserving extern_paddle configure)
+target_link_libraries(fluid_arm_engine pdserving paddle_fluid -lpthread -lcrypto -lm -lrt -lssl -ldl -lz)
+
+install(TARGETS fluid_arm_engine
+        ARCHIVE DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/lib
+        )
diff --git a/paddle_inference/inferencer-fluid-arm/include/fluid_arm_engine.h b/paddle_inference/inferencer-fluid-arm/include/fluid_arm_engine.h
new file mode 100644
index 0000000000000000000000000000000000000000..92408cdacc581f7f9323840b87518df8ab8136ed
--- /dev/null
+++ b/paddle_inference/inferencer-fluid-arm/include/fluid_arm_engine.h
@@ -0,0 +1,289 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <pthread.h>
+#include <fstream>
+#include <map>
+#include <string>
+#include <vector>
+#include "core/configure/include/configure_parser.h"
+#include "core/configure/inferencer_configure.pb.h"
+#include "core/predictor/framework/infer.h"
+#include "paddle_inference_api.h"  // NOLINT
+
+namespace baidu {
+namespace paddle_serving {
+namespace fluid_arm {
+
+class AutoLock {
+ public:
+  explicit AutoLock(pthread_mutex_t& mutex) : _mut(mutex) {
+    pthread_mutex_lock(&mutex);
+  }
+
+  ~AutoLock() { pthread_mutex_unlock(&_mut); }
+
+ private:
+  pthread_mutex_t& _mut;
+};
+
+class GlobalPaddleCreateMutex {
+ public:
+  pthread_mutex_t& mutex() { return _mut; }
+
+  static pthread_mutex_t& instance() {
+    static GlobalPaddleCreateMutex gmutex;
+    return gmutex.mutex();
+  }
+
+ private:
+  GlobalPaddleCreateMutex() { pthread_mutex_init(&_mut, NULL); }
+
+  pthread_mutex_t _mut;
+};
+
+using paddle_infer::Config;
+using paddle_infer::Predictor;
+using paddle_infer::Tensor;
+using paddle_infer::PrecisionType;
+using paddle_infer::CreatePredictor;
+
+// data interface
+class FluidFamilyCore {
+ public:
+  virtual ~FluidFamilyCore() {}
+  virtual std::vector<std::string> GetInputNames() {
+    return _core->GetInputNames();
+  }
+
+  virtual std::unique_ptr<Tensor> GetInputHandle(const std::string& name) {
+    return _core->GetInputHandle(name);
+  }
+
+  virtual std::vector<std::string> GetOutputNames() {
+    return _core->GetOutputNames();
+  }
+
+  virtual std::unique_ptr<Tensor> GetOutputHandle(const std::string& name) {
+    return _core->GetOutputHandle(name);
+  }
+
+  virtual bool Run() {
+    if (!_core->Run()) {
+      LOG(ERROR) << "Failed call Run with paddle predictor";
+      return false;
+    }
+    return true;
+  }
+
+  virtual int create(const predictor::InferEngineCreationParams& params) = 0;
+
+  virtual int clone(void* origin_core) {
+    if (origin_core == NULL) {
+      LOG(ERROR) << "origin paddle Predictor is null.";
+      return -1;
+    }
+    Predictor* p_predictor = (Predictor*)origin_core;
+    _core = p_predictor->Clone();
+    if (_core.get() == NULL) {
+      LOG(ERROR) << "fail to clone paddle predictor: " << origin_core;
+      return -1;
+    }
+    return 0;
+  }
+
+  virtual void* get() { return _core.get(); }
+
+ protected:
+  std::shared_ptr<Predictor> _core;
+};
+
+// infer interface
+class FluidArmAnalysisCore : public FluidFamilyCore {
+ public:
+  int create(const predictor::InferEngineCreationParams& params) {
+    std::string data_path = params.get_path();
+    if (access(data_path.c_str(), F_OK) == -1) {
+      LOG(ERROR) << "create paddle predictor failed, path does not exist: "
+                 << data_path;
+      return -1;
+    }
+
+    Config config;
+    config.SetParamsFile(data_path + "/__params__");
+    config.SetProgFile(data_path + "/__model__");
+    config.DisableGpu();
+    config.SetCpuMathLibraryNumThreads(1);
+
+    if (params.enable_memory_optimization()) {
+      config.EnableMemoryOptim();
+    }
+
+    if (params.use_lite()) {
+      config.EnableLiteEngine(PrecisionType::kFloat32, true);
+    }
+
+    if (params.use_xpu()) {
+      config.EnableXpu(100);
+    }
+
+    config.SwitchSpecifyInputNames(true);
+    AutoLock lock(GlobalPaddleCreateMutex::instance());
+    _core = CreatePredictor(config);
+    if (NULL == _core.get()) {
+      LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
+      return -1;
+    }
+
+    VLOG(2) << "create paddle predictor success, path: " << data_path;
+    return 0;
+  }
+};
+
+class FluidArmAnalysisDirCore : public FluidFamilyCore {
+ public:
+  int create(const predictor::InferEngineCreationParams& params) {
+    std::string data_path = params.get_path();
+    if (access(data_path.c_str(), F_OK) == -1) {
+      LOG(ERROR) << "create paddle predictor failed, path does not exist: "
+                 << data_path;
+      return -1;
+    }
+
+    Config config;
+    config.SetModel(data_path);
+    config.DisableGpu();
+    config.SwitchSpecifyInputNames(true);
+    config.SetCpuMathLibraryNumThreads(1);
+
+    if (params.enable_memory_optimization()) {
+      config.EnableMemoryOptim();
+    }
+
+    if (params.enable_ir_optimization()) {
+      config.SwitchIrOptim(true);
+    } else {
+      config.SwitchIrOptim(false);
+    }
+
+    if (params.use_lite()) {
+      config.EnableLiteEngine(PrecisionType::kFloat32, true);
+    }
+
+    if (params.use_xpu()) {
+      config.EnableXpu(100);
+    }
+
+    AutoLock lock(GlobalPaddleCreateMutex::instance());
+    _core = CreatePredictor(config);
+    if (NULL == _core.get()) {
+      LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
+      return -1;
+    }
+
+    VLOG(2) << "create paddle predictor success, path: " << data_path;
+    return 0;
+  }
+};
+
+class Parameter {
+ public:
+  Parameter() : _row(0), _col(0), _params(NULL) {}
+  ~Parameter() {
+    VLOG(2) << "before destroy Parameter, file_name[" << _file_name << "]";
+    destroy();
+  }
+
+  int init(int row, int col, const char* file_name) {
+    destroy();
+    _file_name = file_name;
+    _row = row;
+    _col = col;
+    _params = reinterpret_cast<float*>(malloc(_row * _col * sizeof(float)));
+    if (_params == NULL) {
+      LOG(ERROR) << "Load " << _file_name << " malloc error.";
+      return -1;
+    }
+    VLOG(2) << "Load parameter file[" << _file_name << "] success.";
+    return 0;
+  }
+
+  void destroy() {
+    _row = 0;
+    _col = 0;
+    if (_params != NULL) {
+      free(_params);
+      _params = NULL;
+    }
+  }
+
+  int load() {
+    if (_params == NULL || _row <= 0 || _col <= 0) {
+      LOG(ERROR) << "load parameter error [not inited].";
+      return -1;
+    }
+
+    FILE* fs = fopen(_file_name.c_str(), "rb");
+    if (fs == NULL) {
+      LOG(ERROR) << "load " << _file_name << " fopen error.";
+      return -1;
+    }
+    static const uint32_t MODEL_FILE_HEAD_LEN = 16;
+    char head[MODEL_FILE_HEAD_LEN] = {0};
+    if (fread(head, 1, MODEL_FILE_HEAD_LEN, fs) != MODEL_FILE_HEAD_LEN) {
+      destroy();
+      LOG(ERROR) << "Load " << _file_name << " read head error.";
+      if (fs != NULL) {
+        fclose(fs);
+        fs = NULL;
+      }
+      return -1;
+    }
+
+    uint32_t matrix_size = _row * _col;
+    if (matrix_size == fread(_params, sizeof(float), matrix_size, fs)) {
+      if (fs != NULL) {
+        fclose(fs);
+        fs = NULL;
+      }
+      VLOG(2) << "load " << _file_name << " read ok.";
+      return 0;
+    } else {
+      LOG(ERROR) << "load " << _file_name << " read error.";
+      destroy();
+      if (fs != NULL) {
+        fclose(fs);
+        fs = NULL;
+      }
+      return -1;
+    }
+    return 0;
+  }
+
+ public:
+  std::string _file_name;
+  int _row;
+  int _col;
+  float* _params;
+};
+
+}  // namespace fluid_arm
+}  // namespace paddle_serving
+}  // namespace baidu
diff --git a/paddle_inference/inferencer-fluid-arm/src/fluid_arm_engine.cpp b/paddle_inference/inferencer-fluid-arm/src/fluid_arm_engine.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..2c853c63b135b14939a9938ddeec779d54484393
--- /dev/null
+++ b/paddle_inference/inferencer-fluid-arm/src/fluid_arm_engine.cpp
@@ -0,0 +1,35 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle_inference/inferencer-fluid-arm/include/fluid_arm_engine.h"
+#include "core/predictor/framework/factory.h"
+
+namespace baidu {
+namespace paddle_serving {
+namespace fluid_arm {
+
+REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
+    ::baidu::paddle_serving::predictor::FluidInferEngine<FluidArmAnalysisCore>,
+    ::baidu::paddle_serving::predictor::InferEngine,
+    "FLUID_ARM_ANALYSIS");
+
+REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
+    ::baidu::paddle_serving::predictor::FluidInferEngine<
+        FluidArmAnalysisDirCore>,
+    ::baidu::paddle_serving::predictor::InferEngine,
+    "FLUID_ARM_ANALYSIS_DIR");
+
+}  // namespace fluid_arm
+}  // namespace paddle_serving
+}  // namespace baidu
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index 23e0b6b507f53f1ab60a32854891b79b377638ce..f3762df4616e7e971772b2955954af946132329f 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -7,7 +7,7 @@ if (CLIENT)
 endif()
 
 if (SERVER)
-  if (NOT WITH_GPU)
+  if (NOT WITH_GPU AND NOT WITH_LITE)
     file(INSTALL pipeline DESTINATION paddle_serving_server)
     file(GLOB_RECURSE SERVING_SERVER_PY_FILES paddle_serving_server/*.py)
   else()
@@ -34,7 +34,7 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.app.in
 endif()
 
 if (SERVER)
-  if (NOT WITH_GPU)
+  if (NOT WITH_GPU AND NOT WITH_LITE)
     configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.server.in
         ${CMAKE_CURRENT_BINARY_DIR}/setup.py)
   else()
@@ -72,7 +72,7 @@ add_custom_target(paddle_python ALL DEPENDS serving_client ${PADDLE_SERVING_BINA
 endif()
 
 if (SERVER)
-  if(NOT WITH_GPU)
+  if(NOT WITH_GPU AND NOT WITH_LITE)
     add_custom_command(
         OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
         COMMAND cp -r ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_server/ ${PADDLE_SERVING_BINARY_DIR}/python/
@@ -90,6 +90,16 @@ if (SERVER)
         COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
         DEPENDS ${SERVING_SERVER_CORE} server_config_py_proto ${PY_FILES})
     add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
+  elseif(WITH_LITE)
+    add_custom_command(
+        OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
+        COMMAND cp -r
+        ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_server_gpu/ ${PADDLE_SERVING_BINARY_DIR}/python/
+        COMMAND env ${py_env} ${PYTHON_EXECUTABLE} gen_version.py
+        "server_gpu" arm
+        COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
+        DEPENDS ${SERVING_SERVER_CORE} server_config_py_proto ${PY_FILES})
+    add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
   else()
     add_custom_command(
         OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
diff --git a/python/paddle_serving_app/local_predict.py b/python/paddle_serving_app/local_predict.py
index c734e308f07a5e1d1ea74f430aa2ffb2e2a4244b..2a2fcabea89f2e44fad963faace696d7d0af5c93 100644
--- a/python/paddle_serving_app/local_predict.py
+++ b/python/paddle_serving_app/local_predict.py
@@ -57,6 +57,8 @@ class LocalPredictor(object):
                           mem_optim=True,
                           ir_optim=False,
                           use_trt=False,
+                          use_lite=False,
+                          use_xpu=False,
                           use_feed_fetch_ops=False):
         """
         Load model config and set the engine config for the paddle predictor
@@ -70,6 +72,8 @@ class LocalPredictor(object):
             mem_optim: memory optimization, True default.
             ir_optim: open calculation chart optimization, False default.
             use_trt: use nvidia TensorRT optimization, False default
+            use_lite: use Paddle-Lite Engine, False default
+            use_xpu: run predict on Baidu Kunlun, False default
             use_feed_fetch_ops: use feed/fetch ops, False default.
         """
         client_config = "{}/serving_server_conf.prototxt".format(model_path)
@@ -80,9 +84,9 @@ class LocalPredictor(object):
         config = AnalysisConfig(model_path)
         logger.info("load_model_config params: model_path:{}, use_gpu:{},\
             gpu_id:{}, use_profile:{}, thread_num:{}, mem_optim:{}, ir_optim:{},\
-            use_trt:{}, use_feed_fetch_ops:{}".format(
+            use_trt:{}, use_lite:{}, use_xpu: {}, use_feed_fetch_ops:{}".format(
             model_path, use_gpu, gpu_id, use_profile, thread_num, mem_optim,
-            ir_optim, use_trt, use_feed_fetch_ops))
+            ir_optim, use_trt, use_lite, use_xpu, use_feed_fetch_ops))
 
         self.feed_names_ = [var.alias_name for var in model_conf.feed_var]
         self.fetch_names_ = [var.alias_name for var in model_conf.fetch_var]
@@ -119,6 +123,17 @@ class LocalPredictor(object):
                 use_static=False,
                 use_calib_mode=False)
 
+        if use_lite:
+            config.enable_lite_engine(
+                precision_mode = PrecisionType.Float32,
+                zero_copy = True,
+                passes_filter = [],
+                ops_filter = []
+            )
+
+        if use_xpu:
+            config.enable_xpu(100 * 1024 * 1024)
+
         self.predictor = create_paddle_predictor(config)
 
     def predict(self, feed=None, fetch=None, batch=False, log_id=0):
diff --git a/python/paddle_serving_server_gpu/__init__.py b/python/paddle_serving_server_gpu/__init__.py
index eec5d0a4a7e35bb735a776bb244a00c3a0c39d9f..13f6a61c600995be95b051d3b2691ae68e5e788e 100644
--- a/python/paddle_serving_server_gpu/__init__.py
+++ b/python/paddle_serving_server_gpu/__init__.py
@@ -77,6 +77,10 @@ def serve_args():
         help="Use Multi-language-service")
     parser.add_argument(
         "--use_trt", default=False, action="store_true", help="Use TensorRT")
+    parser.add_argument(
+        "--use_lite", default=False, action="store_true", help="Use PaddleLite")
+    parser.add_argument(
+        "--use_xpu", default=False, action="store_true", help="Use XPU")
     parser.add_argument(
         "--product_name",
         type=str,
@@ -210,6 +214,8 @@ class Server(object):
         self.use_local_bin = False
         self.gpuid = 0
         self.use_trt = False
+        self.use_lite = False
+        self.use_xpu = False
         self.model_config_paths = None  # for multi-model in a workflow
         self.product_name = None
         self.container_id = None
@@ -279,6 +285,12 @@ class Server(object):
     def set_trt(self):
         self.use_trt = True
 
+    def set_lite(self):
+        self.use_lite = True
+
+    def set_xpu(self):
+        self.use_xpu = True
+
     def _prepare_engine(self, model_config_paths, device):
         if self.model_toolkit_conf == None:
             self.model_toolkit_conf = server_sdk.ModelToolkitConf()
@@ -299,11 +311,17 @@ class Server(object):
             engine.static_optimization = False
             engine.force_update_static_cache = False
             engine.use_trt = self.use_trt
+            engine.use_lite = self.use_lite
+            engine.use_xpu = self.use_xpu
+
+
             if device == "cpu":
                 engine.type = "FLUID_CPU_ANALYSIS_DIR"
             elif device == "gpu":
                 engine.type = "FLUID_GPU_ANALYSIS_DIR"
+            elif device == "arm":
+                engine.type = "FLUID_ARM_ANALYSIS_DIR"
 
             self.model_toolkit_conf.engines.extend([engine])
 
@@ -405,10 +423,12 @@ class Server(object):
             for line in version_file.readlines():
                 if re.match("cuda_version", line):
                     cuda_version = line.split("\"")[1]
-                    if cuda_version != "trt":
-                        device_version = "serving-gpu-cuda" + cuda_version + "-"
-                    else:
+                    if cuda_version == "trt":
                         device_version = "serving-gpu-" + cuda_version + "-"
+                    elif cuda_version == "arm":
"serving-" + cuda_version + "-" + else: + device_version = "serving-gpu-cuda" + cuda_version + "-" folder_name = device_version + serving_server_version tar_name = folder_name + ".tar.gz" @@ -507,36 +527,65 @@ class Server(object): time.sleep(1) else: print("Use local bin : {}".format(self.bin_path)) - self.check_cuda() - command = "{} " \ - "-enable_model_toolkit " \ - "-inferservice_path {} " \ - "-inferservice_file {} " \ - "-max_concurrency {} " \ - "-num_threads {} " \ - "-port {} " \ - "-reload_interval_s {} " \ - "-resource_path {} " \ - "-resource_file {} " \ - "-workflow_path {} " \ - "-workflow_file {} " \ - "-bthread_concurrency {} " \ - "-gpuid {} " \ - "-max_body_size {} ".format( - self.bin_path, - self.workdir, - self.infer_service_fn, - self.max_concurrency, - self.num_threads, - self.port, - self.reload_interval_s, - self.workdir, - self.resource_fn, - self.workdir, - self.workflow_fn, - self.num_threads, - self.gpuid, - self.max_body_size) + #self.check_cuda() + if self.use_lite: + command = "{} " \ + "-enable_model_toolkit " \ + "-inferservice_path {} " \ + "-inferservice_file {} " \ + "-max_concurrency {} " \ + "-num_threads {} " \ + "-port {} " \ + "-reload_interval_s {} " \ + "-resource_path {} " \ + "-resource_file {} " \ + "-workflow_path {} " \ + "-workflow_file {} " \ + "-bthread_concurrency {} " \ + "-max_body_size {} ".format( + self.bin_path, + self.workdir, + self.infer_service_fn, + self.max_concurrency, + self.num_threads, + self.port, + self.reload_interval_s, + self.workdir, + self.resource_fn, + self.workdir, + self.workflow_fn, + self.num_threads, + self.max_body_size) + else: + command = "{} " \ + "-enable_model_toolkit " \ + "-inferservice_path {} " \ + "-inferservice_file {} " \ + "-max_concurrency {} " \ + "-num_threads {} " \ + "-port {} " \ + "-reload_interval_s {} " \ + "-resource_path {} " \ + "-resource_file {} " \ + "-workflow_path {} " \ + "-workflow_file {} " \ + "-bthread_concurrency {} " \ + "-gpuid {} " \ + "-max_body_size {} ".format( + self.bin_path, + self.workdir, + self.infer_service_fn, + self.max_concurrency, + self.num_threads, + self.port, + self.reload_interval_s, + self.workdir, + self.resource_fn, + self.workdir, + self.workflow_fn, + self.num_threads, + self.gpuid, + self.max_body_size) print("Going to Run Comand") print(command) diff --git a/python/paddle_serving_server_gpu/serve.py b/python/paddle_serving_server_gpu/serve.py index c2b170fbeb3f9ee772e86c216fe3776f34187743..ffa4c2336fd4307f67fd2f3578a1aa3102850ce9 100644 --- a/python/paddle_serving_server_gpu/serve.py +++ b/python/paddle_serving_server_gpu/serve.py @@ -38,7 +38,9 @@ def start_gpu_card_model(index, gpuid, args): # pylint: disable=doc-string-miss ir_optim = args.ir_optim max_body_size = args.max_body_size use_multilang = args.use_multilang - workdir = "{}_{}".format(args.workdir, gpuid) + workdir = args.workdir + if gpuid >= 0: + workdir = "{}_{}".format(args.workdir, gpuid) if model == "": print("You must specify your serving model") @@ -67,6 +69,13 @@ def start_gpu_card_model(index, gpuid, args): # pylint: disable=doc-string-miss if args.use_trt: server.set_trt() + if args.use_lite: + server.set_lite() + device = "arm" + + if args.use_xpu: + server.set_xpu() + if args.product_name != None: server.set_product_name(args.product_name) if args.container_id != None: @@ -95,7 +104,10 @@ def start_multi_card(args): # pylint: disable=doc-string-missing exit(-1) else: env_gpus = [] - if len(gpus) <= 0: + if args.use_lite: + print("run arm server.") + 
+        start_gpu_card_model(-1, -1, args)
+    elif len(gpus) <= 0:
         print("gpu_ids not set, going to run cpu service.")
         start_gpu_card_model(-1, -1, args)
     else:
@@ -128,7 +140,8 @@ if __name__ == "__main__":
         if len(gpu_ids) > 0:
             web_service.set_gpus(gpu_ids)
         web_service.prepare_server(
-            workdir=args.workdir, port=args.port, device=args.device)
+            workdir=args.workdir, port=args.port, device=args.device,
+            use_lite=args.use_lite, use_xpu=args.use_xpu, ir_optim=args.ir_optim)
         web_service.run_rpc_service()
 
         app_instance = Flask(__name__)
diff --git a/python/paddle_serving_server_gpu/web_service.py b/python/paddle_serving_server_gpu/web_service.py
index 8389f92cbfda7a209ff0fe4a77497ba2db1dbe1f..4b89d90ee6893c3fafd596dc8f6c5cabc3a248bf 100644
--- a/python/paddle_serving_server_gpu/web_service.py
+++ b/python/paddle_serving_server_gpu/web_service.py
@@ -83,10 +83,15 @@ class WebService(object):
                       gpuid=0,
                       thread_num=2,
                       mem_optim=True,
+                      use_lite=False,
+                      use_xpu=False,
                       ir_optim=False):
         device = "gpu"
         if gpuid == -1:
-            device = "cpu"
+            if use_lite:
+                device = "arm"
+            else:
+                device = "cpu"
         op_maker = serving.OpMaker()
         read_op = op_maker.create('general_reader')
         general_infer_op = op_maker.create('general_infer')
@@ -103,6 +108,11 @@ class WebService(object):
         server.set_memory_optimize(mem_optim)
         server.set_ir_optimize(ir_optim)
 
+        if use_lite:
+            server.set_lite()
+        if use_xpu:
+            server.set_xpu()
+
         server.load_model_config(self.model_config)
         if gpuid >= 0:
             server.set_gpuid(gpuid)
@@ -125,9 +135,11 @@ class WebService(object):
                        workdir="",
                        port=9393,
                        device="gpu",
+                       use_lite=False,
+                       use_xpu=False,
+                       ir_optim=False,
                        gpuid=0,
-                       mem_optim=True,
-                       ir_optim=False):
+                       mem_optim=True):
         print("This API will be deprecated later. Please do not use it")
         self.workdir = workdir
         self.port = port
@@ -150,6 +162,8 @@ class WebService(object):
                     -1,
                     thread_num=2,
                     mem_optim=mem_optim,
+                    use_lite=use_lite,
+                    use_xpu=use_xpu,
                     ir_optim=ir_optim))
         else:
             for i, gpuid in enumerate(self.gpus):
@@ -160,6 +174,8 @@ class WebService(object):
                         gpuid,
                         thread_num=2,
                         mem_optim=mem_optim,
+                        use_lite=use_lite,
+                        use_xpu=use_xpu,
                         ir_optim=ir_optim))
 
     def _launch_web_service(self):
diff --git a/python/pipeline/local_service_handler.py b/python/pipeline/local_service_handler.py
index a73627b69a37325b9895fa8a3217314d0371f539..f519ca2d115128bc6a6e5778dba992bc82bda5c1 100644
--- a/python/pipeline/local_service_handler.py
+++ b/python/pipeline/local_service_handler.py
@@ -44,6 +44,8 @@ class LocalServiceHandler(object):
                  ir_optim=False,
                  available_port_generator=None,
                  use_trt=False,
+                 use_lite=False,
+                 use_xpu=False,
                  use_profile=False):
         """
         Initialization of localservicehandler
@@ -60,6 +62,8 @@ class LocalServiceHandler(object):
            ir_optim: use calculation chart optimization, False default.
           available_port_generator: generate available ports
           use_trt: use nvidia tensorRt engine, False default.
+           use_lite: use Paddle-Lite engine, False default.
+           use_xpu: run predict on Baidu Kunlun, False default.
           use_profile: use profiling, False default.
 
         Returns:
@@ -74,10 +78,16 @@ class LocalServiceHandler(object):
         if devices == "":
             # cpu
             devices = [-1]
-            self._device_type = "cpu"
-            self._port_list.append(available_port_generator.next())
-            _LOGGER.info("Model({}) will be launch in cpu device. Port({})"
-                         .format(model_config, self._port_list))
+            if use_lite:
+                self._device_type = "arm"
+                self._port_list.append(available_port_generator.next())
+                _LOGGER.info("Model({}) will be launched in arm device. Port({})"
Port({})" + .format(model_config, self._port_list)) + else: + self._device_type = "cpu" + self._port_list.append(available_port_generator.next()) + _LOGGER.info("Model({}) will be launch in cpu device. Port({})" + .format(model_config, self._port_list)) else: # gpu self._device_type = "gpu" @@ -96,6 +106,8 @@ class LocalServiceHandler(object): self._rpc_service_list = [] self._server_pros = [] self._use_trt = use_trt + self._use_lite = use_lite + self._use_xpu = use_xpu self._use_profile = use_profile self.fetch_names_ = fetch_names @@ -138,8 +150,11 @@ class LocalServiceHandler(object): if self._local_predictor_client is None: self._local_predictor_client = LocalPredictor() use_gpu = False + use_lite = False if self._device_type == "gpu": use_gpu = True + elif self._device_type == "arm": + use_lite = True self._local_predictor_client.load_model_config( model_path=self._model_config, use_gpu=use_gpu, @@ -148,7 +163,9 @@ class LocalServiceHandler(object): thread_num=self._thread_num, mem_optim=self._mem_optim, ir_optim=self._ir_optim, - use_trt=self._use_trt) + use_trt=self._use_trt, + use_lite=use_lite, + use_xpu=self._use_xpu) return self._local_predictor_client def get_client_config(self): @@ -185,7 +202,7 @@ class LocalServiceHandler(object): server = Server() else: - #gpu + #gpu or arm from paddle_serving_server_gpu import OpMaker, OpSeqMaker, Server op_maker = OpMaker() read_op = op_maker.create('general_reader') diff --git a/python/setup.py.app.in b/python/setup.py.app.in index 8480ed8471e60c7e7eb8f14bf11a1cc2d23204cf..5cb2d137c8491e461e8b3149e8faf7c82512020a 100644 --- a/python/setup.py.app.in +++ b/python/setup.py.app.in @@ -32,7 +32,7 @@ if '${PACK}' == 'ON': REQUIRED_PACKAGES = [ - 'six >= 1.10.0', 'sentencepiece<=0.1.92', 'opencv-python<=4.2.0.32', 'pillow', + 'six >= 1.10.0', 'sentencepiece', 'opencv-python', 'pillow', 'pyclipper' ]