Commit ba1b1fca written by: M mapingshuo, committed by: GitHub

add train_demo, test=develop (#3271)

* add train_demo, test=develop
Parent 17a1a5fc
# Introduction
PaddleLite is best known for on-device inference, but it also supports model training on mobile devices. This document presents an example of training with PaddleLite. The task is Boston housing price prediction, also known as "fit-a-line".
You can learn more about the definition of this task and its modeling process from the
[document](https://paddlepaddle.org.cn/documentation/docs/zh/user_guides/simple_case/fit_a_line/README.cn.html)
and [source code](https://github.com/PaddlePaddle/book/tree/develop/01.fit_a_line)
in the book repository. The task is modeled with Linear Regression. This document focuses on how to port it to Paddle-Lite for training.
Note: this is a tutorial on model training with the C++ API; the other APIs do not support training yet.
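As a refresher before diving into the Lite specifics, fit-a-line is plain linear regression trained with SGD. The sketch below is illustrative only: it uses one synthetic feature instead of the 13 in `housing.data`, and a hand-rolled update instead of Paddle's `fc` layer and SGD optimizer.

```python
def sgd_linear_regression(samples, lr=0.05, epochs=500):
    """Fit y = w * x + b by minimizing squared error with per-sample SGD."""
    w, b = 0.0, 0.0
    for _ in range(epochs):
        for x, y in samples:
            pred = w * x + b
            grad = 2.0 * (pred - y)  # d(squared error)/d(pred)
            w -= lr * grad * x
            b -= lr * grad
    return w, b

# Toy data generated from y = 2x + 1 with no noise.
data = [(i / 10.0, 2.0 * (i / 10.0) + 1.0) for i in range(20)]
w, b = sgd_linear_regression(data)
```

With noise-free data the fit converges close to the generating parameters (w near 2, b near 1).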
# Requirements
- An Android phone, used to run the training program
- A Python environment with Paddle (version: 1.7.0) installed
# Quick start
## Step1 build paddle-lite
Please follow the [paddle-lite documentation](https://paddle-lite.readthedocs.io/zh/latest/user_guides/source_compile.html#paddlelite) to build the full_publish paddle-lite lib. Taking a build on Linux as an example, the commands are:
```shell
## set up the environment
wget -c https://mms-res.cdn.bcebos.com/cmake-3.10.3-Linux-x86_64.tar.gz --no-check-certificate
tar xzf cmake-3.10.3-Linux-x86_64.tar.gz
export PATH=${PWD}'/cmake-3.10.3-Linux-x86_64/bin':$PATH
wget https://dl.google.com/android/repository/android-ndk-r17c-linux-x86_64.zip
unzip android-ndk-r17c-linux-x86_64.zip
export NDK_ROOT=/opt/android-ndk-r17c
## build
git clone https://github.com/PaddlePaddle/Paddle-Lite.git
cd Paddle-Lite
./lite/tools/build.sh \
--arm_os=android \
--arm_abi=armv7 \
--build_extra=ON \
--arm_lang=gcc \
--android_stl=c++_static \
--build_train=ON full_publish
```
Artifacts:
```shell
Paddle-Lite/build.lite.android.armv7.gcc/inference_lite_lib.android.armv7/cxx/lib/libpaddle_full_api_shared.so
```
## Step2 build lr_trainer
```shell
cd Paddle-Lite/lite/demo/cxx/train_demo/cplus_train/
sh run_build.sh /path/to/your/Paddle-Lite/build.lite.android.armv7.gcc/ /path/to/your/android-ndk-r17c
```
Artifacts:
```shell
bin/
`-- demo_trainer
```
## Step3 download model and run it!
On your laptop, connect to the phone via USB, enable developer mode, and run the following in any directory:
```shell
local_path=/data/local/tmp/linear_regression
adb shell "mkdir "${local_path}
# download model and push to mobile
wget http://paddle-tar.bj.bcebos.com/paddle-lite/lite_lr_model.tar.gz
tar -zxvf lite_lr_model.tar.gz
adb push lite_lr_model/housing.data ${local_path}
adb push lite_lr_model/model_dir ${local_path}
# push lib and executable file to mobile
adb push libpaddle_full_api_shared.so ${local_path}
adb push demo_trainer ${local_path}
adb shell chmod +x ${local_path}/demo_trainer
# run it!
adb shell "export LD_LIBRARY_PATH="${local_path}" && export LIBRARY_PATH="${local_path}" && cd "${local_path}" && ./demo_trainer true"
```
Expected output:
```
sample 0: Loss: 564.317
sample 1: Loss: 463.9
sample 2: Loss: 1197.54
sample 3: Loss: 1093.83
sample 4: Loss: 1282.76
sample 5: Loss: 792.097
sample 6: Loss: 491.776
sample 7: Loss: 698.496
sample 8: Loss: 248.445
sample 9: Loss: 325.135
```
# More details
The model above was downloaded directly. If you want to generate it yourself, run:
```shell
git clone https://github.com/PaddlePaddle/Paddle-Lite.git
cd Paddle-Lite/lite/demo/cxx/train_demo/
python train.py --save_model
```
Artifacts:
```shell
model_dir/
|-- fc_0.b_0
|-- fc_0.w_0
|-- learning_rate_0
`-- __model__
md5sum fc_0.w_0: 2c7b3649b2a9cf7bcd19f8b256ce795d
```
If you want to generate your own model for training, refer to the way `train.py` saves the model.
# Checking against Paddle training results
## The first 10 loss values
To verify the consistency between paddle and lite, we train for 10 batches with identical model parameters, identical data, and batch size = 1, and record the loss values of both.
python + paddle command:
```shell
python train.py --num_steps=10 --batch_size=1
```
python + paddle output:
```shell
Train cost, Step 0, Cost 564.317017
Train cost, Step 1, Cost 463.900238
Train cost, Step 2, Cost 1197.537354
Train cost, Step 3, Cost 1093.833008
Train cost, Step 4, Cost 1282.760254
Train cost, Step 5, Cost 792.097351
Train cost, Step 6, Cost 491.775848
Train cost, Step 7, Cost 698.496033
Train cost, Step 8, Cost 248.444885
Train cost, Step 9, Cost 325.135132
```
c++ + paddle-lite command:
```
./demo_trainer true
```
c++ + paddle-lite output:
```
sample 0: Loss: 564.317
sample 1: Loss: 463.9
sample 2: Loss: 1197.54
sample 3: Loss: 1093.83
sample 4: Loss: 1282.76
sample 5: Loss: 792.097
sample 6: Loss: 491.776
sample 7: Loss: 698.496
sample 8: Loss: 248.445
sample 9: Loss: 325.135
```
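A quick way to confirm the two runs agree, up to the number of digits each side prints, is to compare the loss sequences directly. The values below are copied from the two outputs above:

```python
paddle_losses = [564.317017, 463.900238, 1197.537354, 1093.833008, 1282.760254,
                 792.097351, 491.775848, 698.496033, 248.444885, 325.135132]
lite_losses = [564.317, 463.9, 1197.54, 1093.83, 1282.76,
               792.097, 491.776, 698.496, 248.445, 325.135]

def max_rel_diff(a, b):
    """Largest element-wise relative difference between two sequences."""
    return max(abs(x - y) / abs(x) for x, y in zip(a, b))

# The lite binary prints fewer digits, so allow a small rounding tolerance.
assert max_rel_diff(paddle_losses, lite_losses) < 1e-4
```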
## Loss curves
With the batch size fixed at 20 and the training data globally shuffled before each epoch, the loss curves of paddle and lite after 100 epochs of training compare as follows.
![lr_loss](image/lr_loss.png)
To reproduce the result above, the paddle + python commands are:
```
git clone https://github.com/PaddlePaddle/book.git
cd book/01.fit_a_line
python train.py
```
The lite + c++ command is:
```
./demo_trainer false
```
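The training regimen described above (a global shuffle of the training set before each epoch, then fixed-size batches with a smaller final batch, as in `demo_trainer.cc`) can be sketched as follows. `iter_epochs` is a hypothetical helper, not part of the demo:

```python
import random

def iter_epochs(features, labels, batch_size=20, epochs=100, seed=0):
    """Yield (epoch, batch_features, batch_labels), reshuffling each epoch."""
    rng = random.Random(seed)
    n = len(features)
    for epoch in range(epochs):
        order = list(range(n))
        rng.shuffle(order)  # global shuffle of the whole training set
        for start in range(0, n, batch_size):
            idx = order[start:start + batch_size]
            yield epoch, [features[i] for i in idx], [labels[i] for i in idx]

# Tiny demo: 45 samples with batch size 20 -> batches of 20, 20, 5 per epoch.
feats = [[float(i)] for i in range(45)]
lbls = [float(i) for i in range(45)]
sizes = [len(bf) for _, bf, _ in iter_epochs(feats, lbls, epochs=1)]
```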
cmake_minimum_required(VERSION 2.8)
set (CMAKE_CXX_STANDARD 11)
# Project's name
if(NOT DEFINED LITE_ROOT)
message(FATAL_ERROR "please set LITE_ROOT with
-DLITE_ROOT=/path/to/your/build.lite.android.armv7.gcc/")
endif()
project(demo_trainer)
# Set the output folder where your program will be created
set(CMAKE_BINARY_DIR ${CMAKE_SOURCE_DIR}/bin)
set(EXECUTABLE_OUTPUT_PATH ${CMAKE_BINARY_DIR})
set(LIBRARY_OUTPUT_PATH ${CMAKE_BINARY_DIR})
# The following folder will be included
include_directories("include")
include_directories("${LITE_ROOT}/inference_lite_lib.android.armv7/cxx/include")
add_executable(demo_trainer ${PROJECT_SOURCE_DIR}/demo_trainer.cc ${PROJECT_SOURCE_DIR}/data_reader.cc)
TARGET_LINK_LIBRARIES(demo_trainer
"${LITE_ROOT}/inference_lite_lib.android.armv7/cxx/lib/libpaddle_full_api_shared.so")
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "include/data_reader.h"
#include <limits>
using std::string;
using std::vector;
int FEATURE_NUM = 13;
float rate = 0.8;
int get_samples(string line, vector<float>* feature, float* label) {
std::istringstream reader(line);
std::vector<float> numbers;
do {
// read as many numbers as possible.
for (float number; reader >> number;) {
numbers.push_back(number);
}
// consume and discard token from stream.
if (reader.fail()) {
reader.clear();
std::string token;
reader >> token;
}
} while (!reader.eof());
assert(numbers.size() == FEATURE_NUM + 1);
for (int i = 0; i < FEATURE_NUM; i++) {
feature->push_back(numbers[i]);
}
*label = numbers[FEATURE_NUM];
return 0;
}
int normalize(const vector<vector<float>>& origin_features,
vector<vector<float>>* features,
float rate) {
int inf = std::numeric_limits<int>::max();
vector<float> min_vec(FEATURE_NUM, static_cast<float>(inf));
vector<float> max_vec(FEATURE_NUM, -(static_cast<float>(inf)));
vector<float> sum_vec(FEATURE_NUM, 0);
vector<float> avg_vec(FEATURE_NUM, 0);
for (int i = 0; i < origin_features.size(); i++) {
for (int j = 0; j < FEATURE_NUM; j++) {
min_vec[j] = min(min_vec[j], origin_features[i][j]);
max_vec[j] = max(max_vec[j], origin_features[i][j]);
sum_vec[j] += origin_features[i][j];
}
}
for (int i = 0; i < FEATURE_NUM; i++) {
avg_vec[i] = sum_vec[i] / origin_features.size();
}
for (int i = 0; i < origin_features.size() * rate - 1; i++) {
vector<float> feat;
for (int j = 0; j < FEATURE_NUM; j++) {
feat.push_back((origin_features[i][j] - avg_vec[j]) /
(max_vec[j] - min_vec[j]));
}
features->push_back(feat);
}
  return 0;
}
int read_samples(const string fname,
vector<vector<float>>* features,
vector<float>* labels) {
fstream fin;
fin.open(fname);
if (!static_cast<bool>(fin)) {
return 1;
}
vector<vector<float>> origin_features;
vector<string> lines;
string line;
while (getline(fin, line)) {
lines.push_back(line);
}
fin.close();
for (int i = 0; i < lines.size(); i++) {
vector<float> feat;
float lbl = 0;
get_samples(lines[i], &feat, &lbl);
origin_features.push_back(feat);
if (i < lines.size() * rate - 1) {
labels->push_back(lbl);
}
}
cout << "finish reading data" << endl;
normalize(origin_features, features, rate);
assert(features->size() == labels->size());
return 0;
}
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <math.h>
#include <algorithm>
#include <iomanip>
#include <iostream>
#include <vector>
#include "include/data_reader.h"
#include "paddle_api.h" // NOLINT
using namespace paddle::lite_api; // NOLINT
class LRModel {
public:
void InitModel() {
// 1. Set CxxConfig
CxxConfig config;
config.set_model_dir("model_dir");
std::vector<Place> valid_places{Place{TARGET(kARM), PRECISION(kFloat)}};
config.set_valid_places(valid_places);
predictor_ = CreatePaddlePredictor<CxxConfig>(config);
}
float Predict(const vector<vector<float>>& features,
const vector<float>& labels) {
// Create Tensor
assert(features.size() == labels.size());
int batch_size = features.size();
std::unique_ptr<Tensor> input_tensor(std::move(predictor_->GetInput(0)));
input_tensor->Resize(shape_t({batch_size, FEATURE_NUM}));
auto* data = input_tensor->mutable_data<float>();
for (int i = 0; i < batch_size; i++) {
for (int j = 0; j < FEATURE_NUM; j++) {
data[FEATURE_NUM * i + j] = features[i][j];
}
}
std::unique_ptr<Tensor> y_tensor(std::move(predictor_->GetInput(1)));
y_tensor->Resize(shape_t({batch_size, 1}));
auto* y_data = y_tensor->mutable_data<float>();
for (int i = 0; i < batch_size; i++) {
y_data[i] = labels[i];
}
predictor_->Run();
std::unique_ptr<const Tensor> output_tensor(
std::move(predictor_->GetOutput(0)));
return output_tensor->data<float>()[0];
}
private:
std::shared_ptr<PaddlePredictor> predictor_;
};
int shuffle(vector<vector<float>>* features, vector<float>* labels) {
assert(features->size() == labels->size());
vector<int> index;
for (int i = 0; i < features->size(); i++) {
index.push_back(i);
}
random_shuffle(index.begin(), index.end());
vector<vector<float>> tmp_features;
vector<float> tmp_labels;
for (int i = 0; i < features->size(); i++) {
tmp_features.push_back((*features)[index[i]]);
tmp_labels.push_back((*labels)[index[i]]);
}
for (int i = 0; i < features->size(); i++) {
for (int j = 0; j < FEATURE_NUM; j++) {
(*features)[i][j] = tmp_features[i][j];
}
(*labels)[i] = tmp_labels[i];
}
return 0;
}
int main(int argc, char* argv[]) {
if (argc < 2) {
cerr << "usage: ./demo_trainer is_small" << endl;
cerr << " if is_small is true, the batch size is set to 1, " << endl;
cerr << "       and it will only run for 10 steps." << endl;
return 1;
}
string is_small = argv[1];
vector<vector<float>> features;
vector<float> labels;
read_samples("housing.data", &features, &labels);
cout << "sample count: " << features.size() << " " << endl;
std::shared_ptr<LRModel> local_model(new LRModel());
local_model->InitModel();
if (is_small == "true") {
cout << "small mode" << endl;
for (int i = 0; i < 10; i++) {
vector<vector<float>> batch_feature;
vector<float> batch_label;
batch_feature.push_back(features[i]);
batch_label.push_back(labels[i]);
auto loss = local_model->Predict(batch_feature, batch_label);
cout << "sample " << i << ": " << loss << endl;
}
} else if (is_small == "false") {
// shuffle
cout << "full model" << endl;
int epoch = 100;
int batch_size = 20;
int step = 0;
for (int i = 0; i < epoch; i++) {
shuffle(&features, &labels);
for (int j = 0;
j < ceil(static_cast<float>(features.size()) / batch_size);
j++) {
int start_idx = j * batch_size;
int end_idx =
min((j + 1) * batch_size, static_cast<int>(features.size()));
auto batch_feature = vector<vector<float>>(features.begin() + start_idx,
features.begin() + end_idx);
auto batch_label =
vector<float>(labels.begin() + start_idx, labels.begin() + end_idx);
auto loss = local_model->Predict(batch_feature, batch_label);
if (step % 10 == 0) {
std::cout << "epoch: " << i << ", step: " << step
<< ", Loss: " << loss << endl;
}
step += 1;
}
}
} else {
cerr << "wrong arg for is_small: " << is_small << endl;
}
}
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <assert.h>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>
using std::string;
using std::vector;
using std::cerr;
using std::cout;
using std::endl;
using std::min;
using std::max;
using std::fstream;
extern int FEATURE_NUM;
int get_samples(string line, vector<float>* feature, float* label);
int read_samples(const string fname,
vector<vector<float>>* features,
vector<float>* labels);
rm -rf build
mkdir build
cd build
LITE_ROOT=$1
NDK_ROOT=$2
cmake .. \
-DLITE_ROOT=${LITE_ROOT} \
-DNDK_ROOT=${NDK_ROOT} \
-DCMAKE_TOOLCHAIN_FILE=${NDK_ROOT}/build/cmake/android.toolchain.cmake \
-DANDROID_TOOLCHAIN=gcc \
-DANDROID_ABI="armeabi-v7a" \
-DANDROID_PLATFORM=android-23 \
-DANDROID=true \
-DANDROID_STL=c++_static
make
cd ..
# ./bin/demo_trainer
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import sys
import argparse
import math
import numpy
import paddle
import paddle.fluid as fluid
def parse_args():
parser = argparse.ArgumentParser("fit_a_line")
parser.add_argument(
'--save_model',
action='store_true',
help="Whether to save main program")
parser.add_argument(
'--num_steps',
type=int,
default=1000000000000,
help="train steps")
parser.add_argument(
'--num_epochs', type=int, default=100, help="number of epochs.")
parser.add_argument(
'--batch_size', type=int, default=20, help="batch size.")
parser.add_argument(
'--shuffle',
action='store_true',
help="Whether to shuffle train data.")
args = parser.parse_args()
return args
# For training test cost
def train_test(executor, program, reader, feeder, fetch_list):
accumulated = 1 * [0]
count = 0
for data_test in reader():
outs = executor.run(
program=program, feed=feeder.feed(data_test), fetch_list=fetch_list)
accumulated = [x_c[0] + x_c[1][0] for x_c in zip(accumulated, outs)]
count += 1
return [x_d / count for x_d in accumulated]
def main():
if args.shuffle:
print("doing shuffle")
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.uci_housing.train(), buf_size=500),
batch_size=args.batch_size)
else:
train_reader = paddle.batch(
paddle.dataset.uci_housing.train(), batch_size=args.batch_size)
# feature vector of length 13
x = fluid.data(name='x', shape=[None, 13], dtype='float32')
y = fluid.data(name='y', shape=[None, 1], dtype='float32')
main_program = fluid.default_main_program()
startup_program = fluid.default_startup_program()
main_program.random_seed = 90
startup_program.random_seed = 90
y_predict = fluid.layers.fc(input=x, size=1, act=None)
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
avg_loss = fluid.layers.mean(cost)
test_program = main_program.clone(for_test=True)
sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
sgd_optimizer.minimize(avg_loss)
place = fluid.CPUPlace()
exe = fluid.Executor(place)
num_epochs = args.num_epochs
# main train loop.
feeder = fluid.DataFeeder(place=place, feed_list=[x, y])
exe.run(startup_program)
if args.save_model:
fluid.io.save_persistables(exe, "model_dir")
# add feed and fetch op
feeded_var_names = ['x', 'y']
fetch_var_names = ['mean_0.tmp_0']
fluid.io.prepend_feed_ops(main_program, feeded_var_names)
fluid.io.append_fetch_ops(main_program, fetch_var_names)
with open("model_dir/__model__", "wb") as f:
f.write(main_program.desc.serialize_to_string())
with open("debug_main_program", "w") as f:
f.write(str(main_program))
print("train model saved to model_dir")
return
train_prompt = "Train cost"
step = 0
for pass_id in range(num_epochs):
for data_train in train_reader():
avg_loss_value, = exe.run(
main_program,
feed=feeder.feed(data_train),
fetch_list=[avg_loss])
print("%s, Step %d, Cost %f" %
(train_prompt, step, avg_loss_value[0]))
if step == args.num_steps - 1:
return
step += 1
if math.isnan(float(avg_loss_value[0])):
sys.exit("got NaN loss, training failed.")
if __name__ == '__main__':
args = parse_args()
main()
......@@ -14,6 +14,7 @@ readonly NUM_PROC=${LITE_BUILD_THREADS:-4}
# global variables
BUILD_EXTRA=OFF
BUILD_TRAIN=OFF
BUILD_JAVA=ON
BUILD_PYTHON=OFF
BUILD_DIR=$(pwd)
......@@ -226,6 +227,7 @@ function make_full_publish_so {
-DNPU_DDK_ROOT=$NPU_DDK_ROOT \
-DLITE_WITH_XPU=$BUILD_XPU \
-DXPU_SDK_ROOT=$XPU_SDK_ROOT \
-DLITE_WITH_TRAIN=$BUILD_TRAIN \
-DARM_TARGET_OS=${os} -DARM_TARGET_ARCH_ABI=${abi} -DARM_TARGET_LANG=${lang}
make publish_inference -j$NUM_PROC
......@@ -388,6 +390,7 @@ function print_usage {
echo -e "optional argument:"
echo -e "--shutdown_log: (OFF|ON); controls whether to shutdown log, default is ON"
echo -e "--build_extra: (OFF|ON); controls whether to publish extra operators and kernels for (sequence-related model such as OCR or NLP)"
echo -e "--build_train: (OFF|ON); controls whether to publish training operators and kernels, build_train is only for full_publish library now"
echo -e "--build_python: (OFF|ON); controls whether to publish python api lib (ANDROID and IOS is not supported)"
echo -e "--build_java: (OFF|ON); controls whether to publish java api lib (Only ANDROID is supported)"
echo -e "--build_dir: directory for building"
......@@ -436,6 +439,10 @@ function main {
BUILD_EXTRA="${i#*=}"
shift
;;
--build_train=*)
BUILD_TRAIN="${i#*=}"
shift
;;
--build_cv=*)
BUILD_CV="${i#*=}"
shift
......