Commit 28eec65f authored by Chunwei

lite/enhance model

Parent d1b0af43
@@ -5,7 +5,7 @@ cc_library(ir_params_sync_among_devices_pass SRCS ir_params_sync_among_devices_p
 cc_library(ir_graph_to_program_pass SRCS ir_graph_to_program_pass.cc DEPS analysis_pass graph_to_program_pass)
 cc_library(adjust_cudnn_workspace_size_pass SRCS adjust_cudnn_workspace_size_pass.cc DEPS analysis_pass graph_to_program_pass)
-cc_library(analysis_passes SRCS passes.cc DEPS
+cc_library(analysis_passes SRCS use_passes.cc DEPS
   ir_graph_build_pass
   ir_analysis_pass
   ir_params_sync_among_devices_pass
......
@@ -12,13 +12,13 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#include "paddle/fluid/inference/analysis/passes/passes.h"
 #include "paddle/fluid/inference/analysis/passes/adjust_cudnn_workspace_size_pass.h"
 #include "paddle/fluid/inference/analysis/passes/ir_analysis_pass.h"
 #include "paddle/fluid/inference/analysis/passes/ir_graph_build_pass.h"
 #include "paddle/fluid/inference/analysis/passes/ir_graph_to_program_pass.h"
 #include "paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.h"
 #include "paddle/fluid/inference/analysis/passes/memory_optimize_pass.h"
+#include "paddle/fluid/inference/analysis/passes/passes.h"

 namespace paddle {
 namespace inference {
......
@@ -10,6 +10,9 @@ message(STATUS "LITE_WITH_ARM:\t${LITE_WITH_ARM}")
 message(STATUS "LITE_WITH_PROFILE:\t${LITE_WITH_PROFILE}")

 set(LITE_MODEL_DIR "${THIRD_PARTY_PATH}/install")
+set(LITE_ON_MOBILE LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
+
 set(LITE_URL "http://paddle-inference-dist.bj.bcebos.com" CACHE STRING "inference download url")

 function(lite_download_and_uncompress INSTALL_DIR URL FILENAME)
@@ -182,3 +185,8 @@ add_subdirectory(model_parser)
 add_subdirectory(utils)
 add_subdirectory(api)
 add_subdirectory(gen_code)
+
+if (WITH_TESTING)
+  lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL} "lite_naive_model.tar.gz")
+endif()
-set(cxx_api_lite_deps scope_lite optimizer_lite target_wrapper_host model_parser_lite)
+set(cxx_api_lite_deps
+    scope_lite optimizer_lite target_wrapper_host model_parser_lite program_lite)
 if(LITE_WITH_CUDA)
     set(cxx_api_lite_deps ${cxx_api_lite_deps} kernels_cuda)
     cc_library(cxx_api_lite_cuda SRCS cxx_api.cc DEPS ${cxx_api_lite_deps} target_wrapper_cuda)
     nv_test(test_cxx_api_lite_cuda SRCS cxx_api_test.cc DEPS cxx_api_lite_cuda)
 endif()
-cc_library(cxx_api_lite SRCS cxx_api.cc DEPS ${cxx_api_lite_deps} ${ops_lite} program_lite)
+lite_cc_library(lite_api_test_helper SRCS lite_api_test_helper.cc
+    DEPS scope_lite optimizer_lite target_wrapper_host model_parser_lite program_lite
+    ${ops_lite} ${host_kernels}
+    CUDA_DEPS kernels_cuda
+    X86_DEPS ${x86_kernels}
+    )
+lite_cc_library(cxx_api_lite SRCS cxx_api.cc DEPS lite_api_test_helper)

 set(light_api_deps
-    scope_lite target_wrapper_host model_parser_lite)
+    scope_lite target_wrapper_host model_parser_lite program_lite)
 if(LITE_WITH_CUDA)
     set(light_api_deps ${light_api_deps} target_wrapper_cuda)
 endif()
-#cc_library(light_api_lite SRCS light_api.cc DEPS ${light_api_deps} ${ops_lite} ${host_kernels})
+lite_cc_library(light_api_lite SRCS light_api.cc
+    DEPS ${light_api_deps} ${ops_lite} ${host_kernels}
+    )

 message(STATUS "get ops ${ops_lite}")
 message(STATUS "get Host kernels ${host_kernels}")
@@ -30,18 +39,25 @@ if((NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) AND WITH_TESTING)
       ${ops_lite} ${host_kernels} ${x86_kernels}
       ARGS --model_dir=${LITE_MODEL_DIR}/lite_naive_model
       --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL)
-  lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL} "lite_naive_model.tar.gz")
   add_dependencies(test_cxx_api_lite extern_lite_download_lite_naive_model_tar_gz)
 endif()

-if(NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND WITH_TESTING)
-  add_dependencies(test_cxx_api_lite extern_lite_download_lite_naive_model_tar_gz)
-endif()
-
-# if(NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
-#     lite_cc_test(test_light_api SRCS light_api_test.cc DEPS light_api_lite ARGS --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL)
-# endif()
+# These tests need CLI arguments and are not supported in ARM CI.
+# TODO(Superjomn) support them later.
+if(NOT LITE_ON_MOBILE)
+    lite_cc_test(test_light_api SRCS light_api_test.cc
+        DEPS light_api_lite mir_passes
+        X86_DEPS ${x86_kernels}
+        ARGS --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt
+        SERIAL)
+
+    lite_cc_test(test_apis_lite SRCS apis_test.cc
+        DEPS cxx_api_lite light_api_lite ${ops_lite} mir_passes
+        X86_DEPS ${x86_kernels}
+        ARGS --model_dir=${LITE_MODEL_DIR}/lite_naive_model
+        --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL)
+endif()

 lite_cc_binary(cxx_api_lite_bin SRCS cxx_api_bin.cc
     DEPS
@@ -51,4 +67,3 @@ lite_cc_binary(cxx_api_lite_bin SRCS cxx_api_bin.cc
     mir_passes
     ${ops_lite} ${host_kernels}
     ARM_DEPS ${arm_kernels})
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/*
* We test multiple APIs here.
*/
#include <gtest/gtest.h>
#include <sstream>
#include <vector>
#include "paddle/fluid/lite/api/cxx_api.h"
#include "paddle/fluid/lite/api/light_api.h"
#include "paddle/fluid/lite/core/mir/pass_registry.h"
#include "paddle/fluid/lite/core/mir/use_passes.h"
#include "paddle/fluid/lite/kernels/use_kernels.h"
#include "paddle/fluid/lite/operators/use_ops.h"
DEFINE_string(model_dir, "", "");
DEFINE_string(optimized_model, "", "");
namespace paddle {
namespace lite {
void SetConstInput(lite::Tensor* x) {
x->Resize(DDim(std::vector<DDim::value_type>({100, 100})));
auto* data = x->mutable_data<float>();
for (int i = 0; i < 100 * 100; i++) {
data[i] = i;
}
}
bool CompareTensors(const std::string& name, const ExecutorLite& cxx_api,
const LightPredictor& light_api) {
const auto* a = cxx_api.GetTensor(name);
const auto* b = light_api.GetTensor(name);
return TensorCompareWith(*a, *b);
}
#ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
TEST(CXXApi_LightApi, save_and_load_model) {
lite::ExecutorLite cxx_api;
lite::LightPredictor light_api;
// CXXApi
{
std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kX86), PRECISION(kFloat)}});
cxx_api.Build(FLAGS_model_dir, Place{TARGET(kCUDA), PRECISION(kFloat)},
valid_places);
auto* x = cxx_api.GetInput(0);
SetConstInput(x);
cxx_api.Run();
LOG(INFO) << "Save optimized model to " << FLAGS_optimized_model;
cxx_api.SaveModel(FLAGS_optimized_model);
}
// LightApi
{
light_api.Build(FLAGS_optimized_model);
auto* x = light_api.GetInput(0);
SetConstInput(x);
light_api.Run();
}
const auto* cxx_out = cxx_api.GetOutput(0);
const auto* light_out = light_api.GetOutput(0);
ASSERT_TRUE(TensorCompareWith(*cxx_out, *light_out));
std::vector<std::string> tensors_with_order({
"a", "fc_0.w_0", "fc_0.tmp_0", "scale_0.tmp_0",
});
for (const auto& tensor_name : tensors_with_order) {
ASSERT_TRUE(CompareTensors(tensor_name, cxx_api, light_api));
}
}
#endif // LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
} // namespace lite
} // namespace paddle
@@ -78,6 +78,11 @@ class ExecutorLite {
     return &fetch_list.at(offset);
   }

+  const lite::Tensor* GetTensor(const std::string& name) const {
+    auto* var = program_->exec_scope()->FindVar(name);
+    return &var->Get<lite::Tensor>();
+  }
+
   void Run() { program_->Run(); }

   const framework::proto::ProgramDesc& program_desc() const {
......
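The new GetTensor accessor exposes any named variable of the exec scope for inspection after a run. A minimal usage sketch (assuming a predictor that has already been built and run, as in the tests further below; the variable name "fc_0.tmp_0" is a placeholder whose real value depends on the model):

#include "paddle/fluid/lite/api/cxx_api.h"

namespace paddle {
namespace lite {

// Log the shape and first element of one intermediate tensor, looked up by name.
void DumpIntermediate(const ExecutorLite& predictor) {
  const lite::Tensor* t = predictor.GetTensor("fc_0.tmp_0");  // placeholder name
  LOG(INFO) << "dims " << t->dims() << " first value " << t->data<float>()[0];
}

}  // namespace lite
}  // namespace paddle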
@@ -13,9 +13,10 @@
 // limitations under the License.

 #include "paddle/fluid/lite/api/cxx_api.h"
-#include <chrono>  // NOLINT
-#include "paddle/fluid/lite/core/mir/passes.h"
+#include <chrono>
+#include "paddle/fluid/lite/core/mir/use_passes.h"
 #include "paddle/fluid/lite/core/op_registry.h"

 namespace paddle {
 namespace lite {
......
@@ -16,11 +16,13 @@
 #include <gflags/gflags.h>
 #include <gtest/gtest.h>
 #include <vector>
-#include "paddle/fluid/lite/core/mir/passes.h"
+#include "paddle/fluid/lite/api/lite_api_test_helper.h"
+#include "paddle/fluid/lite/core/compatible_tensor.h"
+#include "paddle/fluid/lite/core/mir/use_passes.h"
 #include "paddle/fluid/lite/core/op_registry.h"
+#include "paddle/fluid/lite/kernels/use_kernels.h"
+#include "paddle/fluid/lite/operators/use_ops.h"

-DEFINE_string(model_dir, "", "");
-DEFINE_string(optimized_model, "", "");
 // For training.
 DEFINE_string(startup_program_path, "", "");
@@ -30,40 +32,11 @@ namespace paddle {
 namespace lite {

 TEST(CXXApi, test) {
-  lite::ExecutorLite predictor;
-#ifndef LITE_WITH_CUDA
-  std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
-                                   Place{TARGET(kX86), PRECISION(kFloat)}});
-#else
-  std::vector<Place> valid_places({
-      Place{TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kNCHW)},
-      Place{TARGET(kCUDA), PRECISION(kFloat), DATALAYOUT(kNCHW)},
-      Place{TARGET(kCUDA), PRECISION(kAny), DATALAYOUT(kNCHW)},
-      Place{TARGET(kHost), PRECISION(kAny), DATALAYOUT(kNCHW)},
-      Place{TARGET(kCUDA), PRECISION(kAny), DATALAYOUT(kAny)},
-      Place{TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny)},
-  });
-#endif
-
-  predictor.Build(FLAGS_model_dir,
-                  Place{TARGET(kX86), PRECISION(kFloat)},  // origin cuda
-                  valid_places);
-
-  auto* input_tensor = predictor.GetInput(0);
-  input_tensor->Resize(DDim(std::vector<DDim::value_type>({100, 100})));
-  auto* data = input_tensor->mutable_data<float>();
-  for (int i = 0; i < 100 * 100; i++) {
-    data[i] = i;
-  }
-
-  // LOG(INFO) << "input " << *input_tensor;
-
-  predictor.Run();
-
-  auto* out = predictor.GetOutput(0);
+  const lite::Tensor* out = RunHvyModel();
   LOG(INFO) << out << " memory size " << out->data_size();
-  LOG(INFO) << "out " << out->data<float>()[0];
-  LOG(INFO) << "out " << out->data<float>()[1];
+  for (int i = 0; i < 10; i++) {
+    LOG(INFO) << "out " << out->data<float>()[i];
+  }
   LOG(INFO) << "dims " << out->dims();
   // LOG(INFO) << "out " << *out;
 }
@@ -117,44 +90,3 @@ TEST(CXXApi, save_model) {

 }  // namespace lite
 }  // namespace paddle
-
-USE_LITE_OP(mul);
-USE_LITE_OP(fc);
-USE_LITE_OP(relu);
-USE_LITE_OP(scale);
-USE_LITE_OP(feed);
-USE_LITE_OP(fetch);
-USE_LITE_OP(io_copy);
-USE_LITE_OP(elementwise_add)
-USE_LITE_OP(elementwise_sub)
-USE_LITE_OP(square)
-USE_LITE_OP(softmax)
-USE_LITE_OP(dropout)
-USE_LITE_OP(concat)
-USE_LITE_OP(conv2d)
-USE_LITE_OP(depthwise_conv2d)
-USE_LITE_OP(pool2d)
-
-USE_LITE_KERNEL(feed, kHost, kAny, kAny, def);
-USE_LITE_KERNEL(fetch, kHost, kAny, kAny, def);
-
-#ifdef LITE_WITH_X86
-USE_LITE_KERNEL(relu, kX86, kFloat, kNCHW, def);
-USE_LITE_KERNEL(mul, kX86, kFloat, kNCHW, def);
-USE_LITE_KERNEL(fc, kX86, kFloat, kNCHW, def);
-USE_LITE_KERNEL(scale, kX86, kFloat, kNCHW, def);
-USE_LITE_KERNEL(square, kX86, kFloat, kNCHW, def);
-USE_LITE_KERNEL(elementwise_sub, kX86, kFloat, kNCHW, def);
-USE_LITE_KERNEL(elementwise_add, kX86, kFloat, kNCHW, def);
-USE_LITE_KERNEL(softmax, kX86, kFloat, kNCHW, def);
-USE_LITE_KERNEL(dropout, kX86, kFloat, kNCHW, def);
-USE_LITE_KERNEL(concat, kX86, kFloat, kNCHW, def);
-USE_LITE_KERNEL(conv2d, kX86, kFloat, kNCHW, def);
-USE_LITE_KERNEL(depthwise_conv2d, kX86, kFloat, kNCHW, def);
-USE_LITE_KERNEL(pool2d, kX86, kFloat, kNCHW, def);
-#endif
-
-#ifdef LITE_WITH_CUDA
-USE_LITE_KERNEL(mul, kCUDA, kFloat, kNCHW, def);
-USE_LITE_KERNEL(io_copy, kCUDA, kAny, kAny, host_to_device);
-USE_LITE_KERNEL(io_copy, kCUDA, kAny, kAny, device_to_host);
-#endif
@@ -22,6 +22,7 @@
 #include <string>
 #include <utility>
 #include <vector>
+#include "paddle/fluid/lite/core/compatible_tensor.h"
 #include "paddle/fluid/lite/core/context.h"
 #include "paddle/fluid/lite/core/program.h"
 #include "paddle/fluid/lite/core/types.h"
@@ -62,6 +63,11 @@ class LightPredictor {
     return &fetch_list.at(offset);
   }

+  const lite::Tensor* GetTensor(const std::string& name) const {
+    auto* var = program_->exec_scope()->FindVar(name);
+    return &var->Get<lite::Tensor>();
+  }
+
  private:
  void BuildRuntimeProgram(const framework::proto::ProgramDesc& prog) {
     std::vector<Instruction> insts;
@@ -72,9 +78,8 @@ class LightPredictor {
     // Create the kernels of the target places, and filter out the specific
     // kernel with the target alias.
-    for (auto& op : program.ops_) {
-      lite::pb::OpDesc desc(op->op_info()->desc());
-      auto kernel_type = desc.GetAttr(kKernelTypeAttr).get<std::string>();
+    for (auto& op : program.ops()) {
+      auto kernel_type = op->op_info()->GetAttr<std::string>(kKernelTypeAttr);
       std::string op_type, alias;
       Place place;
       KernelBase::ParseKernelType(kernel_type, &op_type, &alias, &place);
@@ -89,8 +94,8 @@ class LightPredictor {
       insts.emplace_back(op, std::move(*it));
     }
     program_.reset(new RuntimeProgram(std::move(insts)));
-    CHECK(program.exec_scope_);
-    program_->set_exec_scope(program.exec_scope_);
+    CHECK(program.exec_scope());
+    program_->set_exec_scope(program.exec_scope());
   }

  private:
......
@@ -15,6 +15,9 @@
 #include "paddle/fluid/lite/api/light_api.h"
 #include <gflags/gflags.h>
 #include <gtest/gtest.h>
+#include "paddle/fluid/lite/core/mir/use_passes.h"
+#include "paddle/fluid/lite/kernels/use_kernels.h"
+#include "paddle/fluid/lite/operators/use_ops.h"

 DEFINE_string(optimized_model, "", "");
@@ -33,29 +36,14 @@ TEST(LightAPI, load) {
   }

   predictor.Run();
+
+  const auto* output = predictor.GetOutput(0);
+  const float* raw_output = output->data<float>();
+
+  for (int i = 0; i < 10; i++) {
+    LOG(INFO) << "out " << raw_output[i];
+  }
 }

 }  // namespace lite
 }  // namespace paddle
-
-USE_LITE_OP(mul);
-USE_LITE_OP(fc);
-USE_LITE_OP(scale);
-USE_LITE_OP(feed);
-USE_LITE_OP(fetch);
-USE_LITE_OP(io_copy);
-
-USE_LITE_KERNEL(feed, kHost, kAny, kAny, def);
-USE_LITE_KERNEL(fetch, kHost, kAny, kAny, def);
-
-#ifdef LITE_WITH_X86
-USE_LITE_KERNEL(relu, kX86, kFloat, kNCHW, def);
-USE_LITE_KERNEL(mul, kX86, kFloat, kNCHW, def);
-USE_LITE_KERNEL(fc, kX86, kFloat, kNCHW, def);
-USE_LITE_KERNEL(scale, kX86, kFloat, kNCHW, def);
-USE_LITE_KERNEL(square, kX86, kFloat, kNCHW, def);
-USE_LITE_KERNEL(elementwise_sub, kX86, kFloat, kNCHW, def);
-USE_LITE_KERNEL(elementwise_add, kX86, kFloat, kNCHW, def);
-USE_LITE_KERNEL(softmax, kX86, kFloat, kNCHW, def);
-USE_LITE_KERNEL(dropout, kX86, kFloat, kNCHW, def);
-#endif
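For reference, the load-and-run flow that this Light API test exercises is roughly the following (a sketch only: the model path is caller-supplied, gflags/gtest setup is omitted, and the 100x100 constant input mirrors the tests above):

#include "paddle/fluid/lite/api/light_api.h"
#include "paddle/fluid/lite/core/mir/use_passes.h"
#include "paddle/fluid/lite/kernels/use_kernels.h"
#include "paddle/fluid/lite/operators/use_ops.h"

namespace paddle {
namespace lite {

void RunOptimizedModel(const std::string& optimized_model_dir) {
  LightPredictor predictor;
  predictor.Build(optimized_model_dir);  // loads an already-optimized model

  auto* input = predictor.GetInput(0);
  input->Resize(DDim(std::vector<DDim::value_type>({100, 100})));
  auto* data = input->mutable_data<float>();
  for (int i = 0; i < 100 * 100; i++) data[i] = i;

  predictor.Run();

  const auto* output = predictor.GetOutput(0);
  LOG(INFO) << "out[0] " << output->data<float>()[0];
}

}  // namespace lite
}  // namespace paddle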
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/lite/api/lite_api_test_helper.h"
DEFINE_string(model_dir, "", "");
DEFINE_string(optimized_model, "", "");
namespace paddle {
namespace lite {
const lite::Tensor* RunHvyModel() {
lite::ExecutorLite predictor;
#ifndef LITE_WITH_CUDA
std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kX86), PRECISION(kFloat)}});
#else
std::vector<Place> valid_places({
Place{TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kNCHW)},
Place{TARGET(kCUDA), PRECISION(kFloat), DATALAYOUT(kNCHW)},
Place{TARGET(kCUDA), PRECISION(kAny), DATALAYOUT(kNCHW)},
Place{TARGET(kHost), PRECISION(kAny), DATALAYOUT(kNCHW)},
Place{TARGET(kCUDA), PRECISION(kAny), DATALAYOUT(kAny)},
Place{TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny)},
});
#endif
predictor.Build(FLAGS_model_dir,
Place{TARGET(kX86), PRECISION(kFloat)}, // origin cuda
valid_places);
auto* input_tensor = predictor.GetInput(0);
input_tensor->Resize(DDim(std::vector<DDim::value_type>({100, 100})));
auto* data = input_tensor->mutable_data<float>();
for (int i = 0; i < 100 * 100; i++) {
data[i] = i;
}
// LOG(INFO) << "input " << *input_tensor;
predictor.Run();
const auto* out = predictor.GetOutput(0);
return out;
}
} // namespace lite
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <gflags/gflags.h>
#include "paddle/fluid/lite/api/cxx_api.h"
#include "paddle/fluid/lite/core/compatible_tensor.h"
#include "paddle/fluid/lite/core/op_registry.h"
DECLARE_string(model_dir);
DECLARE_string(optimized_model);
namespace paddle {
namespace lite {
const lite::Tensor* RunHvyModel();
} // namespace lite
} // namespace paddle
@@ -86,6 +86,7 @@ class TensorHvy : public TensorBase<TensorHvy> {

   template <typename T>
   T* mutable_data() {
+    memory_size_ = framework::product(data_.dims()) * sizeof(T);
     return data_.mutable_data<T>(data_.dims(), platform::CPUPlace());
   }
   template <typename T>
@@ -128,8 +129,11 @@ class TensorHvy : public TensorBase<TensorHvy> {
   const framework::LoDTensor& raw_tensor() const { return data_; }
   framework::LoDTensor& raw_tensor() { return data_; }

+  size_t memory_size() const { return memory_size_; }
+
  private:
   framework::LoDTensor data_;
+  size_t memory_size_{};
 };

 }  // namespace lite
......
@@ -90,6 +90,8 @@ class TensorLite : public TensorBase<TensorLite> {
   void *mutable_data(size_t memory_size);
   void *mutable_data(TargetType target, size_t memory_size);

+  const void *raw_data() const { return buffer_->data(); }
+
   size_t memory_size() const { return memory_size_; }

   bool IsInitialized() const { return buffer_->data(); }
......
@@ -20,7 +20,7 @@
 #include "paddle/fluid/lite/api/cxx_api.h"
 #include "paddle/fluid/lite/core/compatible_tensor.h"
 #include "paddle/fluid/lite/core/mir/graph_visualize_pass.h"
-#include "paddle/fluid/lite/core/mir/passes.h"
+#include "paddle/fluid/lite/core/mir/use_passes.h"
 #include "paddle/fluid/lite/core/op_registry.h"
 #include "paddle/fluid/lite/core/program.h"
......
@@ -20,7 +20,7 @@
 #include "paddle/fluid/lite/api/cxx_api.h"
 #include "paddle/fluid/lite/core/compatible_tensor.h"
 #include "paddle/fluid/lite/core/mir/graph_visualize_pass.h"
-#include "paddle/fluid/lite/core/mir/passes.h"
+#include "paddle/fluid/lite/core/mir/use_passes.h"
 #include "paddle/fluid/lite/core/op_registry.h"
 #include "paddle/fluid/lite/core/program.h"
......
@@ -17,7 +17,7 @@
 #include <gtest/gtest.h>
 #include <vector>
 #include "paddle/fluid/lite/api/cxx_api.h"
-#include "paddle/fluid/lite/core/mir/passes.h"
+#include "paddle/fluid/lite/core/mir/use_passes.h"
 #include "paddle/fluid/lite/core/op_registry.h"

 DEFINE_string(model_dir, "", "");
......
@@ -84,7 +84,7 @@ void ConvBNFuser::InsertNewNode(SSAGraph* graph, const key2nodes_t& matched) {
                        ->GetMutable<lite::Tensor>();
   size_t bias_size = bn_scale_t->data_size();
   auto bn_scale_d = bn_scale_t->mutable_data<float>();
-  CHECK(bias_size == conv_weight_dims[0])
+  CHECK_EQ(bias_size, static_cast<size_t>(conv_weight_dims[0]))
       << "The BN bias's size should be equal to the size of the first "
       << "dim size of the conv weights";
......
@@ -24,7 +24,7 @@ namespace lite {
 namespace mir {

 void GenerateProgramPass::Apply(const std::unique_ptr<SSAGraph>& graph) {
-  LOG(INFO) << "final program \n" << Visualize(graph.get());
+  VLOG(4) << "final program \n" << Visualize(graph.get());
   for (auto& item : graph->StmtTopologicalOrder()) {
     if (item->IsStmt()) {
       auto& stmt = item->AsStmt();
......
@@ -24,8 +24,10 @@ namespace lite {
 namespace mir {

 bool SSAGraph::CheckBidirectionalConnection() {
-  LOG(INFO) << "node count " << node_storage_.size();
+  VLOG(4) << "node count " << node_storage_.size();
   for (auto &node : node_storage_) {
+    if (node.IsStmt()) VLOG(4) << node.AsStmt().op_info()->Type();
+    if (node.IsArg()) VLOG(4) << node.AsArg().name << " " << node.AsArg().id;
     for (auto *in : node.inlinks) {
       CHECK(in->outlinks.end() !=
             std::find(in->outlinks.begin(), in->outlinks.end(), &node));
@@ -121,6 +123,7 @@ void SSAGraph::Build(const Program &program,
   std::unordered_map<std::string, mir::Node *> arg_update_node_map_;
   for (auto &op : program.ops()) {
+    VLOG(3) << op->op_info()->Type();
     auto *op_node = GraphCreateInstructNode(op, valid_places);
     for (const std::string &name : op->op_info()->input_names()) {
       mir::Node *arg_node = nullptr;
......
@@ -17,7 +17,7 @@
 #include <memory>
 #include "paddle/fluid/framework/program_desc.h"
 #include "paddle/fluid/lite/core/mir/graph_visualize_pass.h"
-#include "paddle/fluid/lite/core/mir/passes.h"
+#include "paddle/fluid/lite/core/mir/use_passes.h"
 #include "paddle/fluid/lite/core/op_registry.h"
 #include "paddle/fluid/lite/core/program_fake_utils.h"
......
@@ -15,12 +15,6 @@
 #pragma once
 #include "paddle/fluid/lite/core/mir/pass_registry.h"

-namespace paddle {
-namespace lite {
-namespace mir {}  // namespace mir
-}  // namespace lite
-}  // namespace paddle
-
 #ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
 USE_MIR_PASS(demo);
 USE_MIR_PASS(static_kernel_pick_pass);
......
@@ -13,7 +13,7 @@
 // limitations under the License.

 #include <gtest/gtest.h>
-#include "paddle/fluid/lite/core/mir/passes.h"
+#include "paddle/fluid/lite/core/mir/use_passes.h"
 #include "paddle/fluid/lite/core/optimizer.h"
 #include "paddle/fluid/lite/core/program_fake_utils.h"
 #include "paddle/fluid/lite/kernels/cuda/use_kernels.h"
......
@@ -54,9 +54,7 @@ class OpLite : public Registry {
   OpLite() = default;
   explicit OpLite(const std::string &type) : op_type_(type) {}
   explicit OpLite(const std::vector<Place> &valid_places)
-      : valid_places_(valid_places) {
-    LOG(INFO) << "valid places " << valid_places.size();
-  }
+      : valid_places_(valid_places) {}

   void SetValidPlaces(const std::vector<Place> &places) {
     VLOG(3) << "valid places " << valid_places_.size();
......
@@ -18,8 +18,8 @@
 #include <utility>
 #include "paddle/fluid/lite/core/mir/generate_program_pass.h"
 #include "paddle/fluid/lite/core/mir/pass_manager.h"
-#include "paddle/fluid/lite/core/mir/passes.h"
 #include "paddle/fluid/lite/core/mir/static_kernel_pick_pass.h"
+#include "paddle/fluid/lite/core/mir/use_passes.h"
 #include "paddle/fluid/lite/core/program_fake_utils.h"

 namespace paddle {
......
@@ -19,7 +19,7 @@ namespace lite {
 namespace profile {

 const int BasicTimer::data_w = 10;
-const int BasicTimer::name_w = 10;
+const int BasicTimer::name_w = 15;

 }  // namespace profile
 }  // namespace lite
......
@@ -91,6 +91,18 @@ class DDimBase {
     return os;
   }

+  friend bool operator==(const DDimBase &a, const DDimBase &b) {
+    if (a.size() != b.size()) return false;
+    for (size_t i = 0; i < a.size(); i++) {
+      if (a[i] != b[i]) return false;
+    }
+    return true;
+  }
+
+  friend bool operator!=(const DDimBase &a, const DDimBase &b) {
+    return !(a == b);
+  }
+
  private:
   DDimT *self() { return static_cast<DDimT *>(this); }
   const DDimT *const_self() const { return static_cast<const DDimT *>(this); }
@@ -154,6 +166,7 @@ class TensorBase {
   const void *raw_data() const { return const_self()->data(); }
   size_t data_size() const { return const_self()->dims().production(); }
+  size_t memory_size() const { return const_self()->memory_size(); }

   void ShareDataWith(const TensorBase &other) { self()->ShareDataWith(other); }
   void CopyDataFrom(const TensorBase &other) { self()->CopyDataFrom(other); }
@@ -175,5 +188,13 @@
   }
 };

+template <typename TensorT>
+bool TensorCompareWith(const TensorT &a, const TensorT &b) {
+  if (a.dims() != b.dims()) return false;
+  LOG(INFO) << "data_size: " << a.data_size();
+  if (memcmp(a.raw_data(), b.raw_data(), a.data_size()) != 0) return false;
+  return true;
+}
+
 }  // namespace lite
 }  // namespace paddle
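The new helper compares shape first (via the DDim operator== added above) and then the payload bytes with memcmp. A standalone illustration of the same two-step idea, using plain std::vector stand-ins rather than the lite tensor types:

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

// Simplified stand-in for the dims-then-bytes check that TensorCompareWith performs.
bool SameShapeAndBytes(const std::vector<int64_t>& dims_a, const float* a,
                       const std::vector<int64_t>& dims_b, const float* b) {
  if (dims_a != dims_b) return false;  // shape check, like DDim operator==
  std::size_t n = 1;
  for (int64_t d : dims_a) n *= static_cast<std::size_t>(d);
  return std::memcmp(a, b, n * sizeof(float)) == 0;  // byte-wise payload check
}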
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
/*
* ATTENTION: this header file may only be included from .cc files.
*/
USE_LITE_KERNEL(feed, kHost, kAny, kAny, def);
USE_LITE_KERNEL(fetch, kHost, kAny, kAny, def);
#ifdef LITE_WITH_X86
USE_LITE_KERNEL(relu, kX86, kFloat, kNCHW, def);
USE_LITE_KERNEL(mul, kX86, kFloat, kNCHW, def);
USE_LITE_KERNEL(fc, kX86, kFloat, kNCHW, def);
USE_LITE_KERNEL(scale, kX86, kFloat, kNCHW, def);
USE_LITE_KERNEL(square, kX86, kFloat, kNCHW, def);
USE_LITE_KERNEL(elementwise_sub, kX86, kFloat, kNCHW, def);
USE_LITE_KERNEL(elementwise_add, kX86, kFloat, kNCHW, def);
USE_LITE_KERNEL(softmax, kX86, kFloat, kNCHW, def);
USE_LITE_KERNEL(dropout, kX86, kFloat, kNCHW, def);
USE_LITE_KERNEL(concat, kX86, kFloat, kNCHW, def);
USE_LITE_KERNEL(conv2d, kX86, kFloat, kNCHW, def);
USE_LITE_KERNEL(depthwise_conv2d, kX86, kFloat, kNCHW, def);
USE_LITE_KERNEL(pool2d, kX86, kFloat, kNCHW, def);
#endif
#ifdef LITE_WITH_CUDA
USE_LITE_KERNEL(mul, kCUDA, kFloat, kNCHW, def);
USE_LITE_KERNEL(io_copy, kCUDA, kAny, kAny, host_to_device);
USE_LITE_KERNEL(io_copy, kCUDA, kAny, kAny, device_to_host);
#endif
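These USE_LITE_OP / USE_LITE_KERNEL lines exist so that the linker keeps the translation units that register the ops and kernels, which static-library linking would typically otherwise drop. A simplified, self-contained sketch of the "touch" idiom such registration macros are commonly built on (illustrative only, not the actual Paddle macro definitions):

// The registering .cc exposes a touch function next to its static registration.
int touch_demo_kernel() { return 0; }

// A use_*-style header then references that symbol from the consuming .cc,
// forcing the linker to pull in the registering object file.
#define USE_DEMO_KERNEL(name__)                                        \
  extern int touch_##name__();                                         \
  static int use_##name__ __attribute__((unused)) = touch_##name__();

USE_DEMO_KERNEL(demo_kernel)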
 #cc_library(runtime_lite SRCS runtime.cc)
 #TODO(Superjomn) enable it again.
-if(NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
+if(NOT LITE_ON_MOBILE)
     lite_cc_test(test_model_parser_lite SRCS model_parser_test.cc
         DEPS model_parser_lite framework_proto_lite
         ARGS --model_dir=${LITE_MODEL_DIR}/lite_naive_model)
@@ -13,18 +13,15 @@ endif()
 cc_library(compatible_pb_lite SRCS compatible_pb.cc DEPS op_desc_lite framework_proto_lite var_desc_lite)

-set(model_parser_deps variable_lite scope_lite ${tensor_lite} scope_lite
-    target_wrapper_host
-    compatible_pb_lite
-    memory_lite
-    )
-if (LITE_WITH_CUDA)
-  set(model_parser_deps ${model_parser_deps} target_wrapper_cuda)
-endif()
-cc_library(model_parser_lite SRCS model_parser.cc DEPS ${model_parser_deps})
+lite_cc_library(model_parser_lite SRCS model_parser.cc DEPS
+    variable_lite scope_lite ${tensor_lite} scope_lite
+    target_wrapper_host
+    compatible_pb_lite
+    memory_lite
+    CUDA_DEPS target_wrapper_cuda)

 lite_cc_test(test_op_desc_lite SRCS op_desc_test.cc DEPS cpp_op_desc_lite op_desc_lite compatible_pb_lite)

 add_subdirectory(pb)
 add_subdirectory(cpp)
@@ -209,7 +209,7 @@ void TensorToStream(std::ostream &os, const lite::Tensor &tensor) {
     os.write(out.data(), size);
   }
   {  // the 3rd field, tensor data
-    uint64_t size = tensor.data_size();
+    uint64_t size = tensor.memory_size();
     CHECK_LT(size, std::numeric_limits<std::streamsize>::max())
         << "Index overflow when writing tensor";
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
/*
* ATTENTION: this header file may only be included from .cc files.
*/
USE_LITE_OP(mul);
USE_LITE_OP(fc);
USE_LITE_OP(relu);
USE_LITE_OP(scale);
USE_LITE_OP(feed);
USE_LITE_OP(fetch);
USE_LITE_OP(io_copy);
USE_LITE_OP(elementwise_add)
USE_LITE_OP(elementwise_sub)
USE_LITE_OP(square)
USE_LITE_OP(softmax)
USE_LITE_OP(dropout)
USE_LITE_OP(concat)
USE_LITE_OP(conv2d)
USE_LITE_OP(depthwise_conv2d)
USE_LITE_OP(pool2d)
@@ -3,5 +3,3 @@ if (NOT LITE_WITH_X86)
 endif()

 cc_library(target_wrapper_x86 SRCS target_wrapper.cc)