Commit 0972d156 authored by mindspore-ci-bot, committed by Gitee

!3784 add lite/test

Merge pull request !3784 from wangzhe/master
set(TEST_DIR ${TOP_DIR}/tests/ut/cpp)
set(TEST_DIR ${TOP_DIR}/mindspore/lite/test)
set(LITE_DIR ${TOP_DIR}/mindspore/lite)
include_directories(${TOP_DIR})
include_directories(${TEST_DIR})
include_directories(${LITE_DIR})
include_directories(${LITE_DIR}/tools)
include_directories(${LITE_DIR}/lite)
include(${CMAKE_CURRENT_SOURCE_DIR}/../../../cmake/dependency_gtest.cmake)
include(${CMAKE_CURRENT_SOURCE_DIR}/../../../cmake/external_libs/gtest.cmake)
### anf src
set(ANF_SRC
@@ -158,7 +160,7 @@ set(TEST_LITE_SRC
${LITE_DIR}/tools/common/flag_parser.cc
${LITE_DIR}/tools/common/storage.cc
${LITE_DIR}/tools/benchmark/benchmark.cc
${LITE_DIR}/test/benchmark_test.cc
${LITE_DIR}/test/st/benchmark_test.cc
)
### gpu runtime
if (SUPPORT_GPU)
@@ -179,6 +181,7 @@ endif()
if(BUILD_CONVERTER)
set(TEST_LITE_SRC
${TEST_LITE_SRC}
${TOP_DIR}/mindspore/core/utils/flags.cc
${LITE_DIR}/tools/converter/optimizer.cc
${LITE_DIR}/src/common/anf_importer/anf_importer.cc
${LITE_DIR}/src/common/anf_importer/import_from_meta_graphT.cc
@@ -188,7 +191,7 @@ if(BUILD_CONVERTER)
${LITE_DIR}/tools/converter/converter_flags.cc
${LITE_DIR}/tools/converter/converter.cc
${LITE_DIR}/tools/converter/parser/onnx/onnx.pb.cc
${LITE_DIR}/test/converter_test.cc
${LITE_DIR}/test/st/converter_test.cc
${LITE_DIR}/src/gllo/common/node_pass.cc
${LITE_DIR}/src/gllo/common/optimizer.cc
${LITE_DIR}/src/gllo/common/pass_manager.cc
@@ -233,59 +236,50 @@ else()
endif()
### test src
file(GLOB_RECURSE TEST_CASE_KERNEL_SRC
${TEST_DIR}/kernel/cpu/arm/fp32/*.cc
${TEST_DIR}/kernel/cpu/arm/int8/*.cc
${TEST_DIR}/ut/src/runtime/kernel/arm/fp32/*.cc
${TEST_DIR}/ut/src/runtime/kernel/arm/int8/*.cc
)
set(TEST_SRC
${TEST_LITE_SRC}
${TEST_CASE_KERNEL_SRC}
${TEST_DIR}/common/common_test.cc
${TEST_DIR}/common/test_lite_main.cc
${TEST_DIR}/kernel/cpu/arm/common/pack_tests.cc
${TEST_DIR}/device/cpu/arm/infer_test.cc
${TEST_DIR}/main.cc
${TEST_DIR}/ut/src/runtime/kernel/arm/common/pack_tests.cc
${TEST_DIR}/ut/src/infer_test.cc
# ${TEST_DIR}/device/cpu/arm/graph_test.cc
)
if (SUPPORT_TRAIN)
set(TEST_SRC
${TEST_SRC}
${TEST_DIR}/device/cpu/arm/train_test.cc
${TEST_DIR}/ut/src/train_test.cc
)
else()
set(TEST_SRC
${TEST_SRC}
${TEST_DIR}/device/cpu/arm/infer_test.cc
${TEST_DIR}/ut/src/infer_test.cc
)
endif()
if (SUPPORT_GPU)
set(TEST_SRC
${TEST_SRC}
${TEST_DIR}/device/opencl/opencl_infer_tests.cc
${TEST_DIR}/kernel/opencl/utils_cl_tests.cc
${TEST_DIR}/kernel/opencl/arithmetic_tests.cc
${TEST_DIR}/kernel/opencl/convolution_tests.cc
${TEST_DIR}/kernel/opencl/depthwise_conv2d_tests.cc
${TEST_DIR}/kernel/opencl/matmul_tests.cc
${TEST_DIR}/kernel/opencl/max_pooling_cl_tests.cc
${TEST_DIR}/kernel/opencl/avg_pooling_cl_tests.cc
${TEST_DIR}/kernel/opencl/softmax_cl_tests.cc
${TEST_DIR}/kernel/opencl/concat_tests.cc
${TEST_DIR}/kernel/opencl/conv2d_transpose_tests.cc
${TEST_DIR}/ut/src/runtime/kernel/opencl/matmul_tests.cc
${TEST_DIR}/ut/src/runtime/kernel/opencl/softmax_cl_tests.cc
)
endif()
if (ENABLE_FP16)
set(TEST_SRC
${TEST_SRC}
${TEST_DIR}/kernel/cpu/arm/fp16/convolution_fp16_tests.cc)
${TEST_DIR}/ut/src/runtime/kernel/arm/fp16/convolution_fp16_tests.cc)
endif ()
add_executable(lite-test ${TEST_SRC})
target_link_libraries(lite-test dl ${SECUREC_LIBRARY} ${GTEST_LIBRARY} mindspore::json)
target_link_libraries(lite-test dl ${SECUREC_LIBRARY} ${GTEST_LIBRARY} mindspore::json mindspore::gtest)
if (BUILD_CONVERTER)
target_link_libraries(lite-test
anf_exporter_mid
......
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "common/common_test.h"
#include "mindspore/core/utils/log_adapter.h"
#ifdef __cplusplus
#if __cplusplus
extern "C" {
#endif
#endif
namespace mindspore {
void Common::SetUpTestCase() {}
void Common::TearDownTestCase() {}
void Common::SetUp() {}
void Common::TearDown() {}
} // namespace mindspore
#ifdef __cplusplus
#if __cplusplus
}
#endif
#endif
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TESTS_UT_COMMON_UT_COMMON_H_
#define TESTS_UT_COMMON_UT_COMMON_H_
#include <cmath>
#include <fstream>
#include <iostream>
#include <string>
#include <algorithm>
#include "gtest/gtest.h"
namespace mindspore {
class Common : public testing::Test {
public:
// TestCase only enter once
static void SetUpTestCase();
static void TearDownTestCase();
// every TEST_F macro will enter one
virtual void SetUp();
virtual void TearDown();
template <typename T>
void PrintData(std::string name, T *output_data, int size) {
std::cout << "The " << name << " is as follows:" << std::endl;
if (typeid(output_data[0]) == typeid(uint8_t) || typeid(output_data[0]) == typeid(int8_t)) {
for (int i = 0; i < std::min(size, 100); i++) {
std::cout << static_cast<int>(output_data[i]) << " ";
}
} else {
for (int i = 0; i < std::min(size, 100); i++) {
std::cout << output_data[i] << " ";
}
}
std::cout << std::endl;
}
template <typename T>
static void CompareOutputData(T *output_data, T *correct_data, int size, float err_bound) {
for (int i = 0; i < size; i++) {
T diff = std::fabs(output_data[i] - correct_data[i]);
ASSERT_LE(diff, err_bound);
}
}
void ReadFile(const char *file, size_t *size, char **buf) {
ASSERT_NE(nullptr, file);
ASSERT_NE(nullptr, size);
ASSERT_NE(nullptr, buf);
std::string path = std::string(file);
std::ifstream ifs(path, std::ios::in | std::ios::binary);
ASSERT_EQ(true, ifs.good());
ASSERT_EQ(true, ifs.is_open());
ifs.seekg(0, std::ios::end);
*size = ifs.tellg();
*buf = new char[*size];
ifs.seekg(0, std::ios::beg);
ifs.read(*buf, *size);
ifs.close();
}
};
} // namespace mindspore
#endif // TESTS_UT_COMMON_UT_COMMON_H_
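// Illustrative usage (a hypothetical test, not part of this commit): fixtures
// derive from Common to reuse ReadFile and CompareOutputData, roughly:
//
//   class MyKernelTest : public mindspore::Common {};
//
//   TEST_F(MyKernelTest, CompareWithGolden) {
//     size_t size = 0;
//     char *buf = nullptr;
//     ReadFile("./golden_fp32.bin", &size, &buf);  // asserts internally on I/O failure
//     auto *golden = reinterpret_cast<float *>(buf);
//     // 'output' stands for the float buffer produced by the kernel under test.
//     CompareOutputData(output, golden, static_cast<int>(size / sizeof(float)), 1e-5);
//     delete[] buf;
//   }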
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <iostream>
#include "gtest/gtest.h"
#include "mindspore/core/utils/log_adapter.h"
namespace mindspore {
extern void InitSubModulesLogLevel();
}
GTEST_API_ int main(int argc, char** argv) {
mindspore::InitSubModulesLogLevel();
testing::InitGoogleTest(&argc, argv);
int ret = RUN_ALL_TESTS();
return ret;
}
@@ -15,12 +15,12 @@
*/
#include <gtest/gtest.h>
#include <string>
#include "tests/ut/cpp/common/common_test.h"
#include "common/common_test.h"
#include "benchmark/benchmark.h"
namespace mindspore {
namespace lite {
class BenchmarkTest : public UT::Common {
class BenchmarkTest : public mindspore::Common {
public:
BenchmarkTest() {}
};
......
@@ -16,11 +16,11 @@
#include <gtest/gtest.h>
#include <string>
#include "converter/converter.h"
#include "tests/ut/cpp/common/common_test.h"
#include "common/common_test.h"
namespace mindspore {
namespace lite {
class ConverterTest : public UT::Common {
class ConverterTest : public mindspore::Common {
public:
ConverterTest() {}
};
......
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <climits>
#include <cstring>
#include <string>
#include <iostream>
#include <memory>
#include <fstream>
#include "common/common_test.h"
#include "mindspore/core/utils/log_adapter.h"
#include "mindspore/lite/include/lite_session.h"
#include "mindspore/lite/src/executor.h"
#include "mindspore/lite/schema/inner/anf_ir_generated.h"
namespace mindspore {
class TestLiteInference : public mindspore::Common {
public:
TestLiteInference() {}
};
std::string RealPath(const char *path) {
if (path == nullptr) {
return "";
}
if ((strlen(path)) >= PATH_MAX) {
return "";
}
std::unique_ptr<char[]> resolvedPath(new (std::nothrow) char[PATH_MAX]{0});
if (resolvedPath == nullptr) {
return "";
}
auto ret = realpath(path, resolvedPath.get());
if (ret == nullptr) {
return "";
}
return resolvedPath.get();
}
char *ReadModelFile(const char *file, size_t *size) {
if (file == nullptr) {
return nullptr;
}
MS_ASSERT(size != nullptr);
std::ifstream ifs(RealPath(file), std::ios::in | std::ios::binary);
if (!ifs.good()) {
return nullptr;
}
if (!ifs.is_open()) {
return nullptr;
}
ifs.seekg(0, std::ios::end);
*size = ifs.tellg();
std::unique_ptr<char[]> buf(new (std::nothrow) char[*size]);
if (buf == nullptr) {
ifs.close();
return nullptr;
}
ifs.seekg(0, std::ios::beg);
ifs.read(buf.get(), *size);
ifs.close();
return buf.release();
}
// TEST_F(TestLiteInference, Net) {
// auto msGraph = std::make_shared<lite::GraphDefT>();
// msGraph->name = "graph";
// auto msSubgraph = std::make_unique<lite::SubGraphDefT>();
// msSubgraph->name = "subGraph";
//
// auto node = std::make_unique<lite::OpDefT>();
// node->inputIndex = {0, 1};
// node->outputIndex = {2};
// node->attr.type = lite::OpT_Add;
// node->attr.value = new lite::AddT;
// node->name = "Add";
// node->fmkType = lite::FmkType_CAFFE;
// msSubgraph->nodes.emplace_back(std::move(node));
//
// msSubgraph->inputIndex = {0};
// msSubgraph->outputIndex = {2};
//
// auto input0 = std::make_unique<lite::TensorDefT>();
// input0->refCount = lite::MSCONST_WEIGHT_REFCOUNT;
// input0->format = lite::Format_NCHW;
// input0->dataType = TypeId::kNumberTypeFloat;
// input0->dims = {1, 1, 2, 2};
// input0->offset = -1;
// msSubgraph->allTensors.emplace_back(std::move(input0));
//
// auto input1 = std::make_unique<lite::TensorDefT>();
// input1->refCount = lite::MSCONST_WEIGHT_REFCOUNT;
// input1->format = lite::Format_NCHW;
// input1->dataType = TypeId::kNumberTypeFloat;
// input1->dims = {1, 1, 2, 2};
// input1->offset = -1;
// input1->data.resize(16);
// msSubgraph->allTensors.emplace_back(std::move(input1));
//
// auto output = std::make_unique<lite::TensorDefT>();
// output->refCount = 0;
// output->format = lite::Format_NCHW;
// output->dims = {1, 1, 2, 2};
// output->offset = -1;
// msSubgraph->allTensors.emplace_back(std::move(output));
// msGraph->subgraphs.emplace_back(std::move(msSubgraph));
//
// flatbuffers::FlatBufferBuilder builder(1024);
// auto offset = lite::GraphDef::Pack(builder, msGraph.get());
// builder.Finish(offset);
// int size = builder.GetSize();
// auto *content = builder.GetBufferPointer();
// mindspore::lite::Context context;
// context.allocator = nullptr;
// context.deviceCtx.type = mindspore::lite::DeviceType::DT_CPU;
// #if 0
// auto graph = mindspore::lite::inference::LoadModel((char *)content, size);
//
// auto session = mindspore::lite::inference::Session::CreateSession(&context);
//
// std::vector<float> z1 = {1.1, 2.1, 3.1, 4.1};
// std::vector<inference::MSTensor *> inputs;
// auto t1 = inference::MSTensor::CreateTensor(TypeId::kNumberTypeFloat32, std::vector<int>({1, 1, 2, 2}));
// memcpy_s(t1->MutableData(), z1.size() * sizeof(float), z1.data(), z1.size() * sizeof(float));
//
// auto t2 = inference::MSTensor::CreateTensor(TypeId::kNumberTypeFloat32, std::vector<int>({1, 1, 2, 2}));
// memcpy_s(t2->MutableData(), z1.size() * sizeof(float), z1.data(), z1.size() * sizeof(float));
//
// inputs.push_back(t1);
// inputs.push_back(t1);
// // VectorRef *outputs = new VectorRef();
// auto outputs = session->RunGraph(inputs);
// #else
// auto file = "./efficientnet_b0.ms";
// size_t model_size;
//
// char *modelbuf = ReadModelFile(file, &model_size);
// auto graph = mindspore::lite::inference::LoadModel(modelbuf, model_size);
// auto session = mindspore::lite::inference::Session::CreateSession(&context);
// session->CompileGraph(graph);
// std::vector<inference::MSTensor *> inputs;
// auto t1 = inference::MSTensor::CreateTensor(TypeId::kNumberTypeFloat32, std::vector<int>({1, 244, 244, 3}));
//
// inputs.push_back(t1);
// auto outputs = session->RunGraph(inputs);
// #endif
// }
// TEST_F(TestLiteInference, Conv) {
// auto msGraph = std::make_shared<lite::GraphDefT>();
// msGraph->name = "graph";
// auto msSubgraph = std::make_unique<lite::SubGraphDefT>();
// msSubgraph->name = "subGraph";
//
// auto node = std::make_unique<lite::OpDefT>();
// node->inputIndex = {0, 1};
// node->outputIndex = {2};
// node->attr.type = lite::OpT_Conv2D;
// auto attr = new lite::Conv2DT;
// attr->padMode = lite::PadMode_SAME;
// attr->channelIn = 1;
// attr->channelOut = 1;
// attr->format = lite::Format_NHWC;
// attr->strideH = 1;
// attr->strideW = 1;
// attr->kernelH = 2;
// attr->kernelW = 2;
//
// node->attr.value = attr;
// node->name = "Conv2D";
// node->fmkType = lite::FmkType_CAFFE;
// msSubgraph->nodes.emplace_back(std::move(node));
//
// msSubgraph->inputIndex = {0};
// msSubgraph->outputIndex = {2};
// // MS_LOG(ERROR) << "OutData";
//
// auto input0 = std::make_unique<lite::TensorDefT>();
// input0->refCount = lite::MSCONST_WEIGHT_REFCOUNT;
// input0->format = lite::Format_NCHW;
// input0->dataType = TypeId::kNumberTypeFloat;
// input0->dims = {1, 1, 5, 5};
// // input0->data.resize(sizeof(float) * 25);
// // std::vector<float> input_data = {1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5};
// // memcpy(input0->data.data(), input_data.data(), sizeof(int) * 25);
// input0->offset = -1;
// msSubgraph->allTensors.emplace_back(std::move(input0));
//
// auto weight = std::make_unique<lite::TensorDefT>();
// weight->refCount = lite::MSCONST_WEIGHT_REFCOUNT;
// weight->format = lite::Format_KHWC;
// weight->dataType = TypeId::kNumberTypeFloat;
// weight->dims = {1, 2, 2, 1};
// weight->data.resize(sizeof(float) * 4);
// std::vector<float> weight_data = {1, 2, 3, 4};
// memcpy(weight->data.data(), weight_data.data(), sizeof(int) * 4);
// weight->offset = -1;
// msSubgraph->allTensors.emplace_back(std::move(weight));
//
// auto output = std::make_unique<lite::TensorDefT>();
// output->refCount = 0;
// output->format = lite::Format_NCHW;
// output->dims = {1, 1, 5, 5};
// output->offset = -1;
// msSubgraph->allTensors.emplace_back(std::move(output));
// msGraph->subgraphs.emplace_back(std::move(msSubgraph));
//
// flatbuffers::FlatBufferBuilder builder(1024);
// auto offset = lite::GraphDef::Pack(builder, msGraph.get());
// builder.Finish(offset);
// int size = builder.GetSize();
// auto *content = builder.GetBufferPointer();
// mindspore::lite::Context context;
// context.allocator = nullptr;
// context.deviceCtx.type = mindspore::lite::DeviceType::DT_CPU;
// auto graph = mindspore::lite::inference::LoadModel((char *)content, size);
// auto session = mindspore::lite::inference::Session::CreateSession(&context);
// session->CompileGraph(graph);
// std::vector<inference::MSTensor *> inputs;
// auto t1 = inference::MSTensor::CreateTensor(TypeId::kNumberTypeFloat32, std::vector<int>({1, 3, 244, 244}));
//
// inputs.push_back(t1);
// auto outputs = session->RunGraph(inputs);
// }
} // namespace mindspore
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <cmath>
#include <memory>
#include "mindspore/lite/schema/inner/model_generated.h"
#include "mindspore/lite/include/model.h"
#include "common/common_test.h"
#include "include/lite_session.h"
#include "include/context.h"
#include "include/errorcode.h"
#include "mindspore/core/utils/log_adapter.h"
namespace mindspore {
class InferTest : public mindspore::Common {
public:
InferTest() {}
};
TEST_F(InferTest, TestConvNode) {
auto meta_graph = std::make_shared<schema::MetaGraphT>();
meta_graph->name = "graph";
auto node = std::make_unique<schema::CNodeT>();
node->inputIndex = {0, 1};
node->outputIndex = {2};
node->primitive = std::make_unique<schema::PrimitiveT>();
node->primitive->value.type = schema::PrimitiveType_Conv2D;
auto primitive = new schema::Conv2DT;
primitive->padMode = schema::PadMode_SAME;
primitive->channelIn = 3;
primitive->channelOut = 32;
primitive->format = schema::Format_NHWC;
primitive->strideH = 1;
primitive->strideW = 1;
primitive->kernelH = 3;
primitive->kernelW = 3;
primitive->dilateH = 1;
primitive->dilateW = 1;
node->primitive->value.value = primitive;
node->name = "Conv2D";
meta_graph->nodes.emplace_back(std::move(node));
meta_graph->inputIndex = {0};
meta_graph->outputIndex = {2};
auto input0 = std::make_unique<schema::TensorT>();
input0->nodeType = schema::NodeType::NodeType_ValueNode;
input0->format = schema::Format_NHWC;
input0->dataType = TypeId::kNumberTypeFloat32;
input0->dims = {1, 28, 28, 3};
input0->offset = -1;
meta_graph->allTensors.emplace_back(std::move(input0));
auto weight = std::make_unique<schema::TensorT>();
weight->nodeType = schema::NodeType::NodeType_ValueNode;
weight->format = schema::Format_KHWC;
weight->dataType = TypeId::kNumberTypeFloat32;
weight->dims = {32, 3, 3, 3};
auto buf = new char *[1];
//================================================================
size_t weight_size;
std::string weight_path = "./convfp32_weight_32_3_3_3.bin";
ReadFile(weight_path.c_str(), &weight_size, buf);
ASSERT_NE(nullptr, buf[0]);
auto weight_data_temp = reinterpret_cast<float *>(buf[0]);
ASSERT_NE(nullptr, weight_data_temp);
weight->data.resize(sizeof(float) * 32 * 3 * 3 * 3);
//================================================================
memcpy(weight->data.data(), weight_data_temp, weight_size);
weight->offset = -1;
meta_graph->allTensors.emplace_back(std::move(weight));
auto output = std::make_unique<schema::TensorT>();
output->nodeType = schema::NodeType::NodeType_Parameter;
output->format = schema::Format_NHWC;
output->dataType = TypeId::kNumberTypeFloat32;
output->dims = {1, 28, 28, 32};
output->offset = -1;
meta_graph->allTensors.emplace_back(std::move(output));
flatbuffers::FlatBufferBuilder builder(1024);
auto offset = schema::MetaGraph::Pack(builder, meta_graph.get());
builder.Finish(offset);
size_t size = builder.GetSize();
const char *content = reinterpret_cast<char *>(builder.GetBufferPointer());
auto model = lite::Model::Import(content, size);
ASSERT_NE(nullptr, model);
meta_graph.reset();
content = nullptr;
auto context = new lite::Context;
context->cpuBindMode = lite::NO_BIND;
context->deviceCtx.type = lite::DT_CPU;
context->threadNum = 4;
auto session = session::LiteSession::CreateSession(context);
ASSERT_NE(nullptr, session);
auto ret = session->CompileGraph(model.get());
ASSERT_EQ(lite::RET_OK, ret);
auto inputs = session->GetInputs();
ASSERT_EQ(inputs.size(), 1);
auto inTensor = inputs.front();
ASSERT_NE(nullptr, inTensor);
auto data = inTensor->MutableData();
//===================================================
size_t input_size;
std::string input_path = "./convfp32_input_1_28_28_3.bin";
ReadFile(input_path.c_str(), &input_size, buf);
ASSERT_NE(nullptr, buf[0]);
auto input_data = reinterpret_cast<float *>(buf[0]);
ASSERT_NE(nullptr, input_data);
//===================================================
ASSERT_EQ(input_size, inTensor->Size());
memcpy(data, input_data, input_size);
ret = session->RunGraph();
ASSERT_EQ(lite::RET_OK, ret);
auto outputs = session->GetOutputs();
ASSERT_EQ(outputs.size(), 1);
auto outTensor = outputs.front();
ASSERT_NE(nullptr, outTensor);
ASSERT_EQ(28 * 28 * 32, outTensor->ElementsNum());
ASSERT_EQ(TypeId::kNumberTypeFloat32, outTensor->data_type());
auto *outData = reinterpret_cast<float *>(outTensor->MutableData());
ASSERT_NE(nullptr, outData);
//===================================================
size_t output_size;
std::string output_path = "./convfp32_out_1_28_28_32.bin";
ReadFile(output_path.c_str(), &output_size, buf);
ASSERT_NE(nullptr, buf[0]);
auto output_data = reinterpret_cast<float *>(buf[0]);
ASSERT_NE(nullptr, output_data);
//===================================================
ASSERT_EQ(output_size, outTensor->Size());
for (int i = 0; i < outTensor->ElementsNum(); i++) {
ASSERT_EQ(output_data[i], outData[i]);
}
MS_LOG(INFO) << "Passed";
}
TEST_F(InferTest, TestAddNode) {
auto meta_graph = std::make_shared<schema::MetaGraphT>();
meta_graph->name = "graph";
auto node = std::make_unique<schema::CNodeT>();
node->inputIndex = {0, 1};
node->outputIndex = {2};
node->primitive = std::make_unique<schema::PrimitiveT>();
node->primitive->value.type = schema::PrimitiveType_Add;
auto primitive = new schema::AddT;
node->primitive->value.value = primitive;
node->name = "Add";
meta_graph->nodes.emplace_back(std::move(node));
meta_graph->inputIndex = {0, 1};
meta_graph->outputIndex = {2};
auto input0 = std::make_unique<schema::TensorT>();
input0->nodeType = schema::NodeType::NodeType_ValueNode;
input0->format = schema::Format_NHWC;
input0->dataType = TypeId::kNumberTypeFloat32;
input0->dims = {1, 28, 28, 3};
input0->offset = -1;
meta_graph->allTensors.emplace_back(std::move(input0));
auto weight = std::make_unique<schema::TensorT>();
weight->nodeType = schema::NodeType::NodeType_ValueNode;
weight->format = schema::Format_KHWC;
weight->dataType = TypeId::kNumberTypeFloat32;
weight->dims = {1, 28, 28, 3};
weight->offset = -1;
meta_graph->allTensors.emplace_back(std::move(weight));
auto output = std::make_unique<schema::TensorT>();
output->nodeType = schema::NodeType::NodeType_Parameter;
output->format = schema::Format_NHWC;
output->dataType = TypeId::kNumberTypeFloat32;
output->offset = -1;
meta_graph->allTensors.emplace_back(std::move(output));
flatbuffers::FlatBufferBuilder builder(1024);
auto offset = schema::MetaGraph::Pack(builder, meta_graph.get());
builder.Finish(offset);
size_t size = builder.GetSize();
const char *content = reinterpret_cast<char *>(builder.GetBufferPointer());
auto model = lite::Model::Import(content, size);
ASSERT_NE(nullptr, model);
meta_graph.reset();
content = nullptr;
auto context = new lite::Context;
context->cpuBindMode = lite::NO_BIND;
context->deviceCtx.type = lite::DT_GPU;
context->threadNum = 4;
auto session = session::LiteSession::CreateSession(context);
ASSERT_NE(nullptr, session);
auto ret = session->CompileGraph(model.get());
ASSERT_EQ(lite::RET_OK, ret);
auto inputs = session->GetInputs();
ASSERT_EQ(inputs.size(), 2);
auto inTensor = inputs.front();
ASSERT_NE(nullptr, inTensor);
(void)inTensor->MutableData();
auto inTensor1 = inputs.back();
ASSERT_NE(nullptr, inTensor1);
(void)inTensor1->MutableData();
ret = session->RunGraph();
ASSERT_EQ(lite::RET_OK, ret);
auto outputs = session->GetOutputs();
ASSERT_EQ(outputs.size(), 1);
auto outTensor = outputs.front();
ASSERT_NE(nullptr, outTensor);
ASSERT_EQ(28 * 28 * 3, outTensor->ElementsNum());
ASSERT_EQ(TypeId::kNumberTypeFloat32, outTensor->data_type());
auto *outData = reinterpret_cast<float *>(outTensor->MutableData());
ASSERT_NE(nullptr, outData);
// //===================================================
// size_t output_size;
// std::string output_path = "./convfp32_out_1_28_28_32.bin";
// ReadFile(output_path.c_str(), &output_size, buf);
// ASSERT_NE(nullptr, buf[0]);
// auto output_data = reinterpret_cast<float *>(buf[0]);
// ASSERT_NE(nullptr, output_data);
// //===================================================
// ASSERT_EQ(output_size, outTensor->Size());
// for (size_t i = 0; i < outTensor->ElementsNum(); i++) {
// ASSERT_EQ(output_data[i], outData[i]);
// }
MS_LOG(INFO) << "Passed";
}
TEST_F(InferTest, TestModel) {
auto buf = new char *[1];
size_t model_size;
std::string model_path = "./model.ms";
ReadFile(model_path.c_str(), &model_size, buf);
ASSERT_NE(nullptr, buf[0]);
auto model = lite::Model::Import(buf[0], model_size);
ASSERT_NE(nullptr, model);
delete[] buf[0];
auto context = new lite::Context;
context->cpuBindMode = lite::NO_BIND;
context->deviceCtx.type = lite::DT_CPU;
context->threadNum = 4;
auto session = session::LiteSession::CreateSession(context);
ASSERT_NE(nullptr, session);
auto ret = session->CompileGraph(model.get());
ASSERT_EQ(lite::RET_OK, ret);
auto inputs = session->GetInputs();
ASSERT_EQ(inputs.size(), 1);
auto inTensor = inputs.front();
ASSERT_NE(nullptr, inTensor);
(void)inTensor->MutableData();
ret = session->RunGraph();
ASSERT_EQ(lite::RET_OK, ret);
auto outputs = session->GetOutputs();
MS_LOG(INFO) << "Passed";
}
// TEST_F(TrainTest, TestMultiNode) {
// auto msGraph = std::make_shared<schema::GraphDefT>();
// msGraph->name = "graph";
// auto msSubgraph = std::make_unique<schema::SubGraphDefT>();
// msSubgraph->name = "subGraph";
//
// auto conv = std::make_unique<schema::OpDefT>();
// conv->inputIndex = {0, 1};
// conv->outputIndex = {2};
// conv->attr.type = schema::OpT_Conv2D;
// auto conv_attr = new schema::Conv2DT;
// conv_attr->padMode = schema::PadMode_SAME;
// conv_attr->format = schema::Format_NHWC;
// conv_attr->strideH = 1;
// conv_attr->strideW = 1;
// conv_attr->kernelH = 3;
// conv_attr->kernelW = 3;
// conv_attr->dilateH = 1;
// conv_attr->dilateW = 1;
//
// conv->attr.value = conv_attr;
// conv->name = "Conv2D";
// conv->fmkType = schema::FmkType_CAFFE;
// msSubgraph->nodes.emplace_back(std::move(conv));
//
// auto matMul1 = std::make_unique<schema::OpDefT>();
// matMul1->inputIndex = {2, 3};
// matMul1->outputIndex = {4};
// matMul1->attr.type = schema::OpT_MatMul;
// auto matMul_attr1 = new schema::MatMulT;
// matMul_attr1->transposeA = false;
// matMul_attr1->transposeB = true;
// matMul1->attr.value = matMul_attr1;
// matMul1->name = "matmul1";
// matMul1->fmkType = schema::FmkType_CAFFE;
// msSubgraph->nodes.emplace_back(std::move(matMul1));
//
// auto matMul2 = std::make_unique<schema::OpDefT>();
// matMul2->inputIndex = {4, 5};
// matMul2->outputIndex = {6};
// matMul2->attr.type = schema::OpT_MatMul;
// auto matMul_attr2 = new schema::MatMulT;
// matMul_attr2->transposeA = false;
// matMul_attr2->transposeB = true;
// matMul2->attr.value = matMul_attr2;
// matMul2->name = "matmul2";
// matMul2->fmkType = schema::FmkType_CAFFE;
// msSubgraph->nodes.emplace_back(std::move(matMul2));
//
// msSubgraph->inputIndex = {0};
// msSubgraph->outputIndex = {6};
//
// auto input0 = std::make_unique<schema::TensorDefT>();
// input0->refCount = schema::MSCONST_WEIGHT_REFCOUNT;
// input0->format = schema::Format_NHWC;
// input0->dataType = TypeId::kNumberTypeFloat32;
// input0->dims = {1, 5, 5, 3};
// input0->offset = -1;
// msSubgraph->allTensors.emplace_back(std::move(input0));
//
// auto conv_weight = std::make_unique<schema::TensorDefT>();
// conv_weight->refCount = schema::MSCONST_WEIGHT_REFCOUNT;
// conv_weight->format = schema::Format_KHWC;
// conv_weight->dataType = TypeId::kNumberTypeFloat32;
// conv_weight->dims = {8, 3, 3, 3};
// conv_weight->data.resize(8*3*3*3*sizeof(float));
// msSubgraph->allTensors.emplace_back(std::move(conv_weight));
//
// auto conv_output = std::make_unique<schema::TensorDefT>();
// conv_output->refCount = 0;
// conv_output->format = schema::Format_NHWC;
// conv_output->dataType = TypeId::kNumberTypeFloat32;
// conv_output->dims = {1, 5, 5, 8};
// msSubgraph->allTensors.emplace_back(std::move(conv_output));
//
// auto add_weight = std::make_unique<schema::TensorDefT>();
// add_weight->refCount = schema::MSCONST_WEIGHT_REFCOUNT;
// add_weight->format = schema::Format_NHWC;
// add_weight->dataType = TypeId::kNumberTypeFloat32;
// add_weight->dims = {1, 5, 5, 8};
// add_weight->data.resize(5*5*8*sizeof(float));
// msSubgraph->allTensors.emplace_back(std::move(add_weight));
//
// auto add_output = std::make_unique<schema::TensorDefT>();
// add_output->refCount = 0;
// add_output->format = schema::Format_NHWC;
// add_output->dataType = TypeId::kNumberTypeFloat32;
// add_output->dims = {1, 5, 5, 8};
// msSubgraph->allTensors.emplace_back(std::move(add_output));
//
// auto mul_weight = std::make_unique<schema::TensorDefT>();
// mul_weight->refCount = schema::MSCONST_WEIGHT_REFCOUNT;
// mul_weight->format = schema::Format_NHWC;
// mul_weight->dataType = TypeId::kNumberTypeFloat32;
// mul_weight->dims = {1, 5, 5, 8};
// mul_weight->data.resize(5*5*8*sizeof(float));
// msSubgraph->allTensors.emplace_back(std::move(mul_weight));
//
// auto mul_output = std::make_unique<schema::TensorDefT>();
// mul_output->refCount = 0;
// mul_output->format = schema::Format_NHWC;
// mul_output->dataType = TypeId::kNumberTypeFloat32;
// mul_output->dims = {1, 5, 5, 8};
// msSubgraph->allTensors.emplace_back(std::move(mul_output));
// msGraph->subgraphs.emplace_back(std::move(msSubgraph));
//
// flatbuffers::FlatBufferBuilder builder(1024);
// auto offset = schema::GraphDef::Pack(builder, msGraph.get());
// builder.Finish(offset);
// size_t size = builder.GetSize();
// const char *content = (char *)builder.GetBufferPointer();
// const std::string strstub = "";
//
// auto func_graph = inference::LoadModel(content, size, strstub);
// ASSERT_NE(nullptr, func_graph);
// auto session = inference::MSSession::CreateSession(kCPUDevice, 0);
// ASSERT_NE(nullptr, session);
// auto graphId = session->CompileGraph(func_graph);
//
// auto inTensor =
// std::shared_ptr<inference::MSTensor>(inference::MSTensor::CreateTensor(TypeId::kNumberTypeFloat32, {1, 5, 5, 3}));
// ASSERT_NE(nullptr, inTensor);
// ASSERT_EQ(sizeof(float) * (5 * 5 * 3), inTensor->Size());
// (void)inTensor->MutableData();
//
// std::vector<std::shared_ptr<inference::MSTensor>> inputs;
// inputs.emplace_back(inTensor);
// auto outputs = session->RunGraph(graphId, inputs);
// ASSERT_EQ(1, outputs.size());
// ASSERT_EQ(1, outputs.front().size());
// auto runOutput = outputs.front().front();
// ASSERT_NE(nullptr, runOutput);
// ASSERT_EQ(5 * 5 * 8, runOutput->ElementsNum());
// ASSERT_EQ(TypeId::kNumberTypeFloat32, runOutput->data_type());
// MS_LOG(INFO) << "Passed";
//}
} // namespace mindspore
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <iostream>
#include <memory>
#include "mindspore/core/utils/log_adapter.h"
#include "common/common_test.h"
#include "mindspore/lite/src/common/file_utils.h"
#include "mindspore/lite/src/runtime/kernel/arm/opclib/pack.h"
#include "mindspore/lite/src/runtime/kernel/arm/base/convolution_base.h"
namespace mindspore {
class TestPack : public mindspore::Common {
public:
TestPack() {}
};
void InitConvParamPack(ConvParameter *conv_param) {
conv_param->input_batch_ = 1;
conv_param->input_h_ = 28;
conv_param->input_w_ = 28;
conv_param->input_channel_ = 3;
conv_param->output_batch_ = 1;
conv_param->output_h_ = 28;
conv_param->output_w_ = 28;
conv_param->output_channel_ = 32;
conv_param->kernel_h_ = 3;
conv_param->kernel_w_ = 3;
conv_param->stride_h_ = 1;
conv_param->stride_w_ = 1;
conv_param->dilation_h_ = 1;
conv_param->dilation_w_ = 1;
conv_param->pad_h_ = 1;
conv_param->pad_w_ = 1;
}
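// All pack tests below share this conv config: a 1x28x28x3 input convolved
// with 32 3x3 kernels, stride 1, pad 1, so the 28x28 spatial size is preserved.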
TEST_F(TestPack, PackInputFp32) {
size_t input_size;
std::string input_path = "./test_data/conv/convfp32_input_1_28_28_3.bin";
auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size));
auto conv_param = new ConvParameter;
InitConvParamPack(conv_param);
int kernel_h = conv_param->kernel_h_;
int kernel_w = conv_param->kernel_w_;
int in_batch = conv_param->input_batch_;
int in_channel = conv_param->input_channel_;
int in_h = conv_param->input_h_;
int in_w = conv_param->input_w_;
int out_h = conv_param->output_h_;
int out_w = conv_param->output_w_;
int thread_count = 1;
int tile_n = 8;
int output_count = out_h * out_w;
int output_tile_count = UP_DIV(output_count, tile_n);
int inchannel_block = 4;
int channel_block = UP_DIV(in_channel, inchannel_block);
int kernel_plane = kernel_h * kernel_w;
int unit_size = kernel_plane * channel_block * inchannel_block;
int packed_input_size = output_tile_count * tile_n * unit_size;
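// Packing layout: each tile gathers tile_n (=8) im2col columns; a column spans
// kernel_h * kernel_w patch positions with channels padded up to a multiple of
// inchannel_block (=4), which is exactly unit_size elements.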
auto packed_input = reinterpret_cast<float *>(malloc(in_batch * packed_input_size * sizeof(float)));
memset(packed_input, 0, in_batch * packed_input_size * sizeof(float));
for (int b = 0; b < in_batch; b++) {
int in_batch_offset = b * in_channel * in_h * in_w;
int gemm_in_batch_offset = b * packed_input_size;
for (int thread_id = 0; thread_id < output_tile_count; thread_id += thread_count) {
int start_index = thread_id * tile_n;
int real_cal_num = (output_count - start_index) < tile_n ? (output_count - start_index) : tile_n;
float *gemm_input =
reinterpret_cast<float *>(packed_input) + thread_id * unit_size * tile_n + gemm_in_batch_offset;
Im2ColPackUnitFp32(input_data + in_batch_offset, conv_param, gemm_input, real_cal_num, start_index);
}
}
printf("==================output data=================\n");
for (int i = 0; i < 20; i++) {
std::cout << packed_input[i] << " ,";
}
std::cout << std::endl;
std::string file_path = "./test_data/conv/convfp32_packinput.txt";
// mindspore::lite::WriteToTxt<float>(file_path, packed_data, in_batch * packed_input_size);
delete[] input_data;
delete conv_param;
free(packed_input);
MS_LOG(INFO) << "TestPackInputFp32 passed";
}
TEST_F(TestPack, PackWeightFp32) {
auto conv_param = new ConvParameter;
InitConvParamPack(conv_param);
int k_h = conv_param->kernel_h_;
int k_w = conv_param->kernel_w_;
int in_channel = conv_param->input_channel_;
int out_channel = conv_param->output_channel_;
int ic4 = UP_DIV(in_channel, C4NUM);
int oc8 = UP_DIV(out_channel, C8NUM);
size_t weight_size;
std::string weight_path = "./test_data/conv/convfp32_weight_32_3_3_3.bin";
auto weight_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(weight_path.c_str(), &weight_size));
auto packed_weight = reinterpret_cast<float *>(malloc(k_h * k_w * ic4 * C4NUM * oc8 * C8NUM * sizeof(float)));
PackWeightFp32(weight_data, conv_param, packed_weight);
printf("==================output data=================\n");
for (int i = 0; i < 20; i++) {
std::cout << packed_weight[i] << " ,";
}
std::cout << std::endl;
free(packed_weight);
delete conv_param;
MS_LOG(INFO) << "TestPackWeightFp32 passed";
}
#ifdef ENABLE_FP16
TEST_F(TestPack, PackInputFp16) {
// todo
size_t input_size;
std::string input_path = "./test_data/conv/convfp32_input_1_28_28_3.bin";
auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size));
int input_ele_size = input_size / sizeof(float);
auto fp16_input_data = new float16_t[input_ele_size];
for (int i = 0; i < input_ele_size; i++) {
fp16_input_data[i] = (float16_t)input_data[i];
}
auto conv_param = new ConvParameter;
InitConvParamPack(conv_param);
int kernel_h = conv_param->kernel_h_;
int kernel_w = conv_param->kernel_w_;
int in_batch = conv_param->input_batch_;
int in_channel = conv_param->input_channel_;
int in_h = conv_param->input_h_;
int in_w = conv_param->input_w_;
int out_h = conv_param->output_h_;
int out_w = conv_param->output_w_;
int thread_count = 1;
int tile_n = 16;
int output_count = out_h * out_w;
int output_tile_count = UP_DIV(output_count, tile_n);
int inchannel_block = 8;
int channel_block = UP_DIV(in_channel, inchannel_block);
int kernel_plane = kernel_h * kernel_w;
int unit_size = kernel_plane * channel_block * inchannel_block;
int packed_input_size = output_tile_count * tile_n * unit_size;
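// Fp16 packing widens the tile to 16 output points and the channel block to 8
// (the fp32 path above uses 8 and 4).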
auto packed_input = reinterpret_cast<float16_t *>(malloc(in_batch * packed_input_size * sizeof(float16_t)));
memset(packed_input, 0, in_batch * packed_input_size * sizeof(float16_t));
for (int b = 0; b < in_batch; b++) {
int in_batch_offset = b * in_channel * in_h * in_w;
int gemm_in_batch_offset = b * packed_input_size;
for (int thread_id = 0; thread_id < output_tile_count; thread_id += thread_count) {
int start_index = thread_id * tile_n;
int real_cal_num = (output_count - start_index) < tile_n ? (output_count - start_index) : tile_n;
float16_t *gemm_input =
reinterpret_cast<float16_t *>(packed_input) + thread_id * unit_size * tile_n + gemm_in_batch_offset;
Im2ColPackUnitFp16(fp16_input_data + in_batch_offset, conv_param, gemm_input, real_cal_num, start_index);
}
}
printf("==================output data=================\n");
for (int i = 0; i < 20; i++) {
std::cout << packed_input[i] << " ,";
}
std::cout << std::endl;
delete[] input_data;
delete[] fp16_input_data;
delete conv_param;
free(packed_input);
MS_LOG(INFO) << "TestPackInputFp16 passed";
}
#endif
TEST_F(TestPack, PackInputUint8) {
auto conv_param = new ConvParameter;
InitConvParamPack(conv_param);
int kernel_h = conv_param->kernel_h_;
int kernel_w = conv_param->kernel_w_;
int in_batch = conv_param->input_batch_;
int in_channel = conv_param->input_channel_;
int in_h = conv_param->input_h_;
int in_w = conv_param->input_w_;
int out_h = conv_param->output_h_;
int out_w = conv_param->output_w_;
int thread_count = 1;
int tile_n = 8;
int output_count = out_h * out_w;
int output_tile_count = UP_DIV(output_count, tile_n);
int inchannel_block = 4;
int channel_block = UP_DIV(in_channel, inchannel_block);
int kernel_plane = kernel_h * kernel_w;
int unit_size = kernel_plane * channel_block * inchannel_block;
int packed_input_size = output_tile_count * tile_n * unit_size;
// input
size_t input_size;
std::string input_path = "./test_data/conv/convuint8_input_1_28_28_3.bin";
auto input_data = reinterpret_cast<uint8_t *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size));
auto int8_input = reinterpret_cast<int8_t *>(malloc(input_size));
for (int i = 0; i < input_size; i++) {
int8_input[i] = (int8_t)(input_data[i] - 128);
}
auto packed_input = reinterpret_cast<int8_t *>(malloc(in_batch * packed_input_size));
memset(packed_input, 0, in_batch * packed_input_size);
int32_t *input_sum = reinterpret_cast<int32_t *>(malloc(tile_n * thread_count * sizeof(int32_t)));
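// input_sum holds one int32 accumulator per packed column; int8 conv kernels
// typically use these per-column input sums for filter zero-point compensation.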
for (int b = 0; b < in_batch; b++) {
int in_batch_offset = b * in_channel * in_h * in_w;
int gemm_in_batch_offset = b * packed_input_size;
for (int thread_id = 0; thread_id < output_tile_count; thread_id += thread_count) {
int start_index = thread_id * tile_n;
int real_cal_num = (output_count - start_index) < tile_n ? (output_count - start_index) : tile_n;
int8_t *gemm_input =
reinterpret_cast<int8_t *>(packed_input) + thread_id * unit_size * tile_n + gemm_in_batch_offset;
memset(input_sum, 0, tile_n * thread_count * sizeof(int32_t));
Im2ColPackUnitInt8(int8_input + in_batch_offset, gemm_input, real_cal_num, start_index, input_sum, conv_param);
}
}
printf("==================output data=================\n");
for (int i = 0; i < 20; i++) {
std::cout << static_cast<int>(packed_input[i]) << " ,";
}
std::cout << std::endl;
delete[] input_data;
delete conv_param;
free(int8_input);
free(packed_input);
free(input_sum);
MS_LOG(INFO) << "TestPackInputUint8 passed";
}
TEST_F(TestPack, PackWeightUint8) {
auto conv_param = new ConvParameter;
InitConvParamPack(conv_param);
int k_h = conv_param->kernel_h_;
int k_w = conv_param->kernel_w_;
int in_channel = conv_param->input_channel_;
int out_channel = conv_param->output_channel_;
int ic4 = UP_DIV(in_channel, C4NUM);
int oc4 = UP_DIV(out_channel, C4NUM);
size_t weight_size;
std::string weight_path = "./test_data/conv/convuint8_weight_32_3_3_3.bin";
auto weight_data = reinterpret_cast<uint8_t *>(mindspore::lite::ReadFile(weight_path.c_str(), &weight_size));
auto int8_weight = reinterpret_cast<int8_t *>(malloc(weight_size));
for (int i = 0; i < weight_size; i++) {
int8_weight[i] = (int8_t)(weight_data[i] - 128);
}
int32_t filter_zp = 20;
int32_t *weight_sum = reinterpret_cast<int32_t *>(malloc(sizeof(int32_t) * out_channel));
for (int i = 0; i < out_channel; i++) weight_sum[i] = filter_zp * ic4 * C4NUM * k_h * k_w;
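// weight_sum is pre-filled per output channel with filter_zp times the
// zero-padded patch size (ic4 * C4NUM * k_h * k_w) before packing; the int8
// path uses such sums for zero-point compensation.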
auto packed_weight = reinterpret_cast<int8_t *>(malloc(k_h * k_w * ic4 * C4NUM * oc4 * C4NUM));
PackWeightInt8(int8_weight, conv_param, packed_weight, weight_sum);
printf("==================output data=================\n");
for (int i = 0; i < 20; i++) {
std::cout << static_cast<int>(packed_weight[i]) << " ,";
}
std::cout << std::endl;
free(weight_sum);
free(int8_weight);
free(packed_weight);
delete conv_param;
MS_LOG(INFO) << "TestPackWeightUint8 passed";
}
} // namespace mindspore
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <iostream>
#include "mindspore/core/utils/log_adapter.h"
#include "common/common_test.h"
#include "mindspore/lite/src/runtime/kernel/arm/opclib/fp32/activation.h"
#include "mindspore/lite/src/kernel_registry.h"
#include "mindspore/lite/src/lite_kernel.h"
namespace mindspore {
class TestActivationFp32 : public mindspore::Common {
public:
TestActivationFp32() {}
};
TEST_F(TestActivationFp32, ReluFp32) {
float input[8] = {-3, -2, -1, 0, 1, 5, 6, 7};
float output[8] = {0};
Relu(input, 8, output);
float expect[8] = {0, 0, 0, 0, 1, 5, 6, 7};
for (int i = 0; i < 8; ++i) {
ASSERT_EQ(output[i], expect[i]);
}
}
TEST_F(TestActivationFp32, Relu6Fp32) {
float input[8] = {-3, -2, -1, 0, 1, 5, 6, 7};
float output[8] = {0};
Relu6(input, 8, output);
float expect[8] = {0, 0, 0, 0, 1, 5, 6, 6};
for (int i = 0; i < 8; ++i) {
ASSERT_EQ(output[i], expect[i]);
}
MS_LOG(INFO) << "TestActivationFp32 passed";
}
TEST_F(TestActivationFp32, LReluFp32) {
float input[8] = {-3, -2, -1, 0, 1, 5, 6, 7};
float output[8] = {0};
LRelu(input, 8, output, 0.01);
float expect[8] = {-0.03, -0.02, -0.01, 0, 1, 5, 6, 7};
for (int i = 0; i < 8; ++i) {
ASSERT_EQ(output[i], expect[i]);
}
MS_LOG(INFO) << "TestActivationFp32 passed";
}
TEST_F(TestActivationFp32, SigmoidFp32) {
float input[8] = {0, 1, 2, 3, 4, 5, 6, 7};
float output[8] = {0};
Sigmoid(input, 8, output);
// expect output {0.5, 0.731059, 0.880797, 0.952574, 0.982014, 0.993307, 0.997527, 0.999089};
printf("==================output data=================\n");
for (int i = 0; i < 8; ++i) {
std::cout << output[i] << " ";
}
std::cout << std::endl;
MS_LOG(INFO) << "TestSigmoidFp32 passed";
}
TEST_F(TestActivationFp32, TanhFp32) {
float input[7] = {-3, -2, -1, 0, 1, 2, 3};
float output[7] = {0};
Tanh(input, 7, output);
float expect[7] = {-0.995055, -0.964028, -0.761594, 0.000000, 0.761594, 0.964028, 0.995055};
for (int i = 0; i < 7; ++i) {
EXPECT_NEAR(output[i], expect[i], 0.00001);
}
MS_LOG(INFO) << "TanhFp32 passed";
}
TEST_F(TestActivationFp32, HSwishFp32) {
std::vector<lite::tensor::Tensor *> inputs_tensor;
std::vector<lite::tensor::Tensor *> outputs_tensor;
ActivationParameter op_param;
op_param.op_parameter_.type_ = schema::PrimitiveType_Activation;
op_param.type_ = schema::ActivationType_HSWISH;
op_param.alpha_ = 0.01;
std::vector<float> input = {-3.0, -2.0, -1.0, 0.0, 1.0, 5.0, 6.0, 7.0};
std::vector<int> in_shape = {8};
lite::tensor::Tensor input0_tensor;
inputs_tensor.push_back(&input0_tensor);
input0_tensor.SetData(input.data());
input0_tensor.set_shape(in_shape);
std::vector<float> output(8);
std::vector<int> output_shape = {8};
lite::tensor::Tensor output0_tensor;
outputs_tensor.push_back(&output0_tensor);
output0_tensor.SetData(output.data());
kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, schema::PrimitiveType_Activation};
auto creator = lite::KernelRegistry::GetInstance()->GetKernelCreator(desc);
ASSERT_NE(creator, nullptr);
lite::Context ctx;
ctx.threadNum = 7;
kernel::LiteKernel *kernel =
creator(inputs_tensor, outputs_tensor, reinterpret_cast<OpParameter *>(&op_param), &ctx, desc);
ASSERT_NE(kernel, nullptr);
auto output_tensor_shape = output0_tensor.shape();
kernel->Run();
std::vector<float> expect_output = {-0, -0.33333334, -0.33333334, 0, 0.6666667, 5, 6, 7};
CompareOutputData(output.data(), expect_output.data(), 8, 0.00001);
input0_tensor.SetData(nullptr);
output0_tensor.SetData(nullptr);
}
} // namespace mindspore
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <iostream>
#include <memory>
#include "common/common_test.h"
#include "mindspore/lite/src/runtime/kernel/arm/int8/add_int8.h"
#include "mindspore/lite/src/kernel_registry.h"
#include "mindspore/lite/include/context.h"
namespace mindspore {
class TestQuantizedAdd : public mindspore::Common {
public:
TestQuantizedAdd() {}
};
TEST_F(TestQuantizedAdd, Add) {
lite::tensor::Tensor in_tensor0(kNumberTypeInt8, {1, 1, 2, 5});
lite::tensor::Tensor in_tensor1(kNumberTypeInt8, {1, 1, 2, 5});
lite::tensor::Tensor out_tensor(kNumberTypeInt8, {1, 1, 2, 5});
int8_t input_data0[] = {-102, 25, -51, 89, -102, 25, -51, 89, -102, 25}; // -0.8 0.2 -0.4 0.7
int8_t input_data1[] = {38, 51, 64, -102, 38, 51, 64, -102, 38, 51}; // 0.3 0.4 0.5 -0.8
int8_t output_data[10] = {0};
in_tensor0.SetData(input_data0);
in_tensor1.SetData(input_data1);
out_tensor.SetData(output_data);
const lite::tensor::QuantArg quant_in0 = {0.00784314f, 0}; // scale = 2/255: [-1.0, 1.0] -> int8 [-128, 127]
const lite::tensor::QuantArg quant_in1 = {0.00784314f, 0};
const lite::tensor::QuantArg quant_out = {0.00784314f, 0};
in_tensor0.AddQuantParam(quant_in0);
in_tensor1.AddQuantParam(quant_in1);
out_tensor.AddQuantParam(quant_out);
std::vector<lite::tensor::Tensor *> inputs = {&in_tensor0, &in_tensor1};
std::vector<lite::tensor::Tensor *> outputs = {&out_tensor};
OpParameter parameter = {};
kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, schema::PrimitiveType_Add};
auto creator = lite::KernelRegistry::GetInstance()->GetKernelCreator(desc);
ASSERT_NE(creator, nullptr);
auto ctx = std::make_shared<lite::Context>();
auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(&parameter), ctx.get(), desc);
ASSERT_NE(kernel, nullptr);
auto ret = kernel->Run();
EXPECT_EQ(0, ret);
int8_t expect0[10] = {-64, 76, 13, -13, -64, 76, 13, -13, -64, 76}; // -0.5 0.6 0.1 -0.1
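// e.g. element 0: -0.8 + 0.3 = -0.5, and -0.5 / 0.00784314 ≈ -63.75 -> -64.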
for (int i = 0; i < 10; ++i) {
EXPECT_EQ(output_data[i], expect0[i]);
}
in_tensor0.SetData(nullptr);
in_tensor1.SetData(nullptr);
out_tensor.SetData(nullptr);
}
} // namespace mindspore
Model: mobilenet_v1_1.0_224_quant
Input: input
Output: MobilenetV1/Predictions/Reshape_1
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <vector>
#include "common/common_test.h"
#include "backend/kernel_compiler/common_utils.h"
namespace mindspore {
namespace kernel {
class CommonUtilTest : public mindspore::Common {
public:
CommonUtilTest() = default;
};
TEST_F(CommonUtilTest, BucketReduceSparseGradient1) {
// The indices is a vector and the grad is a tensor with shape (6, 2)
/* 0
* 0
* 1
* 1
* 0
* 3
*/
std::vector<int> indices{0, 0, 1, 1, 0, 3};
/* 0 1
* 2 3
* 4 5
* 6 7
* 8 9
* 10 11
*/
std::vector<float> grad;
for (int i = 0; i < 6 * 2; i++) {
grad.push_back(i);
}
std::vector<int> unique_indices(6);
std::vector<float> summed_grad(12);
std::vector<int> tmp_indices(6);
std::vector<float> tmp_grad(12);
SparseGradient unique_grad({summed_grad.data(), unique_indices.data(), 6});
SparseGradient workspace_grad({tmp_grad.data(), tmp_indices.data(), 6});
SparseGradient input_grad({grad.data(), indices.data(), 6});
ReduceSparseGradientParam param;
param.input_grad_ = &input_grad;
param.workspace_grad_ = &workspace_grad;
param.output_grad_ = &unique_grad;
param.max_index_ = 6;
param.value_stride_ = 2;
BucketReduceSparseGradient(param);
EXPECT_EQ(unique_grad.indices_size_, 3);
std::vector<int> expect_indices({0, 1, 3});
for (size_t i = 0; i < unique_grad.indices_size_; ++i) {
EXPECT_EQ(unique_grad.indices_[i], expect_indices[i]);
}
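// Rows sharing an index are summed: index 0 collects rows 0, 1 and 4, giving
// (0+2+8, 1+3+9) = (10, 13); index 1 collects rows 2 and 3, giving (10, 12).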
/* 10 13
* 10 12
* 10 11
*/
std::vector<int> expect_value({10, 13, 10, 12, 10, 11});
for (size_t i = 0; i < unique_grad.indices_size_ * 2; ++i) {
EXPECT_EQ(unique_grad.value_[i], expect_value[i]);
}
}
TEST_F(CommonUtilTest, BucketReduceSparseGradient2) {
// The indices is a vector and the grad is a tensor with shape (6, 2)
/* 0
* 0
* 1
* 1
* 0
* 6
*/
std::vector<int> indices{0, 0, 1, 1, 0, 6};
/* 0 1
* 2 3
* 4 5
* 6 7
* 8 9
* 10 11
*/
std::vector<float> grad;
for (int i = 0; i < 6 * 2; i++) {
grad.push_back(i);
}
std::vector<int> unique_indices(6);
std::vector<float> summed_grad(12);
std::vector<int> tmp_indices(6);
std::vector<float> tmp_grad(12);
SparseGradient unique_grad({summed_grad.data(), unique_indices.data(), 6});
SparseGradient workspace_grad({tmp_grad.data(), tmp_indices.data(), 6});
SparseGradient input_grad({grad.data(), indices.data(), 6});
ReduceSparseGradientParam param;
param.input_grad_ = &input_grad;
param.workspace_grad_ = &workspace_grad;
param.output_grad_ = &unique_grad;
param.max_index_ = 6;
param.value_stride_ = 2;
BucketReduceSparseGradient(param);
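// Unlike the previous case, index 6 is outside the valid range [0, max_index_),
// so its row is discarded and only indices 0 and 1 survive the reduction.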
EXPECT_EQ(unique_grad.indices_size_, 2);
std::vector<int> expect_indices({0, 1});
for (size_t i = 0; i < unique_grad.indices_size_; ++i) {
EXPECT_EQ(unique_grad.indices_[i], expect_indices[i]);
}
/* 10 13
* 10 12
*/
std::vector<int> expect_value({10, 13, 10, 12});
for (size_t i = 0; i < unique_grad.indices_size_ * 2; ++i) {
EXPECT_EQ(unique_grad.value_[i], expect_value[i]);
}
}
} // namespace kernel
} // namespace mindspore
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <iostream>
#include <memory>
#include "mindspore/core/utils/log_adapter.h"
#include "common/common_test.h"
#include "mindspore/lite/src/common/file_utils.h"
#include "mindspore/lite/src/runtime/opencl/opencl_runtime.h"
#include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h"
#include "mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.h"
// using namespace mindspore::kernel;
// using namespace mindspore::lite;
// using namespace mindspore;
namespace mindspore {
class TestMatMulOpenCL : public mindspore::Common {
public:
TestMatMulOpenCL() {}
};
TEST_F(TestMatMulOpenCL, MatMulFp32) {
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
ocl_runtime->Init();
size_t input_size;
int ci = 1280;
int co = 1001;
std::string input_path = "./test_data/matmul/matmul_fp32_input.bin";
auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size));
size_t weight_size;
std::string weight_path = "./test_data/matmul/matmul_fp32_weight.bin";
auto weight_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(weight_path.c_str(), &weight_size));
lite::tensor::Tensor *tensor_x = new lite::tensor::Tensor(TypeId(kNumberTypeFloat32), {1, ci});
lite::tensor::Tensor *tensor_w = new lite::tensor::Tensor(TypeId(kNumberTypeFloat32), {co, ci});
tensor_w->SetData(weight_data);
lite::tensor::Tensor *tensor_out = new lite::tensor::Tensor(TypeId(kNumberTypeFloat32), {1, co});
std::vector<lite::tensor::Tensor *> inputs{tensor_x, tensor_w};
std::vector<lite::tensor::Tensor *> outputs{tensor_out};
auto *arith_kernel = new MatMulOpenCLKernel(nullptr, inputs, outputs, false);
arith_kernel->Init();
std::vector<LiteKernel *> kernels{arith_kernel};
auto *pGraph = new SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels);
pGraph->Init();
memcpy(inputs[0]->Data(), input_data, sizeof(float) * ci);
pGraph->Run();
printf("==================output data=================\n");
float *output_data = reinterpret_cast<float *>(tensor_out->Data());
std::cout << std::endl;
for (int i = 0; i < co; i++) {
std::cout << output_data[i] << ", ";
}
std::cout << std::endl;
size_t output_size;
std::string output_path = "./test_data/matmul/matmul_fp32_output.bin";
auto correct_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(output_path.c_str(), &output_size));
// compare
CompareOutputData(output_data, correct_data, co, 0.00001);
delete[] input_data;
delete[] weight_data;
delete tensor_x;
delete tensor_w;
delete tensor_out;
delete[] correct_data;
MS_LOG(INFO) << "TestMatMulFp32 passed";
}
} // namespace mindspore
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <iostream>
#include "common/common_test.h"
#include "mindspore/core/utils/log_adapter.h"
#include "mindspore/lite/src/common/file_utils.h"
#include "mindspore/lite/src/runtime/opencl/opencl_runtime.h"
#include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h"
#ifndef TESTS_UT_OPENCL_KERNLE_TESTS_H
#define TESTS_UT_OPENCL_KERNLE_TESTS_H
namespace mindspore {
class TestOpenCLKernel : public mindspore::Common {
public:
TestOpenCLKernel() {}
};
} // namespace mindspore
#endif  // TESTS_UT_OPENCL_KERNEL_TESTS_H
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <iostream>
#include <memory>
#include "mindspore/core/utils/log_adapter.h"
#include "common/common_test.h"
#include "mindspore/lite/src/common/file_utils.h"
#include "mindspore/lite/src/runtime/opencl/opencl_runtime.h"
#include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h"
#include "mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.h"
namespace mindspore {
class TestSoftmaxOpenCL : public mindspore::Common {};
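// axis_ = -1 selects the innermost dimension as the softmax reduction axis.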
void InitSoftmaxParam(SoftmaxParameter *param) { param->axis_ = -1; }
TEST_F(TestSoftmaxOpenCL, SoftmaxFp32) {
MS_LOG(INFO) << "start TEST_F TestSoftmaxOpenCL";
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
ocl_runtime->Init();
MS_LOG(INFO) << "create SoftmaxParameter";
auto param = new SoftmaxParameter();
InitSoftmaxParam(param);
MS_LOG(INFO) << "create Tensors";
std::vector<int> shape_in = {1, 2, 2, 1};
std::vector<int> shape_out = {1, 2, 2, 1};
auto data_type = kNumberTypeFloat32;
auto tensorType = schema::NodeType_ValueNode;
lite::tensor::Tensor *tensor_in = new lite::tensor::Tensor(data_type, shape_in, schema::Format_NCHW, tensorType);
lite::tensor::Tensor *tensor_out = new lite::tensor::Tensor(data_type, shape_out, schema::Format_NCHW, tensorType);
std::vector<lite::tensor::Tensor *> inputs{tensor_in};
std::vector<lite::tensor::Tensor *> outputs{tensor_out};
MS_LOG(INFO) << "create OpenCL Kernel";
auto *Softmax_kernel = new SoftmaxOpenCLKernel(reinterpret_cast<OpParameter *>(param), inputs, outputs);
Softmax_kernel->Init();
std::vector<LiteKernel *> kernels{Softmax_kernel};
MS_LOG(INFO) << "create SubGraphOpenCLKernel";
auto *pGraph = new SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels);
pGraph->Init();
MS_LOG(INFO) << "initialize data";
std::vector<lite::tensor::Tensor *> tensor_map = {tensor_in};
for (auto &tensor_file : tensor_map) {
auto tensor = tensor_file;
size_t size = tensor->Size();
const float data[4] = {std::log(1.0f), std::log(2.0f), std::log(3.0f), std::log(4.0f)};
memcpy(tensor->Data(), data, size);
}
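// The input is log(1..4), so the expected output below equals exp(input) = {1, 2, 3, 4}.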
MS_LOG(INFO) << "pGraph->Run()";
pGraph->Run();
MS_LOG(INFO) << "==================output data=================";
float *output_data = reinterpret_cast<float *>(tensor_out->Data());
size_t output_size = tensor_out->Size();
printf("output:");
for (int i = 0; i < 4; i++) {
printf("%.3f ", output_data[i]);
}
printf("\n");
float expect[4] = {1.0f, 2.0f, 3.0f, 4.0f};
bool all_close = true;
for (int i = 0; i < tensor_out->ElementsNum(); ++i) {
if (std::fabs(output_data[i] - expect[i]) > 1e-5) {
all_close = false;
printf("idx[%d] expect=%.3f output=%.3f\n", i, expect[i], output_data[i]);
}
}
if (all_close) {
printf("Test all close OK for %zu output bytes!\n", output_size);
}
lite::CompareOutputData(output_data, expect, 4);
}
} // namespace mindspore
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <cmath>
#include <iostream>
#include <memory>
#include "utils/base_ref_utils.h"
#include "mindspore/lite/schema/inner/model_generated.h"
#include "mindspore/lite/src/train/model_impl.h"
#include "mindspore/lite/include/model.h"
#include "mindspore/lite/src/train/train_session.h"
#include "common/common_test.h"
#include "mindspore/core/utils/log_adapter.h"
namespace mindspore {
class TrainTest : public mindspore::Common {
public:
TrainTest() {}
};
TEST_F(TrainTest, TestConvNode) {
auto meta_graph = std::make_shared<schema::MetaGraphT>();
meta_graph->name = "graph";
auto node = std::make_unique<schema::CNodeT>();
node->inputIndex = {0, 1};
node->outputIndex = {2};
node->primitive = std::make_unique<schema::PrimitiveT>();
node->primitive->value.type = schema::PrimitiveType_Conv2D;
auto primitive = new schema::Conv2DT;
primitive->padMode = schema::PadMode_SAME;
primitive->channelIn = 3;
primitive->channelOut = 32;
primitive->format = schema::Format_NHWC;
primitive->strideH = 1;
primitive->strideW = 1;
primitive->kernelH = 3;
primitive->kernelW = 3;
primitive->dilateH = 1;
primitive->dilateW = 1;
node->primitive->value.value = primitive;
node->name = "Conv2D";
meta_graph->nodes.emplace_back(std::move(node));
meta_graph->inputIndex = {0};
meta_graph->outputIndex = {2};
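// Tensor table layout: index 0 = input activation, 1 = conv weight, 2 = output.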
auto input0 = std::make_unique<schema::TensorT>();
input0->nodeType = schema::NodeType::NodeType_Parameter; // todo use ValueNode?
input0->format = schema::Format_NHWC;
input0->dataType = TypeId::kNumberTypeFloat32;
input0->dims = {1, 28, 28, 3};
input0->offset = -1;
meta_graph->allTensors.emplace_back(std::move(input0));
auto weight = std::make_unique<schema::TensorT>();
weight->nodeType = schema::NodeType::NodeType_ValueNode;
weight->format = schema::Format_KHWC;
weight->dataType = TypeId::kNumberTypeFloat32;
weight->dims = {32, 3, 3, 3};
auto buf = new char *[1];
//================================================================
size_t weight_size;
std::string weight_path = "./convfp32_weight_32_3_3_3.bin";
ReadFile(weight_path.c_str(), &weight_size, buf);
ASSERT_NE(nullptr, buf[0]);
auto weight_data_temp = reinterpret_cast<float *>(buf[0]);
ASSERT_NE(nullptr, weight_data_temp);
weight->data.resize(sizeof(float) * 32 * 3 * 3 * 3);
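// Assumes the weight file holds exactly 32*3*3*3 floats, i.e. weight_size == weight->data.size().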
//================================================================
memcpy(weight->data.data(), weight_data_temp, weight_size);
weight->offset = -1;
meta_graph->allTensors.emplace_back(std::move(weight));
auto output = std::make_unique<schema::TensorT>();
output->nodeType = schema::NodeType::NodeType_Parameter;
output->format = schema::Format_NHWC;
output->dataType = TypeId::kNumberTypeFloat32;
output->dims = {1, 28, 28, 32};
output->offset = -1;
meta_graph->allTensors.emplace_back(std::move(output));
flatbuffers::FlatBufferBuilder builder(1024);
auto offset = schema::MetaGraph::Pack(builder, meta_graph.get());
builder.Finish(offset);
size_t size = builder.GetSize();
const char *content = reinterpret_cast<char *>(builder.GetBufferPointer());
auto model = lite::Model::Import(content, size);
ASSERT_NE(nullptr, model);
auto session = new session::TrainSession(); // inference::MSSession::CreateSession(kCPUDevice, 0);
ASSERT_NE(nullptr, session);
auto graphId = session->CompileGraph(NOT_NULL(model->GetModelImpl()));
auto inTensor = new tensor::Tensor(TypeId::kNumberTypeFloat32, {1, 28, 28, 3});
ASSERT_NE(nullptr, inTensor);
ASSERT_EQ(sizeof(float) * (28 * 28 * 3), inTensor->Size());
auto ret = inTensor->MallocData();
ASSERT_EQ(0, ret);
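// MallocData() returns 0 on success, leaving a writable buffer behind Data() for the memcpy below.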
auto data = inTensor->Data();
//===================================================
size_t input_size;
std::string input_path = "./convfp32_input_1_28_28_3.bin";
ReadFile(input_path.c_str(), &input_size, buf);
ASSERT_NE(nullptr, buf[0]);
auto input_data = reinterpret_cast<float *>(buf[0]);
ASSERT_NE(nullptr, input_data);
//===================================================
memcpy(data, input_data, input_size);
std::vector<std::shared_ptr<tensor::Tensor>> inputs;
inputs.emplace_back(inTensor);
VectorRef outputsRef;
session->RunGraph(graphId, inputs, &outputsRef);
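// Flatten the VectorRef produced by RunGraph into per-node tensor vectors for checking.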
auto outputs = TransformVectorRefToMultiTensor(outputsRef);
ASSERT_EQ(1, outputs.size());
ASSERT_EQ(1, outputs.front().size());
auto runOutput = outputs.front().front();
ASSERT_NE(nullptr, runOutput);
ASSERT_EQ(28 * 28 * 32, runOutput->ElementsNum());
ASSERT_EQ(TypeId::kNumberTypeFloat32, runOutput->data_type());
auto *outData = reinterpret_cast<float *>(runOutput->MutableData());
//===================================================
size_t output_size;
std::string output_path = "./convfp32_out_1_28_28_32.bin";
ReadFile(output_path.c_str(), &output_size, buf);
ASSERT_NE(nullptr, buf[0]);
auto output_data = reinterpret_cast<float *>(buf[0]);
ASSERT_NE(nullptr, output_data);
//===================================================
ASSERT_EQ(output_size, runOutput->Size());
for (int i = 0; i < runOutput->ElementsNum(); i++) {
ASSERT_EQ(output_data[i], outData[i]);
}
MS_LOG(INFO) << "Passed";
}
// TEST_F(TrainTest, TestMultiNode) {
// auto msGraph = std::make_shared<schema::GraphDefT>();
// msGraph->name = "graph";
// auto msSubgraph = std::make_unique<schema::SubGraphDefT>();
// msSubgraph->name = "subGraph";
//
// auto conv = std::make_unique<schema::OpDefT>();
// conv->inputIndex = {0, 1};
// conv->outputIndex = {2};
// conv->attr.type = schema::OpT_Conv2D;
// auto conv_attr = new schema::Conv2DT;
// conv_attr->padMode = schema::PadMode_SAME;
// conv_attr->format = schema::Format_NHWC;
// conv_attr->strideH = 1;
// conv_attr->strideW = 1;
// conv_attr->kernelH = 3;
// conv_attr->kernelW = 3;
// conv_attr->dilateH = 1;
// conv_attr->dilateW = 1;
//
// conv->attr.value = conv_attr;
// conv->name = "Conv2D";
// conv->fmkType = schema::FmkType_CAFFE;
// msSubgraph->nodes.emplace_back(std::move(conv));
//
// auto matMul1 = std::make_unique<schema::OpDefT>();
// matMul1->inputIndex = {2, 3};
// matMul1->outputIndex = {4};
// matMul1->attr.type = schema::OpT_MatMul;
// auto matMul_attr1 = new schema::MatMulT;
// matMul_attr1->transposeA = false;
// matMul_attr1->transposeB = true;
// matMul1->attr.value = matMul_attr1;
// matMul1->name = "matmul1";
// matMul1->fmkType = schema::FmkType_CAFFE;
// msSubgraph->nodes.emplace_back(std::move(matMul1));
//
// auto matMul2 = std::make_unique<schema::OpDefT>();
// matMul2->inputIndex = {4, 5};
// matMul2->outputIndex = {6};
// matMul2->attr.type = schema::OpT_MatMul;
// auto matMul_attr2 = new schema::MatMulT;
// matMul_attr2->transposeA = false;
// matMul_attr2->transposeB = true;
// matMul2->attr.value = matMul_attr2;
// matMul2->name = "matmul2";
// matMul2->fmkType = schema::FmkType_CAFFE;
// msSubgraph->nodes.emplace_back(std::move(matMul2));
//
// msSubgraph->inputIndex = {0};
// msSubgraph->outputIndex = {6};
//
// auto input0 = std::make_unique<schema::TensorDefT>();
// input0->refCount = schema::MSCONST_WEIGHT_REFCOUNT;
// input0->format = schema::Format_NHWC;
// input0->dataType = TypeId::kNumberTypeFloat32;
// input0->dims = {1, 5, 5, 3};
// input0->offset = -1;
// msSubgraph->allTensors.emplace_back(std::move(input0));
//
// auto conv_weight = std::make_unique<schema::TensorDefT>();
// conv_weight->refCount = schema::MSCONST_WEIGHT_REFCOUNT;
// conv_weight->format = schema::Format_KHWC;
// conv_weight->dataType = TypeId::kNumberTypeFloat32;
// conv_weight->dims = {8, 3, 3, 3};
// conv_weight->data.resize(8*3*3*3*sizeof(float));
// msSubgraph->allTensors.emplace_back(std::move(conv_weight));
//
// auto conv_output = std::make_unique<schema::TensorDefT>();
// conv_output->refCount = 0;
// conv_output->format = schema::Format_NHWC;
// conv_output->dataType = TypeId::kNumberTypeFloat32;
// conv_output->dims = {1, 5, 5, 8};
// msSubgraph->allTensors.emplace_back(std::move(conv_output));
//
// auto add_weight = std::make_unique<schema::TensorDefT>();
// add_weight->refCount = schema::MSCONST_WEIGHT_REFCOUNT;
// add_weight->format = schema::Format_NHWC;
// add_weight->dataType = TypeId::kNumberTypeFloat32;
// add_weight->dims = {1, 5, 5, 8};
// add_weight->data.resize(5*5*8*sizeof(float));
// msSubgraph->allTensors.emplace_back(std::move(add_weight));
//
// auto add_output = std::make_unique<schema::TensorDefT>();
// add_output->refCount = 0;
// add_output->format = schema::Format_NHWC;
// add_output->dataType = TypeId::kNumberTypeFloat32;
// add_output->dims = {1, 5, 5, 8};
// msSubgraph->allTensors.emplace_back(std::move(add_output));
//
// auto mul_weight = std::make_unique<schema::TensorDefT>();
// mul_weight->refCount = schema::MSCONST_WEIGHT_REFCOUNT;
// mul_weight->format = schema::Format_NHWC;
// mul_weight->dataType = TypeId::kNumberTypeFloat32;
// mul_weight->dims = {1, 5, 5, 8};
// mul_weight->data.resize(5*5*8*sizeof(float));
// msSubgraph->allTensors.emplace_back(std::move(mul_weight));
//
// auto mul_output = std::make_unique<schema::TensorDefT>();
// mul_output->refCount = 0;
// mul_output->format = schema::Format_NHWC;
// mul_output->dataType = TypeId::kNumberTypeFloat32;
// mul_output->dims = {1, 5, 5, 8};
// msSubgraph->allTensors.emplace_back(std::move(mul_output));
// msGraph->subgraphs.emplace_back(std::move(msSubgraph));
//
// flatbuffers::FlatBufferBuilder builder(1024);
// auto offset = schema::GraphDef::Pack(builder, msGraph.get());
// builder.Finish(offset);
// size_t size = builder.GetSize();
// const char *content = (char *)builder.GetBufferPointer();
// const std::string strstub = "";
//
// auto func_graph = inference::LoadModel(content, size, strstub);
// ASSERT_NE(nullptr, func_graph);
// auto session = inference::MSSession::CreateSession(kCPUDevice, 0);
// ASSERT_NE(nullptr, session);
// auto graphId = session->CompileGraph(func_graph);
//
// auto inTensor =
// std::shared_ptr<inference::MSTensor>(inference::MSTensor::CreateTensor(TypeId::kNumberTypeFloat32, {1, 5, 5, 3}));
// ASSERT_NE(nullptr, inTensor);
// ASSERT_EQ(sizeof(float) * (5 * 5 * 3), inTensor->Size());
// (void)inTensor->MutableData();
//
// std::vector<std::shared_ptr<inference::MSTensor>> inputs;
// inputs.emplace_back(inTensor);
// auto outputs = session->RunGraph(graphId, inputs);
// ASSERT_EQ(1, outputs.size());
// ASSERT_EQ(1, outputs.front().size());
// auto runOutput = outputs.front().front();
// ASSERT_NE(nullptr, runOutput);
// ASSERT_EQ(5 * 5 * 8, runOutput->ElementsNum());
// ASSERT_EQ(TypeId::kNumberTypeFloat32, runOutput->data_type());
// MS_LOG(INFO) << "Passed";
// }
} // namespace mindspore