diff --git a/.gitattributes b/.gitattributes
index 8f1c91f5901a8791f5365513a60ddae94aa810bd..4b9474d4b1461821902f325b009998eba2b3971f 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -19,3 +19,4 @@ ci/resource/dump/relayout_format_8.10.0.mdl filter=lfs diff=lfs merge=lfs -text
 ci/resource/dump/batch_conv_bias_with_policy_8.8.0.mdl filter=lfs diff=lfs merge=lfs -text
 ci/resource/prof/model_with_err_assert.mdl filter=lfs diff=lfs merge=lfs -text
 ci/resource/prof/test_mge.mge filter=lfs diff=lfs merge=lfs -text
+lite/test/resource/lite/ax_models/64-58063ce2.axe filter=lfs diff=lfs merge=lfs -text
diff --git a/lite/test/test_network.cpp b/lite/test/test_network.cpp
index 5763e0b9a1b17084d2dbc9c0d653be3b70390e4d..5acace4ea0fdfa26bebf5a7acce4e7582f324acd 100644
--- a/lite/test/test_network.cpp
+++ b/lite/test/test_network.cpp
@@ -15,6 +15,11 @@
 #include "./test_common.h"
 #include "megbrain/tensor.h"
 
+#ifndef WIN32
+#include <dirent.h>
+#include <sys/types.h>
+#endif
+
 #include <string.h>
 #include <chrono>
 #include <memory>
@@ -497,6 +502,115 @@ void test_input_no_copy(int record) {
         compare_lite_tensor<float>(output_tensor, outputs[i]);
     }
 }
+
+void test_io_no_copy_ax(std::string model_name, int record = 1) {
+    std::string model_path = model_name;
+    std::vector<std::string> input_names, output_names;
+
+    std::vector<std::vector<std::shared_ptr<Tensor>>> inputs;
+    std::vector<std::vector<std::shared_ptr<Tensor>>> outputs;
+
+    std::shared_ptr<Network> network = std::make_shared<Network>();
+    network->load_model(model_path);
+
+    input_names = network->get_all_input_name();
+    output_names = network->get_all_output_name();
+
+    // prepare test data: run a plain network 3 times and record its IO
+    for (int i = 0; i < 3; i++) {
+        std::vector<std::shared_ptr<Tensor>> net_inputs;
+        std::vector<std::shared_ptr<Tensor>> net_outputs;
+
+        for (size_t j = 0; j < input_names.size(); j++) {
+            auto in_tensor = network->get_io_tensor(input_names[j]);
+            auto in_layout = in_tensor->get_layout();
+            auto tmp_in = std::make_shared<Tensor>(LiteDeviceType::LITE_CPU, in_layout);
+
+            auto size = in_tensor->get_tensor_total_size_in_byte() /
+                        in_layout.get_elem_size();
+            if (in_layout.data_type == LiteDataType::LITE_INT16) {
+                auto ptr = static_cast<int16_t*>(tmp_in->get_memory_ptr());
+                for (size_t id = 0; id < size; id++) {
+                    ptr[id] = i + 1;
+                }
+            } else if (in_layout.data_type == LiteDataType::LITE_UINT8) {
+                auto ptr = static_cast<uint8_t*>(tmp_in->get_memory_ptr());
+                for (size_t id = 0; id < size; id++) {
+                    ptr[id] = i + 1;
+                }
+            }
+            net_inputs.push_back(tmp_in);
+            in_tensor->copy_from(*tmp_in);
+        }
+
+        inputs.push_back(net_inputs);
+        network->forward();
+        network->wait();
+
+        for (size_t j = 0; j < output_names.size(); j++) {
+            auto out_tensor = network->get_io_tensor(output_names[j]);
+            auto out_layout = out_tensor->get_layout();
+            auto tmp_out =
+                    std::make_shared<Tensor>(LiteDeviceType::LITE_CPU, out_layout);
+
+            tmp_out->copy_from(*out_tensor);
+            net_outputs.push_back(tmp_out);
+        }
+        outputs.push_back(net_outputs);
+    }
+
+    Config config;
+    config.options.force_output_use_user_specified_memory = true;  // no-copy output
+    config.options.comp_node_seq_record_level = record;
+    config.options.const_shape = true;
+
+    std::shared_ptr<Network> network_record = std::make_shared<Network>(config);
+
+    network_record->load_model(model_path);
+
+    for (int i = 0; i < 3; i++) {
+        for (size_t j = 0; j < inputs[i].size(); j++) {
+            auto input_tensor = network_record->get_io_tensor(input_names[j]);
+            input_tensor->reset(
+                    inputs[i][j]->get_memory_ptr(), inputs[i][j]->get_layout());
+        }
+
+        std::vector<std::shared_ptr<Tensor>> net_outputs;
+
+        for (size_t j = 0; j < outputs[i].size(); j++) {
+            auto output_tensor = network_record->get_io_tensor(output_names[j]);
+            auto tmp_out = std::make_shared<Tensor>(
+                    LiteDeviceType::LITE_CPU, output_tensor->get_layout());
+            output_tensor->reset(
+                    tmp_out->get_memory_ptr(), output_tensor->get_layout());
+            net_outputs.push_back(tmp_out);
+        }
+
+        network_record->forward();
+        network_record->wait();
+
+        for (size_t j = 0; j < outputs[i].size(); j++) {
+            auto output_tensor = network_record->get_io_tensor(output_names[j]);
+            compare_lite_tensor<float>(output_tensor, outputs[i][j]);
+        }
+    }
+    printf("profile the model %s run\n", model_path.c_str());
+    std::vector<std::shared_ptr<Tensor>> net_outputs;
+    for (size_t j = 0; j < outputs[0].size(); j++) {
+        auto output_tensor = network_record->get_io_tensor(output_names[j]);
+        auto tmp_out = std::make_shared<Tensor>(
+                LiteDeviceType::LITE_CPU, output_tensor->get_layout());
+        output_tensor->reset(tmp_out->get_memory_ptr(), output_tensor->get_layout());
+        net_outputs.push_back(tmp_out);
+    }
+    lite::Timer timer("profile");  // times the 10 iterations below
+    for (int i = 0; i < 10; i++) {
+        network_record->forward();
+        network_record->wait();
+    }
+    auto sum_time = timer.get_used_time();
+    printf("model %s used time average %f ms\n", model_path.c_str(), sum_time / 10);
+}
 } // namespace
 
 TEST(TestNetWork, OutputNoCopy) {
@@ -515,6 +629,28 @@ TEST(TestNetWork, IONoCopyRecord) {
     test_input_no_copy(1);
 }
 
+TEST(TestNetWork, IONoCopyRecordAx) {
+    std::vector<std::string> file_names;
+#ifndef WIN32
+    DIR* dirptr = NULL;
+    struct dirent* dirp;
+    std::string model_dir = "./ax_models";
+    dirptr = opendir(model_dir.c_str());
+    while (dirptr != NULL && (dirp = readdir(dirptr)) != NULL) {
+        std::string file_name(dirp->d_name);
+        if (file_name.find(".axe", 0) != std::string::npos) {
+            file_names.push_back(model_dir + "/" + file_name);
+        }
+    }
+    if (dirptr != NULL) closedir(dirptr);  // closedir(NULL) is undefined
+#endif
+
+    for (auto file_name : file_names) {
+        printf("test model: %s\n", file_name.c_str());
+        test_io_no_copy_ax(file_name);
+    }
+}
+
 TEST(TestNetWork, OutputDynamicAlloc) {
     Config config;
     config.options.force_output_dynamic_alloc = true;
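
For reference, the no-copy IO pattern that test_io_no_copy_ax exercises can be reduced to a minimal standalone sketch: enable force_output_use_user_specified_memory, then bind user-owned buffers to the network's IO tensors with Tensor::reset() so forward() reads and writes those buffers directly. The sketch below only uses calls that appear in the patch (Config, load_model, get_all_input_name, get_io_tensor, reset, forward, wait); the main() wrapper, the public header name lite/network.h, and the model path "./ax_models/example.axe" are assumptions for illustration, not part of the patch.

// Minimal sketch of the no-copy IO pattern, under the assumptions above.
#include <memory>
#include <vector>
#include "lite/network.h"  // assumed public header for lite::Network/Tensor

int main() {
    using namespace lite;

    Config config;
    // Ask the runtime to write outputs straight into user-provided memory.
    config.options.force_output_use_user_specified_memory = true;
    config.options.comp_node_seq_record_level = 1;
    config.options.const_shape = true;

    auto network = std::make_shared<Network>(config);
    network->load_model("./ax_models/example.axe");  // placeholder path

    // User-owned buffers must outlive forward()/wait(), so keep them here.
    std::vector<std::shared_ptr<Tensor>> keep_alive;

    for (auto&& name : network->get_all_input_name()) {
        auto io_tensor = network->get_io_tensor(name);
        auto buf = std::make_shared<Tensor>(
                LiteDeviceType::LITE_CPU, io_tensor->get_layout());
        // ... fill buf with real input data here ...
        // reset() makes the network read this memory directly: no input copy.
        io_tensor->reset(buf->get_memory_ptr(), buf->get_layout());
        keep_alive.push_back(buf);
    }
    for (auto&& name : network->get_all_output_name()) {
        auto io_tensor = network->get_io_tensor(name);
        auto buf = std::make_shared<Tensor>(
                LiteDeviceType::LITE_CPU, io_tensor->get_layout());
        // Results land in buf directly once wait() returns: no output copy.
        io_tensor->reset(buf->get_memory_ptr(), buf->get_layout());
        keep_alive.push_back(buf);
    }

    network->forward();
    network->wait();
    return 0;
}

The keep_alive vector mirrors the inputs/outputs vectors in the test: once reset() hands a raw pointer to the network, the Tensor that owns that memory must stay alive until the run completes, which is why the test stores its tmp_in/tmp_out tensors rather than letting them drop at loop scope.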