From 6b98f36543596d49441e80c54629b5e1d0013a47 Mon Sep 17 00:00:00 2001
From: BUG1989 <248857878@qq.com>
Date: Tue, 18 May 2021 21:16:26 +0800
Subject: [PATCH] add the optest of tim-vx (#677)

* timvx optest, prelu

* timvx optest, relu, relu1, pooling, dropout

* fix, op test cmake

* timvx optest, conv
---
 .github/workflows/linux-x64-cpu-timvx-gcc.yml |   2 +
 .github/workflows/test-coverage.yml           |  14 +-
 examples/tm_mobilefacenet.cpp                 |   2 +-
 examples/tm_mobilefacenet_uint8.cpp           |   2 +-
 examples/tm_openpose.cpp                      |   4 +-
 examples/tm_retinaface.cpp                    |   2 +-
 source/operator/prototype/relu1.c             |   1 -
 tests/CMakeLists.txt                          |  25 +-
 tests/common/compiler_fp16.h                  | 153 +++++++++++++
 tests/op/test_onnx_op.h                       |   2 +-
 tests/op/test_op.h                            |  90 ++++----
 tests/op/test_timvx_op_convolution.cpp        | 214 ++++++++++++++++++
 tests/op/test_timvx_op_dropout.cpp            | 138 +++++++++++
 tests/op/test_timvx_op_pooling.cpp            | 159 +++++++++++++
 ..._prelu_timvx.c => test_timvx_op_prelu.cpp} |  18 +-
 tests/op/test_timvx_op_relu.cpp               | 138 +++++++++++
 tests/op/test_timvx_op_relu1.cpp              | 138 +++++++++++
 17 files changed, 1031 insertions(+), 71 deletions(-)
 create mode 100644 tests/common/compiler_fp16.h
 create mode 100644 tests/op/test_timvx_op_convolution.cpp
 create mode 100644 tests/op/test_timvx_op_dropout.cpp
 create mode 100644 tests/op/test_timvx_op_pooling.cpp
 rename tests/op/{test_op_prelu_timvx.c => test_timvx_op_prelu.cpp} (89%)
 create mode 100644 tests/op/test_timvx_op_relu.cpp
 create mode 100644 tests/op/test_timvx_op_relu1.cpp

diff --git a/.github/workflows/linux-x64-cpu-timvx-gcc.yml b/.github/workflows/linux-x64-cpu-timvx-gcc.yml
index 53e97ffc..d3d1c1f6 100644
--- a/.github/workflows/linux-x64-cpu-timvx-gcc.yml
+++ b/.github/workflows/linux-x64-cpu-timvx-gcc.yml
@@ -21,6 +21,8 @@ jobs:
         cp -rvf ./TIM-VX/prebuilt-sdk/x86_64_linux/lib/*      ./3rdparty/tim-vx/lib/x86_64/
         cp -rvf ./TIM-VX/include  ./source/device/tim-vx/
         cp -rvf ./TIM-VX/src      ./source/device/tim-vx/
+        rm ./source/device/tim-vx/src/tim/vx/*_test.cc
+        rm ./source/device/tim-vx/src/tim/vx/ops/*_test.cc
     - name: configure
       run: mkdir build && cd build && cmake -DTENGINE_ENABLE_TIM_VX=ON ..
     - name: build
diff --git a/.github/workflows/test-coverage.yml b/.github/workflows/test-coverage.yml
index 60212ded..fdc64716 100644
--- a/.github/workflows/test-coverage.yml
+++ b/.github/workflows/test-coverage.yml
@@ -75,8 +75,10 @@ jobs:
         cp -rvf ./TIM-VX/prebuilt-sdk/x86_64_linux/lib/*      ./3rdparty/tim-vx/lib/x86_64/
         cp -rvf ./TIM-VX/include  ./source/device/tim-vx/
         cp -rvf ./TIM-VX/src      ./source/device/tim-vx/
+        rm ./source/device/tim-vx/src/tim/vx/*_test.cc
+        rm ./source/device/tim-vx/src/tim/vx/ops/*_test.cc
     - name: configure
-      run: mkdir build && cd build && cmake -DCMAKE_BUILD_TYPE=debug -DTENGINE_ENABLE_TIM_VX=ON -DTENGINE_ENABLE_TIM_VX_INTEGRATION=ON -DTENGINE_COVERAGE=ON ..
+      run: mkdir build && cd build && cmake -DCMAKE_BUILD_TYPE=debug -DTENGINE_ENABLE_TIM_VX=ON -DTENGINE_ENABLE_TIM_VX_INTEGRATION=ON -DTENGINE_COVERAGE=ON -DTENGINE_BUILD_TESTS=ON ..
     - name: build
       run: cmake --build build -j 2
     - name: models
@@ -85,12 +87,20 @@ jobs:
         wget ftp://219.139.34.182/tengine/ci_test/images.zip
         unzip images.zip
         git clone https://github.com/BUG1989/tengine_test_data.git
+        git clone https://github.com/BUG1989/onnx_op_test_data.git
+        mv onnx_op_test_data onnx_node        
     - name: test-timvx-simulator
       run: |
         cd build
-        export LD_LIBRARY_PATH=../3rdparty/tim-vx/lib/x86_64/
+        export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../3rdparty/tim-vx/lib/x86_64/
+        cp -v ../3rdparty/tim-vx/include/CL/cl_viv_vx_ext.h ./tests/
         ./examples/tm_classification_timvx -m ./tengine_test_data/mobilenet_uint8.tmfile -i ./images/cat.jpg -g 224,224, -s 0.017,0.017,0.017
         ./examples/tm_classification_timvx -m ./tengine_test_data/squeezenet_uint8.tmfile -i ./images/cat.jpg -g 227,227, -s 0.017,0.017,0.017
+    - name: test-op
+      run: |
+        cd build
+        export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../../3rdparty/tim-vx/lib/x86_64/
+        ctest --output-on-failure
     - name: lcov-collect
       run: |
         cd build
diff --git a/examples/tm_mobilefacenet.cpp b/examples/tm_mobilefacenet.cpp
index b429f4cd..5b6b8f84 100644
--- a/examples/tm_mobilefacenet.cpp
+++ b/examples/tm_mobilefacenet.cpp
@@ -50,7 +50,7 @@ void init(const char* modelfile)
     graph = create_graph(NULL, "tengine", modelfile);
     if (graph == NULL)
     {
-        fprintf(stderr, "grph nullptr %d\n", get_tengine_errno());
+        fprintf(stderr, "grph nullptr\n");
     }
     else
     {
diff --git a/examples/tm_mobilefacenet_uint8.cpp b/examples/tm_mobilefacenet_uint8.cpp
index cdc4780d..325aed62 100644
--- a/examples/tm_mobilefacenet_uint8.cpp
+++ b/examples/tm_mobilefacenet_uint8.cpp
@@ -60,7 +60,7 @@ void init(const char* modelfile)
     graph = create_graph(NULL, "tengine", modelfile);
     if (graph == NULL)
     {
-        fprintf(stderr, "grph nullptr %d\n", get_tengine_errno());
+        fprintf(stderr, "graph is nullptr.\n");
     }
     else
     {
diff --git a/examples/tm_openpose.cpp b/examples/tm_openpose.cpp
index 4e03db46..44510a33 100644
--- a/examples/tm_openpose.cpp
+++ b/examples/tm_openpose.cpp
@@ -187,7 +187,6 @@ int main(int argc, char* argv[])
     if (graph == nullptr)
     {
         std::cout << "Create graph0 failed\n";
-        std::cout << "errno: " << get_tengine_errno() << "\n";
         return -1;
     }
 
@@ -256,8 +255,7 @@ int main(int argc, char* argv[])
 
     if (get_tensor_shape(out_tensor, out_dim, 4) <= 0)
     {
-        std::cout << "get tensor shape failed, errno: " << get_tengine_errno() << "\n";
-        return 1;
+        return -1;
     }
 
     float* outdata = ( float* )get_tensor_buffer(out_tensor);
diff --git a/examples/tm_retinaface.cpp b/examples/tm_retinaface.cpp
index 54d32a25..494a1e30 100644
--- a/examples/tm_retinaface.cpp
+++ b/examples/tm_retinaface.cpp
@@ -478,7 +478,7 @@ int main(int argc, char* argv[])
     graph_t graph = create_graph(NULL, "tengine", model_file);
     if (graph == nullptr)
     {
-        printf("Load model to graph failed(%d).\n", get_tengine_errno());
+        printf("Load model to graph failed.\n");
         return -1;
     }
 
diff --git a/source/operator/prototype/relu1.c b/source/operator/prototype/relu1.c
index ee47a75b..96c15188 100644
--- a/source/operator/prototype/relu1.c
+++ b/source/operator/prototype/relu1.c
@@ -22,7 +22,6 @@
  * Author: bzhang@openailab.com
  */
 
-#include "relu_param.h"
 
 #include "api/c_api.h"
 #include "graph/tensor.h"
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 755378ae..79341931 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -51,7 +51,30 @@ tengine_test(tm_mfn                models/test_tm_mfn.cpp)
 ]]
 
 if (TENGINE_ENABLE_TIM_VX)
-    tengine_test(test_op_prelu_timvx        op/test_op_prelu_timvx.c)
+    function (tengine_timvx_op_test name file)    # builds tests/${file} as executable ${name} and registers it with CTest
+        add_executable (${name} ${CMAKE_CURRENT_SOURCE_DIR}/${file} "${PROJECT_SOURCE_DIR}/tests/common/tengine_operations.cpp")
+        # link against the target named after the top-level project
+        target_link_libraries (${name} PRIVATE ${CMAKE_PROJECT_NAME})
+        # include paths: source tree, build tree, and the shared helpers in tests/common
+        target_include_directories (${name} PRIVATE "${PROJECT_SOURCE_DIR}/source")
+        target_include_directories (${name} PRIVATE "${CMAKE_CURRENT_BINARY_DIR}")
+        target_include_directories (${name} PRIVATE "${PROJECT_BINARY_DIR}")
+        target_include_directories (${name} PRIVATE "${PROJECT_BINARY_DIR}/source")
+        target_include_directories (${name} PRIVATE "${PROJECT_SOURCE_DIR}/tests/common")
+        # 32-bit ARM builds outside Android/OHOS additionally get -mfp16-format=ieee
+        if (${TENGINE_TARGET_PROCESSOR} MATCHES "ARM" AND (NOT ANDROID AND NOT OHOS) AND TENGINE_TARGET_PROCESSOR_32Bit)
+            target_compile_options (${name} PRIVATE "-mfp16-format=ieee")
+        endif()
+        # register the executable as a CTest test of the same name
+        add_test (${name} ${name})
+    endfunction()
+
+    tengine_timvx_op_test(test_timvx_op_convolution  op/test_timvx_op_convolution.cpp)
+    tengine_timvx_op_test(test_timvx_op_dropout      op/test_timvx_op_dropout.cpp)
+    tengine_timvx_op_test(test_timvx_op_pooling      op/test_timvx_op_pooling.cpp)
+    tengine_timvx_op_test(test_timvx_op_prelu        op/test_timvx_op_prelu.cpp)
+    tengine_timvx_op_test(test_timvx_op_relu         op/test_timvx_op_relu.cpp)
+    # tengine_timvx_op_test(test_timvx_op_relu1        op/test_timvx_op_relu1.cpp)
 endif()
 
 
diff --git a/tests/common/compiler_fp16.h b/tests/common/compiler_fp16.h
new file mode 100644
index 00000000..1857d7ee
--- /dev/null
+++ b/tests/common/compiler_fp16.h
@@ -0,0 +1,153 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * License); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * Copyright (c) 2020, OPEN AI LAB
+ * Author: haitao@openailab.com
+ */
+
+#ifndef __COMPILIER_FP16_H__
+#define __COMPILIER_FP16_H__
+
+#ifdef MACOS
+
+#else
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if defined __ARM_ARCH || defined __riscv
+
+#define fp16_to_fp32(data) \
+    ({                     \
+        float f = data;    \
+        f;                 \
+    })
+
+#define fp32_to_fp16(data) \
+    ({                     \
+        __fp16 f = data;   \
+        f;                 \
+    })
+
+#else
+#ifdef _MSC_VER
+#pragma  pack (push,1)
+struct fp16_pack    // bit-field view of a half: 10 fraction + 5 exponent + 1 sign bit
+{
+    unsigned short frac : 10;    // fraction bits
+    unsigned char exp : 5;       // exponent field; the converters below treat it as biased by 15
+    unsigned char sign : 1;      // sign bit
+};
+
+struct fp32_pack    // bit-field view of a float: 23 fraction + 8 exponent + 1 sign bit
+{
+    unsigned int frac : 23;      // fraction bits
+    unsigned char exp : 8;       // exponent field; the converters below treat it as biased by 127
+    unsigned char sign : 1;      // sign bit
+};
+#pragma pack(pop)
+#else    // non-MSVC compilers: same two structs, packed via __attribute__((packed))
+struct fp16_pack    // NOTE(review): bit-field allocation order is implementation-defined - confirm it matches the target
+{
+    unsigned short frac : 10;    // fraction bits
+    unsigned char exp : 5;       // exponent field; the converters below treat it as biased by 15
+    unsigned char sign : 1;      // sign bit
+} __attribute__((packed));
+
+struct fp32_pack    // bit-field view of a float: 23 fraction + 8 exponent + 1 sign bit
+{
+    unsigned int frac : 23;      // fraction bits
+    unsigned char exp : 8;       // exponent field; the converters below treat it as biased by 127
+    unsigned char sign : 1;      // sign bit
+} __attribute__((packed));
+#endif
+
+typedef struct fp16_pack __fp16;
+
+/* Software half -> float, used when the compiler has no native __fp16; NaN, +/-Inf, zero and normals are mapped. */
+static inline float fp16_to_fp32(__fp16 data)
+{
+    float f;
+    struct fp32_pack* fp32 = ( struct fp32_pack* )&f;
+    struct fp16_pack* fp16 = &data;
+
+    int exp = fp16->exp;
+
+    if(exp == 31 && fp16->frac != 0)
+    {
+        // NaN: keep the sign, force a non-zero fraction
+        fp32->sign = fp16->sign;
+        fp32->exp = 255;
+        fp32->frac = 1;
+
+        return f;
+    }
+
+    if(exp == 31)
+        exp = 255;    // +/-Inf; must not fall through to the re-bias below (255 - 15 + 127 overflows the field)
+    else if(exp != 0)
+        exp = (exp - 15) + 127;    // re-bias a normal exponent; a zero exponent stays zero
+
+    // NOTE(review): half subnormals (exp == 0, frac != 0) are not rescaled, only zero is exact in that range.
+    fp32->exp = exp;
+    fp32->sign = fp16->sign;
+    fp32->frac = (( int )fp16->frac) << 13;
+
+    return f;
+}
+
+/* Software float -> half (fraction truncated, not rounded); too-large values give +/-Inf, too-small ones signed zero. */
+static inline __fp16 fp32_to_fp16(float data)
+{
+    struct fp32_pack* fp32 = ( struct fp32_pack* )&data;
+    struct fp16_pack fp16;
+
+    int exp = fp32->exp - 127;      // unbiased exponent
+    int frac = fp32->frac >> 13;    // keep the top 10 fraction bits
+
+    if(fp32->exp == 255 && fp32->frac != 0)
+    {
+        // NaN
+        fp16.exp = 31;
+        fp16.frac = 1;
+        fp16.sign = fp32->sign;
+
+        return fp16;
+    }
+
+    // Outside the normal half range the fraction must be 0: a stale fraction would turn Inf into NaN
+    if(exp < -14 || exp > 15)
+        frac = 0;
+    exp = (exp > 15) ? 31 : ((exp < -14) ? 0 : exp + 15);
+
+    fp16.exp = exp;
+    fp16.frac = frac;
+    fp16.sign = fp32->sign;
+
+    return fp16;
+}
+#endif
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/tests/op/test_onnx_op.h b/tests/op/test_onnx_op.h
index 6ab5568d..1ae0369d 100644
--- a/tests/op/test_onnx_op.h
+++ b/tests/op/test_onnx_op.h
@@ -143,7 +143,7 @@ int float_mismatch(float* current, float* reference, int size)
         float tmp = fabs(current[i]) - fabs(reference[i]);
         if(fabs(tmp) > 0.0001)
         {
-            fprintf(stderr,"test failed, index:%d, a:%f, b:%f\n", i, current[i], reference[i]);
+            fprintf(stderr, "test failed, index:%d, a:%f, b:%f\n", i, current[i], reference[i]);
             return -1;
         }
     }
diff --git a/tests/op/test_op.h b/tests/op/test_op.h
index 76e9056d..6bba0dae 100644
--- a/tests/op/test_op.h
+++ b/tests/op/test_op.h
@@ -7,7 +7,8 @@
 #include <stdlib.h>
 #include <time.h>
 
-#include "float.h"
+//#include "float.h"
+#include "compiler_fp16.h"
 #include "tengine/c_api.h"
 
 #include "graph/graph.h"
@@ -47,7 +48,7 @@ void dump_tensor_line(void* data_ptr, int offset, int data_type, int w)
         }
         case TENGINE_DT_FP16:
         {
-            fp16_t* p = ( fp16_t* )data_ptr;
+            __fp16* p = ( __fp16* )data_ptr;
 
 #ifdef __ARM_ARCH
             for(int i = 0; i < w - 1; i++)
@@ -107,7 +108,7 @@ void dump_tensor(tensor_t tensor, const char* message)
     int dim_array[MAX_SHAPE_DIM_NUM] = { 0 };
     int dim_count = get_tensor_shape(tensor, dim_array, MAX_SHAPE_DIM_NUM);
     if (0 >= dim_count)
-        fprintf(stderr, "Cannot get tensor shape. ERRNO: %d", get_tengine_errno());
+        fprintf(stderr, "Cannot get tensor shape.");
 
     int line_count = 1;
     for (int i = 0; i < dim_count - 1; i++)
@@ -144,7 +145,7 @@ void dump_tensor(tensor_t tensor, const char* message)
             break;
         }
         default:
-            fprintf(stderr, "Cannot found the type of tensor. ERRNO: %d.\n", get_tengine_errno());
+            fprintf(stderr, "Cannot found the type of tensor.\n");
     }
 
     // print leader
@@ -186,7 +187,7 @@ void dump_node_input(node_t test_node, int index)
     tensor_t tensor = get_node_input_tensor(test_node, index);
     if(NULL == tensor)
     {
-        fprintf(stderr, "Get input tensor(%d) from the node failed. ERRNO: %d.\n", index, get_tengine_errno());
+        fprintf(stderr, "Get input tensor(%d) from the node failed.\n", index);
         return;
     }
 
@@ -204,7 +205,7 @@ void dump_node_output(node_t test_node, int index)
     tensor_t tensor = get_node_output_tensor(test_node, index);
     if(NULL == tensor)
     {
-        fprintf(stderr, "Get output tensor from the node failed. ERRNO: %d.\n", get_tengine_errno());
+        fprintf(stderr, "Get output tensor from the node failed.\n");
         return;
     }
 
@@ -222,7 +223,7 @@ int create_node(graph_t graph, const char* node_name, int n, int c, int h, int w
     node_t node = create_graph_node(graph, node_name, "InputOp");
     if (NULL == node)
     {
-        fprintf(stderr, "Create node(%s) with shape [n c h w] = [%d %d %d %d] failed. ERRNO: %d.\n", node_name, n, c, h, w, get_tengine_errno());
+        fprintf(stderr, "Create node(%s) with shape [n c h w] = [%d %d %d %d] failed.\n", node_name, n, c, h, w);
         return -1;
     }
 
@@ -231,7 +232,7 @@ int create_node(graph_t graph, const char* node_name, int n, int c, int h, int w
     {
         release_graph_node(node);
 
-        fprintf(stderr, "Create tensor from node(%s) with shape [n c h w] = [%d %d %d %d] failed. ERRNO: %d.\n", node_name, n, c, h, w, get_tengine_errno());
+        fprintf(stderr, "Create tensor from node(%s) with shape [n c h w] = [%d %d %d %d] failed.\n", node_name, n, c, h, w);
         return -1;
     }
 
@@ -272,7 +273,6 @@ int create_input_node(graph_t graph, const char* node_name, int data_type, int l
     if (NULL == node)
     {
         fprintf(stderr, "Create %d dims node(%s) failed. ", dims_count, node_name);
-        fprintf(stderr, "ERRNO: %d.\n", get_tengine_errno());
         return -1;
     }
 
@@ -282,7 +282,6 @@ int create_input_node(graph_t graph, const char* node_name, int data_type, int l
         release_graph_node(node);
 
         fprintf(stderr, "Create %d dims tensor for node(%s) failed. ", dims_count, node_name);
-        fprintf(stderr, "ERRNO: %d.\n", get_tengine_errno());
 
         return -1;
     }
@@ -294,7 +293,6 @@ int create_input_node(graph_t graph, const char* node_name, int data_type, int l
         release_graph_node(node);
 
         fprintf(stderr, "Set %d dims output tensor for node(%s) failed. ", dims_count, node_name);
-        fprintf(stderr, "ERRNO: %d.\n", get_tengine_errno());
 
         return -1;
     }
@@ -346,7 +344,7 @@ int create_input_node(graph_t graph, const char* node_name, int data_type, int l
             }
         }
         default:
-            fprintf(stderr, "Cannot support %d dims tensor. ERRNO: %d.\n", dims_count, get_tengine_errno());
+            fprintf(stderr, "Cannot support %d dims tensor.\n", dims_count);
     }
 
     release_graph_tensor(tensor);
@@ -421,7 +419,7 @@ void fill_input_float_tensor_by_index(graph_t graph, int input_node_index, int t
 {
     tensor_t tensor = get_graph_input_tensor(graph, input_node_index, tensor_index);
     if(NULL == tensor)
-        fprintf(stderr, "Cannot find the %dth tensor via node index(%d). ERRNO: %d.\n", tensor_index, input_node_index, get_tengine_errno());
+        fprintf(stderr, "Cannot find the %dth tensor via node index(%d).\n", tensor_index, input_node_index);
 
     int buf_size = get_tensor_buffer_size(tensor);
     float* data = (float* )malloc(buf_size);
@@ -431,11 +429,11 @@ void fill_input_float_tensor_by_index(graph_t graph, int input_node_index, int t
 
     int ret = set_tensor_buffer(tensor, (void* )data, buf_size);
     if(0 != ret)
-        fprintf(stderr, "Set buffer for tensor failed. ERRNO: %d.\n", get_tengine_errno());
+        fprintf(stderr, "Set buffer for tensor failed.\n");
 
     ret = fill_fp32_tensor(tensor, value);
     if(0 != ret)
-        fprintf(stderr, "Fill buffer for tensor failed. ERRNO: %d.\n", get_tengine_errno());
+        fprintf(stderr, "Fill buffer for tensor failed.\n");
 }
 
 
@@ -443,18 +441,18 @@ void fill_input_uint8_tensor_by_index(graph_t graph, int input_node_index, int t
 {
     tensor_t tensor = get_graph_input_tensor(graph, input_node_index, tensor_index);
     if(NULL == tensor)
-        fprintf(stderr, "Cannot find the %dth tensor via node index(%d). ERRNO: %d.\n", tensor_index, input_node_index, get_tengine_errno());
+        fprintf(stderr, "Cannot find the %dth tensor via node index(%d).\n", tensor_index, input_node_index);
 
     int buf_size = get_tensor_buffer_size(tensor);
     uint8_t* data = (uint8_t* )malloc(buf_size);
 
     int ret = set_tensor_buffer(tensor, (void* )data, buf_size);
     if(0 != ret)
-        fprintf(stderr, "Set buffer for tensor failed. ERRNO: %d.\n", get_tengine_errno());
+        fprintf(stderr, "Set buffer for tensor failed.\n");
 
     ret = fill_uint8_tensor(tensor, value);
     if(0 != ret)
-        fprintf(stderr, "Fill buffer for tensor failed. ERRNO: %d.\n", get_tengine_errno());
+        fprintf(stderr, "Fill buffer for tensor failed.\n");
 }
 
 
@@ -462,11 +460,11 @@ void fill_input_float_tensor_by_name(graph_t graph, const char* node_name, int t
 {
     node_t node = get_graph_node(graph, node_name);
     if(NULL == node)
-        fprintf(stderr, "Cannot get node via node name(%s). ERRNO: %d.\n", node_name, get_tengine_errno());
+        fprintf(stderr, "Cannot get node via node name(%s).\n", node_name);
 
     tensor_t tensor = get_node_input_tensor(node, tensor_index);
     if(NULL == tensor)
-        fprintf(stderr, "Cannot find the %dth tensor via node name(%s). ERRNO: %d.\n", tensor_index, node_name, get_tengine_errno());
+        fprintf(stderr, "Cannot find the %dth tensor via node name(%s)\n", tensor_index, node_name);
 
     int buf_size = get_tensor_buffer_size(tensor);
     float* data = (float* )malloc(buf_size);
@@ -480,7 +478,7 @@ void fill_input_float_tensor_by_name(graph_t graph, const char* node_name, int t
 
     ret = fill_fp32_tensor(tensor, value);
     if(0 != ret)
-        fprintf(stderr, "Fill buffer for tensor failed. ERRNO: %d.\n", get_tengine_errno());
+        fprintf(stderr, "Fill buffer for tensor failed.\n");
 }
 
 
@@ -488,11 +486,11 @@ void fill_input_float_buffer_tensor_by_name(graph_t graph, const char* node_name
 {
     node_t node = get_graph_node(graph, node_name);
     if(NULL == node)
-        fprintf(stderr, "Cannot get node via node name(%s). ERRNO: %d.\n", node_name, get_tengine_errno());
+        fprintf(stderr, "Cannot get node via node name(%s).\n", node_name);
 
     tensor_t tensor = get_node_input_tensor(node, tensor_index);
     if(NULL == tensor)
-        fprintf(stderr, "Cannot find the %dth tensor via node name(%s). ERRNO: %d.\n", tensor_index, node_name, get_tengine_errno());
+        fprintf(stderr, "Cannot find the %dth tensor via node name(%s).\n", tensor_index, node_name);
 
     int ret = set_tensor_buffer(tensor, value, buf_size);
     if(0 != ret)
@@ -505,14 +503,14 @@ void fill_input_integer_tensor_by_name(graph_t graph, const char* node_name, int
     node_t node = get_graph_node(graph, node_name);
     if(NULL == node)
     {
-        fprintf(stderr, "Cannot get node via node name(%s). ERRNO: %d.\n", node_name, get_tengine_errno());
+        fprintf(stderr, "Cannot get node via node name(%s).\n", node_name);
         return;
     }
 
     tensor_t tensor = get_node_input_tensor(node, tensor_index);
     if(NULL == tensor)
     {
-        fprintf(stderr, "Cannot find the %dth tensor via node name(%s). ERRNO: %d.\n", tensor_index, node_name, get_tengine_errno());
+        fprintf(stderr, "Cannot find the %dth tensor via node name(%s).\n", tensor_index, node_name);
         return;
     }
 
@@ -534,14 +532,6 @@ int test_graph_init()
     // TODO: fix this fatal issue
     init_tengine();
 
-    int ret = clr_tengine_errno();
-    if (0 != ret)
-    {
-        fprintf(stderr, "Graph init error. ERRNO: %d.\n", ret);
-        // disable this -1 state for now
-        //return -1;
-    }
-
     return 0;
 }
 
@@ -550,17 +540,15 @@ int test_graph_run(graph_t graph)
 {
     if(prerun_graph(graph) < 0)
     {
-        fprintf(stderr, "Pre-run graph failed. ERRNO: %d.\n", get_tengine_errno());
+        fprintf(stderr, "Pre-run graph failed.\n");
         return -1;
     }
 
     dump_graph(graph);
-    if (0 != get_tengine_errno())
-        fprintf(stderr, "Dump graph error. But ignored this for now. ERRNO: %d.\n", get_tengine_errno());
 
     if (0 != run_graph(graph, 1))
     {
-        fprintf(stderr, "Run graph error. ERRNO: %d.\n", get_tengine_errno());
+        fprintf(stderr, "Run graph error.\n");
         return -1;
     }
 
@@ -581,26 +569,26 @@ graph_t create_common_test_graph(const char* test_node_name, int data_type, int
     graph_t graph = create_graph(NULL, NULL, NULL);
     if(NULL == graph)
     {
-        fprintf(stderr, "get graph failed. ERRNO: %d.\n", get_tengine_errno());
+        fprintf(stderr, "get graph failed.\n");
         return NULL;
     }
 
     if(set_graph_layout(graph, layout) < 0)
     {
-        fprintf(stderr, "set layout failed. ERRNO: %d.\n", get_tengine_errno());
+        fprintf(stderr, "set layout failed.\n");
         return NULL;
     }
 
     const char* input_name = "input_node";
     if(create_input_node(graph, input_name, data_type, layout, n, c, h, w) < 0)
     {
-        fprintf(stderr, "create input node failed. ERRNO: %d.\n", get_tengine_errno());
+        fprintf(stderr, "create input node failed.\n");
         return NULL;
     }
 
     if(test_func(graph, input_name, test_node_name, data_type, layout, n, c, h ,w) < 0)
     {
-        fprintf(stderr, "create test node failed. ERRNO: %d.\n", get_tengine_errno());
+        fprintf(stderr, "create test node failed.\n");
         return NULL;
     }
 
@@ -610,13 +598,13 @@ graph_t create_common_test_graph(const char* test_node_name, int data_type, int
 
     if(set_graph_input_node(graph, inputs, sizeof(inputs) / sizeof(char*)) < 0)
     {
-        fprintf(stderr, "set inputs failed. ERRNO: %d.\n", get_tengine_errno());
+        fprintf(stderr, "set inputs failed.\n");
         return NULL;
     }
 
     if(set_graph_output_node(graph, outputs, sizeof(outputs) / sizeof(char*)) < 0)
     {
-        fprintf(stderr, "set outputs failed. ERRNO: %d.\n", get_tengine_errno());
+        fprintf(stderr, "set outputs failed.\n");
         return NULL;
     }
 
@@ -638,26 +626,26 @@ graph_t create_timvx_test_graph(const char* test_node_name, int data_type, int l
     graph_t graph = create_graph(timvx_context, NULL, NULL);
     if(NULL == graph)
     {
-        fprintf(stderr, "get graph failed. ERRNO: %d.\n", get_tengine_errno());
+        fprintf(stderr, "get graph failed.\n");
         return NULL;
     }
 
     if(set_graph_layout(graph, layout) < 0)
     {
-        fprintf(stderr, "set layout failed. ERRNO: %d.\n", get_tengine_errno());
+        fprintf(stderr, "set layout failed.\n");
         return NULL;
     }
 
     const char* input_name = "input_node";
     if(create_input_node(graph, input_name, data_type, layout, n, c, h, w) < 0)
     {
-        fprintf(stderr, "create input node failed. ERRNO: %d.\n", get_tengine_errno());
+        fprintf(stderr, "create input node failed.\n");
         return NULL;
     }
 
     if(test_func(graph, input_name, test_node_name, data_type, layout, n, c, h ,w) < 0)
     {
-        fprintf(stderr, "create test node failed. ERRNO: %d.\n", get_tengine_errno());
+        fprintf(stderr, "create test node failed.\n");
         return NULL;
     }
 
@@ -667,13 +655,13 @@ graph_t create_timvx_test_graph(const char* test_node_name, int data_type, int l
 
     if(set_graph_input_node(graph, inputs, sizeof(inputs) / sizeof(char*)) < 0)
     {
-        fprintf(stderr, "set inputs failed. ERRNO: %d.\n", get_tengine_errno());
+        fprintf(stderr, "set inputs failed.\n");
         return NULL;
     }
 
     if(set_graph_output_node(graph, outputs, sizeof(outputs) / sizeof(char*)) < 0)
     {
-        fprintf(stderr, "set outputs failed. ERRNO: %d.\n", get_tengine_errno());
+        fprintf(stderr, "set outputs failed.\n");
         return NULL;
     }
 
@@ -725,8 +713,8 @@ int compare_tensor(tensor_t a, tensor_t b)
         }
         case TENGINE_DT_FP16:
         {
-            fp16_t* a_data_ptr = (fp16_t*)get_tensor_buffer(a);
-            fp16_t* b_data_ptr = (fp16_t*)get_tensor_buffer(b);
+            __fp16* a_data_ptr = (__fp16*)get_tensor_buffer(a);
+            __fp16* b_data_ptr = (__fp16*)get_tensor_buffer(b);
 
             for (int i = 0; i < element_size; i++)
             {
diff --git a/tests/op/test_timvx_op_convolution.cpp b/tests/op/test_timvx_op_convolution.cpp
new file mode 100644
index 00000000..d5cd6f86
--- /dev/null
+++ b/tests/op/test_timvx_op_convolution.cpp
@@ -0,0 +1,214 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * License); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * Copyright (c) 2020, OPEN AI LAB
+ * Author: qtang@openailab.com
+ */
+
+
+#include "test_op.h"
+
+#include "graph/graph.h"
+#include "graph/node.h"
+#include "graph/tensor.h"
+#include "operator/prototype/convolution_param.h"
+
+
+/* Adds the "Convolution" test node plus its Const uint8 weight tensor to graph; returns 0, or -1 if a lookup/creation fails. */
+int create_test_convolution_node(graph_t graph, const char* input_name, const char* node_name, int data_type, int layout, int n, int c, int h, int w)
+{
+    (void)layout; (void)n; (void)c; (void)h; (void)w;
+
+    /* create the test node */
+    struct node* test_node = (struct node* )create_graph_node(graph, node_name, "Convolution");
+    tensor_t input_tensor = get_graph_tensor(graph, input_name);
+
+    /* test_node->op is dereferenced below, so a failed node creation must be rejected here too */
+    if(NULL == test_node || NULL == input_tensor)
+    {
+        fprintf(stderr, "create test node failed.\n");
+        return -1;
+    }
+
+    /* sub node producing the extra input the test node needs: the weight (bias is not wired up yet) */
+    node_t weight_node = create_graph_node(graph, "weight", "Const");
+    tensor_t weight_tensor = create_graph_tensor(graph, "weight", TENGINE_DT_UINT8);
+    set_node_output_tensor(weight_node, 0, weight_tensor, TENSOR_TYPE_CONST);
+    int weight_dims[4] = {1, 1, 3, 3};  // presumably {out_c, in_c, kernel_h, kernel_w} - confirm
+    set_tensor_shape(weight_tensor, weight_dims, 4);
+
+    /* bias */
+    // node_t bias_node = create_graph_node(graph, "bias", "Const");
+    // tensor_t bias_tensor = create_graph_tensor(graph, "bias", TENGINE_DT_INT32);
+    // set_node_output_tensor(bias_node, 0, bias_tensor, TENSOR_TYPE_CONST);
+    // int bias_dims[1] = {1};  // channel num
+    // set_tensor_shape(bias_tensor, bias_dims, 1);
+
+    /* input tensors of test node */
+    set_node_input_tensor(test_node, 0, input_tensor);
+    set_node_input_tensor(test_node, 1, weight_tensor);
+    // set_node_input_tensor(test_node, 2, bias_tensor);
+
+    /* output tensors of test node */
+    tensor_t output_tensor = create_graph_tensor(graph, node_name, data_type);
+    set_node_output_tensor(test_node, 0, output_tensor, TENSOR_TYPE_VAR);
+
+    /* set params: 3x3 kernel, stride 1, padding 1 on every side, one input and one output channel */
+    struct conv_param* conv_param = ( struct conv_param* )test_node->op.param_mem;
+
+    conv_param->kernel_h = 3;
+    conv_param->kernel_w = 3;
+    conv_param->stride_h = 1;
+    conv_param->stride_w = 1;
+    conv_param->pad_h0 = 1;
+    conv_param->pad_h1 = 1;
+    conv_param->pad_w0 = 1;
+    conv_param->pad_w1 = 1;
+    conv_param->dilation_h = 1;
+    conv_param->dilation_w = 1;
+    conv_param->input_channel = 1;
+    conv_param->output_channel = 1;
+    conv_param->group = 1;
+    conv_param->activation = -1;    // presumably "no fused activation" - confirm against convolution_param.h
+
+    return 0;
+}
+
+/*
+ * scale = (max - min) / 255
+ * zero_point = -min / scale
+ * uint8   = clip(round(float32 / scale) + zero_point, 0, 255)
+ * float32 = (uint8 - zero_point) * scale
+ */
+float input_fp32[9] = {-3, -2, 1,
+                        1,  0, 2,
+                        1,  1, 1};
+float input_scale = 0.0196078f;
+int input_zero_point = 153;
+
+float weight_fp32[9] = {1, 1, 1,
+                        1, 1, 1,
+                        1, 1, 1};
+float weight_scale = 0.0039216f;
+int weight_zero_point = 0;
+
+float reference_out[9] = {-4, -1, 1,
+                          -2,  2, 3,
+                           3,  6, 4};
+float output_scale = 0.03921568f;
+int output_zero_point = 102;
+
+
+/* Quantizes `size` floats to uint8: round(value / scale + zero_point), clamped to [0, 255]. */
+void get_uint8_data(float* data_fp32, uint8_t* date_u8, int size, float scale, int zero_point)
+{
+    const float* src = data_fp32;
+    uint8_t* dst = date_u8;
+
+    while (size-- > 0)
+    {
+        int quantized = (round)(*src++ / scale + zero_point);
+        quantized = (quantized > 255) ? 255 : quantized;
+        *dst++ = (quantized < 0) ? 0 : quantized;
+    }
+}
+
+/* Runs one uint8 3x3 convolution on the TIM-VX test graph and compares the dequantized output with reference_out. */
+int main(int argc, char* argv[])
+{
+    int n = 1, c = 1, h = 3, w = 3;
+    const char* test_node_name = "conv";
+    int data_type = TENGINE_DT_UINT8;
+    int layout = TENGINE_LAYOUT_NCHW;
+
+    // init
+    int ret = test_graph_init();
+    if (0 != ret)
+        fprintf(stderr, "Tengine init failed.\n");
+
+    // create
+    struct graph* ir_graph = (struct graph* )create_timvx_test_graph(test_node_name, data_type, layout, n, c, h, w, &create_test_convolution_node);
+    if(NULL == ir_graph)
+        return -1;
+
+    set_log_level(LOG_INFO);
+    dump_graph(ir_graph);
+
+    // look up the tensors by name; they are dereferenced below, so stop here if any lookup failed
+    struct tensor* input_tensor = (struct tensor*)get_graph_tensor(ir_graph, "input_node");
+    struct tensor* weight_tensor = (struct tensor*)get_graph_tensor(ir_graph, "weight");
+    struct tensor* output_tensor = (struct tensor*)get_graph_tensor(ir_graph, "conv");
+    if (NULL == input_tensor || NULL == weight_tensor || NULL == output_tensor)
+    {
+        fprintf(stderr, "Get tensors of the test graph failed.\n");
+        test_graph_release(ir_graph);
+        return -1;
+    }
+
+    // set quantize params
+    set_tensor_quant_param(input_tensor, &input_scale, &input_zero_point, 1);
+    set_tensor_quant_param(weight_tensor, &weight_scale, &weight_zero_point, 1);
+    set_tensor_quant_param(output_tensor, &output_scale, &output_zero_point, 1);
+
+    // set input data
+    uint8_t input_u8[9] = {0};
+    get_uint8_data(input_fp32, input_u8, 9, input_scale, input_zero_point);
+    set_tensor_buffer(input_tensor, input_u8, 9);
+
+    // set weight data
+    uint8_t weight_u8[9] = {0};
+    get_uint8_data(weight_fp32, weight_u8, 9, weight_scale, weight_zero_point);
+    set_tensor_buffer(weight_tensor, weight_u8, 9);
+
+    // graph run
+    ret = test_graph_run(ir_graph);
+    if (0 != ret)
+    {
+        fprintf(stderr, "Run graph error. ERRNO: %d.\n", ret);
+        test_graph_release(ir_graph);
+        return -1;
+    }
+
+    // get output; re-read the output quant params before dequantizing
+    uint8_t* output_u8 = ( uint8_t* )output_tensor->data;
+    int output_size = output_tensor->elem_num;
+    get_tensor_quant_param(output_tensor, &output_scale, &output_zero_point, 1);
+
+    // check the result: dequantize each element on the fly, so no temporary buffer is allocated or leaked
+    ret = 0;
+    for (int i = 0; i< output_size; i++)
+    {
+        float value = (( float )output_u8[i] - ( float )output_zero_point) * output_scale;
+        if (fabsf(value - reference_out[i]) > 0.1)
+        {
+            fprintf(stderr, "index:%d, a:%f, b:%f\n", i, value, reference_out[i]);
+            ret = -1;
+        }
+    }
+
+    if (ret == 0)
+        fprintf(stderr, "test pass.\n");
+    else
+        fprintf(stderr, "test failed.\n");
+
+    // exit
+    test_graph_release(ir_graph);
+    return ret;
+}
diff --git a/tests/op/test_timvx_op_dropout.cpp b/tests/op/test_timvx_op_dropout.cpp
new file mode 100644
index 00000000..ac991bf6
--- /dev/null
+++ b/tests/op/test_timvx_op_dropout.cpp
@@ -0,0 +1,138 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * License); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * Copyright (c) 2020, OPEN AI LAB
+ * Author: qtang@openailab.com
+ */
+
+
+#include "test_op.h"
+
+
+/* Adds a "Dropout" node to `graph` that reads tensor `input_name` and writes a new tensor named `node_name`; returns 0 on success, -1 on failure. */
+int create_test_dropout_node(graph_t graph, const char* input_name, const char* node_name, int data_type, int layout, int n, int c, int h, int w)
+{
+    (void)layout; (void)n; (void)c; (void)h; (void)w;
+    /* create the test node and look up its input; stop before either handle is used if one is missing */
+    node_t test_node = create_graph_node(graph, node_name, "Dropout");
+    tensor_t input_tensor = get_graph_tensor(graph, input_name);
+    if (NULL == test_node || NULL == input_tensor)
+    {
+        fprintf(stderr, "create test node failed.\n");
+        return -1;
+    }
+    set_node_input_tensor(test_node, 0, input_tensor);
+
+    /* the output tensor shares the node's name; verify it was created before attaching it */
+    tensor_t output_tensor = create_graph_tensor(graph, node_name, data_type);
+    if (NULL == output_tensor)
+    {
+        fprintf(stderr, "create test node failed.\n");
+        return -1;
+    }
+    set_node_output_tensor(test_node, 0, output_tensor, TENSOR_TYPE_VAR);
+    return 0;
+}
+
+/* Per-channel expected dequantized output; main() compares every element of channel i with reference_out[i]. */
+float reference_out[3] = {-10.f, -10.f, -10.f};
+/* uint8 quantization used by this test (with scale 0.039216 and zero_point 255, codes 0..255 span roughly -10.0..0.0):
+ * scale = (max - min) / 255
+ * zero_point = -min / scale
+ * uint8   = clip(round(float32 / scale) + zero_point, 0, 255)
+ * float32 = (uint8 - zero_point) * scale
+ */
+float input_scale = 0.039216f;
+int input_zero_point = 255;
+float output_scale = 0.039216f;
+int output_zero_point = 255;
+
+/* Dropout uint8 op test: builds the graph with create_timvx_test_graph, passes -10.0 to the input fill helper, runs the graph and checks each channel of the dequantized output against reference_out. Returns 0 on pass, -1 otherwise. */
+int main(int argc, char* argv[])
+{
+    (void)argc; (void)argv;
+    int n = 1, c = 3, h = 4, w = 5;
+    const char* test_node_name = "dropout";
+    int data_type = TENGINE_DT_UINT8;
+    int layout = TENGINE_LAYOUT_NCHW;
+
+    // init: stop right away on failure instead of carrying on without an initialized runtime
+    int ret = test_graph_init();
+    if (0 != ret)
+    {
+        fprintf(stderr, "Tengine init failed.\n");
+        return -1;
+    }
+
+    // create
+    graph_t graph = create_timvx_test_graph(test_node_name, data_type, layout, n, c, h, w, &create_test_dropout_node);
+    if (NULL == graph)
+        return -1;
+
+    // set quantize params
+    tensor_t input_tensor = get_graph_input_tensor(graph, 0, 0);
+    tensor_t quant_output = get_graph_output_tensor(graph, 0, 0);
+    set_tensor_quant_param(input_tensor, &input_scale, &input_zero_point, 1);
+    set_tensor_quant_param(quant_output, &output_scale, &output_zero_point, 1);
+
+    // set input data
+    fill_input_uint8_tensor_by_index(graph, 0, 0, -10.0f);
+
+    // graph run
+    ret = test_graph_run(graph);
+    if (0 != ret)
+    {
+        fprintf(stderr, "Run graph error. ERRNO: %d.\n", ret);
+        test_graph_release(graph);
+        return -1;
+    }
+
+    // get output and the quant params needed to dequantize it
+    struct tensor* output_tensor = (struct tensor*)get_graph_output_tensor(graph, 0, 0);
+    uint8_t* output_u8 = ( uint8_t* )output_tensor->data;
+    int out_c = output_tensor->dims[1];
+    int cstep = output_tensor->dims[2] * output_tensor->dims[3];
+    get_tensor_quant_param(output_tensor, &output_scale, &output_zero_point, 1);
+
+    // check the result: dequantize each element in place, so there is no scratch buffer to allocate or leak
+    ret = 0;
+    for (int i = 0; i < out_c; i++)
+    {
+        for (int j = 0; j < cstep; j++)
+        {
+            float value = (( float )output_u8[i * cstep + j] - ( float )output_zero_point) * output_scale;
+            if (fabsf(value - reference_out[i]) > 0.01)
+            {
+                fprintf(stderr, "index:%d, a:%f, b:%f\n", j, value, reference_out[i]);
+                ret = -1;
+            }
+        }
+    }
+
+    if (ret == 0)
+        fprintf(stderr, "test pass.\n");
+    else
+        fprintf(stderr, "test failed.\n");
+
+    // exit
+    test_graph_release(graph);
+
+    return ret;
+}
diff --git a/tests/op/test_timvx_op_pooling.cpp b/tests/op/test_timvx_op_pooling.cpp
new file mode 100644
index 00000000..61a02c7a
--- /dev/null
+++ b/tests/op/test_timvx_op_pooling.cpp
@@ -0,0 +1,159 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * License); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * Copyright (c) 2020, OPEN AI LAB
+ * Author: qtang@openailab.com
+ */
+
+
+#include "test_op.h"
+#include "operator/prototype/pooling_param.h"
+
+
+/* Adds a "Pooling" node (3x3 max pooling, stride 2, no padding) to `graph`, reading tensor `input_name` and writing a new tensor named `node_name`; returns 0 on success, -1 on failure. */
+int create_test_pool_node(graph_t graph, const char* input_name, const char* node_name, int data_type, int layout, int n, int c, int h, int w)
+{
+    (void)layout; (void)n; (void)c; (void)h; (void)w;
+    /* create the test node and look up its input; the node is dereferenced below, so it must be checked here */
+    struct node* test_node = (struct node* )create_graph_node(graph, node_name, "Pooling");
+    tensor_t input_tensor = get_graph_tensor(graph, input_name);
+    if (NULL == test_node || NULL == input_tensor)
+    {
+        fprintf(stderr, "create test node failed.\n");
+        return -1;
+    }
+    set_node_input_tensor(test_node, 0, input_tensor);
+
+    /* the output tensor shares the node's name; verify it was created before attaching it */
+    tensor_t output_tensor = create_graph_tensor(graph, node_name, data_type);
+    if (NULL == output_tensor)
+    {
+        fprintf(stderr, "create test node failed.\n");
+        return -1;
+    }
+    set_node_output_tensor(test_node, 0, output_tensor, TENSOR_TYPE_VAR);
+
+    /* set params: 3x3 max pooling, stride 2, all paddings zero */
+    struct pool_param* pool_param = ( struct pool_param* )test_node->op.param_mem;
+    pool_param->pool_method = POOL_MAX;
+    pool_param->global = 0;
+    pool_param->kernel_h = 3;
+    pool_param->kernel_w = 3;
+    pool_param->stride_h = 2;
+    pool_param->stride_w = 2;
+    pool_param->pad_h0 = 0;
+    pool_param->pad_h1 = 0;
+    pool_param->pad_w0 = 0;
+    pool_param->pad_w1 = 0;
+    pool_param->pad_h0_org = 0;
+    pool_param->pad_h1_org = 0;
+    pool_param->pad_w0_org = 0;
+    pool_param->pad_w1_org = 0;
+    pool_param->caffe_flavor = 0;
+    pool_param->funct = NULL;
+
+    return 0;
+}
+
+/* Per-channel expected dequantized output; main() compares every element of channel i with reference_out[i]. */
+float reference_out[3] = {-10.f, -10.f, -10.f};
+/* uint8 quantization used by this test (with scale 0.039216 and zero_point 255, codes 0..255 span roughly -10.0..0.0):
+ * scale = (max - min) / 255
+ * zero_point = -min / scale
+ * uint8   = clip(round(float32 / scale) + zero_point, 0, 255)
+ * float32 = (uint8 - zero_point) * scale
+ */
+float input_scale = 0.039216f;
+int input_zero_point = 255;
+float output_scale = 0.039216f;
+int output_zero_point = 255;
+
+/* Pooling uint8 op test: builds the graph with create_timvx_test_graph, passes -10.0 to the input fill helper, runs the graph and checks each channel of the dequantized output against reference_out. Returns 0 on pass, -1 otherwise. */
+int main(int argc, char* argv[])
+{
+    (void)argc; (void)argv;
+    int n = 1, c = 3, h = 4, w = 5;
+    const char* test_node_name = "pooling";
+    int data_type = TENGINE_DT_UINT8;
+    int layout = TENGINE_LAYOUT_NCHW;
+
+    // init: stop right away on failure instead of carrying on without an initialized runtime
+    int ret = test_graph_init();
+    if (0 != ret)
+    {
+        fprintf(stderr, "Tengine init failed.\n");
+        return -1;
+    }
+
+    // create
+    graph_t graph = create_timvx_test_graph(test_node_name, data_type, layout, n, c, h, w, &create_test_pool_node);
+    if (NULL == graph)
+        return -1;
+
+    // set quantize params
+    tensor_t input_tensor = get_graph_input_tensor(graph, 0, 0);
+    tensor_t quant_output = get_graph_output_tensor(graph, 0, 0);
+    set_tensor_quant_param(input_tensor, &input_scale, &input_zero_point, 1);
+    set_tensor_quant_param(quant_output, &output_scale, &output_zero_point, 1);
+
+    // set input data
+    fill_input_uint8_tensor_by_index(graph, 0, 0, -10.0f);
+
+    // graph run
+    ret = test_graph_run(graph);
+    if (0 != ret)
+    {
+        fprintf(stderr, "Run graph error. ERRNO: %d.\n", ret);
+        test_graph_release(graph);
+        return -1;
+    }
+
+    // get output and the quant params needed to dequantize it
+    struct tensor* output_tensor = (struct tensor*)get_graph_output_tensor(graph, 0, 0);
+    uint8_t* output_u8 = ( uint8_t* )output_tensor->data;
+    int out_c = output_tensor->dims[1];
+    int cstep = output_tensor->dims[2] * output_tensor->dims[3];
+    get_tensor_quant_param(output_tensor, &output_scale, &output_zero_point, 1);
+
+    // check the result: dequantize each element in place, so there is no scratch buffer to allocate or leak
+    ret = 0;
+    for (int i = 0; i < out_c; i++)
+    {
+        for (int j = 0; j < cstep; j++)
+        {
+            float value = (( float )output_u8[i * cstep + j] - ( float )output_zero_point) * output_scale;
+            if (fabsf(value - reference_out[i]) > 0.01)
+            {
+                fprintf(stderr, "index:%d, a:%f, b:%f\n", j, value, reference_out[i]);
+                ret = -1;
+            }
+        }
+    }
+
+    if (ret == 0)
+        fprintf(stderr, "test pass.\n");
+    else
+        fprintf(stderr, "test failed.\n");
+
+    // exit
+    test_graph_release(graph);
+
+    return ret;
+}
diff --git a/tests/op/test_op_prelu_timvx.c b/tests/op/test_timvx_op_prelu.cpp
similarity index 89%
rename from tests/op/test_op_prelu_timvx.c
rename to tests/op/test_timvx_op_prelu.cpp
index c512794e..fe3fc0a6 100644
--- a/tests/op/test_op_prelu_timvx.c
+++ b/tests/op/test_timvx_op_prelu.cpp
@@ -22,6 +22,7 @@
  * Author: qtang@openailab.com
  */
 
+
 #include "test_op.h"
 
 
@@ -36,7 +37,7 @@ int create_test_prelu_node(graph_t graph, const char* input_name, const char* no
 
     if(NULL == input_tensor)
     {
-        fprintf(stderr, "create test node failed. ERRNO: %d.\n", get_tengine_errno());
+        fprintf(stderr, "create test node failed.\n");
         return -1;
     }
 
@@ -72,12 +73,12 @@ float result_value[3] = {-1.f, -2.f, -3.f};
  */
 float input_scale = 0.039216f;
 int input_zero_point = 255;
-float output_scale = 0.007843f;
-int output_zero_point = 382;
+float output_scale = 0.011764f;
+int output_zero_point = 255;
 
 int main(int argc, char* argv[])
 {
-    int n = 1, c = 3, h = 6, w = 6;
+    int n = 1, c = 3, h = 4, w = 5;
     const char* test_node_name = "prelu";
     int data_type = TENGINE_DT_UINT8;
     int layout = TENGINE_LAYOUT_NCHW;
@@ -85,7 +86,7 @@ int main(int argc, char* argv[])
     // init
     int ret = test_graph_init();
     if (0 != ret)
-        fprintf(stderr, "Tengine init failed. ERRNO: %d.", get_tengine_errno());
+        fprintf(stderr, "Tengine init failed.\n");
 
     // create
     graph_t graph = create_timvx_test_graph(test_node_name, data_type, layout, n, c, h, w, &create_test_prelu_node);
@@ -102,7 +103,7 @@ int main(int argc, char* argv[])
     fill_input_uint8_tensor_by_index(graph, 0, 0, -10.0f);
 
     // set slope data, need cost fp32 to fp16
-    __fp16* slope_fp16 = (__fp16*)sys_malloc(3 * sizeof(__fp16));
+    __fp16* slope_fp16 = (__fp16*)malloc(3 * sizeof(__fp16));
     for (int k = 0; k < 3; k++)
         slope_fp16[k] = fp32_to_fp16(slope_value[k]);
 
@@ -118,7 +119,7 @@ int main(int argc, char* argv[])
     }
 
     // get output and dequant
-    struct ir_tensor* output_tensor = get_graph_output_tensor(graph, 0, 0);
+    struct tensor* output_tensor = (struct tensor*)get_graph_output_tensor(graph, 0, 0);
     uint8_t* output_u8 = ( uint8_t* )output_tensor->data;
     int output_size = output_tensor->elem_num;
     int out_c = output_tensor->dims[1];
@@ -138,9 +139,8 @@ int main(int argc, char* argv[])
         {
             if (fabsf(output_value[j] - result_value[i]) > 0.01)
             {
-                fprintf(stderr, "Check result failed, current %f, expect %f\n", output_value[j], result_value[i]);
+                fprintf(stderr, "index:%d, a:%f, b:%f\n", j, output_value[j], result_value[i]);
                 ret = -1;
-                break;
             }
         }
     }
diff --git a/tests/op/test_timvx_op_relu.cpp b/tests/op/test_timvx_op_relu.cpp
new file mode 100644
index 00000000..1ed17f27
--- /dev/null
+++ b/tests/op/test_timvx_op_relu.cpp
@@ -0,0 +1,138 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * License); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * Copyright (c) 2020, OPEN AI LAB
+ * Author: qtang@openailab.com
+ */
+
+
+#include "test_op.h"
+
+
+/* Adds a "ReLU" node to `graph` that reads tensor `input_name` and writes a new tensor named `node_name`; returns 0 on success, -1 on failure. */
+int create_test_relu_node(graph_t graph, const char* input_name, const char* node_name, int data_type, int layout, int n, int c, int h, int w)
+{
+    (void)layout; (void)n; (void)c; (void)h; (void)w;
+    /* create the test node and look up its input; stop before either handle is used if one is missing */
+    node_t test_node = create_graph_node(graph, node_name, "ReLU");
+    tensor_t input_tensor = get_graph_tensor(graph, input_name);
+    if (NULL == test_node || NULL == input_tensor)
+    {
+        fprintf(stderr, "create test node failed.\n");
+        return -1;
+    }
+    set_node_input_tensor(test_node, 0, input_tensor);
+
+    /* the output tensor shares the node's name; verify it was created before attaching it */
+    tensor_t output_tensor = create_graph_tensor(graph, node_name, data_type);
+    if (NULL == output_tensor)
+    {
+        fprintf(stderr, "create test node failed.\n");
+        return -1;
+    }
+    set_node_output_tensor(test_node, 0, output_tensor, TENSOR_TYPE_VAR);
+    return 0;
+}
+
+/* Per-channel expected dequantized output; main() compares every element of channel i with reference_out[i]. */
+float reference_out[3] = {0.f, 0.f, 0.f};
+/* uint8 quantization used by this test (with scale 0.039216 and zero_point 255, codes 0..255 span roughly -10.0..0.0):
+ * scale = (max - min) / 255
+ * zero_point = -min / scale
+ * uint8   = clip(round(float32 / scale) + zero_point, 0, 255)
+ * float32 = (uint8 - zero_point) * scale
+ */
+float input_scale = 0.039216f;
+int input_zero_point = 255;
+float output_scale = 0.039216f;
+int output_zero_point = 255;
+
+/* ReLU uint8 op test: builds the graph with create_timvx_test_graph, passes -10.0 to the input fill helper, runs the graph and checks each channel of the dequantized output against reference_out. Returns 0 on pass, -1 otherwise. */
+int main(int argc, char* argv[])
+{
+    (void)argc; (void)argv;
+    int n = 1, c = 3, h = 4, w = 5;
+    const char* test_node_name = "relu";
+    int data_type = TENGINE_DT_UINT8;
+    int layout = TENGINE_LAYOUT_NCHW;
+
+    // init: stop right away on failure instead of carrying on without an initialized runtime
+    int ret = test_graph_init();
+    if (0 != ret)
+    {
+        fprintf(stderr, "Tengine init failed.\n");
+        return -1;
+    }
+
+    // create
+    graph_t graph = create_timvx_test_graph(test_node_name, data_type, layout, n, c, h, w, &create_test_relu_node);
+    if (NULL == graph)
+        return -1;
+
+    // set quantize params
+    tensor_t input_tensor = get_graph_input_tensor(graph, 0, 0);
+    tensor_t quant_output = get_graph_output_tensor(graph, 0, 0);
+    set_tensor_quant_param(input_tensor, &input_scale, &input_zero_point, 1);
+    set_tensor_quant_param(quant_output, &output_scale, &output_zero_point, 1);
+
+    // set input data
+    fill_input_uint8_tensor_by_index(graph, 0, 0, -10.0f);
+
+    // graph run
+    ret = test_graph_run(graph);
+    if (0 != ret)
+    {
+        fprintf(stderr, "Run graph error. ERRNO: %d.\n", ret);
+        test_graph_release(graph);
+        return -1;
+    }
+
+    // get output and the quant params needed to dequantize it
+    struct tensor* output_tensor = (struct tensor*)get_graph_output_tensor(graph, 0, 0);
+    uint8_t* output_u8 = ( uint8_t* )output_tensor->data;
+    int out_c = output_tensor->dims[1];
+    int cstep = output_tensor->dims[2] * output_tensor->dims[3];
+    get_tensor_quant_param(output_tensor, &output_scale, &output_zero_point, 1);
+
+    // check the result: dequantize each element in place, so there is no scratch buffer to allocate or leak
+    ret = 0;
+    for (int i = 0; i < out_c; i++)
+    {
+        for (int j = 0; j < cstep; j++)
+        {
+            float value = (( float )output_u8[i * cstep + j] - ( float )output_zero_point) * output_scale;
+            if (fabsf(value - reference_out[i]) > 0.01)
+            {
+                fprintf(stderr, "index:%d, a:%f, b:%f\n", j, value, reference_out[i]);
+                ret = -1;
+            }
+        }
+    }
+
+    if (ret == 0)
+        fprintf(stderr, "test pass.\n");
+    else
+        fprintf(stderr, "test failed.\n");
+
+    // exit
+    test_graph_release(graph);
+
+    return ret;
+}
diff --git a/tests/op/test_timvx_op_relu1.cpp b/tests/op/test_timvx_op_relu1.cpp
new file mode 100644
index 00000000..8e3134a8
--- /dev/null
+++ b/tests/op/test_timvx_op_relu1.cpp
@@ -0,0 +1,138 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * License); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * Copyright (c) 2020, OPEN AI LAB
+ * Author: qtang@openailab.com
+ */
+
+
+#include "test_op.h"
+
+
+/* Adds a "ReLU1" node to `graph` that reads tensor `input_name` and writes a new tensor named `node_name`; returns 0 on success, -1 on failure. */
+int create_test_relu1_node(graph_t graph, const char* input_name, const char* node_name, int data_type, int layout, int n, int c, int h, int w)
+{
+    (void)layout; (void)n; (void)c; (void)h; (void)w;
+    /* create the test node and look up its input; stop before either handle is used if one is missing */
+    node_t test_node = create_graph_node(graph, node_name, "ReLU1");
+    tensor_t input_tensor = get_graph_tensor(graph, input_name);
+    if (NULL == test_node || NULL == input_tensor)
+    {
+        fprintf(stderr, "create test node failed.\n");
+        return -1;
+    }
+    set_node_input_tensor(test_node, 0, input_tensor);
+
+    /* the output tensor shares the node's name; verify it was created before attaching it */
+    tensor_t output_tensor = create_graph_tensor(graph, node_name, data_type);
+    if (NULL == output_tensor)
+    {
+        fprintf(stderr, "create test node failed.\n");
+        return -1;
+    }
+    set_node_output_tensor(test_node, 0, output_tensor, TENSOR_TYPE_VAR);
+    return 0;
+}
+
+/* Per-channel expected dequantized output. NOTE(review): ReLU1 presumably clamps to [-1, 1], so expecting -10 for a -10 input looks copied from another test - confirm. */
+float reference_out[3] = {-10.f, -10.f, -10.f};
+/* uint8 quantization used by this test (with scale 0.039216 and zero_point 255, codes 0..255 span roughly -10.0..0.0):
+ * scale = (max - min) / 255
+ * zero_point = -min / scale
+ * uint8   = clip(round(float32 / scale) + zero_point, 0, 255)
+ * float32 = (uint8 - zero_point) * scale
+ */
+float input_scale = 0.039216f;
+int input_zero_point = 255;
+float output_scale = 0.039216f;
+int output_zero_point = 255;
+
+/* ReLU1 uint8 op test: builds the graph with create_timvx_test_graph, passes -10.0 to the input fill helper, runs the graph and checks each channel of the dequantized output against reference_out. Returns 0 on pass, -1 otherwise. */
+int main(int argc, char* argv[])
+{
+    (void)argc; (void)argv;
+    int n = 1, c = 3, h = 4, w = 5;
+    const char* test_node_name = "relu1";
+    int data_type = TENGINE_DT_UINT8;
+    int layout = TENGINE_LAYOUT_NCHW;
+
+    // init: stop right away on failure instead of carrying on without an initialized runtime
+    int ret = test_graph_init();
+    if (0 != ret)
+    {
+        fprintf(stderr, "Tengine init failed.\n");
+        return -1;
+    }
+
+    // create
+    graph_t graph = create_timvx_test_graph(test_node_name, data_type, layout, n, c, h, w, &create_test_relu1_node);
+    if (NULL == graph)
+        return -1;
+
+    // set quantize params
+    tensor_t input_tensor = get_graph_input_tensor(graph, 0, 0);
+    tensor_t quant_output = get_graph_output_tensor(graph, 0, 0);
+    set_tensor_quant_param(input_tensor, &input_scale, &input_zero_point, 1);
+    set_tensor_quant_param(quant_output, &output_scale, &output_zero_point, 1);
+
+    // set input data
+    fill_input_uint8_tensor_by_index(graph, 0, 0, -10.0f);
+
+    // graph run
+    ret = test_graph_run(graph);
+    if (0 != ret)
+    {
+        fprintf(stderr, "Run graph error. ERRNO: %d.\n", ret);
+        test_graph_release(graph);
+        return -1;
+    }
+
+    // get output and the quant params needed to dequantize it
+    struct tensor* output_tensor = (struct tensor*)get_graph_output_tensor(graph, 0, 0);
+    uint8_t* output_u8 = ( uint8_t* )output_tensor->data;
+    int out_c = output_tensor->dims[1];
+    int cstep = output_tensor->dims[2] * output_tensor->dims[3];
+    get_tensor_quant_param(output_tensor, &output_scale, &output_zero_point, 1);
+
+    // check the result: dequantize each element in place, so there is no scratch buffer to allocate or leak
+    ret = 0;
+    for (int i = 0; i < out_c; i++)
+    {
+        for (int j = 0; j < cstep; j++)
+        {
+            float value = (( float )output_u8[i * cstep + j] - ( float )output_zero_point) * output_scale;
+            if (fabsf(value - reference_out[i]) > 0.01)
+            {
+                fprintf(stderr, "index:%d, a:%f, b:%f\n", j, value, reference_out[i]);
+                ret = -1;
+            }
+        }
+    }
+
+    if (ret == 0)
+        fprintf(stderr, "test pass.\n");
+    else
+        fprintf(stderr, "test failed.\n");
+
+    // exit
+    test_graph_release(graph);
+
+    return ret;
+}
-- 
GitLab