未验证 提交 44ff45f7 编写于 作者: B BowShotDS 提交者: GitHub

update timvx_bn (#721)

上级 2597262e
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* License); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Copyright (c) 2021, Open AI Lab
* Author: hhchen@openailab.com
*/
#include "timvx_executor.hpp"
extern "C"
{
#include "operator/op.h"
#include "batchnorm_param.h"
}
/* Translate a Tengine BatchNorm node into a TIM-VX BatchNorm operation.
 *
 * ir_node: the Tengine IR node (op type OP_BATCHNORM).
 * Returns true on success, false if the node has an unexpected input count.
 *
 * Tengine orders the BatchNorm inputs [input, gamma, beta, mean, var],
 * while tim::vx::ops::BatchNorm expects [input, mean, variance, gamma, beta];
 * the in_set table remaps TIM-VX slot i to the Tengine input index.
 */
bool VXEngine::AddBatchNormNode(struct node* ir_node)
{
    struct graph* ir_graph = ir_node->graph;

    /* Guard the fixed-size remap table: a malformed node with more than 5
     * inputs would otherwise read in_set[] out of bounds. */
    if (ir_node->input_num > 5)
        return false;

    const int in_set[5] = {0, 3, 4, 1, 2};

    std::vector<std::shared_ptr<tim::vx::Tensor> > bn_in_tensor(ir_node->input_num);
    for (int i = 0; i < ir_node->input_num; i++)
    {
        int idx = in_set[i];
        struct tensor* input_tensor = get_ir_graph_tensor(ir_graph, ir_node->input_tensors[idx]);
        bn_in_tensor[i] = this->vx_tensor_map[input_tensor->index];
    }
    struct tensor* output_tensor = get_ir_graph_tensor(ir_graph, ir_node->output_tensors[0]);
    struct batchnorm_param* param = (struct batchnorm_param*)ir_node->op.param_mem;

    /* NOTE(review): other Add*Node functions in this file record the created
     * operation in vx_node_map; confirm whether BatchNorm should do so too. */
    auto batchnorm = graph->CreateOperation<tim::vx::ops::BatchNorm>(param->eps);
    (*batchnorm)
        .BindInputs(bn_in_tensor)  /* already a vector — no need to wrap in braces */
        .BindOutputs({ this->vx_tensor_map[output_tensor->index] });

    return true;
}
......@@ -73,7 +73,7 @@ bool VXEngine::AddConvolutionNode(struct node* ir_node)
}
int multiplier = 0;
if (param->group == weight_tensor->dims[0])
if ( (param->group == weight_tensor->dims[0]) && (param->group != 1) )
multiplier = 1;
auto conv = this->graph->CreateOperation<tim::vx::ops::Conv2d>(
weight_tensor->dims[0], tim::vx::PadType::AUTO,
......
......@@ -53,19 +53,24 @@ bool VXEngine::AddInterpNode(struct node* ir_node)
TLOG_ERR("Tengine: VX does not support resize type(%d).\n", (int)resize_type);
}
std::vector<std::shared_ptr<tim::vx::Tensor> > add_in_tensor(ir_node->input_num);
for (int i = 0; i < ir_node->input_num; i++)
{
struct tensor* input_tensor = get_ir_graph_tensor(ir_graph, ir_node->input_tensors[i]);
add_in_tensor[i] = this->vx_tensor_map[input_tensor->index];
}
// std::vector<std::shared_ptr<tim::vx::Tensor> > add_in_tensor(ir_node->input_num);
// for (int i = 0; i < ir_node->input_num; i++)
// {
// struct tensor* input_tensor = get_ir_graph_tensor(ir_graph, ir_node->input_tensors[i]);
// add_in_tensor[i] = this->vx_tensor_map[input_tensor->index];
// }
std::shared_ptr<tim::vx::Tensor> add_in_tensor;
struct tensor* input_tensor = get_ir_graph_tensor(ir_graph, ir_node->input_tensors[0]);
add_in_tensor = this->vx_tensor_map[input_tensor->index];
auto resize = graph->CreateOperation<tim::vx::ops::Resize>(resize_type, 0.0f, false, false, param->output_height, param->output_width);
vx_node_map[ir_node->index] = resize;
(*resize)
.BindInputs(add_in_tensor)
.BindOutputs({ this->vx_tensor_map[output_tensor->index] });
//.BindInputs(add_in_tensor)
.BindInput(add_in_tensor)
.BindOutputs({ this->vx_tensor_map[output_tensor->index] });
return true;
}
......@@ -53,19 +53,16 @@ bool VXEngine::AddResizeNode(struct node* ir_node)
TLOG_ERR("Tengine: VX does not support resize type(%d).\n", (int)resize_type);
}
std::vector<std::shared_ptr<tim::vx::Tensor> > add_in_tensor(ir_node->input_num);
for (int i = 0; i < ir_node->input_num; i++)
{
struct tensor* input_tensor = get_ir_graph_tensor(ir_graph, ir_node->input_tensors[i]);
add_in_tensor[i] = this->vx_tensor_map[input_tensor->index];
}
std::shared_ptr<tim::vx::Tensor> add_in_tensor;
struct tensor* input_tensor = get_ir_graph_tensor(ir_graph, ir_node->input_tensors[0]);
add_in_tensor = this->vx_tensor_map[input_tensor->index];
auto resize = graph->CreateOperation<tim::vx::ops::Resize>(resize_type, 0.0f, false, false, output_tensor->dims[2], output_tensor->dims[3]);
vx_node_map[ir_node->index] = resize;
(*resize)
.BindInputs(add_in_tensor)
.BindOutputs({ this->vx_tensor_map[output_tensor->index] });
.BindInput(add_in_tensor)
.BindOutputs({ this->vx_tensor_map[output_tensor->index] });
return true;
}
......@@ -23,7 +23,6 @@
*/
#include "timvx_executor.hpp"
#include "timvx_define.h"
#ifdef TIMVX_MODEL_CACHE
......@@ -36,7 +35,6 @@
#include <fstream>
#endif
///////////////////////////////////////////////////////////////////////////////////////
VXEngine::VXEngine()
{
......@@ -57,6 +55,9 @@ int VXEngine::VXTensorMap(struct graph* ir_graph, int ir_tensor_idx, int spec_ty
tim::vx::DataType datatype;
switch(ir_tensor->data_type)
{
case (0):
datatype = tim::vx::DataType::FLOAT32;
break;
case (1):
datatype = tim::vx::DataType::FLOAT16;
break;
......@@ -67,8 +68,8 @@ int VXEngine::VXTensorMap(struct graph* ir_graph, int ir_tensor_idx, int spec_ty
datatype = tim::vx::DataType::INT32;
break;
default:
TLOG_ERR("FP32 Tensor: Tensor_name(%s) tensor_index(%d) tensor_data_type(%d) .\n",ir_tensor->name, ir_tensor->index, ir_tensor->data_type);
return -1;
TLOG_ERR("Tensor date type: Tensor_name(%s) tensor_index(%d) tensor_data_type(%d) .\n",ir_tensor->name, ir_tensor->index, ir_tensor->data_type);
break;
}
tim::vx::ShapeType vx_shape;
......@@ -102,16 +103,26 @@ int VXEngine::VXTensorMap(struct graph* ir_graph, int ir_tensor_idx, int spec_ty
/* create the vx tesnor */
std::shared_ptr<tim::vx::Tensor> vx_tensor;
fprintf(stderr,"tensor name %s\n",ir_tensor->name);
if (spec_type == SPEC_TYPE_OUTPUT)
{
tim::vx::TensorSpec vx_spec(datatype, vx_shape,
tim::vx::TensorAttribute::OUTPUT, vx_quant);
vx_tensor = this->graph->CreateTensor(vx_spec);
}
else if (ir_tensor->data_type == TENGINE_DT_FP32)
{
tim::vx::Quantization none_quant(tim::vx::QuantType::NONE, 1, 0);
tim::vx::TensorSpec vx_spec(datatype, vx_shape,
tim::vx::TensorAttribute::CONSTANT, none_quant);
vx_tensor = this->graph->CreateTensor(vx_spec, ir_tensor->data);
}
else if (spec_type == SPEC_TYPE_DWCONV)
{
auto tmpvx = vx_shape[ir_tensor->dim_num - 2];
vx_shape[ir_tensor->dim_num - 2] = vx_shape[ir_tensor->dim_num - 1];
vx_shape[ir_tensor->dim_num - 1] = 1;
vx_shape[ir_tensor->dim_num - 1] = tmpvx;
tim::vx::TensorSpec vx_spec(datatype, vx_shape,
tim::vx::TensorAttribute::CONSTANT, vx_quant);
vx_tensor = this->graph->CreateTensor(vx_spec, ir_tensor->data);
......@@ -146,6 +157,7 @@ int VXEngine::VXTensorMap(struct graph* ir_graph, int ir_tensor_idx, int spec_ty
}
else if (ir_tensor->tensor_type == TENSOR_TYPE_CONST)
{
fprintf(stderr," vx_shape %d %d %d %d\n", vx_shape[0], vx_shape[1], vx_shape[2], vx_shape[3]);
tim::vx::TensorSpec vx_spec(datatype, vx_shape,
tim::vx::TensorAttribute::CONSTANT, vx_quant);
vx_tensor = this->graph->CreateTensor(vx_spec, ir_tensor->data);
......@@ -169,6 +181,9 @@ int VXEngine::Build(struct subgraph* subgraph)
switch (op_type)
{
case OP_BATCHNORM:
this->AddBatchNormNode(ir_node);
break;
case OP_CLIP:
this->AddClipNode(ir_node);
break;
......@@ -386,7 +401,7 @@ int VXEngine::VXEnginePreRun(struct subgraph* subgraph)
if (ir_node->op.type == OP_CONV)
{
auto conv_param = (struct conv_param*)ir_node->op.param_mem;
if (conv_param->group == conv_param->output_channel)
if ((conv_param->group == conv_param->output_channel) && (conv_param->output_channel != 1))
{
this->VXTensorMap(ir_graph, ir_node->input_tensors[1], SPEC_TYPE_DWCONV);
}
......@@ -496,6 +511,10 @@ int VXEngine::VXEngineRun(struct subgraph* subgraph)
TLOG_INFO("Tengine: Copy output data from VX tensor to CPU failed.\n");
return -1;
}
char dir_str[32] = { 0 };
extract_feature_from_tensor_timvx(dir_str, ir_tensor->name, ir_tensor);
}
......
......@@ -55,9 +55,10 @@ extern "C"
#include "tim/vx/operation.h"
#include "tim/vx/ops/activations.h"
#include "tim/vx/ops/depth2space.h"
#include "tim/vx/ops/batchnorm.h"
#include "tim/vx/ops/concat.h"
#include "tim/vx/ops/conv2d.h"
#include "tim/vx/ops/depth2space.h"
#include "tim/vx/ops/elementwise.h"
#include "tim/vx/ops/fullyconnected.h"
#include "tim/vx/ops/gather.h"
......@@ -95,6 +96,7 @@ private:
int Build(struct subgraph* subgraph);
int VXTensorMap(struct graph* ir_graph, int ir_tensor_idx, int spec_type);
bool AddBatchNormNode(struct node* ir_node);
bool AddClipNode(struct node* ir_node);
bool AddConcatNode(struct node* ir_node);
bool AddConvolutionNode(struct node* ir_node);
......
......@@ -37,7 +37,7 @@ const int timvx_supported_ops[] = {
// OP_ADD_N,
// OP_ARGMAX,
// OP_ARGMIN,
// OP_BATCHNORM,
OP_BATCHNORM,
// OP_BATCHTOSPACEND,
// OP_BIAS,
// OP_BROADMUL,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册