添加yolov4demo (#389)

* add tm_yolov4

添加yolov4demo (#389)
* add tm_yolov4
42a1a3f5 · shitouren1994 · GitHub · 156aa419 · 42a1a3f5 · 42a1a3f5
隐藏空白更改
内联并排

Showing with 546 addition and 0 deletion

examples/CMakeLists.txt examples/CMakeLists.txt +1 -0

examples/tm_yolov4.cpp examples/tm_yolov4.cpp +545 -0

未找到文件。
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -22,6 +22,7 @@ tengine_example(tm_yolov3_uint8             tm_yolov3_uint8.cpp)
 tengine_example(tm_landmark                 tm_landmark.cpp)
 tengine_example(tm_landmark_uint8           tm_landmark_uint8.cpp)
 tengine_example(tm_mobilefacenet            tm_mobilefacenet.cpp)
+tengine_example(tm_yolov4                   tm_yolov4.cpp)


 # add examples with opencv

--- a/examples/tm_yolov4.cpp
+++ b/examples/tm_yolov4.cpp
+#include <stdio.h>
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <vector>
+#include <algorithm>
+
+#include "common.h"
+#include "tengine_c_api.h"
+#include "tengine_operations.h"
+
+#define YOLOV4_NUM_BOXES 3
+#define YOLOV4_TOTAL_ANCHOR 9
+#define CLASSES_COCO 80
+
+const float s_thresh = 0.5;
+const float s_hier_thresh = 0.5;
+const float s_nms = 0.45;
+
+float s_anchors[] = {12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401};
+
+typedef struct layer
+{
+    int total_anchor;
+    int box, c, h, w;
+    int out_n, out_c, out_h, out_w;
+    int classes;
+    int inputs;
+    int outputs;
+    int* anchor_mask;
+    float* anchors;
+    float* output;
+    int coords;
+} layer;
+
+typedef struct
+{
+    float x, y, w, h;
+} box;
+
+typedef struct
+{
+    box bbox;
+    float x, y, w, h;
+    int classes;
+    float* prob;
+    float objectness;
+    int sort_class;
+} detection;
+
+layer make_darknet_layer(int w, int h, int net_w, int net_h, int n, int total, int classes)
+{
+    layer l = {0};
+    l.box = n;
+    l.total_anchor = total;
+    l.h = h;
+    l.w = w;
+    l.c = n * (classes + 4 + 1);
+    l.out_w = l.w;
+    l.out_h = l.h;
+    l.out_c = l.c;
+    l.classes = classes;
+    l.inputs = l.w * l.h * l.c;
+
+    l.anchors = ( float* )calloc(total * 2, sizeof(float));
+    l.anchor_mask = ( int* )calloc(n, sizeof(int));
+    if (9 == total)
+    {
+        for (int i = 0; i < total * 2; ++i)
+        {
+            l.anchors[i] = s_anchors[i];
+        }
+        if (l.w == net_w / 32)
+        {
+            int j = 6;
+            for (int i = 0; i < l.box; ++i)
+                l.anchor_mask[i] = j++;
+        }
+        if (l.w == net_w / 16)
+        {
+            int j = 3;
+            for (int i = 0; i < l.box; ++i)
+                l.anchor_mask[i] = j++;
+        }
+        if (l.w == net_w / 8)
+        {
+            int j = 0;
+            for (int i = 0; i < l.box; ++i)
+                l.anchor_mask[i] = j++;
+        }
+    }
+    l.outputs = l.inputs;
+    l.output = ( float* )calloc(l.outputs, sizeof(float));
+
+    return l;
+}
+
+int entry_index(layer l, int box, int channel, int loc)
+{
+    return box * l.w * l.h * (4 + l.classes + 1) + channel * l.w * l.h + loc;
+}
+
+inline void logistic_cpu(float* input, int size)
+{
+    for (int i = 0; i < size; ++i)
+    {
+        input[i] = 1.f / (1.f + expf(-input[i]));
+    }
+}
+
+inline float logistic_cpu(float input)
+{
+    return 1.f / (1.f + expf(-input));
+}
+
+void decodebox(layer l, box& b, int box_index, int row, int col, int input_w, int input_h)
+{
+    b.x = (col + logistic_cpu(b.x)) / l.w;
+    b.y = (row + logistic_cpu(b.y)) / l.h;
+    b.w = exp(b.w) * l.anchors[2 * l.anchor_mask[box_index]] / input_w;
+    b.h = exp(b.h) * l.anchors[2 * l.anchor_mask[box_index] + 1] / input_h;
+}
+
+void correct_yolo_boxes(std::vector<detection*>& dets, int n, int w, int h, int netw, int neth)
+{
+    int i;
+    int new_w = 0;
+    int new_h = 0;
+    if ((( float )netw / w) < (( float )neth / h))
+    {
+        new_w = netw;
+        new_h = (h * netw) / w;
+    }
+    else
+    {
+        new_h = neth;
+        new_w = (w * neth) / h;
+    }
+    for (i = 0; i < n; ++i)
+    {
+        box b = dets[i]->bbox;
+        b.x = (b.x - (netw - new_w) / 2. / netw) / (( float )new_w / netw);
+        b.y = (b.y - (neth - new_h) / 2. / neth) / (( float )new_h / neth);
+        b.w *= ( float )netw / new_w;
+        b.h *= ( float )neth / new_h;
+
+        dets[i]->bbox = b;
+    }
+}
+
+std::vector<detection*> forward_darknet_layer_cpu(const float* input, layer l, int img_w, int img_h, int net_w,
+                                                  int net_h)
+{
+    std::vector<detection*> dets;
+    memcpy(( void* )l.output, ( void* )input, sizeof(float) * l.inputs);
+
+    for (int i = 0; i < l.box; i++)
+    {
+        int index = entry_index(l, i, 4, 0);
+        logistic_cpu(l.output + index, l.w * l.h);
+        for (size_t loc = 0; loc < l.w * l.h; loc++)
+        {
+            if (l.output[index + loc] > s_thresh)
+            {
+                /* row col */
+                int row = loc / l.w;
+                int col = loc % l.w;
+
+                detection* temp_detection = ( detection* )calloc(1, sizeof(detection));
+
+                /* objectness */
+                temp_detection->objectness = l.output[index + loc];
+
+                /* bbox */
+                temp_detection->bbox.x = l.output[entry_index(l, i, 0, loc)];
+                temp_detection->bbox.y = l.output[entry_index(l, i, 1, loc)];
+                temp_detection->bbox.w = l.output[entry_index(l, i, 2, loc)];
+                temp_detection->bbox.h = l.output[entry_index(l, i, 3, loc)];
+                decodebox(l, temp_detection->bbox, i, row, col, net_w, net_h);
+
+                /* classes_prob */
+                temp_detection->prob = ( float* )calloc(l.classes, sizeof(float));
+                for (int j = 5; j < l.classes + 5; j++)
+                {
+                    int grid_index = entry_index(l, i, j, loc);
+                    logistic_cpu(l.output + grid_index, 1);
+                    temp_detection->prob[j - 5] = l.output[grid_index] > s_thresh ? l.output[grid_index] : 0;
+                }
+
+                /* classes_num */
+                temp_detection->classes = l.classes;
+
+                dets.push_back(temp_detection);
+            }
+        }
+    }
+
+    if (dets.size() > 0)
+    {
+        correct_yolo_boxes(dets, dets.size(), img_w, img_h, net_w, net_h);
+    }
+
+    return dets;
+}
+
+int nms_comparator(const detection* pa, const detection* pb)
+{
+    float diff = 0;
+    if (pb->sort_class >= 0)
+    {
+        diff = pb->prob[pb->sort_class] - pb->prob[pb->sort_class];
+    }
+    else
+    {
+        diff = pb->objectness - pb->objectness;
+    }
+    if (diff < 0)
+        return -1;
+    else if (diff > 0)
+        return 1;
+    return 0;
+}
+
+float overlap(float x1, float w1, float x2, float w2)
+{
+    float l1 = x1 - w1 / 2;
+    float l2 = x2 - w2 / 2;
+    float left = l1 > l2 ? l1 : l2;
+    float r1 = x1 + w1 / 2;
+    float r2 = x2 + w2 / 2;
+    float right = r1 < r2 ? r1 : r2;
+    return right - left;
+}
+
+float box_intersection(box a, box b)
+{
+    float w = overlap(a.x, a.w, b.x, b.w);
+    float h = overlap(a.y, a.h, b.y, b.h);
+    if (w < 0 || h < 0)
+        return 0;
+    float area = w * h;
+    return area;
+}
+
+float box_union(box a, box b)
+{
+    float i = box_intersection(a, b);
+    float u = a.w * a.h + b.w * b.h - i;
+    return u;
+}
+
+float box_iou(box a, box b)
+{
+    return box_intersection(a, b) / box_union(a, b);
+}
+
+void do_nms_sort(std::vector<detection*>& dets, int total, int classes, float thresh)
+{
+    int i, j, k;
+    k = total - 1;
+    for (i = 0; i <= k; ++i)
+    {
+        if (dets[i]->objectness == 0)
+        {
+            detection* swap = dets[i];
+            dets[i] = dets[k];
+            dets[k] = swap;
+            --k;
+            --i;
+        }
+    }
+    total = k + 1;
+
+    for (k = 0; k < classes; ++k)
+    {
+        for (i = 0; i < total; ++i)
+        {
+            dets[i]->sort_class = k;
+        }
+        std::sort(dets.begin(), dets.end(), nms_comparator);
+        for (i = 0; i < total; ++i)
+        {
+            if (dets[i]->prob[k] == 0)
+                continue;
+            box a = dets[i]->bbox;
+            for (j = i + 1; j < total; ++j)
+            {
+                box b = dets[j]->bbox;
+                if (box_iou(a, b) > thresh)
+                {
+                    dets[j]->prob[k] = 0;
+                }
+            }
+        }
+    }
+}
+
+void get_input_data_darknet(const char* image_file, float* input_data, int net_h, int net_w)
+{
+    int size = 3 * net_w * net_h;
+    image sized;
+    image im = load_image_stb(image_file, 3);
+    for (int i = 0; i < im.c * im.h * im.w; i++)
+    {
+        im.data[i] = im.data[i] / 255;
+    }
+    sized = letterbox(im, net_w, net_h);
+    memcpy(input_data, sized.data, size * sizeof(float));
+
+    free_image(sized);
+    free_image(im);
+}
+
+void show_usage()
+{
+    fprintf(
+        stderr,
+        "[Usage]:  [-h]\n    [-m model_file] [-i image_file] [-r repeat_count] [-t thread_count] [-s size:608:512] \n");
+}
+
+int main(int argc, char* argv[])
+{
+    const char* model_file = nullptr;
+    const char* image_file = nullptr;
+    int net_h = 608;
+    int net_w = 608;
+    int repeat_count = 1;
+    int num_thread = 1;
+
+    int res;
+    while ((res = getopt(argc, argv, "m:i:r:t:h:s:")) != -1)
+    {
+        switch (res)
+        {
+            case 'm':
+                model_file = optarg;
+                break;
+            case 'i':
+                image_file = optarg;
+                break;
+            case 'r':
+                repeat_count = std::strtoul(optarg, nullptr, 10);
+                break;
+            case 't':
+                num_thread = std::strtoul(optarg, nullptr, 10);
+                break;
+            case 's':
+                net_w = std::strtoul(optarg, nullptr, 10);
+                net_h = net_w;
+                fprintf(stderr, "set net input size: %d %d\n", net_h, net_w);
+                break;
+            case 'h':
+                show_usage();
+                return 0;
+            default:
+                break;
+        }
+    }
+
+    /* check files */
+    if (nullptr == model_file)
+    {
+        fprintf(stderr, "Error: Tengine model file not specified!\n");
+        show_usage();
+        return -1;
+    }
+
+    if (nullptr == image_file)
+    {
+        fprintf(stderr, "Error: Image file not specified!\n");
+        show_usage();
+        return -1;
+    }
+
+    if (!check_file_exist(model_file) || !check_file_exist(image_file))
+        return -1;
+
+    /* init */
+    init_tengine();
+    fprintf(stderr, "tengine-lite library version: %s\n", get_tengine_version());
+
+    /* create graph, load tengine model xxx.tmfile */
+    graph_t graph = create_graph(nullptr, "tengine", model_file);
+    if (graph == nullptr)
+    {
+        fprintf(stderr, "Create graph failed.\n");
+        fprintf(stderr, "errno: %d \n", get_tengine_errno());
+        return -1;
+    }
+
+    /* set the input shape to initial the graph, and prerun graph to infer shape */
+    int img_size = net_h * net_w * 3;
+    int dims[] = {1, 3, net_h, net_w};    // nchw
+
+    std::vector<float> input_data(img_size);
+
+    tensor_t input_tensor = get_graph_input_tensor(graph, 0, 0);
+    if (input_tensor == nullptr)
+    {
+        fprintf(stderr, "Get input tensor failed\n");
+        return -1;
+    }
+
+    if (set_tensor_shape(input_tensor, dims, 4) < 0)
+    {
+        fprintf(stderr, "Set input tensor shape failed\n");
+        return -1;
+    }
+
+    if (prerun_graph(graph) < 0)
+    {
+        fprintf(stderr, "Prerun graph failed\n");
+        return -1;
+    }
+
+    /* prepare process input data, set the data mem to input tensor */
+    get_input_data_darknet(image_file, input_data.data(), net_h, net_w);
+    if (set_tensor_buffer(input_tensor, input_data.data(), img_size * 4) < 0)
+    {
+        fprintf(stderr, "Set input tensor buffer failed\n");
+        return -1;
+    }
+
+    /* run graph */
+    double min_time = __DBL_MAX__;
+    double max_time = -__DBL_MAX__;
+    double total_time = 0.;
+    for (int i = 0; i < 1; i++)
+    {
+        double start = get_current_time();
+        if (run_graph(graph, 1) < 0)
+        {
+            fprintf(stderr, "Run graph failed\n");
+            return -1;
+        }
+        double end = get_current_time();
+        double cur = end - start;
+        total_time += cur;
+        min_time = std::min(min_time, cur);
+        max_time = std::max(max_time, cur);
+    }
+    fprintf(stderr, "Repeat %d times, thread %d, avg time %.2f ms, max_time %.2f ms, min_time %.2f ms\n", 1, 1,
+            total_time, max_time, min_time);
+    fprintf(stderr, "--------------------------------------\n");
+
+    image img = imread(image_file);
+    int output_node_num = get_graph_output_node_number(graph);
+
+    /* save layer */
+    std::vector<layer> layers_params;
+    layers_params.clear();
+
+    /* save detection reslult */
+    std::vector<detection*> detections;
+    detections.clear();
+
+    /* decode layer one by one*/
+    for (int node = 0; node < output_node_num; ++node)
+    {
+        tensor_t out_tensor = get_graph_output_tensor(graph, node, 0);
+        int out_dim[4];
+        get_tensor_shape(out_tensor, out_dim, 4);
+        layer l_params;
+        int out_w = out_dim[3];
+        int out_h = out_dim[2];
+        l_params = make_darknet_layer(out_w, out_h, net_w, net_h, YOLOV4_NUM_BOXES, YOLOV4_TOTAL_ANCHOR, CLASSES_COCO);
+        layers_params.push_back(l_params);
+        float* out_data = ( float* )get_tensor_buffer(out_tensor);
+        std::vector<detection*> l_dets = forward_darknet_layer_cpu(out_data, l_params, img.w, img.h, net_w, net_h);
+        if (l_dets.size() == 0)
+            continue;
+        detections.insert(detections.end(), l_dets.begin(), l_dets.end());
+    }
+
+    if (detections.size() == 0)
+    {
+        fprintf(stderr, "no object detect");
+        return 0;
+    }
+
+    /* do nms */
+    do_nms_sort(detections, detections.size(), CLASSES_COCO, s_nms);
+
+    /* print output dectections */
+    int i, j;
+    for (i = 0; i < detections.size(); ++i)
+    {
+        int cls = -1;
+        for (j = 0; j < CLASSES_COCO; ++j)
+        {
+            if (detections[i]->prob[j] > 0.5)
+            {
+                if (cls < 0)
+                {
+                    cls = j;
+                }
+                fprintf(stderr, "%d: %.0f%%\n", cls, detections[i]->prob[j] * 100);
+            }
+        }
+        if (cls >= 0)
+        {
+            box b = detections[i]->bbox;
+            int left = (b.x - b.w / 2.) * img.w;
+            int right = (b.x + b.w / 2.) * img.w;
+            int top = (b.y - b.h / 2.) * img.h;
+            int bot = (b.y + b.h / 2.) * img.h;
+            draw_box(img, left, top, right, bot, 2, 125, 0, 125);
+            fprintf(stderr, "left = %d,right = %d,top = %d,bot = %d\n", left, right, top, bot);
+        }
+
+        if (detections[i]->prob)
+            free(detections[i]->prob);
+    }
+
+    save_image(img, "tengine_example_out");
+
+    /* free resource */
+    /* release tengine */
+    for (int i = 0; i < output_node_num; ++i)
+    {
+        tensor_t out_tensor = get_graph_output_tensor(graph, i, 0);
+        release_graph_tensor(out_tensor);
+    }
+
+    free_image(img);
+
+    for (int i = 0; i < layers_params.size(); i++)
+    {
+        layer l = layers_params[i];
+        if (l.output)
+            free(l.output);
+        if (l.anchors)
+            free(l.anchors);
+        if (l.anchor_mask)
+            free(l.anchor_mask);
+    }
+
+    release_graph_tensor(input_tensor);
+    postrun_graph(graph);
+    destroy_graph(graph);
+    release_tengine();
+
+    return 0;
+}