tm_yolov3_tiny.cpp 23.0 KB
Newer Older
B
BUG1989 已提交
1 2 3
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * License); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

/*
 * Copyright (c) 2020, OPEN AI LAB
 * Author: ruizhang@openailab.com
 */

#include <unistd.h>
#include <iostream>
#include <fstream>
#include <sstream>
#include <iomanip>
#include <string>
#include <vector>
#include <sys/time.h>
#include <stdlib.h>
#include <algorithm>
#include "common.h"
#include "tengine_c_api.h"
#include "tengine_operations.h"
#include <math.h>

#define DEFAULT_REPEAT_COUNT 1
#define DEFAULT_THREAD_COUNT 1

using namespace std;

/*
 * Axis-aligned rectangle.  (x, y) is the centre and (w, h) the full
 * extent: overlap() derives the edges as x -/+ w / 2.
 */
typedef struct
{
    float x, y, w, h;
} box;

/*
 * One candidate detection produced by the post-processing code.
 */
typedef struct
{
    box bbox;            /* box geometry */
    int classes;         /* number of entries in prob */
    float* prob;         /* per-class scores; allocated by make_network_boxes() */
    float* mask;         /* optional extra coefficients; never allocated in this file, so normally NULL */
    float objectness;    /* objectness score; 0 marks an entry do_nms_sort() discards */
    int sort_class;      /* class index nms_comparator() sorts by; negative means sort by objectness */
} detection;

/*
 * Description of one detection output layer, filled in by
 * make_darknet_layer().
 */
typedef struct layer
{
    int layer_type;                    /* 0 selects the yolo code path, 1 the region (yolov2) path */
    int batch;                         /* number of images in the output buffer */
    int total;                         /* anchor count of the whole network; biases holds total * 2 floats */
    int n, c, h, w;                    /* anchors used by this layer, channels (n * (classes + 5)), grid height/width */
    int out_n, out_c, out_h, out_w;    /* out_c/out_h/out_w mirror c/h/w; out_n is left at zero */
    int classes;                       /* class scores per anchor */
    int inputs;                        /* w * h * c */
    int outputs;                       /* set equal to inputs */
    int* mask;                         /* yolo only: n indices into biases chosen for this layer */
    float* biases;                     /* anchor sizes as width,height pairs */
    float* output;                     /* batch * outputs floats; copy of the network output */
    int coords;                        /* set to 4 for layer_type 1, otherwise zero */
} layer;

/* Number of class scores stored for every anchor box. */
const int classes = 80;
/* Score threshold that main() hands to get_network_boxes(). */
const float thresh = 0.55;
/* Passed through as the `hier` argument of get_network_boxes(). */
const float hier_thresh = 0.5;
/* IoU threshold handed to do_nms_sort(); main() skips NMS when this is 0. */
const float nms = 0.45;
/* Non-zero leaves box coordinates as fractions instead of scaling them to pixels. */
const int relative = 1;

// Anchor tables.  Each table is a flat list of width,height pairs;
// make_darknet_layer() copies the one matching the requested configuration.

// yolov3 (9 anchors)
float biases[18] = {10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198, 373, 326};
// tiny (6 anchors)
float biases_tiny[12] = {10, 14, 23, 27, 37, 58, 81, 82, 135, 169, 344, 319};
// yolov2 (5 anchors)
float biases_yolov2[10] = {0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828};

89
/* Store count consecutive anchor indices, beginning at first, into mask. */
static void fill_anchor_mask(int* mask, int count, int first)
{
    for (int i = 0; i < count; ++i)
        mask[i] = first + i;
}

/*
 * Build the description of one detection output layer and allocate its
 * buffers (biases, mask for yolo layers, output).
 *
 * batch      - number of images in the output buffer
 * w, h       - grid width/height of this output
 * net_w      - network input width, used to work out the stride of this output
 * net_h      - network input height (not used)
 * n          - anchors handled by this layer
 * total      - anchors in the whole network; 9 selects the yolov3 table and
 *              6 the tiny table when layer_type is 0
 * classes    - class scores per anchor
 * layer_type - 0 for a yolo layer, 1 for a region (yolov2) layer
 *
 * Returns the layer by value.  The caller owns biases, mask and output and
 * releases them with free().  Allocation results are not checked here.
 */
layer make_darknet_layer(int batch, int w, int h, int net_w, int net_h, int n, int total, int classes, int layer_type)
{
    ( void )net_h;

    layer l = {0};
    l.n = n;
    l.total = total;
    l.batch = batch;
    l.h = h;
    l.w = w;
    l.c = n * (classes + 4 + 1);
    l.out_w = l.w;
    l.out_h = l.h;
    l.out_c = l.c;
    l.classes = classes;
    l.inputs = l.w * l.h * l.c;

    l.biases = ( float* )calloc(total * 2, sizeof(float));
    if (layer_type == 0)
    {
        l.mask = ( int* )calloc(n, sizeof(int));
        if (9 == total)
        {
            for (int i = 0; i < total * 2; ++i)
            {
                l.biases[i] = biases[i];
            }
            /* The coarsest grid (stride 32) gets the largest anchors. */
            if (l.w == net_w / 32)
                fill_anchor_mask(l.mask, l.n, 6);
            if (l.w == net_w / 16)
                fill_anchor_mask(l.mask, l.n, 3);
            if (l.w == net_w / 8)
                fill_anchor_mask(l.mask, l.n, 0);
        }
        if (6 == total)
        {
            for (int i = 0; i < total * 2; ++i)
            {
                l.biases[i] = biases_tiny[i];
            }
            if (l.w == net_w / 32)
                fill_anchor_mask(l.mask, l.n, 3);
            if (l.w == net_w / 16)
                fill_anchor_mask(l.mask, l.n, 0);
        }
    }
    else if (1 == layer_type)
    {
        l.coords = 4;
        /*
         * Never read past the end of the yolov2 table: entries beyond it
         * keep the zero written by calloc.
         */
        const int available = ( int )(sizeof(biases_yolov2) / sizeof(biases_yolov2[0]));
        const int wanted = total * 2;
        const int ncopy = (wanted < available) ? wanted : available;
        for (int i = 0; i < ncopy; ++i)
        {
            l.biases[i] = biases_yolov2[i];
        }
    }
    l.layer_type = layer_type;
    l.outputs = l.inputs;
    l.output = ( float* )calloc(batch * l.outputs, sizeof(float));

    return l;
}

/*
 * Release the three heap buffers owned by a layer.
 *
 * The layer is received by value, so the caller's own pointer fields are
 * not modified by this call.
 */
void free_darknet_layer(layer l)
{
    /* free() accepts a null pointer, so no guards are required. */
    free(l.biases);
    free(l.mask);
    free(l.output);
}

/*
 * Offset into l.output of one value.
 *
 * batch    - image index
 * location - anchor * (w * h) + cell
 * entry    - which of the (4 + classes + 1) per-anchor planes to address
 */
static int entry_index(layer l, int batch, int location, int entry)
{
    const int plane = l.w * l.h;
    const int anchor = location / plane;
    const int cell = location % plane;

    return batch * l.outputs + anchor * plane * (4 + l.classes + 1) + entry * plane + cell;
}

/*
 * Apply the logistic function 1 / (1 + e^-x) in place to the first
 * size elements of input.
 */
void logistic_cpu(float* input, int size)
{
    for (int idx = 0; idx < size; ++idx)
    {
        const float decay = expf(-input[idx]);
        input[idx] = 1.f / (1.f + decay);
    }
}

void forward_darknet_layer_cpu(const float* input, layer l)
{
    memcpy(( void* )l.output, ( void* )input, sizeof(float) * l.inputs * l.batch);
205
    if (0 == l.layer_type)
B
BUG1989 已提交
206
    {
207
        for (int b = 0; b < l.batch; ++b)
B
BUG1989 已提交
208
        {
209
            for (int n = 0; n < l.n; ++n)
B
BUG1989 已提交
210 211 212 213 214 215 216 217 218 219 220 221 222 223
            {
                int index = entry_index(l, b, n * l.w * l.h, 0);
                logistic_cpu(l.output + index, 2 * l.w * l.h);
                index = entry_index(l, b, n * l.w * l.h, 4);
                logistic_cpu(l.output + index, (1 + l.classes) * l.w * l.h);
            }
        }
    }
}

/*
 * Count the (cell, anchor) pairs of a yolo layer whose objectness is
 * strictly greater than thresh.
 */
int yolo_num_detections(layer l, float thresh)
{
    const int plane = l.w * l.h;
    int hits = 0;

    for (int b = 0; b < l.batch; ++b)
    {
        for (int cell = 0; cell < plane; ++cell)
        {
            for (int anchor = 0; anchor < l.n; ++anchor)
            {
                const float objectness = l.output[entry_index(l, b, anchor * plane + cell, 4)];
                if (objectness > thresh)
                    ++hits;
            }
        }
    }
    return hits;
}

/*
 * Number of detection slots needed for all layers: yolo layers contribute
 * the boxes above thresh, region layers contribute every (cell, anchor)
 * pair.  The total is also written to stderr.
 */
int num_detections(vector<layer> layers_params, float thresh)
{
    int slots = 0;

    for (const layer& cur : layers_params)
    {
        switch (cur.layer_type)
        {
            case 0:
                slots += yolo_num_detections(cur, thresh);
                break;
            case 1:
                slots += cur.w * cur.h * cur.n;
                break;
            default:
                break;
        }
    }

    fprintf(stderr, "%s,%d\n", __func__, slots);
    return slots;
}

/*
 * Allocate one zeroed detection per slot reported by num_detections(),
 * each with a zeroed prob array sized from the first layer's class count.
 *
 * layers_params - output layers; may be empty
 * thresh        - objectness threshold forwarded to num_detections()
 * num           - if non-NULL, receives the number of detections allocated
 *
 * Returns the array (release each prob and then the array with free()),
 * or NULL with *num set to 0 when the array itself cannot be allocated.
 */
detection* make_network_boxes(vector<layer> layers_params, float thresh, int* num)
{
    const int nboxes = num_detections(layers_params, thresh);
    if (num)
        *num = nboxes;

    /* Do not index an empty vector; with no layers there is nothing to size. */
    const int classes_per_box = layers_params.empty() ? 0 : layers_params[0].classes;

    detection* dets = ( detection* )calloc(nboxes, sizeof(detection));
    if (NULL == dets)
    {
        /* Nothing was allocated, so report zero usable detections. */
        if (num)
            *num = 0;
        return NULL;
    }

    for (int i = 0; i < nboxes; ++i)
    {
        dets[i].prob = ( float* )calloc(classes_per_box, sizeof(float));
    }
    return dets;
}

/*
 * Map boxes from network-input coordinates back onto the source image.
 *
 * The image (w x h) is assumed to have been scaled to fit inside
 * netw x neth with its aspect ratio kept and centred; the padding is
 * removed and the box rescaled.  When relative is zero the result is
 * additionally multiplied by the image size.
 *
 * dets - array of n detections, updated in place
 */
void correct_yolo_boxes(detection* dets, int n, int w, int h, int netw, int neth, int relative)
{
    int fit_w = 0;
    int fit_h = 0;

    const float ratio_w = ( float )netw / w;
    const float ratio_h = ( float )neth / h;
    if (ratio_w < ratio_h)
    {
        /* width is the limiting side */
        fit_w = netw;
        fit_h = (h * netw) / w;
    }
    else
    {
        fit_h = neth;
        fit_w = (w * neth) / h;
    }

    /* Values that do not depend on the individual box. */
    const double pad_x = (netw - fit_w) / 2. / netw;
    const double pad_y = (neth - fit_h) / 2. / neth;
    const float shrink_x = ( float )fit_w / netw;
    const float shrink_y = ( float )fit_h / neth;
    const float grow_w = ( float )netw / fit_w;
    const float grow_h = ( float )neth / fit_h;

    for (int k = 0; k < n; ++k)
    {
        box* bb = &dets[k].bbox;

        bb->x = (bb->x - pad_x) / shrink_x;
        bb->y = (bb->y - pad_y) / shrink_y;
        bb->w *= grow_w;
        bb->h *= grow_h;

        if (!relative)
        {
            bb->x *= w;
            bb->w *= w;
            bb->y *= h;
            bb->h *= h;
        }
    }
}

/*
 * Decode one yolo box.
 *
 * x       - prediction buffer
 * biases  - anchor table (width,height pairs); n selects the pair
 * index   - offset of the box's first value; the four values are stride apart
 * i, j    - column and row of the grid cell
 * lw, lh  - grid width/height, used to scale the centre
 * w, h    - divisors applied to the anchor-scaled width/height
 */
box get_yolo_box(float* x, float* biases, int n, int index, int i, int j, int lw, int lh, int w, int h, int stride)
{
    const float tx = x[index + 0 * stride];
    const float ty = x[index + 1 * stride];
    const float tw = x[index + 2 * stride];
    const float th = x[index + 3 * stride];

    box decoded;
    decoded.x = (i + tx) / lw;
    decoded.y = (j + ty) / lh;
    decoded.w = exp(tw) * biases[2 * n] / w;
    decoded.h = exp(th) * biases[2 * n + 1] / h;
    return decoded;
}

316 317
/*
 * Extract every box of a yolo layer whose objectness exceeds thresh.
 *
 * l          - yolo layer whose output has been through forward_darknet_layer_cpu()
 * w, h       - source image size, forwarded to correct_yolo_boxes()
 * netw, neth - network input size
 * thresh     - objectness / class-score threshold
 * map        - not used
 * relative   - forwarded to correct_yolo_boxes()
 * dets       - destination; must have room for yolo_num_detections(l, thresh)
 *              entries, each with a prob array of l.classes floats
 *
 * Returns the number of detections written.
 */
int get_yolo_detections(layer l, int w, int h, int netw, int neth, float thresh, int* map, int relative,
                        detection* dets)
{
    ( void )map;

    float* predictions = l.output;
    const int plane = l.w * l.h;
    int count = 0;

    for (int b = 0; b < l.batch; ++b)
    {
        for (int cell = 0; cell < plane; ++cell)
        {
            const int row = cell / l.w;
            const int col = cell % l.w;
            for (int anchor = 0; anchor < l.n; ++anchor)
            {
                const int location = anchor * plane + cell;
                const float objectness = predictions[entry_index(l, b, location, 4)];

                /*
                 * Use the exact complement of the test in
                 * yolo_num_detections() (objectness > thresh).  Writing
                 * "objectness <= thresh" here would let a NaN score pass
                 * although it was never counted, overrunning dets.
                 */
                if (!(objectness > thresh))
                    continue;

                const int box_index = entry_index(l, b, location, 0);
                detection* det = &dets[count];

                det->bbox = get_yolo_box(predictions, l.biases, l.mask[anchor], box_index, col, row, l.w, l.h, netw,
                                         neth, plane);
                det->objectness = objectness;
                det->classes = l.classes;
                for (int c = 0; c < l.classes; ++c)
                {
                    const int class_index = entry_index(l, b, location, 4 + 1 + c);
                    const float prob = objectness * predictions[class_index];
                    det->prob[c] = (prob > thresh) ? prob : 0;
                }
                ++count;
            }
        }
    }
    correct_yolo_boxes(dets, count, w, h, netw, neth, relative);
    return count;
}

354
/*
 * Region-layer counterpart of the box correction step: undo the
 * aspect-preserving, centred fit of a w x h image into netw x neth for
 * each of the n detections, in place.  With relative == 0 the boxes are
 * also scaled by the image size.
 */
void correct_region_boxes(detection* dets, int n, int w, int h, int netw, int neth, int relative)
{
    /* Which side of the image touches the network input border. */
    const int width_limited = (( float )netw / w) < (( float )neth / h);
    const int new_w = width_limited ? netw : (w * neth) / h;
    const int new_h = width_limited ? (h * netw) / w : neth;

    int k = 0;
    while (k < n)
    {
        box fixed = dets[k].bbox;

        fixed.x = (fixed.x - (netw - new_w) / 2. / netw) / (( float )new_w / netw);
        fixed.y = (fixed.y - (neth - new_h) / 2. / neth) / (( float )new_h / neth);
        fixed.w *= ( float )netw / new_w;
        fixed.h *= ( float )neth / new_h;

        if (relative == 0)
        {
            fixed.x *= w;
            fixed.w *= w;
            fixed.y *= h;
            fixed.h *= h;
        }

        dets[k].bbox = fixed;
        ++k;
    }
}

387
/*
 * Decode one region-layer box.
 *
 * x      - prediction buffer; the four box values start at index and are
 *          stride apart
 * biases - anchor table (width,height pairs); n selects the pair
 * i, j   - column and row of the grid cell
 * w, h   - grid width/height used to normalise all four values
 */
box get_region_box(float* x, float* biases, int n, int index, int i, int j, int w, int h, int stride)
{
    float* cell = x + index;

    box decoded;
    decoded.x = (i + cell[0 * stride]) / w;
    decoded.y = (j + cell[1 * stride]) / h;
    decoded.w = exp(cell[2 * stride]) * biases[2 * n] / w;
    decoded.h = exp(cell[3 * stride]) * biases[2 * n + 1] / h;
    return decoded;
}

397 398
/*
 * Fill one detection per (cell, anchor) pair of a region layer.
 *
 * Only batch 0 is read.  Objectness at or below thresh is stored as 0 and
 * the class scores of such entries stay 0.  map and tree_thresh are not
 * used.  dets must hold l.w * l.h * l.n entries with prob arrays of
 * l.classes floats.
 */
void get_region_detections(layer l, int w, int h, int netw, int neth, float thresh, int* map, float tree_thresh,
                           int relative, detection* dets)
{
    ( void )map;
    ( void )tree_thresh;

    float* predictions = l.output;
    const int plane = l.w * l.h;

    for (int cell = 0; cell < plane; ++cell)
    {
        const int row = cell / l.w;
        const int col = cell % l.w;
        for (int anchor = 0; anchor < l.n; ++anchor)
        {
            const int index = anchor * plane + cell;
            detection* det = &dets[index];

            for (int c = 0; c < l.classes; ++c)
            {
                det->prob[c] = 0;
            }

            const int obj_index = entry_index(l, 0, index, l.coords);
            const int box_index = entry_index(l, 0, index, 0);
            const int mask_index = entry_index(l, 0, index, 4);
            const float scale = predictions[obj_index];

            det->bbox = get_region_box(predictions, l.biases, anchor, box_index, col, row, l.w, l.h, plane);
            det->objectness = scale > thresh ? scale : 0;

            /* Copy any coordinates beyond the first four into the mask buffer. */
            if (det->mask)
            {
                for (int m = 0; m < l.coords - 4; ++m)
                {
                    det->mask[m] = l.output[mask_index + m * plane];
                }
            }

            if (det->objectness != 0)
            {
                for (int c = 0; c < l.classes; ++c)
                {
                    const int class_index = entry_index(l, 0, index, l.coords + 1 + c);
                    const float prob = scale * predictions[class_index];
                    det->prob[c] = (prob > thresh) ? prob : 0;
                }
            }
        }
    }
    correct_region_boxes(dets, plane * l.n, w, h, netw, neth, relative);
}

/*
 * Run box extraction for every layer, writing each layer's detections
 * directly after those of the previous one in dets.
 */
void fill_network_boxes(vector<layer> layers_params, int img_w, int img_h, int net_w, int net_h, float thresh,
                        float hier, int* map, int relative, detection* dets)
{
    detection* cursor = dets;

    for (const layer& cur : layers_params)
    {
        if (0 != cur.layer_type)
        {
            /* region layers always emit one entry per (cell, anchor) */
            get_region_detections(cur, img_w, img_h, net_w, net_h, thresh, map, hier, relative, cursor);
            cursor += cur.w * cur.h * cur.n;
            continue;
        }

        cursor += get_yolo_detections(cur, img_w, img_h, net_w, net_h, thresh, map, relative, cursor);
    }
}

/*
 * Allocate the detection array for all layers and populate it.
 *
 * num receives the number of entries (when non-NULL).  The returned array
 * is owned by the caller.
 */
detection* get_network_boxes(vector<layer> layers_params, int img_w, int img_h, int net_w, int net_h, float thresh,
                             float hier, int* map, int relative, int* num)
{
    /* step 1: allocate */
    detection* result = make_network_boxes(layers_params, thresh, num);

    /* step 2: decode the layer outputs into the array */
    fill_network_boxes(layers_params, img_w, img_h, net_w, net_h, thresh, hier, map, relative, result);

    return result;
}

/*
 * Release an array of nboxes detections together with the prob and mask
 * buffers of every entry.  A NULL dets is accepted and ignored.
 */
void free_detections(detection* dets, int nboxes)
{
    if (NULL == dets)
        return;

    for (int i = 0; i < nboxes; ++i)
    {
        /* free() ignores null pointers, so unset buffers need no guard */
        free(dets[i].prob);
        free(dets[i].mask);
    }
    free(dets);
}

int nms_comparator(const void* pa, const void* pb)
{
    detection a = *( detection* )pa;
    detection b = *( detection* )pb;
    float diff = 0;
489
    if (b.sort_class >= 0)
B
BUG1989 已提交
490 491 492 493 494 495 496
    {
        diff = a.prob[b.sort_class] - b.prob[b.sort_class];
    }
    else
    {
        diff = a.objectness - b.objectness;
    }
497
    if (diff < 0)
B
BUG1989 已提交
498
        return 1;
499
    else if (diff > 0)
B
BUG1989 已提交
500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518
        return -1;
    return 0;
}

/*
 * Length of the overlap of two 1-D intervals given as centre and width.
 * The result is negative when the intervals are disjoint.
 */
float overlap(float x1, float w1, float x2, float w2)
{
    const float lo1 = x1 - w1 / 2;
    const float lo2 = x2 - w2 / 2;
    const float hi1 = x1 + w1 / 2;
    const float hi2 = x2 + w2 / 2;

    /* right-most of the two left edges */
    float left = lo2;
    if (lo1 > lo2)
        left = lo1;

    /* left-most of the two right edges */
    float right = hi2;
    if (hi1 < hi2)
        right = hi1;

    return right - left;
}

/*
 * Area shared by two boxes; 0 when they do not overlap on either axis.
 */
float box_intersection(box a, box b)
{
    const float shared_w = overlap(a.x, a.w, b.x, b.w);
    const float shared_h = overlap(a.y, a.h, b.y, b.h);

    return (shared_w < 0 || shared_h < 0) ? 0 : shared_w * shared_h;
}

/*
 * Area covered by either box: the two areas minus their intersection.
 */
float box_union(box a, box b)
{
    const float area_a = a.w * a.h;
    const float area_b = b.w * b.h;

    return area_a + area_b - box_intersection(a, b);
}

/*
 * Intersection over union of two boxes.
 *
 * Returns 0 when the union area is not positive (for example two
 * zero-sized boxes), so the result is never the NaN a 0 / 0 division
 * would produce.
 */
float box_iou(box a, box b)
{
    /* Compute the intersection once; the union is derived from it. */
    const float inter = box_intersection(a, b);
    const float uni = a.w * a.h + b.w * b.h - inter;

    if (uni <= 0)
        return 0;
    return inter / uni;
}

/*
 * Per-class non-maximum suppression.
 *
 * Entries whose objectness is 0 are first moved to the tail of the array
 * and ignored.  Then, for every class, the remaining entries are sorted
 * by that class's score (highest first) and each entry with a non-zero
 * score zeroes the same-class score of every later entry whose IoU with
 * it exceeds thresh.
 *
 * dets is reordered in place.
 */
void do_nms_sort(detection* dets, int total, int classes, float thresh)
{
    /* Partition: [0, last] keeps the entries with non-zero objectness. */
    int last = total - 1;
    int cursor = 0;
    while (cursor <= last)
    {
        if (dets[cursor].objectness == 0)
        {
            const detection parked = dets[cursor];
            dets[cursor] = dets[last];
            dets[last] = parked;
            --last;
            /* cursor stays put: the swapped-in entry is examined next */
        }
        else
        {
            ++cursor;
        }
    }
    const int live = last + 1;

    for (int cls = 0; cls < classes; ++cls)
    {
        for (int d = 0; d < live; ++d)
        {
            dets[d].sort_class = cls;
        }
        qsort(dets, live, sizeof(detection), nms_comparator);

        for (int keep = 0; keep < live; ++keep)
        {
            if (dets[keep].prob[cls] == 0)
                continue;

            const box kept = dets[keep].bbox;
            for (int other = keep + 1; other < live; ++other)
            {
                if (box_iou(kept, dets[other].bbox) > thresh)
                {
                    dets[other].prob[cls] = 0;
                }
            }
        }
    }
}

/* Forward declaration; the definition appears further down in this file. */
image letterbox_image(image im, int w, int h);

/*
 * Exchange the first and third planes of an image in place (each plane
 * holds im.w * im.h values).
 */
void rgbgr_image(image im)
{
    const int plane = im.w * im.h;
    const int third = 2 * plane;

    for (int px = 0; px < plane; ++px)
    {
        const float held = im.data[px];
        im.data[px] = im.data[px + third];
        im.data[px + third] = held;
    }
}

/*
 * Set every value of the image (h * w * c of them) to s.
 */
void fill_image(image m, float s)
{
    const int count = m.h * m.w * m.c;
    int k = 0;

    while (k < count)
    {
        m.data[k] = s;
        ++k;
    }
}

/*
 * Return a new w x h image containing im resized to fit with its aspect
 * ratio kept, centred on a background filled with 0.5.
 *
 * The caller releases the returned image; im itself is left alone.
 */
image letterbox_image(image im, int w, int h)
{
    /* Decide which side of im reaches the border of the target. */
    const int fit_by_width = (( float )w / im.w) < (( float )h / im.h);
    const int new_w = fit_by_width ? w : (im.w * h) / im.h;
    const int new_h = fit_by_width ? (im.h * w) / im.w : h;

    image resized = resize_image(im, new_w, new_h);
    image boxed = make_image(w, h, im.c);
    fill_image(boxed, .5);

    const int off_x = (w - new_w) / 2;
    const int off_y = (h - new_h) / 2;
    add_image(resized, boxed, off_x, off_y);

    free_image(resized);
    return boxed;
}

void get_input_data_darknet(const char* image_file, float* input_data, int net_h, int net_w)
{
    int size = 3 * net_w * net_h;
    image sized;
    image im = load_image_stb(image_file, 3);
625
    for (int i = 0; i < im.c * im.h * im.w; i++)
B
BUG1989 已提交
626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643
    {
        im.data[i] = im.data[i] / 255;
    }
    sized = letterbox(im, net_w, net_h);
    memcpy(input_data, sized.data, size * sizeof(float));

    free_image(sized);
    free_image(im);
}

/*
 * Print the command-line synopsis to stderr.
 */
void show_usage()
{
    fputs("[Usage]:  [-h]\n"
          "    [-m model_file] [-i image_file] [-r repeat_count] [-t thread_count]\n",
          stderr);
}

int main(int argc, char* argv[])
{
    int repeat_count = DEFAULT_REPEAT_COUNT;
644
    int num_thread = DEFAULT_THREAD_COUNT;
B
BUG1989 已提交
645 646 647 648
    char* model_file = nullptr;
    char* image_file = nullptr;

    int layer_type = 0;
649
    int numBBoxes = 3;
B
BUG1989 已提交
650 651 652 653 654
    int total_numAnchors = 6;
    int net_w = 416;
    int net_h = 416;

    int res;
655
    while ((res = getopt(argc, argv, "m:i:r:t:h:")) != -1)
B
BUG1989 已提交
656
    {
657
        switch (res)
B
BUG1989 已提交
658 659 660 661 662 663 664 665 666 667 668
        {
            case 'm':
                model_file = optarg;
                break;
            case 'i':
                image_file = optarg;
                break;
            case 'r':
                repeat_count = std::strtoul(optarg, nullptr, 10);
                break;
            case 't':
669
                num_thread = std::strtoul(optarg, nullptr, 10);
B
BUG1989 已提交
670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695
                break;
            case 'h':
                show_usage();
                return 0;
            default:
                break;
        }
    }

    /* check files */
    if (nullptr == model_file)
    {
        fprintf(stderr, "Error: Tengine model file not specified!\n");
        show_usage();
        return -1;
    }

    if (nullptr == image_file)
    {
        fprintf(stderr, "Error: Image file not specified!\n");
        show_usage();
        return -1;
    }

    if (!check_file_exist(model_file) || !check_file_exist(image_file))
        return -1;
696

B
BUG1989 已提交
697 698 699 700 701
    /* set runtime options */
    struct options opt;
    opt.num_thread = num_thread;
    opt.cluster = TENGINE_CLUSTER_ALL;
    opt.precision = TENGINE_MODE_FP32;
B
BUG1989 已提交
702
    opt.affinity = 0;
B
BUG1989 已提交
703

B
BUG1989 已提交
704
    /* inital tengine */
B
BUG1989 已提交
705 706 707 708 709
    if (init_tengine() != 0)
    {
        fprintf(stderr, "Initial tengine failed.\n");
        return -1;
    }
B
BUG1989 已提交
710 711 712 713 714 715 716 717 718 719 720 721
    fprintf(stderr, "tengine-lite library version: %s\n", get_tengine_version());

    /* create graph, load tengine model xxx.tmfile */
    graph_t graph = create_graph(nullptr, "tengine", model_file);
    if (graph == nullptr)
    {
        fprintf(stderr, "Create graph failed.\n");
        fprintf(stderr, "errno: %d \n", get_tengine_errno());
        return -1;
    }

    /* set the input shape to initial the graph, and prerun graph to infer shape */
722 723
    int img_size = net_h * net_w * 3;
    int dims[] = {1, 3, net_h, net_w};    // nchw
B
BUG1989 已提交
724 725 726 727 728 729 730 731 732 733 734 735 736

    std::vector<float> input_data(img_size);

    tensor_t input_tensor = get_graph_input_tensor(graph, 0, 0);
    if (input_tensor == nullptr)
    {
        fprintf(stderr, "Get input tensor failed\n");
        return -1;
    }

    if (set_tensor_shape(input_tensor, dims, 4) < 0)
    {
        fprintf(stderr, "Set input tensor shape failed\n");
737
        return -1;
B
BUG1989 已提交
738 739
    }

B
BUG1989 已提交
740
    if (set_tensor_buffer(input_tensor, input_data.data(), img_size * 4) < 0)
B
BUG1989 已提交
741
    {
B
BUG1989 已提交
742
        fprintf(stderr, "Set input tensor buffer failed\n");
B
BUG1989 已提交
743
        return -1;
B
BUG1989 已提交
744
    }    
B
BUG1989 已提交
745

B
BUG1989 已提交
746 747
    /* prerun graph, set work options(num_thread, cluster, precision) */
    if (prerun_graph_multithread(graph, opt) < 0)
B
BUG1989 已提交
748
    {
B
BUG1989 已提交
749
        fprintf(stderr, "Prerun multithread graph failed.\n");
B
BUG1989 已提交
750 751 752
        return -1;
    }

B
BUG1989 已提交
753 754 755
    /* prepare process input data, set the data mem to input tensor */
    get_input_data_darknet(image_file, input_data.data(), net_h, net_w);

B
BUG1989 已提交
756
    /* run graph */
757 758
    double min_time = __DBL_MAX__;
    double max_time = -__DBL_MAX__;
B
BUG1989 已提交
759
    double total_time = 0.;
760
    for (int i = 0; i < repeat_count; i++)
B
BUG1989 已提交
761 762 763 764 765 766 767 768 769 770 771 772 773
    {
        double start = get_current_time();
        if (run_graph(graph, 1) < 0)
        {
            fprintf(stderr, "Run graph failed\n");
            return -1;
        }
        double end = get_current_time();
        double cur = end - start;
        total_time += cur;
        min_time = std::min(min_time, cur);
        max_time = std::max(max_time, cur);
    }
774 775
    fprintf(stderr, "Repeat %d times, thread %d, avg time %.2f ms, max_time %.2f ms, min_time %.2f ms\n", repeat_count,
            num_thread, total_time / repeat_count, max_time, min_time);
B
BUG1989 已提交
776 777 778 779 780 781 782 783
    fprintf(stderr, "--------------------------------------\n");

    /* process the detection result */
    image img = imread(image_file);
    int output_node_num = get_graph_output_node_number(graph);

    vector<layer> layers_params;
    layers_params.clear();
784
    for (int i = 0; i < output_node_num; ++i)
B
BUG1989 已提交
785 786 787 788 789 790 791 792 793 794 795 796 797 798
    {
        tensor_t out_tensor = get_graph_output_tensor(graph, i, 0);    //"detection_out"
        int out_dim[4];
        get_tensor_shape(out_tensor, out_dim, 4);
        layer l_params;
        int out_w = out_dim[3];
        int out_h = out_dim[2];
        l_params = make_darknet_layer(1, out_w, out_h, net_w, net_h, numBBoxes, total_numAnchors, classes, layer_type);
        layers_params.push_back(l_params);
        float* out_data = ( float* )get_tensor_buffer(out_tensor);
        forward_darknet_layer_cpu(out_data, l_params);
    }
    int nboxes = 0;
    // get network boxes
799 800 801
    detection* dets =
        get_network_boxes(layers_params, img.w, img.h, net_w, net_h, thresh, hier_thresh, 0, relative, &nboxes);

B
BUG1989 已提交
802 803 804 805
    if (nms != 0)
    {
        do_nms_sort(dets, nboxes, classes, nms);
    }
806

B
BUG1989 已提交
807
    int i, j;
808
    for (i = 0; i < nboxes; ++i)
B
BUG1989 已提交
809 810
    {
        int cls = -1;
811
        for (j = 0; j < classes; ++j)
B
BUG1989 已提交
812
        {
813
            if (dets[i].prob[j] > 0.5)
B
BUG1989 已提交
814
            {
815
                if (cls < 0)
B
BUG1989 已提交
816 817 818 819 820 821
                {
                    cls = j;
                }
                fprintf(stderr, "%d: %.0f%%\n", cls, dets[i].prob[j] * 100);
            }
        }
822
        if (cls >= 0)
B
BUG1989 已提交
823 824
        {
            box b = dets[i].bbox;
825
            int left = (b.x - b.w / 2.) * img.w;
B
BUG1989 已提交
826
            int right = (b.x + b.w / 2.) * img.w;
827 828
            int top = (b.y - b.h / 2.) * img.h;
            int bot = (b.y + b.h / 2.) * img.h;
B
BUG1989 已提交
829 830 831 832 833 834 835 836 837 838 839 840 841
            draw_box(img, left, top, right, bot, 2, 125, 0, 125);
            fprintf(stderr, "left = %d,right = %d,top = %d,bot = %d\n", left, right, top, bot);
        }

        if (dets[i].mask)
            free(dets[i].mask);
        if (dets[i].prob)
            free(dets[i].prob);
    }
    free(dets);
    save_image(img, "tengine_example_out");

    /* release tengine */
842
    for (int i = 0; i < output_node_num; ++i)
B
BUG1989 已提交
843 844 845 846 847 848 849
    {
        tensor_t out_tensor = get_graph_output_tensor(graph, i, 0);
        release_graph_tensor(out_tensor);
    }

    free_image(img);

850
    for (int i = 0; i < layers_params.size(); i++)
B
BUG1989 已提交
851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867
    {
        layer l = layers_params[i];
        if (l.output)
            free(l.output);
        if (l.biases)
            free(l.biases);
        if (l.mask)
            free(l.mask);
    }

    release_graph_tensor(input_tensor);
    postrun_graph(graph);
    destroy_graph(graph);
    release_tengine();

    return 0;
}