Unverified · Commit 40909634 · authored by BUG1989 · committed by GitHub

Update YOLOv4 Tiny example (#609)

* try to fix dynamic shape of pooling

* update yolov4 tiny example
Parent commit 4c66ff82
......@@ -18,698 +18,293 @@
*/
/*
* Copyright (c) 2020, OPEN AI LAB
* Copyright (c) 2021, OPEN AI LAB
* Author: 942002795@qq.com
* Update: xwwang@openailab.com
*/
#include <iostream>
#include <fstream>
#include <sstream>
#include <iomanip>
#include <string>
#include <math.h>
#include <vector>
#include <stdlib.h>
#include <string>
#include <algorithm>
#include "common.h"
#include "tengine_c_api.h"
#include "tengine_operations.h"
#include <math.h>
#include <cmath>
#include <stdlib.h>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
using namespace std;
#include "common.h"
#include "tengine_c_api.h"
#include "tengine_operations.h"
typedef struct
struct Object
{
float x, y, w, h;
} box;
cv::Rect_<float> rect;
int label;
float prob;
};
typedef struct
{
box bbox;
int classes;
float* prob;
float* mask;
float objectness;
int sort_class;
} detection;
typedef struct layer
static inline float sigmoid(float x)
{
int layer_type;
int batch;
int total;
int n, c, h, w;
int out_n, out_c, out_h, out_w;
int classes;
int inputs;
int outputs;
int* mask;
float* biases;
float* output;
int coords;
} layer;
// YOLOv3 anchor priors: 9 (w,h) pairs, consumed as biases[2n]/biases[2n+1] by the box decoders.
float biases[18] = {10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198, 373, 326};
// YOLOv3-tiny anchor priors: 6 (w,h) pairs across its two output scales.
float biases_tiny[12] = {10, 14, 23, 27, 37, 58, 81, 82, 135, 169, 344, 319};
// YOLOv2 anchor priors — note the much smaller magnitudes (presumably grid-cell
// units rather than pixels; verify against the model config).
float biases_yolov2[10] = {0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828};
layer make_darknet_layer(int batch, int w, int h, int net_w, int net_h, int n, int total, int classes, int layer_type)
{
layer l = {0};
l.n = n;
l.total = total;
l.batch = batch;
l.h = h;
l.w = w;
l.c = n * (classes + 4 + 1);
l.out_w = l.w;
l.out_h = l.h;
l.out_c = l.c;
l.classes = classes;
l.inputs = l.w * l.h * l.c;
l.biases = ( float* )calloc(total * 2, sizeof(float));
if (layer_type == 0)
{
l.mask = ( int* )calloc(n, sizeof(int));
if (9 == total)
{
for (int i = 0; i < total * 2; ++i)
{
l.biases[i] = biases[i];
}
if (l.w == net_w / 32)
{
int j = 6;
for (int i = 0; i < l.n; ++i)
l.mask[i] = j++;
}
if (l.w == net_w / 16)
{
int j = 3;
for (int i = 0; i < l.n; ++i)
l.mask[i] = j++;
}
if (l.w == net_w / 8)
{
int j = 0;
for (int i = 0; i < l.n; ++i)
l.mask[i] = j++;
}
}
if (6 == total)
{
for (int i = 0; i < total * 2; ++i)
{
l.biases[i] = biases_tiny[i];
}
if (l.w == net_w / 32)
{
int j = 3;
for (int i = 0; i < l.n; ++i)
l.mask[i] = j++;
}
if (l.w == net_w / 16)
{
int j = 1;
for (int i = 0; i < l.n; ++i)
l.mask[i] = j++;
}
}
}
else if (1 == layer_type)
{
l.coords = 4;
for (int i = 0; i < total * 2; ++i)
{
l.biases[i] = biases_yolov2[i];
}
}
l.layer_type = layer_type;
l.outputs = l.inputs;
l.output = ( float* )calloc((size_t)batch * l.outputs, sizeof(float));
return l;
return static_cast<float>(1.f / (1.f + exp(-x)));
}
void free_darknet_layer(layer l)
static inline float intersection_area(const Object& a, const Object& b)
{
if (NULL != l.biases)
{
free(l.biases);
l.biases = NULL;
}
if (NULL != l.mask)
{
free(l.mask);
l.mask = NULL;
}
if (NULL != l.output)
{
free(l.output);
l.output = NULL;
}
cv::Rect_<float> inter = a.rect & b.rect;
return inter.area();
}
static int entry_index(layer l, int batch, int location, int entry)
static void qsort_descent_inplace(std::vector<Object>& faceobjects, int left, int right)
{
int n = location / (l.w * l.h);
int loc = location % (l.w * l.h);
return batch * l.outputs + n * l.w * l.h * (4 + l.classes + 1) + entry * l.w * l.h + loc;
}
int i = left;
int j = right;
float p = faceobjects[(left + right) / 2].prob;
void logistic_cpu(float* input, int size)
{
for (int i = 0; i < size; ++i)
while (i <= j)
{
input[i] = 1.f / (1.f + expf(-input[i]));
}
}
while (faceobjects[i].prob > p)
i++;
void forward_darknet_layer_cpu(const float* input, layer l)
{
memcpy(( void* )l.output, ( void* )input, sizeof(float) * l.inputs * l.batch);
if (0 == l.layer_type)
{
for (int b = 0; b < l.batch; ++b)
while (faceobjects[j].prob < p)
j--;
if (i <= j)
{
for (int n = 0; n < l.n; ++n)
{
int index = entry_index(l, b, n * l.w * l.h, 0);
logistic_cpu(l.output + index, 2 * l.w * l.h);
index = entry_index(l, b, n * l.w * l.h, 4);
logistic_cpu(l.output + index, (1 + l.classes) * l.w * l.h);
}
// swap
std::swap(faceobjects[i], faceobjects[j]);
i++;
j--;
}
}
}
int yolo_num_detections(layer l, float thresh)
{
int i, n, b;
int count = 0;
for (b = 0; b < l.batch; ++b)
#pragma omp parallel sections
{
for (i = 0; i < l.w * l.h; ++i)
#pragma omp section
{
for (n = 0; n < l.n; ++n)
{
int obj_index = entry_index(l, b, n * l.w * l.h + i, 4);
if (l.output[obj_index] > thresh)
{
// printf(".....%d -- %f\n",obj_index, l.output[obj_index]);
++count;
}
}
if (left < j) qsort_descent_inplace(faceobjects, left, j);
}
#pragma omp section
{
if (i < right) qsort_descent_inplace(faceobjects, i, right);
}
}
return count;
}
int num_detections(vector<layer> layers_params, float thresh)
static void qsort_descent_inplace(std::vector<Object>& faceobjects)
{
int i;
int s = 0;
for (i = 0; i < ( int )layers_params.size(); ++i)
{
layer l = layers_params[i];
if (0 == l.layer_type)
s += yolo_num_detections(l, thresh);
else if (1 == l.layer_type)
s += l.w * l.h * l.n;
}
if (faceobjects.empty())
return;
return s;
qsort_descent_inplace(faceobjects, 0, faceobjects.size() - 1);
}
detection* make_network_boxes(vector<layer> layers_params, float thresh, int* num)
static void nms_sorted_bboxes(const std::vector<Object>& faceobjects, std::vector<int>& picked, float nms_threshold)
{
layer l = layers_params[0];
int i;
int nboxes = num_detections(layers_params, thresh);
if (num)
*num = nboxes;
detection* dets = ( detection* )calloc(nboxes, sizeof(detection));
for (i = 0; i < nboxes; ++i)
picked.clear();
const int n = faceobjects.size();
std::vector<float> areas(n);
for (int i = 0; i < n; i++)
{
dets[i].prob = ( float* )calloc(l.classes, sizeof(float));
areas[i] = faceobjects[i].rect.area();
}
return dets;
}
void correct_yolo_boxes(detection* dets, int n, int w, int h, int netw, int neth, int relative)
{
int i;
int new_w = 0;
int new_h = 0;
new_w = netw;
new_h = neth;
for (i = 0; i < n; ++i)
for (int i = 0; i < n; i++)
{
box b = dets[i].bbox;
b.x = (b.x - (netw - new_w) / 2. / netw) / (( float )new_w / netw);
b.y = (b.y - (neth - new_h) / 2. / neth) / (( float )new_h / neth);
b.w *= ( float )netw / new_w;
b.h *= ( float )neth / new_h;
if (!relative)
const Object& a = faceobjects[i];
int keep = 1;
for (int j = 0; j < (int)picked.size(); j++)
{
b.x *= w;
b.w *= w;
b.y *= h;
b.h *= h;
const Object& b = faceobjects[picked[j]];
// intersection over union
float inter_area = intersection_area(a, b);
float union_area = areas[i] + areas[picked[j]] - inter_area;
// float IoU = inter_area / union_area
if (inter_area / union_area > nms_threshold)
keep = 0;
}
dets[i].bbox = b;
if (keep)
picked.push_back(i);
}
}
box get_yolo_box(float* x, float* biases, int n, int index, int i, int j, int lw, int lh, int w, int h, int stride)
void get_input_data_yolov4(const char* image_file, float* input_data, int img_h, int img_w, const float* mean, const float* scale)
{
box b;
b.x = (i + x[index + 0 * stride]) / lw;
b.y = (j + x[index + 1 * stride]) / lh;
b.w = exp(x[index + 2 * stride]) * biases[2 * n] / w;
b.h = exp(x[index + 3 * stride]) * biases[2 * n + 1] / h;
cv::Mat sample = cv::imread(image_file, 1);
cv::Mat img;
return b;
}
if (sample.channels() == 1)
cv::cvtColor(sample, img, cv::COLOR_GRAY2RGB);
else
cv::cvtColor(sample, img, cv::COLOR_BGR2RGB);
int get_yolo_detections(layer l, int w, int h, int netw, int neth, float thresh, int* map, int relative,
detection* dets)
{
int i, j, n, b;
float* predictions = l.output;
int count = 0;
for (b = 0; b < l.batch; ++b)
{
for (i = 0; i < l.w * l.h; ++i)
/* resize process */
cv::resize(img, img, cv::Size(img_w, img_h));
img.convertTo(img, CV_32FC3);
float* img_data = (float* )img.data;
/* nhwc to nchw */
for (int h = 0; h < img_h; h++)
{ for (int w = 0; w < img_w; w++)
{
int row = i / l.w;
int col = i % l.w;
for (n = 0; n < l.n; ++n)
for (int c = 0; c < 3; c++)
{
int obj_index = entry_index(l, b, n * l.w * l.h + i, 4);
float objectness = predictions[obj_index];
if (objectness <= thresh)
continue;
int box_index = entry_index(l, b, n * l.w * l.h + i, 0);
dets[count].bbox = get_yolo_box(predictions, l.biases, l.mask[n], box_index, col, row, l.w, l.h, netw,
neth, l.w * l.h);
dets[count].objectness = objectness;
dets[count].classes = l.classes;
for (j = 0; j < l.classes; ++j)
{
int class_index = entry_index(l, b, n * l.w * l.h + i, 4 + 1 + j);
float prob = objectness * predictions[class_index];
dets[count].prob[j] = (prob > thresh) ? prob : 0;
}
++count;
int in_index = h * img_w * 3 + w * 3 + c;
int out_index = c * img_h * img_w + h * img_w + w;
input_data[out_index] = (img_data[in_index] - mean[c]) * scale[c];
}
}
}
correct_yolo_boxes(dets, count, w, h, netw, neth, relative);
return count;
}
void correct_region_boxes(detection* dets, int n, int w, int h, int netw, int neth, int relative)
static void generate_proposals(int stride, const float* feat, float prob_threshold, std::vector<Object>& objects)
{
int i;
int new_w = 0;
int new_h = 0;
if ((( float )netw / w) < (( float )neth / h))
{
new_w = netw;
new_h = (h * netw) / w;
}
else
{
new_h = neth;
new_w = (w * neth) / h;
}
for (i = 0; i < n; ++i)
{
box b = dets[i].bbox;
b.x = (b.x - (netw - new_w) / 2. / netw) / (( float )new_w / netw);
b.y = (b.y - (neth - new_h) / 2. / neth) / (( float )new_h / neth);
b.w *= ( float )netw / new_w;
b.h *= ( float )neth / new_h;
if (!relative)
{
b.x *= w;
b.w *= w;
b.y *= h;
b.h *= h;
}
dets[i].bbox = b;
}
}
box get_region_box(float* x, float* biases, int n, int index, int i, int j, int w, int h, int stride)
{
    /* Decode one region-layer prediction at grid cell (i, j) into a box
     * normalized by the feature-map size: center from the raw offsets,
     * width/height from exp(t) scaled by anchor n. */
    const float tx = x[index + 0 * stride];
    const float ty = x[index + 1 * stride];
    const float tw = x[index + 2 * stride];
    const float th = x[index + 3 * stride];

    box b;
    b.x = (i + tx) / w;
    b.y = (j + ty) / h;
    b.w = exp(tw) * biases[2 * n] / w;
    b.h = exp(th) * biases[2 * n + 1] / h;
    return b;
}
static float anchors[12] = {10, 14, 23, 27, 37, 58, 81, 82, 135, 169, 344, 319};
int anchor_num = 3;
int feat_w = 416 / stride;
int feat_h = 416 / stride;
int cls_num = 80;
int anchor_group = 0;
if(stride == 16)
anchor_group = 1;
if(stride == 32)
anchor_group = 2;
void get_region_detections(layer l, int w, int h, int netw, int neth, float thresh, int* map, float tree_thresh,
int relative, detection* dets)
{
int i, j, n;
float* predictions = l.output;
for (i = 0; i < l.w * l.h; ++i)
for (int h = 0; h <= feat_h - 1; h++)
{
int row = i / l.w;
int col = i % l.w;
for (n = 0; n < l.n; ++n)
for (int w = 0; w <= feat_w - 1; w++)
{
int index = n * l.w * l.h + i;
for (j = 0; j < l.classes; ++j)
{
dets[index].prob[j] = 0;
}
int obj_index = entry_index(l, 0, n * l.w * l.h + i, l.coords);
int box_index = entry_index(l, 0, n * l.w * l.h + i, 0);
int mask_index = entry_index(l, 0, n * l.w * l.h + i, 4);
float scale = predictions[obj_index];
dets[index].bbox = get_region_box(predictions, l.biases, n, box_index, col, row, l.w, l.h, l.w * l.h);
dets[index].objectness = scale > thresh ? scale : 0;
if (dets[index].mask)
for (int anchor = 0; anchor <= anchor_num - 1; anchor++)
{
for (j = 0; j < l.coords - 4; ++j)
int class_index = 0;
float class_score = -FLT_MAX;
int channel_size = feat_h * feat_w;
for (int s = 0; s <= cls_num - 1; s++)
{
dets[index].mask[j] = l.output[mask_index + j * l.w * l.h];
int score_index = anchor * 85 * channel_size + feat_w * h + w + (s + 5) * channel_size;
float score = feat[score_index];
if(score > class_score)
{
class_index = s;
class_score = score;
}
}
}
// int class_index = entry_index(l, 0, n*l.w*l.h + i, l.coords + 1);
if (dets[index].objectness)
{
for (j = 0; j < l.classes; ++j)
float box_score = feat[anchor * 85 * channel_size + feat_w * h + w + 4 * channel_size];
float final_score = sigmoid(box_score) * sigmoid(class_score);
if(final_score >= prob_threshold)
{
int class_index = entry_index(l, 0, n * l.w * l.h + i, l.coords + 1 + j);
float prob = scale * predictions[class_index];
dets[index].prob[j] = (prob > thresh) ? prob : 0;
int dx_index = anchor * 85 * channel_size + feat_w * h + w + 0 * channel_size;
int dy_index = anchor * 85 * channel_size + feat_w * h + w + 1 * channel_size;
int dw_index = anchor * 85 * channel_size + feat_w * h + w + 2 * channel_size;
int dh_index = anchor * 85 * channel_size + feat_w * h + w + 3 * channel_size;
float dx = sigmoid(feat[dx_index]);
float dy = sigmoid(feat[dy_index]);
float dw = feat[dw_index];
float dh = feat[dh_index];
float anchor_w = anchors[(anchor_group - 1) * 6 + anchor * 2 + 0];
float anchor_h = anchors[(anchor_group - 1) * 6 + anchor * 2 + 1];
float pred_x = (w + dx) * stride;
float pred_y = (h + dy) * stride;
float pred_w = exp(dw) * anchor_w ;
float pred_h = exp(dh) * anchor_h ;
float x0 = (pred_x - pred_w * 0.5f);
float y0 = (pred_y - pred_h * 0.5f);
float x1 = (pred_x + pred_w * 0.5f);
float y1 = (pred_y + pred_h * 0.5f);
Object obj;
obj.rect.x = x0;
obj.rect.y = y0;
obj.rect.width = x1 - x0;
obj.rect.height = y1 - y0;
obj.label = class_index;
obj.prob = final_score;
objects.push_back(obj);
}
}
}
}
correct_region_boxes(dets, l.w * l.h * l.n, w, h, netw, neth, relative);
}
void fill_network_boxes(vector<layer> layers_params, int img_w, int img_h, int net_w, int net_h, float thresh,
                        float hier, int* map, int relative, detection* dets)
{
    /* Decode every output layer into dets, advancing the write cursor past
     * each layer's contribution. */
    for (size_t idx = 0; idx < layers_params.size(); ++idx)
    {
        layer l = layers_params[idx];
        if (l.layer_type == 0)
        {
            /* YOLO layer: only candidates above the objectness threshold are
             * written, so advance by the count it reports. */
            dets += get_yolo_detections(l, img_w, img_h, net_w, net_h, thresh, map, relative, dets);
        }
        else
        {
            /* Region layer (YOLOv2): always fills a full w*h*n grid of slots. */
            get_region_detections(l, img_w, img_h, net_w, net_h, thresh, map, hier, relative, dets);
            dets += l.w * l.h * l.n;
        }
    }
}
detection* get_network_boxes(vector<layer> layers_params, int img_w, int img_h, int net_w, int net_h, float thresh,
                             float hier, int* map, int relative, int* num)
{
    /* Allocate one detection slot per candidate box (count reported via num),
     * then decode every output layer into the freshly allocated array.
     * Caller owns the result and must release it with free_detections(). */
    detection* all_dets = make_network_boxes(layers_params, thresh, num);
    fill_network_boxes(layers_params, img_w, img_h, net_w, net_h, thresh, hier, map, relative, all_dets);
    return all_dets;
}
/* Release an array produced by get_network_boxes: first each box's
 * per-class probability buffer, then the array itself. */
void free_detections(detection* dets, int nboxes)
{
    for (int idx = 0; idx < nboxes; ++idx)
        free(dets[idx].prob);
    free(dets);
}
int nms_comparator(const void* pa, const void* pb)
{
detection a = *( detection* )pa;
detection b = *( detection* )pb;
float diff = 0;
if (b.sort_class >= 0)
{
diff = a.prob[b.sort_class] - b.prob[b.sort_class];
}
else
{
diff = a.objectness - b.objectness;
}
if (diff < 0)
return 1;
else if (diff > 0)
return -1;
return 0;
}
float overlap(float x1, float w1, float x2, float w2)
static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects)
{
float l1 = x1 - w1 / 2;
float l2 = x2 - w2 / 2;
float left = l1 > l2 ? l1 : l2;
float r1 = x1 + w1 / 2;
float r2 = x2 + w2 / 2;
float right = r1 < r2 ? r1 : r2;
return right - left;
}
/* COCO 80-class labels; array index must match the class id produced by the
 * detector (classes == 80 elsewhere in this file). */
static const char* class_names[] = {
    "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
    "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
    "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
    "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
    "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
    "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
    "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
    "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
    "hair drier", "toothbrush"
};
float box_intersection(box a, box b)
{
    /* Intersection area of two center-format boxes; zero when either axis
     * has no overlap (overlap() returns a negative extent in that case). */
    const float ow = overlap(a.x, a.w, b.x, b.w);
    const float oh = overlap(a.y, a.h, b.y, b.h);
    if (ow < 0 || oh < 0)
        return 0;
    return ow * oh;
}
cv::Mat image = bgr.clone();
float box_union(box a, box b)
{
    /* Union area = area(a) + area(b) - intersection(a, b). */
    const float inter = box_intersection(a, b);
    return a.w * a.h + b.w * b.h - inter;
}
float box_iou(box a, box b)
{
    /* Intersection-over-union of two center-format boxes.
     * Compute the intersection once and form the union from it directly,
     * instead of calling box_union() (which would re-run box_intersection
     * a second time). Result is numerically identical. */
    const float inter = box_intersection(a, b);
    const float uni = a.w * a.h + b.w * b.h - inter;
    return inter / uni;
}
void do_nms_sort(detection* dets, int total, int classes, float thresh)
{
int i, j, k;
k = total - 1;
for (i = 0; i <= k; ++i)
for (size_t i = 0; i < objects.size(); i++)
{
if (dets[i].objectness == 0)
{
detection swap = dets[i];
dets[i] = dets[k];
dets[k] = swap;
--k;
--i;
}
}
total = k + 1;
for (k = 0; k < classes; ++k)
{
for (i = 0; i < total; ++i)
{
dets[i].sort_class = k;
}
qsort(dets, total, sizeof(detection), nms_comparator);
for (i = 0; i < total; ++i)
{
if (dets[i].prob[k] == 0)
continue;
box a = dets[i].bbox;
for (j = i + 1; j < total; ++j)
{
box b = dets[j].bbox;
if (box_iou(a, b) > thresh)
{
dets[j].prob[k] = 0;
}
}
}
}
}
const Object& obj = objects[i];
image letterbox_image(image im, int w, int h);
fprintf(stderr, "%2d: %3.0f%%, [%4.0f, %4.0f, %4.0f, %4.0f], %s\n", obj.label, obj.prob * 100, obj.rect.x,
obj.rect.y, obj.rect.x + obj.rect.width, obj.rect.y + obj.rect.height, class_names[obj.label]);
void rgbgr_image(image im)
{
    /* Swap channel 0 and channel 2 of a planar (CHW) image in place,
     * converting RGB <-> BGR. */
    const int plane = im.w * im.h;
    for (int i = 0; i < plane; ++i)
    {
        const float tmp = im.data[i];
        im.data[i] = im.data[i + plane * 2];
        im.data[i + plane * 2] = tmp;
    }
}
cv::rectangle(image, obj.rect, cv::Scalar(255, 0, 0));
void fill_image(image m, float s)
{
    /* Set every element of the image buffer (all channels) to the value s. */
    const int total = m.h * m.w * m.c;
    for (int i = 0; i < total; ++i)
        m.data[i] = s;
}
char text[256];
sprintf(text, "%s %.1f%%", class_names[obj.label], obj.prob * 100);
image letterbox_image(image im, int w, int h)
{
    /* Resize im to fit inside w x h while preserving its aspect ratio, then
     * paste it centered onto a gray (0.5) canvas of exactly w x h.
     * Caller owns the returned image. */
    int new_w, new_h;
    if ((( float )w / im.w) < (( float )h / im.h))
    {
        /* Width is the limiting dimension. */
        new_w = w;
        new_h = (im.h * w) / im.w;
    }
    else
    {
        /* Height is the limiting dimension. */
        new_h = h;
        new_w = (im.w * h) / im.h;
    }

    image resized = resize_image(im, new_w, new_h);
    image boxed = make_image(w, h, im.c);
    fill_image(boxed, .5);
    add_image(resized, boxed, (w - new_w) / 2, (h - new_h) / 2);
    free_image(resized);
    return boxed;
}
int baseLine = 0;
cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
void get_input_data_darknet(const char* image_file, float* input_data, int img_h, int img_w, const float* mean, const float* scale)
{
cv::Mat sample = cv::imread(image_file, 1);
cv::Mat img;
int x = obj.rect.x;
int y = obj.rect.y - label_size.height - baseLine;
if (y < 0)
y = 0;
if (x + label_size.width > image.cols)
x = image.cols - label_size.width;
if (sample.channels() == 1)
cv::cvtColor(sample, img, cv::COLOR_GRAY2RGB);
else
cv::cvtColor(sample, img, cv::COLOR_BGR2RGB);
cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
cv::Scalar(255, 255, 255), -1);
/* letterbox process */
int letterbox = 416;
float lb = (float)letterbox;
int h0 = 0;
int w0 = 0;
if ( img.rows > img.cols)
{
h0 = lb;
w0 = int(img.cols*(lb/img.rows));
cv::putText(image, text, cv::Point(x, y + label_size.height), cv::FONT_HERSHEY_SIMPLEX, 0.5,
cv::Scalar(0, 0, 0));
}
else
{
h0 = int(img.rows*(lb/img.cols));
w0 = lb;
}
cv::resize(img, img, cv::Size(w0, h0));
img.convertTo(img, CV_32FC3);
cv::Mat img_new(lb, lb, CV_32FC3, cv::Scalar(0.5/scale[0]+mean[0], 0.5/scale[1]+mean[1], 0.5/scale[2]+mean[2]));
int dh = int((lb - h0) / 2);
int dw = int((lb - w0) / 2);
for (int hi = 0; hi < h0; ++hi)
{
for (int wi = 0; wi < w0; ++wi)
{
for (int ci = 0; ci < 3; ++ci)
{
int ii = hi*w0*3 + wi*3 + ci;
int oo = (dh + hi)*w0*3 + (dw + wi)*3 + ci;
((float*)img_new.data)[oo] = ((float*)img.data)[ii];
}
}
}
img_new.convertTo(img_new, CV_32FC3);
float* img_data = ( float* )img_new.data;
/* nhwc to nchw */
int hw = img_h * img_w;
for (int h = 0; h < img_h; h++)
{
for (int w = 0; w < img_w; w++)
{
for (int c = 0; c < 3; c++)
{
input_data[c * hw + h * img_w + w] = (*img_data - mean[c]) * scale[c];
img_data++;
}
}
}
cv::imwrite("yolov4_tiny_out.jpg", image);
}
/* COCO 80-class labels; index must match the model's class ids.
 * NOTE(review): this duplicates the class_names table defined earlier in this
 * file — keeping both is a redefinition; one copy should be removed. */
static const char* class_names[] = {"person", "bicycle", "car", "motorcycle", "airplane", "bus",
                                    "train", "truck", "boat", "traffic light", "fire hydrant",
                                    "stop sign", "parking meter", "bench", "bird", "cat", "dog",
                                    "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
                                    "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
                                    "skis", "snowboard", "sports ball", "kite", "baseball bat",
                                    "baseball glove", "skateboard", "surfboard", "tennis racket",
                                    "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl",
                                    "banana", "apple", "sandwich", "orange", "broccoli", "carrot",
                                    "hot dog", "pizza", "donut", "cake", "chair", "couch",
                                    "potted plant", "bed", "dining table", "toilet", "tv", "laptop",
                                    "mouse", "remote", "keyboard", "cell phone", "microwave", "oven",
                                    "toaster", "sink", "refrigerator", "book", "clock", "vase",
                                    "scissors", "teddy bear", "hair drier", "toothbrush"};
void show_usage()
{
    /* Print the command-line help to stderr.
     * The original body printed the same usage string twice (an artifact of a
     * merged diff); emit it exactly once. */
    fprintf(stderr, "[Usage]: [-h]\n [-m model_file] [-i image_file] [-r repeat_count] [-t thread_count]\n");
}
int main(int argc, char* argv[])
{
const char* model_file = nullptr;
const char* image_file = nullptr;
int layer_type = 0;
int numBBoxes = 3;
int total_numAnchors = 6;
int img_w = 416;
int img_h = 416;
int repeat_count = 1;
int num_thread = 1;
const int classes = 80;
const float thresh = 0.25;
const float hier_thresh = 0.5;
const float nms = 0.45;
const int relative = 1;
int img_w = 416;
int img_c = 3;
const float mean[3] = {0, 0, 0};
const float scale[3] = {0.003921, 0.003921, 0.003921};
int repeat_count = 1;
int num_thread = 1;
int res;
while ((res = getopt(argc, argv, "m:i:r:t:h:")) != -1)
{
......@@ -753,6 +348,13 @@ int main(int argc, char* argv[])
if (!check_file_exist(model_file) || !check_file_exist(image_file))
return -1;
cv::Mat img = cv::imread(image_file, 1);
if (img.empty())
{
fprintf(stderr, "cv::imread %s failed\n", image_file);
return -1;
}
/* set runtime options */
struct options opt;
opt.num_thread = num_thread;
......@@ -777,11 +379,9 @@ int main(int argc, char* argv[])
return -1;
}
/* set the input shape to initial the graph, and prerun graph to infer shape */
int img_size = img_h * img_w * 3;
int dims[] = {1, 3, img_h, img_w}; // nchw
std::vector<float> input_data(img_size);
int img_size = img_h * img_w * img_c;
int dims[] = {1, 3, img_h, img_w};
float* input_data = ( float* )malloc(img_size * sizeof(float));
tensor_t input_tensor = get_graph_input_tensor(graph, 0, 0);
if (input_tensor == nullptr)
......@@ -796,11 +396,11 @@ int main(int argc, char* argv[])
return -1;
}
if (set_tensor_buffer(input_tensor, input_data.data(), img_size * 4) < 0)
if (set_tensor_buffer(input_tensor, input_data, img_size * 4) < 0)
{
fprintf(stderr, "Set input tensor buffer failed\n");
return -1;
}
}
/* prerun graph, set work options(num_thread, cluster, precision) */
if (prerun_graph_multithread(graph, opt) < 0)
......@@ -810,8 +410,7 @@ int main(int argc, char* argv[])
}
/* prepare process input data, set the data mem to input tensor */
// get_input_data_darknet(image_file, input_data.data(), net_h, net_w);
get_input_data_darknet(image_file, input_data.data(), img_h, img_w, mean, scale);
get_input_data_yolov4(image_file, input_data, img_h, img_w, mean, scale);
/* run graph */
double min_time = DBL_MAX;
......@@ -831,95 +430,75 @@ int main(int argc, char* argv[])
min_time = std::min(min_time, cur);
max_time = std::max(max_time, cur);
}
fprintf(stderr, "Repeat %d times, thread %d, avg time %.2f ms, max_time %.2f ms, min_time %.2f ms\n", repeat_count,
num_thread, total_time / repeat_count, max_time, min_time);
fprintf(stderr, "Repeat %d times, thread %d, avg time %.2f ms, max_time %.2f ms, min_time %.2f ms\n", repeat_count, num_thread,
total_time/repeat_count, max_time, min_time);
fprintf(stderr, "--------------------------------------\n");
/* process the detection result */
image img = imread(image_file);
int output_node_num = get_graph_output_node_number(graph);
vector<layer> layers_params;
layers_params.clear();
for (int i = 0; i < output_node_num; ++i)
{
tensor_t out_tensor = get_graph_output_tensor(graph, i, 0); //"detection_out"
int out_dim[4];
get_tensor_shape(out_tensor, out_dim, 4);
layer l_params;
int out_w = out_dim[3];
int out_h = out_dim[2];
l_params = make_darknet_layer(1, out_w, out_h, img_w, img_h, numBBoxes, total_numAnchors, classes, layer_type);
layers_params.push_back(l_params);
float* out_data = ( float* )get_tensor_buffer(out_tensor);
forward_darknet_layer_cpu(out_data, l_params);
}
int nboxes = 0;
// get network boxes
detection* dets =
get_network_boxes(layers_params, img.w, img.h, img_w, img_h, thresh, hier_thresh, 0, relative, &nboxes);
tensor_t p16_output = get_graph_output_tensor(graph, 1, 0);
tensor_t p32_output = get_graph_output_tensor(graph, 0, 0);
if (nms != 0)
{
do_nms_sort(dets, nboxes, classes, nms);
}
float* p16_data = ( float*)get_tensor_buffer(p16_output);
float* p32_data = ( float*)get_tensor_buffer(p32_output);
int i, j;
for (i = 0; i < nboxes; ++i)
{
int cls = -1;
float best_class_prob = thresh;
for (j = 0; j < classes; ++j)
{
if (dets[i].prob[j] > best_class_prob)
{
cls = j;
best_class_prob = dets[i].prob[j];
}
}
if (cls >= 0)
{
box b = dets[i].bbox;
int left = (b.x - b.w / 2.) * img.w;
int right = (b.x + b.w / 2.) * img.w;
int top = (b.y - b.h / 2.) * img.h;
int bot = (b.y + b.h / 2.) * img.h;
draw_box(img, left, top, right, bot, 2, 125, 0, 125);
fprintf(stderr, "%2d: %3.0f%%, [%4d,%4d,%4d,%4d], %s\n", cls, best_class_prob * 100, left, top, right, bot, class_names[cls]);
}
/* postprocess */
const float prob_threshold = 0.45f;
const float nms_threshold = 0.25f;
if (dets[i].mask)
free(dets[i].mask);
if (dets[i].prob)
free(dets[i].prob);
}
free(dets);
save_image(img, "yolov4_tiny_out");
std::vector<Object> proposals;
std::vector<Object> objects16;
std::vector<Object> objects32;
std::vector<Object> objects;
/* release tengine */
for (int i = 0; i < output_node_num; ++i)
{
tensor_t out_tensor = get_graph_output_tensor(graph, i, 0);
release_graph_tensor(out_tensor);
}
generate_proposals(32, p32_data, prob_threshold, objects32);
proposals.insert(proposals.end(), objects32.begin(), objects32.end());
generate_proposals(16, p16_data, prob_threshold, objects16);
proposals.insert(proposals.end(), objects16.begin(), objects16.end());
qsort_descent_inplace(proposals);
std::vector<int> picked;
nms_sorted_bboxes(proposals, picked, nms_threshold);
/* yolov4 tiny draw the result */
int raw_h = img.rows;
int raw_w = img.cols;
free_image(img);
float ratio_x = (float)raw_w / img_w;
float ratio_y = (float)raw_h / img_h;
for (int i = 0; i < layers_params.size(); i++)
int count = picked.size();
fprintf(stderr, "detection num: %d\n",count);
objects.resize(count);
for (int i = 0; i < count; i++)
{
layer l = layers_params[i];
if (l.output)
free(l.output);
if (l.biases)
free(l.biases);
if (l.mask)
free(l.mask);
objects[i] = proposals[picked[i]];
float x0 = (objects[i].rect.x);
float y0 = (objects[i].rect.y);
float x1 = (objects[i].rect.x + objects[i].rect.width);
float y1 = (objects[i].rect.y + objects[i].rect.height);
x0 = x0 * ratio_x;
y0 = y0 * ratio_y;
x1 = x1 * ratio_x;
y1 = y1 * ratio_y;
x0 = std::max(std::min(x0, (float)(raw_w - 1)), 0.f);
y0 = std::max(std::min(y0, (float)(raw_h - 1)), 0.f);
x1 = std::max(std::min(x1, (float)(raw_w - 1)), 0.f);
y1 = std::max(std::min(y1, (float)(raw_h - 1)), 0.f);
objects[i].rect.x = x0;
objects[i].rect.y = y0;
objects[i].rect.width = x1 - x0;
objects[i].rect.height = y1 - y0;
}
release_graph_tensor(input_tensor);
draw_objects(img, objects);
/* release tengine */
postrun_graph(graph);
destroy_graph(graph);
release_tengine();
return 0;
}
......@@ -185,7 +185,6 @@ void get_input_data_focas(const char* image_file, float* input_data, int img_h,
float* input_temp = (float* )malloc(3 * 640 * 640 * sizeof(float));
/* nhwc to nchw */
int hw = img_h * img_w;
for (int h = 0; h < img_h; h++)
{
for (int w = 0; w < img_w; w++)
......@@ -357,7 +356,7 @@ int main(int argc, char* argv[])
int img_w = 640;
int img_c = 3;
const float mean[3] = {0, 0, 0};
const float scale[] = {0.003921, 0.003921, 0.003921};
const float scale[3] = {0.003921, 0.003921, 0.003921};
int repeat_count = 1;
int num_thread = 1;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
To comment, please register or sign in.