提交 eaa1cb37 编写于 作者: B BUG1989

add yolov4-tiny timvx example

上级 e6152e2a
......@@ -86,7 +86,7 @@ ENDIF()
# set(OpenCV_DIR /mnt/d/ubuntu/opencv_install/linux-armv7/lib/cmake/opencv4)
FIND_PACKAGE(OpenCV QUIET)
IF (OpenCV_FOUND AND ${TENGINE_TARGET_PROCESSOR} MATCHES "X86")
IF (OpenCV_FOUND)
# macro for adding examples
FUNCTION (TENGINE_EXAMPLE_CV name file)
ADD_EXECUTABLE (${name} "${CMAKE_CURRENT_SOURCE_DIR}/${file}" "${CMAKE_CURRENT_SOURCE_DIR}/common/tengine_operations.c")
......@@ -104,6 +104,7 @@ IF (OpenCV_FOUND AND ${TENGINE_TARGET_PROCESSOR} MATCHES "X86")
TENGINE_EXAMPLE_CV (tm_crnn tm_crnn.cpp)
TENGINE_EXAMPLE_CV (tm_alphapose tm_alphapose.cpp)
TENGINE_EXAMPLE_CV (tm_yolov4_tiny tm_yolov4_tiny.cpp)
TENGINE_EXAMPLE_CV (tm_yolov4_tiny_uint8 tm_yolov4_tiny_uint8.cpp)
TENGINE_EXAMPLE_CV (tm_yolov4_tiny_timvx tm_yolov4_tiny_timvx.cpp)
TENGINE_EXAMPLE_CV (tm_yolov5s tm_yolov5s.cpp)
TENGINE_EXAMPLE_CV (tm_yolov5s_timvx tm_yolov5s_timvx.cpp)
......
......@@ -266,7 +266,7 @@ static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects)
sprintf(text, "%s %.1f%%", class_names[obj.label], obj.prob * 100);
int baseLine = 0;
cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 1, 2, &baseLine);
int x = obj.rect.x;
int y = obj.rect.y - label_size.height - baseLine;
......@@ -278,7 +278,7 @@ static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects)
cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
cv::Scalar(255, 255, 255), -1);
cv::putText(image, text, cv::Point(x, y + label_size.height), cv::FONT_HERSHEY_SIMPLEX, 0.5,
cv::putText(image, text, cv::Point(x, y + label_size.height), cv::FONT_HERSHEY_SIMPLEX, 1,
cv::Scalar(0, 0, 0));
}
......
此差异已折叠。
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* License); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Copyright (c) 2021, OPEN AI LAB
* Author: 942002795@qq.com
* Update: xwwang@openailab.com
*/
#include <math.h>
#include <vector>
#include <string>
#include <algorithm>
#include <cmath>
#include <stdlib.h>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include "common.h"
#include "tengine/c_api.h"
#include "tengine_operations.h"
/* One detection candidate: a box plus its class id and confidence. */
struct Object
{
/* Bounding box; read and written through x, y, width and height. */
cv::Rect_<float> rect;
/* Class index; draw_objects uses it to index its class-name table. */
int label;
/* Confidence score; proposals are sorted in descending order of this value. */
float prob;
};
/* Logistic function: returns 1 / (1 + e^-x) as a float. */
static inline float sigmoid(float x)
{
    /* auto keeps whatever precision the selected exp() overload returns. */
    const auto denominator = 1.f + exp(-x);
    return static_cast<float>(1.f / denominator);
}
static inline float intersection_area(const Object& a, const Object& b)
{
cv::Rect_<float> inter = a.rect & b.rect;
return inter.area();
}
/*
 * Quicksort faceobjects[left..right] (inclusive bounds) in place, in
 * descending order of prob. The pivot is the prob of the middle element.
 */
static void qsort_descent_inplace(std::vector<Object>& faceobjects, int left, int right)
{
    const float pivot = faceobjects[(left + right) / 2].prob;
    int lo = left;
    int hi = right;

    while (lo <= hi)
    {
        /* skip elements that are already on the correct side of the pivot */
        while (faceobjects[lo].prob > pivot)
            ++lo;

        while (faceobjects[hi].prob < pivot)
            --hi;

        if (lo > hi)
            break;

        std::swap(faceobjects[lo], faceobjects[hi]);
        ++lo;
        --hi;
    }

    /* recurse into the two partitions; each one is its own OpenMP section */
#pragma omp parallel sections
    {
#pragma omp section
        {
            if (left < hi)
                qsort_descent_inplace(faceobjects, left, hi);
        }
#pragma omp section
        {
            if (lo < right)
                qsort_descent_inplace(faceobjects, lo, right);
        }
    }
}
/* Sort the whole vector in descending order of prob; an empty vector is left untouched. */
static void qsort_descent_inplace(std::vector<Object>& faceobjects)
{
    if (!faceobjects.empty())
    {
        qsort_descent_inplace(faceobjects, 0, faceobjects.size() - 1);
    }
}
static void nms_sorted_bboxes(const std::vector<Object>& faceobjects, std::vector<int>& picked, float nms_threshold)
{
picked.clear();
const int n = faceobjects.size();
std::vector<float> areas(n);
for (int i = 0; i < n; i++)
{
areas[i] = faceobjects[i].rect.area();
}
for (int i = 0; i < n; i++)
{
const Object& a = faceobjects[i];
int keep = 1;
for (int j = 0; j < (int)picked.size(); j++)
{
const Object& b = faceobjects[picked[j]];
// intersection over union
float inter_area = intersection_area(a, b);
float union_area = areas[i] + areas[picked[j]] - inter_area;
// float IoU = inter_area / union_area
if (inter_area / union_area > nms_threshold)
keep = 0;
}
if (keep)
picked.push_back(i);
}
}
/*
 * Load an image and fill input_data with a quantized, planar uint8 tensor.
 *
 * The image is converted to RGB, resized to img_w x img_h, normalized per
 * channel as (pixel - mean[c]) * scale[c], then quantized with
 * round(value / input_scale + zero_point) and clamped to [0, 255].
 * Pixels are read interleaved (h, w, c) and written planar (c, h, w).
 */
void get_input_data_yolov4_uint8(const char* image_file, uint8_t * input_data, int img_h, int img_w, const float* mean, const float* scale,
                                 float input_scale, int zero_point)
{
    cv::Mat sample = cv::imread(image_file, 1);
    cv::Mat img;

    const int color_code = (sample.channels() == 1) ? cv::COLOR_GRAY2RGB : cv::COLOR_BGR2RGB;
    cv::cvtColor(sample, img, color_code);

    /* resize to the network input size, then switch to float pixels */
    cv::resize(img, img, cv::Size(img_w, img_h));
    img.convertTo(img, CV_32FC3);
    float* img_data = (float* )img.data;

    const int plane_size = img_h * img_w;

    /* nhwc to nchw */
    for (int row = 0; row < img_h; row++)
    {
        for (int col = 0; col < img_w; col++)
        {
            const float* pixel = img_data + (row * img_w * 3 + col * 3);
            const int plane_offset = row * img_w + col;

            for (int ch = 0; ch < 3; ch++)
            {
                float input_fp32 = (pixel[ch] - mean[ch]) * scale[ch];

                /* quant to uint8, saturating at both ends of the range */
                int udata = round(input_fp32 / input_scale + ( float )zero_point);
                udata = std::max(0, std::min(255, udata));

                input_data[ch * plane_size + plane_offset] = udata;
            }
        }
    }
}
/*
 * Decode one YOLOv4-tiny output feature map into detection candidates.
 *
 * stride          down-sampling factor of this head; only 16 and 32 are
 *                 supported (they select the first / second anchor triple)
 * feat            dequantized head output, indexed as
 *                 [anchor][85][feat_h][feat_w] with feat_h = feat_w = 416 / stride;
 *                 the 85 channels are dx, dy, dw, dh, box score, 80 class scores
 * prob_threshold  minimum sigmoid(box score) * sigmoid(best class score)
 * objects         candidates are appended here; boxes are in the 416 x 416
 *                 network input coordinate space
 *
 * Any other stride is rejected with a message on stderr and nothing is
 * appended: it would otherwise index the anchor table out of bounds
 * (and divide by zero for stride 0).
 */
static void generate_proposals(int stride, const float* feat, float prob_threshold, std::vector<Object>& objects)
{
    static float anchors[12] = {10, 14, 23, 27, 37, 58, 81, 82, 135, 169, 344, 319};

    if (stride != 16 && stride != 32)
    {
        fprintf(stderr, "generate_proposals: unsupported stride %d (expected 16 or 32)\n", stride);
        return;
    }

    const int anchor_num = 3;
    const int cls_num = 80;
    const int feat_w = 416 / stride;
    const int feat_h = 416 / stride;
    const int channel_size = feat_h * feat_w;

    /* stride 16 uses anchors[0..5], stride 32 uses anchors[6..11] */
    const int anchor_base = (stride == 32) ? 6 : 0;

    for (int h = 0; h < feat_h; h++)
    {
        for (int w = 0; w < feat_w; w++)
        {
            for (int anchor = 0; anchor < anchor_num; anchor++)
            {
                /* index of channel 0 for this anchor at grid cell (h, w) */
                const int cell_index = anchor * 85 * channel_size + feat_w * h + w;

                /* pick the class with the highest raw score */
                int class_index = 0;
                float class_score = -FLT_MAX;
                for (int s = 0; s < cls_num; s++)
                {
                    float score = feat[cell_index + (s + 5) * channel_size];
                    if (score > class_score)
                    {
                        class_index = s;
                        class_score = score;
                    }
                }

                float box_score = feat[cell_index + 4 * channel_size];
                float final_score = sigmoid(box_score) * sigmoid(class_score);
                if (final_score < prob_threshold)
                    continue;

                float dx = sigmoid(feat[cell_index + 0 * channel_size]);
                float dy = sigmoid(feat[cell_index + 1 * channel_size]);
                float dw = feat[cell_index + 2 * channel_size];
                float dh = feat[cell_index + 3 * channel_size];

                float anchor_w = anchors[anchor_base + anchor * 2 + 0];
                float anchor_h = anchors[anchor_base + anchor * 2 + 1];

                /* box centre from the cell offset, box size from the anchor */
                float pred_x = (w + dx) * stride;
                float pred_y = (h + dy) * stride;
                float pred_w = exp(dw) * anchor_w;
                float pred_h = exp(dh) * anchor_h;

                float x0 = (pred_x - pred_w * 0.5f);
                float y0 = (pred_y - pred_h * 0.5f);
                float x1 = (pred_x + pred_w * 0.5f);
                float y1 = (pred_y + pred_h * 0.5f);

                Object obj;
                obj.rect.x = x0;
                obj.rect.y = y0;
                obj.rect.width = x1 - x0;
                obj.rect.height = y1 - y0;
                obj.label = class_index;
                obj.prob = final_score;
                objects.push_back(obj);
            }
        }
    }
}
/*
 * Log every detection to stderr, draw its box and a "<class> <prob>%" label
 * on a copy of bgr, and write the result to yolov4_tiny_out.jpg.
 */
static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects)
{
    static const char* class_names[] = {
        "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
        "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
        "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
        "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
        "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
        "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
        "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
        "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
        "hair drier", "toothbrush"
    };

    /* draw on a copy so the caller's image stays unmodified */
    cv::Mat image = bgr.clone();

    for (const Object& det : objects)
    {
        const char* class_name = class_names[det.label];

        fprintf(stderr, "%2d: %3.0f%%, [%4.0f, %4.0f, %4.0f, %4.0f], %s\n", det.label, det.prob * 100, det.rect.x,
                det.rect.y, det.rect.x + det.rect.width, det.rect.y + det.rect.height, class_name);

        cv::rectangle(image, det.rect, cv::Scalar(255, 0, 0));

        char text[256];
        sprintf(text, "%s %.1f%%", class_name, det.prob * 100);

        int baseLine = 0;
        cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 1, 2, &baseLine);

        /* place the label above the box, pulled back inside the top and right edges */
        int label_x = det.rect.x;
        int label_y = det.rect.y - label_size.height - baseLine;
        label_y = std::max(label_y, 0);
        label_x = std::min(label_x, image.cols - label_size.width);

        const cv::Point label_origin(label_x, label_y);
        const cv::Size label_box(label_size.width, label_size.height + baseLine);

        /* white filled background, then black text on top of it */
        cv::rectangle(image, cv::Rect(label_origin, label_box), cv::Scalar(255, 255, 255), -1);
        cv::putText(image, text, cv::Point(label_x, label_y + label_size.height), cv::FONT_HERSHEY_SIMPLEX, 1,
                    cv::Scalar(0, 0, 0));
    }

    cv::imwrite("yolov4_tiny_out.jpg", image);
}
/* Print the command-line synopsis to stderr. */
void show_usage()
{
    static const char usage_text[] =
        "[Usage]: [-h]\n [-m model_file] [-i image_file] [-r repeat_count] [-t thread_count] \n";

    fprintf(stderr, "%s", usage_text);
}
/*
 * YOLOv4-tiny uint8 example entry point.
 *
 * Options: -m model_file, -i image_file, -r repeat_count, -t thread_count,
 * -h (print usage and exit).
 *
 * Loads the model, quantizes the image into the input tensor, runs the graph
 * repeat_count times, dequantizes the two output heads, decodes and
 * NMS-filters the detections, rescales them to the original image size and
 * hands them to draw_objects.
 *
 * Returns 0 on success (or after -h), -1 on any failure.
 */
int main(int argc, char* argv[])
{
    const char* model_file = nullptr;
    const char* image_file = nullptr;

    int img_h = 416;
    int img_w = 416;
    int img_c = 3;
    const float mean[3] = {0, 0, 0};
    const float scale[3] = {0.003921, 0.003921, 0.003921};

    int repeat_count = 1;
    int num_thread = 1;

    int res;
    /* "h" carries no trailing ':' so that a bare -h is accepted and shows the usage */
    while ((res = getopt(argc, argv, "m:i:r:t:h")) != -1)
    {
        switch (res)
        {
            case 'm':
                model_file = optarg;
                break;
            case 'i':
                image_file = optarg;
                break;
            case 'r':
                repeat_count = std::strtoul(optarg, nullptr, 10);
                break;
            case 't':
                num_thread = std::strtoul(optarg, nullptr, 10);
                break;
            case 'h':
                show_usage();
                return 0;
            default:
                break;
        }
    }

    /* check files */
    if (nullptr == model_file)
    {
        fprintf(stderr, "Error: Tengine model file not specified!\n");
        show_usage();
        return -1;
    }

    if (nullptr == image_file)
    {
        fprintf(stderr, "Error: Image file not specified!\n");
        show_usage();
        return -1;
    }

    if (!check_file_exist(model_file) || !check_file_exist(image_file))
        return -1;

    cv::Mat img = cv::imread(image_file, 1);
    if (img.empty())
    {
        fprintf(stderr, "cv::imread %s failed\n", image_file);
        return -1;
    }

    /* set runtime options */
    struct options opt;
    opt.num_thread = num_thread;
    opt.cluster = TENGINE_CLUSTER_ALL;
    opt.precision = TENGINE_MODE_UINT8;
    opt.affinity = 0;

    /* initialize tengine */
    if (init_tengine() != 0)
    {
        fprintf(stderr, "Initial tengine failed.\n");
        return -1;
    }
    fprintf(stderr, "tengine-lite library version: %s\n", get_tengine_version());

    /* create graph, load tengine model xxx.tmfile */
    graph_t graph = create_graph(nullptr, "tengine", model_file);
    if (graph == nullptr)
    {
        fprintf(stderr, "Create graph failed.\n");
        return -1;
    }

    int img_size = img_h * img_w * img_c;
    int dims[] = {1, 3, img_h, img_w};
    std::vector<uint8_t> input_data(img_size);

    tensor_t input_tensor = get_graph_input_tensor(graph, 0, 0);
    if (input_tensor == nullptr)
    {
        fprintf(stderr, "Get input tensor failed\n");
        return -1;
    }

    if (set_tensor_shape(input_tensor, dims, 4) < 0)
    {
        fprintf(stderr, "Set input tensor shape failed\n");
        return -1;
    }

    if (set_tensor_buffer(input_tensor, input_data.data(), img_size) < 0)
    {
        fprintf(stderr, "Set input tensor buffer failed\n");
        return -1;
    }

    /* prerun graph, set work options(num_thread, cluster, precision) */
    if (prerun_graph_multithread(graph, opt) < 0)
    {
        fprintf(stderr, "Prerun multithread graph failed.\n");
        return -1;
    }

    /* prepare process input data, set the data mem to input tensor */
    float input_scale = 0.f;
    int input_zero_point = 0;
    get_tensor_quant_param(input_tensor, &input_scale, &input_zero_point, 1);
    get_input_data_yolov4_uint8(image_file, input_data.data(), img_h, img_w, mean, scale, input_scale, input_zero_point);

    /* run graph */
    double min_time = DBL_MAX;
    double max_time = DBL_MIN;
    double total_time = 0.;
    for (int i = 0; i < repeat_count; i++)
    {
        double start = get_current_time();
        if (run_graph(graph, 1) < 0)
        {
            fprintf(stderr, "Run graph failed\n");
            return -1;
        }
        double end = get_current_time();
        double cur = end - start;
        total_time += cur;
        min_time = std::min(min_time, cur);
        max_time = std::max(max_time, cur);
    }
    fprintf(stderr, "Repeat %d times, thread %d, avg time %.2f ms, max_time %.2f ms, min_time %.2f ms\n", repeat_count, num_thread,
            total_time/repeat_count, max_time, min_time);
    fprintf(stderr, "--------------------------------------\n");

    /* dequant output data: output node 1 is the stride-16 head, node 0 the stride-32 head */
    tensor_t p16_output = get_graph_output_tensor(graph, 1, 0);
    tensor_t p32_output = get_graph_output_tensor(graph, 0, 0);
    if (p16_output == nullptr || p32_output == nullptr)
    {
        /* the handles are passed straight to the buffer accessors below, so stop here */
        fprintf(stderr, "Get output tensor failed\n");
        return -1;
    }

    float p16_scale = 0.f;
    float p32_scale = 0.f;
    int p16_zero_point = 0;
    int p32_zero_point = 0;

    get_tensor_quant_param(p16_output, &p16_scale, &p16_zero_point, 1);
    get_tensor_quant_param(p32_output, &p32_scale, &p32_zero_point, 1);

    int p16_count = get_tensor_buffer_size(p16_output) / sizeof(uint8_t);
    int p32_count = get_tensor_buffer_size(p32_output) / sizeof(uint8_t);

    uint8_t* p16_data_u8 = ( uint8_t* )get_tensor_buffer(p16_output);
    uint8_t* p32_data_u8 = ( uint8_t* )get_tensor_buffer(p32_output);

    std::vector<float> p16_data(p16_count);
    std::vector<float> p32_data(p32_count);

    for (int c = 0; c < p16_count; c++)
    {
        p16_data[c] = (( float )p16_data_u8[c] - ( float )p16_zero_point) * p16_scale;
    }

    for (int c = 0; c < p32_count; c++)
    {
        p32_data[c] = (( float )p32_data_u8[c] - ( float )p32_zero_point) * p32_scale;
    }

    /* postprocess */
    const float prob_threshold = 0.45f;
    const float nms_threshold = 0.25f;

    std::vector<Object> proposals;
    std::vector<Object> objects16;
    std::vector<Object> objects32;
    std::vector<Object> objects;

    generate_proposals(32, p32_data.data(), prob_threshold, objects32);
    proposals.insert(proposals.end(), objects32.begin(), objects32.end());
    generate_proposals(16, p16_data.data(), prob_threshold, objects16);
    proposals.insert(proposals.end(), objects16.begin(), objects16.end());

    /* NMS expects the candidates sorted by descending score */
    qsort_descent_inplace(proposals);

    std::vector<int> picked;
    nms_sorted_bboxes(proposals, picked, nms_threshold);

    /* yolov4 tiny draw the result */
    int raw_h = img.rows;
    int raw_w = img.cols;

    /* scale factors from the network input size back to the original image */
    float ratio_x = (float)raw_w / img_w;
    float ratio_y = (float)raw_h / img_h;

    int count = picked.size();
    fprintf(stderr, "detection num: %d\n",count);

    objects.resize(count);
    for (int i = 0; i < count; i++)
    {
        objects[i] = proposals[picked[i]];
        float x0 = (objects[i].rect.x);
        float y0 = (objects[i].rect.y);
        float x1 = (objects[i].rect.x + objects[i].rect.width);
        float y1 = (objects[i].rect.y + objects[i].rect.height);

        x0 = x0 * ratio_x;
        y0 = y0 * ratio_y;
        x1 = x1 * ratio_x;
        y1 = y1 * ratio_y;

        /* clip the box to the image bounds */
        x0 = std::max(std::min(x0, (float)(raw_w - 1)), 0.f);
        y0 = std::max(std::min(y0, (float)(raw_h - 1)), 0.f);
        x1 = std::max(std::min(x1, (float)(raw_w - 1)), 0.f);
        y1 = std::max(std::min(y1, (float)(raw_h - 1)), 0.f);

        objects[i].rect.x = x0;
        objects[i].rect.y = y0;
        objects[i].rect.width = x1 - x0;
        objects[i].rect.height = y1 - y0;
    }

    draw_objects(img, objects);

    /* release tengine */
    postrun_graph(graph);
    destroy_graph(graph);
    release_tengine();

    return 0;
}
......@@ -286,8 +286,7 @@ static int onnx_run(const int8_t* in_data, int8_t** out_data, int element_size,
for (int j = start_2; j < stop_2; ++j)
{
int len = stop_3 - start_3;
int input_off =
n * in_dim_1 * in_dim_2 * in_dim_3 + i * in_dim_2 * in_dim_3 + j * in_dim_3 + start_3;
int input_off = n * in_dim_1 * in_dim_2 * in_dim_3 + i * in_dim_2 * in_dim_3 + j * in_dim_3 + start_3;
memcpy(output, input + input_off * element_size, (size_t)len * element_size);
output += len * element_size;
}
......@@ -374,7 +373,8 @@ static int run(struct node_ops* node_ops, struct exec_node* exec_node, struct ex
{
struct node* ir_node = exec_node->ir_node;
struct graph* ir_graph = ir_node->graph;
struct tensor* input_tensor = get_ir_graph_tensor(ir_graph, ir_node->input_tensors[0]);
struct tensor* input_tensor = get_ir_graph_tensor(ir_graph, ir_node->input_tensors[0]);
struct tensor* output_tensor = get_ir_graph_tensor(ir_graph, ir_node->output_tensors[0]);
struct slice_param_ref op_param;
slice_param_t* _param = ( struct slice_param* )(ir_node->op.param_mem);
......@@ -408,12 +408,11 @@ static int run(struct node_ops* node_ops, struct exec_node* exec_node, struct ex
// set the output
for (int i = 0; i < op_param.out_num; ++i)
{
struct tensor* out_tensor = get_ir_graph_tensor(ir_graph, ir_node->output_tensors[i]);
for (int j = 0; j < op_param.dim_num; ++j)
{
op_param.output_shape[i].dims[j] = out_tensor->dims[j];
op_param.output_shape[i].dims[j] = output_tensor->dims[j];
}
out_data_ptrs[i] = ( int8_t* )out_tensor->data;
out_data_ptrs[i] = ( int8_t* )output_tensor->data;
}
}
else if (op_param.ismxnet || op_param.isonnx)
......@@ -438,20 +437,10 @@ static int run(struct node_ops* node_ops, struct exec_node* exec_node, struct ex
op_param.in_shape_2[idx] = input_tensor->dims[idx];
}
}
struct tensor* out_tensor = get_ir_graph_tensor(ir_graph, ir_node->output_tensors[0]);
// std::vector<int> output_dim = o_tensor->GetShape().GetDim();
out_data_ptrs[0] = ( int8_t* )out_tensor->data;
// Set the int8 output quant param
// if(data_type == TENGINE_DT_INT8)
// {
// auto* o_quant = o_tensor->GetQuantParam();
// QuantParam q_param;
// q_param.scale = op_param.out_scale;
// o_quant->resize(0);
// o_quant->push_back(q_param);
// }
if (input_tensor->dims[0] == out_tensor->dims[0] && input_tensor->dims[1] == out_tensor->dims[1] &&
input_tensor->dims[2] == out_tensor->dims[2] && input_tensor->dims[3] == out_tensor->dims[3])
out_data_ptrs[0] = ( int8_t* )output_tensor->data;
if (input_tensor->dims[0] == output_tensor->dims[0] && input_tensor->dims[1] == output_tensor->dims[1] &&
input_tensor->dims[2] == output_tensor->dims[2] && input_tensor->dims[3] == output_tensor->dims[3])
{
memcpy(( void* )(out_data_ptrs[0]), ( void* )input, mem_size*input_tensor->elem_num);
sys_free(out_data_ptrs);
......@@ -479,24 +468,47 @@ static int run(struct node_ops* node_ops, struct exec_node* exec_node, struct ex
dim_idx++;
}
}
struct tensor* out_tensor = get_ir_graph_tensor(ir_graph, ir_node->output_tensors[0]);
out_data_ptrs[0] = ( int8_t* )out_tensor->data;
// Set the int8 output quant param
// if(data_type == TENGINE_DT_INT8)
// {
// auto* o_quant = o_tensor->GetQuantParam();
// QuantParam q_param;
// q_param.scale = op_param.out_scale;
// o_quant->resize(0);
// o_quant->push_back(q_param);
// }
out_data_ptrs[0] = ( int8_t* )output_tensor->data;
}
int ret = -1;
if (input_tensor->data_type == TENGINE_DT_FP32)
ret = ref_slice_common(input, out_data_ptrs, sizeof(float), &op_param);
else if (input_tensor->data_type == TENGINE_DT_UINT8)
ret = ref_slice_common(input, out_data_ptrs, sizeof(uint8_t), &op_param);
else if (input_tensor->data_type == TENGINE_DT_UINT8) // ugly implement, need to refactor !
{
/* dequant to fp32 */
uint8_t* input_uint8 = input_tensor->data;
uint8_t* output_uint8 = output_tensor->data;
float input_scale = input_tensor->scale;
float output_scale = output_tensor->scale;
int32_t input_zero = input_tensor->zero_point;
int32_t output_zero = output_tensor->zero_point;
float* input_fp32 = (float*)sys_malloc(input_tensor->elem_num * sizeof(float));
float* output_fp32 = (float*)sys_malloc(output_tensor->elem_num * sizeof(float));
out_data_ptrs[0] = ( int8_t* )output_fp32;
for(int i=0; i<input_tensor->elem_num; i++)
{
input_fp32[i] = ((float )input_uint8[i] - (float )input_zero) * input_scale;
}
ret = ref_slice_common((int8_t *)input_fp32, out_data_ptrs, sizeof(float), &op_param);
/* quant to uint8 */
for(int i=0; i<output_tensor->elem_num; i++)
{
int udata = round(output_fp32[i] / output_scale + output_zero);
if (udata > 255)
udata = 255;
else if (udata < 0)
udata = 0;
output_uint8[i] = udata;
}
free(input_fp32);
free(output_fp32);
}
sys_free(out_data_ptrs);
if (ret < 0)
......
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* License); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Copyright (c) 2021, OPEN AI LAB
* Author: hhchen@openailab.com
*/
#include "timvx_executor.hpp"
extern "C"
{
#include "operator/op.h"
}
/*
 * Create a TIM-VX Mish operation for ir_node and bind the node's first input
 * and first output tensors to it. Always returns true.
 */
bool VXEngine::AddMishNode(struct node* ir_node)
{
    struct graph* ir_graph = ir_node->graph;
    struct tensor* src = get_ir_graph_tensor(ir_graph, ir_node->input_tensors[0]);
    struct tensor* dst = get_ir_graph_tensor(ir_graph, ir_node->output_tensors[0]);

    auto mish_op = graph->CreateOperation<tim::vx::ops::Mish>();

    /* the VX tensors are looked up by the IR tensor index */
    mish_op->BindInputs({ this->vx_tensor_map[src->index] });
    mish_op->BindOutputs({ this->vx_tensor_map[dst->index] });

    return true;
}
......@@ -33,7 +33,6 @@ extern "C"
bool VXEngine::AddSliceNode(struct node* ir_node)
{
struct graph* ir_graph = ir_node->graph;
struct tensor* input_tensor = get_ir_graph_tensor(ir_graph, ir_node->input_tensors[0]);
struct tensor* output_tensor = get_ir_graph_tensor(ir_graph, ir_node->output_tensors[0]);
......@@ -42,7 +41,7 @@ bool VXEngine::AddSliceNode(struct node* ir_node)
uint32_t axis = output_tensor->dim_num - 1 - param->axis;
std::vector<int32_t> start;
for (int i = output_tensor->dim_num - 1; i >= 0; i--)
for (int i = 0; i < output_tensor->dim_num; i++)
{
if (axis == i)
start.push_back(param->begin);
......@@ -51,12 +50,12 @@ bool VXEngine::AddSliceNode(struct node* ir_node)
}
std::vector<int32_t> length;
for (int i = output_tensor->dim_num - 1; i >= 0; i--)
for (int i = 0; i < output_tensor->dim_num; i++)
{
if (axis == i)
length.push_back(param->end - param->begin);
else
length.push_back(output_tensor->dims[i]);
length.push_back(output_tensor->dims[output_tensor->dim_num - 1 - i]);
}
auto slice = this->graph->CreateOperation<tim::vx::ops::Slice>(output_tensor->dim_num, start, length);
......
......@@ -224,6 +224,9 @@ int VXEngine::Build(struct subgraph* subgraph)
case OP_INTERP:
this->AddInterpNode(ir_node);
break;
case OP_MISH:
this->AddMishNode(ir_node);
break;
case OP_PERMUTE:
this->AddPermuteNode(ir_node);
break;
......
......@@ -103,6 +103,7 @@ private:
bool AddGatherNode(struct node* node);
bool AddHardSwishNode(struct node* node);
bool AddInterpNode(struct node* ir_node);
bool AddMishNode(struct node* ir_node);
bool AddPermuteNode(struct node* ir_node);
bool AddPoolingNode(struct node* ir_node);
bool AddPReluNode(struct node* ir_node);
......
......@@ -121,7 +121,7 @@ const int timvx_supported_ops[] = {
// OP_UNSQUEEZE,
OP_UPSAMPLE,
// OP_ZEROSLIKE,
// OP_MISH,
OP_MISH,
// OP_LOGSOFTMAX,
// OP_RELU1,
// OP_L2NORMALIZATION,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册