Unverified commit c410d7a9, authored by rogday, committed by GitHub

Merge pull request #20671 from rogday:yolov4x-mish

Add support for YOLOv4x-mish

* backport to 3.4 to support yolov4x-mish

* add YOLOv4x-mish test

* address review comments
Co-authored-by: Guo Xu <guoxu@1school.com.cn>
Parent 6fa63dcc
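For context, and not part of the patch itself, a minimal sketch of loading the new model through the public DNN API. The cfg/weights file names follow the test added below; the sample image name and the 416x416 input size are assumptions and may need to match the network's actual width/height.

#include <opencv2/dnn.hpp>
#include <opencv2/imgcodecs.hpp>
#include <vector>

int main()
{
    // Load the Darknet model whose [yolo] sections now carry the new_coords key.
    cv::dnn::Net net = cv::dnn::readNetFromDarknet("yolov4x-mish.cfg", "yolov4x-mish.weights");
    // Per the supportBackend change below, nGraph is excluded when new_coords != 0,
    // so the Region layer runs on the OpenCV backend.
    net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);

    cv::Mat img = cv::imread("dog416.png");  // assumed sample image
    cv::Mat blob = cv::dnn::blobFromImage(img, 1.0 / 255, cv::Size(416, 416),
                                          cv::Scalar(), /*swapRB=*/true, /*crop=*/false);
    net.setInput(blob);

    std::vector<cv::Mat> outs;
    net.forward(outs, net.getUnconnectedOutLayersNames());  // one output per [yolo] head
    return 0;
}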
@@ -470,7 +470,7 @@ namespace cv {
fused_layer_names.push_back(last_layer);
}
void setYolo(int classes, const std::vector<int>& mask, const std::vector<float>& anchors, float thresh, float nms_threshold, float scale_x_y)
void setYolo(int classes, const std::vector<int>& mask, const std::vector<float>& anchors, float thresh, float nms_threshold, float scale_x_y, int new_coords)
{
cv::dnn::LayerParams region_param;
region_param.name = "Region-name";
@@ -484,6 +484,7 @@ namespace cv {
region_param.set<float>("thresh", thresh);
region_param.set<float>("nms_threshold", nms_threshold);
region_param.set<float>("scale_x_y", scale_x_y);
region_param.set<int>("new_coords", new_coords);
std::vector<float> usedAnchors(numAnchors * 2);
for (int i = 0; i < numAnchors; ++i)
@@ -882,6 +883,7 @@ namespace cv {
float thresh = getParam<float>(layer_params, "thresh", 0.2);
float nms_threshold = getParam<float>(layer_params, "nms_threshold", 0.0);
float scale_x_y = getParam<float>(layer_params, "scale_x_y", 1.0);
int new_coords = getParam<int>(layer_params, "new_coords", 0);
std::string anchors_values = getParam<std::string>(layer_params, "anchors", std::string());
CV_Assert(!anchors_values.empty());
@@ -894,7 +896,7 @@ namespace cv {
CV_Assert(classes > 0 && num_of_anchors > 0 && (num_of_anchors * 2) == anchors_vec.size());
setParams.setPermute(false);
setParams.setYolo(classes, mask_vec, anchors_vec, thresh, nms_threshold, scale_x_y);
setParams.setYolo(classes, mask_vec, anchors_vec, thresh, nms_threshold, scale_x_y, new_coords);
}
else {
CV_Error(cv::Error::StsParseError, "Unknown layer type: " + layer_type);
......
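To make the parameter flow above concrete: a hypothetical sketch (not part of the patch) of the LayerParams that setYolo() builds for a [yolo] section, with the new new_coords flag forwarded from the cfg parser. The numeric values are placeholders, not values from yolov4x-mish.cfg.

#include <opencv2/dnn/all_layers.hpp>

// Illustrative helper mirroring what setYolo() does with the parsed cfg values.
cv::Ptr<cv::dnn::RegionLayer> makeRegionForIllustration()
{
    cv::dnn::LayerParams params;
    params.name = "Region-name";
    params.type = "Region";
    params.set<int>("classes", 80);        // placeholder values
    params.set<float>("thresh", 0.2f);
    params.set<float>("nms_threshold", 0.0f);
    params.set<float>("scale_x_y", 1.05f);
    params.set<int>("new_coords", 1);      // the key added by this change
    // anchors, mask and the bias blob are omitted; setYolo() fills them from the cfg.
    return cv::dnn::RegionLayer::create(params);
}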
@@ -64,6 +64,7 @@ class RegionLayerImpl CV_FINAL : public RegionLayer
public:
int coords, classes, anchors, classfix;
float thresh, nmsThreshold, scale_x_y;
int new_coords;
bool useSoftmax, useLogistic;
#ifdef HAVE_OPENCL
UMat blob_umat;
@@ -83,6 +84,7 @@ public:
useLogistic = params.get<bool>("logistic", false);
nmsThreshold = params.get<float>("nms_threshold", 0.4);
scale_x_y = params.get<float>("scale_x_y", 1.0); // Yolov4
new_coords = params.get<int>("new_coords", 0); // Yolov4x-mish
CV_Assert(nmsThreshold >= 0.);
CV_Assert(coords == 4);
@@ -113,7 +115,7 @@ public:
{
#ifdef HAVE_DNN_NGRAPH
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
return INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2020_2) && preferableTarget != DNN_TARGET_MYRIAD;
return INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2020_2) && preferableTarget != DNN_TARGET_MYRIAD && new_coords == 0;
#endif
return backendId == DNN_BACKEND_OPENCV;
}
@@ -259,26 +261,28 @@ public:
const float *srcData = inpBlob.ptr<float>();
float *dstData = outBlob.ptr<float>();
// logistic activation for t0, for each grid cell (X x Y x Anchor-index)
for (int i = 0; i < batch_size*rows*cols*anchors; ++i) {
int index = cell_size*i;
float x = srcData[index + 4];
dstData[index + 4] = logistic_activate(x); // logistic activation
}
if (useSoftmax) { // Yolo v2
if (new_coords == 0) {
// logistic activation for t0, for each grid cell (X x Y x Anchor-index)
for (int i = 0; i < batch_size*rows*cols*anchors; ++i) {
int index = cell_size*i;
softmax_activate(srcData + index + 5, classes, 1, dstData + index + 5);
float x = srcData[index + 4];
dstData[index + 4] = logistic_activate(x); // logistic activation
}
}
else if (useLogistic) { // Yolo v3
for (int i = 0; i < batch_size*rows*cols*anchors; ++i){
int index = cell_size*i;
const float* input = srcData + index + 5;
float* output = dstData + index + 5;
for (int c = 0; c < classes; ++c)
output[c] = logistic_activate(input[c]);
if (useSoftmax) { // Yolo v2
for (int i = 0; i < batch_size*rows*cols*anchors; ++i) {
int index = cell_size*i;
softmax_activate(srcData + index + 5, classes, 1, dstData + index + 5);
}
}
else if (useLogistic) { // Yolo v3
for (int i = 0; i < batch_size*rows*cols*anchors; ++i){
int index = cell_size*i;
const float* input = srcData + index + 5;
float* output = dstData + index + 5;
for (int c = 0; c < classes; ++c)
output[c] = logistic_activate(input[c]);
}
}
}
for (int b = 0; b < batch_size; ++b)
@@ -290,20 +294,46 @@ public:
int index = (y*cols + x)*anchors + a; // index for each grid-cell & anchor
int p_index = index_sample_offset + index * cell_size + 4;
float scale = dstData[p_index];
if (classfix == -1 && scale < .5) scale = 0; // if(t0 < 0.5) t0 = 0;
if (classfix == -1 && scale < .5)
{
scale = 0; // if(t0 < 0.5) t0 = 0;
}
int box_index = index_sample_offset + index * cell_size;
float x_tmp = (logistic_activate(srcData[box_index + 0]) - 0.5f) * scale_x_y + 0.5f;
float y_tmp = (logistic_activate(srcData[box_index + 1]) - 0.5f) * scale_x_y + 0.5f;
dstData[box_index + 0] = (x + x_tmp) / cols;
dstData[box_index + 1] = (y + y_tmp) / rows;
dstData[box_index + 2] = exp(srcData[box_index + 2]) * biasData[2 * a] / wNorm;
dstData[box_index + 3] = exp(srcData[box_index + 3]) * biasData[2 * a + 1] / hNorm;
int class_index = index_sample_offset + index * cell_size + 5;
for (int j = 0; j < classes; ++j) {
float prob = scale*dstData[class_index + j]; // prob = IoU(box, object) = t0 * class-probability
dstData[class_index + j] = (prob > thresh) ? prob : 0; // if (IoU < threshold) IoU = 0;
if (new_coords == 1) {
float x_tmp = (srcData[box_index + 0] - 0.5f) * scale_x_y + 0.5f;
float y_tmp = (srcData[box_index + 1] - 0.5f) * scale_x_y + 0.5f;
dstData[box_index + 0] = (x + x_tmp) / cols;
dstData[box_index + 1] = (y + y_tmp) / rows;
dstData[box_index + 2] = (srcData[box_index + 2]) * (srcData[box_index + 2]) * 4 * biasData[2 * a] / wNorm;
dstData[box_index + 3] = (srcData[box_index + 3]) * (srcData[box_index + 3]) * 4 * biasData[2 * a + 1] / hNorm;
scale = srcData[p_index];
if (classfix == -1 && scale < thresh)
{
scale = 0; // if (t0 < thresh) t0 = 0;
}
int class_index = index_sample_offset + index * cell_size + 5;
for (int j = 0; j < classes; ++j) {
float prob = scale*srcData[class_index + j]; // prob = IoU(box, object) = t0 * class-probability
dstData[class_index + j] = (prob > thresh) ? prob : 0; // if (IoU < threshold) IoU = 0;
}
}
else
{
float x_tmp = (logistic_activate(srcData[box_index + 0]) - 0.5f) * scale_x_y + 0.5f;
float y_tmp = (logistic_activate(srcData[box_index + 1]) - 0.5f) * scale_x_y + 0.5f;
dstData[box_index + 0] = (x + x_tmp) / cols;
dstData[box_index + 1] = (y + y_tmp) / rows;
dstData[box_index + 2] = exp(srcData[box_index + 2]) * biasData[2 * a] / wNorm;
dstData[box_index + 3] = exp(srcData[box_index + 3]) * biasData[2 * a + 1] / hNorm;
int class_index = index_sample_offset + index * cell_size + 5;
for (int j = 0; j < classes; ++j) {
float prob = scale*dstData[class_index + j]; // prob = IoU(box, object) = t0 * class-probability
dstData[class_index + j] = (prob > thresh) ? prob : 0; // if (IoU < threshold) IoU = 0;
}
}
}
if (nmsThreshold > 0) {
......
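For reference, a condensed standalone sketch (illustrative names, not library API) of the two decoding paths implemented in the Region layer above: with new_coords == 1 the network already applies the activations, so x/y skip logistic_activate() and w/h use (2*t)^2 * anchor instead of exp(t) * anchor.

#include <cmath>

struct Box { float x, y, w, h; };

static inline float logistic(float x) { return 1.f / (1.f + std::exp(-x)); }

// Decodes one anchor's raw outputs t[0..3] for the grid cell (col, row).
Box decodeCell(const float* t, int col, int row, int cols, int rows,
               float anchorW, float anchorH, float wNorm, float hNorm,
               float scale_x_y, int new_coords)
{
    Box b;
    if (new_coords == 1)
    {
        b.x = (col + (t[0] - 0.5f) * scale_x_y + 0.5f) / cols;
        b.y = (row + (t[1] - 0.5f) * scale_x_y + 0.5f) / rows;
        b.w = t[2] * t[2] * 4.f * anchorW / wNorm;   // (2*t)^2 * anchor
        b.h = t[3] * t[3] * 4.f * anchorH / hNorm;
    }
    else
    {
        b.x = (col + (logistic(t[0]) - 0.5f) * scale_x_y + 0.5f) / cols;
        b.y = (row + (logistic(t[1]) - 0.5f) * scale_x_y + 0.5f) / rows;
        b.w = std::exp(t[2]) * anchorW / wNorm;
        b.h = std::exp(t[3]) * anchorH / hNorm;
    }
    return b;
}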
@@ -681,6 +681,78 @@ TEST_P(Test_Darknet_nets, YOLOv4_tiny)
#endif
}
TEST_P(Test_Darknet_nets, YOLOv4x_mish)
{
applyTestTag(CV_TEST_TAG_LONG, (target == DNN_TARGET_CPU ? CV_TEST_TAG_MEMORY_1GB : CV_TEST_TAG_MEMORY_2GB));
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2020040000) // nGraph compilation failure
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL)
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16)
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
#endif
#if defined(INF_ENGINE_RELEASE)
if (target == DNN_TARGET_MYRIAD) // NC_OUT_OF_MEMORY
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
#endif
// batchId, classId, confidence, left, top, right, bottom
const int N0 = 3;
const int N1 = 5;
static const float ref_[/* (N0 + N1) * 7 */] = {
0, 16, 0.925536f, 0.17188f, 0.386832f, 0.406138f, 0.941696f,
0, 1, 0.912028f, 0.162125f, 0.208863f, 0.741316f, 0.729332f,
0, 7, 0.841018f, 0.608953f, 0.128653f, 0.900692f, 0.295657f,
1, 2, 0.925697f, 0.650438f, 0.458118f, 0.813927f, 0.661775f,
1, 0, 0.882156f, 0.203644f, 0.365763f, 0.265473f, 0.632195f,
1, 2, 0.848857f, 0.451044f, 0.462997f, 0.496629f, 0.522719f,
1, 9, 0.736015f, 0.374503f, 0.316029f, 0.399358f, 0.392883f,
1, 9, 0.727129f, 0.662469f, 0.373687f, 0.687877f, 0.441335f,
};
Mat ref(N0 + N1, 7, CV_32FC1, (void*)ref_);
double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.006 : 8e-5;
double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.042 : 3e-4;
std::string config_file = "yolov4x-mish.cfg";
std::string weights_file = "yolov4x-mish.weights";
#if defined(INF_ENGINE_RELEASE)
if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 ||
backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && target == DNN_TARGET_MYRIAD &&
getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
{
scoreDiff = 0.04;
iouDiff = 0.2;
}
#endif
{
SCOPED_TRACE("batch size 1");
testDarknetModel(config_file, weights_file, ref.rowRange(0, N0), scoreDiff, iouDiff);
}
{
SCOPED_TRACE("batch size 2");
#if defined(INF_ENGINE_RELEASE)
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
{
if (target == DNN_TARGET_OPENCL)
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
else if (target == DNN_TARGET_OPENCL_FP16 && INF_ENGINE_VER_MAJOR_LE(202010000))
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
else if (target == DNN_TARGET_MYRIAD &&
getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X);
}
#endif
testDarknetModel(config_file, weights_file, ref, scoreDiff, iouDiff);
}
}
INSTANTIATE_TEST_CASE_P(/**/, Test_Darknet_nets, dnnBackendsAndTargets());
......