/*M/////////////////////////////////////////////////////////////////////////////////////// // // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. // // By downloading, copying, installing or using the software you agree to this license. // If you do not agree to this license, do not download, install, // copy or use the software. // // // License Agreement // For Open Source Computer Vision Library // (3-clause BSD License) // // Copyright (C) 2017, Intel Corporation, all rights reserved. // Third party copyrights are property of their respective owners. // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: // // * Redistributions of source code must retain the above copyright notice, // this list of conditions and the following disclaimer. // // * Redistributions in binary form must reproduce the above copyright notice, // this list of conditions and the following disclaimer in the documentation // and/or other materials provided with the distribution. // // * Neither the names of the copyright holders nor the names of the contributors // may be used to endorse or promote products derived from this software // without specific prior written permission. // // This software is provided by the copyright holders and contributors "as is" and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. // In no event shall copyright holders or contributors be liable for any direct, // indirect, incidental, special, exemplary, or consequential damages // (including, but not limited to, procurement of substitute goods or services; // loss of use, data, or profits; or business interruption) however caused // and on any theory of liability, whether in contract, strict liability, // or tort (including negligence or otherwise) arising in any way out of // the use of this software, even if advised of the possibility of such damage. // //M*/ /*M/////////////////////////////////////////////////////////////////////////////////////// //MIT License // //Copyright (c) 2017 Joseph Redmon // //Permission is hereby granted, free of charge, to any person obtaining a copy //of this software and associated documentation files (the "Software"), to deal //in the Software without restriction, including without limitation the rights //to use, copy, modify, merge, publish, distribute, sublicense, and/or sell //copies of the Software, and to permit persons to whom the Software is //furnished to do so, subject to the following conditions: // //The above copyright notice and this permission notice shall be included in all //copies or substantial portions of the Software. // //THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR //IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, //FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE //AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER //LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, //OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE //SOFTWARE. // //M*/ #include "../precomp.hpp" #include #include #include #include #include "darknet_io.hpp" namespace cv { namespace dnn { namespace darknet { template T getParam(const std::map ¶ms, const std::string param_name, T init_val) { std::map::const_iterator it = params.find(param_name); if (it != params.end()) { std::stringstream ss(it->second); ss >> init_val; } return init_val; } static const std::string kFirstLayerName = "data"; class setLayersParams { NetParameter *net; int layer_id; std::string last_layer; std::vector fused_layer_names; public: setLayersParams(NetParameter *_net) : net(_net), layer_id(0), last_layer(kFirstLayerName) {} void setLayerBlobs(int i, std::vector blobs) { cv::dnn::LayerParams ¶ms = net->layers[i].layerParams; params.blobs = blobs; } void setBatchNorm() { cv::dnn::LayerParams bn_param; bn_param.name = "BatchNorm-name"; bn_param.type = "BatchNorm"; bn_param.set("has_weight", true); bn_param.set("has_bias", true); bn_param.set("eps", 1E-6); // .000001f in Darknet Yolo darknet::LayerParameter lp; std::string layer_name = cv::format("bn_%d", layer_id); lp.layer_name = layer_name; lp.layer_type = bn_param.type; lp.layerParams = bn_param; lp.bottom_indexes.push_back(last_layer); last_layer = layer_name; net->layers.push_back(lp); } cv::dnn::LayerParams getParamConvolution(int kernel, int pad, int stride, int filters_num) { cv::dnn::LayerParams params; params.name = "Convolution-name"; params.type = "Convolution"; params.set("kernel_size", kernel); params.set("pad", pad); params.set("stride", stride); params.set("bias_term", false); // true only if(BatchNorm == false) params.set("num_output", filters_num); return params; } void setConvolution(int kernel, int pad, int stride, int filters_num, int channels_num, int groups, int use_batch_normalize) { cv::dnn::LayerParams conv_param = getParamConvolution(kernel, pad, stride, filters_num); darknet::LayerParameter lp; std::string layer_name = cv::format("conv_%d", layer_id); // use BIAS in any case if (!use_batch_normalize) { conv_param.set("bias_term", true); } conv_param.set("group", groups); lp.layer_name = layer_name; lp.layer_type = conv_param.type; lp.layerParams = conv_param; lp.bottom_indexes.push_back(last_layer); last_layer = layer_name; net->layers.push_back(lp); if (use_batch_normalize) setBatchNorm(); layer_id++; fused_layer_names.push_back(last_layer); } cv::dnn::LayerParams getParamFullyConnected(int output) { cv::dnn::LayerParams params; params.name = "FullyConnected-name"; params.type = "InnerProduct"; params.set("bias_term", false); // true only if(BatchNorm == false) params.set("num_output", output); return params; } void setFullyConnected(int output, int use_batch_normalize) { cv::dnn::LayerParams fullyconnected_param = getParamFullyConnected(output); darknet::LayerParameter lp; std::string layer_name = cv::format("fullyConnected_%d", layer_id); // use BIAS in any case if (!use_batch_normalize) { fullyconnected_param.set("bias_term", true); } lp.layer_name = layer_name; lp.layer_type = fullyconnected_param.type; lp.layerParams = fullyconnected_param; lp.bottom_indexes.push_back(last_layer); last_layer = layer_name; net->layers.push_back(lp); if (use_batch_normalize) setBatchNorm(); layer_id++; fused_layer_names.push_back(last_layer); } void setActivation(String type) { cv::dnn::LayerParams activation_param; if (type == "relu") { activation_param.type = "ReLU"; } else if (type == "leaky") { activation_param.set("negative_slope", 0.1f); activation_param.type = "ReLU"; } else if (type == "swish") { activation_param.type = "Swish"; } else if (type == "mish") { activation_param.type = "Mish"; } else if (type == "logistic") { activation_param.type = "Sigmoid"; } else if (type == "tanh") { activation_param.type = "TanH"; } else { CV_Error(cv::Error::StsParseError, "Unsupported activation: " + type); } std::string layer_name = cv::format("%s_%d", type.c_str(), layer_id); darknet::LayerParameter lp; lp.layer_name = layer_name; lp.layer_type = activation_param.type; lp.layerParams = activation_param; lp.bottom_indexes.push_back(last_layer); last_layer = layer_name; net->layers.push_back(lp); fused_layer_names.back() = last_layer; } void setMaxpool(int kernel, int pad, int stride) { cv::dnn::LayerParams maxpool_param; maxpool_param.set("pool", "max"); maxpool_param.set("kernel_size", kernel); maxpool_param.set("pad_l", floor((float)pad / 2)); maxpool_param.set("pad_r", ceil((float)pad / 2)); maxpool_param.set("pad_t", floor((float)pad / 2)); maxpool_param.set("pad_b", ceil((float)pad / 2)); maxpool_param.set("ceil_mode", false); maxpool_param.set("stride", stride); maxpool_param.name = "Pooling-name"; maxpool_param.type = "Pooling"; darknet::LayerParameter lp; std::string layer_name = cv::format("pool_%d", layer_id); lp.layer_name = layer_name; lp.layer_type = maxpool_param.type; lp.layerParams = maxpool_param; lp.bottom_indexes.push_back(last_layer); last_layer = layer_name; net->layers.push_back(lp); layer_id++; fused_layer_names.push_back(last_layer); } void setAvgpool() { cv::dnn::LayerParams avgpool_param; avgpool_param.set("pool", "ave"); avgpool_param.set("global_pooling", true); avgpool_param.name = "Pooling-name"; avgpool_param.type = "Pooling"; darknet::LayerParameter lp; std::string layer_name = cv::format("avgpool_%d", layer_id); lp.layer_name = layer_name; lp.layer_type = avgpool_param.type; lp.layerParams = avgpool_param; lp.bottom_indexes.push_back(last_layer); last_layer = layer_name; net->layers.push_back(lp); layer_id++; fused_layer_names.push_back(last_layer); } void setSoftmax() { cv::dnn::LayerParams softmax_param; softmax_param.name = "Softmax-name"; softmax_param.type = "Softmax"; darknet::LayerParameter lp; std::string layer_name = cv::format("softmax_%d", layer_id); lp.layer_name = layer_name; lp.layer_type = softmax_param.type; lp.layerParams = softmax_param; lp.bottom_indexes.push_back(last_layer); last_layer = layer_name; net->layers.push_back(lp); layer_id++; fused_layer_names.push_back(last_layer); } void setConcat(int number_of_inputs, int *input_indexes) { cv::dnn::LayerParams concat_param; concat_param.name = "Concat-name"; concat_param.type = "Concat"; concat_param.set("axis", 1); // channels are in axis = 1 darknet::LayerParameter lp; std::string layer_name = cv::format("concat_%d", layer_id); lp.layer_name = layer_name; lp.layer_type = concat_param.type; lp.layerParams = concat_param; for (int i = 0; i < number_of_inputs; ++i) lp.bottom_indexes.push_back(fused_layer_names.at(input_indexes[i])); last_layer = layer_name; net->layers.push_back(lp); layer_id++; fused_layer_names.push_back(last_layer); } void setIdentity(int bottom_index) { cv::dnn::LayerParams identity_param; identity_param.name = "Identity-name"; identity_param.type = "Identity"; darknet::LayerParameter lp; std::string layer_name = cv::format("identity_%d", layer_id); lp.layer_name = layer_name; lp.layer_type = identity_param.type; lp.layerParams = identity_param; lp.bottom_indexes.push_back(fused_layer_names.at(bottom_index)); last_layer = layer_name; net->layers.push_back(lp); layer_id++; fused_layer_names.push_back(last_layer); } void setSlice(int input_index, int split_size, int group_id) { int begin[] = {0, split_size * group_id, 0, 0}; cv::dnn::DictValue paramBegin = cv::dnn::DictValue::arrayInt(begin, 4); int end[] = {-1, begin[1] + split_size, -1, -1}; cv::dnn::DictValue paramEnd = cv::dnn::DictValue::arrayInt(end, 4); darknet::LayerParameter lp; lp.layer_name = cv::format("slice_%d", layer_id); lp.layer_type = "Slice"; lp.layerParams.set("begin", paramBegin); lp.layerParams.set("end", paramEnd); lp.bottom_indexes.push_back(fused_layer_names.at(input_index)); net->layers.push_back(lp); layer_id++; last_layer = lp.layer_name; fused_layer_names.push_back(last_layer); } void setReorg(int stride) { cv::dnn::LayerParams reorg_params; reorg_params.name = "Reorg-name"; reorg_params.type = "Reorg"; reorg_params.set("reorg_stride", stride); darknet::LayerParameter lp; std::string layer_name = cv::format("reorg_%d", layer_id); lp.layer_name = layer_name; lp.layer_type = reorg_params.type; lp.layerParams = reorg_params; lp.bottom_indexes.push_back(last_layer); last_layer = layer_name; net->layers.push_back(lp); layer_id++; fused_layer_names.push_back(last_layer); } void setPermute(bool isDarknetLayer = true) { cv::dnn::LayerParams permute_params; permute_params.name = "Permute-name"; permute_params.type = "Permute"; int permute[] = { 0, 2, 3, 1 }; cv::dnn::DictValue paramOrder = cv::dnn::DictValue::arrayInt(permute, 4); permute_params.set("order", paramOrder); darknet::LayerParameter lp; std::string layer_name = cv::format("permute_%d", layer_id); lp.layer_name = layer_name; lp.layer_type = permute_params.type; lp.layerParams = permute_params; lp.bottom_indexes.push_back(last_layer); last_layer = layer_name; net->layers.push_back(lp); if (isDarknetLayer) { layer_id++; fused_layer_names.push_back(last_layer); } } void setRegion(float thresh, int coords, int classes, int anchors, int classfix, int softmax, int softmax_tree, float *biasData) { cv::dnn::LayerParams region_param; region_param.name = "Region-name"; region_param.type = "Region"; region_param.set("thresh", thresh); region_param.set("coords", coords); region_param.set("classes", classes); region_param.set("anchors", anchors); region_param.set("classfix", classfix); region_param.set("softmax_tree", softmax_tree); region_param.set("softmax", softmax); cv::Mat biasData_mat = cv::Mat(1, anchors * 2, CV_32F, biasData).clone(); region_param.blobs.push_back(biasData_mat); darknet::LayerParameter lp; std::string layer_name = "detection_out"; lp.layer_name = layer_name; lp.layer_type = region_param.type; lp.layerParams = region_param; lp.bottom_indexes.push_back(last_layer); last_layer = layer_name; net->layers.push_back(lp); layer_id++; fused_layer_names.push_back(last_layer); } void setYolo(int classes, const std::vector& mask, const std::vector& anchors, float thresh, float nms_threshold, float scale_x_y) { cv::dnn::LayerParams region_param; region_param.name = "Region-name"; region_param.type = "Region"; const int numAnchors = mask.size(); region_param.set("classes", classes); region_param.set("anchors", numAnchors); region_param.set("logistic", true); region_param.set("thresh", thresh); region_param.set("nms_threshold", nms_threshold); region_param.set("scale_x_y", scale_x_y); std::vector usedAnchors(numAnchors * 2); for (int i = 0; i < numAnchors; ++i) { usedAnchors[i * 2] = anchors[mask[i] * 2]; usedAnchors[i * 2 + 1] = anchors[mask[i] * 2 + 1]; } cv::Mat biasData_mat = cv::Mat(1, numAnchors * 2, CV_32F, &usedAnchors[0]).clone(); region_param.blobs.push_back(biasData_mat); darknet::LayerParameter lp; std::string layer_name = cv::format("yolo_%d", layer_id); lp.layer_name = layer_name; lp.layer_type = region_param.type; lp.layerParams = region_param; lp.bottom_indexes.push_back(last_layer); lp.bottom_indexes.push_back(kFirstLayerName); last_layer = layer_name; net->layers.push_back(lp); layer_id++; fused_layer_names.push_back(last_layer); } void setShortcut(int from, float alpha) { cv::dnn::LayerParams shortcut_param; shortcut_param.name = "Shortcut-name"; shortcut_param.type = "Eltwise"; if (alpha != 1) { std::vector coeffs(2, 1); coeffs[0] = alpha; shortcut_param.set("coeff", DictValue::arrayReal(&coeffs[0], coeffs.size())); } shortcut_param.set("op", "sum"); shortcut_param.set("output_channels_mode", "input_0_truncate"); darknet::LayerParameter lp; std::string layer_name = cv::format("shortcut_%d", layer_id); lp.layer_name = layer_name; lp.layer_type = shortcut_param.type; lp.layerParams = shortcut_param; lp.bottom_indexes.push_back(last_layer); lp.bottom_indexes.push_back(fused_layer_names.at(from)); last_layer = layer_name; net->layers.push_back(lp); layer_id++; fused_layer_names.push_back(last_layer); } void setScaleChannels(int from) { cv::dnn::LayerParams shortcut_param; shortcut_param.type = "Scale"; darknet::LayerParameter lp; std::string layer_name = cv::format("scale_channels_%d", layer_id); lp.layer_name = layer_name; lp.layer_type = shortcut_param.type; lp.layerParams = shortcut_param; lp.bottom_indexes.push_back(fused_layer_names.at(from)); lp.bottom_indexes.push_back(last_layer); last_layer = layer_name; net->layers.push_back(lp); layer_id++; fused_layer_names.push_back(last_layer); } void setUpsample(int scaleFactor) { cv::dnn::LayerParams param; param.name = "Upsample-name"; param.type = "Resize"; param.set("zoom_factor", scaleFactor); param.set("interpolation", "nearest"); darknet::LayerParameter lp; std::string layer_name = cv::format("upsample_%d", layer_id); lp.layer_name = layer_name; lp.layer_type = param.type; lp.layerParams = param; lp.bottom_indexes.push_back(last_layer); last_layer = layer_name; net->layers.push_back(lp); layer_id++; fused_layer_names.push_back(last_layer); } }; std::string escapeString(const std::string &src) { std::string dst; for (size_t i = 0; i < src.size(); ++i) if (src[i] > ' ' && src[i] <= 'z') dst += src[i]; return dst; } template std::vector getNumbers(const std::string &src) { std::vector dst; std::stringstream ss(src); for (std::string str; std::getline(ss, str, ',');) { std::stringstream line(str); T val; line >> val; dst.push_back(val); } return dst; } bool ReadDarknetFromCfgStream(std::istream &ifile, NetParameter *net) { bool read_net = false; int layers_counter = -1; for (std::string line; std::getline(ifile, line);) { line = escapeString(line); if (line.empty()) continue; switch (line[0]) { case '\0': break; case '#': break; case ';': break; case '[': if (line == "[net]") { read_net = true; } else { // read section read_net = false; ++layers_counter; const size_t layer_type_size = line.find(']') - 1; CV_Assert(layer_type_size < line.size()); std::string layer_type = line.substr(1, layer_type_size); net->layers_cfg[layers_counter]["layer_type"] = layer_type; } break; default: // read entry const size_t separator_index = line.find('='); CV_Assert(separator_index < line.size()); if (separator_index != std::string::npos) { std::string name = line.substr(0, separator_index); std::string value = line.substr(separator_index + 1, line.size() - (separator_index + 1)); name = escapeString(name); value = escapeString(value); if (name.empty() || value.empty()) continue; if (read_net) net->net_cfg[name] = value; else net->layers_cfg[layers_counter][name] = value; } } } std::string anchors = net->layers_cfg[net->layers_cfg.size() - 1]["anchors"]; std::vector vec = getNumbers(anchors); std::map &net_params = net->net_cfg; net->width = getParam(net_params, "width", 416); net->height = getParam(net_params, "height", 416); net->channels = getParam(net_params, "channels", 3); CV_Assert(net->width > 0 && net->height > 0 && net->channels > 0); MatShape tensor_shape(3); tensor_shape[0] = net->channels; tensor_shape[1] = net->width; tensor_shape[2] = net->height; net->out_channels_vec.resize(net->layers_cfg.size()); layers_counter = -1; setLayersParams setParams(net); typedef std::map >::iterator it_type; for (it_type i = net->layers_cfg.begin(); i != net->layers_cfg.end(); ++i) { ++layers_counter; std::map &layer_params = i->second; std::string layer_type = layer_params["layer_type"]; if (layer_type == "convolutional") { int kernel_size = getParam(layer_params, "size", -1); int pad = getParam(layer_params, "pad", 0); int padding = getParam(layer_params, "padding", 0); int stride = getParam(layer_params, "stride", 1); int filters = getParam(layer_params, "filters", -1); int groups = getParam(layer_params, "groups", 1); bool batch_normalize = getParam(layer_params, "batch_normalize", 0) == 1; int flipped = getParam(layer_params, "flipped", 0); if (flipped == 1) CV_Error(cv::Error::StsNotImplemented, "Transpose the convolutional weights is not implemented"); if (pad) padding = kernel_size / 2; // Cannot divide 0 CV_Assert(stride > 0); CV_Assert(kernel_size > 0 && filters > 0); CV_Assert(tensor_shape[0] > 0); CV_Assert(tensor_shape[0] % groups == 0); setParams.setConvolution(kernel_size, padding, stride, filters, tensor_shape[0], groups, batch_normalize); tensor_shape[0] = filters; tensor_shape[1] = (tensor_shape[1] - kernel_size + 2 * padding) / stride + 1; tensor_shape[2] = (tensor_shape[2] - kernel_size + 2 * padding) / stride + 1; } else if (layer_type == "connected") { int output = getParam(layer_params, "output", 1); bool batch_normalize = getParam(layer_params, "batch_normalize", 0) == 1; CV_Assert(output > 0); setParams.setFullyConnected(output, batch_normalize); if(layers_counter && tensor_shape[1] > 1) net->out_channels_vec[layers_counter-1] = total(tensor_shape); tensor_shape[0] = output; tensor_shape[1] = 1; tensor_shape[2] = 1; } else if (layer_type == "maxpool") { int kernel_size = getParam(layer_params, "size", 2); int stride = getParam(layer_params, "stride", 2); int padding = getParam(layer_params, "padding", kernel_size - 1); // Cannot divide 0 CV_Assert(stride > 0); setParams.setMaxpool(kernel_size, padding, stride); tensor_shape[1] = (tensor_shape[1] - kernel_size + padding) / stride + 1; tensor_shape[2] = (tensor_shape[2] - kernel_size + padding) / stride + 1; } else if (layer_type == "avgpool") { setParams.setAvgpool(); tensor_shape[1] = 1; tensor_shape[2] = 1; } else if (layer_type == "softmax") { int groups = getParam(layer_params, "groups", 1); if (groups != 1) CV_Error(Error::StsNotImplemented, "Softmax from Darknet with groups != 1"); setParams.setSoftmax(); } else if (layer_type == "route") { std::string bottom_layers = getParam(layer_params, "layers", ""); CV_Assert(!bottom_layers.empty()); int groups = getParam(layer_params, "groups", 1); std::vector layers_vec = getNumbers(bottom_layers); tensor_shape[0] = 0; for (size_t k = 0; k < layers_vec.size(); ++k) { layers_vec[k] = layers_vec[k] >= 0 ? layers_vec[k] : (layers_vec[k] + layers_counter); tensor_shape[0] += net->out_channels_vec[layers_vec[k]]; } if (groups > 1) { int group_id = getParam(layer_params, "group_id", 0); tensor_shape[0] /= groups; int split_size = tensor_shape[0] / layers_vec.size(); for (size_t k = 0; k < layers_vec.size(); ++k) setParams.setSlice(layers_vec[k], split_size, group_id); if (layers_vec.size() > 1) { // layer ids in layers_vec - inputs of Slice layers // after adding offset to layers_vec: layer ids - ouputs of Slice layers for (size_t k = 0; k < layers_vec.size(); ++k) layers_vec[k] += layers_vec.size(); setParams.setConcat(layers_vec.size(), layers_vec.data()); } } else { if (layers_vec.size() == 1) setParams.setIdentity(layers_vec.at(0)); else setParams.setConcat(layers_vec.size(), layers_vec.data()); } } else if (layer_type == "dropout" || layer_type == "cost") { setParams.setIdentity(layers_counter-1); } else if (layer_type == "reorg") { int stride = getParam(layer_params, "stride", 2); // Cannot divide 0 CV_Assert(stride > 0); tensor_shape[0] = tensor_shape[0] * (stride * stride); tensor_shape[1] = tensor_shape[1] / stride; tensor_shape[2] = tensor_shape[2] / stride; setParams.setReorg(stride); } else if (layer_type == "region") { float thresh = getParam(layer_params, "thresh", 0.001); int coords = getParam(layer_params, "coords", 4); int classes = getParam(layer_params, "classes", -1); int num_of_anchors = getParam(layer_params, "num", -1); int classfix = getParam(layer_params, "classfix", 0); bool softmax = (getParam(layer_params, "softmax", 0) == 1); bool softmax_tree = (getParam(layer_params, "tree", "").size() > 0); std::string anchors_values = getParam(layer_params, "anchors", std::string()); CV_Assert(!anchors_values.empty()); std::vector anchors_vec = getNumbers(anchors_values); CV_Assert(classes > 0 && num_of_anchors > 0 && (num_of_anchors * 2) == anchors_vec.size()); setParams.setPermute(false); setParams.setRegion(thresh, coords, classes, num_of_anchors, classfix, softmax, softmax_tree, anchors_vec.data()); } else if (layer_type == "shortcut") { std::string bottom_layer = getParam(layer_params, "from", ""); float alpha = getParam(layer_params, "alpha", 1); float beta = getParam(layer_params, "beta", 0); if (beta != 0) CV_Error(Error::StsNotImplemented, "Non-zero beta"); CV_Assert(!bottom_layer.empty()); int from = std::atoi(bottom_layer.c_str()); from = from < 0 ? from + layers_counter : from; setParams.setShortcut(from, alpha); } else if (layer_type == "scale_channels") { std::string bottom_layer = getParam(layer_params, "from", ""); CV_Assert(!bottom_layer.empty()); int from = std::atoi(bottom_layer.c_str()); from = from < 0 ? from + layers_counter : from; setParams.setScaleChannels(from); } else if (layer_type == "upsample") { int scaleFactor = getParam(layer_params, "stride", 1); setParams.setUpsample(scaleFactor); tensor_shape[1] = tensor_shape[1] * scaleFactor; tensor_shape[2] = tensor_shape[2] * scaleFactor; } else if (layer_type == "yolo") { int classes = getParam(layer_params, "classes", -1); int num_of_anchors = getParam(layer_params, "num", -1); float thresh = getParam(layer_params, "thresh", 0.2); float nms_threshold = getParam(layer_params, "nms_threshold", 0.0); float scale_x_y = getParam(layer_params, "scale_x_y", 1.0); std::string anchors_values = getParam(layer_params, "anchors", std::string()); CV_Assert(!anchors_values.empty()); std::vector anchors_vec = getNumbers(anchors_values); std::string mask_values = getParam(layer_params, "mask", std::string()); CV_Assert(!mask_values.empty()); std::vector mask_vec = getNumbers(mask_values); CV_Assert(classes > 0 && num_of_anchors > 0 && (num_of_anchors * 2) == anchors_vec.size()); setParams.setPermute(false); setParams.setYolo(classes, mask_vec, anchors_vec, thresh, nms_threshold, scale_x_y); } else { CV_Error(cv::Error::StsParseError, "Unknown layer type: " + layer_type); } std::string activation = getParam(layer_params, "activation", "linear"); if (activation != "linear") setParams.setActivation(activation); net->out_channels_vec[layers_counter] = tensor_shape[0]; } return true; } bool ReadDarknetFromWeightsStream(std::istream &ifile, NetParameter *net) { int32_t major_ver, minor_ver, revision; ifile.read(reinterpret_cast(&major_ver), sizeof(int32_t)); ifile.read(reinterpret_cast(&minor_ver), sizeof(int32_t)); ifile.read(reinterpret_cast(&revision), sizeof(int32_t)); uint64_t seen; if ((major_ver * 10 + minor_ver) >= 2) { ifile.read(reinterpret_cast(&seen), sizeof(uint64_t)); } else { int32_t iseen = 0; ifile.read(reinterpret_cast(&iseen), sizeof(int32_t)); seen = iseen; } bool transpose = (major_ver > 1000) || (minor_ver > 1000); if(transpose) CV_Error(cv::Error::StsNotImplemented, "Transpose the weights (except for convolutional) is not implemented"); MatShape tensor_shape(3); tensor_shape[0] = net->channels; tensor_shape[1] = net->width; tensor_shape[2] = net->height; int cv_layers_counter = -1; int darknet_layers_counter = -1; setLayersParams setParams(net); typedef std::map >::iterator it_type; for (it_type i = net->layers_cfg.begin(); i != net->layers_cfg.end(); ++i) { ++darknet_layers_counter; ++cv_layers_counter; std::map &layer_params = i->second; std::string layer_type = layer_params["layer_type"]; if (layer_type == "convolutional" || layer_type == "connected") { size_t weights_size; int filters; bool use_batch_normalize; cv::Mat weightsBlob; if(layer_type == "convolutional") { int kernel_size = getParam(layer_params, "size", -1); filters = getParam(layer_params, "filters", -1); int groups = getParam(layer_params, "groups", 1); use_batch_normalize = getParam(layer_params, "batch_normalize", 0) == 1; CV_Assert(kernel_size > 0 && filters > 0); CV_Assert(tensor_shape[0] > 0); CV_Assert(tensor_shape[0] % groups == 0); weights_size = filters * (tensor_shape[0] / groups) * kernel_size * kernel_size; int sizes_weights[] = { filters, tensor_shape[0] / groups, kernel_size, kernel_size }; weightsBlob.create(4, sizes_weights, CV_32F); } else { filters = getParam(layer_params, "output", 1); use_batch_normalize = getParam(layer_params, "batch_normalize", 0) == 1; CV_Assert(filters>0); weights_size = total(tensor_shape) * filters; int sizes_weights[] = { filters, total(tensor_shape) }; weightsBlob.create(2, sizes_weights, CV_32F); } CV_Assert(weightsBlob.isContinuous()); cv::Mat meanData_mat(1, filters, CV_32F); // mean cv::Mat stdData_mat(1, filters, CV_32F); // variance cv::Mat weightsData_mat(1, filters, CV_32F);// scale cv::Mat biasData_mat(1, filters, CV_32F); // bias ifile.read(reinterpret_cast(biasData_mat.ptr()), sizeof(float)*filters); if (use_batch_normalize) { ifile.read(reinterpret_cast(weightsData_mat.ptr()), sizeof(float)*filters); ifile.read(reinterpret_cast(meanData_mat.ptr()), sizeof(float)*filters); ifile.read(reinterpret_cast(stdData_mat.ptr()), sizeof(float)*filters); } ifile.read(reinterpret_cast(weightsBlob.ptr()), sizeof(float)*weights_size); // set conv/connected weights std::vector layer_blobs; layer_blobs.push_back(weightsBlob); if (!use_batch_normalize) { // use BIAS in any case layer_blobs.push_back(biasData_mat); } setParams.setLayerBlobs(cv_layers_counter, layer_blobs); // set batch normalize (mean, variance, scale, bias) if (use_batch_normalize) { ++cv_layers_counter; std::vector bn_blobs; bn_blobs.push_back(meanData_mat); bn_blobs.push_back(stdData_mat); bn_blobs.push_back(weightsData_mat); bn_blobs.push_back(biasData_mat); setParams.setLayerBlobs(cv_layers_counter, bn_blobs); } } if (layer_type == "region" || layer_type == "yolo") { ++cv_layers_counter; // For permute. } std::string activation = getParam(layer_params, "activation", "linear"); if (activation != "linear") ++cv_layers_counter; // For ReLU, Swish, Mish, Sigmoid, etc if(!darknet_layers_counter) tensor_shape.resize(1); tensor_shape[0] = net->out_channels_vec[darknet_layers_counter]; } return true; } } void ReadNetParamsFromCfgStreamOrDie(std::istream &ifile, darknet::NetParameter *net) { if (!darknet::ReadDarknetFromCfgStream(ifile, net)) { CV_Error(cv::Error::StsParseError, "Failed to parse NetParameter stream"); } } void ReadNetParamsFromBinaryStreamOrDie(std::istream &ifile, darknet::NetParameter *net) { if (!darknet::ReadDarknetFromWeightsStream(ifile, net)) { CV_Error(cv::Error::StsParseError, "Failed to parse NetParameter stream"); } } } }