提交 f10169ae 编写于 作者: S sjtubinlong

improve c++ normalize and argmax performance

上级 5b80d74e
......@@ -82,7 +82,7 @@ if (WIN32)
add_definitions(-DSTATIC_LIB)
endif()
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -o2 -std=c++11")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -o2 -fopenmp -std=c++11")
set(CMAKE_STATIC_LIBRARY_PREFIX "")
endif()
......
#include "seg_predictor.h"
#include <unsupported/Eigen/CXX11/Tensor>
namespace PaddleSolution {
......@@ -78,26 +79,8 @@ namespace PaddleSolution {
//post process
_mask.clear();
_scoremap.clear();
int out_img_len = eval_height * eval_width;
for (int i = 0; i < out_img_len; ++i) {
float max_value = -1;
int label = 0;
for (int j = 0; j < eval_num_class; ++j) {
int index = i + j * out_img_len;
if (index >= blob_out_len) {
break;
}
float value = p_out[index];
if (value > max_value) {
max_value = value;
label = j;
}
}
if (label == 0) max_value = 0;
_mask[i] = uchar(label);
_scoremap[i] = uchar(max_value * 255);
}
std::vector<int> out_shape{eval_num_class, eval_height, eval_width};
utils::argmax(p_out, out_shape, _mask, _scoremap);
cv::Mat mask_png = cv::Mat(eval_height, eval_width, CV_8UC1);
mask_png.data = _mask.data();
std::string nname(fname);
......@@ -251,6 +234,7 @@ namespace PaddleSolution {
int idx = u * default_batch_size + i;
imgs_batch.push_back(imgs[idx]);
}
if (!_preprocessor->batch_process(imgs_batch, input_buffer.data(), org_height.data(), org_width.data())) {
return -1;
}
......
......@@ -32,21 +32,7 @@ namespace PaddleSolution {
if (*ori_h != rh || *ori_w != rw) {
cv::resize(im, im, resize_size, 0, 0, cv::INTER_LINEAR);
}
float* pmean = _config->_mean.data();
float* pscale = _config->_std.data();
for (int h = 0; h < rh; ++h) {
const uchar* ptr = im.ptr<uchar>(h);
int im_index = 0;
for (int w = 0; w < rw; ++w) {
for (int c = 0; c < channels; ++c) {
int top_index = (c * rh + h) * rw + w;
float pixel = static_cast<float>(ptr[im_index++]);
pixel = (pixel / 255 - pmean[c]) / pscale[c];
data[top_index] = pixel;
}
}
}
utils::normalize(im, data, _config->_mean, _config->_std);
return true;
}
......
#pragma once
#include "preprocessor.h"
#include "utils/utils.h"
namespace PaddleSolution {
......
......@@ -4,6 +4,10 @@
#include <vector>
#include <string>
#include <opencv2/core/core.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/highgui/highgui.hpp>
#ifdef _WIN32
#include <filesystem>
#else
......@@ -59,5 +63,58 @@ namespace PaddleSolution {
return imgs;
}
#endif
// normalize and HWC_BGR -> CHW_RGB
inline void normalize(cv::Mat& im, float* data, std::vector<float>& fmean, std::vector<float>& fstd) {
int rh = im.rows;
int rw = im.cols;
int rc = im.channels();
double normf = (double)1.0 / 255.0;
#pragma omp parallel for
for (int h = 0; h < rh; ++h) {
const uchar* ptr = im.ptr<uchar>(h);
int im_index = 0;
for (int w = 0; w < rw; ++w) {
for (int c = 0; c < rc; ++c) {
int top_index = (c * rh + h) * rw + w;
float pixel = static_cast<float>(ptr[im_index++]);
pixel = (pixel * normf - fmean[c]) / fstd[c];
data[top_index] = pixel;
}
}
}
}
// argmax
inline void argmax(float* out, std::vector<int>& shape, std::vector<uchar>& mask, std::vector<uchar>& scoremap) {
int out_img_len = shape[1] * shape[2];
int blob_out_len = out_img_len * shape[0];
/*
Eigen::TensorMap<Eigen::Tensor<float, 3>> out_3d(out, shape[0], shape[1], shape[2]);
Eigen::Tensor<Eigen::DenseIndex, 2> argmax = out_3d.argmax(0);
*/
float max_value = -1;
int label = 0;
#pragma omp parallel private(label)
for (int i = 0; i < out_img_len; ++i) {
max_value = -1;
label = 0;
#pragma omp for reduction(max : max_value)
for (int j = 0; j < shape[0]; ++j) {
int index = i + j * out_img_len;
if (index >= blob_out_len) {
continue;
}
float value = out[index];
if (value > max_value) {
max_value = value;
label = j;
}
}
if (label == 0) max_value = 0;
mask[i] = uchar(label);
scoremap[i] = uchar(max_value * 255);
}
}
}
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册