diff --git a/inference/CMakeLists.txt b/inference/CMakeLists.txt
index 994befc87be458ecab679c637d17cfd6239019fb..86b378b2ed0c7cac4f2269f4f94a2165c1db81e8 100644
--- a/inference/CMakeLists.txt
+++ b/inference/CMakeLists.txt
@@ -82,7 +82,7 @@ if (WIN32)
         add_definitions(-DSTATIC_LIB)
     endif()
 else()
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -o2 -std=c++11")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2 -fopenmp -std=c++11")
     set(CMAKE_STATIC_LIBRARY_PREFIX "")
 endif()
 
diff --git a/inference/predictor/seg_predictor.cpp b/inference/predictor/seg_predictor.cpp
index ee32d75561e5d93fa11c7013d1a4a9f845dc9919..d70084f67f2f3c38624e15fc6a454aca22482572 100644
--- a/inference/predictor/seg_predictor.cpp
+++ b/inference/predictor/seg_predictor.cpp
@@ -1,4 +1,5 @@
 #include "seg_predictor.h"
+#include  // NOTE(review): header name was stripped during extraction - restore before applying
 
 namespace PaddleSolution {
 
@@ -78,26 +79,8 @@ namespace PaddleSolution {
             //post process
             _mask.clear();
             _scoremap.clear();
-            int out_img_len = eval_height * eval_width;
-            for (int i = 0; i < out_img_len; ++i) {
-                float max_value = -1;
-                int label = 0;
-                for (int j = 0; j < eval_num_class; ++j) {
-                    int index = i + j * out_img_len;
-                    if (index >= blob_out_len) {
-                        break;
-                    }
-                    float value = p_out[index];
-                    if (value > max_value) {
-                        max_value = value;
-                        label = j;
-                    }
-                }
-                if (label == 0) max_value = 0;
-                _mask[i] = uchar(label);
-                _scoremap[i] = uchar(max_value * 255);
-            }
-
+            std::vector<int> out_shape{eval_num_class, eval_height, eval_width};
+            utils::argmax(p_out, out_shape, _mask, _scoremap);
             cv::Mat mask_png = cv::Mat(eval_height, eval_width, CV_8UC1);
             mask_png.data = _mask.data();
             std::string nname(fname);
@@ -251,6 +234,7 @@ namespace PaddleSolution {
                     int idx = u * default_batch_size + i;
                     imgs_batch.push_back(imgs[idx]);
                 }
+
                 if (!_preprocessor->batch_process(imgs_batch, input_buffer.data(), org_height.data(), org_width.data())) {
                     return -1;
                 }
diff --git a/inference/preprocessor/preprocessor_seg.cpp b/inference/preprocessor/preprocessor_seg.cpp
index a3177da5cbb907c27a05d8c5e9290fc70ef9ab02..c2d056bfd2706ad441b96d76165804c0d81cdfaf 100644
--- a/inference/preprocessor/preprocessor_seg.cpp
+++ b/inference/preprocessor/preprocessor_seg.cpp
@@ -32,21 +32,7 @@ namespace PaddleSolution {
         if (*ori_h != rh || *ori_w != rw) {
             cv::resize(im, im, resize_size, 0, 0, cv::INTER_LINEAR);
         }
-
-        float* pmean = _config->_mean.data();
-        float* pscale = _config->_std.data();
-        for (int h = 0; h < rh; ++h) {
-            const uchar* ptr = im.ptr<uchar>(h);
-            int im_index = 0;
-            for (int w = 0; w < rw; ++w) {
-                for (int c = 0; c < channels; ++c) {
-                    int top_index = (c * rh + h) * rw + w;
-                    float pixel = static_cast<float>(ptr[im_index++]);
-                    pixel = (pixel / 255 - pmean[c]) / pscale[c];
-                    data[top_index] = pixel;
-                }
-            }
-        }
+        utils::normalize(im, data, _config->_mean, _config->_std);
         return true;
     }
 
diff --git a/inference/preprocessor/preprocessor_seg.h b/inference/preprocessor/preprocessor_seg.h
index 8c280ab1d9a4e972de55e9afd2935a3a28e6bd90..eba904b8949b3c000799ee84541699989fea425a 100644
--- a/inference/preprocessor/preprocessor_seg.h
+++ b/inference/preprocessor/preprocessor_seg.h
@@ -1,6 +1,7 @@
 #pragma once
 
 #include "preprocessor.h"
+#include "utils/utils.h"
 
 namespace PaddleSolution {
 
diff --git a/inference/utils/utils.h b/inference/utils/utils.h
index e349618a28282257b01ac44d661f292850cc19b9..894636499bb55b9018cd40072455ae5cedd8a63f 100644
--- a/inference/utils/utils.h
+++ b/inference/utils/utils.h
@@ -4,6 +4,10 @@
 #include
 #include
 
+#include  // NOTE(review): the header names on these three added lines were stripped during extraction;
+#include  // cv::Mat is used below, so presumably OpenCV headers - restore before applying
+#include
+
 #ifdef _WIN32
 #include
 #else
@@ -59,5 +63,64 @@ namespace PaddleSolution {
             return imgs;
         }
         #endif
+
+        // Scale an 8-bit interleaved (HWC) image to [0, 1], standardise each
+        // channel as (pixel / 255 - fmean[c]) / fstd[c], and write the result
+        // to `data` in planar (CHW) order. Channel order is kept as-is, so any
+        // BGR -> RGB swap has to happen before this call.
+        // NOTE(review): assumes `im` holds 8-bit samples and that fmean/fstd
+        // have at least im.channels() entries - confirm against callers.
+        inline void normalize(cv::Mat& im, float* data, std::vector<float>& fmean, std::vector<float>& fstd) {
+            int rh = im.rows;
+            int rw = im.cols;
+            int rc = im.channels();
+            double normf = (double)1.0 / 255.0;
+            // Rows write disjoint parts of `data`, so they can run in parallel.
+            #pragma omp parallel for
+            for (int h = 0; h < rh; ++h) {
+                const uchar* ptr = im.ptr<uchar>(h);
+                int im_index = 0;
+                for (int w = 0; w < rw; ++w) {
+                    for (int c = 0; c < rc; ++c) {
+                        int top_index = (c * rh + h) * rw + w;
+                        float pixel = static_cast<float>(ptr[im_index++]);
+                        pixel = (pixel * normf - fmean[c]) / fstd[c];
+                        data[top_index] = pixel;
+                    }
+                }
+            }
+        }
+
+        // Per-pixel argmax over the class axis of a class-major score blob.
+        //   out      - scores, indexed as out[class * H * W + pixel]
+        //   shape    - {num_classes, height, width}
+        //   mask     - receives the winning class index of every pixel
+        //   scoremap - receives the winning score * 255 (0 when class 0 wins)
+        // Both outputs are grown to height * width when they are smaller: the
+        // segmentation predictor clear()s them right before calling this.
+        inline void argmax(float* out, std::vector<int>& shape, std::vector<uchar>& mask, std::vector<uchar>& scoremap) {
+            const int num_class = shape[0];
+            const int out_img_len = shape[1] * shape[2];
+            if (out_img_len <= 0) return;
+            if (static_cast<int>(mask.size()) < out_img_len) mask.resize(out_img_len);
+            if (static_cast<int>(scoremap.size()) < out_img_len) scoremap.resize(out_img_len);
+            // Pixels are independent: parallelise over them and keep the running
+            // maximum and label local to each iteration so no state is shared.
+            #pragma omp parallel for
+            for (int i = 0; i < out_img_len; ++i) {
+                float max_value = -1;
+                int label = 0;
+                for (int j = 0; j < num_class; ++j) {
+                    const float value = out[i + j * out_img_len];
+                    if (value > max_value) {
+                        max_value = value;
+                        label = j;
+                    }
+                }
+                if (label == 0) max_value = 0;
+                mask[i] = uchar(label);
+                scoremap[i] = uchar(max_value * 255);
+            }
+        }
     }
 }