提交 5be92b99 编写于 作者: W wuzewu

Merge branch 'develop' into release/v0.3.0

......@@ -71,7 +71,7 @@ $ pip install -r requirements.txt
* [模型导出](./docs/model_export.md)
* [使用Python预测](./deploy/python/)
* [使用C++预测](./deploy/cpp/)
* [人像分割在移动端的部署](./lite)
* [移动端预测部署](./deploy/lite/)
### 高级功能
......
......@@ -8,3 +8,5 @@
[3. 服务化部署(仅支持 Linux)](./serving)
[4. 移动端部署(仅支持Android)](./lite)
......@@ -142,7 +142,7 @@ if(WITH_MKL)
if (WIN32)
set(MKLDNN_LIB ${MKLDNN_PATH}/lib/mkldnn.lib)
else ()
set(MKLDNN_LIB ${MKLDNN_PATH}/lib/libmkldnn.so.0)
set(MKLDNN_LIB ${MKLDNN_PATH}/lib/libmkldnn.so.1)
endif ()
endif()
else()
......
......@@ -24,7 +24,7 @@ int main(int argc, char** argv) {
google::ParseCommandLineFlags(&argc, &argv, true);
if (FLAGS_conf.empty() || FLAGS_input_dir.empty()) {
std::cout << "Usage: ./predictor --conf=/config/path/to/your/model "
<< "--input_dir=/directory/of/your/input/images";
<< "--input_dir=/directory/of/your/input/images" << std::endl;
return -1;
}
// 1. create a predictor and init it with conf
......
......@@ -83,7 +83,6 @@ namespace PaddleSolution {
int blob_out_len = length;
int seg_out_len = eval_height * eval_width * eval_num_class;
if (blob_out_len != seg_out_len) {
LOG(ERROR) << " [FATAL] unequal: input vs output [" <<
seg_out_len << "|" << blob_out_len << "]" << std::endl;
......@@ -97,25 +96,22 @@ namespace PaddleSolution {
cv::Mat mask_png = cv::Mat(eval_height, eval_width, CV_8UC1);
mask_png.data = _mask.data();
std::string nname(fname);
auto pos = fname.find(".");
auto pos = fname.rfind(".");
nname[pos] = '_';
std::string mask_save_name = nname + ".png";
std::string mask_save_name = nname + "_mask.png";
cv::imwrite(mask_save_name, mask_png);
cv::Mat scoremap_png = cv::Mat(eval_height, eval_width, CV_8UC1);
scoremap_png.data = _scoremap.data();
std::string scoremap_save_name = nname
+ std::string("_scoremap.png");
std::string scoremap_save_name = nname + std::string("_scoremap.png");
cv::imwrite(scoremap_save_name, scoremap_png);
std::cout << "save mask of [" << fname << "] done" << std::endl;
if (height && width) {
int recover_height = *height;
int recover_width = *width;
cv::Mat recover_png = cv::Mat(recover_height,
recover_width, CV_8UC1);
cv::Mat recover_png = cv::Mat(recover_height, recover_width, CV_8UC1);
cv::resize(scoremap_png, recover_png,
cv::Size(recover_width, recover_height),
0, 0, cv::INTER_CUBIC);
cv::Size(recover_width, recover_height), 0, 0, cv::INTER_CUBIC);
std::string recover_name = nname + std::string("_recover.png");
cv::imwrite(recover_name, recover_png);
}
......@@ -176,8 +172,13 @@ namespace PaddleSolution {
}
paddle::PaddleTensor im_tensor;
im_tensor.name = "image";
im_tensor.shape = std::vector<int>{ batch_size, channels,
eval_height, eval_width };
if (!_model_config._use_pr) {
im_tensor.shape = std::vector<int>{ batch_size, channels,
eval_height, eval_width };
} else {
im_tensor.shape = std::vector<int>{ batch_size, eval_height,
eval_width, channels};
}
im_tensor.data.Reset(input_buffer.data(),
real_buffer_size * sizeof(float));
im_tensor.dtype = paddle::PaddleDType::FLOAT32;
......@@ -202,19 +203,45 @@ namespace PaddleSolution {
std::cout << _outputs[0].shape[j] << ",";
}
std::cout << ")" << std::endl;
const size_t nums = _outputs.front().data.length()
/ sizeof(float);
if (out_num % batch_size != 0 || out_num != nums) {
LOG(ERROR) << "outputs data size mismatch with shape size.";
size_t nums = _outputs.front().data.length() / sizeof(float);
if (_model_config._use_pr) {
nums = _outputs.front().data.length() / sizeof(int64_t);
}
// size mismatch checking
bool size_mismatch = out_num % batch_size;
size_mismatch |= (!_model_config._use_pr) && (nums != out_num);
size_mismatch |= _model_config._use_pr && (nums != eval_height * eval_width);
if (size_mismatch) {
LOG(ERROR) << "output with a unexpected size";
return -1;
}
if (_model_config._use_pr) {
std::vector<uchar> out_data;
out_data.resize(out_num);
auto addr = reinterpret_cast<int64_t*>(_outputs[0].data.data());
for (int r = 0; r < out_num; ++r) {
out_data[r] = (int)(addr[r]);
}
for (int r = 0; r < batch_size; ++r) {
cv::Mat mask_png = cv::Mat(eval_height, eval_width, CV_8UC1);
mask_png.data = out_data.data() + eval_height*eval_width*r;
auto name = imgs_batch[r];
auto pos = name.rfind(".");
name[pos] = '_';
std::string mask_save_name = name + "_mask.png";
cv::imwrite(mask_save_name, mask_png);
}
continue;
}
for (int i = 0; i < batch_size; ++i) {
float* output_addr = reinterpret_cast<float*>(
_outputs[0].data.data())
+ i * (out_num / batch_size);
+ i * (nums / batch_size);
output_mask(imgs_batch[i], output_addr,
out_num / batch_size,
nums / batch_size,
&org_height[i],
&org_width[i]);
}
......@@ -278,8 +305,14 @@ namespace PaddleSolution {
return -1;
}
auto im_tensor = _main_predictor->GetInputTensor("image");
im_tensor->Reshape({ batch_size, channels,
if (!_model_config._use_pr) {
im_tensor->Reshape({ batch_size, channels,
eval_height, eval_width });
} else {
im_tensor->Reshape({ batch_size, eval_height,
eval_width, channels});
}
im_tensor->copy_from_cpu(input_buffer.data());
auto t1 = std::chrono::high_resolution_clock::now();
......@@ -292,7 +325,6 @@ namespace PaddleSolution {
auto output_names = _main_predictor->GetOutputNames();
auto output_t = _main_predictor->GetOutputTensor(
output_names[0]);
std::vector<float> out_data;
std::vector<int> output_shape = output_t->shape();
int out_num = 1;
......@@ -303,6 +335,30 @@ namespace PaddleSolution {
}
std::cout << ")" << std::endl;
if (_model_config._use_pr) {
std::vector<int64_t> out_data;
out_data.resize(out_num);
output_t->copy_to_cpu(out_data.data());
std::vector<uchar> mask_data;
mask_data.resize(out_num);
auto addr = reinterpret_cast<int64_t*>(out_data.data());
for (int r = 0; r < out_num; ++r) {
mask_data[r] = (int)(addr[r]);
}
for (int r = 0; r < batch_size; ++r) {
cv::Mat mask_png = cv::Mat(eval_height, eval_width, CV_8UC1);
mask_png.data = mask_data.data() + eval_height*eval_width*r;
auto name = imgs_batch[r];
auto pos = name.rfind(".");
name[pos] = '_';
std::string mask_save_name = name + "_mask.png";
cv::imwrite(mask_save_name, mask_png);
}
continue;
}
std::vector<float> out_data;
out_data.resize(out_num);
output_t->copy_to_cpu(out_data.data());
for (int i = 0; i < batch_size; ++i) {
......
......@@ -40,14 +40,18 @@ namespace PaddleSolution {
LOG(ERROR) << "Only support rgb(gray) and rgba image.";
return false;
}
cv::Size resize_size(_config->_resize[0], _config->_resize[1]);
int rw = resize_size.width;
int rh = resize_size.height;
if (*ori_h != rh || *ori_w != rw) {
cv::resize(im, im, resize_size, 0, 0, cv::INTER_LINEAR);
}
utils::normalize(im, data, _config->_mean, _config->_std);
if (!_config->_use_pr) {
utils::normalize(im, data, _config->_mean, _config->_std);
} else {
utils::flatten_mat(im, data);
}
return true;
}
......
......@@ -25,6 +25,7 @@ class PaddleSegModelConfigPaser {
:_class_num(0),
_channels(0),
_use_gpu(0),
_use_pr(0),
_batch_size(1),
_model_file_name("__model__"),
_param_file_name("__params__") {
......@@ -40,6 +41,7 @@ class PaddleSegModelConfigPaser {
_class_num = 0;
_channels = 0;
_use_gpu = 0;
_use_pr = 0;
_batch_size = 1;
_model_file_name.clear();
_model_path.clear();
......@@ -172,6 +174,12 @@ class PaddleSegModelConfigPaser {
std::cerr << "Please set CHANNELS: x" << std::endl;
return false;
}
// 15. use_pr
if (config["DEPLOY"]["USE_PR"].IsDefined()) {
_use_pr = config["DEPLOY"]["USE_PR"].as<int>();
} else {
_use_pr = 0;
}
return true;
}
......@@ -238,6 +246,8 @@ class PaddleSegModelConfigPaser {
std::string _predictor_mode;
// DEPLOY.BATCH_SIZE
int _batch_size;
// USE_PR: OP Optimized model
int _use_pr;
};
} // namespace PaddleSolution
......@@ -23,7 +23,8 @@
#include <opencv2/highgui/highgui.hpp>
#ifdef _WIN32
#include <filesystem>
#define GLOG_NO_ABBREVIATED_SEVERITIES
#include <windows.h>
#else
#include <dirent.h>
#include <sys/types.h>
......@@ -67,15 +68,21 @@ namespace utils {
// scan a directory and get all files with input extensions
inline std::vector<std::string> get_directory_images(
const std::string& path, const std::string& exts) {
std::string pattern(path);
pattern.append("\\*");
std::vector<std::string> imgs;
for (const auto& item :
std::experimental::filesystem::directory_iterator(path)) {
auto suffix = item.path().extension().string();
if (exts.find(suffix) != std::string::npos && suffix.size() > 0) {
auto fullname = path_join(path,
item.path().filename().string());
imgs.push_back(item.path().string());
}
WIN32_FIND_DATA data;
HANDLE hFind;
if ((hFind = FindFirstFile(pattern.c_str(), &data)) != INVALID_HANDLE_VALUE) {
do {
auto fname = std::string(data.cFileName);
auto pos = fname.rfind(".");
auto ext = fname.substr(pos + 1);
if (ext.size() > 1 && exts.find(ext) != std::string::npos) {
imgs.push_back(path + "\\" + data.cFileName);
}
} while (FindNextFile(hFind, &data) != 0);
FindClose(hFind);
}
return imgs;
}
......@@ -103,6 +110,25 @@ namespace utils {
}
}
// Copy a uint8 cv::Mat into a float buffer, keeping the HWC element order
// (row-major, channels interleaved) — no normalization is applied.
inline void flatten_mat(cv::Mat& im, float* data) {
    const int rows = im.rows;
    const int cols = im.cols;
    const int chans = im.channels();
    // Rows are independent, so each one can be flattened in parallel.
    #pragma omp parallel for
    for (int row = 0; row < rows; ++row) {
        const uchar* src = im.ptr<uchar>(row);
        float* dst = data + row * cols * chans;
        const int elems = cols * chans;
        for (int k = 0; k < elems; ++k) {
            dst[k] = static_cast<float>(src[k]);
        }
    }
}
// argmax
inline void argmax(float* out, std::vector<int>& shape,
std::vector<uchar>& mask, std::vector<uchar>& scoremap) {
......
# PaddleSeg 分割模型预测性能测试
## 测试软件环境
- CUDA 9.0
- CUDNN 7.6
- TensorRT-5.1.5
- PaddlePaddle v1.6.1
- Ubuntu 16.04
- GPU: Tesla V100
- CPU:Intel(R) Xeon(R) Gold 6148 CPU @ 2.40GHz
## 测试方法
- 输入采用 1000张RGB图片,batch_size 统一为 1。
- 重复跑多轮,去掉第一轮预热时间,计后续几轮的平均时间:包括数据拷贝到GPU,预测引擎计算时间,预测结果拷贝回CPU 时间。
- 采用Fluid C++预测引擎
- 测试时开启了 FLAGS_cudnn_exhaustive_search=True,使用exhaustive方式搜索卷积计算算法。
- 对于每个模型,同时测试了`OP`优化模型和原生模型的推理速度,并分别就是否开启`FP16`和`FP32`进行了测试
## 推理速度测试数据
**说明**`OP优化模型`指的是`PaddleSeg 0.3.0`版以后导出的新版模型,把图像的预处理和后处理部分放入 GPU 中进行加速,提高性能。每个模型包含了三种`eval_crop_size``192x192`/`512x512`/`768x768`
<table width="1440">
<tbody>
<tr>
<td rowspan="2" width="432">
<p>模型</p>
</td>
<td colspan="3" width="535">
<p>原始模型&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;(单位 ms/image)</p>
</td>
<td colspan="3" width="588">
<p>OP 优化模型&nbsp;&nbsp;&nbsp;&nbsp;(单位 ms/image)</p>
</td>
</tr>
<tr>
<td>
<p>Fluid</p>
</td>
<td>
<p>Fluid-TRT FP32</p>
</td>
<td>
<p>Fluid-TRT FP16</p>
</td>
<td>
<p>Fluid</p>
</td>
<td>
<p>Fluid-TRT FP32</p>
</td>
<td>
<p>Fluid-TRT FP16</p>
</td>
</tr>
<tr>
<td>
<p>deeplabv3p_mobilenetv2-1-0_bn_192x192</p>
</td>
<td>
<p>4.717</p>
</td>
<td>
<p>3.085</p>
</td>
<td>
<p>2.607</p>
</td>
<td>
<p>3.705</p>
</td>
<td>
<p>2.09</p>
</td>
<td>
<p>1.775</p>
</td>
</tr>
<tr>
<td>
<p>deeplabv3p_mobilenetv2-1-0_bn_512x512</p>
</td>
<td>
<p>15.848</p>
</td>
<td>
<p>14.243</p>
</td>
<td>
<p>13.699</p>
</td>
<td>
<p>8.284</p>
</td>
<td>
<p>6.972</p>
</td>
<td>
<p>6.013</p>
</td>
</tr>
<tr>
<td>
<p>deeplabv3p_mobilenetv2-1-0_bn_768x768</p>
</td>
<td>
<p>63.148</p>
</td>
<td>
<p>61.133</p>
</td>
<td>
<p>59.262</p>
</td>
<td>
<p>16.242</p>
</td>
<td>
<p>13.624</p>
</td>
<td>
<p>12.018</p>
</td>
</tr>
<tr>
<td>
<p>deeplabv3p_xception65_bn_192x192</p>
</td>
<td>
<p>9.703</p>
</td>
<td>
<p>9.393</p>
</td>
<td>
<p>6.46</p>
</td>
<td>
<p>8.555</p>
</td>
<td>
<p>8.202</p>
</td>
<td>
<p>5.15</p>
</td>
</tr>
<tr>
<td>
<p>deeplabv3p_xception65_bn_512x512</p>
</td>
<td>
<p>30.944</p>
</td>
<td>
<p>30.031</p>
</td>
<td>
<p>20.716</p>
</td>
<td>
<p>23.571</p>
</td>
<td>
<p>22.601</p>
</td>
<td>
<p>13.327</p>
</td>
</tr>
<tr>
<td>
<p>deeplabv3p_xception65_bn_768x768</p>
</td>
<td>
<p>92.109</p>
</td>
<td>
<p>89.338</p>
</td>
<td>
<p>43.342</p>
</td>
<td>
<p>44.341</p>
</td>
<td>
<p>41.945</p>
</td>
<td>
<p>25.486</p>
</td>
</tr>
<tr>
<td>
<p>icnet_bn_192x192</p>
</td>
<td>
<p>5.706</p>
</td>
<td>
<p>5.057</p>
</td>
<td>
<p>4.515</p>
</td>
<td>
<p>4.694</p>
</td>
<td>
<p>4.066</p>
</td>
<td>
<p>3.369</p>
</td>
</tr>
<tr>
<td>
<p>icnet_bn_512x512</p>
</td>
<td>
<p>18.326</p>
</td>
<td>
<p>16.971</p>
</td>
<td>
<p>16.663</p>
</td>
<td>
<p>10.576</p>
</td>
<td>
<p>9.779</p>
</td>
<td>
<p>9.389</p>
</td>
</tr>
<tr>
<td>
<p>icnet_bn_768x768</p>
</td>
<td>
<p>67.542</p>
</td>
<td>
<p>65.436</p>
</td>
<td>
<p>64.197</p>
</td>
<td>
<p>18.464</p>
</td>
<td>
<p>17.881</p>
</td>
<td>
<p>16.958</p>
</td>
</tr>
<tr>
<td>
<p>pspnet101_bn_192x192</p>
</td>
<td>
<p>20.978</p>
</td>
<td>
<p>18.089</p>
</td>
<td>
<p>11.946</p>
</td>
<td>
<p>20.102</p>
</td>
<td>
<p>17.128</p>
</td>
<td>
<p>11.011</p>
</td>
</tr>
<tr>
<td>
<p>pspnet101_bn_512x512</p>
</td>
<td>
<p>72.085</p>
</td>
<td>
<p>71.114</p>
</td>
<td>
<p>43.009</p>
</td>
<td>
<p>64.584</p>
</td>
<td>
<p>63.715</p>
</td>
<td>
<p>35.806</p>
</td>
</tr>
<tr>
<td>
<p>pspnet101_bn_768x768</p>
</td>
<td>
<p>160.552</p>
</td>
<td>
<p>157.791</p>
</td>
<td>
<p>110.544</p>
</td>
<td>
<p>111.996</p>
</td>
<td>
<p>111.22</p>
</td>
<td>
<p>69.646</p>
</td>
</tr>
<tr>
<td>
<p>pspnet50_bn_192x192</p>
</td>
<td>
<p>13.854</p>
</td>
<td>
<p>12.491</p>
</td>
<td>
<p>9.357</p>
</td>
<td>
<p>12.889</p>
</td>
<td>
<p>11.479</p>
</td>
<td>
<p>8.516</p>
</td>
</tr>
<tr>
<td>
<p>pspnet50_bn_512x512</p>
</td>
<td>
<p>55.868</p>
</td>
<td>
<p>55.205</p>
</td>
<td>
<p>39.659</p>
</td>
<td>
<p>48.647</p>
</td>
<td>
<p>48.076</p>
</td>
<td>
<p>32.403</p>
</td>
</tr>
<tr>
<td>
<p>pspnet50_bn_768x768</p>
</td>
<td>
<p>135.268</p>
</td>
<td>
<p>131.268</p>
</td>
<td>
<p>109.732</p>
</td>
<td>
<p>85.167</p>
</td>
<td>
<p>84.615</p>
</td>
<td>
<p>65.483</p>
</td>
</tr>
<tr>
<td>
<p>unet_bn_coco_192x192</p>
</td>
<td>
<p>7.557</p>
</td>
<td>
<p>7.979</p>
</td>
<td>
<p>8.049</p>
</td>
<td>
<p>4.933</p>
</td>
<td>
<p>4.952</p>
</td>
<td>
<p>4.959</p>
</td>
</tr>
<tr>
<td>
<p>unet_bn_coco_512x512</p>
</td>
<td>
<p>37.131</p>
</td>
<td>
<p>36.668</p>
</td>
<td>
<p>36.706</p>
</td>
<td>
<p>26.857</p>
</td>
<td>
<p>26.917</p>
</td>
<td>
<p>26.928</p>
</td>
</tr>
<tr>
<td>
<p>unet_bn_coco_768x768</p>
</td>
<td>
<p>110.578</p>
</td>
<td>
<p>110.031</p>
</td>
<td>
<p>109.979</p>
</td>
<td>
<p>59.118</p>
</td>
<td>
<p>59.173</p>
</td>
<td>
<p>59.124</p>
</td>
</tr>
</tbody>
</table>
<p>&nbsp;</p>
## 数据分析
### 1. 新版OP优化模型的加速效果
下图是`PaddleSeg 0.3.0`进行OP优化的模型和原模型的性能数据对比(以512x512 为例):
![OP加速对比](https://paddleseg.bj.bcebos.com/inference/benchmark/op_opt_512x512.png)
`分析`
- 优化模型的加速效果在各模型上都很明显,最高优化效果可达100%
- 模型的 `eval_crop_size`越大,加速效果越明显
### 2. 使用 TensorRT 开启 FP16 和 FP32 优化效果分析
在原始模型上的加速效果:
![优化模型](https://paddleseg.bj.bcebos.com/inference/benchmark/trt_opt_origin_512x512.png)
在优化模型上的加速效果:
![原始模型](https://paddleseg.bj.bcebos.com/inference/benchmark/trt_opt_new_512x512.png)
`分析`
- unet和icnet模型,使用Fluid-TensorRT的加速效果不明显,甚至没有加速。
- deeplabv3p_mobilenetv2模型,Fluid-TensorRT在原生模型的加速效果不明显,仅3%-5%的加速效果。在优化模型的加速效果可以达到20%。
- `deeplabv3_xception`、`pspnet50`和`pspnet101`模型,`fp16`加速效果很明显,在`768x768` 的size下加速效果最高可达110%。
### 3. 不同的EVAL_CROP_SIZE对性能的影响
`deeplabv3p_xception`上的数据对比图:
![xception](https://paddleseg.bj.bcebos.com/inference/benchmark/xception.png)
`deeplabv3p_mobilenet`上的数据对比图:
![xception](https://paddleseg.bj.bcebos.com/inference/benchmark/mobilenet.png)
`unet`上的测试数据对比图:
![xception](https://paddleseg.bj.bcebos.com/inference/benchmark/unet.png)
`icnet`上的测试数据对比图:
![icnet](https://paddleseg.bj.bcebos.com/inference/benchmark/icnet.png)
`pspnet101`上的测试数据对比图:
![xception](https://paddleseg.bj.bcebos.com/inference/benchmark/pspnet101.png)
`pspnet50`上的测试数据对比图:
![xception](https://paddleseg.bj.bcebos.com/inference/benchmark/pspnet50.png)
`分析`
- 对于同一模型,`eval_crop_size`越大,推理速度越慢
- 同一模型,不管是 TensorRT 优化还是 OP 优化,`eval_crop_size`越大效果越明显
......@@ -33,12 +33,22 @@ gflags.DEFINE_boolean("use_pr", default=False, help="Use optimized model")
gflags.DEFINE_string("trt_mode", default="", help="Use optimized model")
gflags.FLAGS = gflags.FLAGS
# ColorMap for visualization
color_map = [[128, 64, 128], [244, 35, 231], [69, 69, 69], [102, 102, 156],
[190, 153, 153], [153, 153, 153], [250, 170, 29], [219, 219, 0],
[106, 142, 35], [152, 250, 152], [69, 129, 180], [219, 19, 60],
[255, 0, 0], [0, 0, 142], [0, 0, 69], [0, 60, 100], [0, 79, 100],
[0, 0, 230], [119, 10, 32]]
# Generate ColorMap for visualization
def generate_colormap(num_classes):
    """Build a PASCAL-VOC-style color map: one ``[R, G, B]`` list per class id.

    The bits of each class id are spread over the three channels, three bits
    per round, filling each channel from its most significant bit downward.
    Class 0 is always black.
    """
    palette = []
    for class_id in range(num_classes):
        red = green = blue = 0
        remaining = class_id
        shift = 7
        while remaining:
            red |= ((remaining >> 0) & 1) << shift
            green |= ((remaining >> 1) & 1) << shift
            blue |= ((remaining >> 2) & 1) << shift
            shift -= 1
            remaining >>= 3
        palette.append([red, green, blue])
    return palette
# Paddle-TRT Precision Map
trt_precision_map = {
......@@ -47,6 +57,7 @@ trt_precision_map = {
"fp16": fluid.core.AnalysisConfig.Precision.Half
}
# scan a directory and get all images with support extensions
def get_images_from_dir(img_dir, support_ext=".jpg|.jpeg"):
if (not os.path.exists(img_dir) or not os.path.isdir(img_dir)):
......@@ -59,6 +70,7 @@ def get_images_from_dir(img_dir, support_ext=".jpg|.jpeg"):
imgs.append(item_path)
return imgs
# Deploy Configuration File Parser
class DeployConfig:
def __init__(self, conf_file):
......@@ -69,7 +81,8 @@ class DeployConfig:
configs = yaml.load(fp, Loader=yaml.FullLoader)
deploy_conf = configs["DEPLOY"]
# 1. get eval_crop_size
self.eval_crop_size = ast.literal_eval(deploy_conf["EVAL_CROP_SIZE"])
self.eval_crop_size = ast.literal_eval(
deploy_conf["EVAL_CROP_SIZE"])
# 2. get mean
self.mean = deploy_conf["MEAN"]
# 3. get std
......@@ -77,10 +90,10 @@ class DeployConfig:
# 4. get class_num
self.class_num = deploy_conf["NUM_CLASSES"]
# 5. get paddle model and params file path
self.model_file = os.path.join(
deploy_conf["MODEL_PATH"], deploy_conf["MODEL_FILENAME"])
self.param_file = os.path.join(
deploy_conf["MODEL_PATH"], deploy_conf["PARAMS_FILENAME"])
self.model_file = os.path.join(deploy_conf["MODEL_PATH"],
deploy_conf["MODEL_FILENAME"])
self.param_file = os.path.join(deploy_conf["MODEL_PATH"],
deploy_conf["PARAMS_FILENAME"])
# 6. use_gpu
self.use_gpu = deploy_conf["USE_GPU"]
# 7. predictor_mode
......@@ -90,6 +103,7 @@ class DeployConfig:
# 9. channels
self.channels = deploy_conf["CHANNELS"]
class ImageReader:
def __init__(self, configs):
self.config = configs
......@@ -125,7 +139,7 @@ class ImageReader:
im = im[:, :, :].astype('float32') / 255.0
im -= im_mean
im /= im_std
im = im[np.newaxis,:,:,:]
im = im[np.newaxis, :, :, :]
info = [image_path, im, (ori_w, ori_h)]
return info
......@@ -133,12 +147,15 @@ class ImageReader:
def process(self, imgs, use_pr=False):
imgs_data = []
with ThreadPoolExecutor(max_workers=self.config.batch_size) as exec:
tasks = [exec.submit(self.process_worker, imgs, idx, use_pr)
for idx in range(len(imgs))]
tasks = [
exec.submit(self.process_worker, imgs, idx, use_pr)
for idx in range(len(imgs))
]
for task in as_completed(tasks):
imgs_data.append(task.result())
return imgs_data
class Predictor:
def __init__(self, conf_file):
self.config = DeployConfig(conf_file)
......@@ -147,7 +164,7 @@ class Predictor:
predictor_config = fluid.core.NativeConfig()
predictor_config.prog_file = self.config.model_file
predictor_config.param_file = self.config.param_file
predictor_config.use_gpu = config.use_gpu
predictor_config.use_gpu = self.config.use_gpu
predictor_config.device = 0
predictor_config.fraction_of_gpu_memory = 0
elif self.config.predictor_mode == "ANALYSIS":
......@@ -160,7 +177,7 @@ class Predictor:
precision_type = trt_precision_map[gflags.FLAGS.trt_mode]
use_calib = (gflags.FLAGS.trt_mode == "int8")
predictor_config.enable_tensorrt_engine(
workspace_size=1<<30,
workspace_size=1 << 30,
max_batch_size=self.config.batch_size,
min_subgraph_size=40,
precision_mode=precision_type,
......@@ -176,15 +193,15 @@ class Predictor:
im_tensor = fluid.core.PaddleTensor()
im_tensor.name = "image"
if not use_pr:
im_tensor.shape = [batch_size,
self.config.channels,
self.config.eval_crop_size[1],
self.config.eval_crop_size[0]]
im_tensor.shape = [
batch_size, self.config.channels, self.config.eval_crop_size[1],
self.config.eval_crop_size[0]
]
else:
im_tensor.shape = [batch_size,
self.config.eval_crop_size[1],
self.config.eval_crop_size[0],
self.config.channels]
im_tensor.shape = [
batch_size, self.config.eval_crop_size[1],
self.config.eval_crop_size[0], self.config.channels
]
im_tensor.dtype = fluid.core.PaddleDType.FLOAT32
im_tensor.data = fluid.core.PaddleBuf(inputs.ravel().astype("float32"))
return [im_tensor]
......@@ -202,6 +219,7 @@ class Predictor:
score_png = mask_png[:, :, np.newaxis]
score_png = np.concatenate([score_png] * 3, axis=2)
# visualization score png
color_map = generate_colormap(self.config.class_num)
for i in range(score_png.shape[0]):
for j in range(score_png.shape[1]):
score_png[i, j] = color_map[score_png[i, j, 0]]
......@@ -216,8 +234,12 @@ class Predictor:
vis_result_name = img_name_fix + "_result.png"
result_png = score_png
# if not use_pr:
result_png = cv2.resize(result_png, ori_shape, fx=0, fy=0,
interpolation=cv2.INTER_CUBIC)
result_png = cv2.resize(
result_png,
ori_shape,
fx=0,
fy=0,
interpolation=cv2.INTER_CUBIC)
cv2.imwrite(vis_result_name, result_png, [cv2.CV_8UC1])
print("save result of [" + img_name + "] done.")
......@@ -239,7 +261,8 @@ class Predictor:
if i + batch_size >= len(images):
real_batch_size = len(images) - i
reader_start = time.time()
img_datas = self.image_reader.process(images[i: i + real_batch_size])
img_datas = self.image_reader.process(images[i:i + real_batch_size],
gflags.FLAGS.use_pr)
input_data = np.concatenate([item[1] for item in img_datas])
input_data = self.create_tensor(
input_data, real_batch_size, use_pr=gflags.FLAGS.use_pr)
......@@ -247,27 +270,29 @@ class Predictor:
infer_start = time.time()
output_data = self.predictor.run(input_data)[0]
infer_end = time.time()
reader_time += (reader_end - reader_start)
infer_time += (infer_end - infer_start)
output_data = output_data.as_ndarray()
post_start = time.time()
self.output_result(img_datas, output_data, gflags.FLAGS.use_pr)
post_end = time.time()
reader_time += (reader_end - reader_start)
infer_time += (infer_end - infer_start)
post_time += (post_end - post_start)
# finishing process all images
total_end = time.time()
# compute whole processing time
total_runtime = (total_end - total_start)
print("images_num=[%d],preprocessing_time=[%f],infer_time=[%f],postprocessing_time=[%f],total_runtime=[%f]"
% (len(images), reader_time, infer_time, post_time, total_runtime))
print(
"images_num=[%d],preprocessing_time=[%f],infer_time=[%f],postprocessing_time=[%f],total_runtime=[%f]"
% (len(images), reader_time, infer_time, post_time, total_runtime))
def run(deploy_conf, imgs_dir, support_extensions=".jpg|.jpeg"):
# 1. scan and get all images with valid extensions in directory imgs_dir
imgs = get_images_from_dir(imgs_dir)
if len(imgs) == 0:
print("No Image (with extensions : %s) found in [%s]"
% (support_extensions, imgs_dir))
print("No Image (with extensions : %s) found in [%s]" %
(support_extensions, imgs_dir))
return -1
# 2. create a predictor
seg_predictor = Predictor(deploy_conf)
......@@ -275,17 +300,19 @@ def run(deploy_conf, imgs_dir, support_extensions=".jpg|.jpeg"):
seg_predictor.predict(imgs)
return 0
if __name__ == "__main__":
# 0. parse the arguments
gflags.FLAGS(sys.argv)
if (gflags.FLAGS.conf == "" or gflags.FLAGS.input_dir == ""):
print("Usage: python infer.py --conf=/config/path/to/your/model "
+"--input_dir=/directory/of/your/input/images [--use_pr=True]")
print("Usage: python infer.py --conf=/config/path/to/your/model " +
"--input_dir=/directory/of/your/input/images [--use_pr=True]")
exit(-1)
# set empty to turn off as default
trt_mode = gflags.FLAGS.trt_mode
if (trt_mode != "" and trt_mode not in trt_precision_map):
print("Invalid trt_mode [%s], only support[int8, fp16, fp32]" % trt_mode)
print(
"Invalid trt_mode [%s], only support[int8, fp16, fp32]" % trt_mode)
exit(-1)
# run inference
run(gflags.FLAGS.conf, gflags.FLAGS.input_dir)
......@@ -52,6 +52,7 @@ def parse_args():
def export_inference_config():
deploy_cfg = '''DEPLOY:
USE_GPU : 1
USE_PR : 1
MODEL_PATH : "%s"
MODEL_FILENAME : "%s"
PARAMS_FILENAME : "%s"
......
......@@ -124,6 +124,56 @@ def sigmoid_to_softmax(logit):
logit = fluid.layers.transpose(logit, [0, 3, 1, 2])
return logit
def export_preprocess(image):
    """Build the in-graph preprocessing pipeline for an exported model.

    Takes an NHWC uint8-valued image tensor, resizes/pads/normalizes it
    inside the inference graph, and returns the NCHW network input together
    with the shapes needed to crop and restore the prediction afterwards.

    Returns:
        (image, valid_shape, origin_shape):
            image        — normalized NCHW tensor of EVAL_CROP_SIZE.
            valid_shape  — (h, w) of the image after resize, before padding.
            origin_shape — (h, w) of the original input image.
    """
    # NHWC -> NCHW so the rest of the graph sees channel-first layout.
    image = fluid.layers.transpose(image, [0, 3, 1, 2])
    origin_shape = fluid.layers.shape(image)[-2:]
    # Resize strategy depends on the configured AUG_METHOD.
    if cfg.AUG.AUG_METHOD == 'unpadding':
        # Fixed-size resize to FIX_RESIZE_SIZE (config stores (w, h)).
        h_fix = cfg.AUG.FIX_RESIZE_SIZE[1]
        w_fix = cfg.AUG.FIX_RESIZE_SIZE[0]
        image = fluid.layers.resize_bilinear(
            image,
            out_shape=[h_fix, w_fix],
            align_corners=False,
            align_mode=0)
    elif cfg.AUG.AUG_METHOD == 'rangescaling':
        # Scale so the longest side becomes INF_RESIZE_VALUE.
        size = cfg.AUG.INF_RESIZE_VALUE
        value = fluid.layers.reduce_max(origin_shape)
        scale = float(size) / value.astype('float32')
        image = fluid.layers.resize_bilinear(
            image, scale=scale, align_corners=False, align_mode=0)
    # Remember the post-resize shape: the valid (non-padding) region.
    valid_shape = fluid.layers.shape(image)[-2:]
    # Pad bottom/right up to EVAL_CROP_SIZE (config stores (w, h)).
    width = cfg.EVAL_CROP_SIZE[0]
    height = cfg.EVAL_CROP_SIZE[1]
    pad_target = fluid.layers.assign(
        np.array([height, width]).astype('float32'))
    up = fluid.layers.assign(np.array([0]).astype('float32'))
    down = pad_target[0] - valid_shape[0]
    left = up
    right = pad_target[1] - valid_shape[1]
    paddings = fluid.layers.concat([up, down, left, right])
    paddings = fluid.layers.cast(paddings, 'int32')
    # 127.5 = mid-gray fill before mean/std normalization maps it near 0.
    image = fluid.layers.pad2d(
        image, paddings=paddings, pad_value=127.5)
    # Normalize: scale to [0, 1], then subtract mean and divide by std.
    mean = np.array(cfg.MEAN).reshape(1, len(cfg.MEAN), 1, 1)
    mean = fluid.layers.assign(mean.astype('float32'))
    std = np.array(cfg.STD).reshape(1, len(cfg.STD), 1, 1)
    std = fluid.layers.assign(std.astype('float32'))
    image = (image / 255 - mean) / std
    # Reshape to a static shape so downstream code can rely on image.shape.
    image = fluid.layers.reshape(
        image, shape=[-1, cfg.DATASET.DATA_DIM, height, width])
    return image, valid_shape, origin_shape
def build_model(main_prog, start_prog, phase=ModelPhase.TRAIN):
if not ModelPhase.is_valid_phase(phase):
......@@ -146,21 +196,11 @@ def build_model(main_prog, start_prog, phase=ModelPhase.TRAIN):
if ModelPhase.is_predict(phase):
origin_image = fluid.layers.data(
name='image',
shape=[-1, 1, 1, cfg.DATASET.DATA_DIM],
shape=[-1, -1, -1, cfg.DATASET.DATA_DIM],
dtype='float32',
append_batch_size=False)
image = fluid.layers.transpose(origin_image, [0, 3, 1, 2])
origin_shape = fluid.layers.shape(image)[-2:]
mean = np.array(cfg.MEAN).reshape(1, len(cfg.MEAN), 1, 1)
mean = fluid.layers.assign(mean.astype('float32'))
std = np.array(cfg.STD).reshape(1, len(cfg.STD), 1, 1)
std = fluid.layers.assign(std.astype('float32'))
image = fluid.layers.resize_bilinear(
image,
out_shape=[height, width],
align_corners=False,
align_mode=0)
image = (image / 255 - mean) / std
image, valid_shape, origin_shape = export_preprocess(origin_image)
else:
image = fluid.layers.data(
name='image', shape=image_shape, dtype='float32')
......@@ -198,7 +238,6 @@ def build_model(main_prog, start_prog, phase=ModelPhase.TRAIN):
raise Exception(
"softmax loss can not combine with dice loss or bce loss"
)
logits = model_func(image, class_num)
# 根据选择的loss函数计算相应的损失函数
......@@ -252,13 +291,17 @@ def build_model(main_prog, start_prog, phase=ModelPhase.TRAIN):
logit = sigmoid_to_softmax(logit)
else:
logit = softmax(logit)
# 获取有效部分
logit = fluid.layers.slice(
logit, axes=[2, 3], starts=[0, 0], ends=valid_shape)
logit = fluid.layers.resize_bilinear(
logit,
out_shape=origin_shape,
align_corners=False,
align_mode=0)
logit = fluid.layers.transpose(logit, [0, 2, 3, 1])
logit = fluid.layers.argmax(logit, axis=3)
logit = fluid.layers.argmax(logit, axis=1)
return origin_image, logit
if class_num == 1:
......
......@@ -122,6 +122,12 @@ class SegConfig(dict):
len(self.MODEL.MULTI_LOSS_WEIGHT) != 3:
self.MODEL.MULTI_LOSS_WEIGHT = [1.0, 0.4, 0.16]
if self.AUG.AUG_METHOD not in ['unpadding', 'stepscaling', 'rangescaling']:
raise ValueError(
'AUG.AUG_METHOD config error, only support `unpadding`, `unpadding` and `rangescaling`'
)
def update_from_list(self, config_list):
if len(config_list) % 2 != 0:
raise ValueError(
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册