Unverified commit a7a8a685 authored by J Jason, committed by GitHub

Merge branch 'develop' into develop_draw

......@@ -14,3 +14,5 @@
- [Model Quantization](../docs/deploy/paddlelite/slim/quant.md)
- [Model Pruning](../docs/deploy/paddlelite/slim/prune.md)
- [Android Platform](../docs/deploy/paddlelite/android.md)
- [OpenVINO Deployment](../docs/deploy/openvino/introduction.md)
- [Raspberry Pi Deployment](../docs/deploy/raspberry/Raspberry.md)
\ No newline at end of file
......@@ -29,6 +29,10 @@ using namespace std::chrono; // NOLINT
DEFINE_string(model_dir, "", "Path of inference model");
DEFINE_bool(use_gpu, false, "Inferring with GPU or CPU");
DEFINE_bool(use_trt, false, "Inferring with TensorRT");
DEFINE_bool(use_mkl, true, "Inferring with MKL");
DEFINE_int32(mkl_thread_num,
omp_get_num_procs(),
"Number of mkl threads");
DEFINE_int32(gpu_id, 0, "GPU card id");
DEFINE_string(key, "", "key of encryption");
DEFINE_string(image, "", "Path of test image file");
......@@ -56,6 +60,8 @@ int main(int argc, char** argv) {
model.Init(FLAGS_model_dir,
FLAGS_use_gpu,
FLAGS_use_trt,
FLAGS_use_mkl,
FLAGS_mkl_thread_num,
FLAGS_gpu_id,
FLAGS_key);
......
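These flag hunks (repeated in each demo below) add two CPU-side switches, use_mkl and mkl_thread_num, whose default is computed at startup from omp_get_num_procs(). A standalone sketch of that gflags pattern; the flag is re-declared here purely for illustration and is not part of the commit:

    #include <gflags/gflags.h>
    #include <omp.h>

    // The default expression runs once at static-initialization time, so the
    // flag defaults to the processor count unless the user overrides it on
    // the command line, e.g. --mkl_thread_num=4.
    DEFINE_int32(mkl_thread_num, omp_get_num_procs(), "Number of mkl threads");

    int main(int argc, char** argv) {
      google::ParseCommandLineFlags(&argc, &argv, true);
      return FLAGS_mkl_thread_num > 0 ? 0 : 1;
    }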
......@@ -31,6 +31,10 @@ using namespace std::chrono; // NOLINT
DEFINE_string(model_dir, "", "Path of inference model");
DEFINE_bool(use_gpu, false, "Inferring with GPU or CPU");
DEFINE_bool(use_trt, false, "Inferring with TensorRT");
DEFINE_bool(use_mkl, true, "Inferring with MKL");
DEFINE_int32(mkl_thread_num,
omp_get_num_procs(),
"Number of mkl threads");
DEFINE_int32(gpu_id, 0, "GPU card id");
DEFINE_string(key, "", "key of encryption");
DEFINE_string(image, "", "Path of test image file");
......@@ -61,6 +65,8 @@ int main(int argc, char** argv) {
model.Init(FLAGS_model_dir,
FLAGS_use_gpu,
FLAGS_use_trt,
FLAGS_use_mkl,
FLAGS_mkl_thread_num,
FLAGS_gpu_id,
FLAGS_key);
int imgs = 1;
......
......@@ -30,6 +30,10 @@ using namespace std::chrono; // NOLINT
DEFINE_string(model_dir, "", "Path of inference model");
DEFINE_bool(use_gpu, false, "Inferring with GPU or CPU");
DEFINE_bool(use_trt, false, "Inferring with TensorRT");
DEFINE_bool(use_mkl, true, "Inferring with MKL");
DEFINE_int32(mkl_thread_num,
omp_get_num_procs(),
"Number of mkl threads");
DEFINE_int32(gpu_id, 0, "GPU card id");
DEFINE_string(key, "", "key of encryption");
DEFINE_string(image, "", "Path of test image file");
......@@ -58,6 +62,8 @@ int main(int argc, char** argv) {
model.Init(FLAGS_model_dir,
FLAGS_use_gpu,
FLAGS_use_trt,
FLAGS_use_mkl,
FLAGS_mkl_thread_num,
FLAGS_gpu_id,
FLAGS_key);
int imgs = 1;
......
......@@ -35,8 +35,12 @@ using namespace std::chrono; // NOLINT
DEFINE_string(model_dir, "", "Path of inference model");
DEFINE_bool(use_gpu, false, "Inferring with GPU or CPU");
DEFINE_bool(use_trt, false, "Inferring with TensorRT");
DEFINE_bool(use_mkl, true, "Inferring with MKL");
DEFINE_int32(gpu_id, 0, "GPU card id");
DEFINE_string(key, "", "key of encryption");
DEFINE_int32(mkl_thread_num,
omp_get_num_procs(),
"Number of mkl threads");
DEFINE_bool(use_camera, false, "Inferring with Camera");
DEFINE_int32(camera_id, 0, "Camera id");
DEFINE_string(video_path, "", "Path of input video");
......@@ -62,6 +66,8 @@ int main(int argc, char** argv) {
model.Init(FLAGS_model_dir,
FLAGS_use_gpu,
FLAGS_use_trt,
FLAGS_use_mkl,
FLAGS_mkl_thread_num,
FLAGS_gpu_id,
FLAGS_key);
......
......@@ -35,6 +35,7 @@ using namespace std::chrono; // NOLINT
DEFINE_string(model_dir, "", "Path of inference model");
DEFINE_bool(use_gpu, false, "Inferring with GPU or CPU");
DEFINE_bool(use_trt, false, "Inferring with TensorRT");
DEFINE_bool(use_mkl, true, "Inferring with MKL");
DEFINE_int32(gpu_id, 0, "GPU card id");
DEFINE_bool(use_camera, false, "Inferring with Camera");
DEFINE_int32(camera_id, 0, "Camera id");
......@@ -42,6 +43,9 @@ DEFINE_string(video_path, "", "Path of input video");
DEFINE_bool(show_result, false, "show the result of each frame with a window");
DEFINE_bool(save_result, true, "save the result of each frame to a video");
DEFINE_string(key, "", "key of encryption");
DEFINE_int32(mkl_thread_num,
omp_get_num_procs(),
"Number of mkl threads");
DEFINE_string(save_dir, "output", "Path to save visualized image");
DEFINE_double(threshold,
0.5,
......@@ -64,6 +68,8 @@ int main(int argc, char** argv) {
model.Init(FLAGS_model_dir,
FLAGS_use_gpu,
FLAGS_use_trt,
FLAGS_use_mkl,
FLAGS_mkl_thread_num,
FLAGS_gpu_id,
FLAGS_key);
// Open video
......
......@@ -35,8 +35,12 @@ using namespace std::chrono; // NOLINT
DEFINE_string(model_dir, "", "Path of inference model");
DEFINE_bool(use_gpu, false, "Inferring with GPU or CPU");
DEFINE_bool(use_trt, false, "Inferring with TensorRT");
DEFINE_bool(use_mkl, true, "Inferring with MKL");
DEFINE_int32(gpu_id, 0, "GPU card id");
DEFINE_string(key, "", "key of encryption");
DEFINE_int32(mkl_thread_num,
omp_get_num_procs(),
"Number of mkl threads");
DEFINE_bool(use_camera, false, "Inferring with Camera");
DEFINE_int32(camera_id, 0, "Camera id");
DEFINE_string(video_path, "", "Path of input video");
......@@ -62,6 +66,8 @@ int main(int argc, char** argv) {
model.Init(FLAGS_model_dir,
FLAGS_use_gpu,
FLAGS_use_trt,
FLAGS_use_mkl,
FLAGS_mkl_thread_num,
FLAGS_gpu_id,
FLAGS_key);
// Open video
......
......@@ -70,6 +70,8 @@ class Model {
* @param model_dir: the directory which contains model.yml
* @param use_gpu: use gpu or not when inferring
* @param use_trt: use TensorRT or not when inferring
* @param use_mkl: use mkl or not when inferring
* @param mkl_thread_num: number of threads for mkldnn when inferring
* @param gpu_id: the id of the gpu when inferring with gpu
* @param key: the key of encryption when using an encrypted model
* @param use_ir_optim: use ir optimization or not when inferring
......@@ -77,15 +79,26 @@ class Model {
void Init(const std::string& model_dir,
bool use_gpu = false,
bool use_trt = false,
bool use_mkl = true,
int mkl_thread_num = 4,
int gpu_id = 0,
std::string key = "",
bool use_ir_optim = true) {
create_predictor(
model_dir,
use_gpu,
use_trt,
use_mkl,
mkl_thread_num,
gpu_id,
key,
use_ir_optim);
}
void create_predictor(const std::string& model_dir,
bool use_gpu = false,
bool use_trt = false,
bool use_mkl = true,
int mkl_thread_num = 4,
int gpu_id = 0,
std::string key = "",
bool use_ir_optim = true);
......
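For orientation, a hypothetical call site for the extended Init shown above; the path and values are placeholders, not part of the commit:

    #include "include/paddlex/paddlex.h"

    int main() {
      PaddleX::Model model;
      model.Init("./inference_model",  // model_dir containing model.yml (placeholder)
                 false,                // use_gpu
                 false,                // use_trt
                 true,                 // use_mkl        (new in this change)
                 8,                    // mkl_thread_num (new in this change)
                 0,                    // gpu_id
                 "");                  // key: empty for an unencrypted model
      return 0;
    }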
......@@ -47,7 +47,7 @@ struct Box {
// confidence score
float score;
std::vector<float> coordinate;
Mask<int> mask;
};
/*
......
......@@ -21,6 +21,7 @@
#include <unordered_map>
#include <utility>
#include <vector>
#include <iostream>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
......@@ -216,8 +217,7 @@ class Padding : public Transform {
}
if (item["im_padding_value"].IsDefined()) {
im_value_ = item["im_padding_value"].as<std::vector<float>>();
} else {
im_value_ = {0, 0, 0};
}
}
......
......@@ -11,16 +11,25 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <math.h>
#include <omp.h>
#include <algorithm>
#include <fstream>
#include <cstring>
#include "include/paddlex/paddlex.h"
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
namespace PaddleX {
void Model::create_predictor(const std::string& model_dir,
bool use_gpu,
bool use_trt,
bool use_mkl,
int mkl_thread_num,
int gpu_id,
std::string key,
bool use_ir_optim) {
......@@ -40,7 +49,7 @@ void Model::create_predictor(const std::string& model_dir,
}
#endif
if (yaml_input == "") {
// read yaml file
std::ifstream yaml_fin(yaml_file);
yaml_fin.seekg(0, std::ios::end);
size_t yaml_file_size = yaml_fin.tellg();
......@@ -48,7 +57,7 @@ void Model::create_predictor(const std::string& model_dir,
yaml_fin.seekg(0);
yaml_fin.read(&yaml_input[0], yaml_file_size);
}
// load yaml file
if (!load_config(yaml_input)) {
std::cerr << "Parse file 'model.yml' failed!" << std::endl;
exit(-1);
......@@ -57,6 +66,10 @@ void Model::create_predictor(const std::string& model_dir,
if (key == "") {
config.SetModel(model_file, params_file);
}
if (use_mkl && name != "HRNet" && name != "DeepLabv3p") {
config.EnableMKLDNN();
config.SetCpuMathLibraryNumThreads(mkl_thread_num);
}
if (use_gpu) {
config.EnableUseGpu(100, gpu_id);
} else {
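The hunk above is the heart of the CPU-side change: when use_mkl is set and the model is not HRNet or DeepLabv3p, the predictor turns on MKL-DNN kernels and pins the math-library thread pool. A hedged sketch of that configuration in isolation, assuming the Paddle inference AnalysisConfig API used elsewhere in this file:

    #include "paddle_inference_api.h"

    // Illustrative only: mirrors the EnableMKLDNN() block above.
    void ConfigureCpuMath(paddle::AnalysisConfig* config, int mkl_thread_num) {
      config->DisableGpu();
      config->EnableMKLDNN();                               // oneDNN (MKL-DNN) kernels
      config->SetCpuMathLibraryNumThreads(mkl_thread_num);  // intra-op CPU threads
    }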
......@@ -64,13 +77,13 @@ void Model::create_predictor(const std::string& model_dir,
}
config.SwitchUseFeedFetchOps(false);
config.SwitchSpecifyInputNames(true);
// enable graph optimization
#if defined(__arm__) || defined(__aarch64__)
config.SwitchIrOptim(false);
#else
config.SwitchIrOptim(use_ir_optim);
#endif
// enable memory optimization
config.EnableMemoryOptim();
if (use_trt) {
config.EnableTensorRtEngine(
......@@ -108,9 +121,9 @@ bool Model::load_config(const std::string& yaml_input) {
return false;
}
}
// build data preprocess stream
transforms_.Init(config["Transforms"], to_rgb);
// read label list
labels.clear();
for (const auto& item : config["_Attributes"]["labels"]) {
int index = labels.size();
......@@ -152,19 +165,19 @@ bool Model::predict(const cv::Mat& im, ClsResult* result) {
"to function predict()!" << std::endl;
return false;
}
// preprocess the input image
if (!preprocess(im, &inputs_)) {
std::cerr << "Preprocess failed!" << std::endl;
return false;
}
// predict
auto in_tensor = predictor_->GetInputTensor("image");
int h = inputs_.new_im_size_[0];
int w = inputs_.new_im_size_[1];
in_tensor->Reshape({1, 3, h, w});
in_tensor->copy_from_cpu(inputs_.im_data_.data());
predictor_->ZeroCopyRun();
// get result
auto output_names = predictor_->GetOutputNames();
auto output_tensor = predictor_->GetOutputTensor(output_names[0]);
std::vector<int> output_shape = output_tensor->shape();
......@@ -174,7 +187,7 @@ bool Model::predict(const cv::Mat& im, ClsResult* result) {
}
outputs_.resize(size);
output_tensor->copy_to_cpu(outputs_.data());
// postprocess
auto ptr = std::max_element(std::begin(outputs_), std::end(outputs_));
result->category_id = std::distance(std::begin(outputs_), ptr);
result->score = *ptr;
......@@ -198,12 +211,12 @@ bool Model::predict(const std::vector<cv::Mat>& im_batch,
return false;
}
inputs_batch_.assign(im_batch.size(), ImageBlob());
// preprocess
if (!preprocess(im_batch, &inputs_batch_, thread_num)) {
std::cerr << "Preprocess failed!" << std::endl;
return false;
}
// predict
int batch_size = im_batch.size();
auto in_tensor = predictor_->GetInputTensor("image");
int h = inputs_batch_[0].new_im_size_[0];
......@@ -218,7 +231,7 @@ bool Model::predict(const std::vector<cv::Mat>& im_batch,
in_tensor->copy_from_cpu(inputs_data.data());
// in_tensor->copy_from_cpu(inputs_.im_data_.data());
predictor_->ZeroCopyRun();
// get result
auto output_names = predictor_->GetOutputNames();
auto output_tensor = predictor_->GetOutputTensor(output_names[0]);
std::vector<int> output_shape = output_tensor->shape();
......@@ -228,7 +241,7 @@ bool Model::predict(const std::vector<cv::Mat>& im_batch,
}
outputs_.resize(size);
output_tensor->copy_to_cpu(outputs_.data());
// postprocess
(*results).clear();
(*results).resize(batch_size);
int single_batch_size = size / batch_size;
......@@ -258,7 +271,7 @@ bool Model::predict(const cv::Mat& im, DetResult* result) {
return false;
}
// preprocess
if (!preprocess(im, &inputs_)) {
std::cerr << "Preprocess failed!" << std::endl;
return false;
......@@ -288,7 +301,7 @@ bool Model::predict(const cv::Mat& im, DetResult* result) {
im_info_tensor->copy_from_cpu(im_info);
im_shape_tensor->copy_from_cpu(im_shape);
}
// predict
predictor_->ZeroCopyRun();
std::vector<float> output_box;
......@@ -306,7 +319,7 @@ bool Model::predict(const cv::Mat& im, DetResult* result) {
return true;
}
int num_boxes = size / 6;
// box postprocess
for (int i = 0; i < num_boxes; ++i) {
Box box;
box.category_id = static_cast<int>(round(output_box[i * 6]));
......@@ -321,7 +334,7 @@ bool Model::predict(const cv::Mat& im, DetResult* result) {
box.coordinate = {xmin, ymin, w, h};
result->boxes.push_back(std::move(box));
}
// mask postprocess
if (name == "MaskRCNN") {
std::vector<float> output_mask;
auto output_mask_tensor = predictor_->GetOutputTensor(output_names[1]);
......@@ -337,12 +350,22 @@ bool Model::predict(const cv::Mat& im, DetResult* result) {
result->mask_resolution = output_mask_shape[2];
for (int i = 0; i < result->boxes.size(); ++i) {
Box* box = &result->boxes[i];
box->mask.shape = {static_cast<int>(box->coordinate[2]),
static_cast<int>(box->coordinate[3])};
auto begin_mask =
output_mask.data() + (i * classes + box->category_id) * mask_pixels;
cv::Mat bin_mask(result->mask_resolution,
result->mask_resolution,
CV_32FC1,
begin_mask);
cv::resize(bin_mask,
bin_mask,
cv::Size(box->mask.shape[0], box->mask.shape[1]));
cv::threshold(bin_mask, bin_mask, 0.5, 1, cv::THRESH_BINARY);
auto mask_int_begin = reinterpret_cast<float*>(bin_mask.data);
auto mask_int_end =
mask_int_begin + box->mask.shape[0] * box->mask.shape[1];
box->mask.data.assign(mask_int_begin, mask_int_end);
}
}
return true;
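The mask hunk above replaces the old raw-score copy: the float mask scores for the predicted class are wrapped in a mask_resolution x mask_resolution float Mat, resized to the box size, binarized at 0.5, and stored as integers (matching the Mask<int> change in results.h). A self-contained sketch of that sequence; the helper name and sizes are hypothetical:

    #include <opencv2/imgproc/imgproc.hpp>
    #include <vector>

    // Hypothetical helper: scores points at resolution*resolution floats for
    // one class; the return value is the binarized mask at box size.
    std::vector<int> MaskToBoxSize(float* scores, int resolution,
                                   int box_w, int box_h) {
      cv::Mat bin_mask(resolution, resolution, CV_32FC1, scores);  // no copy
      cv::resize(bin_mask, bin_mask, cv::Size(box_w, box_h));
      cv::threshold(bin_mask, bin_mask, 0.5, 1, cv::THRESH_BINARY);
      auto* begin = reinterpret_cast<float*>(bin_mask.data);
      return std::vector<int>(begin, begin + box_w * box_h);
    }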
......@@ -366,12 +389,12 @@ bool Model::predict(const std::vector<cv::Mat>& im_batch,
inputs_batch_.assign(im_batch.size(), ImageBlob());
int batch_size = im_batch.size();
// preprocess
if (!preprocess(im_batch, &inputs_batch_, thread_num)) {
std::cerr << "Preprocess failed!" << std::endl;
return false;
}
// batch padding for RCNN-style models
if (batch_size > 1) {
if (name == "FasterRCNN" || name == "MaskRCNN") {
int max_h = -1;
......@@ -452,10 +475,10 @@ bool Model::predict(const std::vector<cv::Mat>& im_batch,
im_info_tensor->copy_from_cpu(im_info.data());
im_shape_tensor->copy_from_cpu(im_shape.data());
}
// predict
predictor_->ZeroCopyRun();
// get all boxes
std::vector<float> output_box;
auto output_names = predictor_->GetOutputNames();
auto output_box_tensor = predictor_->GetOutputTensor(output_names[0]);
......@@ -472,7 +495,7 @@ bool Model::predict(const std::vector<cv::Mat>& im_batch,
}
auto lod_vector = output_box_tensor->lod();
int num_boxes = size / 6;
// box postprocess
(*results).clear();
(*results).resize(batch_size);
for (int i = 0; i < lod_vector[0].size() - 1; ++i) {
......@@ -492,7 +515,7 @@ bool Model::predict(const std::vector<cv::Mat>& im_batch,
}
}
// mask postprocess
if (name == "MaskRCNN") {
std::vector<float> output_mask;
auto output_mask_tensor = predictor_->GetOutputTensor(output_names[1]);
......@@ -509,14 +532,24 @@ bool Model::predict(const std::vector<cv::Mat>& im_batch,
for (int i = 0; i < lod_vector[0].size() - 1; ++i) {
(*results)[i].mask_resolution = output_mask_shape[2];
for (int j = 0; j < (*results)[i].boxes.size(); ++j) {
Box* box = &(*results)[i].boxes[j];
int category_id = box->category_id;
box->mask.shape = {static_cast<int>(box->coordinate[2]),
static_cast<int>(box->coordinate[3])};
auto begin_mask =
output_mask.data() + (mask_idx * classes + category_id) * mask_pixels;
cv::Mat bin_mask(output_mask_shape[2],
output_mask_shape[2],
CV_32FC1,
begin_mask);
cv::resize(bin_mask,
bin_mask,
cv::Size(box->mask.shape[0], box->mask.shape[1]));
cv::threshold(bin_mask, bin_mask, 0.5, 1, cv::THRESH_BINARY);
auto mask_int_begin = reinterpret_cast<float*>(bin_mask.data);
auto mask_int_end =
mask_int_begin + box->mask.shape[0] * box->mask.shape[1];
box->mask.data.assign(mask_int_begin, mask_int_end);
mask_idx++;
}
}
......@@ -537,7 +570,7 @@ bool Model::predict(const cv::Mat& im, SegResult* result) {
return false;
}
// preprocess
if (!preprocess(im, &inputs_)) {
std::cerr << "Preprocess failed!" << std::endl;
return false;
......@@ -549,10 +582,10 @@ bool Model::predict(const cv::Mat& im, SegResult* result) {
im_tensor->Reshape({1, 3, h, w});
im_tensor->copy_from_cpu(inputs_.im_data_.data());
// predict
predictor_->ZeroCopyRun();
// get labelmap
auto output_names = predictor_->GetOutputNames();
auto output_label_tensor = predictor_->GetOutputTensor(output_names[0]);
std::vector<int> output_label_shape = output_label_tensor->shape();
......@@ -565,7 +598,7 @@ bool Model::predict(const cv::Mat& im, SegResult* result) {
result->label_map.data.resize(size);
output_label_tensor->copy_to_cpu(result->label_map.data.data());
// get scoremap
auto output_score_tensor = predictor_->GetOutputTensor(output_names[1]);
std::vector<int> output_score_shape = output_score_tensor->shape();
size = 1;
......@@ -577,7 +610,7 @@ bool Model::predict(const cv::Mat& im, SegResult* result) {
result->score_map.data.resize(size);
output_score_tensor->copy_to_cpu(result->score_map.data.data());
// restore the result to the original image size
std::vector<uint8_t> label_map(result->label_map.data.begin(),
result->label_map.data.end());
cv::Mat mask_label(result->label_map.shape[1],
......@@ -647,7 +680,7 @@ bool Model::predict(const std::vector<cv::Mat>& im_batch,
return false;
}
// preprocess
inputs_batch_.assign(im_batch.size(), ImageBlob());
if (!preprocess(im_batch, &inputs_batch_, thread_num)) {
std::cerr << "Preprocess failed!" << std::endl;
......@@ -670,10 +703,10 @@ bool Model::predict(const std::vector<cv::Mat>& im_batch,
im_tensor->copy_from_cpu(inputs_data.data());
// im_tensor->copy_from_cpu(inputs_.im_data_.data());
// predict
predictor_->ZeroCopyRun();
// get labelmap
auto output_names = predictor_->GetOutputNames();
auto output_label_tensor = predictor_->GetOutputTensor(output_names[0]);
std::vector<int> output_label_shape = output_label_tensor->shape();
......@@ -698,7 +731,7 @@ bool Model::predict(const std::vector<cv::Mat>& im_batch,
(*results)[i].label_map.data.data());
}
// get scoremap
auto output_score_tensor = predictor_->GetOutputTensor(output_names[1]);
std::vector<int> output_score_shape = output_score_tensor->shape();
size = 1;
......@@ -722,7 +755,7 @@ bool Model::predict(const std::vector<cv::Mat>& im_batch,
(*results)[i].score_map.data.data());
}
// restore the results to the original image sizes
for (int i = 0; i < batch_size; ++i) {
std::vector<uint8_t> label_map((*results)[i].label_map.data.begin(),
(*results)[i].label_map.data.end());
......
......@@ -12,12 +12,14 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "include/paddlex/transforms.h"
#include <math.h>
#include <iostream>
#include <string>
#include <vector>
#include <math.h>
#include "include/paddlex/transforms.h"
namespace PaddleX {
......@@ -195,7 +197,7 @@ std::shared_ptr<Transform> Transforms::CreateTransform(
}
bool Transforms::Run(cv::Mat* im, ImageBlob* data) {
// run all the preprocessing ops in order
if (to_rgb_) {
cv::cvtColor(*im, *im, cv::COLOR_BGR2RGB);
}
......@@ -211,8 +213,8 @@ bool Transforms::Run(cv::Mat* im, ImageBlob* data) {
}
}
// convert the image from NHWC to NCHW format
// and store it as one contiguous block in ImageBlob
int h = im->rows;
int w = im->cols;
int c = im->channels();
......
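The comment above summarizes the layout change at the end of Transforms::Run. A minimal sketch of an NHWC-to-NCHW repack for a float image, assuming a preallocated destination of h * w * c floats; the function name is illustrative:

    #include <opencv2/core/core.hpp>

    void RepackToNCHW(const cv::Mat& im, float* dst) {
      int h = im.rows, w = im.cols, c = im.channels();
      for (int y = 0; y < h; ++y) {
        const float* row = im.ptr<float>(y);  // interleaved HWC row
        for (int x = 0; x < w; ++x) {
          for (int k = 0; k < c; ++k) {
            // plane k first (channel-major), then row y, then column x
            dst[k * h * w + y * w + x] = row[x * c + k];
          }
        }
      }
    }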
......@@ -47,7 +47,7 @@ cv::Mat Visualize(const cv::Mat& img,
boxes[i].coordinate[2],
boxes[i].coordinate[3]);
// draw box and title
std::string text = boxes[i].category;
int c1 = colormap[3 * boxes[i].category_id + 0];
int c2 = colormap[3 * boxes[i].category_id + 1];
......@@ -63,13 +63,13 @@ cv::Mat Visualize(const cv::Mat& img,
origin.x = roi.x;
origin.y = roi.y;
// background
cv::Rect text_back = cv::Rect(boxes[i].coordinate[0],
boxes[i].coordinate[1] - text_size.height,
text_size.width,
text_size.height);
// draw
cv::rectangle(vis_img, roi, roi_color, 2);
cv::rectangle(vis_img, text_back, roi_color, -1);
cv::putText(vis_img,
......@@ -80,18 +80,16 @@ cv::Mat Visualize(const cv::Mat& img,
cv::Scalar(255, 255, 255),
thickness);
// mask
if (boxes[i].mask.data.size() == 0) {
continue;
}
std::vector<float> mask_data;
mask_data.assign(boxes[i].mask.data.begin(), boxes[i].mask.data.end());
cv::Mat bin_mask(boxes[i].mask.shape[1],
boxes[i].mask.shape[0],
CV_32FC1,
mask_data.data());
cv::Mat full_mask = cv::Mat::zeros(vis_img.size(), CV_8UC1);
bin_mask.copyTo(full_mask(roi));
cv::Mat mask_ch[3];
......
......@@ -23,6 +23,7 @@ import org.opencv.core.Scalar;
import org.opencv.core.Size;
import org.opencv.imgproc.Imgproc;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
......@@ -101,6 +102,15 @@ public class Transforms {
if (info.containsKey("coarsest_stride")) {
padding.coarsest_stride = (int) info.get("coarsest_stride");
}
if (info.containsKey("im_padding_value")) {
List<Double> im_padding_value = (List<Double>) info.get("im_padding_value");
if (im_padding_value.size() != 3) {
Log.e(TAG, "length of im_padding_value in Padding must be 3.");
}
for (int k = 0; k < im_padding_value.size(); k++) {
padding.padding_value[k] = im_padding_value.get(k);
}
}
if (info.containsKey("target_size")) {
if (info.get("target_size") instanceof Integer) {
padding.width = (int) info.get("target_size");
......@@ -124,7 +134,7 @@ public class Transforms {
if(transformsMode.equalsIgnoreCase("RGB")){
Imgproc.cvtColor(inputMat, inputMat, Imgproc.COLOR_BGR2RGB);
}else if(!transformsMode.equalsIgnoreCase("BGR")){
Log.e(TAG, "transformsMode only support RGB or BGR");
Log.e(TAG, "transformsMode only support RGB or BGR.");
}
inputMat.convertTo(inputMat, CvType.CV_32FC(3));
......@@ -136,16 +146,15 @@ public class Transforms {
int h = inputMat.height();
int c = inputMat.channels();
imageBlob.setImageData(new float[w * h * c]);
Mat singleChannelMat = new Mat(h, w, CvType.CV_32FC(1));
float[] singleChannelImageData = new float[w * h];
for (int i = 0; i < c; i++) {
Core.extractChannel(inputMat, singleChannelMat, i);
singleChannelMat.get(0, 0, singleChannelImageData);
System.arraycopy(singleChannelImageData, 0, imageBlob.getImageData(), i * w * h, w * h);
}
return imageBlob;
}
......@@ -248,6 +257,7 @@ public class Transforms {
private double width;
private double height;
private double coarsest_stride;
private double[] padding_value = {0.0, 0.0, 0.0};
public Mat run(Mat inputMat, ImageBlob imageBlob) {
int origin_w = inputMat.width();
......@@ -264,7 +274,7 @@ public class Transforms {
}
imageBlob.setNewImageSize(inputMat.height(),2);
imageBlob.setNewImageSize(inputMat.width(),3);
Core.copyMakeBorder(inputMat, inputMat, 0, (int)padding_h, 0, (int)padding_w, Core.BORDER_CONSTANT, new Scalar(0));
Core.copyMakeBorder(inputMat, inputMat, 0, (int)padding_h, 0, (int)padding_w, Core.BORDER_CONSTANT, new Scalar(padding_value));
return inputMat;
}
}
......
......@@ -31,8 +31,11 @@ import org.opencv.core.Scalar;
import org.opencv.core.Size;
import org.opencv.imgproc.Imgproc;
import java.nio.ByteBuffer;
import java.nio.FloatBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
......@@ -120,13 +123,11 @@ public class Visualize {
int new_w = (int)imageBlob.getNewImageSize()[3];
Mat mask = new Mat(new_h, new_w, CvType.CV_32FC(1));
float[] scoreData = new float[new_h*new_w];
System.arraycopy(result.getMask().getScoreData(), cutoutClass * new_h * new_w, scoreData, 0, new_h * new_w);
mask.put(0,0, scoreData);
Core.multiply(mask, new Scalar(255), mask);
mask.convertTo(mask,CvType.CV_8UC(1));
ListIterator<Map.Entry<String, int[]>> reverseReshapeInfo = new ArrayList<Map.Entry<String, int[]>>(imageBlob.getReshapeInfo().entrySet()).listIterator(imageBlob.getReshapeInfo().size());
while (reverseReshapeInfo.hasPrevious()) {
Map.Entry<String, int[]> entry = reverseReshapeInfo.previous();
......
......@@ -8,7 +8,9 @@ SET(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH})
SET(OPENVINO_DIR "" CACHE PATH "Location of libraries")
SET(OPENCV_DIR "" CACHE PATH "Location of libraries")
SET(GFLAGS_DIR "" CACHE PATH "Location of libraries")
SET(GLOG_DIR "" CACHE PATH "Location of libraries")
SET(NGRAPH_LIB "" CACHE PATH "Location of libraries")
SET(ARCH "" CACHE PATH "Location of libraries")
include(cmake/yaml-cpp.cmake)
......@@ -27,6 +29,12 @@ macro(safe_set_static_flag)
endforeach(flag_var)
endmacro()
if(NOT WIN32)
if (NOT DEFINED ARCH OR ${ARCH} STREQUAL "")
message(FATAL_ERROR "please set ARCH with -DARCH=x86 OR armv7")
endif()
endif()
if (NOT DEFINED OPENVINO_DIR OR ${OPENVINO_DIR} STREQUAL "")
message(FATAL_ERROR "please set OPENVINO_DIR with -DOPENVINO_DIR=/path/influence_engine")
endif()
......@@ -39,19 +47,32 @@ if (NOT DEFINED GFLAGS_DIR OR ${GFLAGS_DIR} STREQUAL "")
message(FATAL_ERROR "please set GFLAGS_DIR with -DGFLAGS_DIR=/path/gflags")
endif()
if (NOT DEFINED GLOG_DIR OR ${GLOG_DIR} STREQUAL "")
message(FATAL_ERROR "please set GLOG_DIR with -DLOG_DIR=/path/glog")
endif()
if (NOT DEFINED NGRAPH_LIB OR ${NGRAPH_LIB} STREQUAL "")
message(FATAL_ERROR "please set NGRAPH_DIR with -DNGRAPH_DIR=/path/ngraph")
endif()
include_directories("${OPENVINO_DIR}")
link_directories("${OPENVINO_DIR}/lib")
include_directories("${OPENVINO_DIR}/include")
link_directories("${OPENVINO_DIR}/external/tbb/lib")
include_directories("${OPENVINO_DIR}/external/tbb/include/tbb")
link_directories("${OPENVINO_DIR}/lib")
link_directories("${OPENVINO_DIR}/external/tbb/lib")
if(WIN32)
link_directories("${OPENVINO_DIR}/lib/intel64/Release")
link_directories("${OPENVINO_DIR}/bin/intel64/Release")
endif()
link_directories("${GFLAGS_DIR}/lib")
include_directories("${GFLAGS_DIR}/include")
link_directories("${GLOG_DIR}/lib")
include_directories("${GLOG_DIR}/include")
link_directories("${NGRAPH_LIB}")
link_directories("${NGRAPH_LIB}/lib")
......@@ -79,14 +100,29 @@ else()
set(CMAKE_STATIC_LIBRARY_PREFIX "")
endif()
if(WIN32)
set(DEPS ${OPENVINO_DIR}/lib/intel64/Release/inference_engine${CMAKE_STATIC_LIBRARY_SUFFIX})
set(DEPS ${DEPS} ${OPENVINO_DIR}/lib/intel64/Release/inference_engine_legacy${CMAKE_STATIC_LIBRARY_SUFFIX})
else()
if (ARCH STREQUAL "armv7")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv7-a")
if(WITH_STATIC_LIB)
set(DEPS ${OPENVINO_DIR}/lib/armv7l/libinference_engine${CMAKE_STATIC_LIBRARY_SUFFIX})
set(DEPS ${DEPS} ${OPENVINO_DIR}/lib/armv7l/libinference_engine_legacy${CMAKE_STATIC_LIBRARY_SUFFIX})
else()
set(DEPS ${OPENVINO_DIR}/lib/armv7l/libinference_engine${CMAKE_SHARED_LIBRARY_SUFFIX})
set(DEPS ${DEPS} ${OPENVINO_DIR}/lib/armv7l/libinference_engine_legacy${CMAKE_SHARED_LIBRARY_SUFFIX})
endif()
else()
if(WITH_STATIC_LIB)
set(DEPS ${OPENVINO_DIR}/lib/intel64/libinference_engine${CMAKE_STATIC_LIBRARY_SUFFIX})
set(DEPS ${DEPS} ${OPENVINO_DIR}/lib/intel64/libinference_engine_legacy${CMAKE_STATIC_LIBRARY_SUFFIX})
else()
set(DEPS ${OPENVINO_DIR}/lib/intel64/libinference_engine${CMAKE_SHARED_LIBRARY_SUFFIX})
set(DEPS ${DEPS} ${OPENVINO_DIR}/lib/intel64/libinference_engine_legacy${CMAKE_SHARED_LIBRARY_SUFFIX})
endif()
endif()
endif(WIN32)
if (NOT WIN32)
set(DEPS ${DEPS}
......@@ -94,7 +130,7 @@ if (NOT WIN32)
)
else()
set(DEPS ${DEPS}
glog gflags_static libyaml-cppmt)
set(DEPS ${DEPS} libcmt shlwapi)
endif(NOT WIN32)
......@@ -105,7 +141,14 @@ if (NOT WIN32)
endif()
set(DEPS ${DEPS} ${OpenCV_LIBS})
add_executable(classifier demo/classifier.cpp src/transforms.cpp src/paddlex.cpp)
ADD_DEPENDENCIES(classifier ext-yaml-cpp)
target_link_libraries(classifier ${DEPS})
add_executable(segmenter demo/segmenter.cpp src/transforms.cpp src/paddlex.cpp src/visualize.cpp)
ADD_DEPENDENCIES(segmenter ext-yaml-cpp)
target_link_libraries(segmenter ${DEPS})
add_executable(detector demo/detector.cpp src/transforms.cpp src/paddlex.cpp src/visualize.cpp)
ADD_DEPENDENCIES(detector ext-yaml-cpp)
target_link_libraries(detector ${DEPS})
......@@ -13,12 +13,32 @@
"variables": [
{
"name": "OPENCV_DIR",
"value": "C:/projects/opencv",
"value": "/path/to/opencv",
"type": "PATH"
},
{
"name": "OPENVINO_LIB",
"value": "C:/projetcs/inference_engine",
"name": "OPENVINO_DIR",
"value": "C:/Program Files (x86)/IntelSWTools/openvino/deployment_tools/inference_engine",
"type": "PATH"
},
{
"name": "NGRAPH_LIB",
"value": "C:/Program Files (x86)/IntelSWTools/openvino/deployment_tools/ngraph/lib",
"type": "PATH"
},
{
"name": "GFLAGS_DIR",
"value": "/path/to/gflags",
"type": "PATH"
},
{
"name": "WITH_STATIC_LIB",
"value": "True",
"type": "BOOL"
},
{
"name": "GLOG_DIR",
"value": "/path/to/glog",
"type": "PATH"
}
]
......
find_package(Git REQUIRED)
include(ExternalProject)
......
......@@ -22,7 +22,7 @@
#include "include/paddlex/paddlex.h"
DEFINE_string(model_dir, "", "Path of inference model");
DEFINE_string(cfg_file, "", "Path of PaddleX model yml file");
DEFINE_string(device, "CPU", "Device name");
DEFINE_string(image, "", "Path of test image file");
DEFINE_string(image_list, "", "Path of test image list file");
......@@ -35,8 +35,8 @@ int main(int argc, char** argv) {
std::cerr << "--model_dir need to be defined" << std::endl;
return -1;
}
if (FLAGS_cfg_dir == "") {
std::cerr << "--cfg_dir need to be defined" << std::endl;
if (FLAGS_cfg_file == "") {
std::cerr << "--cfg_file need to be defined" << std::endl;
return -1;
}
if (FLAGS_image == "" & FLAGS_image_list == "") {
......@@ -44,11 +44,11 @@ int main(int argc, char** argv) {
return -1;
}
// load model
PaddleX::Model model;
model.Init(FLAGS_model_dir, FLAGS_cfg_file, FLAGS_device);
// predict
if (FLAGS_image_list != "") {
std::ifstream inf(FLAGS_image_list);
if (!inf) {
......
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <glog/logging.h>
#include <omp.h>
#include <algorithm>
#include <chrono> // NOLINT
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include <utility>
#include "include/paddlex/paddlex.h"
#include "include/paddlex/visualize.h"
using namespace std::chrono; // NOLINT
DEFINE_string(model_dir, "", "Path of openvino model xml file");
DEFINE_string(cfg_file, "", "Path of PaddleX model yaml file");
DEFINE_string(image, "", "Path of test image file");
DEFINE_string(image_list, "", "Path of test image list file");
DEFINE_string(device, "CPU", "Device name");
DEFINE_string(save_dir, "", "Path to save visualized image");
DEFINE_int32(batch_size, 1, "Batch size of inference");
DEFINE_double(threshold,
0.5,
"The minimum scores of target boxes which are shown");
int main(int argc, char** argv) {
google::ParseCommandLineFlags(&argc, &argv, true);
if (FLAGS_model_dir == "") {
std::cerr << "--model_dir need to be defined" << std::endl;
return -1;
}
if (FLAGS_cfg_file == "") {
std::cerr << "--cfg_file need to be defined" << std::endl;
return -1;
}
if (FLAGS_image == "" & FLAGS_image_list == "") {
std::cerr << "--image or --image_list need to be defined" << std::endl;
return -1;
}
// load model
PaddleX::Model model;
model.Init(FLAGS_model_dir, FLAGS_cfg_file, FLAGS_device);
int imgs = 1;
auto colormap = PaddleX::GenerateColorMap(model.labels.size());
// predict
if (FLAGS_image_list != "") {
std::ifstream inf(FLAGS_image_list);
if (!inf) {
std::cerr << "Fail to open file " << FLAGS_image_list << std::endl;
return -1;
}
std::string image_path;
while (getline(inf, image_path)) {
PaddleX::DetResult result;
cv::Mat im = cv::imread(image_path, 1);
model.predict(im, &result);
if (FLAGS_save_dir != "") {
cv::Mat vis_img = PaddleX::Visualize(
im, result, model.labels, colormap, FLAGS_threshold);
std::string save_path =
PaddleX::generate_save_path(FLAGS_save_dir, image_path);
cv::imwrite(save_path, vis_img);
std::cout << "Visualized output saved as " << save_path << std::endl;
}
}
} else {
PaddleX::DetResult result;
cv::Mat im = cv::imread(FLAGS_image, 1);
model.predict(im, &result);
for (int i = 0; i < result.boxes.size(); ++i) {
std::cout << "image file: " << FLAGS_image << std::endl;
std::cout << ", predict label: " << result.boxes[i].category
<< ", label_id:" << result.boxes[i].category_id
<< ", score: " << result.boxes[i].score
<< ", box(xmin, ymin, w, h):(" << result.boxes[i].coordinate[0]
<< ", " << result.boxes[i].coordinate[1] << ", "
<< result.boxes[i].coordinate[2] << ", "
<< result.boxes[i].coordinate[3] << ")" << std::endl;
}
if (FLAGS_save_dir != "") {
// visualize
cv::Mat vis_img = PaddleX::Visualize(
im, result, model.labels, colormap, FLAGS_threshold);
std::string save_path =
PaddleX::generate_save_path(FLAGS_save_dir, FLAGS_image);
cv::imwrite(save_path, vis_img);
result.clear();
std::cout << "Visualized output saved as " << save_path << std::endl;
}
}
return 0;
}
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <glog/logging.h>
#include <algorithm>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include <utility>
#include "include/paddlex/paddlex.h"
#include "include/paddlex/visualize.h"
DEFINE_string(model_dir, "", "Path of openvino model xml file");
DEFINE_string(cfg_file, "", "Path of PaddleX model yaml file");
DEFINE_string(image, "", "Path of test image file");
DEFINE_string(image_list, "", "Path of test image list file");
DEFINE_string(device, "CPU", "Device name");
DEFINE_string(save_dir, "", "Path to save visualized image");
DEFINE_int32(batch_size, 1, "Batch size of inference");
int main(int argc, char** argv) {
google::ParseCommandLineFlags(&argc, &argv, true);
if (FLAGS_model_dir == "") {
std::cerr << "--model_dir need to be defined" << std::endl;
return -1;
}
if (FLAGS_cfg_file == "") {
std::cerr << "--cfg_file need to be defined" << std::endl;
return -1;
}
if (FLAGS_image == "" & FLAGS_image_list == "") {
std::cerr << "--image or --image_list need to be defined" << std::endl;
return -1;
}
// load model
PaddleX::Model model;
model.Init(FLAGS_model_dir, FLAGS_cfg_file, FLAGS_device);
int imgs = 1;
auto colormap = PaddleX::GenerateColorMap(model.labels.size());
if (FLAGS_image_list != "") {
std::ifstream inf(FLAGS_image_list);
if (!inf) {
std::cerr << "Fail to open file " << FLAGS_image_list <<std::endl;
return -1;
}
std::string image_path;
while (getline(inf, image_path)) {
PaddleX::SegResult result;
cv::Mat im = cv::imread(image_path, 1);
model.predict(im, &result);
if (FLAGS_save_dir != "") {
cv::Mat vis_img = PaddleX::Visualize(im, result, model.labels, colormap);
std::string save_path =
PaddleX::generate_save_path(FLAGS_save_dir, image_path);
cv::imwrite(save_path, vis_img);
std::cout << "Visualized output saved as " << save_path << std::endl;
}
}
} else {
PaddleX::SegResult result;
cv::Mat im = cv::imread(FLAGS_image, 1);
model.predict(im, &result);
if (FLAGS_save_dir != "") {
cv::Mat vis_img = PaddleX::Visualize(im, result, model.labels, colormap);
std::string save_path =
PaddleX::generate_save_path(FLAGS_save_dir, FLAGS_image);
cv::imwrite(save_path, vis_img);
std::cout << "Visualized` output saved as " << save_path << std::endl;
}
result.clear();
}
return 0;
}
......@@ -54,4 +54,4 @@ class ConfigPaser {
YAML::Node Transforms_;
};
} // namespace PaddleX
......@@ -17,6 +17,8 @@
#include <functional>
#include <iostream>
#include <numeric>
#include <map>
#include <string>
#include "yaml-cpp/yaml.h"
......@@ -30,35 +32,40 @@
#include "include/paddlex/config_parser.h"
#include "include/paddlex/results.h"
#include "include/paddlex/transforms.h"
namespace PaddleX {
class Model {
public:
void Init(const std::string& model_dir,
const std::string& cfg_file,
std::string device) {
create_predictor(model_dir, cfg_file, device);
}
void create_predictor(const std::string& model_dir,
const std::string& cfg_file,
std::string device);
bool load_config(const std::string& model_dir);
bool preprocess(cv::Mat* input_im, ImageBlob* inputs);
bool predict(const cv::Mat& im, ClsResult* result);
bool predict(const cv::Mat& im, DetResult* result);
bool predict(const cv::Mat& im, SegResult* result);
std::string type;
std::string name;
std::map<int, std::string> labels;
Transforms transforms_;
ImageBlob inputs_;
InferenceEngine::Blob::Ptr output_;
InferenceEngine::CNNNetwork network_;
InferenceEngine::ExecutableNetwork executable_network_;
};
} // namespace PaddleX
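A hypothetical driver for the renamed interface above (cfg_dir -> cfg_file); the file names are placeholders, not part of the commit:

    #include <opencv2/highgui/highgui.hpp>
    #include "include/paddlex/paddlex.h"

    int main() {
      PaddleX::Model model;
      model.Init("model.xml",  // OpenVINO IR produced by the export script
                 "model.yml",  // PaddleX config holding the Transforms list
                 "CPU");       // device name passed through to the IE core
      PaddleX::ClsResult result;
      cv::Mat im = cv::imread("test.jpg", 1);
      model.predict(im, &result);
      return 0;
    }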
......@@ -61,11 +61,11 @@ class DetResult : public BaseResult {
class SegResult : public BaseResult {
public:
Mask<int> label_map;
Mask<float> score_map;
void clear() {
label_map.clear();
score_map.clear();
}
};
} // namespace PaddleX
......@@ -16,26 +16,54 @@
#include <yaml-cpp/yaml.h>
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include <iostream>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <inference_engine.hpp>
namespace PaddleX {
/*
* @brief
* This class represents an object for storing all preprocessed data
* */
class ImageBlob {
public:
// Original image height and width
InferenceEngine::Blob::Ptr ori_im_size_;
// Newest image height and width after process
std::vector<int> new_im_size_ = std::vector<int>(2);
// Image height and width before resize
std::vector<std::vector<int>> im_size_before_resize_;
// Reshape order
std::vector<std::string> reshape_order_;
// Resize scale
float scale = 1.0;
// Buffer for image data after preprocessing
InferenceEngine::Blob::Ptr blob;
void clear() {
im_size_before_resize_.clear();
reshape_order_.clear();
}
};
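To make the bookkeeping fields above concrete, a hedged sketch of a resize step recording what it did so postprocessing can undo it later; the function is illustrative, not part of the commit:

    #include <opencv2/imgproc/imgproc.hpp>
    #include "include/paddlex/transforms.h"

    bool RecordedResize(cv::Mat* im, PaddleX::ImageBlob* data,
                        int new_h, int new_w) {
      // remember the pre-resize size and the order of reshape ops
      data->im_size_before_resize_.push_back({im->rows, im->cols});
      data->reshape_order_.push_back("resize");
      cv::resize(*im, *im, cv::Size(new_w, new_h));
      data->new_im_size_[0] = im->rows;
      data->new_im_size_[1] = im->cols;
      return true;
    }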
// Abstraction of the preprocessing operation class
class Transform {
public:
virtual void Init(const YAML::Node& item) = 0;
virtual bool Run(cv::Mat* im, ImageBlob* data) = 0;
};
class Normalize : public Transform {
......@@ -45,7 +73,7 @@ class Normalize : public Transform {
std_ = item["std"].as<std::vector<float>>();
}
virtual bool Run(cv::Mat* im, ImageBlob* data);
private:
std::vector<float> mean_;
......@@ -61,8 +89,8 @@ class ResizeByShort : public Transform {
} else {
max_size_ = -1;
}
}
virtual bool Run(cv::Mat* im, ImageBlob* data);
private:
float GenerateScale(const cv::Mat& im);
......@@ -70,6 +98,55 @@ class ResizeByShort : public Transform {
int max_size_;
};
/*
* @brief
* This class executes a resize-by-long operation on an image matrix: it first
* resizes the long side of the image to the specified length, and the short
* side is then resized in the same proportion.
* */
class ResizeByLong : public Transform {
public:
virtual void Init(const YAML::Node& item) {
long_size_ = item["long_size"].as<int>();
}
virtual bool Run(cv::Mat* im, ImageBlob* data);
private:
int long_size_;
};
/*
* @brief
* This class executes a resize operation on an image matrix. It resizes the
* width and height to the specified lengths.
* */
class Resize : public Transform {
public:
virtual void Init(const YAML::Node& item) {
if (item["interp"].IsDefined()) {
interp_ = item["interp"].as<std::string>();
}
if (item["target_size"].IsScalar()) {
height_ = item["target_size"].as<int>();
width_ = item["target_size"].as<int>();
} else if (item["target_size"].IsSequence()) {
std::vector<int> target_size = item["target_size"].as<std::vector<int>>();
width_ = target_size[0];
height_ = target_size[1];
}
if (height_ <= 0 || width_ <= 0) {
std::cerr << "[Resize] target_size should greater than 0" << std::endl;
exit(-1);
}
}
virtual bool Run(cv::Mat* im, ImageBlob* data);
private:
int height_;
int width_;
std::string interp_;
};
class CenterCrop : public Transform {
public:
......@@ -83,22 +160,65 @@ class CenterCrop : public Transform {
height_ = crop_size[1];
}
}
virtual bool Run(cv::Mat* im, ImageBlob* data);
private:
int height_;
int width_;
};
/*
* @brief
* This class executes a padding operation on an image matrix. It makes a
* border on the edges of the image matrix.
* */
class Padding : public Transform {
public:
virtual void Init(const YAML::Node& item) {
if (item["coarsest_stride"].IsDefined()) {
coarsest_stride_ = item["coarsest_stride"].as<int>();
if (coarsest_stride_ < 1) {
std::cerr << "[Padding] coarest_stride should greater than 0"
<< std::endl;
exit(-1);
}
}
if (item["target_size"].IsDefined()) {
if (item["target_size"].IsScalar()) {
width_ = item["target_size"].as<int>();
height_ = item["target_size"].as<int>();
} else if (item["target_size"].IsSequence()) {
width_ = item["target_size"].as<std::vector<int>>()[0];
height_ = item["target_size"].as<std::vector<int>>()[1];
}
}
if (item["im_padding_value"].IsDefined()) {
im_value_ = item["im_padding_value"].as<std::vector<float>>();
} else {
im_value_ = {0, 0, 0};
}
}
virtual bool Run(cv::Mat* im, ImageBlob* data);
private:
int coarsest_stride_ = -1;
int width_ = 0;
int height_ = 0;
std::vector<float> im_value_;
};
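When coarsest_stride_ is set, the padded width and height are rounded up to the next multiple of the stride, as some detection backbones require. A small sketch of that arithmetic; values are illustrative:

    #include <cmath>

    int PaddedLength(int length, int coarsest_stride) {
      // e.g. length = 801, stride = 32  ->  ceil(801 / 32) * 32 = 832
      return static_cast<int>(
          std::ceil(length / static_cast<float>(coarsest_stride))) * coarsest_stride;
    }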
class Transforms {
public:
void Init(const YAML::Node& node, std::string type, bool to_rgb = true);
std::shared_ptr<Transform> CreateTransform(const std::string& name);
bool Run(cv::Mat* im, ImageBlob* data);
private:
std::vector<std::shared_ptr<Transform>> transforms_;
bool to_rgb_ = true;
std::string type_;
};
} // namespace PaddleX
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <iostream>
#include <map>
#include <vector>
#ifdef _WIN32
#include <direct.h>
#include <io.h>
#else // Linux/Unix
#include <dirent.h>
#include <sys/io.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#endif
#include <string>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include "include/paddlex/results.h"
#ifdef _WIN32
#define OS_PATH_SEP "\\"
#else
#define OS_PATH_SEP "/"
#endif
namespace PaddleX {
/*
* @brief
* Generate visualization colormap for each class
*
* @param num_class: number of classes
* @return color map, the size of vector is 3 * num_class
* */
std::vector<int> GenerateColorMap(int num_class);
/*
* @brief
* Visualize the detection result
*
* @param img: initial image matrix
* @param results: the detection result
* @param labels: label map
* @param colormap: visualization color map
* @return visualized image matrix
* */
cv::Mat Visualize(const cv::Mat& img,
const DetResult& results,
const std::map<int, std::string>& labels,
const std::vector<int>& colormap,
float threshold = 0.5);
/*
* @brief
* Visualize the segmentation result
*
* @param img: initial image matrix
* @param results: the detection result
* @param labels: label map
* @param colormap: visualization color map
* @return visualized image matrix
* */
cv::Mat Visualize(const cv::Mat& img,
const SegResult& result,
const std::map<int, std::string>& labels,
const std::vector<int>& colormap);
/*
* @brief
* generate save path for visualized image matrix
*
* @param save_dir: directory for saving visualized image matrix
* @param file_path: source image file path
* @return path of saving visualized result
* */
std::string generate_save_path(const std::string& save_dir,
const std::string& file_path);
} // namespace PaddleX
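A hedged end-to-end sketch of the helpers declared above; in practice the result object comes from Model::predict, and the wrapper function here is illustrative:

    #include <map>
    #include <string>
    #include <opencv2/highgui/highgui.hpp>
    #include "include/paddlex/visualize.h"

    void SaveDetVis(const cv::Mat& img, const PaddleX::DetResult& result,
                    const std::map<int, std::string>& labels,
                    const std::string& save_dir, const std::string& src_path) {
      std::vector<int> colormap = PaddleX::GenerateColorMap(labels.size());
      cv::Mat vis = PaddleX::Visualize(img, result, labels, colormap, 0.5);
      cv::imwrite(PaddleX::generate_save_path(save_dir, src_path), vis);
    }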
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from six import text_type as _text_type
import argparse
import sys
from utils import logging
import paddlex as pdx
def arg_parser():
parser = argparse.ArgumentParser()
parser.add_argument(
"--model_dir",
"-m",
type=_text_type,
default=None,
help="define model directory path")
parser.add_argument(
"--save_dir",
"-s",
type=_text_type,
default=None,
help="path to save inference model")
parser.add_argument(
"--fixed_input_shape",
"-fs",
default=None,
help="export openvino model with input shape:[w,h]")
parser.add_argument(
"--data_type",
"-dp",
default="FP32",
help="option, FP32 or FP16, the data_type of openvino IR")
return parser
def export_openvino_model(model, args):
if model.model_type == "detector" or model.__class__.__name__ == "FastSCNN":
logging.error(
"Only image classifier models and semantic segmentation models(except FastSCNN) are supported to export to openvino")
try:
import x2paddle
if x2paddle.__version__ < '0.7.4':
logging.error("You need to upgrade x2paddle >= 0.7.4")
except:
logging.error(
"You need to install x2paddle first, pip install x2paddle>=0.7.4")
import x2paddle.convert as x2pc
x2pc.paddle2onnx(args.model_dir, args.save_dir)
import mo.main as mo
from mo.utils.cli_parser import get_onnx_cli_parser
onnx_parser = get_onnx_cli_parser()
onnx_parser.add_argument("--model_dir",type=_text_type)
onnx_parser.add_argument("--save_dir",type=_text_type)
onnx_parser.add_argument("--fixed_input_shape")
onnx_input = os.path.join(args.save_dir, 'x2paddle_model.onnx')
onnx_parser.set_defaults(input_model=onnx_input)
onnx_parser.set_defaults(output_dir=args.save_dir)
shape = '[1,3,' + args.fixed_input_shape[1:]
if model.__class__.__name__ == "YOLOv3":
    shape = shape + ",[1,2]"
    inputs = "image,im_size"
    onnx_parser.set_defaults(input=inputs)
onnx_parser.set_defaults(input_shape=shape)
mo.main(onnx_parser,'onnx')
def main():
parser = arg_parser()
args = parser.parse_args()
assert args.model_dir is not None, "--model_dir should be defined while exporting openvino model"
assert args.save_dir is not None, "--save_dir should be defined to create openvino model"
model = pdx.load_model(args.model_dir)
if model.status == "Normal" or model.status == "Prune":
logging.error(
"Only support inference model, try to export model first as below,",
exit=False)
export_openvino_model(model, args)
if __name__ == "__main__":
main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import os
import argparse
import deploy
def arg_parser():
parser = argparse.ArgumentParser()
parser.add_argument(
"--model_dir",
"-m",
type=str,
default=None,
help="path to openvino model .xml file")
parser.add_argument(
"--device",
"-d",
type=str,
default='CPU',
help="Specify the target device to infer on:[CPU, GPU, FPGA, HDDL, MYRIAD,HETERO]"
"Default value is CPU")
parser.add_argument(
"--img", "-i", type=str, default=None, help="path to an image files")
parser.add_argument(
"--img_list", "-l", type=str, default=None, help="Path to a imglist")
parser.add_argument(
"--cfg_file",
"-c",
type=str,
default=None,
help="Path to PaddelX model yml file")
return parser
def main():
parser = arg_parser()
args = parser.parse_args()
model_xml = args.model_dir
model_yaml = args.cfg_file
#model init
if ("CPU" not in args.device):
predictor = deploy.Predictor(model_xml, model_yaml, args.device)
else:
predictor = deploy.Predictor(model_xml, model_yaml)
#predict
if args.img_list is not None:
f = open(args.img_list)
lines = f.readlines()
for im_path in lines:
print(im_path)
predictor.predict(im_path.strip('\n'))
f.close()
else:
im_path = args.img
predictor.predict(im_path)
if __name__ == "__main__":
main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import os
import os.path as osp
import time
import cv2
import numpy as np
import yaml
from six import text_type as _text_type
from openvino.inference_engine import IECore
class Predictor:
def __init__(self, model_xml, model_yaml, device="CPU"):
self.device = device
if not osp.exists(model_xml):
print("model xml file is not exists in {}".format(model_xml))
self.model_xml = model_xml
self.model_bin = osp.splitext(model_xml)[0] + ".bin"
if not osp.exists(model_yaml):
print("model yaml file is not exists in {}".format(model_yaml))
with open(model_yaml) as f:
self.info = yaml.load(f.read(), Loader=yaml.Loader)
self.model_type = self.info['_Attributes']['model_type']
self.model_name = self.info['Model']
self.num_classes = self.info['_Attributes']['num_classes']
self.labels = self.info['_Attributes']['labels']
if self.info['Model'] == 'MaskRCNN':
if self.info['_init_params']['with_fpn']:
self.mask_head_resolution = 28
else:
self.mask_head_resolution = 14
transforms_mode = self.info.get('TransformsMode', 'RGB')
if transforms_mode == 'RGB':
to_rgb = True
else:
to_rgb = False
self.transforms = self.build_transforms(self.info['Transforms'],
to_rgb)
self.predictor, self.net = self.create_predictor()
self.total_time = 0
self.count_num = 0
def create_predictor(self):
#initialization for specified device
print("Creating Inference Engine")
ie = IECore()
print("Loading network files:\n\t{}\n\t{}".format(self.model_xml,
self.model_bin))
net = ie.read_network(model=self.model_xml, weights=self.model_bin)
net.batch_size = 1
network_config = {}
if self.device == "MYRIAD":
network_config = {'VPU_HW_STAGES_OPTIMIZATION': 'NO'}
exec_net = ie.load_network(
network=net, device_name=self.device, config=network_config)
return exec_net, net
def build_transforms(self, transforms_info, to_rgb=True):
if self.model_type == "classifier":
import transforms.cls_transforms as transforms
elif self.model_type == "detector":
import transforms.det_transforms as transforms
elif self.model_type == "segmenter":
import transforms.seg_transforms as transforms
op_list = list()
for op_info in transforms_info:
op_name = list(op_info.keys())[0]
op_attr = op_info[op_name]
if not hasattr(transforms, op_name):
raise Exception(
"There's no operator named '{}' in transforms of {}".
format(op_name, self.model_type))
op_list.append(getattr(transforms, op_name)(**op_attr))
eval_transforms = transforms.Compose(op_list)
if hasattr(eval_transforms, 'to_rgb'):
eval_transforms.to_rgb = to_rgb
self.arrange_transforms(eval_transforms)
return eval_transforms
def arrange_transforms(self, eval_transforms):
if self.model_type == 'classifier':
import transforms.cls_transforms as transforms
arrange_transform = transforms.ArrangeClassifier
elif self.model_type == 'segmenter':
import transforms.seg_transforms as transforms
arrange_transform = transforms.ArrangeSegmenter
elif self.model_type == 'detector':
import transforms.det_transforms as transforms
arrange_name = 'Arrange{}'.format(self.model_name)
arrange_transform = getattr(transforms, arrange_name)
else:
raise Exception("Unrecognized model type: {}".format(
self.model_type))
if type(eval_transforms.transforms[-1]).__name__.startswith('Arrange'):
eval_transforms.transforms[-1] = arrange_transform(mode='test')
else:
eval_transforms.transforms.append(arrange_transform(mode='test'))
def raw_predict(self, preprocessed_input):
self.count_num += 1
feed_dict = {}
if self.model_name == "YOLOv3":
inputs = self.net.inputs
for name in inputs:
if (len(inputs[name].shape) == 2):
feed_dict[name] = preprocessed_input['im_size']
elif (len(inputs[name].shape) == 4):
feed_dict[name] = preprocessed_input['image']
else:
pass
else:
input_blob = next(iter(self.net.inputs))
feed_dict[input_blob] = preprocessed_input['image']
#Start sync inference
print("Starting inference in synchronous mode")
res = self.predictor.infer(inputs=feed_dict)
#Processing output blob
print("Processing output blob")
return res
def preprocess(self, image):
res = dict()
if self.model_type == "classifier":
im, = self.transforms(image)
im = np.expand_dims(im, axis=0).copy()
res['image'] = im
elif self.model_type == "detector":
if self.model_name == "YOLOv3":
im, im_shape = self.transforms(image)
im = np.expand_dims(im, axis=0).copy()
im_shape = np.expand_dims(im_shape, axis=0).copy()
res['image'] = im
res['im_size'] = im_shape
if self.model_name.count('RCNN') > 0:
im, im_resize_info, im_shape = self.transforms(image)
im = np.expand_dims(im, axis=0).copy()
im_resize_info = np.expand_dims(im_resize_info, axis=0).copy()
im_shape = np.expand_dims(im_shape, axis=0).copy()
res['image'] = im
res['im_info'] = im_resize_info
res['im_shape'] = im_shape
elif self.model_type == "segmenter":
im, im_info = self.transforms(image)
im = np.expand_dims(im, axis=0).copy()
res['image'] = im
res['im_info'] = im_info
return res
def classifier_postprocess(self, preds, topk=1):
""" 对分类模型的预测结果做后处理
"""
true_topk = min(self.num_classes, topk)
output_name = next(iter(self.net.outputs))
pred_label = np.argsort(-preds[output_name][0])[:true_topk]
result = [{
'category_id': l,
'category': self.labels[l],
'score': preds[output_name][0][l],
} for l in pred_label]
print(result)
return result
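    # Example of a returned classification result (illustrative values):
    #   [{'category_id': 3, 'category': 'cat', 'score': 0.92}]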
def segmenter_postprocess(self, preds, preprocessed_inputs):
""" 对语义分割结果做后处理
"""
it = iter(self.net.outputs)
next(it)
score_name = next(it)
score_map = np.squeeze(preds[score_name])
score_map = np.transpose(score_map, (1, 2, 0))
label_name = next(it)
label_map = np.squeeze(preds[label_name]).astype('uint8')
im_info = preprocessed_inputs['im_info']
for info in im_info[::-1]:
if info[0] == 'resize':
w, h = info[1][1], info[1][0]
                label_map = cv2.resize(
                    label_map, (w, h), interpolation=cv2.INTER_NEAREST)
                score_map = cv2.resize(
                    score_map, (w, h), interpolation=cv2.INTER_LINEAR)
elif info[0] == 'padding':
w, h = info[1][1], info[1][0]
label_map = label_map[0:h, 0:w]
score_map = score_map[0:h, 0:w, :]
else:
raise Exception("Unexpected info '{}' in im_info".format(info[
0]))
return {'label_map': label_map, 'score_map': score_map}
def detector_postprocess(self, preds, preprocessed_inputs):
"""对图像检测结果做后处理
"""
output_name = next(iter(self.net.outputs))
outputs = preds[output_name][0]
result = []
        for out in outputs:
            if out[0] > 0:
                result.append(out.tolist())
print(result)
return result
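    # Each kept row is assumed to follow Paddle's detection output layout,
    # [class_id, score, xmin, ymin, xmax, ymax]; rows whose first field is not
    # positive are treated as padding and dropped above.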
def predict(self, image, topk=1, threshold=0.5):
preprocessed_input = self.preprocess(image)
model_pred = self.raw_predict(preprocessed_input)
if self.model_type == "classifier":
results = self.classifier_postprocess(model_pred, topk)
elif self.model_type == "detector":
results = self.detector_postprocess(model_pred, preprocessed_input)
elif self.model_type == "segmenter":
results = self.segmenter_postprocess(model_pred,
preprocessed_input)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import cls_transforms
from . import det_transforms
from . import seg_transforms
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .ops import *
import random
import os.path as osp
import numpy as np
from PIL import Image, ImageEnhance
class ClsTransform:
"""分类Transform的基类
"""
def __init__(self):
pass
class Compose(ClsTransform):
"""根据数据预处理/增强算子对输入数据进行操作。
所有操作的输入图像流形状均是[H, W, C],其中H为图像高,W为图像宽,C为图像通道数。
Args:
transforms (list): 数据预处理/增强算子。
Raises:
TypeError: 形参数据类型不满足需求。
ValueError: 数据长度不匹配。
"""
def __init__(self, transforms):
if not isinstance(transforms, list):
raise TypeError('The transforms must be a list!')
if len(transforms) < 1:
raise ValueError('The length of transforms ' + \
'must be equal or larger than 1!')
self.transforms = transforms
def __call__(self, im, label=None):
"""
Args:
im (str/np.ndarray): 图像路径/图像np.ndarray数据。
label (int): 每张图像所对应的类别序号。
Returns:
tuple: 根据网络所需字段所组成的tuple;
字段由transforms中的最后一个数据预处理操作决定。
"""
if isinstance(im, np.ndarray):
if len(im.shape) != 3:
                raise Exception(
                    "im should be 3-dimensional, but is {}-dimensional".
                    format(len(im.shape)))
else:
try:
im = cv2.imread(im).astype('float32')
except:
                raise TypeError('Cannot read the image file {}!'.format(im))
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
for op in self.transforms:
outputs = op(im, label)
im = outputs[0]
if len(outputs) == 2:
label = outputs[1]
return outputs
def add_augmenters(self, augmenters):
if not isinstance(augmenters, list):
raise Exception(
"augmenters should be list type in func add_augmenters()")
transform_names = [type(x).__name__ for x in self.transforms]
for aug in augmenters:
if type(aug).__name__ in transform_names:
                print(
                    "{} is already in ComposedTransforms; remove it from add_augmenters().".
                    format(type(aug).__name__))
self.transforms = augmenters + self.transforms
class Normalize(ClsTransform):
"""对图像进行标准化。
1. 对图像进行归一化到区间[0.0, 1.0]。
2. 对图像进行减均值除以标准差操作。
Args:
mean (list): 图像数据集的均值。默认为[0.485, 0.456, 0.406]。
std (list): 图像数据集的标准差。默认为[0.229, 0.224, 0.225]。
"""
def __init__(self, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]):
self.mean = mean
self.std = std
def __call__(self, im, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
label (int): 每张图像所对应的类别序号。
Returns:
tuple: 当label为空时,返回的tuple为(im, ),对应图像np.ndarray数据;
当label不为空时,返回的tuple为(im, label),分别对应图像np.ndarray数据、图像类别id。
"""
mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
std = np.array(self.std)[np.newaxis, np.newaxis, :]
im = normalize(im, mean, std)
if label is None:
return (im, )
else:
return (im, label)
class ResizeByShort(ClsTransform):
"""根据图像短边对图像重新调整大小(resize)。
1. 获取图像的长边和短边长度。
2. 根据短边与short_size的比例,计算长边的目标长度,
此时高、宽的resize比例为short_size/原图短边长度。
3. 如果max_size>0,调整resize比例:
如果长边的目标长度>max_size,则高、宽的resize比例为max_size/原图长边长度;
4. 根据调整大小的比例对图像进行resize。
Args:
short_size (int): 调整大小后的图像目标短边长度。默认为256。
max_size (int): 长边目标长度的最大限制。默认为-1。
"""
def __init__(self, short_size=256, max_size=-1):
self.short_size = short_size
self.max_size = max_size
def __call__(self, im, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
label (int): 每张图像所对应的类别序号。
Returns:
tuple: 当label为空时,返回的tuple为(im, ),对应图像np.ndarray数据;
当label不为空时,返回的tuple为(im, label),分别对应图像np.ndarray数据、图像类别id。
"""
im_short_size = min(im.shape[0], im.shape[1])
im_long_size = max(im.shape[0], im.shape[1])
scale = float(self.short_size) / im_short_size
if self.max_size > 0 and np.round(scale *
im_long_size) > self.max_size:
scale = float(self.max_size) / float(im_long_size)
resized_width = int(round(im.shape[1] * scale))
resized_height = int(round(im.shape[0] * scale))
im = cv2.resize(
im, (resized_width, resized_height),
interpolation=cv2.INTER_LINEAR)
if label is None:
return (im, )
else:
return (im, label)
class CenterCrop(ClsTransform):
"""以图像中心点扩散裁剪长宽为`crop_size`的正方形
1. 计算剪裁的起始点。
2. 剪裁图像。
Args:
crop_size (int): 裁剪的目标边长。默认为224。
"""
def __init__(self, crop_size=224):
self.crop_size = crop_size
def __call__(self, im, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
label (int): 每张图像所对应的类别序号。
Returns:
tuple: 当label为空时,返回的tuple为(im, ),对应图像np.ndarray数据;
当label不为空时,返回的tuple为(im, label),分别对应图像np.ndarray数据、图像类别id。
"""
im = center_crop(im, self.crop_size)
if label is None:
return (im, )
else:
return (im, label)
class ArrangeClassifier(ClsTransform):
"""获取训练/验证/预测所需信息。注意:此操作不需用户自己显示调用
Args:
mode (str): 指定数据用于何种用途,取值范围为['train', 'eval', 'test', 'quant']。
Raises:
ValueError: mode的取值不在['train', 'eval', 'test', 'quant']之内。
"""
def __init__(self, mode=None):
if mode not in ['train', 'eval', 'test', 'quant']:
raise ValueError(
"mode must be in ['train', 'eval', 'test', 'quant']!")
self.mode = mode
def __call__(self, im, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
label (int): 每张图像所对应的类别序号。
Returns:
tuple: 当mode为'train'或'eval'时,返回(im, label),分别对应图像np.ndarray数据、
图像类别id;当mode为'test'或'quant'时,返回(im, ),对应图像np.ndarray数据。
"""
im = permute(im, False).astype('float32')
if self.mode == 'train' or self.mode == 'eval':
outputs = (im, label)
else:
outputs = (im, )
return outputs
class ComposedClsTransforms(Compose):
""" 分类模型的基础Transforms流程,具体如下
训练阶段:
1. 随机从图像中crop一块子图,并resize成crop_size大小
2. 将1的输出按0.5的概率随机进行水平翻转
3. 将图像进行归一化
验证/预测阶段:
1. 将图像按比例Resize,使得最小边长度为crop_size[0] * 1.14
2. 从图像中心crop出一个大小为crop_size的图像
3. 将图像进行归一化
Args:
mode(str): 图像处理流程所处阶段,训练/验证/预测,分别对应'train', 'eval', 'test'
crop_size(int|list): 输入模型里的图像大小
mean(list): 图像均值
std(list): 图像方差
"""
def __init__(self,
mode,
crop_size=[224, 224],
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]):
width = crop_size
if isinstance(crop_size, list):
if crop_size[0] != crop_size[1]:
raise Exception(
"In classifier model, width and height should be equal, please modify your parameter `crop_size`"
)
width = crop_size[0]
if width % 32 != 0:
            raise Exception(
                "In classifier model, width and height should be multiples of 32, e.g. 224, 256, 320; please modify your parameter `crop_size`"
            )
if mode == 'train':
pass
else:
            # evaluation/prediction-time transforms
transforms = [
ResizeByShort(short_size=int(width * 1.14)),
CenterCrop(crop_size=width), Normalize(
mean=mean, std=std)
]
super(ComposedClsTransforms, self).__init__(transforms)
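# A usage sketch for the eval/test pipeline above (path is illustrative):
#   transforms = ComposedClsTransforms(mode='test', crop_size=[224, 224])
#   im, = transforms('test.jpg')
# im is the normalized HWC float32 array; the predictor later appends
# ArrangeClassifier, which permutes it to CHW.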
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
try:
from collections.abc import Sequence
except Exception:
from collections import Sequence
import random
import os.path as osp
import numpy as np
import cv2
from PIL import Image, ImageEnhance
from .ops import *
class DetTransform:
"""检测数据处理基类
"""
def __init__(self):
pass
class Compose(DetTransform):
"""根据数据预处理/增强列表对输入数据进行操作。
所有操作的输入图像流形状均是[H, W, C],其中H为图像高,W为图像宽,C为图像通道数。
Args:
transforms (list): 数据预处理/增强列表。
Raises:
TypeError: 形参数据类型不满足需求。
ValueError: 数据长度不匹配。
"""
def __init__(self, transforms):
if not isinstance(transforms, list):
raise TypeError('The transforms must be a list!')
if len(transforms) < 1:
raise ValueError('The length of transforms ' + \
'must be equal or larger than 1!')
self.transforms = transforms
self.use_mixup = False
for t in self.transforms:
if type(t).__name__ == 'MixupImage':
self.use_mixup = True
def __call__(self, im, im_info=None, label_info=None):
"""
Args:
im (str/np.ndarray): 图像路径/图像np.ndarray数据。
im_info (dict): 存储与图像相关的信息,dict中的字段如下:
- im_id (np.ndarray): 图像序列号,形状为(1,)。
- image_shape (np.ndarray): 图像原始大小,形状为(2,),
image_shape[0]为高,image_shape[1]为宽。
- mixup (list): list为[im, im_info, label_info],分别对应
与当前图像进行mixup的图像np.ndarray数据、图像相关信息、标注框相关信息;
注意,当前epoch若无需进行mixup,则无该字段。
label_info (dict): 存储与标注框相关的信息,dict中的字段如下:
- gt_bbox (np.ndarray): 真实标注框坐标[x1, y1, x2, y2],形状为(n, 4),
其中n代表真实标注框的个数。
- gt_class (np.ndarray): 每个真实标注框对应的类别序号,形状为(n, 1),
其中n代表真实标注框的个数。
- gt_score (np.ndarray): 每个真实标注框对应的混合得分,形状为(n, 1),
其中n代表真实标注框的个数。
- gt_poly (list): 每个真实标注框内的多边形分割区域,每个分割区域由点的x、y坐标组成,
长度为n,其中n代表真实标注框的个数。
- is_crowd (np.ndarray): 每个真实标注框中是否是一组对象,形状为(n, 1),
其中n代表真实标注框的个数。
- difficult (np.ndarray): 每个真实标注框中的对象是否为难识别对象,形状为(n, 1),
其中n代表真实标注框的个数。
Returns:
tuple: 根据网络所需字段所组成的tuple;
字段由transforms中的最后一个数据预处理操作决定。
"""
def decode_image(im_file, im_info, label_info):
if im_info is None:
im_info = dict()
if isinstance(im_file, np.ndarray):
if len(im_file.shape) != 3:
                    raise Exception(
                        "im should be 3-dimensional, but is {}-dimensional".
                        format(len(im_file.shape)))
im = im_file
else:
try:
im = cv2.imread(im_file).astype('float32')
except:
                    raise TypeError('Cannot read the image file {}!'.format(
                        im_file))
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
# make default im_info with [h, w, 1]
im_info['im_resize_info'] = np.array(
[im.shape[0], im.shape[1], 1.], dtype=np.float32)
im_info['image_shape'] = np.array([im.shape[0],
im.shape[1]]).astype('int32')
if not self.use_mixup:
if 'mixup' in im_info:
del im_info['mixup']
# decode mixup image
if 'mixup' in im_info:
im_info['mixup'] = \
decode_image(im_info['mixup'][0],
im_info['mixup'][1],
im_info['mixup'][2])
if label_info is None:
return (im, im_info)
else:
return (im, im_info, label_info)
outputs = decode_image(im, im_info, label_info)
im = outputs[0]
im_info = outputs[1]
if len(outputs) == 3:
label_info = outputs[2]
for op in self.transforms:
if im is None:
return None
outputs = op(im, im_info, label_info)
im = outputs[0]
return outputs
def add_augmenters(self, augmenters):
if not isinstance(augmenters, list):
raise Exception(
"augmenters should be list type in func add_augmenters()")
transform_names = [type(x).__name__ for x in self.transforms]
for aug in augmenters:
if type(aug).__name__ in transform_names:
                print(
                    "{} is already in ComposedTransforms; remove it from add_augmenters().".
                    format(type(aug).__name__))
self.transforms = augmenters + self.transforms
class ResizeByShort(DetTransform):
"""根据图像的短边调整图像大小(resize)。
1. 获取图像的长边和短边长度。
2. 根据短边与short_size的比例,计算长边的目标长度,
此时高、宽的resize比例为short_size/原图短边长度。
3. 如果max_size>0,调整resize比例:
如果长边的目标长度>max_size,则高、宽的resize比例为max_size/原图长边长度。
4. 根据调整大小的比例对图像进行resize。
Args:
target_size (int): 短边目标长度。默认为800。
max_size (int): 长边目标长度的最大限制。默认为1333。
Raises:
TypeError: 形参数据类型不满足需求。
"""
def __init__(self, short_size=800, max_size=1333):
self.max_size = int(max_size)
if not isinstance(short_size, int):
raise TypeError(
"Type of short_size is invalid. Must be Integer, now is {}".
format(type(short_size)))
self.short_size = short_size
if not (isinstance(self.max_size, int)):
raise TypeError("max_size: input type is invalid.")
def __call__(self, im, im_info=None, label_info=None):
"""
Args:
im (numnp.ndarraypy): 图像np.ndarray数据。
im_info (dict, 可选): 存储与图像相关的信息。
label_info (dict, 可选): 存储与标注框相关的信息。
Returns:
tuple: 当label_info为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label_info不为空时,返回的tuple为(im, im_info, label_info),分别对应图像np.ndarray数据、
存储与标注框相关信息的字典。
其中,im_info更新字段为:
- im_resize_info (np.ndarray): resize后的图像高、resize后的图像宽、resize后的图像相对原始图的缩放比例
三者组成的np.ndarray,形状为(3,)。
Raises:
TypeError: 形参数据类型不满足需求。
ValueError: 数据长度不匹配。
"""
if im_info is None:
im_info = dict()
if not isinstance(im, np.ndarray):
raise TypeError("ResizeByShort: image type is not numpy.")
if len(im.shape) != 3:
raise ValueError('ResizeByShort: image is not 3-dimensional.')
im_short_size = min(im.shape[0], im.shape[1])
im_long_size = max(im.shape[0], im.shape[1])
scale = float(self.short_size) / im_short_size
if self.max_size > 0 and np.round(scale *
im_long_size) > self.max_size:
scale = float(self.max_size) / float(im_long_size)
resized_width = int(round(im.shape[1] * scale))
resized_height = int(round(im.shape[0] * scale))
im_resize_info = [resized_height, resized_width, scale]
im = cv2.resize(
im, (resized_width, resized_height),
interpolation=cv2.INTER_LINEAR)
im_info['im_resize_info'] = np.array(im_resize_info).astype(np.float32)
if label_info is None:
return (im, im_info)
else:
return (im, im_info, label_info)
class Padding(DetTransform):
"""1.将图像的长和宽padding至coarsest_stride的倍数。如输入图像为[300, 640],
`coarest_stride`为32,则由于300不为32的倍数,因此在图像最右和最下使用0值
进行padding,最终输出图像为[320, 640]。
2.或者,将图像的长和宽padding到target_size指定的shape,如输入的图像为[300,640],
a. `target_size` = 960,在图像最右和最下使用0值进行padding,最终输出
图像为[960, 960]。
b. `target_size` = [640, 960],在图像最右和最下使用0值进行padding,最终
输出图像为[640, 960]。
1. 如果coarsest_stride为1,target_size为None则直接返回。
2. 获取图像的高H、宽W。
3. 计算填充后图像的高H_new、宽W_new。
4. 构建大小为(H_new, W_new, 3)像素值为0的np.ndarray,
并将原图的np.ndarray粘贴于左上角。
Args:
coarsest_stride (int): 填充后的图像长、宽为该参数的倍数,默认为1。
target_size (int|list|tuple): 填充后的图像长、宽,默认为None,coarset_stride优先级更高。
Raises:
TypeError: 形参`target_size`数据类型不满足需求。
ValueError: 形参`target_size`为(list|tuple)时,长度不满足需求。
"""
def __init__(self, coarsest_stride=1, target_size=None):
self.coarsest_stride = coarsest_stride
if target_size is not None:
if not isinstance(target_size, int):
if not isinstance(target_size, tuple) and not isinstance(
target_size, list):
raise TypeError(
"Padding: Type of target_size must in (int|list|tuple)."
)
elif len(target_size) != 2:
raise ValueError(
"Padding: Length of target_size must equal 2.")
self.target_size = target_size
def __call__(self, im, im_info=None, label_info=None):
"""
Args:
im (numnp.ndarraypy): 图像np.ndarray数据。
im_info (dict, 可选): 存储与图像相关的信息。
label_info (dict, 可选): 存储与标注框相关的信息。
Returns:
tuple: 当label_info为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label_info不为空时,返回的tuple为(im, im_info, label_info),分别对应图像np.ndarray数据、
存储与标注框相关信息的字典。
Raises:
TypeError: 形参数据类型不满足需求。
ValueError: 数据长度不匹配。
ValueError: coarsest_stride,target_size需有且只有一个被指定。
ValueError: target_size小于原图的大小。
"""
if im_info is None:
im_info = dict()
if not isinstance(im, np.ndarray):
raise TypeError("Padding: image type is not numpy.")
if len(im.shape) != 3:
raise ValueError('Padding: image is not 3-dimensional.')
im_h, im_w, im_c = im.shape[:]
if isinstance(self.target_size, int):
padding_im_h = self.target_size
padding_im_w = self.target_size
elif isinstance(self.target_size, list) or isinstance(self.target_size,
tuple):
padding_im_w = self.target_size[0]
padding_im_h = self.target_size[1]
elif self.coarsest_stride > 0:
padding_im_h = int(
np.ceil(im_h / self.coarsest_stride) * self.coarsest_stride)
padding_im_w = int(
np.ceil(im_w / self.coarsest_stride) * self.coarsest_stride)
else:
            raise ValueError(
                "coarsest_stride (>1) or target_size (int|list) must be set in the Padding transform"
            )
pad_height = padding_im_h - im_h
pad_width = padding_im_w - im_w
if pad_height < 0 or pad_width < 0:
            raise ValueError(
                'the image size should be no larger than target_size, but the image size ({}, {}) exceeds target_size ({}, {})'
                .format(im_w, im_h, padding_im_w, padding_im_h))
padding_im = np.zeros(
(padding_im_h, padding_im_w, im_c), dtype=np.float32)
padding_im[:im_h, :im_w, :] = im
if label_info is None:
return (padding_im, im_info)
else:
return (padding_im, im_info, label_info)
class Resize(DetTransform):
"""调整图像大小(resize)。
- 当目标大小(target_size)类型为int时,根据插值方式,
将图像resize为[target_size, target_size]。
- 当目标大小(target_size)类型为list或tuple时,根据插值方式,
将图像resize为target_size。
注意:当插值方式为“RANDOM”时,则随机选取一种插值方式进行resize。
Args:
target_size (int/list/tuple): 短边目标长度。默认为608。
interp (str): resize的插值方式,与opencv的插值方式对应,取值范围为
['NEAREST', 'LINEAR', 'CUBIC', 'AREA', 'LANCZOS4', 'RANDOM']。默认为"LINEAR"。
Raises:
TypeError: 形参数据类型不满足需求。
ValueError: 插值方式不在['NEAREST', 'LINEAR', 'CUBIC',
'AREA', 'LANCZOS4', 'RANDOM']中。
"""
# The interpolation mode
interp_dict = {
'NEAREST': cv2.INTER_NEAREST,
'LINEAR': cv2.INTER_LINEAR,
'CUBIC': cv2.INTER_CUBIC,
'AREA': cv2.INTER_AREA,
'LANCZOS4': cv2.INTER_LANCZOS4
}
def __init__(self, target_size=608, interp='LINEAR'):
self.interp = interp
if not (interp == "RANDOM" or interp in self.interp_dict):
raise ValueError("interp should be one of {}".format(
self.interp_dict.keys()))
if isinstance(target_size, list) or isinstance(target_size, tuple):
if len(target_size) != 2:
                raise TypeError(
                    'when target_size is a list or tuple, it should contain 2 elements, but it is {}'
                    .format(target_size))
elif not isinstance(target_size, int):
raise TypeError(
"Type of target_size is invalid. Must be Integer or List or tuple, now is {}"
.format(type(target_size)))
self.target_size = target_size
def __call__(self, im, im_info=None, label_info=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (dict, 可选): 存储与图像相关的信息。
label_info (dict, 可选): 存储与标注框相关的信息。
Returns:
tuple: 当label_info为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label_info不为空时,返回的tuple为(im, im_info, label_info),分别对应图像np.ndarray数据、
存储与标注框相关信息的字典。
Raises:
TypeError: 形参数据类型不满足需求。
ValueError: 数据长度不匹配。
"""
if im_info is None:
im_info = dict()
if not isinstance(im, np.ndarray):
raise TypeError("Resize: image type is not numpy.")
if len(im.shape) != 3:
raise ValueError('Resize: image is not 3-dimensional.')
if self.interp == "RANDOM":
interp = random.choice(list(self.interp_dict.keys()))
else:
interp = self.interp
im = resize(im, self.target_size, self.interp_dict[interp])
if label_info is None:
return (im, im_info)
else:
return (im, im_info, label_info)
class Normalize(DetTransform):
"""对图像进行标准化。
1. 归一化图像到到区间[0.0, 1.0]。
2. 对图像进行减均值除以标准差操作。
Args:
mean (list): 图像数据集的均值。默认为[0.485, 0.456, 0.406]。
std (list): 图像数据集的标准差。默认为[0.229, 0.224, 0.225]。
Raises:
TypeError: 形参数据类型不满足需求。
"""
def __init__(self, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]):
self.mean = mean
self.std = std
if not (isinstance(self.mean, list) and isinstance(self.std, list)):
raise TypeError("NormalizeImage: input type is invalid.")
from functools import reduce
if reduce(lambda x, y: x * y, self.std) == 0:
raise TypeError('NormalizeImage: std is invalid!')
def __call__(self, im, im_info=None, label_info=None):
"""
Args:
im (numnp.ndarraypy): 图像np.ndarray数据。
im_info (dict, 可选): 存储与图像相关的信息。
label_info (dict, 可选): 存储与标注框相关的信息。
Returns:
tuple: 当label_info为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label_info不为空时,返回的tuple为(im, im_info, label_info),分别对应图像np.ndarray数据、
存储与标注框相关信息的字典。
"""
mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
std = np.array(self.std)[np.newaxis, np.newaxis, :]
im = normalize(im, mean, std)
if label_info is None:
return (im, im_info)
else:
return (im, im_info, label_info)
class ArrangeYOLOv3(DetTransform):
"""获取YOLOv3模型训练/验证/预测所需信息。
Args:
mode (str): 指定数据用于何种用途,取值范围为['train', 'eval', 'test', 'quant']。
Raises:
ValueError: mode的取值不在['train', 'eval', 'test', 'quant']之内。
"""
def __init__(self, mode=None):
if mode not in ['train', 'eval', 'test', 'quant']:
raise ValueError(
"mode must be in ['train', 'eval', 'test', 'quant']!")
self.mode = mode
def __call__(self, im, im_info=None, label_info=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (dict, 可选): 存储与图像相关的信息。
label_info (dict, 可选): 存储与标注框相关的信息。
Returns:
tuple: 当mode为'train'时,返回(im, gt_bbox, gt_class, gt_score, im_shape),分别对应
图像np.ndarray数据、真实标注框、真实标注框对应的类别、真实标注框混合得分、图像大小信息;
当mode为'eval'时,返回(im, im_shape, im_id, gt_bbox, gt_class, difficult),
分别对应图像np.ndarray数据、图像大小信息、图像id、真实标注框、真实标注框对应的类别、
真实标注框是否为难识别对象;当mode为'test'或'quant'时,返回(im, im_shape),
分别对应图像np.ndarray数据、图像大小信息。
Raises:
TypeError: 形参数据类型不满足需求。
ValueError: 数据长度不匹配。
"""
im = permute(im, False)
if self.mode == 'train':
pass
elif self.mode == 'eval':
pass
else:
if im_info is None:
                raise TypeError('Cannot do ArrangeYOLOv3! ' +
                                'Because the im_info cannot be None!')
im_shape = im_info['image_shape']
outputs = (im, im_shape)
return outputs
class ComposedYOLOv3Transforms(Compose):
"""YOLOv3模型的图像预处理流程,具体如下,
训练阶段:
1. 在前mixup_epoch轮迭代中,使用MixupImage策略,见https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#mixupimage
2. 对图像进行随机扰动,包括亮度,对比度,饱和度和色调
3. 随机扩充图像,见https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#randomexpand
4. 随机裁剪图像
5. 将4步骤的输出图像Resize成shape参数的大小
6. 随机0.5的概率水平翻转图像
7. 图像归一化
验证/预测阶段:
1. 将图像Resize成shape参数大小
2. 图像归一化
Args:
mode(str): 图像处理流程所处阶段,训练/验证/预测,分别对应'train', 'eval', 'test'
shape(list): 输入模型中图像的大小,输入模型的图像会被Resize成此大小
mixup_epoch(int): 模型训练过程中,前mixup_epoch会使用mixup策略
mean(list): 图像均值
std(list): 图像方差
"""
def __init__(self,
mode,
shape=[608, 608],
mixup_epoch=250,
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]):
width = shape
if isinstance(shape, list):
if shape[0] != shape[1]:
raise Exception(
"In YOLOv3 model, width and height should be equal")
width = shape[0]
if width % 32 != 0:
            raise Exception(
                "In YOLOv3 model, width and height should be multiples of 32, e.g. 224, 256, 320"
            )
        if mode == 'train':
            # training-time transforms, including data augmentation
            pass
        else:
            # evaluation/prediction-time transforms
transforms = [
Resize(
target_size=width, interp='CUBIC'), Normalize(
mean=mean, std=std)
]
super(ComposedYOLOv3Transforms, self).__init__(transforms)
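# A usage sketch for the eval/test pipeline above (path is illustrative):
#   transforms = ComposedYOLOv3Transforms(mode='test', shape=[608, 608])
#   im, im_info = transforms('test.jpg')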
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import cv2
import math
import numpy as np
from PIL import Image, ImageEnhance
def normalize(im, mean, std):
im = im / 255.0
im -= mean
im /= std
return im
def permute(im, to_bgr=False):
im = np.swapaxes(im, 1, 2)
im = np.swapaxes(im, 1, 0)
if to_bgr:
im = im[[2, 1, 0], :, :]
return im
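# e.g. permute turns an (H, W, C) = (224, 224, 3) array into (3, 224, 224);
# with to_bgr=True the channel order is reversed as well.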
def resize_long(im, long_size=224, interpolation=cv2.INTER_LINEAR):
value = max(im.shape[0], im.shape[1])
scale = float(long_size) / float(value)
resized_width = int(round(im.shape[1] * scale))
resized_height = int(round(im.shape[0] * scale))
im = cv2.resize(
im, (resized_width, resized_height), interpolation=interpolation)
return im
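# e.g. resize_long(im, 224) maps a 448x336 (HxW) image to 224x168, keeping
# the aspect ratio while fixing the long side at 224.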
def resize(im, target_size=608, interp=cv2.INTER_LINEAR):
if isinstance(target_size, list) or isinstance(target_size, tuple):
w = target_size[0]
h = target_size[1]
else:
w = target_size
h = target_size
im = cv2.resize(im, (w, h), interpolation=interp)
return im
def random_crop(im,
crop_size=224,
lower_scale=0.08,
lower_ratio=3. / 4,
upper_ratio=4. / 3):
scale = [lower_scale, 1.0]
ratio = [lower_ratio, upper_ratio]
aspect_ratio = math.sqrt(np.random.uniform(*ratio))
w = 1. * aspect_ratio
h = 1. / aspect_ratio
bound = min((float(im.shape[0]) / im.shape[1]) / (h**2),
(float(im.shape[1]) / im.shape[0]) / (w**2))
scale_max = min(scale[1], bound)
scale_min = min(scale[0], bound)
target_area = im.shape[0] * im.shape[1] * np.random.uniform(
scale_min, scale_max)
target_size = math.sqrt(target_area)
w = int(target_size * w)
h = int(target_size * h)
i = np.random.randint(0, im.shape[0] - h + 1)
j = np.random.randint(0, im.shape[1] - w + 1)
im = im[i:i + h, j:j + w, :]
im = cv2.resize(im, (crop_size, crop_size))
return im
def center_crop(im, crop_size=224):
height, width = im.shape[:2]
w_start = (width - crop_size) // 2
h_start = (height - crop_size) // 2
w_end = w_start + crop_size
h_end = h_start + crop_size
im = im[h_start:h_end, w_start:w_end, :]
return im
def horizontal_flip(im):
if len(im.shape) == 3:
im = im[:, ::-1, :]
elif len(im.shape) == 2:
im = im[:, ::-1]
return im
def vertical_flip(im):
if len(im.shape) == 3:
im = im[::-1, :, :]
elif len(im.shape) == 2:
im = im[::-1, :]
return im
def bgr2rgb(im):
return im[:, :, ::-1]
def hue(im, hue_lower, hue_upper):
delta = np.random.uniform(hue_lower, hue_upper)
u = np.cos(delta * np.pi)
w = np.sin(delta * np.pi)
bt = np.array([[1.0, 0.0, 0.0], [0.0, u, -w], [0.0, w, u]])
tyiq = np.array([[0.299, 0.587, 0.114], [0.596, -0.274, -0.321],
[0.211, -0.523, 0.311]])
ityiq = np.array([[1.0, 0.956, 0.621], [1.0, -0.272, -0.647],
[1.0, -1.107, 1.705]])
t = np.dot(np.dot(ityiq, bt), tyiq).T
im = np.dot(im, t)
return im
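# hue() rotates the image in YIQ color space: tyiq converts RGB to YIQ, bt
# rotates the I/Q chroma plane by delta * pi, and ityiq converts back to RGB,
# leaving luma (the Y channel) unchanged.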
def saturation(im, saturation_lower, saturation_upper):
delta = np.random.uniform(saturation_lower, saturation_upper)
gray = im * np.array([[[0.299, 0.587, 0.114]]], dtype=np.float32)
gray = gray.sum(axis=2, keepdims=True)
gray *= (1.0 - delta)
im *= delta
im += gray
return im
def contrast(im, contrast_lower, contrast_upper):
delta = np.random.uniform(contrast_lower, contrast_upper)
im *= delta
return im
def brightness(im, brightness_lower, brightness_upper):
delta = np.random.uniform(brightness_lower, brightness_upper)
im += delta
return im
def rotate(im, rotate_lower, rotate_upper):
rotate_delta = np.random.uniform(rotate_lower, rotate_upper)
im = im.rotate(int(rotate_delta))
return im
def resize_padding(im, max_side_len=2400):
'''
resize image to a size multiple of 32 which is required by the network
:param im: the resized image
:param max_side_len: limit of max image size to avoid out of memory in gpu
:return: the resized image and the resize ratio
'''
h, w, _ = im.shape
resize_w = w
resize_h = h
# limit the max side
if max(resize_h, resize_w) > max_side_len:
ratio = float(
max_side_len) / resize_h if resize_h > resize_w else float(
max_side_len) / resize_w
else:
ratio = 1.
resize_h = int(resize_h * ratio)
resize_w = int(resize_w * ratio)
resize_h = resize_h if resize_h % 32 == 0 else (resize_h // 32 - 1) * 32
resize_w = resize_w if resize_w % 32 == 0 else (resize_w // 32 - 1) * 32
resize_h = max(32, resize_h)
resize_w = max(32, resize_w)
im = cv2.resize(im, (int(resize_w), int(resize_h)))
#im = cv2.resize(im, (512, 512))
ratio_h = resize_h / float(h)
ratio_w = resize_w / float(w)
_ratio = np.array([ratio_h, ratio_w]).reshape(-1, 2)
return im, _ratio
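# e.g. a 700x900 (HxW) input needs no ratio change (max side 900 <= 2400) but
# is snapped down below the nearest multiples of 32, giving a 640x864 output
# together with the per-axis ratios (640/700, 864/900).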
# coding: utf8
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .ops import *
import random
import os.path as osp
import numpy as np
from PIL import Image
import cv2
from collections import OrderedDict
class SegTransform:
""" 分割transform基类
"""
def __init__(self):
pass
class Compose(SegTransform):
"""根据数据预处理/增强算子对输入数据进行操作。
所有操作的输入图像流形状均是[H, W, C],其中H为图像高,W为图像宽,C为图像通道数。
Args:
transforms (list): 数据预处理/增强算子。
Raises:
TypeError: transforms不是list对象
ValueError: transforms元素个数小于1。
"""
def __init__(self, transforms):
if not isinstance(transforms, list):
raise TypeError('The transforms must be a list!')
if len(transforms) < 1:
raise ValueError('The length of transforms ' + \
'must be equal or larger than 1!')
self.transforms = transforms
self.to_rgb = False
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (str/np.ndarray): 图像路径/图像np.ndarray数据。
im_info (list): 存储图像reisze或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (str/np.ndarray): 标注图像路径/标注图像np.ndarray数据。
Returns:
tuple: 根据网络所需字段所组成的tuple;字段由transforms中的最后一个数据预处理操作决定。
"""
if im_info is None:
im_info = list()
if isinstance(im, np.ndarray):
if len(im.shape) != 3:
                raise Exception(
                    "im should be 3-dimensional, but is {}-dimensional".
                    format(len(im.shape)))
else:
try:
im = cv2.imread(im).astype('float32')
except:
                raise ValueError('Cannot read the image file {}!'.format(im))
if self.to_rgb:
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
if label is not None:
if not isinstance(label, np.ndarray):
label = np.asarray(Image.open(label))
for op in self.transforms:
if isinstance(op, SegTransform):
outputs = op(im, im_info, label)
im = outputs[0]
if len(outputs) >= 2:
im_info = outputs[1]
if len(outputs) == 3:
label = outputs[2]
else:
im = execute_imgaug(op, im)
if label is not None:
outputs = (im, im_info, label)
else:
outputs = (im, im_info)
return outputs
def add_augmenters(self, augmenters):
if not isinstance(augmenters, list):
raise Exception(
"augmenters should be list type in func add_augmenters()")
transform_names = [type(x).__name__ for x in self.transforms]
for aug in augmenters:
if type(aug).__name__ in transform_names:
print("{} is already in ComposedTransforms, need to remove it from add_augmenters().".format(type(aug).__name__))
self.transforms = augmenters + self.transforms
class RandomHorizontalFlip(SegTransform):
"""以一定的概率对图像进行水平翻转。当存在标注图像时,则同步进行翻转。
Args:
prob (float): 随机水平翻转的概率。默认值为0.5。
"""
def __init__(self, prob=0.5):
self.prob = prob
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (list): 存储图像reisze或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
if random.random() < self.prob:
im = horizontal_flip(im)
if label is not None:
label = horizontal_flip(label)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
class RandomVerticalFlip(SegTransform):
"""以一定的概率对图像进行垂直翻转。当存在标注图像时,则同步进行翻转。
Args:
prob (float): 随机垂直翻转的概率。默认值为0.1。
"""
def __init__(self, prob=0.1):
self.prob = prob
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (list): 存储图像reisze或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
if random.random() < self.prob:
im = vertical_flip(im)
if label is not None:
label = vertical_flip(label)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
class Resize(SegTransform):
"""调整图像大小(resize),当存在标注图像时,则同步进行处理。
- 当目标大小(target_size)类型为int时,根据插值方式,
将图像resize为[target_size, target_size]。
- 当目标大小(target_size)类型为list或tuple时,根据插值方式,
将图像resize为target_size, target_size的输入应为[w, h]或(w, h)。
Args:
target_size (int|list|tuple): 目标大小。
interp (str): resize的插值方式,与opencv的插值方式对应,
可选的值为['NEAREST', 'LINEAR', 'CUBIC', 'AREA', 'LANCZOS4'],默认为"LINEAR"。
Raises:
TypeError: target_size不是int/list/tuple。
ValueError: target_size为list/tuple时元素个数不等于2。
AssertionError: interp的取值不在['NEAREST', 'LINEAR', 'CUBIC', 'AREA', 'LANCZOS4']之内。
"""
# The interpolation mode
interp_dict = {
'NEAREST': cv2.INTER_NEAREST,
'LINEAR': cv2.INTER_LINEAR,
'CUBIC': cv2.INTER_CUBIC,
'AREA': cv2.INTER_AREA,
'LANCZOS4': cv2.INTER_LANCZOS4
}
def __init__(self, target_size, interp='LINEAR'):
self.interp = interp
        assert interp in self.interp_dict, "interp should be one of {}".format(
            self.interp_dict.keys())
if isinstance(target_size, list) or isinstance(target_size, tuple):
if len(target_size) != 2:
                raise ValueError(
                    'when target_size is a list or tuple, it should contain 2 elements, but it is {}'
                    .format(target_size))
elif not isinstance(target_size, int):
raise TypeError(
"Type of target_size is invalid. Must be Integer or List or tuple, now is {}"
.format(type(target_size)))
self.target_size = target_size
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (list): 存储图像reisze或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
其中,im_info跟新字段为:
-shape_before_resize (tuple): 保存resize之前图像的形状(h, w)。
Raises:
ZeroDivisionError: im的短边为0。
TypeError: im不是np.ndarray数据。
ValueError: im不是3维nd.ndarray。
"""
if im_info is None:
            im_info = list()
im_info.append(('resize', im.shape[:2]))
if not isinstance(im, np.ndarray):
raise TypeError("ResizeImage: image type is not np.ndarray.")
if len(im.shape) != 3:
raise ValueError('ResizeImage: image is not 3-dimensional.')
im_shape = im.shape
im_size_min = np.min(im_shape[0:2])
im_size_max = np.max(im_shape[0:2])
if float(im_size_min) == 0:
raise ZeroDivisionError('ResizeImage: min size of image is 0')
if isinstance(self.target_size, int):
resize_w = self.target_size
resize_h = self.target_size
else:
resize_w = self.target_size[0]
resize_h = self.target_size[1]
im_scale_x = float(resize_w) / float(im_shape[1])
im_scale_y = float(resize_h) / float(im_shape[0])
im = cv2.resize(
im,
None,
None,
fx=im_scale_x,
fy=im_scale_y,
interpolation=self.interp_dict[self.interp])
if label is not None:
label = cv2.resize(
label,
None,
None,
fx=im_scale_x,
fy=im_scale_y,
interpolation=self.interp_dict['NEAREST'])
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
class ResizeByLong(SegTransform):
"""对图像长边resize到固定值,短边按比例进行缩放。当存在标注图像时,则同步进行处理。
Args:
long_size (int): resize后图像的长边大小。
"""
def __init__(self, long_size):
self.long_size = long_size
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (list): 存储图像reisze或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
其中,im_info新增字段为:
-shape_before_resize (tuple): 保存resize之前图像的形状(h, w)。
"""
if im_info is None:
            im_info = list()
im_info.append(('resize', im.shape[:2]))
im = resize_long(im, self.long_size)
if label is not None:
label = resize_long(label, self.long_size, cv2.INTER_NEAREST)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
class ResizeByShort(SegTransform):
"""根据图像的短边调整图像大小(resize)。
1. 获取图像的长边和短边长度。
2. 根据短边与short_size的比例,计算长边的目标长度,
此时高、宽的resize比例为short_size/原图短边长度。
3. 如果max_size>0,调整resize比例:
如果长边的目标长度>max_size,则高、宽的resize比例为max_size/原图长边长度。
4. 根据调整大小的比例对图像进行resize。
Args:
target_size (int): 短边目标长度。默认为800。
max_size (int): 长边目标长度的最大限制。默认为1333。
Raises:
TypeError: 形参数据类型不满足需求。
"""
def __init__(self, short_size=800, max_size=1333):
self.max_size = int(max_size)
if not isinstance(short_size, int):
raise TypeError(
"Type of short_size is invalid. Must be Integer, now is {}".
format(type(short_size)))
self.short_size = short_size
if not (isinstance(self.max_size, int)):
raise TypeError("max_size: input type is invalid.")
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (numnp.ndarraypy): 图像np.ndarray数据。
im_info (list): 存储图像reisze或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
其中,im_info更新字段为:
-shape_before_resize (tuple): 保存resize之前图像的形状(h, w)。
Raises:
TypeError: 形参数据类型不满足需求。
ValueError: 数据长度不匹配。
"""
if im_info is None:
            im_info = list()
if not isinstance(im, np.ndarray):
raise TypeError("ResizeByShort: image type is not numpy.")
if len(im.shape) != 3:
raise ValueError('ResizeByShort: image is not 3-dimensional.')
im_info.append(('resize', im.shape[:2]))
im_short_size = min(im.shape[0], im.shape[1])
im_long_size = max(im.shape[0], im.shape[1])
scale = float(self.short_size) / im_short_size
if self.max_size > 0 and np.round(scale *
im_long_size) > self.max_size:
scale = float(self.max_size) / float(im_long_size)
resized_width = int(round(im.shape[1] * scale))
resized_height = int(round(im.shape[0] * scale))
        im = cv2.resize(
            im, (resized_width, resized_height),
            interpolation=cv2.INTER_LINEAR)
        if label is not None:
            label = cv2.resize(
                label, (resized_width, resized_height),
                interpolation=cv2.INTER_NEAREST)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
class ResizeRangeScaling(SegTransform):
"""对图像长边随机resize到指定范围内,短边按比例进行缩放。当存在标注图像时,则同步进行处理。
Args:
min_value (int): 图像长边resize后的最小值。默认值400。
max_value (int): 图像长边resize后的最大值。默认值600。
Raises:
ValueError: min_value大于max_value
"""
def __init__(self, min_value=400, max_value=600):
if min_value > max_value:
raise ValueError('min_value must be less than max_value, '
'but they are {} and {}.'.format(min_value,
max_value))
self.min_value = min_value
self.max_value = max_value
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (list): 存储图像reisze或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
if self.min_value == self.max_value:
random_size = self.max_value
else:
random_size = int(
np.random.uniform(self.min_value, self.max_value) + 0.5)
im = resize_long(im, random_size, cv2.INTER_LINEAR)
if label is not None:
label = resize_long(label, random_size, cv2.INTER_NEAREST)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
class ResizeStepScaling(SegTransform):
"""对图像按照某一个比例resize,这个比例以scale_step_size为步长
在[min_scale_factor, max_scale_factor]随机变动。当存在标注图像时,则同步进行处理。
Args:
min_scale_factor(float), resize最小尺度。默认值0.75。
max_scale_factor (float), resize最大尺度。默认值1.25。
scale_step_size (float), resize尺度范围间隔。默认值0.25。
Raises:
ValueError: min_scale_factor大于max_scale_factor
"""
def __init__(self,
min_scale_factor=0.75,
max_scale_factor=1.25,
scale_step_size=0.25):
if min_scale_factor > max_scale_factor:
raise ValueError(
'min_scale_factor must be less than max_scale_factor, '
'but they are {} and {}.'.format(min_scale_factor,
max_scale_factor))
self.min_scale_factor = min_scale_factor
self.max_scale_factor = max_scale_factor
self.scale_step_size = scale_step_size
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (list): 存储图像reisze或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
if self.min_scale_factor == self.max_scale_factor:
scale_factor = self.min_scale_factor
elif self.scale_step_size == 0:
scale_factor = np.random.uniform(self.min_scale_factor,
self.max_scale_factor)
else:
num_steps = int((self.max_scale_factor - self.min_scale_factor) /
self.scale_step_size + 1)
scale_factors = np.linspace(self.min_scale_factor,
self.max_scale_factor,
num_steps).tolist()
np.random.shuffle(scale_factors)
scale_factor = scale_factors[0]
im = cv2.resize(
im, (0, 0),
fx=scale_factor,
fy=scale_factor,
interpolation=cv2.INTER_LINEAR)
if label is not None:
label = cv2.resize(
label, (0, 0),
fx=scale_factor,
fy=scale_factor,
interpolation=cv2.INTER_NEAREST)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
class Normalize(SegTransform):
"""对图像进行标准化。
1.尺度缩放到 [0,1]。
2.对图像进行减均值除以标准差操作。
Args:
mean (list): 图像数据集的均值。默认值[0.5, 0.5, 0.5]。
std (list): 图像数据集的标准差。默认值[0.5, 0.5, 0.5]。
Raises:
ValueError: mean或std不是list对象。std包含0。
"""
def __init__(self, mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]):
self.mean = mean
self.std = std
if not (isinstance(self.mean, list) and isinstance(self.std, list)):
raise ValueError("{}: input type is invalid.".format(self))
from functools import reduce
if reduce(lambda x, y: x * y, self.std) == 0:
raise ValueError('{}: std is invalid!'.format(self))
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (list): 存储图像reisze或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
std = np.array(self.std)[np.newaxis, np.newaxis, :]
im = normalize(im, mean, std)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
class Padding(SegTransform):
"""对图像或标注图像进行padding,padding方向为右和下。
根据提供的值对图像或标注图像进行padding操作。
Args:
target_size (int|list|tuple): padding后图像的大小。
im_padding_value (list): 图像padding的值。默认为[127.5, 127.5, 127.5]。
label_padding_value (int): 标注图像padding的值。默认值为255。
Raises:
TypeError: target_size不是int|list|tuple。
ValueError: target_size为list|tuple时元素个数不等于2。
"""
def __init__(self,
target_size,
im_padding_value=[127.5, 127.5, 127.5],
label_padding_value=255):
if isinstance(target_size, list) or isinstance(target_size, tuple):
if len(target_size) != 2:
                raise ValueError(
                    'when target_size is a list or tuple, it should contain 2 elements, but it is {}'
                    .format(target_size))
elif not isinstance(target_size, int):
raise TypeError(
"Type of target_size is invalid. Must be Integer or List or tuple, now is {}"
.format(type(target_size)))
self.target_size = target_size
self.im_padding_value = im_padding_value
self.label_padding_value = label_padding_value
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (list): 存储图像reisze或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
其中,im_info新增字段为:
-shape_before_padding (tuple): 保存padding之前图像的形状(h, w)。
Raises:
ValueError: 输入图像im或label的形状大于目标值
"""
if im_info is None:
            im_info = list()
im_info.append(('padding', im.shape[:2]))
im_height, im_width = im.shape[0], im.shape[1]
if isinstance(self.target_size, int):
target_height = self.target_size
target_width = self.target_size
else:
target_height = self.target_size[1]
target_width = self.target_size[0]
pad_height = target_height - im_height
pad_width = target_width - im_width
if pad_height < 0 or pad_width < 0:
            raise ValueError(
                'the image size should be no larger than target_size, but the image size ({}, {}) exceeds target_size ({}, {})'
                .format(im_width, im_height, target_width, target_height))
else:
im = cv2.copyMakeBorder(
im,
0,
pad_height,
0,
pad_width,
cv2.BORDER_CONSTANT,
value=self.im_padding_value)
if label is not None:
label = cv2.copyMakeBorder(
label,
0,
pad_height,
0,
pad_width,
cv2.BORDER_CONSTANT,
value=self.label_padding_value)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
class RandomPaddingCrop(SegTransform):
"""对图像和标注图进行随机裁剪,当所需要的裁剪尺寸大于原图时,则进行padding操作。
Args:
crop_size (int|list|tuple): 裁剪图像大小。默认为512。
im_padding_value (list): 图像padding的值。默认为[127.5, 127.5, 127.5]。
label_padding_value (int): 标注图像padding的值。默认值为255。
Raises:
TypeError: crop_size不是int/list/tuple。
ValueError: target_size为list/tuple时元素个数不等于2。
"""
def __init__(self,
crop_size=512,
im_padding_value=[127.5, 127.5, 127.5],
label_padding_value=255):
if isinstance(crop_size, list) or isinstance(crop_size, tuple):
if len(crop_size) != 2:
                raise ValueError(
                    'when crop_size is a list or tuple, it should contain 2 elements, but it is {}'
                    .format(crop_size))
elif not isinstance(crop_size, int):
raise TypeError(
"Type of crop_size is invalid. Must be Integer or List or tuple, now is {}"
.format(type(crop_size)))
self.crop_size = crop_size
self.im_padding_value = im_padding_value
self.label_padding_value = label_padding_value
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (list): 存储图像reisze或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
if isinstance(self.crop_size, int):
crop_width = self.crop_size
crop_height = self.crop_size
else:
crop_width = self.crop_size[0]
crop_height = self.crop_size[1]
img_height = im.shape[0]
img_width = im.shape[1]
if img_height == crop_height and img_width == crop_width:
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
else:
pad_height = max(crop_height - img_height, 0)
pad_width = max(crop_width - img_width, 0)
if (pad_height > 0 or pad_width > 0):
im = cv2.copyMakeBorder(
im,
0,
pad_height,
0,
pad_width,
cv2.BORDER_CONSTANT,
value=self.im_padding_value)
if label is not None:
label = cv2.copyMakeBorder(
label,
0,
pad_height,
0,
pad_width,
cv2.BORDER_CONSTANT,
value=self.label_padding_value)
img_height = im.shape[0]
img_width = im.shape[1]
if crop_height > 0 and crop_width > 0:
h_off = np.random.randint(img_height - crop_height + 1)
w_off = np.random.randint(img_width - crop_width + 1)
im = im[h_off:(crop_height + h_off), w_off:(w_off + crop_width
), :]
if label is not None:
label = label[h_off:(crop_height + h_off), w_off:(
w_off + crop_width)]
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
class RandomBlur(SegTransform):
"""以一定的概率对图像进行高斯模糊。
Args:
prob (float): 图像模糊概率。默认为0.1。
"""
def __init__(self, prob=0.1):
self.prob = prob
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (list): 存储图像reisze或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
if self.prob <= 0:
n = 0
elif self.prob >= 1:
n = 1
else:
n = int(1.0 / self.prob)
if n > 0:
if np.random.randint(0, n) == 0:
radius = np.random.randint(3, 10)
if radius % 2 != 1:
radius = radius + 1
if radius > 9:
radius = 9
im = cv2.GaussianBlur(im, (radius, radius), 0, 0)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
class RandomScaleAspect(SegTransform):
"""裁剪并resize回原始尺寸的图像和标注图像。
按照一定的面积比和宽高比对图像进行裁剪,并reszie回原始图像的图像,当存在标注图时,同步进行。
Args:
min_scale (float):裁取图像占原始图像的面积比,取值[0,1],为0时则返回原图。默认为0.5。
aspect_ratio (float): 裁取图像的宽高比范围,非负值,为0时返回原图。默认为0.33。
"""
def __init__(self, min_scale=0.5, aspect_ratio=0.33):
self.min_scale = min_scale
self.aspect_ratio = aspect_ratio
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (list): 存储图像reisze或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
if self.min_scale != 0 and self.aspect_ratio != 0:
img_height = im.shape[0]
img_width = im.shape[1]
for i in range(0, 10):
area = img_height * img_width
target_area = area * np.random.uniform(self.min_scale, 1.0)
aspectRatio = np.random.uniform(self.aspect_ratio,
1.0 / self.aspect_ratio)
dw = int(np.sqrt(target_area * 1.0 * aspectRatio))
dh = int(np.sqrt(target_area * 1.0 / aspectRatio))
                if np.random.randint(10) < 5:
                    dw, dh = dh, dw
if (dh < img_height and dw < img_width):
h1 = np.random.randint(0, img_height - dh)
w1 = np.random.randint(0, img_width - dw)
im = im[h1:(h1 + dh), w1:(w1 + dw), :]
label = label[h1:(h1 + dh), w1:(w1 + dw)]
im = cv2.resize(
im, (img_width, img_height),
interpolation=cv2.INTER_LINEAR)
label = cv2.resize(
label, (img_width, img_height),
interpolation=cv2.INTER_NEAREST)
break
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
class RandomDistort(SegTransform):
"""对图像进行随机失真。
1. 对变换的操作顺序进行随机化操作。
2. 按照1中的顺序以一定的概率对图像进行随机像素内容变换。
Args:
brightness_range (float): 明亮度因子的范围。默认为0.5。
brightness_prob (float): 随机调整明亮度的概率。默认为0.5。
contrast_range (float): 对比度因子的范围。默认为0.5。
contrast_prob (float): 随机调整对比度的概率。默认为0.5。
saturation_range (float): 饱和度因子的范围。默认为0.5。
saturation_prob (float): 随机调整饱和度的概率。默认为0.5。
hue_range (int): 色调因子的范围。默认为18。
hue_prob (float): 随机调整色调的概率。默认为0.5。
"""
def __init__(self,
brightness_range=0.5,
brightness_prob=0.5,
contrast_range=0.5,
contrast_prob=0.5,
saturation_range=0.5,
saturation_prob=0.5,
hue_range=18,
hue_prob=0.5):
self.brightness_range = brightness_range
self.brightness_prob = brightness_prob
self.contrast_range = contrast_range
self.contrast_prob = contrast_prob
self.saturation_range = saturation_range
self.saturation_prob = saturation_prob
self.hue_range = hue_range
self.hue_prob = hue_prob
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (list): 存储图像reisze或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
brightness_lower = 1 - self.brightness_range
brightness_upper = 1 + self.brightness_range
contrast_lower = 1 - self.contrast_range
contrast_upper = 1 + self.contrast_range
saturation_lower = 1 - self.saturation_range
saturation_upper = 1 + self.saturation_range
hue_lower = -self.hue_range
hue_upper = self.hue_range
ops = [brightness, contrast, saturation, hue]
random.shuffle(ops)
params_dict = {
'brightness': {
'brightness_lower': brightness_lower,
'brightness_upper': brightness_upper
},
'contrast': {
'contrast_lower': contrast_lower,
'contrast_upper': contrast_upper
},
'saturation': {
'saturation_lower': saturation_lower,
'saturation_upper': saturation_upper
},
'hue': {
'hue_lower': hue_lower,
'hue_upper': hue_upper
}
}
prob_dict = {
'brightness': self.brightness_prob,
'contrast': self.contrast_prob,
'saturation': self.saturation_prob,
'hue': self.hue_prob
}
        for op in ops:
            params = params_dict[op.__name__]
            prob = prob_dict[op.__name__]
            params['im'] = im
            if np.random.uniform(0, 1) < prob:
                im = op(**params)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
class ArrangeSegmenter(SegTransform):
"""获取训练/验证/预测所需的信息。
Args:
mode (str): 指定数据用于何种用途,取值范围为['train', 'eval', 'test', 'quant']。
Raises:
ValueError: mode的取值不在['train', 'eval', 'test', 'quant']之内
"""
def __init__(self, mode):
if mode not in ['train', 'eval', 'test', 'quant']:
raise ValueError(
"mode should be defined as one of ['train', 'eval', 'test', 'quant']!"
)
self.mode = mode
def __call__(self, im, im_info, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (list): 存储图像reisze或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当mode为'train'或'eval'时,返回的tuple为(im, label),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当mode为'test'时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;当mode为
'quant'时,返回的tuple为(im,),为图像np.ndarray数据。
"""
im = permute(im, False)
if self.mode == 'train' or self.mode == 'eval':
label = label[np.newaxis, :, :]
return (im, label)
elif self.mode == 'test':
return (im, im_info)
else:
return (im, )
class ComposedSegTransforms(Compose):
""" 语义分割模型(UNet/DeepLabv3p)的图像处理流程,具体如下
训练阶段:
1. 随机对图像以0.5的概率水平翻转
2. 按不同的比例随机Resize原图
3. 从原图中随机crop出大小为train_crop_size大小的子图,如若crop出来的图小于train_crop_size,则会将图padding到对应大小
4. 图像归一化
预测阶段:
1. 图像归一化
Args:
mode(str): 图像处理所处阶段,训练/验证/预测,分别对应'train', 'eval', 'test'
train_crop_size(list): 模型训练阶段,随机从原图crop的大小
mean(list): 图像均值
std(list): 图像方差
"""
def __init__(self,
mode,
train_crop_size=[769, 769],
mean=[0.5, 0.5, 0.5],
std=[0.5, 0.5, 0.5]):
        if mode == 'train':
            # training-time transforms, including data augmentation
            pass
        else:
            # evaluation/prediction-time transforms
transforms = [Normalize(mean=mean, std=std)]
super(ComposedSegTransforms, self).__init__(transforms)
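# A usage sketch for the eval/test pipeline above (path is illustrative):
#   transforms = ComposedSegTransforms(mode='test')
#   im, im_info = transforms('test.jpg')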
# download pre-compiled opencv lib
OPENCV_URL=https://paddleseg.bj.bcebos.com/deploy/docker/opencv3gcc4.8.tar.bz2
if [ ! -d "./deps/opencv3gcc4.8" ]; then
mkdir -p deps
cd deps
wget -c ${OPENCV_URL}
tar xvfj opencv3gcc4.8.tar.bz2
rm -rf opencv3gcc4.8.tar.bz2
cd ..
fi
# path to the precompiled OpenVINO inference engine
OPENVINO_DIR=$INTEL_OPENVINO_DIR/inference_engine
# path to the ngraph lib, usually generated when OpenVINO is compiled
NGRAPH_LIB=$INTEL_OPENVINO_DIR/deployment_tools/ngraph/lib
# path to the precompiled gflags library
GFLAGS_DIR=$(pwd)/deps/gflags
# path to the precompiled glog library
GLOG_DIR=$(pwd)/deps/glog
#cpu架构
ARCH=x86
export ARCH
# opencv预编译库的路径, 如果使用自带预编译版本可不修改
OPENCV_DIR=$(pwd)/deps/opencv3gcc4.8/
# 下载自带预编译版本
sh $(pwd)/scripts/bootstrap.sh
#下载并编译third-part lib
sh $(pwd)/scripts/install_third-party.sh
rm -rf build
mkdir -p build
......@@ -16,6 +25,8 @@ cd build
cmake .. \
-DOPENCV_DIR=${OPENCV_DIR} \
-DGFLAGS_DIR=${GFLAGS_DIR} \
-DGLOG_DIR=${GLOG_DIR} \
-DOPENVINO_DIR=${OPENVINO_DIR} \
-DNGRAPH_LIB=${NGRAPH_LIB}
-DNGRAPH_LIB=${NGRAPH_LIB} \
-DARCH=${ARCH}
make
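# After a successful build the demo binaries live under ./build; a typical run
# (all paths are placeholders) looks like:
# ./build/classifier --model_dir=/path/to/model.xml --cfg_file=/path/to/model.yml --image=/path/to/test.jpg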
# download third-party libs
if [ ! -d "./deps" ]; then
mkdir deps
fi
if [ ! -d "./deps/gflag" ]; then
cd deps
git clone https://github.com/gflags/gflags
cd gflags
cmake .
make -j 8
cd ..
cd ..
fi
if [ ! -d "./deps/glog" ]; then
cd deps
git clone https://github.com/google/glog
sudo apt-get install autoconf automake libtool
cd glog
./autogen.sh
./configure
make -j 8
cd ..
cd ..
fi
if [ "$ARCH" = "x86" ]; then
OPENCV_URL=https://bj.bcebos.com/paddlex/deploy/x86opencv/opencv.tar.bz2
else
OPENCV_URL=https://bj.bcebos.com/paddlex/deploy/armopencv/opencv.tar.bz2
fi
if [ ! -d "./deps/opencv" ]; then
cd deps
wget -c ${OPENCV_URL}
tar xvfj opencv.tar.bz2
rm -rf opencv.tar.bz2
cd ..
fi
......@@ -13,28 +13,47 @@
// limitations under the License.
#include "include/paddlex/paddlex.h"
#include <iostream>
#include <fstream>
using namespace InferenceEngine;
namespace PaddleX {
void Model::create_predictor(const std::string& model_dir,
const std::string& cfg_dir,
const std::string& cfg_file,
std::string device) {
Core ie;
network_ = ie.ReadNetwork(model_dir, model_dir.substr(0, model_dir.size() - 4) + ".bin");
InferenceEngine::Core ie;
network_ = ie.ReadNetwork(
model_dir, model_dir.substr(0, model_dir.size() - 4) + ".bin");
network_.setBatchSize(1);
InputInfo::Ptr input_info = network_.getInputsInfo().begin()->second;
input_info->getPreProcess().setResizeAlgorithm(RESIZE_BILINEAR);
input_info->setLayout(Layout::NCHW);
input_info->setPrecision(Precision::FP32);
InferenceEngine::InputsDataMap inputInfo(network_.getInputsInfo());
std::string imageInputName;
for (const auto & inputInfoItem : inputInfo) {
if (inputInfoItem.second->getTensorDesc().getDims().size() == 4) {
imageInputName = inputInfoItem.first;
inputInfoItem.second->setPrecision(InferenceEngine::Precision::FP32);
inputInfoItem.second->getPreProcess().setResizeAlgorithm(
InferenceEngine::RESIZE_BILINEAR);
inputInfoItem.second->setLayout(InferenceEngine::Layout::NCHW);
}
if (inputInfoItem.second->getTensorDesc().getDims().size() == 2) {
imageInputName = inputInfoItem.first;
inputInfoItem.second->setPrecision(InferenceEngine::Precision::FP32);
}
}
if (device == "MYRIAD") {
std::map<std::string, std::string> networkConfig;
networkConfig["VPU_HW_STAGES_OPTIMIZATION"] = "ON";
executable_network_ = ie.LoadNetwork(network_, device, networkConfig);
} else {
executable_network_ = ie.LoadNetwork(network_, device);
load_config(cfg_dir);
}
load_config(cfg_file);
}
bool Model::load_config(const std::string& cfg_dir) {
YAML::Node config = YAML::LoadFile(cfg_dir);
bool Model::load_config(const std::string& cfg_file) {
YAML::Node config = YAML::LoadFile(cfg_file);
type = config["_Attributes"]["model_type"].as<std::string>();
name = config["Model"].as<std::string>();
bool to_rgb = true;
......@@ -48,22 +67,26 @@ bool Model::load_config(const std::string& cfg_dir) {
return false;
}
}
// build the preprocessing pipeline
transforms_.Init(config["Transforms"], to_rgb);
// read the label list
labels.clear();
labels = config["_Attributes"]["labels"].as<std::vector<std::string>>();
// init preprocess ops
transforms_.Init(config["Transforms"], type, to_rgb);
// read label list
for (const auto& item : config["_Attributes"]["labels"]) {
int index = labels.size();
labels[index] = item.as<std::string>();
}
return true;
}
bool Model::preprocess(cv::Mat* input_im) {
if (!transforms_.Run(input_im, inputs_)) {
bool Model::preprocess(cv::Mat* input_im, ImageBlob* inputs) {
if (!transforms_.Run(input_im, inputs)) {
return false;
}
return true;
}
bool Model::predict(const cv::Mat& im, ClsResult* result) {
inputs_.clear();
if (type == "detector") {
std::cerr << "Loading model is a 'detector', DetResult should be passed to "
"function predict()!"
......@@ -75,34 +98,221 @@ bool Model::predict(const cv::Mat& im, ClsResult* result) {
<< std::endl;
return false;
}
// preprocess the input image
InferRequest infer_request = executable_network_.CreateInferRequest();
// preprocess
InferenceEngine::InferRequest infer_request =
executable_network_.CreateInferRequest();
std::string input_name = network_.getInputsInfo().begin()->first;
inputs_ = infer_request.GetBlob(input_name);
auto im_clone = im.clone();
if (!preprocess(&im_clone)) {
inputs_.blob = infer_request.GetBlob(input_name);
cv::Mat im_clone = im.clone();
if (!preprocess(&im_clone, &inputs_)) {
std::cerr << "Preprocess failed!" << std::endl;
return false;
}
// predict
infer_request.Infer();
std::string output_name = network_.getOutputsInfo().begin()->first;
output_ = infer_request.GetBlob(output_name);
MemoryBlob::CPtr moutput = as<MemoryBlob>(output_);
InferenceEngine::MemoryBlob::CPtr moutput =
InferenceEngine::as<InferenceEngine::MemoryBlob>(output_);
auto moutputHolder = moutput->rmap();
float* outputs_data = moutputHolder.as<float *>();
// post-process the model output
// post process
// Note: sizeof(outputs_data) measures the pointer, not the output length;
// derive the element count from the output tensor shape instead.
size_t output_size = 1;
for (const auto& dim : moutput->getTensorDesc().getDims()) {
output_size *= dim;
}
auto ptr = std::max_element(outputs_data, outputs_data + output_size);
result->category_id = std::distance(outputs_data, ptr);
result->score = *ptr;
result->category = labels[result->category_id];
//for (int i=0;i<sizeof(outputs_data);i++){
// std::cout << labels[i] << std::endl;
// std::cout << outputs_[i] << std::endl;
// }
return true;
}
bool Model::predict(const cv::Mat& im, DetResult* result) {
inputs_.clear();
result->clear();
if (type == "classifier") {
std::cerr << "Loading model is a 'classifier', ClsResult should be passed "
"to function predict()!" << std::endl;
return false;
} else if (type == "segmenter") {
std::cerr << "Loading model is a 'segmenter', SegResult should be passed "
"to function predict()!" << std::endl;
return false;
}
InferenceEngine::InferRequest infer_request =
executable_network_.CreateInferRequest();
InferenceEngine::InputsDataMap input_maps = network_.getInputsInfo();
std::string inputName;
for (const auto & input_map : input_maps) {
if (input_map.second->getTensorDesc().getDims().size() == 4) {
inputName = input_map.first;
inputs_.blob = infer_request.GetBlob(inputName);
}
if (input_map.second->getTensorDesc().getDims().size() == 2) {
inputName = input_map.first;
inputs_.ori_im_size_ = infer_request.GetBlob(inputName);
}
}
cv::Mat im_clone = im.clone();
if (!preprocess(&im_clone, &inputs_)) {
std::cerr << "Preprocess failed!" << std::endl;
return false;
}
infer_request.Infer();
InferenceEngine::OutputsDataMap out_map = network_.getOutputsInfo();
auto iter = out_map.begin();
std::string outputName = iter->first;
InferenceEngine::Blob::Ptr output = infer_request.GetBlob(outputName);
InferenceEngine::MemoryBlob::CPtr moutput =
InferenceEngine::as<InferenceEngine::MemoryBlob>(output);
InferenceEngine::TensorDesc blob_output = moutput->getTensorDesc();
std::vector<size_t> output_shape = blob_output.getDims();
auto moutputHolder = moutput->rmap();
float* data = moutputHolder.as<float *>();
int size = 1;
for (auto& i : output_shape) {
size *= static_cast<int>(i);
}
int num_boxes = size / 6;
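// Each detection occupies 6 floats: [class_id, score, xmin, ymin, xmax, ymax],
// so the flat output of `size` floats holds size / 6 candidate boxes; rows
// whose class_id is not positive are treated as empty and skipped below.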
for (int i = 0; i < num_boxes; ++i) {
if (data[i * 6] > 0) {
Box box;
box.category_id = static_cast<int>(data[i * 6]);
box.category = labels[box.category_id];
box.score = data[i * 6 + 1];
float xmin = data[i * 6 + 2];
float ymin = data[i * 6 + 3];
float xmax = data[i * 6 + 4];
float ymax = data[i * 6 + 5];
float w = xmax - xmin + 1;
float h = ymax - ymin + 1;
box.coordinate = {xmin, ymin, w, h};
result->boxes.push_back(std::move(box));
}
}
return true;
}
bool Model::predict(const cv::Mat& im, SegResult* result) {
result->clear();
inputs_.clear();
if (type == "classifier") {
std::cerr << "Loading model is a 'classifier', ClsResult should be passed "
"to function predict()!" << std::endl;
return false;
} else if (type == "detector") {
std::cerr << "Loading model is a 'detector', DetResult should be passed to "
"function predict()!" << std::endl;
return false;
}
// init infer
InferenceEngine::InferRequest infer_request =
executable_network_.CreateInferRequest();
std::string input_name = network_.getInputsInfo().begin()->first;
inputs_.blob = infer_request.GetBlob(input_name);
// preprocess
cv::Mat im_clone = im.clone();
if (!preprocess(&im_clone, &inputs_)) {
std::cerr << "Preprocess failed!" << std::endl;
return false;
}
// predict
infer_request.Infer();
InferenceEngine::OutputsDataMap out_map = network_.getOutputsInfo();
auto iter = out_map.begin();
iter++;
std::string output_name_score = iter->first;
InferenceEngine::Blob::Ptr output_score =
infer_request.GetBlob(output_name_score);
InferenceEngine::MemoryBlob::CPtr moutput_score =
InferenceEngine::as<InferenceEngine::MemoryBlob>(output_score);
InferenceEngine::TensorDesc blob_score = moutput_score->getTensorDesc();
std::vector<size_t> output_score_shape = blob_score.getDims();
int size = 1;
for (auto& i : output_score_shape) {
size *= static_cast<int>(i);
result->score_map.shape.push_back(static_cast<int>(i));
}
result->score_map.data.resize(size);
auto moutputHolder_score = moutput_score->rmap();
float* score_data = moutputHolder_score.as<float *>();
memcpy(result->score_map.data.data(), score_data, moutput_score->byteSize());
iter++;
std::string output_name_label = iter->first;
InferenceEngine::Blob::Ptr output_label =
infer_request.GetBlob(output_name_label);
InferenceEngine::MemoryBlob::CPtr moutput_label =
InferenceEngine::as<InferenceEngine::MemoryBlob>(output_label);
InferenceEngine::TensorDesc blob_label = moutput_label->getTensorDesc();
std::vector<size_t> output_label_shape = blob_label.getDims();
size = 1;
for (auto& i : output_label_shape) {
size *= static_cast<int>(i);
result->label_map.shape.push_back(static_cast<int>(i));
}
result->label_map.data.resize(size);
auto moutputHolder_label = moutput_label->rmap();
int* label_data = moutputHolder_label.as<int *>();
memcpy(result->label_map.data.data(), label_data, moutput_label->byteSize());
std::vector<uint8_t> label_map(result->label_map.data.begin(),
result->label_map.data.end());
cv::Mat mask_label(result->label_map.shape[1],
result->label_map.shape[2],
CV_8UC1,
label_map.data());
cv::Mat mask_score(result->score_map.shape[2],
result->score_map.shape[3],
CV_32FC1,
result->score_map.data.data());
int idx = 1;
int len_postprocess = inputs_.im_size_before_resize_.size();
for (std::vector<std::string>::reverse_iterator iter =
inputs_.reshape_order_.rbegin();
iter != inputs_.reshape_order_.rend();
++iter) {
if (*iter == "padding") {
auto before_shape = inputs_.im_size_before_resize_[len_postprocess - idx];
inputs_.im_size_before_resize_.pop_back();
auto padding_w = before_shape[0];
auto padding_h = before_shape[1];
mask_label = mask_label(cv::Rect(0, 0, padding_h, padding_w));
mask_score = mask_score(cv::Rect(0, 0, padding_h, padding_w));
} else if (*iter == "resize") {
auto before_shape = inputs_.im_size_before_resize_[len_postprocess - idx];
inputs_.im_size_before_resize_.pop_back();
auto resize_w = before_shape[0];
auto resize_h = before_shape[1];
cv::resize(mask_label,
mask_label,
cv::Size(resize_h, resize_w),
0,
0,
cv::INTER_NEAREST);
cv::resize(mask_score,
mask_score,
cv::Size(resize_h, resize_w),
0,
0,
cv::INTER_LINEAR);
}
++idx;
}
result->label_map.data.assign(mask_label.begin<uint8_t>(),
mask_label.end<uint8_t>());
result->label_map.shape = {mask_label.rows, mask_label.cols};
result->score_map.data.assign(mask_score.begin<float>(),
mask_score.end<float>());
result->score_map.shape = {mask_score.rows, mask_score.cols};
return true;
}
} // namespace PaddleX
......@@ -12,11 +12,15 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "include/paddlex/transforms.h"
#include <math.h>
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include "include/paddlex/transforms.h"
namespace PaddleX {
......@@ -26,7 +30,7 @@ std::map<std::string, int> interpolations = {{"LINEAR", cv::INTER_LINEAR},
{"CUBIC", cv::INTER_CUBIC},
{"LANCZOS4", cv::INTER_LANCZOS4}};
bool Normalize::Run(cv::Mat* im){
bool Normalize::Run(cv::Mat* im, ImageBlob* data) {
for (int h = 0; h < im->rows; h++) {
for (int w = 0; w < im->cols; w++) {
im->at<cv::Vec3f>(h, w)[0] =
......@@ -40,19 +44,6 @@ bool Normalize::Run(cv::Mat* im){
return true;
}
bool CenterCrop::Run(cv::Mat* im) {
int height = static_cast<int>(im->rows);
int width = static_cast<int>(im->cols);
if (height < height_ || width < width_) {
std::cerr << "[CenterCrop] Image size less than crop size" << std::endl;
return false;
}
int offset_x = static_cast<int>((width - width_) / 2);
int offset_y = static_cast<int>((height - height_) / 2);
cv::Rect crop_roi(offset_x, offset_y, width_, height_);
*im = (*im)(crop_roi);
return true;
}
float ResizeByShort::GenerateScale(const cv::Mat& im) {
......@@ -70,17 +61,115 @@ float ResizeByShort::GenerateScale(const cv::Mat& im) {
return scale;
}
bool ResizeByShort::Run(cv::Mat* im) {
bool ResizeByShort::Run(cv::Mat* im, ImageBlob* data) {
data->im_size_before_resize_.push_back({im->rows, im->cols});
data->reshape_order_.push_back("resize");
float scale = GenerateScale(*im);
int width = static_cast<int>(scale * im->cols);
int height = static_cast<int>(scale * im->rows);
int width = static_cast<int>(round(scale * im->cols));
int height = static_cast<int>(round(scale * im->rows));
cv::resize(*im, *im, cv::Size(width, height), 0, 0, cv::INTER_LINEAR);
data->new_im_size_[0] = im->rows;
data->new_im_size_[1] = im->cols;
data->scale = scale;
return true;
}
void Transforms::Init(const YAML::Node& transforms_node, bool to_rgb) {
bool CenterCrop::Run(cv::Mat* im, ImageBlob* data) {
int height = static_cast<int>(im->rows);
int width = static_cast<int>(im->cols);
if (height < height_ || width < width_) {
std::cerr << "[CenterCrop] Image size less than crop size" << std::endl;
return false;
}
int offset_x = static_cast<int>((width - width_) / 2);
int offset_y = static_cast<int>((height - height_) / 2);
cv::Rect crop_roi(offset_x, offset_y, width_, height_);
*im = (*im)(crop_roi);
data->new_im_size_[0] = im->rows;
data->new_im_size_[1] = im->cols;
return true;
}
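// Worked example for Padding::Run below (assumed values): with
// coarsest_stride_ = 32, a 375x500 (HxW) input is padded up to the next
// multiples of 32, i.e. padding_h = 12*32 - 375 = 9 and
// padding_w = 16*32 - 500 = 12, giving a 384x512 image.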
bool Padding::Run(cv::Mat* im, ImageBlob* data) {
data->im_size_before_resize_.push_back({im->rows, im->cols});
data->reshape_order_.push_back("padding");
int padding_w = 0;
int padding_h = 0;
if (width_ > 1 & height_ > 1) {
padding_w = width_ - im->cols;
padding_h = height_ - im->rows;
} else if (coarsest_stride_ >= 1) {
int h = im->rows;
int w = im->cols;
padding_h =
ceil(h * 1.0 / coarsest_stride_) * coarsest_stride_ - im->rows;
padding_w =
ceil(w * 1.0 / coarsest_stride_) * coarsest_stride_ - im->cols;
}
if (padding_h < 0 || padding_w < 0) {
std::cerr << "[Padding] Computed padding_h=" << padding_h
<< ", padding_w=" << padding_w
<< ", but they should be greater than 0." << std::endl;
return false;
}
cv::Scalar value = cv::Scalar(im_value_[0], im_value_[1], im_value_[2]);
cv::copyMakeBorder(
*im, *im, 0, padding_h, 0, padding_w, cv::BORDER_CONSTANT, value);
data->new_im_size_[0] = im->rows;
data->new_im_size_[1] = im->cols;
return true;
}
bool ResizeByLong::Run(cv::Mat* im, ImageBlob* data) {
if (long_size_ <= 0) {
std::cerr << "[ResizeByLong] long_size should be greater than 0"
<< std::endl;
return false;
}
data->im_size_before_resize_.push_back({im->rows, im->cols});
data->reshape_order_.push_back("resize");
int origin_w = im->cols;
int origin_h = im->rows;
int im_size_max = std::max(origin_w, origin_h);
float scale =
static_cast<float>(long_size_) / static_cast<float>(im_size_max);
cv::resize(*im, *im, cv::Size(), scale, scale, cv::INTER_NEAREST);
data->new_im_size_[0] = im->rows;
data->new_im_size_[1] = im->cols;
data->scale = scale;
return true;
}
bool Resize::Run(cv::Mat* im, ImageBlob* data) {
if (width_ <= 0 || height_ <= 0) {
std::cerr << "[Resize] width and height should be greater than 0"
<< std::endl;
return false;
}
if (interpolations.count(interp_) <= 0) {
std::cerr << "[Resize] Invalid interpolation method: '" << interp_ << "'"
<< std::endl;
return false;
}
data->im_size_before_resize_.push_back({im->rows, im->cols});
data->reshape_order_.push_back("resize");
cv::resize(
*im, *im, cv::Size(width_, height_), 0, 0, interpolations[interp_]);
data->new_im_size_[0] = im->rows;
data->new_im_size_[1] = im->cols;
return true;
}
void Transforms::Init(
const YAML::Node& transforms_node, std::string type, bool to_rgb) {
transforms_.clear();
to_rgb_ = to_rgb;
type_ = type;
for (const auto& item : transforms_node) {
std::string name = item.begin()->first.as<std::string>();
std::cout << "trans name: " << name << std::endl;
......@@ -94,10 +183,16 @@ std::shared_ptr<Transform> Transforms::CreateTransform(
const std::string& transform_name) {
if (transform_name == "Normalize") {
return std::make_shared<Normalize>();
} else if (transform_name == "CenterCrop") {
return std::make_shared<CenterCrop>();
} else if (transform_name == "ResizeByShort") {
return std::make_shared<ResizeByShort>();
} else if (transform_name == "CenterCrop") {
return std::make_shared<CenterCrop>();
} else if (transform_name == "Resize") {
return std::make_shared<Resize>();
} else if (transform_name == "Padding") {
return std::make_shared<Padding>();
} else if (transform_name == "ResizeByLong") {
return std::make_shared<ResizeByLong>();
} else {
std::cerr << "There's unexpected transform(name='" << transform_name
<< "')." << std::endl;
......@@ -105,27 +200,38 @@ std::shared_ptr<Transform> Transforms::CreateTransform(
}
}
bool Transforms::Run(cv::Mat* im, Blob::Ptr blob) {
// apply the preprocessing ops in the order given by transforms
bool Transforms::Run(cv::Mat* im, ImageBlob* data) {
// preprocess by order
if (to_rgb_) {
cv::cvtColor(*im, *im, cv::COLOR_BGR2RGB);
}
(*im).convertTo(*im, CV_32FC3);
if (type_ == "detector") {
InferenceEngine::LockedMemory<void> input2Mapped =
InferenceEngine::as<InferenceEngine::MemoryBlob>(
data->ori_im_size_)->wmap();
float *p = input2Mapped.as<float*>();
p[0] = im->rows;
p[1] = im->cols;
}
data->new_im_size_[0] = im->rows;
data->new_im_size_[1] = im->cols;
for (int i = 0; i < transforms_.size(); ++i) {
if (!transforms_[i]->Run(im)) {
if (!transforms_[i]->Run(im, data)) {
std::cerr << "Apply transforms to image failed!" << std::endl;
return false;
}
}
// convert the image from NHWC to NCHW,
// storing it as a contiguous memory block in the Blob
SizeVector blobSize = blob->getTensorDesc().getDims();
// image format NHWC to NCHW
// save the image data into the ImageBlob
InferenceEngine::SizeVector blobSize = data->blob->getTensorDesc().getDims();
const size_t width = blobSize[3];
const size_t height = blobSize[2];
const size_t channels = blobSize[1];
MemoryBlob::Ptr mblob = InferenceEngine::as<MemoryBlob>(blob);
InferenceEngine::MemoryBlob::Ptr mblob =
InferenceEngine::as<InferenceEngine::MemoryBlob>(data->blob);
auto mblobHolder = mblob->wmap();
float *blob_data = mblobHolder.as<float *>();
for (size_t c = 0; c < channels; c++) {
......
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "include/paddlex/visualize.h"
namespace PaddleX {
std::vector<int> GenerateColorMap(int num_class) {
auto colormap = std::vector<int>(3 * num_class, 0);
for (int i = 0; i < num_class; ++i) {
int j = 0;
int lab = i;
while (lab) {
colormap[i * 3] |= (((lab >> 0) & 1) << (7 - j));
colormap[i * 3 + 1] |= (((lab >> 1) & 1) << (7 - j));
colormap[i * 3 + 2] |= (((lab >> 2) & 1) << (7 - j));
++j;
lab >>= 3;
}
}
return colormap;
}
cv::Mat Visualize(const cv::Mat& img,
const DetResult& result,
const std::map<int, std::string>& labels,
const std::vector<int>& colormap,
float threshold) {
cv::Mat vis_img = img.clone();
auto boxes = result.boxes;
for (int i = 0; i < boxes.size(); ++i) {
if (boxes[i].score < threshold) {
continue;
}
cv::Rect roi = cv::Rect(boxes[i].coordinate[0],
boxes[i].coordinate[1],
boxes[i].coordinate[2],
boxes[i].coordinate[3]);
// draw box and title
std::string text = boxes[i].category;
int c1 = colormap[3 * boxes[i].category_id + 0];
int c2 = colormap[3 * boxes[i].category_id + 1];
int c3 = colormap[3 * boxes[i].category_id + 2];
cv::Scalar roi_color = cv::Scalar(c1, c2, c3);
text += std::to_string(static_cast<int>(boxes[i].score * 100)) + "%";
int font_face = cv::FONT_HERSHEY_SIMPLEX;
double font_scale = 0.5f;
float thickness = 0.5;
cv::Size text_size =
cv::getTextSize(text, font_face, font_scale, thickness, nullptr);
cv::Point origin;
origin.x = roi.x;
origin.y = roi.y;
// background
cv::Rect text_back = cv::Rect(boxes[i].coordinate[0],
boxes[i].coordinate[1] - text_size.height,
text_size.width,
text_size.height);
// draw
cv::rectangle(vis_img, roi, roi_color, 2);
cv::rectangle(vis_img, text_back, roi_color, -1);
cv::putText(vis_img,
text,
origin,
font_face,
font_scale,
cv::Scalar(255, 255, 255),
thickness);
// mask
if (boxes[i].mask.data.size() == 0) {
continue;
}
cv::Mat bin_mask(result.mask_resolution,
result.mask_resolution,
CV_32FC1,
boxes[i].mask.data.data());
cv::resize(bin_mask,
bin_mask,
cv::Size(boxes[i].mask.shape[0], boxes[i].mask.shape[1]));
cv::threshold(bin_mask, bin_mask, 0.5, 1, cv::THRESH_BINARY);
cv::Mat full_mask = cv::Mat::zeros(vis_img.size(), CV_8UC1);
bin_mask.copyTo(full_mask(roi));
cv::Mat mask_ch[3];
mask_ch[0] = full_mask * c1;
mask_ch[1] = full_mask * c2;
mask_ch[2] = full_mask * c3;
cv::Mat mask;
cv::merge(mask_ch, 3, mask);
cv::addWeighted(vis_img, 1, mask, 0.5, 0, vis_img);
}
return vis_img;
}
cv::Mat Visualize(const cv::Mat& img,
const SegResult& result,
const std::map<int, std::string>& labels,
const std::vector<int>& colormap) {
std::vector<uint8_t> label_map(result.label_map.data.begin(),
result.label_map.data.end());
cv::Mat mask(result.label_map.shape[0],
result.label_map.shape[1],
CV_8UC1,
label_map.data());
cv::Mat color_mask = cv::Mat::zeros(
result.label_map.shape[0], result.label_map.shape[1], CV_8UC3);
int rows = img.rows;
int cols = img.cols;
for (int i = 0; i < rows; i++) {
for (int j = 0; j < cols; j++) {
int category_id = static_cast<int>(mask.at<uchar>(i, j));
color_mask.at<cv::Vec3b>(i, j)[0] = colormap[3 * category_id + 0];
color_mask.at<cv::Vec3b>(i, j)[1] = colormap[3 * category_id + 1];
color_mask.at<cv::Vec3b>(i, j)[2] = colormap[3 * category_id + 2];
}
}
return color_mask;
}
std::string generate_save_path(const std::string& save_dir,
const std::string& file_path) {
if (access(save_dir.c_str(), 0) < 0) {
#ifdef _WIN32
mkdir(save_dir.c_str());
#else
if (mkdir(save_dir.c_str(), S_IRWXU) < 0) {
std::cerr << "Fail to create " << save_dir << "directory." << std::endl;
}
#endif
}
int pos = file_path.find_last_of(OS_PATH_SEP);
std::string image_name(file_path.substr(pos + 1));
return save_dir + OS_PATH_SEP + image_name;
}
} // namespace PaddleX
cmake_minimum_required(VERSION 3.0)
project(PaddleX CXX C)
option(WITH_STATIC_LIB "Compile demo with static/shared library, default use static." OFF)
SET(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH})
SET(LITE_DIR "" CACHE PATH "Location of libraries")
SET(OPENCV_DIR "" CACHE PATH "Location of libraries")
SET(NGRAPH_LIB "" CACHE PATH "Location of libraries")
include(cmake/yaml-cpp.cmake)
include_directories("${CMAKE_SOURCE_DIR}/")
link_directories("${CMAKE_CURRENT_BINARY_DIR}")
include_directories("${CMAKE_CURRENT_BINARY_DIR}/ext/yaml-cpp/src/ext-yaml-cpp/include")
link_directories("${CMAKE_CURRENT_BINARY_DIR}/ext/yaml-cpp/lib")
macro(safe_set_static_flag)
foreach(flag_var
CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO)
if(${flag_var} MATCHES "/MD")
string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}")
endif(${flag_var} MATCHES "/MD")
endforeach(flag_var)
endmacro()
if (NOT DEFINED LITE_DIR OR ${LITE_DIR} STREQUAL "")
message(FATAL_ERROR "please set LITE_DIR with -LITE_DIR=/path/influence_engine")
endif()
if (NOT DEFINED OPENCV_DIR OR ${OPENCV_DIR} STREQUAL "")
message(FATAL_ERROR "please set OPENCV_DIR with -DOPENCV_DIR=/path/opencv")
endif()
if (NOT DEFINED GFLAGS_DIR OR ${GFLAGS_DIR} STREQUAL "")
message(FATAL_ERROR "please set GFLAGS_DIR with -DGFLAGS_DIR=/path/gflags")
endif()
link_directories("${LITE_DIR}/lib")
include_directories("${LITE_DIR}/include")
link_directories("${GFLAGS_DIR}/lib")
include_directories("${GFLAGS_DIR}/include")
if (WIN32)
find_package(OpenCV REQUIRED PATHS ${OPENCV_DIR}/build/ NO_DEFAULT_PATH)
unset(OpenCV_DIR CACHE)
else ()
find_package(OpenCV REQUIRED PATHS ${OPENCV_DIR}/cmake NO_DEFAULT_PATH)
endif ()
include_directories(${OpenCV_INCLUDE_DIRS})
if (WIN32)
add_definitions("/DGOOGLE_GLOG_DLL_DECL=")
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /bigobj /MTd")
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /bigobj /MT")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /bigobj /MTd")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /bigobj /MT")
if (WITH_STATIC_LIB)
safe_set_static_flag()
add_definitions(-DSTATIC_LIB)
endif()
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfloat-abi=hard -mfpu=neon-vfpv4 -g -o2 -fopenmp -std=c++11")
set(CMAKE_STATIC_LIBRARY_PREFIX "")
endif()
if(WITH_STATIC_LIB)
set(DEPS ${LITE_DIR}/lib/libpaddle_full_api_shared${CMAKE_STATIC_LIBRARY_SUFFIX})
else()
set(DEPS ${LITE_DIR}/lib/libpaddle_full_api_shared${CMAKE_SHARED_LIBRARY_SUFFIX})
endif()
if (NOT WIN32)
set(DEPS ${DEPS}
glog gflags z yaml-cpp
)
else()
set(DEPS ${DEPS}
glog gflags_static libprotobuf zlibstatic xxhash libyaml-cppmt)
set(DEPS ${DEPS} libcmt shlwapi)
endif(NOT WIN32)
if (NOT WIN32)
set(EXTERNAL_LIB "-ldl -lrt -lgomp -lz -lm -lpthread")
set(DEPS ${DEPS} ${EXTERNAL_LIB})
endif()
set(DEPS ${DEPS} ${OpenCV_LIBS})
add_executable(classifier demo/classifier.cpp src/transforms.cpp src/paddlex.cpp)
ADD_DEPENDENCIES(classifier ext-yaml-cpp)
target_link_libraries(classifier ${DEPS})
add_executable(segmenter demo/segmenter.cpp src/transforms.cpp src/paddlex.cpp src/visualize.cpp)
ADD_DEPENDENCIES(segmenter ext-yaml-cpp)
target_link_libraries(segmenter ${DEPS})
add_executable(detector demo/detector.cpp src/transforms.cpp src/paddlex.cpp src/visualize.cpp)
ADD_DEPENDENCIES(detector ext-yaml-cpp)
target_link_libraries(detector ${DEPS})
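# Example configure step for this CMakeLists (paths are placeholders):
#   cmake .. -DLITE_DIR=/path/to/paddle_lite -DOPENCV_DIR=/path/to/opencv -DGFLAGS_DIR=/path/to/gflags
#   make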
include(ExternalProject)
message("${CMAKE_BUILD_TYPE}")
ExternalProject_Add(
ext-yaml-cpp
URL https://bj.bcebos.com/paddlex/deploy/deps/yaml-cpp.zip
URL_MD5 9542d6de397d1fbd649ed468cb5850e6
CMAKE_ARGS
-DYAML_CPP_BUILD_TESTS=OFF
-DYAML_CPP_BUILD_TOOLS=OFF
-DYAML_CPP_INSTALL=OFF
-DYAML_CPP_BUILD_CONTRIB=OFF
-DMSVC_SHARED_RT=OFF
-DBUILD_SHARED_LIBS=OFF
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
-DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}
-DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}
-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=${CMAKE_BINARY_DIR}/ext/yaml-cpp/lib
-DCMAKE_ARCHIVE_OUTPUT_DIRECTORY=${CMAKE_BINARY_DIR}/ext/yaml-cpp/lib
PREFIX "${CMAKE_BINARY_DIR}/ext/yaml-cpp"
# Disable install step
INSTALL_COMMAND ""
LOG_DOWNLOAD ON
LOG_BUILD 1
)
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <glog/logging.h>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include "include/paddlex/paddlex.h"
DEFINE_string(model_dir, "", "Path of inference model");
DEFINE_string(cfg_file, "", "Path of PaddelX model yml file");
DEFINE_string(image, "", "Path of test image file");
DEFINE_string(image_list, "", "Path of test image list file");
DEFINE_int32(thread_num, 1, "num of thread to infer");
int main(int argc, char** argv) {
// Parsing command-line
google::ParseCommandLineFlags(&argc, &argv, true);
if (FLAGS_model_dir == "") {
std::cerr << "--model_dir need to be defined" << std::endl;
return -1;
}
if (FLAGS_cfg_file == "") {
std::cerr << "--cfg_flie need to be defined" << std::endl;
return -1;
}
if (FLAGS_image == "" & FLAGS_image_list == "") {
std::cerr << "--image or --image_list need to be defined" << std::endl;
return -1;
}
// load model
PaddleX::Model model;
model.Init(FLAGS_model_dir, FLAGS_cfg_file, FLAGS_thread_num);
std::cout << "init is done" << std::endl;
// predict
if (FLAGS_image_list != "") {
std::ifstream inf(FLAGS_image_list);
if (!inf) {
std::cerr << "Fail to open file " << FLAGS_image_list << std::endl;
return -1;
}
std::string image_path;
while (getline(inf, image_path)) {
PaddleX::ClsResult result;
cv::Mat im = cv::imread(image_path, 1);
model.predict(im, &result);
std::cout << "Predict label: " << result.category
<< ", label_id:" << result.category_id
<< ", score: " << result.score << std::endl;
}
} else {
PaddleX::ClsResult result;
cv::Mat im = cv::imread(FLAGS_image, 1);
model.predict(im, &result);
std::cout << "Predict label: " << result.category
<< ", label_id:" << result.category_id
<< ", score: " << result.score << std::endl;
}
return 0;
}
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <glog/logging.h>
#include <omp.h>
#include <algorithm>
#include <chrono> // NOLINT
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include <utility>
#include "include/paddlex/paddlex.h"
#include "include/paddlex/visualize.h"
using namespace std::chrono; // NOLINT
DEFINE_string(model_dir, "", "Path of openvino model xml file");
DEFINE_string(cfg_file, "", "Path of PaddleX model yaml file");
DEFINE_string(image, "", "Path of test image file");
DEFINE_string(image_list, "", "Path of test image list file");
DEFINE_int32(thread_num, 1, "num of thread to infer");
DEFINE_string(save_dir, "", "Path to save visualized image");
DEFINE_int32(batch_size, 1, "Batch size of infering");
DEFINE_double(threshold,
0.5,
"The minimum scores of target boxes which are shown");
int main(int argc, char** argv) {
google::ParseCommandLineFlags(&argc, &argv, true);
if (FLAGS_model_dir == "") {
std::cerr << "--model_dir need to be defined" << std::endl;
return -1;
}
if (FLAGS_cfg_file == "") {
std::cerr << "--cfg_file need to be defined" << std::endl;
return -1;
}
if (FLAGS_image == "" & FLAGS_image_list == "") {
std::cerr << "--image or --image_list need to be defined" << std::endl;
return -1;
}
// load model
PaddleX::Model model;
model.Init(FLAGS_model_dir, FLAGS_cfg_file, FLAGS_thread_num);
int imgs = 1;
auto colormap = PaddleX::GenerateColorMap(model.labels.size());
// predict
if (FLAGS_image_list != "") {
std::ifstream inf(FLAGS_image_list);
if (!inf) {
std::cerr << "Fail to open file " << FLAGS_image_list << std::endl;
return -1;
}
std::string image_path;
while (getline(inf, image_path)) {
PaddleX::DetResult result;
cv::Mat im = cv::imread(image_path, 1);
model.predict(im, &result);
if (FLAGS_save_dir != "") {
cv::Mat vis_img = PaddleX::Visualize(
im, result, model.labels, colormap, FLAGS_threshold);
std::string save_path =
PaddleX::generate_save_path(FLAGS_save_dir, image_path);
cv::imwrite(save_path, vis_img);
std::cout << "Visualized output saved as " << save_path << std::endl;
}
}
} else {
PaddleX::DetResult result;
cv::Mat im = cv::imread(FLAGS_image, 1);
model.predict(im, &result);
for (int i = 0; i < result.boxes.size(); ++i) {
std::cout << "image file: " << FLAGS_image << std::endl;
std::cout << ", predict label: " << result.boxes[i].category
<< ", label_id:" << result.boxes[i].category_id
<< ", score: " << result.boxes[i].score
<< ", box(xmin, ymin, w, h):(" << result.boxes[i].coordinate[0]
<< ", " << result.boxes[i].coordinate[1] << ", "
<< result.boxes[i].coordinate[2] << ", "
<< result.boxes[i].coordinate[3] << ")" << std::endl;
}
if (FLAGS_save_dir != "") {
// visualize
cv::Mat vis_img = PaddleX::Visualize(
im, result, model.labels, colormap, FLAGS_threshold);
std::string save_path =
PaddleX::generate_save_path(FLAGS_save_dir, FLAGS_image);
cv::imwrite(save_path, vis_img);
result.clear();
std::cout << "Visualized output saved as " << save_path << std::endl;
}
}
return 0;
}
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <glog/logging.h>
#include <algorithm>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include <utility>
#include "include/paddlex/paddlex.h"
#include "include/paddlex/visualize.h"
DEFINE_string(model_dir, "", "Path of openvino model xml file");
DEFINE_string(cfg_file, "", "Path of PaddleX model yaml file");
DEFINE_string(image, "", "Path of test image file");
DEFINE_string(image_list, "", "Path of test image list file");
DEFINE_string(save_dir, "", "Path to save visualized image");
DEFINE_int32(batch_size, 1, "Batch size of infering");
DEFINE_int32(thread_num, 1, "num of thread to infer");
int main(int argc, char** argv) {
google::ParseCommandLineFlags(&argc, &argv, true);
if (FLAGS_model_dir == "") {
std::cerr << "--model_dir need to be defined" << std::endl;
return -1;
}
if (FLAGS_cfg_file == "") {
std::cerr << "--cfg_file need to be defined" << std::endl;
return -1;
}
if (FLAGS_image == "" & FLAGS_image_list == "") {
std::cerr << "--image or --image_list need to be defined" << std::endl;
return -1;
}
// load model
std::cout << "init start" << std::endl;
PaddleX::Model model;
model.Init(FLAGS_model_dir, FLAGS_cfg_file, FLAGS_thread_num);
std::cout << "init done" << std::endl;
int imgs = 1;
auto colormap = PaddleX::GenerateColorMap(model.labels.size());
if (FLAGS_image_list != "") {
std::ifstream inf(FLAGS_image_list);
if (!inf) {
std::cerr << "Fail to open file " << FLAGS_image_list <<std::endl;
return -1;
}
std::string image_path;
while (getline(inf, image_path)) {
PaddleX::SegResult result;
cv::Mat im = cv::imread(image_path, 1);
model.predict(im, &result);
if (FLAGS_save_dir != "") {
cv::Mat vis_img = PaddleX::Visualize(im, result, model.labels, colormap);
std::string save_path =
PaddleX::generate_save_path(FLAGS_save_dir, image_path);
cv::imwrite(save_path, vis_img);
std::cout << "Visualized output saved as " << save_path << std::endl;
}
}
} else {
PaddleX::SegResult result;
cv::Mat im = cv::imread(FLAGS_image, 1);
model.predict(im, &result);
if (FLAGS_save_dir != "") {
cv::Mat vis_img = PaddleX::Visualize(im, result, model.labels, colormap);
std::string save_path =
PaddleX::generate_save_path(FLAGS_save_dir, FLAGS_image);
cv::imwrite(save_path, vis_img);
std::cout << "Visualized` output saved as " << save_path << std::endl;
}
result.clear();
}
return 0;
}
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <iostream>
#include <map>
#include <string>
#include <vector>
#include "yaml-cpp/yaml.h"
#ifdef _WIN32
#define OS_PATH_SEP "\\"
#else
#define OS_PATH_SEP "/"
#endif
namespace PaddleX {
// Inference model configuration parser
class ConfigPaser {
public:
ConfigPaser() {}
~ConfigPaser() {}
bool load_config(const std::string& model_dir,
const std::string& cfg = "model.yml") {
// Load as a YAML::Node
YAML::Node config;
config = YAML::LoadFile(model_dir + OS_PATH_SEP + cfg);
if (config["Transforms"].IsDefined()) {
YAML::Node transforms_ = config["Transforms"];
} else {
std::cerr << "There's no field 'Transforms' in model.yml" << std::endl;
return false;
}
return true;
}
YAML::Node Transforms_;
};
} // namespace PaddleX
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <arm_neon.h>
#include <paddle_api.h>
#include <functional>
#include <iostream>
#include <numeric>
#include <map>
#include <string>
#include <memory>
#include "include/paddlex/config_parser.h"
#include "include/paddlex/results.h"
#include "include/paddlex/transforms.h"
#include "yaml-cpp/yaml.h"
#ifdef _WIN32
#define OS_PATH_SEP "\\"
#else
#define OS_PATH_SEP "/"
#endif
namespace PaddleX {
class Model {
public:
void Init(const std::string& model_dir,
const std::string& cfg_file,
int thread_num) {
create_predictor(model_dir, cfg_file, thread_num);
}
void create_predictor(const std::string& model_dir,
const std::string& cfg_file,
int thread_num);
bool load_config(const std::string& model_dir);
bool preprocess(cv::Mat* input_im, ImageBlob* inputs);
bool predict(const cv::Mat& im, ClsResult* result);
bool predict(const cv::Mat& im, DetResult* result);
bool predict(const cv::Mat& im, SegResult* result);
std::string type;
std::string name;
std::map<int, std::string> labels;
Transforms transforms_;
ImageBlob inputs_;
std::shared_ptr<paddle::lite_api::PaddlePredictor> predictor_;
};
} // namespace PaddleX
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <iostream>
#include <string>
#include <vector>
namespace PaddleX {
template <class T>
struct Mask {
std::vector<T> data;
std::vector<int> shape;
void clear() {
data.clear();
shape.clear();
}
};
struct Box {
int category_id;
std::string category;
float score;
std::vector<float> coordinate;
Mask<float> mask;
};
class BaseResult {
public:
std::string type = "base";
};
class ClsResult : public BaseResult {
public:
int category_id;
std::string category;
float score;
std::string type = "cls";
};
class DetResult : public BaseResult {
public:
std::vector<Box> boxes;
int mask_resolution;
std::string type = "det";
void clear() { boxes.clear(); }
};
class SegResult : public BaseResult {
public:
Mask<int64_t> label_map;
Mask<float> score_map;
void clear() {
label_map.clear();
score_map.clear();
}
};
} // namespace PaddleX
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <yaml-cpp/yaml.h>
#include <paddle_api.h>
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include <iostream>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
namespace PaddleX {
/*
* @brief
* This class represents object for storing all preprocessed data
* */
class ImageBlob {
public:
// Original image height and width
std::vector<int> ori_im_size_ = std::vector<int>(2);
// Latest image height and width after preprocessing
std::vector<int> new_im_size_ = std::vector<int>(2);
// Image height and width before resize
std::vector<std::vector<int>> im_size_before_resize_;
// Reshape order
std::vector<std::string> reshape_order_;
// Resize scale
float scale = 1.0;
// Buffer for image data after preprocessing
std::unique_ptr<paddle::lite_api::Tensor> input_tensor_;
void clear() {
im_size_before_resize_.clear();
reshape_order_.clear();
}
};
// Abstraction of the preprocessing operation class
class Transform {
public:
virtual void Init(const YAML::Node& item) = 0;
virtual bool Run(cv::Mat* im, ImageBlob* data) = 0;
};
class Normalize : public Transform {
public:
virtual void Init(const YAML::Node& item) {
mean_ = item["mean"].as<std::vector<float>>();
std_ = item["std"].as<std::vector<float>>();
}
virtual bool Run(cv::Mat* im, ImageBlob* data);
private:
std::vector<float> mean_;
std::vector<float> std_;
};
class ResizeByShort : public Transform {
public:
virtual void Init(const YAML::Node& item) {
short_size_ = item["short_size"].as<int>();
if (item["max_size"].IsDefined()) {
max_size_ = item["max_size"].as<int>();
} else {
max_size_ = -1;
}
}
virtual bool Run(cv::Mat* im, ImageBlob* data);
private:
float GenerateScale(const cv::Mat& im);
int short_size_;
int max_size_;
};
/*
* @brief
 * This class executes the resize-by-long operation on an image matrix. It first
 * resizes the long side of the image to the specified length; the short side is
 * then resized in the same proportion.
* */
class ResizeByLong : public Transform {
public:
virtual void Init(const YAML::Node& item) {
long_size_ = item["long_size"].as<int>();
}
virtual bool Run(cv::Mat* im, ImageBlob* data);
private:
int long_size_;
};
/*
* @brief
 * This class executes the resize operation on an image matrix, resizing its
 * width and height to the specified lengths.
* */
class Resize : public Transform {
public:
virtual void Init(const YAML::Node& item) {
if (item["interp"].IsDefined()) {
interp_ = item["interp"].as<std::string>();
}
if (item["target_size"].IsScalar()) {
height_ = item["target_size"].as<int>();
width_ = item["target_size"].as<int>();
} else if (item["target_size"].IsSequence()) {
std::vector<int> target_size = item["target_size"].as<std::vector<int>>();
width_ = target_size[0];
height_ = target_size[1];
}
if (height_ <= 0 || width_ <= 0) {
std::cerr << "[Resize] target_size should greater than 0" << std::endl;
exit(-1);
}
}
virtual bool Run(cv::Mat* im, ImageBlob* data);
private:
int height_;
int width_;
std::string interp_;
};
class CenterCrop : public Transform {
public:
virtual void Init(const YAML::Node& item) {
if (item["crop_size"].IsScalar()) {
height_ = item["crop_size"].as<int>();
width_ = item["crop_size"].as<int>();
} else if (item["crop_size"].IsSequence()) {
std::vector<int> crop_size = item["crop_size"].as<std::vector<int>>();
width_ = crop_size[0];
height_ = crop_size[1];
}
}
virtual bool Run(cv::Mat* im, ImageBlob* data);
private:
int height_;
int width_;
};
/*
* @brief
 * This class executes the padding operation on an image matrix, adding a border
 * to its edges.
* */
class Padding : public Transform {
public:
virtual void Init(const YAML::Node& item) {
if (item["coarsest_stride"].IsDefined()) {
coarsest_stride_ = item["coarsest_stride"].as<int>();
if (coarsest_stride_ < 1) {
std::cerr << "[Padding] coarest_stride should greater than 0"
<< std::endl;
exit(-1);
}
}
if (item["target_size"].IsDefined()) {
if (item["target_size"].IsScalar()) {
width_ = item["target_size"].as<int>();
height_ = item["target_size"].as<int>();
} else if (item["target_size"].IsSequence()) {
width_ = item["target_size"].as<std::vector<int>>()[0];
height_ = item["target_size"].as<std::vector<int>>()[1];
}
}
if (item["im_padding_value"].IsDefined()) {
im_value_ = item["im_padding_value"].as<std::vector<float>>();
} else {
im_value_ = {0, 0, 0};
}
}
virtual bool Run(cv::Mat* im, ImageBlob* data);
private:
int coarsest_stride_ = -1;
int width_ = 0;
int height_ = 0;
std::vector<float> im_value_;
};
class Transforms {
public:
void Init(const YAML::Node& node, bool to_rgb = true);
std::shared_ptr<Transform> CreateTransform(const std::string& name);
bool Run(cv::Mat* im, ImageBlob* data);
private:
std::vector<std::shared_ptr<Transform>> transforms_;
bool to_rgb_ = true;
};
} // namespace PaddleX
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <iostream>
#include <map>
#include <vector>
#ifdef _WIN32
#include <direct.h>
#include <io.h>
#else // Linux/Unix
#include <dirent.h>
#include <sys/io.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#endif
#include <string>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include "include/paddlex/results.h"
#ifdef _WIN32
#define OS_PATH_SEP "\\"
#else
#define OS_PATH_SEP "/"
#endif
namespace PaddleX {
/*
* @brief
* Generate visualization colormap for each class
*
 * @param num_class: number of classes
* @return color map, the size of vector is 3 * num_class
* */
std::vector<int> GenerateColorMap(int num_class);
/*
* @brief
* Visualize the detection result
*
* @param img: initial image matrix
* @param results: the detection result
* @param labels: label map
* @param colormap: visualization color map
* @return visualized image matrix
* */
cv::Mat Visualize(const cv::Mat& img,
const DetResult& results,
const std::map<int, std::string>& labels,
const std::vector<int>& colormap,
float threshold = 0.5);
/*
* @brief
* Visualize the segmentation result
*
* @param img: initial image matrix
 * @param results: the segmentation result
* @param labels: label map
* @param colormap: visualization color map
* @return visualized image matrix
* */
cv::Mat Visualize(const cv::Mat& img,
const SegResult& result,
const std::map<int, std::string>& labels,
const std::vector<int>& colormap);
/*
* @brief
* generate save path for visualized image matrix
*
* @param save_dir: directory for saving visualized image matrix
 * @param file_path: source image file path
* @return path of saving visualized result
* */
std::string generate_save_path(const std::string& save_dir,
const std::string& file_path);
} // namespace PaddleX
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
\ No newline at end of file
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import os
import argparse
import deploy
def arg_parser():
parser = argparse.ArgumentParser()
parser.add_argument(
"--model_dir",
"-m",
type=str,
default=None,
help="path to openvino model .xml file")
parser.add_argument(
"--img", "-i", type=str, default=None, help="path to an image files")
parser.add_argument(
"--img_list", "-l", type=str, default=None, help="Path to a imglist")
parser.add_argument(
"--cfg_file",
"-c",
type=str,
default=None,
help="Path to PaddelX model yml file")
parser.add_argument(
"--thread_num",
"-t",
type=int,
default=1,
help="Path to PaddelX model yml file")
parser.add_argument(
"--input_shape",
"-ip",
type=str,
default=None,
help=" image input shape of model [NCHW] like [1,3,224,244] ")
return parser
def main():
parser = arg_parser()
args = parser.parse_args()
model_nb = args.model_dir
model_yaml = args.cfg_file
thread_num = args.thread_num
input_shape = args.input_shape
input_shape = input_shape[1:-1].split(",", 3)
shape = list(map(int, input_shape))
#model init
predictor = deploy.Predictor(model_nb, model_yaml, thread_num, shape)
#predict
if args.img_list is not None:
f = open(args.img_list)
lines = f.readlines()
for im_path in lines:
print(im_path)
predictor.predict(im_path.strip('\n'))
f.close()
else:
im_path = args.img
predictor.predict(im_path)
if __name__ == "__main__":
main()
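# Example invocation (paths are placeholders):
#   python demo.py -m model.xml -c model.yml -i test.jpg -ip "[1,3,224,224]"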
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import cls_transforms
from . import det_transforms
from . import seg_transforms
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .ops import *
import random
import os.path as osp
import numpy as np
from PIL import Image, ImageEnhance
class ClsTransform:
"""分类Transform的基类
"""
def __init__(self):
pass
class Compose(ClsTransform):
"""根据数据预处理/增强算子对输入数据进行操作。
所有操作的输入图像流形状均是[H, W, C],其中H为图像高,W为图像宽,C为图像通道数。
Args:
transforms (list): 数据预处理/增强算子。
Raises:
TypeError: 形参数据类型不满足需求。
ValueError: 数据长度不匹配。
"""
def __init__(self, transforms):
if not isinstance(transforms, list):
raise TypeError('The transforms must be a list!')
if len(transforms) < 1:
raise ValueError('The length of transforms ' + \
'must be at least 1!')
self.transforms = transforms
def __call__(self, im, label=None):
"""
Args:
im (str/np.ndarray): Image path or image data as np.ndarray.
label (int): Class id of the image.
Returns:
tuple: Fields required by the network; the fields are determined
by the last preprocessing operator in transforms.
"""
if isinstance(im, np.ndarray):
if len(im.shape) != 3:
raise Exception(
"im should be 3-dimension, but now is {}-dimensions".
format(len(im.shape)))
else:
try:
im = cv2.imread(im).astype('float32')
except Exception:
raise TypeError('Cannot read the image file {}!'.format(im))
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
for op in self.transforms:
outputs = op(im, label)
im = outputs[0]
if len(outputs) == 2:
label = outputs[1]
return outputs
def add_augmenters(self, augmenters):
if not isinstance(augmenters, list):
raise Exception(
"augmenters should be list type in func add_augmenters()")
transform_names = [type(x).__name__ for x in self.transforms]
for aug in augmenters:
if type(aug).__name__ in transform_names:
print(
"{} is already in ComposedTransforms, need to remove it from add_augmenters().".
format(type(aug).__name__))
self.transforms = augmenters + self.transforms
class Normalize(ClsTransform):
"""对图像进行标准化。
1. 对图像进行归一化到区间[0.0, 1.0]。
2. 对图像进行减均值除以标准差操作。
Args:
mean (list): 图像数据集的均值。默认为[0.485, 0.456, 0.406]。
std (list): 图像数据集的标准差。默认为[0.229, 0.224, 0.225]。
"""
def __init__(self, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]):
self.mean = mean
self.std = std
def __call__(self, im, label=None):
"""
Args:
im (np.ndarray): Image data as np.ndarray.
label (int): Class id of the image.
Returns:
tuple: When label is None, returns (im, ), the image np.ndarray;
otherwise returns (im, label), the image np.ndarray and its class id.
"""
mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
std = np.array(self.std)[np.newaxis, np.newaxis, :]
im = normalize(im, mean, std)
if label is None:
return (im, )
else:
return (im, label)
class ResizeByShort(ClsTransform):
"""根据图像短边对图像重新调整大小(resize)。
1. 获取图像的长边和短边长度。
2. 根据短边与short_size的比例,计算长边的目标长度,
此时高、宽的resize比例为short_size/原图短边长度。
3. 如果max_size>0,调整resize比例:
如果长边的目标长度>max_size,则高、宽的resize比例为max_size/原图长边长度;
4. 根据调整大小的比例对图像进行resize。
Args:
short_size (int): 调整大小后的图像目标短边长度。默认为256。
max_size (int): 长边目标长度的最大限制。默认为-1。
"""
def __init__(self, short_size=256, max_size=-1):
self.short_size = short_size
self.max_size = max_size
def __call__(self, im, label=None):
"""
Args:
im (np.ndarray): Image data as np.ndarray.
label (int): Class id of the image.
Returns:
tuple: When label is None, returns (im, ), the image np.ndarray;
otherwise returns (im, label), the image np.ndarray and its class id.
"""
im_short_size = min(im.shape[0], im.shape[1])
im_long_size = max(im.shape[0], im.shape[1])
scale = float(self.short_size) / im_short_size
if self.max_size > 0 and np.round(scale *
im_long_size) > self.max_size:
scale = float(self.max_size) / float(im_long_size)
resized_width = int(round(im.shape[1] * scale))
resized_height = int(round(im.shape[0] * scale))
im = cv2.resize(
im, (resized_width, resized_height),
interpolation=cv2.INTER_LINEAR)
if label is None:
return (im, )
else:
return (im, label)
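# Worked example (assumed input): with short_size=256 and max_size=-1, a
# 400x600 image gets scale = 256/400 = 0.64 and is resized to 256x384; with
# max_size=300 the long side would cap the scale at 300/600 = 0.5 instead.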
class CenterCrop(ClsTransform):
"""以图像中心点扩散裁剪长宽为`crop_size`的正方形
1. 计算剪裁的起始点。
2. 剪裁图像。
Args:
crop_size (int): 裁剪的目标边长。默认为224。
"""
def __init__(self, crop_size=224):
self.crop_size = crop_size
def __call__(self, im, label=None):
"""
Args:
im (np.ndarray): Image data as np.ndarray.
label (int): Class id of the image.
Returns:
tuple: When label is None, returns (im, ), the image np.ndarray;
otherwise returns (im, label), the image np.ndarray and its class id.
"""
im = center_crop(im, self.crop_size)
if label is None:
return (im, )
else:
return (im, label)
class ArrangeClassifier(ClsTransform):
"""获取训练/验证/预测所需信息。注意:此操作不需用户自己显示调用
Args:
mode (str): 指定数据用于何种用途,取值范围为['train', 'eval', 'test', 'quant']。
Raises:
ValueError: mode的取值不在['train', 'eval', 'test', 'quant']之内。
"""
def __init__(self, mode=None):
if mode not in ['train', 'eval', 'test', 'quant']:
raise ValueError(
"mode must be in ['train', 'eval', 'test', 'quant']!")
self.mode = mode
def __call__(self, im, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
label (int): 每张图像所对应的类别序号。
Returns:
tuple: 当mode为'train'或'eval'时,返回(im, label),分别对应图像np.ndarray数据、
图像类别id;当mode为'test'或'quant'时,返回(im, ),对应图像np.ndarray数据。
"""
im = permute(im, False).astype('float32')
if self.mode == 'train' or self.mode == 'eval':
outputs = (im, label)
else:
outputs = (im, )
return outputs
class ComposedClsTransforms(Compose):
""" 分类模型的基础Transforms流程,具体如下
训练阶段:
1. 随机从图像中crop一块子图,并resize成crop_size大小
2. 将1的输出按0.5的概率随机进行水平翻转
3. 将图像进行归一化
验证/预测阶段:
1. 将图像按比例Resize,使得最小边长度为crop_size[0] * 1.14
2. 从图像中心crop出一个大小为crop_size的图像
3. 将图像进行归一化
Args:
mode(str): 图像处理流程所处阶段,训练/验证/预测,分别对应'train', 'eval', 'test'
crop_size(int|list): 输入模型里的图像大小
mean(list): 图像均值
std(list): 图像方差
"""
def __init__(self,
mode,
crop_size=[224, 224],
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]):
width = crop_size
if isinstance(crop_size, list):
if crop_size[0] != crop_size[1]:
                raise Exception(
                    "In classifier models, width and height should be equal; please modify your `crop_size` parameter"
                )
            width = crop_size[0]
        if width % 32 != 0:
            raise Exception(
                "In classifier models, width and height should be multiples of 32, e.g. 224, 256, 320; please modify your `crop_size` parameter"
            )
if mode == 'train':
pass
else:
            # Transforms for evaluation/prediction
transforms = [
ResizeByShort(short_size=int(width * 1.14)),
CenterCrop(crop_size=width), Normalize(
mean=mean, std=std)
]
super(ComposedClsTransforms, self).__init__(transforms)
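# Usage sketch (illustration only): only the eval/test branch is runnable in
# this excerpt, since the train branch above is elided ('pass'). The pipeline
# is ResizeByShort(int(224 * 1.14) = 255) -> CenterCrop(224) -> Normalize.
def _composed_cls_demo():
    im = np.random.uniform(0, 255, (300, 400, 3)).astype('float32')
    eval_transforms = ComposedClsTransforms(mode='eval', crop_size=[224, 224])
    return eval_transforms(im)[0].shape  # (224, 224, 3), still HWC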
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
try:
from collections.abc import Sequence
except Exception:
from collections import Sequence
import random
import os.path as osp
import numpy as np
import cv2
from PIL import Image, ImageEnhance
from .ops import *
class DetTransform:
"""检测数据处理基类
"""
def __init__(self):
pass
class Compose(DetTransform):
"""根据数据预处理/增强列表对输入数据进行操作。
所有操作的输入图像流形状均是[H, W, C],其中H为图像高,W为图像宽,C为图像通道数。
Args:
transforms (list): 数据预处理/增强列表。
Raises:
TypeError: 形参数据类型不满足需求。
ValueError: 数据长度不匹配。
"""
def __init__(self, transforms):
if not isinstance(transforms, list):
raise TypeError('The transforms must be a list!')
if len(transforms) < 1:
raise ValueError('The length of transforms ' + \
'must be equal or larger than 1!')
self.transforms = transforms
self.use_mixup = False
for t in self.transforms:
if type(t).__name__ == 'MixupImage':
self.use_mixup = True
def __call__(self, im, im_info=None, label_info=None):
"""
Args:
im (str/np.ndarray): 图像路径/图像np.ndarray数据。
im_info (dict): 存储与图像相关的信息,dict中的字段如下:
- im_id (np.ndarray): 图像序列号,形状为(1,)。
- image_shape (np.ndarray): 图像原始大小,形状为(2,),
image_shape[0]为高,image_shape[1]为宽。
- mixup (list): list为[im, im_info, label_info],分别对应
与当前图像进行mixup的图像np.ndarray数据、图像相关信息、标注框相关信息;
注意,当前epoch若无需进行mixup,则无该字段。
label_info (dict): 存储与标注框相关的信息,dict中的字段如下:
- gt_bbox (np.ndarray): 真实标注框坐标[x1, y1, x2, y2],形状为(n, 4),
其中n代表真实标注框的个数。
- gt_class (np.ndarray): 每个真实标注框对应的类别序号,形状为(n, 1),
其中n代表真实标注框的个数。
- gt_score (np.ndarray): 每个真实标注框对应的混合得分,形状为(n, 1),
其中n代表真实标注框的个数。
- gt_poly (list): 每个真实标注框内的多边形分割区域,每个分割区域由点的x、y坐标组成,
长度为n,其中n代表真实标注框的个数。
- is_crowd (np.ndarray): 每个真实标注框中是否是一组对象,形状为(n, 1),
其中n代表真实标注框的个数。
- difficult (np.ndarray): 每个真实标注框中的对象是否为难识别对象,形状为(n, 1),
其中n代表真实标注框的个数。
Returns:
tuple: 根据网络所需字段所组成的tuple;
字段由transforms中的最后一个数据预处理操作决定。
"""
def decode_image(im_file, im_info, label_info):
if im_info is None:
im_info = dict()
if isinstance(im_file, np.ndarray):
                if len(im_file.shape) != 3:
                    raise Exception(
                        "im should be 3-dimensional, but it is {}-dimensional".
                        format(len(im_file.shape)))
                im = im_file
            else:
                try:
                    im = cv2.imread(im_file).astype('float32')
                except:
                    raise TypeError('Can\'t read the image file {}!'.format(
                        im_file))
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
# make default im_info with [h, w, 1]
im_info['im_resize_info'] = np.array(
[im.shape[0], im.shape[1], 1.], dtype=np.float32)
im_info['image_shape'] = np.array([im.shape[0],
im.shape[1]]).astype('int32')
if not self.use_mixup:
if 'mixup' in im_info:
del im_info['mixup']
# decode mixup image
if 'mixup' in im_info:
im_info['mixup'] = \
decode_image(im_info['mixup'][0],
im_info['mixup'][1],
im_info['mixup'][2])
if label_info is None:
return (im, im_info)
else:
return (im, im_info, label_info)
outputs = decode_image(im, im_info, label_info)
im = outputs[0]
im_info = outputs[1]
if len(outputs) == 3:
label_info = outputs[2]
for op in self.transforms:
if im is None:
return None
outputs = op(im, im_info, label_info)
im = outputs[0]
return outputs
def add_augmenters(self, augmenters):
if not isinstance(augmenters, list):
raise Exception(
"augmenters should be list type in func add_augmenters()")
transform_names = [type(x).__name__ for x in self.transforms]
for aug in augmenters:
if type(aug).__name__ in transform_names:
                print(
                    "{} is already in ComposedTransforms; remove it from add_augmenters().".
                    format(type(aug).__name__))
self.transforms = augmenters + self.transforms
class ResizeByShort(DetTransform):
"""根据图像的短边调整图像大小(resize)。
1. 获取图像的长边和短边长度。
2. 根据短边与short_size的比例,计算长边的目标长度,
此时高、宽的resize比例为short_size/原图短边长度。
3. 如果max_size>0,调整resize比例:
如果长边的目标长度>max_size,则高、宽的resize比例为max_size/原图长边长度。
4. 根据调整大小的比例对图像进行resize。
Args:
target_size (int): 短边目标长度。默认为800。
max_size (int): 长边目标长度的最大限制。默认为1333。
Raises:
TypeError: 形参数据类型不满足需求。
"""
def __init__(self, short_size=800, max_size=1333):
self.max_size = int(max_size)
if not isinstance(short_size, int):
raise TypeError(
"Type of short_size is invalid. Must be Integer, now is {}".
format(type(short_size)))
self.short_size = short_size
if not (isinstance(self.max_size, int)):
raise TypeError("max_size: input type is invalid.")
def __call__(self, im, im_info=None, label_info=None):
"""
Args:
im (numnp.ndarraypy): 图像np.ndarray数据。
im_info (dict, 可选): 存储与图像相关的信息。
label_info (dict, 可选): 存储与标注框相关的信息。
Returns:
tuple: 当label_info为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label_info不为空时,返回的tuple为(im, im_info, label_info),分别对应图像np.ndarray数据、
存储与标注框相关信息的字典。
其中,im_info更新字段为:
- im_resize_info (np.ndarray): resize后的图像高、resize后的图像宽、resize后的图像相对原始图的缩放比例
三者组成的np.ndarray,形状为(3,)。
Raises:
TypeError: 形参数据类型不满足需求。
ValueError: 数据长度不匹配。
"""
if im_info is None:
im_info = dict()
if not isinstance(im, np.ndarray):
raise TypeError("ResizeByShort: image type is not numpy.")
if len(im.shape) != 3:
raise ValueError('ResizeByShort: image is not 3-dimensional.')
im_short_size = min(im.shape[0], im.shape[1])
im_long_size = max(im.shape[0], im.shape[1])
scale = float(self.short_size) / im_short_size
if self.max_size > 0 and np.round(scale *
im_long_size) > self.max_size:
scale = float(self.max_size) / float(im_long_size)
resized_width = int(round(im.shape[1] * scale))
resized_height = int(round(im.shape[0] * scale))
im_resize_info = [resized_height, resized_width, scale]
im = cv2.resize(
im, (resized_width, resized_height),
interpolation=cv2.INTER_LINEAR)
im_info['im_resize_info'] = np.array(im_resize_info).astype(np.float32)
if label_info is None:
return (im, im_info)
else:
return (im, im_info, label_info)
class Padding(DetTransform):
"""1.将图像的长和宽padding至coarsest_stride的倍数。如输入图像为[300, 640],
`coarest_stride`为32,则由于300不为32的倍数,因此在图像最右和最下使用0值
进行padding,最终输出图像为[320, 640]。
2.或者,将图像的长和宽padding到target_size指定的shape,如输入的图像为[300,640],
a. `target_size` = 960,在图像最右和最下使用0值进行padding,最终输出
图像为[960, 960]。
b. `target_size` = [640, 960],在图像最右和最下使用0值进行padding,最终
输出图像为[640, 960]。
1. 如果coarsest_stride为1,target_size为None则直接返回。
2. 获取图像的高H、宽W。
3. 计算填充后图像的高H_new、宽W_new。
4. 构建大小为(H_new, W_new, 3)像素值为0的np.ndarray,
并将原图的np.ndarray粘贴于左上角。
Args:
coarsest_stride (int): 填充后的图像长、宽为该参数的倍数,默认为1。
target_size (int|list|tuple): 填充后的图像长、宽,默认为None,coarset_stride优先级更高。
Raises:
TypeError: 形参`target_size`数据类型不满足需求。
ValueError: 形参`target_size`为(list|tuple)时,长度不满足需求。
"""
def __init__(self, coarsest_stride=1, target_size=None):
self.coarsest_stride = coarsest_stride
if target_size is not None:
if not isinstance(target_size, int):
if not isinstance(target_size, tuple) and not isinstance(
target_size, list):
raise TypeError(
"Padding: Type of target_size must in (int|list|tuple)."
)
elif len(target_size) != 2:
raise ValueError(
"Padding: Length of target_size must equal 2.")
self.target_size = target_size
def __call__(self, im, im_info=None, label_info=None):
"""
Args:
im (numnp.ndarraypy): 图像np.ndarray数据。
im_info (dict, 可选): 存储与图像相关的信息。
label_info (dict, 可选): 存储与标注框相关的信息。
Returns:
tuple: 当label_info为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label_info不为空时,返回的tuple为(im, im_info, label_info),分别对应图像np.ndarray数据、
存储与标注框相关信息的字典。
Raises:
TypeError: 形参数据类型不满足需求。
ValueError: 数据长度不匹配。
ValueError: coarsest_stride,target_size需有且只有一个被指定。
ValueError: target_size小于原图的大小。
"""
if im_info is None:
im_info = dict()
if not isinstance(im, np.ndarray):
raise TypeError("Padding: image type is not numpy.")
if len(im.shape) != 3:
raise ValueError('Padding: image is not 3-dimensional.')
im_h, im_w, im_c = im.shape[:]
if isinstance(self.target_size, int):
padding_im_h = self.target_size
padding_im_w = self.target_size
elif isinstance(self.target_size, list) or isinstance(self.target_size,
tuple):
padding_im_w = self.target_size[0]
padding_im_h = self.target_size[1]
elif self.coarsest_stride > 0:
padding_im_h = int(
np.ceil(im_h / self.coarsest_stride) * self.coarsest_stride)
padding_im_w = int(
np.ceil(im_w / self.coarsest_stride) * self.coarsest_stride)
else:
            raise ValueError(
                "coarsest_stride (>1) or target_size (list|int) must be set in the Padding transform"
            )
pad_height = padding_im_h - im_h
pad_width = padding_im_w - im_w
if pad_height < 0 or pad_width < 0:
            raise ValueError(
                'the image size should be no larger than target_size, but the image size ({}, {}) is larger than target_size ({}, {})'
                .format(im_w, im_h, padding_im_w, padding_im_h))
padding_im = np.zeros(
(padding_im_h, padding_im_w, im_c), dtype=np.float32)
padding_im[:im_h, :im_w, :] = im
if label_info is None:
return (padding_im, im_info)
else:
return (padding_im, im_info, label_info)
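# A worked example (illustration only): with coarsest_stride=32, a 300 x 640
# image is zero-padded on the bottom/right to the next multiples of 32.
def _det_padding_demo():
    im = np.zeros((300, 640, 3), dtype='float32')
    padded, _ = Padding(coarsest_stride=32)(im)
    return padded.shape  # (320, 640, 3): ceil(300/32) * 32 = 320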
class Resize(DetTransform):
"""调整图像大小(resize)。
- 当目标大小(target_size)类型为int时,根据插值方式,
将图像resize为[target_size, target_size]。
- 当目标大小(target_size)类型为list或tuple时,根据插值方式,
将图像resize为target_size。
注意:当插值方式为“RANDOM”时,则随机选取一种插值方式进行resize。
Args:
target_size (int/list/tuple): 短边目标长度。默认为608。
interp (str): resize的插值方式,与opencv的插值方式对应,取值范围为
['NEAREST', 'LINEAR', 'CUBIC', 'AREA', 'LANCZOS4', 'RANDOM']。默认为"LINEAR"。
Raises:
TypeError: 形参数据类型不满足需求。
ValueError: 插值方式不在['NEAREST', 'LINEAR', 'CUBIC',
'AREA', 'LANCZOS4', 'RANDOM']中。
"""
# The interpolation mode
interp_dict = {
'NEAREST': cv2.INTER_NEAREST,
'LINEAR': cv2.INTER_LINEAR,
'CUBIC': cv2.INTER_CUBIC,
'AREA': cv2.INTER_AREA,
'LANCZOS4': cv2.INTER_LANCZOS4
}
def __init__(self, target_size=608, interp='LINEAR'):
self.interp = interp
if not (interp == "RANDOM" or interp in self.interp_dict):
raise ValueError("interp should be one of {}".format(
self.interp_dict.keys()))
if isinstance(target_size, list) or isinstance(target_size, tuple):
if len(target_size) != 2:
raise TypeError(
'when target is list or tuple, it should include 2 elements, but it is {}'
.format(target_size))
elif not isinstance(target_size, int):
raise TypeError(
"Type of target_size is invalid. Must be Integer or List or tuple, now is {}"
.format(type(target_size)))
self.target_size = target_size
def __call__(self, im, im_info=None, label_info=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (dict, 可选): 存储与图像相关的信息。
label_info (dict, 可选): 存储与标注框相关的信息。
Returns:
tuple: 当label_info为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label_info不为空时,返回的tuple为(im, im_info, label_info),分别对应图像np.ndarray数据、
存储与标注框相关信息的字典。
Raises:
TypeError: 形参数据类型不满足需求。
ValueError: 数据长度不匹配。
"""
if im_info is None:
im_info = dict()
if not isinstance(im, np.ndarray):
raise TypeError("Resize: image type is not numpy.")
if len(im.shape) != 3:
raise ValueError('Resize: image is not 3-dimensional.')
if self.interp == "RANDOM":
interp = random.choice(list(self.interp_dict.keys()))
else:
interp = self.interp
im = resize(im, self.target_size, self.interp_dict[interp])
if label_info is None:
return (im, im_info)
else:
return (im, im_info, label_info)
class Normalize(DetTransform):
"""对图像进行标准化。
1. 归一化图像到到区间[0.0, 1.0]。
2. 对图像进行减均值除以标准差操作。
Args:
mean (list): 图像数据集的均值。默认为[0.485, 0.456, 0.406]。
std (list): 图像数据集的标准差。默认为[0.229, 0.224, 0.225]。
Raises:
TypeError: 形参数据类型不满足需求。
"""
def __init__(self, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]):
self.mean = mean
self.std = std
if not (isinstance(self.mean, list) and isinstance(self.std, list)):
raise TypeError("NormalizeImage: input type is invalid.")
from functools import reduce
if reduce(lambda x, y: x * y, self.std) == 0:
raise TypeError('NormalizeImage: std is invalid!')
def __call__(self, im, im_info=None, label_info=None):
"""
Args:
im (numnp.ndarraypy): 图像np.ndarray数据。
im_info (dict, 可选): 存储与图像相关的信息。
label_info (dict, 可选): 存储与标注框相关的信息。
Returns:
tuple: 当label_info为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label_info不为空时,返回的tuple为(im, im_info, label_info),分别对应图像np.ndarray数据、
存储与标注框相关信息的字典。
"""
mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
std = np.array(self.std)[np.newaxis, np.newaxis, :]
im = normalize(im, mean, std)
if label_info is None:
return (im, im_info)
else:
return (im, im_info, label_info)
class ArrangeYOLOv3(DetTransform):
"""获取YOLOv3模型训练/验证/预测所需信息。
Args:
mode (str): 指定数据用于何种用途,取值范围为['train', 'eval', 'test', 'quant']。
Raises:
ValueError: mode的取值不在['train', 'eval', 'test', 'quant']之内。
"""
def __init__(self, mode=None):
if mode not in ['train', 'eval', 'test', 'quant']:
raise ValueError(
"mode must be in ['train', 'eval', 'test', 'quant']!")
self.mode = mode
def __call__(self, im, im_info=None, label_info=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (dict, 可选): 存储与图像相关的信息。
label_info (dict, 可选): 存储与标注框相关的信息。
Returns:
tuple: 当mode为'train'时,返回(im, gt_bbox, gt_class, gt_score, im_shape),分别对应
图像np.ndarray数据、真实标注框、真实标注框对应的类别、真实标注框混合得分、图像大小信息;
当mode为'eval'时,返回(im, im_shape, im_id, gt_bbox, gt_class, difficult),
分别对应图像np.ndarray数据、图像大小信息、图像id、真实标注框、真实标注框对应的类别、
真实标注框是否为难识别对象;当mode为'test'或'quant'时,返回(im, im_shape),
分别对应图像np.ndarray数据、图像大小信息。
Raises:
TypeError: 形参数据类型不满足需求。
ValueError: 数据长度不匹配。
"""
im = permute(im, False)
if self.mode == 'train':
pass
elif self.mode == 'eval':
pass
else:
if im_info is None:
                raise TypeError('Cannot do ArrangeYOLOv3! ' +
                                'Because im_info cannot be None!')
im_shape = im_info['image_shape']
outputs = (im, im_shape)
return outputs
class ComposedYOLOv3Transforms(Compose):
"""YOLOv3模型的图像预处理流程,具体如下,
训练阶段:
1. 在前mixup_epoch轮迭代中,使用MixupImage策略,见https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#mixupimage
2. 对图像进行随机扰动,包括亮度,对比度,饱和度和色调
3. 随机扩充图像,见https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#randomexpand
4. 随机裁剪图像
5. 将4步骤的输出图像Resize成shape参数的大小
6. 随机0.5的概率水平翻转图像
7. 图像归一化
验证/预测阶段:
1. 将图像Resize成shape参数大小
2. 图像归一化
Args:
mode(str): 图像处理流程所处阶段,训练/验证/预测,分别对应'train', 'eval', 'test'
shape(list): 输入模型中图像的大小,输入模型的图像会被Resize成此大小
mixup_epoch(int): 模型训练过程中,前mixup_epoch会使用mixup策略
mean(list): 图像均值
std(list): 图像方差
"""
def __init__(self,
mode,
shape=[608, 608],
mixup_epoch=250,
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]):
width = shape
if isinstance(shape, list):
if shape[0] != shape[1]:
raise Exception(
"In YOLOv3 model, width and height should be equal")
width = shape[0]
if width % 32 != 0:
            raise Exception(
                "In YOLOv3 models, width and height should be multiples of 32, e.g. 224, 256, 320"
            )
if mode == 'train':
            # Training-time transforms, including data augmentation
            pass
        else:
            # Transforms for evaluation/prediction
transforms = [
Resize(
target_size=width, interp='CUBIC'), Normalize(
mean=mean, std=std)
]
super(ComposedYOLOv3Transforms, self).__init__(transforms)
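# Usage sketch (illustration only): only the eval/test branch is runnable in
# this excerpt, since the train branch above is elided ('pass'). Images are
# resized to the square `shape` with cubic interpolation, then normalized.
def _composed_yolov3_demo():
    im = np.random.uniform(0, 255, (480, 640, 3)).astype('float32')
    transforms = ComposedYOLOv3Transforms(mode='eval', shape=[608, 608])
    return transforms(im)[0].shape  # (608, 608, 3)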
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import cv2
import math
import numpy as np
from PIL import Image, ImageEnhance
def normalize(im, mean, std):
im = im / 255.0
im -= mean
im /= std
return im
def permute(im, to_bgr=False):
im = np.swapaxes(im, 1, 2)
im = np.swapaxes(im, 1, 0)
if to_bgr:
im = im[[2, 1, 0], :, :]
return im
def resize_long(im, long_size=224, interpolation=cv2.INTER_LINEAR):
value = max(im.shape[0], im.shape[1])
scale = float(long_size) / float(value)
resized_width = int(round(im.shape[1] * scale))
resized_height = int(round(im.shape[0] * scale))
im = cv2.resize(
im, (resized_width, resized_height), interpolation=interpolation)
return im
def resize(im, target_size=608, interp=cv2.INTER_LINEAR):
if isinstance(target_size, list) or isinstance(target_size, tuple):
w = target_size[0]
h = target_size[1]
else:
w = target_size
h = target_size
im = cv2.resize(im, (w, h), interpolation=interp)
return im
def random_crop(im,
crop_size=224,
lower_scale=0.08,
lower_ratio=3. / 4,
upper_ratio=4. / 3):
scale = [lower_scale, 1.0]
ratio = [lower_ratio, upper_ratio]
aspect_ratio = math.sqrt(np.random.uniform(*ratio))
w = 1. * aspect_ratio
h = 1. / aspect_ratio
bound = min((float(im.shape[0]) / im.shape[1]) / (h**2),
(float(im.shape[1]) / im.shape[0]) / (w**2))
scale_max = min(scale[1], bound)
scale_min = min(scale[0], bound)
target_area = im.shape[0] * im.shape[1] * np.random.uniform(
scale_min, scale_max)
target_size = math.sqrt(target_area)
w = int(target_size * w)
h = int(target_size * h)
i = np.random.randint(0, im.shape[0] - h + 1)
j = np.random.randint(0, im.shape[1] - w + 1)
im = im[i:i + h, j:j + w, :]
im = cv2.resize(im, (crop_size, crop_size))
return im
def center_crop(im, crop_size=224):
height, width = im.shape[:2]
w_start = (width - crop_size) // 2
h_start = (height - crop_size) // 2
w_end = w_start + crop_size
h_end = h_start + crop_size
im = im[h_start:h_end, w_start:w_end, :]
return im
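# A worked example (illustration only): the crop offsets are
# (dim - crop_size) // 2, so a 256 x 320 image cropped to 224 keeps
# rows 16:240 and columns 48:272.
def _center_crop_demo():
    im = np.zeros((256, 320, 3), dtype='float32')
    return center_crop(im, crop_size=224).shape  # (224, 224, 3)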
def horizontal_flip(im):
if len(im.shape) == 3:
im = im[:, ::-1, :]
elif len(im.shape) == 2:
im = im[:, ::-1]
return im
def vertical_flip(im):
if len(im.shape) == 3:
im = im[::-1, :, :]
elif len(im.shape) == 2:
im = im[::-1, :]
return im
def bgr2rgb(im):
return im[:, :, ::-1]
def hue(im, hue_lower, hue_upper):
delta = np.random.uniform(hue_lower, hue_upper)
u = np.cos(delta * np.pi)
w = np.sin(delta * np.pi)
bt = np.array([[1.0, 0.0, 0.0], [0.0, u, -w], [0.0, w, u]])
tyiq = np.array([[0.299, 0.587, 0.114], [0.596, -0.274, -0.321],
[0.211, -0.523, 0.311]])
ityiq = np.array([[1.0, 0.956, 0.621], [1.0, -0.272, -0.647],
[1.0, -1.107, 1.705]])
t = np.dot(np.dot(ityiq, bt), tyiq).T
im = np.dot(im, t)
return im
def saturation(im, saturation_lower, saturation_upper):
delta = np.random.uniform(saturation_lower, saturation_upper)
gray = im * np.array([[[0.299, 0.587, 0.114]]], dtype=np.float32)
gray = gray.sum(axis=2, keepdims=True)
gray *= (1.0 - delta)
im *= delta
im += gray
return im
def contrast(im, contrast_lower, contrast_upper):
delta = np.random.uniform(contrast_lower, contrast_upper)
im *= delta
return im
def brightness(im, brightness_lower, brightness_upper):
delta = np.random.uniform(brightness_lower, brightness_upper)
im += delta
return im
def rotate(im, rotate_lower, rotate_upper):
rotate_delta = np.random.uniform(rotate_lower, rotate_upper)
im = im.rotate(int(rotate_delta))
return im
def resize_padding(im, max_side_len=2400):
    '''
    resize the image to a size that is a multiple of 32, as required by the network
    :param im: input image
    :param max_side_len: limit on the max image side length, to avoid GPU out-of-memory
    :return: the resized image and the resize ratios
    '''
h, w, _ = im.shape
resize_w = w
resize_h = h
# limit the max side
if max(resize_h, resize_w) > max_side_len:
ratio = float(
max_side_len) / resize_h if resize_h > resize_w else float(
max_side_len) / resize_w
else:
ratio = 1.
resize_h = int(resize_h * ratio)
resize_w = int(resize_w * ratio)
resize_h = resize_h if resize_h % 32 == 0 else (resize_h // 32 - 1) * 32
resize_w = resize_w if resize_w % 32 == 0 else (resize_w // 32 - 1) * 32
resize_h = max(32, resize_h)
resize_w = max(32, resize_w)
im = cv2.resize(im, (int(resize_w), int(resize_h)))
#im = cv2.resize(im, (512, 512))
ratio_h = resize_h / float(h)
ratio_w = resize_w / float(w)
_ratio = np.array([ratio_h, ratio_w]).reshape(-1, 2)
return im, _ratio
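# A worked example (illustration only): sides not already multiples of 32 are
# rounded *down* past the next multiple ((side // 32 - 1) * 32, floor 32), so
# a 500 x 700 input becomes 448 x 640 and the returned ratios are
# 448/500 = 0.896 and 640/700 ≈ 0.914.
def _resize_padding_demo():
    im = np.zeros((500, 700, 3), dtype='float32')
    out, ratio = resize_padding(im)
    return out.shape, ratio  # (448, 640, 3), [[0.896, 0.914...]]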
# coding: utf8
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .ops import *
import random
import os.path as osp
import numpy as np
from PIL import Image
import cv2
from collections import OrderedDict
class SegTransform:
""" 分割transform基类
"""
def __init__(self):
pass
class Compose(SegTransform):
"""根据数据预处理/增强算子对输入数据进行操作。
所有操作的输入图像流形状均是[H, W, C],其中H为图像高,W为图像宽,C为图像通道数。
Args:
transforms (list): 数据预处理/增强算子。
Raises:
TypeError: transforms不是list对象
ValueError: transforms元素个数小于1。
"""
def __init__(self, transforms):
if not isinstance(transforms, list):
raise TypeError('The transforms must be a list!')
if len(transforms) < 1:
raise ValueError('The length of transforms ' + \
'must be equal or larger than 1!')
self.transforms = transforms
self.to_rgb = False
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (str/np.ndarray): 图像路径/图像np.ndarray数据。
im_info (list): 存储图像reisze或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (str/np.ndarray): 标注图像路径/标注图像np.ndarray数据。
Returns:
tuple: 根据网络所需字段所组成的tuple;字段由transforms中的最后一个数据预处理操作决定。
"""
if im_info is None:
im_info = list()
if isinstance(im, np.ndarray):
            if len(im.shape) != 3:
                raise Exception(
                    "im should be 3-dimensional, but it is {}-dimensional".
                    format(len(im.shape)))
        else:
            try:
                im = cv2.imread(im).astype('float32')
            except:
                raise ValueError('Can\'t read the image file {}!'.format(im))
if self.to_rgb:
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
if label is not None:
if not isinstance(label, np.ndarray):
label = np.asarray(Image.open(label))
for op in self.transforms:
if isinstance(op, SegTransform):
outputs = op(im, im_info, label)
im = outputs[0]
if len(outputs) >= 2:
im_info = outputs[1]
if len(outputs) == 3:
label = outputs[2]
else:
im = execute_imgaug(op, im)
if label is not None:
outputs = (im, im_info, label)
else:
outputs = (im, im_info)
return outputs
def add_augmenters(self, augmenters):
if not isinstance(augmenters, list):
raise Exception(
"augmenters should be list type in func add_augmenters()")
transform_names = [type(x).__name__ for x in self.transforms]
for aug in augmenters:
if type(aug).__name__ in transform_names:
print("{} is already in ComposedTransforms, need to remove it from add_augmenters().".format(type(aug).__name__))
self.transforms = augmenters + self.transforms
class RandomHorizontalFlip(SegTransform):
"""以一定的概率对图像进行水平翻转。当存在标注图像时,则同步进行翻转。
Args:
prob (float): 随机水平翻转的概率。默认值为0.5。
"""
def __init__(self, prob=0.5):
self.prob = prob
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (list): 存储图像reisze或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
if random.random() < self.prob:
im = horizontal_flip(im)
if label is not None:
label = horizontal_flip(label)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
class RandomVerticalFlip(SegTransform):
"""以一定的概率对图像进行垂直翻转。当存在标注图像时,则同步进行翻转。
Args:
prob (float): 随机垂直翻转的概率。默认值为0.1。
"""
def __init__(self, prob=0.1):
self.prob = prob
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (list): 存储图像reisze或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
if random.random() < self.prob:
im = vertical_flip(im)
if label is not None:
label = vertical_flip(label)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
class Resize(SegTransform):
"""调整图像大小(resize),当存在标注图像时,则同步进行处理。
- 当目标大小(target_size)类型为int时,根据插值方式,
将图像resize为[target_size, target_size]。
- 当目标大小(target_size)类型为list或tuple时,根据插值方式,
将图像resize为target_size, target_size的输入应为[w, h]或(w, h)。
Args:
target_size (int|list|tuple): 目标大小。
interp (str): resize的插值方式,与opencv的插值方式对应,
可选的值为['NEAREST', 'LINEAR', 'CUBIC', 'AREA', 'LANCZOS4'],默认为"LINEAR"。
Raises:
TypeError: target_size不是int/list/tuple。
ValueError: target_size为list/tuple时元素个数不等于2。
AssertionError: interp的取值不在['NEAREST', 'LINEAR', 'CUBIC', 'AREA', 'LANCZOS4']之内。
"""
# The interpolation mode
interp_dict = {
'NEAREST': cv2.INTER_NEAREST,
'LINEAR': cv2.INTER_LINEAR,
'CUBIC': cv2.INTER_CUBIC,
'AREA': cv2.INTER_AREA,
'LANCZOS4': cv2.INTER_LANCZOS4
}
def __init__(self, target_size, interp='LINEAR'):
self.interp = interp
        assert interp in self.interp_dict, "interp should be one of {}".format(
            self.interp_dict.keys())
if isinstance(target_size, list) or isinstance(target_size, tuple):
if len(target_size) != 2:
raise ValueError(
'when target is list or tuple, it should include 2 elements, but it is {}'
.format(target_size))
elif not isinstance(target_size, int):
raise TypeError(
"Type of target_size is invalid. Must be Integer or List or tuple, now is {}"
.format(type(target_size)))
self.target_size = target_size
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (list): 存储图像reisze或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
其中,im_info跟新字段为:
-shape_before_resize (tuple): 保存resize之前图像的形状(h, w)。
Raises:
ZeroDivisionError: im的短边为0。
TypeError: im不是np.ndarray数据。
ValueError: im不是3维nd.ndarray。
"""
        if im_info is None:
            im_info = list()
        im_info.append(('resize', im.shape[:2]))
if not isinstance(im, np.ndarray):
raise TypeError("ResizeImage: image type is not np.ndarray.")
if len(im.shape) != 3:
raise ValueError('ResizeImage: image is not 3-dimensional.')
im_shape = im.shape
im_size_min = np.min(im_shape[0:2])
im_size_max = np.max(im_shape[0:2])
if float(im_size_min) == 0:
raise ZeroDivisionError('ResizeImage: min size of image is 0')
if isinstance(self.target_size, int):
resize_w = self.target_size
resize_h = self.target_size
else:
resize_w = self.target_size[0]
resize_h = self.target_size[1]
im_scale_x = float(resize_w) / float(im_shape[1])
im_scale_y = float(resize_h) / float(im_shape[0])
im = cv2.resize(
im,
None,
None,
fx=im_scale_x,
fy=im_scale_y,
interpolation=self.interp_dict[self.interp])
if label is not None:
label = cv2.resize(
label,
None,
None,
fx=im_scale_x,
fy=im_scale_y,
interpolation=self.interp_dict['NEAREST'])
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
class ResizeByLong(SegTransform):
"""对图像长边resize到固定值,短边按比例进行缩放。当存在标注图像时,则同步进行处理。
Args:
long_size (int): resize后图像的长边大小。
"""
def __init__(self, long_size):
self.long_size = long_size
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (list): 存储图像reisze或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
其中,im_info新增字段为:
-shape_before_resize (tuple): 保存resize之前图像的形状(h, w)。
"""
        if im_info is None:
            im_info = list()
        im_info.append(('resize', im.shape[:2]))
im = resize_long(im, self.long_size)
if label is not None:
label = resize_long(label, self.long_size, cv2.INTER_NEAREST)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
class ResizeByShort(SegTransform):
"""根据图像的短边调整图像大小(resize)。
1. 获取图像的长边和短边长度。
2. 根据短边与short_size的比例,计算长边的目标长度,
此时高、宽的resize比例为short_size/原图短边长度。
3. 如果max_size>0,调整resize比例:
如果长边的目标长度>max_size,则高、宽的resize比例为max_size/原图长边长度。
4. 根据调整大小的比例对图像进行resize。
Args:
target_size (int): 短边目标长度。默认为800。
max_size (int): 长边目标长度的最大限制。默认为1333。
Raises:
TypeError: 形参数据类型不满足需求。
"""
def __init__(self, short_size=800, max_size=1333):
self.max_size = int(max_size)
if not isinstance(short_size, int):
raise TypeError(
"Type of short_size is invalid. Must be Integer, now is {}".
format(type(short_size)))
self.short_size = short_size
if not (isinstance(self.max_size, int)):
raise TypeError("max_size: input type is invalid.")
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (numnp.ndarraypy): 图像np.ndarray数据。
im_info (list): 存储图像reisze或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
其中,im_info更新字段为:
-shape_before_resize (tuple): 保存resize之前图像的形状(h, w)。
Raises:
TypeError: 形参数据类型不满足需求。
ValueError: 数据长度不匹配。
"""
        if im_info is None:
            im_info = list()
if not isinstance(im, np.ndarray):
raise TypeError("ResizeByShort: image type is not numpy.")
if len(im.shape) != 3:
raise ValueError('ResizeByShort: image is not 3-dimensional.')
im_info.append(('resize', im.shape[:2]))
im_short_size = min(im.shape[0], im.shape[1])
im_long_size = max(im.shape[0], im.shape[1])
scale = float(self.short_size) / im_short_size
if self.max_size > 0 and np.round(scale *
im_long_size) > self.max_size:
scale = float(self.max_size) / float(im_long_size)
resized_width = int(round(im.shape[1] * scale))
resized_height = int(round(im.shape[0] * scale))
        im = cv2.resize(
            im, (resized_width, resized_height),
            interpolation=cv2.INTER_LINEAR)
        if label is not None:
            label = cv2.resize(
                label, (resized_width, resized_height),
                interpolation=cv2.INTER_NEAREST)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
class ResizeRangeScaling(SegTransform):
"""对图像长边随机resize到指定范围内,短边按比例进行缩放。当存在标注图像时,则同步进行处理。
Args:
min_value (int): 图像长边resize后的最小值。默认值400。
max_value (int): 图像长边resize后的最大值。默认值600。
Raises:
ValueError: min_value大于max_value
"""
def __init__(self, min_value=400, max_value=600):
if min_value > max_value:
raise ValueError('min_value must be less than max_value, '
'but they are {} and {}.'.format(min_value,
max_value))
self.min_value = min_value
self.max_value = max_value
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (list): 存储图像reisze或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
if self.min_value == self.max_value:
random_size = self.max_value
else:
random_size = int(
np.random.uniform(self.min_value, self.max_value) + 0.5)
im = resize_long(im, random_size, cv2.INTER_LINEAR)
if label is not None:
label = resize_long(label, random_size, cv2.INTER_NEAREST)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
class ResizeStepScaling(SegTransform):
"""对图像按照某一个比例resize,这个比例以scale_step_size为步长
在[min_scale_factor, max_scale_factor]随机变动。当存在标注图像时,则同步进行处理。
Args:
min_scale_factor(float), resize最小尺度。默认值0.75。
max_scale_factor (float), resize最大尺度。默认值1.25。
scale_step_size (float), resize尺度范围间隔。默认值0.25。
Raises:
ValueError: min_scale_factor大于max_scale_factor
"""
def __init__(self,
min_scale_factor=0.75,
max_scale_factor=1.25,
scale_step_size=0.25):
if min_scale_factor > max_scale_factor:
raise ValueError(
'min_scale_factor must be less than max_scale_factor, '
'but they are {} and {}.'.format(min_scale_factor,
max_scale_factor))
self.min_scale_factor = min_scale_factor
self.max_scale_factor = max_scale_factor
self.scale_step_size = scale_step_size
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (list): 存储图像reisze或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
if self.min_scale_factor == self.max_scale_factor:
scale_factor = self.min_scale_factor
elif self.scale_step_size == 0:
scale_factor = np.random.uniform(self.min_scale_factor,
self.max_scale_factor)
else:
num_steps = int((self.max_scale_factor - self.min_scale_factor) /
self.scale_step_size + 1)
scale_factors = np.linspace(self.min_scale_factor,
self.max_scale_factor,
num_steps).tolist()
np.random.shuffle(scale_factors)
scale_factor = scale_factors[0]
im = cv2.resize(
im, (0, 0),
fx=scale_factor,
fy=scale_factor,
interpolation=cv2.INTER_LINEAR)
if label is not None:
label = cv2.resize(
label, (0, 0),
fx=scale_factor,
fy=scale_factor,
interpolation=cv2.INTER_NEAREST)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
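# A worked example (illustration only): with the defaults (0.75, 1.25, step
# 0.25) the candidate factors are np.linspace(0.75, 1.25, 3), i.e.
# [0.75, 1.0, 1.25]; one factor is drawn per call and applied to both axes.
def _step_scaling_grid_demo():
    num_steps = int((1.25 - 0.75) / 0.25 + 1)  # 3
    return np.linspace(0.75, 1.25, num_steps).tolist()  # [0.75, 1.0, 1.25]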
class Normalize(SegTransform):
"""对图像进行标准化。
1.尺度缩放到 [0,1]。
2.对图像进行减均值除以标准差操作。
Args:
mean (list): 图像数据集的均值。默认值[0.5, 0.5, 0.5]。
std (list): 图像数据集的标准差。默认值[0.5, 0.5, 0.5]。
Raises:
ValueError: mean或std不是list对象。std包含0。
"""
def __init__(self, mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]):
self.mean = mean
self.std = std
if not (isinstance(self.mean, list) and isinstance(self.std, list)):
raise ValueError("{}: input type is invalid.".format(self))
from functools import reduce
if reduce(lambda x, y: x * y, self.std) == 0:
raise ValueError('{}: std is invalid!'.format(self))
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (list): 存储图像reisze或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
std = np.array(self.std)[np.newaxis, np.newaxis, :]
im = normalize(im, mean, std)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
class Padding(SegTransform):
"""对图像或标注图像进行padding,padding方向为右和下。
根据提供的值对图像或标注图像进行padding操作。
Args:
target_size (int|list|tuple): padding后图像的大小。
im_padding_value (list): 图像padding的值。默认为[127.5, 127.5, 127.5]。
label_padding_value (int): 标注图像padding的值。默认值为255。
Raises:
TypeError: target_size不是int|list|tuple。
ValueError: target_size为list|tuple时元素个数不等于2。
"""
def __init__(self,
target_size,
im_padding_value=[127.5, 127.5, 127.5],
label_padding_value=255):
if isinstance(target_size, list) or isinstance(target_size, tuple):
if len(target_size) != 2:
raise ValueError(
'when target is list or tuple, it should include 2 elements, but it is {}'
.format(target_size))
elif not isinstance(target_size, int):
raise TypeError(
"Type of target_size is invalid. Must be Integer or List or tuple, now is {}"
.format(type(target_size)))
self.target_size = target_size
self.im_padding_value = im_padding_value
self.label_padding_value = label_padding_value
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (list): 存储图像reisze或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
其中,im_info新增字段为:
-shape_before_padding (tuple): 保存padding之前图像的形状(h, w)。
Raises:
ValueError: 输入图像im或label的形状大于目标值
"""
        if im_info is None:
            im_info = list()
        im_info.append(('padding', im.shape[:2]))
im_height, im_width = im.shape[0], im.shape[1]
if isinstance(self.target_size, int):
target_height = self.target_size
target_width = self.target_size
else:
target_height = self.target_size[1]
target_width = self.target_size[0]
pad_height = target_height - im_height
pad_width = target_width - im_width
if pad_height < 0 or pad_width < 0:
            raise ValueError(
                'the image size should be no larger than target_size, but the image size ({}, {}) is larger than target_size ({}, {})'
                .format(im_width, im_height, target_width, target_height))
else:
im = cv2.copyMakeBorder(
im,
0,
pad_height,
0,
pad_width,
cv2.BORDER_CONSTANT,
value=self.im_padding_value)
if label is not None:
label = cv2.copyMakeBorder(
label,
0,
pad_height,
0,
pad_width,
cv2.BORDER_CONSTANT,
value=self.label_padding_value)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
class RandomPaddingCrop(SegTransform):
"""对图像和标注图进行随机裁剪,当所需要的裁剪尺寸大于原图时,则进行padding操作。
Args:
crop_size (int|list|tuple): 裁剪图像大小。默认为512。
im_padding_value (list): 图像padding的值。默认为[127.5, 127.5, 127.5]。
label_padding_value (int): 标注图像padding的值。默认值为255。
Raises:
TypeError: crop_size不是int/list/tuple。
ValueError: target_size为list/tuple时元素个数不等于2。
"""
def __init__(self,
crop_size=512,
im_padding_value=[127.5, 127.5, 127.5],
label_padding_value=255):
if isinstance(crop_size, list) or isinstance(crop_size, tuple):
if len(crop_size) != 2:
raise ValueError(
'when crop_size is list or tuple, it should include 2 elements, but it is {}'
.format(crop_size))
elif not isinstance(crop_size, int):
raise TypeError(
"Type of crop_size is invalid. Must be Integer or List or tuple, now is {}"
.format(type(crop_size)))
self.crop_size = crop_size
self.im_padding_value = im_padding_value
self.label_padding_value = label_padding_value
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (list): 存储图像reisze或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
if isinstance(self.crop_size, int):
crop_width = self.crop_size
crop_height = self.crop_size
else:
crop_width = self.crop_size[0]
crop_height = self.crop_size[1]
img_height = im.shape[0]
img_width = im.shape[1]
if img_height == crop_height and img_width == crop_width:
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
else:
pad_height = max(crop_height - img_height, 0)
pad_width = max(crop_width - img_width, 0)
if (pad_height > 0 or pad_width > 0):
im = cv2.copyMakeBorder(
im,
0,
pad_height,
0,
pad_width,
cv2.BORDER_CONSTANT,
value=self.im_padding_value)
if label is not None:
label = cv2.copyMakeBorder(
label,
0,
pad_height,
0,
pad_width,
cv2.BORDER_CONSTANT,
value=self.label_padding_value)
img_height = im.shape[0]
img_width = im.shape[1]
if crop_height > 0 and crop_width > 0:
h_off = np.random.randint(img_height - crop_height + 1)
w_off = np.random.randint(img_width - crop_width + 1)
im = im[h_off:(crop_height + h_off), w_off:(w_off + crop_width
), :]
if label is not None:
label = label[h_off:(crop_height + h_off), w_off:(
w_off + crop_width)]
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
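# A worked example (illustration only): a 300 x 400 image with crop_size=512
# is first padded to 512 x 512 (value 127.5), after which the "random" crop
# spans the whole padded image, so the output is exactly 512 x 512.
def _random_padding_crop_demo():
    im = np.zeros((300, 400, 3), dtype='float32')
    out = RandomPaddingCrop(crop_size=512)(im)[0]
    return out.shape  # (512, 512, 3)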
class RandomBlur(SegTransform):
"""以一定的概率对图像进行高斯模糊。
Args:
prob (float): 图像模糊概率。默认为0.1。
"""
def __init__(self, prob=0.1):
self.prob = prob
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (list): 存储图像reisze或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
if self.prob <= 0:
n = 0
elif self.prob >= 1:
n = 1
else:
n = int(1.0 / self.prob)
if n > 0:
if np.random.randint(0, n) == 0:
radius = np.random.randint(3, 10)
if radius % 2 != 1:
radius = radius + 1
if radius > 9:
radius = 9
im = cv2.GaussianBlur(im, (radius, radius), 0, 0)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
class RandomScaleAspect(SegTransform):
"""裁剪并resize回原始尺寸的图像和标注图像。
按照一定的面积比和宽高比对图像进行裁剪,并reszie回原始图像的图像,当存在标注图时,同步进行。
Args:
min_scale (float):裁取图像占原始图像的面积比,取值[0,1],为0时则返回原图。默认为0.5。
aspect_ratio (float): 裁取图像的宽高比范围,非负值,为0时返回原图。默认为0.33。
"""
def __init__(self, min_scale=0.5, aspect_ratio=0.33):
self.min_scale = min_scale
self.aspect_ratio = aspect_ratio
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (list): 存储图像reisze或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
if self.min_scale != 0 and self.aspect_ratio != 0:
img_height = im.shape[0]
img_width = im.shape[1]
for i in range(0, 10):
area = img_height * img_width
target_area = area * np.random.uniform(self.min_scale, 1.0)
aspectRatio = np.random.uniform(self.aspect_ratio,
1.0 / self.aspect_ratio)
dw = int(np.sqrt(target_area * 1.0 * aspectRatio))
dh = int(np.sqrt(target_area * 1.0 / aspectRatio))
if (np.random.randint(10) < 5):
tmp = dw
dw = dh
dh = tmp
if (dh < img_height and dw < img_width):
h1 = np.random.randint(0, img_height - dh)
w1 = np.random.randint(0, img_width - dw)
im = im[h1:(h1 + dh), w1:(w1 + dw), :]
label = label[h1:(h1 + dh), w1:(w1 + dw)]
im = cv2.resize(
im, (img_width, img_height),
interpolation=cv2.INTER_LINEAR)
label = cv2.resize(
label, (img_width, img_height),
interpolation=cv2.INTER_NEAREST)
break
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
class RandomDistort(SegTransform):
"""对图像进行随机失真。
1. 对变换的操作顺序进行随机化操作。
2. 按照1中的顺序以一定的概率对图像进行随机像素内容变换。
Args:
brightness_range (float): 明亮度因子的范围。默认为0.5。
brightness_prob (float): 随机调整明亮度的概率。默认为0.5。
contrast_range (float): 对比度因子的范围。默认为0.5。
contrast_prob (float): 随机调整对比度的概率。默认为0.5。
saturation_range (float): 饱和度因子的范围。默认为0.5。
saturation_prob (float): 随机调整饱和度的概率。默认为0.5。
hue_range (int): 色调因子的范围。默认为18。
hue_prob (float): 随机调整色调的概率。默认为0.5。
"""
def __init__(self,
brightness_range=0.5,
brightness_prob=0.5,
contrast_range=0.5,
contrast_prob=0.5,
saturation_range=0.5,
saturation_prob=0.5,
hue_range=18,
hue_prob=0.5):
self.brightness_range = brightness_range
self.brightness_prob = brightness_prob
self.contrast_range = contrast_range
self.contrast_prob = contrast_prob
self.saturation_range = saturation_range
self.saturation_prob = saturation_prob
self.hue_range = hue_range
self.hue_prob = hue_prob
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (list): 存储图像reisze或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
brightness_lower = 1 - self.brightness_range
brightness_upper = 1 + self.brightness_range
contrast_lower = 1 - self.contrast_range
contrast_upper = 1 + self.contrast_range
saturation_lower = 1 - self.saturation_range
saturation_upper = 1 + self.saturation_range
hue_lower = -self.hue_range
hue_upper = self.hue_range
ops = [brightness, contrast, saturation, hue]
random.shuffle(ops)
params_dict = {
'brightness': {
'brightness_lower': brightness_lower,
'brightness_upper': brightness_upper
},
'contrast': {
'contrast_lower': contrast_lower,
'contrast_upper': contrast_upper
},
'saturation': {
'saturation_lower': saturation_lower,
'saturation_upper': saturation_upper
},
'hue': {
'hue_lower': hue_lower,
'hue_upper': hue_upper
}
}
prob_dict = {
'brightness': self.brightness_prob,
'contrast': self.contrast_prob,
'saturation': self.saturation_prob,
'hue': self.hue_prob
}
for id in range(4):
params = params_dict[ops[id].__name__]
prob = prob_dict[ops[id].__name__]
params['im'] = im
if np.random.uniform(0, 1) < prob:
im = ops[id](**params)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
class ArrangeSegmenter(SegTransform):
"""获取训练/验证/预测所需的信息。
Args:
mode (str): 指定数据用于何种用途,取值范围为['train', 'eval', 'test', 'quant']。
Raises:
ValueError: mode的取值不在['train', 'eval', 'test', 'quant']之内
"""
def __init__(self, mode):
if mode not in ['train', 'eval', 'test', 'quant']:
raise ValueError(
"mode should be defined as one of ['train', 'eval', 'test', 'quant']!"
)
self.mode = mode
def __call__(self, im, im_info, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (list): 存储图像reisze或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当mode为'train'或'eval'时,返回的tuple为(im, label),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当mode为'test'时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;当mode为
'quant'时,返回的tuple为(im,),为图像np.ndarray数据。
"""
im = permute(im, False)
if self.mode == 'train' or self.mode == 'eval':
label = label[np.newaxis, :, :]
return (im, label)
elif self.mode == 'test':
return (im, im_info)
else:
return (im, )
class ComposedSegTransforms(Compose):
""" 语义分割模型(UNet/DeepLabv3p)的图像处理流程,具体如下
训练阶段:
1. 随机对图像以0.5的概率水平翻转
2. 按不同的比例随机Resize原图
3. 从原图中随机crop出大小为train_crop_size大小的子图,如若crop出来的图小于train_crop_size,则会将图padding到对应大小
4. 图像归一化
预测阶段:
1. 图像归一化
Args:
mode(str): 图像处理所处阶段,训练/验证/预测,分别对应'train', 'eval', 'test'
train_crop_size(list): 模型训练阶段,随机从原图crop的大小
mean(list): 图像均值
std(list): 图像方差
"""
def __init__(self,
mode,
train_crop_size=[769, 769],
mean=[0.5, 0.5, 0.5],
std=[0.5, 0.5, 0.5]):
        if mode == 'train':
            # Training-time transforms, including data augmentation
            pass
        else:
            # Transforms for evaluation/prediction
            transforms = [Normalize(mean=mean, std=std)]
super(ComposedSegTransforms, self).__init__(transforms)
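# Usage sketch (illustration only): only the eval/test branch is runnable in
# this excerpt, since the train branch above is elided ('pass'). At eval/test
# time the pipeline is Normalize only, so the spatial shape is preserved.
def _composed_seg_demo():
    im = np.random.uniform(0, 255, (100, 150, 3)).astype('float32')
    transforms = ComposedSegTransforms(mode='eval')
    return transforms(im)[0].shape  # (100, 150, 3)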
# Path to the Paddle-Lite precompiled library
LITE_DIR=/path/to/Paddle-Lite/inference/lib
# Path to the gflags precompiled library
GFLAGS_DIR=$(pwd)/deps/gflags
# Path to the glog precompiled library
GLOG_DIR=$(pwd)/deps/glog
# Path to the opencv precompiled library; no change needed if using the bundled prebuilt version
OPENCV_DIR=$(pwd)/deps/opencv
# Download the bundled prebuilt dependencies
# (sh instead of exec: exec would replace the shell and skip the build steps below)
sh $(pwd)/scripts/install_third-party.sh
rm -rf build
mkdir -p build
cd build
cmake .. \
-DOPENCV_DIR=${OPENCV_DIR} \
-DGFLAGS_DIR=${GFLAGS_DIR} \
-DLITE_DIR=${LITE_DIR} \
-DCMAKE_CXX_FLAGS="-march=armv7-a"
make
# download third-part lib
if [ ! -d "./deps" ]; then
mkdir deps
fi
if [ ! -d "./deps/gflag" ]; then
cd deps
git clone https://github.com/gflags/gflags
cd gflags
cmake .
make -j 4
cd ..
cd ..
fi
if [ ! -d "./deps/glog" ]; then
cd deps
git clone https://github.com/google/glog
sudo apt-get install autoconf automake libtool
cd glog
./autogen.sh
./configure
make -j 4
cd ..
cd ..
fi
OPENCV_URL=https://bj.bcebos.com/paddlex/deploy/armopencv/opencv.tar.bz2
if [ ! -d "./deps/opencv" ]; then
cd deps
wget -c ${OPENCV_URL}
tar xvfj opencv.tar.bz2
rm -rf opencv.tar.bz2
cd ..
fi
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "include/paddlex/paddlex.h"
#include <iostream>
#include <fstream>
namespace PaddleX {
void Model::create_predictor(const std::string& model_dir,
const std::string& cfg_file,
int thread_num) {
paddle::lite_api::MobileConfig config;
config.set_model_from_file(model_dir);
config.set_threads(thread_num);
load_config(cfg_file);
predictor_ =
paddle::lite_api::CreatePaddlePredictor<paddle::lite_api::MobileConfig>(
config);
}
bool Model::load_config(const std::string& cfg_file) {
YAML::Node config = YAML::LoadFile(cfg_file);
type = config["_Attributes"]["model_type"].as<std::string>();
name = config["Model"].as<std::string>();
bool to_rgb = true;
if (config["TransformsMode"].IsDefined()) {
std::string mode = config["TransformsMode"].as<std::string>();
if (mode == "BGR") {
to_rgb = false;
} else if (mode != "RGB") {
std::cerr << "[Init] Only 'RGB' or 'BGR' is supported for TransformsMode"
<< std::endl;
return false;
}
}
// init preprocess ops
transforms_.Init(config["Transforms"], to_rgb);
// read label list
for (const auto& item : config["_Attributes"]["labels"]) {
int index = labels.size();
labels[index] = item.as<std::string>();
}
return true;
}
bool Model::preprocess(cv::Mat* input_im, ImageBlob* inputs) {
if (!transforms_.Run(input_im, inputs)) {
return false;
}
return true;
}
bool Model::predict(const cv::Mat& im, ClsResult* result) {
inputs_.clear();
if (type == "detector") {
std::cerr << "Loading model is a 'detector', DetResult should be passed to "
"function predict()!"
<< std::endl;
return false;
} else if (type == "segmenter") {
std::cerr << "Loading model is a 'segmenter', SegResult should be passed "
"to function predict()!"
<< std::endl;
return false;
}
// preprocess
inputs_.input_tensor_ = std::move(predictor_->GetInput(0));
cv::Mat im_clone = im.clone();
if (!preprocess(&im_clone, &inputs_)) {
std::cerr << "Preprocess failed!" << std::endl;
return false;
}
// predict
predictor_->Run();
std::unique_ptr<const paddle::lite_api::Tensor> output_tensor(
std::move(predictor_->GetOutput(0)));
const float *outputs_data = output_tensor->mutable_data<float>();
  // postprocess: the score count is the product of the output dims
  // (sizeof() of the data pointer would be wrong here)
  int64_t output_size = 1;
  for (const auto& i : output_tensor->shape()) {
    output_size *= i;
  }
  auto ptr = std::max_element(outputs_data, outputs_data + output_size);
  result->category_id = std::distance(outputs_data, ptr);
  result->score = *ptr;
  result->category = labels[result->category_id];
  return true;
}
bool Model::predict(const cv::Mat& im, DetResult* result) {
inputs_.clear();
result->clear();
if (type == "classifier") {
std::cerr << "Loading model is a 'classifier', ClsResult should be passed "
"to function predict()!" << std::endl;
return false;
} else if (type == "segmenter") {
std::cerr << "Loading model is a 'segmenter', SegResult should be passed "
"to function predict()!" << std::endl;
return false;
}
inputs_.input_tensor_ = std::move(predictor_->GetInput(0));
cv::Mat im_clone = im.clone();
if (!preprocess(&im_clone, &inputs_)) {
std::cerr << "Preprocess failed!" << std::endl;
return false;
}
int h = inputs_.new_im_size_[0];
int w = inputs_.new_im_size_[1];
if (name == "YOLOv3") {
std::unique_ptr<paddle::lite_api::Tensor> im_size_tensor(
std::move(predictor_->GetInput(1)));
const std::vector<int64_t> IM_SIZE_SHAPE = {1, 2};
im_size_tensor->Resize(IM_SIZE_SHAPE);
auto *im_size_data = im_size_tensor->mutable_data<int>();
memcpy(im_size_data, inputs_.ori_im_size_.data(), 1*2*sizeof(int));
}
predictor_->Run();
auto output_names = predictor_->GetOutputNames();
auto output_box_tensor = predictor_->GetTensor(output_names[0]);
const float *output_box = output_box_tensor->mutable_data<float>();
std::vector<int64_t> output_box_shape = output_box_tensor->shape();
int size = 1;
for (const auto& i : output_box_shape) {
size *= i;
}
int num_boxes = size / 6;
for (int i = 0; i < num_boxes; ++i) {
Box box;
box.category_id = static_cast<int>(round(output_box[i * 6]));
box.category = labels[box.category_id];
box.score = output_box[i * 6 + 1];
float xmin = output_box[i * 6 + 2];
float ymin = output_box[i * 6 + 3];
float xmax = output_box[i * 6 + 4];
float ymax = output_box[i * 6 + 5];
float w = xmax - xmin + 1;
float h = ymax - ymin + 1;
box.coordinate = {xmin, ymin, w, h};
result->boxes.push_back(std::move(box));
}
return true;
}
bool Model::predict(const cv::Mat& im, SegResult* result) {
result->clear();
inputs_.clear();
if (type == "classifier") {
std::cerr << "Loading model is a 'classifier', ClsResult should be passed "
"to function predict()!" << std::endl;
return false;
} else if (type == "detector") {
std::cerr << "Loading model is a 'detector', DetResult should be passed to "
"function predict()!" << std::endl;
return false;
}
inputs_.input_tensor_ = std::move(predictor_->GetInput(0));
cv::Mat im_clone = im.clone();
if (!preprocess(&im_clone, &inputs_)) {
std::cerr << "Preprocess failed!" << std::endl;
return false;
}
std::cout << "Preprocess is done" << std::endl;
predictor_->Run();
auto output_names = predictor_->GetOutputNames();
auto output_label_tensor = predictor_->GetTensor(output_names[0]);
const int64_t *label_data = output_label_tensor->mutable_data<int64_t>();
std::vector<int64_t> output_label_shape = output_label_tensor->shape();
int size = 1;
for (const auto& i : output_label_shape) {
size *= i;
result->label_map.shape.push_back(i);
}
result->label_map.data.resize(size);
memcpy(result->label_map.data.data(), label_data, size*sizeof(int64_t));
auto output_score_tensor = predictor_->GetTensor(output_names[1]);
const float *score_data = output_score_tensor->mutable_data<float>();
std::vector<int64_t> output_score_shape = output_score_tensor->shape();
size = 1;
for (const auto& i : output_score_shape) {
size *= i;
result->score_map.shape.push_back(i);
}
result->score_map.data.resize(size);
memcpy(result->score_map.data.data(), score_data, size*sizeof(float));
std::vector<uint8_t> label_map(result->label_map.data.begin(),
result->label_map.data.end());
cv::Mat mask_label(result->label_map.shape[1],
result->label_map.shape[2],
CV_8UC1,
label_map.data());
cv::Mat mask_score(result->score_map.shape[2],
result->score_map.shape[3],
CV_32FC1,
result->score_map.data.data());
int idx = 1;
int len_postprocess = inputs_.im_size_before_resize_.size();
for (std::vector<std::string>::reverse_iterator iter =
inputs_.reshape_order_.rbegin();
iter != inputs_.reshape_order_.rend();
++iter) {
if (*iter == "padding") {
auto before_shape = inputs_.im_size_before_resize_[len_postprocess - idx];
inputs_.im_size_before_resize_.pop_back();
auto padding_w = before_shape[0];
auto padding_h = before_shape[1];
mask_label = mask_label(cv::Rect(0, 0, padding_h, padding_w));
mask_score = mask_score(cv::Rect(0, 0, padding_h, padding_w));
} else if (*iter == "resize") {
auto before_shape = inputs_.im_size_before_resize_[len_postprocess - idx];
inputs_.im_size_before_resize_.pop_back();
auto resize_w = before_shape[0];
auto resize_h = before_shape[1];
cv::resize(mask_label,
mask_label,
cv::Size(resize_h, resize_w),
0,
0,
cv::INTER_NEAREST);
cv::resize(mask_score,
mask_score,
cv::Size(resize_h, resize_w),
0,
0,
cv::INTER_LINEAR);
}
++idx;
}
result->label_map.data.assign(mask_label.begin<uint8_t>(),
mask_label.end<uint8_t>());
result->label_map.shape = {mask_label.rows, mask_label.cols};
result->score_map.data.assign(mask_score.begin<float>(),
mask_score.end<float>());
result->score_map.shape = {mask_score.rows, mask_score.cols};
return true;
}
} // namespace PaddleX
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "include/paddlex/transforms.h"
#include <math.h>
#include <iostream>
#include <string>
#include <vector>
namespace PaddleX {
std::map<std::string, int> interpolations = {{"LINEAR", cv::INTER_LINEAR},
{"NEAREST", cv::INTER_NEAREST},
{"AREA", cv::INTER_AREA},
{"CUBIC", cv::INTER_CUBIC},
{"LANCZOS4", cv::INTER_LANCZOS4}};
bool Normalize::Run(cv::Mat* im, ImageBlob* data) {
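// Assumes the image was already converted to CV_32FC3 (done in Transforms::Run);
// each channel becomes ((v / 255) - mean) / std.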
for (int h = 0; h < im->rows; h++) {
for (int w = 0; w < im->cols; w++) {
im->at<cv::Vec3f>(h, w)[0] =
(im->at<cv::Vec3f>(h, w)[0] / 255.0 - mean_[0]) / std_[0];
im->at<cv::Vec3f>(h, w)[1] =
(im->at<cv::Vec3f>(h, w)[1] / 255.0 - mean_[1]) / std_[1];
im->at<cv::Vec3f>(h, w)[2] =
(im->at<cv::Vec3f>(h, w)[2] / 255.0 - mean_[2]) / std_[2];
}
}
return true;
}
float ResizeByShort::GenerateScale(const cv::Mat& im) {
int origin_w = im.cols;
int origin_h = im.rows;
int im_size_max = std::max(origin_w, origin_h);
int im_size_min = std::min(origin_w, origin_h);
float scale =
static_cast<float>(short_size_) / static_cast<float>(im_size_min);
if (max_size_ > 0) {
if (round(scale * im_size_max) > max_size_) {
scale = static_cast<float>(max_size_) / static_cast<float>(im_size_max);
}
}
return scale;
}
bool ResizeByShort::Run(cv::Mat* im, ImageBlob* data) {
data->im_size_before_resize_.push_back({im->rows, im->cols});
data->reshape_order_.push_back("resize");
float scale = GenerateScale(*im);
int width = static_cast<int>(round(scale * im->cols));
int height = static_cast<int>(round(scale * im->rows));
cv::resize(*im, *im, cv::Size(width, height), 0, 0, cv::INTER_LINEAR);
data->new_im_size_[0] = im->rows;
data->new_im_size_[1] = im->cols;
data->scale = scale;
return true;
}
bool CenterCrop::Run(cv::Mat* im, ImageBlob* data) {
int height = static_cast<int>(im->rows);
int width = static_cast<int>(im->cols);
if (height < height_ || width < width_) {
std::cerr << "[CenterCrop] Image size less than crop size" << std::endl;
return false;
}
int offset_x = static_cast<int>((width - width_) / 2);
int offset_y = static_cast<int>((height - height_) / 2);
cv::Rect crop_roi(offset_x, offset_y, width_, height_);
*im = (*im)(crop_roi);
data->new_im_size_[0] = im->rows;
data->new_im_size_[1] = im->cols;
return true;
}
bool Padding::Run(cv::Mat* im, ImageBlob* data) {
data->im_size_before_resize_.push_back({im->rows, im->cols});
data->reshape_order_.push_back("padding");
int padding_w = 0;
int padding_h = 0;
if (width_ > 1 && height_ > 1) {
padding_w = width_ - im->cols;
padding_h = height_ - im->rows;
} else if (coarsest_stride_ >= 1) {
int h = im->rows;
int w = im->cols;
padding_h =
ceil(h * 1.0 / coarsest_stride_) * coarsest_stride_ - im->rows;
padding_w =
ceil(w * 1.0 / coarsest_stride_) * coarsest_stride_ - im->cols;
}
if (padding_h < 0 || padding_w < 0) {
std::cerr << "[Padding] Computed padding_h=" << padding_h
<< ", padding_w=" << padding_w
<< ", but they should be greater than 0." << std::endl;
return false;
}
cv::Scalar value = cv::Scalar(im_value_[0], im_value_[1], im_value_[2]);
cv::copyMakeBorder(
*im, *im, 0, padding_h, 0, padding_w, cv::BORDER_CONSTANT, value);
data->new_im_size_[0] = im->rows;
data->new_im_size_[1] = im->cols;
return true;
}
bool ResizeByLong::Run(cv::Mat* im, ImageBlob* data) {
if (long_size_ <= 0) {
std::cerr << "[ResizeByLong] long_size should be greater than 0"
<< std::endl;
return false;
}
data->im_size_before_resize_.push_back({im->rows, im->cols});
data->reshape_order_.push_back("resize");
int origin_w = im->cols;
int origin_h = im->rows;
int im_size_max = std::max(origin_w, origin_h);
float scale =
static_cast<float>(long_size_) / static_cast<float>(im_size_max);
cv::resize(*im, *im, cv::Size(), scale, scale, cv::INTER_NEAREST);
data->new_im_size_[0] = im->rows;
data->new_im_size_[1] = im->cols;
data->scale = scale;
return true;
}
bool Resize::Run(cv::Mat* im, ImageBlob* data) {
if (width_ <= 0 || height_ <= 0) {
std::cerr << "[Resize] width and height should be greater than 0"
<< std::endl;
return false;
}
if (interpolations.count(interp_) <= 0) {
std::cerr << "[Resize] Invalid interpolation method: '" << interp_ << "'"
<< std::endl;
return false;
}
data->im_size_before_resize_.push_back({im->rows, im->cols});
data->reshape_order_.push_back("resize");
cv::resize(
*im, *im, cv::Size(width_, height_), 0, 0, interpolations[interp_]);
data->new_im_size_[0] = im->rows;
data->new_im_size_[1] = im->cols;
return true;
}
void Transforms::Init(const YAML::Node& transforms_node, bool to_rgb) {
transforms_.clear();
to_rgb_ = to_rgb;
for (const auto& item : transforms_node) {
std::string name = item.begin()->first.as<std::string>();
std::cout << "trans name: " << name << std::endl;
std::shared_ptr<Transform> transform = CreateTransform(name);
transform->Init(item.begin()->second);
transforms_.push_back(transform);
}
}
std::shared_ptr<Transform> Transforms::CreateTransform(
const std::string& transform_name) {
if (transform_name == "Normalize") {
return std::make_shared<Normalize>();
} else if (transform_name == "ResizeByShort") {
return std::make_shared<ResizeByShort>();
} else if (transform_name == "CenterCrop") {
return std::make_shared<CenterCrop>();
} else if (transform_name == "Resize") {
return std::make_shared<Resize>();
} else if (transform_name == "Padding") {
return std::make_shared<Padding>();
} else if (transform_name == "ResizeByLong") {
return std::make_shared<ResizeByLong>();
} else {
std::cerr << "There's unexpected transform(name='" << transform_name
<< "')." << std::endl;
exit(-1);
}
}
bool Transforms::Run(cv::Mat* im, ImageBlob* data) {
// preprocess by order
if (to_rgb_) {
cv::cvtColor(*im, *im, cv::COLOR_BGR2RGB);
}
(*im).convertTo(*im, CV_32FC3);
data->ori_im_size_[0] = im->rows;
data->ori_im_size_[1] = im->cols;
data->new_im_size_[0] = im->rows;
data->new_im_size_[1] = im->cols;
for (int i = 0; i < transforms_.size(); ++i) {
if (!transforms_[i]->Run(im, data)) {
std::cerr << "Apply transforms to image failed!" << std::endl;
return false;
}
}
// image format NHWC to NCHW
// img data save to ImageBlob
int height = im->rows;
int width = im->cols;
int channels = im->channels();
const std::vector<int64_t> INPUT_SHAPE = {1, channels, height, width};
data->input_tensor_->Resize(INPUT_SHAPE);
auto *input_data = data->input_tensor_->mutable_data<float>();
for (size_t c = 0; c < channels; c++) {
for (size_t h = 0; h < height; h++) {
for (size_t w = 0; w < width; w++) {
input_data[c * width * height + h * width + w] =
im->at<cv::Vec3f>(h, w)[c];
}
}
}
return true;
}
} // namespace PaddleX
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "include/paddlex/visualize.h"
namespace PaddleX {
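// Generates a Pascal VOC style colormap: the bits of each class id are
// scattered across the R/G/B channels so that nearby ids get distinct colors.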
std::vector<int> GenerateColorMap(int num_class) {
auto colormap = std::vector<int>(3 * num_class, 0);
for (int i = 0; i < num_class; ++i) {
int j = 0;
int lab = i;
while (lab) {
colormap[i * 3] |= (((lab >> 0) & 1) << (7 - j));
colormap[i * 3 + 1] |= (((lab >> 1) & 1) << (7 - j));
colormap[i * 3 + 2] |= (((lab >> 2) & 1) << (7 - j));
++j;
lab >>= 3;
}
}
return colormap;
}
cv::Mat Visualize(const cv::Mat& img,
const DetResult& result,
const std::map<int, std::string>& labels,
const std::vector<int>& colormap,
float threshold) {
cv::Mat vis_img = img.clone();
auto boxes = result.boxes;
for (int i = 0; i < boxes.size(); ++i) {
if (boxes[i].score < threshold) {
continue;
}
cv::Rect roi = cv::Rect(boxes[i].coordinate[0],
boxes[i].coordinate[1],
boxes[i].coordinate[2],
boxes[i].coordinate[3]);
// 生成预测框和标题
std::string text = boxes[i].category;
int c1 = colormap[3 * boxes[i].category_id + 0];
int c2 = colormap[3 * boxes[i].category_id + 1];
int c3 = colormap[3 * boxes[i].category_id + 2];
cv::Scalar roi_color = cv::Scalar(c1, c2, c3);
text += std::to_string(static_cast<int>(boxes[i].score * 100)) + "%";
int font_face = cv::FONT_HERSHEY_SIMPLEX;
double font_scale = 0.5;
int thickness = 1;
cv::Size text_size =
cv::getTextSize(text, font_face, font_scale, thickness, nullptr);
cv::Point origin;
origin.x = roi.x;
origin.y = roi.y;
// 生成预测框标题的背景
cv::Rect text_back = cv::Rect(boxes[i].coordinate[0],
boxes[i].coordinate[1] - text_size.height,
text_size.width,
text_size.height);
// 绘图和文字
cv::rectangle(vis_img, roi, roi_color, 2);
cv::rectangle(vis_img, text_back, roi_color, -1);
cv::putText(vis_img,
text,
origin,
font_face,
font_scale,
cv::Scalar(255, 255, 255),
thickness);
// 生成实例分割mask
if (boxes[i].mask.data.size() == 0) {
continue;
}
cv::Mat bin_mask(result.mask_resolution,
result.mask_resolution,
CV_32FC1,
boxes[i].mask.data.data());
cv::resize(bin_mask,
bin_mask,
cv::Size(boxes[i].mask.shape[0], boxes[i].mask.shape[1]));
cv::threshold(bin_mask, bin_mask, 0.5, 1, cv::THRESH_BINARY);
cv::Mat full_mask = cv::Mat::zeros(vis_img.size(), CV_8UC1);
bin_mask.copyTo(full_mask(roi));
cv::Mat mask_ch[3];
mask_ch[0] = full_mask * c1;
mask_ch[1] = full_mask * c2;
mask_ch[2] = full_mask * c3;
cv::Mat mask;
cv::merge(mask_ch, 3, mask);
cv::addWeighted(vis_img, 1, mask, 0.5, 0, vis_img);
}
return vis_img;
}
cv::Mat Visualize(const cv::Mat& img,
const SegResult& result,
const std::map<int, std::string>& labels,
const std::vector<int>& colormap) {
std::vector<uint8_t> label_map(result.label_map.data.begin(),
result.label_map.data.end());
cv::Mat mask(result.label_map.shape[0],
result.label_map.shape[1],
CV_8UC1,
label_map.data());
cv::Mat color_mask = cv::Mat::zeros(
result.label_map.shape[0], result.label_map.shape[1], CV_8UC3);
int rows = img.rows;
int cols = img.cols;
for (int i = 0; i < rows; i++) {
for (int j = 0; j < cols; j++) {
int category_id = static_cast<int>(mask.at<uchar>(i, j));
color_mask.at<cv::Vec3b>(i, j)[0] = colormap[3 * category_id + 0];
color_mask.at<cv::Vec3b>(i, j)[1] = colormap[3 * category_id + 1];
color_mask.at<cv::Vec3b>(i, j)[2] = colormap[3 * category_id + 2];
}
}
return color_mask;
}
std::string generate_save_path(const std::string& save_dir,
const std::string& file_path) {
if (access(save_dir.c_str(), 0) < 0) {
#ifdef _WIN32
mkdir(save_dir.c_str());
#else
if (mkdir(save_dir.c_str(), S_IRWXU) < 0) {
std::cerr << "Fail to create " << save_dir << "directory." << std::endl;
}
#endif
}
int pos = file_path.find_last_of(OS_PATH_SEP);
std::string image_name(file_path.substr(pos + 1));
return save_dir + OS_PATH_SEP + image_name;
}
} // namespace PaddleX
......@@ -45,7 +45,7 @@ predict(image, topk=1)
### batch_predict API
```
batch_predict(image_list, topk=1, thread_num=2)
batch_predict(image_list, topk=1)
```
Batch image prediction API; a usage sketch follows the parameter list below.
......@@ -53,4 +53,3 @@ batch_predict(image_list, topk=1, thread_num=2)
>
> > * **image_list** (list|tuple): Predicts every image in the list (or tuple); elements may be image file paths or numpy arrays (HWC layout, BGR format).
> > * **topk** (int): For classification, return the top-k most likely categories.
> > * **thread_num** (int): Number of threads used to preprocess the images concurrently.
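A minimal usage sketch of the deployment predictor's batch interface (the model directory and image paths are placeholders):

```python
import paddlex as pdx

predictor = pdx.deploy.Predictor('./inference_model')  # illustrative path
results = predictor.batch_predict(image_list=['test1.jpg', 'test2.jpg'], topk=1)
# Each element is a list of dicts with 'category_id', 'category' and 'score'.
```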
......@@ -62,7 +62,7 @@ evaluate(self, eval_dataset, batch_size=1, epoch_id=None, return_details=False)
### predict
```python
predict(self, img_file, transforms=None, topk=5)
predict(self, img_file, transforms=None, topk=1)
```
> Prediction API of the classification model. Note that the preprocessing pipeline used at prediction time is saved into `ResNet50.test_transforms` and `ResNet50.eval_transforms` only when eval_dataset was defined during training. If it was not, you need to define test_transforms yourself and pass it to `predict`.
......@@ -81,7 +81,7 @@ predict(self, img_file, transforms=None, topk=5)
### batch_predict
```python
batch_predict(self, img_file_list, transforms=None, topk=5, thread_num=2)
batch_predict(self, img_file_list, transforms=None, topk=1)
```
> Batch prediction API of the classification model. Note that the preprocessing pipeline used at prediction time is saved into `ResNet50.test_transforms` and `ResNet50.eval_transforms` only when eval_dataset was defined during training. If it was not, you need to define test_transforms yourself and pass it to `batch_predict`.
......@@ -91,7 +91,6 @@ batch_predict(self, img_file_list, transforms=None, topk=5, thread_num=2)
> > - **img_file_list** (list|tuple): Predicts every image in the list (or tuple); elements may be image file paths or numpy arrays (HWC layout, BGR format).
> > - **transforms** (paddlex.cls.transforms): Data preprocessing operators.
> > - **topk** (int): Return the top-k most likely categories.
> > - **thread_num** (int): Number of threads used to preprocess the images concurrently.
> **Returns**
>
......
......@@ -108,7 +108,7 @@ predict(self, img_file, transforms=None)
### batch_predict
```python
batch_predict(self, img_file_list, transforms=None, thread_num=2)
batch_predict(self, img_file_list, transforms=None)
```
> Batch prediction API of the PPYOLO model. Note that the preprocessing pipeline used at prediction time is saved into `YOLOv3.test_transforms` and `YOLOv3.eval_transforms` only when eval_dataset was defined during training. If it was not, you need to define `test_transforms` yourself and pass it to `batch_predict`.
......@@ -117,7 +117,6 @@ batch_predict(self, img_file_list, transforms=None, thread_num=2)
>
> > - **img_file_list** (list|tuple): Predicts every image in the list (or tuple); elements may be image file paths or numpy arrays (HWC layout, BGR format).
> > - **transforms** (paddlex.det.transforms): Data preprocessing operators.
> > - **thread_num** (int): Number of threads used to preprocess the images concurrently.
>
> **Returns**
>
......@@ -222,7 +221,7 @@ predict(self, img_file, transforms=None)
### batch_predict
```python
batch_predict(self, img_file_list, transforms=None, thread_num=2)
batch_predict(self, img_file_list, transforms=None)
```
> Batch prediction API of the YOLOv3 model. Note that the preprocessing pipeline used at prediction time is saved into `YOLOv3.test_transforms` and `YOLOv3.eval_transforms` only when eval_dataset was defined during training. If it was not, you need to define `test_transforms` yourself and pass it to `batch_predict`. A usage sketch follows the parameter list below.
......@@ -231,7 +230,6 @@ batch_predict(self, img_file_list, transforms=None, thread_num=2)
>
> > - **img_file_list** (list|tuple): Predicts every image in the list (or tuple); elements may be image file paths or numpy arrays (HWC layout, BGR format).
> > - **transforms** (paddlex.det.transforms): Data preprocessing operators.
> > - **thread_num** (int): Number of threads used to preprocess the images concurrently.
>
> **Returns**
>
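A minimal sketch of the detector's batch interface (model path and image names are placeholders):

```python
import paddlex as pdx

model = pdx.load_model('output/yolov3_darknet53/best_model')  # illustrative path
results = model.batch_predict(img_file_list=['a.jpg', 'b.jpg'])
# Each element is a list of dicts with 'bbox' ([x, y, w, h]), 'category',
# 'category_id' and 'score'.
```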
......@@ -327,7 +325,7 @@ predict(self, img_file, transforms=None)
### batch_predict
```python
batch_predict(self, img_file_list, transforms=None, thread_num=2)
batch_predict(self, img_file_list, transforms=None)
```
> Batch prediction API of the FasterRCNN model. Note that the preprocessing pipeline used at prediction time is saved into `FasterRCNN.test_transforms` and `FasterRCNN.eval_transforms` only when eval_dataset was defined during training. If it was not, you need to define test_transforms yourself and pass it to `batch_predict`.
......@@ -336,7 +334,6 @@ batch_predict(self, img_file_list, transforms=None, thread_num=2)
>
> > - **img_file_list** (list|tuple): Predicts every image in the list (or tuple); elements may be image file paths or numpy arrays (HWC layout, BGR format).
> > - **transforms** (paddlex.det.transforms): Data preprocessing operators.
> > - **thread_num** (int): Number of threads used to preprocess the images concurrently.
>
> **Returns**
>
......
......@@ -88,7 +88,7 @@ predict(self, img_file, transforms=None)
#### batch_predict
```python
batch_predict(self, img_file_list, transforms=None, thread_num=2)
batch_predict(self, img_file_list, transforms=None)
```
> Batch prediction API of the MaskRCNN model. Note that the preprocessing pipeline used at prediction time is saved into `FasterRCNN.test_transforms` and `FasterRCNN.eval_transforms` only when eval_dataset was defined during training. If it was not, you need to define test_transforms yourself and pass it to `batch_predict`.
......@@ -97,7 +97,6 @@ batch_predict(self, img_file_list, transforms=None, thread_num=2)
>
> > - **img_file_list** (list|tuple): Predicts every image in the list (or tuple); elements may be image file paths or numpy arrays (HWC layout, BGR format).
> > - **transforms** (paddlex.det.transforms): Data preprocessing operators.
> > - **thread_num** (int): Number of threads used to preprocess the images concurrently.
>
> **Returns**
>
......
......@@ -95,7 +95,7 @@ predict(self, img_file, transforms=None):
### batch_predict
```
batch_predict(self, img_file_list, transforms=None, thread_num=2):
batch_predict(self, img_file_list, transforms=None):
```
> Batch prediction API of the DeepLabv3p model. Note that the preprocessing pipeline used at prediction time is saved into `DeepLabv3p.test_transforms` and `DeepLabv3p.eval_transforms` only when eval_dataset was defined during training. If it was not, you need to define test_transforms yourself and pass it to `batch_predict`. A usage sketch appears at the end of this section.
......@@ -104,7 +104,6 @@ batch_predict(self, img_file_list, transforms=None, thread_num=2):
> >
> > - **img_file_list** (list|tuple): Predicts every image in the list (or tuple); elements may be image file paths or numpy arrays (HWC layout, BGR format).
> > - **transforms** (paddlex.seg.transforms): Data preprocessing operators.
> > - **thread_num** (int): Number of threads used to preprocess the images concurrently.
> **Returns**
> >
......
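A minimal sketch of the segmentation batch interface (model path and image names are placeholders):

```python
import paddlex as pdx

model = pdx.load_model('output/deeplabv3p_mobilenetv2/best_model')  # illustrative
results = model.batch_predict(img_file_list=['a.jpg', 'b.jpg'])
# Each element holds 'label_map' (per-pixel class ids) and 'score_map'
# (per-pixel class probabilities).
```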
# OpenVINO Model Conversion
Convert Paddle models into OpenVINO Inference Engine models.
## Requirements
* ONNX 1.5.0+
* PaddleX 1.0+
* OpenVINO 2020.4
**Note**: for PaddleX installation see [PaddleX](https://paddlex.readthedocs.io/zh_CN/develop/install.html); for OpenVINO installation see [OpenVINO](https://docs.openvinotoolkit.org/latest/index.html). ONNX must be version 1.5.0 or later, otherwise the model conversion will fail.
Make sure the software above is installed. **All examples below use the working directory `/root/projects/`.**
## Export the inference model
Before converting to OpenVINO, export the Paddle model in inference format. The export produces three files, `__model__`, `__params__` and `model.yml`. Export command:
```
paddlex --export_inference --model_dir=/path/to/paddle_model --save_dir=./inference_model --fixed_input_shape=[w,h]
```
## Export the OpenVINO model
```
cd /root/projects/python
python convertor.py --model_dir /path/to/inference_model --save_dir /path/to/openvino_model --fixed_input_shape [w,h]
```
**After a successful conversion, three files with the suffixes .xml, .bin and .mapping appear under save_dir.**
Conversion parameters:
| Parameter | Description |
| ---- | ---- |
| --model_dir | Path of the Paddle model; make sure `__model__`, `__params__` and `model.yml` are in the same directory |
| --save_dir | Directory where the OpenVINO model is saved |
| --fixed_input_shape | The model's input size [W,H] |
| --data_type(optional) | FP32 or FP16; defaults to FP32. The IR for VPU must be FP16 |
**Notes**
- Paddle segmentation models are not supported yet, because OpenVINO does not support the ONNX resize-11 op.
- When deploying YOLOv3 through OpenVINO, OpenVINO's limited ONNX op support forces special handling of the final multiclass_nms layer during export: the exported ONNX model's box output includes the background category (the Paddle model's does not). The OpenVINO deployment code filters the background category out in post-processing.
......@@ -6,6 +6,8 @@ OpenVINO部署
:maxdepth: 2
:caption: 文档目录:
introduction.md
windows.md
linux.md
intel_movidius.md
python.md
export_openvino_model.md
# Introduction to OpenVINO Deployment
PaddleX can accelerate prediction of trained Paddle models through OpenVINO. For OpenVINO details and installation, see [OpenVINO](https://docs.openvinotoolkit.org/latest/index.html)
## Supported configurations
The table below lists PaddleX's support for OpenVINO acceleration in different environments.
|Hardware|Linux|Windows|Raspbian OS|C++|Python|Classification|Detection|Segmentation|
| ----| ---- | ---- | ----| ---- | ---- |---- | ---- |---- |
|CPU|Yes|Yes|No|Yes|Yes|Yes|Yes|No|
|VPU|Yes|Yes|Yes|Yes|Yes|Yes|No|No|
**Note**: Raspbian OS is the Raspberry Pi operating system. Detection supports YOLOv3 only; segmentation models are not supported yet because OpenVINO lacks the ONNX resize-11 op.
## Deployment workflow
**Deploying PaddleX models with OpenVINO takes two steps:**
* **Model conversion**: convert the Paddle model into an OpenVINO Inference Engine model
* **Prediction deployment**: run prediction with the Inference Engine
## Model conversion
**See [Model conversion](./export_openvino_model.md).**
**Note**: model conversion works the same on every platform, so the later documents do not repeat it.
## Prediction deployment
Deployment differs slightly across platforms; see:
**[Linux](./linux.md)**: running PaddleX with OpenVINO on Linux or Raspbian OS, in C++, on CPU or VPU
**[Windows](./windows.md)**: running PaddleX with OpenVINO on Windows, in C++, on CPU or VPU
**[Python](./python.md)**: running PaddleX with OpenVINO from Python
\ No newline at end of file
# Linux Platform
## Prerequisites
* OS: Ubuntu, Raspbian OS
* GCC 5.4.0
* CMake 3.0+
* PaddleX 1.0+
* OpenVINO 2020.4
* Hardware: CPU, VPU
**Note**: for PaddleX installation see [PaddleX](https://paddlex.readthedocs.io/zh_CN/develop/install.html); for OpenVINO installation follow, depending on your system, [OpenVINO-Linux](https://docs.openvinotoolkit.org/latest/_docs_install_guides_installing_openvino_linux.html) or [OpenVINO-Raspbian](https://docs.openvinotoolkit.org/latest/openvino_docs_install_guides_installing_openvino_raspbian.html)
Make sure the software above is installed and the environment is configured. **All examples below use the working directory `/root/projects/`.**
## Prediction deployment
This document covers C++ deployment; for Python deployment see [Python deployment](./python.md)
### Step1: Download the PaddleX prediction code
```
mkdir -p /root/projects
cd /root/projects
git clone https://github.com/PaddlePaddle/PaddleX.git
```
**Note**: the C++ prediction code lives in the PaddleX/deploy/openvino directory, which does not depend on any other directory under PaddleX.
### Step2: Software dependencies
Precompiled packages (or one-step compilation) are provided for the third-party dependencies, so you do not need to download or build them yourself. To build them yourself, see:
- gflags: [build guide](https://gflags.github.io/gflags/#download)
- glog: [build guide](https://github.com/google/glog)
- opencv: [build guide](https://docs.opencv.org/master/d7/d9f/tutorial_linux_install.html)
### Step3: Compile
The `cmake` commands live in `scripts/build.sh`. When compiling on a Raspberry Pi (Raspbian OS), change the ARCH parameter from x86 to armv7; if you built the third-party dependencies yourself, adjust the main parameters to match your Step2 builds. The main variables are:
```
# path to the precompiled OpenVINO library
OPENVINO_DIR=$INTEL_OPENVINO_DIR/inference_engine
# path to the precompiled gflags library
GFLAGS_DIR=$(pwd)/deps/gflags
# path to the precompiled glog library
GLOG_DIR=$(pwd)/deps/glog
# path to the precompiled ngraph library
NGRAPH_LIB=$INTEL_OPENVINO_DIR/deployment_tools/ngraph/lib
# path to the precompiled opencv library
OPENCV_DIR=$(pwd)/deps/opencv/
# cpu architecture (x86 or armv7)
ARCH=x86
```
Run the `build` script:
```shell
sh ./scripts/build.sh
```
### Step4: Predict
After a successful build, the prediction executable is `classifier` for classification and `detector` for detection. Main command-line parameters:
| Parameter | Description |
| ---- | ---- |
| --model_dir | Path of the .xml file produced by model conversion; make sure the three generated files are in the same directory |
| --image | Path of the image to predict |
| --image_list | .txt file listing image paths, one per line |
| --device | Target platform, one of {"CPU","MYRIAD"}; defaults to "CPU", use "MYRIAD" for VPU |
| --cfg_file | The PaddleX model's .yml configuration file |
| --save_dir | Directory for visualized results (detection only); the default " " means results are not saved |
### Samples
`Sample 1`:
Classify a single image on CPU under Linux.
Test image `/path/to/test_img.jpeg`:
```shell
./build/classifier --model_dir=/path/to/openvino_model --image=/path/to/test_img.jpeg --cfg_file=/path/to/PaddleX_model.yml
```
`Sample 2`:
Run detection on multiple images on CPU under Linux and save the visualized results.
The images to predict are listed in `/path/to/image_list.txt`, formatted as:
```
/path/to/images/test_img1.jpeg
/path/to/images/test_img2.jpeg
...
/path/to/images/test_imgn.jpeg
```
```shell
./build/detector --model_dir=/path/to/models/openvino_model --image_list=/root/projects/images_list.txt --cfg_file=/path/to/PaddleX_model.yml --save_dir ./output
```
`Sample 3`:
Classify a single image on VPU on a Raspberry Pi (Raspbian OS).
Test image `/path/to/test_img.jpeg`:
```shell
./build/classifier --model_dir=/path/to/openvino_model --image=/path/to/test_img.jpeg --cfg_file=/path/to/PaddleX_model.yml --device=MYRIAD
```
## Performance Tests
`Test 1`:
OpenVINO's acceleration of PaddleX deployment, measured on a server CPU:
- CPU: Intel(R) Xeon(R) CPU E5-2650 v4 @ 2.20GHz
- OpenVINO: 2020.4
- PaddleX: Paddle inference library (1.8) with mkldnn and multithreading enabled.
- Models come from the PaddleX tutorials; batch size is 1 and latency is ms/image, counting only model run time (no pre- or post-processing), with 20 warmup images and 100 timed images.
|Model|PaddleX|OpenVINO|Input size|
|---|---|---|---|
|resnet-50 | 20.56 | 16.12 | 224*224 |
|mobilenet-V2 | 5.16 | 2.31 |224*224|
|yolov3-mobilenetv1 |76.63| 46.26|608*608 |
`Test 2`:
A VPU Neural Compute Stick 2 (NCS2) plugged into a PC, accelerated through OpenVINO.
- CPU: Intel(R) Core(TM) i5-4300U 1.90GHz
- VPU: Movidius Neural Compute Stick 2
- OpenVINO: 2020.4
- Models come from the PaddleX tutorials; batch size is 1 and latency is ms/image, counting only model run time (no pre- or post-processing), with 20 warmup images and 100 timed images.
|Model|OpenVINO|Input size|
|---|---|---|
|mobilenetV2|24.00|224*224|
|resnet50_vd_ssld|58.53|224*224|
`Test 3`:
A VPU Neural Compute Stick 2 (NCS2) plugged into a Raspberry Pi 3B, accelerated through OpenVINO.
- CPU: ARM Cortex-A72 1.2GHz 64bit
- VPU: Movidius Neural Compute Stick 2
- OpenVINO: 2020.4
- Models come from the PaddleX tutorials; batch size is 1 and latency is ms/image, counting only model run time (no pre- or post-processing), with 20 warmup images and 100 timed images.
|Model|OpenVINO|Input size|
|---|---|---|
|mobilenetV2|43.15|224*224|
|resnet50|82.66|224*224|
# Python Prediction Deployment
This document describes OpenVINO-based prediction deployment from Python. Before deploying, convert the Paddle model into an OpenVINO Inference Engine model; see [Model conversion](docs/deploy/openvino/export_openvino_model.md). Currently, PaddleX classification, detection and segmentation models are supported on CPU; classification models are supported on VPU.
## Prerequisites
* Python 3.6+
* OpenVINO 2020.4
**Note**: for OpenVINO installation see [OpenVINO](https://docs.openvinotoolkit.org/latest/index.html)
Make sure the software above is installed. **All examples below use the working directory `/root/projects/`.**
## Prediction deployment
Run demo.py in the /root/projects/PaddleX/deploy/openvino/python directory to predict. Command-line parameters:
| Parameter | Description |
| ---- | ---- |
| --model_dir | Path of the .xml file produced by model conversion; make sure the three generated files are in the same directory |
| --img | Path of the image to predict |
| --image_list | .txt file listing image paths, one per line |
| --device | Target platform; defaults to "CPU" |
| --cfg_file | The PaddleX model's .yml configuration file |
### Samples
`Sample 1`:
Test image `/path/to/test_img.jpeg`:
```
cd /root/projects/python
python demo.py --model_dir /path/to/openvino_model --img /path/to/test_img.jpeg --cfg_file /path/to/PaddleX_model.yml
```
`Sample 2`:
Predict multiple images listed in `/path/to/image_list.txt`, formatted as:
```
/path/to/images/test_img1.jpeg
/path/to/images/test_img2.jpeg
...
/path/to/images/test_imgn.jpeg
```
```
cd /root/projects/python
python demo.py --model_dir /path/to/models/openvino_model --image_list /root/projects/images_list.txt --cfg_file=/path/to/PaddleX_model.yml
```
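If you prefer not to go through demo.py, the sketch below drives a converted IR with OpenVINO's own Python API (a minimal sketch: file names are placeholders, and the per-model normalization recorded in model.yml is omitted):

```python
import cv2
import numpy as np
from openvino.inference_engine import IECore

ie = IECore()
net = ie.read_network(model='model.xml', weights='model.bin')
input_name = next(iter(net.input_info))
exec_net = ie.load_network(network=net, device_name='CPU')  # or 'MYRIAD'

im = cv2.imread('test.jpg')
n, c, h, w = net.input_info[input_name].input_data.shape
im = cv2.resize(im, (w, h)).transpose(2, 0, 1)[np.newaxis].astype(np.float32)
outputs = exec_net.infer({input_name: im})  # dict: output name -> ndarray
```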
# Windows Platform
## Notes
On Windows we tested with `Visual Studio 2019 Community`. Microsoft has supported managing `CMake` cross-platform builds directly since `Visual Studio 2017`, but stable and complete support only arrived in `2019`, so if you want to manage the build with CMake we recommend `Visual Studio 2019`.
## Prerequisites
* Visual Studio 2019
* OpenVINO 2020.4
* CMake 3.0+
**Note**: for PaddleX installation see [PaddleX](https://paddlex.readthedocs.io/zh_CN/develop/install.html); for OpenVINO installation see [OpenVINO-Windows](https://docs.openvinotoolkit.org/latest/openvino_docs_install_guides_installing_openvino_windows.html)
**Important**: after installing OpenVINO you must add the OpenVINO directories to the system environment variables, otherwise the programs will fail to find their DLLs at run time. Assuming OpenVINO was installed to its default directory, the steps are:
- My Computer -> Properties -> Advanced system settings -> Environment Variables
- In the system variables, find Path (create it if missing) and double-click to edit
- Click New, add the following OpenVINO paths and save:
`C:\Program Files (x86)\IntelSWTools\openvino\inference_engine\bin\intel64\Release`
`C:\Program Files (x86)\IntelSWTools\openvino\inference_engine\external\tbb\bin`
`C:\Program Files (x86)\IntelSWTools\openvino\deployment_tools\ngraph\lib`
Make sure the software above is installed and the environment is configured. **All examples below use the working directory `D:\projects`.**
## Prediction deployment
This document covers C++ deployment; for Python deployment see [Python deployment](./python.md)
### Step1: Download the PaddleX prediction code
```shell
d:
mkdir projects
cd projects
git clone https://github.com/PaddlePaddle/PaddleX.git
```
**Note**: the `C++` prediction code lives in the `PaddleX\deploy\openvino` directory, which does not depend on any other directory under `PaddleX`.
### Step2: Software dependencies
Precompiled dependency packages are provided:
- [gflags-glog](https://bj.bcebos.com/paddlex/deploy/windows/third-parts.zip)
- [opencv](https://bj.bcebos.com/paddleseg/deploy/opencv-3.4.6-vc14_vc15.exe)
Please download the precompiled packages from the two links above. To fetch or build them yourself, see:
- gflags: [download](https://docs.microsoft.com/en-us/windows-hardware/drivers/debugger/gflags)
- glog: [build guide](https://github.com/google/glog)
- opencv: [download](https://opencv.org/releases/)
After downloading opencv, configure the environment variable as follows:
- My Computer -> Properties -> Advanced system settings -> Environment Variables
- In the system variables, find Path (create it if missing) and double-click to edit
- Click New, add the opencv path and save, e.g. `D:\projects\opencv\build\x64\vc14\bin`
### Step3: Build the CMake project directly with Visual Studio 2019
1. Open Visual Studio 2019 Community and click `Continue without code`
2. Click `File` -> `Open` -> `CMake`, select the directory containing the C++ prediction code (e.g. `D:\projects\PaddleX\deploy\openvino`), and open `CMakeLists.txt`
3. Click `Project` -> `CMake Settings`
4. Click `Browse` and set the build options for the `OpenVINO`, `Gflags`, `GLOG`, `NGRAPH` and `OPENCV` paths
| Parameter | Description |
| ---- | ---- |
| OPENCV_DIR | Path of the opencv library |
| OPENVINO_DIR | Path of the OpenVINO inference library, the deployment_tools/inference_engine directory under the OpenVINO install directory; no change needed if OpenVINO was installed to its default location |
| NGRAPH_LIB | Path of OpenVINO's ngraph library, the deployment_tools/ngraph/lib directory under the OpenVINO install directory; no change needed if OpenVINO was installed to its default location |
| GFLAGS_DIR | Path of the gflags library |
| GLOG_DIR | Path of the glog library |
| WITH_STATIC_LIB | Whether to link statically; defaults to True |
**After setting these**, click `Save and generate CMake cache to load variables`
5. Click `Build` -> `Build All`
### Step4: Predict
The executables produced by the `Visual Studio 2019` build are in the `out\build\x64-Release` directory; open `cmd` and switch to it:
```
D:
cd D:\projects\PaddleX\deploy\openvino\out\build\x64-Release
```
* After a successful build, the image prediction demos are `detector.exe` and `classifier.exe`; choose the one matching your model type. Main command-line parameters:
| Parameter | Description |
| ---- | ---- |
| --model_dir | Path of the .xml file produced by model conversion; make sure the three generated files are in the same directory |
| --image | Path of the image to predict |
| --image_list | .txt file listing image paths, one per line |
| --device | Target platform, one of {"CPU","MYRIAD"}; defaults to "CPU", use "MYRIAD" for VPU |
| --cfg_file | The PaddleX model's .yml configuration file |
| --save_dir | Directory for visualized results (detection only); the default " " means results are not saved |
### Samples
`Sample 1`:
Classify a single image on CPU.
Test image `/path/to/test_img.jpeg`:
```shell
./classifier.exe --model_dir=/path/to/openvino_model --image=/path/to/test_img.jpeg --cfg_file=/path/to/PaddleX_model.yml
```
`Sample 2`:
Run detection on multiple images on CPU and save the visualized results.
The images to predict are listed in `/path/to/image_list.txt`, formatted as:
```
/path/to/images/test_img1.jpeg
/path/to/images/test_img2.jpeg
...
/path/to/images/test_imgn.jpeg
```
```shell
./detector.exe --model_dir=/path/to/models/openvino_model --image_list=/root/projects/images_list.txt --cfg_file=/path/to/PaddleX_model.yml --save_dir ./output
```
`Sample 3`:
Classify a single image on VPU.
Test image `/path/to/test_img.jpeg`:
```shell
./classifier.exe --model_dir=/path/to/openvino_model --image=/path/to/test_img.jpeg --cfg_file=/path/to/PaddleX_model.yml --device=MYRIAD
```
# Raspberry Pi
PaddleX supports two ways to deploy predictions on a Raspberry Pi: Paddle-Lite, and the OpenVINO-based Neural Compute Stick 2 (NCS2).
## Hardware setup
A Raspberry Pi without an installed system first needs OS installation and environment configuration. You will need:
- Hardware: micro SD card, monitor, keyboard, mouse
- Software: Raspbian OS
### Step1: Install the system
- Format the micro SD card as FAT. On Windows and Mac we recommend the [SD Memory Card Formatter](https://www.sdcard.org/downloads/formatter/) tool; on Linux see [NOOBS For Raspberry Pi](http://qdosmsq.dunbar-it.co.uk/blog/2013/06/noobs-for-raspberry-pi/)
- Download the NOOBS build of Raspbian OS from the [download page](https://www.raspberrypi.org/downloads/), copy the extracted files onto the SD card, insert the card, and power on the Pi; the system then installs automatically
### Step2: Configure the environment
- Enable VNC and SSH: open an LX terminal, run the command below, choose Interfacing Options, then P2 SSH and P3 VNC to enable SSH and VNC. You can then connect to the Pi over SSH or VNC
```
sudo raspi-config
```
- Switch mirrors: the official Raspberry Pi mirrors are slow; we recommend picking a local mirror, e.g. from [Raspberry Pi software mirrors](https://www.jianshu.com/p/67b9e6ebf8a0). Afterwards run
```
sudo apt-get update
sudo apt-get upgrade
```
## Paddle-Lite Deployment
Paddle-Lite-based deployment currently supports PaddleX classification, segmentation and detection models; detection supports YOLOv3 only.
Deployment consists of converting the PaddleX model and then deploying the converted model.
**Note**: for PaddleX installation see [PaddleX](https://paddlex.readthedocs.io/zh_CN/develop/install.html); for Paddle-Lite details see [Paddle-Lite](https://paddle-lite.readthedocs.io/zh/latest/index.html)
Make sure the software above is installed and the environment is configured. **All examples below use the working directory `/root/projects/`.**
## Paddle-Lite model conversion
Convert the PaddleX model into a Paddle-Lite model; see [Paddle-Lite model conversion](./export_nb_model.md)
## Paddle-Lite prediction
### Step1: Download the PaddleX prediction code
```
mkdir -p /root/projects
cd /root/projects
git clone https://github.com/PaddlePaddle/PaddleX.git
```
**Note**: the C++ prediction code lives in the PaddleX/deploy/raspberry directory, which does not depend on any other directory under PaddleX. For Python deployment see [Python deployment](./python.md)
### Step2: Download the precompiled Paddle-Lite library
A precompiled ArmLinux Paddle-Lite library matching the provided opt tool is available: [Paddle-Lite (ArmLinux) precompiled library](https://bj.bcebos.com/paddlex/deploy/lite/inference_lite_2.6.1_armlinux.tar.bz2)
We recommend the precompiled library. To build it yourself, run the following in an LX terminal on the Pi:
```
git clone https://github.com/PaddlePaddle/Paddle-Lite.git
cd Paddle-Lite
sudo ./lite/tools/build.sh --arm_os=armlinux --arm_abi=armv7hf --arm_lang=gcc --build_extra=ON full_publish
```
The compiled library is at `./build.lite.armlinux.armv7hf.gcc/inference_lite_lib.armlinux.armv7hf/cxx`
**Important**: the prediction library version must match the opt version. For more on building Paddle-Lite see the [Paddle-Lite build guide](https://paddle-lite.readthedocs.io/zh/latest/user_guides/source_compile.html); for more precompiled libraries see the [Paddle-Lite Release Notes](https://github.com/PaddlePaddle/Paddle-Lite/releases)
### Step3: Software dependencies
Precompiled packages (or one-step compilation) are provided for the third-party dependencies, so you do not need to download or build them yourself. To build them yourself, see:
- gflags: [build guide](https://gflags.github.io/gflags/#download)
- glog: [build guide](https://github.com/google/glog)
- opencv: [build guide](https://docs.opencv.org/master/d7/d9f/tutorial_linux_install.html)
### Step4: Compile
The `cmake` commands live in `scripts/build.sh`. Set LITE_DIR to the Paddle-Lite prediction library directory; if you built the third-party dependencies yourself, adjust the main parameters to match your Step3 builds. The main variables are:
```
# path to the precompiled Paddle-Lite library
LITE_DIR=/path/to/Paddle-Lite/inference/lib
# path to the precompiled gflags library
GFLAGS_DIR=$(pwd)/deps/gflags
# path to the precompiled glog library
GLOG_DIR=$(pwd)/deps/glog
# path to the precompiled opencv library
OPENCV_DIR=$(pwd)/deps/opencv/
```
Run the `build` script:
```shell
sh ./scripts/build.sh
```
### Step5: Predict
After a successful build, the prediction executable is `classifier` for classification, `segmenter` for segmentation, and `detector` for detection. Main command-line parameters:
| Parameter | Description |
| ---- | ---- |
| --model_dir | Path of the .nb model file produced by model conversion |
| --image | Path of the image to predict |
| --image_list | .txt file listing image paths, one per line |
| --thread_num | Number of prediction threads; defaults to 1 |
| --cfg_file | The PaddleX model's .yml configuration file |
| --save_dir | Directory for visualized results (detection and segmentation only); the default " " means results are not saved |
### Samples
`Sample 1`:
Classify a single image.
Test image `/path/to/test_img.jpeg`:
```shell
./build/classifier --model_dir=/path/to/nb_model \
--image=/path/to/test_img.jpeg --cfg_file=/path/to/PaddleX_model.yml --thread_num=4
```
`Sample 2`:
Segment multiple images.
The images to predict are listed in `/path/to/image_list.txt`, formatted as:
```
/path/to/images/test_img1.jpeg
/path/to/images/test_img2.jpeg
...
/path/to/images/test_imgn.jpeg
```
```shell
./build/segmenter --model_dir=/path/to/models/nb_model --image_list=/root/projects/images_list.txt --cfg_file=/path/to/PaddleX_model.yml --save_dir ./output --thread_num=4
```
## Performance Tests
### Test environment
Hardware: Raspberry Pi 3 Model B
System: Raspbian OS
Software: Paddle-Lite 2.6.1
### Results
Latency in ms; num is the number of threads used by Paddle-Lite
|Model|lite(num=4)|Input size|
| ----| ---- | ----|
|mobilenet-v2|136.19|224*224|
|resnet-50|1131.42|224*224|
|deeplabv3|2162.03|512*512|
|hrnet|6118.23|512*512|
|yolov3-darknet53|4741.15|320*320|
|yolov3-mobilenet|1424.01|320*320|
|densenet121|1144.92|224*224|
|densenet161|2751.57|224*224|
|densenet201|1847.06|224*224|
|HRNet_W18|1753.06|224*224|
|MobileNetV1|177.63|224*224|
|MobileNetV3_large_ssld|133.99|224*224|
|MobileNetV3_small_ssld|53.99|224*224|
|ResNet101|2290.56|224*224|
|ResNet101_vd|2337.51|224*224|
|ResNet101_vd_ssld|3124.49|224*224|
|ShuffleNetV2|115.97|224*224|
|Xception41|1418.29|224*224|
|Xception65|2094.7|224*224|
Based on these results, we recommend small networks such as MobileNetV1-V3 and ShuffleNetV2 on the Raspberry Pi
## NCS2 Deployment
A Raspberry Pi can run PaddleX model prediction on an NCS2 through OpenVINO; currently only PaddleX classification networks are supported. NCS2-based deployment has two steps: converting the Paddle model into an OpenVINO IR, then deploying the IR on the NCS2 for prediction.
- For model conversion see [Converting a PaddleX model to an OpenVINO IR](./openvino/export_openvino_model.md); OpenVINO on Raspbian OS cannot convert models, so convert to an FP16 IR on the host side first.
- For prediction deployment see the Raspbian OS VPU part of [OpenVINO deployment](./openvino/linux.md)
# Paddle-Lite Model Conversion
Converting a PaddleX model into a Paddle-Lite nb model has two stages: exporting the PaddleX model as an inference model, then converting the inference model into a Paddle-Lite nb model.
### Step1: Export the inference model
Before converting to Paddle-Lite, export the PaddleX model in inference format. The export produces three files: `__model__`, `__params__` and `model.yml`. See [Exporting an inference model](../export_model.md)
### Step2: Export the Paddle-Lite model
A Paddle-Lite model is produced with Paddle-Lite's opt tool. Download and extract the [opt model optimizer (2.6.1-linux)](https://bj.bcebos.com/paddlex/deploy/Rasoberry/opt.zip), then run on Linux:
``` bash
./opt --model_file=<model_path> \
--param_file=<param_path> \
--valid_targets=arm \
--optimize_out_type=naive_buffer \
--optimize_out=model_output_name
```
| Parameter | Description |
| ---- | ---- |
| --model_file | Path of the exported inference model's network structure file `__model__` |
| --param_file | Path of the exported inference model's parameter file `__params__` |
| --valid_targets | Backend the model can run on; set it to `arm` here |
| --optimize_out_type | Output model type, protobuf or naive_buffer; naive_buffer is a lighter-weight serialization format, set it to `naive_buffer` here |
| --optimize_out | Output name (path prefix) of the converted model |
If the Python version of Paddle-Lite is installed, you can also convert with:
```
paddle_lite_opt --model_file=<model_path> \
--param_file=<param_path> \
--valid_targets=arm \
--optimize_out_type=naive_buffer \
--optimize_out=model_output_name
```
For detailed usage and parameter meanings see [Converting models with opt](https://paddle-lite.readthedocs.io/zh/latest/user_guides/opt/opt_bin.html); for more precompiled opt builds see the [Paddle-Lite Release Notes](https://github.com/PaddlePaddle/Paddle-Lite/releases)
**Important**: the opt version must match the prediction library version; if you use the 2.6.0 prediction library, download the 2.6.0 opt from the Release Notes above to convert the model
\ No newline at end of file
Raspberry Pi Deployment
=======================================
.. toctree::
:maxdepth: 2
:caption: 文档目录:
Raspberry.md
python.md
export_nb_model.md
\ No newline at end of file
# Python Prediction Deployment
This document describes running prediction with a deployed PaddleX model on a Raspberry Pi using the Python version of Paddle-Lite. Install the Python Paddle-Lite prediction library with the command below; if that fails, you can instead install from the whl file [Paddle-Lite_2.6.0_python](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.6.0/armlinux_python_installer.zip). For other versions see the [Paddle-Lite Release Notes](https://github.com/PaddlePaddle/Paddle-Lite/releases)
```
python -m pip install paddlelite
```
Before deploying, convert the PaddleX model into a Paddle-Lite nb model; see [Paddle-Lite model conversion](./export_nb_model.md)
**Important**: if you use the 2.6.0 Python prediction library, download the 2.6.0 opt tool to convert the model
## Prerequisites
* Python 3.6+
* Paddle-Lite_python 2.6.0+
Make sure the software above is installed. **All examples below use the working directory `/root/projects/`.**
## Prediction deployment
Run demo.py in the /root/projects/PaddleX/deploy/raspberry/python directory to predict. Command-line parameters:
| Parameter | Description |
| ---- | ---- |
| --model_dir | Path of the .nb model file produced by model conversion |
| --img | Path of the image to predict |
| --image_list | .txt file listing image paths, one per line |
| --cfg_file | The PaddleX model's .yml configuration file |
| --thread_num | Number of prediction threads; defaults to 1 |
| --input_shape | Input image shape of the model, [N,C,H,W] |
### Samples
`Sample 1`:
Test image `/path/to/test_img.jpeg`:
```
cd /root/projects/python
python demo.py --model_dir /path/to/nb_model --img /path/to/test_img.jpeg --cfg_file /path/to/PaddleX_model.yml --thread_num 4 --input_shape [1,3,224,224]
```
`Sample 2`:
Predict multiple images listed in `/path/to/image_list.txt`, formatted as:
```
/path/to/images/test_img1.jpeg
/path/to/images/test_img2.jpeg
...
/path/to/images/test_imgn.jpeg
```
```
cd /root/projects/python
python demo.py --model_dir /path/to/models/nb_model --image_list /root/projects/images_list.txt --cfg_file=/path/to/PaddleX_model.yml --thread_num 4 --input_shape [1,3,224,224]
```
......@@ -125,6 +125,8 @@ yaml-cpp.zip文件下载后无需解压,在cmake/yaml.cmake中将`URL https://
| image_list | .txt file listing image paths, one per line |
| use_gpu | Whether to predict on GPU, 0 or 1 (default 0) |
| use_trt | Whether to predict with TensorRT, 0 or 1 (default 0) |
| use_mkl | Whether to accelerate CPU prediction with MKL, 0 or 1 (default 1) |
| mkl_thread_num | Number of MKL inference threads; defaults to the number of CPU processors |
| gpu_id | GPU device ID, default 0 |
| save_dir | Directory for visualized results, default "output"; **classifier has no such parameter** |
| key | Key generated during encryption; the default "" means an unencrypted model is loaded |
......@@ -141,6 +143,8 @@ yaml-cpp.zip文件下载后无需解压,在cmake/yaml.cmake中将`URL https://
| video_path | Path of the video file |
| use_gpu | Whether to predict on GPU, 0 or 1 (default 0) |
| use_trt | Whether to predict with TensorRT, 0 or 1 (default 0) |
| use_mkl | Whether to accelerate CPU prediction with MKL, 0 or 1 (default 1) |
| mkl_thread_num | Number of MKL inference threads; defaults to the number of CPU processors |
| gpu_id | GPU device ID, default 0 |
| show_result | When predicting on a video file, whether to display the visualized result on screen in real time (a delay is added, so the displayed frame rate is not the true one), 0 or 1 (default 0) |
| save_result | Whether to save each frame's visualized prediction as a video file, 0 or 1 (default 1) |
......
......@@ -109,6 +109,8 @@ cd D:\projects\PaddleX\deploy\cpp\out\build\x64-Release
| image | Path of the image to predict |
| image_list | .txt file listing image paths, one per line |
| use_gpu | Whether to predict on GPU, 0 or 1 (default 0) |
| use_mkl | Whether to accelerate CPU prediction with MKL, 0 or 1 (default 1) |
| mkl_thread_num | Number of MKL inference threads; defaults to the number of CPU processors |
| gpu_id | GPU device ID, default 0 |
| save_dir | Directory for visualized results, default "output"; classifier has no such parameter |
| key | Key generated during encryption; the default "" means an unencrypted model is loaded |
......@@ -124,6 +126,8 @@ cd D:\projects\PaddleX\deploy\cpp\out\build\x64-Release
| camera_id | Camera device ID, default 0 |
| video_path | Path of the video file |
| use_gpu | Whether to predict on GPU, 0 or 1 (default 0) |
| use_mkl | Whether to accelerate CPU prediction with MKL, 0 or 1 (default 1) |
| mkl_thread_num | Number of MKL inference threads; defaults to the number of CPU processors |
| gpu_id | GPU device ID, default 0 |
| show_result | When predicting on a video file, whether to display the visualized result on screen in real time (a delay is added, so the displayed frame rate is not the true one), 0 or 1 (default 0) |
| save_result | Whether to save each frame's visualized prediction as a video file, 0 or 1 (default 1) |
......
......@@ -70,7 +70,6 @@ cd PaddleX/examples/meter_reader/
| save_dir | Directory for visualized results, default "output"|
| score_threshold | Boxes in the detector output scoring below this threshold are filtered out, default 0.5|
| seg_batch_size | Batch size for segmentation, default 2 |
| seg_thread_num | Number of threads for segmentation prediction; defaults to the number of CPU processors |
| use_camera | Whether to capture images from a camera, default False |
| camera_id | Camera device ID, default 0 |
| use_erode | Whether to refine the segmentation output with image erosion, default False |
......
......@@ -79,7 +79,6 @@ cd PaddleX/examples/meter_reader/
| save_dir | Directory for visualized results, default "output"|
| score_threshold | Boxes in the detector output scoring below this threshold are filtered out, default 0.5|
| seg_batch_size | Batch size for segmentation, default 2 |
| seg_thread_num | Number of threads for segmentation prediction; defaults to the number of CPU processors |
| use_camera | Whether to capture images from a camera, default False |
| camera_id | Camera device ID, default 0 |
| use_erode | Whether to refine the segmentation output with image erosion, default False |
......
......@@ -105,12 +105,6 @@ def parse_args():
help="Segmentation batch size",
type=int,
default=2)
parser.add_argument(
'--seg_thread_num',
dest='seg_thread_num',
help="Thread number of segmentation preprocess",
type=int,
default=2)
return parser.parse_args()
......@@ -143,8 +137,7 @@ class MeterReader:
use_erode=True,
erode_kernel=4,
score_threshold=0.5,
seg_batch_size=2,
seg_thread_num=2):
seg_batch_size=2):
if isinstance(im_file, str):
im = cv2.imread(im_file).astype('float32')
else:
......@@ -190,8 +183,7 @@ class MeterReader:
meter_images.append(resized_meters[j - i])
result = self.segmenter.batch_predict(
transforms=self.seg_transforms,
img_file_list=meter_images,
thread_num=seg_thread_num)
img_file_list=meter_images)
if use_erode:
kernel = np.ones((erode_kernel, erode_kernel), np.uint8)
for i in range(len(result)):
......@@ -334,7 +326,7 @@ def infer(args):
for im_file in image_lists:
meter_reader.predict(im_file, args.save_dir, args.use_erode,
args.erode_kernel, args.score_threshold,
args.seg_batch_size, args.seg_thread_num)
args.seg_batch_size)
elif args.use_camera:
cap_video = cv2.VideoCapture(args.camera_id)
if not cap_video.isOpened():
......@@ -347,7 +339,7 @@ def infer(args):
if ret:
meter_reader.predict(frame, args.save_dir, args.use_erode,
args.erode_kernel, args.score_threshold,
args.seg_batch_size, args.seg_thread_num)
args.seg_batch_size)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
else:
......
......@@ -105,12 +105,6 @@ def parse_args():
help="Segmentation batch size",
type=int,
default=2)
parser.add_argument(
'--seg_thread_num',
dest='seg_thread_num',
help="Thread number of segmentation preprocess",
type=int,
default=2)
return parser.parse_args()
......@@ -143,8 +137,7 @@ class MeterReader:
use_erode=True,
erode_kernel=4,
score_threshold=0.5,
seg_batch_size=2,
seg_thread_num=2):
seg_batch_size=2):
if isinstance(im_file, str):
im = cv2.imread(im_file).astype('float32')
else:
......@@ -190,8 +183,7 @@ class MeterReader:
meter_images.append(resized_meters[j - i])
result = self.segmenter.batch_predict(
transforms=self.seg_transforms,
img_file_list=meter_images,
thread_num=seg_thread_num)
img_file_list=meter_images)
if use_erode:
kernel = np.ones((erode_kernel, erode_kernel), np.uint8)
for i in range(len(result)):
......@@ -334,7 +326,7 @@ def infer(args):
for im_file in image_lists:
meter_reader.predict(im_file, args.save_dir, args.use_erode,
args.erode_kernel, args.score_threshold,
args.seg_batch_size, args.seg_thread_num)
args.seg_batch_size)
elif args.use_camera:
cap_video = cv2.VideoCapture(args.camera_id)
if not cap_video.isOpened():
......@@ -347,7 +339,7 @@ def infer(args):
if ret:
meter_reader.predict(frame, args.save_dir, args.use_erode,
args.erode_kernel, args.score_threshold,
args.seg_batch_size, args.seg_thread_num)
args.seg_batch_size)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
else:
......
......@@ -57,4 +57,4 @@ log_level = 2
from . import interpret
__version__ = '1.1.1'
__version__ = '1.1.4'
......@@ -51,6 +51,12 @@ def arg_parser():
action="store_true",
default=False,
help="export onnx model for deployment")
parser.add_argument(
"--onnx_opset",
"-oo",
type=int,
default=10,
help="when use paddle2onnx, set onnx opset version to export")
parser.add_argument(
"--data_conversion",
"-dc",
......@@ -162,7 +168,7 @@ def main():
logging.error(
"paddlex --export_inference --model_dir model_path --save_dir infer_model"
)
pdx.convertor.export_onnx_model(model, args.save_dir)
pdx.convertor.export_onnx_model(model, args.save_dir, args.onnx_opset)
if args.data_conversion:
assert args.source is not None, "--source should be defined while converting dataset"
......@@ -183,7 +189,7 @@ def main():
if args.split_dataset:
assert args.dataset_dir is not None, "--dataset_dir should be defined while spliting dataset"
assert args.format is not None, "--form should be defined while spliting dataset"
assert args.format is not None, "--format should be defined while spliting dataset"
assert args.val_value is not None, "--val_value should be defined while spliting dataset"
dataset_dir = args.dataset_dir
......
......@@ -29,10 +29,12 @@ def export_onnx(model_dir, save_dir, fixed_input_shape):
export_onnx_model(model, save_dir)
def export_onnx_model(model, save_dir):
if model.model_type == "detector" or model.__class__.__name__ == "FastSCNN":
def export_onnx_model(model, save_dir, opset_version=10):
if model.__class__.__name__ == "FastSCNN" or (
model.model_type == "detector" and
model.__class__.__name__ != "YOLOv3"):
logging.error(
"Only image classifier models and semantic segmentation models(except FastSCNN) are supported to export to ONNX"
"Only image classifier models, detection models(YOLOv3) and semantic segmentation models(except FastSCNN) are supported to export to ONNX"
)
try:
import x2paddle
......@@ -41,6 +43,406 @@ def export_onnx_model(model, save_dir):
except:
logging.error(
"You need to install x2paddle first, pip install x2paddle>=0.7.4")
from x2paddle.op_mapper.paddle_op_mapper import PaddleOpMapper
if opset_version == 10 and model.__class__.__name__ == "YOLOv3":
logging.warning(
"Export for openVINO by default, the output of multiclass_nms exported to onnx will contains background. If you need onnx completely consistent with paddle, please use X2Paddle to export"
)
x2paddle.op_mapper.paddle2onnx.opset10.paddle_custom_layer.multiclass_nms.multiclass_nms = multiclass_nms_for_openvino
from x2paddle.op_mapper.paddle2onnx.paddle_op_mapper import PaddleOpMapper
mapper = PaddleOpMapper()
mapper.convert(model.test_prog, save_dir)
mapper.convert(
model.test_prog,
save_dir,
scope=model.scope,
opset_version=opset_version)
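
# Hypothetical usage of the exporter above (paths are placeholders; this is the
# same path the CLI takes via pdx.convertor.export_onnx_model):
#
#   import paddlex as pdx
#   model = pdx.load_model('output/yolov3_mobilenetv1/best_model')
#   export_onnx_model(model, save_dir='./onnx_model', opset_version=10)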
def multiclass_nms_for_openvino(op, block):
"""
Convert Paddle's multiclass_nms op to ONNX ops.
This op gathers the selected boxes from the original boxes.
This variant targets OpenVINO, which doesn't support dynamic shapes.
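Strategy (sketch): run ONNX NonMaxSuppression to get (batch, class, box)
index triples; flatten the scores and gather the selected ones; take TopK by
score with k = min(keep_top_k, num_selected); gather the matching labels,
scores and boxes; concatenate them into Paddle's [label, score, x1, y1, x2, y2]
layout and finally reorder the rows by class id.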
"""
import math
import sys
import numpy as np
import paddle.fluid.core as core
import paddle.fluid as fluid
import onnx
import warnings
from onnx import helper, onnx_pb
inputs = dict()
outputs = dict()
attrs = dict()
for name in op.input_names:
inputs[name] = op.input(name)
for name in op.output_names:
outputs[name] = op.output(name)
for name in op.attr_names:
attrs[name] = op.attr(name)
result_name = outputs['Out'][0]
background = attrs['background_label']
normalized = attrs['normalized']
if not normalized:
warnings.warn(
'The parameter normalized of the Paddle multiclass_nms OP is False, which differs from ONNX. '
'Please set normalized=True in the multiclass_nms of Paddle.')
# convert the Paddle attributes to ONNX constant tensors
name_score_threshold = [outputs['Out'][0] + "@score_threshold"]
name_iou_threshold = [outputs['Out'][0] + "@iou_threshold"]
name_keep_top_k = [outputs['Out'][0] + '@keep_top_k']
name_keep_top_k_2D = [outputs['Out'][0] + '@keep_top_k_1D']
node_score_threshold = onnx.helper.make_node(
'Constant',
inputs=[],
outputs=name_score_threshold,
value=onnx.helper.make_tensor(
name=name_score_threshold[0] + "@const",
data_type=onnx.TensorProto.FLOAT,
dims=(),
vals=[float(attrs['score_threshold'])]))
node_iou_threshold = onnx.helper.make_node(
'Constant',
inputs=[],
outputs=name_iou_threshold,
value=onnx.helper.make_tensor(
name=name_iou_threshold[0] + "@const",
data_type=onnx.TensorProto.FLOAT,
dims=(),
vals=[float(attrs['nms_threshold'])]))
node_keep_top_k = onnx.helper.make_node(
'Constant',
inputs=[],
outputs=name_keep_top_k,
value=onnx.helper.make_tensor(
name=name_keep_top_k[0] + "@const",
data_type=onnx.TensorProto.INT64,
dims=(),
vals=[np.int64(attrs['keep_top_k'])]))
node_keep_top_k_2D = onnx.helper.make_node(
'Constant',
inputs=[],
outputs=name_keep_top_k_2D,
value=onnx.helper.make_tensor(
name=name_keep_top_k_2D[0] + "@const",
data_type=onnx.TensorProto.INT64,
dims=[1, 1],
vals=[np.int64(attrs['keep_top_k'])]))
# the paddle data format is x1,y1,x2,y2
kwargs = {'center_point_box': 0}
name_select_nms = [outputs['Out'][0] + "@select_index"]
node_select_nms= onnx.helper.make_node(
'NonMaxSuppression',
inputs=inputs['BBoxes'] + inputs['Scores'] + name_keep_top_k +\
name_iou_threshold + name_score_threshold,
outputs=name_select_nms)
# step 1 nodes select the nms class
node_list = [
node_score_threshold, node_iou_threshold, node_keep_top_k,
node_keep_top_k_2D, node_select_nms
]
# create some const value to use
name_const_value = [result_name+"@const_0",
result_name+"@const_1",\
result_name+"@const_2",\
result_name+"@const_-1"]
value_const_value = [0, 1, 2, -1]
for name, value in zip(name_const_value, value_const_value):
node = onnx.helper.make_node(
'Constant',
inputs=[],
outputs=[name],
value=onnx.helper.make_tensor(
name=name + "@const",
data_type=onnx.TensorProto.INT64,
dims=[1],
vals=[value]))
node_list.append(node)
# In this block we decode the raw score data, reshaping N * C * M to 1 * (N*C*M),
# and at the same time decode the selected indices to 1 * D and gather them.
outputs_gather_1_ = [result_name + "@gather_1_"]
node_gather_1_ = onnx.helper.make_node(
'Gather',
inputs=name_select_nms + [result_name + "@const_1"],
outputs=outputs_gather_1_,
axis=1)
node_list.append(node_gather_1_)
outputs_gather_1 = [result_name + "@gather_1"]
node_gather_1 = onnx.helper.make_node(
'Unsqueeze',
inputs=outputs_gather_1_,
outputs=outputs_gather_1,
axes=[0])
node_list.append(node_gather_1)
outputs_gather_2_ = [result_name + "@gather_2_"]
node_gather_2_ = onnx.helper.make_node(
'Gather',
inputs=name_select_nms + [result_name + "@const_2"],
outputs=outputs_gather_2_,
axis=1)
node_list.append(node_gather_2_)
outputs_gather_2 = [result_name + "@gather_2"]
node_gather_2 = onnx.helper.make_node(
'Unsqueeze',
inputs=outputs_gather_2_,
outputs=outputs_gather_2,
axes=[0])
node_list.append(node_gather_2)
# reshape scores N * C * M to (N*C*M) * 1
outputs_reshape_scores_rank1 = [result_name + "@reshape_scores_rank1"]
node_reshape_scores_rank1 = onnx.helper.make_node(
"Reshape",
inputs=inputs['Scores'] + [result_name + "@const_-1"],
outputs=outputs_reshape_scores_rank1)
node_list.append(node_reshape_scores_rank1)
# get the shape of scores
outputs_shape_scores = [result_name + "@shape_scores"]
node_shape_scores = onnx.helper.make_node(
'Shape', inputs=inputs['Scores'], outputs=outputs_shape_scores)
node_list.append(node_shape_scores)
# gather the index: 2 shape of scores
outputs_gather_scores_dim1 = [result_name + "@gather_scores_dim1"]
node_gather_scores_dim1 = onnx.helper.make_node(
'Gather',
inputs=outputs_shape_scores + [result_name + "@const_2"],
outputs=outputs_gather_scores_dim1,
axis=0)
node_list.append(node_gather_scores_dim1)
# mul class * M
outputs_mul_classnum_boxnum = [result_name + "@mul_classnum_boxnum"]
node_mul_classnum_boxnum = onnx.helper.make_node(
'Mul',
inputs=outputs_gather_1 + outputs_gather_scores_dim1,
outputs=outputs_mul_classnum_boxnum)
node_list.append(node_mul_classnum_boxnum)
# add class * M * index
outputs_add_class_M_index = [result_name + "@add_class_M_index"]
node_add_class_M_index = onnx.helper.make_node(
'Add',
inputs=outputs_mul_classnum_boxnum + outputs_gather_2,
outputs=outputs_add_class_M_index)
node_list.append(node_add_class_M_index)
# Squeeze the indices to 1 dim
outputs_squeeze_select_index = [result_name + "@squeeze_select_index"]
node_squeeze_select_index = onnx.helper.make_node(
'Squeeze',
inputs=outputs_add_class_M_index,
outputs=outputs_squeeze_select_index,
axes=[0, 2])
node_list.append(node_squeeze_select_index)
# gather the data from flatten scores
outputs_gather_select_scores = [result_name + "@gather_select_scores"]
node_gather_select_scores = onnx.helper.make_node('Gather',
inputs=outputs_reshape_scores_rank1 + \
outputs_squeeze_select_index,
outputs=outputs_gather_select_scores,
axis=0)
node_list.append(node_gather_select_scores)
# get nums to input TopK
outputs_shape_select_num = [result_name + "@shape_select_num"]
node_shape_select_num = onnx.helper.make_node(
'Shape',
inputs=outputs_gather_select_scores,
outputs=outputs_shape_select_num)
node_list.append(node_shape_select_num)
outputs_gather_select_num = [result_name + "@gather_select_num"]
node_gather_select_num = onnx.helper.make_node(
'Gather',
inputs=outputs_shape_select_num + [result_name + "@const_0"],
outputs=outputs_gather_select_num,
axis=0)
node_list.append(node_gather_select_num)
outputs_unsqueeze_select_num = [result_name + "@unsqueeze_select_num"]
node_unsqueeze_select_num = onnx.helper.make_node(
'Unsqueeze',
inputs=outputs_gather_select_num,
outputs=outputs_unsqueeze_select_num,
axes=[0])
node_list.append(node_unsqueeze_select_num)
outputs_concat_topK_select_num = [result_name + "@conat_topK_select_num"]
node_conat_topK_select_num = onnx.helper.make_node(
'Concat',
inputs=outputs_unsqueeze_select_num + name_keep_top_k_2D,
outputs=outputs_concat_topK_select_num,
axis=0)
node_list.append(node_conat_topK_select_num)
outputs_cast_concat_topK_select_num = [
result_name + "@concat_topK_select_num"
]
node_outputs_cast_concat_topK_select_num = onnx.helper.make_node(
'Cast',
inputs=outputs_concat_topK_select_num,
outputs=outputs_cast_concat_topK_select_num,
to=6)
node_list.append(node_outputs_cast_concat_topK_select_num)
# get min(topK, num_select)
outputs_compare_topk_num_select = [
result_name + "@compare_topk_num_select"
]
node_compare_topk_num_select = onnx.helper.make_node(
'ReduceMin',
inputs=outputs_cast_concat_topK_select_num,
outputs=outputs_compare_topk_num_select,
keepdims=0)
node_list.append(node_compare_topk_num_select)
# unsqueeze the indices to 1D tensor
outputs_unsqueeze_topk_select_indices = [
result_name + "@unsqueeze_topk_select_indices"
]
node_unsqueeze_topk_select_indices = onnx.helper.make_node(
'Unsqueeze',
inputs=outputs_compare_topk_num_select,
outputs=outputs_unsqueeze_topk_select_indices,
axes=[0])
node_list.append(node_unsqueeze_topk_select_indices)
# cast the indices to INT64
outputs_cast_topk_indices = [result_name + "@cast_topk_indices"]
node_cast_topk_indices = onnx.helper.make_node(
'Cast',
inputs=outputs_unsqueeze_topk_select_indices,
outputs=outputs_cast_topk_indices,
to=7)
node_list.append(node_cast_topk_indices)
# select topk scores indices
outputs_topk_select_topk_indices = [result_name + "@topk_select_topk_values",\
result_name + "@topk_select_topk_indices"]
node_topk_select_topk_indices = onnx.helper.make_node(
'TopK',
inputs=outputs_gather_select_scores + outputs_cast_topk_indices,
outputs=outputs_topk_select_topk_indices)
node_list.append(node_topk_select_topk_indices)
# gather topk label, scores, boxes
outputs_gather_topk_scores = [result_name + "@gather_topk_scores"]
node_gather_topk_scores = onnx.helper.make_node(
'Gather',
inputs=outputs_gather_select_scores +
[outputs_topk_select_topk_indices[1]],
outputs=outputs_gather_topk_scores,
axis=0)
node_list.append(node_gather_topk_scores)
outputs_gather_topk_class = [result_name + "@gather_topk_class"]
node_gather_topk_class = onnx.helper.make_node(
'Gather',
inputs=outputs_gather_1 + [outputs_topk_select_topk_indices[1]],
outputs=outputs_gather_topk_class,
axis=1)
node_list.append(node_gather_topk_class)
# gather the boxes need to gather the boxes id, then get boxes
outputs_gather_topk_boxes_id = [result_name + "@gather_topk_boxes_id"]
node_gather_topk_boxes_id = onnx.helper.make_node(
'Gather',
inputs=outputs_gather_2 + [outputs_topk_select_topk_indices[1]],
outputs=outputs_gather_topk_boxes_id,
axis=1)
node_list.append(node_gather_topk_boxes_id)
# squeeze the gather_topk_boxes_id to 1 dim
outputs_squeeze_topk_boxes_id = [result_name + "@squeeze_topk_boxes_id"]
node_squeeze_topk_boxes_id = onnx.helper.make_node(
'Squeeze',
inputs=outputs_gather_topk_boxes_id,
outputs=outputs_squeeze_topk_boxes_id,
axes=[0, 2])
node_list.append(node_squeeze_topk_boxes_id)
outputs_gather_select_boxes = [result_name + "@gather_select_boxes"]
node_gather_select_boxes = onnx.helper.make_node(
'Gather',
inputs=inputs['BBoxes'] + outputs_squeeze_topk_boxes_id,
outputs=outputs_gather_select_boxes,
axis=1)
node_list.append(node_gather_select_boxes)
# concat the final result
# before concat need to cast the class to float
outputs_cast_topk_class = [result_name + "@cast_topk_class"]
node_cast_topk_class = onnx.helper.make_node(
'Cast',
inputs=outputs_gather_topk_class,
outputs=outputs_cast_topk_class,
to=1)
node_list.append(node_cast_topk_class)
outputs_unsqueeze_topk_scores = [result_name + "@unsqueeze_topk_scores"]
node_unsqueeze_topk_scores = onnx.helper.make_node(
'Unsqueeze',
inputs=outputs_gather_topk_scores,
outputs=outputs_unsqueeze_topk_scores,
axes=[0, 2])
node_list.append(node_unsqueeze_topk_scores)
inputs_concat_final_results = outputs_cast_topk_class + outputs_unsqueeze_topk_scores +\
outputs_gather_select_boxes
outputs_sort_by_score_results = [result_name + "@concat_topk_scores"]
node_sort_by_score_results = onnx.helper.make_node(
'Concat',
inputs=inputs_concat_final_results,
outputs=outputs_sort_by_score_results,
axis=2)
node_list.append(node_sort_by_score_results)
# select topk classes indices
outputs_squeeze_cast_topk_class = [
result_name + "@squeeze_cast_topk_class"
]
node_squeeze_cast_topk_class = onnx.helper.make_node(
'Squeeze',
inputs=outputs_cast_topk_class,
outputs=outputs_squeeze_cast_topk_class,
axes=[0, 2])
node_list.append(node_squeeze_cast_topk_class)
outputs_neg_squeeze_cast_topk_class = [
result_name + "@neg_squeeze_cast_topk_class"
]
node_neg_squeeze_cast_topk_class = onnx.helper.make_node(
'Neg',
inputs=outputs_squeeze_cast_topk_class,
outputs=outputs_neg_squeeze_cast_topk_class)
node_list.append(node_neg_squeeze_cast_topk_class)
outputs_topk_select_classes_indices = [result_name + "@topk_select_topk_classes_scores",\
result_name + "@topk_select_topk_classes_indices"]
node_topk_select_classes_indices = onnx.helper.make_node(
'TopK',
inputs=outputs_neg_squeeze_cast_topk_class + outputs_cast_topk_indices,
outputs=outputs_topk_select_classes_indices)
node_list.append(node_topk_select_classes_indices)
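# Reorder the concatenated [class, score, box] rows by the class-sorted
# indices to produce the final output tensor.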
outputs_concat_final_results = outputs['Out']
node_concat_final_results = onnx.helper.make_node(
'Gather',
inputs=outputs_sort_by_score_results +
[outputs_topk_select_classes_indices[1]],
outputs=outputs_concat_final_results,
axis=1)
node_list.append(node_concat_final_results)
return node_list
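For orientation, the subgraph assembled above reduces to a TopK that takes k as a tensor input, followed by Gathers on the resulting indices. Below is a minimal, self-contained sketch of that pattern — not part of this commit; opset 11 and the toy shapes are assumptions:

```python
import onnx
from onnx import helper, TensorProto

# k is supplied as a 1-D int64 tensor, matching the opset >= 10 TopK form used above
k_init = helper.make_tensor('K', TensorProto.INT64, dims=[1], vals=[2])
topk = helper.make_node(
    'TopK', inputs=['scores', 'K'],
    outputs=['topk_scores', 'topk_indices'], axis=0)
gather = helper.make_node(
    'Gather', inputs=['boxes', 'topk_indices'],
    outputs=['topk_boxes'], axis=0)
graph = helper.make_graph(
    [topk, gather], 'topk_gather_sketch',
    inputs=[
        helper.make_tensor_value_info('scores', TensorProto.FLOAT, [6]),
        helper.make_tensor_value_info('boxes', TensorProto.FLOAT, [6, 4]),
    ],
    outputs=[
        helper.make_tensor_value_info('topk_boxes', TensorProto.FLOAT, [2, 4]),
    ],
    initializer=[k_init])
model = helper.make_model(graph, opset_imports=[helper.make_opsetid('', 11)])
onnx.checker.check_model(model)  # structurally valid TopK -> Gather chain
```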
......@@ -23,6 +23,7 @@ import yaml
import copy
import json
import functools
import multiprocessing as mp
import paddlex.utils.logging as logging
from paddlex.utils import seconds_to_hms
from paddlex.utils.utils import EarlyStop
......@@ -76,6 +77,16 @@ class BaseAPI:
self.completed_epochs = 0
self.scope = fluid.global_scope()
# Thread pool used at prediction time to preprocess the input data in parallel, one image per task.
# Mainly used by the batch_predict interface.
thread_num = mp.cpu_count() if mp.cpu_count() < 8 else 8
self.thread_pool = mp.pool.ThreadPool(thread_num)
def reset_thread_pool(self, thread_num):
self.thread_pool.close()
self.thread_pool.join()
self.thread_pool = mp.pool.ThreadPool(thread_num)
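The conditional above is just a capped pool size, i.e. `min(mp.cpu_count(), 8)`. A small stand-alone sketch of the same pattern — the `str.upper` payload is only a placeholder for the real `transforms` callable:

```python
from multiprocessing import cpu_count
from multiprocessing.pool import ThreadPool

thread_num = min(cpu_count(), 8)  # same cap as the conditional above
pool = ThreadPool(thread_num)
try:
    # stand-in for mapping `transforms` over a batch of images
    results = pool.map(str.upper, ['a', 'b', 'c'])
finally:
    pool.close()
    pool.join()
print(results)  # ['A', 'B', 'C']
```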
def _get_single_card_bs(self, batch_size):
if batch_size % len(self.places) == 0:
return int(batch_size // len(self.places))
......@@ -356,16 +367,6 @@ class BaseAPI:
]
test_outputs = list(self.test_outputs.values())
with fluid.scope_guard(self.scope):
if self.__class__.__name__ == 'MaskRCNN':
from paddlex.utils.save import save_mask_inference_model
save_mask_inference_model(
dirname=save_dir,
executor=self.exe,
params_filename='__params__',
feeded_var_names=test_input_names,
target_vars=test_outputs,
main_program=self.test_prog)
else:
fluid.io.save_inference_model(
dirname=save_dir,
executor=self.exe,
......
......@@ -279,16 +279,18 @@ class BaseClassifier(BaseAPI):
return metrics
@staticmethod
def _preprocess(images, transforms, model_type, class_name, thread_num=1):
def _preprocess(images, transforms, model_type, class_name, thread_pool=None):
arrange_transforms(
model_type=model_type,
class_name=class_name,
transforms=transforms,
mode='test')
pool = ThreadPool(thread_num)
batch_data = pool.map(transforms, images)
pool.close()
pool.join()
if thread_pool is not None:
batch_data = thread_pool.map(transforms, images)
else:
batch_data = list()
for image in images:
batch_data.append(transforms(image))
padding_batch = generate_minibatch(batch_data)
im = np.array([data[0] for data in padding_batch])
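The same pool-or-sequential branch recurs in each model class below; isolated, the pattern is simply (names are placeholders):

```python
def run_transforms(transforms, images, thread_pool=None):
    # reuse the shared pool when one is supplied, otherwise stay sequential
    if thread_pool is not None:
        return thread_pool.map(transforms, images)
    return [transforms(image) for image in images]
```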
......@@ -344,15 +346,13 @@ class BaseClassifier(BaseAPI):
def batch_predict(self,
img_file_list,
transforms=None,
topk=1,
thread_num=2):
topk=1):
"""预测。
Args:
img_file_list(list|tuple): 对列表(或元组)中的图像同时进行预测,列表中的元素可以是图像路径
也可以是解码后的排列格式为(H,W,C)且类型为float32且为BGR格式的数组。
transforms (paddlex.cls.transforms): 数据预处理操作。
topk (int): 预测时前k个最大值。
thread_num (int): 并发执行各图像预处理时的线程数。
Returns:
list: 每个元素都为列表,表示各图像的预测结果。在各图像的预测列表中,其中元素均为字典。字典的关键字为'category_id'、'category'、'score',
分别对应预测类别id、预测类别标签、预测得分。
......@@ -367,7 +367,7 @@ class BaseClassifier(BaseAPI):
transforms = self.test_transforms
im = BaseClassifier._preprocess(img_file_list, transforms,
self.model_type,
self.__class__.__name__, thread_num)
self.__class__.__name__, self.thread_pool)
with fluid.scope_guard(self.scope):
result = self.exe.run(self.test_prog,
......
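With `thread_num` dropped from the signature, concurrency is now tuned once on the model rather than per call. A hedged usage sketch — the model path and image files are hypothetical:

```python
import paddlex as pdx

model = pdx.load_model('output/mobilenetv2/best_model')  # hypothetical path
model.reset_thread_pool(4)  # optional: resize the shared preprocessing pool
results = model.batch_predict(['1.jpg', '2.jpg'], topk=1)
for res in results:
    print(res[0]['category'], res[0]['score'])
```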
......@@ -448,16 +448,18 @@ class DeepLabv3p(BaseAPI):
return metrics
@staticmethod
def _preprocess(images, transforms, model_type, class_name, thread_num=1):
def _preprocess(images, transforms, model_type, class_name, thread_pool=None):
arrange_transforms(
model_type=model_type,
class_name=class_name,
transforms=transforms,
mode='test')
pool = ThreadPool(thread_num)
batch_data = pool.map(transforms, images)
pool.close()
pool.join()
if thread_pool is not None:
batch_data = thread_pool.map(transforms, images)
else:
batch_data = list()
for image in images:
batch_data.append(transforms(image))
padding_batch = generate_minibatch(batch_data)
im = np.array(
[data[0] for data in padding_batch],
......@@ -522,13 +524,12 @@ class DeepLabv3p(BaseAPI):
preds = DeepLabv3p._postprocess(result, im_info)
return preds[0]
def batch_predict(self, img_file_list, transforms=None, thread_num=2):
def batch_predict(self, img_file_list, transforms=None):
"""预测。
Args:
img_file_list(list|tuple): 对列表(或元组)中的图像同时进行预测,列表中的元素可以是图像路径
也可以是解码后的排列格式为(H,W,C)且类型为float32且为BGR格式的数组。
transforms(paddlex.cv.transforms): 数据预处理操作。
thread_num (int): 并发执行各图像预处理时的线程数。
Returns:
list: 每个元素都为列表,表示各图像的预测结果。各图像的预测结果用字典表示,包含关键字'label_map'和'score_map', 'label_map'存储预测结果灰度图,
......@@ -543,7 +544,7 @@ class DeepLabv3p(BaseAPI):
transforms = self.test_transforms
im, im_info = DeepLabv3p._preprocess(
img_file_list, transforms, self.model_type,
self.__class__.__name__, thread_num)
self.__class__.__name__, self.thread_pool)
with fluid.scope_guard(self.scope):
result = self.exe.run(self.test_prog,
......
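A hedged usage sketch for the segmentation case, reading the two keys named in the docstring (paths are hypothetical):

```python
import paddlex as pdx

model = pdx.load_model('output/deeplabv3p/best_model')  # hypothetical path
results = model.batch_predict(['street_1.png', 'street_2.png'])
for res in results:
    print(res['label_map'].shape,  # per-pixel class ids
          res['score_map'].shape)  # per-pixel class probabilities
```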
......@@ -376,16 +376,18 @@ class FasterRCNN(BaseAPI):
return metrics
@staticmethod
def _preprocess(images, transforms, model_type, class_name, thread_num=1):
def _preprocess(images, transforms, model_type, class_name, thread_pool=None):
arrange_transforms(
model_type=model_type,
class_name=class_name,
transforms=transforms,
mode='test')
pool = ThreadPool(thread_num)
batch_data = pool.map(transforms, images)
pool.close()
pool.join()
if thread_pool is not None:
batch_data = thread_pool.map(transforms, images)
else:
batch_data = list()
for image in images:
batch_data.append(transforms(image))
padding_batch = generate_minibatch(batch_data)
im = np.array([data[0] for data in padding_batch])
im_resize_info = np.array([data[1] for data in padding_batch])
......@@ -453,14 +455,13 @@ class FasterRCNN(BaseAPI):
return preds[0]
def batch_predict(self, img_file_list, transforms=None, thread_num=2):
def batch_predict(self, img_file_list, transforms=None):
"""预测。
Args:
img_file_list(list|tuple): 对列表(或元组)中的图像同时进行预测,列表中的元素可以是图像路径
也可以是解码后的排列格式为(H,W,C)且类型为float32且为BGR格式的数组。
transforms (paddlex.det.transforms): 数据预处理操作。
thread_num (int): 并发执行各图像预处理时的线程数。
Returns:
list: 每个元素都为列表,表示各图像的预测结果。在各图像的预测结果列表中,每个预测结果由预测框类别标签、
......@@ -477,7 +478,7 @@ class FasterRCNN(BaseAPI):
transforms = self.test_transforms
im, im_resize_info, im_shape = FasterRCNN._preprocess(
img_file_list, transforms, self.model_type,
self.__class__.__name__, thread_num)
self.__class__.__name__, self.thread_pool)
with fluid.scope_guard(self.scope):
result = self.exe.run(self.test_prog,
......
......@@ -408,14 +408,13 @@ class MaskRCNN(FasterRCNN):
return preds[0]
def batch_predict(self, img_file_list, transforms=None, thread_num=2):
def batch_predict(self, img_file_list, transforms=None):
"""预测。
Args:
img_file_list(list|tuple): 对列表(或元组)中的图像同时进行预测,列表中的元素可以是图像路径
也可以是解码后的排列格式为(H,W,C)且类型为float32且为BGR格式的数组。
transforms (paddlex.det.transforms): 数据预处理操作。
thread_num (int): 并发执行各图像预处理时的线程数。
Returns:
dict: 每个元素都为列表,表示各图像的预测结果。在各图像的预测结果列表中,每个预测结果由预测框类别标签、预测框类别名称、
预测框坐标(坐标格式为[xmin, ymin, w, h])、
......@@ -432,7 +431,7 @@ class MaskRCNN(FasterRCNN):
transforms = self.test_transforms
im, im_resize_info, im_shape = FasterRCNN._preprocess(
img_file_list, transforms, self.model_type,
self.__class__.__name__, thread_num)
self.__class__.__name__, self.thread_pool)
with fluid.scope_guard(self.scope):
result = self.exe.run(self.test_prog,
......
......@@ -447,16 +447,18 @@ class PPYOLO(BaseAPI):
return evaluate_metrics
@staticmethod
def _preprocess(images, transforms, model_type, class_name, thread_num=1):
def _preprocess(images, transforms, model_type, class_name, thread_pool=None):
arrange_transforms(
model_type=model_type,
class_name=class_name,
transforms=transforms,
mode='test')
pool = ThreadPool(thread_num)
batch_data = pool.map(transforms, images)
pool.close()
pool.join()
if thread_pool is not None:
batch_data = thread_pool.map(transforms, images)
else:
batch_data = list()
for image in images:
batch_data.append(transforms(image))
padding_batch = generate_minibatch(batch_data)
im = np.array(
[data[0] for data in padding_batch],
......@@ -520,14 +522,13 @@ class PPYOLO(BaseAPI):
len(images), self.num_classes, self.labels)
return preds[0]
def batch_predict(self, img_file_list, transforms=None, thread_num=2):
def batch_predict(self, img_file_list, transforms=None):
"""预测。
Args:
img_file_list (list|tuple): 对列表(或元组)中的图像同时进行预测,列表中的元素可以是图像路径,也可以是解码后的排列格式为(H,W,C)
且类型为float32且为BGR格式的数组。
transforms (paddlex.det.transforms): 数据预处理操作。
thread_num (int): 并发执行各图像预处理时的线程数。
Returns:
list: 每个元素都为列表,表示各图像的预测结果。在各图像的预测结果列表中,每个预测结果由预测框类别标签、
预测框类别名称、预测框坐标(坐标格式为[xmin, ymin, w, h])、
......@@ -543,7 +544,7 @@ class PPYOLO(BaseAPI):
transforms = self.test_transforms
im, im_size = PPYOLO._preprocess(img_file_list, transforms,
self.model_type,
self.__class__.__name__, thread_num)
self.__class__.__name__, self.thread_pool)
with fluid.scope_guard(self.scope):
result = self.exe.run(self.test_prog,
......
......@@ -91,7 +91,23 @@ sensitivities_data = {
'DeepLabv3p_Xception65_aspp_decoder':
'https://bj.bcebos.com/paddlex/slim_prune/deeplab_xception65_with_aspp_decoder.sensitivities',
'DeepLabv3p_Xception41_aspp_decoder':
'https://bj.bcebos.com/paddlex/slim_prune/deeplab_xception41_with_aspp_decoder.sensitivities'
'https://bj.bcebos.com/paddlex/slim_prune/deeplab_xception41_with_aspp_decoder.sensitivities',
'HRNet_W18_Seg':
'https://bj.bcebos.com/paddlex/slim_prune/hrnet_w18.sensitivities',
'HRNet_W30_Seg':
'https://bj.bcebos.com/paddlex/slim_prune/hrnet_w30.sensitivities',
'HRNet_W32_Seg':
'https://bj.bcebos.com/paddlex/slim_prune/hrnet_w32.sensitivities',
'HRNet_W40_Seg':
'https://bj.bcebos.com/paddlex/slim_prune/hrnet_w40.sensitivities',
'HRNet_W44_Seg':
'https://bj.bcebos.com/paddlex/slim_prune/hrnet_w44.sensitivities',
'HRNet_W48_Seg':
'https://bj.bcebos.com/paddlex/slim_prune/hrnet_w48.sensitivities',
'HRNet_W64_Seg':
'https://bj.bcebos.com/paddlex/slim_prune/hrnet_w64.sensitivities',
'FastSCNN':
'https://bj.bcebos.com/paddlex/slim_prune/fast_scnn.sensitivities'
}
......@@ -105,6 +121,8 @@ def get_sensitivities(flag, model, save_dir):
elif hasattr(model, 'encoder_with_aspp') or hasattr(model,
'enable_decoder'):
model_type = model_type + '_' + 'aspp' + '_' + 'decoder'
if model_type.startswith('HRNet') and model.model_type == 'segmenter':
model_type = '{}_W{}_Seg'.format(model_type, model.width)
if osp.isfile(flag):
return flag
elif flag == 'DEFAULT':
......@@ -244,6 +262,28 @@ def get_prune_params(model):
if i in prune_names:
prune_names.remove(i)
elif model_type.startswith('HRNet') and model.model_type == 'segmenter':
for param in program.global_block().all_parameters():
if 'weight' not in param.name:
continue
prune_names.append(param.name)
params_not_prune = ['conv-1_weights']
for i in params_not_prune:
if i in prune_names:
prune_names.remove(i)
elif model_type.startswith('FastSCNN'):
for param in program.global_block().all_parameters():
if 'weight' not in param.name:
continue
if 'dwise' in param.name or 'depthwise' in param.name or 'logit' in param.name:
continue
prune_names.append(param.name)
params_not_prune = ['classifier/weights']
for i in params_not_prune:
if i in prune_names:
prune_names.remove(i)
elif model_type.startswith('DeepLabv3p'):
for param in program.global_block().all_parameters():
if 'weight' not in param.name:
......
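The new branches all follow one selection rule: keep weight parameters, skip layers that must not be pruned, then drop an explicit blacklist. In isolation — the argument stands in for `program.global_block().all_parameters()`:

```python
def collect_prune_names(params,
                        skip_tokens=('dwise', 'depthwise', 'logit'),
                        blacklist=('classifier/weights',)):
    # keep only weight parameters, minus non-prunable layers and the blacklist
    names = []
    for param in params:
        if 'weight' not in param.name:
            continue
        if any(tok in param.name for tok in skip_tokens):
            continue
        names.append(param.name)
    return [name for name in names if name not in blacklist]
```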
......@@ -311,7 +311,7 @@ class YOLOv3:
def _upsample(self, input, scale=2, name=None):
out = fluid.layers.resize_nearest(
input=input, scale=float(scale), name=name)
input=input, scale=float(scale), name=name, align_corners=False)
return out
def _detection_block(self,
......
......@@ -235,10 +235,13 @@ class HRNet(object):
name=name + '_layer_' + str(i + 1) + '_' + str(j + 1))
if self.feature_maps == "stage4":
y = fluid.layers.resize_bilinear(
input=y, out_shape=[height, width])
input=y,
out_shape=[height, width],
align_corners=False,
align_mode=1)
else:
y = fluid.layers.resize_nearest(
input=y, scale=2**(j - i))
input=y, scale=2**(j - i), align_corners=False)
residual = fluid.layers.elementwise_add(
x=residual, y=y, act=None)
elif j < i:
......
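Both resize edits pin the interpolation grid explicitly instead of relying on defaults. A minimal sketch of the two call forms as they appear above (fluid 1.x API; the wrapper names are illustrative):

```python
import paddle.fluid as fluid

def upsample_nearest(x, scale=2):
    # nearest-neighbor upsample with corner alignment disabled, as in YOLOv3
    return fluid.layers.resize_nearest(
        input=x, scale=float(scale), align_corners=False)

def resize_bilinear(x, height, width):
    # bilinear resize with the align_corners/align_mode pair used in HRNet
    return fluid.layers.resize_bilinear(
        input=x, out_shape=[height, width],
        align_corners=False, align_mode=1)
```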
......@@ -16,6 +16,7 @@ import os.path as osp
import cv2
import numpy as np
import yaml
import multiprocessing as mp
import paddlex
import paddle.fluid as fluid
from paddlex.cv.transforms import build_transforms
......@@ -79,12 +80,21 @@ class Predictor:
self.predictor = self.create_predictor(use_gpu, gpu_id, use_mkl,
mkl_thread_num, use_trt,
use_glog, memory_optimize)
# Thread pool used at prediction time to preprocess the input data in parallel, one image per task.
# Mainly used by the batch_predict interface.
thread_num = mp.cpu_count() if mp.cpu_count() < 8 else 8
self.thread_pool = mp.pool.ThreadPool(thread_num)
def reset_thread_pool(self, thread_num):
self.thread_pool.close()
self.thread_pool.join()
self.thread_pool = mp.pool.ThreadPool(thread_num)
def create_predictor(self,
use_gpu=True,
gpu_id=0,
use_mkl=False,
mkl_thread_num=4,
mkl_thread_num=mp.cpu_count(),
use_trt=False,
use_glog=False,
memory_optimize=True):
......@@ -98,6 +108,7 @@ class Predictor:
else:
config.disable_gpu()
if use_mkl:
if self.model_name not in ["HRNet", "DeepLabv3p"]:
config.enable_mkldnn()
config.set_cpu_math_library_num_threads(mkl_thread_num)
if use_glog:
......@@ -114,7 +125,7 @@ class Predictor:
predictor = fluid.core.create_paddle_predictor(config)
return predictor
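The CPU branch above now gates MKL-DNN on the model type. A hedged sketch of that configuration path in isolation (fluid 1.x `AnalysisConfig`; the exact nesting of the thread-count call in the commit may differ):

```python
import paddle.fluid as fluid

def make_cpu_config(model_dir, model_name, use_mkl=True, mkl_thread_num=4):
    config = fluid.core.AnalysisConfig(model_dir)
    config.disable_gpu()
    if use_mkl:
        # the commit skips MKL-DNN for these two models
        if model_name not in ["HRNet", "DeepLabv3p"]:
            config.enable_mkldnn()
        config.set_cpu_math_library_num_threads(mkl_thread_num)
    return config
```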
def preprocess(self, image, thread_num=1):
def preprocess(self, image, thread_pool=None):
""" 对图像做预处理
Args:
......@@ -128,7 +139,7 @@ class Predictor:
self.transforms,
self.model_type,
self.model_name,
thread_num=thread_num)
thread_pool=thread_pool)
res['image'] = im
elif self.model_type == "detector":
if self.model_name in ["PPYOLO", "YOLOv3"]:
......@@ -137,7 +148,7 @@ class Predictor:
self.transforms,
self.model_type,
self.model_name,
thread_num=thread_num)
thread_pool=thread_pool)
res['image'] = im
res['im_size'] = im_size
if self.model_name.count('RCNN') > 0:
......@@ -146,7 +157,7 @@ class Predictor:
self.transforms,
self.model_type,
self.model_name,
thread_num=thread_num)
thread_pool=thread_pool)
res['image'] = im
res['im_info'] = im_resize_info
res['im_shape'] = im_shape
......@@ -156,7 +167,7 @@ class Predictor:
self.transforms,
self.model_type,
self.model_name,
thread_num=thread_num)
thread_pool=thread_pool)
res['image'] = im
res['im_info'] = im_info
return res
......@@ -253,17 +264,16 @@ class Predictor:
return results[0]
def batch_predict(self, image_list, topk=1, thread_num=2):
def batch_predict(self, image_list, topk=1):
""" 图片预测
Args:
image_list(list|tuple): 对列表(或元组)中的图像同时进行预测,列表中的元素可以是图像路径
也可以是解码后的排列格式为(H,W,C)且类型为float32且为BGR格式的数组。
thread_num (int): 并发执行各图像预处理时的线程数。
topk(int): 分类预测时使用,表示预测前topk的结果
"""
preprocessed_input = self.preprocess(image_list)
preprocessed_input = self.preprocess(image_list, self.thread_pool)
model_pred = self.raw_predict(preprocessed_input)
im_shape = None if 'im_shape' not in preprocessed_input else preprocessed_input[
'im_shape']
......
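A hedged usage sketch of the deployment-side API after this change (the export directory is hypothetical):

```python
from paddlex.deploy import Predictor

predictor = Predictor('./inference_model')  # hypothetical exported model dir
predictor.reset_thread_pool(4)              # optional: resize the shared pool
results = predictor.batch_predict(['1.jpg', '2.jpg'], topk=5)
print(len(results))
```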
......@@ -19,7 +19,7 @@ long_description = "PaddlePaddle Entire Process Development Toolkit"
setuptools.setup(
name="paddlex",
version='1.1.1',
version='1.1.4',
author="paddlex",
author_email="paddlex@baidu.com",
description=long_description,
......