diff --git a/deploy/README.md b/deploy/README.md
index 7fe3219882c3c8d863824829baf6742b74759d2f..15fbe898d3a4ebbf488b5c0fc1f665bf847f3aa9 100644
--- a/deploy/README.md
+++ b/deploy/README.md
@@ -14,3 +14,5 @@
 - [模型量化](../docs/deploy/paddlelite/slim/quant.md)
 - [模型裁剪](../docs/deploy/paddlelite/slim/prune.md)
 - [Android平台](../docs/deploy/paddlelite/android.md)
+- [OpenVINO部署](../docs/deploy/openvino/introduction.md)
+- [树莓派部署](../docs/deploy/raspberry/Raspberry.md)
\ No newline at end of file
diff --git a/deploy/cpp/demo/classifier.cpp b/deploy/cpp/demo/classifier.cpp
index cf3bb5ccf64c43ec42d59a9b73fdced6b50b8dc5..548eaff411a737ea0ffcfca63d36a7f18cd9d994 100644
--- a/deploy/cpp/demo/classifier.cpp
+++ b/deploy/cpp/demo/classifier.cpp
@@ -29,6 +29,10 @@ using namespace std::chrono;  // NOLINT
 DEFINE_string(model_dir, "", "Path of inference model");
 DEFINE_bool(use_gpu, false, "Infering with GPU or CPU");
 DEFINE_bool(use_trt, false, "Infering with TensorRT");
+DEFINE_bool(use_mkl, true, "Infering with MKL");
+DEFINE_int32(mkl_thread_num,
+             omp_get_num_procs(),
+             "Number of mkl threads");
 DEFINE_int32(gpu_id, 0, "GPU card id");
 DEFINE_string(key, "", "key of encryption");
 DEFINE_string(image, "", "Path of test image file");
@@ -56,6 +60,8 @@ int main(int argc, char** argv) {
   model.Init(FLAGS_model_dir,
              FLAGS_use_gpu,
              FLAGS_use_trt,
+             FLAGS_use_mkl,
+             FLAGS_mkl_thread_num,
              FLAGS_gpu_id,
              FLAGS_key);
diff --git a/deploy/cpp/demo/detector.cpp b/deploy/cpp/demo/detector.cpp
index ef7fd782715bef5d9cc1dae43c87ceaa123e914f..f5fefc05d0bbc4bbd482c23f0db8c066b7d1013b 100644
--- a/deploy/cpp/demo/detector.cpp
+++ b/deploy/cpp/demo/detector.cpp
@@ -31,6 +31,10 @@ using namespace std::chrono;  // NOLINT
 DEFINE_string(model_dir, "", "Path of inference model");
 DEFINE_bool(use_gpu, false, "Infering with GPU or CPU");
 DEFINE_bool(use_trt, false, "Infering with TensorRT");
+DEFINE_bool(use_mkl, true, "Infering with MKL");
+DEFINE_int32(mkl_thread_num,
+             omp_get_num_procs(),
+             "Number of mkl threads");
 DEFINE_int32(gpu_id, 0, "GPU card id");
 DEFINE_string(key, "", "key of encryption");
 DEFINE_string(image, "", "Path of test image file");
@@ -61,6 +65,8 @@ int main(int argc, char** argv) {
   model.Init(FLAGS_model_dir,
              FLAGS_use_gpu,
              FLAGS_use_trt,
+             FLAGS_use_mkl,
+             FLAGS_mkl_thread_num,
              FLAGS_gpu_id,
              FLAGS_key);
   int imgs = 1;
diff --git a/deploy/cpp/demo/segmenter.cpp b/deploy/cpp/demo/segmenter.cpp
index d13a328f5beecc90fe9257a4f32ee63a8fe609a5..0d888001490759f65790d51837e2e69a6f448c4b 100644
--- a/deploy/cpp/demo/segmenter.cpp
+++ b/deploy/cpp/demo/segmenter.cpp
@@ -30,6 +30,10 @@ using namespace std::chrono;  // NOLINT
 DEFINE_string(model_dir, "", "Path of inference model");
 DEFINE_bool(use_gpu, false, "Infering with GPU or CPU");
 DEFINE_bool(use_trt, false, "Infering with TensorRT");
+DEFINE_bool(use_mkl, true, "Infering with MKL");
+DEFINE_int32(mkl_thread_num,
+             omp_get_num_procs(),
+             "Number of mkl threads");
 DEFINE_int32(gpu_id, 0, "GPU card id");
 DEFINE_string(key, "", "key of encryption");
 DEFINE_string(image, "", "Path of test image file");
@@ -58,6 +62,8 @@ int main(int argc, char** argv) {
   model.Init(FLAGS_model_dir,
              FLAGS_use_gpu,
              FLAGS_use_trt,
+             FLAGS_use_mkl,
+             FLAGS_mkl_thread_num,
              FLAGS_gpu_id,
              FLAGS_key);
   int imgs = 1;
diff --git a/deploy/cpp/demo/video_classifier.cpp b/deploy/cpp/demo/video_classifier.cpp
index 96be867d40800455184b7938dc829e8a0b8f8390..c0485791ccb42fc880ab384ae2cf5e1d9d48b1ae 100644
--- a/deploy/cpp/demo/video_classifier.cpp
+++ b/deploy/cpp/demo/video_classifier.cpp
@@ -35,8 +35,12 @@ using namespace std::chrono;  // NOLINT
 DEFINE_string(model_dir, "", "Path of inference model");
 DEFINE_bool(use_gpu, false, "Infering with GPU or CPU");
 DEFINE_bool(use_trt, false, "Infering with TensorRT");
+DEFINE_bool(use_mkl, true, "Infering with MKL");
 DEFINE_int32(gpu_id, 0, "GPU card id");
 DEFINE_string(key, "", "key of encryption");
+DEFINE_int32(mkl_thread_num,
+             omp_get_num_procs(),
+             "Number of mkl threads");
 DEFINE_bool(use_camera, false, "Infering with Camera");
 DEFINE_int32(camera_id, 0, "Camera id");
 DEFINE_string(video_path, "", "Path of input video");
@@ -62,6 +66,8 @@ int main(int argc, char** argv) {
   model.Init(FLAGS_model_dir,
              FLAGS_use_gpu,
              FLAGS_use_trt,
+             FLAGS_use_mkl,
+             FLAGS_mkl_thread_num,
              FLAGS_gpu_id,
              FLAGS_key);
diff --git a/deploy/cpp/demo/video_detector.cpp b/deploy/cpp/demo/video_detector.cpp
index ee4d5bdb138d03020042e60d41ded0ca1efde46d..e617dbd1339b73676225a65a667a42a06abfa63e 100644
--- a/deploy/cpp/demo/video_detector.cpp
+++ b/deploy/cpp/demo/video_detector.cpp
@@ -35,6 +35,7 @@ using namespace std::chrono;  // NOLINT
 DEFINE_string(model_dir, "", "Path of inference model");
 DEFINE_bool(use_gpu, false, "Infering with GPU or CPU");
 DEFINE_bool(use_trt, false, "Infering with TensorRT");
+DEFINE_bool(use_mkl, true, "Infering with MKL");
 DEFINE_int32(gpu_id, 0, "GPU card id");
 DEFINE_bool(use_camera, false, "Infering with Camera");
 DEFINE_int32(camera_id, 0, "Camera id");
@@ -42,6 +43,9 @@ DEFINE_string(video_path, "", "Path of input video");
 DEFINE_bool(show_result, false, "show the result of each frame with a window");
 DEFINE_bool(save_result, true, "save the result of each frame to a video");
 DEFINE_string(key, "", "key of encryption");
+DEFINE_int32(mkl_thread_num,
+             omp_get_num_procs(),
+             "Number of mkl threads");
 DEFINE_string(save_dir, "output", "Path to save visualized image");
 DEFINE_double(threshold,
               0.5,
@@ -64,6 +68,8 @@ int main(int argc, char** argv) {
   model.Init(FLAGS_model_dir,
              FLAGS_use_gpu,
              FLAGS_use_trt,
+             FLAGS_use_mkl,
+             FLAGS_mkl_thread_num,
              FLAGS_gpu_id,
              FLAGS_key);
   // Open video
diff --git a/deploy/cpp/demo/video_segmenter.cpp b/deploy/cpp/demo/video_segmenter.cpp
index 6a835117cd1434b5f26e0fb660e6fe07ef56e607..35af64f4b00ea5983653bb135394da9389539604 100644
--- a/deploy/cpp/demo/video_segmenter.cpp
+++ b/deploy/cpp/demo/video_segmenter.cpp
@@ -35,8 +35,12 @@ using namespace std::chrono;  // NOLINT
 DEFINE_string(model_dir, "", "Path of inference model");
 DEFINE_bool(use_gpu, false, "Infering with GPU or CPU");
 DEFINE_bool(use_trt, false, "Infering with TensorRT");
+DEFINE_bool(use_mkl, true, "Infering with MKL");
 DEFINE_int32(gpu_id, 0, "GPU card id");
 DEFINE_string(key, "", "key of encryption");
+DEFINE_int32(mkl_thread_num,
+             omp_get_num_procs(),
+             "Number of mkl threads");
 DEFINE_bool(use_camera, false, "Infering with Camera");
 DEFINE_int32(camera_id, 0, "Camera id");
 DEFINE_string(video_path, "", "Path of input video");
@@ -62,6 +66,8 @@ int main(int argc, char** argv) {
   model.Init(FLAGS_model_dir,
              FLAGS_use_gpu,
              FLAGS_use_trt,
+             FLAGS_use_mkl,
+             FLAGS_mkl_thread_num,
              FLAGS_gpu_id,
              FLAGS_key);
   // Open video
diff --git a/deploy/cpp/include/paddlex/paddlex.h b/deploy/cpp/include/paddlex/paddlex.h
index 00b1a05ac8127d403dd7325f3357ece75ec23a58..b85cb09de49c42e4182103f0239aec2222cb2349 100644
--- a/deploy/cpp/include/paddlex/paddlex.h
+++ b/deploy/cpp/include/paddlex/paddlex.h
@@ -70,6 +70,8 @@ class Model {
  * @param model_dir: the directory which contains model.yml
  * @param use_gpu: use gpu or not when infering
  * @param use_trt: use Tensor RT or not when infering
+ * @param use_mkl: use mkl or not when infering
+ * @param mkl_thread_num: number of threads for mkldnn when infering
  * @param gpu_id: the id of gpu when infering with using gpu
  * @param key: the key of encryption when using encrypted model
  * @param use_ir_optim: use ir optimization when infering
@@ -77,15 +79,26 @@ class Model {
   void Init(const std::string& model_dir,
             bool use_gpu = false,
             bool use_trt = false,
+            bool use_mkl = true,
+            int mkl_thread_num = 4,
             int gpu_id = 0,
             std::string key = "",
             bool use_ir_optim = true) {
-    create_predictor(model_dir, use_gpu, use_trt, gpu_id, key, use_ir_optim);
+    create_predictor(
+        model_dir,
+        use_gpu,
+        use_trt,
+        use_mkl,
+        mkl_thread_num,
+        gpu_id,
+        key,
+        use_ir_optim);
   }
-
   void create_predictor(const std::string& model_dir,
                         bool use_gpu = false,
                         bool use_trt = false,
+                        bool use_mkl = true,
+                        int mkl_thread_num = 4,
                         int gpu_id = 0,
                         std::string key = "",
                         bool use_ir_optim = true);
diff --git a/deploy/cpp/include/paddlex/results.h b/deploy/cpp/include/paddlex/results.h
index 72caa1f5d4f78275ca9c4de55aa89bc22edd02e5..e3526bf69b854d19a99cc001df226c5d51c7094d 100644
--- a/deploy/cpp/include/paddlex/results.h
+++ b/deploy/cpp/include/paddlex/results.h
@@ -37,7 +37,7 @@ struct Mask {
 };
 
 /*
- * @brief
+ * @brief
  * This class represents target box in detection or instance segmentation tasks.
  * */
 struct Box {
@@ -47,7 +47,7 @@
   // confidence score
   float score;
   std::vector coordinate;
-  Mask mask;
+  Mask mask;
 };
 
 /*
diff --git a/deploy/cpp/include/paddlex/transforms.h b/deploy/cpp/include/paddlex/transforms.h
index 7e936dc17f4b6e58cdb8cdc36639173ccc24177c..b99abf991de0503c71127c9713df6a234f530fc3 100644
--- a/deploy/cpp/include/paddlex/transforms.h
+++ b/deploy/cpp/include/paddlex/transforms.h
@@ -21,6 +21,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 
@@ -216,8 +217,7 @@ class Padding : public Transform {
     }
     if (item["im_padding_value"].IsDefined()) {
       im_value_ = item["im_padding_value"].as>();
-    }
-    else {
+    } else {
       im_value_ = {0, 0, 0};
     }
   }
diff --git a/deploy/cpp/src/paddlex.cpp b/deploy/cpp/src/paddlex.cpp
index 47dc5b9e9e9104e2d4983a8ac077e5a0810610cf..57d35b89f638173aa1fdc46600d059e16f183c14 100644
--- a/deploy/cpp/src/paddlex.cpp
+++ b/deploy/cpp/src/paddlex.cpp
@@ -11,16 +11,25 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
+
+#include
 #include
 #include
 #include
 #include
 #include "include/paddlex/paddlex.h"
+
+#include
+#include
+#include
+
 namespace PaddleX {
 
 void Model::create_predictor(const std::string& model_dir,
                              bool use_gpu,
                              bool use_trt,
+                             bool use_mkl,
+                             int mkl_thread_num,
                              int gpu_id,
                              std::string key,
                              bool use_ir_optim) {
@@ -40,7 +49,7 @@ void Model::create_predictor(const std::string& model_dir,
   }
 #endif
   if (yaml_input == "") {
-    // 读取配置文件
+    // read yaml file
     std::ifstream yaml_fin(yaml_file);
     yaml_fin.seekg(0, std::ios::end);
     size_t yaml_file_size = yaml_fin.tellg();
@@ -48,7 +57,7 @@ void Model::create_predictor(const std::string& model_dir,
     yaml_fin.seekg(0);
     yaml_fin.read(&yaml_input[0], yaml_file_size);
   }
-  // 读取配置文件内容
+  // load yaml file
   if (!load_config(yaml_input)) {
    std::cerr << "Parse file 'model.yml' failed!"
              << std::endl;
    exit(-1);
@@ -57,6 +66,10 @@ void Model::create_predictor(const std::string& model_dir,
   if (key == "") {
     config.SetModel(model_file, params_file);
   }
+  if (use_mkl && name != "HRNet" && name != "DeepLabv3p") {
+    config.EnableMKLDNN();
+    config.SetCpuMathLibraryNumThreads(mkl_thread_num);
+  }
   if (use_gpu) {
     config.EnableUseGpu(100, gpu_id);
   } else {
@@ -64,13 +77,13 @@ void Model::create_predictor(const std::string& model_dir,
   }
   config.SwitchUseFeedFetchOps(false);
   config.SwitchSpecifyInputNames(true);
-  // 开启图优化
+  // enable graph Optim
 #if defined(__arm__) || defined(__aarch64__)
   config.SwitchIrOptim(false);
 #else
   config.SwitchIrOptim(use_ir_optim);
 #endif
-  // 开启内存优化
+  // enable Memory Optim
   config.EnableMemoryOptim();
   if (use_trt) {
     config.EnableTensorRtEngine(
@@ -108,9 +121,9 @@ bool Model::load_config(const std::string& yaml_input) {
       return false;
     }
   }
-  // 构建数据处理流
+  // build data preprocess stream
   transforms_.Init(config["Transforms"], to_rgb);
-  // 读入label list
+  // read label list
   labels.clear();
   for (const auto& item : config["_Attributes"]["labels"]) {
     int index = labels.size();
@@ -152,19 +165,19 @@ bool Model::predict(const cv::Mat& im, ClsResult* result) {
                  "to function predict()!" << std::endl;
     return false;
   }
-  // 处理输入图像
+  // im preprocess
   if (!preprocess(im, &inputs_)) {
     std::cerr << "Preprocess failed!" << std::endl;
     return false;
   }
-  // 使用加载的模型进行预测
+  // predict
   auto in_tensor = predictor_->GetInputTensor("image");
   int h = inputs_.new_im_size_[0];
   int w = inputs_.new_im_size_[1];
   in_tensor->Reshape({1, 3, h, w});
   in_tensor->copy_from_cpu(inputs_.im_data_.data());
   predictor_->ZeroCopyRun();
-  // 取出模型的输出结果
+  // get result
   auto output_names = predictor_->GetOutputNames();
   auto output_tensor = predictor_->GetOutputTensor(output_names[0]);
   std::vector output_shape = output_tensor->shape();
@@ -174,7 +187,7 @@ bool Model::predict(const cv::Mat& im, ClsResult* result) {
   }
   outputs_.resize(size);
   output_tensor->copy_to_cpu(outputs_.data());
-  // 对模型输出结果进行后处理
+  // postprocess
   auto ptr = std::max_element(std::begin(outputs_), std::end(outputs_));
   result->category_id = std::distance(std::begin(outputs_), ptr);
   result->score = *ptr;
@@ -198,12 +211,12 @@ bool Model::predict(const std::vector& im_batch,
     return false;
   }
   inputs_batch_.assign(im_batch.size(), ImageBlob());
-  // 处理输入图像
+  // preprocess
   if (!preprocess(im_batch, &inputs_batch_, thread_num)) {
     std::cerr << "Preprocess failed!" << std::endl;
     return false;
   }
-  // 使用加载的模型进行预测
+  // predict
   int batch_size = im_batch.size();
   auto in_tensor = predictor_->GetInputTensor("image");
   int h = inputs_batch_[0].new_im_size_[0];
@@ -218,7 +231,7 @@ bool Model::predict(const std::vector& im_batch,
   in_tensor->copy_from_cpu(inputs_data.data());
   // in_tensor->copy_from_cpu(inputs_.im_data_.data());
   predictor_->ZeroCopyRun();
-  // 取出模型的输出结果
+  // get result
   auto output_names = predictor_->GetOutputNames();
   auto output_tensor = predictor_->GetOutputTensor(output_names[0]);
   std::vector output_shape = output_tensor->shape();
@@ -228,7 +241,7 @@ bool Model::predict(const std::vector& im_batch,
   }
   outputs_.resize(size);
   output_tensor->copy_to_cpu(outputs_.data());
-  // 对模型输出结果进行后处理
+  // postprocess
   (*results).clear();
   (*results).resize(batch_size);
   int single_batch_size = size / batch_size;
@@ -258,7 +271,7 @@ bool Model::predict(const cv::Mat& im, DetResult* result) {
     return false;
   }
 
-  // 处理输入图像
+  // preprocess
   if (!preprocess(im, &inputs_)) {
    std::cerr << "Preprocess failed!"
              << std::endl;
    return false;
  }
@@ -288,7 +301,7 @@ bool Model::predict(const cv::Mat& im, DetResult* result) {
     im_info_tensor->copy_from_cpu(im_info);
     im_shape_tensor->copy_from_cpu(im_shape);
   }
-  // 使用加载的模型进行预测
+  // predict
   predictor_->ZeroCopyRun();
 
   std::vector output_box;
@@ -306,7 +319,7 @@ bool Model::predict(const cv::Mat& im, DetResult* result) {
     return true;
   }
   int num_boxes = size / 6;
-  // 解析预测框box
+  // box postprocess
   for (int i = 0; i < num_boxes; ++i) {
     Box box;
     box.category_id = static_cast(round(output_box[i * 6]));
@@ -321,7 +334,7 @@ bool Model::predict(const cv::Mat& im, DetResult* result) {
     box.coordinate = {xmin, ymin, w, h};
     result->boxes.push_back(std::move(box));
   }
-  // 实例分割需解析mask
+  // mask postprocess
   if (name == "MaskRCNN") {
     std::vector output_mask;
     auto output_mask_tensor = predictor_->GetOutputTensor(output_names[1]);
@@ -337,12 +350,22 @@ bool Model::predict(const cv::Mat& im, DetResult* result) {
     result->mask_resolution = output_mask_shape[2];
     for (int i = 0; i < result->boxes.size(); ++i) {
       Box* box = &result->boxes[i];
-      auto begin_mask =
-          output_mask.begin() + (i * classes + box->category_id) * mask_pixels;
-      auto end_mask = begin_mask + mask_pixels;
-      box->mask.data.assign(begin_mask, end_mask);
       box->mask.shape = {static_cast(box->coordinate[2]),
                          static_cast(box->coordinate[3])};
+      auto begin_mask =
+          output_mask.data() + (i * classes + box->category_id) * mask_pixels;
+      cv::Mat bin_mask(result->mask_resolution,
+                       result->mask_resolution,
+                       CV_32FC1,
+                       begin_mask);
+      cv::resize(bin_mask,
+                 bin_mask,
+                 cv::Size(box->mask.shape[0], box->mask.shape[1]));
+      cv::threshold(bin_mask, bin_mask, 0.5, 1, cv::THRESH_BINARY);
+      auto mask_int_begin = reinterpret_cast(bin_mask.data);
+      auto mask_int_end =
+          mask_int_begin + box->mask.shape[0] * box->mask.shape[1];
+      box->mask.data.assign(mask_int_begin, mask_int_end);
     }
   }
   return true;
@@ -366,12 +389,12 @@ bool Model::predict(const std::vector& im_batch,
   inputs_batch_.assign(im_batch.size(), ImageBlob());
   int batch_size = im_batch.size();
-  // 处理输入图像
+  // preprocess
   if (!preprocess(im_batch, &inputs_batch_, thread_num)) {
    std::cerr << "Preprocess failed!"
              << std::endl;
    return false;
  }
-  // 对RCNN类模型做批量padding
+  // RCNN model padding
   if (batch_size > 1) {
     if (name == "FasterRCNN" || name == "MaskRCNN") {
       int max_h = -1;
@@ -452,10 +475,10 @@ bool Model::predict(const std::vector& im_batch,
     im_info_tensor->copy_from_cpu(im_info.data());
     im_shape_tensor->copy_from_cpu(im_shape.data());
   }
-  // 使用加载的模型进行预测
+  // predict
   predictor_->ZeroCopyRun();
 
-  // 读取所有box
+  // get all box
   std::vector output_box;
   auto output_names = predictor_->GetOutputNames();
   auto output_box_tensor = predictor_->GetOutputTensor(output_names[0]);
@@ -472,7 +495,7 @@ bool Model::predict(const std::vector& im_batch,
   }
   auto lod_vector = output_box_tensor->lod();
   int num_boxes = size / 6;
-  // 解析预测框box
+  // box postprocess
   (*results).clear();
   (*results).resize(batch_size);
   for (int i = 0; i < lod_vector[0].size() - 1; ++i) {
@@ -492,7 +515,7 @@ bool Model::predict(const std::vector& im_batch,
     }
   }
 
-  // 实例分割需解析mask
+  // mask postprocess
   if (name == "MaskRCNN") {
     std::vector output_mask;
     auto output_mask_tensor = predictor_->GetOutputTensor(output_names[1]);
@@ -509,14 +532,24 @@ bool Model::predict(const std::vector& im_batch,
     for (int i = 0; i < lod_vector[0].size() - 1; ++i) {
       (*results)[i].mask_resolution = output_mask_shape[2];
       for (int j = 0; j < (*results)[i].boxes.size(); ++j) {
         Box* box = &(*results)[i].boxes[j];
         int category_id = box->category_id;
-        auto begin_mask = output_mask.begin() +
-                          (mask_idx * classes + category_id) * mask_pixels;
-        auto end_mask = begin_mask + mask_pixels;
-        box->mask.data.assign(begin_mask, end_mask);
         box->mask.shape = {static_cast(box->coordinate[2]),
-                          static_cast(box->coordinate[3])};
+                           static_cast(box->coordinate[3])};
+        auto begin_mask =
+            output_mask.data() + (mask_idx * classes + category_id) * mask_pixels;
+        cv::Mat bin_mask(output_mask_shape[2],
+                         output_mask_shape[2],
+                         CV_32FC1,
+                         begin_mask);
+        cv::resize(bin_mask,
+                   bin_mask,
+                   cv::Size(box->mask.shape[0], box->mask.shape[1]));
+        cv::threshold(bin_mask, bin_mask, 0.5, 1, cv::THRESH_BINARY);
+        auto mask_int_begin = reinterpret_cast(bin_mask.data);
+        auto mask_int_end =
+            mask_int_begin + box->mask.shape[0] * box->mask.shape[1];
+        box->mask.data.assign(mask_int_begin, mask_int_end);
         mask_idx++;
       }
     }
@@ -537,7 +570,7 @@ bool Model::predict(const cv::Mat& im, SegResult* result) {
     return false;
   }
 
-  // 处理输入图像
+  // preprocess
   if (!preprocess(im, &inputs_)) {
    std::cerr << "Preprocess failed!"
              << std::endl;
    return false;
@@ -549,10 +582,10 @@ bool Model::predict(const cv::Mat& im, SegResult* result) {
   im_tensor->Reshape({1, 3, h, w});
   im_tensor->copy_from_cpu(inputs_.im_data_.data());
 
-  // 使用加载的模型进行预测
+  // predict
   predictor_->ZeroCopyRun();
 
-  // 获取预测置信度,经过argmax后的labelmap
+  // get labelmap
   auto output_names = predictor_->GetOutputNames();
   auto output_label_tensor = predictor_->GetOutputTensor(output_names[0]);
   std::vector output_label_shape = output_label_tensor->shape();
@@ -565,7 +598,7 @@ bool Model::predict(const cv::Mat& im, SegResult* result) {
   result->label_map.data.resize(size);
   output_label_tensor->copy_to_cpu(result->label_map.data.data());
-  // 获取预测置信度scoremap
+  // get scoremap
   auto output_score_tensor = predictor_->GetOutputTensor(output_names[1]);
   std::vector output_score_shape = output_score_tensor->shape();
   size = 1;
@@ -577,7 +610,7 @@ bool Model::predict(const cv::Mat& im, SegResult* result) {
   result->score_map.data.resize(size);
   output_score_tensor->copy_to_cpu(result->score_map.data.data());
-  // 解析输出结果到原图大小
+  // get origin image result
   std::vector label_map(result->label_map.data.begin(),
                         result->label_map.data.end());
   cv::Mat mask_label(result->label_map.shape[1],
@@ -647,7 +680,7 @@ bool Model::predict(const std::vector& im_batch,
     return false;
   }
 
-  // 处理输入图像
+  // preprocess
   inputs_batch_.assign(im_batch.size(), ImageBlob());
   if (!preprocess(im_batch, &inputs_batch_, thread_num)) {
     std::cerr << "Preprocess failed!" << std::endl;
     return false;
   }
@@ -670,10 +703,10 @@ bool Model::predict(const std::vector& im_batch,
   im_tensor->copy_from_cpu(inputs_data.data());
   // im_tensor->copy_from_cpu(inputs_.im_data_.data());
 
-  // 使用加载的模型进行预测
+  // predict
   predictor_->ZeroCopyRun();
 
-  // 获取预测置信度,经过argmax后的labelmap
+  // get labelmap
   auto output_names = predictor_->GetOutputNames();
   auto output_label_tensor = predictor_->GetOutputTensor(output_names[0]);
   std::vector output_label_shape = output_label_tensor->shape();
@@ -698,7 +731,7 @@ bool Model::predict(const std::vector& im_batch,
                                  (*results)[i].label_map.data.data());
   }
 
-  // 获取预测置信度scoremap
+  // get scoremap
   auto output_score_tensor = predictor_->GetOutputTensor(output_names[1]);
   std::vector output_score_shape = output_score_tensor->shape();
   size = 1;
@@ -722,7 +755,7 @@ bool Model::predict(const std::vector& im_batch,
                                  (*results)[i].score_map.data.data());
   }
 
-  // 解析输出结果到原图大小
+  // get origin image result
   for (int i = 0; i < batch_size; ++i) {
     std::vector label_map((*results)[i].label_map.data.begin(),
                           (*results)[i].label_map.data.end());
diff --git a/deploy/cpp/src/transforms.cpp b/deploy/cpp/src/transforms.cpp
index f623fc664e9d66002e0eb0065d034d90965eddf7..dfbe6d9154e397cec4337dbfdc9a053c31ea151e 100644
--- a/deploy/cpp/src/transforms.cpp
+++ b/deploy/cpp/src/transforms.cpp
@@ -12,12 +12,14 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
+#include "include/paddlex/transforms.h"
+
+#include
+
 #include
 #include
 #include
 
-#include
-#include "include/paddlex/transforms.h"
 
 namespace PaddleX {
 
@@ -195,7 +197,7 @@ std::shared_ptr Transforms::CreateTransform(
 }
 
 bool Transforms::Run(cv::Mat* im, ImageBlob* data) {
-  // 按照transforms中预处理算子顺序处理图像
+  // do all preprocess ops by order
   if (to_rgb_) {
     cv::cvtColor(*im, *im, cv::COLOR_BGR2RGB);
   }
@@ -211,8 +213,8 @@ bool Transforms::Run(cv::Mat* im, ImageBlob* data) {
     }
   }
 
-  // 将图像由NHWC转为NCHW格式
-  // 同时转为连续的内存块存储到ImageBlob
+  // data format NHWC to NCHW
+  // img data save to ImageBlob
   int h = im->rows;
   int w = im->cols;
   int c = im->channels();
diff --git a/deploy/cpp/src/visualize.cpp b/deploy/cpp/src/visualize.cpp
index afc1733b497269b706bf4e07d82f3a7aa43087f5..3443848c1c56ecea071c70491efecea7d35d06fe 100644
--- a/deploy/cpp/src/visualize.cpp
+++ b/deploy/cpp/src/visualize.cpp
@@ -47,7 +47,7 @@ cv::Mat Visualize(const cv::Mat& img,
                           boxes[i].coordinate[2],
                           boxes[i].coordinate[3]);
 
-    // 生成预测框和标题
+    // draw box and title
     std::string text = boxes[i].category;
     int c1 = colormap[3 * boxes[i].category_id + 0];
     int c2 = colormap[3 * boxes[i].category_id + 1];
@@ -63,13 +63,13 @@ cv::Mat Visualize(const cv::Mat& img,
     origin.x = roi.x;
     origin.y = roi.y;
 
-    // 生成预测框标题的背景
+    // background
     cv::Rect text_back = cv::Rect(boxes[i].coordinate[0],
                                   boxes[i].coordinate[1] - text_size.height,
                                   text_size.width,
                                   text_size.height);
-    // 绘图和文字
+    // draw
     cv::rectangle(vis_img, roi, roi_color, 2);
     cv::rectangle(vis_img, text_back, roi_color, -1);
     cv::putText(vis_img,
@@ -80,18 +80,16 @@ cv::Mat Visualize(const cv::Mat& img,
                 cv::Scalar(255, 255, 255),
                 thickness);
 
-    // 生成实例分割mask
+    // mask
     if (boxes[i].mask.data.size() == 0) {
       continue;
     }
-    cv::Mat bin_mask(result.mask_resolution,
-                     result.mask_resolution,
+    std::vector<float> mask_data;
+    mask_data.assign(boxes[i].mask.data.begin(), boxes[i].mask.data.end());
+    cv::Mat bin_mask(boxes[i].mask.shape[1],
+                     boxes[i].mask.shape[0],
                      CV_32FC1,
-                     boxes[i].mask.data.data());
-    cv::resize(bin_mask,
-               bin_mask,
-               cv::Size(boxes[i].mask.shape[0], boxes[i].mask.shape[1]));
-    cv::threshold(bin_mask, bin_mask, 0.5, 1, cv::THRESH_BINARY);
+                     mask_data.data());
     cv::Mat full_mask = cv::Mat::zeros(vis_img.size(), CV_8UC1);
     bin_mask.copyTo(full_mask(roi));
     cv::Mat mask_ch[3];
diff --git a/deploy/lite/android/sdk/src/main/java/com/baidu/paddlex/preprocess/Transforms.java b/deploy/lite/android/sdk/src/main/java/com/baidu/paddlex/preprocess/Transforms.java
index 940ebaa234db2e34faa2daaf74dfacc0e9d131fe..d88ec4bfa7017fede63ffccc154bcf4a34a8a878 100644
--- a/deploy/lite/android/sdk/src/main/java/com/baidu/paddlex/preprocess/Transforms.java
+++ b/deploy/lite/android/sdk/src/main/java/com/baidu/paddlex/preprocess/Transforms.java
@@ -23,6 +23,7 @@ import org.opencv.core.Scalar;
 import org.opencv.core.Size;
 import org.opencv.imgproc.Imgproc;
 import java.util.ArrayList;
+import java.util.Date;
 import java.util.HashMap;
 import java.util.List;
 
@@ -101,6 +102,15 @@ public class Transforms {
             if (info.containsKey("coarsest_stride")) {
                 padding.coarsest_stride = (int) info.get("coarsest_stride");
             }
+            if (info.containsKey("im_padding_value")) {
+                List im_padding_value = (List) info.get("im_padding_value");
+                if (im_padding_value.size()!=3){
+                    Log.e(TAG, "len of im_padding_value in padding must == 3.");
+                }
+                for (int k =0; i> reverseReshapeInfo = new ArrayList>(imageBlob.getReshapeInfo().entrySet()).listIterator(imageBlob.getReshapeInfo().size());
         while (reverseReshapeInfo.hasPrevious()) {
             Map.Entry entry = reverseReshapeInfo.previous();
diff --git a/deploy/openvino/CMakeLists.txt b/deploy/openvino/CMakeLists.txt
old mode 100644
new mode 100755
index 8e32a9592fce38918e46ad9ab9e4b2d1fc97cd6e..e219c8537c40af153b48e5025d07f9292482686a
--- a/deploy/openvino/CMakeLists.txt
+++ b/deploy/openvino/CMakeLists.txt
@@ -8,7 +8,9 @@ SET(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH})
 SET(OPENVINO_DIR "" CACHE PATH "Location of libraries")
 SET(OPENCV_DIR "" CACHE PATH "Location of libraries")
 SET(GFLAGS_DIR "" CACHE PATH "Location of libraries")
+SET(GLOG_DIR "" CACHE PATH "Location of libraries")
 SET(NGRAPH_LIB "" CACHE PATH "Location of libraries")
+SET(ARCH "" CACHE PATH "Target architecture: x86 or armv7")
 
 include(cmake/yaml-cpp.cmake)
 
@@ -27,6 +29,12 @@ macro(safe_set_static_flag)
     endforeach(flag_var)
 endmacro()
 
+if(NOT WIN32)
+  if (NOT DEFINED ARCH OR ${ARCH} STREQUAL "")
+    message(FATAL_ERROR "please set ARCH with -DARCH=x86 OR armv7")
+  endif()
+endif()
+
 if (NOT DEFINED OPENVINO_DIR OR ${OPENVINO_DIR} STREQUAL "")
     message(FATAL_ERROR "please set OPENVINO_DIR with -DOPENVINO_DIR=/path/influence_engine")
 endif()
@@ -39,19 +47,32 @@ if (NOT DEFINED GFLAGS_DIR OR ${GFLAGS_DIR} STREQUAL "")
     message(FATAL_ERROR "please set GFLAGS_DIR with -DGFLAGS_DIR=/path/gflags")
 endif()
 
+if (NOT DEFINED GLOG_DIR OR ${GLOG_DIR} STREQUAL "")
+    message(FATAL_ERROR "please set GLOG_DIR with -DGLOG_DIR=/path/glog")
+endif()
+
 if (NOT DEFINED NGRAPH_LIB OR ${NGRAPH_LIB} STREQUAL "")
     message(FATAL_ERROR "please set NGRAPH_DIR with -DNGRAPH_DIR=/path/ngraph")
 endif()
 
 include_directories("${OPENVINO_DIR}")
-link_directories("${OPENVINO_DIR}/lib")
 include_directories("${OPENVINO_DIR}/include")
-link_directories("${OPENVINO_DIR}/external/tbb/lib")
 include_directories("${OPENVINO_DIR}/external/tbb/include/tbb")
+link_directories("${OPENVINO_DIR}/lib")
+link_directories("${OPENVINO_DIR}/external/tbb/lib")
+if(WIN32)
+  link_directories("${OPENVINO_DIR}/lib/intel64/Release")
+  link_directories("${OPENVINO_DIR}/bin/intel64/Release")
+endif()
+
+
 link_directories("${GFLAGS_DIR}/lib")
 include_directories("${GFLAGS_DIR}/include")
 
+link_directories("${GLOG_DIR}/lib")
+include_directories("${GLOG_DIR}/include")
+
 link_directories("${NGRAPH_LIB}")
 link_directories("${NGRAPH_LIB}/lib")
 
@@ -79,14 +100,29 @@ else()
     set(CMAKE_STATIC_LIBRARY_PREFIX "")
 endif()
 
-
-if(WITH_STATIC_LIB)
-  set(DEPS ${OPENVINO_DIR}/lib/intel64/libinference_engine${CMAKE_STATIC_LIBRARY_SUFFIX})
-  set(DEPS ${DEPS} ${OPENVINO_DIR}/lib/intel64/libinference_engine_legacy${CMAKE_STATIC_LIBRARY_SUFFIX})
+if(WIN32)
+  set(DEPS ${OPENVINO_DIR}/lib/intel64/Release/inference_engine${CMAKE_STATIC_LIBRARY_SUFFIX})
+  set(DEPS ${DEPS} ${OPENVINO_DIR}/lib/intel64/Release/inference_engine_legacy${CMAKE_STATIC_LIBRARY_SUFFIX})
 else()
-  set(DEPS ${OPENVINO_DIR}/lib/intel64/libinference_engine${CMAKE_SHARED_LIBRARY_SUFFIX})
-  set(DEPS ${DEPS} ${OPENVINO_DIR}/lib/intel64/libinference_engine_legacy${CMAKE_SHARED_LIBRARY_SUFFIX})
-endif()
+  if (ARCH STREQUAL "armv7")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv7-a")
+    if(WITH_STATIC_LIB)
+      set(DEPS ${OPENVINO_DIR}/lib/armv7l/libinference_engine${CMAKE_STATIC_LIBRARY_SUFFIX})
+      set(DEPS ${DEPS} ${OPENVINO_DIR}/lib/armv7l/libinference_engine_legacy${CMAKE_STATIC_LIBRARY_SUFFIX})
+    else()
+      set(DEPS ${OPENVINO_DIR}/lib/armv7l/libinference_engine${CMAKE_SHARED_LIBRARY_SUFFIX})
+      set(DEPS ${DEPS} ${OPENVINO_DIR}/lib/armv7l/libinference_engine_legacy${CMAKE_SHARED_LIBRARY_SUFFIX})
+    endif()
+  else()
+    if(WITH_STATIC_LIB)
+      set(DEPS ${OPENVINO_DIR}/lib/intel64/libinference_engine${CMAKE_STATIC_LIBRARY_SUFFIX})
+      set(DEPS ${DEPS} ${OPENVINO_DIR}/lib/intel64/libinference_engine_legacy${CMAKE_STATIC_LIBRARY_SUFFIX})
+    else()
+      set(DEPS ${OPENVINO_DIR}/lib/intel64/libinference_engine${CMAKE_SHARED_LIBRARY_SUFFIX})
+      set(DEPS ${DEPS} ${OPENVINO_DIR}/lib/intel64/libinference_engine_legacy${CMAKE_SHARED_LIBRARY_SUFFIX})
+    endif()
+  endif()
+endif(WIN32)
 
 if (NOT WIN32)
   set(DEPS ${DEPS}
@@ -94,7 +130,7 @@ if (NOT WIN32)
     )
 else()
   set(DEPS ${DEPS}
-    glog gflags_static libprotobuf zlibstatic xxhash libyaml-cppmt)
+    glog gflags_static libyaml-cppmt)
   set(DEPS ${DEPS} libcmt shlwapi)
 endif(NOT WIN32)
 
@@ -105,7 +141,14 @@ if (NOT WIN32)
 endif()
 set(DEPS ${DEPS} ${OpenCV_LIBS})
 
-add_executable(classifier src/classifier.cpp src/transforms.cpp src/paddlex.cpp)
+add_executable(classifier demo/classifier.cpp src/transforms.cpp src/paddlex.cpp)
 ADD_DEPENDENCIES(classifier ext-yaml-cpp)
 target_link_libraries(classifier ${DEPS})
 
+add_executable(segmenter demo/segmenter.cpp src/transforms.cpp src/paddlex.cpp src/visualize.cpp)
+ADD_DEPENDENCIES(segmenter ext-yaml-cpp)
+target_link_libraries(segmenter ${DEPS})
+
+add_executable(detector demo/detector.cpp src/transforms.cpp src/paddlex.cpp src/visualize.cpp)
+ADD_DEPENDENCIES(detector ext-yaml-cpp)
+target_link_libraries(detector ${DEPS})
diff --git a/deploy/openvino/CMakeSettings.json b/deploy/openvino/CMakeSettings.json
old mode 100644
new mode 100755
index 861839dbc67816aeb96ca1ab174d95ca7dd292ef..bb3873b6022deb06ccec99830ed4d0d89aa42f6b
--- a/deploy/openvino/CMakeSettings.json
+++ b/deploy/openvino/CMakeSettings.json
@@ -1,27 +1,47 @@
 {
-    "configurations": [
+  "configurations": [
+    {
+      "name": "x64-Release",
+      "generator": "Ninja",
+      "configurationType": "RelWithDebInfo",
+      "inheritEnvironments": [ "msvc_x64_x64" ],
+      "buildRoot": "${projectDir}\\out\\build\\${name}",
+      "installRoot": "${projectDir}\\out\\install\\${name}",
+      "cmakeCommandArgs": "",
+      "buildCommandArgs": "-v",
+      "ctestCommandArgs": "",
+      "variables": [
         {
-            "name": "x64-Release",
-            "generator": "Ninja",
-            "configurationType": "RelWithDebInfo",
-            "inheritEnvironments": [ "msvc_x64_x64" ],
-            "buildRoot": "${projectDir}\\out\\build\\${name}",
-            "installRoot": "${projectDir}\\out\\install\\${name}",
-            "cmakeCommandArgs": "",
-            "buildCommandArgs": "-v",
-            "ctestCommandArgs": "",
-            "variables": [
-                {
-                    "name": "OPENCV_DIR",
-                    "value": "C:/projects/opencv",
-                    "type": "PATH"
-                },
-                {
-                    "name": "OPENVINO_LIB",
-                    "value": "C:/projetcs/inference_engine",
-                    "type": "PATH"
-                }
-            ]
+          "name": "OPENCV_DIR",
+          "value": "/path/to/opencv",
+          "type": "PATH"
+        },
+        {
+          "name": "OPENVINO_DIR",
+          "value": "C:/Program Files (x86)/IntelSWTools/openvino/deployment_tools/inference_engine",
+          "type": "PATH"
+        },
+        {
+          "name": "NGRAPH_LIB",
+          "value": "C:/Program Files (x86)/IntelSWTools/openvino/deployment_tools/ngraph/lib",
+          "type": "PATH"
+        },
+        {
+          "name": "GFLAGS_DIR",
+          "value": "/path/to/gflags",
+          "type": "PATH"
+        },
+        {
+          "name": "WITH_STATIC_LIB",
+          "value": "True",
+          "type": "BOOL"
+        },
+        {
+          "name": "GLOG_DIR",
+          "value": "/path/to/glog",
+          "type": "PATH"
         }
-    ]
-}
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/deploy/openvino/cmake/yaml-cpp.cmake b/deploy/openvino/cmake/yaml-cpp.cmake
old mode 100644
new mode 100755
index 30d904dc76196cf106abccb47c003eed485691f1..726433d904908ce96c51442246fc884d0899de04
--- a/deploy/openvino/cmake/yaml-cpp.cmake
+++ b/deploy/openvino/cmake/yaml-cpp.cmake
@@ -1,4 +1,3 @@
-find_package(Git REQUIRED)
 
 include(ExternalProject)
 
diff --git a/deploy/openvino/src/classifier.cpp b/deploy/openvino/demo/classifier.cpp
old mode 100644
new mode 100755
similarity index 87%
rename from deploy/openvino/src/classifier.cpp
rename to deploy/openvino/demo/classifier.cpp
index 38c0da9b86d8b6d9c7d248aeb8526dfe1deab148..2180cb40e390affa2dd1ddcd720d900c715aab75
--- a/deploy/openvino/src/classifier.cpp
+++ b/deploy/openvino/demo/classifier.cpp
@@ -22,7 +22,7 @@
 #include "include/paddlex/paddlex.h"
 
 DEFINE_string(model_dir, "", "Path of inference model");
-DEFINE_string(cfg_dir, "", "Path of inference model");
+DEFINE_string(cfg_file, "", "Path of PaddleX model yml file");
 DEFINE_string(device, "CPU", "Device name");
 DEFINE_string(image, "", "Path of test image file");
 DEFINE_string(image_list, "", "Path of test image list file");
@@ -35,8 +35,8 @@ int main(int argc, char** argv) {
     std::cerr << "--model_dir need to be defined" << std::endl;
     return -1;
   }
-  if (FLAGS_cfg_dir == "") {
-    std::cerr << "--cfg_dir need to be defined" << std::endl;
+  if (FLAGS_cfg_file == "") {
+    std::cerr << "--cfg_file need to be defined" << std::endl;
     return -1;
   }
   if (FLAGS_image == "" & FLAGS_image_list == "") {
@@ -44,11 +44,11 @@ int main(int argc, char** argv) {
     return -1;
   }
 
-  // 加载模型
+  // load model
   PaddleX::Model model;
-  model.Init(FLAGS_model_dir, FLAGS_cfg_dir, FLAGS_device);
+  model.Init(FLAGS_model_dir, FLAGS_cfg_file, FLAGS_device);
 
-  // 进行预测
+  // predict
  if (FLAGS_image_list != "") {
    std::ifstream inf(FLAGS_image_list);
    if (!inf) {
diff --git a/deploy/openvino/demo/detector.cpp b/deploy/openvino/demo/detector.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..66a31cefc0fa500ad77353e0f9bdd43e4564cc81
--- /dev/null
+++ b/deploy/openvino/demo/detector.cpp
@@ -0,0 +1,110 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include
+#include
+
+#include
+#include  // NOLINT
+#include
+#include
+#include
+#include
+#include
+
+#include "include/paddlex/paddlex.h"
+#include "include/paddlex/visualize.h"
+
+using namespace std::chrono;  // NOLINT
+
+DEFINE_string(model_dir, "", "Path of openvino model xml file");
+DEFINE_string(cfg_file, "", "Path of PaddleX model yaml file");
+DEFINE_string(image, "", "Path of test image file");
+DEFINE_string(image_list, "", "Path of test image list file");
+DEFINE_string(device, "CPU", "Device name");
+DEFINE_string(save_dir, "", "Path to save visualized image");
+DEFINE_int32(batch_size, 1, "Batch size of infering");
+DEFINE_double(threshold,
+              0.5,
+              "The minimum scores of target boxes which are shown");
+
+int main(int argc, char** argv) {
+  google::ParseCommandLineFlags(&argc, &argv, true);
+  if (FLAGS_model_dir == "") {
+    std::cerr << "--model_dir need to be defined" << std::endl;
+    return -1;
+  }
+  if (FLAGS_cfg_file == "") {
+    std::cerr << "--cfg_file need to be defined" << std::endl;
+    return -1;
+  }
+  if (FLAGS_image == "" && FLAGS_image_list == "") {
+    std::cerr << "--image or --image_list need to be defined" << std::endl;
+    return -1;
+  }
+
+  // load model
+  PaddleX::Model model;
+  model.Init(FLAGS_model_dir, FLAGS_cfg_file, FLAGS_device);
+
+  int imgs = 1;
+  auto colormap = PaddleX::GenerateColorMap(model.labels.size());
+  // predict
+  if (FLAGS_image_list != "") {
+    std::ifstream inf(FLAGS_image_list);
+    if (!inf) {
+      std::cerr << "Fail to open file " << FLAGS_image_list << std::endl;
+      return -1;
+    }
+    std::string image_path;
+    while (getline(inf, image_path)) {
+      PaddleX::DetResult result;
+      cv::Mat im = cv::imread(image_path, 1);
+      model.predict(im, &result);
+      if (FLAGS_save_dir != "") {
+        cv::Mat vis_img = PaddleX::Visualize(
+            im, result, model.labels, colormap, FLAGS_threshold);
+        std::string save_path =
+            PaddleX::generate_save_path(FLAGS_save_dir, image_path);
+        cv::imwrite(save_path, vis_img);
+        std::cout << "Visualized output saved as " << save_path << std::endl;
+      }
+    }
+  } else {
+    PaddleX::DetResult result;
+    cv::Mat im = cv::imread(FLAGS_image, 1);
+    model.predict(im, &result);
+    for (int i = 0; i < result.boxes.size(); ++i) {
+      std::cout << "image file: " << FLAGS_image << std::endl;
+      std::cout << ", predict label: " << result.boxes[i].category
+                << ", label_id:" << result.boxes[i].category_id
+                << ", score: " << result.boxes[i].score
+                << ", box(xmin, ymin, w, h):(" << result.boxes[i].coordinate[0]
+                << ", " << result.boxes[i].coordinate[1] << ", "
+                << result.boxes[i].coordinate[2] << ", "
+                << result.boxes[i].coordinate[3] << ")" << std::endl;
+    }
+    if (FLAGS_save_dir != "") {
+      // visualize
+      cv::Mat vis_img = PaddleX::Visualize(
+          im, result, model.labels, colormap, FLAGS_threshold);
+      std::string save_path =
+          PaddleX::generate_save_path(FLAGS_save_dir, FLAGS_image);
+      cv::imwrite(save_path, vis_img);
+      result.clear();
+      std::cout << "Visualized output saved as " << save_path << std::endl;
+    }
+  }
+  return 0;
+}
diff --git a/deploy/openvino/demo/segmenter.cpp b/deploy/openvino/demo/segmenter.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..bb6886aae8def104a9a3923443f9609684b3b154
--- /dev/null
+++ b/deploy/openvino/demo/segmenter.cpp
@@ -0,0 +1,90 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include "include/paddlex/paddlex.h"
+#include "include/paddlex/visualize.h"
+
+
+DEFINE_string(model_dir, "", "Path of openvino model xml file");
+DEFINE_string(cfg_file, "", "Path of PaddleX model yaml file");
+DEFINE_string(image, "", "Path of test image file");
+DEFINE_string(image_list, "", "Path of test image list file");
+DEFINE_string(device, "CPU", "Device name");
+DEFINE_string(save_dir, "", "Path to save visualized image");
+DEFINE_int32(batch_size, 1, "Batch size of infering");
+
+
+int main(int argc, char** argv) {
+  google::ParseCommandLineFlags(&argc, &argv, true);
+  if (FLAGS_model_dir == "") {
+    std::cerr << "--model_dir need to be defined" << std::endl;
+    return -1;
+  }
+  if (FLAGS_cfg_file == "") {
+    std::cerr << "--cfg_file need to be defined" << std::endl;
+    return -1;
+  }
+  if (FLAGS_image == "" && FLAGS_image_list == "") {
+    std::cerr << "--image or --image_list need to be defined" << std::endl;
+    return -1;
+  }
+
+  // load model
+  PaddleX::Model model;
+  model.Init(FLAGS_model_dir, FLAGS_cfg_file, FLAGS_device);
+  int imgs = 1;
+  auto colormap = PaddleX::GenerateColorMap(model.labels.size());
+
+  if (FLAGS_image_list != "") {
+    std::ifstream inf(FLAGS_image_list);
+    if (!inf) {
+      std::cerr << "Fail to open file " << FLAGS_image_list <
diff --git a/deploy/openvino/include/paddlex/paddlex.h b/deploy/openvino/include/paddlex/paddlex.h
 #include
 #include
+#include
+#include
 
 #include "yaml-cpp/yaml.h"
 
@@ -30,35 +32,40 @@
 #include "include/paddlex/config_parser.h"
 #include "include/paddlex/results.h"
 #include "include/paddlex/transforms.h"
-using namespace InferenceEngine;
+
 
 namespace PaddleX {
 
 class Model {
  public:
   void Init(const std::string& model_dir,
-            const std::string& cfg_dir,
+            const std::string& cfg_file,
             std::string device) {
-    create_predictor(model_dir, cfg_dir, device);
+    create_predictor(model_dir, cfg_file, device);
   }
 
   void create_predictor(const std::string& model_dir,
-                        const std::string& cfg_dir,
+                        const std::string& cfg_file,
                         std::string device);
 
   bool load_config(const std::string& model_dir);
 
-  bool preprocess(cv::Mat* input_im);
+  bool preprocess(cv::Mat* input_im, ImageBlob* inputs);
 
   bool predict(const cv::Mat& im, ClsResult* result);
 
+  bool predict(const cv::Mat& im, DetResult* result);
+
+  bool predict(const cv::Mat& im, SegResult* result);
+
+  std::string type;
   std::string name;
-  std::vector labels;
+  std::map labels;
   Transforms transforms_;
-  Blob::Ptr inputs_;
-  Blob::Ptr output_;
-  CNNNetwork network_;
-  ExecutableNetwork executable_network_;
+  ImageBlob inputs_;
+  InferenceEngine::Blob::Ptr output_;
+  InferenceEngine::CNNNetwork network_;
+  InferenceEngine::ExecutableNetwork executable_network_;
 };
-}  // namespce of PaddleX
+}  // namespace PaddleX
diff --git a/deploy/openvino/include/paddlex/results.h b/deploy/openvino/include/paddlex/results.h
old mode 100644
new mode 100755
index de90c4a85130f42c0201f0d671fd3e2d53b0f37d..7a77e0e2df4dbe4889f7be176df173b00dc454fa
--- a/deploy/openvino/include/paddlex/results.h
+++ b/deploy/openvino/include/paddlex/results.h
@@ -61,11 +61,11 @@ class SegResult : public BaseResult {
  public:
-  Mask label_map;
+  Mask label_map;
   Mask score_map;
   void clear() {
     label_map.clear();
     score_map.clear();
   }
 };
-}  // namespce of PaddleX
+}  // namespace PaddleX
diff --git a/deploy/openvino/include/paddlex/transforms.h b/deploy/openvino/include/paddlex/transforms.h
old mode 100644
new mode 100755
index fa76a82999173ea01c80b8ea3b67ca1bc4f95fc7..b179c09fbbff082cd3844c8217d3d9e76e5b25c7
--- a/deploy/openvino/include/paddlex/transforms.h
+++ b/deploy/openvino/include/paddlex/transforms.h
@@ -16,26 +16,54 @@
 
 #include
 
-#include
-#include
 #include
 #include
+#include
+#include
 #include
+#include
 
 #include
 #include
 #include
-
 #include
-using namespace InferenceEngine;
+
 
 namespace PaddleX {
 
+/*
+ * @brief
+ * This class represents object for storing all preprocessed data
+ * */
+class ImageBlob {
+ public:
+  // Original image height and width
+  InferenceEngine::Blob::Ptr ori_im_size_;
+
+  // Newest image height and width after process
+  std::vector new_im_size_ = std::vector(2);
+  // Image height and width before resize
+  std::vector> im_size_before_resize_;
+  // Reshape order
+  std::vector reshape_order_;
+  // Resize scale
+  float scale = 1.0;
+  // Buffer for image data after preprocessing
+  InferenceEngine::Blob::Ptr blob;
+
+  void clear() {
+    im_size_before_resize_.clear();
+    reshape_order_.clear();
+  }
+};
+
+
+
 // Abstraction of preprocessing opration class
 class Transform {
  public:
   virtual void Init(const YAML::Node& item) = 0;
-  virtual bool Run(cv::Mat* im) = 0;
+  virtual bool Run(cv::Mat* im, ImageBlob* data) = 0;
 };
 
 class Normalize : public Transform {
@@ -45,7 +73,7 @@ class Normalize : public Transform {
     std_ = item["std"].as>();
   }
 
-  virtual bool Run(cv::Mat* im);
+  virtual bool Run(cv::Mat* im, ImageBlob* data);
 
  private:
   std::vector mean_;
@@ -61,8 +89,8 @@ class ResizeByShort : public Transform {
     } else {
       max_size_ = -1;
     }
-  };
-  virtual bool Run(cv::Mat* im);
+  }
+  virtual bool Run(cv::Mat* im, ImageBlob* data);
 
  private:
   float GenerateScale(const cv::Mat& im);
   int max_size_;
 };
 
+/*
+ * @brief
+ * This class execute resize by long operation on image matrix. At first, it resizes
+ * the long side of image matrix to specified length. Accordingly, the short side
+ * will be resized in the same proportion.
+ * */
+class ResizeByLong : public Transform {
+ public:
+  virtual void Init(const YAML::Node& item) {
+    long_size_ = item["long_size"].as();
+  }
+  virtual bool Run(cv::Mat* im, ImageBlob* data);
+
+ private:
+  int long_size_;
+};
+
+/*
+ * @brief
+ * This class execute resize operation on image matrix. It resizes width and height
+ * to specified length.
+ * */
+class Resize : public Transform {
+ public:
+  virtual void Init(const YAML::Node& item) {
+    if (item["interp"].IsDefined()) {
+      interp_ = item["interp"].as();
+    }
+    if (item["target_size"].IsScalar()) {
+      height_ = item["target_size"].as();
+      width_ = item["target_size"].as();
+    } else if (item["target_size"].IsSequence()) {
+      std::vector target_size = item["target_size"].as>();
+      width_ = target_size[0];
+      height_ = target_size[1];
+    }
+    if (height_ <= 0 || width_ <= 0) {
+      std::cerr << "[Resize] target_size should be greater than 0" << std::endl;
+      exit(-1);
+    }
+  }
+  virtual bool Run(cv::Mat* im, ImageBlob* data);
+
+ private:
+  int height_;
+  int width_;
+  std::string interp_;
+};
+
 
 class CenterCrop : public Transform {
  public:
@@ -83,22 +160,65 @@ class CenterCrop : public Transform {
       height_ = crop_size[1];
     }
   }
-  virtual bool Run(cv::Mat* im);
+  virtual bool Run(cv::Mat* im, ImageBlob* data);
 
  private:
   int height_;
   int width_;
 };
 
+
+/*
+ * @brief
+ * This class execute padding operation on image matrix. It makes border on edge
+ * of image matrix.
+ * */
+class Padding : public Transform {
+ public:
+  virtual void Init(const YAML::Node& item) {
+    if (item["coarsest_stride"].IsDefined()) {
+      coarsest_stride_ = item["coarsest_stride"].as();
+      if (coarsest_stride_ < 1) {
+        std::cerr << "[Padding] coarsest_stride should be greater than 0"
+                  << std::endl;
+        exit(-1);
+      }
+    }
+    if (item["target_size"].IsDefined()) {
+      if (item["target_size"].IsScalar()) {
+        width_ = item["target_size"].as();
+        height_ = item["target_size"].as();
+      } else if (item["target_size"].IsSequence()) {
+        width_ = item["target_size"].as>()[0];
+        height_ = item["target_size"].as>()[1];
+      }
+    }
+    if (item["im_padding_value"].IsDefined()) {
+      im_value_ = item["im_padding_value"].as>();
+    } else {
+      im_value_ = {0, 0, 0};
+    }
+  }
+
+  virtual bool Run(cv::Mat* im, ImageBlob* data);
+
+ private:
+  int coarsest_stride_ = -1;
+  int width_ = 0;
+  int height_ = 0;
+  std::vector im_value_;
+};
+
+
 class Transforms {
  public:
-  void Init(const YAML::Node& node, bool to_rgb = true);
+  void Init(const YAML::Node& node, std::string type, bool to_rgb = true);
   std::shared_ptr CreateTransform(const std::string& name);
-  bool Run(cv::Mat* im, Blob::Ptr blob);
+  bool Run(cv::Mat* im, ImageBlob* data);
 
  private:
   std::vector> transforms_;
   bool to_rgb_ = true;
+  std::string type_;
 };
 
 }  // namespace PaddleX
diff --git a/deploy/openvino/include/paddlex/visualize.h b/deploy/openvino/include/paddlex/visualize.h
new file mode 100644
index 0000000000000000000000000000000000000000..d3eb094f525dc2c4e878dbfe11916dc98c63dd49
--- /dev/null
+++ b/deploy/openvino/include/paddlex/visualize.h
@@ -0,0 +1,97 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include
+#include
+#include
+#ifdef _WIN32
+#include
+#include
+#else  // Linux/Unix
+#include
+#include
+#include
+#include
+#include
+#endif
+#include
+
+#include
+#include
+#include
+
+#include "include/paddlex/results.h"
+
+#ifdef _WIN32
+#define OS_PATH_SEP "\\"
+#else
+#define OS_PATH_SEP "/"
+#endif
+
+namespace PaddleX {
+
+/*
+ * @brief
+ * Generate visualization colormap for each class
+ *
+ * @param number of class
+ * @return color map, the size of vector is 3 * num_class
+ * */
+std::vector GenerateColorMap(int num_class);
+
+
+/*
+ * @brief
+ * Visualize the detection result
+ *
+ * @param img: initial image matrix
+ * @param results: the detection result
+ * @param labels: label map
+ * @param colormap: visualization color map
+ * @return visualized image matrix
+ * */
+cv::Mat Visualize(const cv::Mat& img,
+                  const DetResult& results,
+                  const std::map& labels,
+                  const std::vector& colormap,
+                  float threshold = 0.5);
+
+/*
+ * @brief
+ * Visualize the segmentation result
+ *
+ * @param img: initial image matrix
+ * @param results: the detection result
+ * @param labels: label map
+ * @param colormap: visualization color map
+ * @return visualized image matrix
+ * */
+cv::Mat Visualize(const cv::Mat& img,
+                  const SegResult& result,
+                  const std::map& labels,
+                  const std::vector& colormap);
+
+/*
+ * @brief
+ * generate save path for visualized image matrix
+ *
+ * @param save_dir: directory for saving visualized image matrix
+ * @param file_path: source image file path
+ * @return path of saving visualized result
+ * */
+std::string generate_save_path(const std::string& save_dir,
+                               const std::string& file_path);
+}  // namespace PaddleX
diff --git a/deploy/openvino/python/__init__.py b/deploy/openvino/python/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..abf198b97e6e818e1fbe59006f98492640bcee54
--- /dev/null
+++ b/deploy/openvino/python/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/deploy/openvino/python/convertor.py b/deploy/openvino/python/convertor.py
new file mode 100644
index 0000000000000000000000000000000000000000..f04720374b933f4472125a754a800ada1c48cae2
--- /dev/null
+++ b/deploy/openvino/python/convertor.py
@@ -0,0 +1,101 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+from six import text_type as _text_type
+import argparse
+import sys
+from utils import logging
+import paddlex as pdx
+
+
+def arg_parser():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--model_dir",
+        "-m",
+        type=_text_type,
+        default=None,
+        help="define model directory path")
+    parser.add_argument(
+        "--save_dir",
+        "-s",
+        type=_text_type,
+        default=None,
+        help="path to save inference model")
+    parser.add_argument(
+        "--fixed_input_shape",
+        "-fs",
+        default=None,
+        help="export openvino model with input shape:[w,h]")
+    parser.add_argument(
+        "--data_type",
+        "-dp",
+        default="FP32",
+        help="option, FP32 or FP16, the data_type of openvino IR")
+    return parser
+
+
+def export_openvino_model(model, args):
+    if model.model_type == "detector" or model.__class__.__name__ == "FastSCNN":
+        logging.error(
+            "Only image classifier models and semantic segmentation models(except FastSCNN) are supported to export to openvino")
+    try:
+        import x2paddle
+        if x2paddle.__version__ < '0.7.4':
+            logging.error("You need to upgrade x2paddle >= 0.7.4")
+    except:
+        logging.error(
+            "You need to install x2paddle first, pip install x2paddle>=0.7.4")
+
+    import x2paddle.convert as x2pc
+    x2pc.paddle2onnx(args.model_dir, args.save_dir)
+
+    import mo.main as mo
+    from mo.utils.cli_parser import get_onnx_cli_parser
+    onnx_parser = get_onnx_cli_parser()
+    onnx_parser.add_argument("--model_dir", type=_text_type)
+    onnx_parser.add_argument("--save_dir", type=_text_type)
+    onnx_parser.add_argument("--fixed_input_shape")
+    onnx_input = os.path.join(args.save_dir, 'x2paddle_model.onnx')
+    onnx_parser.set_defaults(input_model=onnx_input)
+    onnx_parser.set_defaults(output_dir=args.save_dir)
+    shape = '[1,3,'
+    shape = shape + args.fixed_input_shape[1:]
+    if model.__class__.__name__ == "YOLOV3":
+        shape = shape + ",[1,2]"
+        inputs = "image,im_size"
+        onnx_parser.set_defaults(input=inputs)
+    onnx_parser.set_defaults(input_shape=shape)
+    mo.main(onnx_parser, 'onnx')
+
+
+def main():
+    parser = arg_parser()
+    args = parser.parse_args()
+    assert args.model_dir is not None, "--model_dir should be defined while exporting openvino model"
+    assert args.save_dir is not None, "--save_dir should be defined to create openvino model"
+    model = pdx.load_model(args.model_dir)
+    if model.status == "Normal" or model.status == "Prune":
+        logging.error(
+            "Only support inference model, try to export model first as below,",
+            exit=False)
+    export_openvino_model(model, args)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/deploy/openvino/python/demo.py b/deploy/openvino/python/demo.py
new file mode 100644
index 0000000000000000000000000000000000000000..93ecaab8e526977402a798c21b8b8c5696f1f70b
--- /dev/null
+++ b/deploy/openvino/python/demo.py
@@ -0,0 +1,78 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+import os
+import argparse
+import deploy
+
+
+def arg_parser():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--model_dir",
+        "-m",
+        type=str,
+        default=None,
+        help="path to openvino model .xml file")
+    parser.add_argument(
+        "--device",
+        "-d",
+        type=str,
+        default='CPU',
+        help="Specify the target device to infer on: [CPU, GPU, FPGA, HDDL, MYRIAD, HETERO]. "
+        "Default value is CPU")
+    parser.add_argument(
+        "--img", "-i", type=str, default=None, help="path to an image file")
+
+    parser.add_argument(
+        "--img_list", "-l", type=str, default=None, help="path to an image list")
+
+    parser.add_argument(
+        "--cfg_file",
+        "-c",
+        type=str,
+        default=None,
+        help="Path to PaddleX model yml file")
+
+    return parser
+
+
+def main():
+    parser = arg_parser()
+    args = parser.parse_args()
+    model_xml = args.model_dir
+    model_yaml = args.cfg_file
+
+    # model init
+    if ("CPU" not in args.device):
+        predictor = deploy.Predictor(model_xml, model_yaml, args.device)
+    else:
+        predictor = deploy.Predictor(model_xml, model_yaml)
+
+    # predict
+    if (args.img_list != None):
+        f = open(args.img_list)
+        lines = f.readlines()
+        for im_path in lines:
+            print(im_path)
+            predictor.predict(im_path.strip('\n'))
+        f.close()
+    else:
+        im_path = args.img
+        predictor.predict(im_path)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/deploy/openvino/python/deploy.py b/deploy/openvino/python/deploy.py
new file mode 100644
index 0000000000000000000000000000000000000000..b43f96d9894775d4bf0e54c5c8c56c4a9ed87fb4
--- /dev/null
+++ b/deploy/openvino/python/deploy.py
@@ -0,0 +1,227 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+import os
+import os.path as osp
+import time
+import cv2
+import numpy as np
+import yaml
+from six import text_type as _text_type
+from openvino.inference_engine import IECore
+
+
+class Predictor:
+    def __init__(self, model_xml, model_yaml, device="CPU"):
+        self.device = device
+        if not osp.exists(model_xml):
+            print("model xml file does not exist in {}".format(model_xml))
+        self.model_xml = model_xml
+        self.model_bin = osp.splitext(model_xml)[0] + ".bin"
+        if not osp.exists(model_yaml):
+            print("model yaml file does not exist in {}".format(model_yaml))
+        with open(model_yaml) as f:
+            self.info = yaml.load(f.read(), Loader=yaml.Loader)
+        self.model_type = self.info['_Attributes']['model_type']
+        self.model_name = self.info['Model']
+        self.num_classes = self.info['_Attributes']['num_classes']
+        self.labels = self.info['_Attributes']['labels']
+        if self.info['Model'] == 'MaskRCNN':
+            if self.info['_init_params']['with_fpn']:
+                self.mask_head_resolution = 28
+            else:
+                self.mask_head_resolution = 14
+        transforms_mode = self.info.get('TransformsMode', 'RGB')
+        if transforms_mode == 'RGB':
+            to_rgb = True
+        else:
+            to_rgb = False
+        self.transforms = self.build_transforms(self.info['Transforms'],
+                                                to_rgb)
+        self.predictor, self.net = self.create_predictor()
+        self.total_time = 0
+        self.count_num = 0
+
+    def create_predictor(self):
+        # initialization for specified device
+        print("Creating Inference Engine")
+        ie = IECore()
+        print("Loading network files:\n\t{}\n\t{}".format(self.model_xml,
+                                                          self.model_bin))
+        net = ie.read_network(model=self.model_xml, weights=self.model_bin)
+        net.batch_size = 1
+        network_config = {}
+        if self.device == "MYRIAD":
+            network_config = {'VPU_HW_STAGES_OPTIMIZATION': 'NO'}
+        exec_net = ie.load_network(
+            network=net, device_name=self.device, config=network_config)
+        return exec_net, net
+
+    def build_transforms(self, transforms_info, to_rgb=True):
+        if self.model_type == "classifier":
+            import transforms.cls_transforms as transforms
+        elif self.model_type == "detector":
+            import transforms.det_transforms as transforms
+        elif self.model_type == "segmenter":
+            import transforms.seg_transforms as transforms
+        op_list = list()
+        for op_info in transforms_info:
+            op_name = list(op_info.keys())[0]
+            op_attr = op_info[op_name]
+            if not hasattr(transforms, op_name):
+                raise Exception(
+                    "There's no operator named '{}' in transforms of {}".
+ format(op_name, self.model_type)) + op_list.append(getattr(transforms, op_name)(**op_attr)) + eval_transforms = transforms.Compose(op_list) + if hasattr(eval_transforms, 'to_rgb'): + eval_transforms.to_rgb = to_rgb + self.arrange_transforms(eval_transforms) + return eval_transforms + + def arrange_transforms(self, eval_transforms): + if self.model_type == 'classifier': + import transforms.cls_transforms as transforms + arrange_transform = transforms.ArrangeClassifier + elif self.model_type == 'segmenter': + import transforms.seg_transforms as transforms + arrange_transform = transforms.ArrangeSegmenter + elif self.model_type == 'detector': + import transforms.det_transforms as transforms + arrange_name = 'Arrange{}'.format(self.model_name) + arrange_transform = getattr(transforms, arrange_name) + else: + raise Exception("Unrecognized model type: {}".format( + self.model_type)) + if type(eval_transforms.transforms[-1]).__name__.startswith('Arrange'): + eval_transforms.transforms[-1] = arrange_transform(mode='test') + else: + eval_transforms.transforms.append(arrange_transform(mode='test')) + + def raw_predict(self, preprocessed_input): + self.count_num += 1 + feed_dict = {} + if self.model_name == "YOLOv3": + inputs = self.net.inputs + for name in inputs: + if (len(inputs[name].shape) == 2): + feed_dict[name] = preprocessed_input['im_size'] + elif (len(inputs[name].shape) == 4): + feed_dict[name] = preprocessed_input['image'] + else: + pass + else: + input_blob = next(iter(self.net.inputs)) + feed_dict[input_blob] = preprocessed_input['image'] + #Start sync inference + print("Starting inference in synchronous mode") + res = self.predictor.infer(inputs=feed_dict) + + #Processing output blob + print("Processing output blob") + return res + + def preprocess(self, image): + res = dict() + if self.model_type == "classifier": + im, = self.transforms(image) + im = np.expand_dims(im, axis=0).copy() + res['image'] = im + elif self.model_type == "detector": + if self.model_name == "YOLOv3": + im, im_shape = self.transforms(image) + im = np.expand_dims(im, axis=0).copy() + im_shape = np.expand_dims(im_shape, axis=0).copy() + res['image'] = im + res['im_size'] = im_shape + if self.model_name.count('RCNN') > 0: + im, im_resize_info, im_shape = self.transforms(image) + im = np.expand_dims(im, axis=0).copy() + im_resize_info = np.expand_dims(im_resize_info, axis=0).copy() + im_shape = np.expand_dims(im_shape, axis=0).copy() + res['image'] = im + res['im_info'] = im_resize_info + res['im_shape'] = im_shape + elif self.model_type == "segmenter": + im, im_info = self.transforms(image) + im = np.expand_dims(im, axis=0).copy() + res['image'] = im + res['im_info'] = im_info + return res + + def classifier_postprocess(self, preds, topk=1): + """ 对分类模型的预测结果做后处理 + """ + true_topk = min(self.num_classes, topk) + output_name = next(iter(self.net.outputs)) + pred_label = np.argsort(-preds[output_name][0])[:true_topk] + result = [{ + 'category_id': l, + 'category': self.labels[l], + 'score': preds[output_name][0][l], + } for l in pred_label] + print(result) + return result + + def segmenter_postprocess(self, preds, preprocessed_inputs): + """ 对语义分割结果做后处理 + """ + it = iter(self.net.outputs) + next(it) + score_name = next(it) + score_map = np.squeeze(preds[score_name]) + score_map = np.transpose(score_map, (1, 2, 0)) + label_name = next(it) + label_map = np.squeeze(preds[label_name]).astype('uint8') + im_info = preprocessed_inputs['im_info'] + for info in im_info[::-1]: + if info[0] == 'resize': + w, h = info[1][1], 
info[1][0]
+                label_map = cv2.resize(
+                    label_map, (w, h), interpolation=cv2.INTER_NEAREST)
+                score_map = cv2.resize(
+                    score_map, (w, h), interpolation=cv2.INTER_LINEAR)
+            elif info[0] == 'padding':
+                w, h = info[1][1], info[1][0]
+                label_map = label_map[0:h, 0:w]
+                score_map = score_map[0:h, 0:w, :]
+            else:
+                raise Exception("Unexpected info '{}' in im_info".format(
+                    info[0]))
+        return {'label_map': label_map, 'score_map': score_map}
+
+    def detector_postprocess(self, preds, preprocessed_inputs):
+        """Post-process the detection results.
+        """
+        output_name = next(iter(self.net.outputs))
+        outputs = preds[output_name][0]
+        result = []
+        for out in outputs:
+            if out[0] > 0:
+                result.append(out.tolist())
+        print(result)
+        return result
+
+    def predict(self, image, topk=1, threshold=0.5):
+        preprocessed_input = self.preprocess(image)
+        model_pred = self.raw_predict(preprocessed_input)
+        if self.model_type == "classifier":
+            results = self.classifier_postprocess(model_pred, topk)
+        elif self.model_type == "detector":
+            results = self.detector_postprocess(model_pred, preprocessed_input)
+        elif self.model_type == "segmenter":
+            results = self.segmenter_postprocess(model_pred,
+                                                 preprocessed_input)
+        return results
diff --git a/deploy/openvino/python/transforms/__init__.py b/deploy/openvino/python/transforms/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..9ec4809004549b5d564e7d69feb5d3a32fbebc98
--- /dev/null
+++ b/deploy/openvino/python/transforms/__init__.py
@@ -0,0 +1,17 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from . import cls_transforms
+from . import det_transforms
+from . import seg_transforms
diff --git a/deploy/openvino/python/transforms/cls_transforms.py b/deploy/openvino/python/transforms/cls_transforms.py
new file mode 100644
index 0000000000000000000000000000000000000000..120c2699238e99d57316eba86ebb2e845d4f3435
--- /dev/null
+++ b/deploy/openvino/python/transforms/cls_transforms.py
@@ -0,0 +1,281 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
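+
+# A minimal sketch of how the classifier eval pipeline below is assembled
+# (this mirrors Predictor.build_transforms; the concrete operators and
+# values come from the model's yml file and are illustrative here):
+#
+#     import transforms.cls_transforms as T
+#     eval_transforms = T.Compose([
+#         T.ResizeByShort(short_size=256),
+#         T.CenterCrop(crop_size=224),
+#         T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+#     ])
+#     im, = eval_transforms('test.jpg')  # float32 HWC array, RGB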
+ +from .ops import * +import random +import os.path as osp +import numpy as np +from PIL import Image, ImageEnhance + + +class ClsTransform: + """分类Transform的基类 + """ + + def __init__(self): + pass + + +class Compose(ClsTransform): + """根据数据预处理/增强算子对输入数据进行操作。 + 所有操作的输入图像流形状均是[H, W, C],其中H为图像高,W为图像宽,C为图像通道数。 + + Args: + transforms (list): 数据预处理/增强算子。 + + Raises: + TypeError: 形参数据类型不满足需求。 + ValueError: 数据长度不匹配。 + """ + + def __init__(self, transforms): + if not isinstance(transforms, list): + raise TypeError('The transforms must be a list!') + if len(transforms) < 1: + raise ValueError('The length of transforms ' + \ + 'must be equal or larger than 1!') + self.transforms = transforms + + def __call__(self, im, label=None): + """ + Args: + im (str/np.ndarray): 图像路径/图像np.ndarray数据。 + label (int): 每张图像所对应的类别序号。 + Returns: + tuple: 根据网络所需字段所组成的tuple; + 字段由transforms中的最后一个数据预处理操作决定。 + """ + if isinstance(im, np.ndarray): + if len(im.shape) != 3: + raise Exception( + "im should be 3-dimension, but now is {}-dimensions". + format(len(im.shape))) + else: + try: + im = cv2.imread(im).astype('float32') + except: + raise TypeError('Can\'t read The image file {}!'.format(im)) + im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) + for op in self.transforms: + outputs = op(im, label) + im = outputs[0] + if len(outputs) == 2: + label = outputs[1] + return outputs + + def add_augmenters(self, augmenters): + if not isinstance(augmenters, list): + raise Exception( + "augmenters should be list type in func add_augmenters()") + transform_names = [type(x).__name__ for x in self.transforms] + for aug in augmenters: + if type(aug).__name__ in transform_names: + print( + "{} is already in ComposedTransforms, need to remove it from add_augmenters().". + format(type(aug).__name__)) + self.transforms = augmenters + self.transforms + + +class Normalize(ClsTransform): + """对图像进行标准化。 + + 1. 对图像进行归一化到区间[0.0, 1.0]。 + 2. 对图像进行减均值除以标准差操作。 + + Args: + mean (list): 图像数据集的均值。默认为[0.485, 0.456, 0.406]。 + std (list): 图像数据集的标准差。默认为[0.229, 0.224, 0.225]。 + + """ + + def __init__(self, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]): + self.mean = mean + self.std = std + + def __call__(self, im, label=None): + """ + Args: + im (np.ndarray): 图像np.ndarray数据。 + label (int): 每张图像所对应的类别序号。 + + Returns: + tuple: 当label为空时,返回的tuple为(im, ),对应图像np.ndarray数据; + 当label不为空时,返回的tuple为(im, label),分别对应图像np.ndarray数据、图像类别id。 + """ + mean = np.array(self.mean)[np.newaxis, np.newaxis, :] + std = np.array(self.std)[np.newaxis, np.newaxis, :] + im = normalize(im, mean, std) + if label is None: + return (im, ) + else: + return (im, label) + + +class ResizeByShort(ClsTransform): + """根据图像短边对图像重新调整大小(resize)。 + + 1. 获取图像的长边和短边长度。 + 2. 根据短边与short_size的比例,计算长边的目标长度, + 此时高、宽的resize比例为short_size/原图短边长度。 + 3. 如果max_size>0,调整resize比例: + 如果长边的目标长度>max_size,则高、宽的resize比例为max_size/原图长边长度; + 4. 
根据调整大小的比例对图像进行resize。 + + Args: + short_size (int): 调整大小后的图像目标短边长度。默认为256。 + max_size (int): 长边目标长度的最大限制。默认为-1。 + """ + + def __init__(self, short_size=256, max_size=-1): + self.short_size = short_size + self.max_size = max_size + + def __call__(self, im, label=None): + """ + Args: + im (np.ndarray): 图像np.ndarray数据。 + label (int): 每张图像所对应的类别序号。 + + Returns: + tuple: 当label为空时,返回的tuple为(im, ),对应图像np.ndarray数据; + 当label不为空时,返回的tuple为(im, label),分别对应图像np.ndarray数据、图像类别id。 + """ + im_short_size = min(im.shape[0], im.shape[1]) + im_long_size = max(im.shape[0], im.shape[1]) + scale = float(self.short_size) / im_short_size + if self.max_size > 0 and np.round(scale * + im_long_size) > self.max_size: + scale = float(self.max_size) / float(im_long_size) + resized_width = int(round(im.shape[1] * scale)) + resized_height = int(round(im.shape[0] * scale)) + im = cv2.resize( + im, (resized_width, resized_height), + interpolation=cv2.INTER_LINEAR) + + if label is None: + return (im, ) + else: + return (im, label) + + +class CenterCrop(ClsTransform): + """以图像中心点扩散裁剪长宽为`crop_size`的正方形 + + 1. 计算剪裁的起始点。 + 2. 剪裁图像。 + + Args: + crop_size (int): 裁剪的目标边长。默认为224。 + """ + + def __init__(self, crop_size=224): + self.crop_size = crop_size + + def __call__(self, im, label=None): + """ + Args: + im (np.ndarray): 图像np.ndarray数据。 + label (int): 每张图像所对应的类别序号。 + + Returns: + tuple: 当label为空时,返回的tuple为(im, ),对应图像np.ndarray数据; + 当label不为空时,返回的tuple为(im, label),分别对应图像np.ndarray数据、图像类别id。 + """ + im = center_crop(im, self.crop_size) + if label is None: + return (im, ) + else: + return (im, label) + + +class ArrangeClassifier(ClsTransform): + """获取训练/验证/预测所需信息。注意:此操作不需用户自己显示调用 + + Args: + mode (str): 指定数据用于何种用途,取值范围为['train', 'eval', 'test', 'quant']。 + + Raises: + ValueError: mode的取值不在['train', 'eval', 'test', 'quant']之内。 + """ + + def __init__(self, mode=None): + if mode not in ['train', 'eval', 'test', 'quant']: + raise ValueError( + "mode must be in ['train', 'eval', 'test', 'quant']!") + self.mode = mode + + def __call__(self, im, label=None): + """ + Args: + im (np.ndarray): 图像np.ndarray数据。 + label (int): 每张图像所对应的类别序号。 + + Returns: + tuple: 当mode为'train'或'eval'时,返回(im, label),分别对应图像np.ndarray数据、 + 图像类别id;当mode为'test'或'quant'时,返回(im, ),对应图像np.ndarray数据。 + """ + im = permute(im, False).astype('float32') + if self.mode == 'train' or self.mode == 'eval': + outputs = (im, label) + else: + outputs = (im, ) + return outputs + + +class ComposedClsTransforms(Compose): + """ 分类模型的基础Transforms流程,具体如下 + 训练阶段: + 1. 随机从图像中crop一块子图,并resize成crop_size大小 + 2. 将1的输出按0.5的概率随机进行水平翻转 + 3. 将图像进行归一化 + 验证/预测阶段: + 1. 将图像按比例Resize,使得最小边长度为crop_size[0] * 1.14 + 2. 从图像中心crop出一个大小为crop_size的图像 + 3. 
将图像进行归一化 + + Args: + mode(str): 图像处理流程所处阶段,训练/验证/预测,分别对应'train', 'eval', 'test' + crop_size(int|list): 输入模型里的图像大小 + mean(list): 图像均值 + std(list): 图像方差 + """ + + def __init__(self, + mode, + crop_size=[224, 224], + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]): + width = crop_size + if isinstance(crop_size, list): + if crop_size[0] != crop_size[1]: + raise Exception( + "In classifier model, width and height should be equal, please modify your parameter `crop_size`" + ) + width = crop_size[0] + if width % 32 != 0: + raise Exception( + "In classifier model, width and height should be multiple of 32, e.g 224、256、320...., please modify your parameter `crop_size`" + ) + + if mode == 'train': + pass + else: + # 验证/预测时的transforms + transforms = [ + ResizeByShort(short_size=int(width * 1.14)), + CenterCrop(crop_size=width), Normalize( + mean=mean, std=std) + ] + + super(ComposedClsTransforms, self).__init__(transforms) diff --git a/deploy/openvino/python/transforms/det_transforms.py b/deploy/openvino/python/transforms/det_transforms.py new file mode 100644 index 0000000000000000000000000000000000000000..0e2d1dc30c0d0bb768839709da9cd74f2140d84a --- /dev/null +++ b/deploy/openvino/python/transforms/det_transforms.py @@ -0,0 +1,540 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
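+
+# A minimal sketch of the YOLOv3 eval preprocessing defined below (operator
+# values are illustrative; Predictor.build_transforms reads them from the
+# model's yml file, then appends ArrangeYOLOv3(mode='test')):
+#
+#     import transforms.det_transforms as T
+#     eval_transforms = T.Compose([
+#         T.Resize(target_size=608, interp='CUBIC'),
+#         T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+#     ])
+#     im, im_info = eval_transforms('test.jpg')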
+ +try: + from collections.abc import Sequence +except Exception: + from collections import Sequence + +import random +import os.path as osp +import numpy as np + +import cv2 +from PIL import Image, ImageEnhance + +from .ops import * + + +class DetTransform: + """检测数据处理基类 + """ + + def __init__(self): + pass + + +class Compose(DetTransform): + """根据数据预处理/增强列表对输入数据进行操作。 + 所有操作的输入图像流形状均是[H, W, C],其中H为图像高,W为图像宽,C为图像通道数。 + + Args: + transforms (list): 数据预处理/增强列表。 + + Raises: + TypeError: 形参数据类型不满足需求。 + ValueError: 数据长度不匹配。 + """ + + def __init__(self, transforms): + if not isinstance(transforms, list): + raise TypeError('The transforms must be a list!') + if len(transforms) < 1: + raise ValueError('The length of transforms ' + \ + 'must be equal or larger than 1!') + self.transforms = transforms + self.use_mixup = False + for t in self.transforms: + if type(t).__name__ == 'MixupImage': + self.use_mixup = True + + def __call__(self, im, im_info=None, label_info=None): + """ + Args: + im (str/np.ndarray): 图像路径/图像np.ndarray数据。 + im_info (dict): 存储与图像相关的信息,dict中的字段如下: + - im_id (np.ndarray): 图像序列号,形状为(1,)。 + - image_shape (np.ndarray): 图像原始大小,形状为(2,), + image_shape[0]为高,image_shape[1]为宽。 + - mixup (list): list为[im, im_info, label_info],分别对应 + 与当前图像进行mixup的图像np.ndarray数据、图像相关信息、标注框相关信息; + 注意,当前epoch若无需进行mixup,则无该字段。 + label_info (dict): 存储与标注框相关的信息,dict中的字段如下: + - gt_bbox (np.ndarray): 真实标注框坐标[x1, y1, x2, y2],形状为(n, 4), + 其中n代表真实标注框的个数。 + - gt_class (np.ndarray): 每个真实标注框对应的类别序号,形状为(n, 1), + 其中n代表真实标注框的个数。 + - gt_score (np.ndarray): 每个真实标注框对应的混合得分,形状为(n, 1), + 其中n代表真实标注框的个数。 + - gt_poly (list): 每个真实标注框内的多边形分割区域,每个分割区域由点的x、y坐标组成, + 长度为n,其中n代表真实标注框的个数。 + - is_crowd (np.ndarray): 每个真实标注框中是否是一组对象,形状为(n, 1), + 其中n代表真实标注框的个数。 + - difficult (np.ndarray): 每个真实标注框中的对象是否为难识别对象,形状为(n, 1), + 其中n代表真实标注框的个数。 + Returns: + tuple: 根据网络所需字段所组成的tuple; + 字段由transforms中的最后一个数据预处理操作决定。 + """ + + def decode_image(im_file, im_info, label_info): + if im_info is None: + im_info = dict() + if isinstance(im_file, np.ndarray): + if len(im_file.shape) != 3: + raise Exception( + "im should be 3-dimensions, but now is {}-dimensions". + format(len(im_file.shape))) + im = im_file + else: + try: + im = cv2.imread(im_file).astype('float32') + except: + raise TypeError('Can\'t read The image file {}!'.format( + im_file)) + im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) + # make default im_info with [h, w, 1] + im_info['im_resize_info'] = np.array( + [im.shape[0], im.shape[1], 1.], dtype=np.float32) + im_info['image_shape'] = np.array([im.shape[0], + im.shape[1]]).astype('int32') + if not self.use_mixup: + if 'mixup' in im_info: + del im_info['mixup'] + # decode mixup image + if 'mixup' in im_info: + im_info['mixup'] = \ + decode_image(im_info['mixup'][0], + im_info['mixup'][1], + im_info['mixup'][2]) + if label_info is None: + return (im, im_info) + else: + return (im, im_info, label_info) + + outputs = decode_image(im, im_info, label_info) + im = outputs[0] + im_info = outputs[1] + if len(outputs) == 3: + label_info = outputs[2] + for op in self.transforms: + if im is None: + return None + outputs = op(im, im_info, label_info) + im = outputs[0] + return outputs + + def add_augmenters(self, augmenters): + if not isinstance(augmenters, list): + raise Exception( + "augmenters should be list type in func add_augmenters()") + transform_names = [type(x).__name__ for x in self.transforms] + for aug in augmenters: + if type(aug).__name__ in transform_names: + print( + "{} is already in ComposedTransforms, need to remove it from add_augmenters().". 
+ format(type(aug).__name__)) + self.transforms = augmenters + self.transforms + + +class ResizeByShort(DetTransform): + """根据图像的短边调整图像大小(resize)。 + + 1. 获取图像的长边和短边长度。 + 2. 根据短边与short_size的比例,计算长边的目标长度, + 此时高、宽的resize比例为short_size/原图短边长度。 + 3. 如果max_size>0,调整resize比例: + 如果长边的目标长度>max_size,则高、宽的resize比例为max_size/原图长边长度。 + 4. 根据调整大小的比例对图像进行resize。 + + Args: + target_size (int): 短边目标长度。默认为800。 + max_size (int): 长边目标长度的最大限制。默认为1333。 + + Raises: + TypeError: 形参数据类型不满足需求。 + """ + + def __init__(self, short_size=800, max_size=1333): + self.max_size = int(max_size) + if not isinstance(short_size, int): + raise TypeError( + "Type of short_size is invalid. Must be Integer, now is {}". + format(type(short_size))) + self.short_size = short_size + if not (isinstance(self.max_size, int)): + raise TypeError("max_size: input type is invalid.") + + def __call__(self, im, im_info=None, label_info=None): + """ + Args: + im (numnp.ndarraypy): 图像np.ndarray数据。 + im_info (dict, 可选): 存储与图像相关的信息。 + label_info (dict, 可选): 存储与标注框相关的信息。 + + Returns: + tuple: 当label_info为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; + 当label_info不为空时,返回的tuple为(im, im_info, label_info),分别对应图像np.ndarray数据、 + 存储与标注框相关信息的字典。 + 其中,im_info更新字段为: + - im_resize_info (np.ndarray): resize后的图像高、resize后的图像宽、resize后的图像相对原始图的缩放比例 + 三者组成的np.ndarray,形状为(3,)。 + + Raises: + TypeError: 形参数据类型不满足需求。 + ValueError: 数据长度不匹配。 + """ + if im_info is None: + im_info = dict() + if not isinstance(im, np.ndarray): + raise TypeError("ResizeByShort: image type is not numpy.") + if len(im.shape) != 3: + raise ValueError('ResizeByShort: image is not 3-dimensional.') + im_short_size = min(im.shape[0], im.shape[1]) + im_long_size = max(im.shape[0], im.shape[1]) + scale = float(self.short_size) / im_short_size + if self.max_size > 0 and np.round(scale * + im_long_size) > self.max_size: + scale = float(self.max_size) / float(im_long_size) + resized_width = int(round(im.shape[1] * scale)) + resized_height = int(round(im.shape[0] * scale)) + im_resize_info = [resized_height, resized_width, scale] + im = cv2.resize( + im, (resized_width, resized_height), + interpolation=cv2.INTER_LINEAR) + im_info['im_resize_info'] = np.array(im_resize_info).astype(np.float32) + if label_info is None: + return (im, im_info) + else: + return (im, im_info, label_info) + + +class Padding(DetTransform): + """1.将图像的长和宽padding至coarsest_stride的倍数。如输入图像为[300, 640], + `coarest_stride`为32,则由于300不为32的倍数,因此在图像最右和最下使用0值 + 进行padding,最终输出图像为[320, 640]。 + 2.或者,将图像的长和宽padding到target_size指定的shape,如输入的图像为[300,640], + a. `target_size` = 960,在图像最右和最下使用0值进行padding,最终输出 + 图像为[960, 960]。 + b. `target_size` = [640, 960],在图像最右和最下使用0值进行padding,最终 + 输出图像为[640, 960]。 + + 1. 如果coarsest_stride为1,target_size为None则直接返回。 + 2. 获取图像的高H、宽W。 + 3. 计算填充后图像的高H_new、宽W_new。 + 4. 构建大小为(H_new, W_new, 3)像素值为0的np.ndarray, + 并将原图的np.ndarray粘贴于左上角。 + + Args: + coarsest_stride (int): 填充后的图像长、宽为该参数的倍数,默认为1。 + target_size (int|list|tuple): 填充后的图像长、宽,默认为None,coarset_stride优先级更高。 + + Raises: + TypeError: 形参`target_size`数据类型不满足需求。 + ValueError: 形参`target_size`为(list|tuple)时,长度不满足需求。 + """ + + def __init__(self, coarsest_stride=1, target_size=None): + self.coarsest_stride = coarsest_stride + if target_size is not None: + if not isinstance(target_size, int): + if not isinstance(target_size, tuple) and not isinstance( + target_size, list): + raise TypeError( + "Padding: Type of target_size must in (int|list|tuple)." 
+ ) + elif len(target_size) != 2: + raise ValueError( + "Padding: Length of target_size must equal 2.") + self.target_size = target_size + + def __call__(self, im, im_info=None, label_info=None): + """ + Args: + im (numnp.ndarraypy): 图像np.ndarray数据。 + im_info (dict, 可选): 存储与图像相关的信息。 + label_info (dict, 可选): 存储与标注框相关的信息。 + + Returns: + tuple: 当label_info为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; + 当label_info不为空时,返回的tuple为(im, im_info, label_info),分别对应图像np.ndarray数据、 + 存储与标注框相关信息的字典。 + + Raises: + TypeError: 形参数据类型不满足需求。 + ValueError: 数据长度不匹配。 + ValueError: coarsest_stride,target_size需有且只有一个被指定。 + ValueError: target_size小于原图的大小。 + """ + if im_info is None: + im_info = dict() + if not isinstance(im, np.ndarray): + raise TypeError("Padding: image type is not numpy.") + if len(im.shape) != 3: + raise ValueError('Padding: image is not 3-dimensional.') + im_h, im_w, im_c = im.shape[:] + + if isinstance(self.target_size, int): + padding_im_h = self.target_size + padding_im_w = self.target_size + elif isinstance(self.target_size, list) or isinstance(self.target_size, + tuple): + padding_im_w = self.target_size[0] + padding_im_h = self.target_size[1] + elif self.coarsest_stride > 0: + padding_im_h = int( + np.ceil(im_h / self.coarsest_stride) * self.coarsest_stride) + padding_im_w = int( + np.ceil(im_w / self.coarsest_stride) * self.coarsest_stride) + else: + raise ValueError( + "coarsest_stridei(>1) or target_size(list|int) need setting in Padding transform" + ) + pad_height = padding_im_h - im_h + pad_width = padding_im_w - im_w + if pad_height < 0 or pad_width < 0: + raise ValueError( + 'the size of image should be less than target_size, but the size of image ({}, {}), is larger than target_size ({}, {})' + .format(im_w, im_h, padding_im_w, padding_im_h)) + padding_im = np.zeros( + (padding_im_h, padding_im_w, im_c), dtype=np.float32) + padding_im[:im_h, :im_w, :] = im + if label_info is None: + return (padding_im, im_info) + else: + return (padding_im, im_info, label_info) + + +class Resize(DetTransform): + """调整图像大小(resize)。 + + - 当目标大小(target_size)类型为int时,根据插值方式, + 将图像resize为[target_size, target_size]。 + - 当目标大小(target_size)类型为list或tuple时,根据插值方式, + 将图像resize为target_size。 + 注意:当插值方式为“RANDOM”时,则随机选取一种插值方式进行resize。 + + Args: + target_size (int/list/tuple): 短边目标长度。默认为608。 + interp (str): resize的插值方式,与opencv的插值方式对应,取值范围为 + ['NEAREST', 'LINEAR', 'CUBIC', 'AREA', 'LANCZOS4', 'RANDOM']。默认为"LINEAR"。 + + Raises: + TypeError: 形参数据类型不满足需求。 + ValueError: 插值方式不在['NEAREST', 'LINEAR', 'CUBIC', + 'AREA', 'LANCZOS4', 'RANDOM']中。 + """ + + # The interpolation mode + interp_dict = { + 'NEAREST': cv2.INTER_NEAREST, + 'LINEAR': cv2.INTER_LINEAR, + 'CUBIC': cv2.INTER_CUBIC, + 'AREA': cv2.INTER_AREA, + 'LANCZOS4': cv2.INTER_LANCZOS4 + } + + def __init__(self, target_size=608, interp='LINEAR'): + self.interp = interp + if not (interp == "RANDOM" or interp in self.interp_dict): + raise ValueError("interp should be one of {}".format( + self.interp_dict.keys())) + if isinstance(target_size, list) or isinstance(target_size, tuple): + if len(target_size) != 2: + raise TypeError( + 'when target is list or tuple, it should include 2 elements, but it is {}' + .format(target_size)) + elif not isinstance(target_size, int): + raise TypeError( + "Type of target_size is invalid. 
Must be Integer or List or tuple, now is {}" + .format(type(target_size))) + + self.target_size = target_size + + def __call__(self, im, im_info=None, label_info=None): + """ + Args: + im (np.ndarray): 图像np.ndarray数据。 + im_info (dict, 可选): 存储与图像相关的信息。 + label_info (dict, 可选): 存储与标注框相关的信息。 + + Returns: + tuple: 当label_info为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; + 当label_info不为空时,返回的tuple为(im, im_info, label_info),分别对应图像np.ndarray数据、 + 存储与标注框相关信息的字典。 + + Raises: + TypeError: 形参数据类型不满足需求。 + ValueError: 数据长度不匹配。 + """ + if im_info is None: + im_info = dict() + if not isinstance(im, np.ndarray): + raise TypeError("Resize: image type is not numpy.") + if len(im.shape) != 3: + raise ValueError('Resize: image is not 3-dimensional.') + if self.interp == "RANDOM": + interp = random.choice(list(self.interp_dict.keys())) + else: + interp = self.interp + im = resize(im, self.target_size, self.interp_dict[interp]) + if label_info is None: + return (im, im_info) + else: + return (im, im_info, label_info) + + +class Normalize(DetTransform): + """对图像进行标准化。 + + 1. 归一化图像到到区间[0.0, 1.0]。 + 2. 对图像进行减均值除以标准差操作。 + + Args: + mean (list): 图像数据集的均值。默认为[0.485, 0.456, 0.406]。 + std (list): 图像数据集的标准差。默认为[0.229, 0.224, 0.225]。 + + Raises: + TypeError: 形参数据类型不满足需求。 + """ + + def __init__(self, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]): + self.mean = mean + self.std = std + if not (isinstance(self.mean, list) and isinstance(self.std, list)): + raise TypeError("NormalizeImage: input type is invalid.") + from functools import reduce + if reduce(lambda x, y: x * y, self.std) == 0: + raise TypeError('NormalizeImage: std is invalid!') + + def __call__(self, im, im_info=None, label_info=None): + """ + Args: + im (numnp.ndarraypy): 图像np.ndarray数据。 + im_info (dict, 可选): 存储与图像相关的信息。 + label_info (dict, 可选): 存储与标注框相关的信息。 + + Returns: + tuple: 当label_info为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; + 当label_info不为空时,返回的tuple为(im, im_info, label_info),分别对应图像np.ndarray数据、 + 存储与标注框相关信息的字典。 + """ + mean = np.array(self.mean)[np.newaxis, np.newaxis, :] + std = np.array(self.std)[np.newaxis, np.newaxis, :] + im = normalize(im, mean, std) + if label_info is None: + return (im, im_info) + else: + return (im, im_info, label_info) + + +class ArrangeYOLOv3(DetTransform): + """获取YOLOv3模型训练/验证/预测所需信息。 + + Args: + mode (str): 指定数据用于何种用途,取值范围为['train', 'eval', 'test', 'quant']。 + + Raises: + ValueError: mode的取值不在['train', 'eval', 'test', 'quant']之内。 + """ + + def __init__(self, mode=None): + if mode not in ['train', 'eval', 'test', 'quant']: + raise ValueError( + "mode must be in ['train', 'eval', 'test', 'quant']!") + self.mode = mode + + def __call__(self, im, im_info=None, label_info=None): + """ + Args: + im (np.ndarray): 图像np.ndarray数据。 + im_info (dict, 可选): 存储与图像相关的信息。 + label_info (dict, 可选): 存储与标注框相关的信息。 + + Returns: + tuple: 当mode为'train'时,返回(im, gt_bbox, gt_class, gt_score, im_shape),分别对应 + 图像np.ndarray数据、真实标注框、真实标注框对应的类别、真实标注框混合得分、图像大小信息; + 当mode为'eval'时,返回(im, im_shape, im_id, gt_bbox, gt_class, difficult), + 分别对应图像np.ndarray数据、图像大小信息、图像id、真实标注框、真实标注框对应的类别、 + 真实标注框是否为难识别对象;当mode为'test'或'quant'时,返回(im, im_shape), + 分别对应图像np.ndarray数据、图像大小信息。 + + Raises: + TypeError: 形参数据类型不满足需求。 + ValueError: 数据长度不匹配。 + """ + im = permute(im, False) + if self.mode == 'train': + pass + elif self.mode == 'eval': + pass + else: + if im_info is None: + raise TypeError('Cannot do ArrangeYolov3! 
' + + 'Becasuse the im_info can not be None!') + im_shape = im_info['image_shape'] + outputs = (im, im_shape) + return outputs + + +class ComposedYOLOv3Transforms(Compose): + """YOLOv3模型的图像预处理流程,具体如下, + 训练阶段: + 1. 在前mixup_epoch轮迭代中,使用MixupImage策略,见https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#mixupimage + 2. 对图像进行随机扰动,包括亮度,对比度,饱和度和色调 + 3. 随机扩充图像,见https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#randomexpand + 4. 随机裁剪图像 + 5. 将4步骤的输出图像Resize成shape参数的大小 + 6. 随机0.5的概率水平翻转图像 + 7. 图像归一化 + 验证/预测阶段: + 1. 将图像Resize成shape参数大小 + 2. 图像归一化 + + Args: + mode(str): 图像处理流程所处阶段,训练/验证/预测,分别对应'train', 'eval', 'test' + shape(list): 输入模型中图像的大小,输入模型的图像会被Resize成此大小 + mixup_epoch(int): 模型训练过程中,前mixup_epoch会使用mixup策略 + mean(list): 图像均值 + std(list): 图像方差 + """ + + def __init__(self, + mode, + shape=[608, 608], + mixup_epoch=250, + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]): + width = shape + if isinstance(shape, list): + if shape[0] != shape[1]: + raise Exception( + "In YOLOv3 model, width and height should be equal") + width = shape[0] + if width % 32 != 0: + raise Exception( + "In YOLOv3 model, width and height should be multiple of 32, e.g 224、256、320...." + ) + + if mode == 'train': + # 训练时的transforms,包含数据增强 + pass + else: + # 验证/预测时的transforms + transforms = [ + Resize( + target_size=width, interp='CUBIC'), Normalize( + mean=mean, std=std) + ] + super(ComposedYOLOv3Transforms, self).__init__(transforms) diff --git a/deploy/openvino/python/transforms/ops.py b/deploy/openvino/python/transforms/ops.py new file mode 100644 index 0000000000000000000000000000000000000000..3f298d7824be48355b69973a1e14486172efcb08 --- /dev/null +++ b/deploy/openvino/python/transforms/ops.py @@ -0,0 +1,186 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import cv2 +import math +import numpy as np +from PIL import Image, ImageEnhance + + +def normalize(im, mean, std): + im = im / 255.0 + im -= mean + im /= std + return im + + +def permute(im, to_bgr=False): + im = np.swapaxes(im, 1, 2) + im = np.swapaxes(im, 1, 0) + if to_bgr: + im = im[[2, 1, 0], :, :] + return im + + +def resize_long(im, long_size=224, interpolation=cv2.INTER_LINEAR): + value = max(im.shape[0], im.shape[1]) + scale = float(long_size) / float(value) + resized_width = int(round(im.shape[1] * scale)) + resized_height = int(round(im.shape[0] * scale)) + + im = cv2.resize( + im, (resized_width, resized_height), interpolation=interpolation) + return im + + +def resize(im, target_size=608, interp=cv2.INTER_LINEAR): + if isinstance(target_size, list) or isinstance(target_size, tuple): + w = target_size[0] + h = target_size[1] + else: + w = target_size + h = target_size + im = cv2.resize(im, (w, h), interpolation=interp) + return im + + +def random_crop(im, + crop_size=224, + lower_scale=0.08, + lower_ratio=3. / 4, + upper_ratio=4. 
/ 3): + scale = [lower_scale, 1.0] + ratio = [lower_ratio, upper_ratio] + aspect_ratio = math.sqrt(np.random.uniform(*ratio)) + w = 1. * aspect_ratio + h = 1. / aspect_ratio + bound = min((float(im.shape[0]) / im.shape[1]) / (h**2), + (float(im.shape[1]) / im.shape[0]) / (w**2)) + scale_max = min(scale[1], bound) + scale_min = min(scale[0], bound) + target_area = im.shape[0] * im.shape[1] * np.random.uniform( + scale_min, scale_max) + target_size = math.sqrt(target_area) + w = int(target_size * w) + h = int(target_size * h) + i = np.random.randint(0, im.shape[0] - h + 1) + j = np.random.randint(0, im.shape[1] - w + 1) + im = im[i:i + h, j:j + w, :] + im = cv2.resize(im, (crop_size, crop_size)) + return im + + +def center_crop(im, crop_size=224): + height, width = im.shape[:2] + w_start = (width - crop_size) // 2 + h_start = (height - crop_size) // 2 + w_end = w_start + crop_size + h_end = h_start + crop_size + im = im[h_start:h_end, w_start:w_end, :] + return im + + +def horizontal_flip(im): + if len(im.shape) == 3: + im = im[:, ::-1, :] + elif len(im.shape) == 2: + im = im[:, ::-1] + return im + + +def vertical_flip(im): + if len(im.shape) == 3: + im = im[::-1, :, :] + elif len(im.shape) == 2: + im = im[::-1, :] + return im + + +def bgr2rgb(im): + return im[:, :, ::-1] + + +def hue(im, hue_lower, hue_upper): + delta = np.random.uniform(hue_lower, hue_upper) + u = np.cos(delta * np.pi) + w = np.sin(delta * np.pi) + bt = np.array([[1.0, 0.0, 0.0], [0.0, u, -w], [0.0, w, u]]) + tyiq = np.array([[0.299, 0.587, 0.114], [0.596, -0.274, -0.321], + [0.211, -0.523, 0.311]]) + ityiq = np.array([[1.0, 0.956, 0.621], [1.0, -0.272, -0.647], + [1.0, -1.107, 1.705]]) + t = np.dot(np.dot(ityiq, bt), tyiq).T + im = np.dot(im, t) + return im + + +def saturation(im, saturation_lower, saturation_upper): + delta = np.random.uniform(saturation_lower, saturation_upper) + gray = im * np.array([[[0.299, 0.587, 0.114]]], dtype=np.float32) + gray = gray.sum(axis=2, keepdims=True) + gray *= (1.0 - delta) + im *= delta + im += gray + return im + + +def contrast(im, contrast_lower, contrast_upper): + delta = np.random.uniform(contrast_lower, contrast_upper) + im *= delta + return im + + +def brightness(im, brightness_lower, brightness_upper): + delta = np.random.uniform(brightness_lower, brightness_upper) + im += delta + return im + +def rotate(im, rotate_lower, rotate_upper): + rotate_delta = np.random.uniform(rotate_lower, rotate_upper) + im = im.rotate(int(rotate_delta)) + return im + + +def resize_padding(im, max_side_len=2400): + ''' + resize image to a size multiple of 32 which is required by the network + :param im: the resized image + :param max_side_len: limit of max image size to avoid out of memory in gpu + :return: the resized image and the resize ratio + ''' + h, w, _ = im.shape + + resize_w = w + resize_h = h + + # limit the max side + if max(resize_h, resize_w) > max_side_len: + ratio = float( + max_side_len) / resize_h if resize_h > resize_w else float( + max_side_len) / resize_w + else: + ratio = 1. 
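+    # Scale both sides by the ratio chosen above, then snap each side down
+    # to a multiple of 32 (the size the network requires), never below 32.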
+ resize_h = int(resize_h * ratio) + resize_w = int(resize_w * ratio) + + resize_h = resize_h if resize_h % 32 == 0 else (resize_h // 32 - 1) * 32 + resize_w = resize_w if resize_w % 32 == 0 else (resize_w // 32 - 1) * 32 + resize_h = max(32, resize_h) + resize_w = max(32, resize_w) + im = cv2.resize(im, (int(resize_w), int(resize_h))) + #im = cv2.resize(im, (512, 512)) + ratio_h = resize_h / float(h) + ratio_w = resize_w / float(w) + _ratio = np.array([ratio_h, ratio_w]).reshape(-1, 2) + return im, _ratio diff --git a/deploy/openvino/python/transforms/seg_transforms.py b/deploy/openvino/python/transforms/seg_transforms.py new file mode 100644 index 0000000000000000000000000000000000000000..a3fb6241d415939a33f73a29b843f9ed45976463 --- /dev/null +++ b/deploy/openvino/python/transforms/seg_transforms.py @@ -0,0 +1,1054 @@ +# coding: utf8 +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .ops import * +import random +import os.path as osp +import numpy as np +from PIL import Image +import cv2 +from collections import OrderedDict + + +class SegTransform: + """ 分割transform基类 + """ + + def __init__(self): + pass + + +class Compose(SegTransform): + """根据数据预处理/增强算子对输入数据进行操作。 + 所有操作的输入图像流形状均是[H, W, C],其中H为图像高,W为图像宽,C为图像通道数。 + + Args: + transforms (list): 数据预处理/增强算子。 + + Raises: + TypeError: transforms不是list对象 + ValueError: transforms元素个数小于1。 + + """ + + def __init__(self, transforms): + if not isinstance(transforms, list): + raise TypeError('The transforms must be a list!') + if len(transforms) < 1: + raise ValueError('The length of transforms ' + \ + 'must be equal or larger than 1!') + self.transforms = transforms + self.to_rgb = False + + + def __call__(self, im, im_info=None, label=None): + """ + Args: + im (str/np.ndarray): 图像路径/图像np.ndarray数据。 + im_info (list): 存储图像reisze或padding前的shape信息,如 + [('resize', [200, 300]), ('padding', [400, 600])]表示 + 图像在过resize前shape为(200, 300), 过padding前shape为 + (400, 600) + label (str/np.ndarray): 标注图像路径/标注图像np.ndarray数据。 + + Returns: + tuple: 根据网络所需字段所组成的tuple;字段由transforms中的最后一个数据预处理操作决定。 + """ + + if im_info is None: + im_info = list() + if isinstance(im, np.ndarray): + if len(im.shape) != 3: + raise Exception( + "im should be 3-dimensions, but now is {}-dimensions". 
+ format(len(im.shape))) + else: + try: + im = cv2.imread(im).astype('float32') + except: + raise ValueError('Can\'t read The image file {}!'.format(im)) + if self.to_rgb: + im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) + if label is not None: + if not isinstance(label, np.ndarray): + label = np.asarray(Image.open(label)) + for op in self.transforms: + if isinstance(op, SegTransform): + outputs = op(im, im_info, label) + im = outputs[0] + if len(outputs) >= 2: + im_info = outputs[1] + if len(outputs) == 3: + label = outputs[2] + else: + im = execute_imgaug(op, im) + if label is not None: + outputs = (im, im_info, label) + else: + outputs = (im, im_info) + return outputs + + def add_augmenters(self, augmenters): + if not isinstance(augmenters, list): + raise Exception( + "augmenters should be list type in func add_augmenters()") + transform_names = [type(x).__name__ for x in self.transforms] + for aug in augmenters: + if type(aug).__name__ in transform_names: + print("{} is already in ComposedTransforms, need to remove it from add_augmenters().".format(type(aug).__name__)) + self.transforms = augmenters + self.transforms + + +class RandomHorizontalFlip(SegTransform): + """以一定的概率对图像进行水平翻转。当存在标注图像时,则同步进行翻转。 + + Args: + prob (float): 随机水平翻转的概率。默认值为0.5。 + + """ + + def __init__(self, prob=0.5): + self.prob = prob + + def __call__(self, im, im_info=None, label=None): + """ + Args: + im (np.ndarray): 图像np.ndarray数据。 + im_info (list): 存储图像reisze或padding前的shape信息,如 + [('resize', [200, 300]), ('padding', [400, 600])]表示 + 图像在过resize前shape为(200, 300), 过padding前shape为 + (400, 600) + label (np.ndarray): 标注图像np.ndarray数据。 + + Returns: + tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; + 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 + 存储与图像相关信息的字典和标注图像np.ndarray数据。 + """ + if random.random() < self.prob: + im = horizontal_flip(im) + if label is not None: + label = horizontal_flip(label) + if label is None: + return (im, im_info) + else: + return (im, im_info, label) + + +class RandomVerticalFlip(SegTransform): + """以一定的概率对图像进行垂直翻转。当存在标注图像时,则同步进行翻转。 + + Args: + prob (float): 随机垂直翻转的概率。默认值为0.1。 + """ + + def __init__(self, prob=0.1): + self.prob = prob + + def __call__(self, im, im_info=None, label=None): + """ + Args: + im (np.ndarray): 图像np.ndarray数据。 + im_info (list): 存储图像reisze或padding前的shape信息,如 + [('resize', [200, 300]), ('padding', [400, 600])]表示 + 图像在过resize前shape为(200, 300), 过padding前shape为 + (400, 600) + label (np.ndarray): 标注图像np.ndarray数据。 + + Returns: + tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; + 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 + 存储与图像相关信息的字典和标注图像np.ndarray数据。 + """ + if random.random() < self.prob: + im = vertical_flip(im) + if label is not None: + label = vertical_flip(label) + if label is None: + return (im, im_info) + else: + return (im, im_info, label) + + +class Resize(SegTransform): + """调整图像大小(resize),当存在标注图像时,则同步进行处理。 + + - 当目标大小(target_size)类型为int时,根据插值方式, + 将图像resize为[target_size, target_size]。 + - 当目标大小(target_size)类型为list或tuple时,根据插值方式, + 将图像resize为target_size, target_size的输入应为[w, h]或(w, h)。 + + Args: + target_size (int|list|tuple): 目标大小。 + interp (str): resize的插值方式,与opencv的插值方式对应, + 可选的值为['NEAREST', 'LINEAR', 'CUBIC', 'AREA', 'LANCZOS4'],默认为"LINEAR"。 + + Raises: + TypeError: target_size不是int/list/tuple。 + ValueError: target_size为list/tuple时元素个数不等于2。 + AssertionError: interp的取值不在['NEAREST', 'LINEAR', 'CUBIC', 'AREA', 'LANCZOS4']之内。 + """ + + # The interpolation mode + interp_dict = { + 
'NEAREST': cv2.INTER_NEAREST, + 'LINEAR': cv2.INTER_LINEAR, + 'CUBIC': cv2.INTER_CUBIC, + 'AREA': cv2.INTER_AREA, + 'LANCZOS4': cv2.INTER_LANCZOS4 + } + + def __init__(self, target_size, interp='LINEAR'): + self.interp = interp + assert interp in self.interp_dict, "interp should be one of {}".format( + interp_dict.keys()) + if isinstance(target_size, list) or isinstance(target_size, tuple): + if len(target_size) != 2: + raise ValueError( + 'when target is list or tuple, it should include 2 elements, but it is {}' + .format(target_size)) + elif not isinstance(target_size, int): + raise TypeError( + "Type of target_size is invalid. Must be Integer or List or tuple, now is {}" + .format(type(target_size))) + + self.target_size = target_size + + def __call__(self, im, im_info=None, label=None): + """ + Args: + im (np.ndarray): 图像np.ndarray数据。 + im_info (list): 存储图像reisze或padding前的shape信息,如 + [('resize', [200, 300]), ('padding', [400, 600])]表示 + 图像在过resize前shape为(200, 300), 过padding前shape为 + (400, 600) + label (np.ndarray): 标注图像np.ndarray数据。 + + Returns: + tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; + 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 + 存储与图像相关信息的字典和标注图像np.ndarray数据。 + 其中,im_info跟新字段为: + -shape_before_resize (tuple): 保存resize之前图像的形状(h, w)。 + + Raises: + ZeroDivisionError: im的短边为0。 + TypeError: im不是np.ndarray数据。 + ValueError: im不是3维nd.ndarray。 + """ + if im_info is None: + im_info = OrderedDict() + im_info.append(('resize', im.shape[:2])) + + if not isinstance(im, np.ndarray): + raise TypeError("ResizeImage: image type is not np.ndarray.") + if len(im.shape) != 3: + raise ValueError('ResizeImage: image is not 3-dimensional.') + im_shape = im.shape + im_size_min = np.min(im_shape[0:2]) + im_size_max = np.max(im_shape[0:2]) + if float(im_size_min) == 0: + raise ZeroDivisionError('ResizeImage: min size of image is 0') + + if isinstance(self.target_size, int): + resize_w = self.target_size + resize_h = self.target_size + else: + resize_w = self.target_size[0] + resize_h = self.target_size[1] + im_scale_x = float(resize_w) / float(im_shape[1]) + im_scale_y = float(resize_h) / float(im_shape[0]) + + im = cv2.resize( + im, + None, + None, + fx=im_scale_x, + fy=im_scale_y, + interpolation=self.interp_dict[self.interp]) + if label is not None: + label = cv2.resize( + label, + None, + None, + fx=im_scale_x, + fy=im_scale_y, + interpolation=self.interp_dict['NEAREST']) + if label is None: + return (im, im_info) + else: + return (im, im_info, label) + + +class ResizeByLong(SegTransform): + """对图像长边resize到固定值,短边按比例进行缩放。当存在标注图像时,则同步进行处理。 + + Args: + long_size (int): resize后图像的长边大小。 + """ + + def __init__(self, long_size): + self.long_size = long_size + + def __call__(self, im, im_info=None, label=None): + """ + Args: + im (np.ndarray): 图像np.ndarray数据。 + im_info (list): 存储图像reisze或padding前的shape信息,如 + [('resize', [200, 300]), ('padding', [400, 600])]表示 + 图像在过resize前shape为(200, 300), 过padding前shape为 + (400, 600) + label (np.ndarray): 标注图像np.ndarray数据。 + + Returns: + tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; + 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 + 存储与图像相关信息的字典和标注图像np.ndarray数据。 + 其中,im_info新增字段为: + -shape_before_resize (tuple): 保存resize之前图像的形状(h, w)。 + """ + if im_info is None: + im_info = OrderedDict() + + im_info.append(('resize', im.shape[:2])) + im = resize_long(im, self.long_size) + if label is not None: + label = resize_long(label, self.long_size, cv2.INTER_NEAREST) + + if label is None: + return 
(im, im_info)
+        else:
+            return (im, im_info, label)
+
+
+class ResizeByShort(SegTransform):
+    """Resize the image based on its short side.
+
+    1. Get the lengths of the image's long and short sides.
+    2. Compute the long side's target length from the ratio of the short
+       side to short_size; the resize ratio for height and width is
+       short_size / (short side of the original image).
+    3. If max_size > 0, adjust the resize ratio: if the long side's target
+       length exceeds max_size, the ratio becomes
+       max_size / (long side of the original image).
+    4. Resize the image by the resulting ratio.
+
+    Args:
+        short_size (int): target length of the short side. Defaults to 800.
+        max_size (int): upper limit on the long side's target length.
+            Defaults to 1333.
+
+    Raises:
+        TypeError: when an argument has an invalid type.
+    """
+
+    def __init__(self, short_size=800, max_size=1333):
+        self.max_size = int(max_size)
+        if not isinstance(short_size, int):
+            raise TypeError(
+                "Type of short_size is invalid. Must be Integer, now is {}".
+                format(type(short_size)))
+        self.short_size = short_size
+        if not (isinstance(self.max_size, int)):
+            raise TypeError("max_size: input type is invalid.")
+
+    def __call__(self, im, im_info=None, label=None):
+        """
+        Args:
+            im (np.ndarray): image as np.ndarray.
+            im_info (list): shape information recorded before resize or
+                padding, e.g. [('resize', [200, 300]), ('padding', [400, 600])]
+                means the image was (200, 300) before resize and (400, 600)
+                before padding.
+            label (np.ndarray): annotation image as np.ndarray.
+
+        Returns:
+            tuple: (im, im_info) when label is None, otherwise
+                (im, im_info, label). A ('resize', (h, w)) entry recording
+                the shape before resizing is appended to im_info.
+
+        Raises:
+            TypeError: when an argument has an invalid type.
+            ValueError: when the image is not 3-dimensional.
+        """
+        if im_info is None:
+            im_info = list()
+        if not isinstance(im, np.ndarray):
+            raise TypeError("ResizeByShort: image type is not numpy.")
+        if len(im.shape) != 3:
+            raise ValueError('ResizeByShort: image is not 3-dimensional.')
+        im_info.append(('resize', im.shape[:2]))
+        im_short_size = min(im.shape[0], im.shape[1])
+        im_long_size = max(im.shape[0], im.shape[1])
+        scale = float(self.short_size) / im_short_size
+        if self.max_size > 0 and np.round(scale *
+                                          im_long_size) > self.max_size:
+            scale = float(self.max_size) / float(im_long_size)
+        resized_width = int(round(im.shape[1] * scale))
+        resized_height = int(round(im.shape[0] * scale))
+        im = cv2.resize(
+            im, (resized_width, resized_height),
+            interpolation=cv2.INTER_NEAREST)
+        if label is not None:
+            label = cv2.resize(
+                label, (resized_width, resized_height),
+                interpolation=cv2.INTER_NEAREST)
+        if label is None:
+            return (im, im_info)
+        else:
+            return (im, im_info, label)
+
+
+class ResizeRangeScaling(SegTransform):
+    """Randomly resize the image's long side into [min_value, max_value] and
+    scale the short side proportionally. The annotation image, if present,
+    is processed in the same way.
+
+    Args:
+        min_value (int): minimum length of the long side after resize.
+            Defaults to 400.
+        max_value (int): maximum length of the long side after resize.
+            Defaults to 600.
+
+    Raises:
+        ValueError: when min_value is greater than max_value.
+    """
+
+    def __init__(self, min_value=400, max_value=600):
+        if min_value > max_value:
+            raise ValueError('min_value must be less than max_value, '
+                             'but they are {} and {}.'.format(min_value,
+                                                              max_value))
+        self.min_value = min_value
+        self.max_value = max_value
+
+    def __call__(self, im, im_info=None, label=None):
+        """
+        Args:
+            im (np.ndarray): image as np.ndarray.
+            im_info (list): shape information recorded before resize or
+                padding, e.g. [('resize', [200, 300]), ('padding', [400, 600])]
+                means the image was (200, 300) before resize and (400, 600)
+                before padding.
+            label (np.ndarray): annotation image as np.ndarray.
+
+        Returns:
+            tuple: (im, im_info) when label is None, otherwise
+                (im, im_info, label).
+        """
+        if self.min_value == self.max_value:
+            random_size = self.max_value
+        else:
+            random_size = int(
+                np.random.uniform(self.min_value, self.max_value) +
0.5) + im = resize_long(im, random_size, cv2.INTER_LINEAR) + if label is not None: + label = resize_long(label, random_size, cv2.INTER_NEAREST) + + if label is None: + return (im, im_info) + else: + return (im, im_info, label) + + +class ResizeStepScaling(SegTransform): + """对图像按照某一个比例resize,这个比例以scale_step_size为步长 + 在[min_scale_factor, max_scale_factor]随机变动。当存在标注图像时,则同步进行处理。 + + Args: + min_scale_factor(float), resize最小尺度。默认值0.75。 + max_scale_factor (float), resize最大尺度。默认值1.25。 + scale_step_size (float), resize尺度范围间隔。默认值0.25。 + + Raises: + ValueError: min_scale_factor大于max_scale_factor + """ + + def __init__(self, + min_scale_factor=0.75, + max_scale_factor=1.25, + scale_step_size=0.25): + if min_scale_factor > max_scale_factor: + raise ValueError( + 'min_scale_factor must be less than max_scale_factor, ' + 'but they are {} and {}.'.format(min_scale_factor, + max_scale_factor)) + self.min_scale_factor = min_scale_factor + self.max_scale_factor = max_scale_factor + self.scale_step_size = scale_step_size + + def __call__(self, im, im_info=None, label=None): + """ + Args: + im (np.ndarray): 图像np.ndarray数据。 + im_info (list): 存储图像reisze或padding前的shape信息,如 + [('resize', [200, 300]), ('padding', [400, 600])]表示 + 图像在过resize前shape为(200, 300), 过padding前shape为 + (400, 600) + label (np.ndarray): 标注图像np.ndarray数据。 + + Returns: + tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; + 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 + 存储与图像相关信息的字典和标注图像np.ndarray数据。 + """ + if self.min_scale_factor == self.max_scale_factor: + scale_factor = self.min_scale_factor + + elif self.scale_step_size == 0: + scale_factor = np.random.uniform(self.min_scale_factor, + self.max_scale_factor) + + else: + num_steps = int((self.max_scale_factor - self.min_scale_factor) / + self.scale_step_size + 1) + scale_factors = np.linspace(self.min_scale_factor, + self.max_scale_factor, + num_steps).tolist() + np.random.shuffle(scale_factors) + scale_factor = scale_factors[0] + + im = cv2.resize( + im, (0, 0), + fx=scale_factor, + fy=scale_factor, + interpolation=cv2.INTER_LINEAR) + if label is not None: + label = cv2.resize( + label, (0, 0), + fx=scale_factor, + fy=scale_factor, + interpolation=cv2.INTER_NEAREST) + + if label is None: + return (im, im_info) + else: + return (im, im_info, label) + + +class Normalize(SegTransform): + """对图像进行标准化。 + 1.尺度缩放到 [0,1]。 + 2.对图像进行减均值除以标准差操作。 + + Args: + mean (list): 图像数据集的均值。默认值[0.5, 0.5, 0.5]。 + std (list): 图像数据集的标准差。默认值[0.5, 0.5, 0.5]。 + + Raises: + ValueError: mean或std不是list对象。std包含0。 + """ + + def __init__(self, mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]): + self.mean = mean + self.std = std + if not (isinstance(self.mean, list) and isinstance(self.std, list)): + raise ValueError("{}: input type is invalid.".format(self)) + from functools import reduce + if reduce(lambda x, y: x * y, self.std) == 0: + raise ValueError('{}: std is invalid!'.format(self)) + + def __call__(self, im, im_info=None, label=None): + """ + Args: + im (np.ndarray): 图像np.ndarray数据。 + im_info (list): 存储图像reisze或padding前的shape信息,如 + [('resize', [200, 300]), ('padding', [400, 600])]表示 + 图像在过resize前shape为(200, 300), 过padding前shape为 + (400, 600) + label (np.ndarray): 标注图像np.ndarray数据。 + + Returns: + tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; + 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 + 存储与图像相关信息的字典和标注图像np.ndarray数据。 + """ + + mean = np.array(self.mean)[np.newaxis, np.newaxis, :] + std = np.array(self.std)[np.newaxis, np.newaxis, :] + im = 
normalize(im, mean, std)
+
+        if label is None:
+            return (im, im_info)
+        else:
+            return (im, im_info, label)
+
+
+class Padding(SegTransform):
+    """Pad the image (and, when given, the label map) on the right and bottom
+    up to a target size, using the provided fill values.
+
+    Args:
+        target_size (int|list|tuple): Image size after padding.
+        im_padding_value (list): Fill value for image padding. Default: [127.5, 127.5, 127.5].
+        label_padding_value (int): Fill value for label padding. Default: 255.
+
+    Raises:
+        TypeError: If target_size is not an int, list, or tuple.
+        ValueError: If target_size is a list or tuple whose length is not 2.
+    """
+
+    def __init__(self,
+                 target_size,
+                 im_padding_value=[127.5, 127.5, 127.5],
+                 label_padding_value=255):
+        if isinstance(target_size, list) or isinstance(target_size, tuple):
+            if len(target_size) != 2:
+                raise ValueError(
+                    'when target_size is list or tuple, it should include 2 elements, but it is {}'
+                    .format(target_size))
+        elif not isinstance(target_size, int):
+            raise TypeError(
+                "Type of target_size is invalid. Must be Integer or List or tuple, now is {}"
+                .format(type(target_size)))
+        self.target_size = target_size
+        self.im_padding_value = im_padding_value
+        self.label_padding_value = label_padding_value
+
+    def __call__(self, im, im_info=None, label=None):
+        """
+        Args:
+            im (np.ndarray): Image data.
+            im_info (list): Shape history recorded before each resize or padding,
+                e.g. [('resize', [200, 300]), ('padding', [400, 600])] means the
+                image was (200, 300) before resize and (400, 600) before padding.
+            label (np.ndarray): Label map data.
+
+        Returns:
+            tuple: (im, im_info) when label is None, otherwise (im, im_info, label).
+                A new entry ('padding', (h, w)) is appended to im_info, recording
+                the shape before padding.
+
+        Raises:
+            ValueError: If the input image or label is larger than target_size.
+        """
+        if im_info is None:
+            im_info = list()
+        im_info.append(('padding', im.shape[:2]))
+
+        im_height, im_width = im.shape[0], im.shape[1]
+        if isinstance(self.target_size, int):
+            target_height = self.target_size
+            target_width = self.target_size
+        else:
+            target_height = self.target_size[1]
+            target_width = self.target_size[0]
+        pad_height = target_height - im_height
+        pad_width = target_width - im_width
+        if pad_height < 0 or pad_width < 0:
+            raise ValueError(
+                'the image size should not exceed target_size, but the image size ({}, {}) is larger than target_size ({}, {})'
+                .format(im_width, im_height, target_width, target_height))
+        else:
+            im = cv2.copyMakeBorder(
+                im,
+                0,
+                pad_height,
+                0,
+                pad_width,
+                cv2.BORDER_CONSTANT,
+                value=self.im_padding_value)
+            if label is not None:
+                label = cv2.copyMakeBorder(
+                    label,
+                    0,
+                    pad_height,
+                    0,
+                    pad_width,
+                    cv2.BORDER_CONSTANT,
+                    value=self.label_padding_value)
+        if label is None:
+            return (im, im_info)
+        else:
+            return (im, im_info, label)
+
+
+class RandomPaddingCrop(SegTransform):
+    """Randomly crop the image and label map; when the requested crop is larger
+    than the input, pad first.
+
+    Args:
+        crop_size (int|list|tuple): Crop size. Default: 512.
+        im_padding_value (list): Fill value for image padding. Default: [127.5, 127.5, 127.5].
+        label_padding_value (int): Fill value for label padding. Default: 255.
+
+    Raises:
+        TypeError: If crop_size is not an int, list, or tuple.
+        ValueError: If crop_size is a list or tuple whose length is not 2.
+    """
+
+    def __init__(self,
+                 crop_size=512,
+                 im_padding_value=[127.5, 127.5, 127.5],
+                 label_padding_value=255):
+        if isinstance(crop_size, list) or isinstance(crop_size, tuple):
+            if len(crop_size) != 2:
+                raise ValueError(
+                    'when crop_size is list or tuple, it should include 2 elements, but it is {}'
+                    .format(crop_size))
+        elif not isinstance(crop_size, int):
+            raise TypeError(
+                "Type of crop_size is invalid. Must be Integer or List or tuple, now is {}"
+                .format(type(crop_size)))
+        self.crop_size = crop_size
+        self.im_padding_value = im_padding_value
+        self.label_padding_value = label_padding_value
+
+    def __call__(self, im, im_info=None, label=None):
+        """
+        Args:
+            im (np.ndarray): Image data.
+            im_info (list): Shape history recorded before each resize or padding
+                (see Padding).
+            label (np.ndarray): Label map data.
+
+        Returns:
+            tuple: (im, im_info) when label is None, otherwise (im, im_info, label).
+        """
+        if isinstance(self.crop_size, int):
+            crop_width = self.crop_size
+            crop_height = self.crop_size
+        else:
+            crop_width = self.crop_size[0]
+            crop_height = self.crop_size[1]
+
+        img_height = im.shape[0]
+        img_width = im.shape[1]
+
+        if img_height == crop_height and img_width == crop_width:
+            if label is None:
+                return (im, im_info)
+            else:
+                return (im, im_info, label)
+        else:
+            pad_height = max(crop_height - img_height, 0)
+            pad_width = max(crop_width - img_width, 0)
+            if (pad_height > 0 or pad_width > 0):
+                im = cv2.copyMakeBorder(
+                    im,
+                    0,
+                    pad_height,
+                    0,
+                    pad_width,
+                    cv2.BORDER_CONSTANT,
+                    value=self.im_padding_value)
+                if label is not None:
+                    label = cv2.copyMakeBorder(
+                        label,
+                        0,
+                        pad_height,
+                        0,
+                        pad_width,
+                        cv2.BORDER_CONSTANT,
+                        value=self.label_padding_value)
+                img_height = im.shape[0]
+                img_width = im.shape[1]
+
+            if crop_height > 0 and crop_width > 0:
+                h_off = np.random.randint(img_height - crop_height + 1)
+                w_off = np.random.randint(img_width - crop_width + 1)
+
+                im = im[h_off:(crop_height + h_off),
+                        w_off:(w_off + crop_width), :]
+                if label is not None:
+                    label = label[h_off:(crop_height + h_off),
+                                  w_off:(w_off + crop_width)]
+            if label is None:
+                return (im, im_info)
+            else:
+                return (im, im_info, label)
+
+
+class RandomBlur(SegTransform):
+    """Apply Gaussian blur to the image with a given probability.
+
+    Args:
+        prob (float): Probability of blurring. Default: 0.1.
+    """
+
+    def __init__(self, prob=0.1):
+        self.prob = prob
+
+    def __call__(self, im, im_info=None, label=None):
+        """
+        Args:
+            im (np.ndarray): Image data.
+            im_info (list): Shape history recorded before each resize or padding
+                (see Padding).
+            label (np.ndarray): Label map data.
+
+        Returns:
+            tuple: (im, im_info) when label is None, otherwise (im, im_info, label).
+        """
+        if self.prob <= 0:
+            n = 0
+        elif self.prob >= 1:
+            n = 1
+        else:
+            n = int(1.0 / self.prob)
+        if n > 0:
+            if np.random.randint(0, n) == 0:
+                radius = np.random.randint(3, 10)
+                if radius % 2 != 1:
+                    radius = radius + 1
+                if radius > 9:
+                    radius = 9
+                im = cv2.GaussianBlur(im, (radius, radius), 0, 0)
+
+        if label is None:
+            return (im, im_info)
+        else:
+            return (im, im_info, label)
+
+
+class RandomScaleAspect(SegTransform):
+    """Crop a sub-region with a random area ratio and aspect ratio, then resize
+    it back to the original image size. The label map, when given, is processed
+    in sync.
+
+    Args:
+        min_scale (float): Minimum area ratio of the crop relative to the original
+            image, in [0, 1]; 0 returns the original image. Default: 0.5.
+        aspect_ratio (float): Lower bound of the crop aspect-ratio range; must be
+            non-negative, and 0 returns the original image. Default: 0.33.
+    """
+
+    def __init__(self, min_scale=0.5, aspect_ratio=0.33):
+        self.min_scale = min_scale
+        self.aspect_ratio = aspect_ratio
+
+    def __call__(self, im, im_info=None, label=None):
+        """
+        Args:
+            im (np.ndarray): Image data.
+            im_info (list): Shape history recorded before each resize or padding
+                (see Padding).
+            label (np.ndarray): Label map data.
+
+        Returns:
+            tuple: (im, im_info) when label is None, otherwise (im, im_info, label).
+        """
+        if self.min_scale != 0 and self.aspect_ratio != 0:
+            img_height = im.shape[0]
+            img_width = im.shape[1]
+            for i in range(0, 10):
+                area = img_height * img_width
+                target_area = area * np.random.uniform(self.min_scale, 1.0)
+                aspectRatio = np.random.uniform(self.aspect_ratio,
+                                                1.0 / self.aspect_ratio)
+
+                dw = int(np.sqrt(target_area * 1.0 * aspectRatio))
+                dh = int(np.sqrt(target_area * 1.0 / aspectRatio))
+                if (np.random.randint(10) < 5):
+                    tmp = dw
+                    dw = dh
+                    dh = tmp
+
+                if (dh < img_height and dw < img_width):
+                    h1 = np.random.randint(0, img_height - dh)
+                    w1 = np.random.randint(0, img_width - dw)
+
+                    im = im[h1:(h1 + dh), w1:(w1 + dw), :]
+                    im = cv2.resize(
+                        im, (img_width, img_height),
+                        interpolation=cv2.INTER_LINEAR)
+                    if label is not None:
+                        label = label[h1:(h1 + dh), w1:(w1 + dw)]
+                        label = cv2.resize(
+                            label, (img_width, img_height),
+                            interpolation=cv2.INTER_NEAREST)
+                    break
+        if label is None:
+            return (im, im_info)
+        else:
+            return (im, im_info, label)
+
+
+class RandomDistort(SegTransform):
+    """Randomly distort the image.
+
+    1. Shuffle the order of the distortion ops.
+    2. Apply each op, in that order, with its own probability.
+
+    Args:
+        brightness_range (float): Range of the brightness factor. Default: 0.5.
+        brightness_prob (float): Probability of adjusting brightness. Default: 0.5.
+        contrast_range (float): Range of the contrast factor. Default: 0.5.
+        contrast_prob (float): Probability of adjusting contrast. Default: 0.5.
+        saturation_range (float): Range of the saturation factor. Default: 0.5.
+        saturation_prob (float): Probability of adjusting saturation. Default: 0.5.
+        hue_range (int): Range of the hue factor. Default: 18.
+        hue_prob (float): Probability of adjusting hue. Default: 0.5.
+    """
+
+    def __init__(self,
+                 brightness_range=0.5,
+                 brightness_prob=0.5,
+                 contrast_range=0.5,
+                 contrast_prob=0.5,
+                 saturation_range=0.5,
+                 saturation_prob=0.5,
+                 hue_range=18,
+                 hue_prob=0.5):
+        self.brightness_range = brightness_range
+        self.brightness_prob = brightness_prob
+        self.contrast_range = contrast_range
+        self.contrast_prob = contrast_prob
+        self.saturation_range = saturation_range
+        self.saturation_prob = saturation_prob
+        self.hue_range = hue_range
+        self.hue_prob = hue_prob
+
+    def __call__(self, im, im_info=None, label=None):
+        """
+        Args:
+            im (np.ndarray): Image data.
+            im_info (list): Shape history recorded before each resize or padding
+                (see Padding).
+            label (np.ndarray): Label map data.
+
+        Returns:
+            tuple: (im, im_info) when label is None, otherwise (im, im_info, label).
+        """
+        brightness_lower = 1 - self.brightness_range
+        brightness_upper = 1 + self.brightness_range
+        contrast_lower = 1 - self.contrast_range
+        contrast_upper = 1 + self.contrast_range
+        saturation_lower = 1 - self.saturation_range
+        saturation_upper = 1 + self.saturation_range
+        hue_lower = -self.hue_range
+        hue_upper = self.hue_range
+        ops = [brightness, contrast, saturation, hue]
+        random.shuffle(ops)
+        params_dict = {
+            'brightness': {
+                'brightness_lower': brightness_lower,
+                'brightness_upper': brightness_upper
+            },
+            'contrast': {
+                'contrast_lower': contrast_lower,
+                'contrast_upper': contrast_upper
+            },
+            'saturation': {
+                'saturation_lower': saturation_lower,
+                'saturation_upper': saturation_upper
+            },
+            'hue': {
+                'hue_lower': hue_lower,
+                'hue_upper': hue_upper
+            }
+        }
+        prob_dict = {
+            'brightness': self.brightness_prob,
+            'contrast': self.contrast_prob,
+            'saturation': self.saturation_prob,
+            'hue': self.hue_prob
+        }
+        for id in range(4):
+            params = params_dict[ops[id].__name__]
+            prob = prob_dict[ops[id].__name__]
+            params['im'] = im
+            if np.random.uniform(0, 1) < prob:
+                im = ops[id](**params)
+        if label is None:
+            return (im, im_info)
+        else:
+            return (im, im_info, label)
+
+
+class ArrangeSegmenter(SegTransform):
+    """Arrange the data into the fields needed for training/evaluation/inference.
+
+    Args:
+        mode (str): Intended use of the data; one of ['train', 'eval', 'test', 'quant'].
+
+    Raises:
+        ValueError: If mode is not in ['train', 'eval', 'test', 'quant'].
+    """
+
+    def __init__(self, mode):
+        if mode not in ['train', 'eval', 'test', 'quant']:
+            raise ValueError(
+                "mode should be defined as one of ['train', 'eval', 'test', 'quant']!"
+            )
+        self.mode = mode
+
+    def __call__(self, im, im_info, label=None):
+        """
+        Args:
+            im (np.ndarray): Image data.
+            im_info (list): Shape history recorded before each resize or padding
+                (see Padding).
+            label (np.ndarray): Label map data.
+
+        Returns:
+            tuple: (im, label) when mode is 'train' or 'eval'; (im, im_info) when
+                mode is 'test'; (im, ) when mode is 'quant'.
+        """
+        im = permute(im, False)
+        if self.mode == 'train' or self.mode == 'eval':
+            label = label[np.newaxis, :, :]
+            return (im, label)
+        elif self.mode == 'test':
+            return (im, im_info)
+        else:
+            return (im, )
+
+
+class ComposedSegTransforms(Compose):
+    """Image pipeline for the segmentation models (UNet/DeepLabv3p).
+    Training:
+        1. Randomly flip the image horizontally with probability 0.5
+        2. Randomly rescale the image
+        3. Randomly crop a patch of size train_crop_size; if the image is smaller
+           than train_crop_size, pad it up to that size
+        4. Normalize the image
+    Inference:
+        1. Normalize the image
+
+    Args:
+        mode(str): Phase the pipeline is used in: 'train', 'eval', or 'test'
+        train_crop_size(list): Size of the random crop during training
+        mean(list): Image mean
+        std(list): Image standard deviation
+    """
+
+    def __init__(self,
+                 mode,
+                 train_crop_size=[769, 769],
+                 mean=[0.5, 0.5, 0.5],
+                 std=[0.5, 0.5, 0.5]):
+        if mode == 'train':
+            # Training transforms, with data augmentation, matching the
+            # pipeline described in the docstring
+            transforms = [
+                RandomHorizontalFlip(prob=0.5), ResizeStepScaling(),
+                RandomPaddingCrop(crop_size=train_crop_size),
+                Normalize(mean=mean, std=std)
+            ]
+        else:
+            # Transforms for evaluation/inference
+            transforms = [Normalize(mean=mean, std=std)]
+
+        super(ComposedSegTransforms, self).__init__(transforms)

diff --git a/deploy/openvino/scripts/bootstrap.sh b/deploy/openvino/scripts/bootstrap.sh
deleted file mode 100644
index f9fc1d1edc327370f7b5d8e7494cb88d4fd4d12c..0000000000000000000000000000000000000000
--- a/deploy/openvino/scripts/bootstrap.sh
+++ /dev/null
@@ -1,10 +0,0 @@
-# download pre-compiled opencv lib
-OPENCV_URL=https://paddleseg.bj.bcebos.com/deploy/docker/opencv3gcc4.8.tar.bz2
-if [ ! -d "./deps/opencv3gcc4.8" ]; then
-    mkdir -p deps
-    cd deps
-    wget -c ${OPENCV_URL}
-    tar xvfj opencv3gcc4.8.tar.bz2
-    rm -rf opencv3gcc4.8.tar.bz2
-    cd ..
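Editorial aside: a minimal usage sketch for the segmentation transforms added above. The import path `transforms.seg_transforms` is an assumption for illustration (point it at wherever this module lives in your tree); the array shapes are made up.

```python
# Minimal sketch: pad a 400x500 image up to 512x512, then run a crop that is
# a no-op here, to show how im_info accumulates ('padding', (h, w)) entries.
import numpy as np
from transforms.seg_transforms import Padding, RandomPaddingCrop  # hypothetical path

im = np.random.rand(400, 500, 3).astype('float32')          # H, W, C
label = np.random.randint(0, 2, (400, 500)).astype('uint8')

im, im_info, label = Padding(target_size=512)(im, None, label)
im, im_info, label = RandomPaddingCrop(crop_size=512)(im, im_info, label)

print(im.shape, label.shape)  # (512, 512, 3) (512, 512)
print(im_info)                # [('padding', (400, 500))]
```

The recorded `('padding', (h, w))` entry is what the C++ postprocessing below uses to crop predictions back to the pre-padding shape.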
-fi

diff --git a/deploy/openvino/scripts/build.sh b/deploy/openvino/scripts/build.sh
old mode 100644
new mode 100755
index 17f988146a6147030be35bd4abee966b569caa5f..0e204b5cf87518da92405bed6d9987850f15e2fd
--- a/deploy/openvino/scripts/build.sh
+++ b/deploy/openvino/scripts/build.sh
@@ -1,14 +1,23 @@
-# path of the pre-built OpenVINO libraries
-OPENVINO_DIR=/path/to/inference_engine/
-# path of the pre-built gflags library
-GFLAGS_DIR=/path/to/gflags
+# path of the pre-built OpenVINO inference engine
+OPENVINO_DIR=$INTEL_OPENVINO_DIR/inference_engine
+
 # path of the ngraph lib, usually generated when compiling OpenVINO
-NGRAPH_LIB=/path/to/ngraph/lib/
+NGRAPH_LIB=$INTEL_OPENVINO_DIR/deployment_tools/ngraph/lib
+
+# path of the pre-built gflags library
+GFLAGS_DIR=$(pwd)/deps/gflags
+# path of the pre-built glog library
+GLOG_DIR=$(pwd)/deps/glog
+
+# use the bundled pre-built OpenCV
+OPENCV_DIR=$(pwd)/deps/opencv/
+
+# CPU architecture
+ARCH=x86
+export ARCH

-# path of the pre-built OpenCV; no change needed when using the bundled build
-OPENCV_DIR=$(pwd)/deps/opencv3gcc4.8/
-# download the bundled pre-built version
-sh $(pwd)/scripts/bootstrap.sh
+# download and build third-party libs
+sh $(pwd)/scripts/install_third-party.sh

rm -rf build
mkdir -p build
@@ -16,6 +25,8 @@ cd build
cmake .. \
    -DOPENCV_DIR=${OPENCV_DIR} \
    -DGFLAGS_DIR=${GFLAGS_DIR} \
+   -DGLOG_DIR=${GLOG_DIR} \
    -DOPENVINO_DIR=${OPENVINO_DIR} \
-   -DNGRAPH_LIB=${NGRAPH_LIB}
+   -DNGRAPH_LIB=${NGRAPH_LIB} \
+   -DARCH=${ARCH}
make

diff --git a/deploy/openvino/scripts/install_third-party.sh b/deploy/openvino/scripts/install_third-party.sh
new file mode 100644
index 0000000000000000000000000000000000000000..8824f64a37d0a0c245cfb0be7e047b5828516be1
--- /dev/null
+++ b/deploy/openvino/scripts/install_third-party.sh
@@ -0,0 +1,37 @@
+# download third-party libs
+if [ ! -d "./deps" ]; then
+    mkdir deps
+fi
+if [ ! -d "./deps/gflags" ]; then
+    cd deps
+    git clone https://github.com/gflags/gflags
+    cd gflags
+    cmake .
+    make -j 8
+    cd ..
+    cd ..
+fi
+if [ ! -d "./deps/glog" ]; then
+    cd deps
+    git clone https://github.com/google/glog
+    sudo apt-get install autoconf automake libtool
+    cd glog
+    ./autogen.sh
+    ./configure
+    make -j 8
+    cd ..
+    cd ..
+fi
+
+if [ "$ARCH" = "x86" ]; then
+    OPENCV_URL=https://bj.bcebos.com/paddlex/deploy/x86opencv/opencv.tar.bz2
+else
+    OPENCV_URL=https://bj.bcebos.com/paddlex/deploy/armopencv/opencv.tar.bz2
+fi
+if [ ! -d "./deps/opencv" ]; then
+    cd deps
+    wget -c ${OPENCV_URL}
+    tar xvfj opencv.tar.bz2
+    rm -rf opencv.tar.bz2
+    cd ..
+fi

diff --git a/deploy/openvino/src/paddlex.cpp b/deploy/openvino/src/paddlex.cpp
old mode 100644
new mode 100755
index bdae99892735ccc67d2189e1b6dcc0a0789dcf95..f924b968d1a189846edfb20abc779131514cf0c5
--- a/deploy/openvino/src/paddlex.cpp
+++ b/deploy/openvino/src/paddlex.cpp
@@ -13,28 +13,47 @@
 // limitations under the License.
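Aside on the dependency bootstrap above: for readers scripting the fetch from Python instead of shell, this sketch mirrors the OpenCV step of `install_third-party.sh`. The URL is the x86 one from the script; everything else is illustrative.

```python
# Python sketch mirroring the OpenCV download step of install_third-party.sh.
import os
import tarfile
import urllib.request

OPENCV_URL = "https://bj.bcebos.com/paddlex/deploy/x86opencv/opencv.tar.bz2"

def fetch_opencv(deps_dir="deps"):
    os.makedirs(deps_dir, exist_ok=True)
    target = os.path.join(deps_dir, "opencv")
    if os.path.isdir(target):          # same idempotence check as the script
        return target
    archive = os.path.join(deps_dir, "opencv.tar.bz2")
    urllib.request.urlretrieve(OPENCV_URL, archive)
    with tarfile.open(archive, "r:bz2") as tf:
        tf.extractall(deps_dir)        # the archive unpacks to deps/opencv
    os.remove(archive)
    return target

if __name__ == "__main__":
    print(fetch_opencv())
```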
#include "include/paddlex/paddlex.h" +#include +#include -using namespace InferenceEngine; namespace PaddleX { void Model::create_predictor(const std::string& model_dir, - const std::string& cfg_dir, + const std::string& cfg_file, std::string device) { - Core ie; - network_ = ie.ReadNetwork(model_dir, model_dir.substr(0, model_dir.size() - 4) + ".bin"); + InferenceEngine::Core ie; + network_ = ie.ReadNetwork( + model_dir, model_dir.substr(0, model_dir.size() - 4) + ".bin"); network_.setBatchSize(1); - InputInfo::Ptr input_info = network_.getInputsInfo().begin()->second; - input_info->getPreProcess().setResizeAlgorithm(RESIZE_BILINEAR); - input_info->setLayout(Layout::NCHW); - input_info->setPrecision(Precision::FP32); - executable_network_ = ie.LoadNetwork(network_, device); - load_config(cfg_dir); + InferenceEngine::InputsDataMap inputInfo(network_.getInputsInfo()); + std::string imageInputName; + for (const auto & inputInfoItem : inputInfo) { + if (inputInfoItem.second->getTensorDesc().getDims().size() == 4) { + imageInputName = inputInfoItem.first; + inputInfoItem.second->setPrecision(InferenceEngine::Precision::FP32); + inputInfoItem.second->getPreProcess().setResizeAlgorithm( + InferenceEngine::RESIZE_BILINEAR); + inputInfoItem.second->setLayout(InferenceEngine::Layout::NCHW); + } + if (inputInfoItem.second->getTensorDesc().getDims().size() == 2) { + imageInputName = inputInfoItem.first; + inputInfoItem.second->setPrecision(InferenceEngine::Precision::FP32); + } + } + if (device == "MYRIAD") { + std::map networkConfig; + networkConfig["VPU_HW_STAGES_OPTIMIZATION"] = "ON"; + executable_network_ = ie.LoadNetwork(network_, device, networkConfig); + } else { + executable_network_ = ie.LoadNetwork(network_, device); + } + load_config(cfg_file); } -bool Model::load_config(const std::string& cfg_dir) { - YAML::Node config = YAML::LoadFile(cfg_dir); +bool Model::load_config(const std::string& cfg_file) { + YAML::Node config = YAML::LoadFile(cfg_file); type = config["_Attributes"]["model_type"].as(); name = config["Model"].as(); bool to_rgb = true; @@ -48,22 +67,26 @@ bool Model::load_config(const std::string& cfg_dir) { return false; } } - // 构建数据处理流 - transforms_.Init(config["Transforms"], to_rgb); - // 读入label list - labels.clear(); - labels = config["_Attributes"]["labels"].as>(); + // init preprocess ops + transforms_.Init(config["Transforms"], type, to_rgb); + // read label list + for (const auto& item : config["_Attributes"]["labels"]) { + int index = labels.size(); + labels[index] = item.as(); + } + return true; } -bool Model::preprocess(cv::Mat* input_im) { - if (!transforms_.Run(input_im, inputs_)) { +bool Model::preprocess(cv::Mat* input_im, ImageBlob* inputs) { + if (!transforms_.Run(input_im, inputs)) { return false; } return true; } bool Model::predict(const cv::Mat& im, ClsResult* result) { + inputs_.clear(); if (type == "detector") { std::cerr << "Loading model is a 'detector', DetResult should be passed to " "function predict()!" 
@@ -75,34 +98,221 @@ bool Model::predict(const cv::Mat& im, ClsResult* result) {
               << std::endl;
     return false;
   }
-  // preprocess the input image
-  InferRequest infer_request = executable_network_.CreateInferRequest();
+  // preprocess
+  InferenceEngine::InferRequest infer_request =
+      executable_network_.CreateInferRequest();
   std::string input_name = network_.getInputsInfo().begin()->first;
-  inputs_ = infer_request.GetBlob(input_name);
-
-  auto im_clone = im.clone();
-  if (!preprocess(&im_clone)) {
+  inputs_.blob = infer_request.GetBlob(input_name);
+  cv::Mat im_clone = im.clone();
+  if (!preprocess(&im_clone, &inputs_)) {
     std::cerr << "Preprocess failed!" << std::endl;
     return false;
   }
+  // predict
   infer_request.Infer();
   std::string output_name = network_.getOutputsInfo().begin()->first;
   output_ = infer_request.GetBlob(output_name);
-  MemoryBlob::CPtr moutput = as<MemoryBlob>(output_);
+  InferenceEngine::MemoryBlob::CPtr moutput =
+      InferenceEngine::as<InferenceEngine::MemoryBlob>(output_);
   auto moutputHolder = moutput->rmap();
   float* outputs_data = moutputHolder.as<float*>();
-  // post-process the model output; note sizeof(outputs_data) is only the size
-  // of the pointer, so take the element count from the output blob instead
-  auto ptr = std::max_element(outputs_data, outputs_data + sizeof(outputs_data));
+  // post process
+  auto ptr = std::max_element(outputs_data, outputs_data + moutput->size());
   result->category_id = std::distance(outputs_data, ptr);
   result->score = *ptr;
   result->category = labels[result->category_id];
-  // (removed a commented-out debug loop that printed every class score)
+  return true;
+}
+
+bool Model::predict(const cv::Mat& im, DetResult* result) {
+  result->clear();
+  inputs_.clear();
+  if (type == "classifier") {
+    std::cerr << "Loading model is a 'classifier', ClsResult should be passed "
+                 "to function predict()!" << std::endl;
+    return false;
+  } else if (type == "segmenter") {
+    std::cerr << "Loading model is a 'segmenter', SegResult should be passed "
+                 "to function predict()!" << std::endl;
+    return false;
+  }
+  InferenceEngine::InferRequest infer_request =
+      executable_network_.CreateInferRequest();
+  InferenceEngine::InputsDataMap input_maps = network_.getInputsInfo();
+  std::string inputName;
+  for (const auto & input_map : input_maps) {
+    if (input_map.second->getTensorDesc().getDims().size() == 4) {
+      inputName = input_map.first;
+      inputs_.blob = infer_request.GetBlob(inputName);
+    }
+    if (input_map.second->getTensorDesc().getDims().size() == 2) {
+      inputName = input_map.first;
+      inputs_.ori_im_size_ = infer_request.GetBlob(inputName);
+    }
+  }
+  cv::Mat im_clone = im.clone();
+  if (!preprocess(&im_clone, &inputs_)) {
+    std::cerr << "Preprocess failed!" << std::endl;
+    return false;
+  }
+
+  infer_request.Infer();
+
+  InferenceEngine::OutputsDataMap out_map = network_.getOutputsInfo();
+  auto iter = out_map.begin();
+  std::string outputName = iter->first;
+  InferenceEngine::Blob::Ptr output = infer_request.GetBlob(outputName);
+  InferenceEngine::MemoryBlob::CPtr moutput =
+      InferenceEngine::as<InferenceEngine::MemoryBlob>(output);
+  InferenceEngine::TensorDesc blob_output = moutput->getTensorDesc();
+  std::vector<size_t> output_shape = blob_output.getDims();
+  auto moutputHolder = moutput->rmap();
+  float* data = moutputHolder.as<float*>();
+  int size = 1;
+  for (auto& i : output_shape) {
+    size *= static_cast<int>(i);
+  }
+  int num_boxes = size / 6;
+  for (int i = 0; i < num_boxes; ++i) {
+    if (data[i * 6] > 0) {
+      Box box;
+      box.category_id = static_cast<int>(data[i * 6]);
+      box.category = labels[box.category_id];
+      box.score = data[i * 6 + 1];
+      float xmin = data[i * 6 + 2];
+      float ymin = data[i * 6 + 3];
+      float xmax = data[i * 6 + 4];
+      float ymax = data[i * 6 + 5];
+      float w = xmax - xmin + 1;
+      float h = ymax - ymin + 1;
+      box.coordinate = {xmin, ymin, w, h};
+      result->boxes.push_back(std::move(box));
+    }
+  }
+  return true;
+}
-}  // namespce of PaddleX
+
+bool Model::predict(const cv::Mat& im, SegResult* result) {
+  result->clear();
+  inputs_.clear();
+  if (type == "classifier") {
+    std::cerr << "Loading model is a 'classifier', ClsResult should be passed "
+                 "to function predict()!" << std::endl;
+    return false;
+  } else if (type == "detector") {
+    std::cerr << "Loading model is a 'detector', DetResult should be passed to "
+                 "function predict()!" << std::endl;
+    return false;
+  }
+  // init infer
+  InferenceEngine::InferRequest infer_request =
+      executable_network_.CreateInferRequest();
+  std::string input_name = network_.getInputsInfo().begin()->first;
+  inputs_.blob = infer_request.GetBlob(input_name);
+
+  // preprocess
+  cv::Mat im_clone = im.clone();
+  if (!preprocess(&im_clone, &inputs_)) {
+    std::cerr << "Preprocess failed!" << std::endl;
+    return false;
+  }
+
+  // predict
+  infer_request.Infer();
+
+  InferenceEngine::OutputsDataMap out_map = network_.getOutputsInfo();
+  auto iter = out_map.begin();
+  iter++;
+  std::string output_name_score = iter->first;
+  InferenceEngine::Blob::Ptr output_score =
+      infer_request.GetBlob(output_name_score);
+  InferenceEngine::MemoryBlob::CPtr moutput_score =
+      InferenceEngine::as<InferenceEngine::MemoryBlob>(output_score);
+  InferenceEngine::TensorDesc blob_score = moutput_score->getTensorDesc();
+  std::vector<size_t> output_score_shape = blob_score.getDims();
+  int size = 1;
+  for (auto& i : output_score_shape) {
+    size *= static_cast<int>(i);
+    result->score_map.shape.push_back(static_cast<int>(i));
+  }
+  result->score_map.data.resize(size);
+  auto moutputHolder_score = moutput_score->rmap();
+  float* score_data = moutputHolder_score.as<float*>();
+  memcpy(result->score_map.data.data(), score_data, moutput_score->byteSize());
+
+  iter++;
+  std::string output_name_label = iter->first;
+  InferenceEngine::Blob::Ptr output_label =
+      infer_request.GetBlob(output_name_label);
+  InferenceEngine::MemoryBlob::CPtr moutput_label =
+      InferenceEngine::as<InferenceEngine::MemoryBlob>(output_label);
+  InferenceEngine::TensorDesc blob_label = moutput_label->getTensorDesc();
+  std::vector<size_t> output_label_shape = blob_label.getDims();
+  size = 1;
+  for (auto& i : output_label_shape) {
+    size *= static_cast<int>(i);
+    result->label_map.shape.push_back(static_cast<int>(i));
+  }
+  result->label_map.data.resize(size);
+  auto moutputHolder_label = moutput_label->rmap();
+  int* label_data = moutputHolder_label.as<int*>();
+  memcpy(result->label_map.data.data(), label_data, moutput_label->byteSize());
+
+  std::vector<uint8_t> label_map(result->label_map.data.begin(),
+                                 result->label_map.data.end());
+  cv::Mat mask_label(result->label_map.shape[1],
+                     result->label_map.shape[2],
+                     CV_8UC1,
+                     label_map.data());
+
+  cv::Mat mask_score(result->score_map.shape[2],
+                     result->score_map.shape[3],
+                     CV_32FC1,
+                     result->score_map.data.data());
+  int idx = 1;
+  int len_postprocess = inputs_.im_size_before_resize_.size();
+  for (std::vector<std::string>::reverse_iterator iter =
+           inputs_.reshape_order_.rbegin();
+       iter != inputs_.reshape_order_.rend();
+       ++iter) {
+    if (*iter == "padding") {
+      auto before_shape = inputs_.im_size_before_resize_[len_postprocess - idx];
+      inputs_.im_size_before_resize_.pop_back();
+      auto padding_w = before_shape[0];
+      auto padding_h = before_shape[1];
+      mask_label = mask_label(cv::Rect(0, 0, padding_h, padding_w));
+      mask_score = mask_score(cv::Rect(0, 0, padding_h, padding_w));
+    } else if (*iter == "resize") {
+      auto before_shape = inputs_.im_size_before_resize_[len_postprocess - idx];
+      inputs_.im_size_before_resize_.pop_back();
+      auto resize_w = before_shape[0];
+      auto resize_h = before_shape[1];
+      cv::resize(mask_label,
+                 mask_label,
+                 cv::Size(resize_h, resize_w),
+                 0,
+                 0,
+                 cv::INTER_NEAREST);
+      cv::resize(mask_score,
+                 mask_score,
+                 cv::Size(resize_h, resize_w),
+                 0,
+                 0,
+                 cv::INTER_LINEAR);
+    }
+    ++idx;
+  }
+  result->label_map.data.assign(mask_label.begin<uint8_t>(),
+                                mask_label.end<uint8_t>());
+  result->label_map.shape = {mask_label.rows, mask_label.cols};
+  result->score_map.data.assign(mask_score.begin<float>(),
+                                mask_score.end<float>());
+  result->score_map.shape = {mask_score.rows, mask_score.cols};
+  return true;
+}
+}  // namespace PaddleX

diff --git a/deploy/openvino/src/transforms.cpp b/deploy/openvino/src/transforms.cpp
old mode 100644
new mode 100755
index 1c7fe2ee68433a67645f5c91c18e525d62a6c4d3..b65eaf7fd2df4e48b0dcefbe9561eb28cd9c7ba7
--- a/deploy/openvino/src/transforms.cpp
+++
b/deploy/openvino/src/transforms.cpp @@ -12,11 +12,15 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "include/paddlex/transforms.h" + +#include + #include +#include #include #include -#include "include/paddlex/transforms.h" namespace PaddleX { @@ -26,7 +30,7 @@ std::map interpolations = {{"LINEAR", cv::INTER_LINEAR}, {"CUBIC", cv::INTER_CUBIC}, {"LANCZOS4", cv::INTER_LANCZOS4}}; -bool Normalize::Run(cv::Mat* im){ +bool Normalize::Run(cv::Mat* im, ImageBlob* data) { for (int h = 0; h < im->rows; h++) { for (int w = 0; w < im->cols; w++) { im->at(h, w)[0] = @@ -40,19 +44,6 @@ bool Normalize::Run(cv::Mat* im){ return true; } -bool CenterCrop::Run(cv::Mat* im) { - int height = static_cast(im->rows); - int width = static_cast(im->cols); - if (height < height_ || width < width_) { - std::cerr << "[CenterCrop] Image size less than crop size" << std::endl; - return false; - } - int offset_x = static_cast((width - width_) / 2); - int offset_y = static_cast((height - height_) / 2); - cv::Rect crop_roi(offset_x, offset_y, width_, height_); - *im = (*im)(crop_roi); - return true; -} float ResizeByShort::GenerateScale(const cv::Mat& im) { @@ -70,17 +61,115 @@ float ResizeByShort::GenerateScale(const cv::Mat& im) { return scale; } -bool ResizeByShort::Run(cv::Mat* im) { +bool ResizeByShort::Run(cv::Mat* im, ImageBlob* data) { + data->im_size_before_resize_.push_back({im->rows, im->cols}); + data->reshape_order_.push_back("resize"); float scale = GenerateScale(*im); - int width = static_cast(scale * im->cols); - int height = static_cast(scale * im->rows); + int width = static_cast(round(scale * im->cols)); + int height = static_cast(round(scale * im->rows)); cv::resize(*im, *im, cv::Size(width, height), 0, 0, cv::INTER_LINEAR); + data->new_im_size_[0] = im->rows; + data->new_im_size_[1] = im->cols; + data->scale = scale; return true; } -void Transforms::Init(const YAML::Node& transforms_node, bool to_rgb) { +bool CenterCrop::Run(cv::Mat* im, ImageBlob* data) { + int height = static_cast(im->rows); + int width = static_cast(im->cols); + if (height < height_ || width < width_) { + std::cerr << "[CenterCrop] Image size less than crop size" << std::endl; + return false; + } + int offset_x = static_cast((width - width_) / 2); + int offset_y = static_cast((height - height_) / 2); + cv::Rect crop_roi(offset_x, offset_y, width_, height_); + *im = (*im)(crop_roi); + data->new_im_size_[0] = im->rows; + data->new_im_size_[1] = im->cols; + return true; +} + + +bool Padding::Run(cv::Mat* im, ImageBlob* data) { + data->im_size_before_resize_.push_back({im->rows, im->cols}); + data->reshape_order_.push_back("padding"); + + int padding_w = 0; + int padding_h = 0; + if (width_ > 1 & height_ > 1) { + padding_w = width_ - im->cols; + padding_h = height_ - im->rows; + } else if (coarsest_stride_ >= 1) { + int h = im->rows; + int w = im->cols; + padding_h = + ceil(h * 1.0 / coarsest_stride_) * coarsest_stride_ - im->rows; + padding_w = + ceil(w * 1.0 / coarsest_stride_) * coarsest_stride_ - im->cols; + } + + if (padding_h < 0 || padding_w < 0) { + std::cerr << "[Padding] Computed padding_h=" << padding_h + << ", padding_w=" << padding_w + << ", but they should be greater than 0." 
<< std::endl; + return false; + } + cv::Scalar value = cv::Scalar(im_value_[0], im_value_[1], im_value_[2]); + cv::copyMakeBorder( + *im, *im, 0, padding_h, 0, padding_w, cv::BORDER_CONSTANT, value); + data->new_im_size_[0] = im->rows; + data->new_im_size_[1] = im->cols; + return true; +} + +bool ResizeByLong::Run(cv::Mat* im, ImageBlob* data) { + if (long_size_ <= 0) { + std::cerr << "[ResizeByLong] long_size should be greater than 0" + << std::endl; + return false; + } + data->im_size_before_resize_.push_back({im->rows, im->cols}); + data->reshape_order_.push_back("resize"); + int origin_w = im->cols; + int origin_h = im->rows; + + int im_size_max = std::max(origin_w, origin_h); + float scale = + static_cast(long_size_) / static_cast(im_size_max); + cv::resize(*im, *im, cv::Size(), scale, scale, cv::INTER_NEAREST); + data->new_im_size_[0] = im->rows; + data->new_im_size_[1] = im->cols; + data->scale = scale; + return true; +} + +bool Resize::Run(cv::Mat* im, ImageBlob* data) { + if (width_ <= 0 || height_ <= 0) { + std::cerr << "[Resize] width and height should be greater than 0" + << std::endl; + return false; + } + if (interpolations.count(interp_) <= 0) { + std::cerr << "[Resize] Invalid interpolation method: '" << interp_ << "'" + << std::endl; + return false; + } + data->im_size_before_resize_.push_back({im->rows, im->cols}); + data->reshape_order_.push_back("resize"); + + cv::resize( + *im, *im, cv::Size(width_, height_), 0, 0, interpolations[interp_]); + data->new_im_size_[0] = im->rows; + data->new_im_size_[1] = im->cols; + return true; +} + +void Transforms::Init( + const YAML::Node& transforms_node, std::string type, bool to_rgb) { transforms_.clear(); to_rgb_ = to_rgb; + type_ = type; for (const auto& item : transforms_node) { std::string name = item.begin()->first.as(); std::cout << "trans name: " << name << std::endl; @@ -94,10 +183,16 @@ std::shared_ptr Transforms::CreateTransform( const std::string& transform_name) { if (transform_name == "Normalize") { return std::make_shared(); - } else if (transform_name == "CenterCrop") { - return std::make_shared(); } else if (transform_name == "ResizeByShort") { return std::make_shared(); + } else if (transform_name == "CenterCrop") { + return std::make_shared(); + } else if (transform_name == "Resize") { + return std::make_shared(); + } else if (transform_name == "Padding") { + return std::make_shared(); + } else if (transform_name == "ResizeByLong") { + return std::make_shared(); } else { std::cerr << "There's unexpected transform(name='" << transform_name << "')." << std::endl; @@ -105,27 +200,38 @@ std::shared_ptr Transforms::CreateTransform( } } -bool Transforms::Run(cv::Mat* im, Blob::Ptr blob) { - // 按照transforms中预处理算子顺序处理图像 +bool Transforms::Run(cv::Mat* im, ImageBlob* data) { + // preprocess by order if (to_rgb_) { cv::cvtColor(*im, *im, cv::COLOR_BGR2RGB); } (*im).convertTo(*im, CV_32FC3); + if (type_ == "detector") { + InferenceEngine::LockedMemory input2Mapped = + InferenceEngine::as( + data->ori_im_size_)->wmap(); + float *p = input2Mapped.as(); + p[0] = im->rows; + p[1] = im->cols; + } + data->new_im_size_[0] = im->rows; + data->new_im_size_[1] = im->cols; for (int i = 0; i < transforms_.size(); ++i) { - if (!transforms_[i]->Run(im)) { + if (!transforms_[i]->Run(im, data)) { std::cerr << "Apply transforms to image failed!" 
<< std::endl; return false; } } - // 将图像由NHWC转为NCHW格式 - // 同时转为连续的内存块存储到Blob - SizeVector blobSize = blob->getTensorDesc().getDims(); + // image format NHWC to NCHW + // img data save to ImageBlob + InferenceEngine::SizeVector blobSize = data->blob->getTensorDesc().getDims(); const size_t width = blobSize[3]; const size_t height = blobSize[2]; const size_t channels = blobSize[1]; - MemoryBlob::Ptr mblob = InferenceEngine::as(blob); + InferenceEngine::MemoryBlob::Ptr mblob = + InferenceEngine::as(data->blob); auto mblobHolder = mblob->wmap(); float *blob_data = mblobHolder.as(); for (size_t c = 0; c < channels; c++) { diff --git a/deploy/openvino/src/visualize.cpp b/deploy/openvino/src/visualize.cpp new file mode 100644 index 0000000000000000000000000000000000000000..dcfa8e7910d2e5e193bee2a74eb00eb65d60d7f0 --- /dev/null +++ b/deploy/openvino/src/visualize.cpp @@ -0,0 +1,148 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "include/paddlex/visualize.h" + +namespace PaddleX { +std::vector GenerateColorMap(int num_class) { + auto colormap = std::vector(3 * num_class, 0); + for (int i = 0; i < num_class; ++i) { + int j = 0; + int lab = i; + while (lab) { + colormap[i * 3] |= (((lab >> 0) & 1) << (7 - j)); + colormap[i * 3 + 1] |= (((lab >> 1) & 1) << (7 - j)); + colormap[i * 3 + 2] |= (((lab >> 2) & 1) << (7 - j)); + ++j; + lab >>= 3; + } + } + return colormap; +} + +cv::Mat Visualize(const cv::Mat& img, + const DetResult& result, + const std::map& labels, + const std::vector& colormap, + float threshold) { + cv::Mat vis_img = img.clone(); + auto boxes = result.boxes; + for (int i = 0; i < boxes.size(); ++i) { + if (boxes[i].score < threshold) { + continue; + } + cv::Rect roi = cv::Rect(boxes[i].coordinate[0], + boxes[i].coordinate[1], + boxes[i].coordinate[2], + boxes[i].coordinate[3]); + + // draw box and title + std::string text = boxes[i].category; + int c1 = colormap[3 * boxes[i].category_id + 0]; + int c2 = colormap[3 * boxes[i].category_id + 1]; + int c3 = colormap[3 * boxes[i].category_id + 2]; + cv::Scalar roi_color = cv::Scalar(c1, c2, c3); + text += std::to_string(static_cast(boxes[i].score * 100)) + "%"; + int font_face = cv::FONT_HERSHEY_SIMPLEX; + double font_scale = 0.5f; + float thickness = 0.5; + cv::Size text_size = + cv::getTextSize(text, font_face, font_scale, thickness, nullptr); + cv::Point origin; + origin.x = roi.x; + origin.y = roi.y; + + // background + cv::Rect text_back = cv::Rect(boxes[i].coordinate[0], + boxes[i].coordinate[1] - text_size.height, + text_size.width, + text_size.height); + + // draw + cv::rectangle(vis_img, roi, roi_color, 2); + cv::rectangle(vis_img, text_back, roi_color, -1); + cv::putText(vis_img, + text, + origin, + font_face, + font_scale, + cv::Scalar(255, 255, 255), + thickness); + + // mask + if (boxes[i].mask.data.size() == 0) { + continue; + } + cv::Mat bin_mask(result.mask_resolution, + result.mask_resolution, + CV_32FC1, + boxes[i].mask.data.data()); 
+ cv::resize(bin_mask, + bin_mask, + cv::Size(boxes[i].mask.shape[0], boxes[i].mask.shape[1])); + cv::threshold(bin_mask, bin_mask, 0.5, 1, cv::THRESH_BINARY); + cv::Mat full_mask = cv::Mat::zeros(vis_img.size(), CV_8UC1); + bin_mask.copyTo(full_mask(roi)); + cv::Mat mask_ch[3]; + mask_ch[0] = full_mask * c1; + mask_ch[1] = full_mask * c2; + mask_ch[2] = full_mask * c3; + cv::Mat mask; + cv::merge(mask_ch, 3, mask); + cv::addWeighted(vis_img, 1, mask, 0.5, 0, vis_img); + } + return vis_img; +} + +cv::Mat Visualize(const cv::Mat& img, + const SegResult& result, + const std::map& labels, + const std::vector& colormap) { + std::vector label_map(result.label_map.data.begin(), + result.label_map.data.end()); + cv::Mat mask(result.label_map.shape[0], + result.label_map.shape[1], + CV_8UC1, + label_map.data()); + cv::Mat color_mask = cv::Mat::zeros( + result.label_map.shape[0], result.label_map.shape[1], CV_8UC3); + int rows = img.rows; + int cols = img.cols; + for (int i = 0; i < rows; i++) { + for (int j = 0; j < cols; j++) { + int category_id = static_cast(mask.at(i, j)); + color_mask.at(i, j)[0] = colormap[3 * category_id + 0]; + color_mask.at(i, j)[1] = colormap[3 * category_id + 1]; + color_mask.at(i, j)[2] = colormap[3 * category_id + 2]; + } + } + return color_mask; +} + +std::string generate_save_path(const std::string& save_dir, + const std::string& file_path) { + if (access(save_dir.c_str(), 0) < 0) { +#ifdef _WIN32 + mkdir(save_dir.c_str()); +#else + if (mkdir(save_dir.c_str(), S_IRWXU) < 0) { + std::cerr << "Fail to create " << save_dir << "directory." << std::endl; + } +#endif + } + int pos = file_path.find_last_of(OS_PATH_SEP); + std::string image_name(file_path.substr(pos + 1)); + return save_dir + OS_PATH_SEP + image_name; +} +} // namespace PaddleX diff --git a/deploy/raspberry/CMakeLists.txt b/deploy/raspberry/CMakeLists.txt new file mode 100755 index 0000000000000000000000000000000000000000..c2d8a14da75dca2ec57d6afc3ef6b9d616a23617 --- /dev/null +++ b/deploy/raspberry/CMakeLists.txt @@ -0,0 +1,116 @@ +cmake_minimum_required(VERSION 3.0) +project(PaddleX CXX C) + + +option(WITH_STATIC_LIB "Compile demo with static/shared library, default use static." 
OFF)
+
+SET(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH})
+SET(LITE_DIR "" CACHE PATH "Location of libraries")
+SET(OPENCV_DIR "" CACHE PATH "Location of libraries")
+SET(NGRAPH_LIB "" CACHE PATH "Location of libraries")
+
+include(cmake/yaml-cpp.cmake)
+
+include_directories("${CMAKE_SOURCE_DIR}/")
+link_directories("${CMAKE_CURRENT_BINARY_DIR}")
+include_directories("${CMAKE_CURRENT_BINARY_DIR}/ext/yaml-cpp/src/ext-yaml-cpp/include")
+link_directories("${CMAKE_CURRENT_BINARY_DIR}/ext/yaml-cpp/lib")
+
+macro(safe_set_static_flag)
+    foreach(flag_var
+        CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
+        CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO)
+      if(${flag_var} MATCHES "/MD")
+        string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}")
+      endif(${flag_var} MATCHES "/MD")
+    endforeach(flag_var)
+endmacro()
+
+if (NOT DEFINED LITE_DIR OR ${LITE_DIR} STREQUAL "")
+    message(FATAL_ERROR "please set LITE_DIR with -DLITE_DIR=/path/to/paddle_lite")
+endif()
+
+if (NOT DEFINED OPENCV_DIR OR ${OPENCV_DIR} STREQUAL "")
+    message(FATAL_ERROR "please set OPENCV_DIR with -DOPENCV_DIR=/path/opencv")
+endif()
+
+if (NOT DEFINED GFLAGS_DIR OR ${GFLAGS_DIR} STREQUAL "")
+    message(FATAL_ERROR "please set GFLAGS_DIR with -DGFLAGS_DIR=/path/gflags")
+endif()
+
+link_directories("${LITE_DIR}/lib")
+include_directories("${LITE_DIR}/include")
+
+link_directories("${GFLAGS_DIR}/lib")
+include_directories("${GFLAGS_DIR}/include")
+
+if (WIN32)
+    find_package(OpenCV REQUIRED PATHS ${OPENCV_DIR}/build/ NO_DEFAULT_PATH)
+    unset(OpenCV_DIR CACHE)
+else ()
+    find_package(OpenCV REQUIRED PATHS ${OPENCV_DIR}/cmake NO_DEFAULT_PATH)
+endif ()
+
+include_directories(${OpenCV_INCLUDE_DIRS})
+
+if (WIN32)
+    add_definitions("/DGOOGLE_GLOG_DLL_DECL=")
+    set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /bigobj /MTd")
+    set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /bigobj /MT")
+    set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /bigobj /MTd")
+    set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /bigobj /MT")
+    if (WITH_STATIC_LIB)
+        safe_set_static_flag()
+        add_definitions(-DSTATIC_LIB)
+    endif()
+else()
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfloat-abi=hard -mfpu=neon-vfpv4 -g -O2 -fopenmp -std=c++11")
+    set(CMAKE_STATIC_LIBRARY_PREFIX "")
+endif()
+
+if(WITH_STATIC_LIB)
+    set(DEPS ${LITE_DIR}/lib/libpaddle_full_api_shared${CMAKE_STATIC_LIBRARY_SUFFIX})
+else()
+    set(DEPS ${LITE_DIR}/lib/libpaddle_full_api_shared${CMAKE_SHARED_LIBRARY_SUFFIX})
+endif()
+
+if (NOT WIN32)
+    set(DEPS ${DEPS}
+        glog gflags z yaml-cpp
+        )
+else()
+    set(DEPS ${DEPS}
+        glog gflags_static libprotobuf zlibstatic xxhash libyaml-cppmt)
+    set(DEPS ${DEPS} libcmt shlwapi)
+endif(NOT WIN32)
+
+if (NOT WIN32)
+    set(EXTERNAL_LIB "-ldl -lrt -lgomp -lz -lm -lpthread")
+    set(DEPS ${DEPS} ${EXTERNAL_LIB})
+endif()
+
+set(DEPS ${DEPS} ${OpenCV_LIBS})
+add_executable(classifier demo/classifier.cpp src/transforms.cpp src/paddlex.cpp)
+ADD_DEPENDENCIES(classifier ext-yaml-cpp)
+target_link_libraries(classifier ${DEPS})
+
+add_executable(segmenter demo/segmenter.cpp src/transforms.cpp src/paddlex.cpp src/visualize.cpp)
+ADD_DEPENDENCIES(segmenter ext-yaml-cpp)
+target_link_libraries(segmenter ${DEPS})
+
+add_executable(detector demo/detector.cpp src/transforms.cpp src/paddlex.cpp src/visualize.cpp)
+ADD_DEPENDENCIES(detector ext-yaml-cpp)
+target_link_libraries(detector ${DEPS})

diff --git a/deploy/raspberry/cmake/yaml-cpp.cmake
b/deploy/raspberry/cmake/yaml-cpp.cmake new file mode 100755 index 0000000000000000000000000000000000000000..726433d904908ce96c51442246fc884d0899de04 --- /dev/null +++ b/deploy/raspberry/cmake/yaml-cpp.cmake @@ -0,0 +1,29 @@ + +include(ExternalProject) + +message("${CMAKE_BUILD_TYPE}") + +ExternalProject_Add( + ext-yaml-cpp + URL https://bj.bcebos.com/paddlex/deploy/deps/yaml-cpp.zip + URL_MD5 9542d6de397d1fbd649ed468cb5850e6 + CMAKE_ARGS + -DYAML_CPP_BUILD_TESTS=OFF + -DYAML_CPP_BUILD_TOOLS=OFF + -DYAML_CPP_INSTALL=OFF + -DYAML_CPP_BUILD_CONTRIB=OFF + -DMSVC_SHARED_RT=OFF + -DBUILD_SHARED_LIBS=OFF + -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} + -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} + -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG} + -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE} + -DCMAKE_LIBRARY_OUTPUT_DIRECTORY=${CMAKE_BINARY_DIR}/ext/yaml-cpp/lib + -DCMAKE_ARCHIVE_OUTPUT_DIRECTORY=${CMAKE_BINARY_DIR}/ext/yaml-cpp/lib + PREFIX "${CMAKE_BINARY_DIR}/ext/yaml-cpp" + # Disable install step + INSTALL_COMMAND "" + LOG_DOWNLOAD ON + LOG_BUILD 1 +) + diff --git a/deploy/raspberry/demo/classifier.cpp b/deploy/raspberry/demo/classifier.cpp new file mode 100755 index 0000000000000000000000000000000000000000..7754a5f1dddfa0d0567b1545c781b00361e8abbf --- /dev/null +++ b/deploy/raspberry/demo/classifier.cpp @@ -0,0 +1,78 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
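Editorial aside: the C++ loaders in this patch (`Model::load_config` and `ConfigPaser` below) read a small set of fields from the exported model.yml. This sketch parses the same fields with PyYAML; the literal YAML document is made up for illustration, only the key names come from the code.

```python
# Sketch of the model.yml fields consumed by load_config / ConfigPaser.
import yaml

MODEL_YML = """
Model: ResNet50
_Attributes:
  model_type: classifier
  labels: [cat, dog]
Transforms:
  - ResizeByShort: {short_size: 256}
  - CenterCrop: {crop_size: 224}
  - Normalize: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225]}
"""

config = yaml.safe_load(MODEL_YML)
model_type = config["_Attributes"]["model_type"]   # "classifier"
labels = {i: name for i, name in enumerate(config["_Attributes"]["labels"])}
transforms = config["Transforms"]                  # list of {op-name: params}
print(model_type, labels, [list(t)[0] for t in transforms])
```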
+
+#include <glog/logging.h>
+
+#include <fstream>
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include "include/paddlex/paddlex.h"
+
+DEFINE_string(model_dir, "", "Path of inference model");
+DEFINE_string(cfg_file, "", "Path of PaddleX model yml file");
+DEFINE_string(image, "", "Path of test image file");
+DEFINE_string(image_list, "", "Path of test image list file");
+DEFINE_int32(thread_num, 1, "Number of threads to infer with");
+
+int main(int argc, char** argv) {
+  // Parse command-line flags
+  google::ParseCommandLineFlags(&argc, &argv, true);
+
+  if (FLAGS_model_dir == "") {
+    std::cerr << "--model_dir need to be defined" << std::endl;
+    return -1;
+  }
+  if (FLAGS_cfg_file == "") {
+    std::cerr << "--cfg_file need to be defined" << std::endl;
+    return -1;
+  }
+  if (FLAGS_image == "" && FLAGS_image_list == "") {
+    std::cerr << "--image or --image_list need to be defined" << std::endl;
+    return -1;
+  }
+
+  // load model
+  PaddleX::Model model;
+  model.Init(FLAGS_model_dir, FLAGS_cfg_file, FLAGS_thread_num);
+  std::cout << "init is done" << std::endl;
+  // predict
+  if (FLAGS_image_list != "") {
+    std::ifstream inf(FLAGS_image_list);
+    if (!inf) {
+      std::cerr << "Fail to open file " << FLAGS_image_list << std::endl;
+      return -1;
+    }
+    std::string image_path;
+
+    while (getline(inf, image_path)) {
+      PaddleX::ClsResult result;
+      cv::Mat im = cv::imread(image_path, 1);
+      model.predict(im, &result);
+      std::cout << "Predict label: " << result.category
+                << ", label_id:" << result.category_id
+                << ", score: " << result.score << std::endl;
+    }
+  } else {
+    PaddleX::ClsResult result;
+    cv::Mat im = cv::imread(FLAGS_image, 1);
+    model.predict(im, &result);
+    std::cout << "Predict label: " << result.category
+              << ", label_id:" << result.category_id
+              << ", score: " << result.score << std::endl;
+  }
+
+  return 0;
+}

diff --git a/deploy/raspberry/demo/detector.cpp b/deploy/raspberry/demo/detector.cpp
new file mode 100755
index 0000000000000000000000000000000000000000..e75ff2e62b50ad1fdde618c8d42cc9a0709fae3b
--- /dev/null
+++ b/deploy/raspberry/demo/detector.cpp
@@ -0,0 +1,111 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
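Aside: the classifier post-process in `Model::predict(ClsResult*)` earlier boils down to an arg-max over the raw score vector, then mapping the index to a label. A two-line Python equivalent with made-up scores:

```python
# Equivalent of the classifier post-process: argmax, then label lookup.
import numpy as np

labels = {0: "cat", 1: "dog"}
outputs = np.array([0.12, 0.88], dtype=np.float32)  # one score per class

category_id = int(np.argmax(outputs))               # like std::max_element
print(labels[category_id], float(outputs[category_id]))  # dog 0.88
```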
+
+#include <glog/logging.h>
+#include <omp.h>
+
+#include <algorithm>
+#include <chrono>  // NOLINT
+#include <fstream>
+#include <iostream>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "include/paddlex/paddlex.h"
+#include "include/paddlex/visualize.h"
+
+using namespace std::chrono;  // NOLINT
+
+DEFINE_string(model_dir, "", "Path of inference model");
+DEFINE_string(cfg_file, "", "Path of PaddleX model yaml file");
+DEFINE_string(image, "", "Path of test image file");
+DEFINE_string(image_list, "", "Path of test image list file");
+DEFINE_int32(thread_num, 1, "Number of threads to infer with");
+DEFINE_string(save_dir, "", "Path to save visualized image");
+DEFINE_int32(batch_size, 1, "Batch size of infering");
+DEFINE_double(threshold,
+              0.5,
+              "The minimum scores of target boxes which are shown");
+
+int main(int argc, char** argv) {
+  google::ParseCommandLineFlags(&argc, &argv, true);
+  if (FLAGS_model_dir == "") {
+    std::cerr << "--model_dir need to be defined" << std::endl;
+    return -1;
+  }
+  if (FLAGS_cfg_file == "") {
+    std::cerr << "--cfg_file need to be defined" << std::endl;
+    return -1;
+  }
+  if (FLAGS_image == "" && FLAGS_image_list == "") {
+    std::cerr << "--image or --image_list need to be defined" << std::endl;
+    return -1;
+  }
+
+  // load model
+  PaddleX::Model model;
+  model.Init(FLAGS_model_dir, FLAGS_cfg_file, FLAGS_thread_num);
+
+  int imgs = 1;
+  auto colormap = PaddleX::GenerateColorMap(model.labels.size());
+  // predict
+  if (FLAGS_image_list != "") {
+    std::ifstream inf(FLAGS_image_list);
+    if (!inf) {
+      std::cerr << "Fail to open file " << FLAGS_image_list << std::endl;
+      return -1;
+    }
+    std::string image_path;
+
+    while (getline(inf, image_path)) {
+      PaddleX::DetResult result;
+      cv::Mat im = cv::imread(image_path, 1);
+      model.predict(im, &result);
+      if (FLAGS_save_dir != "") {
+        cv::Mat vis_img = PaddleX::Visualize(
+            im, result, model.labels, colormap, FLAGS_threshold);
+        std::string save_path =
+            PaddleX::generate_save_path(FLAGS_save_dir, image_path);
+        cv::imwrite(save_path, vis_img);
+        std::cout << "Visualized output saved as " << save_path << std::endl;
+      }
+    }
+  } else {
+    PaddleX::DetResult result;
+    cv::Mat im = cv::imread(FLAGS_image, 1);
+    model.predict(im, &result);
+    for (int i = 0; i < result.boxes.size(); ++i) {
+      std::cout << "image file: " << FLAGS_image
+                << ", predict label: " << result.boxes[i].category
+                << ", label_id:" << result.boxes[i].category_id
+                << ", score: " << result.boxes[i].score
+                << ", box(xmin, ymin, w, h):(" << result.boxes[i].coordinate[0]
+                << ", " << result.boxes[i].coordinate[1] << ", "
+                << result.boxes[i].coordinate[2] << ", "
+                << result.boxes[i].coordinate[3] << ")" << std::endl;
+    }
+    if (FLAGS_save_dir != "") {
+      // visualize
+      cv::Mat vis_img = PaddleX::Visualize(
+          im, result, model.labels, colormap, FLAGS_threshold);
+      std::string save_path =
+          PaddleX::generate_save_path(FLAGS_save_dir, FLAGS_image);
+      cv::imwrite(save_path, vis_img);
+      result.clear();
+      std::cout << "Visualized output saved as " << save_path << std::endl;
+    }
+  }
+  return 0;
+}

diff --git a/deploy/raspberry/demo/segmenter.cpp b/deploy/raspberry/demo/segmenter.cpp
new file mode 100755
index 0000000000000000000000000000000000000000..21bfcd1ae338fad61443e1dcfe8adc3a25165609
--- /dev/null
+++ b/deploy/raspberry/demo/segmenter.cpp
@@ -0,0 +1,91 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include +#include +#include +#include +#include +#include +#include "include/paddlex/paddlex.h" +#include "include/paddlex/visualize.h" + + +DEFINE_string(model_dir, "", "Path of openvino model xml file"); +DEFINE_string(cfg_file, "", "Path of PaddleX model yaml file"); +DEFINE_string(image, "", "Path of test image file"); +DEFINE_string(image_list, "", "Path of test image list file"); +DEFINE_string(save_dir, "", "Path to save visualized image"); +DEFINE_int32(batch_size, 1, "Batch size of infering"); +DEFINE_int32(thread_num, 1, "num of thread to infer"); + +int main(int argc, char** argv) { + google::ParseCommandLineFlags(&argc, &argv, true); + if (FLAGS_model_dir == "") { + std::cerr << "--model_dir need to be defined" << std::endl; + return -1; + } + if (FLAGS_cfg_file == "") { + std::cerr << "--cfg_file need to be defined" << std::endl; + return -1; + } + if (FLAGS_image == "" & FLAGS_image_list == "") { + std::cerr << "--image or --image_list need to be defined" << std::endl; + return -1; + } + + // load model + std::cout << "init start" << std::endl; + PaddleX::Model model; + model.Init(FLAGS_model_dir, FLAGS_cfg_file, FLAGS_thread_num); + std::cout << "init done" << std::endl; + int imgs = 1; + auto colormap = PaddleX::GenerateColorMap(model.labels.size()); + if (FLAGS_image_list != "") { + std::ifstream inf(FLAGS_image_list); + if (!inf) { + std::cerr << "Fail to open file " << FLAGS_image_list < +#include +#include +#include + +#include "yaml-cpp/yaml.h" + +#ifdef _WIN32 +#define OS_PATH_SEP "\\" +#else +#define OS_PATH_SEP "/" +#endif + +namespace PaddleX { + +// Inference model configuration parser +class ConfigPaser { + public: + ConfigPaser() {} + + ~ConfigPaser() {} + + bool load_config(const std::string& model_dir, + const std::string& cfg = "model.yml") { + // Load as a YAML::Node + YAML::Node config; + config = YAML::LoadFile(model_dir + OS_PATH_SEP + cfg); + + if (config["Transforms"].IsDefined()) { + YAML::Node transforms_ = config["Transforms"]; + } else { + std::cerr << "There's no field 'Transforms' in model.yml" << std::endl; + return false; + } + return true; + } + + YAML::Node Transforms_; +}; + +} // namespace PaddleX diff --git a/deploy/raspberry/include/paddlex/paddlex.h b/deploy/raspberry/include/paddlex/paddlex.h new file mode 100755 index 0000000000000000000000000000000000000000..7c4a7065b043140be09cd032a5465f4bb2951398 --- /dev/null +++ b/deploy/raspberry/include/paddlex/paddlex.h @@ -0,0 +1,79 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
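Aside: the OpenVINO `predict(DetResult*)` earlier decodes the raw detector output as rows of six floats, `[class_id, score, xmin, ymin, xmax, ymax]`, converted to `(x, y, w, h)` boxes. The same decode in Python, with one fabricated row:

```python
# Sketch of the six-float detection row decode used in predict(DetResult*).
import numpy as np

raw = np.array([[1.0, 0.92, 10.0, 20.0, 60.0, 100.0]], dtype=np.float32)
boxes = []
for cls, score, xmin, ymin, xmax, ymax in raw:
    if cls <= 0:          # the C++ code keeps only rows with class id > 0
        continue
    boxes.append({
        "category_id": int(cls),
        "score": float(score),
        "coordinate": [xmin, ymin, xmax - xmin + 1, ymax - ymin + 1],
    })
print(boxes)
```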
+ +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "include/paddlex/config_parser.h" +#include "include/paddlex/results.h" +#include "include/paddlex/transforms.h" + + + +#include "yaml-cpp/yaml.h" + + + + +#ifdef _WIN32 +#define OS_PATH_SEP "\\" +#else +#define OS_PATH_SEP "/" +#endif + + + + +namespace PaddleX { + +class Model { + public: + void Init(const std::string& model_dir, + const std::string& cfg_file, + int thread_num) { + create_predictor(model_dir, cfg_file, thread_num); + } + + void create_predictor(const std::string& model_dir, + const std::string& cfg_file, + int thread_num); + + bool load_config(const std::string& model_dir); + + bool preprocess(cv::Mat* input_im, ImageBlob* inputs); + + bool predict(const cv::Mat& im, ClsResult* result); + + bool predict(const cv::Mat& im, DetResult* result); + + bool predict(const cv::Mat& im, SegResult* result); + + + std::string type; + std::string name; + std::map labels; + Transforms transforms_; + ImageBlob inputs_; + std::shared_ptr predictor_; +}; +} // namespace PaddleX diff --git a/deploy/raspberry/include/paddlex/results.h b/deploy/raspberry/include/paddlex/results.h new file mode 100755 index 0000000000000000000000000000000000000000..099e2c98b4b99c68b48c4dd99c8fbdfa1d2cf4fa --- /dev/null +++ b/deploy/raspberry/include/paddlex/results.h @@ -0,0 +1,71 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include + +namespace PaddleX { + +template +struct Mask { + std::vector data; + std::vector shape; + void clear() { + data.clear(); + shape.clear(); + } +}; + +struct Box { + int category_id; + std::string category; + float score; + std::vector coordinate; + Mask mask; +}; + +class BaseResult { + public: + std::string type = "base"; +}; + +class ClsResult : public BaseResult { + public: + int category_id; + std::string category; + float score; + std::string type = "cls"; +}; + +class DetResult : public BaseResult { + public: + std::vector boxes; + int mask_resolution; + std::string type = "det"; + void clear() { boxes.clear(); } +}; + +class SegResult : public BaseResult { + public: + Mask label_map; + Mask score_map; + void clear() { + label_map.clear(); + score_map.clear(); + } +}; +} // namespace PaddleX diff --git a/deploy/raspberry/include/paddlex/transforms.h b/deploy/raspberry/include/paddlex/transforms.h new file mode 100755 index 0000000000000000000000000000000000000000..60bf1750f8a10795d8123d2d98d43c68cf94a33d --- /dev/null +++ b/deploy/raspberry/include/paddlex/transforms.h @@ -0,0 +1,224 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + + + +namespace PaddleX { + +/* + * @brief + * This class represents object for storing all preprocessed data + * */ +class ImageBlob { + public: + // Original image height and width + std::vector ori_im_size_ = std::vector(2); + + // Newest image height and width after process + std::vector new_im_size_ = std::vector(2); + // Image height and width before resize + std::vector> im_size_before_resize_; + // Reshape order + std::vector reshape_order_; + // Resize scale + float scale = 1.0; + // Buffer for image data after preprocessing + std::unique_ptr input_tensor_; + + void clear() { + im_size_before_resize_.clear(); + reshape_order_.clear(); + } +}; + + + +// Abstraction of preprocessing opration class +class Transform { + public: + virtual void Init(const YAML::Node& item) = 0; + virtual bool Run(cv::Mat* im, ImageBlob* data) = 0; +}; + +class Normalize : public Transform { + public: + virtual void Init(const YAML::Node& item) { + mean_ = item["mean"].as>(); + std_ = item["std"].as>(); + } + + virtual bool Run(cv::Mat* im, ImageBlob* data); + + private: + std::vector mean_; + std::vector std_; +}; + +class ResizeByShort : public Transform { + public: + virtual void Init(const YAML::Node& item) { + short_size_ = item["short_size"].as(); + if (item["max_size"].IsDefined()) { + max_size_ = item["max_size"].as(); + } else { + max_size_ = -1; + } + } + virtual bool Run(cv::Mat* im, ImageBlob* data); + + private: + float GenerateScale(const cv::Mat& im); + int short_size_; + int max_size_; +}; + +/* + * @brief + * This class execute resize by long operation on image matrix. At first, it resizes + * the long side of image matrix to specified length. Accordingly, the short side + * will be resized in the same proportion. + * */ +class ResizeByLong : public Transform { + public: + virtual void Init(const YAML::Node& item) { + long_size_ = item["long_size"].as(); + } + virtual bool Run(cv::Mat* im, ImageBlob* data); + + private: + int long_size_; +}; + +/* + * @brief + * This class execute resize operation on image matrix. It resizes width and height + * to specified length. 
+ * */ +class Resize : public Transform { + public: + virtual void Init(const YAML::Node& item) { + if (item["interp"].IsDefined()) { + interp_ = item["interp"].as(); + } + if (item["target_size"].IsScalar()) { + height_ = item["target_size"].as(); + width_ = item["target_size"].as(); + } else if (item["target_size"].IsSequence()) { + std::vector target_size = item["target_size"].as>(); + width_ = target_size[0]; + height_ = target_size[1]; + } + if (height_ <= 0 || width_ <= 0) { + std::cerr << "[Resize] target_size should greater than 0" << std::endl; + exit(-1); + } + } + virtual bool Run(cv::Mat* im, ImageBlob* data); + + private: + int height_; + int width_; + std::string interp_; +}; + + +class CenterCrop : public Transform { + public: + virtual void Init(const YAML::Node& item) { + if (item["crop_size"].IsScalar()) { + height_ = item["crop_size"].as(); + width_ = item["crop_size"].as(); + } else if (item["crop_size"].IsSequence()) { + std::vector crop_size = item["crop_size"].as>(); + width_ = crop_size[0]; + height_ = crop_size[1]; + } + } + virtual bool Run(cv::Mat* im, ImageBlob* data); + + private: + int height_; + int width_; +}; + + +/* + * @brief + * This class execute padding operation on image matrix. It makes border on edge + * of image matrix. + * */ +class Padding : public Transform { + public: + virtual void Init(const YAML::Node& item) { + if (item["coarsest_stride"].IsDefined()) { + coarsest_stride_ = item["coarsest_stride"].as(); + if (coarsest_stride_ < 1) { + std::cerr << "[Padding] coarest_stride should greater than 0" + << std::endl; + exit(-1); + } + } + if (item["target_size"].IsDefined()) { + if (item["target_size"].IsScalar()) { + width_ = item["target_size"].as(); + height_ = item["target_size"].as(); + } else if (item["target_size"].IsSequence()) { + width_ = item["target_size"].as>()[0]; + height_ = item["target_size"].as>()[1]; + } + } + if (item["im_padding_value"].IsDefined()) { + im_value_ = item["im_padding_value"].as>(); + } else { + im_value_ = {0, 0, 0}; + } + } + + virtual bool Run(cv::Mat* im, ImageBlob* data); + + private: + int coarsest_stride_ = -1; + int width_ = 0; + int height_ = 0; + std::vector im_value_; +}; + +class Transforms { + public: + void Init(const YAML::Node& node, bool to_rgb = true); + std::shared_ptr CreateTransform(const std::string& name); + bool Run(cv::Mat* im, ImageBlob* data); + + private: + std::vector> transforms_; + bool to_rgb_ = true; +}; + +} // namespace PaddleX diff --git a/deploy/raspberry/include/paddlex/visualize.h b/deploy/raspberry/include/paddlex/visualize.h new file mode 100755 index 0000000000000000000000000000000000000000..d3eb094f525dc2c4e878dbfe11916dc98c63dd49 --- /dev/null +++ b/deploy/raspberry/include/paddlex/visualize.h @@ -0,0 +1,97 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
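Aside on the `Padding` op declared above: besides a fixed `target_size`, it supports a `coarsest_stride` mode in which each side is padded up to the next multiple of the stride (typical for FPN-style detectors). The arithmetic, in a few lines:

```python
# Padded size under coarsest_stride: round each side up to the next multiple.
import math

def padded_size(h, w, stride=32):
    return (int(math.ceil(h / stride)) * stride,
            int(math.ceil(w / stride)) * stride)

print(padded_size(600, 799))  # (608, 800) -> pad 8 rows, 1 column
```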
+ +#pragma once + +#include +#include +#include +#ifdef _WIN32 +#include +#include +#else // Linux/Unix +#include +#include +#include +#include +#include +#endif +#include + +#include +#include +#include + +#include "include/paddlex/results.h" + +#ifdef _WIN32 +#define OS_PATH_SEP "\\" +#else +#define OS_PATH_SEP "/" +#endif + +namespace PaddleX { + +/* + * @brief + * Generate visualization colormap for each class + * + * @param number of class + * @return color map, the size of vector is 3 * num_class + * */ +std::vector GenerateColorMap(int num_class); + + +/* + * @brief + * Visualize the detection result + * + * @param img: initial image matrix + * @param results: the detection result + * @param labels: label map + * @param colormap: visualization color map + * @return visualized image matrix + * */ +cv::Mat Visualize(const cv::Mat& img, + const DetResult& results, + const std::map& labels, + const std::vector& colormap, + float threshold = 0.5); + +/* + * @brief + * Visualize the segmentation result + * + * @param img: initial image matrix + * @param results: the detection result + * @param labels: label map + * @param colormap: visualization color map + * @return visualized image matrix + * */ +cv::Mat Visualize(const cv::Mat& img, + const SegResult& result, + const std::map& labels, + const std::vector& colormap); + +/* + * @brief + * generate save path for visualized image matrix + * + * @param save_dir: directory for saving visualized image matrix + * @param file_path: sourcen image file path + * @return path of saving visualized result + * */ +std::string generate_save_path(const std::string& save_dir, + const std::string& file_path); +} // namespace PaddleX diff --git a/deploy/raspberry/python/__init__.py b/deploy/raspberry/python/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..131e0650f5c8db6885ebab5cd342b37630f13be8 --- /dev/null +++ b/deploy/raspberry/python/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. \ No newline at end of file diff --git a/deploy/raspberry/python/demo.py b/deploy/raspberry/python/demo.py new file mode 100644 index 0000000000000000000000000000000000000000..512426bd380e58538e18ec71e722b1b510380b75 --- /dev/null +++ b/deploy/raspberry/python/demo.py @@ -0,0 +1,85 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
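Aside: `generate_save_path` in visualize.cpp above (ensure the save directory exists, then join it with the source image's file name) is three lines in Python:

```python
# Python counterpart of generate_save_path from visualize.cpp.
import os

def generate_save_path(save_dir, file_path):
    os.makedirs(save_dir, exist_ok=True)
    return os.path.join(save_dir, os.path.basename(file_path))

print(generate_save_path("output", "/data/imgs/cat.jpg"))  # output/cat.jpg
```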
+
+import sys
+import os
+import argparse
+import deploy
+
+
+def arg_parser():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--model_dir",
+        "-m",
+        type=str,
+        default=None,
+        help="path to the Paddle-Lite model file (.nb)")
+    parser.add_argument(
+        "--img", "-i", type=str, default=None, help="path to an image file")
+
+    parser.add_argument(
+        "--img_list", "-l", type=str, default=None,
+        help="path to an image list file")
+
+    parser.add_argument(
+        "--cfg_file",
+        "-c",
+        type=str,
+        default=None,
+        help="path to the PaddleX model yml file")
+
+    parser.add_argument(
+        "--thread_num",
+        "-t",
+        type=int,
+        default=1,
+        help="number of threads used for inference")
+
+    parser.add_argument(
+        "--input_shape",
+        "-ip",
+        type=str,
+        default=None,
+        help="input shape of the model in NCHW order, e.g. [1,3,224,224]")
+
+    return parser
+
+
+def main():
+    parser = arg_parser()
+    args = parser.parse_args()
+    model_nb = args.model_dir
+    model_yaml = args.cfg_file
+    thread_num = args.thread_num
+    input_shape = args.input_shape
+    input_shape = input_shape[1:-1].split(",", 3)
+    shape = list(map(int, input_shape))
+    # model init
+    predictor = deploy.Predictor(model_nb, model_yaml, thread_num, shape)
+
+    # predict
+    if args.img_list is not None:
+        f = open(args.img_list)
+        lines = f.readlines()
+        for im_path in lines:
+            print(im_path)
+            predictor.predict(im_path.strip('\n'))
+        f.close()
+    else:
+        im_path = args.img
+        predictor.predict(im_path)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/deploy/raspberry/python/transforms/__init__.py b/deploy/raspberry/python/transforms/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..9ec4809004549b5d564e7d69feb5d3a32fbebc98
--- /dev/null
+++ b/deploy/raspberry/python/transforms/__init__.py
@@ -0,0 +1,17 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from . import cls_transforms
+from . import det_transforms
+from . import seg_transforms
diff --git a/deploy/raspberry/python/transforms/cls_transforms.py b/deploy/raspberry/python/transforms/cls_transforms.py
new file mode 100644
index 0000000000000000000000000000000000000000..120c2699238e99d57316eba86ebb2e845d4f3435
--- /dev/null
+++ b/deploy/raspberry/python/transforms/cls_transforms.py
@@ -0,0 +1,281 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
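+# Usage sketch (illustrative): at inference time a Compose pipeline is built
+# from the transforms below and applied to one image, e.g.
+#   transforms = Compose([ResizeByShort(short_size=256),
+#                         CenterCrop(crop_size=224), Normalize(),
+#                         ArrangeClassifier(mode='test')])
+#   im, = transforms('test.jpg')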
+ +from .ops import * +import random +import os.path as osp +import numpy as np +from PIL import Image, ImageEnhance + + +class ClsTransform: + """分类Transform的基类 + """ + + def __init__(self): + pass + + +class Compose(ClsTransform): + """根据数据预处理/增强算子对输入数据进行操作。 + 所有操作的输入图像流形状均是[H, W, C],其中H为图像高,W为图像宽,C为图像通道数。 + + Args: + transforms (list): 数据预处理/增强算子。 + + Raises: + TypeError: 形参数据类型不满足需求。 + ValueError: 数据长度不匹配。 + """ + + def __init__(self, transforms): + if not isinstance(transforms, list): + raise TypeError('The transforms must be a list!') + if len(transforms) < 1: + raise ValueError('The length of transforms ' + \ + 'must be equal or larger than 1!') + self.transforms = transforms + + def __call__(self, im, label=None): + """ + Args: + im (str/np.ndarray): 图像路径/图像np.ndarray数据。 + label (int): 每张图像所对应的类别序号。 + Returns: + tuple: 根据网络所需字段所组成的tuple; + 字段由transforms中的最后一个数据预处理操作决定。 + """ + if isinstance(im, np.ndarray): + if len(im.shape) != 3: + raise Exception( + "im should be 3-dimension, but now is {}-dimensions". + format(len(im.shape))) + else: + try: + im = cv2.imread(im).astype('float32') + except: + raise TypeError('Can\'t read The image file {}!'.format(im)) + im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) + for op in self.transforms: + outputs = op(im, label) + im = outputs[0] + if len(outputs) == 2: + label = outputs[1] + return outputs + + def add_augmenters(self, augmenters): + if not isinstance(augmenters, list): + raise Exception( + "augmenters should be list type in func add_augmenters()") + transform_names = [type(x).__name__ for x in self.transforms] + for aug in augmenters: + if type(aug).__name__ in transform_names: + print( + "{} is already in ComposedTransforms, need to remove it from add_augmenters().". + format(type(aug).__name__)) + self.transforms = augmenters + self.transforms + + +class Normalize(ClsTransform): + """对图像进行标准化。 + + 1. 对图像进行归一化到区间[0.0, 1.0]。 + 2. 对图像进行减均值除以标准差操作。 + + Args: + mean (list): 图像数据集的均值。默认为[0.485, 0.456, 0.406]。 + std (list): 图像数据集的标准差。默认为[0.229, 0.224, 0.225]。 + + """ + + def __init__(self, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]): + self.mean = mean + self.std = std + + def __call__(self, im, label=None): + """ + Args: + im (np.ndarray): 图像np.ndarray数据。 + label (int): 每张图像所对应的类别序号。 + + Returns: + tuple: 当label为空时,返回的tuple为(im, ),对应图像np.ndarray数据; + 当label不为空时,返回的tuple为(im, label),分别对应图像np.ndarray数据、图像类别id。 + """ + mean = np.array(self.mean)[np.newaxis, np.newaxis, :] + std = np.array(self.std)[np.newaxis, np.newaxis, :] + im = normalize(im, mean, std) + if label is None: + return (im, ) + else: + return (im, label) + + +class ResizeByShort(ClsTransform): + """根据图像短边对图像重新调整大小(resize)。 + + 1. 获取图像的长边和短边长度。 + 2. 根据短边与short_size的比例,计算长边的目标长度, + 此时高、宽的resize比例为short_size/原图短边长度。 + 3. 如果max_size>0,调整resize比例: + 如果长边的目标长度>max_size,则高、宽的resize比例为max_size/原图长边长度; + 4. 
根据调整大小的比例对图像进行resize。 + + Args: + short_size (int): 调整大小后的图像目标短边长度。默认为256。 + max_size (int): 长边目标长度的最大限制。默认为-1。 + """ + + def __init__(self, short_size=256, max_size=-1): + self.short_size = short_size + self.max_size = max_size + + def __call__(self, im, label=None): + """ + Args: + im (np.ndarray): 图像np.ndarray数据。 + label (int): 每张图像所对应的类别序号。 + + Returns: + tuple: 当label为空时,返回的tuple为(im, ),对应图像np.ndarray数据; + 当label不为空时,返回的tuple为(im, label),分别对应图像np.ndarray数据、图像类别id。 + """ + im_short_size = min(im.shape[0], im.shape[1]) + im_long_size = max(im.shape[0], im.shape[1]) + scale = float(self.short_size) / im_short_size + if self.max_size > 0 and np.round(scale * + im_long_size) > self.max_size: + scale = float(self.max_size) / float(im_long_size) + resized_width = int(round(im.shape[1] * scale)) + resized_height = int(round(im.shape[0] * scale)) + im = cv2.resize( + im, (resized_width, resized_height), + interpolation=cv2.INTER_LINEAR) + + if label is None: + return (im, ) + else: + return (im, label) + + +class CenterCrop(ClsTransform): + """以图像中心点扩散裁剪长宽为`crop_size`的正方形 + + 1. 计算剪裁的起始点。 + 2. 剪裁图像。 + + Args: + crop_size (int): 裁剪的目标边长。默认为224。 + """ + + def __init__(self, crop_size=224): + self.crop_size = crop_size + + def __call__(self, im, label=None): + """ + Args: + im (np.ndarray): 图像np.ndarray数据。 + label (int): 每张图像所对应的类别序号。 + + Returns: + tuple: 当label为空时,返回的tuple为(im, ),对应图像np.ndarray数据; + 当label不为空时,返回的tuple为(im, label),分别对应图像np.ndarray数据、图像类别id。 + """ + im = center_crop(im, self.crop_size) + if label is None: + return (im, ) + else: + return (im, label) + + +class ArrangeClassifier(ClsTransform): + """获取训练/验证/预测所需信息。注意:此操作不需用户自己显示调用 + + Args: + mode (str): 指定数据用于何种用途,取值范围为['train', 'eval', 'test', 'quant']。 + + Raises: + ValueError: mode的取值不在['train', 'eval', 'test', 'quant']之内。 + """ + + def __init__(self, mode=None): + if mode not in ['train', 'eval', 'test', 'quant']: + raise ValueError( + "mode must be in ['train', 'eval', 'test', 'quant']!") + self.mode = mode + + def __call__(self, im, label=None): + """ + Args: + im (np.ndarray): 图像np.ndarray数据。 + label (int): 每张图像所对应的类别序号。 + + Returns: + tuple: 当mode为'train'或'eval'时,返回(im, label),分别对应图像np.ndarray数据、 + 图像类别id;当mode为'test'或'quant'时,返回(im, ),对应图像np.ndarray数据。 + """ + im = permute(im, False).astype('float32') + if self.mode == 'train' or self.mode == 'eval': + outputs = (im, label) + else: + outputs = (im, ) + return outputs + + +class ComposedClsTransforms(Compose): + """ 分类模型的基础Transforms流程,具体如下 + 训练阶段: + 1. 随机从图像中crop一块子图,并resize成crop_size大小 + 2. 将1的输出按0.5的概率随机进行水平翻转 + 3. 将图像进行归一化 + 验证/预测阶段: + 1. 将图像按比例Resize,使得最小边长度为crop_size[0] * 1.14 + 2. 从图像中心crop出一个大小为crop_size的图像 + 3. 
将图像进行归一化
+
+    Args:
+        mode(str): 图像处理流程所处阶段,训练/验证/预测,分别对应'train', 'eval', 'test'
+        crop_size(int|list): 输入模型里的图像大小
+        mean(list): 图像均值
+        std(list): 图像方差
+    """
+
+    def __init__(self,
+                 mode,
+                 crop_size=[224, 224],
+                 mean=[0.485, 0.456, 0.406],
+                 std=[0.229, 0.224, 0.225]):
+        width = crop_size
+        if isinstance(crop_size, list):
+            if crop_size[0] != crop_size[1]:
+                raise Exception(
+                    "In classifier model, width and height should be equal, please modify your parameter `crop_size`"
+                )
+            width = crop_size[0]
+        if width % 32 != 0:
+            raise Exception(
+                "In classifier model, width and height should be multiple of 32, e.g. 224, 256, 320, ..., please modify your parameter `crop_size`"
+            )
+
+        if mode == 'train':
+            # 部署代码未包含训练阶段的数据增强算子
+            raise Exception(
+                "mode 'train' is not supported by the deploy transforms")
+        else:
+            # 验证/预测时的transforms
+            transforms = [
+                ResizeByShort(short_size=int(width * 1.14)),
+                CenterCrop(crop_size=width), Normalize(
+                    mean=mean, std=std)
+            ]
+
+        super(ComposedClsTransforms, self).__init__(transforms)
diff --git a/deploy/raspberry/python/transforms/det_transforms.py b/deploy/raspberry/python/transforms/det_transforms.py
new file mode 100644
index 0000000000000000000000000000000000000000..0e2d1dc30c0d0bb768839709da9cd74f2140d84a
--- /dev/null
+++ b/deploy/raspberry/python/transforms/det_transforms.py
@@ -0,0 +1,540 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
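+# Usage sketch (illustrative): inference-time preprocessing for YOLOv3 can be
+# assembled from the transforms below, e.g.
+#   transforms = Compose([Resize(target_size=608, interp='CUBIC'), Normalize(),
+#                         ArrangeYOLOv3(mode='test')])
+#   im, im_shape = transforms('test.jpg')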
+ +try: + from collections.abc import Sequence +except Exception: + from collections import Sequence + +import random +import os.path as osp +import numpy as np + +import cv2 +from PIL import Image, ImageEnhance + +from .ops import * + + +class DetTransform: + """检测数据处理基类 + """ + + def __init__(self): + pass + + +class Compose(DetTransform): + """根据数据预处理/增强列表对输入数据进行操作。 + 所有操作的输入图像流形状均是[H, W, C],其中H为图像高,W为图像宽,C为图像通道数。 + + Args: + transforms (list): 数据预处理/增强列表。 + + Raises: + TypeError: 形参数据类型不满足需求。 + ValueError: 数据长度不匹配。 + """ + + def __init__(self, transforms): + if not isinstance(transforms, list): + raise TypeError('The transforms must be a list!') + if len(transforms) < 1: + raise ValueError('The length of transforms ' + \ + 'must be equal or larger than 1!') + self.transforms = transforms + self.use_mixup = False + for t in self.transforms: + if type(t).__name__ == 'MixupImage': + self.use_mixup = True + + def __call__(self, im, im_info=None, label_info=None): + """ + Args: + im (str/np.ndarray): 图像路径/图像np.ndarray数据。 + im_info (dict): 存储与图像相关的信息,dict中的字段如下: + - im_id (np.ndarray): 图像序列号,形状为(1,)。 + - image_shape (np.ndarray): 图像原始大小,形状为(2,), + image_shape[0]为高,image_shape[1]为宽。 + - mixup (list): list为[im, im_info, label_info],分别对应 + 与当前图像进行mixup的图像np.ndarray数据、图像相关信息、标注框相关信息; + 注意,当前epoch若无需进行mixup,则无该字段。 + label_info (dict): 存储与标注框相关的信息,dict中的字段如下: + - gt_bbox (np.ndarray): 真实标注框坐标[x1, y1, x2, y2],形状为(n, 4), + 其中n代表真实标注框的个数。 + - gt_class (np.ndarray): 每个真实标注框对应的类别序号,形状为(n, 1), + 其中n代表真实标注框的个数。 + - gt_score (np.ndarray): 每个真实标注框对应的混合得分,形状为(n, 1), + 其中n代表真实标注框的个数。 + - gt_poly (list): 每个真实标注框内的多边形分割区域,每个分割区域由点的x、y坐标组成, + 长度为n,其中n代表真实标注框的个数。 + - is_crowd (np.ndarray): 每个真实标注框中是否是一组对象,形状为(n, 1), + 其中n代表真实标注框的个数。 + - difficult (np.ndarray): 每个真实标注框中的对象是否为难识别对象,形状为(n, 1), + 其中n代表真实标注框的个数。 + Returns: + tuple: 根据网络所需字段所组成的tuple; + 字段由transforms中的最后一个数据预处理操作决定。 + """ + + def decode_image(im_file, im_info, label_info): + if im_info is None: + im_info = dict() + if isinstance(im_file, np.ndarray): + if len(im_file.shape) != 3: + raise Exception( + "im should be 3-dimensions, but now is {}-dimensions". + format(len(im_file.shape))) + im = im_file + else: + try: + im = cv2.imread(im_file).astype('float32') + except: + raise TypeError('Can\'t read The image file {}!'.format( + im_file)) + im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) + # make default im_info with [h, w, 1] + im_info['im_resize_info'] = np.array( + [im.shape[0], im.shape[1], 1.], dtype=np.float32) + im_info['image_shape'] = np.array([im.shape[0], + im.shape[1]]).astype('int32') + if not self.use_mixup: + if 'mixup' in im_info: + del im_info['mixup'] + # decode mixup image + if 'mixup' in im_info: + im_info['mixup'] = \ + decode_image(im_info['mixup'][0], + im_info['mixup'][1], + im_info['mixup'][2]) + if label_info is None: + return (im, im_info) + else: + return (im, im_info, label_info) + + outputs = decode_image(im, im_info, label_info) + im = outputs[0] + im_info = outputs[1] + if len(outputs) == 3: + label_info = outputs[2] + for op in self.transforms: + if im is None: + return None + outputs = op(im, im_info, label_info) + im = outputs[0] + return outputs + + def add_augmenters(self, augmenters): + if not isinstance(augmenters, list): + raise Exception( + "augmenters should be list type in func add_augmenters()") + transform_names = [type(x).__name__ for x in self.transforms] + for aug in augmenters: + if type(aug).__name__ in transform_names: + print( + "{} is already in ComposedTransforms, need to remove it from add_augmenters().". 
+ format(type(aug).__name__)) + self.transforms = augmenters + self.transforms + + +class ResizeByShort(DetTransform): + """根据图像的短边调整图像大小(resize)。 + + 1. 获取图像的长边和短边长度。 + 2. 根据短边与short_size的比例,计算长边的目标长度, + 此时高、宽的resize比例为short_size/原图短边长度。 + 3. 如果max_size>0,调整resize比例: + 如果长边的目标长度>max_size,则高、宽的resize比例为max_size/原图长边长度。 + 4. 根据调整大小的比例对图像进行resize。 + + Args: + target_size (int): 短边目标长度。默认为800。 + max_size (int): 长边目标长度的最大限制。默认为1333。 + + Raises: + TypeError: 形参数据类型不满足需求。 + """ + + def __init__(self, short_size=800, max_size=1333): + self.max_size = int(max_size) + if not isinstance(short_size, int): + raise TypeError( + "Type of short_size is invalid. Must be Integer, now is {}". + format(type(short_size))) + self.short_size = short_size + if not (isinstance(self.max_size, int)): + raise TypeError("max_size: input type is invalid.") + + def __call__(self, im, im_info=None, label_info=None): + """ + Args: + im (numnp.ndarraypy): 图像np.ndarray数据。 + im_info (dict, 可选): 存储与图像相关的信息。 + label_info (dict, 可选): 存储与标注框相关的信息。 + + Returns: + tuple: 当label_info为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; + 当label_info不为空时,返回的tuple为(im, im_info, label_info),分别对应图像np.ndarray数据、 + 存储与标注框相关信息的字典。 + 其中,im_info更新字段为: + - im_resize_info (np.ndarray): resize后的图像高、resize后的图像宽、resize后的图像相对原始图的缩放比例 + 三者组成的np.ndarray,形状为(3,)。 + + Raises: + TypeError: 形参数据类型不满足需求。 + ValueError: 数据长度不匹配。 + """ + if im_info is None: + im_info = dict() + if not isinstance(im, np.ndarray): + raise TypeError("ResizeByShort: image type is not numpy.") + if len(im.shape) != 3: + raise ValueError('ResizeByShort: image is not 3-dimensional.') + im_short_size = min(im.shape[0], im.shape[1]) + im_long_size = max(im.shape[0], im.shape[1]) + scale = float(self.short_size) / im_short_size + if self.max_size > 0 and np.round(scale * + im_long_size) > self.max_size: + scale = float(self.max_size) / float(im_long_size) + resized_width = int(round(im.shape[1] * scale)) + resized_height = int(round(im.shape[0] * scale)) + im_resize_info = [resized_height, resized_width, scale] + im = cv2.resize( + im, (resized_width, resized_height), + interpolation=cv2.INTER_LINEAR) + im_info['im_resize_info'] = np.array(im_resize_info).astype(np.float32) + if label_info is None: + return (im, im_info) + else: + return (im, im_info, label_info) + + +class Padding(DetTransform): + """1.将图像的长和宽padding至coarsest_stride的倍数。如输入图像为[300, 640], + `coarest_stride`为32,则由于300不为32的倍数,因此在图像最右和最下使用0值 + 进行padding,最终输出图像为[320, 640]。 + 2.或者,将图像的长和宽padding到target_size指定的shape,如输入的图像为[300,640], + a. `target_size` = 960,在图像最右和最下使用0值进行padding,最终输出 + 图像为[960, 960]。 + b. `target_size` = [640, 960],在图像最右和最下使用0值进行padding,最终 + 输出图像为[640, 960]。 + + 1. 如果coarsest_stride为1,target_size为None则直接返回。 + 2. 获取图像的高H、宽W。 + 3. 计算填充后图像的高H_new、宽W_new。 + 4. 构建大小为(H_new, W_new, 3)像素值为0的np.ndarray, + 并将原图的np.ndarray粘贴于左上角。 + + Args: + coarsest_stride (int): 填充后的图像长、宽为该参数的倍数,默认为1。 + target_size (int|list|tuple): 填充后的图像长、宽,默认为None,coarset_stride优先级更高。 + + Raises: + TypeError: 形参`target_size`数据类型不满足需求。 + ValueError: 形参`target_size`为(list|tuple)时,长度不满足需求。 + """ + + def __init__(self, coarsest_stride=1, target_size=None): + self.coarsest_stride = coarsest_stride + if target_size is not None: + if not isinstance(target_size, int): + if not isinstance(target_size, tuple) and not isinstance( + target_size, list): + raise TypeError( + "Padding: Type of target_size must in (int|list|tuple)." 
+ ) + elif len(target_size) != 2: + raise ValueError( + "Padding: Length of target_size must equal 2.") + self.target_size = target_size + + def __call__(self, im, im_info=None, label_info=None): + """ + Args: + im (numnp.ndarraypy): 图像np.ndarray数据。 + im_info (dict, 可选): 存储与图像相关的信息。 + label_info (dict, 可选): 存储与标注框相关的信息。 + + Returns: + tuple: 当label_info为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; + 当label_info不为空时,返回的tuple为(im, im_info, label_info),分别对应图像np.ndarray数据、 + 存储与标注框相关信息的字典。 + + Raises: + TypeError: 形参数据类型不满足需求。 + ValueError: 数据长度不匹配。 + ValueError: coarsest_stride,target_size需有且只有一个被指定。 + ValueError: target_size小于原图的大小。 + """ + if im_info is None: + im_info = dict() + if not isinstance(im, np.ndarray): + raise TypeError("Padding: image type is not numpy.") + if len(im.shape) != 3: + raise ValueError('Padding: image is not 3-dimensional.') + im_h, im_w, im_c = im.shape[:] + + if isinstance(self.target_size, int): + padding_im_h = self.target_size + padding_im_w = self.target_size + elif isinstance(self.target_size, list) or isinstance(self.target_size, + tuple): + padding_im_w = self.target_size[0] + padding_im_h = self.target_size[1] + elif self.coarsest_stride > 0: + padding_im_h = int( + np.ceil(im_h / self.coarsest_stride) * self.coarsest_stride) + padding_im_w = int( + np.ceil(im_w / self.coarsest_stride) * self.coarsest_stride) + else: + raise ValueError( + "coarsest_stridei(>1) or target_size(list|int) need setting in Padding transform" + ) + pad_height = padding_im_h - im_h + pad_width = padding_im_w - im_w + if pad_height < 0 or pad_width < 0: + raise ValueError( + 'the size of image should be less than target_size, but the size of image ({}, {}), is larger than target_size ({}, {})' + .format(im_w, im_h, padding_im_w, padding_im_h)) + padding_im = np.zeros( + (padding_im_h, padding_im_w, im_c), dtype=np.float32) + padding_im[:im_h, :im_w, :] = im + if label_info is None: + return (padding_im, im_info) + else: + return (padding_im, im_info, label_info) + + +class Resize(DetTransform): + """调整图像大小(resize)。 + + - 当目标大小(target_size)类型为int时,根据插值方式, + 将图像resize为[target_size, target_size]。 + - 当目标大小(target_size)类型为list或tuple时,根据插值方式, + 将图像resize为target_size。 + 注意:当插值方式为“RANDOM”时,则随机选取一种插值方式进行resize。 + + Args: + target_size (int/list/tuple): 短边目标长度。默认为608。 + interp (str): resize的插值方式,与opencv的插值方式对应,取值范围为 + ['NEAREST', 'LINEAR', 'CUBIC', 'AREA', 'LANCZOS4', 'RANDOM']。默认为"LINEAR"。 + + Raises: + TypeError: 形参数据类型不满足需求。 + ValueError: 插值方式不在['NEAREST', 'LINEAR', 'CUBIC', + 'AREA', 'LANCZOS4', 'RANDOM']中。 + """ + + # The interpolation mode + interp_dict = { + 'NEAREST': cv2.INTER_NEAREST, + 'LINEAR': cv2.INTER_LINEAR, + 'CUBIC': cv2.INTER_CUBIC, + 'AREA': cv2.INTER_AREA, + 'LANCZOS4': cv2.INTER_LANCZOS4 + } + + def __init__(self, target_size=608, interp='LINEAR'): + self.interp = interp + if not (interp == "RANDOM" or interp in self.interp_dict): + raise ValueError("interp should be one of {}".format( + self.interp_dict.keys())) + if isinstance(target_size, list) or isinstance(target_size, tuple): + if len(target_size) != 2: + raise TypeError( + 'when target is list or tuple, it should include 2 elements, but it is {}' + .format(target_size)) + elif not isinstance(target_size, int): + raise TypeError( + "Type of target_size is invalid. 
Must be Integer or List or tuple, now is {}" + .format(type(target_size))) + + self.target_size = target_size + + def __call__(self, im, im_info=None, label_info=None): + """ + Args: + im (np.ndarray): 图像np.ndarray数据。 + im_info (dict, 可选): 存储与图像相关的信息。 + label_info (dict, 可选): 存储与标注框相关的信息。 + + Returns: + tuple: 当label_info为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; + 当label_info不为空时,返回的tuple为(im, im_info, label_info),分别对应图像np.ndarray数据、 + 存储与标注框相关信息的字典。 + + Raises: + TypeError: 形参数据类型不满足需求。 + ValueError: 数据长度不匹配。 + """ + if im_info is None: + im_info = dict() + if not isinstance(im, np.ndarray): + raise TypeError("Resize: image type is not numpy.") + if len(im.shape) != 3: + raise ValueError('Resize: image is not 3-dimensional.') + if self.interp == "RANDOM": + interp = random.choice(list(self.interp_dict.keys())) + else: + interp = self.interp + im = resize(im, self.target_size, self.interp_dict[interp]) + if label_info is None: + return (im, im_info) + else: + return (im, im_info, label_info) + + +class Normalize(DetTransform): + """对图像进行标准化。 + + 1. 归一化图像到到区间[0.0, 1.0]。 + 2. 对图像进行减均值除以标准差操作。 + + Args: + mean (list): 图像数据集的均值。默认为[0.485, 0.456, 0.406]。 + std (list): 图像数据集的标准差。默认为[0.229, 0.224, 0.225]。 + + Raises: + TypeError: 形参数据类型不满足需求。 + """ + + def __init__(self, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]): + self.mean = mean + self.std = std + if not (isinstance(self.mean, list) and isinstance(self.std, list)): + raise TypeError("NormalizeImage: input type is invalid.") + from functools import reduce + if reduce(lambda x, y: x * y, self.std) == 0: + raise TypeError('NormalizeImage: std is invalid!') + + def __call__(self, im, im_info=None, label_info=None): + """ + Args: + im (numnp.ndarraypy): 图像np.ndarray数据。 + im_info (dict, 可选): 存储与图像相关的信息。 + label_info (dict, 可选): 存储与标注框相关的信息。 + + Returns: + tuple: 当label_info为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; + 当label_info不为空时,返回的tuple为(im, im_info, label_info),分别对应图像np.ndarray数据、 + 存储与标注框相关信息的字典。 + """ + mean = np.array(self.mean)[np.newaxis, np.newaxis, :] + std = np.array(self.std)[np.newaxis, np.newaxis, :] + im = normalize(im, mean, std) + if label_info is None: + return (im, im_info) + else: + return (im, im_info, label_info) + + +class ArrangeYOLOv3(DetTransform): + """获取YOLOv3模型训练/验证/预测所需信息。 + + Args: + mode (str): 指定数据用于何种用途,取值范围为['train', 'eval', 'test', 'quant']。 + + Raises: + ValueError: mode的取值不在['train', 'eval', 'test', 'quant']之内。 + """ + + def __init__(self, mode=None): + if mode not in ['train', 'eval', 'test', 'quant']: + raise ValueError( + "mode must be in ['train', 'eval', 'test', 'quant']!") + self.mode = mode + + def __call__(self, im, im_info=None, label_info=None): + """ + Args: + im (np.ndarray): 图像np.ndarray数据。 + im_info (dict, 可选): 存储与图像相关的信息。 + label_info (dict, 可选): 存储与标注框相关的信息。 + + Returns: + tuple: 当mode为'train'时,返回(im, gt_bbox, gt_class, gt_score, im_shape),分别对应 + 图像np.ndarray数据、真实标注框、真实标注框对应的类别、真实标注框混合得分、图像大小信息; + 当mode为'eval'时,返回(im, im_shape, im_id, gt_bbox, gt_class, difficult), + 分别对应图像np.ndarray数据、图像大小信息、图像id、真实标注框、真实标注框对应的类别、 + 真实标注框是否为难识别对象;当mode为'test'或'quant'时,返回(im, im_shape), + 分别对应图像np.ndarray数据、图像大小信息。 + + Raises: + TypeError: 形参数据类型不满足需求。 + ValueError: 数据长度不匹配。 + """ + im = permute(im, False) + if self.mode == 'train': + pass + elif self.mode == 'eval': + pass + else: + if im_info is None: + raise TypeError('Cannot do ArrangeYolov3! 
' +
+                                'Because the im_info cannot be None!')
+            im_shape = im_info['image_shape']
+            outputs = (im, im_shape)
+        return outputs
+
+
+class ComposedYOLOv3Transforms(Compose):
+    """YOLOv3模型的图像预处理流程,具体如下,
+        训练阶段:
+            1. 在前mixup_epoch轮迭代中,使用MixupImage策略,见https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#mixupimage
+            2. 对图像进行随机扰动,包括亮度,对比度,饱和度和色调
+            3. 随机扩充图像,见https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#randomexpand
+            4. 随机裁剪图像
+            5. 将4步骤的输出图像Resize成shape参数的大小
+            6. 随机0.5的概率水平翻转图像
+            7. 图像归一化
+        验证/预测阶段:
+            1. 将图像Resize成shape参数大小
+            2. 图像归一化
+
+        Args:
+            mode(str): 图像处理流程所处阶段,训练/验证/预测,分别对应'train', 'eval', 'test'
+            shape(list): 输入模型中图像的大小,输入模型的图像会被Resize成此大小
+            mixup_epoch(int): 模型训练过程中,前mixup_epoch会使用mixup策略
+            mean(list): 图像均值
+            std(list): 图像方差
+    """
+
+    def __init__(self,
+                 mode,
+                 shape=[608, 608],
+                 mixup_epoch=250,
+                 mean=[0.485, 0.456, 0.406],
+                 std=[0.229, 0.224, 0.225]):
+        width = shape
+        if isinstance(shape, list):
+            if shape[0] != shape[1]:
+                raise Exception(
+                    "In YOLOv3 model, width and height should be equal")
+            width = shape[0]
+        if width % 32 != 0:
+            raise Exception(
+                "In YOLOv3 model, width and height should be multiple of 32, e.g. 224, 256, 320, ..."
+            )
+
+        if mode == 'train':
+            # 部署代码未包含训练阶段的数据增强算子
+            raise Exception(
+                "mode 'train' is not supported by the deploy transforms")
+        else:
+            # 验证/预测时的transforms
+            transforms = [
+                Resize(
+                    target_size=width, interp='CUBIC'), Normalize(
+                        mean=mean, std=std)
+            ]
+        super(ComposedYOLOv3Transforms, self).__init__(transforms)
diff --git a/deploy/raspberry/python/transforms/ops.py b/deploy/raspberry/python/transforms/ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..3f298d7824be48355b69973a1e14486172efcb08
--- /dev/null
+++ b/deploy/raspberry/python/transforms/ops.py
@@ -0,0 +1,186 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import cv2
+import math
+import numpy as np
+from PIL import Image, ImageEnhance
+
+
+def normalize(im, mean, std):
+    im = im / 255.0
+    im -= mean
+    im /= std
+    return im
+
+
+def permute(im, to_bgr=False):
+    im = np.swapaxes(im, 1, 2)
+    im = np.swapaxes(im, 1, 0)
+    if to_bgr:
+        im = im[[2, 1, 0], :, :]
+    return im
+
+
+def resize_long(im, long_size=224, interpolation=cv2.INTER_LINEAR):
+    value = max(im.shape[0], im.shape[1])
+    scale = float(long_size) / float(value)
+    resized_width = int(round(im.shape[1] * scale))
+    resized_height = int(round(im.shape[0] * scale))
+
+    im = cv2.resize(
+        im, (resized_width, resized_height), interpolation=interpolation)
+    return im
+
+
+def resize(im, target_size=608, interp=cv2.INTER_LINEAR):
+    if isinstance(target_size, list) or isinstance(target_size, tuple):
+        w = target_size[0]
+        h = target_size[1]
+    else:
+        w = target_size
+        h = target_size
+    im = cv2.resize(im, (w, h), interpolation=interp)
+    return im
+
+
+def random_crop(im,
+                crop_size=224,
+                lower_scale=0.08,
+                lower_ratio=3. / 4,
+                upper_ratio=4.
/ 3): + scale = [lower_scale, 1.0] + ratio = [lower_ratio, upper_ratio] + aspect_ratio = math.sqrt(np.random.uniform(*ratio)) + w = 1. * aspect_ratio + h = 1. / aspect_ratio + bound = min((float(im.shape[0]) / im.shape[1]) / (h**2), + (float(im.shape[1]) / im.shape[0]) / (w**2)) + scale_max = min(scale[1], bound) + scale_min = min(scale[0], bound) + target_area = im.shape[0] * im.shape[1] * np.random.uniform( + scale_min, scale_max) + target_size = math.sqrt(target_area) + w = int(target_size * w) + h = int(target_size * h) + i = np.random.randint(0, im.shape[0] - h + 1) + j = np.random.randint(0, im.shape[1] - w + 1) + im = im[i:i + h, j:j + w, :] + im = cv2.resize(im, (crop_size, crop_size)) + return im + + +def center_crop(im, crop_size=224): + height, width = im.shape[:2] + w_start = (width - crop_size) // 2 + h_start = (height - crop_size) // 2 + w_end = w_start + crop_size + h_end = h_start + crop_size + im = im[h_start:h_end, w_start:w_end, :] + return im + + +def horizontal_flip(im): + if len(im.shape) == 3: + im = im[:, ::-1, :] + elif len(im.shape) == 2: + im = im[:, ::-1] + return im + + +def vertical_flip(im): + if len(im.shape) == 3: + im = im[::-1, :, :] + elif len(im.shape) == 2: + im = im[::-1, :] + return im + + +def bgr2rgb(im): + return im[:, :, ::-1] + + +def hue(im, hue_lower, hue_upper): + delta = np.random.uniform(hue_lower, hue_upper) + u = np.cos(delta * np.pi) + w = np.sin(delta * np.pi) + bt = np.array([[1.0, 0.0, 0.0], [0.0, u, -w], [0.0, w, u]]) + tyiq = np.array([[0.299, 0.587, 0.114], [0.596, -0.274, -0.321], + [0.211, -0.523, 0.311]]) + ityiq = np.array([[1.0, 0.956, 0.621], [1.0, -0.272, -0.647], + [1.0, -1.107, 1.705]]) + t = np.dot(np.dot(ityiq, bt), tyiq).T + im = np.dot(im, t) + return im + + +def saturation(im, saturation_lower, saturation_upper): + delta = np.random.uniform(saturation_lower, saturation_upper) + gray = im * np.array([[[0.299, 0.587, 0.114]]], dtype=np.float32) + gray = gray.sum(axis=2, keepdims=True) + gray *= (1.0 - delta) + im *= delta + im += gray + return im + + +def contrast(im, contrast_lower, contrast_upper): + delta = np.random.uniform(contrast_lower, contrast_upper) + im *= delta + return im + + +def brightness(im, brightness_lower, brightness_upper): + delta = np.random.uniform(brightness_lower, brightness_upper) + im += delta + return im + +def rotate(im, rotate_lower, rotate_upper): + rotate_delta = np.random.uniform(rotate_lower, rotate_upper) + im = im.rotate(int(rotate_delta)) + return im + + +def resize_padding(im, max_side_len=2400): + ''' + resize image to a size multiple of 32 which is required by the network + :param im: the resized image + :param max_side_len: limit of max image size to avoid out of memory in gpu + :return: the resized image and the resize ratio + ''' + h, w, _ = im.shape + + resize_w = w + resize_h = h + + # limit the max side + if max(resize_h, resize_w) > max_side_len: + ratio = float( + max_side_len) / resize_h if resize_h > resize_w else float( + max_side_len) / resize_w + else: + ratio = 1. 
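+    # Scale both sides by the computed ratio, then force each side to a
+    # multiple of 32 (the network requires dimensions divisible by 32),
+    # never going below 32.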
+ resize_h = int(resize_h * ratio) + resize_w = int(resize_w * ratio) + + resize_h = resize_h if resize_h % 32 == 0 else (resize_h // 32 - 1) * 32 + resize_w = resize_w if resize_w % 32 == 0 else (resize_w // 32 - 1) * 32 + resize_h = max(32, resize_h) + resize_w = max(32, resize_w) + im = cv2.resize(im, (int(resize_w), int(resize_h))) + #im = cv2.resize(im, (512, 512)) + ratio_h = resize_h / float(h) + ratio_w = resize_w / float(w) + _ratio = np.array([ratio_h, ratio_w]).reshape(-1, 2) + return im, _ratio diff --git a/deploy/raspberry/python/transforms/seg_transforms.py b/deploy/raspberry/python/transforms/seg_transforms.py new file mode 100644 index 0000000000000000000000000000000000000000..a3fb6241d415939a33f73a29b843f9ed45976463 --- /dev/null +++ b/deploy/raspberry/python/transforms/seg_transforms.py @@ -0,0 +1,1054 @@ +# coding: utf8 +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .ops import * +import random +import os.path as osp +import numpy as np +from PIL import Image +import cv2 +from collections import OrderedDict + + +class SegTransform: + """ 分割transform基类 + """ + + def __init__(self): + pass + + +class Compose(SegTransform): + """根据数据预处理/增强算子对输入数据进行操作。 + 所有操作的输入图像流形状均是[H, W, C],其中H为图像高,W为图像宽,C为图像通道数。 + + Args: + transforms (list): 数据预处理/增强算子。 + + Raises: + TypeError: transforms不是list对象 + ValueError: transforms元素个数小于1。 + + """ + + def __init__(self, transforms): + if not isinstance(transforms, list): + raise TypeError('The transforms must be a list!') + if len(transforms) < 1: + raise ValueError('The length of transforms ' + \ + 'must be equal or larger than 1!') + self.transforms = transforms + self.to_rgb = False + + + def __call__(self, im, im_info=None, label=None): + """ + Args: + im (str/np.ndarray): 图像路径/图像np.ndarray数据。 + im_info (list): 存储图像reisze或padding前的shape信息,如 + [('resize', [200, 300]), ('padding', [400, 600])]表示 + 图像在过resize前shape为(200, 300), 过padding前shape为 + (400, 600) + label (str/np.ndarray): 标注图像路径/标注图像np.ndarray数据。 + + Returns: + tuple: 根据网络所需字段所组成的tuple;字段由transforms中的最后一个数据预处理操作决定。 + """ + + if im_info is None: + im_info = list() + if isinstance(im, np.ndarray): + if len(im.shape) != 3: + raise Exception( + "im should be 3-dimensions, but now is {}-dimensions". 
+ format(len(im.shape))) + else: + try: + im = cv2.imread(im).astype('float32') + except: + raise ValueError('Can\'t read The image file {}!'.format(im)) + if self.to_rgb: + im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) + if label is not None: + if not isinstance(label, np.ndarray): + label = np.asarray(Image.open(label)) + for op in self.transforms: + if isinstance(op, SegTransform): + outputs = op(im, im_info, label) + im = outputs[0] + if len(outputs) >= 2: + im_info = outputs[1] + if len(outputs) == 3: + label = outputs[2] + else: + im = execute_imgaug(op, im) + if label is not None: + outputs = (im, im_info, label) + else: + outputs = (im, im_info) + return outputs + + def add_augmenters(self, augmenters): + if not isinstance(augmenters, list): + raise Exception( + "augmenters should be list type in func add_augmenters()") + transform_names = [type(x).__name__ for x in self.transforms] + for aug in augmenters: + if type(aug).__name__ in transform_names: + print("{} is already in ComposedTransforms, need to remove it from add_augmenters().".format(type(aug).__name__)) + self.transforms = augmenters + self.transforms + + +class RandomHorizontalFlip(SegTransform): + """以一定的概率对图像进行水平翻转。当存在标注图像时,则同步进行翻转。 + + Args: + prob (float): 随机水平翻转的概率。默认值为0.5。 + + """ + + def __init__(self, prob=0.5): + self.prob = prob + + def __call__(self, im, im_info=None, label=None): + """ + Args: + im (np.ndarray): 图像np.ndarray数据。 + im_info (list): 存储图像reisze或padding前的shape信息,如 + [('resize', [200, 300]), ('padding', [400, 600])]表示 + 图像在过resize前shape为(200, 300), 过padding前shape为 + (400, 600) + label (np.ndarray): 标注图像np.ndarray数据。 + + Returns: + tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; + 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 + 存储与图像相关信息的字典和标注图像np.ndarray数据。 + """ + if random.random() < self.prob: + im = horizontal_flip(im) + if label is not None: + label = horizontal_flip(label) + if label is None: + return (im, im_info) + else: + return (im, im_info, label) + + +class RandomVerticalFlip(SegTransform): + """以一定的概率对图像进行垂直翻转。当存在标注图像时,则同步进行翻转。 + + Args: + prob (float): 随机垂直翻转的概率。默认值为0.1。 + """ + + def __init__(self, prob=0.1): + self.prob = prob + + def __call__(self, im, im_info=None, label=None): + """ + Args: + im (np.ndarray): 图像np.ndarray数据。 + im_info (list): 存储图像reisze或padding前的shape信息,如 + [('resize', [200, 300]), ('padding', [400, 600])]表示 + 图像在过resize前shape为(200, 300), 过padding前shape为 + (400, 600) + label (np.ndarray): 标注图像np.ndarray数据。 + + Returns: + tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; + 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 + 存储与图像相关信息的字典和标注图像np.ndarray数据。 + """ + if random.random() < self.prob: + im = vertical_flip(im) + if label is not None: + label = vertical_flip(label) + if label is None: + return (im, im_info) + else: + return (im, im_info, label) + + +class Resize(SegTransform): + """调整图像大小(resize),当存在标注图像时,则同步进行处理。 + + - 当目标大小(target_size)类型为int时,根据插值方式, + 将图像resize为[target_size, target_size]。 + - 当目标大小(target_size)类型为list或tuple时,根据插值方式, + 将图像resize为target_size, target_size的输入应为[w, h]或(w, h)。 + + Args: + target_size (int|list|tuple): 目标大小。 + interp (str): resize的插值方式,与opencv的插值方式对应, + 可选的值为['NEAREST', 'LINEAR', 'CUBIC', 'AREA', 'LANCZOS4'],默认为"LINEAR"。 + + Raises: + TypeError: target_size不是int/list/tuple。 + ValueError: target_size为list/tuple时元素个数不等于2。 + AssertionError: interp的取值不在['NEAREST', 'LINEAR', 'CUBIC', 'AREA', 'LANCZOS4']之内。 + """ + + # The interpolation mode + interp_dict = { + 
'NEAREST': cv2.INTER_NEAREST,
+        'LINEAR': cv2.INTER_LINEAR,
+        'CUBIC': cv2.INTER_CUBIC,
+        'AREA': cv2.INTER_AREA,
+        'LANCZOS4': cv2.INTER_LANCZOS4
+    }
+
+    def __init__(self, target_size, interp='LINEAR'):
+        self.interp = interp
+        assert interp in self.interp_dict, "interp should be one of {}".format(
+            self.interp_dict.keys())
+        if isinstance(target_size, list) or isinstance(target_size, tuple):
+            if len(target_size) != 2:
+                raise ValueError(
+                    'when target is list or tuple, it should include 2 elements, but it is {}'
+                    .format(target_size))
+        elif not isinstance(target_size, int):
+            raise TypeError(
+                "Type of target_size is invalid. Must be Integer or List or tuple, now is {}"
+                .format(type(target_size)))
+
+        self.target_size = target_size
+
+    def __call__(self, im, im_info=None, label=None):
+        """
+        Args:
+            im (np.ndarray): 图像np.ndarray数据。
+            im_info (list): 存储图像resize或padding前的shape信息,如
+                [('resize', [200, 300]), ('padding', [400, 600])]表示
+                图像在过resize前shape为(200, 300), 过padding前shape为
+                (400, 600)
+            label (np.ndarray): 标注图像np.ndarray数据。
+
+        Returns:
+            tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
+                当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
+                存储与图像相关信息的字典和标注图像np.ndarray数据。
+                其中,im_info更新字段为:
+                    -shape_before_resize (tuple): 保存resize之前图像的形状(h, w)。
+
+        Raises:
+            ZeroDivisionError: im的短边为0。
+            TypeError: im不是np.ndarray数据。
+            ValueError: im不是3维nd.ndarray。
+        """
+        if im_info is None:
+            im_info = list()
+        im_info.append(('resize', im.shape[:2]))
+
+        if not isinstance(im, np.ndarray):
+            raise TypeError("ResizeImage: image type is not np.ndarray.")
+        if len(im.shape) != 3:
+            raise ValueError('ResizeImage: image is not 3-dimensional.')
+        im_shape = im.shape
+        im_size_min = np.min(im_shape[0:2])
+        im_size_max = np.max(im_shape[0:2])
+        if float(im_size_min) == 0:
+            raise ZeroDivisionError('ResizeImage: min size of image is 0')
+
+        if isinstance(self.target_size, int):
+            resize_w = self.target_size
+            resize_h = self.target_size
+        else:
+            resize_w = self.target_size[0]
+            resize_h = self.target_size[1]
+        im_scale_x = float(resize_w) / float(im_shape[1])
+        im_scale_y = float(resize_h) / float(im_shape[0])
+
+        im = cv2.resize(
+            im,
+            None,
+            None,
+            fx=im_scale_x,
+            fy=im_scale_y,
+            interpolation=self.interp_dict[self.interp])
+        if label is not None:
+            label = cv2.resize(
+                label,
+                None,
+                None,
+                fx=im_scale_x,
+                fy=im_scale_y,
+                interpolation=self.interp_dict['NEAREST'])
+        if label is None:
+            return (im, im_info)
+        else:
+            return (im, im_info, label)
+
+
+class ResizeByLong(SegTransform):
+    """对图像长边resize到固定值,短边按比例进行缩放。当存在标注图像时,则同步进行处理。
+
+    Args:
+        long_size (int): resize后图像的长边大小。
+    """
+
+    def __init__(self, long_size):
+        self.long_size = long_size
+
+    def __call__(self, im, im_info=None, label=None):
+        """
+        Args:
+            im (np.ndarray): 图像np.ndarray数据。
+            im_info (list): 存储图像resize或padding前的shape信息,如
+                [('resize', [200, 300]), ('padding', [400, 600])]表示
+                图像在过resize前shape为(200, 300), 过padding前shape为
+                (400, 600)
+            label (np.ndarray): 标注图像np.ndarray数据。
+
+        Returns:
+            tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
+                当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
+                存储与图像相关信息的字典和标注图像np.ndarray数据。
+                其中,im_info新增字段为:
+                    -shape_before_resize (tuple): 保存resize之前图像的形状(h, w)。
+        """
+        if im_info is None:
+            im_info = list()
+
+        im_info.append(('resize', im.shape[:2]))
+        im = resize_long(im, self.long_size)
+        if label is not None:
+            label = resize_long(label, self.long_size, cv2.INTER_NEAREST)
+
+        if label is None:
+            return (im, im_info)
+        else:
+            return (im, im_info, label)
+
+
+class ResizeByShort(SegTransform):
+    """根据图像的短边调整图像大小(resize)。
+
+    1. 获取图像的长边和短边长度。
+    2. 根据短边与short_size的比例,计算长边的目标长度,
+       此时高、宽的resize比例为short_size/原图短边长度。
+    3. 如果max_size>0,调整resize比例:
+       如果长边的目标长度>max_size,则高、宽的resize比例为max_size/原图长边长度。
+    4. 根据调整大小的比例对图像进行resize。
+
+    Args:
+        short_size (int): 短边目标长度。默认为800。
+        max_size (int): 长边目标长度的最大限制。默认为1333。
+
+    Raises:
+        TypeError: 形参数据类型不满足需求。
+    """
+
+    def __init__(self, short_size=800, max_size=1333):
+        self.max_size = int(max_size)
+        if not isinstance(short_size, int):
+            raise TypeError(
+                "Type of short_size is invalid. Must be Integer, now is {}".
+                format(type(short_size)))
+        self.short_size = short_size
+        if not (isinstance(self.max_size, int)):
+            raise TypeError("max_size: input type is invalid.")
+
+    def __call__(self, im, im_info=None, label=None):
+        """
+        Args:
+            im (np.ndarray): 图像np.ndarray数据。
+            im_info (list): 存储图像resize或padding前的shape信息,如
+                [('resize', [200, 300]), ('padding', [400, 600])]表示
+                图像在过resize前shape为(200, 300), 过padding前shape为
+                (400, 600)
+            label (np.ndarray): 标注图像np.ndarray数据。
+
+        Returns:
+            tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
+                当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
+                存储与图像相关信息的字典和标注图像np.ndarray数据。
+                其中,im_info更新字段为:
+                    -shape_before_resize (tuple): 保存resize之前图像的形状(h, w)。
+
+        Raises:
+            TypeError: 形参数据类型不满足需求。
+            ValueError: 数据长度不匹配。
+        """
+        if im_info is None:
+            im_info = list()
+        if not isinstance(im, np.ndarray):
+            raise TypeError("ResizeByShort: image type is not numpy.")
+        if len(im.shape) != 3:
+            raise ValueError('ResizeByShort: image is not 3-dimensional.')
+        im_info.append(('resize', im.shape[:2]))
+        im_short_size = min(im.shape[0], im.shape[1])
+        im_long_size = max(im.shape[0], im.shape[1])
+        scale = float(self.short_size) / im_short_size
+        if self.max_size > 0 and np.round(scale *
+                                          im_long_size) > self.max_size:
+            scale = float(self.max_size) / float(im_long_size)
+        resized_width = int(round(im.shape[1] * scale))
+        resized_height = int(round(im.shape[0] * scale))
+        im = cv2.resize(
+            im, (resized_width, resized_height),
+            interpolation=cv2.INTER_LINEAR)
+        if label is not None:
+            label = cv2.resize(
+                label, (resized_width, resized_height),
+                interpolation=cv2.INTER_NEAREST)
+        if label is None:
+            return (im, im_info)
+        else:
+            return (im, im_info, label)
+
+
+class ResizeRangeScaling(SegTransform):
+    """对图像长边随机resize到指定范围内,短边按比例进行缩放。当存在标注图像时,则同步进行处理。
+
+    Args:
+        min_value (int): 图像长边resize后的最小值。默认值400。
+        max_value (int): 图像长边resize后的最大值。默认值600。
+
+    Raises:
+        ValueError: min_value大于max_value
+    """
+
+    def __init__(self, min_value=400, max_value=600):
+        if min_value > max_value:
+            raise ValueError('min_value must be less than max_value, '
+                             'but they are {} and {}.'.format(min_value,
+                                                              max_value))
+        self.min_value = min_value
+        self.max_value = max_value
+
+    def __call__(self, im, im_info=None, label=None):
+        """
+        Args:
+            im (np.ndarray): 图像np.ndarray数据。
+            im_info (list): 存储图像resize或padding前的shape信息,如
+                [('resize', [200, 300]), ('padding', [400, 600])]表示
+                图像在过resize前shape为(200, 300), 过padding前shape为
+                (400, 600)
+            label (np.ndarray): 标注图像np.ndarray数据。
+
+        Returns:
+            tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
+                当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
+                存储与图像相关信息的字典和标注图像np.ndarray数据。
+        """
+        if self.min_value == self.max_value:
+            random_size = self.max_value
+        else:
+            random_size = int(
+                np.random.uniform(self.min_value, self.max_value) +
0.5) + im = resize_long(im, random_size, cv2.INTER_LINEAR) + if label is not None: + label = resize_long(label, random_size, cv2.INTER_NEAREST) + + if label is None: + return (im, im_info) + else: + return (im, im_info, label) + + +class ResizeStepScaling(SegTransform): + """对图像按照某一个比例resize,这个比例以scale_step_size为步长 + 在[min_scale_factor, max_scale_factor]随机变动。当存在标注图像时,则同步进行处理。 + + Args: + min_scale_factor(float), resize最小尺度。默认值0.75。 + max_scale_factor (float), resize最大尺度。默认值1.25。 + scale_step_size (float), resize尺度范围间隔。默认值0.25。 + + Raises: + ValueError: min_scale_factor大于max_scale_factor + """ + + def __init__(self, + min_scale_factor=0.75, + max_scale_factor=1.25, + scale_step_size=0.25): + if min_scale_factor > max_scale_factor: + raise ValueError( + 'min_scale_factor must be less than max_scale_factor, ' + 'but they are {} and {}.'.format(min_scale_factor, + max_scale_factor)) + self.min_scale_factor = min_scale_factor + self.max_scale_factor = max_scale_factor + self.scale_step_size = scale_step_size + + def __call__(self, im, im_info=None, label=None): + """ + Args: + im (np.ndarray): 图像np.ndarray数据。 + im_info (list): 存储图像reisze或padding前的shape信息,如 + [('resize', [200, 300]), ('padding', [400, 600])]表示 + 图像在过resize前shape为(200, 300), 过padding前shape为 + (400, 600) + label (np.ndarray): 标注图像np.ndarray数据。 + + Returns: + tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; + 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 + 存储与图像相关信息的字典和标注图像np.ndarray数据。 + """ + if self.min_scale_factor == self.max_scale_factor: + scale_factor = self.min_scale_factor + + elif self.scale_step_size == 0: + scale_factor = np.random.uniform(self.min_scale_factor, + self.max_scale_factor) + + else: + num_steps = int((self.max_scale_factor - self.min_scale_factor) / + self.scale_step_size + 1) + scale_factors = np.linspace(self.min_scale_factor, + self.max_scale_factor, + num_steps).tolist() + np.random.shuffle(scale_factors) + scale_factor = scale_factors[0] + + im = cv2.resize( + im, (0, 0), + fx=scale_factor, + fy=scale_factor, + interpolation=cv2.INTER_LINEAR) + if label is not None: + label = cv2.resize( + label, (0, 0), + fx=scale_factor, + fy=scale_factor, + interpolation=cv2.INTER_NEAREST) + + if label is None: + return (im, im_info) + else: + return (im, im_info, label) + + +class Normalize(SegTransform): + """对图像进行标准化。 + 1.尺度缩放到 [0,1]。 + 2.对图像进行减均值除以标准差操作。 + + Args: + mean (list): 图像数据集的均值。默认值[0.5, 0.5, 0.5]。 + std (list): 图像数据集的标准差。默认值[0.5, 0.5, 0.5]。 + + Raises: + ValueError: mean或std不是list对象。std包含0。 + """ + + def __init__(self, mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]): + self.mean = mean + self.std = std + if not (isinstance(self.mean, list) and isinstance(self.std, list)): + raise ValueError("{}: input type is invalid.".format(self)) + from functools import reduce + if reduce(lambda x, y: x * y, self.std) == 0: + raise ValueError('{}: std is invalid!'.format(self)) + + def __call__(self, im, im_info=None, label=None): + """ + Args: + im (np.ndarray): 图像np.ndarray数据。 + im_info (list): 存储图像reisze或padding前的shape信息,如 + [('resize', [200, 300]), ('padding', [400, 600])]表示 + 图像在过resize前shape为(200, 300), 过padding前shape为 + (400, 600) + label (np.ndarray): 标注图像np.ndarray数据。 + + Returns: + tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; + 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 + 存储与图像相关信息的字典和标注图像np.ndarray数据。 + """ + + mean = np.array(self.mean)[np.newaxis, np.newaxis, :] + std = np.array(self.std)[np.newaxis, np.newaxis, :] + im = 
normalize(im, mean, std)
+
+        if label is None:
+            return (im, im_info)
+        else:
+            return (im, im_info, label)
+
+
+class Padding(SegTransform):
+    """对图像或标注图像进行padding,padding方向为右和下。
+    根据提供的值对图像或标注图像进行padding操作。
+
+    Args:
+        target_size (int|list|tuple): padding后图像的大小。
+        im_padding_value (list): 图像padding的值。默认为[127.5, 127.5, 127.5]。
+        label_padding_value (int): 标注图像padding的值。默认值为255。
+
+    Raises:
+        TypeError: target_size不是int|list|tuple。
+        ValueError: target_size为list|tuple时元素个数不等于2。
+    """
+
+    def __init__(self,
+                 target_size,
+                 im_padding_value=[127.5, 127.5, 127.5],
+                 label_padding_value=255):
+        if isinstance(target_size, list) or isinstance(target_size, tuple):
+            if len(target_size) != 2:
+                raise ValueError(
+                    'when target is list or tuple, it should include 2 elements, but it is {}'
+                    .format(target_size))
+        elif not isinstance(target_size, int):
+            raise TypeError(
+                "Type of target_size is invalid. Must be Integer or List or tuple, now is {}"
+                .format(type(target_size)))
+        self.target_size = target_size
+        self.im_padding_value = im_padding_value
+        self.label_padding_value = label_padding_value
+
+    def __call__(self, im, im_info=None, label=None):
+        """
+        Args:
+            im (np.ndarray): 图像np.ndarray数据。
+            im_info (list): 存储图像resize或padding前的shape信息,如
+                [('resize', [200, 300]), ('padding', [400, 600])]表示
+                图像在过resize前shape为(200, 300), 过padding前shape为
+                (400, 600)
+            label (np.ndarray): 标注图像np.ndarray数据。
+
+        Returns:
+            tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
+                当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
+                存储与图像相关信息的字典和标注图像np.ndarray数据。
+                其中,im_info新增字段为:
+                    -shape_before_padding (tuple): 保存padding之前图像的形状(h, w)。
+
+        Raises:
+            ValueError: 输入图像im或label的形状大于目标值
+        """
+        if im_info is None:
+            im_info = list()
+        im_info.append(('padding', im.shape[:2]))
+
+        im_height, im_width = im.shape[0], im.shape[1]
+        if isinstance(self.target_size, int):
+            target_height = self.target_size
+            target_width = self.target_size
+        else:
+            target_height = self.target_size[1]
+            target_width = self.target_size[0]
+        pad_height = target_height - im_height
+        pad_width = target_width - im_width
+        if pad_height < 0 or pad_width < 0:
+            raise ValueError(
+                'the size of image should be less than target_size, but the size of image ({}, {}), is larger than target_size ({}, {})'
+                .format(im_width, im_height, target_width, target_height))
+        else:
+            im = cv2.copyMakeBorder(
+                im,
+                0,
+                pad_height,
+                0,
+                pad_width,
+                cv2.BORDER_CONSTANT,
+                value=self.im_padding_value)
+            if label is not None:
+                label = cv2.copyMakeBorder(
+                    label,
+                    0,
+                    pad_height,
+                    0,
+                    pad_width,
+                    cv2.BORDER_CONSTANT,
+                    value=self.label_padding_value)
+        if label is None:
+            return (im, im_info)
+        else:
+            return (im, im_info, label)
+
+
+class RandomPaddingCrop(SegTransform):
+    """对图像和标注图进行随机裁剪,当所需要的裁剪尺寸大于原图时,则进行padding操作。
+
+    Args:
+        crop_size (int|list|tuple): 裁剪图像大小。默认为512。
+        im_padding_value (list): 图像padding的值。默认为[127.5, 127.5, 127.5]。
+        label_padding_value (int): 标注图像padding的值。默认值为255。
+
+    Raises:
+        TypeError: crop_size不是int/list/tuple。
+        ValueError: target_size为list/tuple时元素个数不等于2。
+    """
+
+    def __init__(self,
+                 crop_size=512,
+                 im_padding_value=[127.5, 127.5, 127.5],
+                 label_padding_value=255):
+        if isinstance(crop_size, list) or isinstance(crop_size, tuple):
+            if len(crop_size) != 2:
+                raise ValueError(
+                    'when crop_size is list or tuple, it should include 2 elements, but it is {}'
+                    .format(crop_size))
+        elif not isinstance(crop_size, int):
+            raise TypeError(
+                "Type of crop_size is invalid.
Must be Integer or List or tuple, now is {}" + .format(type(crop_size))) + self.crop_size = crop_size + self.im_padding_value = im_padding_value + self.label_padding_value = label_padding_value + + def __call__(self, im, im_info=None, label=None): + """ + Args: + im (np.ndarray): 图像np.ndarray数据。 + im_info (list): 存储图像reisze或padding前的shape信息,如 + [('resize', [200, 300]), ('padding', [400, 600])]表示 + 图像在过resize前shape为(200, 300), 过padding前shape为 + (400, 600) + label (np.ndarray): 标注图像np.ndarray数据。 + + Returns: + tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; + 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 + 存储与图像相关信息的字典和标注图像np.ndarray数据。 + """ + if isinstance(self.crop_size, int): + crop_width = self.crop_size + crop_height = self.crop_size + else: + crop_width = self.crop_size[0] + crop_height = self.crop_size[1] + + img_height = im.shape[0] + img_width = im.shape[1] + + if img_height == crop_height and img_width == crop_width: + if label is None: + return (im, im_info) + else: + return (im, im_info, label) + else: + pad_height = max(crop_height - img_height, 0) + pad_width = max(crop_width - img_width, 0) + if (pad_height > 0 or pad_width > 0): + im = cv2.copyMakeBorder( + im, + 0, + pad_height, + 0, + pad_width, + cv2.BORDER_CONSTANT, + value=self.im_padding_value) + if label is not None: + label = cv2.copyMakeBorder( + label, + 0, + pad_height, + 0, + pad_width, + cv2.BORDER_CONSTANT, + value=self.label_padding_value) + img_height = im.shape[0] + img_width = im.shape[1] + + if crop_height > 0 and crop_width > 0: + h_off = np.random.randint(img_height - crop_height + 1) + w_off = np.random.randint(img_width - crop_width + 1) + + im = im[h_off:(crop_height + h_off), w_off:(w_off + crop_width + ), :] + if label is not None: + label = label[h_off:(crop_height + h_off), w_off:( + w_off + crop_width)] + if label is None: + return (im, im_info) + else: + return (im, im_info, label) + + +class RandomBlur(SegTransform): + """以一定的概率对图像进行高斯模糊。 + + Args: + prob (float): 图像模糊概率。默认为0.1。 + """ + + def __init__(self, prob=0.1): + self.prob = prob + + def __call__(self, im, im_info=None, label=None): + """ + Args: + im (np.ndarray): 图像np.ndarray数据。 + im_info (list): 存储图像reisze或padding前的shape信息,如 + [('resize', [200, 300]), ('padding', [400, 600])]表示 + 图像在过resize前shape为(200, 300), 过padding前shape为 + (400, 600) + label (np.ndarray): 标注图像np.ndarray数据。 + + Returns: + tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; + 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 + 存储与图像相关信息的字典和标注图像np.ndarray数据。 + """ + if self.prob <= 0: + n = 0 + elif self.prob >= 1: + n = 1 + else: + n = int(1.0 / self.prob) + if n > 0: + if np.random.randint(0, n) == 0: + radius = np.random.randint(3, 10) + if radius % 2 != 1: + radius = radius + 1 + if radius > 9: + radius = 9 + im = cv2.GaussianBlur(im, (radius, radius), 0, 0) + + if label is None: + return (im, im_info) + else: + return (im, im_info, label) + + + + +class RandomScaleAspect(SegTransform): + """裁剪并resize回原始尺寸的图像和标注图像。 + 按照一定的面积比和宽高比对图像进行裁剪,并reszie回原始图像的图像,当存在标注图时,同步进行。 + + Args: + min_scale (float):裁取图像占原始图像的面积比,取值[0,1],为0时则返回原图。默认为0.5。 + aspect_ratio (float): 裁取图像的宽高比范围,非负值,为0时返回原图。默认为0.33。 + """ + + def __init__(self, min_scale=0.5, aspect_ratio=0.33): + self.min_scale = min_scale + self.aspect_ratio = aspect_ratio + + def __call__(self, im, im_info=None, label=None): + """ + Args: + im (np.ndarray): 图像np.ndarray数据。 + im_info (list): 存储图像reisze或padding前的shape信息,如 + [('resize', [200, 300]), 
('padding', [400, 600])]表示 + 图像在过resize前shape为(200, 300), 过padding前shape为 + (400, 600) + label (np.ndarray): 标注图像np.ndarray数据。 + + Returns: + tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; + 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 + 存储与图像相关信息的字典和标注图像np.ndarray数据。 + """ + if self.min_scale != 0 and self.aspect_ratio != 0: + img_height = im.shape[0] + img_width = im.shape[1] + for i in range(0, 10): + area = img_height * img_width + target_area = area * np.random.uniform(self.min_scale, 1.0) + aspectRatio = np.random.uniform(self.aspect_ratio, + 1.0 / self.aspect_ratio) + + dw = int(np.sqrt(target_area * 1.0 * aspectRatio)) + dh = int(np.sqrt(target_area * 1.0 / aspectRatio)) + if (np.random.randint(10) < 5): + tmp = dw + dw = dh + dh = tmp + + if (dh < img_height and dw < img_width): + h1 = np.random.randint(0, img_height - dh) + w1 = np.random.randint(0, img_width - dw) + + im = im[h1:(h1 + dh), w1:(w1 + dw), :] + label = label[h1:(h1 + dh), w1:(w1 + dw)] + im = cv2.resize( + im, (img_width, img_height), + interpolation=cv2.INTER_LINEAR) + label = cv2.resize( + label, (img_width, img_height), + interpolation=cv2.INTER_NEAREST) + break + if label is None: + return (im, im_info) + else: + return (im, im_info, label) + + +class RandomDistort(SegTransform): + """对图像进行随机失真。 + + 1. 对变换的操作顺序进行随机化操作。 + 2. 按照1中的顺序以一定的概率对图像进行随机像素内容变换。 + + Args: + brightness_range (float): 明亮度因子的范围。默认为0.5。 + brightness_prob (float): 随机调整明亮度的概率。默认为0.5。 + contrast_range (float): 对比度因子的范围。默认为0.5。 + contrast_prob (float): 随机调整对比度的概率。默认为0.5。 + saturation_range (float): 饱和度因子的范围。默认为0.5。 + saturation_prob (float): 随机调整饱和度的概率。默认为0.5。 + hue_range (int): 色调因子的范围。默认为18。 + hue_prob (float): 随机调整色调的概率。默认为0.5。 + """ + + def __init__(self, + brightness_range=0.5, + brightness_prob=0.5, + contrast_range=0.5, + contrast_prob=0.5, + saturation_range=0.5, + saturation_prob=0.5, + hue_range=18, + hue_prob=0.5): + self.brightness_range = brightness_range + self.brightness_prob = brightness_prob + self.contrast_range = contrast_range + self.contrast_prob = contrast_prob + self.saturation_range = saturation_range + self.saturation_prob = saturation_prob + self.hue_range = hue_range + self.hue_prob = hue_prob + + def __call__(self, im, im_info=None, label=None): + """ + Args: + im (np.ndarray): 图像np.ndarray数据。 + im_info (list): 存储图像reisze或padding前的shape信息,如 + [('resize', [200, 300]), ('padding', [400, 600])]表示 + 图像在过resize前shape为(200, 300), 过padding前shape为 + (400, 600) + label (np.ndarray): 标注图像np.ndarray数据。 + + Returns: + tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; + 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 + 存储与图像相关信息的字典和标注图像np.ndarray数据。 + """ + brightness_lower = 1 - self.brightness_range + brightness_upper = 1 + self.brightness_range + contrast_lower = 1 - self.contrast_range + contrast_upper = 1 + self.contrast_range + saturation_lower = 1 - self.saturation_range + saturation_upper = 1 + self.saturation_range + hue_lower = -self.hue_range + hue_upper = self.hue_range + ops = [brightness, contrast, saturation, hue] + random.shuffle(ops) + params_dict = { + 'brightness': { + 'brightness_lower': brightness_lower, + 'brightness_upper': brightness_upper + }, + 'contrast': { + 'contrast_lower': contrast_lower, + 'contrast_upper': contrast_upper + }, + 'saturation': { + 'saturation_lower': saturation_lower, + 'saturation_upper': saturation_upper + }, + 'hue': { + 'hue_lower': hue_lower, + 'hue_upper': hue_upper + } + } + prob_dict = { + 
+            'brightness': self.brightness_prob,
+            'contrast': self.contrast_prob,
+            'saturation': self.saturation_prob,
+            'hue': self.hue_prob
+        }
+        for op in ops:
+            params = params_dict[op.__name__]
+            prob = prob_dict[op.__name__]
+            params['im'] = im
+            if np.random.uniform(0, 1) < prob:
+                im = op(**params)
+        if label is None:
+            return (im, im_info)
+        else:
+            return (im, im_info, label)
+
+
+class ArrangeSegmenter(SegTransform):
+    """获取训练/验证/预测所需的信息。
+
+    Args:
+        mode (str): 指定数据用于何种用途,取值范围为['train', 'eval', 'test', 'quant']。
+
+    Raises:
+        ValueError: mode的取值不在['train', 'eval', 'test', 'quant']之内。
+    """
+
+    def __init__(self, mode):
+        if mode not in ['train', 'eval', 'test', 'quant']:
+            raise ValueError(
+                "mode should be defined as one of ['train', 'eval', 'test', 'quant']!"
+            )
+        self.mode = mode
+
+    def __call__(self, im, im_info, label=None):
+        """
+        Args:
+            im (np.ndarray): 图像np.ndarray数据。
+            im_info (list): 存储图像resize或padding前的shape信息,如
+                [('resize', [200, 300]), ('padding', [400, 600])]表示
+                图像在过resize前shape为(200, 300), 过padding前shape为
+                (400, 600)。
+            label (np.ndarray): 标注图像np.ndarray数据。
+
+        Returns:
+            tuple: 当mode为'train'或'eval'时,返回(im, label),分别对应图像和标注图像的np.ndarray数据;
+                当mode为'test'时,返回(im, im_info),分别对应图像np.ndarray数据和存储图像相关信息的列表;
+                当mode为'quant'时,返回(im, ),即图像np.ndarray数据。
+        """
+        im = permute(im, False)
+        if self.mode == 'train' or self.mode == 'eval':
+            label = label[np.newaxis, :, :]
+            return (im, label)
+        elif self.mode == 'test':
+            return (im, im_info)
+        else:
+            return (im, )
+
+
+class ComposedSegTransforms(Compose):
+    """ 语义分割模型(UNet/DeepLabv3p)的图像处理流程,具体如下
+        训练阶段:
+        1. 随机以0.5的概率对图像进行水平翻转
+        2. 按不同的比例随机Resize原图
+        3. 从原图中随机crop出大小为train_crop_size的子图,若crop出来的图小于train_crop_size,则将图padding到对应大小
+        4. 图像归一化
+        预测阶段:
+        1. 图像归一化
+
+        Args:
+            mode(str): 图像处理所处阶段,训练/验证/预测,分别对应'train', 'eval', 'test'
+            train_crop_size(list): 模型训练阶段,随机从原图crop的大小
+            mean(list): 图像均值
+            std(list): 图像方差
+    """
+
+    def __init__(self,
+                 mode,
+                 train_crop_size=[769, 769],
+                 mean=[0.5, 0.5, 0.5],
+                 std=[0.5, 0.5, 0.5]):
+        if mode == 'train':
+            # 训练时的transforms,包含数据增强,与类文档中描述的流程一一对应
+            transforms = [
+                RandomHorizontalFlip(prob=0.5),
+                ResizeStepScaling(),
+                RandomPaddingCrop(crop_size=train_crop_size),
+                Normalize(mean=mean, std=std)
+            ]
+        else:
+            # 验证/预测时的transforms
+            transforms = [Normalize(mean=mean, std=std)]
+
+        super(ComposedSegTransforms, self).__init__(transforms)
diff --git a/deploy/raspberry/scripts/build.sh b/deploy/raspberry/scripts/build.sh
new file mode 100755
index 0000000000000000000000000000000000000000..ef268a1b72abd0fee258769764840efe13447058
--- /dev/null
+++ b/deploy/raspberry/scripts/build.sh
@@ -0,0 +1,22 @@
+# Paddle-Lite预编译库的路径
+LITE_DIR=/path/to/Paddle-Lite/inference/lib
+
+# gflags预编译库的路径
+GFLAGS_DIR=$(pwd)/deps/gflags
+# glog预编译库的路径
+GLOG_DIR=$(pwd)/deps/glog
+
+# opencv预编译库的路径, 如果使用自带预编译版本可不修改
+OPENCV_DIR=$(pwd)/deps/opencv
+# 下载自带预编译版本
+sh $(pwd)/scripts/install_third-party.sh
+
+rm -rf build
+mkdir -p build
+cd build
+cmake .. \
+    -DOPENCV_DIR=${OPENCV_DIR} \
+    -DGFLAGS_DIR=${GFLAGS_DIR} \
+    -DLITE_DIR=${LITE_DIR} \
+    -DCMAKE_CXX_FLAGS="-march=armv7-a"
+make
diff --git a/deploy/raspberry/scripts/install_third-party.sh b/deploy/raspberry/scripts/install_third-party.sh
new file mode 100755
index 0000000000000000000000000000000000000000..decc380d4d2c24b99d785ddd3c1a21d217388539
--- /dev/null
+++ b/deploy/raspberry/scripts/install_third-party.sh
@@ -0,0 +1,32 @@
+# download third-party libs
+if [ ! -d "./deps" ]; then
+    mkdir deps
+fi
+if [ ! -d "./deps/gflags" ]; then
+    cd deps
+    git clone https://github.com/gflags/gflags
+    cd gflags
+    cmake .
+    make -j 4
+    cd ..
+    cd ..
+fi
+if [ !
-d "./deps/glog" ]; then + cd deps + git clone https://github.com/google/glog + sudo apt-get install autoconf automake libtool + cd glog + ./autogen.sh + ./configure + make -j 4 + cd .. + cd .. +fi +OPENCV_URL=https://bj.bcebos.com/paddlex/deploy/armopencv/opencv.tar.bz2 +if [ ! -d "./deps/opencv" ]; then + cd deps + wget -c ${OPENCV_URL} + tar xvfj opencv.tar.bz2 + rm -rf opencv.tar.bz2 + cd .. +fi diff --git a/deploy/raspberry/src/paddlex.cpp b/deploy/raspberry/src/paddlex.cpp new file mode 100755 index 0000000000000000000000000000000000000000..081a1ffb7acc56a5efb22e1e92264cad1d807f4d --- /dev/null +++ b/deploy/raspberry/src/paddlex.cpp @@ -0,0 +1,256 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "include/paddlex/paddlex.h" +#include +#include + + +namespace PaddleX { + +void Model::create_predictor(const std::string& model_dir, + const std::string& cfg_file, + int thread_num) { + paddle::lite_api::MobileConfig config; + config.set_model_from_file(model_dir); + config.set_threads(thread_num); + load_config(cfg_file); + predictor_ = + paddle::lite_api::CreatePaddlePredictor( + config); +} + +bool Model::load_config(const std::string& cfg_file) { + YAML::Node config = YAML::LoadFile(cfg_file); + type = config["_Attributes"]["model_type"].as(); + name = config["Model"].as(); + bool to_rgb = true; + if (config["TransformsMode"].IsDefined()) { + std::string mode = config["TransformsMode"].as(); + if (mode == "BGR") { + to_rgb = false; + } else if (mode != "RGB") { + std::cerr << "[Init] Only 'RGB' or 'BGR' is supported for TransformsMode" + << std::endl; + return false; + } + } + // init preprocess ops + transforms_.Init(config["Transforms"], to_rgb); + // read label list + for (const auto& item : config["_Attributes"]["labels"]) { + int index = labels.size(); + labels[index] = item.as(); + } + + return true; +} + +bool Model::preprocess(cv::Mat* input_im, ImageBlob* inputs) { + if (!transforms_.Run(input_im, inputs)) { + return false; + } + return true; +} + +bool Model::predict(const cv::Mat& im, ClsResult* result) { + inputs_.clear(); + if (type == "detector") { + std::cerr << "Loading model is a 'detector', DetResult should be passed to " + "function predict()!" + << std::endl; + return false; + } else if (type == "segmenter") { + std::cerr << "Loading model is a 'segmenter', SegResult should be passed " + "to function predict()!" + << std::endl; + return false; + } + // preprocess + inputs_.input_tensor_ = std::move(predictor_->GetInput(0)); + cv::Mat im_clone = im.clone(); + if (!preprocess(&im_clone, &inputs_)) { + std::cerr << "Preprocess failed!" 
<< std::endl;
+    return false;
+  }
+  // predict
+  predictor_->Run();
+
+  std::unique_ptr<const paddle::lite_api::Tensor> output_tensor(
+      std::move(predictor_->GetOutput(0)));
+  const float* outputs_data = output_tensor->data<float>();
+  // number of elements in the output tensor
+  int64_t output_size = 1;
+  for (const auto& dim : output_tensor->shape()) {
+    output_size *= dim;
+  }
+
+  // postprocess
+  auto ptr = std::max_element(outputs_data, outputs_data + output_size);
+  result->category_id = static_cast<int>(std::distance(outputs_data, ptr));
+  result->score = *ptr;
+  result->category = labels[result->category_id];
+  return true;
+}
+
+bool Model::predict(const cv::Mat& im, DetResult* result) {
+  inputs_.clear();
+  result->clear();
+  if (type == "classifier") {
+    std::cerr << "Loading model is a 'classifier', ClsResult should be passed "
+                 "to function predict()!" << std::endl;
+    return false;
+  } else if (type == "segmenter") {
+    std::cerr << "Loading model is a 'segmenter', SegResult should be passed "
+                 "to function predict()!" << std::endl;
+    return false;
+  }
+  inputs_.input_tensor_ = std::move(predictor_->GetInput(0));
+
+  cv::Mat im_clone = im.clone();
+  if (!preprocess(&im_clone, &inputs_)) {
+    std::cerr << "Preprocess failed!" << std::endl;
+    return false;
+  }
+  if (name == "YOLOv3") {
+    std::unique_ptr<paddle::lite_api::Tensor> im_size_tensor(
+        std::move(predictor_->GetInput(1)));
+    const std::vector<int64_t> IM_SIZE_SHAPE = {1, 2};
+    im_size_tensor->Resize(IM_SIZE_SHAPE);
+    auto* im_size_data = im_size_tensor->mutable_data<int>();
+    memcpy(im_size_data, inputs_.ori_im_size_.data(), 1 * 2 * sizeof(int));
+  }
+  predictor_->Run();
+  auto output_names = predictor_->GetOutputNames();
+  auto output_box_tensor = predictor_->GetTensor(output_names[0]);
+  const float* output_box = output_box_tensor->data<float>();
+  std::vector<int64_t> output_box_shape = output_box_tensor->shape();
+  int size = 1;
+  for (const auto& i : output_box_shape) {
+    size *= i;
+  }
+  // each box is encoded as [category_id, score, xmin, ymin, xmax, ymax]
+  int num_boxes = size / 6;
+  for (int i = 0; i < num_boxes; ++i) {
+    Box box;
+    box.category_id = static_cast<int>(round(output_box[i * 6]));
+    box.category = labels[box.category_id];
+    box.score = output_box[i * 6 + 1];
+    float xmin = output_box[i * 6 + 2];
+    float ymin = output_box[i * 6 + 3];
+    float xmax = output_box[i * 6 + 4];
+    float ymax = output_box[i * 6 + 5];
+    float box_w = xmax - xmin + 1;
+    float box_h = ymax - ymin + 1;
+    box.coordinate = {xmin, ymin, box_w, box_h};
+    result->boxes.push_back(std::move(box));
+  }
+  return true;
+}
+
+
+bool Model::predict(const cv::Mat& im, SegResult* result) {
+  result->clear();
+  inputs_.clear();
+  if (type == "classifier") {
+    std::cerr << "Loading model is a 'classifier', ClsResult should be passed "
+                 "to function predict()!" << std::endl;
+    return false;
+  } else if (type == "detector") {
+    std::cerr << "Loading model is a 'detector', DetResult should be passed "
+                 "to function predict()!" << std::endl;
+    return false;
+  }
+  inputs_.input_tensor_ = std::move(predictor_->GetInput(0));
+  cv::Mat im_clone = im.clone();
+  if (!preprocess(&im_clone, &inputs_)) {
+    std::cerr << "Preprocess failed!"
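+    // For the segmenter, the postprocess further below copies the label and
+    // score tensors out, then replays inputs_.reshape_order_ in reverse,
+    // undoing each recorded "padding"/"resize" step so the output masks
+    // match the original image size.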
<< std::endl; + return false; + } + std::cout << "Preprocess is done" << std::endl; + predictor_->Run(); + auto output_names = predictor_->GetOutputNames(); + + auto output_label_tensor = predictor_->GetTensor(output_names[0]); + const int64_t *label_data = output_label_tensor->mutable_data(); + std::vector output_label_shape = output_label_tensor->shape(); + int size = 1; + for (const auto& i : output_label_shape) { + size *= i; + result->label_map.shape.push_back(i); + } + result->label_map.data.resize(size); + memcpy(result->label_map.data.data(), label_data, size*sizeof(int64_t)); + + auto output_score_tensor = predictor_->GetTensor(output_names[1]); + const float *score_data = output_score_tensor->mutable_data(); + std::vector output_score_shape = output_score_tensor->shape(); + size = 1; + for (const auto& i : output_score_shape) { + size *= i; + result->score_map.shape.push_back(i); + } + result->score_map.data.resize(size); + memcpy(result->score_map.data.data(), score_data, size*sizeof(float)); + + + std::vector label_map(result->label_map.data.begin(), + result->label_map.data.end()); + cv::Mat mask_label(result->label_map.shape[1], + result->label_map.shape[2], + CV_8UC1, + label_map.data()); + + cv::Mat mask_score(result->score_map.shape[2], + result->score_map.shape[3], + CV_32FC1, + result->score_map.data.data()); + int idx = 1; + int len_postprocess = inputs_.im_size_before_resize_.size(); + for (std::vector::reverse_iterator iter = + inputs_.reshape_order_.rbegin(); + iter != inputs_.reshape_order_.rend(); + ++iter) { + if (*iter == "padding") { + auto before_shape = inputs_.im_size_before_resize_[len_postprocess - idx]; + inputs_.im_size_before_resize_.pop_back(); + auto padding_w = before_shape[0]; + auto padding_h = before_shape[1]; + mask_label = mask_label(cv::Rect(0, 0, padding_h, padding_w)); + mask_score = mask_score(cv::Rect(0, 0, padding_h, padding_w)); + } else if (*iter == "resize") { + auto before_shape = inputs_.im_size_before_resize_[len_postprocess - idx]; + inputs_.im_size_before_resize_.pop_back(); + auto resize_w = before_shape[0]; + auto resize_h = before_shape[1]; + cv::resize(mask_label, + mask_label, + cv::Size(resize_h, resize_w), + 0, + 0, + cv::INTER_NEAREST); + cv::resize(mask_score, + mask_score, + cv::Size(resize_h, resize_w), + 0, + 0, + cv::INTER_LINEAR); + } + ++idx; + } + result->label_map.data.assign(mask_label.begin(), + mask_label.end()); + result->label_map.shape = {mask_label.rows, mask_label.cols}; + result->score_map.data.assign(mask_score.begin(), + mask_score.end()); + result->score_map.shape = {mask_score.rows, mask_score.cols}; + return true; +} +} // namespace PaddleX diff --git a/deploy/raspberry/src/transforms.cpp b/deploy/raspberry/src/transforms.cpp new file mode 100755 index 0000000000000000000000000000000000000000..026c0e02e155e32224a79ef66ab203fd0afa40b3 --- /dev/null +++ b/deploy/raspberry/src/transforms.cpp @@ -0,0 +1,239 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "include/paddlex/transforms.h"
+
+#include <math.h>
+
+#include <iostream>
+#include <string>
+#include <vector>
+
+namespace PaddleX {
+
+std::map<std::string, int> interpolations = {{"LINEAR", cv::INTER_LINEAR},
+                                             {"NEAREST", cv::INTER_NEAREST},
+                                             {"AREA", cv::INTER_AREA},
+                                             {"CUBIC", cv::INTER_CUBIC},
+                                             {"LANCZOS4", cv::INTER_LANCZOS4}};
+
+bool Normalize::Run(cv::Mat* im, ImageBlob* data) {
+  for (int h = 0; h < im->rows; h++) {
+    for (int w = 0; w < im->cols; w++) {
+      im->at<cv::Vec3f>(h, w)[0] =
+          (im->at<cv::Vec3f>(h, w)[0] / 255.0 - mean_[0]) / std_[0];
+      im->at<cv::Vec3f>(h, w)[1] =
+          (im->at<cv::Vec3f>(h, w)[1] / 255.0 - mean_[1]) / std_[1];
+      im->at<cv::Vec3f>(h, w)[2] =
+          (im->at<cv::Vec3f>(h, w)[2] / 255.0 - mean_[2]) / std_[2];
+    }
+  }
+  return true;
+}
+
+float ResizeByShort::GenerateScale(const cv::Mat& im) {
+  int origin_w = im.cols;
+  int origin_h = im.rows;
+  int im_size_max = std::max(origin_w, origin_h);
+  int im_size_min = std::min(origin_w, origin_h);
+  float scale =
+      static_cast<float>(short_size_) / static_cast<float>(im_size_min);
+  if (max_size_ > 0) {
+    if (round(scale * im_size_max) > max_size_) {
+      scale = static_cast<float>(max_size_) / static_cast<float>(im_size_max);
+    }
+  }
+  return scale;
+}
+
+bool ResizeByShort::Run(cv::Mat* im, ImageBlob* data) {
+  data->im_size_before_resize_.push_back({im->rows, im->cols});
+  data->reshape_order_.push_back("resize");
+
+  float scale = GenerateScale(*im);
+  int width = static_cast<int>(round(scale * im->cols));
+  int height = static_cast<int>(round(scale * im->rows));
+  cv::resize(*im, *im, cv::Size(width, height), 0, 0, cv::INTER_LINEAR);
+  data->new_im_size_[0] = im->rows;
+  data->new_im_size_[1] = im->cols;
+  data->scale = scale;
+  return true;
+}
+
+bool CenterCrop::Run(cv::Mat* im, ImageBlob* data) {
+  int height = static_cast<int>(im->rows);
+  int width = static_cast<int>(im->cols);
+  if (height < height_ || width < width_) {
+    std::cerr << "[CenterCrop] Image size less than crop size" << std::endl;
+    return false;
+  }
+  int offset_x = static_cast<int>((width - width_) / 2);
+  int offset_y = static_cast<int>((height - height_) / 2);
+  cv::Rect crop_roi(offset_x, offset_y, width_, height_);
+  *im = (*im)(crop_roi);
+  data->new_im_size_[0] = im->rows;
+  data->new_im_size_[1] = im->cols;
+  return true;
+}
+
+bool Padding::Run(cv::Mat* im, ImageBlob* data) {
+  data->im_size_before_resize_.push_back({im->rows, im->cols});
+  data->reshape_order_.push_back("padding");
+
+  int padding_w = 0;
+  int padding_h = 0;
+  if (width_ > 1 && height_ > 1) {
+    padding_w = width_ - im->cols;
+    padding_h = height_ - im->rows;
+  } else if (coarsest_stride_ >= 1) {
+    int h = im->rows;
+    int w = im->cols;
+    padding_h =
+        ceil(h * 1.0 / coarsest_stride_) * coarsest_stride_ - im->rows;
+    padding_w =
+        ceil(w * 1.0 / coarsest_stride_) * coarsest_stride_ - im->cols;
+  }
+
+  if (padding_h < 0 || padding_w < 0) {
+    std::cerr << "[Padding] Computed padding_h=" << padding_h
+              << ", padding_w=" << padding_w
+              << ", but they should not be less than 0."
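+  // Padding either pads up to a fixed (width_, height_) target, or, when
+  // only coarsest_stride_ is set, rounds each side up to the next multiple
+  // of the stride, e.g. a 512x300 image with stride 32 is padded to 512x320.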
<< std::endl; + return false; + } + cv::Scalar value = cv::Scalar(im_value_[0], im_value_[1], im_value_[2]); + cv::copyMakeBorder( + *im, *im, 0, padding_h, 0, padding_w, cv::BORDER_CONSTANT, value); + data->new_im_size_[0] = im->rows; + data->new_im_size_[1] = im->cols; + return true; +} + +bool ResizeByLong::Run(cv::Mat* im, ImageBlob* data) { + if (long_size_ <= 0) { + std::cerr << "[ResizeByLong] long_size should be greater than 0" + << std::endl; + return false; + } + data->im_size_before_resize_.push_back({im->rows, im->cols}); + data->reshape_order_.push_back("resize"); + int origin_w = im->cols; + int origin_h = im->rows; + + int im_size_max = std::max(origin_w, origin_h); + float scale = + static_cast(long_size_) / static_cast(im_size_max); + cv::resize(*im, *im, cv::Size(), scale, scale, cv::INTER_NEAREST); + data->new_im_size_[0] = im->rows; + data->new_im_size_[1] = im->cols; + data->scale = scale; + return true; +} + +bool Resize::Run(cv::Mat* im, ImageBlob* data) { + if (width_ <= 0 || height_ <= 0) { + std::cerr << "[Resize] width and height should be greater than 0" + << std::endl; + return false; + } + if (interpolations.count(interp_) <= 0) { + std::cerr << "[Resize] Invalid interpolation method: '" << interp_ << "'" + << std::endl; + return false; + } + data->im_size_before_resize_.push_back({im->rows, im->cols}); + data->reshape_order_.push_back("resize"); + + cv::resize( + *im, *im, cv::Size(width_, height_), 0, 0, interpolations[interp_]); + data->new_im_size_[0] = im->rows; + data->new_im_size_[1] = im->cols; + return true; +} + +void Transforms::Init(const YAML::Node& transforms_node, bool to_rgb) { + transforms_.clear(); + to_rgb_ = to_rgb; + for (const auto& item : transforms_node) { + std::string name = item.begin()->first.as(); + std::cout << "trans name: " << name << std::endl; + std::shared_ptr transform = CreateTransform(name); + transform->Init(item.begin()->second); + transforms_.push_back(transform); + } +} + +std::shared_ptr Transforms::CreateTransform( + const std::string& transform_name) { + if (transform_name == "Normalize") { + return std::make_shared(); + } else if (transform_name == "ResizeByShort") { + return std::make_shared(); + } else if (transform_name == "CenterCrop") { + return std::make_shared(); + } else if (transform_name == "Resize") { + return std::make_shared(); + } else if (transform_name == "Padding") { + return std::make_shared(); + } else if (transform_name == "ResizeByLong") { + return std::make_shared(); + } else { + std::cerr << "There's unexpected transform(name='" << transform_name + << "')." << std::endl; + exit(-1); + } +} + +bool Transforms::Run(cv::Mat* im, ImageBlob* data) { + // preprocess by order + if (to_rgb_) { + cv::cvtColor(*im, *im, cv::COLOR_BGR2RGB); + } + (*im).convertTo(*im, CV_32FC3); + data->ori_im_size_[0] = im->rows; + data->ori_im_size_[1] = im->cols; + data->new_im_size_[0] = im->rows; + data->new_im_size_[1] = im->cols; + + for (int i = 0; i < transforms_.size(); ++i) { + if (!transforms_[i]->Run(im, data)) { + std::cerr << "Apply transforms to image failed!" 
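+    // Transforms::Run finishes below by copying the float HWC cv::Mat into
+    // the predictor's input tensor in CHW order, element by element:
+    //   input_data[c * H * W + h * W + w] = im.at<cv::Vec3f>(h, w)[c]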
<< std::endl; + return false; + } + } + + // image format NHWC to NCHW + // img data save to ImageBlob + int height = im->rows; + int width = im->cols; + int channels = im->channels(); + const std::vector INPUT_SHAPE = {1, channels, height, width}; + data->input_tensor_->Resize(INPUT_SHAPE); + auto *input_data = data->input_tensor_->mutable_data(); + for (size_t c = 0; c < channels; c++) { + for (size_t h = 0; h < height; h++) { + for (size_t w = 0; w < width; w++) { + input_data[c * width * height + h * width + w] = + im->at(h, w)[c]; + } + } + } + return true; +} +} // namespace PaddleX diff --git a/deploy/raspberry/src/visualize.cpp b/deploy/raspberry/src/visualize.cpp new file mode 100755 index 0000000000000000000000000000000000000000..df2cd768495ea8638acf020ad53437bd827cb95e --- /dev/null +++ b/deploy/raspberry/src/visualize.cpp @@ -0,0 +1,148 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "include/paddlex/visualize.h" + +namespace PaddleX { +std::vector GenerateColorMap(int num_class) { + auto colormap = std::vector(3 * num_class, 0); + for (int i = 0; i < num_class; ++i) { + int j = 0; + int lab = i; + while (lab) { + colormap[i * 3] |= (((lab >> 0) & 1) << (7 - j)); + colormap[i * 3 + 1] |= (((lab >> 1) & 1) << (7 - j)); + colormap[i * 3 + 2] |= (((lab >> 2) & 1) << (7 - j)); + ++j; + lab >>= 3; + } + } + return colormap; +} + +cv::Mat Visualize(const cv::Mat& img, + const DetResult& result, + const std::map& labels, + const std::vector& colormap, + float threshold) { + cv::Mat vis_img = img.clone(); + auto boxes = result.boxes; + for (int i = 0; i < boxes.size(); ++i) { + if (boxes[i].score < threshold) { + continue; + } + cv::Rect roi = cv::Rect(boxes[i].coordinate[0], + boxes[i].coordinate[1], + boxes[i].coordinate[2], + boxes[i].coordinate[3]); + + // 生成预测框和标题 + std::string text = boxes[i].category; + int c1 = colormap[3 * boxes[i].category_id + 0]; + int c2 = colormap[3 * boxes[i].category_id + 1]; + int c3 = colormap[3 * boxes[i].category_id + 2]; + cv::Scalar roi_color = cv::Scalar(c1, c2, c3); + text += std::to_string(static_cast(boxes[i].score * 100)) + "%"; + int font_face = cv::FONT_HERSHEY_SIMPLEX; + double font_scale = 0.5f; + float thickness = 0.5; + cv::Size text_size = + cv::getTextSize(text, font_face, font_scale, thickness, nullptr); + cv::Point origin; + origin.x = roi.x; + origin.y = roi.y; + + // 生成预测框标题的背景 + cv::Rect text_back = cv::Rect(boxes[i].coordinate[0], + boxes[i].coordinate[1] - text_size.height, + text_size.width, + text_size.height); + + // 绘图和文字 + cv::rectangle(vis_img, roi, roi_color, 2); + cv::rectangle(vis_img, text_back, roi_color, -1); + cv::putText(vis_img, + text, + origin, + font_face, + font_scale, + cv::Scalar(255, 255, 255), + thickness); + + // 生成实例分割mask + if (boxes[i].mask.data.size() == 0) { + continue; + } + cv::Mat bin_mask(result.mask_resolution, + result.mask_resolution, + CV_32FC1, + boxes[i].mask.data.data()); + cv::resize(bin_mask, + 
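    // The fixed-resolution mask (mask_resolution x mask_resolution floats)
+    // is resized to the detected box size, binarized at 0.5, pasted into a
+    // full-image mask, tinted with the category color and alpha-blended
+    // (weight 0.5) onto the visualization.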
bin_mask, + cv::Size(boxes[i].mask.shape[0], boxes[i].mask.shape[1])); + cv::threshold(bin_mask, bin_mask, 0.5, 1, cv::THRESH_BINARY); + cv::Mat full_mask = cv::Mat::zeros(vis_img.size(), CV_8UC1); + bin_mask.copyTo(full_mask(roi)); + cv::Mat mask_ch[3]; + mask_ch[0] = full_mask * c1; + mask_ch[1] = full_mask * c2; + mask_ch[2] = full_mask * c3; + cv::Mat mask; + cv::merge(mask_ch, 3, mask); + cv::addWeighted(vis_img, 1, mask, 0.5, 0, vis_img); + } + return vis_img; +} + +cv::Mat Visualize(const cv::Mat& img, + const SegResult& result, + const std::map& labels, + const std::vector& colormap) { + std::vector label_map(result.label_map.data.begin(), + result.label_map.data.end()); + cv::Mat mask(result.label_map.shape[0], + result.label_map.shape[1], + CV_8UC1, + label_map.data()); + cv::Mat color_mask = cv::Mat::zeros( + result.label_map.shape[0], result.label_map.shape[1], CV_8UC3); + int rows = img.rows; + int cols = img.cols; + for (int i = 0; i < rows; i++) { + for (int j = 0; j < cols; j++) { + int category_id = static_cast(mask.at(i, j)); + color_mask.at(i, j)[0] = colormap[3 * category_id + 0]; + color_mask.at(i, j)[1] = colormap[3 * category_id + 1]; + color_mask.at(i, j)[2] = colormap[3 * category_id + 2]; + } + } + return color_mask; +} + +std::string generate_save_path(const std::string& save_dir, + const std::string& file_path) { + if (access(save_dir.c_str(), 0) < 0) { +#ifdef _WIN32 + mkdir(save_dir.c_str()); +#else + if (mkdir(save_dir.c_str(), S_IRWXU) < 0) { + std::cerr << "Fail to create " << save_dir << "directory." << std::endl; + } +#endif + } + int pos = file_path.find_last_of(OS_PATH_SEP); + std::string image_name(file_path.substr(pos + 1)); + return save_dir + OS_PATH_SEP + image_name; +} +} // namespace PaddleX diff --git a/docs/apis/deploy.md b/docs/apis/deploy.md index 3f924ebee2893cfca77cad459f4cb9c7a6b2acb1..5b906239b9fb45f92e9bca1ba450c028817885ff 100644 --- a/docs/apis/deploy.md +++ b/docs/apis/deploy.md @@ -45,7 +45,7 @@ predict(image, topk=1) ### batch_predict 接口 ``` -batch_predict(image_list, topk=1, thread_num=2) +batch_predict(image_list, topk=1) ``` 批量图片预测接口。 @@ -53,4 +53,3 @@ batch_predict(image_list, topk=1, thread_num=2) > > > * **image_list** (list|tuple): 对列表(或元组)中的图像同时进行预测,列表中的元素可以是图像路径或numpy数组(HWC排列,BGR格式)。 > > * **topk** (int): 图像分类时使用的参数,表示预测前topk个可能的分类。 -> > * **thread_num** (int): 并发执行各图像预处理时的线程数。 diff --git a/docs/apis/models/classification.md b/docs/apis/models/classification.md index 793a889568f8cb597fdea650310acada6512a1e9..96f76b8b5d49d800ad12439eefdb46530fc4c834 100755 --- a/docs/apis/models/classification.md +++ b/docs/apis/models/classification.md @@ -62,7 +62,7 @@ evaluate(self, eval_dataset, batch_size=1, epoch_id=None, return_details=False) ### predict ```python -predict(self, img_file, transforms=None, topk=5) +predict(self, img_file, transforms=None, topk=1) ``` > 分类模型预测接口。需要注意的是,只有在训练过程中定义了eval_dataset,模型在保存时才会将预测时的图像处理流程保存在`ResNet50.test_transforms`和`ResNet50.eval_transforms`中。如未在训练时定义eval_dataset,那在调用预测`predict`接口时,用户需要再重新定义test_transforms传入给`predict`接口。 @@ -81,7 +81,7 @@ predict(self, img_file, transforms=None, topk=5) ### batch_predict ```python -batch_predict(self, img_file_list, transforms=None, topk=5, thread_num=2) +batch_predict(self, img_file_list, transforms=None, topk=1) ``` > 分类模型批量预测接口。需要注意的是,只有在训练过程中定义了eval_dataset,模型在保存时才会将预测时的图像处理流程保存在`ResNet50.test_transforms`和`ResNet50.eval_transforms`中。如未在训练时定义eval_dataset,那在调用预测`batch_predict`接口时,用户需要再重新定义test_transforms传入给`batch_predict`接口。 @@ -91,7 +91,6 @@ 
batch_predict(self, img_file_list, transforms=None, topk=5, thread_num=2) > > - **img_file_list** (list|tuple): 对列表(或元组)中的图像同时进行预测,列表中的元素可以是图像路径或numpy数组(HWC排列,BGR格式)。 > > - **transforms** (paddlex.cls.transforms): 数据预处理操作。 > > - **topk** (int): 预测时前k个最大值。 -> > - **thread_num** (int): 并发执行各图像预处理时的线程数。 > **返回值** > diff --git a/docs/apis/models/detection.md b/docs/apis/models/detection.md index b3873ce5eba6516c4296d13d2d99510d5e3e6e45..3cc911377bc45d3831faacfb160a760bcc1cd8e2 100755 --- a/docs/apis/models/detection.md +++ b/docs/apis/models/detection.md @@ -108,7 +108,7 @@ predict(self, img_file, transforms=None) ### batch_predict ```python -batch_predict(self, img_file_list, transforms=None, thread_num=2) +batch_predict(self, img_file_list, transforms=None) ``` > PPYOLO模型批量预测接口。需要注意的是,只有在训练过程中定义了eval_dataset,模型在保存时才会将预测时的图像处理流程保存在`YOLOv3.test_transforms`和`YOLOv3.eval_transforms`中。如未在训练时定义eval_dataset,那在调用预测`batch_predict`接口时,用户需要再重新定义`test_transforms`传入给`batch_predict`接口 @@ -117,7 +117,6 @@ batch_predict(self, img_file_list, transforms=None, thread_num=2) > > > - **img_file_list** (str|np.ndarray): 对列表(或元组)中的图像同时进行预测,列表中的元素是预测图像路径或numpy数组(HWC排列,BGR格式)。 > > - **transforms** (paddlex.det.transforms): 数据预处理操作。 -> > - **thread_num** (int): 并发执行各图像预处理时的线程数。 > > **返回值** > @@ -222,7 +221,7 @@ predict(self, img_file, transforms=None) ### batch_predict ```python -batch_predict(self, img_file_list, transforms=None, thread_num=2) +batch_predict(self, img_file_list, transforms=None) ``` > YOLOv3模型批量预测接口。需要注意的是,只有在训练过程中定义了eval_dataset,模型在保存时才会将预测时的图像处理流程保存在`YOLOv3.test_transforms`和`YOLOv3.eval_transforms`中。如未在训练时定义eval_dataset,那在调用预测`batch_predict`接口时,用户需要再重新定义`test_transforms`传入给`batch_predict`接口 @@ -231,7 +230,6 @@ batch_predict(self, img_file_list, transforms=None, thread_num=2) > > > - **img_file_list** (str|np.ndarray): 对列表(或元组)中的图像同时进行预测,列表中的元素是预测图像路径或numpy数组(HWC排列,BGR格式)。 > > - **transforms** (paddlex.det.transforms): 数据预处理操作。 -> > - **thread_num** (int): 并发执行各图像预处理时的线程数。 > > **返回值** > @@ -327,7 +325,7 @@ predict(self, img_file, transforms=None) ### batch_predict ```python -batch_predict(self, img_file_list, transforms=None, thread_num=2) +batch_predict(self, img_file_list, transforms=None) ``` > FasterRCNN模型批量预测接口。需要注意的是,只有在训练过程中定义了eval_dataset,模型在保存时才会将预测时的图像处理流程保存在`FasterRCNN.test_transforms`和`FasterRCNN.eval_transforms`中。如未在训练时定义eval_dataset,那在调用预测`batch_predict`接口时,用户需要再重新定义test_transforms传入给`batch_predict`接口。 @@ -336,7 +334,6 @@ batch_predict(self, img_file_list, transforms=None, thread_num=2) > > > - **img_file_list** (list|tuple): 对列表(或元组)中的图像同时进行预测,列表中的元素是预测图像路径或numpy数组(HWC排列,BGR格式)。 > > - **transforms** (paddlex.det.transforms): 数据预处理操作。 -> > - **thread_num** (int): 并发执行各图像预处理时的线程数。 > > **返回值** > diff --git a/docs/apis/models/instance_segmentation.md b/docs/apis/models/instance_segmentation.md index 494cde32a1888897b5771e6d94d8691d6ff79ce8..3c86096cccef3d66dcad15a2a496023102750386 100755 --- a/docs/apis/models/instance_segmentation.md +++ b/docs/apis/models/instance_segmentation.md @@ -88,7 +88,7 @@ predict(self, img_file, transforms=None) #### batch_predict ```python -batch_predict(self, img_file_list, transforms=None, thread_num=2) +batch_predict(self, img_file_list, transforms=None) ``` > MaskRCNN模型批量预测接口。需要注意的是,只有在训练过程中定义了eval_dataset,模型在保存时才会将预测时的图像处理流程保存在`FasterRCNN.test_transforms`和`FasterRCNN.eval_transforms`中。如未在训练时定义eval_dataset,那在调用预测`batch_predict`接口时,用户需要再重新定义test_transforms传入给`batch_predict`接口。 @@ -97,7 +97,6 @@ batch_predict(self, img_file_list, transforms=None, 
thread_num=2) > > > - **img_file_list** (list|tuple): 对列表(或元组)中的图像同时进行预测,列表中的元素可以是预测图像路径或numpy数组(HWC排列,BGR格式)。 > > - **transforms** (paddlex.det.transforms): 数据预处理操作。 -> > - **thread_num** (int): 并发执行各图像预处理时的线程数。 > > **返回值** > diff --git a/docs/apis/models/semantic_segmentation.md b/docs/apis/models/semantic_segmentation.md index 584167618f0a0fcdc5da6ac8b271eb99e9804b26..ab62bd57fa25f4ed2bc35551996688de0cedcacc 100755 --- a/docs/apis/models/semantic_segmentation.md +++ b/docs/apis/models/semantic_segmentation.md @@ -95,7 +95,7 @@ predict(self, img_file, transforms=None): ### batch_predict ``` -batch_predict(self, img_file_list, transforms=None, thread_num=2): +batch_predict(self, img_file_list, transforms=None): ``` > DeepLabv3p模型批量预测接口。需要注意的是,只有在训练过程中定义了eval_dataset,模型在保存时才会将预测时的图像处理流程保存在`DeepLabv3p.test_transforms`和`DeepLabv3p.eval_transforms`中。如未在训练时定义eval_dataset,那在调用预测`batch_predict`接口时,用户需要再重新定义test_transforms传入给`batch_predict`接口。 @@ -104,7 +104,6 @@ batch_predict(self, img_file_list, transforms=None, thread_num=2): > > > > - **img_file_list** (list|tuple): 对列表(或元组)中的图像同时进行预测,列表中的元素可以是预测图像路径或numpy数组(HWC排列,BGR格式)。 > > - **transforms** (paddlex.seg.transforms): 数据预处理操作。 -> > - **thread_num** (int): 并发执行各图像预处理时的线程数。 > **返回值** > > diff --git a/docs/deploy/openvino/export_openvino_model.md b/docs/deploy/openvino/export_openvino_model.md new file mode 100644 index 0000000000000000000000000000000000000000..1d3ddc44378bba5ab061862914f94a22b8721016 --- /dev/null +++ b/docs/deploy/openvino/export_openvino_model.md @@ -0,0 +1,38 @@ +# OpenVINO模型转换 +将Paddle模型转换为OpenVINO的Inference Engine + +## 环境依赖 + +* ONNX 1.5.0+ +* PaddleX 1.0+ +* OpenVINO 2020.4 + +**说明**:PaddleX安装请参考[PaddleX](https://paddlex.readthedocs.io/zh_CN/develop/install.html) , OpenVINO安装请参考[OpenVINO](https://docs.openvinotoolkit.org/latest/index.html),ONNX请安装1.5.0以上版本否则会出现转模型错误。 + +请确保系统已经安装好上述基本软件,**下面所有示例以工作目录 `/root/projects/`演示**。 + +## 导出inference模型 +paddle模型转openvino之前需要先把paddle模型导出为inference格式模型,导出的模型将包括__model__、__params__和model.yml三个文件名,导出命令如下 +``` +paddlex --export_inference --model_dir=/path/to/paddle_model --save_dir=./inference_model --fixed_input_shape=[w,h] +``` + +## 导出OpenVINO模型 + +``` +cd /root/projects/python + +python convertor.py --model_dir /path/to/inference_model --save_dir /path/to/openvino_model --fixed_input_shape [w,h] +``` +**转换成功后会在save_dir下出现后缀名为.xml、.bin、.mapping三个文件** +转换参数说明如下: + +| 参数 | 说明 | +| ---- | ---- | +| --model_dir | Paddle模型路径,请确保__model__, \_\_params__model.yml在同一个目录| +| --save_dir | OpenVINO模型保存路径 | +| --fixed_input_shape | 模型输入的[W,H] | +| --data type(option) | FP32、FP16,默认为FP32,VPU下的IR需要为FP16 | +**注意**: +- 由于OpenVINO不支持ONNX的resize-11 OP的原因,目前还不支持Paddle的分割模型 +- YOLOv3在通过OpenVINO部署时,由于OpenVINO对ONNX OP的支持限制,我们在将YOLOv3的Paddle模型导出时,对最后一层multiclass_nms进行了特殊处理,导出的ONNX模型,最终输出的Box结果包括背景类别(而Paddle模型不包含),此处在OpenVINO的部署代码中,我们通过后处理过滤了背景类别。 diff --git a/docs/deploy/openvino/index.rst b/docs/deploy/openvino/index.rst index 3f3ee83919b3c7e7e7c2d03cbd3d451803042a14..bf8213a456f418299ee7c631353ed362ddac9c37 100755 --- a/docs/deploy/openvino/index.rst +++ b/docs/deploy/openvino/index.rst @@ -6,6 +6,8 @@ OpenVINO部署 :maxdepth: 2 :caption: 文档目录: + introduction.md windows.md linux.md - intel_movidius.md + python.md + export_openvino_model.md diff --git a/docs/deploy/openvino/intel_movidius.md b/docs/deploy/openvino/intel_movidius.md deleted file mode 100644 index ec514562b7738abe86654e08af044b22ec450f6c..0000000000000000000000000000000000000000 --- a/docs/deploy/openvino/intel_movidius.md +++ /dev/null @@ -1 
+0,0 @@ -# Intel计算棒 diff --git a/docs/deploy/openvino/introduction.md b/docs/deploy/openvino/introduction.md new file mode 100644 index 0000000000000000000000000000000000000000..06af9ac6ef37d3cc0e908c33455b413b41bdb20d --- /dev/null +++ b/docs/deploy/openvino/introduction.md @@ -0,0 +1,32 @@ +# OpenVINO部署简介 +PaddleX支持将训练好的Paddle模型通过OpenVINO实现模型的预测加速,OpenVINO详细资料与安装流程请参考[OpenVINO](https://docs.openvinotoolkit.org/latest/index.html) + +## 部署支持情况 +下表提供了PaddleX在不同环境下对使用OpenVINO加速的支持情况 + +|硬件平台|Linux|Windows|Raspbian OS|c++|python |分类|检测|分割| +| ----| ---- | ---- | ----| ---- | ---- |---- | ---- |---- | +|CPU|支持|支持|不支持|支持|支持|支持|支持|不支持| +|VPU|支持|支持|支持|支持|支持|支持|不支持|不支持| + + +**注意**:其中Raspbian OS为树莓派操作系统。检测模型仅支持YOLOV3,由于OpenVINO不支持ONNX的resize-11 OP的原因,目前还不支持Paddle的分割模型 + +## 部署流程 +**PaddleX到OpenVINO的部署流程可以分为如下两步**: + + * **模型转换**:将Paddle的模型转换为OpenVINO的Inference Engine + * **预测部署**:使用Inference Engine进行预测 + +## 模型转换 +**模型转换请参考文档[模型转换](./export_openvino_model.md)** +**说明**:由于不同软硬件平台下OpenVINO模型转换方法一致,故如何转换模型后续文档中不再赘述。 + +## 预测部署 +由于不同软硬下部署OpenVINO实现预测的方式不完全一致,具体请参考: +**[Linux](./linux.md)**:介绍了PaddleX在操作系统为Linux或者Raspbian OS,编程语言为C++,硬件平台为 +CPU或者VPU的情况下使用OpenVINO进行预测加速 + +**[Windows](./windows.md)**:介绍了PaddleX在操作系统为Window,编程语言为C++,硬件平台为CPU或者VPU的情况下使用OpenVINO进行预测加速 + +**[Python](./python.md)**:介绍了PaddleX在python下使用OpenVINO进行预测加速 \ No newline at end of file diff --git a/docs/deploy/openvino/linux.md b/docs/deploy/openvino/linux.md index 4825da6366435f098145e42ca7b88fc6d4f84255..43d414a896ceaff5469e929a598bb060ee3ccd86 100644 --- a/docs/deploy/openvino/linux.md +++ b/docs/deploy/openvino/linux.md @@ -1 +1,145 @@ # Linux平台 + + +## 前置条件 + +* OS: Ubuntu、Raspbian OS +* GCC* 5.4.0 +* CMake 3.0+ +* PaddleX 1.0+ +* OpenVINO 2020.4 +* 硬件平台:CPU、VPU + +**说明**:PaddleX安装请参考[PaddleX](https://paddlex.readthedocs.io/zh_CN/develop/install.html) , OpenVINO安装请根据相应的系统参考[OpenVINO-Linux](https://docs.openvinotoolkit.org/latest/_docs_install_guides_installing_openvino_linux.html)或者[OpenVINO-Raspbian](https://docs.openvinotoolkit.org/latest/openvino_docs_install_guides_installing_openvino_raspbian.html) + +请确保系统已经安装好上述基本软件,并配置好相应环境,**下面所有示例以工作目录 `/root/projects/`演示**。 + + + +## 预测部署 + +文档提供了c++下预测部署的方法,如果需要在python下预测部署请参考[python预测部署](./python.md) + +### Step1 下载PaddleX预测代码 +``` +mkdir -p /root/projects +cd /root/projects +git clone https://github.com/PaddlePaddle/PaddleX.git +``` +**说明**:其中C++预测代码在PaddleX/deploy/openvino 目录,该目录不依赖任何PaddleX下其他目录。 + +### Step2 软件依赖 +提供了依赖软件预编包或者一键编译,用户不需要单独下载或编译第三方依赖软件。若需要自行编译第三方依赖软件请参考: + +- gflags:编译请参考 [编译文档](https://gflags.github.io/gflags/#download) + +- glog:编译请参考[编译文档](https://github.com/google/glog) + +- opencv: 编译请参考 +[编译文档](https://docs.opencv.org/master/d7/d9f/tutorial_linux_install.html) + + + +### Step3: 编译 +编译`cmake`的命令在`scripts/build.sh`中,若在树莓派(Raspbian OS)上编译请修改ARCH参数x86为armv7,若自行编译第三方依赖软件请根据Step1中编译软件的实际情况修改主要参数,其主要内容说明如下: +``` +# openvino预编译库的路径 +OPENVINO_DIR=$INTEL_OPENVINO_DIR/inference_engine +# gflags预编译库的路径 +GFLAGS_DIR=$(pwd)/deps/gflags +# glog预编译库的路径 +GLOG_DIR=$(pwd)/deps/glog +# ngraph lib预编译库的路径 +NGRAPH_LIB=$INTEL_OPENVINO_DIR/deployment_tools/ngraph/lib +# opencv预编译库的路径 +OPENCV_DIR=$(pwd)/deps/opencv/ +#cpu架构(x86或armv7) +ARCH=x86 +``` +执行`build`脚本: + ```shell + sh ./scripts/build.sh + ``` + +### Step4: 预测 + +编译成功后,分类任务的预测可执行程序为`classifier`,检测任务的预测可执行程序为`detector`,其主要命令参数说明如下: + +| 参数 | 说明 | +| ---- | ---- | +| --model_dir | 模型转换生成的.xml文件路径,请保证模型转换生成的三个文件在同一路径下| +| --image | 要预测的图片文件路径 | +| --image_list | 按行存储图片路径的.txt文件 | +| --device | 
运行的平台,可选项{"CPU","MYRIAD"},默认值为"CPU",如在VPU上请使用"MYRIAD"| +| --cfg_file | PaddleX model 的.yml配置文件 | +| --save_dir | 可视化结果图片保存地址,仅适用于检测任务,默认值为" "既不保存可视化结果 | + +### 样例 +`样例一`: +linux系统在CPU下做单张图片的分类任务预测 +测试图片 `/path/to/test_img.jpeg` + +```shell +./build/classifier --model_dir=/path/to/openvino_model --image=/path/to/test_img.jpeg --cfg_file=/path/to/PadlleX_model.yml +``` + + +`样例二`: +linux系统在CPU下做多张图片的检测任务预测,并保存预测可视化结果 +预测的多个图片`/path/to/image_list.txt`,image_list.txt内容的格式如下: +``` +/path/to/images/test_img1.jpeg +/path/to/images/test_img2.jpeg +... +/path/to/images/test_imgn.jpeg +``` + +```shell +./build/detector --model_dir=/path/to/models/openvino_model --image_list=/root/projects/images_list.txt --cfg_file=/path/to/PadlleX_model.yml --save_dir ./output +``` + +`样例三`: +树莓派(Raspbian OS)在VPU下做单张图片分类任务预测 +测试图片 `/path/to/test_img.jpeg` + +```shell +./build/classifier --model_dir=/path/to/openvino_model --image=/path/to/test_img.jpeg --cfg_file=/path/to/PadlleX_model.yml --device=MYRIAD +``` + +## 性能测试 +`测试一`: +在服务器CPU下测试了OpenVINO对PaddleX部署的加速性能: +- CPU:Intel(R) Xeon(R) CPU E5-2650 v4 @ 2.20GHz +- OpenVINO: 2020.4 +- PaddleX:采用Paddle预测库(1.8),打开mkldnn加速,打开多线程。 +- 模型来自PaddleX tutorials,Batch Size均为1,耗时单位为ms/image,只计算模型运行时间,不包括数据的预处理和后处理,20张图片warmup,100张图片测试性能。 + +|模型| PaddleX| OpenVINO | 图片输入大小| +|---|---|---|---| +|resnet-50 | 20.56 | 16.12 | 224*224 | +|mobilenet-V2 | 5.16 | 2.31 |224*224| +|yolov3-mobilnetv1 |76.63| 46.26|608*608 | + +`测试二`: +在PC机上插入VPU架构的神经计算棒(NCS2),通过Openvino加速。 +- CPU:Intel(R) Core(TM) i5-4300U 1.90GHz +- VPU:Movidius Neural Compute Stick2 +- OpenVINO: 2020.4 +- 模型来自PaddleX tutorials,Batch Size均为1,耗时单位为ms/image,只计算模型运行时间,不包括数据的预处理和后处理,20张图片warmup,100张图片测试性能。 + +|模型|OpenVINO|输入图片| +|---|---|---| +|mobilenetV2|24.00|224*224| +|resnet50_vd_ssld|58.53|224*224| + +`测试三`: +在树莓派3B上插入VPU架构的神经计算棒(NCS2),通过Openvino加速。 +- CPU :ARM Cortex-A72 1.2GHz 64bit +- VPU:Movidius Neural Compute Stick2 +- OpenVINO 2020.4 +- 模型来自paddleX tutorials,Batch Size均为1,耗时单位为ms/image,只计算模型运行时间,不包括数据的预处理和后处理,20张图片warmup,100张图片测试性能。 + +|模型|OpenVINO|输入图片大小| +|---|---|---| +|mobilenetV2|43.15|224*224| +|resnet50|82.66|224*224| diff --git a/docs/deploy/openvino/python.md b/docs/deploy/openvino/python.md new file mode 100644 index 0000000000000000000000000000000000000000..e750d11e932f1c40aad59fc1463d47c08b63fa5f --- /dev/null +++ b/docs/deploy/openvino/python.md @@ -0,0 +1,49 @@ +# Python预测部署 +文档说明了在python下基于OpenVINO的预测部署,部署前需要先将paddle模型转换为OpenVINO的Inference Engine,请参考[模型转换](docs/deploy/openvino/export_openvino_model.md)。目前CPU硬件上支持PadlleX的分类、检测、分割模型;VPU上支持PaddleX的分类模型。 + +## 前置条件 +* Python 3.6+ +* OpenVINO 2020.4 + +**说明**:OpenVINO安装请参考[OpenVINO](https://docs.openvinotoolkit.org/latest/index.html) + + +请确保系统已经安装好上述基本软件,**下面所有示例以工作目录 `/root/projects/`演示**。 + +## 预测部署 +运行/root/projects/PaddleX/deploy/openvino/python目录下demo.py文件可以进行预测,其命令参数说明如下: + +| 参数 | 说明 | +| ---- | ---- | +| --model_dir | 模型转换生成的.xml文件路径,请保证模型转换生成的三个文件在同一路径下| +| --img | 要预测的图片文件路径 | +| --image_list | 按行存储图片路径的.txt文件 | +| --device | 运行的平台, 默认值为"CPU" | +| --cfg_file | PaddleX model 的.yml配置文件 | + +### 样例 +`样例一`: +测试图片 `/path/to/test_img.jpeg` + +``` +cd /root/projects/python + +python demo.py --model_dir /path/to/openvino_model --img /path/to/test_img.jpeg --cfg_file /path/to/PadlleX_model.yml +``` + +样例二`: + +预测多个图片`/path/to/image_list.txt`,image_list.txt内容的格式如下: + +``` +/path/to/images/test_img1.jpeg +/path/to/images/test_img2.jpeg +... 
+/path/to/images/test_imgn.jpeg +``` + +``` +cd /root/projects/python + +python demo.py --model_dir /path/to/models/openvino_model --image_list /root/projects/images_list.txt --cfg_file=/path/to/PadlleX_model.yml +``` diff --git a/docs/deploy/openvino/windows.md b/docs/deploy/openvino/windows.md index 30cdf17ded910bfda9286e5f700525c9b1bb777b..a65312561f9628e2d99e16241e79b0a8acd0e83e 100644 --- a/docs/deploy/openvino/windows.md +++ b/docs/deploy/openvino/windows.md @@ -1 +1,116 @@ # Windows平台 + +## 说明 +Windows 平台下,我们使用`Visual Studio 2019 Community` 进行了测试。微软从`Visual Studio 2017`开始即支持直接管理`CMake`跨平台编译项目,但是直到`2019`才提供了稳定和完全的支持,所以如果你想使用CMake管理项目编译构建,我们推荐你使用`Visual Studio 2019`环境下构建。 + +## 前置条件 +* Visual Studio 2019 +* OpenVINO 2020.4 +* CMake 3.0+ + +**说明**:PaddleX安装请参考[PaddleX](https://paddlex.readthedocs.io/zh_CN/develop/install.html) , OpenVINO安装请参考[OpenVINO-Windows](https://docs.openvinotoolkit.org/latest/openvino_docs_install_guides_installing_openvino_windows.html) + +**注意**:安装完OpenVINO后需要手动添加OpenVINO目录到系统环境变量,否则在运行程序时会出现找不到dll的情况。以安装OpenVINO时不改变OpenVINO安装目录情况下为示例,流程如下 +- 我的电脑->属性->高级系统设置->环境变量 + - 在系统变量中找到Path(如没有,自行创建),并双击编辑 + - 新建,分别将OpenVINO以下路径填入并保存: + `C:\Program File (x86)\IntelSWTools\openvino\inference_engine\bin\intel64\Release` + `C:\Program File (x86)\IntelSWTools\openvino\inference_engine\external\tbb\bin` + `C:\Program File (x86)\IntelSWTools\openvino\deployment_tools\ngraph\lib` + +请确保系统已经安装好上述基本软件,并配置好相应环境,**下面所有示例以工作目录为 `D:\projects`演示。** + +## 预测部署 + +文档提供了c++下预测部署的方法,如果需要在python下预测部署请参考[python预测部署](./python.md) + +### Step1: 下载PaddleX预测代码 + +```shell +d: +mkdir projects +cd projects +git clone https://github.com/PaddlePaddle/PaddleX.git +``` + +**说明**:其中`C++`预测代码在`PaddleX\deploy\openvino` 目录,该目录不依赖任何`PaddleX`下其他目录。 + +### Step2 软件依赖 +提供了依赖软件预编译库: +- [gflas-glog](https://bj.bcebos.com/paddlex/deploy/windows/third-parts.zip) +- [opencv](https://bj.bcebos.com/paddleseg/deploy/opencv-3.4.6-vc14_vc15.exe) +请下载上面两个连接的预编译库。若需要自行下载请参考: +- gflags:[下载地址](https://docs.microsoft.com/en-us/windows-hardware/drivers/debugger/gflags) +- glog:[编译文档](https://github.com/google/glog) +- opencv:[下载地址](https://opencv.org/releases/) +下载完opencv后需要配置环境变量,如下流程所示 + - 我的电脑->属性->高级系统设置->环境变量 + - 在系统变量中找到Path(如没有,自行创建),并双击编辑 + - 新建,将opencv路径填入并保存,如`D:\projects\opencv\build\x64\vc14\bin` + +### Step3: 使用Visual Studio 2019直接编译CMake +1. 打开Visual Studio 2019 Community,点击`继续但无需代码` +2. 点击: `文件`->`打开`->`CMake` 选择C++预测代码所在路径(例如`D:\projects\PaddleX\deploy\openvino`),并打开`CMakeList.txt`: +3. 点击:`项目`->`CMake设置` +4. 点击`浏览`,分别设置编译选项指定`OpenVINO`、`Gflags`、`GLOG`、`NGRAPH`、`OPENCV`的路径 + +| 参数名 | 含义 | +| ---- | ---- | +| OPENCV_DIR | opencv库路径 | +| OPENVINO_DIR | OpenVINO推理库路径,在OpenVINO安装目录下的deployment/inference_engine目录,若未修改OpenVINO默认安装目录可以不用修改 | +| NGRAPH_LIB | OpenVINO的ngraph库路径,在OpenVINO安装目录下的deployment/ngraph/lib目录,若未修改OpenVINO默认安装目录可以不用修改 | +| GFLAGS_DIR | gflags库路径 | +| GLOG_DIR | glog库路径 | +| WITH_STATIC_LIB | 是否静态编译,默认为True | + +**设置完成后**, 点击`保存并生成CMake缓存以加载变量`。 +5. 
点击`生成`->`全部生成` +### Step5: 预测 +上述`Visual Studio 2019`编译产出的可执行文件在`out\build\x64-Release`目录下,打开`cmd`,并切换到该目录: + +``` +D: +cd D:\projects\PaddleX\deploy\openvino\out\build\x64-Release +``` + +* 编译成功后,图片预测demo的入口程序为`detector.exe`,`classifier.exe`,用户可根据自己的模型类型选择,其主要命令参数说明如下: + +| 参数 | 说明 | +| ---- | ---- | +| --model_dir | 模型转换生成的.xml文件路径,请保证模型转换生成的三个文件在同一路径下| +| --image | 要预测的图片文件路径 | +| --image_list | 按行存储图片路径的.txt文件 | +| --device | 运行的平台,可选项{"CPU","MYRIAD"},默认值为"CPU",如在VPU上请使用"MYRIAD"| +| --cfg_file | PaddleX model 的.yml配置文件 | +| --save_dir | 可视化结果图片保存地址,仅适用于检测任务,默认值为" "既不保存可视化结果 | + +### 样例 +`样例一`: +在CPU下做单张图片的分类任务预测 +测试图片 `/path/to/test_img.jpeg` + +```shell +./classifier.exe --model_dir=/path/to/openvino_model --image=/path/to/test_img.jpeg --cfg_file=/path/to/PadlleX_model.yml +``` + +`样例二`: +在CPU下做多张图片的检测任务预测,并保存预测可视化结果 +预测多个图片`/path/to/image_list.txt`,image_list.txt内容的格式如下: +``` +/path/to/images/test_img1.jpeg +/path/to/images/test_img2.jpeg +... +/path/to/images/test_imgn.jpeg +``` + +```shell +./detector.exe --model_dir=/path/to/models/openvino_model --image_list=/root/projects/images_list.txt --cfg_file=/path/to/PadlleX_model.yml --save_dir ./output +``` + +`样例三`: +在VPU下做单张图片分类任务预测 +测试图片 `/path/to/test_img.jpeg` + +```shell +.classifier.exe --model_dir=/path/to/openvino_model --image=/path/to/test_img.jpeg --cfg_file=/path/to/PadlleX_model.yml --device=MYRIAD +``` diff --git a/docs/deploy/raspberry/Raspberry.md b/docs/deploy/raspberry/Raspberry.md new file mode 100644 index 0000000000000000000000000000000000000000..a49b9605e9956a288162e4d6fc6bd969f0103d90 --- /dev/null +++ b/docs/deploy/raspberry/Raspberry.md @@ -0,0 +1,156 @@ +# 树莓派 +PaddleX支持通过Paddle-Lite和基于OpenVINO的神经计算棒(NCS2)这两种方式在树莓派上完成预测部署。 + + +## 硬件环境配置 + +对于尚未安装系统的树莓派首先需要进行系统安装、环境配置等步骤来初始化硬件环境,过程中需要的软硬件如下: + +- 硬件:micro SD,显示器,键盘,鼠标 +- 软件:Raspbian OS +### Step1:系统安装 +- 格式化micro SD卡为FAT格式,Windows和Mac下建议使用[SD Memory Card Formatter](https://www.sdcard.org/downloads/formatter/)工具,Linux下请参考[NOOBS For Raspberry Pi](http://qdosmsq.dunbar-it.co.uk/blog/2013/06/noobs-for-raspberry-pi/) +- 下载NOOBS版本的Raspbian OS [下载地址](https://www.raspberrypi.org/downloads/)并将解压后的文件复制到SD中,插入SD后给树莓派通电,然后将自动安装系统 +### Step2:环境配置 +- 启用VNC和SSH服务:打开LX终端输入,输入如下命令,选择Interfacing Option然后选择P2 SSH 和 P3 VNC分别打开SSH与VNC。打开后就可以通过SSH或者VNC的方式连接树莓派 +``` +sudo raspi-config +``` +- 更换源:由于树莓派官方源速度很慢,建议在官网查询国内源 [树莓派软件源](https://www.jianshu.com/p/67b9e6ebf8a0)。更换后执行 +``` +sudo apt-get update +sudo apt-get upgrade +``` + +## Paddle-Lite部署 +基于Paddle-Lite的部署目前可以支持PaddleX的分类、分割与检测模型,其实检测模型仅支持YOLOV3 +部署的流程包括:PaddleX模型转换与转换后的模型部署 + +**说明**:PaddleX安装请参考[PaddleX](https://paddlex.readthedocs.io/zh_CN/develop/install.html),Paddle-Lite详细资料请参考[Paddle-Lite](https://paddle-lite.readthedocs.io/zh/latest/index.html) + +请确保系统已经安装好上述基本软件,并配置好相应环境,**下面所有示例以工作目录 `/root/projects/`演示**。 + +## Paddle-Lite模型转换 +将PaddleX模型转换为Paddle-Lite模型,具体请参考[Paddle-Lite模型转换](./export_nb_model.md) + +## Paddle-Lite 预测 +### Step1 下载PaddleX预测代码 +``` +mkdir -p /root/projects +cd /root/projects +git clone https://github.com/PaddlePaddle/PaddleX.git +``` +**说明**:其中C++预测代码在PaddleX/deploy/raspberry 目录,该目录不依赖任何PaddleX下其他目录,如果需要在python下预测部署请参考[Python预测部署](./python.md)。 + +### Step2:Paddle-Lite预编译库下载 +提供了下载的opt工具对应的Paddle-Lite在ArmLinux下面的预编译库:[Paddle-Lite(ArmLinux)预编译库](https://bj.bcebos.com/paddlex/deploy/lite/inference_lite_2.6.1_armlinux.tar.bz2)。 +建议用户使用预编译库,若需要自行编译,在树莓派上LX终端输入 +``` +git clone https://github.com/PaddlePaddle/Paddle-Lite.git +cd Paddle-Lite +sudo ./lite/tools/build.sh --arm_os=armlinux --arm_abi=armv7hf 
--arm_lang=gcc --build_extra=ON full_publish +``` + +预编库位置:`./build.lite.armlinux.armv7hf.gcc/inference_lite_lib.armlinux.armv7hf/cxx` + +**注意**:预测库版本需要跟opt版本一致,更多Paddle-Lite编译内容请参考[Paddle-Lite编译](https://paddle-lite.readthedocs.io/zh/latest/user_guides/source_compile.html);更多预编译Paddle-Lite预测库请参考[Paddle-Lite Release Note](https://github.com/PaddlePaddle/Paddle-Lite/releases) + +### Step3 软件依赖 +提供了依赖软件的预编包或者一键编译,用户不需要单独下载或编译第三方依赖软件。若需要自行编译第三方依赖软件请参考: + +- gflags:编译请参考 [编译文档](https://gflags.github.io/gflags/#download) + +- glog:编译请参考[编译文档](https://github.com/google/glog) + +- opencv: 编译请参考 +[编译文档](https://docs.opencv.org/master/d7/d9f/tutorial_linux_install.html) +### Step4: 编译 +编译`cmake`的命令在`scripts/build.sh`中,修改LITE_DIR为Paddle-Lite预测库目录,若自行编译第三方依赖软件请根据Step1中编译软件的实际情况修改主要参数,其主要内容说明如下: +``` +# Paddle-Lite预编译库的路径 +LITE_DIR=/path/to/Paddle-Lite/inference/lib +# gflags预编译库的路径 +GFLAGS_DIR=$(pwd)/deps/gflags +# glog预编译库的路径 +GLOG_DIR=$(pwd)/deps/glog +# opencv预编译库的路径 +OPENCV_DIR=$(pwd)/deps/opencv/ +``` +执行`build`脚本: + ```shell + sh ./scripts/build.sh + ``` + + +### Step3: 预测 + +编译成功后,分类任务的预测可执行程序为`classifier`,分割任务的预测可执行程序为`segmenter`,检测任务的预测可执行程序为`detector`,其主要命令参数说明如下: +| 参数 | 说明 | +| ---- | ---- | +| --model_dir | 模型转换生成的.xml文件路径,请保证模型转换生成的三个文件在同一路径下| +| --image | 要预测的图片文件路径 | +| --image_list | 按行存储图片路径的.txt文件 | +| --thread_num | 预测的线程数,默认值为1 | +| --cfg_file | PaddleX model 的.yml配置文件 | +| --save_dir | 可视化结果图片保存地址,仅适用于检测和分割任务,默认值为" "既不保存可视化结果 | + +### 样例 +`样例一`: +单张图片分类任务 +测试图片 `/path/to/test_img.jpeg` + +```shell +./build/classifier --model_dir=/path/to/nb_model +--image=/path/to/test_img.jpeg --cfg_file=/path/to/PadlleX_model.yml --thread_num=4 +``` + + +`样例二`: +多张图片分割任务 +预测多个图片`/path/to/image_list.txt`,image_list.txt内容的格式如下: +``` +/path/to/images/test_img1.jpeg +/path/to/images/test_img2.jpeg +... 
+/path/to/images/test_imgn.jpeg +``` + +```shell +./build/segmenter --model_dir=/path/to/models/nb_model --image_list=/root/projects/images_list.txt --cfg_file=/path/to/PadlleX_model.yml --save_dir ./output --thread_num=4 +``` + +## 性能测试 +### 测试环境: +硬件:Raspberry Pi 3 Model B +系统:raspbian OS +软件:paddle-lite 2.6.1 +### 测试结果 +单位ms,num表示paddle-lite下使用的线程数 +|模型|lite(num=4)|输入图片大小| +| ----| ---- | ----| +|mobilenet-v2|136.19|224*224| +|resnet-50|1131.42|224*224| +|deeplabv3|2162.03|512*512| +|hrnet|6118.23|512*512| +|yolov3-darknet53|4741.15|320*320| +|yolov3-mobilenet|1424.01|320*320| +|densenet121|1144.92|224*224| +|densenet161|2751.57|224*224| +|densenet201|1847.06|224*224| +|HRNet_W18|1753.06|224*224| +|MobileNetV1|177.63|224*224| +|MobileNetV3_large_ssld|133.99|224*224| +|MobileNetV3_small_ssld|53.99|224*224| +|ResNet101|2290.56|224*224| +|ResNet101_vd|2337.51|224*224| +|ResNet101_vd_ssld|3124.49|224*224| +|ShuffleNetV2|115.97|224*224| +|Xception41|1418.29|224*224| +|Xception65|2094.7|224*224| + +从测试结果看建议用户在树莓派上使用MobileNetV1-V3,ShuffleNetV2这类型的小型网络 + +## NCS2部署 +树莓派支持通过OpenVINO在NCS2上跑PaddleX模型预测,目前仅支持PaddleX的分类网络,基于NCS2的方式包含Paddle模型转OpenVINO IR以及部署IR在NCS2上进行预测两个步骤。 +- 模型转换请参考:[PaddleX模型转换为OpenVINO IR]('./openvino/export_openvino_model.md'),raspbian OS上的OpenVINO不支持模型转换,需要先在host侧转换FP16的IR。 +- 预测部署请参考[OpenVINO部署](./openvino/linux.md)中VPU在raspbian OS部署的部分 diff --git a/docs/deploy/raspberry/export_nb_model.md b/docs/deploy/raspberry/export_nb_model.md new file mode 100644 index 0000000000000000000000000000000000000000..bbc42c7c87f9b5ae6600bf648e895dfc301c172c --- /dev/null +++ b/docs/deploy/raspberry/export_nb_model.md @@ -0,0 +1,33 @@ +# Paddle-Lite模型转换 +将PaddleX模型转换为Paddle-Lite的nb模型,模型转换主要包括PaddleX转inference model和inference model转Paddle-Lite nb模型 +### Step1:导出inference模型 +PaddleX模型转Paddle-Lite模型之前需要先把PaddleX模型导出为inference格式模型,导出的模型将包括__model__、__params__和model.yml三个文件名。具体方法请参考[Inference模型导出](../export_model.md)。 +### Step2:导出Paddle-Lite模型 +Paddle-Lite模型需要通过Paddle-Lite的opt工具转出模型,下载并解压: [模型优化工具opt(2.6.1-linux)](https://bj.bcebos.com/paddlex/deploy/Rasoberry/opt.zip),在Linux系统下运行: +``` bash +./opt --model_file= \ + --param_file= \ + --valid_targets=arm \ + --optimize_out_type=naive_buffer \ + --optimize_out=model_output_name +``` +| 参数 | 说明 | +| ---- | ---- | +| --model_file | 导出inference模型中包含的网络结构文件:`__model__`所在的路径| +| --param_file | 导出inference模型中包含的参数文件:`__params__`所在的路径| +| --valid_targets |指定模型可执行的backend,这里请指定为`arm`| +| --optimize_out_type | 输出模型类型,目前支持两种类型:protobuf和naive_buffer,其中naive_buffer是一种更轻量级的序列化/反序列化,这里请指定为`naive_buffer`| + + +若安装了python版本的Paddle-Lite也可以通过如下方式转换 +``` +./paddle_lite_opt --model_file= \ + --param_file= \ + --valid_targets=arm \ + --optimize_out_type=naive_buffer \ + --optimize_out=model_output_name +``` + +更多详细的使用方法和参数含义请参考: [使用opt转化模型](https://paddle-lite.readthedocs.io/zh/latest/user_guides/opt/opt_bin.html),更多opt预编译版本请参考[Paddle-Lite Release Note](https://github.com/PaddlePaddle/Paddle-Lite/releases) + +**注意**:opt版本需要跟预测库版本保持一致,如使2.6.0版本预测库,请从上面Release Note中下载2.6.0版本的opt转换模型 \ No newline at end of file diff --git a/docs/deploy/raspberry/index.rst b/docs/deploy/raspberry/index.rst new file mode 100644 index 0000000000000000000000000000000000000000..e5c192134b6e506b7ec92bb34b2f0080482d4cfc --- /dev/null +++ b/docs/deploy/raspberry/index.rst @@ -0,0 +1,11 @@ +树莓派部署 +======================================= + + +.. 
toctree:: + :maxdepth: 2 + :caption: 文档目录: + + Raspberry.md + python.md + export_nb_model.md \ No newline at end of file diff --git a/docs/deploy/raspberry/python.md b/docs/deploy/raspberry/python.md new file mode 100644 index 0000000000000000000000000000000000000000..086b34dcd06709ecc48cce973755c4ada8a84357 --- /dev/null +++ b/docs/deploy/raspberry/python.md @@ -0,0 +1,54 @@ +# Python预测部署 +文档说明了在树莓派上使用Python版本的Paddle-Lite进行PaddleX模型好的预测部署,根据下面的命令安装Python版本的Paddle-Lite预测库,若安装不成功用户也可以下载whl文件进行安装[Paddle-Lite_2.6.0_python](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.6.0/armlinux_python_installer.zip),更多版本请参考[Paddle-Lite Release Note](https://github.com/PaddlePaddle/Paddle-Lite/releases) +``` +python -m pip install paddlelite +``` +部署前需要先将PaddleX模型转换为Paddle-Lite的nb模型,具体请参考[Paddle-Lite模型转换](./export_nb_model.md) +**注意**:若用户使用2.6.0的Python预测库,请下载2.6.0版本的opt转换工具转换模型 + + + +## 前置条件 +* Python 3.6+ +* Paddle-Lite_python 2.6.0+ + +请确保系统已经安装好上述基本软件,**下面所有示例以工作目录 `/root/projects/`演示**。 + +## 预测部署 +运行/root/projects/PaddleX/deploy/raspberry/python目录下demo.py文件可以进行预测,其命令参数说明如下: + +| 参数 | 说明 | +| ---- | ---- | +| --model_dir | 模型转换生成的.xml文件路径,请保证模型转换生成的三个文件在同一路径下| +| --img | 要预测的图片文件路径 | +| --image_list | 按行存储图片路径的.txt文件 | +| --cfg_file | PaddleX model 的.yml配置文件 | +| --thread_num | 预测的线程数, 默认值为1 | +| --input_shape | 模型输入中图片输入的大小[N,C,H.W] | + +### 样例 +`样例一`: +测试图片 `/path/to/test_img.jpeg` + +``` +cd /root/projects/python + +python demo.py --model_dir /path/to/openvino_model --img /path/to/test_img.jpeg --cfg_file /path/to/PadlleX_model.yml --thread_num 4 --input_shape [1,3,224,224] +``` + +样例二`: + +预测多个图片`/path/to/image_list.txt`,image_list.txt内容的格式如下: + +``` +/path/to/images/test_img1.jpeg +/path/to/images/test_img2.jpeg +... +/path/to/images/test_imgn.jpeg +``` + +``` +cd /root/projects/python + +python demo.py --model_dir /path/to/models/openvino_model --image_list /root/projects/images_list.txt --cfg_file=/path/to/PadlleX_model.yml --thread_num 4 --input_shape [1,3,224,224] +``` diff --git a/docs/deploy/server/cpp/linux.md b/docs/deploy/server/cpp/linux.md index d81569e6d280d06e3637dd13a012e38169b615a2..fb35824dc1d3db017fb146e0ba38eb57f25d7545 100644 --- a/docs/deploy/server/cpp/linux.md +++ b/docs/deploy/server/cpp/linux.md @@ -125,6 +125,8 @@ yaml-cpp.zip文件下载后无需解压,在cmake/yaml.cmake中将`URL https:// | image_list | 按行存储图片路径的.txt文件 | | use_gpu | 是否使用 GPU 预测, 支持值为0或1(默认值为0) | | use_trt | 是否使用 TensorRT 预测, 支持值为0或1(默认值为0) | +| use_mkl | 是否使用 MKL加速CPU预测, 支持值为0或1(默认值为1) | +| mkl_thread_num | MKL推理的线程数,默认为cpu处理器个数 | | gpu_id | GPU 设备ID, 默认值为0 | | save_dir | 保存可视化结果的路径, 默认值为"output",**classfier无该参数** | | key | 加密过程中产生的密钥信息,默认值为""表示加载的是未加密的模型 | @@ -141,6 +143,8 @@ yaml-cpp.zip文件下载后无需解压,在cmake/yaml.cmake中将`URL https:// | video_path | 视频文件的路径 | | use_gpu | 是否使用 GPU 预测, 支持值为0或1(默认值为0) | | use_trt | 是否使用 TensorRT 预测, 支持值为0或1(默认值为0) | +| use_mkl | 是否使用 MKL加速CPU预测, 支持值为0或1(默认值为1) | +| mkl_thread_num | MKL推理的线程数,默认为cpu处理器个数 | | gpu_id | GPU 设备ID, 默认值为0 | | show_result | 对视频文件做预测时,是否在屏幕上实时显示预测可视化结果(因加入了延迟处理,故显示结果不能反映真实的帧率),支持值为0或1(默认值为0) | | save_result | 是否将每帧的预测可视结果保存为视频文件,支持值为0或1(默认值为1) | diff --git a/docs/deploy/server/cpp/windows.md b/docs/deploy/server/cpp/windows.md index 4c5ef9e201424cca4b3bcb291ffa74df9c45546b..ce74550c0d6cac08ab5ada9869c7740824708930 100644 --- a/docs/deploy/server/cpp/windows.md +++ b/docs/deploy/server/cpp/windows.md @@ -109,6 +109,8 @@ cd D:\projects\PaddleX\deploy\cpp\out\build\x64-Release | image | 要预测的图片文件路径 | | image_list | 按行存储图片路径的.txt文件 | | use_gpu | 是否使用 GPU 预测, 
支持值为0或1(默认值为0) | +| use_mkl | 是否使用 MKL加速CPU预测, 支持值为0或1(默认值为1) | +| mkl_thread_num | MKL推理的线程数,默认为cpu处理器个数 | | gpu_id | GPU 设备ID, 默认值为0 | | save_dir | 保存可视化结果的路径, 默认值为"output",classifier无该参数 | | key | 加密过程中产生的密钥信息,默认值为""表示加载的是未加密的模型 | @@ -124,6 +126,8 @@ cd D:\projects\PaddleX\deploy\cpp\out\build\x64-Release | camera_id | 摄像头设备ID,默认值为0 | | video_path | 视频文件的路径 | | use_gpu | 是否使用 GPU 预测, 支持值为0或1(默认值为0) | +| use_mkl | 是否使用 MKL加速CPU预测, 支持值为0或1(默认值为1) | +| mkl_thread_num | MKL推理的线程数,默认为cpu处理器个数 | | gpu_id | GPU 设备ID, 默认值为0 | | show_result | 对视频文件做预测时,是否在屏幕上实时显示预测可视化结果(因加入了延迟处理,故显示结果不能反映真实的帧率),支持值为0或1(默认值为0) | | save_result | 是否将每帧的预测可视结果保存为视频文件,支持值为0或1(默认值为1) | diff --git a/docs/examples/meter_reader.md b/docs/examples/meter_reader.md index 670d7d1399b55c672b17ed903663bf26c8a6ef84..114046d16ba4db837e143a63d47b1fcdfdd26680 100644 --- a/docs/examples/meter_reader.md +++ b/docs/examples/meter_reader.md @@ -70,7 +70,6 @@ cd PaddleX/examples/meter_reader/ | save_dir | 保存可视化结果的路径, 默认值为"output"| | score_threshold | 检测模型输出结果中,预测得分低于该阈值的框将被滤除,默认值为0.5| | seg_batch_size | 分割的批量大小,默认为2 | -| seg_thread_num | 分割预测的线程数,默认为cpu处理器个数 | | use_camera | 是否使用摄像头采集图片,默认为False | | camera_id | 摄像头设备ID,默认值为0 | | use_erode | 是否使用图像腐蚀对分割预测图进行细分,默认为False | diff --git a/examples/meter_reader/README.md b/examples/meter_reader/README.md index ce5666f5afeecb0dc97dd78429ae132ae52a7723..a5cae4af0cc3ace2ee80b10ccc44c2fff79ea0cc 100644 --- a/examples/meter_reader/README.md +++ b/examples/meter_reader/README.md @@ -79,7 +79,6 @@ cd PaddleX/examples/meter_reader/ | save_dir | 保存可视化结果的路径, 默认值为"output"| | score_threshold | 检测模型输出结果中,预测得分低于该阈值的框将被滤除,默认值为0.5| | seg_batch_size | 分割的批量大小,默认为2 | -| seg_thread_num | 分割预测的线程数,默认为cpu处理器个数 | | use_camera | 是否使用摄像头采集图片,默认为False | | camera_id | 摄像头设备ID,默认值为0 | | use_erode | 是否使用图像腐蚀对分割预测图进行细分,默认为False | diff --git a/examples/meter_reader/deploy/python/reader_deploy.py b/examples/meter_reader/deploy/python/reader_deploy.py index a5f5d18b0edad902217b6392cfc53dfb4709daf9..1006ff83a732bf0a12f586e37aa1ad99e932ccb5 100644 --- a/examples/meter_reader/deploy/python/reader_deploy.py +++ b/examples/meter_reader/deploy/python/reader_deploy.py @@ -105,12 +105,6 @@ def parse_args(): help="Segmentation batch size", type=int, default=2) - parser.add_argument( - '--seg_thread_num', - dest='seg_thread_num', - help="Thread number of segmentation preprocess", - type=int, - default=2) return parser.parse_args() @@ -143,8 +137,7 @@ class MeterReader: use_erode=True, erode_kernel=4, score_threshold=0.5, - seg_batch_size=2, - seg_thread_num=2): + seg_batch_size=2): if isinstance(im_file, str): im = cv2.imread(im_file).astype('float32') else: @@ -190,8 +183,7 @@ class MeterReader: meter_images.append(resized_meters[j - i]) result = self.segmenter.batch_predict( transforms=self.seg_transforms, - img_file_list=meter_images, - thread_num=seg_thread_num) + img_file_list=meter_images) if use_erode: kernel = np.ones((erode_kernel, erode_kernel), np.uint8) for i in range(len(result)): @@ -334,7 +326,7 @@ def infer(args): for im_file in image_lists: meter_reader.predict(im_file, args.save_dir, args.use_erode, args.erode_kernel, args.score_threshold, - args.seg_batch_size, args.seg_thread_num) + args.seg_batch_size) elif args.use_camera: cap_video = cv2.VideoCapture(args.camera_id) if not cap_video.isOpened(): @@ -347,7 +339,7 @@ def infer(args): if ret: meter_reader.predict(frame, args.save_dir, args.use_erode, args.erode_kernel, args.score_threshold, - args.seg_batch_size, args.seg_thread_num) + args.seg_batch_size) if 
cv2.waitKey(1) & 0xFF == ord('q'): break else: diff --git a/examples/meter_reader/reader_infer.py b/examples/meter_reader/reader_infer.py index c7f7d7367a7ef3d0b6bba4fd1c6a3258cd5145ac..58108bbacfc02422bd9e80dd93d53252694cec1e 100644 --- a/examples/meter_reader/reader_infer.py +++ b/examples/meter_reader/reader_infer.py @@ -105,12 +105,6 @@ def parse_args(): help="Segmentation batch size", type=int, default=2) - parser.add_argument( - '--seg_thread_num', - dest='seg_thread_num', - help="Thread number of segmentation preprocess", - type=int, - default=2) return parser.parse_args() @@ -143,8 +137,7 @@ class MeterReader: use_erode=True, erode_kernel=4, score_threshold=0.5, - seg_batch_size=2, - seg_thread_num=2): + seg_batch_size=2): if isinstance(im_file, str): im = cv2.imread(im_file).astype('float32') else: @@ -190,8 +183,7 @@ class MeterReader: meter_images.append(resized_meters[j - i]) result = self.segmenter.batch_predict( transforms=self.seg_transforms, - img_file_list=meter_images, - thread_num=seg_thread_num) + img_file_list=meter_images) if use_erode: kernel = np.ones((erode_kernel, erode_kernel), np.uint8) for i in range(len(result)): @@ -334,7 +326,7 @@ def infer(args): for im_file in image_lists: meter_reader.predict(im_file, args.save_dir, args.use_erode, args.erode_kernel, args.score_threshold, - args.seg_batch_size, args.seg_thread_num) + args.seg_batch_size) elif args.use_camera: cap_video = cv2.VideoCapture(args.camera_id) if not cap_video.isOpened(): @@ -347,7 +339,7 @@ def infer(args): if ret: meter_reader.predict(frame, args.save_dir, args.use_erode, args.erode_kernel, args.score_threshold, - args.seg_batch_size, args.seg_thread_num) + args.seg_batch_size) if cv2.waitKey(1) & 0xFF == ord('q'): break else: diff --git a/paddlex/__init__.py b/paddlex/__init__.py index d35fd044e3cbd0066ca8ce3d0188ab8493088d85..84b7270ade652b39a6376ceb560ddf16a341725e 100644 --- a/paddlex/__init__.py +++ b/paddlex/__init__.py @@ -57,4 +57,4 @@ log_level = 2 from . 
import interpret
-__version__ = '1.1.1'
+__version__ = '1.1.4'
diff --git a/paddlex/command.py b/paddlex/command.py
index 590681dea6395bd2e95d63ca938aa62e721d27c7..a433cbcfb8f308470e27b0bc31d4d55ef7a3c29c
--- a/paddlex/command.py
+++ b/paddlex/command.py
@@ -51,6 +51,12 @@ def arg_parser():
         action="store_true",
         default=False,
         help="export onnx model for deployment")
+    parser.add_argument(
+        "--onnx_opset",
+        "-oo",
+        type=int,
+        default=10,
+        help="the onnx opset version used when exporting with paddle2onnx")
     parser.add_argument(
         "--data_conversion",
         "-dc",
@@ -162,7 +168,7 @@ def main():
             logging.error(
                 "paddlex --export_inference --model_dir model_path --save_dir infer_model"
             )
-        pdx.convertor.export_onnx_model(model, args.save_dir)
+        pdx.convertor.export_onnx_model(model, args.save_dir, args.onnx_opset)

     if args.data_conversion:
         assert args.source is not None, "--source should be defined while converting dataset"
@@ -183,7 +189,7 @@ def main():
     if args.split_dataset:
         assert args.dataset_dir is not None, "--dataset_dir should be defined while splitting dataset"
-        assert args.format is not None, "--form should be defined while splitting dataset"
+        assert args.format is not None, "--format should be defined while splitting dataset"
         assert args.val_value is not None, "--val_value should be defined while splitting dataset"

         dataset_dir = args.dataset_dir
diff --git a/paddlex/convertor.py b/paddlex/convertor.py
index 6f4daa5ddb1492f12581b31ae990592bf271d5ee..9aafa8d2d1d6e7e4ca4d434f6799876bdd8ae873
--- a/paddlex/convertor.py
+++ b/paddlex/convertor.py
@@ -29,10 +29,12 @@ def export_onnx(model_dir, save_dir, fixed_input_shape):
     export_onnx_model(model, save_dir)


-def export_onnx_model(model, save_dir):
-    if model.model_type == "detector" or model.__class__.__name__ == "FastSCNN":
+def export_onnx_model(model, save_dir, opset_version=10):
+    if model.__class__.__name__ == "FastSCNN" or (
+            model.model_type == "detector" and
+            model.__class__.__name__ != "YOLOv3"):
         logging.error(
-            "Only image classifier models and semantic segmentation models(except FastSCNN) are supported to export to ONNX"
+            "Only image classification models, YOLOv3 detection models and semantic segmentation models (except FastSCNN) can be exported to ONNX"
         )
     try:
         import x2paddle
@@ -41,6 +43,406 @@
     except:
         logging.error(
             "You need to install x2paddle first, pip install x2paddle>=0.7.4")
-    from x2paddle.op_mapper.paddle_op_mapper import PaddleOpMapper
+    if opset_version == 10 and model.__class__.__name__ == "YOLOv3":
+        logging.warning(
+            "Exporting for OpenVINO by default; the multiclass_nms output in the exported onnx model will contain the background class. If you need an onnx model fully consistent with Paddle, please export with X2Paddle directly"
+        )
+        x2paddle.op_mapper.paddle2onnx.opset10.paddle_custom_layer.multiclass_nms.multiclass_nms = multiclass_nms_for_openvino
+    from x2paddle.op_mapper.paddle2onnx.paddle_op_mapper import PaddleOpMapper
     mapper = PaddleOpMapper()
-    mapper.convert(model.test_prog, save_dir)
+    mapper.convert(
+        model.test_prog,
+        save_dir,
+        scope=model.scope,
+        opset_version=opset_version)
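The helper below builds a long chain of ONNX nodes, and its core trick is easier to see in plain numpy first. ONNX `NonMaxSuppression` returns `selected_indices` of shape `[num_selected, 3]`, one `(batch, class, box)` triple per kept detection; the Gather/Mul/Add nodes that follow turn each triple into a flat offset `class * M + box`, so that the scores tensor, flattened from `[N, C, M]`, can be gathered in a single pass. A small illustration with made-up numbers:

```python
import numpy as np

# What ONNX NonMaxSuppression emits: one (batch, class, box) row per kept box.
selected_indices = np.array([[0, 1, 7], [0, 1, 2], [0, 3, 7]])

scores = np.random.rand(1, 4, 100).astype("float32")  # [N, C, M]
M = scores.shape[2]

# The flat offset class * M + box, exactly what the Mul/Add nodes compute.
flat = selected_indices[:, 1] * M + selected_indices[:, 2]

# Gathering from the flattened scores recovers the kept scores.
kept = scores.reshape(-1)[flat]
assert np.allclose(
    kept, scores[0, selected_indices[:, 1], selected_indices[:, 2]])
```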
+ """ + import math + import sys + import numpy as np + import paddle.fluid.core as core + import paddle.fluid as fluid + import onnx + import warnings + from onnx import helper, onnx_pb + inputs = dict() + outputs = dict() + attrs = dict() + for name in op.input_names: + inputs[name] = op.input(name) + for name in op.output_names: + outputs[name] = op.output(name) + for name in op.attr_names: + attrs[name] = op.attr(name) + + result_name = outputs['Out'][0] + background = attrs['background_label'] + normalized = attrs['normalized'] + if normalized == False: + warnings.warn( + 'The parameter normalized of multiclass_nms OP of Paddle is False, which has diff with ONNX. \ + Please set normalized=True in multiclass_nms of Paddle' + ) + + #convert the paddle attribute to onnx tensor + name_score_threshold = [outputs['Out'][0] + "@score_threshold"] + name_iou_threshold = [outputs['Out'][0] + "@iou_threshold"] + name_keep_top_k = [outputs['Out'][0] + '@keep_top_k'] + name_keep_top_k_2D = [outputs['Out'][0] + '@keep_top_k_1D'] + + node_score_threshold = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=name_score_threshold, + value=onnx.helper.make_tensor( + name=name_score_threshold[0] + "@const", + data_type=onnx.TensorProto.FLOAT, + dims=(), + vals=[float(attrs['score_threshold'])])) + + node_iou_threshold = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=name_iou_threshold, + value=onnx.helper.make_tensor( + name=name_iou_threshold[0] + "@const", + data_type=onnx.TensorProto.FLOAT, + dims=(), + vals=[float(attrs['nms_threshold'])])) + + node_keep_top_k = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=name_keep_top_k, + value=onnx.helper.make_tensor( + name=name_keep_top_k[0] + "@const", + data_type=onnx.TensorProto.INT64, + dims=(), + vals=[np.int64(attrs['keep_top_k'])])) + + node_keep_top_k_2D = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=name_keep_top_k_2D, + value=onnx.helper.make_tensor( + name=name_keep_top_k_2D[0] + "@const", + data_type=onnx.TensorProto.INT64, + dims=[1, 1], + vals=[np.int64(attrs['keep_top_k'])])) + + # the paddle data format is x1,y1,x2,y2 + kwargs = {'center_point_box': 0} + + name_select_nms = [outputs['Out'][0] + "@select_index"] + node_select_nms= onnx.helper.make_node( + 'NonMaxSuppression', + inputs=inputs['BBoxes'] + inputs['Scores'] + name_keep_top_k +\ + name_iou_threshold + name_score_threshold, + outputs=name_select_nms) + # step 1 nodes select the nms class + node_list = [ + node_score_threshold, node_iou_threshold, node_keep_top_k, + node_keep_top_k_2D, node_select_nms + ] + + # create some const value to use + name_const_value = [result_name+"@const_0", + result_name+"@const_1",\ + result_name+"@const_2",\ + result_name+"@const_-1"] + value_const_value = [0, 1, 2, -1] + for name, value in zip(name_const_value, value_const_value): + node = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=[name], + value=onnx.helper.make_tensor( + name=name + "@const", + data_type=onnx.TensorProto.INT64, + dims=[1], + vals=[value])) + node_list.append(node) + + # In this code block, we will deocde the raw score data, reshape N * C * M to 1 * N*C*M + # and the same time, decode the select indices to 1 * D, gather the select_indices + outputs_gather_1_ = [result_name + "@gather_1_"] + node_gather_1_ = onnx.helper.make_node( + 'Gather', + inputs=name_select_nms + [result_name + "@const_1"], + outputs=outputs_gather_1_, + axis=1) + node_list.append(node_gather_1_) + outputs_gather_1 = [result_name + "@gather_1"] + 
+    node_gather_1 = onnx.helper.make_node(
+        'Unsqueeze',
+        inputs=outputs_gather_1_,
+        outputs=outputs_gather_1,
+        axes=[0])
+    node_list.append(node_gather_1)
+
+    outputs_gather_2_ = [result_name + "@gather_2_"]
+    node_gather_2_ = onnx.helper.make_node(
+        'Gather',
+        inputs=name_select_nms + [result_name + "@const_2"],
+        outputs=outputs_gather_2_,
+        axis=1)
+    node_list.append(node_gather_2_)
+
+    outputs_gather_2 = [result_name + "@gather_2"]
+    node_gather_2 = onnx.helper.make_node(
+        'Unsqueeze',
+        inputs=outputs_gather_2_,
+        outputs=outputs_gather_2,
+        axes=[0])
+    node_list.append(node_gather_2)
+
+    # flatten the scores from N * C * M into one dimension (N*C*M,)
+    outputs_reshape_scores_rank1 = [result_name + "@reshape_scores_rank1"]
+    node_reshape_scores_rank1 = onnx.helper.make_node(
+        "Reshape",
+        inputs=inputs['Scores'] + [result_name + "@const_-1"],
+        outputs=outputs_reshape_scores_rank1)
+    node_list.append(node_reshape_scores_rank1)
+
+    # get the shape of the scores
+    outputs_shape_scores = [result_name + "@shape_scores"]
+    node_shape_scores = onnx.helper.make_node(
+        'Shape', inputs=inputs['Scores'], outputs=outputs_shape_scores)
+    node_list.append(node_shape_scores)
+
+    # gather dim 2 (the boxes number M) from the shape of the scores
+    outputs_gather_scores_dim1 = [result_name + "@gather_scores_dim1"]
+    node_gather_scores_dim1 = onnx.helper.make_node(
+        'Gather',
+        inputs=outputs_shape_scores + [result_name + "@const_2"],
+        outputs=outputs_gather_scores_dim1,
+        axis=0)
+    node_list.append(node_gather_scores_dim1)
+
+    # multiply the class index by M
+    outputs_mul_classnum_boxnum = [result_name + "@mul_classnum_boxnum"]
+    node_mul_classnum_boxnum = onnx.helper.make_node(
+        'Mul',
+        inputs=outputs_gather_1 + outputs_gather_scores_dim1,
+        outputs=outputs_mul_classnum_boxnum)
+    node_list.append(node_mul_classnum_boxnum)
+
+    # add the box index to class * M to obtain the flat index
+    outputs_add_class_M_index = [result_name + "@add_class_M_index"]
+    node_add_class_M_index = onnx.helper.make_node(
+        'Add',
+        inputs=outputs_mul_classnum_boxnum + outputs_gather_2,
+        outputs=outputs_add_class_M_index)
+    node_list.append(node_add_class_M_index)
+
+    # squeeze the indices to 1 dim
+    outputs_squeeze_select_index = [result_name + "@squeeze_select_index"]
+    node_squeeze_select_index = onnx.helper.make_node(
+        'Squeeze',
+        inputs=outputs_add_class_M_index,
+        outputs=outputs_squeeze_select_index,
+        axes=[0, 2])
+    node_list.append(node_squeeze_select_index)
+
+    # gather the selected scores from the flattened scores
+    outputs_gather_select_scores = [result_name + "@gather_select_scores"]
+    node_gather_select_scores = onnx.helper.make_node('Gather',
+        inputs=outputs_reshape_scores_rank1 + \
+            outputs_squeeze_select_index,
+        outputs=outputs_gather_select_scores,
+        axis=0)
+    node_list.append(node_gather_select_scores)
+
+    # get the number of selected boxes, to feed into TopK
+    outputs_shape_select_num = [result_name + "@shape_select_num"]
+    node_shape_select_num = onnx.helper.make_node(
+        'Shape',
+        inputs=outputs_gather_select_scores,
+        outputs=outputs_shape_select_num)
+    node_list.append(node_shape_select_num)
+
+    outputs_gather_select_num = [result_name + "@gather_select_num"]
+    node_gather_select_num = onnx.helper.make_node(
+        'Gather',
+        inputs=outputs_shape_select_num + [result_name + "@const_0"],
+        outputs=outputs_gather_select_num,
+        axis=0)
+    node_list.append(node_gather_select_num)
+
+    outputs_unsqueeze_select_num = [result_name + "@unsqueeze_select_num"]
+    node_unsqueeze_select_num = onnx.helper.make_node(
+        'Unsqueeze',
+        inputs=outputs_gather_select_num,
+        outputs=outputs_unsqueeze_select_num,
+        axes=[0])
+    node_list.append(node_unsqueeze_select_num)
+
+    outputs_concat_topK_select_num = [result_name + "@concat_topK_select_num"]
+    node_concat_topK_select_num = onnx.helper.make_node(
+        'Concat',
+        inputs=outputs_unsqueeze_select_num + name_keep_top_k_2D,
+        outputs=outputs_concat_topK_select_num,
+        axis=0)
+    node_list.append(node_concat_topK_select_num)
+
+    outputs_cast_concat_topK_select_num = [
+        result_name + "@cast_concat_topK_select_num"
+    ]
+    node_outputs_cast_concat_topK_select_num = onnx.helper.make_node(
+        'Cast',
+        inputs=outputs_concat_topK_select_num,
+        outputs=outputs_cast_concat_topK_select_num,
+        to=6)
+    node_list.append(node_outputs_cast_concat_topK_select_num)
+    # get min(topK, num_select)
+    outputs_compare_topk_num_select = [
+        result_name + "@compare_topk_num_select"
+    ]
+    node_compare_topk_num_select = onnx.helper.make_node(
+        'ReduceMin',
+        inputs=outputs_cast_concat_topK_select_num,
+        outputs=outputs_compare_topk_num_select,
+        keepdims=0)
+    node_list.append(node_compare_topk_num_select)
+
+    # unsqueeze the min value back to a 1-D tensor
+    outputs_unsqueeze_topk_select_indices = [
+        result_name + "@unsqueeze_topk_select_indices"
+    ]
+    node_unsqueeze_topk_select_indices = onnx.helper.make_node(
+        'Unsqueeze',
+        inputs=outputs_compare_topk_num_select,
+        outputs=outputs_unsqueeze_topk_select_indices,
+        axes=[0])
+    node_list.append(node_unsqueeze_topk_select_indices)
+
+    # cast K to INT64, as TopK requires
+    outputs_cast_topk_indices = [result_name + "@cast_topk_indices"]
+    node_cast_topk_indices = onnx.helper.make_node(
+        'Cast',
+        inputs=outputs_unsqueeze_topk_select_indices,
+        outputs=outputs_cast_topk_indices,
+        to=7)
+    node_list.append(node_cast_topk_indices)
+
+    # run TopK to select the top-k scores and their indices
+    outputs_topk_select_topk_indices = [result_name + "@topk_select_topk_values",\
+        result_name + "@topk_select_topk_indices"]
+    node_topk_select_topk_indices = onnx.helper.make_node(
+        'TopK',
+        inputs=outputs_gather_select_scores + outputs_cast_topk_indices,
+        outputs=outputs_topk_select_topk_indices)
+    node_list.append(node_topk_select_topk_indices)
+
+    # gather the top-k labels, scores and boxes
+    outputs_gather_topk_scores = [result_name + "@gather_topk_scores"]
+    node_gather_topk_scores = onnx.helper.make_node(
+        'Gather',
+        inputs=outputs_gather_select_scores +
+        [outputs_topk_select_topk_indices[1]],
+        outputs=outputs_gather_topk_scores,
+        axis=0)
+    node_list.append(node_gather_topk_scores)
+
+    outputs_gather_topk_class = [result_name + "@gather_topk_class"]
+    node_gather_topk_class = onnx.helper.make_node(
+        'Gather',
+        inputs=outputs_gather_1 + [outputs_topk_select_topk_indices[1]],
+        outputs=outputs_gather_topk_class,
+        axis=1)
+    node_list.append(node_gather_topk_class)
+
+    # to gather the boxes, first gather the box ids, then gather the boxes by id
+    outputs_gather_topk_boxes_id = [result_name + "@gather_topk_boxes_id"]
+    node_gather_topk_boxes_id = onnx.helper.make_node(
+        'Gather',
+        inputs=outputs_gather_2 + [outputs_topk_select_topk_indices[1]],
+        outputs=outputs_gather_topk_boxes_id,
+        axis=1)
+    node_list.append(node_gather_topk_boxes_id)
+
+    # squeeze the gather_topk_boxes_id to 1 dim
+    outputs_squeeze_topk_boxes_id = [result_name + "@squeeze_topk_boxes_id"]
+    node_squeeze_topk_boxes_id = onnx.helper.make_node(
+        'Squeeze',
+        inputs=outputs_gather_topk_boxes_id,
+        outputs=outputs_squeeze_topk_boxes_id,
+        axes=[0, 2])
+    node_list.append(node_squeeze_topk_boxes_id)
+
+    outputs_gather_select_boxes = [result_name + "@gather_select_boxes"]
+    node_gather_select_boxes = onnx.helper.make_node(
+        'Gather',
+        inputs=inputs['BBoxes'] + outputs_squeeze_topk_boxes_id,
+        outputs=outputs_gather_select_boxes,
+        axis=1)
+    node_list.append(node_gather_select_boxes)
+
+    # concat the final result; the class ids must be cast to float first
+    outputs_cast_topk_class = [result_name + "@cast_topk_class"]
+    node_cast_topk_class = onnx.helper.make_node(
+        'Cast',
+        inputs=outputs_gather_topk_class,
+        outputs=outputs_cast_topk_class,
+        to=1)
+    node_list.append(node_cast_topk_class)
+
+    outputs_unsqueeze_topk_scores = [result_name + "@unsqueeze_topk_scores"]
+    node_unsqueeze_topk_scores = onnx.helper.make_node(
+        'Unsqueeze',
+        inputs=outputs_gather_topk_scores,
+        outputs=outputs_unsqueeze_topk_scores,
+        axes=[0, 2])
+    node_list.append(node_unsqueeze_topk_scores)
+
+    inputs_concat_final_results = outputs_cast_topk_class + outputs_unsqueeze_topk_scores +\
+        outputs_gather_select_boxes
+    outputs_sort_by_score_results = [result_name + "@concat_topk_scores"]
+    node_sort_by_score_results = onnx.helper.make_node(
+        'Concat',
+        inputs=inputs_concat_final_results,
+        outputs=outputs_sort_by_score_results,
+        axis=2)
+    node_list.append(node_sort_by_score_results)
+
+    # reorder the kept detections by class id, via TopK on the negated ids
+    outputs_squeeze_cast_topk_class = [
+        result_name + "@squeeze_cast_topk_class"
+    ]
+    node_squeeze_cast_topk_class = onnx.helper.make_node(
+        'Squeeze',
+        inputs=outputs_cast_topk_class,
+        outputs=outputs_squeeze_cast_topk_class,
+        axes=[0, 2])
+    node_list.append(node_squeeze_cast_topk_class)
+    outputs_neg_squeeze_cast_topk_class = [
+        result_name + "@neg_squeeze_cast_topk_class"
+    ]
+    node_neg_squeeze_cast_topk_class = onnx.helper.make_node(
+        'Neg',
+        inputs=outputs_squeeze_cast_topk_class,
+        outputs=outputs_neg_squeeze_cast_topk_class)
+    node_list.append(node_neg_squeeze_cast_topk_class)
+    outputs_topk_select_classes_indices = [result_name + "@topk_select_topk_classes_scores",\
+        result_name + "@topk_select_topk_classes_indices"]
+    node_topk_select_classes_indices = onnx.helper.make_node(
+        'TopK',
+        inputs=outputs_neg_squeeze_cast_topk_class + outputs_cast_topk_indices,
+        outputs=outputs_topk_select_classes_indices)
+    node_list.append(node_topk_select_classes_indices)
+    outputs_concat_final_results = outputs['Out']
+    node_concat_final_results = onnx.helper.make_node(
+        'Gather',
+        inputs=outputs_sort_by_score_results +
+        [outputs_topk_select_classes_indices[1]],
+        outputs=outputs_concat_final_results,
+        axis=1)
+    node_list.append(node_concat_final_results)
+    return node_list
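With the custom mapping in place, exporting from Python follows the signature introduced above. A sketch, assuming a trained YOLOv3 model directory (the paths are placeholders; `pdx.load_model` and `export_onnx_model` are this codebase's own APIs, and the CLI exposes the same knob through the new `--onnx_opset` flag):

```python
import paddlex as pdx

model = pdx.load_model('output/yolov3_mobilenetv1/best_model')

# opset_version=10 (the default) targets OpenVINO and swaps in the
# multiclass_nms mapping above for YOLOv3; other opset versions fall
# through to the stock x2paddle mapping.
pdx.convertor.export_onnx_model(model, 'onnx_model', opset_version=10)
```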
diff --git a/paddlex/cv/models/base.py b/paddlex/cv/models/base.py
index 19bf4f034a2fb2c0c42126843913517f8c7cb56a..899d1d3b64e4d166fcdf432fadaf4eaebe189710
--- a/paddlex/cv/models/base.py
+++ b/paddlex/cv/models/base.py
@@ -23,6 +23,7 @@ import yaml
 import copy
 import json
 import functools
+import multiprocessing as mp
 import paddlex.utils.logging as logging
 from paddlex.utils import seconds_to_hms
 from paddlex.utils.utils import EarlyStop
@@ -76,6 +77,16 @@ class BaseAPI:
         self.completed_epochs = 0
         self.scope = fluid.global_scope()
+        # 线程池,在模型预测时用于对输入数据以图片为单位进行并行处理
+        # 主要用于batch_predict接口
+        thread_num = mp.cpu_count() if mp.cpu_count() < 8 else 8
+        self.thread_pool = mp.pool.ThreadPool(thread_num)
+
+    def reset_thread_pool(self, thread_num):
+        self.thread_pool.close()
+        self.thread_pool.join()
+        self.thread_pool = mp.pool.ThreadPool(thread_num)
+
     def _get_single_card_bs(self, batch_size):
         if batch_size % len(self.places) == 0:
             return int(batch_size // len(self.places))
@@ -356,23 +367,13 @@
         ]
         test_outputs = list(self.test_outputs.values())
         with fluid.scope_guard(self.scope):
-            if self.__class__.__name__ == 'MaskRCNN':
-                from paddlex.utils.save import save_mask_inference_model
-                save_mask_inference_model(
-                    dirname=save_dir,
-                    executor=self.exe,
-                    params_filename='__params__',
-                    feeded_var_names=test_input_names,
-                    target_vars=test_outputs,
-                    main_program=self.test_prog)
-            else:
-                fluid.io.save_inference_model(
-                    dirname=save_dir,
-                    executor=self.exe,
-                    params_filename='__params__',
-                    feeded_var_names=test_input_names,
-                    target_vars=test_outputs,
-                    main_program=self.test_prog)
+            fluid.io.save_inference_model(
+                dirname=save_dir,
+                executor=self.exe,
+                params_filename='__params__',
+                feeded_var_names=test_input_names,
+                target_vars=test_outputs,
+                main_program=self.test_prog)
         model_info = self.get_model_info()
         model_info['status'] = 'Infer'
diff --git a/paddlex/cv/models/classifier.py b/paddlex/cv/models/classifier.py
index 7f1c3527d8c681e8737e6a65a898ec083495bf4b..41590ad99e5c645744324c467b131b2c6271c13b
--- a/paddlex/cv/models/classifier.py
+++ b/paddlex/cv/models/classifier.py
@@ -279,16 +279,18 @@ class BaseClassifier(BaseAPI):
         return metrics

     @staticmethod
-    def _preprocess(images, transforms, model_type, class_name, thread_num=1):
+    def _preprocess(images, transforms, model_type, class_name, thread_pool=None):
         arrange_transforms(
             model_type=model_type,
             class_name=class_name,
             transforms=transforms,
             mode='test')
-        pool = ThreadPool(thread_num)
-        batch_data = pool.map(transforms, images)
-        pool.close()
-        pool.join()
+        if thread_pool is not None:
+            batch_data = thread_pool.map(transforms, images)
+        else:
+            batch_data = list()
+            for image in images:
+                batch_data.append(transforms(image))
         padding_batch = generate_minibatch(batch_data)
         im = np.array([data[0] for data in padding_batch])

@@ -344,15 +346,13 @@ class BaseClassifier(BaseAPI):
     def batch_predict(self,
                       img_file_list,
                       transforms=None,
-                      topk=1,
-                      thread_num=2):
+                      topk=1):
         """预测。
         Args:
             img_file_list(list|tuple): 对列表(或元组)中的图像同时进行预测,列表中的元素可以是图像路径
                 也可以是解码后的排列格式为(H,W,C)且类型为float32且为BGR格式的数组。
             transforms (paddlex.cls.transforms): 数据预处理操作。
             topk (int): 预测时前k个最大值。
-            thread_num (int): 并发执行各图像预处理时的线程数。
         Returns:
             list: 每个元素都为列表,表示各图像的预测结果。在各图像的预测列表中,其中元素均为字典。字典的关键字为'category_id'、'category'、'score',
                 分别对应预测类别id、预测类别标签、预测得分。
@@ -367,7 +367,7 @@
             transforms = self.test_transforms
         im = BaseClassifier._preprocess(img_file_list, transforms,
                                         self.model_type,
-                                        self.__class__.__name__, thread_num)
+                                        self.__class__.__name__, self.thread_pool)

         with fluid.scope_guard(self.scope):
             result = self.exe.run(self.test_prog,
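The user-visible effect of this refactor, which is repeated for every model class below: `batch_predict` no longer accepts `thread_num`, and preprocessing parallelism instead comes from the model's persistent pool, resized via `reset_thread_pool`. A sketch with placeholder paths and image names:

```python
import paddlex as pdx

model = pdx.load_model('output/mobilenetv2/best_model')  # placeholder path

# Previously: model.batch_predict(imgs, topk=5, thread_num=4).
# Now the pool is owned by the model; it defaults to min(cpu_count(), 8).
model.reset_thread_pool(4)
results = model.batch_predict(img_file_list=['1.jpg', '2.jpg'], topk=5)
```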
diff --git a/paddlex/cv/models/deeplabv3p.py b/paddlex/cv/models/deeplabv3p.py
index 9371859f647ede5740a3c4a3190ef5fbed66ebce..49a6a1d33e31ccc871df7c02301f40ba606a51dc
--- a/paddlex/cv/models/deeplabv3p.py
+++ b/paddlex/cv/models/deeplabv3p.py
@@ -448,16 +448,18 @@ class DeepLabv3p(BaseAPI):
         return metrics

     @staticmethod
-    def _preprocess(images, transforms, model_type, class_name, thread_num=1):
+    def _preprocess(images, transforms, model_type, class_name, thread_pool=None):
         arrange_transforms(
             model_type=model_type,
             class_name=class_name,
             transforms=transforms,
             mode='test')
-        pool = ThreadPool(thread_num)
-        batch_data = pool.map(transforms, images)
-        pool.close()
-        pool.join()
+        if thread_pool is not None:
+            batch_data = thread_pool.map(transforms, images)
+        else:
+            batch_data = list()
+            for image in images:
+                batch_data.append(transforms(image))
         padding_batch = generate_minibatch(batch_data)
         im = np.array(
             [data[0] for data in padding_batch],
@@ -522,13 +524,12 @@ class DeepLabv3p(BaseAPI):
             preds = DeepLabv3p._postprocess(result, im_info)
         return preds[0]

-    def batch_predict(self, img_file_list, transforms=None, thread_num=2):
+    def batch_predict(self, img_file_list, transforms=None):
         """预测。
         Args:
             img_file_list(list|tuple): 对列表(或元组)中的图像同时进行预测,列表中的元素可以是图像路径
                 也可以是解码后的排列格式为(H,W,C)且类型为float32且为BGR格式的数组。
             transforms(paddlex.cv.transforms): 数据预处理操作。
-            thread_num (int): 并发执行各图像预处理时的线程数。
         Returns:
             list: 每个元素都为列表,表示各图像的预测结果。各图像的预测结果用字典表示,包含关键字'label_map'和'score_map', 'label_map'存储预测结果灰度图,
@@ -543,7 +544,7 @@
             transforms = self.test_transforms
         im, im_info = DeepLabv3p._preprocess(
             img_file_list, transforms, self.model_type,
-            self.__class__.__name__, thread_num)
+            self.__class__.__name__, self.thread_pool)

         with fluid.scope_guard(self.scope):
             result = self.exe.run(self.test_prog,
diff --git a/paddlex/cv/models/faster_rcnn.py b/paddlex/cv/models/faster_rcnn.py
index 3ab4da52899a7d122a68d2de17666addc8ae4849..f09ca047903964576517553a4b3705ce85d0dc12
--- a/paddlex/cv/models/faster_rcnn.py
+++ b/paddlex/cv/models/faster_rcnn.py
@@ -376,16 +376,18 @@ class FasterRCNN(BaseAPI):
         return metrics

     @staticmethod
-    def _preprocess(images, transforms, model_type, class_name, thread_num=1):
+    def _preprocess(images, transforms, model_type, class_name, thread_pool=None):
         arrange_transforms(
             model_type=model_type,
             class_name=class_name,
             transforms=transforms,
             mode='test')
-        pool = ThreadPool(thread_num)
-        batch_data = pool.map(transforms, images)
-        pool.close()
-        pool.join()
+        if thread_pool is not None:
+            batch_data = thread_pool.map(transforms, images)
+        else:
+            batch_data = list()
+            for image in images:
+                batch_data.append(transforms(image))
         padding_batch = generate_minibatch(batch_data)
         im = np.array([data[0] for data in padding_batch])
         im_resize_info = np.array([data[1] for data in padding_batch])
@@ -453,14 +455,13 @@ class FasterRCNN(BaseAPI):
         return preds[0]

-    def batch_predict(self, img_file_list, transforms=None, thread_num=2):
+    def batch_predict(self, img_file_list, transforms=None):
         """预测。
         Args:
             img_file_list(list|tuple): 对列表(或元组)中的图像同时进行预测,列表中的元素可以是图像路径
                 也可以是解码后的排列格式为(H,W,C)且类型为float32且为BGR格式的数组。
             transforms (paddlex.det.transforms): 数据预处理操作。
-            thread_num (int): 并发执行各图像预处理时的线程数。
         Returns:
             list: 每个元素都为列表,表示各图像的预测结果。在各图像的预测结果列表中,每个预测结果由预测框类别标签、
@@ -477,7 +478,7 @@
             transforms = self.test_transforms
         im, im_resize_info, im_shape = FasterRCNN._preprocess(
             img_file_list, transforms, self.model_type,
-            self.__class__.__name__, thread_num)
+            self.__class__.__name__, self.thread_pool)

         with fluid.scope_guard(self.scope):
             result = self.exe.run(self.test_prog,
diff --git a/paddlex/cv/models/mask_rcnn.py b/paddlex/cv/models/mask_rcnn.py
index 7f31cd530ff0d6660e65661531b442941c88a336..0869fec29ac9ff1cc503a9aa2d6ee2446eddb855
--- a/paddlex/cv/models/mask_rcnn.py
+++ b/paddlex/cv/models/mask_rcnn.py
@@ -408,14 +408,13 @@ class MaskRCNN(FasterRCNN):
         return preds[0]

-    def batch_predict(self, img_file_list, transforms=None, thread_num=2):
+    def batch_predict(self, img_file_list, transforms=None):
         """预测。
         Args:
             img_file_list(list|tuple): 对列表(或元组)中的图像同时进行预测,列表中的元素可以是图像路径
                 也可以是解码后的排列格式为(H,W,C)且类型为float32且为BGR格式的数组。
             transforms (paddlex.det.transforms): 数据预处理操作。
-            thread_num (int): 并发执行各图像预处理时的线程数。
         Returns:
             dict: 每个元素都为列表,表示各图像的预测结果。在各图像的预测结果列表中,每个预测结果由预测框类别标签、预测框类别名称、
                 预测框坐标(坐标格式为[xmin, ymin, w, h])、
@@ -432,7 +431,7 @@
             transforms =
self.test_transforms im, im_resize_info, im_shape = FasterRCNN._preprocess( img_file_list, transforms, self.model_type, - self.__class__.__name__, thread_num) + self.__class__.__name__, self.thread_pool) with fluid.scope_guard(self.scope): result = self.exe.run(self.test_prog, diff --git a/paddlex/cv/models/ppyolo.py b/paddlex/cv/models/ppyolo.py index eab6e9565adcbad559100b7b8aad031e5815c39d..3c6db9bffd597526e03359cc496993d452fe8e94 100644 --- a/paddlex/cv/models/ppyolo.py +++ b/paddlex/cv/models/ppyolo.py @@ -447,16 +447,18 @@ class PPYOLO(BaseAPI): return evaluate_metrics @staticmethod - def _preprocess(images, transforms, model_type, class_name, thread_num=1): + def _preprocess(images, transforms, model_type, class_name, thread_pool=None): arrange_transforms( model_type=model_type, class_name=class_name, transforms=transforms, mode='test') - pool = ThreadPool(thread_num) - batch_data = pool.map(transforms, images) - pool.close() - pool.join() + if thread_pool is not None: + batch_data = thread_pool.map(transforms, images) + else: + batch_data = list() + for image in images: + batch_data.append(transforms(image)) padding_batch = generate_minibatch(batch_data) im = np.array( [data[0] for data in padding_batch], @@ -520,14 +522,13 @@ class PPYOLO(BaseAPI): len(images), self.num_classes, self.labels) return preds[0] - def batch_predict(self, img_file_list, transforms=None, thread_num=2): + def batch_predict(self, img_file_list, transforms=None): """预测。 Args: img_file_list (list|tuple): 对列表(或元组)中的图像同时进行预测,列表中的元素可以是图像路径,也可以是解码后的排列格式为(H,W,C) 且类型为float32且为BGR格式的数组。 transforms (paddlex.det.transforms): 数据预处理操作。 - thread_num (int): 并发执行各图像预处理时的线程数。 Returns: list: 每个元素都为列表,表示各图像的预测结果。在各图像的预测结果列表中,每个预测结果由预测框类别标签、 预测框类别名称、预测框坐标(坐标格式为[xmin, ymin, w, h])、 @@ -543,7 +544,7 @@ class PPYOLO(BaseAPI): transforms = self.test_transforms im, im_size = PPYOLO._preprocess(img_file_list, transforms, self.model_type, - self.__class__.__name__, thread_num) + self.__class__.__name__, self.thread_pool) with fluid.scope_guard(self.scope): result = self.exe.run(self.test_prog, diff --git a/paddlex/cv/models/slim/prune_config.py b/paddlex/cv/models/slim/prune_config.py index d5e6325e805f6dda7987c1e0e909950e43aa5218..d85867e9cb3b921715c2a22aa7900f9c23a6491a 100644 --- a/paddlex/cv/models/slim/prune_config.py +++ b/paddlex/cv/models/slim/prune_config.py @@ -91,7 +91,23 @@ sensitivities_data = { 'DeepLabv3p_Xception65_aspp_decoder': 'https://bj.bcebos.com/paddlex/slim_prune/deeplab_xception65_with_aspp_decoder.sensitivities', 'DeepLabv3p_Xception41_aspp_decoder': - 'https://bj.bcebos.com/paddlex/slim_prune/deeplab_xception41_with_aspp_decoder.sensitivities' + 'https://bj.bcebos.com/paddlex/slim_prune/deeplab_xception41_with_aspp_decoder.sensitivities', + 'HRNet_W18_Seg': + 'https://bj.bcebos.com/paddlex/slim_prune/hrnet_w18.sensitivities', + 'HRNet_W30_Seg': + 'https://bj.bcebos.com/paddlex/slim_prune/hrnet_w30.sensitivities', + 'HRNet_W32_Seg': + 'https://bj.bcebos.com/paddlex/slim_prune/hrnet_w32.sensitivities', + 'HRNet_W40_Seg': + 'https://bj.bcebos.com/paddlex/slim_prune/hrnet_w40.sensitivities', + 'HRNet_W44_Seg': + 'https://bj.bcebos.com/paddlex/slim_prune/hrnet_w44.sensitivities', + 'HRNet_W48_Seg': + 'https://bj.bcebos.com/paddlex/slim_prune/hrnet_w48.sensitivities', + 'HRNet_W64_Seg': + 'https://bj.bcebos.com/paddlex/slim_prune/hrnet_w64.sensitivities', + 'FastSCNN': + 'https://bj.bcebos.com/paddlex/slim_prune/fast_scnn.sensitivities' } @@ -105,6 +121,8 @@ def get_sensitivities(flag, model, save_dir): elif 
hasattr(model, 'encoder_with_aspp') or hasattr(model, 'enable_decoder'):
         model_type = model_type + '_' + 'aspp' + '_' + 'decoder'
+    if model_type.startswith('HRNet') and model.model_type == 'segmenter':
+        model_type = '{}_W{}_Seg'.format(model_type, model.width)
     if osp.isfile(flag):
         return flag
     elif flag == 'DEFAULT':
@@ -244,6 +262,28 @@ def get_prune_params(model):
             if i in prune_names:
                 prune_names.remove(i)

+    elif model_type.startswith('HRNet') and model.model_type == 'segmenter':
+        for param in program.global_block().all_parameters():
+            if 'weight' not in param.name:
+                continue
+            prune_names.append(param.name)
+        params_not_prune = ['conv-1_weights']
+        for i in params_not_prune:
+            if i in prune_names:
+                prune_names.remove(i)
+
+    elif model_type.startswith('FastSCNN'):
+        for param in program.global_block().all_parameters():
+            if 'weight' not in param.name:
+                continue
+            if 'dwise' in param.name or 'depthwise' in param.name or 'logit' in param.name:
+                continue
+            prune_names.append(param.name)
+        params_not_prune = ['classifier/weights']
+        for i in params_not_prune:
+            if i in prune_names:
+                prune_names.remove(i)
+
     elif model_type.startswith('DeepLabv3p'):
         for param in program.global_block().all_parameters():
             if 'weight' not in param.name:
diff --git a/paddlex/cv/nets/detection/yolo_v3.py b/paddlex/cv/nets/detection/yolo_v3.py
index 01c729a4b673fc990ab4116092e3aeb0bf5587fe..b73cdc768737a54ff6b01eb7977c3c508ba5c0e3
--- a/paddlex/cv/nets/detection/yolo_v3.py
+++ b/paddlex/cv/nets/detection/yolo_v3.py
@@ -311,7 +311,7 @@ class YOLOv3:
     def _upsample(self, input, scale=2, name=None):
         out = fluid.layers.resize_nearest(
-            input=input, scale=float(scale), name=name)
+            input=input, scale=float(scale), name=name, align_corners=False)
         return out

     def _detection_block(self,
diff --git a/paddlex/cv/nets/hrnet.py b/paddlex/cv/nets/hrnet.py
index 561c7594da2904632386c0d88e9d841c047fb2d2..d4fe2d5918632ecec85a3cbb4057ab3d49023f43
--- a/paddlex/cv/nets/hrnet.py
+++ b/paddlex/cv/nets/hrnet.py
@@ -235,10 +235,13 @@ class HRNet(object):
                     name=name + '_layer_' + str(i + 1) + '_' + str(j + 1))
                 if self.feature_maps == "stage4":
                     y = fluid.layers.resize_bilinear(
-                        input=y, out_shape=[height, width])
+                        input=y,
+                        out_shape=[height, width],
+                        align_corners=False,
+                        align_mode=1)
                 else:
                     y = fluid.layers.resize_nearest(
-                        input=y, scale=2**(j - i))
+                        input=y, scale=2**(j - i), align_corners=False)
                 residual = fluid.layers.elementwise_add(
                     x=residual, y=y, act=None)
             elif j < i:
diff --git a/paddlex/deploy.py b/paddlex/deploy.py
index ced22aee21e787c3ecf3e6b9e7d51b348ed27077..e7a9264240ff52007ad3480ed794064cc171320f
--- a/paddlex/deploy.py
+++ b/paddlex/deploy.py
@@ -16,6 +16,7 @@ import os.path as osp
 import cv2
 import numpy as np
 import yaml
+import multiprocessing as mp
 import paddlex
 import paddle.fluid as fluid
 from paddlex.cv.transforms import build_transforms
@@ -79,12 +80,21 @@ class Predictor:
         self.predictor = self.create_predictor(use_gpu, gpu_id, use_mkl,
                                                mkl_thread_num, use_trt,
                                                use_glog, memory_optimize)
+        # 线程池,在模型预测时用于对输入数据以图片为单位进行并行处理
+        # 主要用于batch_predict接口
+        thread_num = mp.cpu_count() if mp.cpu_count() < 8 else 8
+        self.thread_pool = mp.pool.ThreadPool(thread_num)
+
+    def reset_thread_pool(self, thread_num):
+        self.thread_pool.close()
+        self.thread_pool.join()
+        self.thread_pool = mp.pool.ThreadPool(thread_num)

     def create_predictor(self,
                          use_gpu=True,
                          gpu_id=0,
                          use_mkl=False,
-                         mkl_thread_num=4,
+                         mkl_thread_num=mp.cpu_count(),
                          use_trt=False,
                          use_glog=False,
                          memory_optimize=True):
@@
-98,8 +108,9 @@ class Predictor: else: config.disable_gpu() if use_mkl: - config.enable_mkldnn() - config.set_cpu_math_library_num_threads(mkl_thread_num) + if self.model_name not in ["HRNet", "DeepLabv3p"]: + config.enable_mkldnn() + config.set_cpu_math_library_num_threads(mkl_thread_num) if use_glog: config.enable_glog_info() else: @@ -114,7 +125,7 @@ class Predictor: predictor = fluid.core.create_paddle_predictor(config) return predictor - def preprocess(self, image, thread_num=1): + def preprocess(self, image, thread_pool=None): """ 对图像做预处理 Args: @@ -128,7 +139,7 @@ class Predictor: self.transforms, self.model_type, self.model_name, - thread_num=thread_num) + thread_pool=thread_pool) res['image'] = im elif self.model_type == "detector": if self.model_name in ["PPYOLO", "YOLOv3"]: @@ -137,7 +148,7 @@ class Predictor: self.transforms, self.model_type, self.model_name, - thread_num=thread_num) + thread_pool=thread_pool) res['image'] = im res['im_size'] = im_size if self.model_name.count('RCNN') > 0: @@ -146,7 +157,7 @@ class Predictor: self.transforms, self.model_type, self.model_name, - thread_num=thread_num) + thread_pool=thread_pool) res['image'] = im res['im_info'] = im_resize_info res['im_shape'] = im_shape @@ -156,7 +167,7 @@ class Predictor: self.transforms, self.model_type, self.model_name, - thread_num=thread_num) + thread_pool=thread_pool) res['image'] = im res['im_info'] = im_info return res @@ -253,17 +264,16 @@ class Predictor: return results[0] - def batch_predict(self, image_list, topk=1, thread_num=2): + def batch_predict(self, image_list, topk=1): """ 图片预测 Args: image_list(list|tuple): 对列表(或元组)中的图像同时进行预测,列表中的元素可以是图像路径 也可以是解码后的排列格式为(H,W,C)且类型为float32且为BGR格式的数组。 - thread_num (int): 并发执行各图像预处理时的线程数。 topk(int): 分类预测时使用,表示预测前topk的结果 """ - preprocessed_input = self.preprocess(image_list) + preprocessed_input = self.preprocess(image_list, self.thread_pool) model_pred = self.raw_predict(preprocessed_input) im_shape = None if 'im_shape' not in preprocessed_input else preprocessed_input[ 'im_shape'] diff --git a/setup.py b/setup.py index 57924649c89f783f1b6adf67f3b56065ef3d1400..36be43a351876d2fe2a4d2ce960f1d3eb34c932b 100644 --- a/setup.py +++ b/setup.py @@ -19,7 +19,7 @@ long_description = "PaddlePaddle Entire Process Development Toolkit" setuptools.setup( name="paddlex", - version='1.1.1', + version='1.1.4', author="paddlex", author_email="paddlex@baidu.com", description=long_description,