Commit 144a0dce authored by: S syyxsxx

add openvino

Parent 3066636d
......@@ -105,7 +105,10 @@ if (NOT WIN32)
endif()
set(DEPS ${DEPS} ${OpenCV_LIBS})
add_executable(classifier src/classifier.cpp src/transforms.cpp src/paddlex.cpp)
add_executable(classifier demo/classifier.cpp src/transforms.cpp src/paddlex.cpp)
ADD_DEPENDENCIES(classifier ext-yaml-cpp)
target_link_libraries(classifier ${DEPS})
add_executable(segmenter demo/segmenter.cpp src/transforms.cpp src/paddlex.cpp src/visualize.cpp)
ADD_DEPENDENCIES(segmenter ext-yaml-cpp)
target_link_libraries(segmenter ${DEPS})
File mode changed from 100644 to 100755
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
# PaddleX-OpenVINO
paddlex-openvino deploys models trained with PaddleX onto hardware with OpenVINO installed, using OpenVINO to accelerate model inference.
## Deployment workflow
**The deployment workflow from PaddleX to OpenVINO is as follows:**
PaddleX -> ONNX -> OpenVINO IR -> OpenVINO Inference Engine
### Deployment environment
* Ubuntu* 16.04 (64-bit) with GCC* 5.4.0
* CMake 3.12.3
* Python 3.7
* ONNX 1.5.0
* PaddleX 1.0
* OpenVINO 2020.3
**Note**: for PaddleX installation see [PaddleX](https://github.com/PaddlePaddle/PaddleX/blob/release-v1.0/README.md); for OpenVINO see [OpenVINO](https://github.com/openvinotoolkit/openvino/blob/master/README.md)
Make sure the basic software above is already installed on your system. **All examples below use `/root/projects/` as the working directory.**
### Step1: Software dependencies
- OpenVINO: to build it, see the [build instructions](https://github.com/openvinotoolkit/openvino/blob/master/build-instruction.md)
- gflags: to build it, see the [build instructions](https://gflags.github.io/gflags/#download)
- opencv: to build it, see the
[build instructions](https://docs.opencv.org/master/d7/d9f/tutorial_linux_install.html)
Note: /root/projects/openvino/scripts/bootstrap.sh provides a prebuilt version for download; you can also build it yourself.
- ngraph:
Note: the ngraph lib files are generated while building OpenVINO, under {OpenVINO root}/bin/intel64/Release/lib/.
### Step2: Build
The `cmake` build commands live in `scripts/build.sh`. Adjust the main parameters according to how you built the dependencies in Step1; they are explained below:
```
# path to the prebuilt OpenVINO libraries
OPENVINO_DIR=/path/to/openvino/inference_engine/
# path to the prebuilt gflags libraries
GFLAGS_DIR=/path/to/gflags/build/
# path to the ngraph libs, typically generated when building OpenVINO
NGRAPH_LIB=/path/to/ngraph/lib/
# path to the prebuilt OpenCV libraries; leave unchanged if you use the bundled prebuilt version
OPENCV_DIR=$(pwd)/deps/opencv3gcc4.8/
```
After editing the script and setting the main parameters, run the `build` script:
```shell
sh ./scripts/build.sh
```
### Step3: Model conversion
Convert the PaddleX model into an ONNX model:
```
paddlex --export_onnx --model_dir=/path/to/PaddleX_model --save_dir=/path/to/onnx_model --fixed_input_shape [w,h]
```
**Note**: use onnx version 1.5.0, otherwise model conversion errors may occur; a quick environment check is sketched below.
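Before converting, you can sanity-check the environment (a minimal sketch that only mirrors the version requirements above; the x2paddle bound comes from the export script in this commit):
```python
# check the converter dependencies assumed by this guide
import onnx
import x2paddle

assert onnx.__version__ == "1.5.0", onnx.__version__
assert x2paddle.__version__ >= "0.7.4", x2paddle.__version__
```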
Convert the generated ONNX model into the format supported by OpenVINO:
```
cd {OpenVINO root}/model-optimizer
python mo_onnx.py --input_model /path/to/onnx_model --output_dir /path/to/openvino_model --input_shape [N,C,H,W]
```
**Note**: the converted model consists of two files, .xml and .bin; for more details see the [Model Optimizer documentation](https://docs.openvinotoolkit.org/latest/_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide.html). A quick load check is sketched below.
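To verify that the converted IR loads before moving on to Step4, you can run a quick check with the OpenVINO Python API (a minimal sketch; it uses the same `IECore` calls as the `deploy.py` script in this commit, and the paths are placeholders):
```python
# load the Model Optimizer outputs (.xml topology + .bin weights) and build an executable network
from openvino.inference_engine import IECore

ie = IECore()
net = ie.read_network(model="/path/to/openvino_model.xml",
                      weights="/path/to/openvino_model.bin")
exec_net = ie.load_network(network=net, device_name="CPU")
print("inputs:", list(net.inputs.keys()), "outputs:", list(net.outputs.keys()))
```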
### Step4: Prediction
After a successful build, the prediction executable for classification tasks is `classifier`; its main command-line parameters are as follows:
| Parameter | Description |
| ---- | ---- |
| --model_dir | Path to the .xml file generated by Model Optimizer; make sure the three files generated by Model Optimizer are in the same directory |
| --image | Path of the image file to predict |
| --image_list | A .txt file listing image paths line by line |
| --device | Platform to run on; defaults to "CPU" |
| --cfg_dir | The .yml config file of the PaddleX model |
#### Examples
`Example 1`
Test image `/path/to/test_img.jpeg`
```shell
./build/classifier --model_dir=/path/to/openvino_model --image=/path/to/test_img.jpeg --cfg_dir=/path/to/PaddleX_model.yml
```
`Example 2`:
Predict multiple images listed in `/path/to/image_list.txt`; the format of image_list.txt is as follows:
```
/path/to/images/test_img1.jpeg
/path/to/images/test_img2.jpeg
...
/path/to/images/test_imgn.jpeg
```
```shell
./build/classifier --model_dir=/path/to/models/openvino_model --image_list=/root/projects/images_list.txt --cfg_dir=/path/to/PaddleX_model.yml
```
......@@ -6,8 +6,9 @@ message("${CMAKE_BUILD_TYPE}")
ExternalProject_Add(
ext-yaml-cpp
URL https://bj.bcebos.com/paddlex/deploy/deps/yaml-cpp.zip
URL_MD5 9542d6de397d1fbd649ed468cb5850e6
#URL https://bj.bcebos.com/paddlex/deploy/deps/yaml-cpp.zip
#URL_MD5 9542d6de397d1fbd649ed468cb5850e6
URL "/wangsiyuan06/PaddleX/deploy/openvino/deps/yaml-cpp.zip"
CMAKE_ARGS
-DYAML_CPP_BUILD_TESTS=OFF
-DYAML_CPP_BUILD_TOOLS=OFF
......
......@@ -22,7 +22,7 @@
#include "include/paddlex/paddlex.h"
DEFINE_string(model_dir, "", "Path of inference model");
DEFINE_string(cfg_dir, "", "Path of inference model");
DEFINE_string(cfg_dir, "", "Path of PaddelX model yml file");
DEFINE_string(device, "CPU", "Device name");
DEFINE_string(image, "", "Path of test image file");
DEFINE_string(image_list, "", "Path of test image list file");
......
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <glog/logging.h>
#include <algorithm>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include <utility>
#include "include/paddlex/paddlex.h"
#include "include/paddlex/visualize.h"
DEFINE_string(model_dir, "", "Path of openvino model xml file");
DEFINE_string(cfg_dir, "", "Path of PaddleX model yaml file");
DEFINE_string(image, "", "Path of test image file");
DEFINE_string(image_list, "", "Path of test image list file");
DEFINE_string(device, "CPU", "Device name");
DEFINE_string(save_dir, "", "Path to save visualized image");
int main(int argc, char** argv) {
google::ParseCommandLineFlags(&argc, &argv, true);
if (FLAGS_model_dir == "") {
std::cerr << "--model_dir need to be defined" << std::endl;
return -1;
}
if (FLAGS_cfg_dir == "") {
std::cerr << "--cfg_dir need to be defined" << std::endl;
return -1;
}
if (FLAGS_image == "" & FLAGS_image_list == "") {
std::cerr << "--image or --image_list need to be defined" << std::endl;
return -1;
}
//
PaddleX::Model model;
model.Init(FLAGS_model_dir, FLAGS_cfg_dir, FLAGS_device);
int imgs = 1;
auto colormap = PaddleX::GenerateColorMap(model.labels.size());
if (FLAGS_image_list != "") {
std::ifstream inf(FLAGS_image_list);
if (!inf) {
std::cerr << "Fail to open file " << FLAGS_image_list <<std::endl;
return -1;
}
std::string image_path;
while (getline(inf, image_path)) {
PaddleX::SegResult result;
cv::Mat im = cv::imread(image_path, 1);
model.predict(im, &result);
if(FLAGS_save_dir != ""){
cv::Mat vis_img = PaddleX::Visualize(im, result, model.labels, colormap);
std::string save_path =
PaddleX::generate_save_path(FLAGS_save_dir, image_path);
cv::imwrite(save_path, vis_img);
std::cout << "Visualized output saved as " << save_path << std::endl;
}
}
} else {
PaddleX::SegResult result;
cv::Mat im = cv::imread(FLAGS_image, 1);
std::cout << "predict start" << std::endl;
model.predict(im, &result);
std::cout << "predict done" << std::endl;
if (FLAGS_save_dir != "") {
cv::Mat vis_img = PaddleX::Visualize(im, result, model.labels, colormap);
std::string save_path =
PaddleX::generate_save_path(FLAGS_save_dir, FLAGS_image);
cv::imwrite(save_path, vis_img);
std::cout << "Visualized output saved as " << save_path << std::endl;
}
result.clear();
}
return 0;
}
File mode changed from 100644 to 100755
......@@ -48,15 +48,18 @@ class Model {
bool load_config(const std::string& model_dir);
bool preprocess(cv::Mat* input_im);
bool preprocess(cv::Mat* input_im, ImageBlob* inputs);
bool predict(const cv::Mat& im, ClsResult* result);
bool predict(const cv::Mat& im, SegResult* result);
std::string type;
std::string name;
std::vector<std::string> labels;
std::map<int, std::string> labels;
Transforms transforms_;
Blob::Ptr inputs_;
ImageBlob inputs_;
Blob::Ptr output_;
CNNNetwork network_;
ExecutableNetwork executable_network_;
......
......@@ -61,7 +61,7 @@ class DetResult : public BaseResult {
class SegResult : public BaseResult {
public:
Mask<int64_t> label_map;
Mask<int> label_map;
Mask<float> score_map;
void clear() {
label_map.clear();
......
......@@ -31,11 +31,38 @@ using namespace InferenceEngine;
namespace PaddleX {
/*
* @brief
* This class stores all the preprocessed data of an image
* */
class ImageBlob {
public:
// Original image height and width
std::vector<int> ori_im_size_ = std::vector<int>(2);
// Current image height and width after processing
std::vector<int> new_im_size_ = std::vector<int>(2);
// Image height and width before resize
std::vector<std::vector<int>> im_size_before_resize_;
// Reshape order
std::vector<std::string> reshape_order_;
// Resize scale
float scale = 1.0;
// Buffer for image data after preprocessing
Blob::Ptr blob;
void clear() {
im_size_before_resize_.clear();
reshape_order_.clear();
}
};
// Abstract base class for preprocessing operations
class Transform {
public:
virtual void Init(const YAML::Node& item) = 0;
virtual bool Run(cv::Mat* im) = 0;
virtual bool Run(cv::Mat* im, ImageBlob* data) = 0;
};
class Normalize : public Transform {
......@@ -45,7 +72,7 @@ class Normalize : public Transform {
std_ = item["std"].as<std::vector<float>>();
}
virtual bool Run(cv::Mat* im);
virtual bool Run(cv::Mat* im, ImageBlob* data);
private:
std::vector<float> mean_;
......@@ -62,7 +89,7 @@ class ResizeByShort : public Transform {
max_size_ = -1;
}
};
virtual bool Run(cv::Mat* im);
virtual bool Run(cv::Mat* im, ImageBlob* data);
private:
float GenerateScale(const cv::Mat& im);
......@@ -70,6 +97,55 @@ class ResizeByShort : public Transform {
int max_size_;
};
/*
* @brief
* This class executes the resize-by-long operation on an image matrix. It first resizes
* the long side of the image to the specified length; the short side is then
* resized in the same proportion.
* */
class ResizeByLong : public Transform {
public:
virtual void Init(const YAML::Node& item) {
long_size_ = item["long_size"].as<int>();
}
virtual bool Run(cv::Mat* im, ImageBlob* data);
private:
int long_size_;
};
/*
* @brief
* This class executes the resize operation on an image matrix. It resizes the width
* and height to the specified lengths.
* */
class Resize : public Transform {
public:
virtual void Init(const YAML::Node& item) {
if (item["interp"].IsDefined()) {
interp_ = item["interp"].as<std::string>();
}
if (item["target_size"].IsScalar()) {
height_ = item["target_size"].as<int>();
width_ = item["target_size"].as<int>();
} else if (item["target_size"].IsSequence()) {
std::vector<int> target_size = item["target_size"].as<std::vector<int>>();
width_ = target_size[0];
height_ = target_size[1];
}
if (height_ <= 0 || width_ <= 0) {
std::cerr << "[Resize] target_size should greater than 0" << std::endl;
exit(-1);
}
}
virtual bool Run(cv::Mat* im, ImageBlob* data);
private:
int height_;
int width_;
std::string interp_;
};
class CenterCrop : public Transform {
public:
......@@ -83,18 +159,53 @@ class CenterCrop : public Transform {
height_ = crop_size[1];
}
}
virtual bool Run(cv::Mat* im);
virtual bool Run(cv::Mat* im, ImageBlob* data);
private:
int height_;
int width_;
};
/*
* @brief
* This class executes the padding operation on an image matrix. It adds a border
* along the edges of the image.
* */
class Padding : public Transform {
public:
virtual void Init(const YAML::Node& item) {
if (item["coarsest_stride"].IsDefined()) {
coarsest_stride_ = item["coarsest_stride"].as<int>();
if (coarsest_stride_ < 1) {
std::cerr << "[Padding] coarest_stride should greater than 0"
<< std::endl;
exit(-1);
}
}
if (item["target_size"].IsDefined()) {
if (item["target_size"].IsScalar()) {
width_ = item["target_size"].as<int>();
height_ = item["target_size"].as<int>();
} else if (item["target_size"].IsSequence()) {
width_ = item["target_size"].as<std::vector<int>>()[0];
height_ = item["target_size"].as<std::vector<int>>()[1];
}
}
}
virtual bool Run(cv::Mat* im, ImageBlob* data);
private:
int coarsest_stride_ = -1;
int width_ = 0;
int height_ = 0;
};
class Transforms {
public:
void Init(const YAML::Node& node, bool to_rgb = true);
std::shared_ptr<Transform> CreateTransform(const std::string& name);
bool Run(cv::Mat* im, Blob::Ptr blob);
bool Run(cv::Mat* im, ImageBlob* data);
private:
std::vector<std::shared_ptr<Transform>> transforms_;
......
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <iostream>
#include <map>
#include <vector>
#ifdef _WIN32
#include <direct.h>
#include <io.h>
#else // Linux/Unix
#include <dirent.h>
#include <sys/io.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#endif
#include <string>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include "include/paddlex/results.h"
#ifdef _WIN32
#define OS_PATH_SEP "\\"
#else
#define OS_PATH_SEP "/"
#endif
namespace PaddleX {
/*
* @brief
* Generate visualization colormap for each class
*
* @param num_class: number of classes
* @return color map; the size of the vector is 3 * num_class
* */
std::vector<int> GenerateColorMap(int num_class);
/*
* @brief
* Visualize the detection result
*
* @param img: initial image matrix
* @param results: the detection result
* @param labels: label map
* @param colormap: visualization color map
* @return visualized image matrix
* */
cv::Mat Visualize(const cv::Mat& img,
const DetResult& results,
const std::map<int, std::string>& labels,
const std::vector<int>& colormap,
float threshold = 0.5);
/*
* @brief
* Visualize the segmentation result
*
* @param img: initial image matrix
* @param result: the segmentation result
* @param labels: label map
* @param colormap: visualization color map
* @return visualized image matrix
* */
cv::Mat Visualize(const cv::Mat& img,
const SegResult& result,
const std::map<int, std::string>& labels,
const std::vector<int>& colormap);
/*
* @brief
* generate save path for visualized image matrix
*
* @param save_dir: directory for saving visualized image matrix
* @param file_path: source image file path
* @return path of saving visualized result
* */
std::string generate_save_path(const std::string& save_dir,
const std::string& file_path);
} // namespace PaddleX
import os
from six import text_type as _text_type
import argparse
import sys
from utils import logging
import paddlex as pdx
def arg_parser():
parser = argparse.ArgumentParser()
parser.add_argument(
"--model_dir",
"-m",
type=_text_type,
default=None,
help="define model directory path")
parser.add_argument(
"--save_dir",
"-s",
type=_text_type,
default=None,
help="path to save inference model")
parser.add_argument(
"--fixed_input_shape",
"-fs",
default=None,
help="export openvino model with input shape:[h,w]")
return parser
def export_openvino_model(model, args):
if model.model_type == "detector" or model.__class__.__name__ == "FastSCNN":
logging.error(
"Only image classifier models and semantic segmentation models(except FastSCNN) are supported to export to openvino")
try:
import x2paddle
if x2paddle.__version__ < '0.7.4':
logging.error("You need to upgrade x2paddle >= 0.7.4")
except ImportError:
logging.error(
"You need to install x2paddle first: pip install x2paddle>=0.7.4")
from x2paddle.op_mapper.paddle_op_mapper import PaddleOpMapper
mapper = PaddleOpMapper()
mapper.convert(model.test_prog, args.save_dir)
import mo.main as mo
from mo.utils.cli_parser import get_onnx_cli_parser
onnx_parser = get_onnx_cli_parser()
onnx_parser.add_argument("--model_dir",type=_text_type)
onnx_parser.add_argument("--save_dir",type=_text_type)
onnx_parser.add_argument("--fixed_input_shape")
onnx_input = os.path.join(args.save_dir, 'x2paddle_model.onnx')
onnx_parser.set_defaults(input_model=onnx_input)
onnx_parser.set_defaults(output_dir=args.save_dir)
shape = '[1,3,'
shape = shape + args.fixed_input_shape[1:]
onnx_parser.set_defaults(input_shape=shape)
mo.main(onnx_parser, 'onnx')
def main():
parser = arg_parser()
args = parser.parse_args()
assert args.model_dir is not None, "--model_dir should be defined while exporting openvino model"
assert args.save_dir is not None, "--save_dir should be defined to create openvino model"
model = pdx.load_model(args.model_dir)
if model.status == "Normal" or model.status == "Prune":
logging.error(
"Only support inference model, try to export model first as below,",
exit=False)
export_openvino_model(model, args)
if __name__ == "__main__":
main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import os
import argparse
import deploy
def arg_parser():
parser = argparse.ArgumentParser()
parser.add_argument(
"--model_dir",
"-m",
type=str,
default=None,
help="path to openvino model .xml file")
parser.add_argument(
"--device",
"-d",
type=str,
default='CPU',
help="Specify the target device to infer on:[CPU, GPU, FPGA, HDDL, MYRIAD,HETERO]"
"Default value is CPU")
parser.add_argument(
"--img",
"-i",
type=str,
default=None,
help="path to an image files")
parser.add_argument(
"--img_list",
"-l",
type=str,
default=None,
help="Path to a imglist")
parser.add_argument(
"--cfg_dir",
"-c",
type=str,
default=None,
help="Path to PaddelX model yml file")
return parser
def main():
parser = arg_parser()
args = parser.parse_args()
model_xml = args.model_dir
model_yaml = args.cfg_dir
# model init
if "CPU" not in args.device:
predictor = deploy.Predictor(model_xml, model_yaml, args.device)
else:
predictor = deploy.Predictor(model_xml, model_yaml)
# predict
if args.img_list is not None:
f = open(args.img_list)
lines = f.readlines()
for im_path in lines:
print(im_path)
predictor.predict(im_path.strip('\n'))
f.close()
else:
im_path = args.img
predictor.predict(im_path)
if __name__ == "__main__":
main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import os
import os.path as osp
import cv2
import numpy as np
import yaml
from six import text_type as _text_type
from openvino.inference_engine import IECore
from utils import logging
class Predictor:
def __init__(self,
model_xml,
model_yaml,
device="CPU"):
self.device = device
if not osp.exists(model_xml):
logging.error("model xml file is not exists in {}".format(model_xml))
self.model_xml = model_xml
self.model_bin = osp.splitext(model_xml)[0] + ".bin"
if not osp.exists(model_yaml):
logging,error("model yaml file is not exists in {}".format(model_yaml))
with open(model_yaml) as f:
self.info = yaml.load(f.read(), Loader=yaml.Loader)
self.model_type = self.info['_Attributes']['model_type']
self.model_name = self.info['Model']
self.num_classes = self.info['_Attributes']['num_classes']
self.labels = self.info['_Attributes']['labels']
if self.info['Model'] == 'MaskRCNN':
if self.info['_init_params']['with_fpn']:
self.mask_head_resolution = 28
else:
self.mask_head_resolution = 14
transforms_mode = self.info.get('TransformsMode', 'RGB')
if transforms_mode == 'RGB':
to_rgb = True
else:
to_rgb = False
self.transforms = self.build_transforms(self.info['Transforms'], to_rgb)
self.predictor, self.net = self.create_predictor()
def create_predictor(self):
# initialization for the specified device
logging.info("Creating Inference Engine")
ie = IECore()
logging.info("Loading network files:\n\t{}\n\t{}".format(self.model_xml, self.model_bin))
net = ie.read_network(model=self.model_xml, weights=self.model_bin)
net.batch_size = 1
exec_net = ie.load_network(network=net, device_name=self.device)
return exec_net, net
def build_transforms(self, transforms_info, to_rgb=True):
if self.model_type == "classifier":
import transforms.cls_transforms as transforms
elif self.model_type == "detector":
import transforms.det_transforms as transforms
elif self.model_type == "segmenter":
import transforms.seg_transforms as transforms
op_list = list()
for op_info in transforms_info:
op_name = list(op_info.keys())[0]
op_attr = op_info[op_name]
if not hasattr(transforms, op_name):
raise Exception(
"There's no operator named '{}' in transforms of {}".
format(op_name, self.model_type))
op_list.append(getattr(transforms, op_name)(**op_attr))
eval_transforms = transforms.Compose(op_list)
if hasattr(eval_transforms, 'to_rgb'):
eval_transforms.to_rgb = to_rgb
self.arrange_transforms(eval_transforms)
return eval_transforms
def arrange_transforms(self, eval_transforms):
if self.model_type == 'classifier':
import transforms.cls_transforms as transforms
arrange_transform = transforms.ArrangeClassifier
elif self.model_type == 'segmenter':
import transforms.seg_transforms as transforms
arrange_transform = transforms.ArrangeSegmenter
elif self.model_type == 'detector':
import transforms.det_transforms as transforms
arrange_name = 'Arrange{}'.format(self.model_name)
arrange_transform = getattr(transforms, arrange_name)
else:
raise Exception("Unrecognized model type: {}".format(
self.model_type))
if type(eval_transforms.transforms[-1]).__name__.startswith('Arrange'):
eval_transforms.transforms[-1] = arrange_transform(mode='test')
else:
eval_transforms.transforms.append(arrange_transform(mode='test'))
def raw_predict(self, images):
input_blob = next(iter(self.net.inputs))
out_blob = next(iter(self.net.outputs))
# start sync inference
logging.info("Starting inference in synchronous mode")
res = self.predictor.infer(inputs={input_blob: images})
# process the output blob
logging.info("Processing output blob")
res = res[out_blob]
print("res: ", res)
return res
def preprocess(self, image):
if self.model_type == "classifier":
im, = self.transforms(image)
im = np.expand_dims(im, axis=0).copy()
#res['image'] = im
'''elif self.model_type == "detector":
if self.model_name == "YOLOv3":
im, im_shape = self.transforms(image)
im = np.expand_dims(im, axis=0).copy()
im_shape = np.expand_dims(im_shape, axis=0).copy()
res['image'] = im
res['im_size'] = im_shape
if self.model_name.count('RCNN') > 0:
im, im_resize_info, im_shape = self.transforms(image)
im = np.expand_dims(im, axis=0).copy()
im_resize_info = np.expand_dims(im_resize_info, axis=0).copy()
im_shape = np.expand_dims(im_shape, axis=0).copy()
res['image'] = im
res['im_info'] = im_resize_info
res['im_shape'] = im_shape
elif self.model_type == "segmenter":
im, im_info = self.transforms(image)
im = np.expand_dims(im, axis=0).copy()
res['image'] = im
res['im_info'] = im_info'''
return im
def predict(self, image, topk=1, threshold=0.5):
preprocessed_input = self.preprocess(image)
model_pred = self.raw_predict(preprocessed_input)
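# Example usage (a minimal sketch, mirroring demo.py above; the paths are placeholders):
#   predictor = Predictor("/path/to/openvino_model.xml",
#                         "/path/to/PaddleX_model.yml", device="CPU")
#   predictor.predict("/path/to/test_img.jpeg")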
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import cls_transforms
from . import det_transforms
from . import seg_transforms
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import random
import math
import cv2
import scipy
def bbox_area(src_bbox):
if src_bbox[2] < src_bbox[0] or src_bbox[3] < src_bbox[1]:
return 0.
else:
width = src_bbox[2] - src_bbox[0]
height = src_bbox[3] - src_bbox[1]
return width * height
def jaccard_overlap(sample_bbox, object_bbox):
if sample_bbox[0] >= object_bbox[2] or \
sample_bbox[2] <= object_bbox[0] or \
sample_bbox[1] >= object_bbox[3] or \
sample_bbox[3] <= object_bbox[1]:
return 0
intersect_xmin = max(sample_bbox[0], object_bbox[0])
intersect_ymin = max(sample_bbox[1], object_bbox[1])
intersect_xmax = min(sample_bbox[2], object_bbox[2])
intersect_ymax = min(sample_bbox[3], object_bbox[3])
intersect_size = (intersect_xmax - intersect_xmin) * (
intersect_ymax - intersect_ymin)
sample_bbox_size = bbox_area(sample_bbox)
object_bbox_size = bbox_area(object_bbox)
overlap = intersect_size / (
sample_bbox_size + object_bbox_size - intersect_size)
return overlap
def iou_matrix(a, b):
tl_i = np.maximum(a[:, np.newaxis, :2], b[:, :2])
br_i = np.minimum(a[:, np.newaxis, 2:], b[:, 2:])
area_i = np.prod(br_i - tl_i, axis=2) * (tl_i < br_i).all(axis=2)
area_a = np.prod(a[:, 2:] - a[:, :2], axis=1)
area_b = np.prod(b[:, 2:] - b[:, :2], axis=1)
area_o = (area_a[:, np.newaxis] + area_b - area_i)
return area_i / (area_o + 1e-10)
def crop_box_with_center_constraint(box, crop):
cropped_box = box.copy()
cropped_box[:, :2] = np.maximum(box[:, :2], crop[:2])
cropped_box[:, 2:] = np.minimum(box[:, 2:], crop[2:])
cropped_box[:, :2] -= crop[:2]
cropped_box[:, 2:] -= crop[:2]
centers = (box[:, :2] + box[:, 2:]) / 2
valid = np.logical_and(crop[:2] <= centers, centers < crop[2:]).all(axis=1)
valid = np.logical_and(
valid, (cropped_box[:, :2] < cropped_box[:, 2:]).all(axis=1))
return cropped_box, np.where(valid)[0]
def is_poly(segm):
if not isinstance(segm, (list, dict)):
raise Exception("Invalid segm type: {}".format(type(segm)))
return isinstance(segm, list)
def crop_image(img, crop):
x1, y1, x2, y2 = crop
return img[y1:y2, x1:x2, :]
def crop_segms(segms, valid_ids, crop, height, width):
def _crop_poly(segm, crop):
xmin, ymin, xmax, ymax = crop
crop_coord = [xmin, ymin, xmin, ymax, xmax, ymax, xmax, ymin]
crop_p = np.array(crop_coord).reshape(4, 2)
crop_p = Polygon(crop_p)
crop_segm = list()
for poly in segm:
poly = np.array(poly).reshape(len(poly) // 2, 2)
polygon = Polygon(poly)
if not polygon.is_valid:
exterior = polygon.exterior
multi_lines = exterior.intersection(exterior)
polygons = shapely.ops.polygonize(multi_lines)
polygon = MultiPolygon(polygons)
multi_polygon = list()
if isinstance(polygon, MultiPolygon):
multi_polygon = copy.deepcopy(polygon)
else:
multi_polygon.append(copy.deepcopy(polygon))
for per_polygon in multi_polygon:
inter = per_polygon.intersection(crop_p)
if not inter:
continue
if isinstance(inter, (MultiPolygon, GeometryCollection)):
for part in inter:
if not isinstance(part, Polygon):
continue
part = np.squeeze(
np.array(part.exterior.coords[:-1]).reshape(1, -1))
part[0::2] -= xmin
part[1::2] -= ymin
crop_segm.append(part.tolist())
elif isinstance(inter, Polygon):
crop_poly = np.squeeze(
np.array(inter.exterior.coords[:-1]).reshape(1, -1))
crop_poly[0::2] -= xmin
crop_poly[1::2] -= ymin
crop_segm.append(crop_poly.tolist())
else:
continue
return crop_segm
def _crop_rle(rle, crop, height, width):
if 'counts' in rle and type(rle['counts']) == list:
rle = mask_util.frPyObjects(rle, height, width)
mask = mask_util.decode(rle)
mask = mask[crop[1]:crop[3], crop[0]:crop[2]]
rle = mask_util.encode(np.array(mask, order='F', dtype=np.uint8))
return rle
crop_segms = []
for id in valid_ids:
segm = segms[id]
if is_poly(segm):
import copy
import shapely.ops
import logging
from shapely.geometry import Polygon, MultiPolygon, GeometryCollection
logging.getLogger("shapely").setLevel(logging.WARNING)
# Polygon format
crop_segms.append(_crop_poly(segm, crop))
else:
# RLE format
import pycocotools.mask as mask_util
crop_segms.append(_crop_rle(segm, crop, height, width))
return crop_segms
def expand_segms(segms, x, y, height, width, ratio):
def _expand_poly(poly, x, y):
expanded_poly = np.array(poly)
expanded_poly[0::2] += x
expanded_poly[1::2] += y
return expanded_poly.tolist()
def _expand_rle(rle, x, y, height, width, ratio):
if 'counts' in rle and type(rle['counts']) == list:
rle = mask_util.frPyObjects(rle, height, width)
mask = mask_util.decode(rle)
expanded_mask = np.full((int(height * ratio), int(width * ratio)),
0).astype(mask.dtype)
expanded_mask[y:y + height, x:x + width] = mask
rle = mask_util.encode(
np.array(expanded_mask, order='F', dtype=np.uint8))
return rle
expanded_segms = []
for segm in segms:
if is_poly(segm):
# Polygon format
expanded_segms.append([_expand_poly(poly, x, y) for poly in segm])
else:
# RLE format
import pycocotools.mask as mask_util
expanded_segms.append(
_expand_rle(segm, x, y, height, width, ratio))
return expanded_segms
def box_horizontal_flip(bboxes, width):
oldx1 = bboxes[:, 0].copy()
oldx2 = bboxes[:, 2].copy()
bboxes[:, 0] = width - oldx2 - 1
bboxes[:, 2] = width - oldx1 - 1
if bboxes.shape[0] != 0 and (bboxes[:, 2] < bboxes[:, 0]).all():
raise ValueError(
"RandomHorizontalFlip: invalid box, x2 should be greater than x1")
return bboxes
def segms_horizontal_flip(segms, height, width):
def _flip_poly(poly, width):
flipped_poly = np.array(poly)
flipped_poly[0::2] = width - np.array(poly[0::2]) - 1
return flipped_poly.tolist()
def _flip_rle(rle, height, width):
if 'counts' in rle and type(rle['counts']) == list:
rle = mask_util.frPyObjects([rle], height, width)
mask = mask_util.decode(rle)
mask = mask[:, ::-1]
rle = mask_util.encode(np.array(mask, order='F', dtype=np.uint8))
return rle
flipped_segms = []
for segm in segms:
if is_poly(segm):
# Polygon format
flipped_segms.append([_flip_poly(poly, width) for poly in segm])
else:
# RLE format
import pycocotools.mask as mask_util
flipped_segms.append(_flip_rle(segm, height, width))
return flipped_segms
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .ops import *
from .imgaug_support import execute_imgaug
import random
import os.path as osp
import numpy as np
import cv2
from PIL import Image, ImageEnhance
import utils.logging as logging
class ClsTransform:
"""分类Transform的基类
"""
def __init__(self):
pass
class Compose(ClsTransform):
"""根据数据预处理/增强算子对输入数据进行操作。
所有操作的输入图像流形状均是[H, W, C],其中H为图像高,W为图像宽,C为图像通道数。
Args:
transforms (list): 数据预处理/增强算子。
Raises:
TypeError: 形参数据类型不满足需求。
ValueError: 数据长度不匹配。
"""
def __init__(self, transforms):
if not isinstance(transforms, list):
raise TypeError('The transforms must be a list!')
if len(transforms) < 1:
raise ValueError('The length of transforms ' + \
'must be equal or larger than 1!')
self.transforms = transforms
# Check the operators in transforms; only PaddleX-defined or imgaug operators are supported
for op in self.transforms:
if not isinstance(op, ClsTransform):
import imgaug.augmenters as iaa
if not isinstance(op, iaa.Augmenter):
raise Exception(
"Elements in transforms should be defined in 'paddlex.cls.transforms' or class of imgaug.augmenters.Augmenter, see docs here: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/"
)
def __call__(self, im, label=None):
"""
Args:
im (str/np.ndarray): image path or image data as np.ndarray.
label (int): class id of the image.
Returns:
tuple: the fields required by the network;
determined by the last preprocessing operator in transforms.
"""
if isinstance(im, np.ndarray):
if len(im.shape) != 3:
raise Exception(
"im should be 3-dimension, but now is {}-dimensions".
format(len(im.shape)))
else:
try:
im = cv2.imread(im).astype('float32')
except:
raise TypeError('Cannot read the image file {}!'.format(im))
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
for op in self.transforms:
if isinstance(op, ClsTransform):
outputs = op(im, label)
im = outputs[0]
if len(outputs) == 2:
label = outputs[1]
else:
import imgaug.augmenters as iaa
if isinstance(op, iaa.Augmenter):
im = execute_imgaug(op, im)
outputs = (im, )
if label is not None:
outputs = (im, label)
return outputs
def add_augmenters(self, augmenters):
if not isinstance(augmenters, list):
raise Exception(
"augmenters should be list type in func add_augmenters()")
transform_names = [type(x).__name__ for x in self.transforms]
for aug in augmenters:
if type(aug).__name__ in transform_names:
logging.error("{} is already in ComposedTransforms, need to remove it from add_augmenters().".format(type(aug).__name__))
self.transforms = augmenters + self.transforms
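# Usage sketch (a minimal example; the image path is a placeholder): an evaluation
# pipeline assembled from the operators defined below, similar to what
# ComposedClsTransforms builds for mode='eval':
#   eval_transforms = Compose([
#       ResizeByShort(short_size=256),
#       CenterCrop(crop_size=224),
#       Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
#   ])
#   im, = eval_transforms('/path/to/test_img.jpeg')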
class RandomCrop(ClsTransform):
"""对图像进行随机剪裁,模型训练时的数据增强操作。
1. 根据lower_scale、lower_ratio、upper_ratio计算随机剪裁的高、宽。
2. 根据随机剪裁的高、宽随机选取剪裁的起始点。
3. 剪裁图像。
4. 调整剪裁后的图像的大小到crop_size*crop_size。
Args:
crop_size (int): 随机裁剪后重新调整的目标边长。默认为224。
lower_scale (float): 裁剪面积相对原面积比例的最小限制。默认为0.08。
lower_ratio (float): 宽变换比例的最小限制。默认为3. / 4。
upper_ratio (float): 宽变换比例的最大限制。默认为4. / 3。
"""
def __init__(self,
crop_size=224,
lower_scale=0.08,
lower_ratio=3. / 4,
upper_ratio=4. / 3):
self.crop_size = crop_size
self.lower_scale = lower_scale
self.lower_ratio = lower_ratio
self.upper_ratio = upper_ratio
def __call__(self, im, label=None):
"""
Args:
im (np.ndarray): image data as np.ndarray.
label (int): class id of the image.
Returns:
tuple: (im, ) with the image np.ndarray when label is None;
otherwise (im, label) with the image np.ndarray and its class id.
"""
im = random_crop(im, self.crop_size, self.lower_scale,
self.lower_ratio, self.upper_ratio)
if label is None:
return (im, )
else:
return (im, label)
class RandomHorizontalFlip(ClsTransform):
"""以一定的概率对图像进行随机水平翻转,模型训练时的数据增强操作。
Args:
prob (float): 随机水平翻转的概率。默认为0.5。
"""
def __init__(self, prob=0.5):
self.prob = prob
def __call__(self, im, label=None):
"""
Args:
im (np.ndarray): image data as np.ndarray.
label (int): class id of the image.
Returns:
tuple: (im, ) with the image np.ndarray when label is None;
otherwise (im, label) with the image np.ndarray and its class id.
"""
if random.random() < self.prob:
im = horizontal_flip(im)
if label is None:
return (im, )
else:
return (im, label)
class RandomVerticalFlip(ClsTransform):
"""以一定的概率对图像进行随机垂直翻转,模型训练时的数据增强操作。
Args:
prob (float): 随机垂直翻转的概率。默认为0.5。
"""
def __init__(self, prob=0.5):
self.prob = prob
def __call__(self, im, label=None):
"""
Args:
im (np.ndarray): image data as np.ndarray.
label (int): class id of the image.
Returns:
tuple: (im, ) with the image np.ndarray when label is None;
otherwise (im, label) with the image np.ndarray and its class id.
"""
if random.random() < self.prob:
im = vertical_flip(im)
if label is None:
return (im, )
else:
return (im, label)
class Normalize(ClsTransform):
"""对图像进行标准化。
1. 对图像进行归一化到区间[0.0, 1.0]。
2. 对图像进行减均值除以标准差操作。
Args:
mean (list): 图像数据集的均值。默认为[0.485, 0.456, 0.406]。
std (list): 图像数据集的标准差。默认为[0.229, 0.224, 0.225]。
"""
def __init__(self, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]):
self.mean = mean
self.std = std
def __call__(self, im, label=None):
"""
Args:
im (np.ndarray): image data as np.ndarray.
label (int): class id of the image.
Returns:
tuple: (im, ) with the image np.ndarray when label is None;
otherwise (im, label) with the image np.ndarray and its class id.
"""
mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
std = np.array(self.std)[np.newaxis, np.newaxis, :]
im = normalize(im, mean, std)
if label is None:
return (im, )
else:
return (im, label)
class ResizeByShort(ClsTransform):
"""根据图像短边对图像重新调整大小(resize)。
1. 获取图像的长边和短边长度。
2. 根据短边与short_size的比例,计算长边的目标长度,
此时高、宽的resize比例为short_size/原图短边长度。
3. 如果max_size>0,调整resize比例:
如果长边的目标长度>max_size,则高、宽的resize比例为max_size/原图长边长度;
4. 根据调整大小的比例对图像进行resize。
Args:
short_size (int): 调整大小后的图像目标短边长度。默认为256。
max_size (int): 长边目标长度的最大限制。默认为-1。
"""
def __init__(self, short_size=256, max_size=-1):
self.short_size = short_size
self.max_size = max_size
def __call__(self, im, label=None):
"""
Args:
im (np.ndarray): image data as np.ndarray.
label (int): class id of the image.
Returns:
tuple: (im, ) with the image np.ndarray when label is None;
otherwise (im, label) with the image np.ndarray and its class id.
"""
im_short_size = min(im.shape[0], im.shape[1])
im_long_size = max(im.shape[0], im.shape[1])
scale = float(self.short_size) / im_short_size
if self.max_size > 0 and np.round(scale *
im_long_size) > self.max_size:
scale = float(self.max_size) / float(im_long_size)
resized_width = int(round(im.shape[1] * scale))
resized_height = int(round(im.shape[0] * scale))
im = cv2.resize(
im, (resized_width, resized_height),
interpolation=cv2.INTER_LINEAR)
if label is None:
return (im, )
else:
return (im, label)
class CenterCrop(ClsTransform):
"""以图像中心点扩散裁剪长宽为`crop_size`的正方形
1. 计算剪裁的起始点。
2. 剪裁图像。
Args:
crop_size (int): 裁剪的目标边长。默认为224。
"""
def __init__(self, crop_size=224):
self.crop_size = crop_size
def __call__(self, im, label=None):
"""
Args:
im (np.ndarray): image data as np.ndarray.
label (int): class id of the image.
Returns:
tuple: (im, ) with the image np.ndarray when label is None;
otherwise (im, label) with the image np.ndarray and its class id.
"""
im = center_crop(im, self.crop_size)
if label is None:
return (im, )
else:
return (im, label)
class RandomRotate(ClsTransform):
def __init__(self, rotate_range=30, prob=0.5):
"""以一定的概率对图像在[-rotate_range, rotaterange]角度范围内进行旋转,模型训练时的数据增强操作。
Args:
rotate_range (int): 旋转度数的范围。默认为30。
prob (float): 随机旋转的概率。默认为0.5。
"""
self.rotate_range = rotate_range
self.prob = prob
def __call__(self, im, label=None):
"""
Args:
im (np.ndarray): image data as np.ndarray.
label (int): class id of the image.
Returns:
tuple: (im, ) with the image np.ndarray when label is None;
otherwise (im, label) with the image np.ndarray and its class id.
"""
rotate_lower = -self.rotate_range
rotate_upper = self.rotate_range
im = im.astype('uint8')
im = Image.fromarray(im)
if np.random.uniform(0, 1) < self.prob:
im = rotate(im, rotate_lower, rotate_upper)
im = np.asarray(im).astype('float32')
if label is None:
return (im, )
else:
return (im, label)
class RandomDistort(ClsTransform):
"""以一定的概率对图像进行随机像素内容变换,模型训练时的数据增强操作。
1. 对变换的操作顺序进行随机化操作。
2. 按照1中的顺序以一定的概率对图像在范围[-range, range]内进行随机像素内容变换。
Args:
brightness_range (float): 明亮度因子的范围。默认为0.9。
brightness_prob (float): 随机调整明亮度的概率。默认为0.5。
contrast_range (float): 对比度因子的范围。默认为0.9。
contrast_prob (float): 随机调整对比度的概率。默认为0.5。
saturation_range (float): 饱和度因子的范围。默认为0.9。
saturation_prob (float): 随机调整饱和度的概率。默认为0.5。
hue_range (int): 色调因子的范围。默认为18。
hue_prob (float): 随机调整色调的概率。默认为0.5。
"""
def __init__(self,
brightness_range=0.9,
brightness_prob=0.5,
contrast_range=0.9,
contrast_prob=0.5,
saturation_range=0.9,
saturation_prob=0.5,
hue_range=18,
hue_prob=0.5):
self.brightness_range = brightness_range
self.brightness_prob = brightness_prob
self.contrast_range = contrast_range
self.contrast_prob = contrast_prob
self.saturation_range = saturation_range
self.saturation_prob = saturation_prob
self.hue_range = hue_range
self.hue_prob = hue_prob
def __call__(self, im, label=None):
"""
Args:
im (np.ndarray): image data as np.ndarray.
label (int): class id of the image.
Returns:
tuple: (im, ) with the image np.ndarray when label is None;
otherwise (im, label) with the image np.ndarray and its class id.
"""
brightness_lower = 1 - self.brightness_range
brightness_upper = 1 + self.brightness_range
contrast_lower = 1 - self.contrast_range
contrast_upper = 1 + self.contrast_range
saturation_lower = 1 - self.saturation_range
saturation_upper = 1 + self.saturation_range
hue_lower = -self.hue_range
hue_upper = self.hue_range
ops = [brightness, contrast, saturation, hue]
random.shuffle(ops)
params_dict = {
'brightness': {
'brightness_lower': brightness_lower,
'brightness_upper': brightness_upper
},
'contrast': {
'contrast_lower': contrast_lower,
'contrast_upper': contrast_upper
},
'saturation': {
'saturation_lower': saturation_lower,
'saturation_upper': saturation_upper
},
'hue': {
'hue_lower': hue_lower,
'hue_upper': hue_upper
}
}
prob_dict = {
'brightness': self.brightness_prob,
'contrast': self.contrast_prob,
'saturation': self.saturation_prob,
'hue': self.hue_prob,
}
for id in range(len(ops)):
params = params_dict[ops[id].__name__]
prob = prob_dict[ops[id].__name__]
params['im'] = im
if np.random.uniform(0, 1) < prob:
im = ops[id](**params)
if label is None:
return (im, )
else:
return (im, label)
class ArrangeClassifier(ClsTransform):
"""获取训练/验证/预测所需信息。注意:此操作不需用户自己显示调用
Args:
mode (str): 指定数据用于何种用途,取值范围为['train', 'eval', 'test', 'quant']。
Raises:
ValueError: mode的取值不在['train', 'eval', 'test', 'quant']之内。
"""
def __init__(self, mode=None):
if mode not in ['train', 'eval', 'test', 'quant']:
raise ValueError(
"mode must be in ['train', 'eval', 'test', 'quant']!")
self.mode = mode
def __call__(self, im, label=None):
"""
Args:
im (np.ndarray): image data as np.ndarray.
label (int): class id of the image.
Returns:
tuple: (im, label) with the image np.ndarray and its class id when mode is 'train' or 'eval';
(im, ) with the image np.ndarray when mode is 'test' or 'quant'.
"""
im = permute(im, False).astype('float32')
if self.mode == 'train' or self.mode == 'eval':
outputs = (im, label)
else:
outputs = (im, )
return outputs
class ComposedClsTransforms(Compose):
""" 分类模型的基础Transforms流程,具体如下
训练阶段:
1. 随机从图像中crop一块子图,并resize成crop_size大小
2. 将1的输出按0.5的概率随机进行水平翻转
3. 将图像进行归一化
验证/预测阶段:
1. 将图像按比例Resize,使得最小边长度为crop_size[0] * 1.14
2. 从图像中心crop出一个大小为crop_size的图像
3. 将图像进行归一化
Args:
mode(str): 图像处理流程所处阶段,训练/验证/预测,分别对应'train', 'eval', 'test'
crop_size(int|list): 输入模型里的图像大小
mean(list): 图像均值
std(list): 图像方差
"""
def __init__(self,
mode,
crop_size=[224, 224],
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]):
width = crop_size
if isinstance(crop_size, list):
if crop_size[0] != crop_size[1]:
raise Exception(
"In classifier model, width and height should be equal, please modify your parameter `crop_size`"
)
width = crop_size[0]
if width % 32 != 0:
raise Exception(
"In classifier model, width and height should be multiple of 32, e.g 224、256、320...., please modify your parameter `crop_size`"
)
if mode == 'train':
# transforms for training, including data augmentation
transforms = [
RandomCrop(crop_size=width), RandomHorizontalFlip(prob=0.5),
Normalize(
mean=mean, std=std)
]
else:
# transforms for evaluation/prediction
transforms = [
ResizeByShort(short_size=int(width * 1.14)),
CenterCrop(crop_size=width), Normalize(
mean=mean, std=std)
]
super(ComposedClsTransforms, self).__init__(transforms)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
try:
from collections.abc import Sequence
except Exception:
from collections import Sequence
import random
import os.path as osp
import numpy as np
import cv2
from PIL import Image, ImageEnhance
from .imgaug_support import execute_imgaug
from .ops import *
from .box_utils import *
import utils.logging as logging
class DetTransform:
"""检测数据处理基类
"""
def __init__(self):
pass
class Compose(DetTransform):
"""根据数据预处理/增强列表对输入数据进行操作。
所有操作的输入图像流形状均是[H, W, C],其中H为图像高,W为图像宽,C为图像通道数。
Args:
transforms (list): 数据预处理/增强列表。
Raises:
TypeError: 形参数据类型不满足需求。
ValueError: 数据长度不匹配。
"""
def __init__(self, transforms):
if not isinstance(transforms, list):
raise TypeError('The transforms must be a list!')
if len(transforms) < 1:
raise ValueError('The length of transforms ' + \
'must be equal or larger than 1!')
self.transforms = transforms
self.use_mixup = False
for t in self.transforms:
if type(t).__name__ == 'MixupImage':
self.use_mixup = True
# Check the operators in transforms; only PaddleX-defined or imgaug operators are supported
for op in self.transforms:
if not isinstance(op, DetTransform):
import imgaug.augmenters as iaa
if not isinstance(op, iaa.Augmenter):
raise Exception(
"Elements in transforms should be defined in 'paddlex.det.transforms' or class of imgaug.augmenters.Augmenter, see docs here: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/"
)
def __call__(self, im, im_info=None, label_info=None):
"""
Args:
im (str/np.ndarray): image path or image data as np.ndarray.
im_info (dict): information about the image, with the following fields:
- im_id (np.ndarray): image id, of shape (1,).
- image_shape (np.ndarray): original image size, of shape (2,);
image_shape[0] is the height and image_shape[1] the width.
- mixup (list): [im, im_info, label_info] of the image to mix up with
the current one: its np.ndarray data, image info and annotation info;
absent when no mixup is needed in the current epoch.
label_info (dict): information about the ground-truth boxes, with the following fields:
- gt_bbox (np.ndarray): ground-truth box coordinates [x1, y1, x2, y2], of shape (n, 4),
where n is the number of ground-truth boxes.
- gt_class (np.ndarray): class id of each ground-truth box, of shape (n, 1),
where n is the number of ground-truth boxes.
- gt_score (np.ndarray): mixup score of each ground-truth box, of shape (n, 1),
where n is the number of ground-truth boxes.
- gt_poly (list): polygon segmentation regions inside each ground-truth box,
each made up of x/y point coordinates; its length is n,
where n is the number of ground-truth boxes.
- is_crowd (np.ndarray): whether each ground-truth box is a group of objects,
of shape (n, 1), where n is the number of ground-truth boxes.
- difficult (np.ndarray): whether the object in each ground-truth box is hard
to recognize, of shape (n, 1), where n is the number of ground-truth boxes.
Returns:
tuple: the fields required by the network;
determined by the last preprocessing operator in transforms.
"""
def decode_image(im_file, im_info, label_info):
if im_info is None:
im_info = dict()
if isinstance(im_file, np.ndarray):
if len(im_file.shape) != 3:
raise Exception(
"im should be 3-dimensions, but now is {}-dimensions".
format(len(im_file.shape)))
im = im_file
else:
try:
im = cv2.imread(im_file).astype('float32')
except:
raise TypeError('Cannot read the image file {}!'.format(
im_file))
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
# make default im_info with [h, w, 1]
im_info['im_resize_info'] = np.array(
[im.shape[0], im.shape[1], 1.], dtype=np.float32)
im_info['image_shape'] = np.array([im.shape[0],
im.shape[1]]).astype('int32')
if not self.use_mixup:
if 'mixup' in im_info:
del im_info['mixup']
# decode mixup image
if 'mixup' in im_info:
im_info['mixup'] = \
decode_image(im_info['mixup'][0],
im_info['mixup'][1],
im_info['mixup'][2])
if label_info is None:
return (im, im_info)
else:
return (im, im_info, label_info)
outputs = decode_image(im, im_info, label_info)
im = outputs[0]
im_info = outputs[1]
if len(outputs) == 3:
label_info = outputs[2]
for op in self.transforms:
if im is None:
return None
if isinstance(op, DetTransform):
outputs = op(im, im_info, label_info)
im = outputs[0]
else:
im = execute_imgaug(op, im)
if label_info is not None:
outputs = (im, im_info, label_info)
else:
outputs = (im, im_info)
return outputs
def add_augmenters(self, augmenters):
if not isinstance(augmenters, list):
raise Exception(
"augmenters should be list type in func add_augmenters()")
transform_names = [type(x).__name__ for x in self.transforms]
for aug in augmenters:
if type(aug).__name__ in transform_names:
logging.error("{} is already in ComposedTransforms, need to remove it from add_augmenters().".format(type(aug).__name__))
self.transforms = augmenters + self.transforms
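# --- Illustrative usage sketch (not part of the original source): building a
# detection pipeline from the transforms defined below in this module. The
# image path is a placeholder; Compose also accepts an np.ndarray directly.
def _demo_compose_pipeline(image_path='demo.jpg'):
    pipeline = Compose([
        Normalize(),
        ResizeByShort(short_size=800, max_size=1333),
        Padding(coarsest_stride=32),
    ])
    # test-style call: no label_info, so (im, im_info) is returned
    im, im_info = pipeline(image_path)
    return im.shape, im_info['im_resize_info']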
class ResizeByShort(DetTransform):
"""根据图像的短边调整图像大小(resize)。
1. 获取图像的长边和短边长度。
2. 根据短边与short_size的比例,计算长边的目标长度,
此时高、宽的resize比例为short_size/原图短边长度。
3. 如果max_size>0,调整resize比例:
如果长边的目标长度>max_size,则高、宽的resize比例为max_size/原图长边长度。
4. 根据调整大小的比例对图像进行resize。
Args:
target_size (int): 短边目标长度。默认为800。
max_size (int): 长边目标长度的最大限制。默认为1333。
Raises:
TypeError: 形参数据类型不满足需求。
"""
def __init__(self, short_size=800, max_size=1333):
self.max_size = int(max_size)
if not isinstance(short_size, int):
raise TypeError(
"Type of short_size is invalid. Must be Integer, now is {}".
format(type(short_size)))
self.short_size = short_size
if not (isinstance(self.max_size, int)):
raise TypeError("max_size: input type is invalid.")
def __call__(self, im, im_info=None, label_info=None):
"""
Args:
im (numnp.ndarraypy): 图像np.ndarray数据。
im_info (dict, 可选): 存储与图像相关的信息。
label_info (dict, 可选): 存储与标注框相关的信息。
Returns:
tuple: 当label_info为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label_info不为空时,返回的tuple为(im, im_info, label_info),分别对应图像np.ndarray数据、
存储与标注框相关信息的字典。
其中,im_info更新字段为:
- im_resize_info (np.ndarray): resize后的图像高、resize后的图像宽、resize后的图像相对原始图的缩放比例
三者组成的np.ndarray,形状为(3,)。
Raises:
TypeError: 形参数据类型不满足需求。
ValueError: 数据长度不匹配。
"""
if im_info is None:
im_info = dict()
if not isinstance(im, np.ndarray):
raise TypeError("ResizeByShort: image type is not numpy.")
if len(im.shape) != 3:
raise ValueError('ResizeByShort: image is not 3-dimensional.')
im_short_size = min(im.shape[0], im.shape[1])
im_long_size = max(im.shape[0], im.shape[1])
scale = float(self.short_size) / im_short_size
if self.max_size > 0 and np.round(scale *
im_long_size) > self.max_size:
scale = float(self.max_size) / float(im_long_size)
resized_width = int(round(im.shape[1] * scale))
resized_height = int(round(im.shape[0] * scale))
im_resize_info = [resized_height, resized_width, scale]
im = cv2.resize(
im, (resized_width, resized_height),
interpolation=cv2.INTER_LINEAR)
im_info['im_resize_info'] = np.array(im_resize_info).astype(np.float32)
if label_info is None:
return (im, im_info)
else:
return (im, im_info, label_info)
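# Quick numeric check of the scaling rule above (illustrative, not part of
# the original source). For a 600x1200 image, the short-edge scale 800/600
# would push the long edge to 1600 > 1333, so the scale is clamped to
# 1333/1200.
def _demo_resize_by_short_scale():
    import numpy as np
    op = ResizeByShort(short_size=800, max_size=1333)
    im = np.zeros((600, 1200, 3), dtype='float32')
    out, info = op(im)
    assert out.shape[1] == 1333  # long edge lands exactly on max_size
    return out.shape, info['im_resize_info']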
class Padding(DetTransform):
"""1.将图像的长和宽padding至coarsest_stride的倍数。如输入图像为[300, 640],
`coarest_stride`为32,则由于300不为32的倍数,因此在图像最右和最下使用0值
进行padding,最终输出图像为[320, 640]。
2.或者,将图像的长和宽padding到target_size指定的shape,如输入的图像为[300,640],
a. `target_size` = 960,在图像最右和最下使用0值进行padding,最终输出
图像为[960, 960]。
b. `target_size` = [640, 960],在图像最右和最下使用0值进行padding,最终
输出图像为[640, 960]。
1. 如果coarsest_stride为1,target_size为None则直接返回。
2. 获取图像的高H、宽W。
3. 计算填充后图像的高H_new、宽W_new。
4. 构建大小为(H_new, W_new, 3)像素值为0的np.ndarray,
并将原图的np.ndarray粘贴于左上角。
Args:
coarsest_stride (int): 填充后的图像长、宽为该参数的倍数,默认为1。
target_size (int|list|tuple): 填充后的图像长、宽,默认为None,coarset_stride优先级更高。
Raises:
TypeError: 形参`target_size`数据类型不满足需求。
ValueError: 形参`target_size`为(list|tuple)时,长度不满足需求。
"""
def __init__(self, coarsest_stride=1, target_size=None):
self.coarsest_stride = coarsest_stride
if target_size is not None:
if not isinstance(target_size, int):
if not isinstance(target_size, tuple) and not isinstance(
target_size, list):
raise TypeError(
"Padding: Type of target_size must in (int|list|tuple)."
)
elif len(target_size) != 2:
raise ValueError(
"Padding: Length of target_size must equal 2.")
self.target_size = target_size
def __call__(self, im, im_info=None, label_info=None):
"""
Args:
im (numnp.ndarraypy): 图像np.ndarray数据。
im_info (dict, 可选): 存储与图像相关的信息。
label_info (dict, 可选): 存储与标注框相关的信息。
Returns:
tuple: 当label_info为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label_info不为空时,返回的tuple为(im, im_info, label_info),分别对应图像np.ndarray数据、
存储与标注框相关信息的字典。
Raises:
TypeError: 形参数据类型不满足需求。
ValueError: 数据长度不匹配。
ValueError: coarsest_stride,target_size需有且只有一个被指定。
ValueError: target_size小于原图的大小。
"""
if im_info is None:
im_info = dict()
if not isinstance(im, np.ndarray):
raise TypeError("Padding: image type is not numpy.")
if len(im.shape) != 3:
raise ValueError('Padding: image is not 3-dimensional.')
im_h, im_w, im_c = im.shape[:]
if isinstance(self.target_size, int):
padding_im_h = self.target_size
padding_im_w = self.target_size
elif isinstance(self.target_size, list) or isinstance(self.target_size,
tuple):
padding_im_w = self.target_size[0]
padding_im_h = self.target_size[1]
elif self.coarsest_stride > 0:
padding_im_h = int(
np.ceil(im_h / self.coarsest_stride) * self.coarsest_stride)
padding_im_w = int(
np.ceil(im_w / self.coarsest_stride) * self.coarsest_stride)
else:
raise ValueError(
"coarsest_stridei(>1) or target_size(list|int) need setting in Padding transform"
)
pad_height = padding_im_h - im_h
pad_width = padding_im_w - im_w
if pad_height < 0 or pad_width < 0:
raise ValueError(
                'the size of the image should be no larger than target_size, but the image size ({}, {}) is larger than target_size ({}, {})'
.format(im_w, im_h, padding_im_w, padding_im_h))
padding_im = np.zeros(
(padding_im_h, padding_im_w, im_c), dtype=np.float32)
padding_im[:im_h, :im_w, :] = im
if label_info is None:
return (padding_im, im_info)
else:
return (padding_im, im_info, label_info)
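# Quick numeric check of the coarsest_stride rule above (illustrative, not
# part of the original source): the padded size is ceil(side/stride)*stride,
# so a 300x640 image with stride 32 becomes 320x640, zero-padded on the
# bottom and right.
def _demo_padding_stride():
    import numpy as np
    op = Padding(coarsest_stride=32)
    im = np.ones((300, 640, 3), dtype='float32')
    padded, _ = op(im)
    assert padded.shape == (320, 640, 3)
    return padded.shape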
class Resize(DetTransform):
"""调整图像大小(resize)。
- 当目标大小(target_size)类型为int时,根据插值方式,
将图像resize为[target_size, target_size]。
- 当目标大小(target_size)类型为list或tuple时,根据插值方式,
将图像resize为target_size。
注意:当插值方式为“RANDOM”时,则随机选取一种插值方式进行resize。
Args:
target_size (int/list/tuple): 短边目标长度。默认为608。
interp (str): resize的插值方式,与opencv的插值方式对应,取值范围为
['NEAREST', 'LINEAR', 'CUBIC', 'AREA', 'LANCZOS4', 'RANDOM']。默认为"LINEAR"。
Raises:
TypeError: 形参数据类型不满足需求。
ValueError: 插值方式不在['NEAREST', 'LINEAR', 'CUBIC',
'AREA', 'LANCZOS4', 'RANDOM']中。
"""
# The interpolation mode
interp_dict = {
'NEAREST': cv2.INTER_NEAREST,
'LINEAR': cv2.INTER_LINEAR,
'CUBIC': cv2.INTER_CUBIC,
'AREA': cv2.INTER_AREA,
'LANCZOS4': cv2.INTER_LANCZOS4
}
def __init__(self, target_size=608, interp='LINEAR'):
self.interp = interp
if not (interp == "RANDOM" or interp in self.interp_dict):
raise ValueError("interp should be one of {}".format(
self.interp_dict.keys()))
if isinstance(target_size, list) or isinstance(target_size, tuple):
if len(target_size) != 2:
raise TypeError(
'when target is list or tuple, it should include 2 elements, but it is {}'
.format(target_size))
elif not isinstance(target_size, int):
raise TypeError(
"Type of target_size is invalid. Must be Integer or List or tuple, now is {}"
.format(type(target_size)))
self.target_size = target_size
def __call__(self, im, im_info=None, label_info=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (dict, 可选): 存储与图像相关的信息。
label_info (dict, 可选): 存储与标注框相关的信息。
Returns:
tuple: 当label_info为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label_info不为空时,返回的tuple为(im, im_info, label_info),分别对应图像np.ndarray数据、
存储与标注框相关信息的字典。
Raises:
TypeError: 形参数据类型不满足需求。
ValueError: 数据长度不匹配。
"""
if im_info is None:
im_info = dict()
if not isinstance(im, np.ndarray):
raise TypeError("Resize: image type is not numpy.")
if len(im.shape) != 3:
raise ValueError('Resize: image is not 3-dimensional.')
if self.interp == "RANDOM":
interp = random.choice(list(self.interp_dict.keys()))
else:
interp = self.interp
im = resize(im, self.target_size, self.interp_dict[interp])
if label_info is None:
return (im, im_info)
else:
return (im, im_info, label_info)
class RandomHorizontalFlip(DetTransform):
"""随机翻转图像、标注框、分割信息,模型训练时的数据增强操作。
1. 随机采样一个0-1之间的小数,当小数小于水平翻转概率时,
执行2-4步操作,否则直接返回。
2. 水平翻转图像。
3. 计算翻转后的真实标注框的坐标,更新label_info中的gt_bbox信息。
4. 计算翻转后的真实分割区域的坐标,更新label_info中的gt_poly信息。
Args:
prob (float): 随机水平翻转的概率。默认为0.5。
Raises:
TypeError: 形参数据类型不满足需求。
"""
def __init__(self, prob=0.5):
self.prob = prob
if not isinstance(self.prob, float):
raise TypeError("RandomHorizontalFlip: input type is invalid.")
def __call__(self, im, im_info=None, label_info=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (dict, 可选): 存储与图像相关的信息。
label_info (dict, 可选): 存储与标注框相关的信息。
Returns:
tuple: 当label_info为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label_info不为空时,返回的tuple为(im, im_info, label_info),分别对应图像np.ndarray数据、
存储与标注框相关信息的字典。
其中,im_info更新字段为:
- gt_bbox (np.ndarray): 水平翻转后的标注框坐标[x1, y1, x2, y2],形状为(n, 4),
其中n代表真实标注框的个数。
- gt_poly (list): 水平翻转后的多边形分割区域的x、y坐标,长度为n,
其中n代表真实标注框的个数。
Raises:
TypeError: 形参数据类型不满足需求。
ValueError: 数据长度不匹配。
"""
if not isinstance(im, np.ndarray):
raise TypeError(
"RandomHorizontalFlip: image is not a numpy array.")
if len(im.shape) != 3:
raise ValueError(
"RandomHorizontalFlip: image is not 3-dimensional.")
if im_info is None or label_info is None:
raise TypeError(
'Cannot do RandomHorizontalFlip! ' +
                'Because the im_info and label_info can not be None!')
if 'gt_bbox' not in label_info:
raise TypeError('Cannot do RandomHorizontalFlip! ' + \
                            'Because gt_bbox is not in label_info!')
image_shape = im_info['image_shape']
gt_bbox = label_info['gt_bbox']
height = image_shape[0]
width = image_shape[1]
if np.random.uniform(0, 1) < self.prob:
im = horizontal_flip(im)
if gt_bbox.shape[0] == 0:
if label_info is None:
return (im, im_info)
else:
return (im, im_info, label_info)
label_info['gt_bbox'] = box_horizontal_flip(gt_bbox, width)
if 'gt_poly' in label_info and \
len(label_info['gt_poly']) != 0:
label_info['gt_poly'] = segms_horizontal_flip(
label_info['gt_poly'], height, width)
if label_info is None:
return (im, im_info)
else:
return (im, im_info, label_info)
class Normalize(DetTransform):
"""对图像进行标准化。
1. 归一化图像到到区间[0.0, 1.0]。
2. 对图像进行减均值除以标准差操作。
Args:
mean (list): 图像数据集的均值。默认为[0.485, 0.456, 0.406]。
std (list): 图像数据集的标准差。默认为[0.229, 0.224, 0.225]。
Raises:
TypeError: 形参数据类型不满足需求。
"""
def __init__(self, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]):
self.mean = mean
self.std = std
if not (isinstance(self.mean, list) and isinstance(self.std, list)):
raise TypeError("NormalizeImage: input type is invalid.")
from functools import reduce
if reduce(lambda x, y: x * y, self.std) == 0:
raise TypeError('NormalizeImage: std is invalid!')
def __call__(self, im, im_info=None, label_info=None):
"""
Args:
im (numnp.ndarraypy): 图像np.ndarray数据。
im_info (dict, 可选): 存储与图像相关的信息。
label_info (dict, 可选): 存储与标注框相关的信息。
Returns:
tuple: 当label_info为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label_info不为空时,返回的tuple为(im, im_info, label_info),分别对应图像np.ndarray数据、
存储与标注框相关信息的字典。
"""
mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
std = np.array(self.std)[np.newaxis, np.newaxis, :]
im = normalize(im, mean, std)
if label_info is None:
return (im, im_info)
else:
return (im, im_info, label_info)
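# Minimal numeric sketch (not part of the original source) of the
# normalization above, which computes (im / 255 - mean) / std channel-wise.
def _demo_normalize():
    import numpy as np
    op = Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
    im = np.full((2, 2, 3), 255., dtype='float32')
    out, _ = op(im)
    assert np.allclose(out, 1.0)  # 255/255 = 1.0 -> (1.0 - 0.5) / 0.5 = 1.0
    return out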
class RandomDistort(DetTransform):
"""以一定的概率对图像进行随机像素内容变换,模型训练时的数据增强操作
1. 对变换的操作顺序进行随机化操作。
2. 按照1中的顺序以一定的概率在范围[-range, range]对图像进行随机像素内容变换。
Args:
brightness_range (float): 明亮度因子的范围。默认为0.5。
brightness_prob (float): 随机调整明亮度的概率。默认为0.5。
contrast_range (float): 对比度因子的范围。默认为0.5。
contrast_prob (float): 随机调整对比度的概率。默认为0.5。
saturation_range (float): 饱和度因子的范围。默认为0.5。
saturation_prob (float): 随机调整饱和度的概率。默认为0.5。
hue_range (int): 色调因子的范围。默认为18。
hue_prob (float): 随机调整色调的概率。默认为0.5。
"""
def __init__(self,
brightness_range=0.5,
brightness_prob=0.5,
contrast_range=0.5,
contrast_prob=0.5,
saturation_range=0.5,
saturation_prob=0.5,
hue_range=18,
hue_prob=0.5):
self.brightness_range = brightness_range
self.brightness_prob = brightness_prob
self.contrast_range = contrast_range
self.contrast_prob = contrast_prob
self.saturation_range = saturation_range
self.saturation_prob = saturation_prob
self.hue_range = hue_range
self.hue_prob = hue_prob
def __call__(self, im, im_info=None, label_info=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (dict, 可选): 存储与图像相关的信息。
label_info (dict, 可选): 存储与标注框相关的信息。
Returns:
tuple: 当label_info为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label_info不为空时,返回的tuple为(im, im_info, label_info),分别对应图像np.ndarray数据、
存储与标注框相关信息的字典。
"""
brightness_lower = 1 - self.brightness_range
brightness_upper = 1 + self.brightness_range
contrast_lower = 1 - self.contrast_range
contrast_upper = 1 + self.contrast_range
saturation_lower = 1 - self.saturation_range
saturation_upper = 1 + self.saturation_range
hue_lower = -self.hue_range
hue_upper = self.hue_range
ops = [brightness, contrast, saturation, hue]
random.shuffle(ops)
params_dict = {
'brightness': {
'brightness_lower': brightness_lower,
'brightness_upper': brightness_upper
},
'contrast': {
'contrast_lower': contrast_lower,
'contrast_upper': contrast_upper
},
'saturation': {
'saturation_lower': saturation_lower,
'saturation_upper': saturation_upper
},
'hue': {
'hue_lower': hue_lower,
'hue_upper': hue_upper
}
}
prob_dict = {
'brightness': self.brightness_prob,
'contrast': self.contrast_prob,
'saturation': self.saturation_prob,
'hue': self.hue_prob
}
for id in range(4):
params = params_dict[ops[id].__name__]
prob = prob_dict[ops[id].__name__]
params['im'] = im
if np.random.uniform(0, 1) < prob:
im = ops[id](**params)
if label_info is None:
return (im, im_info)
else:
return (im, im_info, label_info)
class MixupImage(DetTransform):
"""对图像进行mixup操作,模型训练时的数据增强操作,目前仅YOLOv3模型支持该transform。
当label_info中不存在mixup字段时,直接返回,否则进行下述操作:
1. 从随机beta分布中抽取出随机因子factor。
2.
- 当factor>=1.0时,去除label_info中的mixup字段,直接返回。
- 当factor<=0.0时,直接返回label_info中的mixup字段,并在label_info中去除该字段。
- 其余情况,执行下述操作:
(1)原图像乘以factor,mixup图像乘以(1-factor),叠加2个结果。
(2)拼接原图像标注框和mixup图像标注框。
(3)拼接原图像标注框类别和mixup图像标注框类别。
(4)原图像标注框混合得分乘以factor,mixup图像标注框混合得分乘以(1-factor),叠加2个结果。
3. 更新im_info中的image_shape信息。
Args:
alpha (float): 随机beta分布的下限。默认为1.5。
beta (float): 随机beta分布的上限。默认为1.5。
mixup_epoch (int): 在前mixup_epoch轮使用mixup增强操作;当该参数为-1时,该策略不会生效。
默认为-1。
Raises:
ValueError: 数据长度不匹配。
"""
def __init__(self, alpha=1.5, beta=1.5, mixup_epoch=-1):
self.alpha = alpha
self.beta = beta
        if self.alpha <= 0.0:
            raise ValueError("alpha should be positive in MixupImage")
        if self.beta <= 0.0:
            raise ValueError("beta should be positive in MixupImage")
self.mixup_epoch = mixup_epoch
def _mixup_img(self, img1, img2, factor):
h = max(img1.shape[0], img2.shape[0])
w = max(img1.shape[1], img2.shape[1])
img = np.zeros((h, w, img1.shape[2]), 'float32')
img[:img1.shape[0], :img1.shape[1], :] = \
img1.astype('float32') * factor
img[:img2.shape[0], :img2.shape[1], :] += \
img2.astype('float32') * (1.0 - factor)
return img.astype('float32')
def __call__(self, im, im_info=None, label_info=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (dict, 可选): 存储与图像相关的信息。
label_info (dict, 可选): 存储与标注框相关的信息。
Returns:
tuple: 当label_info为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label_info不为空时,返回的tuple为(im, im_info, label_info),分别对应图像np.ndarray数据、
存储与标注框相关信息的字典。
其中,im_info更新字段为:
- image_shape (np.ndarray): mixup后的图像高、宽二者组成的np.ndarray,形状为(2,)。
im_info删除的字段:
- mixup (list): 与当前字段进行mixup的图像相关信息。
label_info更新字段为:
- gt_bbox (np.ndarray): mixup后真实标注框坐标,形状为(n, 4),
其中n代表真实标注框的个数。
- gt_class (np.ndarray): mixup后每个真实标注框对应的类别序号,形状为(n, 1),
其中n代表真实标注框的个数。
- gt_score (np.ndarray): mixup后每个真实标注框对应的混合得分,形状为(n, 1),
其中n代表真实标注框的个数。
Raises:
TypeError: 形参数据类型不满足需求。
"""
if im_info is None:
raise TypeError('Cannot do MixupImage! ' +
                            'Because the im_info can not be None!')
if 'mixup' not in im_info:
if label_info is None:
return (im, im_info)
else:
return (im, im_info, label_info)
factor = np.random.beta(self.alpha, self.beta)
factor = max(0.0, min(1.0, factor))
if im_info['epoch'] > self.mixup_epoch \
or factor >= 1.0:
im_info.pop('mixup')
if label_info is None:
return (im, im_info)
else:
return (im, im_info, label_info)
if factor <= 0.0:
return im_info.pop('mixup')
im = self._mixup_img(im, im_info['mixup'][0], factor)
if label_info is None:
raise TypeError('Cannot do MixupImage! ' +
                            'Because the label_info can not be None!')
if 'gt_bbox' not in label_info or \
'gt_class' not in label_info or \
'gt_score' not in label_info:
raise TypeError('Cannot do MixupImage! ' + \
                            'Because gt_bbox/gt_class/gt_score is not in label_info!')
gt_bbox1 = label_info['gt_bbox']
gt_bbox2 = im_info['mixup'][2]['gt_bbox']
gt_class1 = label_info['gt_class']
gt_class2 = im_info['mixup'][2]['gt_class']
gt_score1 = label_info['gt_score']
gt_score2 = im_info['mixup'][2]['gt_score']
if 'gt_poly' in label_info:
gt_poly1 = label_info['gt_poly']
gt_poly2 = im_info['mixup'][2]['gt_poly']
is_crowd1 = label_info['is_crowd']
is_crowd2 = im_info['mixup'][2]['is_crowd']
if 0 not in gt_class1 and 0 not in gt_class2:
gt_bbox = np.concatenate((gt_bbox1, gt_bbox2), axis=0)
gt_class = np.concatenate((gt_class1, gt_class2), axis=0)
gt_score = np.concatenate(
(gt_score1 * factor, gt_score2 * (1. - factor)), axis=0)
if 'gt_poly' in label_info:
label_info['gt_poly'] = gt_poly1 + gt_poly2
is_crowd = np.concatenate((is_crowd1, is_crowd2), axis=0)
elif 0 in gt_class1:
gt_bbox = gt_bbox2
gt_class = gt_class2
gt_score = gt_score2 * (1. - factor)
if 'gt_poly' in label_info:
label_info['gt_poly'] = gt_poly2
is_crowd = is_crowd2
else:
gt_bbox = gt_bbox1
gt_class = gt_class1
gt_score = gt_score1 * factor
if 'gt_poly' in label_info:
label_info['gt_poly'] = gt_poly1
is_crowd = is_crowd1
label_info['gt_bbox'] = gt_bbox
label_info['gt_score'] = gt_score
label_info['gt_class'] = gt_class
label_info['is_crowd'] = is_crowd
im_info['image_shape'] = np.array([im.shape[0],
im.shape[1]]).astype('int32')
im_info.pop('mixup')
if label_info is None:
return (im, im_info)
else:
return (im, im_info, label_info)
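# Minimal sketch (not part of the original source) of the pixel blending done
# by _mixup_img above: both images are pasted onto a canvas of the joint
# maximum size and combined as img1 * factor + img2 * (1 - factor).
def _demo_mixup_img():
    import numpy as np
    op = MixupImage(alpha=1.5, beta=1.5)
    img1 = np.full((2, 2, 3), 100., dtype='float32')
    img2 = np.full((2, 2, 3), 200., dtype='float32')
    mixed = op._mixup_img(img1, img2, factor=0.25)
    assert np.allclose(mixed, 100 * 0.25 + 200 * 0.75)  # == 175.0
    return mixed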
class RandomExpand(DetTransform):
"""随机扩张图像,模型训练时的数据增强操作。
1. 随机选取扩张比例(扩张比例大于1时才进行扩张)。
2. 计算扩张后图像大小。
3. 初始化像素值为输入填充值的图像,并将原图像随机粘贴于该图像上。
4. 根据原图像粘贴位置换算出扩张后真实标注框的位置坐标。
5. 根据原图像粘贴位置换算出扩张后真实分割区域的位置坐标。
Args:
ratio (float): 图像扩张的最大比例。默认为4.0。
prob (float): 随机扩张的概率。默认为0.5。
fill_value (list): 扩张图像的初始填充值(0-255)。默认为[123.675, 116.28, 103.53]。
"""
def __init__(self,
ratio=4.,
prob=0.5,
fill_value=[123.675, 116.28, 103.53]):
super(RandomExpand, self).__init__()
assert ratio > 1.01, "expand ratio must be larger than 1.01"
self.ratio = ratio
self.prob = prob
assert isinstance(fill_value, Sequence), \
"fill value must be sequence"
if not isinstance(fill_value, tuple):
fill_value = tuple(fill_value)
self.fill_value = fill_value
def __call__(self, im, im_info=None, label_info=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (dict, 可选): 存储与图像相关的信息。
label_info (dict, 可选): 存储与标注框相关的信息。
Returns:
tuple: 当label_info为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label_info不为空时,返回的tuple为(im, im_info, label_info),分别对应图像np.ndarray数据、
存储与标注框相关信息的字典。
其中,im_info更新字段为:
- image_shape (np.ndarray): 扩张后的图像高、宽二者组成的np.ndarray,形状为(2,)。
label_info更新字段为:
- gt_bbox (np.ndarray): 随机扩张后真实标注框坐标,形状为(n, 4),
其中n代表真实标注框的个数。
- gt_class (np.ndarray): 随机扩张后每个真实标注框对应的类别序号,形状为(n, 1),
其中n代表真实标注框的个数。
Raises:
TypeError: 形参数据类型不满足需求。
"""
if im_info is None or label_info is None:
raise TypeError(
'Cannot do RandomExpand! ' +
                'Because the im_info and label_info can not be None!')
if 'gt_bbox' not in label_info or \
'gt_class' not in label_info:
raise TypeError('Cannot do RandomExpand! ' + \
                            'Because gt_bbox/gt_class is not in label_info!')
if np.random.uniform(0., 1.) < self.prob:
return (im, im_info, label_info)
if 'gt_class' in label_info and 0 in label_info['gt_class']:
return (im, im_info, label_info)
image_shape = im_info['image_shape']
height = int(image_shape[0])
width = int(image_shape[1])
expand_ratio = np.random.uniform(1., self.ratio)
h = int(height * expand_ratio)
w = int(width * expand_ratio)
if not h > height or not w > width:
return (im, im_info, label_info)
y = np.random.randint(0, h - height)
x = np.random.randint(0, w - width)
canvas = np.ones((h, w, 3), dtype=np.float32)
canvas *= np.array(self.fill_value, dtype=np.float32)
canvas[y:y + height, x:x + width, :] = im
im_info['image_shape'] = np.array([h, w]).astype('int32')
if 'gt_bbox' in label_info and len(label_info['gt_bbox']) > 0:
label_info['gt_bbox'] += np.array([x, y] * 2, dtype=np.float32)
if 'gt_poly' in label_info and len(label_info['gt_poly']) > 0:
label_info['gt_poly'] = expand_segms(label_info['gt_poly'], x, y,
height, width, expand_ratio)
return (canvas, im_info, label_info)
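# Worked example (illustrative, not part of the original source): after
# expansion every gt box is shifted by the random paste offset (x, y, x, y).
# Note that the code above returns unchanged when the random draw is below
# prob, so prob=0. forces expansion in this sketch.
def _demo_random_expand_offsets():
    import numpy as np
    np.random.seed(0)
    op = RandomExpand(ratio=2., prob=0.)
    im = np.zeros((100, 100, 3), dtype='float32')
    im_info = {'image_shape': np.array([100, 100])}
    label_info = {'gt_bbox': np.array([[10., 10., 50., 50.]], dtype='float32'),
                  'gt_class': np.array([[1]])}
    canvas, im_info, label_info = op(im, im_info, label_info)
    return canvas.shape, label_info['gt_bbox']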
class RandomCrop(DetTransform):
"""随机裁剪图像。
1. 若allow_no_crop为True,则在thresholds加入’no_crop’。
2. 随机打乱thresholds。
3. 遍历thresholds中各元素:
(1) 如果当前thresh为’no_crop’,则返回原始图像和标注信息。
(2) 随机取出aspect_ratio和scaling中的值并由此计算出候选裁剪区域的高、宽、起始点。
(3) 计算真实标注框与候选裁剪区域IoU,若全部真实标注框的IoU都小于thresh,则继续第3步。
(4) 如果cover_all_box为True且存在真实标注框的IoU小于thresh,则继续第3步。
(5) 筛选出位于候选裁剪区域内的真实标注框,若有效框的个数为0,则继续第3步,否则进行第4步。
4. 换算有效真值标注框相对候选裁剪区域的位置坐标。
5. 换算有效分割区域相对候选裁剪区域的位置坐标。
Args:
aspect_ratio (list): 裁剪后短边缩放比例的取值范围,以[min, max]形式表示。默认值为[.5, 2.]。
thresholds (list): 判断裁剪候选区域是否有效所需的IoU阈值取值列表。默认值为[.0, .1, .3, .5, .7, .9]。
scaling (list): 裁剪面积相对原面积的取值范围,以[min, max]形式表示。默认值为[.3, 1.]。
num_attempts (int): 在放弃寻找有效裁剪区域前尝试的次数。默认值为50。
allow_no_crop (bool): 是否允许未进行裁剪。默认值为True。
cover_all_box (bool): 是否要求所有的真实标注框都必须在裁剪区域内。默认值为False。
"""
def __init__(self,
aspect_ratio=[.5, 2.],
thresholds=[.0, .1, .3, .5, .7, .9],
scaling=[.3, 1.],
num_attempts=50,
allow_no_crop=True,
cover_all_box=False):
self.aspect_ratio = aspect_ratio
self.thresholds = thresholds
self.scaling = scaling
self.num_attempts = num_attempts
self.allow_no_crop = allow_no_crop
self.cover_all_box = cover_all_box
def __call__(self, im, im_info=None, label_info=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (dict, 可选): 存储与图像相关的信息。
label_info (dict, 可选): 存储与标注框相关的信息。
Returns:
tuple: 当label_info为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label_info不为空时,返回的tuple为(im, im_info, label_info),分别对应图像np.ndarray数据、
存储与标注框相关信息的字典。
其中,im_info更新字段为:
- image_shape (np.ndarray): 扩裁剪的图像高、宽二者组成的np.ndarray,形状为(2,)。
label_info更新字段为:
- gt_bbox (np.ndarray): 随机裁剪后真实标注框坐标,形状为(n, 4),
其中n代表真实标注框的个数。
- gt_class (np.ndarray): 随机裁剪后每个真实标注框对应的类别序号,形状为(n, 1),
其中n代表真实标注框的个数。
- gt_score (np.ndarray): 随机裁剪后每个真实标注框对应的混合得分,形状为(n, 1),
其中n代表真实标注框的个数。
Raises:
TypeError: 形参数据类型不满足需求。
"""
if im_info is None or label_info is None:
raise TypeError(
'Cannot do RandomCrop! ' +
                'Because the im_info and label_info can not be None!')
if 'gt_bbox' not in label_info or \
'gt_class' not in label_info:
raise TypeError('Cannot do RandomCrop! ' + \
                            'Because gt_bbox/gt_class is not in label_info!')
if len(label_info['gt_bbox']) == 0:
return (im, im_info, label_info)
if 'gt_class' in label_info and 0 in label_info['gt_class']:
return (im, im_info, label_info)
image_shape = im_info['image_shape']
w = image_shape[1]
h = image_shape[0]
gt_bbox = label_info['gt_bbox']
thresholds = list(self.thresholds)
if self.allow_no_crop:
thresholds.append('no_crop')
np.random.shuffle(thresholds)
for thresh in thresholds:
if thresh == 'no_crop':
return (im, im_info, label_info)
found = False
for i in range(self.num_attempts):
scale = np.random.uniform(*self.scaling)
min_ar, max_ar = self.aspect_ratio
aspect_ratio = np.random.uniform(
max(min_ar, scale**2), min(max_ar, scale**-2))
crop_h = int(h * scale / np.sqrt(aspect_ratio))
crop_w = int(w * scale * np.sqrt(aspect_ratio))
crop_y = np.random.randint(0, h - crop_h)
crop_x = np.random.randint(0, w - crop_w)
crop_box = [crop_x, crop_y, crop_x + crop_w, crop_y + crop_h]
iou = iou_matrix(
gt_bbox, np.array(
[crop_box], dtype=np.float32))
if iou.max() < thresh:
continue
if self.cover_all_box and iou.min() < thresh:
continue
cropped_box, valid_ids = crop_box_with_center_constraint(
gt_bbox, np.array(
crop_box, dtype=np.float32))
if valid_ids.size > 0:
found = True
break
if found:
if 'gt_poly' in label_info and len(label_info['gt_poly']) > 0:
crop_polys = crop_segms(
label_info['gt_poly'],
valid_ids,
np.array(
crop_box, dtype=np.int64),
h,
w)
if [] in crop_polys:
delete_id = list()
valid_polys = list()
for id, crop_poly in enumerate(crop_polys):
if crop_poly == []:
delete_id.append(id)
else:
valid_polys.append(crop_poly)
valid_ids = np.delete(valid_ids, delete_id)
if len(valid_polys) == 0:
return (im, im_info, label_info)
label_info['gt_poly'] = valid_polys
else:
label_info['gt_poly'] = crop_polys
im = crop_image(im, crop_box)
label_info['gt_bbox'] = np.take(cropped_box, valid_ids, axis=0)
label_info['gt_class'] = np.take(
label_info['gt_class'], valid_ids, axis=0)
im_info['image_shape'] = np.array(
[crop_box[3] - crop_box[1],
crop_box[2] - crop_box[0]]).astype('int32')
if 'gt_score' in label_info:
label_info['gt_score'] = np.take(
label_info['gt_score'], valid_ids, axis=0)
if 'is_crowd' in label_info:
label_info['is_crowd'] = np.take(
label_info['is_crowd'], valid_ids, axis=0)
return (im, im_info, label_info)
return (im, im_info, label_info)
class ArrangeFasterRCNN(DetTransform):
"""获取FasterRCNN模型训练/验证/预测所需信息。
Args:
mode (str): 指定数据用于何种用途,取值范围为['train', 'eval', 'test', 'quant']。
Raises:
ValueError: mode的取值不在['train', 'eval', 'test', 'quant']之内。
"""
def __init__(self, mode=None):
if mode not in ['train', 'eval', 'test', 'quant']:
raise ValueError(
"mode must be in ['train', 'eval', 'test', 'quant']!")
self.mode = mode
def __call__(self, im, im_info=None, label_info=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (dict, 可选): 存储与图像相关的信息。
label_info (dict, 可选): 存储与标注框相关的信息。
Returns:
tuple: 当mode为'train'时,返回(im, im_resize_info, gt_bbox, gt_class, is_crowd),分别对应
图像np.ndarray数据、图像相当对于原图的resize信息、真实标注框、真实标注框对应的类别、真实标注框内是否是一组对象;
当mode为'eval'时,返回(im, im_resize_info, im_id, im_shape, gt_bbox, gt_class, is_difficult),
分别对应图像np.ndarray数据、图像相当对于原图的resize信息、图像id、图像大小信息、真实标注框、真实标注框对应的类别、
真实标注框是否为难识别对象;当mode为'test'或'quant'时,返回(im, im_resize_info, im_shape),分别对应图像np.ndarray数据、
图像相当对于原图的resize信息、图像大小信息。
Raises:
TypeError: 形参数据类型不满足需求。
ValueError: 数据长度不匹配。
"""
im = permute(im, False)
if self.mode == 'train':
if im_info is None or label_info is None:
raise TypeError(
'Cannot do ArrangeFasterRCNN! ' +
                    'Because the im_info and label_info can not be None!')
if len(label_info['gt_bbox']) != len(label_info['gt_class']):
raise ValueError("gt num mismatch: bbox and class.")
im_resize_info = im_info['im_resize_info']
gt_bbox = label_info['gt_bbox']
gt_class = label_info['gt_class']
is_crowd = label_info['is_crowd']
outputs = (im, im_resize_info, gt_bbox, gt_class, is_crowd)
elif self.mode == 'eval':
if im_info is None or label_info is None:
raise TypeError(
'Cannot do ArrangeFasterRCNN! ' +
                    'Because the im_info and label_info can not be None!')
im_resize_info = im_info['im_resize_info']
im_id = im_info['im_id']
im_shape = np.array(
(im_info['image_shape'][0], im_info['image_shape'][1], 1),
dtype=np.float32)
gt_bbox = label_info['gt_bbox']
gt_class = label_info['gt_class']
is_difficult = label_info['difficult']
outputs = (im, im_resize_info, im_id, im_shape, gt_bbox, gt_class,
is_difficult)
else:
if im_info is None:
raise TypeError('Cannot do ArrangeFasterRCNN! ' +
                                'Because the im_info can not be None!')
im_resize_info = im_info['im_resize_info']
im_shape = np.array(
(im_info['image_shape'][0], im_info['image_shape'][1], 1),
dtype=np.float32)
outputs = (im, im_resize_info, im_shape)
return outputs
class ArrangeMaskRCNN(DetTransform):
"""获取MaskRCNN模型训练/验证/预测所需信息。
Args:
mode (str): 指定数据用于何种用途,取值范围为['train', 'eval', 'test', 'quant']。
Raises:
ValueError: mode的取值不在['train', 'eval', 'test', 'quant']之内。
"""
def __init__(self, mode=None):
if mode not in ['train', 'eval', 'test', 'quant']:
raise ValueError(
"mode must be in ['train', 'eval', 'test', 'quant']!")
self.mode = mode
def __call__(self, im, im_info=None, label_info=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (dict, 可选): 存储与图像相关的信息。
label_info (dict, 可选): 存储与标注框相关的信息。
Returns:
tuple: 当mode为'train'时,返回(im, im_resize_info, gt_bbox, gt_class, is_crowd, gt_masks),分别对应
图像np.ndarray数据、图像相当对于原图的resize信息、真实标注框、真实标注框对应的类别、真实标注框内是否是一组对象、
真实分割区域;当mode为'eval'时,返回(im, im_resize_info, im_id, im_shape),分别对应图像np.ndarray数据、
图像相当对于原图的resize信息、图像id、图像大小信息;当mode为'test'或'quant'时,返回(im, im_resize_info, im_shape),
分别对应图像np.ndarray数据、图像相当对于原图的resize信息、图像大小信息。
Raises:
TypeError: 形参数据类型不满足需求。
ValueError: 数据长度不匹配。
"""
im = permute(im, False)
if self.mode == 'train':
if im_info is None or label_info is None:
raise TypeError(
                    'Cannot do ArrangeMaskRCNN! ' +
                    'Because the im_info and label_info can not be None!')
if len(label_info['gt_bbox']) != len(label_info['gt_class']):
raise ValueError("gt num mismatch: bbox and class.")
im_resize_info = im_info['im_resize_info']
gt_bbox = label_info['gt_bbox']
gt_class = label_info['gt_class']
is_crowd = label_info['is_crowd']
assert 'gt_poly' in label_info
segms = label_info['gt_poly']
if len(segms) != 0:
assert len(segms) == is_crowd.shape[0]
gt_masks = []
valid = True
for i in range(len(segms)):
segm = segms[i]
gt_segm = []
if is_crowd[i]:
gt_segm.append([[0, 0]])
else:
for poly in segm:
if len(poly) == 0:
valid = False
break
gt_segm.append(np.array(poly).reshape(-1, 2))
if (not valid) or len(gt_segm) == 0:
break
gt_masks.append(gt_segm)
outputs = (im, im_resize_info, gt_bbox, gt_class, is_crowd,
gt_masks)
else:
if im_info is None:
raise TypeError('Cannot do ArrangeMaskRCNN! ' +
                                'Because the im_info can not be None!')
im_resize_info = im_info['im_resize_info']
im_shape = np.array(
(im_info['image_shape'][0], im_info['image_shape'][1], 1),
dtype=np.float32)
if self.mode == 'eval':
im_id = im_info['im_id']
outputs = (im, im_resize_info, im_id, im_shape)
else:
outputs = (im, im_resize_info, im_shape)
return outputs
class ArrangeYOLOv3(DetTransform):
"""获取YOLOv3模型训练/验证/预测所需信息。
Args:
mode (str): 指定数据用于何种用途,取值范围为['train', 'eval', 'test', 'quant']。
Raises:
ValueError: mode的取值不在['train', 'eval', 'test', 'quant']之内。
"""
def __init__(self, mode=None):
if mode not in ['train', 'eval', 'test', 'quant']:
raise ValueError(
"mode must be in ['train', 'eval', 'test', 'quant']!")
self.mode = mode
def __call__(self, im, im_info=None, label_info=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (dict, 可选): 存储与图像相关的信息。
label_info (dict, 可选): 存储与标注框相关的信息。
Returns:
tuple: 当mode为'train'时,返回(im, gt_bbox, gt_class, gt_score, im_shape),分别对应
图像np.ndarray数据、真实标注框、真实标注框对应的类别、真实标注框混合得分、图像大小信息;
当mode为'eval'时,返回(im, im_shape, im_id, gt_bbox, gt_class, difficult),
分别对应图像np.ndarray数据、图像大小信息、图像id、真实标注框、真实标注框对应的类别、
真实标注框是否为难识别对象;当mode为'test'或'quant'时,返回(im, im_shape),
分别对应图像np.ndarray数据、图像大小信息。
Raises:
TypeError: 形参数据类型不满足需求。
ValueError: 数据长度不匹配。
"""
im = permute(im, False)
if self.mode == 'train':
if im_info is None or label_info is None:
raise TypeError(
                    'Cannot do ArrangeYOLOv3! ' +
                    'Because the im_info and label_info can not be None!')
im_shape = im_info['image_shape']
if len(label_info['gt_bbox']) != len(label_info['gt_class']):
raise ValueError("gt num mismatch: bbox and class.")
if len(label_info['gt_bbox']) != len(label_info['gt_score']):
raise ValueError("gt num mismatch: bbox and score.")
gt_bbox = np.zeros((50, 4), dtype=im.dtype)
gt_class = np.zeros((50, ), dtype=np.int32)
gt_score = np.zeros((50, ), dtype=im.dtype)
gt_num = min(50, len(label_info['gt_bbox']))
if gt_num > 0:
label_info['gt_class'][:gt_num, 0] = label_info[
'gt_class'][:gt_num, 0] - 1
if -1 not in label_info['gt_class']:
gt_bbox[:gt_num, :] = label_info['gt_bbox'][:gt_num, :]
gt_class[:gt_num] = label_info['gt_class'][:gt_num, 0]
gt_score[:gt_num] = label_info['gt_score'][:gt_num, 0]
# parse [x1, y1, x2, y2] to [x, y, w, h]
gt_bbox[:, 2:4] = gt_bbox[:, 2:4] - gt_bbox[:, :2]
gt_bbox[:, :2] = gt_bbox[:, :2] + gt_bbox[:, 2:4] / 2.
outputs = (im, gt_bbox, gt_class, gt_score, im_shape)
elif self.mode == 'eval':
if im_info is None or label_info is None:
raise TypeError(
                    'Cannot do ArrangeYOLOv3! ' +
                    'Because the im_info and label_info can not be None!')
im_shape = im_info['image_shape']
if len(label_info['gt_bbox']) != len(label_info['gt_class']):
raise ValueError("gt num mismatch: bbox and class.")
im_id = im_info['im_id']
gt_bbox = np.zeros((50, 4), dtype=im.dtype)
gt_class = np.zeros((50, ), dtype=np.int32)
difficult = np.zeros((50, ), dtype=np.int32)
gt_num = min(50, len(label_info['gt_bbox']))
if gt_num > 0:
label_info['gt_class'][:gt_num, 0] = label_info[
'gt_class'][:gt_num, 0] - 1
gt_bbox[:gt_num, :] = label_info['gt_bbox'][:gt_num, :]
gt_class[:gt_num] = label_info['gt_class'][:gt_num, 0]
difficult[:gt_num] = label_info['difficult'][:gt_num, 0]
outputs = (im, im_shape, im_id, gt_bbox, gt_class, difficult)
else:
if im_info is None:
                raise TypeError('Cannot do ArrangeYOLOv3! ' +
                                'Because the im_info can not be None!')
im_shape = im_info['image_shape']
outputs = (im, im_shape)
return outputs
class ComposedRCNNTransforms(Compose):
""" RCNN模型(faster-rcnn/mask-rcnn)图像处理流程,具体如下,
训练阶段:
1. 随机以0.5的概率将图像水平翻转
2. 图像归一化
3. 图像按比例Resize,scale计算方式如下
scale = min_max_size[0] / short_size_of_image
if max_size_of_image * scale > min_max_size[1]:
scale = min_max_size[1] / max_size_of_image
4. 将3步骤的长宽进行padding,使得长宽为32的倍数
验证阶段:
1. 图像归一化
2. 图像按比例Resize,scale计算方式同上训练阶段
3. 将2步骤的长宽进行padding,使得长宽为32的倍数
Args:
mode(str): 图像处理流程所处阶段,训练/验证/预测,分别对应'train', 'eval', 'test'
min_max_size(list): 图像在缩放时,最小边和最大边的约束条件
mean(list): 图像均值
std(list): 图像方差
"""
def __init__(self,
mode,
min_max_size=[800, 1333],
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]):
if mode == 'train':
            # transforms for training, including data augmentation
transforms = [
RandomHorizontalFlip(prob=0.5), Normalize(
mean=mean, std=std), ResizeByShort(
short_size=min_max_size[0], max_size=min_max_size[1]),
Padding(coarsest_stride=32)
]
else:
            # transforms for evaluation/prediction
transforms = [
Normalize(
mean=mean, std=std), ResizeByShort(
short_size=min_max_size[0], max_size=min_max_size[1]),
Padding(coarsest_stride=32)
]
super(ComposedRCNNTransforms, self).__init__(transforms)
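# Numeric illustration (not part of the original source) of the eval-time
# pipeline above: a 600x1200 image gets scale 800/600 clamped to 1333/1200
# (long edge 1600 > 1333), then both sides are padded up to multiples of 32.
def _demo_rcnn_eval_transforms():
    import numpy as np
    pipeline = ComposedRCNNTransforms(mode='eval', min_max_size=[800, 1333])
    im = (np.random.rand(600, 1200, 3) * 255).astype('float32')
    out, im_info = pipeline(im)
    assert out.shape == (672, 1344, 3)  # ~666x1333 padded to 672x1344
    return out.shape, im_info['im_resize_info']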
class ComposedYOLOv3Transforms(Compose):
"""YOLOv3模型的图像预处理流程,具体如下,
训练阶段:
1. 在前mixup_epoch轮迭代中,使用MixupImage策略,见https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#mixupimage
2. 对图像进行随机扰动,包括亮度,对比度,饱和度和色调
3. 随机扩充图像,见https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#randomexpand
4. 随机裁剪图像
5. 将4步骤的输出图像Resize成shape参数的大小
6. 随机0.5的概率水平翻转图像
7. 图像归一化
验证/预测阶段:
1. 将图像Resize成shape参数大小
2. 图像归一化
Args:
mode(str): 图像处理流程所处阶段,训练/验证/预测,分别对应'train', 'eval', 'test'
shape(list): 输入模型中图像的大小,输入模型的图像会被Resize成此大小
mixup_epoch(int): 模型训练过程中,前mixup_epoch会使用mixup策略
mean(list): 图像均值
std(list): 图像方差
"""
def __init__(self,
mode,
shape=[608, 608],
mixup_epoch=250,
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]):
width = shape
if isinstance(shape, list):
if shape[0] != shape[1]:
raise Exception(
"In YOLOv3 model, width and height should be equal")
width = shape[0]
if width % 32 != 0:
raise Exception(
"In YOLOv3 model, width and height should be multiple of 32, e.g 224、256、320...."
)
if mode == 'train':
            # transforms for training, including data augmentation
transforms = [
MixupImage(mixup_epoch=mixup_epoch), RandomDistort(),
RandomExpand(), RandomCrop(), Resize(
target_size=width,
interp='RANDOM'), RandomHorizontalFlip(), Normalize(
mean=mean, std=std)
]
else:
            # transforms for evaluation/prediction
transforms = [
Resize(
target_size=width, interp='CUBIC'), Normalize(
mean=mean, std=std)
]
super(ComposedYOLOv3Transforms, self).__init__(transforms)
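# Usage sketch (not part of the original source): in 'test' mode the pipeline
# above reduces to Resize + Normalize, so any input ends up at shape x shape.
def _demo_yolo_test_transforms():
    import numpy as np
    pipeline = ComposedYOLOv3Transforms(mode='test', shape=[608, 608])
    im = (np.random.rand(480, 640, 3) * 255).astype('float32')
    out, im_info = pipeline(im)
    assert out.shape == (608, 608, 3)
    return out.shape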
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import copy
def execute_imgaug(augmenter, im, bboxes=None, polygons=None,
segment_map=None):
    # Preprocess: convert bboxes and polygons to imgaug format
import imgaug.augmentables.kps as kps
import imgaug.augmentables.bbs as bbs
aug_im = im.astype('uint8')
aug_im = augmenter.augment(image=aug_im).astype('float32')
return aug_im
    # TODO: imgaug's annotation handling partly differs from the existing
    # PaddleX transforms. For now only the raw image is processed, so only
    # pixel-level imgaug augmenters can be used; the code below never runs.
aug_bboxes = None
if bboxes is not None:
aug_bboxes = list()
for i in range(len(bboxes)):
x1 = bboxes[i, 0]
y1 = bboxes[i, 1]
x2 = bboxes[i, 2]
y2 = bboxes[i, 3]
aug_bboxes.append(bbs.BoundingBox(x1, y1, x2, y2))
aug_points = None
if polygons is not None:
aug_points = list()
for i in range(len(polygons)):
num = len(polygons[i])
for j in range(num):
tmp = np.reshape(polygons[i][j], (-1, 2))
for k in range(len(tmp)):
aug_points.append(kps.Keypoint(tmp[k, 0], tmp[k, 1]))
aug_segment_map = None
if segment_map is not None:
if len(segment_map.shape) == 2:
h, w = segment_map.shape
aug_segment_map = np.reshape(segment_map, (1, h, w, 1))
elif len(segment_map.shape) == 3:
h, w, c = segment_map.shape
aug_segment_map = np.reshape(segment_map, (1, h, w, c))
else:
raise Exception(
"Only support 2-dimensions for 3-dimensions for segment_map")
unnormalized_batch = augmenter.augment(
image=aug_im,
bounding_boxes=aug_bboxes,
keypoints=aug_points,
segmentation_maps=aug_segment_map,
return_batch=True)
aug_im = unnormalized_batch.images_aug[0]
aug_bboxes = unnormalized_batch.bounding_boxes_aug
aug_points = unnormalized_batch.keypoints_aug
aug_seg_map = unnormalized_batch.segmentation_maps_aug
aug_im = aug_im.astype('float32')
if aug_bboxes is not None:
converted_bboxes = list()
for i in range(len(aug_bboxes)):
converted_bboxes.append([
aug_bboxes[i].x1, aug_bboxes[i].y1, aug_bboxes[i].x2,
aug_bboxes[i].y2
])
aug_bboxes = converted_bboxes
aug_polygons = None
if aug_points is not None:
aug_polygons = copy.deepcopy(polygons)
idx = 0
for i in range(len(aug_polygons)):
num = len(aug_polygons[i])
for j in range(num):
num_points = len(aug_polygons[i][j]) // 2
for k in range(num_points):
aug_polygons[i][j][k * 2] = aug_points[idx].x
aug_polygons[i][j][k * 2 + 1] = aug_points[idx].y
idx += 1
result = [aug_im]
if aug_bboxes is not None:
result.append(np.array(aug_bboxes))
if aug_polygons is not None:
result.append(aug_polygons)
if aug_seg_map is not None:
n, h, w, c = aug_seg_map.shape
if len(segment_map.shape) == 2:
aug_seg_map = np.reshape(aug_seg_map, (h, w))
elif len(segment_map.shape) == 3:
aug_seg_map = np.reshape(aug_seg_map, (h, w, c))
result.append(aug_seg_map)
return result
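# Usage sketch (not part of the original source): as noted above, only the
# image branch is currently active, so execute_imgaug should be paired with
# pixel-level augmenters. Requires the optional imgaug dependency.
def _demo_execute_imgaug():
    import numpy as np
    import imgaug.augmenters as iaa
    im = (np.random.rand(64, 64, 3) * 255).astype('float32')
    aug_im = execute_imgaug(iaa.GaussianBlur(sigma=1.0), im)
    assert aug_im.shape == im.shape and aug_im.dtype == np.float32
    return aug_im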
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import cv2
import math
import numpy as np
from PIL import Image, ImageEnhance
def normalize(im, mean, std):
im = im / 255.0
im -= mean
im /= std
return im
def permute(im, to_bgr=False):
im = np.swapaxes(im, 1, 2)
im = np.swapaxes(im, 1, 0)
if to_bgr:
im = im[[2, 1, 0], :, :]
return im
def resize_long(im, long_size=224, interpolation=cv2.INTER_LINEAR):
value = max(im.shape[0], im.shape[1])
scale = float(long_size) / float(value)
resized_width = int(round(im.shape[1] * scale))
resized_height = int(round(im.shape[0] * scale))
im = cv2.resize(
im, (resized_width, resized_height), interpolation=interpolation)
return im
def resize(im, target_size=608, interp=cv2.INTER_LINEAR):
if isinstance(target_size, list) or isinstance(target_size, tuple):
w = target_size[0]
h = target_size[1]
else:
w = target_size
h = target_size
im = cv2.resize(im, (w, h), interpolation=interp)
return im
def random_crop(im,
crop_size=224,
lower_scale=0.08,
lower_ratio=3. / 4,
upper_ratio=4. / 3):
scale = [lower_scale, 1.0]
ratio = [lower_ratio, upper_ratio]
aspect_ratio = math.sqrt(np.random.uniform(*ratio))
w = 1. * aspect_ratio
h = 1. / aspect_ratio
bound = min((float(im.shape[0]) / im.shape[1]) / (h**2),
(float(im.shape[1]) / im.shape[0]) / (w**2))
scale_max = min(scale[1], bound)
scale_min = min(scale[0], bound)
target_area = im.shape[0] * im.shape[1] * np.random.uniform(
scale_min, scale_max)
target_size = math.sqrt(target_area)
w = int(target_size * w)
h = int(target_size * h)
i = np.random.randint(0, im.shape[0] - h + 1)
j = np.random.randint(0, im.shape[1] - w + 1)
im = im[i:i + h, j:j + w, :]
im = cv2.resize(im, (crop_size, crop_size))
return im
def center_crop(im, crop_size=224):
height, width = im.shape[:2]
w_start = (width - crop_size) // 2
h_start = (height - crop_size) // 2
w_end = w_start + crop_size
h_end = h_start + crop_size
im = im[h_start:h_end, w_start:w_end, :]
return im
def horizontal_flip(im):
if len(im.shape) == 3:
im = im[:, ::-1, :]
elif len(im.shape) == 2:
im = im[:, ::-1]
return im
def vertical_flip(im):
if len(im.shape) == 3:
im = im[::-1, :, :]
elif len(im.shape) == 2:
im = im[::-1, :]
return im
def bgr2rgb(im):
return im[:, :, ::-1]
def hue(im, hue_lower, hue_upper):
delta = np.random.uniform(hue_lower, hue_upper)
u = np.cos(delta * np.pi)
w = np.sin(delta * np.pi)
bt = np.array([[1.0, 0.0, 0.0], [0.0, u, -w], [0.0, w, u]])
tyiq = np.array([[0.299, 0.587, 0.114], [0.596, -0.274, -0.321],
[0.211, -0.523, 0.311]])
ityiq = np.array([[1.0, 0.956, 0.621], [1.0, -0.272, -0.647],
[1.0, -1.107, 1.705]])
t = np.dot(np.dot(ityiq, bt), tyiq).T
im = np.dot(im, t)
return im
def saturation(im, saturation_lower, saturation_upper):
delta = np.random.uniform(saturation_lower, saturation_upper)
gray = im * np.array([[[0.299, 0.587, 0.114]]], dtype=np.float32)
gray = gray.sum(axis=2, keepdims=True)
gray *= (1.0 - delta)
im *= delta
im += gray
return im
def contrast(im, contrast_lower, contrast_upper):
delta = np.random.uniform(contrast_lower, contrast_upper)
im *= delta
return im
def brightness(im, brightness_lower, brightness_upper):
delta = np.random.uniform(brightness_lower, brightness_upper)
im += delta
return im
def rotate(im, rotate_lower, rotate_upper):
rotate_delta = np.random.uniform(rotate_lower, rotate_upper)
im = im.rotate(int(rotate_delta))
return im
def resize_padding(im, max_side_len=2400):
    '''
    resize the image to a size that is a multiple of 32, as required by the network
    :param im: the image to resize
    :param max_side_len: limit on the max image size, to avoid running out of GPU memory
    :return: the resized image and the resize ratio
    '''
h, w, _ = im.shape
resize_w = w
resize_h = h
# limit the max side
if max(resize_h, resize_w) > max_side_len:
ratio = float(
max_side_len) / resize_h if resize_h > resize_w else float(
max_side_len) / resize_w
else:
ratio = 1.
resize_h = int(resize_h * ratio)
resize_w = int(resize_w * ratio)
resize_h = resize_h if resize_h % 32 == 0 else (resize_h // 32 - 1) * 32
resize_w = resize_w if resize_w % 32 == 0 else (resize_w // 32 - 1) * 32
resize_h = max(32, resize_h)
resize_w = max(32, resize_w)
im = cv2.resize(im, (int(resize_w), int(resize_h)))
#im = cv2.resize(im, (512, 512))
ratio_h = resize_h / float(h)
ratio_w = resize_w / float(w)
_ratio = np.array([ratio_h, ratio_w]).reshape(-1, 2)
return im, _ratio
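# Quick check (illustrative, not part of the original source): each side is
# floored to (side // 32 - 1) * 32 when not already a multiple of 32, so a
# 500x700 input becomes 448x640 and the ratios are 448/500 and 640/700.
def _demo_resize_padding():
    import numpy as np
    im = np.zeros((500, 700, 3), dtype=np.uint8)
    out, ratio = resize_padding(im)
    assert out.shape[:2] == (448, 640)
    return ratio  # array([[0.896, 0.9142857...]])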
# coding: utf8
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .ops import *
from .imgaug_support import execute_imgaug
import random
import os.path as osp
import numpy as np
from PIL import Image
import cv2
from collections import OrderedDict
import utils.logging as logging
class SegTransform:
""" 分割transform基类
"""
def __init__(self):
pass
class Compose(SegTransform):
"""根据数据预处理/增强算子对输入数据进行操作。
所有操作的输入图像流形状均是[H, W, C],其中H为图像高,W为图像宽,C为图像通道数。
Args:
transforms (list): 数据预处理/增强算子。
Raises:
TypeError: transforms不是list对象
ValueError: transforms元素个数小于1。
"""
def __init__(self, transforms):
if not isinstance(transforms, list):
raise TypeError('The transforms must be a list!')
if len(transforms) < 1:
raise ValueError('The length of transforms ' + \
                             'must be at least 1!')
self.transforms = transforms
self.to_rgb = False
        # Check the operators in transforms; only PaddleX-defined transforms or imgaug augmenters are supported
for op in self.transforms:
if not isinstance(op, SegTransform):
import imgaug.augmenters as iaa
if not isinstance(op, iaa.Augmenter):
raise Exception(
"Elements in transforms should be defined in 'paddlex.seg.transforms' or class of imgaug.augmenters.Augmenter, see docs here: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/"
)
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (str/np.ndarray): 图像路径/图像np.ndarray数据。
im_info (list): 存储图像reisze或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (str/np.ndarray): 标注图像路径/标注图像np.ndarray数据。
Returns:
tuple: 根据网络所需字段所组成的tuple;字段由transforms中的最后一个数据预处理操作决定。
"""
if im_info is None:
im_info = list()
if isinstance(im, np.ndarray):
if len(im.shape) != 3:
raise Exception(
"im should be 3-dimensions, but now is {}-dimensions".
format(len(im.shape)))
else:
try:
im = cv2.imread(im).astype('float32')
except:
                raise ValueError('Cannot read the image file {}!'.format(im))
if self.to_rgb:
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
if label is not None:
if not isinstance(label, np.ndarray):
label = np.asarray(Image.open(label))
for op in self.transforms:
if isinstance(op, SegTransform):
outputs = op(im, im_info, label)
im = outputs[0]
if len(outputs) >= 2:
im_info = outputs[1]
if len(outputs) == 3:
label = outputs[2]
else:
im = execute_imgaug(op, im)
if label is not None:
outputs = (im, im_info, label)
else:
outputs = (im, im_info)
return outputs
def add_augmenters(self, augmenters):
if not isinstance(augmenters, list):
raise Exception(
"augmenters should be list type in func add_augmenters()")
transform_names = [type(x).__name__ for x in self.transforms]
for aug in augmenters:
if type(aug).__name__ in transform_names:
logging.error("{} is already in ComposedTransforms, need to remove it from add_augmenters().".format(type(aug).__name__))
self.transforms = augmenters + self.transforms
class RandomHorizontalFlip(SegTransform):
"""以一定的概率对图像进行水平翻转。当存在标注图像时,则同步进行翻转。
Args:
prob (float): 随机水平翻转的概率。默认值为0.5。
"""
def __init__(self, prob=0.5):
self.prob = prob
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (list): 存储图像reisze或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
if random.random() < self.prob:
im = horizontal_flip(im)
if label is not None:
label = horizontal_flip(label)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
class RandomVerticalFlip(SegTransform):
"""以一定的概率对图像进行垂直翻转。当存在标注图像时,则同步进行翻转。
Args:
prob (float): 随机垂直翻转的概率。默认值为0.1。
"""
def __init__(self, prob=0.1):
self.prob = prob
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (list): 存储图像reisze或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
if random.random() < self.prob:
im = vertical_flip(im)
if label is not None:
label = vertical_flip(label)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
class Resize(SegTransform):
"""调整图像大小(resize),当存在标注图像时,则同步进行处理。
- 当目标大小(target_size)类型为int时,根据插值方式,
将图像resize为[target_size, target_size]。
- 当目标大小(target_size)类型为list或tuple时,根据插值方式,
将图像resize为target_size, target_size的输入应为[w, h]或(w, h)。
Args:
target_size (int|list|tuple): 目标大小。
interp (str): resize的插值方式,与opencv的插值方式对应,
可选的值为['NEAREST', 'LINEAR', 'CUBIC', 'AREA', 'LANCZOS4'],默认为"LINEAR"。
Raises:
TypeError: target_size不是int/list/tuple。
ValueError: target_size为list/tuple时元素个数不等于2。
AssertionError: interp的取值不在['NEAREST', 'LINEAR', 'CUBIC', 'AREA', 'LANCZOS4']之内。
"""
# The interpolation mode
interp_dict = {
'NEAREST': cv2.INTER_NEAREST,
'LINEAR': cv2.INTER_LINEAR,
'CUBIC': cv2.INTER_CUBIC,
'AREA': cv2.INTER_AREA,
'LANCZOS4': cv2.INTER_LANCZOS4
}
def __init__(self, target_size, interp='LINEAR'):
self.interp = interp
assert interp in self.interp_dict, "interp should be one of {}".format(
interp_dict.keys())
if isinstance(target_size, list) or isinstance(target_size, tuple):
if len(target_size) != 2:
raise ValueError(
'when target is list or tuple, it should include 2 elements, but it is {}'
.format(target_size))
elif not isinstance(target_size, int):
raise TypeError(
"Type of target_size is invalid. Must be Integer or List or tuple, now is {}"
.format(type(target_size)))
self.target_size = target_size
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (list): 存储图像reisze或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
其中,im_info跟新字段为:
-shape_before_resize (tuple): 保存resize之前图像的形状(h, w)。
Raises:
ZeroDivisionError: im的短边为0。
TypeError: im不是np.ndarray数据。
ValueError: im不是3维nd.ndarray。
"""
        if im_info is None:
            im_info = list()
        im_info.append(('resize', im.shape[:2]))
if not isinstance(im, np.ndarray):
raise TypeError("ResizeImage: image type is not np.ndarray.")
if len(im.shape) != 3:
raise ValueError('ResizeImage: image is not 3-dimensional.')
im_shape = im.shape
im_size_min = np.min(im_shape[0:2])
im_size_max = np.max(im_shape[0:2])
if float(im_size_min) == 0:
raise ZeroDivisionError('ResizeImage: min size of image is 0')
if isinstance(self.target_size, int):
resize_w = self.target_size
resize_h = self.target_size
else:
resize_w = self.target_size[0]
resize_h = self.target_size[1]
im_scale_x = float(resize_w) / float(im_shape[1])
im_scale_y = float(resize_h) / float(im_shape[0])
im = cv2.resize(
im,
None,
None,
fx=im_scale_x,
fy=im_scale_y,
interpolation=self.interp_dict[self.interp])
if label is not None:
label = cv2.resize(
label,
None,
None,
fx=im_scale_x,
fy=im_scale_y,
interpolation=self.interp_dict['NEAREST'])
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
class ResizeByLong(SegTransform):
"""对图像长边resize到固定值,短边按比例进行缩放。当存在标注图像时,则同步进行处理。
Args:
long_size (int): resize后图像的长边大小。
"""
def __init__(self, long_size):
self.long_size = long_size
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (list): 存储图像reisze或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
其中,im_info新增字段为:
-shape_before_resize (tuple): 保存resize之前图像的形状(h, w)。
"""
        if im_info is None:
            im_info = list()
        im_info.append(('resize', im.shape[:2]))
im = resize_long(im, self.long_size)
if label is not None:
label = resize_long(label, self.long_size, cv2.INTER_NEAREST)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
class ResizeByShort(SegTransform):
"""根据图像的短边调整图像大小(resize)。
1. 获取图像的长边和短边长度。
2. 根据短边与short_size的比例,计算长边的目标长度,
此时高、宽的resize比例为short_size/原图短边长度。
3. 如果max_size>0,调整resize比例:
如果长边的目标长度>max_size,则高、宽的resize比例为max_size/原图长边长度。
4. 根据调整大小的比例对图像进行resize。
Args:
target_size (int): 短边目标长度。默认为800。
max_size (int): 长边目标长度的最大限制。默认为1333。
Raises:
TypeError: 形参数据类型不满足需求。
"""
def __init__(self, short_size=800, max_size=1333):
self.max_size = int(max_size)
if not isinstance(short_size, int):
raise TypeError(
"Type of short_size is invalid. Must be Integer, now is {}".
format(type(short_size)))
self.short_size = short_size
if not (isinstance(self.max_size, int)):
raise TypeError("max_size: input type is invalid.")
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (numnp.ndarraypy): 图像np.ndarray数据。
im_info (list): 存储图像reisze或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
其中,im_info更新字段为:
-shape_before_resize (tuple): 保存resize之前图像的形状(h, w)。
Raises:
TypeError: 形参数据类型不满足需求。
ValueError: 数据长度不匹配。
"""
        if im_info is None:
            im_info = list()
if not isinstance(im, np.ndarray):
raise TypeError("ResizeByShort: image type is not numpy.")
if len(im.shape) != 3:
raise ValueError('ResizeByShort: image is not 3-dimensional.')
im_info.append(('resize', im.shape[:2]))
im_short_size = min(im.shape[0], im.shape[1])
im_long_size = max(im.shape[0], im.shape[1])
scale = float(self.short_size) / im_short_size
if self.max_size > 0 and np.round(scale *
im_long_size) > self.max_size:
scale = float(self.max_size) / float(im_long_size)
resized_width = int(round(im.shape[1] * scale))
resized_height = int(round(im.shape[0] * scale))
        im = cv2.resize(
            im, (resized_width, resized_height),
            interpolation=cv2.INTER_LINEAR)
        if label is not None:
            label = cv2.resize(
                label, (resized_width, resized_height),
                interpolation=cv2.INTER_NEAREST)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
class ResizeRangeScaling(SegTransform):
"""对图像长边随机resize到指定范围内,短边按比例进行缩放。当存在标注图像时,则同步进行处理。
Args:
min_value (int): 图像长边resize后的最小值。默认值400。
max_value (int): 图像长边resize后的最大值。默认值600。
Raises:
ValueError: min_value大于max_value
"""
def __init__(self, min_value=400, max_value=600):
if min_value > max_value:
raise ValueError('min_value must be less than max_value, '
'but they are {} and {}.'.format(min_value,
max_value))
self.min_value = min_value
self.max_value = max_value
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (list): 存储图像reisze或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
if self.min_value == self.max_value:
random_size = self.max_value
else:
random_size = int(
np.random.uniform(self.min_value, self.max_value) + 0.5)
im = resize_long(im, random_size, cv2.INTER_LINEAR)
if label is not None:
label = resize_long(label, random_size, cv2.INTER_NEAREST)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
class ResizeStepScaling(SegTransform):
"""对图像按照某一个比例resize,这个比例以scale_step_size为步长
在[min_scale_factor, max_scale_factor]随机变动。当存在标注图像时,则同步进行处理。
Args:
min_scale_factor(float), resize最小尺度。默认值0.75。
max_scale_factor (float), resize最大尺度。默认值1.25。
scale_step_size (float), resize尺度范围间隔。默认值0.25。
Raises:
ValueError: min_scale_factor大于max_scale_factor
"""
def __init__(self,
min_scale_factor=0.75,
max_scale_factor=1.25,
scale_step_size=0.25):
if min_scale_factor > max_scale_factor:
raise ValueError(
'min_scale_factor must be less than max_scale_factor, '
'but they are {} and {}.'.format(min_scale_factor,
max_scale_factor))
self.min_scale_factor = min_scale_factor
self.max_scale_factor = max_scale_factor
self.scale_step_size = scale_step_size
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (list): 存储图像reisze或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
if self.min_scale_factor == self.max_scale_factor:
scale_factor = self.min_scale_factor
elif self.scale_step_size == 0:
scale_factor = np.random.uniform(self.min_scale_factor,
self.max_scale_factor)
else:
num_steps = int((self.max_scale_factor - self.min_scale_factor) /
self.scale_step_size + 1)
scale_factors = np.linspace(self.min_scale_factor,
self.max_scale_factor,
num_steps).tolist()
np.random.shuffle(scale_factors)
scale_factor = scale_factors[0]
im = cv2.resize(
im, (0, 0),
fx=scale_factor,
fy=scale_factor,
interpolation=cv2.INTER_LINEAR)
if label is not None:
label = cv2.resize(
label, (0, 0),
fx=scale_factor,
fy=scale_factor,
interpolation=cv2.INTER_NEAREST)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
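

# Illustrative sketch of the scale sampling above: with the defaults, the
# candidate factors form an inclusive grid from min to max in step-size steps.
def _demo_step_scaling_factors():
    num_steps = int((1.25 - 0.75) / 0.25 + 1)
    print(np.linspace(0.75, 1.25, num_steps).tolist())  # [0.75, 1.0, 1.25]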
class Normalize(SegTransform):
"""对图像进行标准化。
1.尺度缩放到 [0,1]。
2.对图像进行减均值除以标准差操作。
Args:
mean (list): 图像数据集的均值。默认值[0.5, 0.5, 0.5]。
std (list): 图像数据集的标准差。默认值[0.5, 0.5, 0.5]。
Raises:
ValueError: mean或std不是list对象。std包含0。
"""
def __init__(self, mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]):
self.mean = mean
self.std = std
if not (isinstance(self.mean, list) and isinstance(self.std, list)):
raise ValueError("{}: input type is invalid.".format(self))
from functools import reduce
if reduce(lambda x, y: x * y, self.std) == 0:
raise ValueError('{}: std is invalid!'.format(self))
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (list): 存储图像reisze或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
std = np.array(self.std)[np.newaxis, np.newaxis, :]
im = normalize(im, mean, std)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
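

# Illustrative sketch, assuming the `normalize` helper implements both steps
# from the docstring (scale to [0, 1], then subtract mean and divide by std):
# with mean = std = 0.5 a pixel value v in [0, 255] maps to
# (v / 255 - 0.5) / 0.5, i.e. into [-1, 1].
def _demo_normalize():
    im = np.full((2, 2, 3), 255, dtype='float32')
    im_norm, _ = Normalize()(im)
    print(im_norm[0, 0])  # approximately [1.0, 1.0, 1.0]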
class Padding(SegTransform):
"""对图像或标注图像进行padding,padding方向为右和下。
根据提供的值对图像或标注图像进行padding操作。
Args:
target_size (int|list|tuple): padding后图像的大小。
im_padding_value (list): 图像padding的值。默认为[127.5, 127.5, 127.5]。
label_padding_value (int): 标注图像padding的值。默认值为255。
Raises:
TypeError: target_size不是int|list|tuple。
ValueError: target_size为list|tuple时元素个数不等于2。
"""
def __init__(self,
target_size,
im_padding_value=[127.5, 127.5, 127.5],
label_padding_value=255):
if isinstance(target_size, list) or isinstance(target_size, tuple):
if len(target_size) != 2:
raise ValueError(
'when target is list or tuple, it should include 2 elements, but it is {}'
.format(target_size))
elif not isinstance(target_size, int):
raise TypeError(
"Type of target_size is invalid. Must be Integer or List or tuple, now is {}"
.format(type(target_size)))
self.target_size = target_size
self.im_padding_value = im_padding_value
self.label_padding_value = label_padding_value
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (list): 存储图像reisze或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
其中,im_info新增字段为:
-shape_before_padding (tuple): 保存padding之前图像的形状(h, w)。
Raises:
ValueError: 输入图像im或label的形状大于目标值
"""
if im_info is None:
            im_info = list()
im_info.append(('padding', im.shape[:2]))
im_height, im_width = im.shape[0], im.shape[1]
if isinstance(self.target_size, int):
target_height = self.target_size
target_width = self.target_size
else:
target_height = self.target_size[1]
target_width = self.target_size[0]
pad_height = target_height - im_height
pad_width = target_width - im_width
if pad_height < 0 or pad_width < 0:
            raise ValueError(
                'the size of image should be no larger than target_size, '
                'but the size of image ({}, {}) is larger than target_size '
                '({}, {})'.format(im_width, im_height, target_width,
                                  target_height))
else:
im = cv2.copyMakeBorder(
im,
0,
pad_height,
0,
pad_width,
cv2.BORDER_CONSTANT,
value=self.im_padding_value)
if label is not None:
label = cv2.copyMakeBorder(
label,
0,
pad_height,
0,
pad_width,
cv2.BORDER_CONSTANT,
value=self.label_padding_value)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
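

# Illustrative sketch: Padding extends the image on the right and bottom only,
# so the original content stays at the top-left corner.
def _demo_padding():
    im = np.zeros((400, 300, 3), dtype='float32')
    im_padded, im_info = Padding(target_size=600)(im)
    print(im_padded.shape)  # (600, 600, 3)
    print(im_info)          # [('padding', (400, 300))]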
class RandomPaddingCrop(SegTransform):
"""对图像和标注图进行随机裁剪,当所需要的裁剪尺寸大于原图时,则进行padding操作。
Args:
crop_size (int|list|tuple): 裁剪图像大小。默认为512。
im_padding_value (list): 图像padding的值。默认为[127.5, 127.5, 127.5]。
label_padding_value (int): 标注图像padding的值。默认值为255。
Raises:
TypeError: crop_size不是int/list/tuple。
ValueError: target_size为list/tuple时元素个数不等于2。
"""
def __init__(self,
crop_size=512,
im_padding_value=[127.5, 127.5, 127.5],
label_padding_value=255):
if isinstance(crop_size, list) or isinstance(crop_size, tuple):
if len(crop_size) != 2:
raise ValueError(
'when crop_size is list or tuple, it should include 2 elements, but it is {}'
.format(crop_size))
elif not isinstance(crop_size, int):
raise TypeError(
"Type of crop_size is invalid. Must be Integer or List or tuple, now is {}"
.format(type(crop_size)))
self.crop_size = crop_size
self.im_padding_value = im_padding_value
self.label_padding_value = label_padding_value
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (list): 存储图像reisze或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
if isinstance(self.crop_size, int):
crop_width = self.crop_size
crop_height = self.crop_size
else:
crop_width = self.crop_size[0]
crop_height = self.crop_size[1]
img_height = im.shape[0]
img_width = im.shape[1]
if img_height == crop_height and img_width == crop_width:
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
else:
pad_height = max(crop_height - img_height, 0)
pad_width = max(crop_width - img_width, 0)
if (pad_height > 0 or pad_width > 0):
im = cv2.copyMakeBorder(
im,
0,
pad_height,
0,
pad_width,
cv2.BORDER_CONSTANT,
value=self.im_padding_value)
if label is not None:
label = cv2.copyMakeBorder(
label,
0,
pad_height,
0,
pad_width,
cv2.BORDER_CONSTANT,
value=self.label_padding_value)
img_height = im.shape[0]
img_width = im.shape[1]
if crop_height > 0 and crop_width > 0:
h_off = np.random.randint(img_height - crop_height + 1)
w_off = np.random.randint(img_width - crop_width + 1)
im = im[h_off:(crop_height + h_off), w_off:(w_off + crop_width
), :]
if label is not None:
label = label[h_off:(crop_height + h_off), w_off:(
w_off + crop_width)]
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
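

# Illustrative sketch: when crop_size exceeds the input size, the image is
# padded first, so the output is always exactly crop_size x crop_size.
def _demo_random_padding_crop():
    im = np.zeros((256, 256, 3), dtype='float32')
    im_crop, _ = RandomPaddingCrop(crop_size=512)(im)
    print(im_crop.shape)  # (512, 512, 3)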
class RandomBlur(SegTransform):
"""以一定的概率对图像进行高斯模糊。
Args:
prob (float): 图像模糊概率。默认为0.1。
"""
def __init__(self, prob=0.1):
self.prob = prob
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (list): 存储图像reisze或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
if self.prob <= 0:
n = 0
elif self.prob >= 1:
n = 1
else:
n = int(1.0 / self.prob)
if n > 0:
if np.random.randint(0, n) == 0:
radius = np.random.randint(3, 10)
if radius % 2 != 1:
radius = radius + 1
if radius > 9:
radius = 9
im = cv2.GaussianBlur(im, (radius, radius), 0, 0)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
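

# Illustrative sketch of the sampling scheme above: for 0 < prob < 1 the image
# is blurred when randint(0, n) == 0 with n = int(1 / prob), so the effective
# blur rate is roughly 1 / n (e.g. prob=0.1 -> n=10 -> about 10%).
def _demo_random_blur_rate(trials=10000):
    hits = sum(int(np.random.randint(0, 10) == 0) for _ in range(trials))
    print(hits / trials)  # approximately 0.1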
class RandomRotate(SegTransform):
"""对图像进行随机旋转, 模型训练时的数据增强操作。
在旋转区间[-rotate_range, rotate_range]内,对图像进行随机旋转,当存在标注图像时,同步进行,
并对旋转后的图像和标注图像进行相应的padding。
Args:
rotate_range (float): 最大旋转角度。默认为15度。
im_padding_value (list): 图像padding的值。默认为[127.5, 127.5, 127.5]。
label_padding_value (int): 标注图像padding的值。默认为255。
"""
def __init__(self,
rotate_range=15,
im_padding_value=[127.5, 127.5, 127.5],
label_padding_value=255):
self.rotate_range = rotate_range
self.im_padding_value = im_padding_value
self.label_padding_value = label_padding_value
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (list): 存储图像reisze或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
if self.rotate_range > 0:
(h, w) = im.shape[:2]
do_rotation = np.random.uniform(-self.rotate_range,
self.rotate_range)
pc = (w // 2, h // 2)
r = cv2.getRotationMatrix2D(pc, do_rotation, 1.0)
cos = np.abs(r[0, 0])
sin = np.abs(r[0, 1])
nw = int((h * sin) + (w * cos))
nh = int((h * cos) + (w * sin))
(cx, cy) = pc
r[0, 2] += (nw / 2) - cx
r[1, 2] += (nh / 2) - cy
dsize = (nw, nh)
im = cv2.warpAffine(
im,
r,
dsize=dsize,
flags=cv2.INTER_LINEAR,
borderMode=cv2.BORDER_CONSTANT,
borderValue=self.im_padding_value)
            if label is not None:
                label = cv2.warpAffine(
                    label,
                    r,
                    dsize=dsize,
                    flags=cv2.INTER_NEAREST,
                    borderMode=cv2.BORDER_CONSTANT,
                    borderValue=self.label_padding_value)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
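

# Illustrative sketch of the bound computation above: rotating an (h, w) image
# by an angle enlarges the canvas to (nw, nh) so no content is clipped.
def _demo_rotation_bounds(h=300, w=500, angle_deg=15):
    theta = np.deg2rad(angle_deg)
    cos, sin = np.abs(np.cos(theta)), np.abs(np.sin(theta))
    nw = int(h * sin + w * cos)
    nh = int(h * cos + w * sin)
    print(nw, nh)  # canvas size that fits the rotated image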
class RandomScaleAspect(SegTransform):
"""裁剪并resize回原始尺寸的图像和标注图像。
按照一定的面积比和宽高比对图像进行裁剪,并reszie回原始图像的图像,当存在标注图时,同步进行。
Args:
min_scale (float):裁取图像占原始图像的面积比,取值[0,1],为0时则返回原图。默认为0.5。
aspect_ratio (float): 裁取图像的宽高比范围,非负值,为0时返回原图。默认为0.33。
"""
def __init__(self, min_scale=0.5, aspect_ratio=0.33):
self.min_scale = min_scale
self.aspect_ratio = aspect_ratio
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (list): 存储图像reisze或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
if self.min_scale != 0 and self.aspect_ratio != 0:
img_height = im.shape[0]
img_width = im.shape[1]
for i in range(0, 10):
area = img_height * img_width
target_area = area * np.random.uniform(self.min_scale, 1.0)
aspectRatio = np.random.uniform(self.aspect_ratio,
1.0 / self.aspect_ratio)
dw = int(np.sqrt(target_area * 1.0 * aspectRatio))
dh = int(np.sqrt(target_area * 1.0 / aspectRatio))
if (np.random.randint(10) < 5):
tmp = dw
dw = dh
dh = tmp
if (dh < img_height and dw < img_width):
h1 = np.random.randint(0, img_height - dh)
w1 = np.random.randint(0, img_width - dw)
                    im = im[h1:(h1 + dh), w1:(w1 + dw), :]
                    im = cv2.resize(
                        im, (img_width, img_height),
                        interpolation=cv2.INTER_LINEAR)
                    if label is not None:
                        label = label[h1:(h1 + dh), w1:(w1 + dw)]
                        label = cv2.resize(
                            label, (img_width, img_height),
                            interpolation=cv2.INTER_NEAREST)
                    break
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
class RandomDistort(SegTransform):
"""对图像进行随机失真。
1. 对变换的操作顺序进行随机化操作。
2. 按照1中的顺序以一定的概率对图像进行随机像素内容变换。
Args:
brightness_range (float): 明亮度因子的范围。默认为0.5。
brightness_prob (float): 随机调整明亮度的概率。默认为0.5。
contrast_range (float): 对比度因子的范围。默认为0.5。
contrast_prob (float): 随机调整对比度的概率。默认为0.5。
saturation_range (float): 饱和度因子的范围。默认为0.5。
saturation_prob (float): 随机调整饱和度的概率。默认为0.5。
hue_range (int): 色调因子的范围。默认为18。
hue_prob (float): 随机调整色调的概率。默认为0.5。
"""
def __init__(self,
brightness_range=0.5,
brightness_prob=0.5,
contrast_range=0.5,
contrast_prob=0.5,
saturation_range=0.5,
saturation_prob=0.5,
hue_range=18,
hue_prob=0.5):
self.brightness_range = brightness_range
self.brightness_prob = brightness_prob
self.contrast_range = contrast_range
self.contrast_prob = contrast_prob
self.saturation_range = saturation_range
self.saturation_prob = saturation_prob
self.hue_range = hue_range
self.hue_prob = hue_prob
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (list): 存储图像reisze或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
brightness_lower = 1 - self.brightness_range
brightness_upper = 1 + self.brightness_range
contrast_lower = 1 - self.contrast_range
contrast_upper = 1 + self.contrast_range
saturation_lower = 1 - self.saturation_range
saturation_upper = 1 + self.saturation_range
hue_lower = -self.hue_range
hue_upper = self.hue_range
ops = [brightness, contrast, saturation, hue]
random.shuffle(ops)
params_dict = {
'brightness': {
'brightness_lower': brightness_lower,
'brightness_upper': brightness_upper
},
'contrast': {
'contrast_lower': contrast_lower,
'contrast_upper': contrast_upper
},
'saturation': {
'saturation_lower': saturation_lower,
'saturation_upper': saturation_upper
},
'hue': {
'hue_lower': hue_lower,
'hue_upper': hue_upper
}
}
prob_dict = {
'brightness': self.brightness_prob,
'contrast': self.contrast_prob,
'saturation': self.saturation_prob,
'hue': self.hue_prob
}
for id in range(4):
params = params_dict[ops[id].__name__]
prob = prob_dict[ops[id].__name__]
params['im'] = im
if np.random.uniform(0, 1) < prob:
im = ops[id](**params)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
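

# Illustrative sketch: with range r, the brightness/contrast/saturation factor
# is drawn from [1 - r, 1 + r], while hue is drawn from [-hue_range, hue_range];
# each op fires independently with its own probability, in a shuffled order.
def _demo_distort_bounds(r=0.5, hue_range=18):
    print(1 - r, 1 + r)           # factor range for brightness/contrast/saturation
    print(-hue_range, hue_range)  # additive range for hue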
class ArrangeSegmenter(SegTransform):
"""获取训练/验证/预测所需的信息。
Args:
mode (str): 指定数据用于何种用途,取值范围为['train', 'eval', 'test', 'quant']。
Raises:
ValueError: mode的取值不在['train', 'eval', 'test', 'quant']之内
"""
def __init__(self, mode):
if mode not in ['train', 'eval', 'test', 'quant']:
raise ValueError(
"mode should be defined as one of ['train', 'eval', 'test', 'quant']!"
)
self.mode = mode
def __call__(self, im, im_info, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (list): 存储图像reisze或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当mode为'train'或'eval'时,返回的tuple为(im, label),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当mode为'test'时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;当mode为
'quant'时,返回的tuple为(im,),为图像np.ndarray数据。
"""
im = permute(im, False)
if self.mode == 'train' or self.mode == 'eval':
label = label[np.newaxis, :, :]
return (im, label)
elif self.mode == 'test':
return (im, im_info)
else:
return (im, )
class ComposedSegTransforms(Compose):
""" 语义分割模型(UNet/DeepLabv3p)的图像处理流程,具体如下
训练阶段:
1. 随机对图像以0.5的概率水平翻转
2. 按不同的比例随机Resize原图
3. 从原图中随机crop出大小为train_crop_size大小的子图,如若crop出来的图小于train_crop_size,则会将图padding到对应大小
4. 图像归一化
预测阶段:
1. 图像归一化
Args:
mode(str): 图像处理所处阶段,训练/验证/预测,分别对应'train', 'eval', 'test'
train_crop_size(list): 模型训练阶段,随机从原图crop的大小
mean(list): 图像均值
std(list): 图像方差
"""
def __init__(self,
mode,
train_crop_size=[769, 769],
mean=[0.5, 0.5, 0.5],
std=[0.5, 0.5, 0.5]):
if mode == 'train':
            # Transforms used for training, including data augmentation
transforms = [
RandomHorizontalFlip(prob=0.5), ResizeStepScaling(),
RandomPaddingCrop(crop_size=train_crop_size), Normalize(
mean=mean, std=std)
]
else:
            # Transforms used for evaluation/prediction
transforms = [Normalize(mean=mean, std=std)]
super(ComposedSegTransforms, self).__init__(transforms)
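

# Illustrative usage sketch (assumes the Compose base class applies the listed
# transforms in order, as elsewhere in this module):
def _demo_composed_seg_transforms():
    train_transforms = ComposedSegTransforms(
        mode='train', train_crop_size=[769, 769])
    eval_transforms = ComposedSegTransforms(mode='eval')
    # train: flip -> step-scaled resize -> padding crop -> normalize
    # eval/test: normalize only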
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import time
import sys

from colorama import init
init(autoreset=True)
levels = {0: 'ERROR', 1: 'WARNING', 2: 'INFO', 3: 'DEBUG'}
log_level = 2
def log(level=2, message="", use_color=False):
current_time = time.time()
time_array = time.localtime(current_time)
current_time = time.strftime("%Y-%m-%d %H:%M:%S", time_array)
if log_level >= level:
if use_color:
print("\033[1;31;40m{} [{}]\t{}\033[0m".format(current_time, levels[
level], message).encode("utf-8").decode("latin1"))
else:
print("{} [{}]\t{}".format(current_time, levels[level], message)
.encode("utf-8").decode("latin1"))
sys.stdout.flush()
def debug(message="", use_color=False):
log(level=3, message=message, use_color=use_color)
def info(message="", use_color=False):
log(level=2, message=message, use_color=use_color)
def warning(message="", use_color=True):
log(level=1, message=message, use_color=use_color)
def error(message="", use_color=True, exit=True):
log(level=0, message=message, use_color=use_color)
if exit:
sys.exit(-1)
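

# Illustrative usage sketch: with log_level = 2 (INFO), debug() output is
# suppressed while info()/warning()/error() print a timestamp and level tag.
if __name__ == "__main__":
    info("model loaded")
    warning("falling back to CPU")
    debug("filtered out at log_level = 2")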
File mode changed from 100644 to 100755
# Path to the prebuilt OpenVINO inference engine libraries
OPENVINO_DIR=/path/to/inference_engine/
OPENVINO_DIR=$INTEL_OPENVINO_DIR/inference_engine
# Path to the prebuilt gflags libraries
GFLAGS_DIR=/path/to/gflags
GFLAGS_DIR=/wangsiyuan06/gflags/build
# Path to the ngraph lib, usually generated when building OpenVINO
NGRAPH_LIB=/path/to/ngraph/lib/
NGRAPH_LIB=$INTEL_OPENVINO_DIR/deployment_tools/ngraph/lib
# Path to the prebuilt OpenCV libraries; leave unchanged when using the bundled prebuilt version
OPENCV_DIR=$(pwd)/deps/opencv3gcc4.8/
......
......@@ -13,6 +13,8 @@
// limitations under the License.
#include "include/paddlex/paddlex.h"
#include <iostream>
#include <fstream>
using namespace InferenceEngine;
......@@ -50,20 +52,24 @@ bool Model::load_config(const std::string& cfg_dir) {
}
  // Build the preprocessing pipeline
transforms_.Init(config["Transforms"], to_rgb);
  // Read in the label list
labels.clear();
labels = config["_Attributes"]["labels"].as<std::vector<std::string>>();
  // Read in the label list
for (const auto& item : config["_Attributes"]["labels"]) {
int index = labels.size();
labels[index] = item.as<std::string>();
}
return true;
}
bool Model::preprocess(cv::Mat* input_im) {
if (!transforms_.Run(input_im, inputs_)) {
bool Model::preprocess(cv::Mat* input_im, ImageBlob* inputs) {
if (!transforms_.Run(input_im, inputs)) {
return false;
}
return true;
}
bool Model::predict(const cv::Mat& im, ClsResult* result) {
inputs_.clear();
if (type == "detector") {
std::cerr << "Loading model is a 'detector', DetResult should be passed to "
"function predict()!"
......@@ -78,10 +84,9 @@ bool Model::predict(const cv::Mat& im, ClsResult* result) {
  // Preprocess the input image
InferRequest infer_request = executable_network_.CreateInferRequest();
std::string input_name = network_.getInputsInfo().begin()->first;
inputs_ = infer_request.GetBlob(input_name);
auto im_clone = im.clone();
if (!preprocess(&im_clone)) {
inputs_.blob = infer_request.GetBlob(input_name);
cv::Mat im_clone = im.clone();
if (!preprocess(&im_clone, &inputs_)) {
std::cerr << "Preprocess failed!" << std::endl;
return false;
}
......@@ -89,6 +94,7 @@ bool Model::predict(const cv::Mat& im, ClsResult* result) {
infer_request.Infer();
std::string output_name = network_.getOutputsInfo().begin()->first;
std::cout << "ouput node name" << output_name << std::endl;
output_ = infer_request.GetBlob(output_name);
MemoryBlob::CPtr moutput = as<MemoryBlob>(output_);
auto moutputHolder = moutput->rmap();
......@@ -99,10 +105,122 @@ bool Model::predict(const cv::Mat& im, ClsResult* result) {
result->category_id = std::distance(outputs_data, ptr);
result->score = *ptr;
result->category = labels[result->category_id];
  return true;
}
bool Model::predict(const cv::Mat& im, SegResult* result) {
result->clear();
inputs_.clear();
if (type == "classifier") {
std::cerr << "Loading model is a 'classifier', ClsResult should be passed "
"to function predict()!" << std::endl;
return false;
} else if (type == "detector") {
std::cerr << "Loading model is a 'detector', DetResult should be passed to "
"function predict()!" << std::endl;
return false;
}
  // Create the infer request and bind the input blob
InferRequest infer_request = executable_network_.CreateInferRequest();
std::string input_name = network_.getInputsInfo().begin()->first;
inputs_.blob = infer_request.GetBlob(input_name);
  // Preprocess the input image
cv::Mat im_clone = im.clone();
if (!preprocess(&im_clone, &inputs_)) {
std::cerr << "Preprocess failed!" << std::endl;
return false;
}
  // Run inference
infer_request.Infer();
OutputsDataMap out_map = network_.getOutputsInfo();
auto iter = out_map.begin();
iter++;
std::string output_name_score = iter->first;
Blob::Ptr output_score = infer_request.GetBlob(output_name_score);
MemoryBlob::CPtr moutput_score = as<MemoryBlob>(output_score);
TensorDesc blob_score = moutput_score->getTensorDesc();
std::vector<size_t> output_score_shape = blob_score.getDims();
int size = 1;
for (auto& i : output_score_shape) {
size *= static_cast<int>(i);
result->score_map.shape.push_back(static_cast<int>(i));
}
result->score_map.data.resize(size);
auto moutputHolder_score = moutput_score->rmap();
float* score_data = moutputHolder_score.as<float *>();
  memcpy(result->score_map.data.data(), score_data, moutput_score->byteSize());
iter++;
std::string output_name_label = iter->first;
Blob::Ptr output_label = infer_request.GetBlob(output_name_label);
MemoryBlob::CPtr moutput_label = as<MemoryBlob>(output_label);
TensorDesc blob_label = moutput_label->getTensorDesc();
std::vector<size_t> output_label_shape = blob_label.getDims();
size = 1;
for (auto& i : output_label_shape) {
size *= static_cast<int>(i);
result->label_map.shape.push_back(static_cast<int>(i));
}
result->label_map.data.resize(size);
auto moutputHolder_label = moutput_label->rmap();
int* label_data = moutputHolder_label.as<int *>();
  memcpy(result->label_map.data.data(), label_data, moutput_label->byteSize());
std::vector<uint8_t> label_map(result->label_map.data.begin(),
result->label_map.data.end());
cv::Mat mask_label(result->label_map.shape[1],
result->label_map.shape[2],
CV_8UC1,
label_map.data());
cv::Mat mask_score(result->score_map.shape[2],
result->score_map.shape[3],
CV_32FC1,
result->score_map.data.data());
int idx = 1;
int len_postprocess = inputs_.im_size_before_resize_.size();
for (std::vector<std::string>::reverse_iterator iter =
inputs_.reshape_order_.rbegin();
iter != inputs_.reshape_order_.rend();
++iter) {
if (*iter == "padding") {
auto before_shape = inputs_.im_size_before_resize_[len_postprocess - idx];
inputs_.im_size_before_resize_.pop_back();
      // im_size_before_resize_ stores {rows, cols}, i.e. (h, w)
      auto before_h = before_shape[0];
      auto before_w = before_shape[1];
      mask_label = mask_label(cv::Rect(0, 0, before_w, before_h));
      mask_score = mask_score(cv::Rect(0, 0, before_w, before_h));
} else if (*iter == "resize") {
auto before_shape = inputs_.im_size_before_resize_[len_postprocess - idx];
inputs_.im_size_before_resize_.pop_back();
      // im_size_before_resize_ stores {rows, cols}, i.e. (h, w)
      auto before_h = before_shape[0];
      auto before_w = before_shape[1];
      cv::resize(mask_label,
                 mask_label,
                 cv::Size(before_w, before_h),
                 0,
                 0,
                 cv::INTER_NEAREST);
      cv::resize(mask_score,
                 mask_score,
                 cv::Size(before_w, before_h),
                 0,
                 0,
                 cv::INTER_LINEAR);
}
++idx;
}
result->label_map.data.assign(mask_label.begin<uint8_t>(),
mask_label.end<uint8_t>());
result->label_map.shape = {mask_label.rows, mask_label.cols};
result->score_map.data.assign(mask_score.begin<float>(),
mask_score.end<float>());
result->score_map.shape = {mask_score.rows, mask_score.cols};
return true;
}
}  // namespace PaddleX
......@@ -13,6 +13,7 @@
// limitations under the License.
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
......@@ -26,7 +27,7 @@ std::map<std::string, int> interpolations = {{"LINEAR", cv::INTER_LINEAR},
{"CUBIC", cv::INTER_CUBIC},
{"LANCZOS4", cv::INTER_LANCZOS4}};
bool Normalize::Run(cv::Mat* im){
bool Normalize::Run(cv::Mat* im, ImageBlob* data){
for (int h = 0; h < im->rows; h++) {
for (int w = 0; w < im->cols; w++) {
im->at<cv::Vec3f>(h, w)[0] =
......@@ -40,19 +41,6 @@ bool Normalize::Run(cv::Mat* im){
return true;
}
bool CenterCrop::Run(cv::Mat* im) {
int height = static_cast<int>(im->rows);
int width = static_cast<int>(im->cols);
if (height < height_ || width < width_) {
std::cerr << "[CenterCrop] Image size less than crop size" << std::endl;
return false;
}
int offset_x = static_cast<int>((width - width_) / 2);
int offset_y = static_cast<int>((height - height_) / 2);
cv::Rect crop_roi(offset_x, offset_y, width_, height_);
*im = (*im)(crop_roi);
return true;
}
float ResizeByShort::GenerateScale(const cv::Mat& im) {
......@@ -70,11 +58,109 @@ float ResizeByShort::GenerateScale(const cv::Mat& im) {
return scale;
}
bool ResizeByShort::Run(cv::Mat* im) {
bool ResizeByShort::Run(cv::Mat* im, ImageBlob* data) {
data->im_size_before_resize_.push_back({im->rows, im->cols});
data->reshape_order_.push_back("resize");
float scale = GenerateScale(*im);
int width = static_cast<int>(scale * im->cols);
int height = static_cast<int>(scale * im->rows);
cv::resize(*im, *im, cv::Size(width, height), 0, 0, cv::INTER_LINEAR);
data->new_im_size_[0] = im->rows;
data->new_im_size_[1] = im->cols;
data->scale = scale;
return true;
}
bool CenterCrop::Run(cv::Mat* im, ImageBlob* data) {
int height = static_cast<int>(im->rows);
int width = static_cast<int>(im->cols);
if (height < height_ || width < width_) {
std::cerr << "[CenterCrop] Image size less than crop size" << std::endl;
return false;
}
int offset_x = static_cast<int>((width - width_) / 2);
int offset_y = static_cast<int>((height - height_) / 2);
cv::Rect crop_roi(offset_x, offset_y, width_, height_);
*im = (*im)(crop_roi);
data->new_im_size_[0] = im->rows;
data->new_im_size_[1] = im->cols;
return true;
}
bool Padding::Run(cv::Mat* im, ImageBlob* data) {
data->im_size_before_resize_.push_back({im->rows, im->cols});
data->reshape_order_.push_back("padding");
int padding_w = 0;
int padding_h = 0;
  if (width_ > 1 && height_ > 1) {
padding_w = width_ - im->cols;
padding_h = height_ - im->rows;
} else if (coarsest_stride_ >= 1) {
int h = im->rows;
int w = im->cols;
padding_h =
ceil(h * 1.0 / coarsest_stride_) * coarsest_stride_ - im->rows;
padding_w =
ceil(w * 1.0 / coarsest_stride_) * coarsest_stride_ - im->cols;
}
if (padding_h < 0 || padding_w < 0) {
std::cerr << "[Padding] Computed padding_h=" << padding_h
<< ", padding_w=" << padding_w
<< ", but they should be greater than 0." << std::endl;
return false;
}
cv::copyMakeBorder(
*im, *im, 0, padding_h, 0, padding_w, cv::BORDER_CONSTANT, cv::Scalar(0));
data->new_im_size_[0] = im->rows;
data->new_im_size_[1] = im->cols;
return true;
}
bool ResizeByLong::Run(cv::Mat* im, ImageBlob* data) {
if (long_size_ <= 0) {
std::cerr << "[ResizeByLong] long_size should be greater than 0"
<< std::endl;
return false;
}
data->im_size_before_resize_.push_back({im->rows, im->cols});
data->reshape_order_.push_back("resize");
int origin_w = im->cols;
int origin_h = im->rows;
int im_size_max = std::max(origin_w, origin_h);
float scale =
static_cast<float>(long_size_) / static_cast<float>(im_size_max);
cv::resize(*im, *im, cv::Size(), scale, scale, cv::INTER_NEAREST);
data->new_im_size_[0] = im->rows;
data->new_im_size_[1] = im->cols;
data->scale = scale;
return true;
}
bool Resize::Run(cv::Mat* im, ImageBlob* data) {
if (width_ <= 0 || height_ <= 0) {
std::cerr << "[Resize] width and height should be greater than 0"
<< std::endl;
return false;
}
if (interpolations.count(interp_) <= 0) {
std::cerr << "[Resize] Invalid interpolation method: '" << interp_ << "'"
<< std::endl;
return false;
}
data->im_size_before_resize_.push_back({im->rows, im->cols});
data->reshape_order_.push_back("resize");
cv::resize(
*im, *im, cv::Size(width_, height_), 0, 0, interpolations[interp_]);
data->new_im_size_[0] = im->rows;
data->new_im_size_[1] = im->cols;
return true;
}
......@@ -94,10 +180,16 @@ std::shared_ptr<Transform> Transforms::CreateTransform(
const std::string& transform_name) {
if (transform_name == "Normalize") {
return std::make_shared<Normalize>();
} else if (transform_name == "CenterCrop") {
return std::make_shared<CenterCrop>();
} else if (transform_name == "ResizeByShort") {
return std::make_shared<ResizeByShort>();
} else if (transform_name == "CenterCrop") {
return std::make_shared<CenterCrop>();
} else if (transform_name == "Resize") {
return std::make_shared<Resize>();
} else if (transform_name == "Padding") {
return std::make_shared<Padding>();
} else if (transform_name == "ResizeByLong") {
return std::make_shared<ResizeByLong>();
} else {
std::cerr << "There's unexpected transform(name='" << transform_name
<< "')." << std::endl;
......@@ -105,15 +197,20 @@ std::shared_ptr<Transform> Transforms::CreateTransform(
}
}
bool Transforms::Run(cv::Mat* im, Blob::Ptr blob) {
bool Transforms::Run(cv::Mat* im, ImageBlob* data) {
  // Apply the preprocessing operators in the order given in transforms
if (to_rgb_) {
cv::cvtColor(*im, *im, cv::COLOR_BGR2RGB);
}
(*im).convertTo(*im, CV_32FC3);
data->ori_im_size_[0] = im->rows;
data->ori_im_size_[1] = im->cols;
data->new_im_size_[0] = im->rows;
data->new_im_size_[1] = im->cols;
for (int i = 0; i < transforms_.size(); ++i) {
if (!transforms_[i]->Run(im)) {
    if (!transforms_[i]->Run(im, data)) {
std::cerr << "Apply transforms to image failed!" << std::endl;
return false;
}
......@@ -121,13 +218,15 @@ bool Transforms::Run(cv::Mat* im, Blob::Ptr blob) {
  // Convert the image from NHWC to NCHW layout and
  // store it as a contiguous block of memory in the Blob
SizeVector blobSize = blob->getTensorDesc().getDims();
SizeVector blobSize = data->blob->getTensorDesc().getDims();
const size_t width = blobSize[3];
const size_t height = blobSize[2];
const size_t channels = blobSize[1];
MemoryBlob::Ptr mblob = InferenceEngine::as<MemoryBlob>(blob);
MemoryBlob::Ptr mblob = InferenceEngine::as<MemoryBlob>(data->blob);
auto mblobHolder = mblob->wmap();
float *blob_data = mblobHolder.as<float *>();
for (size_t c = 0; c < channels; c++) {
for (size_t h = 0; h < height; h++) {
for (size_t w = 0; w < width; w++) {
......
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "include/paddlex/visualize.h"
namespace PaddleX {
std::vector<int> GenerateColorMap(int num_class) {
auto colormap = std::vector<int>(3 * num_class, 0);
for (int i = 0; i < num_class; ++i) {
int j = 0;
int lab = i;
while (lab) {
colormap[i * 3] |= (((lab >> 0) & 1) << (7 - j));
colormap[i * 3 + 1] |= (((lab >> 1) & 1) << (7 - j));
colormap[i * 3 + 2] |= (((lab >> 2) & 1) << (7 - j));
++j;
lab >>= 3;
}
}
return colormap;
}
cv::Mat Visualize(const cv::Mat& img,
const DetResult& result,
const std::map<int, std::string>& labels,
const std::vector<int>& colormap,
float threshold) {
cv::Mat vis_img = img.clone();
auto boxes = result.boxes;
for (int i = 0; i < boxes.size(); ++i) {
if (boxes[i].score < threshold) {
continue;
}
cv::Rect roi = cv::Rect(boxes[i].coordinate[0],
boxes[i].coordinate[1],
boxes[i].coordinate[2],
boxes[i].coordinate[3]);
    // Draw the predicted box and its caption
std::string text = boxes[i].category;
int c1 = colormap[3 * boxes[i].category_id + 0];
int c2 = colormap[3 * boxes[i].category_id + 1];
int c3 = colormap[3 * boxes[i].category_id + 2];
cv::Scalar roi_color = cv::Scalar(c1, c2, c3);
text += std::to_string(static_cast<int>(boxes[i].score * 100)) + "%";
int font_face = cv::FONT_HERSHEY_SIMPLEX;
double font_scale = 0.5f;
float thickness = 0.5;
cv::Size text_size =
cv::getTextSize(text, font_face, font_scale, thickness, nullptr);
cv::Point origin;
origin.x = roi.x;
origin.y = roi.y;
    // Background rectangle for the box caption
cv::Rect text_back = cv::Rect(boxes[i].coordinate[0],
boxes[i].coordinate[1] - text_size.height,
text_size.width,
text_size.height);
    // Draw the box and the caption text
cv::rectangle(vis_img, roi, roi_color, 2);
cv::rectangle(vis_img, text_back, roi_color, -1);
cv::putText(vis_img,
text,
origin,
font_face,
font_scale,
cv::Scalar(255, 255, 255),
thickness);
    // Render the instance segmentation mask
if (boxes[i].mask.data.size() == 0) {
continue;
}
cv::Mat bin_mask(result.mask_resolution,
result.mask_resolution,
CV_32FC1,
boxes[i].mask.data.data());
cv::resize(bin_mask,
bin_mask,
cv::Size(boxes[i].mask.shape[0], boxes[i].mask.shape[1]));
cv::threshold(bin_mask, bin_mask, 0.5, 1, cv::THRESH_BINARY);
cv::Mat full_mask = cv::Mat::zeros(vis_img.size(), CV_8UC1);
bin_mask.copyTo(full_mask(roi));
cv::Mat mask_ch[3];
mask_ch[0] = full_mask * c1;
mask_ch[1] = full_mask * c2;
mask_ch[2] = full_mask * c3;
cv::Mat mask;
cv::merge(mask_ch, 3, mask);
cv::addWeighted(vis_img, 1, mask, 0.5, 0, vis_img);
}
return vis_img;
}
cv::Mat Visualize(const cv::Mat& img,
const SegResult& result,
const std::map<int, std::string>& labels,
const std::vector<int>& colormap) {
std::vector<uint8_t> label_map(result.label_map.data.begin(),
result.label_map.data.end());
cv::Mat mask(result.label_map.shape[0],
result.label_map.shape[1],
CV_8UC1,
label_map.data());
cv::Mat color_mask = cv::Mat::zeros(
result.label_map.shape[0], result.label_map.shape[1], CV_8UC3);
int rows = img.rows;
int cols = img.cols;
for (int i = 0; i < rows; i++) {
for (int j = 0; j < cols; j++) {
int category_id = static_cast<int>(mask.at<uchar>(i, j));
color_mask.at<cv::Vec3b>(i, j)[0] = colormap[3 * category_id + 0];
color_mask.at<cv::Vec3b>(i, j)[1] = colormap[3 * category_id + 1];
color_mask.at<cv::Vec3b>(i, j)[2] = colormap[3 * category_id + 2];
}
}
return color_mask;
}
std::string generate_save_path(const std::string& save_dir,
const std::string& file_path) {
if (access(save_dir.c_str(), 0) < 0) {
#ifdef _WIN32
mkdir(save_dir.c_str());
#else
if (mkdir(save_dir.c_str(), S_IRWXU) < 0) {
std::cerr << "Fail to create " << save_dir << "directory." << std::endl;
}
#endif
}
int pos = file_path.find_last_of(OS_PATH_SEP);
std::string image_name(file_path.substr(pos + 1));
return save_dir + OS_PATH_SEP + image_name;
}
} // namespace PaddleX