提交 9b0d2062 编写于 作者: S sunyanfang01

fix the conflicts

......@@ -23,6 +23,7 @@
files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|py)$
exclude: (?!.*third_party)^.*$
- repo: local
hooks:
- id: clang-format-with-version-check
name: clang-format
......@@ -31,10 +32,11 @@
language: system
files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto)$
- repo: local
hooks:
- id: cpplint-cpp-source
name: cpplint
description: Check C++ code style using cpplint.py.
entry: bash cpplint_pre_commit.hook
entry: bash ./tools/codestyle/cpplint_pre_commit.hook
language: system
files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx)$
......@@ -66,7 +66,7 @@ int main(int argc, char** argv) {
std::cout << "image file: " << image_path
<< ", predict label: " << result.boxes[i].category
<< ", label_id:" << result.boxes[i].category_id
<< ", score: " << result.boxes[i].score << ", box:("
<< ", score: " << result.boxes[i].score << ", box(xmin, ymin, w, h):("
<< result.boxes[i].coordinate[0] << ", "
<< result.boxes[i].coordinate[1] << ", "
<< result.boxes[i].coordinate[2] << ", "
......@@ -89,7 +89,7 @@ int main(int argc, char** argv) {
for (int i = 0; i < result.boxes.size(); ++i) {
std::cout << ", predict label: " << result.boxes[i].category
<< ", label_id:" << result.boxes[i].category_id
<< ", score: " << result.boxes[i].score << ", box:("
<< ", score: " << result.boxes[i].score << ", box(xmin, ymin, w, h):("
<< result.boxes[i].coordinate[0] << ", "
<< result.boxes[i].coordinate[1] << ", "
<< result.boxes[i].coordinate[2] << ", "
......
......@@ -63,9 +63,10 @@ class SegResult : public BaseResult {
public:
Mask<int64_t> label_map;
Mask<float> score_map;
std::string type = "seg";
void clear() {
label_map.clear();
score_map.clear();
}
};
} // namespce of PaddleX
} // namespace PaddleX
......@@ -83,7 +83,7 @@ class ResizeByShort : public Transform {
} else {
max_size_ = -1;
}
};
}
virtual bool Run(cv::Mat* im, ImageBlob* data);
private:
......@@ -96,7 +96,7 @@ class ResizeByLong : public Transform {
public:
virtual void Init(const YAML::Node& item) {
long_size_ = item["long_size"].as<int>();
};
}
virtual bool Run(cv::Mat* im, ImageBlob* data);
private:
......@@ -167,9 +167,6 @@ class Padding : public Transform {
height_ = item["target_size"].as<std::vector<int>>()[1];
}
}
if (item["im_padding_value"].IsDefined()) {
value_ = item["im_padding_value"].as<std::vector<float>>();
}
}
virtual bool Run(cv::Mat* im, ImageBlob* data);
......@@ -177,7 +174,6 @@ class Padding : public Transform {
int coarsest_stride_ = -1;
int width_ = 0;
int height_ = 0;
std::vector<float> value_;
};
class Transforms {
......
......@@ -65,6 +65,15 @@ bool Model::load_config(const std::string& model_dir) {
YAML::Node config = YAML::LoadFile(yaml_file);
type = config["_Attributes"]["model_type"].as<std::string>();
name = config["Model"].as<std::string>();
std::string version = config["version"].as<std::string>();
if (version[0] == '0') {
std::cerr << "[Init] Version of the loaded model is lower than 1.0.0, deployment "
<< "cannot be done, please refer to "
<< "https://github.com/PaddlePaddle/PaddleX/blob/develop/docs/tutorials/deploy/upgrade_version.md "
<< "to transfer version."
<< std::endl;
return false;
}
bool to_rgb = true;
if (config["TransformsMode"].IsDefined()) {
std::string mode = config["TransformsMode"].as<std::string>();
......@@ -89,7 +98,7 @@ bool Model::load_config(const std::string& model_dir) {
bool Model::preprocess(const cv::Mat& input_im, ImageBlob* blob) {
cv::Mat im = input_im.clone();
if (!transforms_.Run(&im, &inputs_)) {
if (!transforms_.Run(&im, blob)) {
return false;
}
return true;
......
......@@ -19,30 +19,30 @@ import argparse
def export_lite():
opt = lite.Opt()
model_file = os.path.join(FLAGS.model_path, '__model__')
params_file = os.path.join(FLAGS.model_path, '__params__')
opt.run_optimize("", model_file, params_file, FLAGS.place, FLAGS.save_dir)
model_file = os.path.join(FLAGS.model_dir, '__model__')
params_file = os.path.join(FLAGS.model_dir, '__params__')
opt.run_optimize("", model_file, params_file, FLAGS.place, FLAGS.save_file)
if __name__ == '__main__':
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
"--model_path",
"--model_dir",
type=str,
default="",
help="model path.",
help="path of '__model__' and '__params__'.",
required=True)
parser.add_argument(
"--place",
type=str,
default="arm",
help="preprocess config path.",
help="run place: 'arm|opencl|x86|npu|xpu|rknpu|apu'.",
required=True)
parser.add_argument(
"--save_dir",
"--save_file",
type=str,
default="paddlex.onnx",
help="Directory for storing the output visualization files.",
help="file name for storing the output files.",
required=True)
FLAGS = parser.parse_args()
export_lite()
......@@ -60,3 +60,9 @@
## 11. 每次训练新的模型,都需要重新下载预训练模型,怎样可以下载一次就搞定
> 1.可以按照9的方式来解决这个问题
> 2.每次训练前都设定`paddlex.pretrain_dir`路径,如设定`paddlex.pretrain_dir='/usrname/paddlex`,如此下载完的预训练模型会存放至`/usrname/paddlex`目录下,而已经下载在该目录的模型也不会再次重复下载
## 12. 程序启动时提示"Failed to execute script PaddleX",如何解决?
> 1. 请检查目标机器上PaddleX程序所在路径是否包含中文。目前暂不支持中文路径,请尝试将程序移动到英文目录。
> 2. 如果您的系统是Windows 7或者Windows Server 2012时,原因是缺少MFPlat.DLL/MF.dll/MFReadWrite.dll等OpenCV依赖的DLL,请按如下方式安装桌面体验:通过“我的电脑”-->“属性”-->"管理"打开服务器管理器,点击右上角“管理”选择“添加角色和功能”。点击“服务器选择”-->“功能”,拖动滚动条到最下端,点开“用户界面和基础结构”,勾选“桌面体验”后点击“安装”,等安装完成尝试再次运行PaddleX。
> 3. 请检查目标机器上是否有其他的PaddleX程序或者进程在运行中,如有请退出或者重启机器看是否解决
> 4. 请确认运行程序的用户是否有管理员权限,如非管理员权限用户请尝试使用管理员运行看是否成功
\ No newline at end of file
......@@ -8,7 +8,7 @@ paddlex.datasets.VOCDetection(data_dir, file_list, label_list, transforms=None,
> 仅用于**目标检测**。读取PascalVOC格式的检测数据集,并对样本进行相应的处理。PascalVOC数据集格式的介绍可查看文档:[数据集格式说明](../datasets.md)
> 示例:[代码文件](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/train/detection/yolov3_mobilenetv1.py#L29)
> 示例:[代码文件](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/train/detection/yolov3_darknet53.py#L29)
> **参数**
......@@ -21,6 +21,16 @@ paddlex.datasets.VOCDetection(data_dir, file_list, label_list, transforms=None,
> > * **parallel_method** (str): 数据集中样本在预处理过程中并行处理的方式,支持'thread'线程和'process'进程两种方式。默认为'process'(Windows和Mac下会强制使用thread,该参数无效)。
> > * **shuffle** (bool): 是否需要对数据集中样本打乱顺序。默认为False。
> 【可选】支持在训练过程中加入无目标真值的背景图片来减少背景误检,定义VOCDetection类后调用其成员函数`add_negative_samples`添加背景图片即可:
> ```
> add_negative_samples(image_dir)
> ```
> > 示例:[代码](../../tuning_strategy/detection/negatives_training.html#id4)
> > **参数**
> > > * **image_dir** (str): 背景图片所在的目录路径。
## CocoDetection类
```
......@@ -41,6 +51,16 @@ paddlex.datasets.CocoDetection(data_dir, ann_file, transforms=None, num_workers=
> > * **parallel_method** (str): 数据集中样本在预处理过程中并行处理的方式,支持'thread'线程和'process'进程两种方式。默认为'process'(Windows和Mac下会强制使用thread,该参数无效)。
> > * **shuffle** (bool): 是否需要对数据集中样本打乱顺序。默认为False。
> 【可选】支持在训练过程中加入无目标真值的背景图片来减少背景误检,定义CocoDetection类后调用其成员函数`add_negative_samples`添加背景图片即可:
> ```
> add_negative_samples(image_dir)
> ```
> > 示例:[代码](../../tuning_strategy/detection/negatives_training.html#id4)
> > **参数**
> > > * **image_dir** (str): 背景图片所在的目录路径。
## EasyDataDet类
```
......@@ -61,3 +81,13 @@ paddlex.datasets.EasyDataDet(data_dir, file_list, label_list, transforms=None, n
> > * **parallel_method** (str): 数据集中样本在预处理过程中并行处理的方式,支持'thread'线程和'process'进程两种方式。默认为'process'(Windows和Mac下会强制使用thread,该参数无效)。
> > * **shuffle** (bool): 是否需要对数据集中样本打乱顺序。默认为False。
> 【可选】支持在训练过程中加入无目标真值的背景图片来减少背景误检,定义EasyDataDet类后调用其成员函数`add_negative_samples`添加背景图片即可:
> ```
> add_negative_samples(image_dir)
> ```
> > 示例:[代码](../../tuning_strategy/detection/negatives_training.html#id4)
> > **参数**
> > > * **image_dir** (str): 背景图片所在的目录路径。
# Predictor部署-paddlex.deploy
# 预测部署-paddlex.deploy
使用AnalysisPredictor进行预测部署。
使用Paddle Inference进行高性能的Python预测部署。更多关于Paddle Inference信息请参考[Paddle Inference文档](https://paddle-inference.readthedocs.io/en/latest/#)
## Predictor类
......@@ -22,6 +22,7 @@ paddlex.deploy.Predictor(model_dir, use_gpu=False, gpu_id=0, use_mkl=False, use_
> >
> > ```
> > import paddlex
> >
> > model = paddlex.deploy.Predictor(model_dir, use_gpu=True)
> > result = model.predict(image_file)
> > ```
......
......@@ -15,7 +15,7 @@ paddlex.cls.ResNet50(num_classes=1000)
### train 训练接口
```python
train(self, num_epochs, train_dataset, train_batch_size=64, eval_dataset=None, save_interval_epochs=1, log_interval_steps=2, save_dir='output', pretrain_weights='IMAGENET', optimizer=None, learning_rate=0.025, lr_decay_epochs=[30, 60, 90], lr_decay_gamma=0.1, use_vdl=False, sensitivities_file=None, eval_metric_loss=0.05, early_stop=False, early_stop_patience=5, resume_checkpoint=None)
train(self, num_epochs, train_dataset, train_batch_size=64, eval_dataset=None, save_interval_epochs=1, log_interval_steps=2, save_dir='output', pretrain_weights='IMAGENET', optimizer=None, learning_rate=0.025, warmup_steps=0, warmup_start_lr=0.0, lr_decay_epochs=[30, 60, 90], lr_decay_gamma=0.1, use_vdl=False, sensitivities_file=None, eval_metric_loss=0.05, early_stop=False, early_stop_patience=5, resume_checkpoint=None)
```
>
> **参数**
......@@ -30,12 +30,14 @@ train(self, num_epochs, train_dataset, train_batch_size=64, eval_dataset=None, s
> > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',则自动下载在ImageNet图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认为'IMAGENET'。
> > - **optimizer** (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认优化器:fluid.layers.piecewise_decay衰减策略,fluid.optimizer.Momentum优化方法。
> > - **learning_rate** (float): 默认优化器的初始学习率。默认为0.025。
> > - **warmup_steps** (int): 默认优化器的warmup步数,学习率将在设定的步数内,从warmup_start_lr线性增长至设定的learning_rate,默认为0。
> > - **warmup_start_lr**(float): 默认优化器的warmup起始学习率,默认为0.0。
> > - **lr_decay_epochs** (list): 默认优化器的学习率衰减轮数。默认为[30, 60, 90]。
> > - **lr_decay_gamma** (float): 默认优化器的学习率衰减率。默认为0.1。
> > - **use_vdl** (bool): 是否使用VisualDL进行可视化。默认值为False。
> > - **sensitivities_file** (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT',则自动下载在ImageNet图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。
> > - **eval_metric_loss** (float): 可容忍的精度损失。默认为0.05。
> > - **early_stop** (float): 是否使用提前终止训练策略。默认值为False。
> > - **early_stop** (bool): 是否使用提前终止训练策略。默认值为False。
> > - **early_stop_patience** (int): 当使用提前终止训练策略时,如果验证集精度在`early_stop_patience`个epoch内连续下降或持平,则终止训练。默认值为5。
> > - **resume_checkpoint** (str): 恢复训练时指定上次训练保存的模型路径。若为None,则不会恢复训练。默认值为None。
......@@ -186,3 +188,7 @@ paddlex.cls.DenseNet161(num_classes=1000)
paddlex.cls.DenseNet201(num_classes=1000)
```
### HRNet_W18
```python
paddlex.cls.HRNet_W18(num_classes=1000)
```
......@@ -42,7 +42,7 @@ train(self, num_epochs, train_dataset, train_batch_size=8, eval_dataset=None, sa
> > - **save_interval_epochs** (int): 模型保存间隔(单位:迭代轮数)。默认为20。
> > - **log_interval_steps** (int): 训练日志输出间隔(单位:迭代次数)。默认为2。
> > - **save_dir** (str): 模型保存路径。默认值为'output'。
> > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',则自动下载在ImageNet图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认为None。
> > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',则自动下载在ImageNet图片数据上预训练的模型权重;若为字符串'COCO',则自动下载在COCO数据集上预训练的模型权重;若为None,则不使用预训练模型。默认为None。
> > - **optimizer** (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认优化器:fluid.layers.piecewise_decay衰减策略,fluid.optimizer.Momentum优化方法。
> > - **learning_rate** (float): 默认优化器的学习率。默认为1.0/8000。
> > - **warmup_steps** (int): 默认优化器进行warmup过程的步数。默认为1000。
......@@ -53,7 +53,7 @@ train(self, num_epochs, train_dataset, train_batch_size=8, eval_dataset=None, sa
> > - **use_vdl** (bool): 是否使用VisualDL进行可视化。默认值为False。
> > - **sensitivities_file** (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT',则自动下载在PascalVOC数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。
> > - **eval_metric_loss** (float): 可容忍的精度损失。默认为0.05。
> > - **early_stop** (float): 是否使用提前终止训练策略。默认值为False。
> > - **early_stop** (bool): 是否使用提前终止训练策略。默认值为False。
> > - **early_stop_patience** (int): 当使用提前终止训练策略时,如果验证集精度在`early_stop_patience`个epoch内连续下降或持平,则终止训练。默认值为5。
> > - **resume_checkpoint** (str): 恢复训练时指定上次训练保存的模型路径。若为None,则不会恢复训练。默认值为None。
......@@ -107,7 +107,7 @@ paddlex.det.FasterRCNN(num_classes=81, backbone='ResNet50', with_fpn=True, aspec
> **参数**
> > - **num_classes** (int): 包含了背景类的类别数。默认为81。
> > - **backbone** (str): FasterRCNN的backbone网络,取值范围为['ResNet18', 'ResNet50', 'ResNet50_vd', 'ResNet101', 'ResNet101_vd']。默认为'ResNet50'。
> > - **backbone** (str): FasterRCNN的backbone网络,取值范围为['ResNet18', 'ResNet50', 'ResNet50_vd', 'ResNet101', 'ResNet101_vd', 'HRNet_W18']。默认为'ResNet50'。
> > - **with_fpn** (bool): 是否使用FPN结构。默认为True。
> > - **aspect_ratios** (list): 生成anchor高宽比的可选值。默认为[0.5, 1.0, 2.0]。
> > - **anchor_sizes** (list): 生成anchor大小的可选值。默认为[32, 64, 128, 256, 512]。
......@@ -129,7 +129,7 @@ train(self, num_epochs, train_dataset, train_batch_size=2, eval_dataset=None, sa
> > - **save_interval_epochs** (int): 模型保存间隔(单位:迭代轮数)。默认为1。
> > - **log_interval_steps** (int): 训练日志输出间隔(单位:迭代次数)。默认为2。
> > - **save_dir** (str): 模型保存路径。默认值为'output'。
> > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',则自动下载在ImageNet图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认为None。
> > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',则自动下载在ImageNet图片数据上预训练的模型权重;若为字符串'COCO',则自动下载在COCO数据集上预训练的模型权重(注意:暂未提供ResNet18的COCO预训练模型);为None,则不使用预训练模型。默认为None。
> > - **optimizer** (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认优化器:fluid.layers.piecewise_decay衰减策略,fluid.optimizer.Momentum优化方法。
> > - **learning_rate** (float): 默认优化器的初始学习率。默认为0.0025。
> > - **warmup_steps** (int): 默认优化器进行warmup过程的步数。默认为500。
......
......@@ -12,7 +12,7 @@ paddlex.det.MaskRCNN(num_classes=81, backbone='ResNet50', with_fpn=True, aspect_
> **参数**
> > - **num_classes** (int): 包含了背景类的类别数。默认为81。
> > - **backbone** (str): MaskRCNN的backbone网络,取值范围为['ResNet18', 'ResNet50', 'ResNet50_vd', 'ResNet101', 'ResNet101_vd']。默认为'ResNet50'。
> > - **backbone** (str): MaskRCNN的backbone网络,取值范围为['ResNet18', 'ResNet50', 'ResNet50_vd', 'ResNet101', 'ResNet101_vd', 'HRNet_W18']。默认为'ResNet50'。
> > - **with_fpn** (bool): 是否使用FPN结构。默认为True。
> > - **aspect_ratios** (list): 生成anchor高宽比的可选值。默认为[0.5, 1.0, 2.0]。
> > - **anchor_sizes** (list): 生成anchor大小的可选值。默认为[32, 64, 128, 256, 512]。
......@@ -34,7 +34,7 @@ train(self, num_epochs, train_dataset, train_batch_size=1, eval_dataset=None, sa
> > - **save_interval_epochs** (int): 模型保存间隔(单位:迭代轮数)。默认为1。
> > - **log_interval_steps** (int): 训练日志输出间隔(单位:迭代次数)。默认为2。
> > - **save_dir** (str): 模型保存路径。默认值为'output'。
> > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',则自动下载在ImageNet图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认为None。
> > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',则自动下载在ImageNet图片数据上预训练的模型权重;若为字符串'COCO',则自动下载在COCO数据集上预训练的模型权重(注意:暂未提供ResNet18和HRNet_W18的COCO预训练模型);若为None,则不使用预训练模型。默认为None。
> > - **optimizer** (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认优化器:fluid.layers.piecewise_decay衰减策略,fluid.optimizer.Momentum优化方法。
> > - **learning_rate** (float): 默认优化器的初始学习率。默认为0.00125。
> > - **warmup_steps** (int): 默认优化器进行warmup过程的步数。默认为500。
......@@ -82,4 +82,4 @@ predict(self, img_file, transforms=None)
>
> **返回值**
>
> > - **list**: 预测结果列表,列表中每个元素均为一个dict,key'bbox', 'mask', 'category', 'category_id', 'score',分别表示每个预测目标的框坐标信息、Mask信息,类别、类别id、置信度,其中框坐标信息为[xmin, ymin, w, h],即左上角x, y坐标和框的宽和高
> > - **list**: 预测结果列表,列表中每个元素均为一个dict,key'bbox', 'mask', 'category', 'category_id', 'score',分别表示每个预测目标的框坐标信息、Mask信息,类别、类别id、置信度。其中框坐标信息为[xmin, ymin, w, h],即左上角x, y坐标和框的宽和高。Mask信息为原图大小的二值图,1表示像素点属于预测类别,0表示像素点是背景
......@@ -12,7 +12,7 @@ paddlex.seg.DeepLabv3p(num_classes=2, backbone='MobileNetV2_x1.0', output_stride
> **参数**
> > - **num_classes** (int): 类别数。
> > - **backbone** (str): DeepLabv3+的backbone网络,实现特征图的计算,取值范围为['Xception65', 'Xception41', 'MobileNetV2_x0.25', 'MobileNetV2_x0.5', 'MobileNetV2_x1.0', 'MobileNetV2_x1.5', 'MobileNetV2_x2.0'],'MobileNetV2_x1.0'。
> > - **backbone** (str): DeepLabv3+的backbone网络,实现特征图的计算,取值范围为['Xception65', 'Xception41', 'MobileNetV2_x0.25', 'MobileNetV2_x0.5', 'MobileNetV2_x1.0', 'MobileNetV2_x1.5', 'MobileNetV2_x2.0'],默认值为'MobileNetV2_x1.0'。
> > - **output_stride** (int): backbone 输出特征图相对于输入的下采样倍数,一般取值为8或16。默认16。
> > - **aspp_with_sep_conv** (bool): decoder模块是否采用separable convolutions。默认True。
> > - **decoder_use_sep_conv** (bool): decoder模块是否采用separable convolutions。默认True。
......@@ -40,14 +40,14 @@ train(self, num_epochs, train_dataset, train_batch_size=2, eval_dataset=None, ev
> > - **save_interval_epochs** (int): 模型保存间隔(单位:迭代轮数)。默认为1。
> > - **log_interval_steps** (int): 训练日志输出间隔(单位:迭代次数)。默认为2。
> > - **save_dir** (str): 模型保存路径。默认'output'
> > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',则自动下载在ImageNet图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认'IMAGENET'。
> > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',则自动下载在ImageNet图片数据上预训练的模型权重;若为字符串'COCO',则自动下载在COCO数据集上预训练的模型权重(注意:暂未提供Xception41、MobileNetV2_x0.25、MobileNetV2_x0.5、MobileNetV2_x1.5、MobileNetV2_x2.0的COCO预训练模型);若为字符串'CITYSCAPES',则自动下载在CITYSCAPES数据集上预训练的模型权重(注意:暂未提供Xception41、MobileNetV2_x0.25、MobileNetV2_x0.5、MobileNetV2_x1.5、MobileNetV2_x2.0的CITYSCAPES预训练模型);若为None,则不使用预训练模型。默认'IMAGENET'。
> > - **optimizer** (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认的优化器:使用fluid.optimizer.Momentum优化方法,polynomial的学习率衰减策略。
> > - **learning_rate** (float): 默认优化器的初始学习率。默认0.01。
> > - **lr_decay_power** (float): 默认优化器学习率衰减指数。默认0.9。
> > - **use_vdl** (bool): 是否使用VisualDL进行可视化。默认False。
> > - **sensitivities_file** (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT',则自动下载在ImageNet图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。
> > - **sensitivities_file** (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT',则自动下载在Cityscapes图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。
> > - **eval_metric_loss** (float): 可容忍的精度损失。默认为0.05。
> > - **early_stop** (float): 是否使用提前终止训练策略。默认值为False。
> > - **early_stop** (bool): 是否使用提前终止训练策略。默认值为False。
> > - **early_stop_patience** (int): 当使用提前终止训练策略时,如果验证集精度在`early_stop_patience`个epoch内连续下降或持平,则终止训练。默认值为5。
> > - **resume_checkpoint** (str): 恢复训练时指定上次训练保存的模型路径。若为None,则不会恢复训练。默认值为None。
......@@ -124,12 +124,12 @@ train(self, num_epochs, train_dataset, train_batch_size=2, eval_dataset=None, ev
> > - **save_interval_epochs** (int): 模型保存间隔(单位:迭代轮数)。默认为1。
> > - **log_interval_steps** (int): 训练日志输出间隔(单位:迭代次数)。默认为2。
> > - **save_dir** (str): 模型保存路径。默认'output'
> > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',则自动下载在COCO图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认'COCO'。
> > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'COCO',则自动下载在COCO图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认'COCO'。
> > - **optimizer** (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认的优化器:使用fluid.optimizer.Momentum优化方法,polynomial的学习率衰减策略。
> > - **learning_rate** (float): 默认优化器的初始学习率。默认0.01。
> > - **lr_decay_power** (float): 默认优化器学习率衰减指数。默认0.9。
> > - **use_vdl** (bool): 是否使用VisualDL进行可视化。默认False。
> > - **sensitivities_file** (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT',则自动下载在ImageNet图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。
> > - **sensitivities_file** (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT',则自动下载在Cityscapes图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。
> > - **eval_metric_loss** (float): 可容忍的精度损失。默认为0.05。
> > - **early_stop** (float): 是否使用提前终止训练策略。默认值为False。
> > - **early_stop_patience** (int): 当使用提前终止训练策略时,如果验证集精度在`early_stop_patience`个epoch内连续下降或持平,则终止训练。默认值为5。
......@@ -173,3 +173,173 @@ predict(self, im_file, transforms=None):
> **返回值**
> >
> > - **dict**: 包含关键字'label_map'和'score_map', 'label_map'存储预测结果灰度图,像素值表示对应的类别,'score_map'存储各类别的概率,shape=(h, w, num_classes)。
## HRNet类
```python
paddlex.seg.HRNet(num_classes=2, width=18, use_bce_loss=False, use_dice_loss=False, class_weight=None, ignore_index=255)
```
> 构建HRNet分割器。
> **参数**
> > - **num_classes** (int): 类别数。
> > - **width** (int): 高分辨率分支中特征层的通道数量。默认值为18。可选择取值为[18, 30, 32, 40, 44, 48, 60, 64]。
> > - **use_bce_loss** (bool): 是否使用bce loss作为网络的损失函数,只能用于两类分割。可与dice loss同时使用。默认False。
> > - **use_dice_loss** (bool): 是否使用dice loss作为网络的损失函数,只能用于两类分割,可与bce loss同时使用。当use_bce_loss和use_dice_loss都为False时,使用交叉熵损失函数。默认False。
> > - **class_weight** (list/str): 交叉熵损失函数各类损失的权重。当`class_weight`为list的时候,长度应为`num_classes`。当`class_weight`为str时, weight.lower()应为'dynamic',这时会根据每一轮各类像素的比重自行计算相应的权重,每一类的权重为:每类的比例 * num_classes。class_weight取默认值None是,各类的权重1,即平时使用的交叉熵损失函数。
> > - **ignore_index** (int): label上忽略的值,label为`ignore_index`的像素不参与损失函数的计算。默认255。
### train 训练接口
```python
train(self, num_epochs, train_dataset, train_batch_size=2, eval_dataset=None, eval_batch_size=1, save_interval_epochs=1, log_interval_steps=2, save_dir='output', pretrain_weights='IMAGENET', optimizer=None, learning_rate=0.01, lr_decay_power=0.9, use_vdl=False, sensitivities_file=None, eval_metric_loss=0.05, early_stop=False, early_stop_patience=5, resume_checkpoint=None):
```
> HRNet模型训练接口。
> **参数**
> >
> > - **num_epochs** (int): 训练迭代轮数。
> > - **train_dataset** (paddlex.datasets): 训练数据读取器。
> > - **train_batch_size** (int): 训练数据batch大小。同时作为验证数据batch大小。默认2。
> > - **eval_dataset** (paddlex.datasets): 评估数据读取器。
> > - **save_interval_epochs** (int): 模型保存间隔(单位:迭代轮数)。默认为1。
> > - **log_interval_steps** (int): 训练日志输出间隔(单位:迭代次数)。默认为2。
> > - **save_dir** (str): 模型保存路径。默认'output'
> > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',则自动下载在ImageNet数据集上预训练的模型权重;若为字符串'CITYSCAPES',则自动下载在CITYSCAPES图片数据上预训练的模型权重(注意:目前仅提供`width`取值为18的CITYSCAPES预训练模型);若为None,则不使用预训练模型。默认'IMAGENET'。
> > - **optimizer** (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认的优化器:使用fluid.optimizer.Momentum优化方法,polynomial的学习率衰减策略。
> > - **learning_rate** (float): 默认优化器的初始学习率。默认0.01。
> > - **lr_decay_power** (float): 默认优化器学习率衰减指数。默认0.9。
> > - **use_vdl** (bool): 是否使用VisualDL进行可视化。默认False。
> > - **sensitivities_file** (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT',则自动下载在Cityscapes图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。
> > - **eval_metric_loss** (float): 可容忍的精度损失。默认为0.05。
> > - **early_stop** (float): 是否使用提前终止训练策略。默认值为False。
> > - **early_stop_patience** (int): 当使用提前终止训练策略时,如果验证集精度在`early_stop_patience`个epoch内连续下降或持平,则终止训练。默认值为5。
> > - **resume_checkpoint** (str): 恢复训练时指定上次训练保存的模型路径。若为None,则不会恢复训练。默认值为None。
#### evaluate 评估接口
```
evaluate(self, eval_dataset, batch_size=1, epoch_id=None, return_details=False):
```
> HRNet模型评估接口。
> **参数**
> >
> > - **eval_dataset** (paddlex.datasets): 评估数据读取器。
> > - **batch_size** (int): 评估时的batch大小。默认1。
> > - **epoch_id** (int): 当前评估模型所在的训练轮数。
> > - **return_details** (bool): 是否返回详细信息。默认False。
> **返回值**
> >
> > - **dict**: 当return_details为False时,返回dict。包含关键字:'miou'、'category_iou'、'macc'、
> > 'category_acc'和'kappa',分别表示平均iou、各类别iou、平均准确率、各类别准确率和kappa系数。
> > - **tuple** (metrics, eval_details):当return_details为True时,增加返回dict (eval_details),
> > 包含关键字:'confusion_matrix',表示评估的混淆矩阵。
#### predict 预测接口
```
predict(self, im_file, transforms=None):
```
> HRNet模型预测接口。需要注意的是,只有在训练过程中定义了eval_dataset,模型在保存时才会将预测时的图像处理流程保存在`UNet.test_transforms`和`UNet.eval_transforms`中。如未在训练时定义eval_dataset,那在调用预测`predict`接口时,用户需要再重新定义test_transforms传入给`predict`接口。
> **参数**
> >
> > - **img_file** (str): 预测图像路径。
> > - **transforms** (paddlex.seg.transforms): 数据预处理操作。
> **返回值**
> >
> > - **dict**: 包含关键字'label_map'和'score_map', 'label_map'存储预测结果灰度图,像素值表示对应的类别,'score_map'存储各类别的概率,shape=(h, w, num_classes)。
## FastSCNN类
```python
paddlex.seg.FastSCNN(num_classes=2, use_bce_loss=False, use_dice_loss=False, class_weight=None, ignore_index=255, multi_loss_weight=[1.0])
```
> 构建FastSCNN分割器。
> **参数**
> > - **num_classes** (int): 类别数。
> > - **use_bce_loss** (bool): 是否使用bce loss作为网络的损失函数,只能用于两类分割。可与dice loss同时使用。默认False。
> > - **use_dice_loss** (bool): 是否使用dice loss作为网络的损失函数,只能用于两类分割,可与bce loss同时使用。当use_bce_loss和use_dice_loss都为False时,使用交叉熵损失函数。默认False。
> > - **class_weight** (list/str): 交叉熵损失函数各类损失的权重。当`class_weight`为list的时候,长度应为`num_classes`。当`class_weight`为str时, weight.lower()应为'dynamic',这时会根据每一轮各类像素的比重自行计算相应的权重,每一类的权重为:每类的比例 * num_classes。class_weight取默认值None是,各类的权重1,即平时使用的交叉熵损失函数。
> > - **ignore_index** (int): label上忽略的值,label为`ignore_index`的像素不参与损失函数的计算。默认255。
> > - **multi_loss_weight** (list): 多分支上的loss权重。默认计算一个分支上的loss,即默认值为[1.0]。也支持计算两个分支或三个分支上的loss,权重按[fusion_branch_weight, higher_branch_weight, lower_branch_weight]排列,fusion_branch_weight为空间细节分支和全局上下文分支融合后的分支上的loss权重,higher_branch_weight为空间细节分支上的loss权重,lower_branch_weight为全局上下文分支上的loss权重,若higher_branch_weight和lower_branch_weight未设置则不会计算这两个分支上的loss。
### train 训练接口
```python
train(self, num_epochs, train_dataset, train_batch_size=2, eval_dataset=None, eval_batch_size=1, save_interval_epochs=1, log_interval_steps=2, save_dir='output', pretrain_weights='CITYSCAPES', optimizer=None, learning_rate=0.01, lr_decay_power=0.9, use_vdl=False, sensitivities_file=None, eval_metric_loss=0.05, early_stop=False, early_stop_patience=5, resume_checkpoint=None):
```
> FastSCNN模型训练接口。
> **参数**
> >
> > - **num_epochs** (int): 训练迭代轮数。
> > - **train_dataset** (paddlex.datasets): 训练数据读取器。
> > - **train_batch_size** (int): 训练数据batch大小。同时作为验证数据batch大小。默认2。
> > - **eval_dataset** (paddlex.datasets): 评估数据读取器。
> > - **save_interval_epochs** (int): 模型保存间隔(单位:迭代轮数)。默认为1。
> > - **log_interval_steps** (int): 训练日志输出间隔(单位:迭代次数)。默认为2。
> > - **save_dir** (str): 模型保存路径。默认'output'
> > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'CITYSCAPES',则自动下载在CITYSCAPES图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认'CITYSCAPES'。
> > - **optimizer** (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认的优化器:使用fluid.optimizer.Momentum优化方法,polynomial的学习率衰减策略。
> > - **learning_rate** (float): 默认优化器的初始学习率。默认0.01。
> > - **lr_decay_power** (float): 默认优化器学习率衰减指数。默认0.9。
> > - **use_vdl** (bool): 是否使用VisualDL进行可视化。默认False。
> > - **sensitivities_file** (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT',则自动下载在Cityscapes图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。
> > - **eval_metric_loss** (float): 可容忍的精度损失。默认为0.05。
> > - **early_stop** (float): 是否使用提前终止训练策略。默认值为False。
> > - **early_stop_patience** (int): 当使用提前终止训练策略时,如果验证集精度在`early_stop_patience`个epoch内连续下降或持平,则终止训练。默认值为5。
> > - **resume_checkpoint** (str): 恢复训练时指定上次训练保存的模型路径。若为None,则不会恢复训练。默认值为None。
#### evaluate 评估接口
```
evaluate(self, eval_dataset, batch_size=1, epoch_id=None, return_details=False):
```
> FastSCNN模型评估接口。
> **参数**
> >
> > - **eval_dataset** (paddlex.datasets): 评估数据读取器。
> > - **batch_size** (int): 评估时的batch大小。默认1。
> > - **epoch_id** (int): 当前评估模型所在的训练轮数。
> > - **return_details** (bool): 是否返回详细信息。默认False。
> **返回值**
> >
> > - **dict**: 当return_details为False时,返回dict。包含关键字:'miou'、'category_iou'、'macc'、
> > 'category_acc'和'kappa',分别表示平均iou、各类别iou、平均准确率、各类别准确率和kappa系数。
> > - **tuple** (metrics, eval_details):当return_details为True时,增加返回dict (eval_details),
> > 包含关键字:'confusion_matrix',表示评估的混淆矩阵。
#### predict 预测接口
```
predict(self, im_file, transforms=None):
```
> FastSCNN模型预测接口。需要注意的是,只有在训练过程中定义了eval_dataset,模型在保存时才会将预测时的图像处理流程保存在`UNet.test_transforms`和`UNet.eval_transforms`中。如未在训练时定义eval_dataset,那在调用预测`predict`接口时,用户需要再重新定义test_transforms传入给`predict`接口。
> **参数**
> >
> > - **img_file** (str): 预测图像路径。
> > - **transforms** (paddlex.seg.transforms): 数据预处理操作。
> **返回值**
> >
> > - **dict**: 包含关键字'label_map'和'score_map', 'label_map'存储预测结果灰度图,像素值表示对应的类别,'score_map'存储各类别的概率,shape=(h, w, num_classes)。
......@@ -9,8 +9,8 @@ PaddleX对于图像分类、目标检测、实例分割和语义分割内置了
| 任务类型 | 增强方法 |
| :------- | :------------|
| 图像分类 | [RandomCrop](cls_transforms.html#randomcrop)[RandomHorizontalFlip](cls_transforms.html#randomhorizontalflip)[RandomVerticalFlip](cls_transforms.html#randomverticalflip)<br> [RandomRotate](cls_transforms.html#randomratate)[RandomDistort](cls_transforms.html#randomdistort) |
|目标检测<br>实例分割| [RandomHorizontalFlip](det_transforms.html#randomhorizontalflip)[RandomDistort](det_transforms.html#randomdistort)[RandomCrop](det_transforms.html#randomcrop)<br> [[MixupImage](det_transforms.html#mixupimage)(仅支持YOLOv3模型)RandomExpand](det_transforms.html#randomexpand) |
|语义分割 | [RandomHorizontalFlip](seg_transforms.html#randomhorizontalflip)[RandomVerticalFlip](seg_transforms.html#randomverticalflip)[RandomRangeScaling](seg_transforms.html#randomrangescaling)<br> [RandomStepScaling](seg_transforms.html#randomstepscaling)[RandomPaddingCrop](seg_transforms.html#randompaddingcrop)[RandomBlur](seg_transforms.html#randomblur)<br> [RandomRotation](seg_transforms.html#randomrotation)[RandomScaleAspect](seg_transforms.html#randomscaleaspect)[RandomDistort](seg_transforms.html#randomdistort) |
|目标检测<br>实例分割| [RandomHorizontalFlip](det_transforms.html#randomhorizontalflip)[RandomDistort](det_transforms.html#randomdistort)[RandomCrop](det_transforms.html#randomcrop)<br> [MixupImage](det_transforms.html#mixupimage)(仅支持YOLOv3模型)[RandomExpand](det_transforms.html#randomexpand) |
|语义分割 | [RandomHorizontalFlip](seg_transforms.html#randomhorizontalflip)[RandomVerticalFlip](seg_transforms.html#randomverticalflip)[RandomRangeScaling](seg_transforms.html#randomrangescaling)<br> [RandomStepScaling](seg_transforms.html#randomstepscaling)[RandomPaddingCrop](seg_transforms.html#randompaddingcrop)[RandomBlur](seg_transforms.html#randomblur)<br> [RandomRotate](seg_transforms.html#randomrotate)[RandomScaleAspect](seg_transforms.html#randomscaleaspect)[RandomDistort](seg_transforms.html#randomdistort) |
## imgaug增强库的支持
......
......@@ -15,7 +15,7 @@ paddlex.cls.transforms.Compose(transforms)
## RandomCrop类
```python
paddlex.cls.transforms.RandomCrop(crop_size=224, lower_scale=0.88, lower_ratio=3. / 4, upper_ratio=4. / 3)
paddlex.cls.transforms.RandomCrop(crop_size=224, lower_scale=0.08, lower_ratio=3. / 4, upper_ratio=4. / 3)
```
对图像进行随机剪裁,模型训练时的数据增强操作。
......@@ -26,7 +26,7 @@ paddlex.cls.transforms.RandomCrop(crop_size=224, lower_scale=0.88, lower_ratio=3
### 参数
* **crop_size** (int): 随机裁剪后重新调整的目标边长。默认为224。
* **lower_scale** (float): 裁剪面积相对原面积比例的最小限制。默认为0.88。
* **lower_scale** (float): 裁剪面积相对原面积比例的最小限制。默认为0.08。
* **lower_ratio** (float): 宽变换比例的最小限制。默认为3. / 4。
* **upper_ratio** (float): 宽变换比例的最小限制。默认为4. / 3。
......@@ -122,3 +122,64 @@ paddlex.cls.transforms.RandomDistort(brightness_range=0.9, brightness_prob=0.5,
* **saturation_prob** (float): 随机调整饱和度的概率。默认为0.5。
* **hue_range** (int): 色调因子的范围。默认为18。
* **hue_prob** (float): 随机调整色调的概率。默认为0.5。
## ComposedClsTransforms类
```python
paddlex.cls.transforms.ComposedClsTransforms(mode, crop_size=[224, 224], mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
```
分类模型中已经组合好的数据处理流程,开发者可以直接使用ComposedClsTransforms,简化手动组合transforms的过程, 该类中已经包含了[RandomCrop](#RandomCrop)[RandomHorizontalFlip](#RandomHorizontalFlip)两种数据增强方式,你仍可以通过[add_augmenters函数接口](#add_augmenters)添加新的数据增强方式。
ComposedClsTransforms共包括以下几个步骤:
> 训练阶段:
> > 1. 随机从图像中crop一块子图,并resize成crop_size大小
> > 2. 将1的输出按0.5的概率随机进行水平翻转
> > 3. 将图像进行归一化
> 验证/预测阶段:
> > 1. 将图像按比例Resize,使得最小边长度为crop_size[0] * 1.14
> > 2. 从图像中心crop出一个大小为crop_size的图像
> > 3. 将图像进行归一化
### 参数
* **mode** (str): Transforms所处的阶段,包括`train', 'eval'或'test'
* **crop_size** (int|list): 输入到模型里的图像大小,默认为[224, 224](与原图大小无关,根据上述几个步骤,会将原图处理成该图大小输入给模型训练)
* **mean** (list): 图像均值, 默认为[0.485, 0.456, 0.406]。
* **std** (list): 图像方差,默认为[0.229, 0.224, 0.225]。
### 添加数据增强方式
```python
ComposedClsTransforms.add_augmenters(augmenters)
```
> **参数**
> * **augmenters**(list): 数据增强方式列表
#### 使用示例
```
import paddlex as pdx
from paddlex.cls import transforms
train_transforms = transforms.ComposedClsTransforms(mode='train', crop_size=[320, 320])
eval_transforms = transforms.ComposedClsTransforms(mode='eval', crop_size=[320, 320])
# 添加数据增强
import imgaug.augmenters as iaa
train_transforms.add_augmenters([
transforms.RandomDistort(),
iaa.blur.GaussianBlur(sigma=(0.0, 3.0))
])
```
上面代码等价于
```
import paddlex as pdx
from paddlex.cls import transforms
train_transforms = transforms.Composed([
transforms.RandomDistort(),
iaa.blur.GaussianBlur(sigma=(0.0, 3.0)),
# 上面两个为通过add_augmenters额外添加的数据增强方式
transforms.RandomCrop(crop_size=320),
transforms.RandomHorizontalFlip(prob=0.5),
transforms.Normalize()
])
eval_transforms = transforms.Composed([
transforms.ResizeByShort(short_size=int(320*1.14)),
transforms.CenterCrop(crop_size=320),
transforms.Normalize()
])
```
......@@ -167,3 +167,133 @@ paddlex.det.transforms.RandomCrop(aspect_ratio=[.5, 2.], thresholds=[.0, .1, .3,
* **num_attempts** (int): 在放弃寻找有效裁剪区域前尝试的次数。默认值为50。
* **allow_no_crop** (bool): 是否允许未进行裁剪。默认值为True。
* **cover_all_box** (bool): 是否要求所有的真实标注框都必须在裁剪区域内。默认值为False。
## ComposedRCNNTransforms类
```python
paddlex.det.transforms.ComposedRCNNTransforms(mode, min_max_size=[224, 224], mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
```
目标检测FasterRCNN和实例分割MaskRCNN模型中已经组合好的数据处理流程,开发者可以直接使用ComposedRCNNTransforms,简化手动组合transforms的过程, 该类中已经包含了[RandomHorizontalFlip](#RandomHorizontalFlip)数据增强方式,你仍可以通过[add_augmenters函数接口](#add_augmenters)添加新的数据增强方式。
ComposedRCNNTransforms共包括以下几个步骤:
> 训练阶段:
> > 1. 随机以0.5的概率将图像水平翻转
> > 2. 将图像进行归一化
> > 3. 图像采用[ResizeByShort](#ResizeByShort)方式,根据min_max_size参数,进行缩入
> > 4. 使用[Padding](#Padding)将图像的长和宽分别Padding成32的倍数
> 验证/预测阶段:
> > 1. 将图像进行归一化
> > 2. 图像采用[ResizeByShort](#ResizeByShort)方式,根据min_max_size参数,进行缩入
> > 3. 使用[Padding](#Padding)将图像的长和宽分别Padding成32的倍数
### 参数
* **mode** (str): Transforms所处的阶段,包括`train', 'eval'或'test'
* **min_max_size** (list): 输入模型中图像的最短边长度和最长边长度,参考[ResizeByShort](#ResizeByShort)(与原图大小无关,根据上述几个步骤,会将原图处理成相应大小输入给模型训练),默认[800, 1333]
* **mean** (list): 图像均值, 默认为[0.485, 0.456, 0.406]。
* **std** (list): 图像方差,默认为[0.229, 0.224, 0.225]。
### 添加数据增强方式
```python
ComposedRCNNTransforms.add_augmenters(augmenters)
```
> **参数**
> * **augmenters**(list): 数据增强方式列表
#### 使用示例
```
import paddlex as pdx
from paddlex.det import transforms
train_transforms = transforms.ComposedRCNNTransforms(mode='train', min_max_size=[800, 1333])
eval_transforms = transforms.ComposedRCNNTransforms(mode='eval', min_max_size=[800, 1333])
# 添加数据增强
import imgaug.augmenters as iaa
train_transforms.add_augmenters([
transforms.RandomDistort(),
iaa.blur.GaussianBlur(sigma=(0.0, 3.0))
])
```
上面代码等价于
```
import paddlex as pdx
from paddlex.det import transforms
train_transforms = transforms.Composed([
transforms.RandomDistort(),
iaa.blur.GaussianBlur(sigma=(0.0, 3.0)),
# 上面两个为通过add_augmenters额外添加的数据增强方式
transforms.RandomHorizontalFlip(prob=0.5),
transforms.Normalize(),
transforms.ResizeByShort(short_size=800, max_size=1333),
transforms.Padding(coarsest_stride=32)
])
eval_transforms = transforms.Composed([
transforms.Normalize(),
transforms.ResizeByShort(short_size=800, max_size=1333),
transforms.Padding(coarsest_stride=32)
])
```
## ComposedYOLOv3Transforms类
```python
paddlex.det.transforms.ComposedYOLOv3Transforms(mode, shape=[608, 608], mixup_epoch=250, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
```
目标检测YOLOv3模型中已经组合好的数据处理流程,开发者可以直接使用ComposedYOLOv3Transforms,简化手动组合transforms的过程, 该类中已经包含了[MixupImage](#MixupImage)、[RandomDistort](#RandomDistort)、[RandomExpand](#RandomExpand)、[RandomCrop](#RandomCrop)、[RandomHorizontalFlip](#RandomHorizontalFlip)5种数据增强方式,你仍可以通过[add_augmenters函数接口](#add_augmenters)添加新的数据增强方式。
ComposedYOLOv3Transforms共包括以下几个步骤:
> 训练阶段:
> > 1. 在前mixup_epoch轮迭代中,使用MixupImage策略
> > 2. 对图像进行随机扰动,包括亮度,对比度,饱和度和色调
> > 3. 随机扩充图像
> > 4. 随机裁剪图像
> > 5. 将4步骤的输出图像Resize成shape参数的大小
> > 6. 随机0.5的概率水平翻转图像
> > 7. 图像归一化
> 验证/预测阶段:
> > 1. 将图像Resize成shape参数大小
> > 2. 图像归一化
### 参数
* **mode** (str): Transforms所处的阶段,包括`train', 'eval'或'test'
* **shape** (list): 输入模型中图像的大小(与原图大小无关,根据上述几个步骤,会将原图处理成相应大小输入给模型训练), 默认[608, 608]
* **mixup_epoch**(int): 模型训练过程中,在前mixup_epoch轮迭代中,使用mixup策略,如果为-1,则不使用mixup策略, 默认250。
* **mean** (list): 图像均值, 默认为[0.485, 0.456, 0.406]。
* **std** (list): 图像方差,默认为[0.229, 0.224, 0.225]。
### 添加数据增强方式
```python
ComposedYOLOv3Transforms.add_augmenters(augmenters)
```
> **参数**
> * **augmenters**(list): 数据增强方式列表
#### 使用示例
```
import paddlex as pdx
from paddlex.det import transforms
train_transforms = transforms.ComposedYOLOv3Transforms(mode='train', shape=[480, 480])
eval_transforms = transforms.ComposedYOLOv3Transforms(mode='eval', shape=[480, 480])
# 添加数据增强
import imgaug.augmenters as iaa
train_transforms.add_augmenters([
iaa.blur.GaussianBlur(sigma=(0.0, 3.0))
])
```
上面代码等价于
```
import paddlex as pdx
from paddlex.det import transforms
train_transforms = transforms.Composed([
iaa.blur.GaussianBlur(sigma=(0.0, 3.0)),
# 上面为通过add_augmenters额外添加的数据增强方式
transforms.MixupImage(mixup_epoch=250),
transforms.RandomDistort(),
transforms.RandomExpand(),
transforms.RandomCrop(),
transforms.Resize(target_size=480, interp='RANDOM'),
transforms.RandomHorizontalFlip(prob=0.5),
transforms.Normalize()
])
eval_transforms = transforms.Composed([
transforms.Resize(target_size=480, interp='CUBIC'),
transforms.Normalize()
])
```
......@@ -120,7 +120,7 @@ paddlex.seg.transforms.RandomBlur(prob=0.1)
* **prob** (float): 图像模糊概率。默认为0.1。
## RandomRotation
## RandomRotate
```python
paddlex.seg.transforms.RandomRotate(rotate_range=15, im_padding_value=[127.5, 127.5, 127.5], label_padding_value=255)
```
......@@ -166,3 +166,63 @@ paddlex.seg.transforms.RandomDistort(brightness_range=0.5, brightness_prob=0.5,
* **saturation_prob** (float): 随机调整饱和度的概率。默认为0.5。
* **hue_range** (int): 色调因子的范围。默认为18。
* **hue_prob** (float): 随机调整色调的概率。默认为0.5。
## ComposedSegTransforms类
```python
paddlex.det.transforms.ComposedSegTransforms(mode, train_crop_shape=[769, 769], mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
```
语义分割DeepLab和UNet模型中已经组合好的数据处理流程,开发者可以直接使用ComposedSegTransforms,简化手动组合transforms的过程, 该类中已经包含了[RandomHorizontalFlip](#RandomHorizontalFlip)[ResizeStepScaling](#ResizeStepScaling)[RandomPaddingCrop](#RandomPaddingCrop)3种数据增强方式,你仍可以通过[add_augmenters函数接口](#add_augmenters)添加新的数据增强方式。
ComposedSegTransforms共包括以下几个步骤:
> 训练阶段:
> > 1. 随机对图像以0.5的概率水平翻转
> > 2. 按不同的比例随机Resize原图
> > 3. 从原图中随机crop出大小为train_crop_size大小的子图,如若crop出来的图小于train_crop_size,则会将图padding到对应大小
> > 4. 图像归一化
> 预测阶段:
> > 1. 图像归一化
### 参数
* **mode** (str): Transforms所处的阶段,包括`train', 'eval'或'test'
* **train_crop_size** (list): 训练过程中随机Crop和Resize后(验证或预测过程中不需配置该参数,自动使用原图大小),输入到模型中图像的大小(与原图大小无关,根据上述几个步骤,会将原图处理成相应大小输入给模型训练), 默认[769, 769]
* **mean** (list): 图像均值, 默认为[0.485, 0.456, 0.406]。
* **std** (list): 图像方差,默认为[0.229, 0.224, 0.225]。
### 添加数据增强方式
```python
ComposedSegTransforms.add_augmenters(augmenters)
```
> **参数**
> * **augmenters**(list): 数据增强方式列表
#### 使用示例
```
import paddlex as pdx
from paddlex.seg import transforms
train_transforms = transforms.ComposedSegTransforms(mode='train', train_crop_size=[512, 512])
eval_transforms = transforms.ComposedYOLOTransforms(mode='eval')
# 添加数据增强
import imgaug.augmenters as iaa
train_transforms.add_augmenters([
transforms.RandomDistort(),
iaa.blur.GaussianBlur(sigma=(0.0, 3.0))
])
```
上面代码等价于
```
import paddlex as pdx
from paddlex.det import transforms
train_transforms = transforms.Composed([
transforms.RandomDistort(),
iaa.blur.GaussianBlur(sigma=(0.0, 3.0)),
# 上面2行为通过add_augmenters额外添加的数据增强方式
transforms.RandomHorizontalFlip(prob=0.5),
transforms.ResizeStepScaling(),
transforms.PaddingCrop(crop_size=[512, 512]),
transforms.Normalize()
])
eval_transforms = transforms.Composed([
transforms.Normalize()
])
```
......@@ -6,47 +6,56 @@
| 模型 | 模型大小 | 预测速度(毫秒) | Top1准确率(%) | Top5准确率(%) |
| :----| :------- | :----------- | :--------- | :--------- |
| ResNet18| 46.9MB | 1.499 | 71.0 | 89.9 |
| ResNet34| 87.5MB | 2.272 | 74.6 | 92.1 |
| ResNet50| 102.7MB | 2.939 | 76.5 | 93.0 |
| ResNet101 |179.1MB | 5.314 | 77.6 | 93.6 |
| ResNet50_vd |102.8MB | 3.165 | 79.1 | 94.4 |
| ResNet101_vd| 179.2MB | 5.252 | 80.2 | 95.0 |
| ResNet50_vd_ssld |102.8MB | 3.165 | 82.4 | 96.1 |
| ResNet101_vd_ssld| 179.2MB | 5.252 | 83.7 | 96.7 |
| DarkNet53|166.9MB | 3.139 | 78.0 | 94.1 |
| MobileNetV1 | 16.0MB | 32.523 | 71.0 | 89.7 |
| MobileNetV2 | 14.0MB | 23.318 | 72.2 | 90.7 |
| MobileNetV3_large| 21.0MB | 19.308 | 75.3 | 93.2 |
| MobileNetV3_small | 12.0MB | 6.546 | 68.2 | 88.1 |
| MobileNetV3_large_ssld| 21.0MB | 19.308 | 79.0 | 94.5 |
| MobileNetV3_small_ssld | 12.0MB | 6.546 | 71.3 | 90.1 |
| Xception41 |92.4MB | 4.408 | 79.6 | 94.4 |
| Xception65 | 144.6MB | 6.464 | 80.3 | 94.5 |
| DenseNet121 | 32.8MB | 4.371 | 75.7 | 92.6 |
| DenseNet161|116.3MB | 8.863 | 78.6 | 94.1 |
| DenseNet201| 84.6MB | 8.173 | 77.6 | 93.7 |
| ShuffleNetV2 | 9.0MB | 10.941 | 68.8 | 88.5 |
| [ResNet18](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet18_pretrained.tar)| 46.2MB | 3.72882 | 71.0 | 89.9 |
| [ResNet34](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet34_pretrained.tar)| 87.9MB | 5.50876 | 74.6 | 92.1 |
| [ResNet50](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_pretrained.tar)| 103.4MB | 7.76659 | 76.5 | 93.0 |
| [ResNet101](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_pretrained.tar) |180.4MB | 13.80876 | 77.6 | 93.6 |
| [ResNet50_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_pretrained.tar) |103.5MB | 8.20476 | 79.1 | 94.4 |
| [ResNet101_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_vd_pretrained.tar)| 180.5MB | 14.24643 | 80.2 | 95.0 |
| [ResNet50_vd_ssld](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_ssld_pretrained.tar) |103.5MB | 7.79264 | 82.4 | 96.1 |
| [ResNet101_vd_ssld](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_vd_ssld_pretrained.tar)| 180.5MB | 13.34580 | 83.7 | 96.7 |
| [DarkNet53](https://paddle-imagenet-models-name.bj.bcebos.com/DarkNet53_ImageNet1k_pretrained.tar)|167.4MB | 8.82047 | 78.0 | 94.1 |
| [MobileNetV1](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV1_pretrained.tar) | 17.4MB | 3.42838 | 71.0 | 89.7 |
| [MobileNetV2](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_pretrained.tar) | 15.0MB | 5.92667 | 72.2 | 90.7 |
| [MobileNetV3_large](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV3_large_x1_0_pretrained.tar)| 22.8MB | 8.31428 | 75.3 | 93.2 |
| [MobileNetV3_small](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV3_small_x1_0_pretrained.tar) | 12.5MB | 7.30689 | 68.2 | 88.1 |
| [MobileNetV3_large_ssld](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV3_large_x1_0_ssld_pretrained.tar)| 22.8MB | 8.06651 | 79.0 | 94.5 |
| [MobileNetV3_small_ssld](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV3_small_x1_0_ssld_pretrained.tar) | 12.5MB | 7.08837 | 71.3 | 90.1 |
| [Xception41](https://paddle-imagenet-models-name.bj.bcebos.com/Xception41_deeplab_pretrained.tar) | 109.2MB | 8.15611 | 79.6 | 94.4 |
| [Xception65](https://paddle-imagenet-models-name.bj.bcebos.com/Xception65_deeplab_pretrained.tar) | 161.6MB | 13.87017 | 80.3 | 94.5 |
| [DenseNet121](https://paddle-imagenet-models-name.bj.bcebos.com/DenseNet121_pretrained.tar) | 33.1MB | 17.09874 | 75.7 | 92.6 |
| [DenseNet161](https://paddle-imagenet-models-name.bj.bcebos.com/DenseNet161_pretrained.tar)| 118.0MB | 22.79690 | 78.6 | 94.1 |
| [DenseNet201](https://paddle-imagenet-models-name.bj.bcebos.com/DenseNet201_pretrained.tar)| 84.1MB | 25.26089 | 77.6 | 93.7 |
| [ShuffleNetV2](https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_pretrained.tar) | 10.2MB | 15.40138 | 68.8 | 88.5 |
| [HRNet_W18](https://paddle-imagenet-models-name.bj.bcebos.com/HRNet_W18_C_pretrained.tar) | 21.29MB |45.25514 | 76.9 | 93.4 |
## 目标检测模型
> 表中模型相关指标均为在MSCOCO数据集上使用PaddlePaddle Python预测接口测试得到(测试GPU型号为Nvidia Tesla V100测试得到,表中符号`-`表示相关指标暂未测试。
> 表中模型相关指标均为在MSCOCO数据集上使用PaddlePaddle Python预测接口测试得到(测试GPU型号为Nvidia Tesla V100测试得到,表中符号`-`表示相关指标暂未测试。
| 模型 | 模型大小 | 预测时间(毫秒) | BoxAP(%) |
|:-------|:-----------|:-------------|:----------|
|FasterRCNN-ResNet50|135.6MB| 78.450 | 35.2 |
|FasterRCNN-ResNet50_vd| 135.7MB | 79.523 | 36.4 |
|FasterRCNN-ResNet101| 211.7MB | 107.342 | 38.3 |
|FasterRCNN-ResNet50-FPN| 167.2MB | 44.897 | 37.2 |
|FasterRCNN-ResNet50_vd-FPN|168.7MB | 45.773 | 38.9 |
|FasterRCNN-ResNet101-FPN| 251.7MB | 55.782 | 38.7 |
|FasterRCNN-ResNet101_vd-FPN |252MB | 58.785 | 40.5 |
|YOLOv3-DarkNet53|252.4MB | 21.944 | 38.9 |
|YOLOv3-MobileNetv1 |101.2MB | 12.771 | 29.3 |
|YOLOv3-MobileNetv3|94.6MB | - | 31.6 |
| YOLOv3-ResNet34|169.7MB | 15.784 | 36.2 |
|[FasterRCNN-ResNet50](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_1x.tar)|136.0MB| 197.715 | 35.2 |
|[FasterRCNN-ResNet50_vd](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_vd_1x.tar)| 136.1MB | 475.700 | 36.4 |
|[FasterRCNN-ResNet101](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r101_1x.tar)| 212.5MB | 582.911 | 38.3 |
|[FasterRCNN-ResNet50-FPN](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_fpn_1x.tar)| 167.7MB | 83.189 | 37.2 |
|[FasterRCNN-ResNet50_vd-FPN](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_vd_fpn_2x.tar)|167.8MB | 128.277 | 38.9 |
|[FasterRCNN-ResNet101-FPN](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r101_fpn_1x.tar)| 244.2MB | 156.097 | 38.7 |
|[FasterRCNN-ResNet101_vd-FPN](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r101_vd_fpn_2x.tar) |244.3MB | 119.788 | 40.5 |
|[FasterRCNN-HRNet_W18-FPN](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_hrnetv2p_w18_1x.tar) |115.5MB | 81.592 | 36 |
|[YOLOv3-DarkNet53](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_darknet.tar)|249.2MB | 42.672 | 38.9 |
|[YOLOv3-MobileNetV1](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v1.tar) |99.2MB | 15.442 | 29.3 |
|[YOLOv3-MobileNetV3_large](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v3.pdparams)|100.7MB | 143.322 | 31.6 |
| [YOLOv3-ResNet34](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r34.tar)|170.3MB | 23.185 | 36.2 |
## 实例分割模型
> 表中模型相关指标均为在MSCOCO数据集上测试得到。
| 模型 | 模型大小 | 预测时间(毫秒) | mIoU(%) |
|:-------|:-----------|:-------------|:----------|
|DeepLabv3+-MobileNetV2_x1.0|-| - | - |
|DeepLabv3+-Xception41|-| - | - |
|DeepLabv3+-Xception65|-| - | - |
|UNet|-| - | - |
|HRNet_w18|-| - | - |
......@@ -23,3 +23,35 @@ Batch Size指模型在训练过程中,一次性处理的样本数量, 如若
- [实例分割MaskRCNN-train](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/instance_segmentation.html#train)
- [语义分割DeepLabv3p-train](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#train)
- [语义分割UNet](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#id2)
## 关于lr_decay_epoch, warmup_steps等参数的说明
在PaddleX或其它深度学习模型的训练过程中,经常见到lr_decay_epoch, warmup_steps, warmup_start_lr等参数设置,下面介绍一些这些参数的作用。
首先这些参数都是用于控制模型训练过程中学习率的变化方式,例如我们在训练时将learning_rate设为0.1, 通常情况,在模型的训练过程中,学习率一直以0.1不变训练下去, 但为了调出更好的模型效果,我们往往不希望学习率一直保持不变。
### warmup_steps和warmup_start_lr
我们在训练模型时,一般都会使用预训练模型,例如检测模型在训练时使用backbone在ImageNet数据集上的预训练权重。但由于在自行训练时,自己的数据与ImageNet数据集存在较大的差异,可能会一开始由于梯度过大使得训练出现问题,因此可以在刚开始训练时,让学习率以一个较小的值,慢慢增长到设定的学习率。因此`warmup_steps``warmup_start_lr`就是这个作用,模型开始训练时,学习率会从`warmup_start_lr`开始,在`warmup_steps`内线性增长到设定的学习率。
### lr_decay_epochs和lr_decay_gamma
`lr_decay_epochs`用于让学习率在模型训练后期逐步衰减,它一般是一个list,如[6, 8, 10],表示学习率在第6个epoch时衰减一次,第8个epoch时再衰减一次,第10个epoch时再衰减一次。每次学习率衰减为之前的学习率*lr_decay_gamma
### Notice
在PaddleX中,限制warmup需要在第一个学习率decay衰减前结束,因此要满足下面的公式
```
warmup_steps <= lr_decay_epochs[0] * num_steps_each_epoch
```
其中公式中`num_steps_each_epoch = num_samples_in_train_dataset // train_batch_size`
> 因此如若在训练时PaddleX提示`warmup_steps should be less than xxx`时,即可根据上述公式来调整你的`lr_decay_epochs`或者是`warmup_steps`使得两个参数满足上面的条件
> - 图像分类模型 [train接口文档](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/classification.html#train)
> - FasterRCNN [train接口文档](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/detection.html#fasterrcnn)
> - YOLOv3 [train接口文档](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/detection.html#yolov3)
> - MaskRCNN [train接口文档](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/instance_segmentation.html#maskrcnn)
> - DeepLab [train接口文档](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#deeplabv3p)
> - UNet [train接口文档](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#unet)
> - HRNet [train接口文档](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#hrnet)
......@@ -26,13 +26,14 @@ PaddleX是基于飞桨核心框架、开发套件和工具组件的深度学习
cv_solutions.md
apis/index.rst
paddlex_gui/index.rst
tuning_strategy/index.rst
update.md
FAQ.md
appendix/index.rst
* PaddleX版本: v1.0.0
* 项目官网: http://www.paddlepaddle.org.cn/paddle/paddlex
* 项目GitHub: https://github.com/PaddlePaddle/PaddleX/tree/develop
* 项目GitHub: https://github.com/PaddlePaddle/PaddleX
* 官方QQ用户群: 1045148026
* GitHub Issue反馈: http://www.github.com/PaddlePaddle/PaddleX/issues
# PaddleX GUI下载安装
## <a name="2">PaddleX GUI安装</a>
PaddleX GUI是提升项目开发效率的核心模块,开发者可快速完成深度学习模型全流程开发。我们诚挚地邀请您前往 [官网](https://www.paddlepaddle.org.cn/paddle/paddleX)下载试用PaddleX GUI可视化前端,并获得您宝贵的意见或开源项目贡献。
#### <a name="1">安装推荐环境</a>
* **操作系统**
* Windows7/8/10(推荐Windows 10);
* Mac OS 10.13+;
* Ubuntu 18.04+;
***注:处理器需为x86_64架构,支持MKL。***
* **训练硬件**
* **GPU**(仅Windows及Linux系统):
推荐使用支持CUDA的NVIDIA显卡,例如:GTX 1070+以上性能的显卡;
Windows系统X86_64驱动版本>=411.31;
Linux系统X86_64驱动版本>=410.48;
显存8G以上;
* **CPU**
PaddleX当前支持您用本地CPU进行训练,但推荐使用GPU以获得更好的开发体验。
* **内存**:建议8G以上
* **硬盘空间**:建议SSD剩余空间1T以上(非必须)
***注:PaddleX在Windows及Mac OS系统只支持单卡模型。Windows系统暂不支持NCCL。***
# PaddleX GUI如何训练模型
# PaddleX GUI使用文档
飞桨全流程开发工具,集飞桨核心框架、模型库、工具及组件等深度学习开发全流程所需能力于一身,易用易集成,是开发者快速入门深度学习、提升深度学习项目开发效率的最佳辅助工具。
PaddleX GUI是一个应用PaddleX实现的一个图形化开发客户端产品,它使得开发者通过键入式输入即可完成深度学习模型全流程开发,可大幅度提升项目开发效率。飞桨团队期待各位开发者基于PaddleX,实现出各种符合自己产业实际需求的产品。
我们诚挚地邀请您前往 [官网](https://www.paddlepaddle.org.cn/paddlex)下载试用PaddleX GUI,并获得您宝贵的意见或开源项目贡献。
## 目录
* **产品特性**
* **PaddleX GUI可视化前端**
* **FAQ**
## 产品特性
\- **全流程打通**
将深度学习开发全流程打通,并提供可视化开发界面, 省去了对各环节API的熟悉过程及重复的代码开发,极大地提升了开发效率。
\- **易用易集成**
提供功能最全、最灵活的Python API开发模式,完全开源开放,易于集成和二次开发。键入式输入的图形化开发界面,使得非专业算法人员也可快速进行业务POC。
\- **融合产业实践经验**
融合飞桨产业落地经验,精选高质量的视觉模型方案,开放实际的案例教学,手把手带您实现产业需求落地。
\- **教程与服务**
从数据集准备到上线部署,为您提供业务开发全流程的文档说明及技术服务。开发者可以通过QQ群、微信群、GitHub社区等多种形式与飞桨团队及同业合作伙伴交流沟通。
## PaddleX GUI 可视化前端
**第一步:准备数据**
在开始模型训练前,您需要根据不同的任务类型,将数据标注为相应的格式。目前PaddleX支持【图像分类】、【目标检测】、【语义分割】、【实例分割】四种任务类型。不同类型任务的数据处理方式可查看[数据标注方式](https://paddlex.readthedocs.io/zh_CN/latest/appendix/datasets.html)
**第二步:导入我的数据集**
①数据标注完成后,您需要根据不同的任务,将数据和标注文件,按照客户端提示更名并保存到正确的文件中。
②在客户端新建数据集,选择与数据集匹配的任务类型,并选择数据集对应的路径,将数据集导入。
![](images/datasets1.jpg)
③选定导入数据集后,客户端会自动校验数据及标注文件是否合规,校验成功后,您可根据实际需求,将数据集按比例划分为训练集、验证集、测试集。
④您可在「数据分析」模块按规则预览您标注的数据集,双击单张图片可放大查看。
![](images/dataset2.jpg)
**第三步:创建项目**
① 在完成数据导入后,您可以点击「新建项目」创建一个项目。
② 您可根据实际任务需求选择项目的任务类型,需要注意项目所采用的数据集也带有任务类型属性,两者需要进行匹配。
![](images/project3.jpg)
**第四步:项目开发**
**数据选择**:项目创建完成后,您需要选择已载入客户端并校验后的数据集,并点击下一步,进入参数配置页面。
![](images/project1.jpg)
**参数配置**:主要分为**模型参数****训练参数****优化策略**三部分。您可根据实际需求选择模型结构、骨架网络及对应的训练参数、优化策略,使得任务效果最佳。
![](images/project2.jpg)
参数配置完成后,点击启动训练,模型开始训练并进行效果评估。
**训练可视化**:在训练过程中,您可通过VisualDL查看模型训练过程参数变化、日志详情,及当前最优的训练集和验证集训练指标。模型在训练过程中通过点击"中止训练"随时中止训练过程。
![](images/visualization1.jpg)
模型训练结束后,可选择进入『模型剪裁分析』或者直接进入『模型评估』。
![](images/visualization2.jpg)
**模型裁剪**:如果开发者希望减少模型的体积、计算量,提升模型在设备上的预测性能,可以采用PaddleX提供的模型裁剪策略。裁剪过程将对模型各卷积层的敏感度信息进行分析,根据各参数对模型效果的影响进行不同比例的裁剪,再进行精调训练获得最终裁剪后的模型。
![](images/visualization3.jpg)
**模型评估**:在模型评估页面,您可查看训练后的模型效果。评估方法包括混淆矩阵、精度、召回率等。
![](images/visualization4.jpg)
您还可以选择『数据集切分』时留出的『测试数据集』或从本地文件夹中导入一张/多张图片,将训练后的模型进行测试。根据测试结果,您可决定是否将训练完成的模型保存为预训练模型并进入模型发布页面,或返回先前步骤调整参数配置重新进行训练。
![](images/visualization5.jpg)
**第五步:模型发布**
当模型效果满意后,您可根据实际的生产环境需求,选择将模型发布为需要的版本。
![](images/publish.jpg)
## FAQ
1. **为什么训练速度这么慢?**
PaddleX完全采用您本地的硬件进行计算,深度学习任务确实对算力要求较高,为了使您能快速体验应用PaddleX进行开发,我们适配了CPU硬件,但强烈建议您使用GPU以提升训练速度和开发体验。
2. **我可以在服务器或云平台上部署PaddleX么?**
PaddleX GUI是一个适配本地单机安装的客户端,无法在服务器上直接进行部署,您可以直接使用PaddleX API,或采用飞桨核心框架进行服务器上的部署。如果您希望使用公有算力,强烈建议您尝试飞桨产品系列中的 [EasyDL](https://ai.baidu.com/easydl/)[AI Studio](https://aistudio.baidu.com/aistudio/index)进行开发。
3. **PaddleX支持EasyData标注的数据吗?**
支持,PaddleX可顺畅读取EasyData标注的数据。但当前版本的PaddleX GUI暂时无法支持直接导入EasyData数据格式,您可以参照文档,将[数据集进行转换](https://paddlex.readthedocs.io/zh_CN/latest/appendix/how_to_convert_dataset.html)再导入PaddleX GUI进行后续开发。
同时,我们也在紧密开发PaddleX GUI可直接导入EasyData数据格式的功能。
4. **为什么模型裁剪分析耗时这么长?**
模型裁剪分析过程是对模型各卷积层的敏感度信息进行分析,根据各参数对模型效果的影响进行不同比例的裁剪。此过程需要重复多次直至FLOPS满足要求,最后再进行精调训练获得最终裁剪后的模型,因此耗时较长。有关模型裁剪的原理,可参见文档[剪裁原理介绍](https://paddlepaddle.github.io/PaddleSlim/algo/algo.html#2-%E5%8D%B7%E7%A7%AF%E6%A0%B8%E5%89%AA%E8%A3%81%E5%8E%9F%E7%90%86)
5. **如何调用后端代码?**
PaddleX 团队为您整理了相关的API接口文档,方便您学习和使用。具体请参见[PaddleX API说明文档](https://paddlex.readthedocs.io/zh_CN/latest/apis/index.html)
**如果您有更多问题或建议,欢迎以issue的形式,或加入PaddleX官方QQ群(1045148026)直接反馈您的问题和需求**
![](images/QR.jpg)
......@@ -21,7 +21,7 @@ PaddleX GUI是基于PaddleX开发实现的可视化模型训练套件,可以
how_to_use.md
xx.md
* PaddleX版本: v0.1.7
* PaddleX GUI版本: v1.0
* 项目官网: http://www.paddlepaddle.org.cn/paddle/paddlex
* 项目GitHub: https://github.com/PaddlePaddle/PaddleX/tree/develop
* 官方QQ用户群: 1045148026
......
......@@ -61,7 +61,7 @@ eval_dataset = pdx.datasets.ImageNet(
本文档中使用百度基于蒸馏方法得到的MobileNetV3预训练模型,模型结构与MobileNetV3一致,但精度更高。PaddleX内置了20多种分类模型,查阅[PaddleX模型库](appendix/model_zoo.md)了解更多分类模型。
```
num_classes = len(train_dataset.labels)
model.pdx.cls.MobileNetV3_small_ssld(num_classes=num_classes)
model = pdx.cls.MobileNetV3_small_ssld(num_classes=num_classes)
```
### 3.4 定义训练参数
......@@ -86,7 +86,7 @@ python train.py
## 5. 训练过程中查看训练指标
模型在训练过程中,所有的迭代信息将以标注输出流的形式,输出到命令执行的终端上,用户也可通过visualdl以可视化的方式查看训练指标的变化,通过如下方式启动visualdl后,在浏览器打开https://0.0.0.0:8001 (或 https://localhost:8001)即可。
```
visualdl --logdir output/mobilenetv2/vdl_log --port 8000
visualdl --logdir output/mobilenetv2/vdl_log --port 8001
```
![](./images/vdl1.jpg)
......
模型压缩
============================
.. toctree::
:maxdepth: 2
prune.md
quant.md
# 模型裁剪
## 原理介绍
模型裁剪用于减小模型的计算量和体积,可以加快模型部署后的预测速度,是一种减小模型大小和降低模型计算复杂度的常用方式,通过裁剪卷积层中Kernel输出通道的大小及其关联层参数大小来实现,其关联裁剪的原理可参见[PaddleSlim相关文档](https://paddlepaddle.github.io/PaddleSlim/algo/algo.html#id16)**一般而言,在同等模型精度前提下,数据复杂度越低,模型可以被裁剪的比例就越高**
## 裁剪方法
PaddleX提供了两种方式:
**1.用户自行计算裁剪配置(推荐),整体流程包含三个步骤,**
> **第一步**: 使用数据集训练原始模型
> **第二步**:利用第一步训练好的模型,在验证数据集上计算模型中各个参数的敏感度,并将敏感度信息存储至本地文件
> **第三步**:使用数据集训练裁剪模型(与第一步差异在于需要在`train`接口中,将第二步计算得到的敏感信息文件传给接口的`sensitivities_file`参数)
> 在如上三个步骤中,**相当于模型共需要训练两遍**,分别对应第一步和第三步,但其中第三步训练的是裁剪后的模型,因此训练速度较第一步会更快。
> 第二步会遍历模型中的部分裁剪参数,分别计算各个参数裁剪后对于模型在验证集上效果的影响,**因此会反复在验证集上评估多次**。
**2.使用PaddleX内置的裁剪方案**
> PaddleX内置的模型裁剪方案是**基于标准数据集**上计算得到的参数敏感度信息,由于不同数据集特征分布会有较大差异,所以该方案相较于第1种方案训练得到的模型**精度一般而言会更低**(**且用户自定义数据集与标准数据集特征分布差异越大,导致训练的模型精度会越低**),仅在用户想节省时间的前提下可以参考使用,使用方式只需一步,
> **一步**: 使用数据集训练裁剪模型,在训练调用`train`接口时,将接口中的`sensitivities_file`参数设置为'DEFAULT'字符串
> 注:各模型内置的裁剪方案分别依据的数据集为: 图像分类——ImageNet数据集、目标检测——PascalVOC数据集、语义分割——CityScape数据集
## 裁剪实验
基于上述两种方案,我们在PaddleX上使用样例数据进行了实验,在Tesla P40上实验指标如下所示,
### 图像分类
实验背景:使用MobileNetV2模型,数据集为蔬菜分类示例数据,见[使用教程-模型压缩-图像分类](../tutorials/compress/classification.md)
| 模型 | 裁剪情况 | 模型大小 | Top1准确率(%) |GPU预测速度 | CPU预测速度 |
| :-----| :--------| :-------- | :---------- |:---------- |:----------|
|MobileNetV2 | 无裁剪(原模型)| 13.0M | 97.50|6.47ms |47.44ms |
|MobileNetV2 | 方案一(eval_metric_loss=0.10) | 2.1M | 99.58 |5.03ms |20.22ms |
|MobileNetV2 | 方案二(eval_metric_loss=0.10) | 6.0M | 99.58 |5.42ms |29.06ms |
### 目标检测
实验背景:使用YOLOv3-MobileNetV1模型,数据集为昆虫检测示例数据,见[使用教程-模型压缩-目标检测](../tutorials/compress/detection.md)
| 模型 | 裁剪情况 | 模型大小 | MAP(%) |GPU预测速度 | CPU预测速度 |
| :-----| :--------| :-------- | :---------- |:---------- | :---------|
|YOLOv3-MobileNetV1 | 无裁剪(原模型)| 139M | 67.57| 14.88ms |976.42ms |
|YOLOv3-MobileNetV1 | 方案一(eval_metric_loss=0.10) | 34M | 75.49 |10.60ms |558.49ms |
|YOLOv3-MobileNetV1 | 方案二(eval_metric_loss=0.05) | 29M | 50.27| 9.43ms |360.46ms |
### 语义分割
实验背景:使用UNet模型,数据集为视盘分割示例数据, 见[使用教程-模型压缩-语义分割](../tutorials/compress/segmentation.md)
| 模型 | 裁剪情况 | 模型大小 | mIOU(%) |GPU预测速度 | CPU预测速度 |
| :-----| :--------| :-------- | :---------- |:---------- | :---------|
|UNet | 无裁剪(原模型)| 77M | 91.22 |33.28ms |9523.55ms |
|UNet | 方案一(eval_metric_loss=0.10) |26M | 90.37 |21.04ms |3936.20ms |
|UNet | 方案二(eval_metric_loss=0.10) |23M | 91.21 |18.61ms |3447.75ms |
# 模型量化
## 原理介绍
为了满足低内存带宽、低功耗、低计算资源占用以及低模型存储等需求,定点量化被提出。为此我们提供了训练后量化,该量化使用KL散度确定量化比例因子,将FP32模型转成INT8模型,且不需要重新训练,可以快速得到量化模型。
## 使用PaddleX量化模型
PaddleX提供了`export_quant_model`接口,让用户以接口的形式完成模型以post_quantization方式量化并导出。点击查看[量化接口使用文档](../apis/slim.md)
## 量化性能对比
模型量化后的性能对比指标请查阅[PaddleSlim模型库](https://paddlepaddle.github.io/PaddleSlim/model_zoo.html)
目标检测
============================
PaddleX针对目标检测任务提供了通过负样本学习降低误检率的策略,用户可根据需求及应用场景使用该策略对模型进行调优。
.. toctree::
:maxdepth: 1
negatives_training.md
# 通过负样本学习降低误检率
## 应用场景
在背景和目标相似的场景下,模型容易把背景误检成目标。为了降低误检率,可以通过负样本学习来降低误检率,即在训练过程中把无目标真值的图片加入训练。
## 效果对比
* 与基准模型相比,通过负样本学习后的模型**mmAP有3.6%的提升,mAP有0.1%的提升**
* 与基准模型相比,通过负样本学习后的模型在背景图片上的图片级别**误检率降低了49.68%**
表1 违禁品验证集上**框级别精度**对比
||mmAP(AP@IoU=0.5:0.95)| mAP (AP@IoU=0.5)|
|:---|:---|:---|
|基准模型 | 45.8% | 83% |
|通过负样本学习后的模型 | 49.4% | 83.1% |
表2 违禁品验证集上**图片级别的召回率**、无违禁品验证集上**图片级别的误检率**对比
||违禁品图片级别的召回率| 无违禁品图片级别的误检率|
|:---|:--------------------|:------------------------|
|基准模型 | 98.97% | 55.27% |
|通过负样本学习后的模型 | 97.75% | 5.59% |
【名词解释】
* 图片级别的召回率:只要在有目标的图片上检测出目标(不论框的个数),该图片被认为召回。批量有目标图片中被召回图片所占的比例,即为图片级别的召回率。
* 图片级别的误检率:只要在无目标的图片上检测出目标(不论框的个数),该图片被认为误检。批量无目标图片中被误检图片所占的比例,即为图片级别的误检率。
## 使用方法
在定义训练所用的数据集之后,使用数据集类的成员函数`add_negative_samples`将无目标真值的背景图片所在路径传入给训练集。代码示例如下:
```
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
from paddlex.det import transforms
import paddlex as pdx
# 定义训练和验证时的transforms
train_transforms = transforms.ComposedRCNNTransforms(
mode='train', min_max_size=[600, 1000])
eval_transforms = transforms.ComposedRCNNTransforms(
mode='eval', min_max_size=[600, 1000])
# 定义训练所用的数据集
train_dataset = pdx.datasets.CocoDetection(
data_dir='jinnan2_round1_train_20190305/restricted/',
ann_file='jinnan2_round1_train_20190305/train.json',
transforms=train_transforms,
shuffle=True,
num_workers=2)
# 训练集中加入无目标背景图片
train_dataset.add_negative_samples(
'jinnan2_round1_train_20190305/normal_train_back/')
# 定义验证所用的数据集
eval_dataset = pdx.datasets.CocoDetection(
data_dir='jinnan2_round1_train_20190305/restricted/',
ann_file='jinnan2_round1_train_20190305/val.json',
transforms=eval_transforms,
num_workers=2)
# 初始化模型,并进行训练
model = pdx.det.FasterRCNN(num_classes=len(train_dataset.labels) + 1)
model.train(
num_epochs=17,
train_dataset=train_dataset,
eval_dataset=eval_dataset,
train_batch_size=8,
learning_rate=0.01,
lr_decay_epochs=[13, 16],
save_dir='./output')
```
## 实验细则
(1) 数据集
我们使用X光违禁品数据集对通过负样本学习降低误检率的策略有效性进行了实验验证。该数据集中背景比较繁杂,很多背景物体与目标物体较为相似。
* 检测铁壳打火机、黑钉打火机 、刀具、电源和电池、剪刀5种违禁品。
* 训练集有883张违禁品图片,验证集有98张违禁品图片。
* 无违禁品的X光图片有2540张。
(2) 基准模型
使用FasterRCNN-ResNet50作为检测模型,除了水平翻转外没有使用其他的数据增强方式,只使用违禁品训练集进行训练。模型在违禁品验证集上的精度见表1,mmAP有45.8%,mAP达到83%。
(3) 通过负样本学习后的模型
把无违禁品的X光图片按1:1分成无违禁品训练集和无违禁品验证集。我们将基准模型在无违禁品验证集进行测试,发现图片级别的误检率高达55.27%。为了降低该误检率,将基准模型在无违禁品训练集进行测试,挑选出被误检图片共663张,将这663张图片加入训练,训练参数配置与基准模型训练时一致。
通过负样本学习后的模型在违禁品验证集上的精度见表1,mmAP有49.4%,mAP达到83.1%。与基准模型相比,**mmAP有3.6%的提升,mAP有0.1%的提升**。通过负样本学习后的模型在无违禁品验证集的误检率仅有5.58%,与基准模型相比,**误检率降低了49.68%**
此外,还测试了两个模型在有违禁品验证集上图片级别的召回率,见表2,与基准模型相比,通过负样本学习后的模型仅漏检了1张图片,召回率几乎是无损的。
PaddleX调优策略介绍
============================
.. toctree::
:maxdepth: 2
detection/index.rst
# 数据准备
## 数据标注
## 主流标注软件支持
## EasyData数据标注支持
该部分内容已迁移至[附录](../appendix/datasets.md)
# 移动端部署
PaddleX的移动端部署由PaddleLite实现,部署的流程如下,首先将训练好的模型导出为inference model,然后使用PaddleLite的python接口对模型进行优化,最后使用PaddleLite的预测库进行部署,
PaddleLite的详细介绍和使用可参考:[PaddleLite文档](https://paddle-lite.readthedocs.io/zh/latest/)
> PaddleX --> Inference Model --> PaddleLite Opt --> PaddleLite Inference
以下介绍如何将PaddleX导出为inference model,然后使用PaddleLite的OPT模块对模型进行优化:
step 1: 安装PaddleLite
```
......@@ -9,14 +16,21 @@ pip install paddlelite
step 2: 将PaddleX模型导出为inference模型
参考[导出inference模型](deploy_server/deploy_python.html#inference)将模型导出为inference格式模型。
**注意:由于PaddleX代码的持续更新,版本低于1.0.0的模型暂时无法直接用于预测部署,参考[模型版本升级](./upgrade_version.md)对模型版本进行升级。**
step 3: 将inference模型转换成PaddleLite模型
```
python /path/to/PaddleX/deploy/lite/export_lite.py --model_path /path/to/inference_model --save_dir /path/to/onnx_model
python /path/to/PaddleX/deploy/lite/export_lite.py --model_dir /path/to/inference_model --save_file /path/to/onnx_model --place place/to/run
```
`--model_path`用于指定inference模型的路径,`--save_dir`用于指定Lite模型的保存路径。
| 参数 | 说明 |
| ---- | ---- |
| model_dir | 预测模型所在路径,包含"__model__", "__params__"文件 |
| save_file | 模型输出的名称,默认为"paddlex.nb" |
| place | 运行的平台,可选:arm|opencl|x86|npu|xpu|rknpu|apu |
step 4: 预测
......
......@@ -96,6 +96,17 @@ cmake .. \
make
```
**注意:** linux环境下编译会自动下载OPENCV, PaddleX-Encryption和YAML,如果编译环境无法访问外网,可手动下载:
- [opencv3gcc4.8.tar.bz2](https://paddleseg.bj.bcebos.com/deploy/docker/opencv3gcc4.8.tar.bz2)
- [paddlex-encryption.zip](https://bj.bcebos.com/paddlex/tools/paddlex-encryption.zip)
- [yaml-cpp.zip](https://bj.bcebos.com/paddlex/deploy/deps/yaml-cpp.zip)
opencv3gcc4.8.tar.bz2文件下载后解压,然后在script/build.sh中指定`OPENCE_DIR`为解压后的路径。
paddlex-encryption.zip文件下载后解压,然后在script/build.sh中指定`ENCRYPTION_DIR`为解压后的路径。
yaml-cpp.zip文件下载后无需解压,在cmake/yaml.cmake中将`URL https://bj.bcebos.com/paddlex/deploy/deps/yaml-cpp.zip` 中的网址,改为下载文件的路径。
修改脚本设置好主要参数后,执行`build`脚本:
```shell
......@@ -104,7 +115,9 @@ make
### Step5: 预测及可视化
参考[导出inference模型](../deploy_python.html#inference)将模型导出为inference格式模型。
**在加载模型前,请检查你的模型目录中文件应该包括`model.yml`、`__model__`和`__params__`三个文件。如若不满足这个条件,请参考[模型导出为Inference文档](../deploy_python.html#inference)将模型导出为部署格式。**
> **注意:由于PaddleX代码的持续更新,版本低于1.0.0的模型(模型版本可查看model.yml文件中的version字段)暂时无法直接用于预测部署,参考[模型版本升级](../../upgrade_version.md)对模型版本进行升级。**
编译成功后,预测demo的可执行程序分别为`build/demo/detector``build/demo/classifer``build/demo/segmenter`,用户可根据自己的模型类型选择,其主要命令参数说明如下:
......@@ -116,7 +129,7 @@ make
| use_gpu | 是否使用 GPU 预测, 支持值为0或1(默认值为0) |
| use_trt | 是否使用 TensorTr 预测, 支持值为0或1(默认值为0) |
| gpu_id | GPU 设备ID, 默认值为0 |
| save_dir | 保存可视化结果的路径, 默认值为"output",classfier无该参数 |
| save_dir | 保存可视化结果的路径, 默认值为"output",**classfier无该参数** |
## 样例
......
......@@ -86,7 +86,14 @@ PaddlePaddle C++ 预测库针对不同的`CPU`,`CUDA`,以及是否支持Tens
| OPENCV_DIR | OpenCV的安装路径, |
| PADDLE_DIR | Paddle c++预测库的路径 |
**注意:** 1. 使用`CPU`版预测库,请把`WITH_GPU`的``去掉勾 2. 如果使用的是`openblas`版本,请把`WITH_MKL`的``去掉勾
**注意:**
1. 使用`CPU`版预测库,请把`WITH_GPU`的``去掉勾
2. 如果使用的是`openblas`版本,请把`WITH_MKL`的``去掉勾
3. Windows环境下编译会自动下载YAML,如果编译环境无法访问外网,可手动下载: [yaml-cpp.zip](https://bj.bcebos.com/paddlex/deploy/deps/yaml-cpp.zip)
yaml-cpp.zip文件下载后无需解压,在cmake/yaml.cmake中将`URL https://bj.bcebos.com/paddlex/deploy/deps/yaml-cpp.zip` 中的网址,改为下载文件的路径。
![step4](../../images/vs2019_step5.png)
......@@ -99,7 +106,10 @@ PaddlePaddle C++ 预测库针对不同的`CPU`,`CUDA`,以及是否支持Tens
### Step5: 预测及可视化
参考[导出inference模型](../deploy_python.html#inference)将模型导出为inference格式模型。
**在加载模型前,请检查你的模型目录中文件应该包括`model.yml`、`__model__`和`__params__`三个文件。如若不满足这个条件,请参考[模型导出为Inference文档](../deploy_python.html#inference)将模型导出为部署格式。**
**注意:由于PaddleX代码的持续更新,版本低于1.0.0的模型(模型版本可查看model.yml文件中的version字段)暂时无法直接用于预测部署,参考[模型版本升级](../../upgrade_version.md)对模型版本进行升级。**
上述`Visual Studio 2019`编译产出的可执行文件在`out\build\x64-Release`目录下,打开`cmd`,并切换到该目录:
......@@ -122,7 +132,7 @@ cd D:\projects\PaddleX\deploy\cpp\out\build\x64-Release
## 样例
可使用[小度熊识别模型](../deploy_python.html#inference)中导出的`inference_model`和测试图片进行预测。
可使用[小度熊识别模型](../deploy_python.md)中导出的`inference_model`和测试图片进行预测。
`样例一`:
......
......@@ -20,6 +20,8 @@ paddlex --export_inference --model_dir=./xiaoduxiong_epoch_12 --save_dir=./infer
```
## 预测部署
**注意:由于PaddleX代码的持续更新,版本低于1.0.0的模型暂时无法直接用于预测部署,参考[模型版本升级](../upgrade_version.md)对模型版本进行升级。**
> 点击下载测试图片 [xiaoduxiong_test_image.tar.gz](https://bj.bcebos.com/paddlex/datasets/xiaoduxiong_test_image.tar.gz)
```
......
......@@ -61,7 +61,7 @@ paddlex-encryption
./paddlex-encryption/tool/paddlex_encrypt_tool -model_dir /path/to/paddlex_inference_model -save_dir /path/to/paddlex_encrypted_model
```
`-model_dir`用于指定inference模型路径(参考[导出inference模型](deploy_python.html#inference)将模型导出为inference格式模型),可使用[导出小度熊识别模型](deploy_python.html#inference)中导出的`inference_model`。加密完成后,加密过的模型会保存至指定的`-save_dir`下,包含`__model__.encrypted``__params__.encrypted``model.yml`三个文件,同时生成密钥信息,命令输出如下图所示,密钥为`kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=`
`-model_dir`用于指定inference模型路径(参考[导出inference模型](deploy_python.html#inference)将模型导出为inference格式模型),可使用[导出小度熊识别模型](deploy_python.html#inference)中导出的`inference_model`**注意**:由于PaddleX代码的持续更新,版本低于1.0.0的模型暂时无法直接用于预测部署,参考[模型版本升级](../upgrade_version.md)对模型版本进行升级。)。加密完成后,加密过的模型会保存至指定的`-save_dir`下,包含`__model__.encrypted``__params__.encrypted``model.yml`三个文件,同时生成密钥信息,命令输出如下图所示,密钥为`kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=`
![](../images/encrypt.png)
......
# 模型版本升级
由于PaddleX代码的持续更新,版本低于1.0.0的模型暂时无法直接用于预测部署,用户需要按照以下步骤对模型版本进行转换,转换后的模型可以在多端上完成部署。
## 检查模型版本
存放模型的文件夹存有一个`model.yml`文件,该文件的最后一行`version`值表示模型的版本号,若版本号小于1.0.0,则需要进行版本转换,若版本号大于及等于1.0.0,则不需要进行版本转换。
## 版本转换
```
paddlex --export_inference --model_dir=/path/to/low_version_model --save_dir=SSpath/to/high_version_model
```
`--model_dir`为版本号小于1.0.0的模型路径,可以是PaddleX训练过程保存的模型,也可以是导出为inference格式的模型。`--save_dir`为转换为高版本的模型,后续可用于多端部署。
\ No newline at end of file
# 使用教程——训练模型
本目录下整理了使用PaddleX训练模型的示例代码,代码中均提供了示例数据的自动下载,并均使用单张GPU卡进行训练。
|代码 | 模型任务 | 数据 |
|------|--------|---------|
|classification/mobilenetv2.py | 图像分类MobileNetV2 | 蔬菜分类 |
|classification/resnet50.py | 图像分类ResNet50 | 蔬菜分类 |
|detection/faster_rcnn_r50_fpn.py | 目标检测FasterRCNN | 昆虫检测 |
|detection/mask_rcnn_f50_fpn.py | 实例分割MaskRCNN | 垃圾分拣 |
|segmentation/deeplabv3p.py | 语义分割DeepLabV3| 视盘分割 |
|segmentation/unet.py | 语义分割UNet | 视盘分割 |
|segmentation/hrnet.py | 语义分割HRNet | 视盘分割 |
|segmentation/fast_scnn.py | 语义分割FastSCNN | 视盘分割 |
## 开始训练
在安装PaddleX后,使用如下命令开始训练
```
python classification/mobilenetv2.py
```
import os
# 选择使用0号卡
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
from paddlex.cls import transforms
import paddlex as pdx
# 下载和解压蔬菜分类数据集
veg_dataset = 'https://bj.bcebos.com/paddlex/datasets/vegetables_cls.tar.gz'
pdx.utils.download_and_decompress(veg_dataset, path='./')
# 定义训练和验证时的transforms
# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/cls_transforms.html#composedclstransforms
train_transforms = transforms.ComposedClsTransforms(mode='train', crop_size=[224, 224])
eval_transforms = transforms.ComposedClsTransforms(mode='eval', crop_size=[224, 224])
# 定义训练和验证所用的数据集
# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/classification.html#imagenet
train_dataset = pdx.datasets.ImageNet(
data_dir='vegetables_cls',
file_list='vegetables_cls/train_list.txt',
label_list='vegetables_cls/labels.txt',
transforms=train_transforms,
shuffle=True)
eval_dataset = pdx.datasets.ImageNet(
data_dir='vegetables_cls',
file_list='vegetables_cls/val_list.txt',
label_list='vegetables_cls/labels.txt',
transforms=eval_transforms)
# 初始化模型,并进行训练
# 可使用VisualDL查看训练指标
# VisualDL启动方式: visualdl --logdir output/mobilenetv2/vdl_log --port 8001
# 浏览器打开 https://0.0.0.0:8001即可
# 其中0.0.0.0为本机访问,如为远程服务, 改成相应机器IP
# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/classification.html#resnet50
model = pdx.cls.MobileNetV2(num_classes=len(train_dataset.labels))
model.train(
num_epochs=10,
train_dataset=train_dataset,
train_batch_size=32,
eval_dataset=eval_dataset,
lr_decay_epochs=[4, 6, 8],
learning_rate=0.025,
save_dir='output/mobilenetv2',
use_vdl=True)
import os
# 选择使用0号卡
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
import paddle.fluid as fluid
from paddlex.cls import transforms
import paddlex as pdx
# 下载和解压蔬菜分类数据集
veg_dataset = 'https://bj.bcebos.com/paddlex/datasets/vegetables_cls.tar.gz'
pdx.utils.download_and_decompress(veg_dataset, path='./')
# 定义训练和验证时的transforms
# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/cls_transforms.html#composedclstransforms
train_transforms = transforms.ComposedClsTransforms(mode='train', crop_size=[224, 224])
eval_transforms = transforms.ComposedClsTransforms(mode='eval', crop_size=[224, 224])
# 定义训练和验证所用的数据集
# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/classification.html#imagenet
train_dataset = pdx.datasets.ImageNet(
data_dir='vegetables_cls',
file_list='vegetables_cls/train_list.txt',
label_list='vegetables_cls/labels.txt',
transforms=train_transforms,
shuffle=True)
eval_dataset = pdx.datasets.ImageNet(
data_dir='vegetables_cls',
file_list='vegetables_cls/val_list.txt',
label_list='vegetables_cls/labels.txt',
transforms=eval_transforms)
# PaddleX支持自定义构建优化器
step_each_epoch = train_dataset.num_samples // 32
learning_rate = fluid.layers.cosine_decay(
learning_rate=0.025, step_each_epoch=step_each_epoch, epochs=10)
optimizer = fluid.optimizer.Momentum(
learning_rate=learning_rate,
momentum=0.9,
regularization=fluid.regularizer.L2Decay(4e-5))
# 初始化模型,并进行训练
# 可使用VisualDL查看训练指标
# VisualDL启动方式: visualdl --logdir output/resnet50/vdl_log --port 8001
# 浏览器打开 https://0.0.0.0:8001即可
# 其中0.0.0.0为本机访问,如为远程服务, 改成相应机器IP
# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/classification.html#resnet50
model = pdx.cls.ResNet50(num_classes=len(train_dataset.labels))
model.train(
num_epochs=10,
train_dataset=train_dataset,
train_batch_size=32,
eval_dataset=eval_dataset,
optimizer=optimizer,
save_dir='output/resnet50',
use_vdl=True)
import os
# 选择使用0号卡
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
from paddlex.det import transforms
import paddlex as pdx
# 下载和解压昆虫检测数据集
insect_dataset = 'https://bj.bcebos.com/paddlex/datasets/insect_det.tar.gz'
pdx.utils.download_and_decompress(insect_dataset, path='./')
# 定义训练和验证时的transforms
# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#composedrcnntransforms
train_transforms = transforms.ComposedRCNNTransforms(mode='train', min_max_size=[800, 1333])
eval_transforms = transforms.ComposedRCNNTransforms(mode='eval', min_max_size=[800, 1333])
# 定义训练和验证所用的数据集
# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/detection.html#vocdetection
train_dataset = pdx.datasets.VOCDetection(
data_dir='insect_det',
file_list='insect_det/train_list.txt',
label_list='insect_det/labels.txt',
transforms=train_transforms,
shuffle=True)
eval_dataset = pdx.datasets.VOCDetection(
data_dir='insect_det',
file_list='insect_det/val_list.txt',
label_list='insect_det/labels.txt',
transforms=eval_transforms)
# 初始化模型,并进行训练
# 可使用VisualDL查看训练指标
# VisualDL启动方式: visualdl --logdir output/faster_rcnn_r50_fpn/vdl_log --port 8001
# 浏览器打开 https://0.0.0.0:8001即可
# 其中0.0.0.0为本机访问,如为远程服务, 改成相应机器IP
# num_classes 需要设置为包含背景类的类别数,即: 目标类别数量 + 1
# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/detection.html#fasterrcnn
num_classes = len(train_dataset.labels) + 1
model = pdx.det.FasterRCNN(num_classes=num_classes)
model.train(
num_epochs=12,
train_dataset=train_dataset,
train_batch_size=2,
eval_dataset=eval_dataset,
learning_rate=0.0025,
lr_decay_epochs=[8, 11],
save_dir='output/faster_rcnn_r50_fpn',
use_vdl=True)
import os
# 选择使用0号卡
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
from paddlex.det import transforms
import paddlex as pdx
# 下载和解压小度熊分拣数据集
xiaoduxiong_dataset = 'https://bj.bcebos.com/paddlex/datasets/xiaoduxiong_ins_det.tar.gz'
pdx.utils.download_and_decompress(xiaoduxiong_dataset, path='./')
# 定义训练和验证时的transforms
# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#composedrcnntransforms
train_transforms = transforms.ComposedRCNNTransforms(mode='train', min_max_size=[800, 1333])
eval_transforms = transforms.ComposedRCNNTransforms(mode='eval', min_max_size=[800, 1333])
# 定义训练和验证所用的数据集
# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/detection.html#cocodetection
train_dataset = pdx.datasets.CocoDetection(
data_dir='xiaoduxiong_ins_det/JPEGImages',
ann_file='xiaoduxiong_ins_det/train.json',
transforms=train_transforms,
shuffle=True)
eval_dataset = pdx.datasets.CocoDetection(
data_dir='xiaoduxiong_ins_det/JPEGImages',
ann_file='xiaoduxiong_ins_det/val.json',
transforms=eval_transforms)
# 初始化模型,并进行训练
# 可使用VisualDL查看训练指标
# VisualDL启动方式: visualdl --logdir output/mask_rcnn_r50_fpn/vdl_log --port 8001
# 浏览器打开 https://0.0.0.0:8001即可
# 其中0.0.0.0为本机访问,如为远程服务, 改成相应机器IP
# num_classes 需要设置为包含背景类的类别数,即: 目标类别数量 + 1
# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/instance_segmentation.html#maskrcnn
num_classes = len(train_dataset.labels) + 1
model = pdx.det.MaskRCNN(num_classes=num_classes)
model.train(
num_epochs=12,
train_dataset=train_dataset,
train_batch_size=1,
eval_dataset=eval_dataset,
learning_rate=0.00125,
warmup_steps=10,
lr_decay_epochs=[8, 11],
save_dir='output/mask_rcnn_r50_fpn',
use_vdl=True)
import os
# 选择使用0号卡
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
from paddlex.det import transforms
import paddlex as pdx
# 下载和解压昆虫检测数据集
insect_dataset = 'https://bj.bcebos.com/paddlex/datasets/insect_det.tar.gz'
pdx.utils.download_and_decompress(insect_dataset, path='./')
# 定义训练和验证时的transforms
# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#composedyolotransforms
train_transforms = transforms.ComposedYOLOv3Transforms(mode='train', shape=[608, 608])
eval_transforms = transforms.ComposedYOLOv3Transforms(mode='eva', shape=[608, 608])
# 定义训练和验证所用的数据集
# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/detection.html#vocdetection
train_dataset = pdx.datasets.VOCDetection(
data_dir='insect_det',
file_list='insect_det/train_list.txt',
label_list='insect_det/labels.txt',
transforms=train_transforms,
shuffle=True)
eval_dataset = pdx.datasets.VOCDetection(
data_dir='insect_det',
file_list='insect_det/val_list.txt',
label_list='insect_det/labels.txt',
transforms=eval_transforms)
# 初始化模型,并进行训练
# 可使用VisualDL查看训练指标
# VisualDL启动方式: visualdl --logdir output/yolov3_darknet/vdl_log --port 8001
# 浏览器打开 https://0.0.0.0:8001即可
# 其中0.0.0.0为本机访问,如为远程服务, 改成相应机器IP
# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/detection.html#yolov3
num_classes = len(train_dataset.labels)
model = pdx.det.YOLOv3(num_classes=num_classes, backbone='DarkNet53')
model.train(
num_epochs=270,
train_dataset=train_dataset,
train_batch_size=8,
eval_dataset=eval_dataset,
learning_rate=0.000125,
lr_decay_epochs=[210, 240],
save_dir='output/yolov3_darknet53',
use_vdl=True)
import os
# 选择使用0号卡
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
import paddlex as pdx
from paddlex.seg import transforms
# 下载和解压视盘分割数据集
optic_dataset = 'https://bj.bcebos.com/paddlex/datasets/optic_disc_seg.tar.gz'
pdx.utils.download_and_decompress(optic_dataset, path='./')
# 定义训练和验证时的transforms
# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/seg_transforms.html#composedsegtransforms
train_transforms = transforms.ComposedSegTransforms(mode='train', train_crop_size=[769, 769])
eval_transforms = transforms.ComposedSegTransforms(mode='eval')
train_transforms.add_augmenters([
transforms.RandomRotate()
])
# 定义训练和验证所用的数据集
# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/semantic_segmentation.html#segdataset
train_dataset = pdx.datasets.SegDataset(
data_dir='optic_disc_seg',
file_list='optic_disc_seg/train_list.txt',
label_list='optic_disc_seg/labels.txt',
transforms=train_transforms,
shuffle=True)
eval_dataset = pdx.datasets.SegDataset(
data_dir='optic_disc_seg',
file_list='optic_disc_seg/val_list.txt',
label_list='optic_disc_seg/labels.txt',
transforms=eval_transforms)
# 初始化模型,并进行训练
# 可使用VisualDL查看训练指标
# VisualDL启动方式: visualdl --logdir output/deeplab/vdl_log --port 8001
# 浏览器打开 https://0.0.0.0:8001即可
# 其中0.0.0.0为本机访问,如为远程服务, 改成相应机器IP
# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#deeplabv3p
num_classes = len(train_dataset.labels)
model = pdx.seg.DeepLabv3p(num_classes=num_classes)
model.train(
num_epochs=40,
train_dataset=train_dataset,
train_batch_size=4,
eval_dataset=eval_dataset,
learning_rate=0.01,
save_dir='output/deeplab',
use_vdl=True)
import os
# 选择使用0号卡
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
import paddlex as pdx
from paddlex.seg import transforms
# 下载和解压视盘分割数据集
optic_dataset = 'https://bj.bcebos.com/paddlex/datasets/optic_disc_seg.tar.gz'
pdx.utils.download_and_decompress(optic_dataset, path='./')
# 定义训练和验证时的transforms
# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/seg_transforms.html#composedsegtransforms
train_transforms = transforms.ComposedSegTransforms(
mode='train', train_crop_size=[769, 769])
eval_transforms = transforms.ComposedSegTransforms(mode='eval')
# 定义训练和验证所用的数据集
# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/semantic_segmentation.html#segdataset
train_dataset = pdx.datasets.SegDataset(
data_dir='optic_disc_seg',
file_list='optic_disc_seg/train_list.txt',
label_list='optic_disc_seg/labels.txt',
transforms=train_transforms,
shuffle=True)
eval_dataset = pdx.datasets.SegDataset(
data_dir='optic_disc_seg',
file_list='optic_disc_seg/val_list.txt',
label_list='optic_disc_seg/labels.txt',
transforms=eval_transforms)
# 初始化模型,并进行训练
# 可使用VisualDL查看训练指标
# VisualDL启动方式: visualdl --logdir output/unet/vdl_log --port 8001
# 浏览器打开 https://0.0.0.0:8001即可
# 其中0.0.0.0为本机访问,如为远程服务, 改成相应机器IP
# https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#hrnet
num_classes = len(train_dataset.labels)
model = pdx.seg.FastSCNN(num_classes=num_classes)
model.train(
num_epochs=20,
train_dataset=train_dataset,
train_batch_size=4,
eval_dataset=eval_dataset,
learning_rate=0.01,
save_dir='output/fastscnn',
use_vdl=True)
import os
# 选择使用0号卡
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
import paddlex as pdx
from paddlex.seg import transforms
# 下载和解压视盘分割数据集
optic_dataset = 'https://bj.bcebos.com/paddlex/datasets/optic_disc_seg.tar.gz'
pdx.utils.download_and_decompress(optic_dataset, path='./')
# 定义训练和验证时的transforms
# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/seg_transforms.html#composedsegtransforms
train_transforms = transforms.ComposedSegTransforms(mode='train', train_crop_size=[769, 769])
eval_transforms = transforms.ComposedSegTransforms(mode='eval')
# 定义训练和验证所用的数据集
# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/semantic_segmentation.html#segdataset
train_dataset = pdx.datasets.SegDataset(
data_dir='optic_disc_seg',
file_list='optic_disc_seg/train_list.txt',
label_list='optic_disc_seg/labels.txt',
transforms=train_transforms,
shuffle=True)
eval_dataset = pdx.datasets.SegDataset(
data_dir='optic_disc_seg',
file_list='optic_disc_seg/val_list.txt',
label_list='optic_disc_seg/labels.txt',
transforms=eval_transforms)
# 初始化模型,并进行训练
# 可使用VisualDL查看训练指标
# VisualDL启动方式: visualdl --logdir output/unet/vdl_log --port 8001
# 浏览器打开 https://0.0.0.0:8001即可
# 其中0.0.0.0为本机访问,如为远程服务, 改成相应机器IP
# https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#hrnet
num_classes = len(train_dataset.labels)
model = pdx.seg.HRNet(num_classes=num_classes)
model.train(
num_epochs=20,
train_dataset=train_dataset,
train_batch_size=4,
eval_dataset=eval_dataset,
learning_rate=0.01,
save_dir='output/hrnet',
use_vdl=True)
import os
# 选择使用0号卡
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
import paddlex as pdx
from paddlex.seg import transforms
# 下载和解压视盘分割数据集
optic_dataset = 'https://bj.bcebos.com/paddlex/datasets/optic_disc_seg.tar.gz'
pdx.utils.download_and_decompress(optic_dataset, path='./')
# 定义训练和验证时的transforms
# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/seg_transforms.html#composedsegtransforms
train_transforms = transforms.ComposedSegTransforms(mode='train', train_crop_size=[769, 769])
eval_transforms = transforms.ComposedSegTransforms(mode='eval')
# 定义训练和验证所用的数据集
# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/semantic_segmentation.html#segdataset
train_dataset = pdx.datasets.SegDataset(
data_dir='optic_disc_seg',
file_list='optic_disc_seg/train_list.txt',
label_list='optic_disc_seg/labels.txt',
transforms=train_transforms,
shuffle=True)
eval_dataset = pdx.datasets.SegDataset(
data_dir='optic_disc_seg',
file_list='optic_disc_seg/val_list.txt',
label_list='optic_disc_seg/labels.txt',
transforms=eval_transforms)
# 初始化模型,并进行训练
# 可使用VisualDL查看训练指标
# VisualDL启动方式: visualdl --logdir output/unet/vdl_log --port 8001
# 浏览器打开 https://0.0.0.0:8001即可
# 其中0.0.0.0为本机访问,如为远程服务, 改成相应机器IP
# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#unet
num_classes = len(train_dataset.labels)
model = pdx.seg.UNet(num_classes=num_classes)
model.train(
num_epochs=20,
train_dataset=train_dataset,
train_batch_size=4,
eval_dataset=eval_dataset,
learning_rate=0.01,
save_dir='output/unet',
use_vdl=True)
......@@ -53,4 +53,4 @@ log_level = 2
from . import interpret
__version__ = '1.0.2.github'
__version__ = '1.0.6'
......@@ -36,5 +36,7 @@ DenseNet121 = cv.models.DenseNet121
DenseNet161 = cv.models.DenseNet161
DenseNet201 = cv.models.DenseNet201
ShuffleNetV2 = cv.models.ShuffleNetV2
HRNet_W18 = cv.models.HRNet_W18
AlexNet = cv.models.AlexNet
transforms = cv.transforms.cls_transforms
......@@ -128,7 +128,6 @@ class CocoDetection(VOCDetection):
coco_rec = (im_info, label_info)
self.file_list.append([im_fname, coco_rec])
if not len(self.file_list) > 0:
raise Exception('not found any coco record in %s' % (ann_file))
logging.info("{} samples in file {}".format(
......
......@@ -209,8 +209,8 @@ def GenerateMiniBatch(batch_data):
padding_batch = []
for data in batch_data:
im_c, im_h, im_w = data[0].shape[:]
padding_im = np.zeros((im_c, max_shape[1], max_shape[2]),
dtype=np.float32)
padding_im = np.zeros(
(im_c, max_shape[1], max_shape[2]), dtype=np.float32)
padding_im[:, :im_h, :im_w] = data[0]
padding_batch.append((padding_im, ) + data[1:])
return padding_batch
......@@ -226,8 +226,8 @@ class Dataset:
if num_workers == 'auto':
import multiprocessing as mp
num_workers = mp.cpu_count() // 2 if mp.cpu_count() // 2 < 8 else 8
if platform.platform().startswith(
"Darwin") or platform.platform().startswith("Windows"):
if platform.platform().startswith("Darwin") or platform.platform(
).startswith("Windows"):
parallel_method = 'thread'
if transforms is None:
raise Exception("transform should be defined.")
......
......@@ -14,6 +14,7 @@
from __future__ import absolute_import
import copy
import os
import os.path as osp
import random
import re
......@@ -122,9 +123,13 @@ class VOCDetection(Dataset):
is_crowd = np.zeros((len(objs), 1), dtype=np.int32)
difficult = np.zeros((len(objs), 1), dtype=np.int32)
for i, obj in enumerate(objs):
<<<<<<< HEAD
pattern = re.compile('<name>', re.IGNORECASE)
name_tag = pattern.findall(str(ET.tostringlist(obj)))[0][1:-1]
cname = obj.find(name_tag).text.strip()
=======
cname = obj.find('name').text.strip()
>>>>>>> e9b8c938bf48a74eba95458257b512cfcbdbcca3
gt_class[i][0] = cname2cid[cname]
pattern = re.compile('<difficult>', re.IGNORECASE)
diff_tag = pattern.findall(str(ET.tostringlist(obj)))[0][1:-1]
......@@ -197,6 +202,44 @@ class VOCDetection(Dataset):
self.coco_gt.dataset = annotations
self.coco_gt.createIndex()
def add_negative_samples(self, image_dir):
import cv2
if not osp.exists(image_dir):
raise Exception("{} background images directory does not exist.".
format(image_dir))
image_list = os.listdir(image_dir)
max_img_id = max(self.coco_gt.getImgIds())
for image in image_list:
if not is_pic(image):
continue
# False ground truth
gt_bbox = np.array([[0, 0, 1e-05, 1e-05]], dtype=np.float32)
gt_class = np.array([[0]], dtype=np.int32)
gt_score = np.ones((1, 1), dtype=np.float32)
is_crowd = np.array([[0]], dtype=np.int32)
difficult = np.zeros((1, 1), dtype=np.int32)
gt_poly = [[[0, 0, 0, 1e-05, 1e-05, 1e-05, 1e-05, 0]]]
max_img_id += 1
im_fname = osp.join(image_dir, image)
img_data = cv2.imread(im_fname)
im_h, im_w, im_c = img_data.shape
im_info = {
'im_id': np.array([max_img_id]).astype('int32'),
'image_shape': np.array([im_h, im_w]).astype('int32'),
}
label_info = {
'is_crowd': is_crowd,
'gt_class': gt_class,
'gt_bbox': gt_bbox,
'gt_score': gt_score,
'difficult': difficult,
'gt_poly': gt_poly
}
coco_rec = (im_info, label_info)
self.file_list.append([im_fname, coco_rec])
self.num_samples = len(self.file_list)
def iterator(self):
self._epoch += 1
self._pos = 0
......
......@@ -34,11 +34,15 @@ from .classifier import DenseNet121
from .classifier import DenseNet161
from .classifier import DenseNet201
from .classifier import ShuffleNetV2
from .classifier import HRNet_W18
from .classifier import AlexNet
from .base import BaseAPI
from .yolo_v3 import YOLOv3
from .faster_rcnn import FasterRCNN
from .mask_rcnn import MaskRCNN
from .unet import UNet
from .deeplabv3p import DeepLabv3p
from .hrnet import HRNet
from .fast_scnn import FastSCNN
from .load_model import load_model
from .slim import prune
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
import paddle.fluid as fluid
......@@ -79,9 +79,9 @@ class BaseAPI:
return int(batch_size // len(self.places))
else:
raise Exception("Please support correct batch_size, \
which can be divided by available cards({}) in {}".
format(paddlex.env_info['num'],
paddlex.env_info['place']))
which can be divided by available cards({}) in {}"
.format(paddlex.env_info['num'], paddlex.env_info[
'place']))
def build_program(self):
# 构建训练网络
......@@ -194,12 +194,37 @@ class BaseAPI:
if os.path.exists(pretrain_dir):
os.remove(pretrain_dir)
os.makedirs(pretrain_dir)
if pretrain_weights is not None and not os.path.exists(
pretrain_weights):
if self.model_type == 'classifier':
if pretrain_weights not in ['IMAGENET']:
logging.warning(
"Pretrain_weights for classifier should be defined as directory path or parameter file or 'IMAGENET' or None, but it is {}, so we force to set it as 'IMAGENET'".
format(pretrain_weights))
pretrain_weights = 'IMAGENET'
elif self.model_type == 'detector':
if pretrain_weights not in ['IMAGENET', 'COCO']:
logging.warning(
"Pretrain_weights for detector should be defined as directory path or parameter file or 'IMAGENET' or 'COCO' or None, but it is {}, so we force to set it as 'IMAGENET'".
format(pretrain_weights))
pretrain_weights = 'IMAGENET'
elif self.model_type == 'segmenter':
if pretrain_weights not in [
'IMAGENET', 'COCO', 'CITYSCAPES'
]:
logging.warning(
"Pretrain_weights for segmenter should be defined as directory path or parameter file or 'IMAGENET' or 'COCO' or 'CITYSCAPES', but it is {}, so we force to set it as 'IMAGENET'".
format(pretrain_weights))
pretrain_weights = 'IMAGENET'
if hasattr(self, 'backbone'):
backbone = self.backbone
else:
backbone = self.__class__.__name__
if backbone == "HRNet":
backbone = backbone + "_W{}".format(self.width)
class_name = self.__class__.__name__
pretrain_weights = get_pretrain_weights(
pretrain_weights, self.model_type, backbone, pretrain_dir)
pretrain_weights, class_name, backbone, pretrain_dir)
if startup_prog is None:
startup_prog = fluid.default_startup_program()
self.exe.run(startup_prog)
......@@ -210,8 +235,8 @@ class BaseAPI:
paddlex.utils.utils.load_pretrain_weights(
self.exe, self.train_prog, resume_checkpoint, resume=True)
if not osp.exists(osp.join(resume_checkpoint, "model.yml")):
raise Exception(
"There's not model.yml in {}".format(resume_checkpoint))
raise Exception("There's not model.yml in {}".format(
resume_checkpoint))
with open(osp.join(resume_checkpoint, "model.yml")) as f:
info = yaml.load(f.read(), Loader=yaml.Loader)
self.completed_epochs = info['completed_epochs']
......@@ -260,6 +285,7 @@ class BaseAPI:
info['_Attributes']['num_classes'] = self.num_classes
info['_Attributes']['labels'] = self.labels
info['_Attributes']['fixed_input_shape'] = self.fixed_input_shape
try:
primary_metric_key = list(self.eval_metrics.keys())[0]
primary_metric_value = float(self.eval_metrics[primary_metric_key])
......@@ -269,13 +295,13 @@ class BaseAPI:
except:
pass
if hasattr(self, 'test_transforms'):
if hasattr(self.test_transforms, 'to_rgb'):
if self.test_transforms.to_rgb:
info['TransformsMode'] = 'RGB'
else:
info['TransformsMode'] = 'BGR'
if hasattr(self, 'test_transforms'):
if self.test_transforms is not None:
info['Transforms'] = list()
for op in self.test_transforms.transforms:
......@@ -362,8 +388,8 @@ class BaseAPI:
# 模型保存成功的标志
open(osp.join(save_dir, '.success'), 'w').close()
logging.info(
"Model for inference deploy saved in {}.".format(save_dir))
logging.info("Model for inference deploy saved in {}.".format(
save_dir))
def train_loop(self,
num_epochs,
......@@ -377,7 +403,8 @@ class BaseAPI:
early_stop=False,
early_stop_patience=5):
if train_dataset.num_samples < train_batch_size:
raise Exception('The amount of training datset must be larger than batch size.')
raise Exception(
'The amount of training datset must be larger than batch size.')
if not osp.isdir(save_dir):
if osp.exists(save_dir):
os.remove(save_dir)
......@@ -415,8 +442,8 @@ class BaseAPI:
build_strategy=build_strategy,
exec_strategy=exec_strategy)
total_num_steps = math.floor(
train_dataset.num_samples / train_batch_size)
total_num_steps = math.floor(train_dataset.num_samples /
train_batch_size)
num_steps = 0
time_stat = list()
time_train_one_epoch = None
......@@ -430,8 +457,8 @@ class BaseAPI:
if self.model_type == 'detector':
eval_batch_size = self._get_single_card_bs(train_batch_size)
if eval_dataset is not None:
total_num_steps_eval = math.ceil(
eval_dataset.num_samples / eval_batch_size)
total_num_steps_eval = math.ceil(eval_dataset.num_samples /
eval_batch_size)
if use_vdl:
# VisualDL component
......@@ -473,7 +500,9 @@ class BaseAPI:
if use_vdl:
for k, v in step_metrics.items():
log_writer.add_scalar('Metrics/Training(Step): {}'.format(k), v, num_steps)
log_writer.add_scalar(
'Metrics/Training(Step): {}'.format(k), v,
num_steps)
# 估算剩余时间
avg_step_time = np.mean(time_stat)
......@@ -481,11 +510,12 @@ class BaseAPI:
eta = (num_epochs - i - 1) * time_train_one_epoch + (
total_num_steps - step - 1) * avg_step_time
else:
eta = ((num_epochs - i) * total_num_steps - step -
1) * avg_step_time
eta = ((num_epochs - i) * total_num_steps - step - 1
) * avg_step_time
if time_eval_one_epoch is not None:
eval_eta = (total_eval_times - i //
save_interval_epochs) * time_eval_one_epoch
eval_eta = (
total_eval_times - i // save_interval_epochs
) * time_eval_one_epoch
else:
eval_eta = (
total_eval_times - i // save_interval_epochs
......@@ -495,16 +525,18 @@ class BaseAPI:
logging.info(
"[TRAIN] Epoch={}/{}, Step={}/{}, {}, time_each_step={}s, eta={}"
.format(i + 1, num_epochs, step + 1, total_num_steps,
dict2str(step_metrics), round(
avg_step_time, 2), eta_str))
dict2str(step_metrics),
round(avg_step_time, 2), eta_str))
train_metrics = OrderedDict(
zip(list(self.train_outputs.keys()), np.mean(records, axis=0)))
zip(list(self.train_outputs.keys()), np.mean(
records, axis=0)))
logging.info('[TRAIN] Epoch {} finished, {} .'.format(
i + 1, dict2str(train_metrics)))
time_train_one_epoch = time.time() - epoch_start_time
epoch_start_time = time.time()
# 每间隔save_interval_epochs, 在验证集上评估和对模型进行保存
self.completed_epochs += 1
eval_epoch_start_time = time.time()
if (i + 1) % save_interval_epochs == 0 or i == num_epochs - 1:
current_save_dir = osp.join(save_dir, "epoch_{}".format(i + 1))
......@@ -518,7 +550,6 @@ class BaseAPI:
return_details=True)
logging.info('[EVAL] Finished, Epoch={}, {} .'.format(
i + 1, dict2str(self.eval_metrics)))
self.completed_epochs += 1
# 保存最优模型
best_accuracy_key = list(self.eval_metrics.keys())[0]
current_accuracy = self.eval_metrics[best_accuracy_key]
......@@ -534,7 +565,8 @@ class BaseAPI:
if isinstance(v, np.ndarray):
if v.size > 1:
continue
log_writer.add_scalar("Metrics/Eval(Epoch): {}".format(k), v, i+1)
log_writer.add_scalar(
"Metrics/Eval(Epoch): {}".format(k), v, i + 1)
self.save_model(save_dir=current_save_dir)
time_eval_one_epoch = time.time() - eval_epoch_start_time
eval_epoch_start_time = time.time()
......
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
import numpy as np
......@@ -40,20 +40,21 @@ class BaseClassifier(BaseAPI):
self.init_params = locals()
super(BaseClassifier, self).__init__('classifier')
if not hasattr(paddlex.cv.nets, str.lower(model_name)):
raise Exception(
"ERROR: There's no model named {}.".format(model_name))
raise Exception("ERROR: There's no model named {}.".format(
model_name))
self.model_name = model_name
self.labels = None
self.num_classes = num_classes
self.fixed_input_shape = None
def build_net(self, mode='train'):
if self.__class__.__name__ == "AlexNet":
assert self.fixed_input_shape is not None, "In AlexNet, input_shape should be defined, e.g. model = paddlex.cls.AlexNet(num_classes=1000, input_shape=[224, 224])"
if self.fixed_input_shape is not None:
input_shape = [
None, 3, self.fixed_input_shape[1], self.fixed_input_shape[0]
]
image = fluid.data(
dtype='float32', shape=input_shape, name='image')
image = fluid.data(dtype='float32', shape=input_shape, name='image')
else:
image = fluid.data(
dtype='float32', shape=[None, 3, None, None], name='image')
......@@ -81,7 +82,8 @@ class BaseClassifier(BaseAPI):
del outputs['loss']
return inputs, outputs
def default_optimizer(self, learning_rate, lr_decay_epochs, lr_decay_gamma,
def default_optimizer(self, learning_rate, warmup_steps, warmup_start_lr,
lr_decay_epochs, lr_decay_gamma,
num_steps_each_epoch):
boundaries = [b * num_steps_each_epoch for b in lr_decay_epochs]
values = [
......@@ -90,6 +92,24 @@ class BaseClassifier(BaseAPI):
]
lr_decay = fluid.layers.piecewise_decay(
boundaries=boundaries, values=values)
if warmup_steps > 0:
if warmup_steps > lr_decay_epochs[0] * num_steps_each_epoch:
logging.error(
"In function train(), parameters should satisfy: warmup_steps <= lr_decay_epochs[0]*num_samples_in_train_dataset",
exit=False)
logging.error(
"See this doc for more information: https://github.com/PaddlePaddle/PaddleX/blob/develop/docs/appendix/parameters.md#notice",
exit=False)
logging.error(
"warmup_steps should less than {} or lr_decay_epochs[0] greater than {}, please modify 'lr_decay_epochs' or 'warmup_steps' in train function".
format(lr_decay_epochs[0] * num_steps_each_epoch,
warmup_steps // num_steps_each_epoch))
lr_decay = fluid.layers.linear_lr_warmup(
learning_rate=lr_decay,
warmup_steps=warmup_steps,
start_lr=warmup_start_lr,
end_lr=learning_rate)
optimizer = fluid.optimizer.Momentum(
lr_decay,
momentum=0.9,
......@@ -107,6 +127,8 @@ class BaseClassifier(BaseAPI):
pretrain_weights='IMAGENET',
optimizer=None,
learning_rate=0.025,
warmup_steps=0,
warmup_start_lr=0.0,
lr_decay_epochs=[30, 60, 90],
lr_decay_gamma=0.1,
use_vdl=False,
......@@ -129,6 +151,8 @@ class BaseClassifier(BaseAPI):
optimizer (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认优化器:
fluid.layers.piecewise_decay衰减策略,fluid.optimizer.Momentum优化方法。
learning_rate (float): 默认优化器的初始学习率。默认为0.025。
warmup_steps(int): 学习率从warmup_start_lr上升至设定的learning_rate,所需的步数,默认为0
warmup_start_lr(float): 学习率在warmup阶段时的起始值,默认为0.0
lr_decay_epochs (list): 默认优化器的学习率衰减轮数。默认为[30, 60, 90]。
lr_decay_gamma (float): 默认优化器的学习率衰减率。默认为0.1。
use_vdl (bool): 是否使用VisualDL进行可视化。默认值为False。
......@@ -149,6 +173,8 @@ class BaseClassifier(BaseAPI):
num_steps_each_epoch = train_dataset.num_samples // train_batch_size
optimizer = self.default_optimizer(
learning_rate=learning_rate,
warmup_steps=warmup_steps,
warmup_start_lr=warmup_start_lr,
lr_decay_epochs=lr_decay_epochs,
lr_decay_gamma=lr_decay_gamma,
num_steps_each_epoch=num_steps_each_epoch)
......@@ -193,8 +219,7 @@ class BaseClassifier(BaseAPI):
tuple (metrics, eval_details): 当return_details为True时,增加返回dict,
包含关键字:'true_labels'、'pred_scores',分别代表真实类别id、每个类别的预测得分。
"""
self.arrange_transforms(
transforms=eval_dataset.transforms, mode='eval')
self.arrange_transforms(transforms=eval_dataset.transforms, mode='eval')
data_generator = eval_dataset.generator(
batch_size=batch_size, drop_last=False)
k = min(5, self.num_classes)
......@@ -206,9 +231,8 @@ class BaseClassifier(BaseAPI):
self.test_prog).with_data_parallel(
share_vars_from=self.parallel_train_prog)
batch_size_each_gpu = self._get_single_card_bs(batch_size)
logging.info(
"Start to evaluating(total_samples={}, total_steps={})...".format(
eval_dataset.num_samples, total_steps))
logging.info("Start to evaluating(total_samples={}, total_steps={})...".
format(eval_dataset.num_samples, total_steps))
for step, data in tqdm.tqdm(
enumerate(data_generator()), total=total_steps):
images = np.array([d[0] for d in data]).astype('float32')
......@@ -218,15 +242,14 @@ class BaseClassifier(BaseAPI):
num_pad_samples = batch_size - num_samples
pad_images = np.tile(images[0:1], (num_pad_samples, 1, 1, 1))
images = np.concatenate([images, pad_images])
outputs = self.exe.run(
self.parallel_test_prog,
outputs = self.exe.run(self.parallel_test_prog,
feed={'image': images},
fetch_list=list(self.test_outputs.values()))
outputs = [outputs[0][:num_samples]]
true_labels.extend(labels)
pred_scores.extend(outputs[0].tolist())
logging.debug("[EVAL] Epoch={}, Step={}/{}".format(
epoch_id, step + 1, total_steps))
logging.debug("[EVAL] Epoch={}, Step={}/{}".format(epoch_id, step +
1, total_steps))
pred_top1_label = np.argsort(pred_scores)[:, -1]
pred_topk_label = np.argsort(pred_scores)[:, -k:]
......@@ -263,10 +286,10 @@ class BaseClassifier(BaseAPI):
self.arrange_transforms(
transforms=self.test_transforms, mode='test')
im = self.test_transforms(img_file)
result = self.exe.run(
self.test_prog,
result = self.exe.run(self.test_prog,
feed={'image': im},
fetch_list=list(self.test_outputs.values()))
fetch_list=list(self.test_outputs.values()),
use_program_cache=True)
pred_label = np.argsort(result[0][0])[::-1][:true_topk]
res = [{
'category_id': l,
......@@ -400,3 +423,16 @@ class ShuffleNetV2(BaseClassifier):
def __init__(self, num_classes=1000):
super(ShuffleNetV2, self).__init__(
model_name='ShuffleNetV2', num_classes=num_classes)
class HRNet_W18(BaseClassifier):
def __init__(self, num_classes=1000):
super(HRNet_W18, self).__init__(
model_name='HRNet_W18', num_classes=num_classes)
class AlexNet(BaseClassifier):
def __init__(self, num_classes=1000, input_shape=None):
super(AlexNet, self).__init__(
model_name='AlexNet', num_classes=num_classes)
self.fixed_input_shape = input_shape
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
import os.path as osp
......@@ -190,11 +190,6 @@ class DeepLabv3p(BaseAPI):
if mode == 'train':
self.optimizer.minimize(model_out)
outputs['loss'] = model_out
elif mode == 'eval':
outputs['loss'] = model_out[0]
outputs['pred'] = model_out[1]
outputs['label'] = model_out[2]
outputs['mask'] = model_out[3]
else:
outputs['pred'] = model_out[0]
outputs['logit'] = model_out[1]
......@@ -247,14 +242,16 @@ class DeepLabv3p(BaseAPI):
log_interval_steps (int): 训练日志输出间隔(单位:迭代次数)。默认为2。
save_dir (str): 模型保存路径。默认'output'。
pretrain_weights (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',
则自动下载在ImageNet图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认'IMAGENET。
则自动下载在ImageNet图片数据上预训练的模型权重;若为字符串'COCO',
则自动下载在COCO数据集上预训练的模型权重;若为字符串'CITYSCAPES',
则自动下载在CITYSCAPES数据集上预训练的模型权重;若为None,则不使用预训练模型。默认'IMAGENET。
optimizer (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认的优化器:使用
fluid.optimizer.Momentum优化方法,polynomial的学习率衰减策略。
learning_rate (float): 默认优化器的初始学习率。默认0.01。
lr_decay_power (float): 默认优化器学习率衰减指数。默认0.9。
use_vdl (bool): 是否使用VisualDL进行可视化。默认False。
sensitivities_file (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT',
则自动下载在ImageNet图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。
则自动下载在Cityscapes图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。
eval_metric_loss (float): 可容忍的精度损失。默认为0.05。
early_stop (bool): 是否使用提前终止训练策略。默认值为False。
early_stop_patience (int): 当使用提前终止训练策略时,如果验证集精度在`early_stop_patience`个epoch内
......@@ -336,15 +333,24 @@ class DeepLabv3p(BaseAPI):
for step, data in tqdm.tqdm(
enumerate(data_generator()), total=total_steps):
images = np.array([d[0] for d in data])
labels = np.array([d[1] for d in data])
_, _, im_h, im_w = images.shape
labels = list()
for d in data:
padding_label = np.zeros(
(1, im_h, im_w)).astype('int64') + self.ignore_index
_, label_h, label_w = d[1].shape
padding_label[:, :label_h, :label_w] = d[1]
labels.append(padding_label)
labels = np.array(labels)
num_samples = images.shape[0]
if num_samples < batch_size:
num_pad_samples = batch_size - num_samples
pad_images = np.tile(images[0:1], (num_pad_samples, 1, 1, 1))
images = np.concatenate([images, pad_images])
feed_data = {'image': images}
outputs = self.exe.run(
self.parallel_test_prog,
outputs = self.exe.run(self.parallel_test_prog,
feed=feed_data,
fetch_list=list(self.test_outputs.values()),
return_numpy=True)
......@@ -364,8 +370,7 @@ class DeepLabv3p(BaseAPI):
metrics = OrderedDict(
zip(['miou', 'category_iou', 'macc', 'category_acc', 'kappa'],
[miou, category_iou, macc, category_acc,
conf_mat.kappa()]))
[miou, category_iou, macc, category_acc, conf_mat.kappa()]))
if return_details:
eval_details = {
'confusion_matrix': conf_mat.confusion_matrix.tolist()
......@@ -394,10 +399,10 @@ class DeepLabv3p(BaseAPI):
transforms=self.test_transforms, mode='test')
im, im_info = self.test_transforms(im_file)
im = np.expand_dims(im, axis=0)
result = self.exe.run(
self.test_prog,
result = self.exe.run(self.test_prog,
feed={'image': im},
fetch_list=list(self.test_outputs.values()))
fetch_list=list(self.test_outputs.values()),
use_program_cache=True)
pred = result[0]
pred = np.squeeze(pred).astype('uint8')
logit = result[1]
......@@ -413,6 +418,6 @@ class DeepLabv3p(BaseAPI):
pred = pred[0:h, 0:w]
logit = logit[0:h, 0:w, :]
else:
raise Exception("Unexpected info '{}' in im_info".format(
info[0]))
raise Exception("Unexpected info '{}' in im_info".format(info[
0]))
return {'label_map': pred, 'score_map': logit}
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
import paddle.fluid as fluid
import paddlex
from collections import OrderedDict
from .deeplabv3p import DeepLabv3p
class FastSCNN(DeepLabv3p):
"""实现Fast SCNN网络的构建并进行训练、评估、预测和模型导出。
Args:
num_classes (int): 类别数。
use_bce_loss (bool): 是否使用bce loss作为网络的损失函数,只能用于两类分割。可与dice loss同时使用。默认False。
use_dice_loss (bool): 是否使用dice loss作为网络的损失函数,只能用于两类分割,可与bce loss同时使用。
当use_bce_loss和use_dice_loss都为False时,使用交叉熵损失函数。默认False。
class_weight (list/str): 交叉熵损失函数各类损失的权重。当class_weight为list的时候,长度应为
num_classes。当class_weight为str时, weight.lower()应为'dynamic',这时会根据每一轮各类像素的比重
自行计算相应的权重,每一类的权重为:每类的比例 * num_classes。class_weight取默认值None是,各类的权重1,
即平时使用的交叉熵损失函数。
ignore_index (int): label上忽略的值,label为ignore_index的像素不参与损失函数的计算。默认255。
multi_loss_weight (list): 多分支上的loss权重。默认计算一个分支上的loss,即默认值为[1.0]。
也支持计算两个分支或三个分支上的loss,权重按[fusion_branch_weight, higher_branch_weight, lower_branch_weight]排列,
fusion_branch_weight为空间细节分支和全局上下文分支融合后的分支上的loss权重,higher_branch_weight为空间细节分支上的loss权重,
lower_branch_weight为全局上下文分支上的loss权重,若higher_branch_weight和lower_branch_weight未设置则不会计算这两个分支上的loss。
Raises:
ValueError: use_bce_loss或use_dice_loss为真且num_calsses > 2。
ValueError: class_weight为list, 但长度不等于num_class。
class_weight为str, 但class_weight.low()不等于dynamic。
TypeError: class_weight不为None时,其类型不是list或str。
TypeError: multi_loss_weight不为list。
ValueError: multi_loss_weight为list但长度小于0或者大于3。
"""
def __init__(self,
num_classes=2,
use_bce_loss=False,
use_dice_loss=False,
class_weight=None,
ignore_index=255,
multi_loss_weight=[1.0]):
self.init_params = locals()
super(DeepLabv3p, self).__init__('segmenter')
# dice_loss或bce_loss只适用两类分割中
if num_classes > 2 and (use_bce_loss or use_dice_loss):
raise ValueError(
"dice loss and bce loss is only applicable to binary classfication"
)
if class_weight is not None:
if isinstance(class_weight, list):
if len(class_weight) != num_classes:
raise ValueError(
"Length of class_weight should be equal to number of classes"
)
elif isinstance(class_weight, str):
if class_weight.lower() != 'dynamic':
raise ValueError(
"if class_weight is string, must be dynamic!")
else:
raise TypeError(
'Expect class_weight is a list or string but receive {}'.
format(type(class_weight)))
if not isinstance(multi_loss_weight, list):
raise TypeError(
'Expect multi_loss_weight is a list but receive {}'.format(
type(multi_loss_weight)))
if len(multi_loss_weight) > 3 or len(multi_loss_weight) < 0:
raise ValueError(
"Length of multi_loss_weight should be lower than or equal to 3 but greater than 0."
)
self.num_classes = num_classes
self.use_bce_loss = use_bce_loss
self.use_dice_loss = use_dice_loss
self.class_weight = class_weight
self.multi_loss_weight = multi_loss_weight
self.ignore_index = ignore_index
self.labels = None
self.fixed_input_shape = None
def build_net(self, mode='train'):
model = paddlex.cv.nets.segmentation.FastSCNN(
self.num_classes,
mode=mode,
use_bce_loss=self.use_bce_loss,
use_dice_loss=self.use_dice_loss,
class_weight=self.class_weight,
ignore_index=self.ignore_index,
multi_loss_weight=self.multi_loss_weight,
fixed_input_shape=self.fixed_input_shape)
inputs = model.generate_inputs()
model_out = model.build_net(inputs)
outputs = OrderedDict()
if mode == 'train':
self.optimizer.minimize(model_out)
outputs['loss'] = model_out
else:
outputs['pred'] = model_out[0]
outputs['logit'] = model_out[1]
return inputs, outputs
def train(self,
num_epochs,
train_dataset,
train_batch_size=2,
eval_dataset=None,
save_interval_epochs=1,
log_interval_steps=2,
save_dir='output',
pretrain_weights='CITYSCAPES',
optimizer=None,
learning_rate=0.01,
lr_decay_power=0.9,
use_vdl=False,
sensitivities_file=None,
eval_metric_loss=0.05,
early_stop=False,
early_stop_patience=5,
resume_checkpoint=None):
"""训练。
Args:
num_epochs (int): 训练迭代轮数。
train_dataset (paddlex.datasets): 训练数据读取器。
train_batch_size (int): 训练数据batch大小。同时作为验证数据batch大小。默认2。
eval_dataset (paddlex.datasets): 评估数据读取器。
save_interval_epochs (int): 模型保存间隔(单位:迭代轮数)。默认为1。
log_interval_steps (int): 训练日志输出间隔(单位:迭代次数)。默认为2。
save_dir (str): 模型保存路径。默认'output'。
pretrain_weights (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'CITYSCAPES'
则自动下载在CITYSCAPES图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认为'CITYSCAPES'。
optimizer (paddle.fluid.optimizer): 优化器。当改参数为None时,使用默认的优化器:使用
fluid.optimizer.Momentum优化方法,polynomial的学习率衰减策略。
learning_rate (float): 默认优化器的初始学习率。默认0.01。
lr_decay_power (float): 默认优化器学习率多项式衰减系数。默认0.9。
use_vdl (bool): 是否使用VisualDL进行可视化。默认False。
sensitivities_file (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT',
则自动下载在Cityscapes图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。
eval_metric_loss (float): 可容忍的精度损失。默认为0.05。
early_stop (bool): 是否使用提前终止训练策略。默认值为False。
early_stop_patience (int): 当使用提前终止训练策略时,如果验证集精度在`early_stop_patience`个epoch内
连续下降或持平,则终止训练。默认值为5。
resume_checkpoint (str): 恢复训练时指定上次训练保存的模型路径。若为None,则不会恢复训练。默认值为None。
Raises:
ValueError: 模型从inference model进行加载。
"""
return super(FastSCNN, self).train(
num_epochs, train_dataset, train_batch_size, eval_dataset,
save_interval_epochs, log_interval_steps, save_dir,
pretrain_weights, optimizer, learning_rate, lr_decay_power,
use_vdl, sensitivities_file, eval_metric_loss, early_stop,
early_stop_patience, resume_checkpoint)
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
import math
......@@ -32,7 +32,7 @@ class FasterRCNN(BaseAPI):
Args:
num_classes (int): 包含了背景类的类别数。默认为81。
backbone (str): FasterRCNN的backbone网络,取值范围为['ResNet18', 'ResNet50',
'ResNet50_vd', 'ResNet101', 'ResNet101_vd']。默认为'ResNet50'。
'ResNet50_vd', 'ResNet101', 'ResNet101_vd', 'HRNet_W18']。默认为'ResNet50'。
with_fpn (bool): 是否使用FPN结构。默认为True。
aspect_ratios (list): 生成anchor高宽比的可选值。默认为[0.5, 1.0, 2.0]。
anchor_sizes (list): 生成anchor大小的可选值。默认为[32, 64, 128, 256, 512]。
......@@ -47,7 +47,8 @@ class FasterRCNN(BaseAPI):
self.init_params = locals()
super(FasterRCNN, self).__init__('detector')
backbones = [
'ResNet18', 'ResNet50', 'ResNet50_vd', 'ResNet101', 'ResNet101_vd'
'ResNet18', 'ResNet50', 'ResNet50_vd', 'ResNet101', 'ResNet101_vd',
'HRNet_W18'
]
assert backbone in backbones, "backbone should be one of {}".format(
backbones)
......@@ -79,6 +80,12 @@ class FasterRCNN(BaseAPI):
layers = 101
variant = 'd'
norm_type = 'affine_channel'
elif backbone_name == 'HRNet_W18':
backbone = paddlex.cv.nets.hrnet.HRNet(
width=18, freeze_norm=True, norm_decay=0., freeze_at=0)
if self.with_fpn is False:
self.with_fpn = True
return backbone
if self.with_fpn:
backbone = paddlex.cv.nets.resnet.ResNet(
norm_type='bn' if norm_type is None else norm_type,
......@@ -117,12 +124,12 @@ class FasterRCNN(BaseAPI):
model_out = model.build_net(inputs)
loss = model_out['loss']
self.optimizer.minimize(loss)
outputs = OrderedDict([('loss', model_out['loss']),
outputs = OrderedDict(
[('loss', model_out['loss']),
('loss_cls', model_out['loss_cls']),
('loss_bbox', model_out['loss_bbox']),
('loss_rpn_cls', model_out['loss_rpn_cls']),
('loss_rpn_bbox',
model_out['loss_rpn_bbox'])])
('loss_rpn_cls', model_out['loss_rpn_cls']), (
'loss_rpn_bbox', model_out['loss_rpn_bbox'])])
else:
outputs = model.build_net(inputs)
return inputs, outputs
......@@ -131,8 +138,16 @@ class FasterRCNN(BaseAPI):
lr_decay_epochs, lr_decay_gamma,
num_steps_each_epoch):
if warmup_steps > lr_decay_epochs[0] * num_steps_each_epoch:
raise Exception("warmup_steps should less than {}".format(
lr_decay_epochs[0] * num_steps_each_epoch))
logging.error(
"In function train(), parameters should satisfy: warmup_steps <= lr_decay_epochs[0]*num_samples_in_train_dataset",
exit=False)
logging.error(
"See this doc for more information: https://github.com/PaddlePaddle/PaddleX/blob/develop/docs/appendix/parameters.md#notice",
exit=False)
logging.error(
"warmup_steps should less than {} or lr_decay_epochs[0] greater than {}, please modify 'lr_decay_epochs' or 'warmup_steps' in train function".
format(lr_decay_epochs[0] * num_steps_each_epoch, warmup_steps
// num_steps_each_epoch))
boundaries = [b * num_steps_each_epoch for b in lr_decay_epochs]
values = [(lr_decay_gamma**i) * learning_rate
for i in range(len(lr_decay_epochs) + 1)]
......@@ -181,7 +196,8 @@ class FasterRCNN(BaseAPI):
log_interval_steps (int): 训练日志输出间隔(单位:迭代次数)。默认为20。
save_dir (str): 模型保存路径。默认值为'output'。
pretrain_weights (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',
则自动下载在ImageNet图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认为'IMAGENET'。
则自动下载在ImageNet图片数据上预训练的模型权重;若为字符串'COCO',
则自动下载在COCO数据集上预训练的模型权重;若为None,则不使用预训练模型。默认为'IMAGENET'。
optimizer (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认优化器:
fluid.layers.piecewise_decay衰减策略,fluid.optimizer.Momentum优化方法。
learning_rate (float): 默认优化器的初始学习率。默认为0.0025。
......@@ -227,7 +243,9 @@ class FasterRCNN(BaseAPI):
# 构建训练、验证、测试网络
self.build_program()
fuse_bn = True
if self.with_fpn and self.backbone in ['ResNet18', 'ResNet50']:
if self.with_fpn and self.backbone in [
'ResNet18', 'ResNet50', 'HRNet_W18'
]:
fuse_bn = False
self.net_initialize(
startup_prog=fluid.default_startup_program(),
......@@ -273,8 +291,7 @@ class FasterRCNN(BaseAPI):
eval_details为dict,包含关键字:'bbox',对应元素预测结果列表,每个预测结果由图像id、
预测框类别id、预测框坐标、预测框得分;’gt‘:真实标注框相关信息。
"""
self.arrange_transforms(
transforms=eval_dataset.transforms, mode='eval')
self.arrange_transforms(transforms=eval_dataset.transforms, mode='eval')
if metric is None:
if hasattr(self, 'metric') and self.metric is not None:
metric = self.metric
......@@ -293,14 +310,12 @@ class FasterRCNN(BaseAPI):
logging.warning(
"Faster RCNN supports batch_size=1 only during evaluating, so batch_size is forced to be set to 1."
)
dataset = eval_dataset.generator(
batch_size=batch_size, drop_last=False)
dataset = eval_dataset.generator(batch_size=batch_size, drop_last=False)
total_steps = math.ceil(eval_dataset.num_samples * 1.0 / batch_size)
results = list()
logging.info(
"Start to evaluating(total_samples={}, total_steps={})...".format(
eval_dataset.num_samples, total_steps))
logging.info("Start to evaluating(total_samples={}, total_steps={})...".
format(eval_dataset.num_samples, total_steps))
for step, data in tqdm.tqdm(enumerate(dataset()), total=total_steps):
images = np.array([d[0] for d in data]).astype('float32')
im_infos = np.array([d[1] for d in data]).astype('float32')
......@@ -310,8 +325,7 @@ class FasterRCNN(BaseAPI):
'im_info': im_infos,
'im_shape': im_shapes,
}
outputs = self.exe.run(
self.test_prog,
outputs = self.exe.run(self.test_prog,
feed=[feed_data],
fetch_list=list(self.test_outputs.values()),
return_numpy=False)
......@@ -339,13 +353,13 @@ class FasterRCNN(BaseAPI):
res['is_difficult'] = (np.array(res_is_difficult),
[res_is_difficult_lod])
results.append(res)
logging.debug("[EVAL] Epoch={}, Step={}/{}".format(
epoch_id, step + 1, total_steps))
logging.debug("[EVAL] Epoch={}, Step={}/{}".format(epoch_id, step +
1, total_steps))
box_ap_stats, eval_details = eval_results(
results, metric, eval_dataset.coco_gt, with_background=True)
metrics = OrderedDict(
zip(['bbox_mmap' if metric == 'COCO' else 'bbox_map'],
box_ap_stats))
zip(['bbox_mmap'
if metric == 'COCO' else 'bbox_map'], box_ap_stats))
if return_details:
return metrics, eval_details
return metrics
......@@ -359,7 +373,8 @@ class FasterRCNN(BaseAPI):
Returns:
list: 预测结果列表,每个预测结果由预测框类别标签、
预测框类别名称、预测框坐标、预测框得分组成。
预测框类别名称、预测框坐标(坐标格式为[xmin, ymin, w, h])、
预测框得分组成。
"""
if transforms is None and not hasattr(self, 'test_transforms'):
raise Exception("transforms need to be defined, now is None.")
......@@ -373,15 +388,15 @@ class FasterRCNN(BaseAPI):
im = np.expand_dims(im, axis=0)
im_resize_info = np.expand_dims(im_resize_info, axis=0)
im_shape = np.expand_dims(im_shape, axis=0)
outputs = self.exe.run(
self.test_prog,
outputs = self.exe.run(self.test_prog,
feed={
'image': im,
'im_info': im_resize_info,
'im_shape': im_shape
},
fetch_list=list(self.test_outputs.values()),
return_numpy=False)
return_numpy=False,
use_program_cache=True)
res = {
k: (np.array(v), v.recursive_sequence_lengths())
for k, v in zip(list(self.test_outputs.keys()), outputs)
......
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
import paddle.fluid as fluid
import paddlex
from collections import OrderedDict
from .deeplabv3p import DeepLabv3p
class HRNet(DeepLabv3p):
"""实现HRNet网络的构建并进行训练、评估、预测和模型导出。
Args:
num_classes (int): 类别数。
width (int): 高分辨率分支中特征层的通道数量。默认值为18。可选择取值为[18, 30, 32, 40, 44, 48, 60, 64]。
use_bce_loss (bool): 是否使用bce loss作为网络的损失函数,只能用于两类分割。可与dice loss同时使用。默认False。
use_dice_loss (bool): 是否使用dice loss作为网络的损失函数,只能用于两类分割,可与bce loss同时使用。
当use_bce_loss和use_dice_loss都为False时,使用交叉熵损失函数。默认False。
class_weight (list/str): 交叉熵损失函数各类损失的权重。当class_weight为list的时候,长度应为
num_classes。当class_weight为str时, weight.lower()应为'dynamic',这时会根据每一轮各类像素的比重
自行计算相应的权重,每一类的权重为:每类的比例 * num_classes。class_weight取默认值None是,各类的权重1,
即平时使用的交叉熵损失函数。
ignore_index (int): label上忽略的值,label为ignore_index的像素不参与损失函数的计算。默认255。
Raises:
ValueError: use_bce_loss或use_dice_loss为真且num_calsses > 2。
ValueError: class_weight为list, 但长度不等于num_class。
class_weight为str, 但class_weight.low()不等于dynamic。
TypeError: class_weight不为None时,其类型不是list或str。
"""
def __init__(self,
num_classes=2,
width=18,
use_bce_loss=False,
use_dice_loss=False,
class_weight=None,
ignore_index=255):
self.init_params = locals()
super(DeepLabv3p, self).__init__('segmenter')
# dice_loss或bce_loss只适用两类分割中
if num_classes > 2 and (use_bce_loss or use_dice_loss):
raise ValueError(
"dice loss and bce loss is only applicable to binary classfication"
)
if class_weight is not None:
if isinstance(class_weight, list):
if len(class_weight) != num_classes:
raise ValueError(
"Length of class_weight should be equal to number of classes"
)
elif isinstance(class_weight, str):
if class_weight.lower() != 'dynamic':
raise ValueError(
"if class_weight is string, must be dynamic!")
else:
raise TypeError(
'Expect class_weight is a list or string but receive {}'.
format(type(class_weight)))
self.num_classes = num_classes
self.width = width
self.use_bce_loss = use_bce_loss
self.use_dice_loss = use_dice_loss
self.class_weight = class_weight
self.ignore_index = ignore_index
self.labels = None
self.fixed_input_shape = None
def build_net(self, mode='train'):
model = paddlex.cv.nets.segmentation.HRNet(
self.num_classes,
width=self.width,
mode=mode,
use_bce_loss=self.use_bce_loss,
use_dice_loss=self.use_dice_loss,
class_weight=self.class_weight,
ignore_index=self.ignore_index,
fixed_input_shape=self.fixed_input_shape)
inputs = model.generate_inputs()
model_out = model.build_net(inputs)
outputs = OrderedDict()
if mode == 'train':
self.optimizer.minimize(model_out)
outputs['loss'] = model_out
else:
outputs['pred'] = model_out[0]
outputs['logit'] = model_out[1]
return inputs, outputs
def default_optimizer(self,
learning_rate,
num_epochs,
num_steps_each_epoch,
lr_decay_power=0.9):
decay_step = num_epochs * num_steps_each_epoch
lr_decay = fluid.layers.polynomial_decay(
learning_rate,
decay_step,
end_learning_rate=0,
power=lr_decay_power)
optimizer = fluid.optimizer.Momentum(
lr_decay,
momentum=0.9,
regularization=fluid.regularizer.L2Decay(
regularization_coeff=5e-04))
return optimizer
def train(self,
num_epochs,
train_dataset,
train_batch_size=2,
eval_dataset=None,
save_interval_epochs=1,
log_interval_steps=2,
save_dir='output',
pretrain_weights='IMAGENET',
optimizer=None,
learning_rate=0.01,
lr_decay_power=0.9,
use_vdl=False,
sensitivities_file=None,
eval_metric_loss=0.05,
early_stop=False,
early_stop_patience=5,
resume_checkpoint=None):
"""训练。
Args:
num_epochs (int): 训练迭代轮数。
train_dataset (paddlex.datasets): 训练数据读取器。
train_batch_size (int): 训练数据batch大小。同时作为验证数据batch大小。默认2。
eval_dataset (paddlex.datasets): 评估数据读取器。
save_interval_epochs (int): 模型保存间隔(单位:迭代轮数)。默认为1。
log_interval_steps (int): 训练日志输出间隔(单位:迭代次数)。默认为2。
save_dir (str): 模型保存路径。默认'output'。
pretrain_weights (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',
则自动下载在IMAGENET图片数据上预训练的模型权重;若为字符串'CITYSCAPES'
则自动下载在CITYSCAPES图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认为'IMAGENET'。
optimizer (paddle.fluid.optimizer): 优化器。当改参数为None时,使用默认的优化器:使用
fluid.optimizer.Momentum优化方法,polynomial的学习率衰减策略。
learning_rate (float): 默认优化器的初始学习率。默认0.01。
lr_decay_power (float): 默认优化器学习率多项式衰减系数。默认0.9。
use_vdl (bool): 是否使用VisualDL进行可视化。默认False。
sensitivities_file (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT',
则自动下载在Cityscapes图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。
eval_metric_loss (float): 可容忍的精度损失。默认为0.05。
early_stop (bool): 是否使用提前终止训练策略。默认值为False。
early_stop_patience (int): 当使用提前终止训练策略时,如果验证集精度在`early_stop_patience`个epoch内
连续下降或持平,则终止训练。默认值为5。
resume_checkpoint (str): 恢复训练时指定上次训练保存的模型路径。若为None,则不会恢复训练。默认值为None。
Raises:
ValueError: 模型从inference model进行加载。
"""
return super(HRNet, self).train(
num_epochs, train_dataset, train_batch_size, eval_dataset,
save_interval_epochs, log_interval_steps, save_dir,
pretrain_weights, optimizer, learning_rate, lr_decay_power, use_vdl,
sensitivities_file, eval_metric_loss, early_stop,
early_stop_patience, resume_checkpoint)
......@@ -41,7 +41,16 @@ def load_model(model_dir, fixed_input_shape=None):
if 'model_name' in info['_init_params']:
del info['_init_params']['model_name']
model = getattr(paddlex.cv.models, info['Model'])(**info['_init_params'])
model.fixed_input_shape = fixed_input_shape
if '_Attributes' in info:
if 'fixed_input_shape' in info['_Attributes']:
fixed_input_shape = info['_Attributes']['fixed_input_shape']
if fixed_input_shape is not None:
logging.info("Model already has fixed_input_shape with {}".
format(fixed_input_shape))
model.fixed_input_shape = fixed_input_shape
if status == "Normal" or \
status == "Prune" or status == "fluid.save":
startup_prog = fluid.Program()
......@@ -88,8 +97,8 @@ def load_model(model_dir, fixed_input_shape=None):
model.model_type, info['Transforms'], info['BatchTransforms'])
model.eval_transforms = copy.deepcopy(model.test_transforms)
else:
model.test_transforms = build_transforms(
model.model_type, info['Transforms'], to_rgb)
model.test_transforms = build_transforms(model.model_type,
info['Transforms'], to_rgb)
model.eval_transforms = copy.deepcopy(model.test_transforms)
if '_Attributes' in info:
......@@ -107,20 +116,7 @@ def fix_input_shape(info, fixed_input_shape=None):
resize = {'ResizeByShort': {}}
padding = {'Padding': {}}
if info['_Attributes']['model_type'] == 'classifier':
crop_size = 0
for transform in info['Transforms']:
if 'CenterCrop' in transform:
crop_size = transform['CenterCrop']['crop_size']
break
assert crop_size == fixed_input_shape[
0], "fixed_input_shape must == CenterCrop:crop_size:{}".format(
crop_size)
assert crop_size == fixed_input_shape[
1], "fixed_input_shape must == CenterCrop:crop_size:{}".format(
crop_size)
if crop_size == 0:
logging.warning(
"fixed_input_shape must == input shape when trainning")
pass
else:
resize['ResizeByShort']['short_size'] = min(fixed_input_shape)
resize['ResizeByShort']['max_size'] = max(fixed_input_shape)
......
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
import math
......@@ -32,7 +32,7 @@ class MaskRCNN(FasterRCNN):
Args:
num_classes (int): 包含了背景类的类别数。默认为81。
backbone (str): MaskRCNN的backbone网络,取值范围为['ResNet18', 'ResNet50',
'ResNet50_vd', 'ResNet101', 'ResNet101_vd']。默认为'ResNet50'。
'ResNet50_vd', 'ResNet101', 'ResNet101_vd', 'HRNet_W18']。默认为'ResNet50'。
with_fpn (bool): 是否使用FPN结构。默认为True。
aspect_ratios (list): 生成anchor高宽比的可选值。默认为[0.5, 1.0, 2.0]。
anchor_sizes (list): 生成anchor大小的可选值。默认为[32, 64, 128, 256, 512]。
......@@ -46,7 +46,8 @@ class MaskRCNN(FasterRCNN):
anchor_sizes=[32, 64, 128, 256, 512]):
self.init_params = locals()
backbones = [
'ResNet18', 'ResNet50', 'ResNet50_vd', 'ResNet101', 'ResNet101_vd'
'ResNet18', 'ResNet50', 'ResNet50_vd', 'ResNet101', 'ResNet101_vd',
'HRNet_W18'
]
assert backbone in backbones, "backbone should be one of {}".format(
backbones)
......@@ -81,13 +82,13 @@ class MaskRCNN(FasterRCNN):
model_out = model.build_net(inputs)
loss = model_out['loss']
self.optimizer.minimize(loss)
outputs = OrderedDict([('loss', model_out['loss']),
outputs = OrderedDict(
[('loss', model_out['loss']),
('loss_cls', model_out['loss_cls']),
('loss_bbox', model_out['loss_bbox']),
('loss_mask', model_out['loss_mask']),
('loss_rpn_cls', model_out['loss_rpn_cls']),
('loss_rpn_bbox',
model_out['loss_rpn_bbox'])])
('loss_rpn_cls', model_out['loss_rpn_cls']), (
'loss_rpn_bbox', model_out['loss_rpn_bbox'])])
else:
outputs = model.build_net(inputs)
return inputs, outputs
......@@ -96,8 +97,16 @@ class MaskRCNN(FasterRCNN):
lr_decay_epochs, lr_decay_gamma,
num_steps_each_epoch):
if warmup_steps > lr_decay_epochs[0] * num_steps_each_epoch:
raise Exception("warmup_step should less than {}".format(
lr_decay_epochs[0] * num_steps_each_epoch))
logging.error(
"In function train(), parameters should satisfy: warmup_steps <= lr_decay_epochs[0]*num_samples_in_train_dataset",
exit=False)
logging.error(
"See this doc for more information: https://github.com/PaddlePaddle/PaddleX/blob/develop/docs/appendix/parameters.md#notice",
exit=False)
logging.error(
"warmup_steps should less than {} or lr_decay_epochs[0] greater than {}, please modify 'lr_decay_epochs' or 'warmup_steps' in train function".
format(lr_decay_epochs[0] * num_steps_each_epoch, warmup_steps
// num_steps_each_epoch))
boundaries = [b * num_steps_each_epoch for b in lr_decay_epochs]
values = [(lr_decay_gamma**i) * learning_rate
for i in range(len(lr_decay_epochs) + 1)]
......@@ -146,7 +155,8 @@ class MaskRCNN(FasterRCNN):
log_interval_steps (int): 训练日志输出间隔(单位:迭代次数)。默认为20。
save_dir (str): 模型保存路径。默认值为'output'。
pretrain_weights (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',
则自动下载在ImageNet图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认为None。
则自动下载在ImageNet图片数据上预训练的模型权重;若为字符串'COCO',
则自动下载在COCO数据集上预训练的模型权重;若为None,则不使用预训练模型。默认为None。
optimizer (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认优化器:
fluid.layers.piecewise_decay衰减策略,fluid.optimizer.Momentum优化方法。
learning_rate (float): 默认优化器的学习率。默认为1.0/800。
......@@ -194,7 +204,9 @@ class MaskRCNN(FasterRCNN):
# 构建训练、验证、测试网络
self.build_program()
fuse_bn = True
if self.with_fpn and self.backbone in ['ResNet18', 'ResNet50']:
if self.with_fpn and self.backbone in [
'ResNet18', 'ResNet50', 'HRNet_W18'
]:
fuse_bn = False
self.net_initialize(
startup_prog=fluid.default_startup_program(),
......@@ -241,8 +253,7 @@ class MaskRCNN(FasterRCNN):
预测框坐标、预测框得分;'mask',对应元素预测区域结果列表,每个预测结果由图像id、
预测区域类别id、预测区域坐标、预测区域得分;’gt‘:真实标注框和标注区域相关信息。
"""
self.arrange_transforms(
transforms=eval_dataset.transforms, mode='eval')
self.arrange_transforms(transforms=eval_dataset.transforms, mode='eval')
if metric is None:
if hasattr(self, 'metric') and self.metric is not None:
metric = self.metric
......@@ -263,9 +274,8 @@ class MaskRCNN(FasterRCNN):
total_steps = math.ceil(eval_dataset.num_samples * 1.0 / batch_size)
results = list()
logging.info(
"Start to evaluating(total_samples={}, total_steps={})...".format(
eval_dataset.num_samples, total_steps))
logging.info("Start to evaluating(total_samples={}, total_steps={})...".
format(eval_dataset.num_samples, total_steps))
for step, data in tqdm.tqdm(
enumerate(data_generator()), total=total_steps):
images = np.array([d[0] for d in data]).astype('float32')
......@@ -276,8 +286,7 @@ class MaskRCNN(FasterRCNN):
'im_info': im_infos,
'im_shape': im_shapes,
}
outputs = self.exe.run(
self.test_prog,
outputs = self.exe.run(self.test_prog,
feed=[feed_data],
fetch_list=list(self.test_outputs.values()),
return_numpy=False)
......@@ -292,8 +301,8 @@ class MaskRCNN(FasterRCNN):
res['im_shape'] = (im_shapes, [])
res['im_id'] = (np.array(res_im_id), [])
results.append(res)
logging.debug("[EVAL] Epoch={}, Step={}/{}".format(
epoch_id, step + 1, total_steps))
logging.debug("[EVAL] Epoch={}, Step={}/{}".format(epoch_id, step +
1, total_steps))
ap_stats, eval_details = eval_results(
results,
......@@ -302,17 +311,16 @@ class MaskRCNN(FasterRCNN):
with_background=True,
resolution=self.mask_head_resolution)
if metric == 'VOC':
if isinstance(ap_stats[0], np.ndarray) and isinstance(
ap_stats[1], np.ndarray):
if isinstance(ap_stats[0], np.ndarray) and isinstance(ap_stats[1],
np.ndarray):
metrics = OrderedDict(
zip(['bbox_map', 'segm_map'],
[ap_stats[0][1], ap_stats[1][1]]))
else:
metrics = OrderedDict(
zip(['bbox_map', 'segm_map'], [0.0, 0.0]))
metrics = OrderedDict(zip(['bbox_map', 'segm_map'], [0.0, 0.0]))
elif metric == 'COCO':
if isinstance(ap_stats[0], np.ndarray) and isinstance(
ap_stats[1], np.ndarray):
if isinstance(ap_stats[0], np.ndarray) and isinstance(ap_stats[1],
np.ndarray):
metrics = OrderedDict(
zip(['bbox_mmap', 'segm_mmap'],
[ap_stats[0][0], ap_stats[1][0]]))
......@@ -331,7 +339,9 @@ class MaskRCNN(FasterRCNN):
transforms (paddlex.det.transforms): 数据预处理操作。
Returns:
dict: 预测结果列表,每个预测结果由预测框类别标签、预测框类别名称、预测框坐标、预测框内的二值图、
dict: 预测结果列表,每个预测结果由预测框类别标签、预测框类别名称、
预测框坐标(坐标格式为[xmin, ymin, w, h])、
原图大小的预测二值图(1表示预测框类别,0表示背景类)、
预测框得分组成。
"""
if transforms is None and not hasattr(self, 'test_transforms'):
......@@ -346,15 +356,15 @@ class MaskRCNN(FasterRCNN):
im = np.expand_dims(im, axis=0)
im_resize_info = np.expand_dims(im_resize_info, axis=0)
im_shape = np.expand_dims(im_shape, axis=0)
outputs = self.exe.run(
self.test_prog,
outputs = self.exe.run(self.test_prog,
feed={
'image': im,
'im_info': im_resize_info,
'im_shape': im_shape
},
fetch_list=list(self.test_outputs.values()),
return_numpy=False)
return_numpy=False,
use_program_cache=True)
res = {
k: (np.array(v), v.recursive_sequence_lengths())
for k, v in zip(list(self.test_outputs.keys()), outputs)
......@@ -368,8 +378,8 @@ class MaskRCNN(FasterRCNN):
import pycocotools.mask as mask_util
for index, xywh_res in enumerate(xywh_results):
del xywh_res['image_id']
xywh_res['mask'] = mask_util.decode(
segm_results[index]['segmentation'])
xywh_res['mask'] = mask_util.decode(segm_results[index][
'segmentation'])
xywh_res['category'] = self.labels[xywh_res['category_id']]
results.append(xywh_res)
return results
......@@ -66,16 +66,15 @@ def sensitivity(program,
progress = "%.2f%%" % (progress * 100)
logging.info(
"Total evaluate iters={}, current={}, progress={}, eta={}".
format(
total_evaluate_iters, current_iter, progress,
format(total_evaluate_iters, current_iter, progress,
seconds_to_hms(
int(cost * (total_evaluate_iters - current_iter)))),
use_color=True)
current_iter += 1
pruner = Pruner()
logging.info("sensitive - param: {}; ratios: {}".format(
name, ratio))
logging.info("sensitive - param: {}; ratios: {}".format(name,
ratio))
pruned_program, param_backup, _ = pruner.prune(
program=graph.program,
scope=scope,
......@@ -87,8 +86,8 @@ def sensitivity(program,
param_backup=True)
pruned_metric = eval_func(pruned_program)
loss = (baseline - pruned_metric) / baseline
logging.info("pruned param: {}; {}; loss={}".format(
name, ratio, loss))
logging.info("pruned param: {}; {}; loss={}".format(name, ratio,
loss))
sensitivities[name][ratio] = loss
......@@ -116,6 +115,21 @@ def channel_prune(program, prune_names, prune_ratios, place, only_graph=False):
Returns:
paddle.fluid.Program: 裁剪后的Program。
"""
prog_var_shape_dict = {}
for var in program.list_vars():
try:
prog_var_shape_dict[var.name] = var.shape
except Exception:
pass
index = 0
for param, ratio in zip(prune_names, prune_ratios):
origin_num = prog_var_shape_dict[param][0]
pruned_num = int(round(origin_num * ratio))
while origin_num == pruned_num:
ratio -= 0.1
pruned_num = int(round(origin_num * (ratio)))
prune_ratios[index] = ratio
index += 1
scope = fluid.global_scope()
pruner = Pruner()
program, _, _ = pruner.prune(
......@@ -221,6 +235,9 @@ def cal_params_sensitivities(model, save_file, eval_dataset, batch_size=8):
其中``weight_0``是卷积Kernel名;``sensitivities['weight_0']``是一个字典,key是裁剪率,value是敏感度。
"""
if os.path.exists(save_file):
os.remove(save_file)
prune_names = get_prune_params(model)
def eval_for_prune(program):
......@@ -284,6 +301,19 @@ def cal_model_size(program, place, sensitivities_file, eval_metric_loss=0.05):
"""
prune_params_ratios = get_params_ratios(sensitivities_file,
eval_metric_loss)
prog_var_shape_dict = {}
for var in program.list_vars():
try:
prog_var_shape_dict[var.name] = var.shape
except Exception:
pass
for param, ratio in prune_params_ratios.items():
origin_num = prog_var_shape_dict[param][0]
pruned_num = int(round(origin_num * ratio))
while origin_num == pruned_num:
ratio -= 0.1
pruned_num = int(round(origin_num * (ratio)))
prune_params_ratios[param] = ratio
prune_program = channel_prune(
program,
list(prune_params_ratios.keys()),
......
......@@ -142,13 +142,16 @@ def get_prune_params(model):
program = model.test_prog
if model_type.startswith('ResNet') or \
model_type.startswith('DenseNet') or \
model_type.startswith('DarkNet'):
model_type.startswith('DarkNet') or \
model_type.startswith('AlexNet'):
for block in program.blocks:
for param in block.all_parameters():
pd_var = fluid.global_scope().find_var(param.name)
pd_param = pd_var.get_tensor()
if len(np.array(pd_param).shape) == 4:
prune_names.append(param.name)
if model_type == 'AlexNet':
prune_names.remove('conv5_weights')
elif model_type == "MobileNetV1":
prune_names.append("conv1_weights")
for param in program.global_block().all_parameters():
......@@ -162,7 +165,7 @@ def get_prune_params(model):
continue
prune_names.append(param.name)
elif model_type.startswith("MobileNetV3"):
if model_type == 'MobileNetV3_small':
if model_type.startswith('MobileNetV3_small'):
expand_prune_id = [3, 4]
else:
expand_prune_id = [2, 3, 4, 8, 9, 11]
......
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
import paddlex
......@@ -95,11 +95,6 @@ class UNet(DeepLabv3p):
if mode == 'train':
self.optimizer.minimize(model_out)
outputs['loss'] = model_out
elif mode == 'eval':
outputs['loss'] = model_out[0]
outputs['pred'] = model_out[1]
outputs['label'] = model_out[2]
outputs['mask'] = model_out[3]
else:
outputs['pred'] = model_out[0]
outputs['logit'] = model_out[1]
......@@ -141,7 +136,7 @@ class UNet(DeepLabv3p):
lr_decay_power (float): 默认优化器学习率多项式衰减系数。默认0.9。
use_vdl (bool): 是否使用VisualDL进行可视化。默认False。
sensitivities_file (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT',
则自动下载在ImageNet图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。
则自动下载在Cityscapes图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。
eval_metric_loss (float): 可容忍的精度损失。默认为0.05。
early_stop (bool): 是否使用提前终止训练策略。默认值为False。
early_stop_patience (int): 当使用提前终止训练策略时,如果验证集精度在`early_stop_patience`个epoch内
......
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
......
import paddlex
import paddlex.utils.logging as logging
import paddlehub as hub
import os
import os.path as osp
......@@ -56,19 +57,120 @@ image_pretrain = {
'https://paddle-imagenet-models-name.bj.bcebos.com/Xception65_deeplab_pretrained.tar',
'ShuffleNetV2':
'https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_pretrained.tar',
'HRNet_W18':
'https://paddle-imagenet-models-name.bj.bcebos.com/HRNet_W18_C_pretrained.tar',
'HRNet_W30':
'https://paddle-imagenet-models-name.bj.bcebos.com/HRNet_W30_C_pretrained.tar',
'HRNet_W32':
'https://paddle-imagenet-models-name.bj.bcebos.com/HRNet_W32_C_pretrained.tar',
'HRNet_W40':
'https://paddle-imagenet-models-name.bj.bcebos.com/HRNet_W40_C_pretrained.tar',
'HRNet_W48':
'https://paddle-imagenet-models-name.bj.bcebos.com/HRNet_W48_C_pretrained.tar',
'HRNet_W60':
'https://paddle-imagenet-models-name.bj.bcebos.com/HRNet_W60_C_pretrained.tar',
'HRNet_W64':
'https://paddle-imagenet-models-name.bj.bcebos.com/HRNet_W64_C_pretrained.tar',
'AlexNet':
'http://paddle-imagenet-models-name.bj.bcebos.com/AlexNet_pretrained.tar'
}
coco_pretrain = {
'UNet': 'https://paddleseg.bj.bcebos.com/models/unet_coco_v3.tgz'
'YOLOv3_DarkNet53_COCO':
'https://paddlemodels.bj.bcebos.com/object_detection/yolov3_darknet.tar',
'YOLOv3_MobileNetV1_COCO':
'https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v1.tar',
'YOLOv3_MobileNetV3_large_COCO':
'https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v3.pdparams',
'YOLOv3_ResNet34_COCO':
'https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r34.tar',
'YOLOv3_ResNet50_vd_COCO':
'https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r50vd_dcn.tar',
'FasterRCNN_ResNet50_COCO':
'https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_fpn_2x.tar',
'FasterRCNN_ResNet50_vd_COCO':
'https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_vd_fpn_2x.tar',
'FasterRCNN_ResNet101_COCO':
'https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r101_fpn_2x.tar',
'FasterRCNN_ResNet101_vd_COCO':
'https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r101_vd_fpn_2x.tar',
'FasterRCNN_HRNet_W18_COCO':
'https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_hrnetv2p_w18_2x.tar',
'MaskRCNN_ResNet50_COCO':
'https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_r50_fpn_2x.tar',
'MaskRCNN_ResNet50_vd_COCO':
'https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_r50_vd_fpn_2x.tar',
'MaskRCNN_ResNet101_COCO':
'https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_r101_fpn_1x.tar',
'MaskRCNN_ResNet101_vd_COCO':
'https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_r101_vd_fpn_1x.tar',
'UNet_COCO': 'https://paddleseg.bj.bcebos.com/models/unet_coco_v3.tgz',
'DeepLabv3p_MobileNetV2_x1.0_COCO':
'https://bj.bcebos.com/v1/paddleseg/deeplab_mobilenet_x1_0_coco.tgz',
'DeepLabv3p_Xception65_COCO':
'https://paddleseg.bj.bcebos.com/models/xception65_coco.tgz'
}
cityscapes_pretrain = {
'DeepLabv3p_MobileNetV2_x1.0_CITYSCAPES':
'https://paddleseg.bj.bcebos.com/models/mobilenet_cityscapes.tgz',
'DeepLabv3p_Xception65_CITYSCAPES':
'https://paddleseg.bj.bcebos.com/models/xception65_bn_cityscapes.tgz',
'HRNet_W18_CITYSCAPES':
'https://paddleseg.bj.bcebos.com/models/hrnet_w18_bn_cityscapes.tgz',
'FastSCNN_CITYSCAPES':
'https://paddleseg.bj.bcebos.com/models/fast_scnn_cityscape.tar'
}
def get_pretrain_weights(flag, model_type, backbone, save_dir):
def get_pretrain_weights(flag, class_name, backbone, save_dir):
if flag is None:
return None
elif osp.isdir(flag):
return flag
elif osp.isfile(flag):
return flag
warning_info = "{} does not support to be finetuned with weights pretrained on the {} dataset, so pretrain_weights is forced to be set to {}"
if flag == 'COCO':
if class_name == "FasterRCNN" and backbone in ['ResNet18'] or \
class_name == "MaskRCNN" and backbone in ['ResNet18', 'HRNet_W18'] or \
class_name == 'DeepLabv3p' and backbone in ['Xception41', 'MobileNetV2_x0.25', 'MobileNetV2_x0.5', 'MobileNetV2_x1.5', 'MobileNetV2_x2.0']:
model_name = '{}_{}'.format(class_name, backbone)
logging.warning(warning_info.format(model_name, flag, 'IMAGENET'))
flag = 'IMAGENET'
elif class_name == 'HRNet':
logging.warning(warning_info.format(class_name, flag, 'IMAGENET'))
flag = 'IMAGENET'
elif class_name == 'FastSCNN':
logging.warning(
warning_info.format(class_name, flag, 'CITYSCAPES'))
flag = 'CITYSCAPES'
elif flag == 'CITYSCAPES':
model_name = '{}_{}'.format(class_name, backbone)
if class_name == 'UNet':
logging.warning(warning_info.format(class_name, flag, 'COCO'))
flag = 'COCO'
if class_name == 'HRNet' and backbone.split('_')[
-1] in ['W30', 'W32', 'W40', 'W48', 'W60', 'W64']:
logging.warning(warning_info.format(backbone, flag, 'IMAGENET'))
flag = 'IMAGENET'
if class_name == 'DeepLabv3p' and backbone in [
'Xception41', 'MobileNetV2_x0.25', 'MobileNetV2_x0.5',
'MobileNetV2_x1.5', 'MobileNetV2_x2.0'
]:
model_name = '{}_{}'.format(class_name, backbone)
logging.warning(warning_info.format(model_name, flag, 'IMAGENET'))
flag = 'IMAGENET'
elif flag == 'IMAGENET':
if class_name == 'UNet':
logging.warning(warning_info.format(class_name, flag, 'COCO'))
flag = 'COCO'
elif class_name == 'FastSCNN':
logging.warning(
warning_info.format(class_name, flag, 'CITYSCAPES'))
flag = 'CITYSCAPES'
if flag == 'IMAGENET':
new_save_dir = save_dir
if hasattr(paddlex, 'pretrain_dir'):
new_save_dir = paddlex.pretrain_dir
......@@ -80,11 +182,13 @@ def get_pretrain_weights(flag, model_type, backbone, save_dir):
backbone = 'MobileNetV3_small_x1_0_ssld'
elif backbone == 'MobileNetV3_large_ssld':
backbone = 'MobileNetV3_large_x1_0_ssld'
if model_type == 'detector':
if class_name in ['YOLOv3', 'FasterRCNN', 'MaskRCNN']:
if backbone == 'ResNet50':
backbone = 'DetResNet50'
assert backbone in image_pretrain, "There is not ImageNet pretrain weights for {}, you may try COCO.".format(
backbone)
# if backbone == 'AlexNet':
# url = image_pretrain[backbone]
# fname = osp.split(url)[-1].split('.')[0]
# paddlex.utils.download_and_decompress(url, path=new_save_dir)
......@@ -103,17 +207,20 @@ def get_pretrain_weights(flag, model_type, backbone, save_dir):
raise Exception(
"Unexpected error, please make sure paddlehub >= 1.6.2")
return osp.join(new_save_dir, backbone)
elif flag == 'COCO':
elif flag in ['COCO', 'CITYSCAPES']:
new_save_dir = save_dir
if hasattr(paddlex, 'pretrain_dir'):
new_save_dir = paddlex.pretrain_dir
if class_name in ['YOLOv3', 'FasterRCNN', 'MaskRCNN', 'DeepLabv3p']:
backbone = '{}_{}'.format(class_name, backbone)
backbone = "{}_{}".format(backbone, flag)
if flag == 'COCO':
url = coco_pretrain[backbone]
elif flag == 'CITYSCAPES':
url = cityscapes_pretrain[backbone]
fname = osp.split(url)[-1].split('.')[0]
# paddlex.utils.download_and_decompress(url, path=new_save_dir)
# return osp.join(new_save_dir, fname)
assert backbone in coco_pretrain, "There is not COCO pretrain weights for {}, you may try ImageNet.".format(
backbone)
try:
hub.download(backbone, save_path=new_save_dir)
except Exception as e:
......@@ -130,5 +237,5 @@ def get_pretrain_weights(flag, model_type, backbone, save_dir):
return osp.join(new_save_dir, backbone)
else:
raise Exception(
"pretrain_weights need to be defined as directory path or `IMAGENET` or 'COCO' (download pretrain weights automatically)."
"pretrain_weights need to be defined as directory path or 'IMAGENET' or 'COCO' or 'Cityscapes' (download pretrain weights automatically)."
)
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# -*- coding: utf-8 -*
import os
import cv2
import colorsys
......
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
import math
......@@ -128,8 +128,16 @@ class YOLOv3(BaseAPI):
lr_decay_epochs, lr_decay_gamma,
num_steps_each_epoch):
if warmup_steps > lr_decay_epochs[0] * num_steps_each_epoch:
raise Exception("warmup_steps should less than {}".format(
lr_decay_epochs[0] * num_steps_each_epoch))
logging.error(
"In function train(), parameters should satisfy: warmup_steps <= lr_decay_epochs[0]*num_samples_in_train_dataset",
exit=False)
logging.error(
"See this doc for more information: https://github.com/PaddlePaddle/PaddleX/blob/develop/docs/appendix/parameters.md#notice",
exit=False)
logging.error(
"warmup_steps should less than {} or lr_decay_epochs[0] greater than {}, please modify 'lr_decay_epochs' or 'warmup_steps' in train function".
format(lr_decay_epochs[0] * num_steps_each_epoch, warmup_steps
// num_steps_each_epoch))
boundaries = [b * num_steps_each_epoch for b in lr_decay_epochs]
values = [(lr_decay_gamma**i) * learning_rate
for i in range(len(lr_decay_epochs) + 1)]
......@@ -180,7 +188,8 @@ class YOLOv3(BaseAPI):
log_interval_steps (int): 训练日志输出间隔(单位:迭代次数)。默认为10。
save_dir (str): 模型保存路径。默认值为'output'。
pretrain_weights (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',
则自动下载在ImageNet图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认为'IMAGENET'。
则自动下载在ImageNet图片数据上预训练的模型权重;若为字符串'COCO',
则自动下载在COCO数据集上预训练的模型权重;若为None,则不使用预训练模型。默认为'IMAGENET'。
optimizer (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认优化器:
fluid.layers.piecewise_decay衰减策略,fluid.optimizer.Momentum优化方法。
learning_rate (float): 默认优化器的学习率。默认为1.0/8000。
......@@ -277,8 +286,7 @@ class YOLOv3(BaseAPI):
eval_details为dict,包含关键字:'bbox',对应元素预测结果列表,每个预测结果由图像id、
预测框类别id、预测框坐标、预测框得分;’gt‘:真实标注框相关信息。
"""
self.arrange_transforms(
transforms=eval_dataset.transforms, mode='eval')
self.arrange_transforms(transforms=eval_dataset.transforms, mode='eval')
if metric is None:
if hasattr(self, 'metric') and self.metric is not None:
metric = self.metric
......@@ -298,16 +306,14 @@ class YOLOv3(BaseAPI):
data_generator = eval_dataset.generator(
batch_size=batch_size, drop_last=False)
logging.info(
"Start to evaluating(total_samples={}, total_steps={})...".format(
eval_dataset.num_samples, total_steps))
logging.info("Start to evaluating(total_samples={}, total_steps={})...".
format(eval_dataset.num_samples, total_steps))
for step, data in tqdm.tqdm(
enumerate(data_generator()), total=total_steps):
images = np.array([d[0] for d in data])
im_sizes = np.array([d[1] for d in data])
feed_data = {'image': images, 'im_size': im_sizes}
outputs = self.exe.run(
self.test_prog,
outputs = self.exe.run(self.test_prog,
feed=[feed_data],
fetch_list=list(self.test_outputs.values()),
return_numpy=False)
......@@ -326,13 +332,13 @@ class YOLOv3(BaseAPI):
res['gt_label'] = (res_gt_label, [])
res['is_difficult'] = (res_is_difficult, [])
results.append(res)
logging.debug("[EVAL] Epoch={}, Step={}/{}".format(
epoch_id, step + 1, total_steps))
logging.debug("[EVAL] Epoch={}, Step={}/{}".format(epoch_id, step +
1, total_steps))
box_ap_stats, eval_details = eval_results(
results, metric, eval_dataset.coco_gt, with_background=False)
evaluate_metrics = OrderedDict(
zip(['bbox_mmap' if metric == 'COCO' else 'bbox_map'],
box_ap_stats))
zip(['bbox_mmap'
if metric == 'COCO' else 'bbox_map'], box_ap_stats))
if return_details:
return evaluate_metrics, eval_details
return evaluate_metrics
......@@ -346,7 +352,8 @@ class YOLOv3(BaseAPI):
Returns:
list: 预测结果列表,每个预测结果由预测框类别标签、
预测框类别名称、预测框坐标、预测框得分组成。
预测框类别名称、预测框坐标(坐标格式为[xmin, ymin, w, h])、
预测框得分组成。
"""
if transforms is None and not hasattr(self, 'test_transforms'):
raise Exception("transforms need to be defined, now is None.")
......@@ -359,14 +366,12 @@ class YOLOv3(BaseAPI):
im, im_size = self.test_transforms(img_file)
im = np.expand_dims(im, axis=0)
im_size = np.expand_dims(im_size, axis=0)
outputs = self.exe.run(
self.test_prog,
feed={
'image': im,
'im_size': im_size
},
outputs = self.exe.run(self.test_prog,
feed={'image': im,
'im_size': im_size},
fetch_list=list(self.test_outputs.values()),
return_numpy=False)
return_numpy=False,
use_program_cache=True)
res = {
k: (np.array(v), v.recursive_sequence_lengths())
for k, v in zip(list(self.test_outputs.keys()), outputs)
......
......@@ -20,9 +20,12 @@ from .mobilenet_v2 import MobileNetV2
from .mobilenet_v3 import MobileNetV3
from .segmentation import UNet
from .segmentation import DeepLabv3p
from .segmentation import FastSCNN
from .xception import Xception
from .densenet import DenseNet
from .shufflenet_v2 import ShuffleNetV2
from .hrnet import HRNet
from .alexnet import AlexNet
def resnet18(input, num_classes=1000):
......@@ -51,14 +54,20 @@ def resnet50_vd(input, num_classes=1000):
def resnet50_vd_ssld(input, num_classes=1000):
model = ResNet(layers=50, num_classes=num_classes,
variant='d', lr_mult_list=[1.0, 0.1, 0.2, 0.2, 0.3])
model = ResNet(
layers=50,
num_classes=num_classes,
variant='d',
lr_mult_list=[1.0, 0.1, 0.2, 0.2, 0.3])
return model(input)
def resnet101_vd_ssld(input, num_classes=1000):
model = ResNet(layers=101, num_classes=num_classes,
variant='d', lr_mult_list=[1.0, 0.1, 0.2, 0.2, 0.3])
model = ResNet(
layers=101,
num_classes=num_classes,
variant='d',
lr_mult_list=[1.0, 0.1, 0.2, 0.2, 0.3])
return model(input)
......@@ -93,13 +102,17 @@ def mobilenetv3_large(input, num_classes=1000):
def mobilenetv3_small_ssld(input, num_classes=1000):
model = MobileNetV3(num_classes=num_classes, model_name='small',
model = MobileNetV3(
num_classes=num_classes,
model_name='small',
lr_mult_list=[0.25, 0.25, 0.5, 0.5, 0.75])
return model(input)
def mobilenetv3_large_ssld(input, num_classes=1000):
model = MobileNetV3(num_classes=num_classes, model_name='large',
model = MobileNetV3(
num_classes=num_classes,
model_name='large',
lr_mult_list=[0.25, 0.25, 0.5, 0.5, 0.75])
return model(input)
......@@ -133,6 +146,17 @@ def densenet201(input, num_classes=1000):
model = DenseNet(layers=201, num_classes=num_classes)
return model(input)
def shufflenetv2(input, num_classes=1000):
model = ShuffleNetV2(num_classes=num_classes)
return model(input)
def hrnet_w18(input, num_classes=1000):
model = HRNet(width=18, num_classes=num_classes)
return model(input)
def alexnet(input, num_classes=1000):
model = AlexNet(num_classes=num_classes)
return model(input)
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import paddle
import paddle.fluid as fluid
class AlexNet():
def __init__(self, num_classes=1000):
assert num_classes is not None, "In AlextNet, num_classes cannot be None"
self.num_classes = num_classes
def __call__(self, input):
stdv = 1.0 / math.sqrt(input.shape[1] * 11 * 11)
layer_name = [
"conv1", "conv2", "conv3", "conv4", "conv5", "fc6", "fc7", "fc8"
]
conv1 = fluid.layers.conv2d(
input=input,
num_filters=64,
filter_size=11,
stride=4,
padding=2,
groups=1,
act='relu',
bias_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=layer_name[0] + "_offset"),
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=layer_name[0] + "_weights"))
pool1 = fluid.layers.pool2d(
input=conv1,
pool_size=3,
pool_stride=2,
pool_padding=0,
pool_type='max')
stdv = 1.0 / math.sqrt(pool1.shape[1] * 5 * 5)
conv2 = fluid.layers.conv2d(
input=pool1,
num_filters=192,
filter_size=5,
stride=1,
padding=2,
groups=1,
act='relu',
bias_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=layer_name[1] + "_offset"),
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=layer_name[1] + "_weights"))
pool2 = fluid.layers.pool2d(
input=conv2,
pool_size=3,
pool_stride=2,
pool_padding=0,
pool_type='max')
stdv = 1.0 / math.sqrt(pool2.shape[1] * 3 * 3)
conv3 = fluid.layers.conv2d(
input=pool2,
num_filters=384,
filter_size=3,
stride=1,
padding=1,
groups=1,
act='relu',
bias_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=layer_name[2] + "_offset"),
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=layer_name[2] + "_weights"))
stdv = 1.0 / math.sqrt(conv3.shape[1] * 3 * 3)
conv4 = fluid.layers.conv2d(
input=conv3,
num_filters=256,
filter_size=3,
stride=1,
padding=1,
groups=1,
act='relu',
bias_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=layer_name[3] + "_offset"),
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=layer_name[3] + "_weights"))
stdv = 1.0 / math.sqrt(conv4.shape[1] * 3 * 3)
conv5 = fluid.layers.conv2d(
input=conv4,
num_filters=256,
filter_size=3,
stride=1,
padding=1,
groups=1,
act='relu',
bias_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=layer_name[4] + "_offset"),
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=layer_name[4] + "_weights"))
pool5 = fluid.layers.pool2d(
input=conv5,
pool_size=3,
pool_stride=2,
pool_padding=0,
pool_type='max')
drop6 = fluid.layers.dropout(x=pool5, dropout_prob=0.5)
stdv = 1.0 / math.sqrt(drop6.shape[1] * drop6.shape[2] *
drop6.shape[3] * 1.0)
fc6 = fluid.layers.fc(
input=drop6,
size=4096,
act='relu',
bias_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=layer_name[5] + "_offset"),
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=layer_name[5] + "_weights"))
drop7 = fluid.layers.dropout(x=fc6, dropout_prob=0.5)
stdv = 1.0 / math.sqrt(drop7.shape[1] * 1.0)
fc7 = fluid.layers.fc(
input=drop7,
size=4096,
act='relu',
bias_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=layer_name[6] + "_offset"),
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=layer_name[6] + "_weights"))
stdv = 1.0 / math.sqrt(fc7.shape[1] * 1.0)
out = fluid.layers.fc(
input=fc7,
size=self.num_classes,
bias_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=layer_name[7] + "_offset"),
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=layer_name[7] + "_weights"))
return out
......@@ -68,13 +68,14 @@ class DarkNet(object):
bias_attr=False)
bn_name = name + ".bn"
if self.num_classes:
regularizer = None
else:
regularizer = L2Decay(float(self.norm_decay))
bn_param_attr = ParamAttr(
regularizer=L2Decay(float(self.norm_decay)),
name=bn_name + '.scale')
regularizer=regularizer, name=bn_name + '.scale')
bn_bias_attr = ParamAttr(
regularizer=L2Decay(float(self.norm_decay)),
name=bn_name + '.offset')
regularizer=regularizer, name=bn_name + '.offset')
out = fluid.layers.batch_norm(
input=conv,
......
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
......
......@@ -21,7 +21,7 @@ import copy
from paddle import fluid
from .fpn import FPN
from .fpn import (FPN, HRFPN)
from .rpn_head import (RPNHead, FPNRPNHead)
from .roi_extractor import (RoIAlign, FPNRoIAlign)
from .bbox_head import (BBoxHead, TwoFCHead)
......@@ -82,6 +82,11 @@ class FasterRCNN(object):
self.backbone = backbone
self.mode = mode
if with_fpn and fpn is None:
if self.backbone.__class__.__name__.startswith('HRNet'):
fpn = HRFPN()
fpn.min_level = 2
fpn.max_level = 6
else:
fpn = FPN()
self.fpn = fpn
self.num_classes = num_classes
......
......@@ -23,7 +23,7 @@ from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import Xavier
from paddle.fluid.regularizer import L2Decay
__all__ = ['FPN']
__all__ = ['FPN', 'HRFPN']
def ConvNorm(input,
......@@ -219,8 +219,8 @@ class FPN(object):
body_name = body_name_list[i]
body_input = body_dict[body_name]
top_output = self.fpn_inner_output[i - 1]
fpn_inner_single = self._add_topdown_lateral(
body_name, body_input, top_output)
fpn_inner_single = self._add_topdown_lateral(body_name, body_input,
top_output)
self.fpn_inner_output[i] = fpn_inner_single
fpn_dict = {}
fpn_name_list = []
......@@ -293,3 +293,107 @@ class FPN(object):
spatial_scale.insert(0, spatial_scale[0] * 0.5)
res_dict = OrderedDict([(k, fpn_dict[k]) for k in fpn_name_list])
return res_dict, spatial_scale
class HRFPN(object):
"""
HRNet, see https://arxiv.org/abs/1908.07919
Args:
num_chan (int): number of feature channels
pooling_type (str): pooling type of downsampling
share_conv (bool): whethet to share conv for different layers' reduction
spatial_scale (list): feature map scaling factor
"""
def __init__(
self,
num_chan=256,
pooling_type="avg",
share_conv=False,
spatial_scale=[1. / 64, 1. / 32, 1. / 16, 1. / 8, 1. / 4], ):
self.num_chan = num_chan
self.pooling_type = pooling_type
self.share_conv = share_conv
self.spatial_scale = spatial_scale
def get_output(self, body_dict):
num_out = len(self.spatial_scale)
body_name_list = list(body_dict.keys())
num_backbone_stages = len(body_name_list)
outs = []
outs.append(body_dict[body_name_list[0]])
# resize
for i in range(1, len(body_dict)):
resized = self.resize_input_tensor(body_dict[body_name_list[i]],
outs[0], 2**i)
outs.append(resized)
# concat
out = fluid.layers.concat(outs, axis=1)
# reduction
out = fluid.layers.conv2d(
input=out,
num_filters=self.num_chan,
filter_size=1,
stride=1,
padding=0,
param_attr=ParamAttr(name='hrfpn_reduction_weights'),
bias_attr=False)
# conv
outs = [out]
for i in range(1, num_out):
outs.append(
self.pooling(
out,
size=2**i,
stride=2**i,
pooling_type=self.pooling_type))
outputs = []
for i in range(num_out):
conv_name = "shared_fpn_conv" if self.share_conv else "shared_fpn_conv_" + str(
i)
conv = fluid.layers.conv2d(
input=outs[i],
num_filters=self.num_chan,
filter_size=3,
stride=1,
padding=1,
param_attr=ParamAttr(name=conv_name + "_weights"),
bias_attr=False)
outputs.append(conv)
for idx in range(0, num_out - len(body_name_list)):
body_name_list.append("fpn_res5_sum_subsampled_{}x".format(2**(
idx + 1)))
outputs = outputs[::-1]
body_name_list = body_name_list[::-1]
res_dict = OrderedDict([(body_name_list[k], outputs[k])
for k in range(len(body_name_list))])
return res_dict, self.spatial_scale
def resize_input_tensor(self, body_input, ref_output, scale):
shape = fluid.layers.shape(ref_output)
shape_hw = fluid.layers.slice(shape, axes=[0], starts=[2], ends=[4])
out_shape_ = shape_hw
out_shape = fluid.layers.cast(out_shape_, dtype='int32')
out_shape.stop_gradient = True
body_output = fluid.layers.resize_bilinear(
body_input, scale=scale, out_shape=out_shape)
return body_output
def pooling(self, input, size, stride, pooling_type):
pool = fluid.layers.pool2d(
input=input,
pool_size=size,
pool_stride=stride,
pool_type=pooling_type)
return pool
......@@ -21,7 +21,7 @@ import copy
import paddle.fluid as fluid
from .fpn import FPN
from .fpn import (FPN, HRFPN)
from .rpn_head import (RPNHead, FPNRPNHead)
from .roi_extractor import (RoIAlign, FPNRoIAlign)
from .bbox_head import (BBoxHead, TwoFCHead)
......@@ -92,8 +92,12 @@ class MaskRCNN(object):
self.backbone = backbone
self.mode = mode
if with_fpn and fpn is None:
fpn = FPN(
num_chan=num_chan,
if self.backbone.__class__.__name__.startswith('HRNet'):
fpn = HRFPN()
fpn.min_level = 2
fpn.max_level = 6
else:
fpn = FPN(num_chan=num_chan,
min_level=min_level,
max_level=max_level,
spatial_scale=spatial_scale)
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from collections import OrderedDict
from paddle import fluid
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.framework import Variable
from paddle.fluid.regularizer import L2Decay
from numbers import Integral
from paddle.fluid.initializer import MSRA
import math
__all__ = ['HRNet']
class HRNet(object):
def __init__(self,
width=40,
has_se=False,
freeze_at=0,
norm_type='bn',
freeze_norm=False,
norm_decay=0.,
feature_maps=[2, 3, 4, 5],
num_classes=None):
super(HRNet, self).__init__()
if isinstance(feature_maps, Integral):
feature_maps = [feature_maps]
assert 0 <= freeze_at <= 4, "freeze_at should be 0, 1, 2, 3 or 4"
assert len(feature_maps) > 0, "need one or more feature maps"
assert norm_type in ['bn', 'sync_bn']
self.width = width
self.has_se = has_se
self.channels = {
18: [[18, 36], [18, 36, 72], [18, 36, 72, 144]],
30: [[30, 60], [30, 60, 120], [30, 60, 120, 240]],
32: [[32, 64], [32, 64, 128], [32, 64, 128, 256]],
40: [[40, 80], [40, 80, 160], [40, 80, 160, 320]],
44: [[44, 88], [44, 88, 176], [44, 88, 176, 352]],
48: [[48, 96], [48, 96, 192], [48, 96, 192, 384]],
60: [[60, 120], [60, 120, 240], [60, 120, 240, 480]],
64: [[64, 128], [64, 128, 256], [64, 128, 256, 512]],
}
self.freeze_at = freeze_at
self.norm_type = norm_type
self.norm_decay = norm_decay
self.freeze_norm = freeze_norm
self.feature_maps = feature_maps
self.num_classes = num_classes
self.end_points = []
return
def net(self, input):
width = self.width
channels_2, channels_3, channels_4 = self.channels[width]
num_modules_2, num_modules_3, num_modules_4 = 1, 4, 3
x = self.conv_bn_layer(
input=input,
filter_size=3,
num_filters=64,
stride=2,
if_act=True,
name='layer1_1')
x = self.conv_bn_layer(
input=x,
filter_size=3,
num_filters=64,
stride=2,
if_act=True,
name='layer1_2')
la1 = self.layer1(x, name='layer2')
tr1 = self.transition_layer([la1], [256], channels_2, name='tr1')
st2 = self.stage(tr1, num_modules_2, channels_2, name='st2')
tr2 = self.transition_layer(st2, channels_2, channels_3, name='tr2')
st3 = self.stage(tr2, num_modules_3, channels_3, name='st3')
tr3 = self.transition_layer(st3, channels_3, channels_4, name='tr3')
st4 = self.stage(tr3, num_modules_4, channels_4, name='st4')
# classification
if self.num_classes:
last_cls = self.last_cls_out(x=st4, name='cls_head')
y = last_cls[0]
last_num_filters = [256, 512, 1024]
for i in range(3):
y = fluid.layers.elementwise_add(
last_cls[i + 1],
self.conv_bn_layer(
input=y,
filter_size=3,
num_filters=last_num_filters[i],
stride=2,
name='cls_head_add' + str(i + 1)))
y = self.conv_bn_layer(
input=y,
filter_size=1,
num_filters=2048,
stride=1,
name='cls_head_last_conv')
pool = fluid.layers.pool2d(
input=y, pool_type='avg', global_pooling=True)
stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
out = fluid.layers.fc(
input=pool,
size=self.num_classes,
param_attr=ParamAttr(
name='fc_weights',
initializer=fluid.initializer.Uniform(-stdv, stdv)),
bias_attr=ParamAttr(name='fc_offset'))
return out
# segmentation
if self.feature_maps == "stage4":
return st4
self.end_points = st4
return st4[-1]
def layer1(self, input, name=None):
conv = input
for i in range(4):
conv = self.bottleneck_block(
conv,
num_filters=64,
downsample=True if i == 0 else False,
name=name + '_' + str(i + 1))
return conv
def transition_layer(self, x, in_channels, out_channels, name=None):
num_in = len(in_channels)
num_out = len(out_channels)
out = []
for i in range(num_out):
if i < num_in:
if in_channels[i] != out_channels[i]:
residual = self.conv_bn_layer(
x[i],
filter_size=3,
num_filters=out_channels[i],
name=name + '_layer_' + str(i + 1))
out.append(residual)
else:
out.append(x[i])
else:
residual = self.conv_bn_layer(
x[-1],
filter_size=3,
num_filters=out_channels[i],
stride=2,
name=name + '_layer_' + str(i + 1))
out.append(residual)
return out
def branches(self, x, block_num, channels, name=None):
out = []
for i in range(len(channels)):
residual = x[i]
for j in range(block_num):
residual = self.basic_block(
residual,
channels[i],
name=name + '_branch_layer_' + str(i + 1) + '_' +
str(j + 1))
out.append(residual)
return out
def fuse_layers(self, x, channels, multi_scale_output=True, name=None):
out = []
for i in range(len(channels) if multi_scale_output else 1):
residual = x[i]
if self.feature_maps == "stage4":
shape = fluid.layers.shape(residual)
width = shape[-1]
height = shape[-2]
for j in range(len(channels)):
if j > i:
y = self.conv_bn_layer(
x[j],
filter_size=1,
num_filters=channels[i],
if_act=False,
name=name + '_layer_' + str(i + 1) + '_' + str(j + 1))
if self.feature_maps == "stage4":
y = fluid.layers.resize_bilinear(
input=y, out_shape=[height, width])
else:
y = fluid.layers.resize_nearest(
input=y, scale=2**(j - i))
residual = fluid.layers.elementwise_add(
x=residual, y=y, act=None)
elif j < i:
y = x[j]
for k in range(i - j):
if k == i - j - 1:
y = self.conv_bn_layer(
y,
filter_size=3,
num_filters=channels[i],
stride=2,
if_act=False,
name=name + '_layer_' + str(i + 1) + '_' +
str(j + 1) + '_' + str(k + 1))
else:
y = self.conv_bn_layer(
y,
filter_size=3,
num_filters=channels[j],
stride=2,
name=name + '_layer_' + str(i + 1) + '_' +
str(j + 1) + '_' + str(k + 1))
residual = fluid.layers.elementwise_add(
x=residual, y=y, act=None)
residual = fluid.layers.relu(residual)
out.append(residual)
return out
def high_resolution_module(self,
x,
channels,
multi_scale_output=True,
name=None):
residual = self.branches(x, 4, channels, name=name)
out = self.fuse_layers(
residual,
channels,
multi_scale_output=multi_scale_output,
name=name)
return out
def stage(self,
x,
num_modules,
channels,
multi_scale_output=True,
name=None):
out = x
for i in range(num_modules):
if i == num_modules - 1 and multi_scale_output == False:
out = self.high_resolution_module(
out,
channels,
multi_scale_output=False,
name=name + '_' + str(i + 1))
else:
out = self.high_resolution_module(
out, channels, name=name + '_' + str(i + 1))
return out
def last_cls_out(self, x, name=None):
out = []
num_filters_list = [32, 64, 128, 256]
for i in range(len(x)):
out.append(
self.bottleneck_block(
input=x[i],
num_filters=num_filters_list[i],
name=name + 'conv_' + str(i + 1),
downsample=True))
return out
def basic_block(self,
input,
num_filters,
stride=1,
downsample=False,
name=None):
residual = input
conv = self.conv_bn_layer(
input=input,
filter_size=3,
num_filters=num_filters,
stride=stride,
name=name + '_conv1')
conv = self.conv_bn_layer(
input=conv,
filter_size=3,
num_filters=num_filters,
if_act=False,
name=name + '_conv2')
if downsample:
residual = self.conv_bn_layer(
input=input,
filter_size=1,
num_filters=num_filters,
if_act=False,
name=name + '_downsample')
if self.has_se:
conv = self.squeeze_excitation(
input=conv,
num_channels=num_filters,
reduction_ratio=16,
name=name + '_fc')
return fluid.layers.elementwise_add(x=residual, y=conv, act='relu')
def bottleneck_block(self,
input,
num_filters,
stride=1,
downsample=False,
name=None):
residual = input
conv = self.conv_bn_layer(
input=input,
filter_size=1,
num_filters=num_filters,
name=name + '_conv1')
conv = self.conv_bn_layer(
input=conv,
filter_size=3,
num_filters=num_filters,
stride=stride,
name=name + '_conv2')
conv = self.conv_bn_layer(
input=conv,
filter_size=1,
num_filters=num_filters * 4,
if_act=False,
name=name + '_conv3')
if downsample:
residual = self.conv_bn_layer(
input=input,
filter_size=1,
num_filters=num_filters * 4,
if_act=False,
name=name + '_downsample')
if self.has_se:
conv = self.squeeze_excitation(
input=conv,
num_channels=num_filters * 4,
reduction_ratio=16,
name=name + '_fc')
return fluid.layers.elementwise_add(x=residual, y=conv, act='relu')
def squeeze_excitation(self,
input,
num_channels,
reduction_ratio,
name=None):
pool = fluid.layers.pool2d(
input=input, pool_size=0, pool_type='avg', global_pooling=True)
stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
squeeze = fluid.layers.fc(
input=pool,
size=num_channels / reduction_ratio,
act='relu',
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=name + '_sqz_weights'),
bias_attr=ParamAttr(name=name + '_sqz_offset'))
stdv = 1.0 / math.sqrt(squeeze.shape[1] * 1.0)
excitation = fluid.layers.fc(
input=squeeze,
size=num_channels,
act='sigmoid',
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=name + '_exc_weights'),
bias_attr=ParamAttr(name=name + '_exc_offset'))
scale = fluid.layers.elementwise_mul(x=input, y=excitation, axis=0)
return scale
def conv_bn_layer(self,
input,
filter_size,
num_filters,
stride=1,
padding=1,
num_groups=1,
if_act=True,
name=None):
conv = fluid.layers.conv2d(
input=input,
num_filters=num_filters,
filter_size=filter_size,
stride=stride,
padding=(filter_size - 1) // 2,
groups=num_groups,
act=None,
param_attr=ParamAttr(
initializer=MSRA(), name=name + '_weights'),
bias_attr=False)
bn_name = name + '_bn'
bn = self._bn(input=conv, bn_name=bn_name)
if if_act:
bn = fluid.layers.relu(bn)
return bn
def _bn(self, input, act=None, bn_name=None):
norm_lr = 0. if self.freeze_norm else 1.
norm_decay = self.norm_decay
if self.num_classes or self.feature_maps == "stage4":
regularizer = None
pattr_initializer = fluid.initializer.Constant(1.0)
battr_initializer = fluid.initializer.Constant(0.0)
else:
regularizer = L2Decay(norm_decay)
pattr_initializer = None
battr_initializer = None
pattr = ParamAttr(
name=bn_name + '_scale',
learning_rate=norm_lr,
regularizer=regularizer,
initializer=pattr_initializer)
battr = ParamAttr(
name=bn_name + '_offset',
learning_rate=norm_lr,
regularizer=regularizer,
initializer=battr_initializer)
global_stats = True if self.freeze_norm else False
out = fluid.layers.batch_norm(
input=input,
act=act,
name=bn_name + '.output.1',
param_attr=pattr,
bias_attr=battr,
moving_mean_name=bn_name + '_mean',
moving_variance_name=bn_name + '_variance',
use_global_stats=global_stats)
scale = fluid.framework._get_var(pattr.name)
bias = fluid.framework._get_var(battr.name)
if self.freeze_norm:
scale.stop_gradient = True
bias.stop_gradient = True
return out
def __call__(self, input):
assert isinstance(input, Variable)
if isinstance(self.feature_maps, (list, tuple)):
assert not (set(self.feature_maps) - set([2, 3, 4, 5])), \
"feature maps {} not in [2, 3, 4, 5]".format(self.feature_maps)
res_endpoints = []
res = input
feature_maps = self.feature_maps
out = self.net(input)
if self.num_classes or self.feature_maps == "stage4":
return out
for i in feature_maps:
res = self.end_points[i - 2]
if i in self.feature_maps:
res_endpoints.append(res)
if self.freeze_at >= i:
res.stop_gradient = True
return OrderedDict([('res{}_sum'.format(self.feature_maps[idx]), feat)
for idx, feat in enumerate(res_endpoints)])
......@@ -79,10 +79,14 @@ class MobileNetV1(object):
bn_name = name + "_bn"
norm_decay = self.norm_decay
if self.num_classes:
regularizer = None
else:
regularizer = L2Decay(norm_decay)
bn_param_attr = ParamAttr(
regularizer=L2Decay(norm_decay), name=bn_name + '_scale')
regularizer=regularizer, name=bn_name + '_scale')
bn_bias_attr = ParamAttr(
regularizer=L2Decay(norm_decay), name=bn_name + '_offset')
regularizer=regularizer, name=bn_name + '_offset')
return fluid.layers.batch_norm(
input=conv,
act=act,
......@@ -189,11 +193,11 @@ class MobileNetV1(object):
if self.num_classes:
out = fluid.layers.pool2d(
input=out, pool_type='avg', global_pooling=True)
output = fluid.layers.fc(
input=out,
output = fluid.layers.fc(input=out,
size=self.num_classes,
param_attr=ParamAttr(
initializer=fluid.initializer.MSRA(), name="fc7_weights"),
initializer=fluid.initializer.MSRA(),
name="fc7_weights"),
bias_attr=ParamAttr(name="fc7_offset"))
return output
......
......@@ -31,6 +31,7 @@ class MobileNetV3():
with_extra_blocks (bool): if extra blocks should be added.
extra_block_filters (list): number of filter for each extra block.
"""
def __init__(self,
scale=1.0,
model_name='small',
......@@ -113,10 +114,16 @@ class MobileNetV3():
lr_idx = self.curr_stage // self.lr_interval
lr_idx = min(lr_idx, len(self.lr_mult_list) - 1)
lr_mult = self.lr_mult_list[lr_idx]
conv_param_attr = ParamAttr(name=name + '_weights',
if self.num_classes:
regularizer = None
else:
regularizer = L2Decay(self.conv_decay)
conv_param_attr = ParamAttr(
name=name + '_weights',
learning_rate=lr_mult,
regularizer=L2Decay(self.conv_decay))
conv = fluid.layers.conv2d(input=input,
regularizer=regularizer)
conv = fluid.layers.conv2d(
input=input,
num_filters=num_filters,
filter_size=filter_size,
stride=stride,
......@@ -127,11 +134,12 @@ class MobileNetV3():
param_attr=conv_param_attr,
bias_attr=False)
bn_name = name + '_bn'
bn_param_attr = ParamAttr(name=bn_name + "_scale",
regularizer=L2Decay(self.norm_decay))
bn_bias_attr = ParamAttr(name=bn_name + "_offset",
regularizer=L2Decay(self.norm_decay))
bn = fluid.layers.batch_norm(input=conv,
bn_param_attr = ParamAttr(
name=bn_name + "_scale", regularizer=L2Decay(self.norm_decay))
bn_bias_attr = ParamAttr(
name=bn_name + "_offset", regularizer=L2Decay(self.norm_decay))
bn = fluid.layers.batch_norm(
input=conv,
param_attr=bn_param_attr,
bias_attr=bn_bias_attr,
moving_mean_name=bn_name + '_mean',
......@@ -154,10 +162,8 @@ class MobileNetV3():
lr_mult = self.lr_mult_list[lr_idx]
num_mid_filter = int(num_out_filter // ratio)
pool = fluid.layers.pool2d(input=input,
pool_type='avg',
global_pooling=True,
use_cudnn=False)
pool = fluid.layers.pool2d(
input=input, pool_type='avg', global_pooling=True, use_cudnn=False)
conv1 = fluid.layers.conv2d(
input=pool,
filter_size=1,
......@@ -191,7 +197,8 @@ class MobileNetV3():
use_se=False,
name=None):
input_data = input
conv0 = self._conv_bn_layer(input=input,
conv0 = self._conv_bn_layer(
input=input,
filter_size=1,
num_filters=num_mid_filter,
stride=1,
......@@ -201,7 +208,8 @@ class MobileNetV3():
name=name + '_expand')
if self.block_stride == 16 and stride == 2:
self.end_points.append(conv0)
conv1 = self._conv_bn_layer(input=conv0,
conv1 = self._conv_bn_layer(
input=conv0,
filter_size=filter_size,
num_filters=num_mid_filter,
stride=stride,
......@@ -213,11 +221,11 @@ class MobileNetV3():
name=name + '_depthwise')
if use_se:
conv1 = self._se_block(input=conv1,
num_out_filter=num_mid_filter,
name=name + '_se')
conv1 = self._se_block(
input=conv1, num_out_filter=num_mid_filter, name=name + '_se')
conv2 = self._conv_bn_layer(input=conv1,
conv2 = self._conv_bn_layer(
input=conv1,
filter_size=1,
num_filters=num_out_filter,
stride=1,
......@@ -227,7 +235,8 @@ class MobileNetV3():
if num_in_filter != num_out_filter or stride != 1:
return conv2
else:
return fluid.layers.elementwise_add(x=input_data, y=conv2, act=None)
return fluid.layers.elementwise_add(
x=input_data, y=conv2, act=None)
def _extra_block_dw(self,
input,
......@@ -235,14 +244,16 @@ class MobileNetV3():
num_filters2,
stride,
name=None):
pointwise_conv = self._conv_bn_layer(input=input,
pointwise_conv = self._conv_bn_layer(
input=input,
filter_size=1,
num_filters=int(num_filters1),
stride=1,
padding="SAME",
act='relu6',
name=name + "_extra1")
depthwise_conv = self._conv_bn_layer(input=pointwise_conv,
depthwise_conv = self._conv_bn_layer(
input=pointwise_conv,
filter_size=3,
num_filters=int(num_filters2),
stride=stride,
......@@ -251,7 +262,8 @@ class MobileNetV3():
act='relu6',
use_cudnn=False,
name=name + "_extra2_dw")
normal_conv = self._conv_bn_layer(input=depthwise_conv,
normal_conv = self._conv_bn_layer(
input=depthwise_conv,
filter_size=1,
num_filters=int(num_filters2),
stride=1,
......@@ -282,7 +294,8 @@ class MobileNetV3():
self.block_stride *= layer_cfg[5]
if layer_cfg[5] == 2:
blocks.append(conv)
conv = self._residual_unit(input=conv,
conv = self._residual_unit(
input=conv,
num_in_filter=inplanes,
num_mid_filter=int(scale * layer_cfg[1]),
num_out_filter=int(scale * layer_cfg[2]),
......@@ -298,7 +311,8 @@ class MobileNetV3():
blocks.append(conv)
if self.num_classes:
conv = self._conv_bn_layer(input=conv,
conv = self._conv_bn_layer(
input=conv,
filter_size=1,
num_filters=int(scale * self.cls_ch_squeeze),
stride=1,
......@@ -308,7 +322,8 @@ class MobileNetV3():
act='hard_swish',
name='conv_last')
conv = fluid.layers.pool2d(input=conv,
conv = fluid.layers.pool2d(
input=conv,
pool_type='avg',
global_pooling=True,
use_cudnn=False)
......@@ -333,7 +348,8 @@ class MobileNetV3():
return blocks
# extra block
conv_extra = self._conv_bn_layer(conv,
conv_extra = self._conv_bn_layer(
conv,
filter_size=1,
num_filters=int(scale * cfg[-1][1]),
stride=1,
......
......@@ -135,8 +135,10 @@ class ResNet(object):
filter_size=filter_size,
stride=stride,
padding=padding,
param_attr=ParamAttr(initializer=Constant(0.0), name=name + ".w_0"),
bias_attr=ParamAttr(initializer=Constant(0.0), name=name + ".b_0"),
param_attr=ParamAttr(
initializer=Constant(0.0), name=name + ".w_0"),
bias_attr=ParamAttr(
initializer=Constant(0.0), name=name + ".b_0"),
act=act,
name=name)
return out
......@@ -151,7 +153,8 @@ class ResNet(object):
name=None,
dcn_v2=False,
use_lr_mult_list=False):
lr_mult = self.lr_mult_list[self.curr_stage] if use_lr_mult_list else 1.0
lr_mult = self.lr_mult_list[
self.curr_stage] if use_lr_mult_list else 1.0
_name = self.prefix_name + name if self.prefix_name != '' else name
if not dcn_v2:
conv = fluid.layers.conv2d(
......@@ -162,8 +165,8 @@ class ResNet(object):
padding=(filter_size - 1) // 2,
groups=groups,
act=None,
param_attr=ParamAttr(name=_name + "_weights",
learning_rate=lr_mult),
param_attr=ParamAttr(
name=_name + "_weights", learning_rate=lr_mult),
bias_attr=False,
name=_name + '.conv2d.output.1')
else:
......@@ -202,14 +205,18 @@ class ResNet(object):
norm_lr = 0. if self.freeze_norm else lr_mult
norm_decay = self.norm_decay
if self.num_classes:
regularizer = None
else:
regularizer = L2Decay(norm_decay)
pattr = ParamAttr(
name=bn_name + '_scale',
learning_rate=norm_lr,
regularizer=L2Decay(norm_decay))
regularizer=regularizer)
battr = ParamAttr(
name=bn_name + '_offset',
learning_rate=norm_lr,
regularizer=L2Decay(norm_decay))
regularizer=regularizer)
if self.norm_type in ['bn', 'sync_bn']:
global_stats = True if self.freeze_norm else False
......@@ -262,8 +269,8 @@ class ResNet(object):
pool_padding=0,
ceil_mode=True,
pool_type='avg')
return self._conv_norm(input, ch_out, 1, 1, name=name,
use_lr_mult_list=True)
return self._conv_norm(
input, ch_out, 1, 1, name=name, use_lr_mult_list=True)
return self._conv_norm(input, ch_out, 1, stride, name=name)
else:
return input
......
......@@ -14,5 +14,7 @@
from .unet import UNet
from .deeplabv3p import DeepLabv3p
from .hrnet import HRNet
from .fast_scnn import FastSCNN
from .model_utils import libs
from .model_utils import loss
......@@ -28,7 +28,6 @@ from .model_utils.libs import sigmoid_to_softmax
from .model_utils.loss import softmax_with_loss
from .model_utils.loss import dice_loss
from .model_utils.loss import bce_loss
import paddlex.utils.logging as logging
from paddlex.cv.nets.xception import Xception
from paddlex.cv.nets.mobilenet_v2 import MobileNetV2
......@@ -135,7 +134,8 @@ class DeepLabv3p(object):
param_attr = fluid.ParamAttr(
name=name_scope + 'weights',
regularizer=None,
initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.06))
initializer=fluid.initializer.TruncatedNormal(
loc=0.0, scale=0.06))
with scope('encoder'):
channel = 256
with scope("image_pool"):
......@@ -151,8 +151,8 @@ class DeepLabv3p(object):
padding=0,
param_attr=param_attr))
input_shape = fluid.layers.shape(input)
image_avg = fluid.layers.resize_bilinear(
image_avg, input_shape[2:])
image_avg = fluid.layers.resize_bilinear(image_avg,
input_shape[2:])
with scope("aspp0"):
aspp0 = bn_relu(
......@@ -244,7 +244,8 @@ class DeepLabv3p(object):
param_attr = fluid.ParamAttr(
name=name_scope + 'weights',
regularizer=None,
initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.06))
initializer=fluid.initializer.TruncatedNormal(
loc=0.0, scale=0.06))
with scope('decoder'):
with scope('concat'):
decode_shortcut = bn_relu(
......@@ -326,9 +327,6 @@ class DeepLabv3p(object):
if self.mode == 'train':
inputs['label'] = fluid.data(
dtype='int32', shape=[None, 1, None, None], name='label')
elif self.mode == 'eval':
inputs['label'] = fluid.data(
dtype='int32', shape=[None, 1, None, None], name='label')
return inputs
def build_net(self, inputs):
......@@ -351,7 +349,8 @@ class DeepLabv3p(object):
name=name_scope + 'weights',
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=0.0),
initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.01))
initializer=fluid.initializer.TruncatedNormal(
loc=0.0, scale=0.01))
with scope('logit'):
with fluid.name_scope('last_conv'):
logit = conv(
......
# coding: utf8
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from collections import OrderedDict
import paddle.fluid as fluid
from .model_utils.libs import scope
from .model_utils.libs import bn, bn_relu, relu, conv_bn_layer
from .model_utils.libs import conv, avg_pool
from .model_utils.libs import separate_conv
from .model_utils.libs import sigmoid_to_softmax
from .model_utils.loss import softmax_with_loss
from .model_utils.loss import dice_loss
from .model_utils.loss import bce_loss
class FastSCNN(object):
def __init__(self,
num_classes,
mode='train',
use_bce_loss=False,
use_dice_loss=False,
class_weight=None,
multi_loss_weight=[1.0],
ignore_index=255,
fixed_input_shape=None):
# dice_loss或bce_loss只适用两类分割中
if num_classes > 2 and (use_bce_loss or use_dice_loss):
raise ValueError(
"dice loss and bce loss is only applicable to binary classfication"
)
if class_weight is not None:
if isinstance(class_weight, list):
if len(class_weight) != num_classes:
raise ValueError(
"Length of class_weight should be equal to number of classes"
)
elif isinstance(class_weight, str):
if class_weight.lower() != 'dynamic':
raise ValueError(
"if class_weight is string, must be dynamic!")
else:
raise TypeError(
'Expect class_weight is a list or string but receive {}'.
format(type(class_weight)))
self.num_classes = num_classes
self.mode = mode
self.use_bce_loss = use_bce_loss
self.use_dice_loss = use_dice_loss
self.class_weight = class_weight
self.ignore_index = ignore_index
self.multi_loss_weight = multi_loss_weight
self.fixed_input_shape = fixed_input_shape
def build_net(self, inputs):
if self.use_dice_loss or self.use_bce_loss:
self.num_classes = 1
image = inputs['image']
size = fluid.layers.shape(image)[2:]
with scope('learning_to_downsample'):
higher_res_features = self._learning_to_downsample(image, 32, 48,
64)
with scope('global_feature_extractor'):
lower_res_feature = self._global_feature_extractor(
higher_res_features, 64, [64, 96, 128], 128, 6, [3, 3, 3])
with scope('feature_fusion'):
x = self._feature_fusion(higher_res_features, lower_res_feature,
64, 128, 128)
with scope('classifier'):
logit = self._classifier(x, 128)
logit = fluid.layers.resize_bilinear(logit, size, align_mode=0)
if len(self.multi_loss_weight) == 3:
with scope('aux_layer_higher'):
higher_logit = self._aux_layer(higher_res_features,
self.num_classes)
higher_logit = fluid.layers.resize_bilinear(
higher_logit, size, align_mode=0)
with scope('aux_layer_lower'):
lower_logit = self._aux_layer(lower_res_feature,
self.num_classes)
lower_logit = fluid.layers.resize_bilinear(
lower_logit, size, align_mode=0)
logit = (logit, higher_logit, lower_logit)
elif len(self.multi_loss_weight) == 2:
with scope('aux_layer_higher'):
higher_logit = self._aux_layer(higher_res_features,
self.num_classes)
higher_logit = fluid.layers.resize_bilinear(
higher_logit, size, align_mode=0)
logit = (logit, higher_logit)
else:
logit = (logit, )
if self.num_classes == 1:
out = sigmoid_to_softmax(logit[0])
out = fluid.layers.transpose(out, [0, 2, 3, 1])
else:
out = fluid.layers.transpose(logit[0], [0, 2, 3, 1])
pred = fluid.layers.argmax(out, axis=3)
pred = fluid.layers.unsqueeze(pred, axes=[3])
if self.mode == 'train':
label = inputs['label']
return self._get_loss(logit, label)
elif self.mode == 'eval':
label = inputs['label']
loss = self._get_loss(logit, label)
return loss, pred, label, mask
else:
if self.num_classes == 1:
logit = sigmoid_to_softmax(logit[0])
else:
logit = fluid.layers.softmax(logit[0], axis=1)
return pred, logit
def generate_inputs(self):
inputs = OrderedDict()
if self.fixed_input_shape is not None:
input_shape = [
None, 3, self.fixed_input_shape[1], self.fixed_input_shape[0]
]
inputs['image'] = fluid.data(
dtype='float32', shape=input_shape, name='image')
else:
inputs['image'] = fluid.data(
dtype='float32', shape=[None, 3, None, None], name='image')
if self.mode == 'train':
inputs['label'] = fluid.data(
dtype='int32', shape=[None, 1, None, None], name='label')
elif self.mode == 'eval':
inputs['label'] = fluid.data(
dtype='int32', shape=[None, 1, None, None], name='label')
return inputs
def _get_loss(self, logits, label):
avg_loss = 0
if not (self.use_dice_loss or self.use_bce_loss):
for i, logit in enumerate(logits):
logit_mask = (
label.astype('int32') != self.ignore_index).astype('int32')
loss = softmax_with_loss(
logit,
label,
logit_mask,
num_classes=self.num_classes,
weight=self.class_weight,
ignore_index=self.ignore_index)
avg_loss += self.multi_loss_weight[i] * loss
else:
if self.use_dice_loss:
for i, logit in enumerate(logits):
logit_mask = (label.astype('int32') != self.ignore_index
).astype('int32')
loss = dice_loss(logit, label, logit_mask)
avg_loss += self.multi_loss_weight[i] * loss
if self.use_bce_loss:
for i, logit in enumerate(logits):
#logit_label = fluid.layers.resize_nearest(label, logit_shape[2:])
logit_mask = (label.astype('int32') != self.ignore_index
).astype('int32')
loss = bce_loss(
logit,
label,
logit_mask,
ignore_index=self.ignore_index)
avg_loss += self.multi_loss_weight[i] * loss
return avg_loss
def _learning_to_downsample(self,
x,
dw_channels1=32,
dw_channels2=48,
out_channels=64):
x = relu(bn(conv(x, dw_channels1, 3, 2)))
with scope('dsconv1'):
x = separate_conv(
x, dw_channels2, stride=2, filter=3, act=fluid.layers.relu)
with scope('dsconv2'):
x = separate_conv(
x, out_channels, stride=2, filter=3, act=fluid.layers.relu)
return x
def _shortcut(self, input, data_residual):
return fluid.layers.elementwise_add(input, data_residual)
def _dropout2d(self, input, prob, is_train=False):
if not is_train:
return input
keep_prob = 1.0 - prob
shape = fluid.layers.shape(input)
channels = shape[1]
random_tensor = keep_prob + fluid.layers.uniform_random(
[shape[0], channels, 1, 1], min=0., max=1.)
binary_tensor = fluid.layers.floor(random_tensor)
output = input / keep_prob * binary_tensor
return output
def _inverted_residual_unit(self,
input,
num_in_filter,
num_filters,
ifshortcut,
stride,
filter_size,
padding,
expansion_factor,
name=None):
num_expfilter = int(round(num_in_filter * expansion_factor))
channel_expand = conv_bn_layer(
input=input,
num_filters=num_expfilter,
filter_size=1,
stride=1,
padding=0,
num_groups=1,
if_act=True,
name=name + '_expand')
bottleneck_conv = conv_bn_layer(
input=channel_expand,
num_filters=num_expfilter,
filter_size=filter_size,
stride=stride,
padding=padding,
num_groups=num_expfilter,
if_act=True,
name=name + '_dwise',
use_cudnn=False)
depthwise_output = bottleneck_conv
linear_out = conv_bn_layer(
input=bottleneck_conv,
num_filters=num_filters,
filter_size=1,
stride=1,
padding=0,
num_groups=1,
if_act=False,
name=name + '_linear')
if ifshortcut:
out = self._shortcut(input=input, data_residual=linear_out)
return out, depthwise_output
else:
return linear_out, depthwise_output
def _inverted_blocks(self, input, in_c, t, c, n, s, name=None):
first_block, depthwise_output = self._inverted_residual_unit(
input=input,
num_in_filter=in_c,
num_filters=c,
ifshortcut=False,
stride=s,
filter_size=3,
padding=1,
expansion_factor=t,
name=name + '_1')
last_residual_block = first_block
last_c = c
for i in range(1, n):
last_residual_block, depthwise_output = self._inverted_residual_unit(
input=last_residual_block,
num_in_filter=last_c,
num_filters=c,
ifshortcut=True,
stride=1,
filter_size=3,
padding=1,
expansion_factor=t,
name=name + '_' + str(i + 1))
return last_residual_block, depthwise_output
def _psp_module(self, input, out_features):
cat_layers = []
sizes = (1, 2, 3, 6)
for size in sizes:
psp_name = "psp" + str(size)
with scope(psp_name):
pool = fluid.layers.adaptive_pool2d(
input,
pool_size=[size, size],
pool_type='avg',
name=psp_name + '_adapool')
data = conv(
pool,
out_features,
filter_size=1,
bias_attr=False,
name=psp_name + '_conv')
data_bn = bn(data, act='relu')
interp = fluid.layers.resize_bilinear(
data_bn,
out_shape=fluid.layers.shape(input)[2:],
name=psp_name + '_interp',
align_mode=0)
cat_layers.append(interp)
cat_layers = [input] + cat_layers
out = fluid.layers.concat(cat_layers, axis=1, name='psp_cat')
return out
def _aux_layer(self, x, num_classes):
x = relu(bn(conv(x, 32, 3, padding=1)))
x = self._dropout2d(x, 0.1, is_train=(self.mode == 'train'))
with scope('logit'):
x = conv(x, num_classes, 1, bias_attr=True)
return x
def _feature_fusion(self,
higher_res_feature,
lower_res_feature,
higher_in_channels,
lower_in_channels,
out_channels,
scale_factor=4):
shape = fluid.layers.shape(higher_res_feature)
w = shape[-1]
h = shape[-2]
lower_res_feature = fluid.layers.resize_bilinear(
lower_res_feature, [h, w], align_mode=0)
with scope('dwconv'):
lower_res_feature = relu(
bn(conv(lower_res_feature, out_channels,
1))) #(lower_res_feature)
with scope('conv_lower_res'):
lower_res_feature = bn(
conv(
lower_res_feature, out_channels, 1, bias_attr=True))
with scope('conv_higher_res'):
higher_res_feature = bn(
conv(
higher_res_feature, out_channels, 1, bias_attr=True))
out = higher_res_feature + lower_res_feature
return relu(out)
def _global_feature_extractor(self,
x,
in_channels=64,
block_channels=(64, 96, 128),
out_channels=128,
t=6,
num_blocks=(3, 3, 3)):
x, _ = self._inverted_blocks(x, in_channels, t, block_channels[0],
num_blocks[0], 2, 'inverted_block_1')
x, _ = self._inverted_blocks(x, block_channels[0], t,
block_channels[1], num_blocks[1], 2,
'inverted_block_2')
x, _ = self._inverted_blocks(x, block_channels[1], t,
block_channels[2], num_blocks[2], 1,
'inverted_block_3')
x = self._psp_module(x, block_channels[2] // 4)
with scope('out'):
x = relu(bn(conv(x, out_channels, 1)))
return x
def _classifier(self, x, dw_channels, stride=1):
with scope('dsconv1'):
x = separate_conv(
x, dw_channels, stride=stride, filter=3, act=fluid.layers.relu)
with scope('dsconv2'):
x = separate_conv(
x, dw_channels, stride=stride, filter=3, act=fluid.layers.relu)
x = self._dropout2d(x, 0.1, is_train=self.mode == 'train')
x = conv(x, self.num_classes, 1, bias_attr=True)
return x
# coding: utf8
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from collections import OrderedDict
import paddle.fluid as fluid
from paddle.fluid.initializer import MSRA
from paddle.fluid.param_attr import ParamAttr
from .model_utils.libs import sigmoid_to_softmax
from .model_utils.loss import softmax_with_loss
from .model_utils.loss import dice_loss
from .model_utils.loss import bce_loss
import paddlex
class HRNet(object):
def __init__(self,
num_classes,
mode='train',
width=18,
use_bce_loss=False,
use_dice_loss=False,
class_weight=None,
ignore_index=255,
fixed_input_shape=None):
# dice_loss或bce_loss只适用两类分割中
if num_classes > 2 and (use_bce_loss or use_dice_loss):
raise ValueError(
"dice loss and bce loss is only applicable to binary classfication"
)
if class_weight is not None:
if isinstance(class_weight, list):
if len(class_weight) != num_classes:
raise ValueError(
"Length of class_weight should be equal to number of classes"
)
elif isinstance(class_weight, str):
if class_weight.lower() != 'dynamic':
raise ValueError(
"if class_weight is string, must be dynamic!")
else:
raise TypeError(
'Expect class_weight is a list or string but receive {}'.
format(type(class_weight)))
self.num_classes = num_classes
self.mode = mode
self.use_bce_loss = use_bce_loss
self.use_dice_loss = use_dice_loss
self.class_weight = class_weight
self.ignore_index = ignore_index
self.fixed_input_shape = fixed_input_shape
self.backbone = paddlex.cv.nets.hrnet.HRNet(
width=width, feature_maps="stage4")
def build_net(self, inputs):
if self.use_dice_loss or self.use_bce_loss:
self.num_classes = 1
image = inputs['image']
st4 = self.backbone(image)
# upsample
shape = fluid.layers.shape(st4[0])[-2:]
st4[1] = fluid.layers.resize_bilinear(st4[1], out_shape=shape)
st4[2] = fluid.layers.resize_bilinear(st4[2], out_shape=shape)
st4[3] = fluid.layers.resize_bilinear(st4[3], out_shape=shape)
out = fluid.layers.concat(st4, axis=1)
last_channels = sum(self.backbone.channels[self.backbone.width][-1])
out = self._conv_bn_layer(
input=out,
filter_size=1,
num_filters=last_channels,
stride=1,
if_act=True,
name='conv-2')
out = fluid.layers.conv2d(
input=out,
num_filters=self.num_classes,
filter_size=1,
stride=1,
padding=0,
act=None,
param_attr=ParamAttr(
initializer=MSRA(), name='conv-1_weights'),
bias_attr=False)
input_shape = fluid.layers.shape(image)[-2:]
logit = fluid.layers.resize_bilinear(out, input_shape)
if self.num_classes == 1:
out = sigmoid_to_softmax(logit)
out = fluid.layers.transpose(out, [0, 2, 3, 1])
else:
out = fluid.layers.transpose(logit, [0, 2, 3, 1])
pred = fluid.layers.argmax(out, axis=3)
pred = fluid.layers.unsqueeze(pred, axes=[3])
if self.mode == 'train':
label = inputs['label']
mask = label != self.ignore_index
return self._get_loss(logit, label, mask)
elif self.mode == 'eval':
label = inputs['label']
mask = label != self.ignore_index
loss = self._get_loss(logit, label, mask)
return loss, pred, label, mask
else:
if self.num_classes == 1:
logit = sigmoid_to_softmax(logit)
else:
logit = fluid.layers.softmax(logit, axis=1)
return pred, logit
def generate_inputs(self):
inputs = OrderedDict()
if self.fixed_input_shape is not None:
input_shape = [
None, 3, self.fixed_input_shape[1], self.fixed_input_shape[0]
]
inputs['image'] = fluid.data(
dtype='float32', shape=input_shape, name='image')
else:
inputs['image'] = fluid.data(
dtype='float32', shape=[None, 3, None, None], name='image')
if self.mode == 'train':
inputs['label'] = fluid.data(
dtype='int32', shape=[None, 1, None, None], name='label')
elif self.mode == 'eval':
inputs['label'] = fluid.data(
dtype='int32', shape=[None, 1, None, None], name='label')
return inputs
def _get_loss(self, logit, label, mask):
avg_loss = 0
if not (self.use_dice_loss or self.use_bce_loss):
avg_loss += softmax_with_loss(
logit,
label,
mask,
num_classes=self.num_classes,
weight=self.class_weight,
ignore_index=self.ignore_index)
else:
if self.use_dice_loss:
avg_loss += dice_loss(logit, label, mask)
if self.use_bce_loss:
avg_loss += bce_loss(
logit, label, mask, ignore_index=self.ignore_index)
return avg_loss
def _conv_bn_layer(self,
input,
filter_size,
num_filters,
stride=1,
padding=1,
num_groups=1,
if_act=True,
name=None):
conv = fluid.layers.conv2d(
input=input,
num_filters=num_filters,
filter_size=filter_size,
stride=stride,
padding=(filter_size - 1) // 2,
groups=num_groups,
act=None,
param_attr=ParamAttr(
initializer=MSRA(), name=name + '_weights'),
bias_attr=False)
bn_name = name + '_bn'
bn = fluid.layers.batch_norm(
input=conv,
param_attr=ParamAttr(
name=bn_name + "_scale",
initializer=fluid.initializer.Constant(1.0)),
bias_attr=ParamAttr(
name=bn_name + "_offset",
initializer=fluid.initializer.Constant(0.0)),
moving_mean_name=bn_name + '_mean',
moving_variance_name=bn_name + '_variance')
if if_act:
bn = fluid.layers.relu(bn)
return bn
......@@ -27,7 +27,6 @@ from .model_utils.libs import sigmoid_to_softmax
from .model_utils.loss import softmax_with_loss
from .model_utils.loss import dice_loss
from .model_utils.loss import bce_loss
import paddlex.utils.logging as logging
class UNet(object):
......@@ -106,7 +105,8 @@ class UNet(object):
name='weights',
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=0.0),
initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.33))
initializer=fluid.initializer.TruncatedNormal(
loc=0.0, scale=0.33))
with scope("conv0"):
data = bn_relu(
conv(
......@@ -140,8 +140,7 @@ class UNet(object):
name='weights',
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=0.0),
initializer=fluid.initializer.XavierInitializer(),
)
initializer=fluid.initializer.XavierInitializer(), )
with scope("up"):
if self.upsample_mode == 'bilinear':
short_cut_shape = fluid.layers.shape(short_cut)
......@@ -197,7 +196,8 @@ class UNet(object):
name='weights',
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=0.0),
initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.01))
initializer=fluid.initializer.TruncatedNormal(
loc=0.0, scale=0.01))
with scope("logit"):
data = conv(
data,
......
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
......
......@@ -18,6 +18,7 @@ import random
import os.path as osp
import numpy as np
from PIL import Image, ImageEnhance
import paddlex.utils.logging as logging
class ClsTransform:
......@@ -92,6 +93,16 @@ class Compose(ClsTransform):
outputs = (im, label)
return outputs
def add_augmenters(self, augmenters):
if not isinstance(augmenters, list):
raise Exception(
"augmenters should be list type in func add_augmenters()")
transform_names = [type(x).__name__ for x in self.transforms]
for aug in augmenters:
if type(aug).__name__ in transform_names:
logging.error("{} is already in ComposedTransforms, need to remove it from add_augmenters().".format(type(aug).__name__))
self.transforms = augmenters + self.transforms
class RandomCrop(ClsTransform):
"""对图像进行随机剪裁,模型训练时的数据增强操作。
......@@ -103,14 +114,14 @@ class RandomCrop(ClsTransform):
Args:
crop_size (int): 随机裁剪后重新调整的目标边长。默认为224。
lower_scale (float): 裁剪面积相对原面积比例的最小限制。默认为0.88。
lower_scale (float): 裁剪面积相对原面积比例的最小限制。默认为0.08。
lower_ratio (float): 宽变换比例的最小限制。默认为3. / 4。
upper_ratio (float): 宽变换比例的最大限制。默认为4. / 3。
"""
def __init__(self,
crop_size=224,
lower_scale=0.88,
lower_scale=0.08,
lower_ratio=3. / 4,
upper_ratio=4. / 3):
self.crop_size = crop_size
......@@ -461,3 +472,56 @@ class ArrangeClassifier(ClsTransform):
else:
outputs = (im, )
return outputs
class ComposedClsTransforms(Compose):
""" 分类模型的基础Transforms流程,具体如下
训练阶段:
1. 随机从图像中crop一块子图,并resize成crop_size大小
2. 将1的输出按0.5的概率随机进行水平翻转
3. 将图像进行归一化
验证/预测阶段:
1. 将图像按比例Resize,使得最小边长度为crop_size[0] * 1.14
2. 从图像中心crop出一个大小为crop_size的图像
3. 将图像进行归一化
Args:
mode(str): 图像处理流程所处阶段,训练/验证/预测,分别对应'train', 'eval', 'test'
crop_size(int|list): 输入模型里的图像大小
mean(list): 图像均值
std(list): 图像方差
"""
def __init__(self,
mode,
crop_size=[224, 224],
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]):
width = crop_size
if isinstance(crop_size, list):
if crop_size[0] != crop_size[1]:
raise Exception(
"In classifier model, width and height should be equal, please modify your parameter `crop_size`"
)
width = crop_size[0]
if width % 32 != 0:
raise Exception(
"In classifier model, width and height should be multiple of 32, e.g 224、256、320...., please modify your parameter `crop_size`"
)
if mode == 'train':
# 训练时的transforms,包含数据增强
transforms = [
RandomCrop(crop_size=width), RandomHorizontalFlip(prob=0.5),
Normalize(
mean=mean, std=std)
]
else:
# 验证/预测时的transforms
transforms = [
ResizeByShort(short_size=int(width * 1.14)),
CenterCrop(crop_size=width), Normalize(
mean=mean, std=std)
]
super(ComposedClsTransforms, self).__init__(transforms)
......@@ -27,6 +27,7 @@ from PIL import Image, ImageEnhance
from .imgaug_support import execute_imgaug
from .ops import *
from .box_utils import *
import paddlex.utils.logging as logging
class DetTransform:
......@@ -152,6 +153,16 @@ class Compose(DetTransform):
outputs = (im, im_info)
return outputs
def add_augmenters(self, augmenters):
if not isinstance(augmenters, list):
raise Exception(
"augmenters should be list type in func add_augmenters()")
transform_names = [type(x).__name__ for x in self.transforms]
for aug in augmenters:
if type(aug).__name__ in transform_names:
logging.error("{} is already in ComposedTransforms, need to remove it from add_augmenters().".format(type(aug).__name__))
self.transforms = augmenters + self.transforms
class ResizeByShort(DetTransform):
"""根据图像的短边调整图像大小(resize)。
......@@ -716,22 +727,38 @@ class MixupImage(DetTransform):
'Becasuse gt_bbox/gt_class/gt_score is not in label_info!')
gt_bbox1 = label_info['gt_bbox']
gt_bbox2 = im_info['mixup'][2]['gt_bbox']
gt_bbox = np.concatenate((gt_bbox1, gt_bbox2), axis=0)
gt_class1 = label_info['gt_class']
gt_class2 = im_info['mixup'][2]['gt_class']
gt_class = np.concatenate((gt_class1, gt_class2), axis=0)
gt_score1 = label_info['gt_score']
gt_score2 = im_info['mixup'][2]['gt_score']
gt_score = np.concatenate(
(gt_score1 * factor, gt_score2 * (1. - factor)), axis=0)
if 'gt_poly' in label_info:
gt_poly1 = label_info['gt_poly']
gt_poly2 = im_info['mixup'][2]['gt_poly']
label_info['gt_poly'] = gt_poly1 + gt_poly2
is_crowd1 = label_info['is_crowd']
is_crowd2 = im_info['mixup'][2]['is_crowd']
if 0 not in gt_class1 and 0 not in gt_class2:
gt_bbox = np.concatenate((gt_bbox1, gt_bbox2), axis=0)
gt_class = np.concatenate((gt_class1, gt_class2), axis=0)
gt_score = np.concatenate(
(gt_score1 * factor, gt_score2 * (1. - factor)), axis=0)
if 'gt_poly' in label_info:
label_info['gt_poly'] = gt_poly1 + gt_poly2
is_crowd = np.concatenate((is_crowd1, is_crowd2), axis=0)
elif 0 in gt_class1:
gt_bbox = gt_bbox2
gt_class = gt_class2
gt_score = gt_score2 * (1. - factor)
if 'gt_poly' in label_info:
label_info['gt_poly'] = gt_poly2
is_crowd = is_crowd2
else:
gt_bbox = gt_bbox1
gt_class = gt_class1
gt_score = gt_score1 * factor
if 'gt_poly' in label_info:
label_info['gt_poly'] = gt_poly1
is_crowd = is_crowd1
label_info['gt_bbox'] = gt_bbox
label_info['gt_score'] = gt_score
label_info['gt_class'] = gt_class
......@@ -803,6 +830,8 @@ class RandomExpand(DetTransform):
if np.random.uniform(0., 1.) < self.prob:
return (im, im_info, label_info)
if 'gt_class' in label_info and 0 in label_info['gt_class']:
return (im, im_info, label_info)
image_shape = im_info['image_shape']
height = int(image_shape[0])
width = int(image_shape[1])
......@@ -898,6 +927,8 @@ class RandomCrop(DetTransform):
if len(label_info['gt_bbox']) == 0:
return (im, im_info, label_info)
if 'gt_class' in label_info and 0 in label_info['gt_class']:
return (im, im_info, label_info)
image_shape = im_info['image_shape']
w = image_shape[1]
......@@ -1193,6 +1224,7 @@ class ArrangeYOLOv3(DetTransform):
if gt_num > 0:
label_info['gt_class'][:gt_num, 0] = label_info[
'gt_class'][:gt_num, 0] - 1
if -1 not in label_info['gt_class']:
gt_bbox[:gt_num, :] = label_info['gt_bbox'][:gt_num, :]
gt_class[:gt_num] = label_info['gt_class'][:gt_num, 0]
gt_score[:gt_num] = label_info['gt_score'][:gt_num, 0]
......@@ -1227,3 +1259,108 @@ class ArrangeYOLOv3(DetTransform):
im_shape = im_info['image_shape']
outputs = (im, im_shape)
return outputs
class ComposedRCNNTransforms(Compose):
""" RCNN模型(faster-rcnn/mask-rcnn)图像处理流程,具体如下,
训练阶段:
1. 随机以0.5的概率将图像水平翻转
2. 图像归一化
3. 图像按比例Resize,scale计算方式如下
scale = min_max_size[0] / short_size_of_image
if max_size_of_image * scale > min_max_size[1]:
scale = min_max_size[1] / max_size_of_image
4. 将3步骤的长宽进行padding,使得长宽为32的倍数
验证阶段:
1. 图像归一化
2. 图像按比例Resize,scale计算方式同上训练阶段
3. 将2步骤的长宽进行padding,使得长宽为32的倍数
Args:
mode(str): 图像处理流程所处阶段,训练/验证/预测,分别对应'train', 'eval', 'test'
min_max_size(list): 图像在缩放时,最小边和最大边的约束条件
mean(list): 图像均值
std(list): 图像方差
"""
def __init__(self,
mode,
min_max_size=[800, 1333],
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]):
if mode == 'train':
# 训练时的transforms,包含数据增强
transforms = [
RandomHorizontalFlip(prob=0.5), Normalize(
mean=mean, std=std), ResizeByShort(
short_size=min_max_size[0], max_size=min_max_size[1]),
Padding(coarsest_stride=32)
]
else:
# 验证/预测时的transforms
transforms = [
Normalize(
mean=mean, std=std), ResizeByShort(
short_size=min_max_size[0], max_size=min_max_size[1]),
Padding(coarsest_stride=32)
]
super(ComposedRCNNTransforms, self).__init__(transforms)
class ComposedYOLOv3Transforms(Compose):
"""YOLOv3模型的图像预处理流程,具体如下,
训练阶段:
1. 在前mixup_epoch轮迭代中,使用MixupImage策略,见https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#mixupimage
2. 对图像进行随机扰动,包括亮度,对比度,饱和度和色调
3. 随机扩充图像,见https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#randomexpand
4. 随机裁剪图像
5. 将4步骤的输出图像Resize成shape参数的大小
6. 随机0.5的概率水平翻转图像
7. 图像归一化
验证/预测阶段:
1. 将图像Resize成shape参数大小
2. 图像归一化
Args:
mode(str): 图像处理流程所处阶段,训练/验证/预测,分别对应'train', 'eval', 'test'
shape(list): 输入模型中图像的大小,输入模型的图像会被Resize成此大小
mixup_epoch(int): 模型训练过程中,前mixup_epoch会使用mixup策略
mean(list): 图像均值
std(list): 图像方差
"""
def __init__(self,
mode,
shape=[608, 608],
mixup_epoch=250,
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]):
width = shape
if isinstance(shape, list):
if shape[0] != shape[1]:
raise Exception(
"In YOLOv3 model, width and height should be equal")
width = shape[0]
if width % 32 != 0:
raise Exception(
"In YOLOv3 model, width and height should be multiple of 32, e.g 224、256、320...."
)
if mode == 'train':
# 训练时的transforms,包含数据增强
transforms = [
MixupImage(mixup_epoch=mixup_epoch), RandomDistort(),
RandomExpand(), RandomCrop(), Resize(
target_size=width,
interp='RANDOM'), RandomHorizontalFlip(), Normalize(
mean=mean, std=std)
]
else:
# 验证/预测时的transforms
transforms = [
Resize(
target_size=width, interp='CUBIC'), Normalize(
mean=mean, std=std)
]
super(ComposedYOLOv3Transforms, self).__init__(transforms)
......@@ -23,7 +23,7 @@ def execute_imgaug(augmenter, im, bboxes=None, polygons=None,
import imgaug.augmentables.bbs as bbs
aug_im = im.astype('uint8')
aug_im = augmenter.augment(image=aug_im)
aug_im = augmenter.augment(image=aug_im).astype('float32')
return aug_im
# TODO imgaug的标注处理逻辑与paddlex已存的transform存在部分差异
......
......@@ -21,6 +21,7 @@ import numpy as np
from PIL import Image
import cv2
from collections import OrderedDict
import paddlex.utils.logging as logging
class SegTransform:
......@@ -108,6 +109,16 @@ class Compose(SegTransform):
outputs = (im, im_info)
return outputs
def add_augmenters(self, augmenters):
if not isinstance(augmenters, list):
raise Exception(
"augmenters should be list type in func add_augmenters()")
transform_names = [type(x).__name__ for x in self.transforms]
for aug in augmenters:
if type(aug).__name__ in transform_names:
logging.error("{} is already in ComposedTransforms, need to remove it from add_augmenters().".format(type(aug).__name__))
self.transforms = augmenters + self.transforms
class RandomHorizontalFlip(SegTransform):
"""以一定的概率对图像进行水平翻转。当存在标注图像时,则同步进行翻转。
......@@ -1088,3 +1099,39 @@ class ArrangeSegmenter(SegTransform):
return (im, im_info)
else:
return (im, )
class ComposedSegTransforms(Compose):
""" 语义分割模型(UNet/DeepLabv3p)的图像处理流程,具体如下
训练阶段:
1. 随机对图像以0.5的概率水平翻转
2. 按不同的比例随机Resize原图
3. 从原图中随机crop出大小为train_crop_size大小的子图,如若crop出来的图小于train_crop_size,则会将图padding到对应大小
4. 图像归一化
预测阶段:
1. 图像归一化
Args:
mode(str): 图像处理所处阶段,训练/验证/预测,分别对应'train', 'eval', 'test'
train_crop_size(list): 模型训练阶段,随机从原图crop的大小
mean(list): 图像均值
std(list): 图像方差
"""
def __init__(self,
mode,
train_crop_size=[769, 769],
mean=[0.5, 0.5, 0.5],
std=[0.5, 0.5, 0.5]):
if mode == 'train':
# 训练时的transforms,包含数据增强
transforms = [
RandomHorizontalFlip(prob=0.5), ResizeStepScaling(),
RandomPaddingCrop(crop_size=train_crop_size), Normalize(
mean=mean, std=std)
]
else:
# 验证/预测时的transforms
transforms = [Normalize(mean=mean, std=std)]
super(ComposedSegTransforms, self).__init__(transforms)
......@@ -97,8 +97,6 @@ class Predictor:
config.disable_glog_info()
if memory_optimize:
config.enable_memory_optim()
else:
config.diable_memory_optim()
# 开启计算图分析优化,包括OP融合等
config.switch_ir_optim(True)
......
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
......
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
......@@ -20,6 +20,7 @@ import six
import glob
from .data_path_utils import _find_classes
from PIL import Image
import paddlex.utils.logging as logging
def resize_short(img, target_size, interpolation=None):
......@@ -117,7 +118,7 @@ def read_image(img_path, target_size=256, crop_size=224):
assert len(img_path.shape) == 4
return img_path
else:
ValueError(f"Not recognized data type {type(img_path)}.")
ValueError("Not recognized data type {}.".format(type(img_path)))
class ReaderConfig(object):
......@@ -156,7 +157,7 @@ class ReaderConfig(object):
img = cv2.imread(img_path)
if img is None:
print(img_path)
logging.info(img_path)
continue
img = resize_short(img, target_size, interpolation=None)
img = crop_image(img, crop_size, center=self.is_test)
......@@ -208,7 +209,7 @@ def create_reader(list_image_path, list_label=None, is_test=False):
img = cv2.imread(img_path)
if img is None:
print(img_path)
logging.info(img_path)
continue
img = resize_short(img, target_size, interpolation=None)
......
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import os.path as osp
......@@ -20,6 +20,7 @@ import numpy as np
from paddle.fluid.param_attr import ParamAttr
from paddlex.interpret.as_data_reader.readers import preprocess_image
def gen_user_home():
if "HOME" in os.environ:
home_path = os.environ["HOME"]
......@@ -34,10 +35,20 @@ def paddle_get_fc_weights(var_name="fc_0.w_0"):
def paddle_resize(extracted_features, outsize):
resized_features = fluid.layers.resize_bilinear(extracted_features, outsize)
resized_features = fluid.layers.resize_bilinear(extracted_features,
outsize)
return resized_features
def get_precomputed_normlime_weights():
root_path = gen_user_home()
root_path = osp.join(root_path, '.paddlex')
h_pre_models = osp.join(root_path, "pre_models")
normlime_weights_file = osp.join(
h_pre_models, "normlime_weights_imagenet_resnet50vc.npy")
return np.load(normlime_weights_file, allow_pickle=True).item()
def compute_features_for_kmeans(data_content):
root_path = gen_user_home()
root_path = osp.join(root_path, '.paddlex')
......@@ -47,6 +58,7 @@ def compute_features_for_kmeans(data_content):
os.makedirs(root_path)
url = "https://bj.bcebos.com/paddlex/interpret/pre_models.tar.gz"
pdx.utils.download_and_decompress(url, path=root_path)
def conv_bn_layer(input,
num_filters,
filter_size,
......@@ -55,7 +67,7 @@ def compute_features_for_kmeans(data_content):
act=None,
name=None,
is_test=True,
global_name=''):
global_name='for_kmeans_'):
conv = fluid.layers.conv2d(
input=input,
num_filters=num_filters,
......@@ -79,14 +91,14 @@ def compute_features_for_kmeans(data_content):
bias_attr=ParamAttr(global_name + bn_name + '_offset'),
moving_mean_name=global_name + bn_name + '_mean',
moving_variance_name=global_name + bn_name + '_variance',
use_global_stats=is_test
)
use_global_stats=is_test)
startup_prog = fluid.default_startup_program().clone(for_test=True)
prog = fluid.Program()
with fluid.program_guard(prog, startup_prog):
with fluid.unique_name.guard():
image_op = fluid.data(name='image', shape=[None, 3, 224, 224], dtype='float32')
image_op = fluid.data(
name='image', shape=[None, 3, 224, 224], dtype='float32')
conv = conv_bn_layer(
input=image_op,
......@@ -110,7 +122,8 @@ def compute_features_for_kmeans(data_content):
act='relu',
name='conv1_3')
extracted_features = conv
resized_features = fluid.layers.resize_bilinear(extracted_features, image_op.shape[2:])
resized_features = fluid.layers.resize_bilinear(extracted_features,
image_op.shape[2:])
gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
place = fluid.CUDAPlace(gpu_id)
......@@ -119,7 +132,10 @@ def compute_features_for_kmeans(data_content):
exe.run(startup_prog)
fluid.io.load_persistables(exe, h_pre_models, prog)
images = preprocess_image(data_content) # transpose to [N, 3, H, W], scaled to [0.0, 1.0]
result = exe.run(prog, fetch_list=[resized_features], feed={'image': images})
images = preprocess_image(
data_content) # transpose to [N, 3, H, W], scaled to [0.0, 1.0]
result = exe.run(prog,
fetch_list=[resized_features],
feed={'image': images})
return result[0][0]
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .interpretation_algorithms import CAM, LIME, NormLIME
from .normlime_base import precompute_normlime_weights
......@@ -20,12 +20,10 @@ class Interpretation(object):
"""
Base class for all interpretation algorithms.
"""
def __init__(self, interpretation_algorithm_name, predict_fn, label_names, **kwargs):
supported_algorithms = {
'cam': CAM,
'lime': LIME,
'normlime': NormLIME
}
def __init__(self, interpretation_algorithm_name, predict_fn, label_names,
**kwargs):
supported_algorithms = {'cam': CAM, 'lime': LIME, 'normlime': NormLIME}
self.algorithm_name = interpretation_algorithm_name.lower()
assert self.algorithm_name in supported_algorithms.keys()
......@@ -33,19 +31,17 @@ class Interpretation(object):
# initialization for the interpretation algorithm.
self.algorithm = supported_algorithms[self.algorithm_name](
self.predict_fn, label_names, **kwargs
)
self.predict_fn, label_names, **kwargs)
def interpret(self, data_, visualization=True, save_to_disk=True, save_dir='./tmp'):
def interpret(self, data_, visualization=True, save_dir='./'):
"""
Args:
data_: data_ can be a path or numpy.ndarray.
visualization: whether to show using matplotlib.
save_to_disk: whether to save the figure in local disk.
save_dir: dir to save figure if save_to_disk is True.
Returns:
"""
return self.algorithm.interpret(data_, visualization, save_to_disk, save_dir)
return self.algorithm.interpret(data_, visualization, save_dir)
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import os.path as osp
......@@ -21,7 +21,7 @@ from . import lime_base
from ._session_preparation import paddle_get_fc_weights, compute_features_for_kmeans, gen_user_home
from .normlime_base import combine_normlime_and_lime, get_feature_for_kmeans, load_kmeans_model
from paddlex.interpret.as_data_reader.readers import read_image
import paddlex.utils.logging as logging
import cv2
......@@ -71,14 +71,17 @@ class CAM(object):
if self.label_names is not None:
ln = self.label_names[l]
print(f'predicted result: {ln} with probability {probability[pred_label[0]]:.3f}')
prob_str = "%.3f" % (probability[pred_label[0]])
logging.info("predicted result: {} with probability {}.".format(
ln, prob_str))
return feature_maps, fc_weights
def interpret(self, data_, visualization=True, save_to_disk=True, save_outdir=None):
def interpret(self, data_, visualization=True, save_outdir=None):
feature_maps, fc_weights = self.preparation_cam(data_)
cam = get_cam(self.image, feature_maps, fc_weights, self.predicted_label)
cam = get_cam(self.image, feature_maps, fc_weights,
self.predicted_label)
if visualization or save_to_disk:
if visualization or save_outdir is not None:
import matplotlib.pyplot as plt
from skimage.segmentation import mark_boundaries
l = self.labels[0]
......@@ -91,18 +94,19 @@ class CAM(object):
ncols = 2
plt.close()
f, axes = plt.subplots(nrows, ncols, figsize=(psize * ncols, psize * nrows))
f, axes = plt.subplots(
nrows, ncols, figsize=(psize * ncols, psize * nrows))
for ax in axes.ravel():
ax.axis("off")
axes = axes.ravel()
axes[0].imshow(self.image)
axes[0].set_title(f"label {ln}, proba: {self.predicted_probability: .3f}")
prob_str = "{%.3f}" % (self.predicted_probability)
axes[0].set_title("label {}, proba: {}".format(ln, prob_str))
axes[1].imshow(cam)
axes[1].set_title("CAM")
if save_to_disk and save_outdir is not None:
os.makedirs(save_outdir, exist_ok=True)
if save_outdir is not None:
save_fig(data_, save_outdir, 'cam')
if visualization:
......@@ -112,7 +116,11 @@ class CAM(object):
class LIME(object):
def __init__(self, predict_fn, label_names, num_samples=3000, batch_size=50):
def __init__(self,
predict_fn,
label_names,
num_samples=3000,
batch_size=50):
"""
LIME wrapper. See lime_base.py for the detailed LIME implementation.
Args:
......@@ -157,20 +165,27 @@ class LIME(object):
if self.label_names is not None:
ln = self.label_names[l]
print(f'predicted result: {ln} with probability {probability[pred_label[0]]:.3f}')
prob_str = "%.3f" % (probability[pred_label[0]])
logging.info("predicted result: {} with probability {}.".format(
ln, prob_str))
end = time.time()
algo = lime_base.LimeImageInterpreter()
interpreter = algo.interpret_instance(self.image, self.predict_fn, self.labels, 0,
num_samples=self.num_samples, batch_size=self.batch_size)
interpreter = algo.interpret_instance(
self.image,
self.predict_fn,
self.labels,
0,
num_samples=self.num_samples,
batch_size=self.batch_size)
self.lime_interpreter = interpreter
print('lime time: ', time.time() - end, 's.')
logging.info('lime time: ' + str(time.time() - end) + 's.')
def interpret(self, data_, visualization=True, save_to_disk=True, save_outdir=None):
def interpret(self, data_, visualization=True, save_outdir=None):
if self.lime_interpreter is None:
self.preparation_lime(data_)
if visualization or save_to_disk:
if visualization or save_outdir is not None:
import matplotlib.pyplot as plt
from skimage.segmentation import mark_boundaries
l = self.labels[0]
......@@ -184,27 +199,33 @@ class LIME(object):
ncols = len(weights_choices)
plt.close()
f, axes = plt.subplots(nrows, ncols, figsize=(psize * ncols, psize * nrows))
f, axes = plt.subplots(
nrows, ncols, figsize=(psize * ncols, psize * nrows))
for ax in axes.ravel():
ax.axis("off")
axes = axes.ravel()
axes[0].imshow(self.image)
axes[0].set_title(f"label {ln}, proba: {self.predicted_probability: .3f}")
prob_str = "{%.3f}" % (self.predicted_probability)
axes[0].set_title("label {}, proba: {}".format(ln, prob_str))
axes[1].imshow(mark_boundaries(self.image, self.lime_interpreter.segments))
axes[1].imshow(
mark_boundaries(self.image, self.lime_interpreter.segments))
axes[1].set_title("superpixel segmentation")
# LIME visualization
for i, w in enumerate(weights_choices):
num_to_show = auto_choose_num_features_to_show(self.lime_interpreter, l, w)
num_to_show = auto_choose_num_features_to_show(
self.lime_interpreter, l, w)
temp, mask = self.lime_interpreter.get_image_and_mask(
l, positive_only=False, hide_rest=False, num_features=num_to_show
)
l,
positive_only=True,
hide_rest=False,
num_features=num_to_show)
axes[ncols + i].imshow(mark_boundaries(temp, mask))
axes[ncols + i].set_title(f"label {ln}, first {num_to_show} superpixels")
axes[ncols + i].set_title(
"label {}, first {} superpixels".format(ln, num_to_show))
if save_to_disk and save_outdir is not None:
os.makedirs(save_outdir, exist_ok=True)
if save_outdir is not None:
save_fig(data_, save_outdir, 'lime', self.num_samples)
if visualization:
......@@ -213,9 +234,14 @@ class LIME(object):
return
class NormLIME(object):
def __init__(self, predict_fn, label_names, num_samples=3000, batch_size=50,
kmeans_model_for_normlime=None, normlime_weights=None):
class NormLIMEStandard(object):
def __init__(self,
predict_fn,
label_names,
num_samples=3000,
batch_size=50,
kmeans_model_for_normlime=None,
normlime_weights=None):
root_path = gen_user_home()
root_path = osp.join(root_path, '.paddlex')
h_pre_models = osp.join(root_path, "pre_models")
......@@ -229,21 +255,25 @@ class NormLIME(object):
try:
self.kmeans_model = load_kmeans_model(h_pre_models_kmeans)
except:
raise ValueError("NormLIME needs the KMeans model, where we provided a default one in "
raise ValueError(
"NormLIME needs the KMeans model, where we provided a default one in "
"pre_models/kmeans_model.pkl.")
else:
print("Warning: It is *strongly* suggested to use the default KMeans model in pre_models/kmeans_model.pkl. "
"Use another one will change the final result.")
logging.debug("Warning: It is *strongly* suggested to use the \
default KMeans model in pre_models/kmeans_model.pkl. \
Use another one will change the final result.")
self.kmeans_model = load_kmeans_model(kmeans_model_for_normlime)
self.num_samples = num_samples
self.batch_size = batch_size
try:
self.normlime_weights = np.load(normlime_weights, allow_pickle=True).item()
self.normlime_weights = np.load(
normlime_weights, allow_pickle=True).item()
except:
self.normlime_weights = None
print("Warning: not find the correct precomputed Normlime result.")
logging.debug(
"Warning: not find the correct precomputed Normlime result.")
self.predict_fn = predict_fn
......@@ -257,30 +287,27 @@ class NormLIME(object):
cluster_labels = self.kmeans_model.predict(X)
except AttributeError:
from sklearn.metrics import pairwise_distances_argmin_min
cluster_labels, _ = pairwise_distances_argmin_min(X, self.kmeans_model.cluster_centers_)
cluster_labels, _ = pairwise_distances_argmin_min(
X, self.kmeans_model.cluster_centers_)
return cluster_labels
def predict_using_normlime_weights(self, pred_labels, predicted_cluster_labels):
def predict_using_normlime_weights(self, pred_labels,
predicted_cluster_labels):
# global weights
g_weights = {y: [] for y in pred_labels}
for y in pred_labels:
cluster_weights_y = self.normlime_weights.get(y, {})
g_weights[y] = [
(i, cluster_weights_y.get(k, 0.0)) for i, k in enumerate(predicted_cluster_labels)
]
g_weights[y] = [(i, cluster_weights_y.get(k, 0.0))
for i, k in enumerate(predicted_cluster_labels)]
g_weights[y] = sorted(g_weights[y],
key=lambda x: np.abs(x[1]), reverse=True)
g_weights[y] = sorted(
g_weights[y], key=lambda x: np.abs(x[1]), reverse=True)
return g_weights
def preparation_normlime(self, data_):
self._lime = LIME(
self.predict_fn,
self.label_names,
self.num_samples,
self.batch_size
)
self._lime = LIME(self.predict_fn, self.label_names, self.num_samples,
self.batch_size)
self._lime.preparation_lime(data_)
image_show = read_image(data_)
......@@ -289,26 +316,28 @@ class NormLIME(object):
self.predicted_probability = self._lime.predicted_probability
self.image = image_show[0]
self.labels = self._lime.labels
# print(f'predicted result: {self.predicted_label} with probability {self.predicted_probability: .3f}')
print('performing NormLIME operations ...')
logging.info('performing NormLIME operations ...')
cluster_labels = self.predict_cluster_labels(
compute_features_for_kmeans(image_show).transpose((1, 2, 0)), self._lime.lime_interpreter.segments
)
compute_features_for_kmeans(image_show).transpose((1, 2, 0)),
self._lime.lime_interpreter.segments)
g_weights = self.predict_using_normlime_weights(self.labels, cluster_labels)
g_weights = self.predict_using_normlime_weights(self.labels,
cluster_labels)
return g_weights
def interpret(self, data_, visualization=True, save_to_disk=True, save_outdir=None):
def interpret(self, data_, visualization=True, save_outdir=None):
if self.normlime_weights is None:
raise ValueError("Not find the correct precomputed NormLIME result. \n"
"\t Try to call compute_normlime_weights() first or load the correct path.")
raise ValueError(
"Not find the correct precomputed NormLIME result. \n"
"\t Try to call compute_normlime_weights() first or load the correct path."
)
g_weights = self.preparation_normlime(data_)
lime_weights = self._lime.lime_interpreter.local_weights
if visualization or save_to_disk:
if visualization or save_outdir is not None:
import matplotlib.pyplot as plt
from skimage.segmentation import mark_boundaries
l = self.labels[0]
......@@ -323,72 +352,275 @@ class NormLIME(object):
ncols = len(weights_choices)
plt.close()
f, axes = plt.subplots(nrows, ncols, figsize=(psize * ncols, psize * nrows))
f, axes = plt.subplots(
nrows, ncols, figsize=(psize * ncols, psize * nrows))
for ax in axes.ravel():
ax.axis("off")
axes = axes.ravel()
axes[0].imshow(self.image)
axes[0].set_title(f"label {ln}, proba: {self.predicted_probability: .3f}")
prob_str = "{%.3f}" % (self.predicted_probability)
axes[0].set_title("label {}, proba: {}".format(ln, prob_str))
axes[1].imshow(mark_boundaries(self.image, self._lime.lime_interpreter.segments))
axes[1].imshow(
mark_boundaries(self.image,
self._lime.lime_interpreter.segments))
axes[1].set_title("superpixel segmentation")
# LIME visualization
for i, w in enumerate(weights_choices):
num_to_show = auto_choose_num_features_to_show(self._lime.lime_interpreter, l, w)
num_to_show = auto_choose_num_features_to_show(
self._lime.lime_interpreter, l, w)
nums_to_show.append(num_to_show)
temp, mask = self._lime.lime_interpreter.get_image_and_mask(
l, positive_only=False, hide_rest=False, num_features=num_to_show
)
l,
positive_only=False,
hide_rest=False,
num_features=num_to_show)
axes[ncols + i].imshow(mark_boundaries(temp, mask))
axes[ncols + i].set_title(f"LIME: first {num_to_show} superpixels")
axes[ncols + i].set_title("LIME: first {} superpixels".format(
num_to_show))
# NormLIME visualization
self._lime.lime_interpreter.local_weights = g_weights
for i, num_to_show in enumerate(nums_to_show):
temp, mask = self._lime.lime_interpreter.get_image_and_mask(
l, positive_only=False, hide_rest=False, num_features=num_to_show
)
l,
positive_only=False,
hide_rest=False,
num_features=num_to_show)
axes[ncols * 2 + i].imshow(mark_boundaries(temp, mask))
axes[ncols * 2 + i].set_title(f"NormLIME: first {num_to_show} superpixels")
axes[ncols * 2 + i].set_title(
"NormLIME: first {} superpixels".format(num_to_show))
# NormLIME*LIME visualization
combined_weights = combine_normlime_and_lime(lime_weights, g_weights)
combined_weights = combine_normlime_and_lime(lime_weights,
g_weights)
self._lime.lime_interpreter.local_weights = combined_weights
for i, num_to_show in enumerate(nums_to_show):
temp, mask = self._lime.lime_interpreter.get_image_and_mask(
l, positive_only=False, hide_rest=False, num_features=num_to_show
l,
positive_only=False,
hide_rest=False,
num_features=num_to_show)
axes[ncols * 3 + i].imshow(mark_boundaries(temp, mask))
axes[ncols * 3 + i].set_title(
"Combined: first {} superpixels".format(num_to_show))
self._lime.lime_interpreter.local_weights = lime_weights
if save_outdir is not None:
save_fig(data_, save_outdir, 'normlime', self.num_samples)
if visualization:
plt.show()
class NormLIME(object):
def __init__(self,
predict_fn,
label_names,
num_samples=3000,
batch_size=50,
kmeans_model_for_normlime=None,
normlime_weights=None):
root_path = gen_user_home()
root_path = osp.join(root_path, '.paddlex')
h_pre_models = osp.join(root_path, "pre_models")
if not osp.exists(h_pre_models):
if not osp.exists(root_path):
os.makedirs(root_path)
url = "https://bj.bcebos.com/paddlex/interpret/pre_models.tar.gz"
pdx.utils.download_and_decompress(url, path=root_path)
h_pre_models_kmeans = osp.join(h_pre_models, "kmeans_model.pkl")
if kmeans_model_for_normlime is None:
try:
self.kmeans_model = load_kmeans_model(h_pre_models_kmeans)
except:
raise ValueError(
"NormLIME needs the KMeans model, where we provided a default one in "
"pre_models/kmeans_model.pkl.")
else:
logging.debug("Warning: It is *strongly* suggested to use the \
default KMeans model in pre_models/kmeans_model.pkl. \
Use another one will change the final result.")
self.kmeans_model = load_kmeans_model(kmeans_model_for_normlime)
self.num_samples = num_samples
self.batch_size = batch_size
try:
self.normlime_weights = np.load(
normlime_weights, allow_pickle=True).item()
except:
self.normlime_weights = None
logging.debug(
"Warning: not find the correct precomputed Normlime result.")
self.predict_fn = predict_fn
self.labels = None
self.image = None
self.label_names = label_names
def predict_cluster_labels(self, feature_map, segments):
X = get_feature_for_kmeans(feature_map, segments)
try:
cluster_labels = self.kmeans_model.predict(X)
except AttributeError:
from sklearn.metrics import pairwise_distances_argmin_min
cluster_labels, _ = pairwise_distances_argmin_min(
X, self.kmeans_model.cluster_centers_)
return cluster_labels
def predict_using_normlime_weights(self, pred_labels,
predicted_cluster_labels):
# global weights
g_weights = {y: [] for y in pred_labels}
for y in pred_labels:
cluster_weights_y = self.normlime_weights.get(y, {})
g_weights[y] = [(i, cluster_weights_y.get(k, 0.0))
for i, k in enumerate(predicted_cluster_labels)]
g_weights[y] = sorted(
g_weights[y], key=lambda x: np.abs(x[1]), reverse=True)
return g_weights
def preparation_normlime(self, data_):
self._lime = LIME(self.predict_fn, self.label_names, self.num_samples,
self.batch_size)
self._lime.preparation_lime(data_)
image_show = read_image(data_)
self.predicted_label = self._lime.predicted_label
self.predicted_probability = self._lime.predicted_probability
self.image = image_show[0]
self.labels = self._lime.labels
logging.info('performing NormLIME operations ...')
cluster_labels = self.predict_cluster_labels(
compute_features_for_kmeans(image_show).transpose((1, 2, 0)),
self._lime.lime_interpreter.segments)
g_weights = self.predict_using_normlime_weights(self.labels,
cluster_labels)
return g_weights
def interpret(self, data_, visualization=True, save_outdir=None):
if self.normlime_weights is None:
raise ValueError(
"Not find the correct precomputed NormLIME result. \n"
"\t Try to call compute_normlime_weights() first or load the correct path."
)
g_weights = self.preparation_normlime(data_)
lime_weights = self._lime.lime_interpreter.local_weights
if visualization or save_outdir is not None:
import matplotlib.pyplot as plt
from skimage.segmentation import mark_boundaries
l = self.labels[0]
ln = l
if self.label_names is not None:
ln = self.label_names[l]
psize = 5
nrows = 4
weights_choices = [0.6, 0.7, 0.75, 0.8, 0.85]
nums_to_show = []
ncols = len(weights_choices)
plt.close()
f, axes = plt.subplots(
nrows, ncols, figsize=(psize * ncols, psize * nrows))
for ax in axes.ravel():
ax.axis("off")
axes = axes.ravel()
axes[0].imshow(self.image)
prob_str = "{%.3f}" % (self.predicted_probability)
axes[0].set_title("label {}, proba: {}".format(ln, prob_str))
axes[1].imshow(
mark_boundaries(self.image,
self._lime.lime_interpreter.segments))
axes[1].set_title("superpixel segmentation")
# LIME visualization
for i, w in enumerate(weights_choices):
num_to_show = auto_choose_num_features_to_show(
self._lime.lime_interpreter, l, w)
nums_to_show.append(num_to_show)
temp, mask = self._lime.lime_interpreter.get_image_and_mask(
l,
positive_only=True,
hide_rest=False,
num_features=num_to_show)
axes[ncols + i].imshow(mark_boundaries(temp, mask))
axes[ncols + i].set_title("LIME: first {} superpixels".format(
num_to_show))
# NormLIME visualization
self._lime.lime_interpreter.local_weights = g_weights
for i, num_to_show in enumerate(nums_to_show):
temp, mask = self._lime.lime_interpreter.get_image_and_mask(
l,
positive_only=True,
hide_rest=False,
num_features=num_to_show)
axes[ncols * 2 + i].imshow(mark_boundaries(temp, mask))
axes[ncols * 2 + i].set_title(
"NormLIME: first {} superpixels".format(num_to_show))
# NormLIME*LIME visualization
combined_weights = combine_normlime_and_lime(lime_weights,
g_weights)
self._lime.lime_interpreter.local_weights = combined_weights
for i, num_to_show in enumerate(nums_to_show):
temp, mask = self._lime.lime_interpreter.get_image_and_mask(
l,
positive_only=True,
hide_rest=False,
num_features=num_to_show)
axes[ncols * 3 + i].imshow(mark_boundaries(temp, mask))
axes[ncols * 3 + i].set_title(f"Combined: first {num_to_show} superpixels")
axes[ncols * 3 + i].set_title(
"Combined: first {} superpixels".format(num_to_show))
self._lime.lime_interpreter.local_weights = lime_weights
if save_to_disk and save_outdir is not None:
os.makedirs(save_outdir, exist_ok=True)
if save_outdir is not None:
save_fig(data_, save_outdir, 'normlime', self.num_samples)
if visualization:
plt.show()
def auto_choose_num_features_to_show(lime_interpreter, label, percentage_to_show):
def auto_choose_num_features_to_show(lime_interpreter, label,
percentage_to_show):
segments = lime_interpreter.segments
lime_weights = lime_interpreter.local_weights[label]
num_pixels_threshold_in_a_sp = segments.shape[0] * segments.shape[1] // len(np.unique(segments)) // 8
num_pixels_threshold_in_a_sp = segments.shape[0] * segments.shape[
1] // len(np.unique(segments)) // 8
# l1 norm with filtered weights.
used_weights = [(tuple_w[0], tuple_w[1]) for i, tuple_w in enumerate(lime_weights) if tuple_w[1] > 0]
used_weights = [(tuple_w[0], tuple_w[1])
for i, tuple_w in enumerate(lime_weights)
if tuple_w[1] > 0]
norm = np.sum([tuple_w[1] for i, tuple_w in enumerate(used_weights)])
normalized_weights = [(tuple_w[0], tuple_w[1] / norm) for i, tuple_w in enumerate(lime_weights)]
normalized_weights = [(tuple_w[0], tuple_w[1] / norm)
for i, tuple_w in enumerate(lime_weights)]
a = 0.0
n = 0
for i, tuple_w in enumerate(normalized_weights):
if tuple_w[1] < 0:
continue
if len(np.where(segments == tuple_w[0])[0]) < num_pixels_threshold_in_a_sp:
if len(np.where(segments == tuple_w[0])[
0]) < num_pixels_threshold_in_a_sp:
continue
a += tuple_w[1]
......@@ -400,12 +632,18 @@ def auto_choose_num_features_to_show(lime_interpreter, label, percentage_to_show
return 5
if n == 0:
return auto_choose_num_features_to_show(lime_interpreter, label, percentage_to_show-0.1)
return auto_choose_num_features_to_show(lime_interpreter, label,
percentage_to_show - 0.1)
return n
def get_cam(image_show, feature_maps, fc_weights, label_index, cam_min=None, cam_max=None):
def get_cam(image_show,
feature_maps,
fc_weights,
label_index,
cam_min=None,
cam_max=None):
_, nc, h, w = feature_maps.shape
cam = feature_maps * fc_weights[:, label_index].reshape(1, nc, 1, 1)
......@@ -419,7 +657,8 @@ def get_cam(image_show, feature_maps, fc_weights, label_index, cam_min=None, cam
cam = cam - cam_min
cam = cam / cam_max
cam = np.uint8(255 * cam)
cam_img = cv2.resize(cam, image_show.shape[0:2], interpolation=cv2.INTER_LINEAR)
cam_img = cv2.resize(
cam, image_show.shape[0:2], interpolation=cv2.INTER_LINEAR)
heatmap = cv2.applyColorMap(np.uint8(255 * cam_img), cv2.COLORMAP_JET)
heatmap = np.float32(heatmap)
......@@ -431,34 +670,11 @@ def get_cam(image_show, feature_maps, fc_weights, label_index, cam_min=None, cam
def save_fig(data_, save_outdir, algorithm_name, num_samples=3000):
import matplotlib.pyplot as plt
if isinstance(data_, str):
if algorithm_name == 'cam':
f_out = f"{algorithm_name}_{data_.split('/')[-1]}.png"
else:
f_out = f"{algorithm_name}_{data_.split('/')[-1]}_s{num_samples}.png"
plt.savefig(
os.path.join(save_outdir, f_out)
)
f_out = "{}_{}.png".format(algorithm_name, data_.split('/')[-1])
else:
n = 0
if algorithm_name == 'cam':
f_out = f'cam-{n}.png'
else:
f_out = f'{algorithm_name}_s{num_samples}-{n}.png'
while os.path.exists(
os.path.join(save_outdir, f_out)
):
n += 1
if algorithm_name == 'cam':
f_out = f'cam-{n}.png'
else:
f_out = f'{algorithm_name}_s{num_samples}-{n}.png'
continue
plt.savefig(
os.path.join(
save_outdir, f_out
)
)
print('The image of intrepretation result save in {}'.format(os.path.join(
save_outdir, f_out
)))
f_out = "{}_{}_s{}.png".format(save_outdir, algorithm_name,
num_samples)
plt.savefig(f_out)
logging.info('The image of intrepretation result save in {}'.format(f_out))
......@@ -27,21 +27,19 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
The code in this file (lime_base.py) is modified from https://github.com/marcotcr/lime.
"""
import numpy as np
import scipy as sp
import tqdm
import copy
from functools import partial
import paddlex.utils.logging as logging
class LimeBase(object):
"""Class for learning a locally linear sparse model from perturbed data"""
def __init__(self,
kernel_fn,
verbose=False,
random_state=None):
def __init__(self, kernel_fn, verbose=False, random_state=None):
"""Init function
Args:
......@@ -71,15 +69,14 @@ class LimeBase(object):
"""
from sklearn.linear_model import lars_path
x_vector = weighted_data
alphas, _, coefs = lars_path(x_vector,
weighted_labels,
method='lasso',
verbose=False)
alphas, _, coefs = lars_path(
x_vector, weighted_labels, method='lasso', verbose=False)
return alphas, coefs
def forward_selection(self, data, labels, weights, num_features):
"""Iteratively adds features to the model"""
clf = Ridge(alpha=0, fit_intercept=True, random_state=self.random_state)
clf = Ridge(
alpha=0, fit_intercept=True, random_state=self.random_state)
used_features = []
for _ in range(min(num_features, data.shape[1])):
max_ = -100000000
......@@ -87,9 +84,11 @@ class LimeBase(object):
for feature in range(data.shape[1]):
if feature in used_features:
continue
clf.fit(data[:, used_features + [feature]], labels,
clf.fit(data[:, used_features + [feature]],
labels,
sample_weight=weights)
score = clf.score(data[:, used_features + [feature]],
score = clf.score(
data[:, used_features + [feature]],
labels,
sample_weight=weights)
if score > max_:
......@@ -107,8 +106,8 @@ class LimeBase(object):
elif method == 'forward_selection':
return self.forward_selection(data, labels, weights, num_features)
elif method == 'highest_weights':
clf = Ridge(alpha=0.01, fit_intercept=True,
random_state=self.random_state)
clf = Ridge(
alpha=0.01, fit_intercept=True, random_state=self.random_state)
clf.fit(data, labels, sample_weight=weights)
coef = clf.coef_
......@@ -124,7 +123,8 @@ class LimeBase(object):
nnz_indexes = argsort_data[::-1]
indices = weighted_data.indices[nnz_indexes]
num_to_pad = num_features - sdata
indices = np.concatenate((indices, np.zeros(num_to_pad, dtype=indices.dtype)))
indices = np.concatenate((indices, np.zeros(
num_to_pad, dtype=indices.dtype)))
indices_set = set(indices)
pad_counter = 0
for i in range(data.shape[1]):
......@@ -134,7 +134,8 @@ class LimeBase(object):
if pad_counter >= num_to_pad:
break
else:
nnz_indexes = argsort_data[sdata - num_features:sdata][::-1]
nnz_indexes = argsort_data[sdata - num_features:sdata][::
-1]
indices = weighted_data.indices[nnz_indexes]
return indices
else:
......@@ -145,13 +146,13 @@ class LimeBase(object):
reverse=True)
return np.array([x[0] for x in feature_weights[:num_features]])
elif method == 'lasso_path':
weighted_data = ((data - np.average(data, axis=0, weights=weights))
* np.sqrt(weights[:, np.newaxis]))
weighted_labels = ((labels - np.average(labels, weights=weights))
* np.sqrt(weights))
weighted_data = ((data - np.average(
data, axis=0, weights=weights)) *
np.sqrt(weights[:, np.newaxis]))
weighted_labels = ((labels - np.average(
labels, weights=weights)) * np.sqrt(weights))
nonzero = range(weighted_data.shape[1])
_, coefs = self.generate_lars_path(weighted_data,
weighted_labels)
_, coefs = self.generate_lars_path(weighted_data, weighted_labels)
for i in range(len(coefs.T) - 1, 0, -1):
nonzero = coefs.T[i].nonzero()[0]
if len(nonzero) <= num_features:
......@@ -163,8 +164,8 @@ class LimeBase(object):
n_method = 'forward_selection'
else:
n_method = 'highest_weights'
return self.feature_selection(data, labels, weights,
num_features, n_method)
return self.feature_selection(data, labels, weights, num_features,
n_method)
def interpret_instance_with_data(self,
neighborhood_data,
......@@ -213,30 +214,31 @@ class LimeBase(object):
weights = self.kernel_fn(distances)
labels_column = neighborhood_labels[:, label]
used_features = self.feature_selection(neighborhood_data,
labels_column,
weights,
num_features,
feature_selection)
labels_column, weights,
num_features, feature_selection)
if model_regressor is None:
model_regressor = Ridge(alpha=1, fit_intercept=True,
random_state=self.random_state)
model_regressor = Ridge(
alpha=1, fit_intercept=True, random_state=self.random_state)
easy_model = model_regressor
easy_model.fit(neighborhood_data[:, used_features],
labels_column, sample_weight=weights)
labels_column,
sample_weight=weights)
prediction_score = easy_model.score(
neighborhood_data[:, used_features],
labels_column, sample_weight=weights)
labels_column,
sample_weight=weights)
local_pred = easy_model.predict(neighborhood_data[0, used_features].reshape(1, -1))
local_pred = easy_model.predict(neighborhood_data[0, used_features]
.reshape(1, -1))
if self.verbose:
print('Intercept', easy_model.intercept_)
print('Prediction_local', local_pred,)
print('Right:', neighborhood_labels[0, label])
return (easy_model.intercept_,
sorted(zip(used_features, easy_model.coef_),
key=lambda x: np.abs(x[1]), reverse=True),
prediction_score, local_pred)
logging.info('Intercept' + str(easy_model.intercept_))
logging.info('Prediction_local' + str(local_pred))
logging.info('Right:' + str(neighborhood_labels[0, label]))
return (easy_model.intercept_, sorted(
zip(used_features, easy_model.coef_),
key=lambda x: np.abs(x[1]),
reverse=True), prediction_score, local_pred)
class ImageInterpretation(object):
......@@ -253,8 +255,13 @@ class ImageInterpretation(object):
self.local_weights = {}
self.local_pred = None
def get_image_and_mask(self, label, positive_only=True, negative_only=False, hide_rest=False,
num_features=5, min_weight=0.):
def get_image_and_mask(self,
label,
positive_only=True,
negative_only=False,
hide_rest=False,
num_features=5,
min_weight=0.):
"""Init function.
Args:
......@@ -278,7 +285,9 @@ class ImageInterpretation(object):
if label not in self.local_weights:
raise KeyError('Label not in interpretation')
if positive_only & negative_only:
raise ValueError("Positive_only and negative_only cannot be true at the same time.")
raise ValueError(
"Positive_only and negative_only cannot be true at the same time."
)
segments = self.segments
image = self.image
local_weights_label = self.local_weights[label]
......@@ -288,14 +297,20 @@ class ImageInterpretation(object):
else:
temp = self.image.copy()
if positive_only:
fs = [x[0] for x in local_weights_label
if x[1] > 0 and x[1] > min_weight][:num_features]
fs = [
x[0] for x in local_weights_label
if x[1] > 0 and x[1] > min_weight
][:num_features]
if negative_only:
fs = [x[0] for x in local_weights_label
if x[1] < 0 and abs(x[1]) > min_weight][:num_features]
fs = [
x[0] for x in local_weights_label
if x[1] < 0 and abs(x[1]) > min_weight
][:num_features]
if positive_only or negative_only:
c = 1 if positive_only else 0
for f in fs:
temp[segments == f] = image[segments == f].copy()
temp[segments == f] = [0, 255, 0]
# temp[segments == f, c] = np.max(image)
mask[segments == f] = 1
return temp, mask
else:
......@@ -329,8 +344,11 @@ class ImageInterpretation(object):
temp = np.zeros_like(image)
weight_max = abs(local_weights_label[0][1])
local_weights_label = [(f, w/weight_max) for f, w in local_weights_label]
local_weights_label = sorted(local_weights_label, key=lambda x: x[1], reverse=True) # negatives are at last.
local_weights_label = [(f, w / weight_max)
for f, w in local_weights_label]
local_weights_label = sorted(
local_weights_label, key=lambda x: x[1],
reverse=True) # negatives are at last.
cmaps = cm.get_cmap('Spectral')
colors = cmaps(np.linspace(0, 1, len(local_weights_label)))
......@@ -353,8 +371,12 @@ class LimeImageInterpreter(object):
feature that is 1 when the value is the same as the instance being
interpreted."""
def __init__(self, kernel_width=.25, kernel=None, verbose=False,
feature_selection='auto', random_state=None):
def __init__(self,
kernel_width=.25,
kernel=None,
verbose=False,
feature_selection='auto',
random_state=None):
"""Init function.
Args:
......@@ -376,22 +398,27 @@ class LimeImageInterpreter(object):
kernel_width = float(kernel_width)
if kernel is None:
def kernel(d, kernel_width):
return np.sqrt(np.exp(-(d ** 2) / kernel_width ** 2))
return np.sqrt(np.exp(-(d**2) / kernel_width**2))
kernel_fn = partial(kernel, kernel_width=kernel_width)
self.random_state = check_random_state(random_state)
self.feature_selection = feature_selection
self.base = LimeBase(kernel_fn, verbose, random_state=self.random_state)
self.base = LimeBase(
kernel_fn, verbose, random_state=self.random_state)
def interpret_instance(self, image, classifier_fn, labels=(1,),
def interpret_instance(self,
image,
classifier_fn,
labels=(1, ),
hide_color=None,
num_features=100000, num_samples=1000,
num_features=100000,
num_samples=1000,
batch_size=10,
distance_metric='cosine',
model_regressor=None
):
model_regressor=None):
"""Generates interpretations for a prediction.
First, we generate neighborhood data by randomly perturbing features
......@@ -434,6 +461,7 @@ class LimeImageInterpreter(object):
self.segments = segments
fudged_image = image.copy()
# global_mean = np.mean(image, (0, 1))
if hide_color is None:
# if no hide_color, use the mean
for x in np.unique(segments):
......@@ -451,7 +479,6 @@ class LimeImageInterpreter(object):
d = cdist(centroids, centroids, 'sqeuclidean')
for x in np.unique(segments):
# print(np.argmin(d[x]))
a = [image[segments == i] for i in np.argsort(d[x])[1:6]]
mx = np.mean(np.concatenate(a), axis=0)
fudged_image[segments == x] = mx
......@@ -461,22 +488,28 @@ class LimeImageInterpreter(object):
top = labels
data, labels = self.data_labels(image, fudged_image, segments,
classifier_fn, num_samples,
data, labels = self.data_labels(
image,
fudged_image,
segments,
classifier_fn,
num_samples,
batch_size=batch_size)
distances = sklearn.metrics.pairwise_distances(
data,
data[0].reshape(1, -1),
metric=distance_metric
).ravel()
data, data[0].reshape(1, -1), metric=distance_metric).ravel()
interpretation_image = ImageInterpretation(image, segments)
for label in top:
(interpretation_image.intercept[label],
interpretation_image.local_weights[label],
interpretation_image.score, interpretation_image.local_pred) = self.base.interpret_instance_with_data(
data, labels, distances, label, num_features,
interpretation_image.score, interpretation_image.local_pred
) = self.base.interpret_instance_with_data(
data,
labels,
distances,
label,
num_features,
model_regressor=model_regressor,
feature_selection=self.feature_selection)
return interpretation_image
......@@ -511,6 +544,9 @@ class LimeImageInterpreter(object):
labels = []
data[0, :] = 1
imgs = []
logging.info("Computing LIME.", use_color=True)
for row in tqdm.tqdm(data):
temp = copy.deepcopy(image)
zeros = np.where(row == 0)[0]
......
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import os.path as osp
import numpy as np
import glob
import tqdm
from paddlex.interpret.as_data_reader.readers import read_image
import paddlex.utils.logging as logging
from . import lime_base
from ._session_preparation import compute_features_for_kmeans, gen_user_home
import paddlex.utils.logging as logging
def load_kmeans_model(fname):
......@@ -37,18 +39,24 @@ def combine_normlime_and_lime(lime_weights, g_weights):
for y in pred_labels:
normlized_lime_weights_y = lime_weights[y]
lime_weights_dict = {tuple_w[0]: tuple_w[1] for tuple_w in normlized_lime_weights_y}
lime_weights_dict = {
tuple_w[0]: tuple_w[1]
for tuple_w in normlized_lime_weights_y
}
normlized_g_weight_y = g_weights[y]
normlime_weights_dict = {tuple_w[0]: tuple_w[1] for tuple_w in normlized_g_weight_y}
normlime_weights_dict = {
tuple_w[0]: tuple_w[1]
for tuple_w in normlized_g_weight_y
}
combined_weights[y] = [
(seg_k, lime_weights_dict[seg_k] * normlime_weights_dict[seg_k])
for seg_k in lime_weights_dict.keys()
]
combined_weights[y] = sorted(combined_weights[y],
key=lambda x: np.abs(x[1]), reverse=True)
combined_weights[y] = sorted(
combined_weights[y], key=lambda x: np.abs(x[1]), reverse=True)
return combined_weights
......@@ -66,8 +74,8 @@ def centroid_using_superpixels(features, segments):
regions = regionprops(segments + 1)
one_list = np.zeros((len(np.unique(segments)), features.shape[2]))
for i, r in enumerate(regions):
one_list[i] = features[int(r.centroid[0] + 0.5), int(r.centroid[1] + 0.5), :]
# print(one_list.shape)
one_list[i] = features[int(r.centroid[0] + 0.5), int(r.centroid[1] +
0.5), :]
return one_list
......@@ -80,30 +88,39 @@ def get_feature_for_kmeans(feature_map, segments):
return x
def precompute_normlime_weights(list_data_, predict_fn, num_samples=3000, batch_size=50, save_dir='./tmp'):
def precompute_normlime_weights(list_data_,
predict_fn,
num_samples=3000,
batch_size=50,
save_dir='./tmp'):
# save lime weights and kmeans cluster labels
precompute_lime_weights(list_data_, predict_fn, num_samples, batch_size, save_dir)
precompute_lime_weights(list_data_, predict_fn, num_samples, batch_size,
save_dir)
# load precomputed results, compute normlime weights and save.
fname_list = glob.glob(os.path.join(save_dir, f'lime_weights_s{num_samples}*.npy'))
fname_list = glob.glob(
os.path.join(save_dir, 'lime_weights_s{}*.npy'.format(num_samples)))
return compute_normlime_weights(fname_list, save_dir, num_samples)
def save_one_lime_predict_and_kmean_labels(lime_all_weights, image_pred_labels, cluster_labels, save_path):
def save_one_lime_predict_and_kmean_labels(lime_all_weights, image_pred_labels,
cluster_labels, save_path):
lime_weights = {}
for label in image_pred_labels:
lime_weights[label] = lime_all_weights[label]
for_normlime_weights = {
'lime_weights': lime_weights, # a dict: class_label: (seg_label, weight)
'lime_weights':
lime_weights, # a dict: class_label: (seg_label, weight)
'cluster': cluster_labels # a list with segments as indices.
}
np.save(save_path, for_normlime_weights)
def precompute_lime_weights(list_data_, predict_fn, num_samples, batch_size, save_dir):
def precompute_lime_weights(list_data_, predict_fn, num_samples, batch_size,
save_dir):
root_path = gen_user_home()
root_path = osp.join(root_path, '.paddlex')
h_pre_models = osp.join(root_path, "pre_models")
......@@ -117,17 +134,24 @@ def precompute_lime_weights(list_data_, predict_fn, num_samples, batch_size, sav
for data_index, each_data_ in enumerate(list_data_):
if isinstance(each_data_, str):
save_path = f"lime_weights_s{num_samples}_{each_data_.split('/')[-1].split('.')[0]}.npy"
save_path = "lime_weights_s{}_{}.npy".format(
num_samples, each_data_.split('/')[-1].split('.')[0])
save_path = os.path.join(save_dir, save_path)
else:
save_path = f"lime_weights_s{num_samples}_{data_index}.npy"
save_path = "lime_weights_s{}_{}.npy".format(num_samples,
data_index)
save_path = os.path.join(save_dir, save_path)
if os.path.exists(save_path):
logging.info(save_path + ' exists, not computing this one.', use_color=True)
logging.info(
save_path + ' exists, not computing this one.', use_color=True)
continue
img_file_name = each_data_ if isinstance(each_data_, str) else data_index
logging.info('processing '+ img_file_name + ' [{}/{}]'.format(data_index, len(list_data_)), use_color=True)
img_file_name = each_data_ if isinstance(each_data_,
str) else data_index
logging.info(
'processing ' + img_file_name + ' [{}/{}]'.format(data_index,
len(list_data_)),
use_color=True)
image_show = read_image(each_data_)
result = predict_fn(image_show)
......@@ -156,34 +180,40 @@ def precompute_lime_weights(list_data_, predict_fn, num_samples, batch_size, sav
pred_label = pred_label[:top_k]
algo = lime_base.LimeImageInterpreter()
interpreter = algo.interpret_instance(image_show[0], predict_fn, pred_label, 0,
num_samples=num_samples, batch_size=batch_size)
X = get_feature_for_kmeans(compute_features_for_kmeans(image_show).transpose((1, 2, 0)), interpreter.segments)
interpreter = algo.interpret_instance(
image_show[0],
predict_fn,
pred_label,
0,
num_samples=num_samples,
batch_size=batch_size)
X = get_feature_for_kmeans(
compute_features_for_kmeans(image_show).transpose((1, 2, 0)),
interpreter.segments)
try:
cluster_labels = kmeans_model.predict(X)
except AttributeError:
from sklearn.metrics import pairwise_distances_argmin_min
cluster_labels, _ = pairwise_distances_argmin_min(X, kmeans_model.cluster_centers_)
cluster_labels, _ = pairwise_distances_argmin_min(
X, kmeans_model.cluster_centers_)
save_one_lime_predict_and_kmean_labels(
interpreter.local_weights, pred_label,
cluster_labels,
save_path
)
interpreter.local_weights, pred_label, cluster_labels, save_path)
def compute_normlime_weights(a_list_lime_fnames, save_dir, lime_num_samples):
normlime_weights_all_labels = {}
for f in a_list_lime_fnames:
try:
lime_weights_and_cluster = np.load(f, allow_pickle=True).item()
lime_weights = lime_weights_and_cluster['lime_weights']
cluster = lime_weights_and_cluster['cluster']
except:
print('When loading precomputed LIME result, skipping', f)
logging.info('When loading precomputed LIME result, skipping' +
str(f))
continue
print('Loading precomputed LIME result,', f)
logging.info('Loading precomputed LIME result,' + str(f))
pred_labels = lime_weights.keys()
for y in pred_labels:
normlime_weights = normlime_weights_all_labels.get(y, {})
......@@ -203,32 +233,183 @@ def compute_normlime_weights(a_list_lime_fnames, save_dir, lime_num_samples):
for y in normlime_weights_all_labels:
normlime_weights = normlime_weights_all_labels.get(y, {})
for k in normlime_weights:
normlime_weights[k] = sum(normlime_weights[k]) / len(normlime_weights[k])
normlime_weights[k] = sum(normlime_weights[k]) / len(
normlime_weights[k])
# check normlime
if len(normlime_weights_all_labels.keys()) < max(normlime_weights_all_labels.keys()) + 1:
print(
"\n"
"Warning: !!! \n"
f"There are at least {max(normlime_weights_all_labels.keys()) + 1} classes, "
f"but the NormLIME has results of only {len(normlime_weights_all_labels.keys())} classes. \n"
"It may have cause unstable results in the later computation"
" but can be improved by computing more test samples."
if len(normlime_weights_all_labels.keys()) < max(
normlime_weights_all_labels.keys()) + 1:
logging.info(
"\n" + \
"Warning: !!! \n" + \
"There are at least {} classes, ".format(max(normlime_weights_all_labels.keys()) + 1) + \
"but the NormLIME has results of only {} classes. \n".format(len(normlime_weights_all_labels.keys())) + \
"It may have cause unstable results in the later computation" + \
" but can be improved by computing more test samples." + \
"\n"
)
n = 0
f_out = f'normlime_weights_s{lime_num_samples}_samples_{len(a_list_lime_fnames)}-{n}.npy'
while os.path.exists(
os.path.join(save_dir, f_out)
):
f_out = 'normlime_weights_s{}_samples_{}-{}.npy'.format(
lime_num_samples, len(a_list_lime_fnames), n)
while os.path.exists(os.path.join(save_dir, f_out)):
n += 1
f_out = f'normlime_weights_s{lime_num_samples}_samples_{len(a_list_lime_fnames)}-{n}.npy'
f_out = 'normlime_weights_s{}_samples_{}-{}.npy'.format(
lime_num_samples, len(a_list_lime_fnames), n)
continue
np.save(
os.path.join(save_dir, f_out),
normlime_weights_all_labels
)
np.save(os.path.join(save_dir, f_out), normlime_weights_all_labels)
return os.path.join(save_dir, f_out)
def precompute_global_classifier(dataset,
predict_fn,
save_path,
batch_size=50,
max_num_samples=1000):
from sklearn.linear_model import LogisticRegression
root_path = gen_user_home()
root_path = osp.join(root_path, '.paddlex')
h_pre_models = osp.join(root_path, "pre_models")
if not osp.exists(h_pre_models):
if not osp.exists(root_path):
os.makedirs(root_path)
url = "https://bj.bcebos.com/paddlex/interpret/pre_models.tar.gz"
pdx.utils.download_and_decompress(url, path=root_path)
h_pre_models_kmeans = osp.join(h_pre_models, "kmeans_model.pkl")
kmeans_model = load_kmeans_model(h_pre_models_kmeans)
image_list = []
for item in dataset.file_list:
image_list.append(item[0])
x_data = []
y_labels = []
num_features = len(kmeans_model.cluster_centers_)
logging.info(
"Initialization for NormLIME: Computing each sample in the test list.",
use_color=True)
for each_data_ in tqdm.tqdm(image_list):
x_data_i = np.zeros((num_features))
image_show = read_image(each_data_)
result = predict_fn(image_show)
result = result[0] # only one image here.
c = compute_features_for_kmeans(image_show).transpose((1, 2, 0))
segments = np.zeros((image_show.shape[1], image_show.shape[2]),
np.int32)
num_blocks = 10
height_per_i = segments.shape[0] // num_blocks + 1
width_per_i = segments.shape[1] // num_blocks + 1
for i in range(segments.shape[0]):
for j in range(segments.shape[1]):
segments[i,
j] = i // height_per_i * num_blocks + j // width_per_i
# segments = quickshift(image_show[0], sigma=1)
X = get_feature_for_kmeans(c, segments)
try:
cluster_labels = kmeans_model.predict(X)
except AttributeError:
from sklearn.metrics import pairwise_distances_argmin_min
cluster_labels, _ = pairwise_distances_argmin_min(
X, kmeans_model.cluster_centers_)
for c in cluster_labels:
x_data_i[c] = 1
# x_data_i /= len(cluster_labels)
pred_y_i = np.argmax(result)
y_labels.append(pred_y_i)
x_data.append(x_data_i)
if len(np.unique(y_labels)) < 2:
logging.info("Warning: The test samples in the dataset is limited.\n \
NormLIME may have no effect on the results.\n \
Try to add more test samples, or see the results of LIME.")
num_classes = np.max(np.unique(y_labels)) + 1
normlime_weights_all_labels = {}
for class_index in range(num_classes):
w = np.ones((num_features)) / num_features
normlime_weights_all_labels[class_index] = {
i: wi
for i, wi in enumerate(w)
}
logging.info("Saving the computed normlime_weights in {}".format(
save_path))
np.save(save_path, normlime_weights_all_labels)
return save_path
clf = LogisticRegression(multi_class='multinomial', max_iter=1000)
clf.fit(x_data, y_labels)
num_classes = np.max(np.unique(y_labels)) + 1
normlime_weights_all_labels = {}
if len(y_labels) / len(np.unique(y_labels)) < 3:
logging.info("Warning: The test samples in the dataset is limited.\n \
NormLIME may have no effect on the results.\n \
Try to add more test samples, or see the results of LIME.")
if len(np.unique(y_labels)) == 2:
# binary: clf.coef_ has shape of [1, num_features]
for class_index in range(num_classes):
if class_index not in clf.classes_:
w = np.ones((num_features)) / num_features
normlime_weights_all_labels[class_index] = {
i: wi
for i, wi in enumerate(w)
}
continue
if clf.classes_[0] == class_index:
w = -clf.coef_[0]
else:
w = clf.coef_[0]
# softmax
w = w - np.max(w)
exp_w = np.exp(w * 10)
w = exp_w / np.sum(exp_w)
normlime_weights_all_labels[class_index] = {
i: wi
for i, wi in enumerate(w)
}
else:
# clf.coef_ has shape of [len(np.unique(y_labels)), num_features]
for class_index in range(num_classes):
if class_index not in clf.classes_:
w = np.ones((num_features)) / num_features
normlime_weights_all_labels[class_index] = {
i: wi
for i, wi in enumerate(w)
}
continue
coef_class_index = np.where(clf.classes_ == class_index)[0][0]
w = clf.coef_[coef_class_index]
# softmax
w = w - np.max(w)
exp_w = np.exp(w * 10)
w = exp_w / np.sum(exp_w)
normlime_weights_all_labels[class_index] = {
i: wi
for i, wi in enumerate(w)
}
logging.info("Saving the computed normlime_weights in {}".format(
save_path))
np.save(save_path, normlime_weights_all_labels)
return save_path
......@@ -13,17 +13,26 @@
# limitations under the License.
import numpy as np
import cv2
import copy
def interpretation_predict(model, images):
model.arrange_transforms(
transforms=model.test_transforms, mode='test')
images = images.astype('float32')
model.arrange_transforms(transforms=model.test_transforms, mode='test')
tmp_transforms = copy.deepcopy(model.test_transforms.transforms)
model.test_transforms.transforms = model.test_transforms.transforms[-2:]
new_imgs = []
for i in range(images.shape[0]):
img = images[i]
new_imgs.append(model.test_transforms(img)[0])
images[i] = cv2.cvtColor(images[i], cv2.COLOR_RGB2BGR)
new_imgs.append(model.test_transforms(images[i])[0])
new_imgs = np.array(new_imgs)
result = model.exe.run(
model.test_prog,
out = model.exe.run(model.test_prog,
feed={'image': new_imgs},
fetch_list=list(model.interpretation_feats.values()))
return result
\ No newline at end of file
model.test_transforms.transforms = tmp_transforms
return out
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import cv2
......@@ -20,14 +20,11 @@ import numpy as np
import paddlex as pdx
from .interpretation_predict import interpretation_predict
from .core.interpretation import Interpretation
from .core.normlime_base import precompute_normlime_weights
from .core.normlime_base import precompute_global_classifier
from .core._session_preparation import gen_user_home
def lime(img_file,
model,
num_samples=3000,
batch_size=50,
save_dir='./'):
def lime(img_file, model, num_samples=3000, batch_size=50, save_dir='./'):
"""使用LIME算法将模型预测结果的可解释性可视化。
LIME表示与模型无关的局部可解释性,可以解释任何模型。LIME的思想是以输入样本为中心,
......@@ -47,20 +44,21 @@ def lime(img_file,
assert model.model_type == 'classifier', \
'Now the interpretation visualize only be supported in classifier!'
if model.status != 'Normal':
raise Exception('The interpretation only can deal with the Normal model')
raise Exception(
'The interpretation only can deal with the Normal model')
if not osp.exists(save_dir):
os.makedirs(save_dir)
model.arrange_transforms(
transforms=model.test_transforms, mode='test')
model.arrange_transforms(transforms=model.test_transforms, mode='test')
tmp_transforms = copy.deepcopy(model.test_transforms)
tmp_transforms.transforms = tmp_transforms.transforms[:-2]
img = tmp_transforms(img_file)[0]
img = np.around(img).astype('uint8')
img = np.expand_dims(img, axis=0)
interpreter = None
interpreter = get_lime_interpreter(img, model, num_samples=num_samples, batch_size=batch_size)
interpreter = get_lime_interpreter(
img, model, num_samples=num_samples, batch_size=batch_size)
img_name = osp.splitext(osp.split(img_file)[-1])[0]
interpreter.interpret(img, save_dir=save_dir)
interpreter.interpret(img, save_dir=osp.join(save_dir, img_name))
def normlime(img_file,
......@@ -68,7 +66,8 @@ def normlime(img_file,
dataset=None,
num_samples=3000,
batch_size=50,
save_dir='./'):
save_dir='./',
normlime_weights_file=None):
"""使用NormLIME算法将模型预测结果的可解释性可视化。
NormLIME是利用一定数量的样本来出一个全局的解释。NormLIME会提前计算一定数量的测
......@@ -84,15 +83,16 @@ def normlime(img_file,
num_samples (int): LIME用于学习线性模型的采样数,默认为3000。
batch_size (int): 预测数据batch大小,默认为50。
save_dir (str): 可解释性可视化结果(保存为png格式文件)和中间文件存储路径。
normlime_weights_file (str): NormLIME初始化文件名,若不存在,则计算一次,保存于该路径;若存在,则直接载入。
"""
assert model.model_type == 'classifier', \
'Now the interpretation visualize only be supported in classifier!'
if model.status != 'Normal':
raise Exception('The interpretation only can deal with the Normal model')
raise Exception(
'The interpretation only can deal with the Normal model')
if not osp.exists(save_dir):
os.makedirs(save_dir)
model.arrange_transforms(
transforms=model.test_transforms, mode='test')
model.arrange_transforms(transforms=model.test_transforms, mode='test')
tmp_transforms = copy.deepcopy(model.test_transforms)
tmp_transforms.transforms = tmp_transforms.transforms[:-2]
img = tmp_transforms(img_file)[0]
......@@ -100,28 +100,30 @@ def normlime(img_file,
img = np.expand_dims(img, axis=0)
interpreter = None
if dataset is None:
raise Exception('The dataset is None. Cannot implement this kind of interpretation')
interpreter = get_normlime_interpreter(img, model, dataset,
num_samples=num_samples, batch_size=batch_size,
save_dir=save_dir)
raise Exception(
'The dataset is None. Cannot implement this kind of interpretation')
interpreter = get_normlime_interpreter(
img,
model,
dataset,
num_samples=num_samples,
batch_size=batch_size,
save_dir=save_dir,
normlime_weights_file=normlime_weights_file)
img_name = osp.splitext(osp.split(img_file)[-1])[0]
interpreter.interpret(img, save_dir=save_dir)
interpreter.interpret(img, save_dir=osp.join(save_dir, img_name))
def get_lime_interpreter(img, model, num_samples=3000, batch_size=50):
def predict_func(image):
image = image.astype('float32')
for i in range(image.shape[0]):
image[i] = cv2.cvtColor(image[i], cv2.COLOR_RGB2BGR)
tmp_transforms = copy.deepcopy(model.test_transforms.transforms)
model.test_transforms.transforms = model.test_transforms.transforms[-2:]
out = interpretation_predict(model, image)
model.test_transforms.transforms = tmp_transforms
return out[0]
labels_name = None
if hasattr(model, 'labels'):
labels_name = model.labels
interpreter = Interpretation('lime',
interpreter = Interpretation(
'lime',
predict_func,
labels_name,
num_samples=num_samples,
......@@ -129,23 +131,17 @@ def get_lime_interpreter(img, model, num_samples=3000, batch_size=50):
return interpreter
def get_normlime_interpreter(img, model, dataset, num_samples=3000, batch_size=50, save_dir='./'):
def precompute_predict_func(image):
image = image.astype('float32')
tmp_transforms = copy.deepcopy(model.test_transforms.transforms)
model.test_transforms.transforms = model.test_transforms.transforms[-2:]
out = interpretation_predict(model, image)
model.test_transforms.transforms = tmp_transforms
return out[0]
def get_normlime_interpreter(img,
model,
dataset,
num_samples=3000,
batch_size=50,
save_dir='./',
normlime_weights_file=None):
def predict_func(image):
image = image.astype('float32')
for i in range(image.shape[0]):
image[i] = cv2.cvtColor(image[i], cv2.COLOR_RGB2BGR)
tmp_transforms = copy.deepcopy(model.test_transforms.transforms)
model.test_transforms.transforms = model.test_transforms.transforms[-2:]
out = interpretation_predict(model, image)
model.test_transforms.transforms = tmp_transforms
return out[0]
labels_name = None
if dataset is not None:
labels_name = dataset.labels
......@@ -157,28 +153,29 @@ def get_normlime_interpreter(img, model, dataset, num_samples=3000, batch_size=5
os.makedirs(root_path)
url = "https://bj.bcebos.com/paddlex/interpret/pre_models.tar.gz"
pdx.utils.download_and_decompress(url, path=root_path)
npy_dir = precompute_for_normlime(precompute_predict_func,
if osp.exists(osp.join(save_dir, normlime_weights_file)):
normlime_weights_file = osp.join(save_dir, normlime_weights_file)
try:
np.load(normlime_weights_file, allow_pickle=True).item()
except:
normlime_weights_file = precompute_global_classifier(
dataset,
num_samples=num_samples,
batch_size=batch_size,
save_dir=save_dir)
interpreter = Interpretation('normlime',
predict_func,
labels_name,
num_samples=num_samples,
batch_size=batch_size,
normlime_weights=npy_dir)
return interpreter
save_path=normlime_weights_file,
batch_size=batch_size)
else:
normlime_weights_file = precompute_global_classifier(
dataset,
predict_func,
save_path=osp.join(save_dir, normlime_weights_file),
batch_size=batch_size)
def precompute_for_normlime(predict_func, dataset, num_samples=3000, batch_size=50, save_dir='./'):
image_list = []
for item in dataset.file_list:
image_list.append(item[0])
return precompute_normlime_weights(
image_list,
interpreter = Interpretation(
'normlime',
predict_func,
labels_name,
num_samples=num_samples,
batch_size=batch_size,
save_dir=save_dir)
normlime_weights=normlime_weights_file)
return interpreter
......@@ -17,5 +17,7 @@ from . import cv
UNet = cv.models.UNet
DeepLabv3p = cv.models.DeepLabv3p
HRNet = cv.models.HRNet
FastSCNN = cv.models.FastSCNN
transforms = cv.transforms.seg_transforms
visualize = cv.models.utils.visualize.visualize_segmentation
......@@ -31,4 +31,4 @@ def export_quant_model(model,
batch_size=batch_size,
batch_num=batch_num,
save_dir=save_dir,
cache_dir='./temp')
cache_dir=cache_dir)
......@@ -110,7 +110,7 @@ class LabelMe2COCO(X2COCO):
annotation["segmentation"] = [list(np.asarray(points).flatten())]
annotation["iscrowd"] = 0
annotation["image_id"] = image_id + 1
annotation["bbox"] = list(map(float, get_bbox(height, width, points)))
annotation["bbox"] = list(map(float, self.get_bbox(height, width, points)))
annotation["area"] = annotation["bbox"][2] * annotation["bbox"][3]
annotation["category_id"] = label_to_num[label]
annotation["id"] = object_id + 1
......
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from . import logging
......
......@@ -29,13 +29,11 @@ def log(level=2, message="", use_color=False):
current_time = time.strftime("%Y-%m-%d %H:%M:%S", time_array)
if paddlex.log_level >= level:
if use_color:
print("\033[1;31;40m{} [{}]\t{}\033[0m".format(
current_time, levels[level],
message).encode("utf-8").decode("latin1"))
print("\033[1;31;40m{} [{}]\t{}\033[0m".format(current_time, levels[
level], message).encode("utf-8").decode("latin1"))
else:
print(
"{} [{}]\t{}".format(current_time, levels[level],
message).encode("utf-8").decode("latin1"))
print("{} [{}]\t{}".format(current_time, levels[level], message)
.encode("utf-8").decode("latin1"))
sys.stdout.flush()
......@@ -47,9 +45,11 @@ def info(message="", use_color=False):
log(level=2, message=message, use_color=use_color)
def warning(message="", use_color=False):
def warning(message="", use_color=True):
log(level=1, message=message, use_color=use_color)
def error(message="", use_color=False):
def error(message="", use_color=True, exit=True):
log(level=0, message=message, use_color=use_color)
if exit:
sys.exit(-1)
......@@ -67,8 +67,8 @@ def parse_param_file(param_file, return_shape=True):
f.close()
return tuple(tensor_desc.dims)
if tensor_desc.data_type != 5:
raise Exception(
"Unexpected data type while parse {}".format(param_file))
raise Exception("Unexpected data type while parse {}".format(
param_file))
data_size = 4
for i in range(len(tensor_shape)):
data_size *= tensor_shape[i]
......@@ -139,7 +139,12 @@ def load_pdparams(exe, main_prog, model_dir):
vars_to_load = list()
import pickle
with open(osp.join(model_dir, 'model.pdparams'), 'rb') as f:
if osp.isfile(model_dir):
params_file = model_dir
else:
params_file = osp.join(model_dir, 'model.pdparams')
with open(params_file, 'rb') as f:
params_dict = pickle.load(f) if six.PY2 else pickle.load(
f, encoding='latin1')
unused_vars = list()
......@@ -185,8 +190,8 @@ def is_belong_to_optimizer(var):
import paddle.fluid as fluid
from paddle.fluid.proto.framework_pb2 import VarType
if not (isinstance(var, fluid.framework.Parameter)
or var.desc.need_check_feed()):
if not (isinstance(var, fluid.framework.Parameter) or
var.desc.need_check_feed()):
return is_persistable(var)
return False
......@@ -206,9 +211,8 @@ def load_pdopt(exe, main_prog, model_dir):
if len(optimizer_var_list) > 0:
for var in optimizer_var_list:
if var.name not in opt_dict:
raise Exception(
"{} is not in saved paddlex optimizer, {}".format(
var.name, exception_message))
raise Exception("{} is not in saved paddlex optimizer, {}".
format(var.name, exception_message))
if var.shape != opt_dict[var.name].shape:
raise Exception(
"Shape of optimizer variable {} doesn't match.(Last: {}, Now: {}), {}"
......@@ -227,9 +231,8 @@ def load_pdopt(exe, main_prog, model_dir):
"There is no optimizer parameters in the model, please set the optimizer!"
)
else:
logging.info(
"There are {} optimizer parameters in {} are loaded.".format(
len(optimizer_var_list), model_dir))
logging.info("There are {} optimizer parameters in {} are loaded.".
format(len(optimizer_var_list), model_dir))
def load_pretrain_weights(exe,
......@@ -239,6 +242,12 @@ def load_pretrain_weights(exe,
resume=False):
if not osp.exists(weights_dir):
raise Exception("Path {} not exists.".format(weights_dir))
if osp.isfile(weights_dir):
if not weights_dir.endswith('.pdparams'):
raise Exception("File {} is not a paddle parameter file".format(
weights_dir))
load_pdparams(exe, main_prog, weights_dir)
return
if osp.exists(osp.join(weights_dir, "model.pdparams")):
load_pdparams(exe, main_prog, weights_dir)
if resume:
......@@ -255,9 +264,8 @@ def load_pretrain_weights(exe,
if not isinstance(var, fluid.framework.Parameter):
continue
if not osp.exists(osp.join(weights_dir, var.name)):
logging.debug(
"[SKIP] Pretrained weight {}/{} doesn't exist".format(
weights_dir, var.name))
logging.debug("[SKIP] Pretrained weight {}/{} doesn't exist".
format(weights_dir, var.name))
continue
pretrained_shape = parse_param_file(osp.join(weights_dir, var.name))
actual_shape = tuple(var.shape)
......@@ -269,11 +277,9 @@ def load_pretrain_weights(exe,
vars_to_load.append(var)
logging.debug("Weight {} will be load".format(var.name))
fluid.io.load_vars(
executor=exe,
dirname=weights_dir,
main_program=main_prog,
vars=vars_to_load)
params_dict = fluid.io.load_program_state(
weights_dir, var_list=vars_to_load)
fluid.io.set_program_state(main_prog, params_dict)
if len(vars_to_load) == 0:
logging.warning(
"There is no pretrain weights loaded, maybe you should check you pretrain model!"
......@@ -319,9 +325,8 @@ def load_pretrain_weights(exe,
"There is no optimizer parameters in the model, please set the optimizer!"
)
else:
logging.info(
"There are {} optimizer parameters in {} are loaded.".format(
len(optimizer_var_list), weights_dir))
logging.info("There are {} optimizer parameters in {} are loaded.".
format(len(optimizer_var_list), weights_dir))
class EarlyStop:
......@@ -344,12 +349,12 @@ class EarlyStop:
self.max = current_score
return False
else:
if (abs(self.score - current_score) < self.thresh
or current_score < self.score):
if (abs(self.score - current_score) < self.thresh or
current_score < self.score):
self.counter += 1
self.score = current_score
logging.debug(
"EarlyStopping: %i / %i" % (self.counter, self.patience))
logging.debug("EarlyStopping: %i / %i" %
(self.counter, self.patience))
if self.counter >= self.patience:
logging.info("EarlyStopping: Stop training")
return True
......
......@@ -19,7 +19,7 @@ long_description = "PaddleX. A end-to-end deeplearning model development toolkit
setuptools.setup(
name="paddlex",
version='1.0.2',
version='1.0.6',
author="paddlex",
author_email="paddlex@baidu.com",
description=long_description,
......
#!/bin/bash
set -e
readonly VERSION="3.8"
version=$(clang-format -version)
if ! [[ $version == *"$VERSION"* ]]; then
echo "clang-format version check failed."
echo "a version contains '$VERSION' is needed, but get '$version'"
echo "you can install the right version, and make an soft-link to '\$PATH' env"
exit -1
fi
clang-format $@
#!/bin/bash
TOTAL_ERRORS=0
if [[ ! $TRAVIS_BRANCH ]]; then
# install cpplint on local machine.
if [[ ! $(which cpplint) ]]; then
pip install cpplint
fi
# diff files on local machine.
files=$(git diff --cached --name-status | awk '$1 != "D" {print $2}')
else
# diff files between PR and latest commit on Travis CI.
branch_ref=$(git rev-parse "$TRAVIS_BRANCH")
head_ref=$(git rev-parse HEAD)
files=$(git diff --name-status $branch_ref $head_ref | awk '$1 != "D" {print $2}')
fi
# The trick to remove deleted files: https://stackoverflow.com/a/2413151
for file in $files; do
if [[ $file =~ ^(patches/.*) ]]; then
continue;
else
cpplint --filter=-readability/fn_size $file;
TOTAL_ERRORS=$(expr $TOTAL_ERRORS + $?);
fi
done
exit $TOTAL_ERRORS
......@@ -14,18 +14,22 @@ model_file = 'https://bj.bcebos.com/paddlex/interpret/mini_imagenet_veg_mobilene
pdx.utils.download_and_decompress(model_file, path='./')
# 加载模型
model = pdx.load_model('mini_imagenet_veg_mobilenetv2')
model_file = 'mini_imagenet_veg_mobilenetv2'
model = pdx.load_model(model_file)
# 定义测试所用的数据集
dataset = 'mini_imagenet_veg'
test_dataset = pdx.datasets.ImageNet(
data_dir='mini_imagenet_veg',
file_list=osp.join('mini_imagenet_veg', 'test_list.txt'),
label_list=osp.join('mini_imagenet_veg', 'labels.txt'),
data_dir=dataset,
file_list=osp.join(dataset, 'test_list.txt'),
label_list=osp.join(dataset, 'labels.txt'),
transforms=model.test_transforms)
# 可解释性可视化
pdx.interpret.normlime(
'mini_imagenet_veg/mushroom/n07734744_1106.JPEG',
test_dataset.file_list[0][0],
model,
test_dataset,
save_dir='./')
save_dir='./',
normlime_weights_file='{}_{}.npy'.format(
dataset.split('/')[-1], model.model_name))
import os
# 选择使用0号卡
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
import paddlex as pdx
from paddlex.seg import transforms
# 下载和解压视盘分割数据集
optic_dataset = 'https://bj.bcebos.com/paddlex/datasets/optic_disc_seg.tar.gz'
pdx.utils.download_and_decompress(optic_dataset, path='./')
# 定义训练和验证时的transforms
train_transforms = transforms.Compose([
transforms.RandomHorizontalFlip(), transforms.ResizeRangeScaling(),
transforms.RandomPaddingCrop(crop_size=512), transforms.Normalize()
])
eval_transforms = transforms.Compose([
transforms.ResizeByLong(long_size=512),
transforms.Padding(target_size=512), transforms.Normalize()
])
# 定义训练和验证所用的数据集
train_dataset = pdx.datasets.SegDataset(
data_dir='optic_disc_seg',
file_list='optic_disc_seg/train_list.txt',
label_list='optic_disc_seg/labels.txt',
transforms=train_transforms,
shuffle=True)
eval_dataset = pdx.datasets.SegDataset(
data_dir='optic_disc_seg',
file_list='optic_disc_seg/val_list.txt',
label_list='optic_disc_seg/labels.txt',
transforms=eval_transforms)
# 初始化模型,并进行训练
# 可使用VisualDL查看训练指标
# VisualDL启动方式: visualdl --logdir output/unet/vdl_log --port 8001
# 浏览器打开 https://0.0.0.0:8001即可
# 其中0.0.0.0为本机访问,如为远程服务, 改成相应机器IP
num_classes = len(train_dataset.labels)
model = pdx.seg.HRNet(num_classes=num_classes)
model.train(
num_epochs=20,
train_dataset=train_dataset,
train_batch_size=4,
eval_dataset=eval_dataset,
learning_rate=0.01,
save_dir='output/hrnet',
use_vdl=True)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册