Commit 6925722e authored by FlyingQianMM

Merge branch 'develop' of https://github.com/PaddlePaddle/PaddleX into develop_kong

@@ -23,6 +23,7 @@
   files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|py)$
   exclude: (?!.*third_party)^.*$
+- repo: local
   hooks:
   - id: clang-format-with-version-check
     name: clang-format
@@ -31,6 +32,7 @@
     language: system
     files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto)$
+- repo: local
   hooks:
   - id: cpplint-cpp-source
     name: cpplint
...
@@ -83,7 +83,7 @@ class ResizeByShort : public Transform {
     } else {
       max_size_ = -1;
     }
-  };
+  }
   virtual bool Run(cv::Mat* im, ImageBlob* data);

 private:
@@ -96,7 +96,7 @@ class ResizeByLong : public Transform {
  public:
   virtual void Init(const YAML::Node& item) {
     long_size_ = item["long_size"].as<int>();
-  };
+  }
   virtual bool Run(cv::Mat* im, ImageBlob* data);

 private:
@@ -167,9 +167,6 @@ class Padding : public Transform {
         height_ = item["target_size"].as<std::vector<int>>()[1];
       }
     }
-    if (item["im_padding_value"].IsDefined()) {
-      value_ = item["im_padding_value"].as<std::vector<float>>();
-    }
   }
   virtual bool Run(cv::Mat* im, ImageBlob* data);
@@ -177,7 +174,6 @@ class Padding : public Transform {
   int coarsest_stride_ = -1;
   int width_ = 0;
   int height_ = 0;
-  std::vector<float> value_;
 };

 class Transforms {
...
@@ -98,7 +98,7 @@ bool Model::load_config(const std::string& model_dir) {
 bool Model::preprocess(const cv::Mat& input_im, ImageBlob* blob) {
   cv::Mat im = input_im.clone();
-  if (!transforms_.Run(&im, &inputs_)) {
+  if (!transforms_.Run(&im, blob)) {
     return false;
   }
   return true;
...
@@ -60,3 +60,9 @@
## 11. Every new training run re-downloads the pretrained model. How can I download it just once?
> 1. You can solve this the same way as in question 9.
> 2. Set `paddlex.pretrain_dir` before each training run, e.g. `paddlex.pretrain_dir='/usrname/paddlex'`. Downloaded pretrained models are then stored under `/usrname/paddlex`, and models already present in that directory are not downloaded again.
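A minimal sketch of option 2 (the attribute name is taken directly from the answer above):
```python
import paddlex

# Point PaddleX at a fixed cache directory before training; pretrained
# weights already present there are reused instead of being re-downloaded.
paddlex.pretrain_dir = '/usrname/paddlex'
```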
## 12. The program fails to start with "Failed to execute script PaddleX". How do I fix it?
> 1. Check whether the path to the PaddleX program on the target machine contains Chinese characters. Non-ASCII paths are currently unsupported; try moving the program to a directory with an English-only path.
> 2. On Windows 7 or Windows Server 2012, the cause is missing DLLs that OpenCV depends on (MFPlat.DLL, MF.dll, MFReadWrite.dll, etc.). Install the Desktop Experience feature as follows: open Server Manager via "My Computer" --> "Properties" --> "Manage", click "Manage" in the top-right corner and choose "Add Roles and Features". Click "Server Selection" --> "Features", scroll to the bottom, expand "User Interfaces and Infrastructure", check "Desktop Experience", and click "Install". After installation completes, try running PaddleX again.
> 3. Check whether another PaddleX program or process is already running on the target machine; if so, quit it or reboot the machine and see whether that resolves the issue.
> 4. Confirm that the user running the program has administrator privileges; if not, try running it as administrator.
\ No newline at end of file
@@ -15,7 +15,7 @@ paddlex.cls.ResNet50(num_classes=1000)
### train (training API)
```python
train(self, num_epochs, train_dataset, train_batch_size=64, eval_dataset=None, save_interval_epochs=1, log_interval_steps=2, save_dir='output', pretrain_weights='IMAGENET', optimizer=None, learning_rate=0.025, warmup_steps=0, warmup_start_lr=0.0, lr_decay_epochs=[30, 60, 90], lr_decay_gamma=0.1, use_vdl=False, sensitivities_file=None, eval_metric_loss=0.05, early_stop=False, early_stop_patience=5, resume_checkpoint=None)
```
>
> **Parameters**
@@ -30,6 +30,8 @@ train(self, num_epochs, train_dataset, train_batch_size=64, eval_dataset=None, s
> > - **pretrain_weights** (str): if set to a path, load pretrained weights from that path; if the string 'IMAGENET', automatically download weights pretrained on ImageNet; if None, use no pretrained weights. Defaults to 'IMAGENET'.
> > - **optimizer** (paddle.fluid.optimizer): optimizer. When None, the default optimizer is used: a fluid.layers.piecewise_decay schedule with fluid.optimizer.Momentum.
> > - **learning_rate** (float): initial learning rate of the default optimizer. Defaults to 0.025.
> > - **warmup_steps** (int): number of warmup steps for the default optimizer; over these steps the learning rate grows linearly from warmup_start_lr to the configured learning_rate. Defaults to 0.
> > - **warmup_start_lr** (float): starting warmup learning rate of the default optimizer. Defaults to 0.0.
> > - **lr_decay_epochs** (list): epochs at which the default optimizer decays the learning rate. Defaults to [30, 60, 90].
> > - **lr_decay_gamma** (float): learning-rate decay factor of the default optimizer. Defaults to 0.1.
> > - **use_vdl** (bool): whether to use VisualDL for visualization. Defaults to False.
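A hedged sketch of a train() call exercising the new warmup parameters (the dataset objects are placeholders, built as in the dataset API docs; all numbers are illustrative):
```python
import paddlex as pdx

# train_dataset / eval_dataset: placeholders, see paddlex.datasets docs.
model = pdx.cls.ResNet50(num_classes=len(train_dataset.labels))
model.train(
    num_epochs=120,
    train_dataset=train_dataset,
    train_batch_size=64,
    eval_dataset=eval_dataset,
    learning_rate=0.025,
    warmup_steps=500,        # lr rises linearly from warmup_start_lr ...
    warmup_start_lr=0.0025,  # ... to learning_rate over the first 500 steps
    lr_decay_epochs=[30, 60, 90],
    lr_decay_gamma=0.1,
    save_dir='output/resnet50')
```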
...
@@ -122,3 +122,64 @@ paddlex.cls.transforms.RandomDistort(brightness_range=0.9, brightness_prob=0.5,
* **saturation_prob** (float): probability of randomly adjusting saturation. Defaults to 0.5.
* **hue_range** (int): range of the hue factor. Defaults to 18.
* **hue_prob** (float): probability of randomly adjusting hue. Defaults to 0.5.
## ComposedClsTransforms class
```python
paddlex.cls.transforms.ComposedClsTransforms(mode, crop_size=[224, 224], mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
```
A pre-assembled data-processing pipeline for classification models. Developers can use ComposedClsTransforms directly instead of composing transforms by hand. The class already includes the [RandomCrop](#RandomCrop) and [RandomHorizontalFlip](#RandomHorizontalFlip) augmentations; additional augmentations can still be added through the [add_augmenters API](#add_augmenters).

ComposedClsTransforms consists of the following steps:
> Training:
> > 1. Randomly crop a sub-image from the input and resize it to crop_size
> > 2. Horizontally flip the output of step 1 with probability 0.5
> > 3. Normalize the image
> Validation/prediction:
> > 1. Resize the image proportionally so that its shorter side is crop_size[0] * 1.14
> > 2. Center-crop a crop_size image
> > 3. Normalize the image
### Parameters
* **mode** (str): the phase the transforms are used in; one of 'train', 'eval', or 'test'
* **crop_size** (int|list): image size fed to the model; defaults to [224, 224] (independent of the original image size — the steps above process the original image down to this size for training)
* **mean** (list): image mean. Defaults to [0.485, 0.456, 0.406].
* **std** (list): image standard deviation. Defaults to [0.229, 0.224, 0.225].
### Adding augmentations
```python
ComposedClsTransforms.add_augmenters(augmenters)
```
> **Parameters**
> * **augmenters** (list): list of augmentations
#### Example
```python
import paddlex as pdx
from paddlex.cls import transforms
train_transforms = transforms.ComposedClsTransforms(mode='train', crop_size=[320, 320])
eval_transforms = transforms.ComposedClsTransforms(mode='eval', crop_size=[320, 320])

# add augmentations
import imgaug.augmenters as iaa
train_transforms.add_augmenters([
    transforms.RandomDistort(),
    iaa.blur.GaussianBlur(sigma=(0.0, 3.0))
])
```
The code above is equivalent to:
```python
import paddlex as pdx
from paddlex.cls import transforms
import imgaug.augmenters as iaa

train_transforms = transforms.Compose([
    transforms.RandomDistort(),
    iaa.blur.GaussianBlur(sigma=(0.0, 3.0)),
    # the two transforms above were added via add_augmenters
    transforms.RandomCrop(crop_size=320),
    transforms.RandomHorizontalFlip(prob=0.5),
    transforms.Normalize()
])
eval_transforms = transforms.Compose([
    transforms.ResizeByShort(short_size=int(320*1.14)),
    transforms.CenterCrop(crop_size=320),
    transforms.Normalize()
])
```
@@ -167,3 +167,133 @@ paddlex.det.transforms.RandomCrop(aspect_ratio=[.5, 2.], thresholds=[.0, .1, .3,
* **num_attempts** (int): number of attempts before giving up the search for a valid crop region. Defaults to 50.
* **allow_no_crop** (bool): whether skipping the crop entirely is allowed. Defaults to True.
* **cover_all_box** (bool): whether all ground-truth boxes must lie inside the crop region. Defaults to False.
## ComposedRCNNTransforms class
```python
paddlex.det.transforms.ComposedRCNNTransforms(mode, min_max_size=[800, 1333], mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
```
A pre-assembled data-processing pipeline for the FasterRCNN detection and MaskRCNN instance-segmentation models. Developers can use ComposedRCNNTransforms directly instead of composing transforms by hand. The class already includes the [RandomHorizontalFlip](#RandomHorizontalFlip) augmentation; additional augmentations can still be added through the [add_augmenters API](#add_augmenters).

ComposedRCNNTransforms consists of the following steps:
> Training:
> > 1. Horizontally flip the image with probability 0.5
> > 2. Normalize the image
> > 3. Resize the image with [ResizeByShort](#ResizeByShort) according to the min_max_size parameter
> > 4. Pad the image's height and width to multiples of 32 with [Padding](#Padding)
> Validation/prediction:
> > 1. Normalize the image
> > 2. Resize the image with [ResizeByShort](#ResizeByShort) according to the min_max_size parameter
> > 3. Pad the image's height and width to multiples of 32 with [Padding](#Padding)
### Parameters
* **mode** (str): the phase the transforms are used in; one of 'train', 'eval', or 'test'
* **min_max_size** (list): target lengths of the image's shorter and longer sides as fed to the model; see [ResizeByShort](#ResizeByShort) (independent of the original image size — the steps above process the original image accordingly). Defaults to [800, 1333]
* **mean** (list): image mean. Defaults to [0.485, 0.456, 0.406].
* **std** (list): image standard deviation. Defaults to [0.229, 0.224, 0.225].
### Adding augmentations
```python
ComposedRCNNTransforms.add_augmenters(augmenters)
```
> **Parameters**
> * **augmenters** (list): list of augmentations
#### Example
```python
import paddlex as pdx
from paddlex.det import transforms
train_transforms = transforms.ComposedRCNNTransforms(mode='train', min_max_size=[800, 1333])
eval_transforms = transforms.ComposedRCNNTransforms(mode='eval', min_max_size=[800, 1333])

# add augmentations
import imgaug.augmenters as iaa
train_transforms.add_augmenters([
    transforms.RandomDistort(),
    iaa.blur.GaussianBlur(sigma=(0.0, 3.0))
])
```
The code above is equivalent to:
```python
import paddlex as pdx
from paddlex.det import transforms
import imgaug.augmenters as iaa

train_transforms = transforms.Compose([
    transforms.RandomDistort(),
    iaa.blur.GaussianBlur(sigma=(0.0, 3.0)),
    # the two transforms above were added via add_augmenters
    transforms.RandomHorizontalFlip(prob=0.5),
    transforms.Normalize(),
    transforms.ResizeByShort(short_size=800, max_size=1333),
    transforms.Padding(coarsest_stride=32)
])
eval_transforms = transforms.Compose([
    transforms.Normalize(),
    transforms.ResizeByShort(short_size=800, max_size=1333),
    transforms.Padding(coarsest_stride=32)
])
```
## ComposedYOLOv3Transforms class
```python
paddlex.det.transforms.ComposedYOLOv3Transforms(mode, shape=[608, 608], mixup_epoch=250, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
```
A pre-assembled data-processing pipeline for the YOLOv3 detection model. Developers can use ComposedYOLOv3Transforms directly instead of composing transforms by hand. The class already includes five augmentations: [MixupImage](#MixupImage), [RandomDistort](#RandomDistort), [RandomExpand](#RandomExpand), [RandomCrop](#RandomCrop), and [RandomHorizontalFlip](#RandomHorizontalFlip); additional augmentations can still be added through the [add_augmenters API](#add_augmenters).

ComposedYOLOv3Transforms consists of the following steps:
> Training:
> > 1. Apply the MixupImage strategy during the first mixup_epoch epochs
> > 2. Randomly distort the image: brightness, contrast, saturation, and hue
> > 3. Randomly expand the image
> > 4. Randomly crop the image
> > 5. Resize the output of step 4 to the size given by the shape parameter
> > 6. Horizontally flip the image with probability 0.5
> > 7. Normalize the image
> Validation/prediction:
> > 1. Resize the image to the size given by the shape parameter
> > 2. Normalize the image
### Parameters
* **mode** (str): the phase the transforms are used in; one of 'train', 'eval', or 'test'
* **shape** (list): image size fed to the model (independent of the original image size — the steps above process the original image down to this size). Defaults to [608, 608]
* **mixup_epoch** (int): use the mixup strategy during the first mixup_epoch epochs of training; -1 disables mixup. Defaults to 250.
* **mean** (list): image mean. Defaults to [0.485, 0.456, 0.406].
* **std** (list): image standard deviation. Defaults to [0.229, 0.224, 0.225].
### Adding augmentations
```python
ComposedYOLOv3Transforms.add_augmenters(augmenters)
```
> **Parameters**
> * **augmenters** (list): list of augmentations
#### Example
```python
import paddlex as pdx
from paddlex.det import transforms
train_transforms = transforms.ComposedYOLOv3Transforms(mode='train', shape=[480, 480])
eval_transforms = transforms.ComposedYOLOv3Transforms(mode='eval', shape=[480, 480])

# add augmentations
import imgaug.augmenters as iaa
train_transforms.add_augmenters([
    iaa.blur.GaussianBlur(sigma=(0.0, 3.0))
])
```
The code above is equivalent to:
```python
import paddlex as pdx
from paddlex.det import transforms
import imgaug.augmenters as iaa

train_transforms = transforms.Compose([
    iaa.blur.GaussianBlur(sigma=(0.0, 3.0)),
    # the transform above was added via add_augmenters
    transforms.MixupImage(mixup_epoch=250),
    transforms.RandomDistort(),
    transforms.RandomExpand(),
    transforms.RandomCrop(),
    transforms.Resize(target_size=480, interp='RANDOM'),
    transforms.RandomHorizontalFlip(prob=0.5),
    transforms.Normalize()
])
eval_transforms = transforms.Compose([
    transforms.Resize(target_size=480, interp='CUBIC'),
    transforms.Normalize()
])
```
@@ -166,3 +166,63 @@ paddlex.seg.transforms.RandomDistort(brightness_range=0.5, brightness_prob=0.5,
* **saturation_prob** (float): probability of randomly adjusting saturation. Defaults to 0.5.
* **hue_range** (int): range of the hue factor. Defaults to 18.
* **hue_prob** (float): probability of randomly adjusting hue. Defaults to 0.5.
## ComposedSegTransforms class
```python
paddlex.seg.transforms.ComposedSegTransforms(mode, train_crop_size=[769, 769], mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
```
A pre-assembled data-processing pipeline for the DeepLab and UNet semantic-segmentation models. Developers can use ComposedSegTransforms directly instead of composing transforms by hand. The class already includes three augmentations: [RandomHorizontalFlip](#RandomHorizontalFlip), [ResizeStepScaling](#ResizeStepScaling), and [RandomPaddingCrop](#RandomPaddingCrop); additional augmentations can still be added through the [add_augmenters API](#add_augmenters).

ComposedSegTransforms consists of the following steps:
> Training:
> > 1. Horizontally flip the image with probability 0.5
> > 2. Randomly resize the original image by varying ratios
> > 3. Randomly crop a train_crop_size sub-image from the image; if the crop is smaller than train_crop_size, pad it to that size
> > 4. Normalize the image
> Prediction:
> > 1. Normalize the image
### Parameters
* **mode** (str): the phase the transforms are used in; one of 'train', 'eval', or 'test'
* **train_crop_size** (list): image size fed to the model after the random crop and resize during training (not needed for validation or prediction, where the original image size is used automatically; independent of the original image size — the steps above process the original image accordingly). Defaults to [769, 769]
* **mean** (list): image mean. Defaults to [0.485, 0.456, 0.406].
* **std** (list): image standard deviation. Defaults to [0.229, 0.224, 0.225].
### Adding augmentations
```python
ComposedSegTransforms.add_augmenters(augmenters)
```
> **Parameters**
> * **augmenters** (list): list of augmentations
#### Example
```python
import paddlex as pdx
from paddlex.seg import transforms
train_transforms = transforms.ComposedSegTransforms(mode='train', train_crop_size=[512, 512])
eval_transforms = transforms.ComposedSegTransforms(mode='eval')

# add augmentations
import imgaug.augmenters as iaa
train_transforms.add_augmenters([
    transforms.RandomDistort(),
    iaa.blur.GaussianBlur(sigma=(0.0, 3.0))
])
```
The code above is equivalent to:
```python
import paddlex as pdx
from paddlex.seg import transforms
import imgaug.augmenters as iaa

train_transforms = transforms.Compose([
    transforms.RandomDistort(),
    iaa.blur.GaussianBlur(sigma=(0.0, 3.0)),
    # the two transforms above were added via add_augmenters
    transforms.RandomHorizontalFlip(prob=0.5),
    transforms.ResizeStepScaling(),
    transforms.RandomPaddingCrop(crop_size=[512, 512]),
    transforms.Normalize()
])
eval_transforms = transforms.Compose([
    transforms.Normalize()
])
```
@@ -23,3 +23,35 @@ Batch Size is the number of samples the model processes at once during training; if
- [Instance segmentation MaskRCNN — train](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/instance_segmentation.html#train)
- [Semantic segmentation DeepLabv3p — train](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#train)
- [Semantic segmentation UNet](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#id2)
## About lr_decay_epoch, warmup_steps, and related parameters

When training models with PaddleX (or other deep-learning frameworks) you often encounter parameters such as lr_decay_epoch, warmup_steps, and warmup_start_lr. This section explains what they do.

These parameters all control how the learning rate changes over the course of training. Suppose learning_rate is set to 0.1: normally the model would train with a constant 0.1 throughout, but to reach better accuracy we usually do not want the learning rate to stay fixed.

### warmup_steps and warmup_start_lr

Training usually starts from pretrained weights; for example, a detection model starts from a backbone pretrained on ImageNet. Because your own data can differ substantially from ImageNet, overly large gradients at the very start can destabilize training, so it helps to begin with a small learning rate that slowly grows to the configured value. That is exactly what `warmup_steps` and `warmup_start_lr` do: training starts at `warmup_start_lr`, and the learning rate grows linearly to the configured learning rate within `warmup_steps` steps.

### lr_decay_epochs and lr_decay_gamma

`lr_decay_epochs` makes the learning rate decay step-wise in the later phase of training. It is usually a list such as [6, 8, 10], meaning the learning rate decays once at epoch 6, again at epoch 8, and again at epoch 10. Each decay multiplies the current learning rate by lr_decay_gamma. A sketch of the combined schedule follows below.
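A minimal pure-Python sketch (not a PaddleX API) of the schedule the two sections above describe, assuming a hypothetical 500 steps per epoch:
```python
def lr_at_step(step, base_lr=0.1, warmup_steps=1000, warmup_start_lr=0.0,
               lr_decay_epochs=(6, 8, 10), lr_decay_gamma=0.1,
               steps_per_epoch=500):
    # Linear warmup: grow from warmup_start_lr to base_lr over warmup_steps.
    if step < warmup_steps:
        return warmup_start_lr + (base_lr - warmup_start_lr) * step / warmup_steps
    # Piecewise decay: multiply by lr_decay_gamma at each decay epoch.
    epoch = step // steps_per_epoch
    num_decays = sum(epoch >= e for e in lr_decay_epochs)
    return base_lr * lr_decay_gamma ** num_decays
```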
### Notice

PaddleX requires warmup to finish before the first learning-rate decay, so the following must hold:
```
warmup_steps <= lr_decay_epochs[0] * num_steps_each_epoch
```
where `num_steps_each_epoch = num_samples_in_train_dataset // train_batch_size`.

> So if PaddleX reports `warmup_steps should be less than xxx` during training, adjust `lr_decay_epochs` or `warmup_steps` according to the formula above so that the two parameters satisfy the condition (a worked numeric check follows the links below).
> - Image classification [train API docs](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/classification.html#train)
> - FasterRCNN [train API docs](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/detection.html#fasterrcnn)
> - YOLOv3 [train API docs](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/detection.html#yolov3)
> - MaskRCNN [train API docs](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/instance_segmentation.html#maskrcnn)
> - DeepLab [train API docs](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#deeplabv3p)
> - UNet [train API docs](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#unet)
> - HRNet [train API docs](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#hrnet)
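A worked numeric check of the constraint above (all numbers hypothetical):
```python
num_samples_in_train_dataset = 3360  # hypothetical dataset size
train_batch_size = 32
lr_decay_epochs = [6, 8, 10]

num_steps_each_epoch = num_samples_in_train_dataset // train_batch_size  # 105
max_warmup_steps = lr_decay_epochs[0] * num_steps_each_epoch             # 630
# Any warmup_steps passed to train() must not exceed 630 in this setup.
```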
## <a name="2">PaddleX GUI Installation</a>

PaddleX GUI is the core module for raising project-development efficiency: developers can complete the full deep-learning model workflow quickly. We sincerely invite you to download and try the PaddleX GUI visual front end from the [official site](https://www.paddlepaddle.org.cn/paddle/paddleX) and share your valuable feedback or open-source contributions.

#### <a name="1">Recommended environment</a>

* **Operating system**
  * Windows 7/8/10 (Windows 10 recommended);
  * Mac OS 10.13+;
  * Ubuntu 18.04+;

***Note: the processor must be x86_64 with MKL support.***

* **Training hardware**
  * **GPU** (Windows and Linux only):
    An NVIDIA CUDA-capable GPU is recommended, e.g. GTX 1070 or better;
    Windows x86_64 driver version >= 411.31;
    Linux x86_64 driver version >= 410.48;
    8 GB of GPU memory or more;
  * **CPU**
    PaddleX currently supports training on a local CPU, but a GPU is recommended for a better development experience.
* **Memory**: 8 GB or more recommended
* **Disk space**: 1 TB of free SSD space recommended (not required)

***Note: on Windows and Mac OS, PaddleX supports single-GPU training only. NCCL is not supported on Windows.***
# PaddleX GUI User Guide

PaddleX is PaddlePaddle's full-workflow development tool. It combines the core framework, model libraries, tools, and components needed across the whole deep-learning development workflow; it is easy to use and easy to integrate, and is the best companion for getting started with deep learning and for raising project-development efficiency.

PaddleX GUI is a graphical client built on PaddleX: developers complete the full model-development workflow through form-style input, which greatly improves development efficiency. The PaddlePaddle team looks forward to seeing developers build products that fit their own industrial needs on top of PaddleX.

We sincerely invite you to download and try PaddleX GUI from the [official site](https://www.paddlepaddle.org.cn/paddlex) and share your valuable feedback or open-source contributions.

## Contents

* **Product features**
* **The PaddleX GUI visual front end**
* **FAQ**

## Product features

\- **End-to-end workflow**

The full deep-learning development workflow is connected behind a visual interface, removing the need to learn each stage's API or write repetitive code, which greatly improves development efficiency.

\- **Easy to use and integrate**

PaddleX offers a complete and flexible Python API development mode that is fully open source and easy to integrate and extend. The form-driven graphical interface lets non-specialist users run business proofs of concept quickly.

\- **Industrial practice built in**

PaddleX distills PaddlePaddle's industrial deployment experience into curated, high-quality vision-model solutions, with practical case tutorials that walk you through meeting real production needs.

\- **Tutorials and support**

From dataset preparation to deployment, PaddleX provides documentation and technical support for the full development workflow. Developers can reach the PaddlePaddle team and industry partners through QQ groups, WeChat groups, and the GitHub community.

## The PaddleX GUI visual front end

**Step 1: prepare data**

Before training, annotate your data in the format required by the task type. PaddleX currently supports four task types: image classification, object detection, semantic segmentation, and instance segmentation. See [data annotation formats](https://paddlex.readthedocs.io/zh_CN/latest/appendix/datasets.html) for each task type.

**Step 2: import your dataset**

1. After annotation, rename the data and annotation files as the client prompts, per task type, and save them to the correct folders.

2. Create a new dataset in the client, choose the task type that matches the dataset, select the dataset's path, and import it.

![](images/datasets1.jpg)

3. Once the dataset is selected for import, the client automatically validates the data and annotation files. After validation succeeds, you can split the dataset into training, validation, and test sets in whatever ratio you need.

4. In the "Data Analysis" module you can browse the annotated dataset; double-click a single image to zoom in.

![](images/dataset2.jpg)

**Step 3: create a project**

1. After importing data, click "New Project" to create a project.

2. Choose the project's task type according to your needs; note that datasets also carry a task-type attribute, and the two must match.

![](images/project3.jpg)

**Step 4: develop the project**

**Data selection**: after the project is created, select a dataset that has been loaded into the client and validated, then click Next to reach the parameter-configuration page.

![](images/project1.jpg)

**Parameter configuration**: three groups: **model parameters**, **training parameters**, and **optimization strategy**. Choose the model architecture, backbone, training parameters, and optimization strategy that best fit your task.

![](images/project2.jpg)

When configuration is done, click Start Training; the model trains and is evaluated as it goes.

**Training visualization**: during training you can inspect parameter curves and detailed logs with VisualDL, along with the current best training and validation metrics. Training can be stopped at any time by clicking "Stop Training".

![](images/visualization1.jpg)

When training finishes, you can either go to "Model Pruning Analysis" or go straight to "Model Evaluation".

![](images/visualization2.jpg)

**Model pruning**: to shrink the model, reduce its compute, and speed up on-device inference, you can apply PaddleX's pruning strategy. Pruning analyzes the sensitivity of each convolutional layer, prunes parameters by different ratios according to their effect on accuracy, and then fine-tunes to produce the final pruned model.

![](images/visualization3.jpg)

**Model evaluation**: the evaluation page shows how the trained model performs, using the confusion matrix, precision, recall, and other metrics.

![](images/visualization4.jpg)

You can also test the trained model on the test split held out during dataset splitting, or on one or more images imported from a local folder. Based on the results, decide whether to save the trained model as a pretrained model and move on to the publishing page, or go back and retrain with adjusted parameters.

![](images/visualization5.jpg)

**Step 5: publish the model**

When you are happy with the model, publish it in the format your production environment needs.

![](images/publish.jpg)

## FAQ

1. **Why is training so slow?**

   PaddleX runs entirely on your local hardware, and deep learning is genuinely compute-intensive. We support CPU hardware so you can try PaddleX quickly, but we strongly recommend a GPU for faster training and a better development experience.

2. **Can I deploy PaddleX on a server or cloud platform?**

   PaddleX GUI is a client designed for single-machine local installation and cannot be deployed directly on a server. You can use the PaddleX API directly, or deploy on a server with the core PaddlePaddle framework. For public compute, we strongly recommend trying [EasyDL](https://ai.baidu.com/easydl/) or [AI Studio](https://aistudio.baidu.com/aistudio/index) from the PaddlePaddle product line.

3. **Does PaddleX support data annotated with EasyData?**

   Yes, PaddleX reads EasyData-annotated data without trouble. The current version of PaddleX GUI cannot yet import the EasyData format directly; follow the docs to [convert the dataset](https://paddlex.readthedocs.io/zh_CN/latest/appendix/how_to_convert_dataset.html) before importing it into PaddleX GUI. Direct EasyData import into PaddleX GUI is under active development.

4. **Why does pruning analysis take so long?**

   Pruning analysis measures each convolutional layer's sensitivity and prunes parameters by different ratios according to their impact on accuracy. The process repeats until the FLOPS target is met and finishes with fine-tuning to produce the final pruned model, hence the long runtime. For the underlying theory, see [pruning principles](https://paddlepaddle.github.io/PaddleSlim/algo/algo.html#2-%E5%8D%B7%E7%A7%AF%E6%A0%B8%E5%89%AA%E8%A3%81%E5%8E%9F%E7%90%86).

5. **How do I call the backend code?**

   The PaddleX team maintains API reference documentation for learning and use; see the [PaddleX API reference](https://paddlex.readthedocs.io/zh_CN/latest/apis/index.html).

**For more questions or suggestions, open an issue or join the official PaddleX QQ group (1045148026) to give us your feedback directly.**

![](images/QR.jpg)
@@ -21,7 +21,7 @@ PaddleX GUI is a visual model-training suite built on PaddleX that can
   how_to_use.md
   xx.md

-* PaddleX version: v0.1.7
+* PaddleX GUI version: v1.0
* Official site: http://www.paddlepaddle.org.cn/paddle/paddlex
* Project GitHub: https://github.com/PaddlePaddle/PaddleX/tree/develop
* Official QQ user group: 1045148026
...
Model Compression
============================
.. toctree::
   :maxdepth: 2

   prune.md
   quant.md
# Model Pruning

## How it works

Pruning reduces a model's compute and size and speeds up inference after deployment. It is a common way to shrink a model and lower its computational complexity: the number of kernel output channels in convolutional layers is cut down, together with the parameters of associated layers. See the [PaddleSlim documentation](https://paddlepaddle.github.io/PaddleSlim/algo/algo.html#id16) for how correlated layers are pruned together. **In general, at equal model accuracy, the lower the complexity of the data, the larger the fraction of the model that can be pruned.**

## Pruning methods

PaddleX offers two methods:

**1. Compute the pruning configuration yourself (recommended). The workflow has three steps:**

> **Step 1**: train the original model on your dataset
> **Step 2**: with the model from step 1, compute the sensitivity of each model parameter on the validation set and save the sensitivities to a local file
> **Step 3**: train the pruned model on your dataset (the only difference from step 1 is that the sensitivity file from step 2 is passed to the `sensitivities_file` parameter of the `train` API)

> Across these three steps, **the model is effectively trained twice** (steps 1 and 3); step 3 trains the pruned model, so it is faster than step 1.
> Step 2 iterates over part of the prunable parameters and measures how pruning each one affects validation accuracy, **so it evaluates on the validation set repeatedly.**
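A hedged end-to-end sketch of method 1, using a classifier as in the tutorials. `sensitivities_file` and `eval_metric_loss` are the documented `train` parameters; the sensitivity-analysis helper named below (`pdx.slim.cal_params_sensitivities`) is an assumption — check the slim API docs for the exact interface.
```python
import paddlex as pdx

# Step 1: train the original model (train_dataset / eval_dataset are
# placeholders, built as in the training tutorials).
model = pdx.cls.MobileNetV2(num_classes=len(train_dataset.labels))
model.train(num_epochs=10, train_dataset=train_dataset,
            eval_dataset=eval_dataset, save_dir='output/base')

# Step 2: compute per-parameter sensitivities on the validation set and
# save them to a local file. NOTE: assumed helper name and signature.
model = pdx.load_model('output/base/best_model')
pdx.slim.cal_params_sensitivities(model, 'sensitivities.data', eval_dataset)

# Step 3: retrain the pruned model, passing the sensitivity file.
# (For method 2 below, pass sensitivities_file='DEFAULT' instead.)
pruned = pdx.cls.MobileNetV2(num_classes=len(train_dataset.labels))
pruned.train(num_epochs=10, train_dataset=train_dataset,
             eval_dataset=eval_dataset,
             sensitivities_file='sensitivities.data',
             eval_metric_loss=0.10,
             save_dir='output/pruned')
```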
**2. Use PaddleX's built-in pruning scheme**

> PaddleX's built-in pruning schemes use parameter sensitivities computed **on standard datasets**. Because feature distributions differ substantially across datasets, models trained this way are **generally less accurate** than with method 1 (**and the further your dataset's distribution is from the standard dataset's, the lower the resulting accuracy**). Consider it only when saving time matters most. It takes a single step:
> **One step**: train the pruned model on your dataset, setting the `sensitivities_file` parameter of the `train` API to the string 'DEFAULT'
> Note: the built-in schemes are based on the following datasets: image classification — ImageNet; object detection — PascalVOC; semantic segmentation — CityScape
## Pruning experiments

Using the two methods above, we ran experiments in PaddleX on the sample datasets. Metrics measured on a Tesla P40 are as follows.

### Image classification

Setup: MobileNetV2 on the vegetable-classification sample data; see [Tutorials — Model Compression — Image Classification](../tutorials/compress/classification.md)

| Model | Pruning | Model size | Top-1 accuracy (%) | GPU inference time | CPU inference time |
| :-----| :--------| :-------- | :---------- |:---------- |:----------|
|MobileNetV2 | none (original model) | 13.0M | 97.50 | 6.47ms | 47.44ms |
|MobileNetV2 | method 1 (eval_metric_loss=0.10) | 2.1M | 99.58 | 5.03ms | 20.22ms |
|MobileNetV2 | method 2 (eval_metric_loss=0.10) | 6.0M | 99.58 | 5.42ms | 29.06ms |

### Object detection

Setup: YOLOv3-MobileNetV1 on the insect-detection sample data; see [Tutorials — Model Compression — Object Detection](../tutorials/compress/detection.md)

| Model | Pruning | Model size | mAP (%) | GPU inference time | CPU inference time |
| :-----| :--------| :-------- | :---------- |:---------- | :---------|
|YOLOv3-MobileNetV1 | none (original model) | 139M | 67.57 | 14.88ms | 976.42ms |
|YOLOv3-MobileNetV1 | method 1 (eval_metric_loss=0.10) | 34M | 75.49 | 10.60ms | 558.49ms |
|YOLOv3-MobileNetV1 | method 2 (eval_metric_loss=0.05) | 29M | 50.27 | 9.43ms | 360.46ms |

### Semantic segmentation

Setup: UNet on the optic-disc segmentation sample data; see [Tutorials — Model Compression — Semantic Segmentation](../tutorials/compress/segmentation.md)

| Model | Pruning | Model size | mIOU (%) | GPU inference time | CPU inference time |
| :-----| :--------| :-------- | :---------- |:---------- | :---------|
|UNet | none (original model) | 77M | 91.22 | 33.28ms | 9523.55ms |
|UNet | method 1 (eval_metric_loss=0.10) | 26M | 90.37 | 21.04ms | 3936.20ms |
|UNet | method 2 (eval_metric_loss=0.10) | 23M | 91.21 | 18.61ms | 3447.75ms |
# Model Quantization

## How it works

Fixed-point quantization was introduced to meet the demands of low memory bandwidth, low power consumption, low compute usage, and small model storage. PaddleX provides post-training quantization, which uses KL divergence to determine the quantization scale factors and converts an FP32 model to INT8 without retraining, producing a quantized model quickly.

## Quantizing a model with PaddleX

PaddleX provides the `export_quant_model` API, which quantizes a model by post_quantization and exports it in one call. See the [quantization API docs](../apis/slim.md).
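A minimal sketch of the export flow (hedged: the exact parameters of `export_quant_model` live in the linked slim API docs; the argument names below are assumptions):
```python
import paddlex as pdx

# Load a trained model, then export an INT8 model via post-training
# quantization (KL-divergence scale factors, no retraining required).
model = pdx.load_model('output/base/best_model')
pdx.slim.export_quant_model(model, eval_dataset,           # assumed signature
                            save_dir='output/quant_model')
```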
## Quantized performance comparison

For performance metrics of quantized models, see the [PaddleSlim model zoo](https://paddlepaddle.github.io/PaddleSlim/model_zoo.html).
@@ -96,6 +96,17 @@ cmake .. \
make
```

**Note:** on Linux the build automatically downloads OPENCV, PaddleX-Encryption, and YAML. If the build environment cannot reach the internet, download them manually:

- [opencv3gcc4.8.tar.bz2](https://paddleseg.bj.bcebos.com/deploy/docker/opencv3gcc4.8.tar.bz2)
- [paddlex-encryption.zip](https://bj.bcebos.com/paddlex/tools/paddlex-encryption.zip)
- [yaml-cpp.zip](https://bj.bcebos.com/paddlex/deploy/deps/yaml-cpp.zip)

After downloading opencv3gcc4.8.tar.bz2, extract it and set `OPENCV_DIR` in script/build.sh to the extracted path.

After downloading paddlex-encryption.zip, extract it and set `ENCRYPTION_DIR` in script/build.sh to the extracted path.

yaml-cpp.zip does not need to be extracted; in cmake/yaml.cmake, change the URL in `URL https://bj.bcebos.com/paddlex/deploy/deps/yaml-cpp.zip` to the path of the downloaded file.

After editing the script and setting the main parameters, run the `build` script:

```shell
@@ -104,8 +115,9 @@ make

### Step 5: Prediction and visualization

-Refer to [Exporting an inference model](../../deploy_python.html#inference) to export your model in inference format.
-**Note: because of ongoing updates to the PaddleX code, models with a version below 1.0.0 cannot be used directly for deployment; refer to [model version upgrade](../../upgrade_version.md) to upgrade the model.**
+**Before loading a model, check that the model directory contains the three files `model.yml`, `__model__`, and `__params__`. If it does not, refer to [Exporting a model for inference](../deploy_python.html#inference) to export the model in deployment format.**
+
+> **Note: because of ongoing updates to the PaddleX code, models with a version below 1.0.0 (the model version is recorded in the version field of model.yml) cannot be used directly for deployment; refer to [model version upgrade](../../upgrade_version.md) to upgrade the model.**

After a successful build, the prediction demo executables are `build/demo/detector`, `build/demo/classifer`, and `build/demo/segmenter`; pick the one matching your model type. Their main command-line parameters are:

@@ -117,7 +129,7 @@ make
| use_gpu | whether to predict on the GPU, 0 or 1 (default 0) |
| use_trt | whether to predict with TensorRT, 0 or 1 (default 0) |
| gpu_id | GPU device ID, default 0 |
-| save_dir | path for saving visualized results, default "output"; classifer has no such parameter |
+| save_dir | path for saving visualized results, default "output"; **classifer has no such parameter** |

## Example
...
@@ -86,7 +86,14 @@ The PaddlePaddle C++ inference library comes in different builds per `CPU` and `CUDA` version and TensorRT support
| OPENCV_DIR | OpenCV installation path |
| PADDLE_DIR | path of the Paddle C++ inference library |

-**Note:** 1. For the `CPU` version of the inference library, untick `WITH_GPU`. 2. For the `openblas` version, untick `WITH_MKL`.
+**Note:**
+
+1. For the `CPU` version of the inference library, untick `WITH_GPU`.
+2. For the `openblas` version, untick `WITH_MKL`.
+3. On Windows the build automatically downloads YAML; if the build environment cannot reach the internet, download [yaml-cpp.zip](https://bj.bcebos.com/paddlex/deploy/deps/yaml-cpp.zip) manually.
+
+yaml-cpp.zip does not need to be extracted; in cmake/yaml.cmake, change the URL in `URL https://bj.bcebos.com/paddlex/deploy/deps/yaml-cpp.zip` to the path of the downloaded file.

![step4](../../images/vs2019_step5.png)

@@ -99,8 +106,10 @@
### Step 5: Prediction and visualization

-Refer to [Exporting an inference model](../deploy_python.html#inference) to export your model in inference format.
-**Note: because of ongoing updates to the PaddleX code, models with a version below 1.0.0 cannot be used directly for deployment; refer to [model version upgrade](../../upgrade_version.md) to upgrade the model.**
+**Before loading a model, check that the model directory contains the three files `model.yml`, `__model__`, and `__params__`. If it does not, refer to [Exporting a model for inference](../deploy_python.html#inference) to export the model in deployment format.**
+
+**Note: because of ongoing updates to the PaddleX code, models with a version below 1.0.0 (the model version is recorded in the version field of model.yml) cannot be used directly for deployment; refer to [model version upgrade](../../upgrade_version.md) to upgrade the model.**

The executables produced by the `Visual Studio 2019` build are in the `out\build\x64-Release` directory; open `cmd` and switch to that directory:
...
# Tutorials — Training Models

This directory contains example code for training models with PaddleX. Every example downloads its sample data automatically and trains on a single GPU.

| Code | Task | Data |
|------|--------|---------|
|classification/mobilenetv2.py | image classification, MobileNetV2 | vegetable classification |
|classification/resnet50.py | image classification, ResNet50 | vegetable classification |
|detection/faster_rcnn_r50_fpn.py | object detection, FasterRCNN | insect detection |
|detection/mask_rcnn_r50_fpn.py | instance segmentation, MaskRCNN | garbage sorting |
|segmentation/deeplabv3p.py | semantic segmentation, DeepLabv3p | optic disc segmentation |
|segmentation/unet.py | semantic segmentation, UNet | optic disc segmentation |

## Start training

After installing PaddleX, start training with:
```
python classification/mobilenetv2.py
```
import os
# use GPU card 0
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
from paddlex.cls import transforms
import paddlex as pdx

# download and extract the vegetable classification dataset
veg_dataset = 'https://bj.bcebos.com/paddlex/datasets/vegetables_cls.tar.gz'
pdx.utils.download_and_decompress(veg_dataset, path='./')

# define the transforms for training and validation
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/cls_transforms.html#composedclstransforms
train_transforms = transforms.ComposedClsTransforms(mode='train', crop_size=[224, 224])
eval_transforms = transforms.ComposedClsTransforms(mode='eval', crop_size=[224, 224])

# define the datasets for training and validation
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/classification.html#imagenet
train_dataset = pdx.datasets.ImageNet(
    data_dir='vegetables_cls',
    file_list='vegetables_cls/train_list.txt',
    label_list='vegetables_cls/labels.txt',
    transforms=train_transforms,
    shuffle=True)
eval_dataset = pdx.datasets.ImageNet(
    data_dir='vegetables_cls',
    file_list='vegetables_cls/val_list.txt',
    label_list='vegetables_cls/labels.txt',
    transforms=eval_transforms)

# initialize the model and train it
# training metrics can be inspected with VisualDL
# start VisualDL with: visualdl --logdir output/mobilenetv2/vdl_log --port 8001
# then open https://0.0.0.0:8001 in a browser
# (0.0.0.0 is for local access; for a remote server use that machine's IP)
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/classification.html#resnet50
model = pdx.cls.MobileNetV2(num_classes=len(train_dataset.labels))
model.train(
    num_epochs=10,
    train_dataset=train_dataset,
    train_batch_size=32,
    eval_dataset=eval_dataset,
    lr_decay_epochs=[4, 6, 8],
    learning_rate=0.025,
    save_dir='output/mobilenetv2',
    use_vdl=True)
import os
# use GPU card 0
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
import paddle.fluid as fluid
from paddlex.cls import transforms
import paddlex as pdx

# download and extract the vegetable classification dataset
veg_dataset = 'https://bj.bcebos.com/paddlex/datasets/vegetables_cls.tar.gz'
pdx.utils.download_and_decompress(veg_dataset, path='./')

# define the transforms for training and validation
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/cls_transforms.html#composedclstransforms
train_transforms = transforms.ComposedClsTransforms(mode='train', crop_size=[224, 224])
eval_transforms = transforms.ComposedClsTransforms(mode='eval', crop_size=[224, 224])

# define the datasets for training and validation
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/classification.html#imagenet
train_dataset = pdx.datasets.ImageNet(
    data_dir='vegetables_cls',
    file_list='vegetables_cls/train_list.txt',
    label_list='vegetables_cls/labels.txt',
    transforms=train_transforms,
    shuffle=True)
eval_dataset = pdx.datasets.ImageNet(
    data_dir='vegetables_cls',
    file_list='vegetables_cls/val_list.txt',
    label_list='vegetables_cls/labels.txt',
    transforms=eval_transforms)

# PaddleX supports building a custom optimizer
step_each_epoch = train_dataset.num_samples // 32
learning_rate = fluid.layers.cosine_decay(
    learning_rate=0.025, step_each_epoch=step_each_epoch, epochs=10)
optimizer = fluid.optimizer.Momentum(
    learning_rate=learning_rate,
    momentum=0.9,
    regularization=fluid.regularizer.L2Decay(4e-5))

# initialize the model and train it
# training metrics can be inspected with VisualDL
# start VisualDL with: visualdl --logdir output/resnet50/vdl_log --port 8001
# then open https://0.0.0.0:8001 in a browser
# (0.0.0.0 is for local access; for a remote server use that machine's IP)
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/classification.html#resnet50
model = pdx.cls.ResNet50(num_classes=len(train_dataset.labels))
model.train(
    num_epochs=10,
    train_dataset=train_dataset,
    train_batch_size=32,
    eval_dataset=eval_dataset,
    optimizer=optimizer,
    save_dir='output/resnet50',
    use_vdl=True)
import os
# use GPU card 0
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
from paddlex.det import transforms
import paddlex as pdx

# download and extract the insect detection dataset
insect_dataset = 'https://bj.bcebos.com/paddlex/datasets/insect_det.tar.gz'
pdx.utils.download_and_decompress(insect_dataset, path='./')

# define the transforms for training and validation
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#composedrcnntransforms
train_transforms = transforms.ComposedRCNNTransforms(mode='train', min_max_size=[800, 1333])
eval_transforms = transforms.ComposedRCNNTransforms(mode='eval', min_max_size=[800, 1333])

# define the datasets for training and validation
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/detection.html#vocdetection
train_dataset = pdx.datasets.VOCDetection(
    data_dir='insect_det',
    file_list='insect_det/train_list.txt',
    label_list='insect_det/labels.txt',
    transforms=train_transforms,
    shuffle=True)
eval_dataset = pdx.datasets.VOCDetection(
    data_dir='insect_det',
    file_list='insect_det/val_list.txt',
    label_list='insect_det/labels.txt',
    transforms=eval_transforms)

# initialize the model and train it
# training metrics can be inspected with VisualDL
# start VisualDL with: visualdl --logdir output/faster_rcnn_r50_fpn/vdl_log --port 8001
# then open https://0.0.0.0:8001 in a browser
# (0.0.0.0 is for local access; for a remote server use that machine's IP)
# num_classes must include the background class, i.e. number of target classes + 1
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/detection.html#fasterrcnn
num_classes = len(train_dataset.labels) + 1
model = pdx.det.FasterRCNN(num_classes=num_classes)
model.train(
    num_epochs=12,
    train_dataset=train_dataset,
    train_batch_size=2,
    eval_dataset=eval_dataset,
    learning_rate=0.0025,
    lr_decay_epochs=[8, 11],
    save_dir='output/faster_rcnn_r50_fpn',
    use_vdl=True)
import os
# use GPU card 0
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
from paddlex.det import transforms
import paddlex as pdx

# download and extract the Xiaoduxiong sorting dataset
xiaoduxiong_dataset = 'https://bj.bcebos.com/paddlex/datasets/xiaoduxiong_ins_det.tar.gz'
pdx.utils.download_and_decompress(xiaoduxiong_dataset, path='./')

# define the transforms for training and validation
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#composedrcnntransforms
train_transforms = transforms.ComposedRCNNTransforms(mode='train', min_max_size=[800, 1333])
eval_transforms = transforms.ComposedRCNNTransforms(mode='eval', min_max_size=[800, 1333])

# define the datasets for training and validation
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/detection.html#cocodetection
train_dataset = pdx.datasets.CocoDetection(
    data_dir='xiaoduxiong_ins_det/JPEGImages',
    ann_file='xiaoduxiong_ins_det/train.json',
    transforms=train_transforms,
    shuffle=True)
eval_dataset = pdx.datasets.CocoDetection(
    data_dir='xiaoduxiong_ins_det/JPEGImages',
    ann_file='xiaoduxiong_ins_det/val.json',
    transforms=eval_transforms)

# initialize the model and train it
# training metrics can be inspected with VisualDL
# start VisualDL with: visualdl --logdir output/mask_rcnn_r50_fpn/vdl_log --port 8001
# then open https://0.0.0.0:8001 in a browser
# (0.0.0.0 is for local access; for a remote server use that machine's IP)
# num_classes must include the background class, i.e. number of target classes + 1
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/instance_segmentation.html#maskrcnn
num_classes = len(train_dataset.labels) + 1
model = pdx.det.MaskRCNN(num_classes=num_classes)
model.train(
    num_epochs=12,
    train_dataset=train_dataset,
    train_batch_size=1,
    eval_dataset=eval_dataset,
    learning_rate=0.00125,
    warmup_steps=10,
    lr_decay_epochs=[8, 11],
    save_dir='output/mask_rcnn_r50_fpn',
    use_vdl=True)
import os
# use GPU card 0
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
from paddlex.det import transforms
import paddlex as pdx

# download and extract the insect detection dataset
insect_dataset = 'https://bj.bcebos.com/paddlex/datasets/insect_det.tar.gz'
pdx.utils.download_and_decompress(insect_dataset, path='./')

# define the transforms for training and validation
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#composedyolotransforms
train_transforms = transforms.ComposedYOLOv3Transforms(mode='train', shape=[608, 608])
eval_transforms = transforms.ComposedYOLOv3Transforms(mode='eval', shape=[608, 608])

# define the datasets for training and validation
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/detection.html#vocdetection
train_dataset = pdx.datasets.VOCDetection(
    data_dir='insect_det',
    file_list='insect_det/train_list.txt',
    label_list='insect_det/labels.txt',
    transforms=train_transforms,
    shuffle=True)
eval_dataset = pdx.datasets.VOCDetection(
    data_dir='insect_det',
    file_list='insect_det/val_list.txt',
    label_list='insect_det/labels.txt',
    transforms=eval_transforms)

# initialize the model and train it
# training metrics can be inspected with VisualDL
# start VisualDL with: visualdl --logdir output/yolov3_darknet53/vdl_log --port 8001
# then open https://0.0.0.0:8001 in a browser
# (0.0.0.0 is for local access; for a remote server use that machine's IP)
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/detection.html#yolov3
num_classes = len(train_dataset.labels)
model = pdx.det.YOLOv3(num_classes=num_classes, backbone='DarkNet53')
model.train(
    num_epochs=270,
    train_dataset=train_dataset,
    train_batch_size=8,
    eval_dataset=eval_dataset,
    learning_rate=0.000125,
    lr_decay_epochs=[210, 240],
    save_dir='output/yolov3_darknet53',
    use_vdl=True)
import os
# use GPU card 0
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
import paddlex as pdx
from paddlex.seg import transforms

# download and extract the optic disc segmentation dataset
optic_dataset = 'https://bj.bcebos.com/paddlex/datasets/optic_disc_seg.tar.gz'
pdx.utils.download_and_decompress(optic_dataset, path='./')

# define the transforms for training and validation
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/seg_transforms.html#composedsegtransforms
train_transforms = transforms.ComposedSegTransforms(mode='train', train_crop_size=[769, 769])
eval_transforms = transforms.ComposedSegTransforms(mode='eval')
train_transforms.add_augmenters([
    transforms.RandomRotate()
])

# define the datasets for training and validation
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/semantic_segmentation.html#segdataset
train_dataset = pdx.datasets.SegDataset(
    data_dir='optic_disc_seg',
    file_list='optic_disc_seg/train_list.txt',
    label_list='optic_disc_seg/labels.txt',
    transforms=train_transforms,
    shuffle=True)
eval_dataset = pdx.datasets.SegDataset(
    data_dir='optic_disc_seg',
    file_list='optic_disc_seg/val_list.txt',
    label_list='optic_disc_seg/labels.txt',
    transforms=eval_transforms)

# initialize the model and train it
# training metrics can be inspected with VisualDL
# start VisualDL with: visualdl --logdir output/deeplab/vdl_log --port 8001
# then open https://0.0.0.0:8001 in a browser
# (0.0.0.0 is for local access; for a remote server use that machine's IP)
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#deeplabv3p
num_classes = len(train_dataset.labels)
model = pdx.seg.DeepLabv3p(num_classes=num_classes)
model.train(
    num_epochs=40,
    train_dataset=train_dataset,
    train_batch_size=4,
    eval_dataset=eval_dataset,
    learning_rate=0.01,
    save_dir='output/deeplab',
    use_vdl=True)
import os
# use GPU card 0
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
import paddlex as pdx
from paddlex.seg import transforms

# download and extract the optic disc segmentation dataset
optic_dataset = 'https://bj.bcebos.com/paddlex/datasets/optic_disc_seg.tar.gz'
pdx.utils.download_and_decompress(optic_dataset, path='./')

# define the transforms for training and validation
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/seg_transforms.html#composedsegtransforms
train_transforms = transforms.ComposedSegTransforms(mode='train', train_crop_size=[769, 769])
eval_transforms = transforms.ComposedSegTransforms(mode='eval')

# define the datasets for training and validation
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/semantic_segmentation.html#segdataset
train_dataset = pdx.datasets.SegDataset(
    data_dir='optic_disc_seg',
    file_list='optic_disc_seg/train_list.txt',
    label_list='optic_disc_seg/labels.txt',
    transforms=train_transforms,
    shuffle=True)
eval_dataset = pdx.datasets.SegDataset(
    data_dir='optic_disc_seg',
    file_list='optic_disc_seg/val_list.txt',
    label_list='optic_disc_seg/labels.txt',
    transforms=eval_transforms)

# initialize the model and train it
# training metrics can be inspected with VisualDL
# start VisualDL with: visualdl --logdir output/hrnet/vdl_log --port 8001
# then open https://0.0.0.0:8001 in a browser
# (0.0.0.0 is for local access; for a remote server use that machine's IP)
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#hrnet
num_classes = len(train_dataset.labels)
model = pdx.seg.HRNet(num_classes=num_classes)
model.train(
    num_epochs=20,
    train_dataset=train_dataset,
    train_batch_size=4,
    eval_dataset=eval_dataset,
    learning_rate=0.01,
    save_dir='output/hrnet',
    use_vdl=True)
import os
# use GPU card 0
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
import paddlex as pdx
from paddlex.seg import transforms

# download and extract the optic disc segmentation dataset
optic_dataset = 'https://bj.bcebos.com/paddlex/datasets/optic_disc_seg.tar.gz'
pdx.utils.download_and_decompress(optic_dataset, path='./')

# define the transforms for training and validation
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/seg_transforms.html#composedsegtransforms
train_transforms = transforms.ComposedSegTransforms(mode='train', train_crop_size=[769, 769])
eval_transforms = transforms.ComposedSegTransforms(mode='eval')

# define the datasets for training and validation
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/semantic_segmentation.html#segdataset
train_dataset = pdx.datasets.SegDataset(
    data_dir='optic_disc_seg',
    file_list='optic_disc_seg/train_list.txt',
    label_list='optic_disc_seg/labels.txt',
    transforms=train_transforms,
    shuffle=True)
eval_dataset = pdx.datasets.SegDataset(
    data_dir='optic_disc_seg',
    file_list='optic_disc_seg/val_list.txt',
    label_list='optic_disc_seg/labels.txt',
    transforms=eval_transforms)

# initialize the model and train it
# training metrics can be inspected with VisualDL
# start VisualDL with: visualdl --logdir output/unet/vdl_log --port 8001
# then open https://0.0.0.0:8001 in a browser
# (0.0.0.0 is for local access; for a remote server use that machine's IP)
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#unet
num_classes = len(train_dataset.labels)
model = pdx.seg.UNet(num_classes=num_classes)
model.train(
    num_epochs=20,
    train_dataset=train_dataset,
    train_batch_size=4,
    eval_dataset=eval_dataset,
    learning_rate=0.01,
    save_dir='output/unet',
    use_vdl=True)
@@ -53,4 +53,4 @@ log_level = 2
 from . import interpret

-__version__ = '1.0.4'
+__version__ = '1.0.6'
@@ -37,5 +37,6 @@ DenseNet161 = cv.models.DenseNet161
 DenseNet201 = cv.models.DenseNet201
 ShuffleNetV2 = cv.models.ShuffleNetV2
 HRNet_W18 = cv.models.HRNet_W18
+AlexNet = cv.models.AlexNet

 transforms = cv.transforms.cls_transforms
@@ -35,6 +35,7 @@ from .classifier import DenseNet161
 from .classifier import DenseNet201
 from .classifier import ShuffleNetV2
 from .classifier import HRNet_W18
+from .classifier import AlexNet
 from .base import BaseAPI
 from .yolo_v3 import YOLOv3
 from .faster_rcnn import FasterRCNN
...
@@ -221,8 +221,8 @@ class BaseAPI:
         logging.info(
             "Load pretrain weights from {}.".format(pretrain_weights),
             use_color=True)
-        paddlex.utils.utils.load_pretrain_weights(
-            self.exe, self.train_prog, pretrain_weights, fuse_bn)
+        paddlex.utils.utils.load_pretrain_weights(self.exe, self.train_prog,
+                                                  pretrain_weights, fuse_bn)
         # apply pruning
         if sensitivities_file is not None:
             import paddleslim
@@ -262,6 +262,7 @@ class BaseAPI:
         info['_Attributes']['num_classes'] = self.num_classes
         info['_Attributes']['labels'] = self.labels
+        info['_Attributes']['fixed_input_shape'] = self.fixed_input_shape
         try:
             primary_metric_key = list(self.eval_metrics.keys())[0]
             primary_metric_value = float(self.eval_metrics[primary_metric_key])
@@ -325,9 +326,7 @@ class BaseAPI:
         logging.info("Model saved in {}.".format(save_dir))

     def export_inference_model(self, save_dir):
-        test_input_names = [
-            var.name for var in list(self.test_inputs.values())
-        ]
+        test_input_names = [var.name for var in list(self.test_inputs.values())]
         test_outputs = list(self.test_outputs.values())
         if self.__class__.__name__ == 'MaskRCNN':
             from paddlex.utils.save import save_mask_inference_model
@@ -364,8 +363,7 @@ class BaseAPI:
         # marker that the model was saved successfully
         open(osp.join(save_dir, '.success'), 'w').close()
-        logging.info("Model for inference deploy saved in {}.".format(
-            save_dir))
+        logging.info("Model for inference deploy saved in {}.".format(save_dir))

     def train_loop(self,
                    num_epochs,
@@ -489,12 +487,10 @@ class BaseAPI:
                     eta = ((num_epochs - i) * total_num_steps - step - 1
                            ) * avg_step_time
                     if time_eval_one_epoch is not None:
-                        eval_eta = (
-                            total_eval_times - i // save_interval_epochs
-                        ) * time_eval_one_epoch
+                        eval_eta = (total_eval_times - i // save_interval_epochs
+                                    ) * time_eval_one_epoch
                     else:
-                        eval_eta = (
-                            total_eval_times - i // save_interval_epochs
-                        ) * total_num_steps_eval * avg_step_time
+                        eval_eta = (total_eval_times - i // save_interval_epochs
+                                    ) * total_num_steps_eval * avg_step_time
                     eta_str = seconds_to_hms(eta + eval_eta)
...
@@ -48,12 +48,13 @@ class BaseClassifier(BaseAPI):
         self.fixed_input_shape = None

     def build_net(self, mode='train'):
+        if self.__class__.__name__ == "AlexNet":
+            assert self.fixed_input_shape is not None, "In AlexNet, input_shape should be defined, e.g. model = paddlex.cls.AlexNet(num_classes=1000, input_shape=[224, 224])"
         if self.fixed_input_shape is not None:
             input_shape = [
                 None, 3, self.fixed_input_shape[1], self.fixed_input_shape[0]
             ]
-            image = fluid.data(
-                dtype='float32', shape=input_shape, name='image')
+            image = fluid.data(dtype='float32', shape=input_shape, name='image')
         else:
             image = fluid.data(
                 dtype='float32', shape=[None, 3, None, None], name='image')
@@ -81,7 +82,8 @@ class BaseClassifier(BaseAPI):
             del outputs['loss']
         return inputs, outputs

-    def default_optimizer(self, learning_rate, lr_decay_epochs, lr_decay_gamma,
+    def default_optimizer(self, learning_rate, warmup_steps, warmup_start_lr,
+                          lr_decay_epochs, lr_decay_gamma,
                           num_steps_each_epoch):
         boundaries = [b * num_steps_each_epoch for b in lr_decay_epochs]
         values = [
@@ -90,6 +92,24 @@ class BaseClassifier(BaseAPI):
         ]
         lr_decay = fluid.layers.piecewise_decay(
             boundaries=boundaries, values=values)
+        if warmup_steps > 0:
+            if warmup_steps > lr_decay_epochs[0] * num_steps_each_epoch:
+                logging.error(
+                    "In function train(), parameters should satisfy: warmup_steps <= lr_decay_epochs[0]*num_samples_in_train_dataset",
+                    exit=False)
+                logging.error(
+                    "See this doc for more information: https://github.com/PaddlePaddle/PaddleX/blob/develop/docs/appendix/parameters.md#notice",
+                    exit=False)
+                logging.error(
+                    "warmup_steps should be less than {} or lr_decay_epochs[0] greater than {}; please modify 'lr_decay_epochs' or 'warmup_steps' in the train function".
+                    format(lr_decay_epochs[0] * num_steps_each_epoch,
+                           warmup_steps // num_steps_each_epoch))
+            lr_decay = fluid.layers.linear_lr_warmup(
+                learning_rate=lr_decay,
+                warmup_steps=warmup_steps,
+                start_lr=warmup_start_lr,
+                end_lr=learning_rate)
         optimizer = fluid.optimizer.Momentum(
             lr_decay,
             momentum=0.9,
@@ -107,6 +127,8 @@ class BaseClassifier(BaseAPI):
               pretrain_weights='IMAGENET',
               optimizer=None,
               learning_rate=0.025,
+              warmup_steps=0,
+              warmup_start_lr=0.0,
               lr_decay_epochs=[30, 60, 90],
               lr_decay_gamma=0.1,
               use_vdl=False,
@@ -129,6 +151,8 @@ class BaseClassifier(BaseAPI):
            optimizer (paddle.fluid.optimizer): optimizer. When None, the default optimizer is used:
                a fluid.layers.piecewise_decay schedule with fluid.optimizer.Momentum.
            learning_rate (float): initial learning rate of the default optimizer. Defaults to 0.025.
+           warmup_steps (int): number of steps over which the learning rate rises from warmup_start_lr to learning_rate. Defaults to 0.
+           warmup_start_lr (float): learning rate at the start of warmup. Defaults to 0.0.
            lr_decay_epochs (list): epochs at which the default optimizer decays the learning rate. Defaults to [30, 60, 90].
            lr_decay_gamma (float): learning-rate decay factor of the default optimizer. Defaults to 0.1.
            use_vdl (bool): whether to use VisualDL for visualization. Defaults to False.
@@ -149,6 +173,8 @@ class BaseClassifier(BaseAPI):
             num_steps_each_epoch = train_dataset.num_samples // train_batch_size
             optimizer = self.default_optimizer(
                 learning_rate=learning_rate,
+                warmup_steps=warmup_steps,
+                warmup_start_lr=warmup_start_lr,
                 lr_decay_epochs=lr_decay_epochs,
                 lr_decay_gamma=lr_decay_gamma,
                 num_steps_each_epoch=num_steps_each_epoch)
@@ -193,8 +219,7 @@ class BaseClassifier(BaseAPI):
             tuple (metrics, eval_details): when return_details is True, a dict is also returned with
                 keys 'true_labels' and 'pred_scores': the ground-truth class ids and per-class prediction scores.
         """
-        self.arrange_transforms(
-            transforms=eval_dataset.transforms, mode='eval')
+        self.arrange_transforms(transforms=eval_dataset.transforms, mode='eval')
         data_generator = eval_dataset.generator(
             batch_size=batch_size, drop_last=False)
         k = min(5, self.num_classes)
@@ -206,9 +231,8 @@ class BaseClassifier(BaseAPI):
                 self.test_prog).with_data_parallel(
                     share_vars_from=self.parallel_train_prog)
         batch_size_each_gpu = self._get_single_card_bs(batch_size)
-        logging.info(
-            "Start to evaluating(total_samples={}, total_steps={})...".format(
-                eval_dataset.num_samples, total_steps))
+        logging.info("Start to evaluating(total_samples={}, total_steps={})...".
+                     format(eval_dataset.num_samples, total_steps))
         for step, data in tqdm.tqdm(
                 enumerate(data_generator()), total=total_steps):
             images = np.array([d[0] for d in data]).astype('float32')
@@ -264,7 +288,8 @@ class BaseClassifier(BaseAPI):
         im = self.test_transforms(img_file)
         result = self.exe.run(self.test_prog,
                               feed={'image': im},
-                              fetch_list=list(self.test_outputs.values()))
+                              fetch_list=list(self.test_outputs.values()),
+                              use_program_cache=True)
         pred_label = np.argsort(result[0][0])[::-1][:true_topk]
         res = [{
             'category_id': l,
@@ -404,3 +429,10 @@ class HRNet_W18(BaseClassifier):
     def __init__(self, num_classes=1000):
         super(HRNet_W18, self).__init__(
             model_name='HRNet_W18', num_classes=num_classes)
+
+
+class AlexNet(BaseClassifier):
+    def __init__(self, num_classes=1000, input_shape=None):
+        super(AlexNet, self).__init__(
+            model_name='AlexNet', num_classes=num_classes)
+        self.fixed_input_shape = input_shape
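Per the assertion added in `build_net()` above, AlexNet must be constructed with an explicit input shape; a minimal usage sketch (taken from the assert message):
```python
import paddlex

# AlexNet requires a fixed input shape, unlike the other classifiers.
model = paddlex.cls.AlexNet(num_classes=1000, input_shape=[224, 224])
```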
@@ -337,7 +337,8 @@ class DeepLabv3p(BaseAPI):
                 for d in data:
                     padding_label = np.zeros(
                         (1, im_h, im_w)).astype('int64') + self.ignore_index
-                    padding_label[:, :im_h, :im_w] = d[1]
+                    _, label_h, label_w = d[1].shape
+                    padding_label[:, :label_h, :label_w] = d[1]
                     labels.append(padding_label)
                 labels = np.array(labels)
@@ -398,7 +399,8 @@ class DeepLabv3p(BaseAPI):
         im = np.expand_dims(im, axis=0)
         result = self.exe.run(self.test_prog,
                               feed={'image': im},
-                              fetch_list=list(self.test_outputs.values()))
+                              fetch_list=list(self.test_outputs.values()),
+                              use_program_cache=True)
         pred = result[0]
         pred = np.squeeze(pred).astype('uint8')
         logit = result[1]
...
@@ -138,8 +138,16 @@ class FasterRCNN(BaseAPI):
                           lr_decay_epochs, lr_decay_gamma,
                           num_steps_each_epoch):
         if warmup_steps > lr_decay_epochs[0] * num_steps_each_epoch:
-            raise Exception("warmup_steps should less than {}".format(
-                lr_decay_epochs[0] * num_steps_each_epoch))
+            logging.error(
+                "In function train(), parameters should satisfy: warmup_steps <= lr_decay_epochs[0]*num_samples_in_train_dataset",
+                exit=False)
+            logging.error(
+                "See this doc for more information: https://github.com/PaddlePaddle/PaddleX/blob/develop/docs/appendix/parameters.md#notice",
+                exit=False)
+            logging.error(
+                "warmup_steps should be less than {} or lr_decay_epochs[0] greater than {}; please modify 'lr_decay_epochs' or 'warmup_steps' in the train function".
+                format(lr_decay_epochs[0] * num_steps_each_epoch,
+                       warmup_steps // num_steps_each_epoch))
         boundaries = [b * num_steps_each_epoch for b in lr_decay_epochs]
         values = [(lr_decay_gamma**i) * learning_rate
                   for i in range(len(lr_decay_epochs) + 1)]
@@ -282,8 +290,7 @@ class FasterRCNN(BaseAPI):
             eval_details is a dict with key 'bbox': a list of predictions, each consisting of image id,
                 predicted box class id, box coordinates, and box score; and key 'gt': ground-truth annotation info.
         """
-        self.arrange_transforms(
-            transforms=eval_dataset.transforms, mode='eval')
+        self.arrange_transforms(transforms=eval_dataset.transforms, mode='eval')
         if metric is None:
             if hasattr(self, 'metric') and self.metric is not None:
                 metric = self.metric
@@ -302,14 +309,12 @@ class FasterRCNN(BaseAPI):
             logging.warning(
                 "Faster RCNN supports batch_size=1 only during evaluating, so batch_size is forced to be set to 1."
             )
-        dataset = eval_dataset.generator(
-            batch_size=batch_size, drop_last=False)
+        dataset = eval_dataset.generator(batch_size=batch_size, drop_last=False)
         total_steps = math.ceil(eval_dataset.num_samples * 1.0 / batch_size)
         results = list()
-        logging.info(
-            "Start to evaluating(total_samples={}, total_steps={})...".format(
-                eval_dataset.num_samples, total_steps))
+        logging.info("Start to evaluating(total_samples={}, total_steps={})...".
+                     format(eval_dataset.num_samples, total_steps))
         for step, data in tqdm.tqdm(enumerate(dataset()), total=total_steps):
             images = np.array([d[0] for d in data]).astype('float32')
             im_infos = np.array([d[1] for d in data]).astype('float32')
@@ -389,7 +394,8 @@ class FasterRCNN(BaseAPI):
                                   'im_shape': im_shape
                               },
                               fetch_list=list(self.test_outputs.values()),
-                              return_numpy=False)
+                              return_numpy=False,
+                              use_program_cache=True)
         res = {
             k: (np.array(v), v.recursive_sequence_lengths())
             for k, v in zip(list(self.test_outputs.keys()), outputs)
...
@@ -41,7 +41,16 @@ def load_model(model_dir, fixed_input_shape=None):
     if 'model_name' in info['_init_params']:
         del info['_init_params']['model_name']
     model = getattr(paddlex.cv.models, info['Model'])(**info['_init_params'])
     model.fixed_input_shape = fixed_input_shape
+    if '_Attributes' in info:
+        if 'fixed_input_shape' in info['_Attributes']:
+            fixed_input_shape = info['_Attributes']['fixed_input_shape']
+            if fixed_input_shape is not None:
+                logging.info("Model already has fixed_input_shape with {}".
+                             format(fixed_input_shape))
+                model.fixed_input_shape = fixed_input_shape
     if status == "Normal" or \
             status == "Prune" or status == "fluid.save":
         startup_prog = fluid.Program()
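The new block gives a fixed_input_shape serialized with the model (under '_Attributes') precedence over the one passed into load_model. A small sketch of that precedence rule, with a plain dict standing in for the parsed model description:

    def resolve_fixed_input_shape(info, fixed_input_shape=None):
        # a shape stored with the model wins over the caller-supplied one
        stored = info.get('_Attributes', {}).get('fixed_input_shape')
        return stored if stored is not None else fixed_input_shape

    info = {'_Attributes': {'fixed_input_shape': [608, 608]}}
    print(resolve_fixed_input_shape(info, [512, 512]))  # [608, 608]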
@@ -88,8 +97,8 @@ def load_model(model_dir, fixed_input_shape=None):
             model.model_type, info['Transforms'], info['BatchTransforms'])
         model.eval_transforms = copy.deepcopy(model.test_transforms)
     else:
-        model.test_transforms = build_transforms(
-            model.model_type, info['Transforms'], to_rgb)
+        model.test_transforms = build_transforms(model.model_type,
+                                                 info['Transforms'], to_rgb)
         model.eval_transforms = copy.deepcopy(model.test_transforms)
     if '_Attributes' in info:
......
@@ -97,8 +97,16 @@ class MaskRCNN(FasterRCNN):
                            lr_decay_epochs, lr_decay_gamma,
                            num_steps_each_epoch):
         if warmup_steps > lr_decay_epochs[0] * num_steps_each_epoch:
-            raise Exception("warmup_step should less than {}".format(
-                lr_decay_epochs[0] * num_steps_each_epoch))
+            logging.error(
+                "In function train(), parameters should satisfy: warmup_steps <= lr_decay_epochs[0]*num_samples_in_train_dataset",
+                exit=False)
+            logging.error(
+                "See this doc for more information: https://github.com/PaddlePaddle/PaddleX/blob/develop/docs/appendix/parameters.md#notice",
+                exit=False)
+            logging.error(
+                "warmup_steps should be less than {} or lr_decay_epochs[0] should be greater than {}, please modify 'lr_decay_epochs' or 'warmup_steps' in the train function".
+                format(lr_decay_epochs[0] * num_steps_each_epoch, warmup_steps
+                       // num_steps_each_epoch))
         boundaries = [b * num_steps_each_epoch for b in lr_decay_epochs]
         values = [(lr_decay_gamma**i) * learning_rate
                   for i in range(len(lr_decay_epochs) + 1)]
@@ -244,8 +252,7 @@ class MaskRCNN(FasterRCNN):
             box coordinates and box score; 'mask', whose value is the list of predicted region results, each consisting of the image id,
             the predicted region category id, region coordinates and region score; 'gt': information about the ground-truth boxes and regions.
         """
-        self.arrange_transforms(
-            transforms=eval_dataset.transforms, mode='eval')
+        self.arrange_transforms(transforms=eval_dataset.transforms, mode='eval')
         if metric is None:
             if hasattr(self, 'metric') and self.metric is not None:
                 metric = self.metric
@@ -266,9 +273,8 @@ class MaskRCNN(FasterRCNN):
         total_steps = math.ceil(eval_dataset.num_samples * 1.0 / batch_size)
         results = list()
-        logging.info(
-            "Start to evaluating(total_samples={}, total_steps={})...".format(
-                eval_dataset.num_samples, total_steps))
+        logging.info("Start to evaluating(total_samples={}, total_steps={})...".
+                     format(eval_dataset.num_samples, total_steps))
         for step, data in tqdm.tqdm(
                 enumerate(data_generator()), total=total_steps):
             images = np.array([d[0] for d in data]).astype('float32')
@@ -310,8 +316,7 @@ class MaskRCNN(FasterRCNN):
                     zip(['bbox_map', 'segm_map'],
                         [ap_stats[0][1], ap_stats[1][1]]))
             else:
-                metrics = OrderedDict(
-                    zip(['bbox_map', 'segm_map'], [0.0, 0.0]))
+                metrics = OrderedDict(zip(['bbox_map', 'segm_map'], [0.0, 0.0]))
         elif metric == 'COCO':
             if isinstance(ap_stats[0], np.ndarray) and isinstance(ap_stats[1],
                                                                   np.ndarray):
@@ -357,7 +362,8 @@ class MaskRCNN(FasterRCNN):
                 'im_shape': im_shape
             },
             fetch_list=list(self.test_outputs.values()),
-            return_numpy=False)
+            return_numpy=False,
+            use_program_cache=True)
         res = {
             k: (np.array(v), v.recursive_sequence_lengths())
             for k, v in zip(list(self.test_outputs.keys()), outputs)
......
@@ -66,16 +66,15 @@ def sensitivity(program,
             progress = "%.2f%%" % (progress * 100)
             logging.info(
                 "Total evaluate iters={}, current={}, progress={}, eta={}".
-                format(
-                    total_evaluate_iters, current_iter, progress,
-                    seconds_to_hms(
-                        int(cost * (total_evaluate_iters - current_iter)))),
+                format(total_evaluate_iters, current_iter, progress,
+                       seconds_to_hms(
+                           int(cost * (total_evaluate_iters - current_iter)))),
                 use_color=True)
             current_iter += 1
         pruner = Pruner()
-        logging.info("sensitive - param: {}; ratios: {}".format(
-            name, ratio))
+        logging.info("sensitive - param: {}; ratios: {}".format(name,
+                                                                ratio))
         pruned_program, param_backup, _ = pruner.prune(
             program=graph.program,
             scope=scope,
@@ -87,8 +86,8 @@ def sensitivity(program,
             param_backup=True)
         pruned_metric = eval_func(pruned_program)
         loss = (baseline - pruned_metric) / baseline
-        logging.info("pruned param: {}; {}; loss={}".format(
-            name, ratio, loss))
+        logging.info("pruned param: {}; {}; loss={}".format(name, ratio,
+                                                            loss))
         sensitivities[name][ratio] = loss
@@ -116,6 +115,21 @@ def channel_prune(program, prune_names, prune_ratios, place, only_graph=False):
     Returns:
         paddle.fluid.Program: the pruned Program.
     """
+    prog_var_shape_dict = {}
+    for var in program.list_vars():
+        try:
+            prog_var_shape_dict[var.name] = var.shape
+        except Exception:
+            pass
+    index = 0
+    for param, ratio in zip(prune_names, prune_ratios):
+        origin_num = prog_var_shape_dict[param][0]
+        pruned_num = int(round(origin_num * ratio))
+        while origin_num == pruned_num:
+            ratio -= 0.1
+            pruned_num = int(round(origin_num * (ratio)))
+        prune_ratios[index] = ratio
+        index += 1
     scope = fluid.global_scope()
     pruner = Pruner()
     program, _, _ = pruner.prune(
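A pure-Python sketch of the safeguard added above: if rounding would prune every output filter of a layer (pruned_num == origin_num), the ratio is backed off in 0.1 steps until at least one filter survives. The numbers are illustrative:

    def clamp_prune_ratio(origin_num, ratio):
        # origin_num: output filters of a conv; ratio: requested prune fraction
        pruned_num = int(round(origin_num * ratio))
        while origin_num == pruned_num:  # would remove the whole layer
            ratio -= 0.1
            pruned_num = int(round(origin_num * ratio))
        return ratio, pruned_num

    print(clamp_prune_ratio(8, 0.97))  # ratio backed off to ~0.87, 7 of 8 pruned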
@@ -221,6 +235,9 @@ def cal_params_sensitivities(model, save_file, eval_dataset, batch_size=8):
     where ``weight_0`` is a conv kernel name; ``sensitivities['weight_0']`` is a dict whose keys are prune ratios and whose values are sensitivities.
     """
+    if os.path.exists(save_file):
+        os.remove(save_file)
     prune_names = get_prune_params(model)

     def eval_for_prune(program):
@@ -284,6 +301,19 @@ def cal_model_size(program, place, sensitivities_file, eval_metric_loss=0.05):
     """
     prune_params_ratios = get_params_ratios(sensitivities_file,
                                             eval_metric_loss)
+    prog_var_shape_dict = {}
+    for var in program.list_vars():
+        try:
+            prog_var_shape_dict[var.name] = var.shape
+        except Exception:
+            pass
+    for param, ratio in prune_params_ratios.items():
+        origin_num = prog_var_shape_dict[param][0]
+        pruned_num = int(round(origin_num * ratio))
+        while origin_num == pruned_num:
+            ratio -= 0.1
+            pruned_num = int(round(origin_num * (ratio)))
+        prune_params_ratios[param] = ratio
     prune_program = channel_prune(
         program,
         list(prune_params_ratios.keys()),
......
@@ -142,13 +142,16 @@ def get_prune_params(model):
     program = model.test_prog
     if model_type.startswith('ResNet') or \
             model_type.startswith('DenseNet') or \
-            model_type.startswith('DarkNet'):
+            model_type.startswith('DarkNet') or \
+            model_type.startswith('AlexNet'):
         for block in program.blocks:
             for param in block.all_parameters():
                 pd_var = fluid.global_scope().find_var(param.name)
                 pd_param = pd_var.get_tensor()
                 if len(np.array(pd_param).shape) == 4:
                     prune_names.append(param.name)
+        if model_type == 'AlexNet':
+            prune_names.remove('conv5_weights')
     elif model_type == "MobileNetV1":
         prune_names.append("conv1_weights")
         for param in program.global_block().all_parameters():
@@ -162,7 +165,7 @@ def get_prune_params(model):
                 continue
             prune_names.append(param.name)
     elif model_type.startswith("MobileNetV3"):
-        if model_type == 'MobileNetV3_small':
+        if model_type.startswith('MobileNetV3_small'):
             expand_prune_id = [3, 4]
         else:
             expand_prune_id = [2, 3, 4, 8, 9, 11]
......
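get_prune_params above treats every 4-D parameter (a conv kernel) of the whitelisted backbones as prunable, then blacklists AlexNet's conv5_weights, whose output feeds the fixed-size fc6. A sketch of that shape filter over a toy name-to-shape table (the shapes are illustrative):

    params = {
        'conv1_weights': (64, 3, 11, 11),   # 4-D conv kernel: prunable
        'fc6_weights': (9216, 4096),        # 2-D fc weight: skipped
        'conv5_weights': (256, 256, 3, 3),  # 4-D, but excluded for AlexNet
    }
    prune_names = [name for name, shape in params.items() if len(shape) == 4]
    prune_names.remove('conv5_weights')  # its output size is baked into fc6
    print(prune_names)  # ['conv1_weights']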
@@ -70,6 +70,8 @@ image_pretrain = {
     'https://paddle-imagenet-models-name.bj.bcebos.com/HRNet_W60_C_pretrained.tar',
     'HRNet_W64':
     'https://paddle-imagenet-models-name.bj.bcebos.com/HRNet_W64_C_pretrained.tar',
+    'AlexNet':
+    'http://paddle-imagenet-models-name.bj.bcebos.com/AlexNet_pretrained.tar'
 }

 coco_pretrain = {
@@ -99,6 +101,8 @@ def get_pretrain_weights(flag, model_type, backbone, save_dir):
             backbone = 'DetResNet50'
         assert backbone in image_pretrain, "There is not ImageNet pretrain weights for {}, you may try COCO.".format(
             backbone)
+        # if backbone == 'AlexNet':
         # url = image_pretrain[backbone]
         # fname = osp.split(url)[-1].split('.')[0]
         # paddlex.utils.download_and_decompress(url, path=new_save_dir)
......
@@ -128,8 +128,16 @@ class YOLOv3(BaseAPI):
                            lr_decay_epochs, lr_decay_gamma,
                            num_steps_each_epoch):
         if warmup_steps > lr_decay_epochs[0] * num_steps_each_epoch:
-            raise Exception("warmup_steps should less than {}".format(
-                lr_decay_epochs[0] * num_steps_each_epoch))
+            logging.error(
+                "In function train(), parameters should satisfy: warmup_steps <= lr_decay_epochs[0]*num_samples_in_train_dataset",
+                exit=False)
+            logging.error(
+                "See this doc for more information: https://github.com/PaddlePaddle/PaddleX/blob/develop/docs/appendix/parameters.md#notice",
+                exit=False)
+            logging.error(
+                "warmup_steps should be less than {} or lr_decay_epochs[0] should be greater than {}, please modify 'lr_decay_epochs' or 'warmup_steps' in the train function".
+                format(lr_decay_epochs[0] * num_steps_each_epoch, warmup_steps
+                       // num_steps_each_epoch))
         boundaries = [b * num_steps_each_epoch for b in lr_decay_epochs]
         values = [(lr_decay_gamma**i) * learning_rate
                   for i in range(len(lr_decay_epochs) + 1)]
@@ -277,8 +285,7 @@ class YOLOv3(BaseAPI):
             eval_details is a dict containing the key 'bbox', whose value is the list of predictions; each prediction consists of the image id,
             the predicted box category id, box coordinates and box score; 'gt': information about the ground-truth boxes.
         """
-        self.arrange_transforms(
-            transforms=eval_dataset.transforms, mode='eval')
+        self.arrange_transforms(transforms=eval_dataset.transforms, mode='eval')
         if metric is None:
             if hasattr(self, 'metric') and self.metric is not None:
                 metric = self.metric
@@ -298,9 +305,8 @@ class YOLOv3(BaseAPI):
         data_generator = eval_dataset.generator(
             batch_size=batch_size, drop_last=False)
-        logging.info(
-            "Start to evaluating(total_samples={}, total_steps={})...".format(
-                eval_dataset.num_samples, total_steps))
+        logging.info("Start to evaluating(total_samples={}, total_steps={})...".
+                     format(eval_dataset.num_samples, total_steps))
         for step, data in tqdm.tqdm(
                 enumerate(data_generator()), total=total_steps):
             images = np.array([d[0] for d in data])
@@ -363,7 +369,8 @@ class YOLOv3(BaseAPI):
             feed={'image': im,
                   'im_size': im_size},
             fetch_list=list(self.test_outputs.values()),
-            return_numpy=False)
+            return_numpy=False,
+            use_program_cache=True)
         res = {
             k: (np.array(v), v.recursive_sequence_lengths())
             for k, v in zip(list(self.test_outputs.keys()), outputs)
......
@@ -24,6 +24,7 @@ from .xception import Xception
 from .densenet import DenseNet
 from .shufflenet_v2 import ShuffleNetV2
 from .hrnet import HRNet
+from .alexnet import AlexNet


 def resnet18(input, num_classes=1000):
@@ -153,3 +154,8 @@ def shufflenetv2(input, num_classes=1000):
 def hrnet_w18(input, num_classes=1000):
     model = HRNet(width=18, num_classes=num_classes)
     return model(input)
+
+
+def alexnet(input, num_classes=1000):
+    model = AlexNet(num_classes=num_classes)
+    return model(input)
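A hedged usage sketch for the new factory, mirroring the fluid.data/program_guard pattern used elsewhere in this commit; the import path is an assumption, not confirmed by the diff:

    import paddle.fluid as fluid
    from paddlex.cv.nets import alexnet  # assumed location of the factory above

    main_prog, startup_prog = fluid.Program(), fluid.Program()
    with fluid.program_guard(main_prog, startup_prog):
        # AlexNet's fc layers need fixed spatial dims, so H and W are pinned
        image = fluid.data(name='image', shape=[None, 3, 224, 224], dtype='float32')
        logits = alexnet(image, num_classes=1000)
        probs = fluid.layers.softmax(logits)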
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import paddle
import paddle.fluid as fluid
class AlexNet():
def __init__(self, num_classes=1000):
        assert num_classes is not None, "In AlexNet, num_classes cannot be None"
self.num_classes = num_classes
def __call__(self, input):
stdv = 1.0 / math.sqrt(input.shape[1] * 11 * 11)
layer_name = [
"conv1", "conv2", "conv3", "conv4", "conv5", "fc6", "fc7", "fc8"
]
conv1 = fluid.layers.conv2d(
input=input,
num_filters=64,
filter_size=11,
stride=4,
padding=2,
groups=1,
act='relu',
bias_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=layer_name[0] + "_offset"),
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=layer_name[0] + "_weights"))
pool1 = fluid.layers.pool2d(
input=conv1,
pool_size=3,
pool_stride=2,
pool_padding=0,
pool_type='max')
stdv = 1.0 / math.sqrt(pool1.shape[1] * 5 * 5)
conv2 = fluid.layers.conv2d(
input=pool1,
num_filters=192,
filter_size=5,
stride=1,
padding=2,
groups=1,
act='relu',
bias_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=layer_name[1] + "_offset"),
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=layer_name[1] + "_weights"))
pool2 = fluid.layers.pool2d(
input=conv2,
pool_size=3,
pool_stride=2,
pool_padding=0,
pool_type='max')
stdv = 1.0 / math.sqrt(pool2.shape[1] * 3 * 3)
conv3 = fluid.layers.conv2d(
input=pool2,
num_filters=384,
filter_size=3,
stride=1,
padding=1,
groups=1,
act='relu',
bias_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=layer_name[2] + "_offset"),
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=layer_name[2] + "_weights"))
stdv = 1.0 / math.sqrt(conv3.shape[1] * 3 * 3)
conv4 = fluid.layers.conv2d(
input=conv3,
num_filters=256,
filter_size=3,
stride=1,
padding=1,
groups=1,
act='relu',
bias_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=layer_name[3] + "_offset"),
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=layer_name[3] + "_weights"))
stdv = 1.0 / math.sqrt(conv4.shape[1] * 3 * 3)
conv5 = fluid.layers.conv2d(
input=conv4,
num_filters=256,
filter_size=3,
stride=1,
padding=1,
groups=1,
act='relu',
bias_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=layer_name[4] + "_offset"),
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=layer_name[4] + "_weights"))
pool5 = fluid.layers.pool2d(
input=conv5,
pool_size=3,
pool_stride=2,
pool_padding=0,
pool_type='max')
drop6 = fluid.layers.dropout(x=pool5, dropout_prob=0.5)
stdv = 1.0 / math.sqrt(drop6.shape[1] * drop6.shape[2] *
drop6.shape[3] * 1.0)
fc6 = fluid.layers.fc(
input=drop6,
size=4096,
act='relu',
bias_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=layer_name[5] + "_offset"),
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=layer_name[5] + "_weights"))
drop7 = fluid.layers.dropout(x=fc6, dropout_prob=0.5)
stdv = 1.0 / math.sqrt(drop7.shape[1] * 1.0)
fc7 = fluid.layers.fc(
input=drop7,
size=4096,
act='relu',
bias_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=layer_name[6] + "_offset"),
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=layer_name[6] + "_weights"))
stdv = 1.0 / math.sqrt(fc7.shape[1] * 1.0)
out = fluid.layers.fc(
input=fc7,
size=self.num_classes,
bias_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=layer_name[7] + "_offset"),
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=layer_name[7] + "_weights"))
return out
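A worked shape check for the network above, assuming the 3x224x224 input that the ImageNet pretrained weights expect. Each conv/pool follows out = (in + 2*pad - k)//stride + 1, which shows why fc6's input size, and hence the whole input shape, must be fixed:

    def out_size(in_size, k, s, p):
        return (in_size + 2 * p - k) // s + 1

    s = 224
    s = out_size(s, 11, 4, 2)  # conv1 -> 55
    s = out_size(s, 3, 2, 0)   # pool1 -> 27
    s = out_size(s, 5, 1, 2)   # conv2 -> 27
    s = out_size(s, 3, 2, 0)   # pool2 -> 13
    s = out_size(s, 3, 1, 1)   # conv3 -> 13
    s = out_size(s, 3, 1, 1)   # conv4 -> 13
    s = out_size(s, 3, 1, 1)   # conv5 -> 13
    s = out_size(s, 3, 2, 0)   # pool5 -> 6
    print(256 * s * s)         # 9216 inputs to fc6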
@@ -71,7 +71,7 @@ class HRNet(object):
         self.end_points = []
         return

-    def net(self, input, class_dim=1000):
+    def net(self, input):
         width = self.width
         channels_2, channels_3, channels_4 = self.channels[width]
         num_modules_2, num_modules_3, num_modules_4 = 1, 4, 3
@@ -125,7 +125,7 @@ class HRNet(object):
         stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
         out = fluid.layers.fc(
             input=pool,
-            size=class_dim,
+            size=self.num_classes,
             param_attr=ParamAttr(
                 name='fc_weights',
                 initializer=fluid.initializer.Uniform(-stdv, stdv)),
......
@@ -18,6 +18,7 @@ import random
 import os.path as osp
 import numpy as np
 from PIL import Image, ImageEnhance
+import paddlex.utils.logging as logging


 class ClsTransform:
@@ -96,7 +97,11 @@ class Compose(ClsTransform):
         if not isinstance(augmenters, list):
             raise Exception(
                 "augmenters should be list type in func add_augmenters()")
-        self.transforms = augmenters + self.transforms.transforms
+        transform_names = [type(x).__name__ for x in self.transforms]
+        for aug in augmenters:
+            if type(aug).__name__ in transform_names:
+                logging.error("{} is already in ComposedTransforms, need to remove it from add_augmenters().".format(type(aug).__name__))
+        self.transforms = augmenters + self.transforms


 class RandomCrop(ClsTransform):
......
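The rewritten add_augmenters above prepends user augmenters and now warns when an augmenter's class is already in the composed pipeline. A minimal sketch of the duplicate check, with dummy classes standing in for the real transforms:

    class RandomHorizontalFlip: pass
    class Normalize: pass

    transforms = [RandomHorizontalFlip(), Normalize()]
    augmenters = [RandomHorizontalFlip()]  # duplicates an existing step

    transform_names = [type(x).__name__ for x in transforms]
    for aug in augmenters:
        if type(aug).__name__ in transform_names:
            print("{} is already in ComposedTransforms".format(type(aug).__name__))
    transforms = augmenters + transforms  # augmenters run first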
@@ -27,6 +27,7 @@ from PIL import Image, ImageEnhance
 from .imgaug_support import execute_imgaug
 from .ops import *
 from .box_utils import *
+import paddlex.utils.logging as logging


 class DetTransform:
@@ -156,7 +157,11 @@ class Compose(DetTransform):
         if not isinstance(augmenters, list):
             raise Exception(
                 "augmenters should be list type in func add_augmenters()")
-        self.transforms = augmenters + self.transforms.transforms
+        transform_names = [type(x).__name__ for x in self.transforms]
+        for aug in augmenters:
+            if type(aug).__name__ in transform_names:
+                logging.error("{} is already in ComposedTransforms, need to remove it from add_augmenters().".format(type(aug).__name__))
+        self.transforms = augmenters + self.transforms


 class ResizeByShort(DetTransform):
@@ -1303,7 +1308,7 @@ class ComposedRCNNTransforms(Compose):
         super(ComposedRCNNTransforms, self).__init__(transforms)


-class ComposedYOLOTransforms(Compose):
+class ComposedYOLOv3Transforms(Compose):
     """Image preprocessing pipeline for YOLOv3 models, as follows.
     Training:
     1. During the first mixup_epoch epochs, apply the MixupImage strategy; see https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#mixupimage
@@ -1358,4 +1363,4 @@ class ComposedYOLOTransforms(Compose):
                 target_size=width, interp='CUBIC'), Normalize(
                     mean=mean, std=std)
             ]
-        super(ComposedYOLOTransforms, self).__init__(transforms)
+        super(ComposedYOLOv3Transforms, self).__init__(transforms)
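Renaming ComposedYOLOTransforms to ComposedYOLOv3Transforms breaks callers that import the old name. If backward compatibility were wanted, a one-line alias would keep old code working; this shim is purely hypothetical and not part of the commit:

    # hypothetical compatibility shim, not in this commit:
    ComposedYOLOTransforms = ComposedYOLOv3Transforms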
@@ -21,6 +21,7 @@ import numpy as np
 from PIL import Image
 import cv2
 from collections import OrderedDict
+import paddlex.utils.logging as logging


 class SegTransform:
@@ -112,7 +113,11 @@ class Compose(SegTransform):
         if not isinstance(augmenters, list):
             raise Exception(
                 "augmenters should be list type in func add_augmenters()")
-        self.transforms = augmenters + self.transforms.transforms
+        transform_names = [type(x).__name__ for x in self.transforms]
+        for aug in augmenters:
+            if type(aug).__name__ in transform_names:
+                logging.error("{} is already in ComposedTransforms, need to remove it from add_augmenters().".format(type(aug).__name__))
+        self.transforms = augmenters + self.transforms


 class RandomHorizontalFlip(SegTransform):
@@ -1127,6 +1132,6 @@ class ComposedSegTransforms(Compose):
             ]
         else:
             # transforms for evaluation/prediction
-            transforms = [Resize(512), Normalize(mean=mean, std=std)]
+            transforms = [Normalize(mean=mean, std=std)]
         super(ComposedSegTransforms, self).__init__(transforms)
@@ -20,6 +20,7 @@ import numpy as np
 from paddle.fluid.param_attr import ParamAttr
 from paddlex.interpret.as_data_reader.readers import preprocess_image


 def gen_user_home():
     if "HOME" in os.environ:
         home_path = os.environ["HOME"]
@@ -34,10 +35,20 @@ def paddle_get_fc_weights(var_name="fc_0.w_0"):

 def paddle_resize(extracted_features, outsize):
-    resized_features = fluid.layers.resize_bilinear(extracted_features, outsize)
+    resized_features = fluid.layers.resize_bilinear(extracted_features,
+                                                    outsize)
     return resized_features


+def get_precomputed_normlime_weights():
+    root_path = gen_user_home()
+    root_path = osp.join(root_path, '.paddlex')
+    h_pre_models = osp.join(root_path, "pre_models")
+    normlime_weights_file = osp.join(
+        h_pre_models, "normlime_weights_imagenet_resnet50vc.npy")
+    return np.load(normlime_weights_file, allow_pickle=True).item()
 def compute_features_for_kmeans(data_content):
     root_path = gen_user_home()
     root_path = osp.join(root_path, '.paddlex')
@@ -47,6 +58,7 @@ def compute_features_for_kmeans(data_content):
             os.makedirs(root_path)
         url = "https://bj.bcebos.com/paddlex/interpret/pre_models.tar.gz"
         pdx.utils.download_and_decompress(url, path=root_path)
+
     def conv_bn_layer(input,
                       num_filters,
                       filter_size,
@@ -55,7 +67,7 @@ def compute_features_for_kmeans(data_content):
                       act=None,
                       name=None,
                       is_test=True,
-                      global_name=''):
+                      global_name='for_kmeans_'):
         conv = fluid.layers.conv2d(
             input=input,
             num_filters=num_filters,
@@ -79,14 +91,14 @@ def compute_features_for_kmeans(data_content):
             bias_attr=ParamAttr(global_name + bn_name + '_offset'),
             moving_mean_name=global_name + bn_name + '_mean',
             moving_variance_name=global_name + bn_name + '_variance',
-            use_global_stats=is_test
-        )
+            use_global_stats=is_test)

     startup_prog = fluid.default_startup_program().clone(for_test=True)
     prog = fluid.Program()
     with fluid.program_guard(prog, startup_prog):
         with fluid.unique_name.guard():
-            image_op = fluid.data(name='image', shape=[None, 3, 224, 224], dtype='float32')
+            image_op = fluid.data(
+                name='image', shape=[None, 3, 224, 224], dtype='float32')

             conv = conv_bn_layer(
                 input=image_op,
@@ -110,7 +122,8 @@ def compute_features_for_kmeans(data_content):
                 act='relu',
                 name='conv1_3')
             extracted_features = conv
-            resized_features = fluid.layers.resize_bilinear(extracted_features, image_op.shape[2:])
+            resized_features = fluid.layers.resize_bilinear(extracted_features,
+                                                            image_op.shape[2:])

     gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
     place = fluid.CUDAPlace(gpu_id)
@@ -119,7 +132,10 @@ def compute_features_for_kmeans(data_content):
     exe.run(startup_prog)
     fluid.io.load_persistables(exe, h_pre_models, prog)
-    images = preprocess_image(data_content)  # transpose to [N, 3, H, W], scaled to [0.0, 1.0]
-    result = exe.run(prog, fetch_list=[resized_features], feed={'image': images})
+    images = preprocess_image(
+        data_content)  # transpose to [N, 3, H, W], scaled to [0.0, 1.0]
+    result = exe.run(prog,
+                     fetch_list=[resized_features],
+                     feed={'image': images})
     return result[0][0]
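get_precomputed_normlime_weights above relies on np.save wrapping a Python dict in a 0-d object array, so loading needs allow_pickle=True plus .item() to unwrap the dict. A self-contained round-trip check (the path and payload are illustrative):

    import numpy as np

    np.save('/tmp/normlime_demo.npy', {'lime_weights': {0: [(3, 0.12)]}})
    loaded = np.load('/tmp/normlime_demo.npy', allow_pickle=True).item()
    print(loaded['lime_weights'][0])  # [(3, 0.12)]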
@@ -20,12 +20,10 @@ class Interpretation(object):
     """
     Base class for all interpretation algorithms.
     """
-    def __init__(self, interpretation_algorithm_name, predict_fn, label_names, **kwargs):
-        supported_algorithms = {
-            'cam': CAM,
-            'lime': LIME,
-            'normlime': NormLIME
-        }
+
+    def __init__(self, interpretation_algorithm_name, predict_fn, label_names,
+                 **kwargs):
+        supported_algorithms = {'cam': CAM, 'lime': LIME, 'normlime': NormLIME}
         self.algorithm_name = interpretation_algorithm_name.lower()
         assert self.algorithm_name in supported_algorithms.keys()
@@ -33,19 +31,17 @@ class Interpretation(object):
         # initialization for the interpretation algorithm.
         self.algorithm = supported_algorithms[self.algorithm_name](
-            self.predict_fn, label_names, **kwargs
-        )
+            self.predict_fn, label_names, **kwargs)

-    def interpret(self, data_, visualization=True, save_to_disk=True, save_dir='./tmp'):
+    def interpret(self, data_, visualization=True, save_dir='./'):
         """
         Args:
             data_: data_ can be a path or numpy.ndarray.
             visualization: whether to show using matplotlib.
-            save_to_disk: whether to save the figure in local disk.
             save_dir: dir to save figure if save_to_disk is True.
         Returns:
         """
-        return self.algorithm.interpret(data_, visualization, save_to_disk, save_dir)
+        return self.algorithm.interpret(data_, visualization, save_dir)
@@ -27,7 +27,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 The code in this file (lime_base.py) is modified from https://github.com/marcotcr/lime.
 """
-
 import numpy as np
 import scipy as sp
@@ -39,10 +38,8 @@ import paddlex.utils.logging as logging
 class LimeBase(object):
     """Class for learning a locally linear sparse model from perturbed data"""
-    def __init__(self,
-                 kernel_fn,
-                 verbose=False,
-                 random_state=None):
+
+    def __init__(self, kernel_fn, verbose=False, random_state=None):
         """Init function

         Args:
@@ -72,15 +69,14 @@ class LimeBase(object):
         """
         from sklearn.linear_model import lars_path
         x_vector = weighted_data
-        alphas, _, coefs = lars_path(x_vector,
-                                     weighted_labels,
-                                     method='lasso',
-                                     verbose=False)
+        alphas, _, coefs = lars_path(
+            x_vector, weighted_labels, method='lasso', verbose=False)
         return alphas, coefs

     def forward_selection(self, data, labels, weights, num_features):
         """Iteratively adds features to the model"""
-        clf = Ridge(alpha=0, fit_intercept=True, random_state=self.random_state)
+        clf = Ridge(
+            alpha=0, fit_intercept=True, random_state=self.random_state)
         used_features = []
         for _ in range(min(num_features, data.shape[1])):
             max_ = -100000000
@@ -88,9 +84,11 @@ class LimeBase(object):
             for feature in range(data.shape[1]):
                 if feature in used_features:
                     continue
-                clf.fit(data[:, used_features + [feature]], labels,
+                clf.fit(data[:, used_features + [feature]],
+                        labels,
                         sample_weight=weights)
-                score = clf.score(data[:, used_features + [feature]],
+                score = clf.score(
+                    data[:, used_features + [feature]],
                     labels,
                     sample_weight=weights)
                 if score > max_:
@@ -108,8 +106,8 @@ class LimeBase(object):
         elif method == 'forward_selection':
             return self.forward_selection(data, labels, weights, num_features)
         elif method == 'highest_weights':
-            clf = Ridge(alpha=0.01, fit_intercept=True,
-                        random_state=self.random_state)
+            clf = Ridge(
+                alpha=0.01, fit_intercept=True, random_state=self.random_state)
             clf.fit(data, labels, sample_weight=weights)
             coef = clf.coef_
@@ -125,7 +123,8 @@ class LimeBase(object):
                 nnz_indexes = argsort_data[::-1]
                 indices = weighted_data.indices[nnz_indexes]
                 num_to_pad = num_features - sdata
-                indices = np.concatenate((indices, np.zeros(num_to_pad, dtype=indices.dtype)))
+                indices = np.concatenate((indices, np.zeros(
+                    num_to_pad, dtype=indices.dtype)))
                 indices_set = set(indices)
                 pad_counter = 0
                 for i in range(data.shape[1]):
@@ -135,7 +134,8 @@ class LimeBase(object):
                         if pad_counter >= num_to_pad:
                             break
             else:
-                nnz_indexes = argsort_data[sdata - num_features:sdata][::-1]
+                nnz_indexes = argsort_data[sdata - num_features:sdata][::
+                                                                       -1]
                 indices = weighted_data.indices[nnz_indexes]
             return indices
         else:
@@ -146,13 +146,13 @@ class LimeBase(object):
                                      reverse=True)
             return np.array([x[0] for x in feature_weights[:num_features]])
         elif method == 'lasso_path':
-            weighted_data = ((data - np.average(data, axis=0, weights=weights))
-                             * np.sqrt(weights[:, np.newaxis]))
-            weighted_labels = ((labels - np.average(labels, weights=weights))
-                               * np.sqrt(weights))
+            weighted_data = ((data - np.average(
+                data, axis=0, weights=weights)) *
+                             np.sqrt(weights[:, np.newaxis]))
+            weighted_labels = ((labels - np.average(
+                labels, weights=weights)) * np.sqrt(weights))
             nonzero = range(weighted_data.shape[1])
-            _, coefs = self.generate_lars_path(weighted_data,
-                                               weighted_labels)
+            _, coefs = self.generate_lars_path(weighted_data, weighted_labels)
             for i in range(len(coefs.T) - 1, 0, -1):
                 nonzero = coefs.T[i].nonzero()[0]
                 if len(nonzero) <= num_features:
@@ -164,8 +164,8 @@ class LimeBase(object):
                 n_method = 'forward_selection'
             else:
                 n_method = 'highest_weights'
-            return self.feature_selection(data, labels, weights,
-                                          num_features, n_method)
+            return self.feature_selection(data, labels, weights, num_features,
+                                          n_method)

     def interpret_instance_with_data(self,
                                      neighborhood_data,
@@ -214,30 +214,31 @@ class LimeBase(object):
         weights = self.kernel_fn(distances)
         labels_column = neighborhood_labels[:, label]
         used_features = self.feature_selection(neighborhood_data,
-                                               labels_column,
-                                               weights,
-                                               num_features,
-                                               feature_selection)
+                                               labels_column, weights,
+                                               num_features, feature_selection)
         if model_regressor is None:
-            model_regressor = Ridge(alpha=1, fit_intercept=True,
-                                    random_state=self.random_state)
+            model_regressor = Ridge(
+                alpha=1, fit_intercept=True, random_state=self.random_state)
         easy_model = model_regressor
         easy_model.fit(neighborhood_data[:, used_features],
-                       labels_column, sample_weight=weights)
+                       labels_column,
+                       sample_weight=weights)
         prediction_score = easy_model.score(
             neighborhood_data[:, used_features],
-            labels_column, sample_weight=weights)
+            labels_column,
+            sample_weight=weights)
-        local_pred = easy_model.predict(neighborhood_data[0, used_features].reshape(1, -1))
+        local_pred = easy_model.predict(neighborhood_data[0, used_features]
+                                        .reshape(1, -1))

         if self.verbose:
             logging.info('Intercept' + str(easy_model.intercept_))
             logging.info('Prediction_local' + str(local_pred))
             logging.info('Right:' + str(neighborhood_labels[0, label]))
-        return (easy_model.intercept_,
-                sorted(zip(used_features, easy_model.coef_),
-                       key=lambda x: np.abs(x[1]), reverse=True),
-                prediction_score, local_pred)
+        return (easy_model.intercept_, sorted(
+            zip(used_features, easy_model.coef_),
+            key=lambda x: np.abs(x[1]),
+            reverse=True), prediction_score, local_pred)


 class ImageInterpretation(object):
@@ -254,8 +255,13 @@ class ImageInterpretation(object):
         self.local_weights = {}
         self.local_pred = None

-    def get_image_and_mask(self, label, positive_only=True, negative_only=False, hide_rest=False,
-                           num_features=5, min_weight=0.):
+    def get_image_and_mask(self,
+                           label,
+                           positive_only=True,
+                           negative_only=False,
+                           hide_rest=False,
+                           num_features=5,
+                           min_weight=0.):
         """Init function.

         Args:
@@ -279,7 +285,9 @@ class ImageInterpretation(object):
         if label not in self.local_weights:
             raise KeyError('Label not in interpretation')
         if positive_only & negative_only:
-            raise ValueError("Positive_only and negative_only cannot be true at the same time.")
+            raise ValueError(
+                "Positive_only and negative_only cannot be true at the same time."
+            )
         segments = self.segments
         image = self.image
         local_weights_label = self.local_weights[label]
@@ -289,14 +297,20 @@ class ImageInterpretation(object):
         else:
             temp = self.image.copy()
         if positive_only:
-            fs = [x[0] for x in local_weights_label
-                  if x[1] > 0 and x[1] > min_weight][:num_features]
+            fs = [
+                x[0] for x in local_weights_label
+                if x[1] > 0 and x[1] > min_weight
+            ][:num_features]
         if negative_only:
-            fs = [x[0] for x in local_weights_label
-                  if x[1] < 0 and abs(x[1]) > min_weight][:num_features]
+            fs = [
+                x[0] for x in local_weights_label
+                if x[1] < 0 and abs(x[1]) > min_weight
+            ][:num_features]
         if positive_only or negative_only:
+            c = 1 if positive_only else 0
             for f in fs:
-                temp[segments == f] = image[segments == f].copy()
+                temp[segments == f] = [0, 255, 0]
+                # temp[segments == f, c] = np.max(image)
                 mask[segments == f] = 1
             return temp, mask
         else:
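The visualization change above paints selected superpixels solid green ([0, 255, 0]) instead of copying the original pixels, so the chosen regions stand out. A toy NumPy sketch of the painting step (image and segment layout are illustrative):

    import numpy as np

    image = np.full((2, 2, 3), 128, dtype=np.uint8)
    segments = np.array([[0, 0], [1, 1]])
    temp = np.zeros_like(image)
    for f in [1]:  # superpixels selected by LIME
        temp[segments == f] = [0, 255, 0]
    print(temp[1, 0])  # [  0 255   0]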
@@ -330,8 +344,11 @@ class ImageInterpretation(object):
         temp = np.zeros_like(image)

         weight_max = abs(local_weights_label[0][1])
-        local_weights_label = [(f, w/weight_max) for f, w in local_weights_label]
-        local_weights_label = sorted(local_weights_label, key=lambda x: x[1], reverse=True)  # negatives are at last.
+        local_weights_label = [(f, w / weight_max)
+                               for f, w in local_weights_label]
+        local_weights_label = sorted(
+            local_weights_label, key=lambda x: x[1],
+            reverse=True)  # negatives are at last.
         cmaps = cm.get_cmap('Spectral')
         colors = cmaps(np.linspace(0, 1, len(local_weights_label)))
@@ -354,8 +371,12 @@ class LimeImageInterpreter(object):
     feature that is 1 when the value is the same as the instance being
     interpreted."""
-    def __init__(self, kernel_width=.25, kernel=None, verbose=False,
-                 feature_selection='auto', random_state=None):
+
+    def __init__(self,
+                 kernel_width=.25,
+                 kernel=None,
+                 verbose=False,
+                 feature_selection='auto',
+                 random_state=None):
         """Init function.

         Args:
@@ -377,22 +398,27 @@ class LimeImageInterpreter(object):
         kernel_width = float(kernel_width)
         if kernel is None:

             def kernel(d, kernel_width):
-                return np.sqrt(np.exp(-(d ** 2) / kernel_width ** 2))
+                return np.sqrt(np.exp(-(d**2) / kernel_width**2))

         kernel_fn = partial(kernel, kernel_width=kernel_width)

         self.random_state = check_random_state(random_state)
         self.feature_selection = feature_selection
-        self.base = LimeBase(kernel_fn, verbose, random_state=self.random_state)
+        self.base = LimeBase(
+            kernel_fn, verbose, random_state=self.random_state)

-    def interpret_instance(self, image, classifier_fn, labels=(1,),
-                           hide_color=None,
-                           num_features=100000, num_samples=1000,
-                           batch_size=10,
-                           distance_metric='cosine',
-                           model_regressor=None
-                           ):
+    def interpret_instance(self,
+                           image,
+                           classifier_fn,
+                           labels=(1, ),
+                           hide_color=None,
+                           num_features=100000,
+                           num_samples=1000,
+                           batch_size=10,
+                           distance_metric='cosine',
+                           model_regressor=None):
         """Generates interpretations for a prediction.

         First, we generate neighborhood data by randomly perturbing features
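For reference, the default kernel defined in the hunk above weights perturbed samples by sqrt(exp(-d^2 / kernel_width^2)). A quick numeric check of the decay for the default kernel_width=0.25:

    import numpy as np

    def kernel(d, kernel_width=0.25):
        return np.sqrt(np.exp(-(d**2) / kernel_width**2))

    print(kernel(np.array([0.0, 0.1, 0.25, 0.5])))
    # approx. [1.0, 0.923, 0.607, 0.135]: farther samples count for less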
@@ -435,6 +461,7 @@ class LimeImageInterpreter(object):
         self.segments = segments

         fudged_image = image.copy()
+        # global_mean = np.mean(image, (0, 1))
         if hide_color is None:
             # if no hide_color, use the mean
             for x in np.unique(segments):
@@ -461,22 +488,28 @@ class LimeImageInterpreter(object):
             top = labels

-        data, labels = self.data_labels(image, fudged_image, segments,
-                                        classifier_fn, num_samples,
-                                        batch_size=batch_size)
+        data, labels = self.data_labels(
+            image,
+            fudged_image,
+            segments,
+            classifier_fn,
+            num_samples,
+            batch_size=batch_size)

-        distances = sklearn.metrics.pairwise_distances(
-            data,
-            data[0].reshape(1, -1),
-            metric=distance_metric
-        ).ravel()
+        distances = sklearn.metrics.pairwise_distances(
+            data, data[0].reshape(1, -1), metric=distance_metric).ravel()

         interpretation_image = ImageInterpretation(image, segments)
         for label in top:
             (interpretation_image.intercept[label],
              interpretation_image.local_weights[label],
-             interpretation_image.score, interpretation_image.local_pred) = self.base.interpret_instance_with_data(
-                data, labels, distances, label, num_features,
+             interpretation_image.score, interpretation_image.local_pred
+             ) = self.base.interpret_instance_with_data(
+                 data,
+                 labels,
+                 distances,
+                 label,
+                 num_features,
                 model_regressor=model_regressor,
                 feature_selection=self.feature_selection)
         return interpretation_image
@@ -511,6 +544,9 @@ class LimeImageInterpreter(object):
         labels = []
         data[0, :] = 1
         imgs = []
+
+        logging.info("Computing LIME.", use_color=True)
         for row in tqdm.tqdm(data):
             temp = copy.deepcopy(image)
             zeros = np.where(row == 0)[0]
......
@@ -16,6 +16,7 @@ import os
 import os.path as osp
 import numpy as np
 import glob
+import tqdm
 from paddlex.interpret.as_data_reader.readers import read_image
 import paddlex.utils.logging as logging
@@ -38,18 +39,24 @@ def combine_normlime_and_lime(lime_weights, g_weights):
     for y in pred_labels:
         normlized_lime_weights_y = lime_weights[y]
-        lime_weights_dict = {tuple_w[0]: tuple_w[1] for tuple_w in normlized_lime_weights_y}
+        lime_weights_dict = {
+            tuple_w[0]: tuple_w[1]
+            for tuple_w in normlized_lime_weights_y
+        }

         normlized_g_weight_y = g_weights[y]
-        normlime_weights_dict = {tuple_w[0]: tuple_w[1] for tuple_w in normlized_g_weight_y}
+        normlime_weights_dict = {
+            tuple_w[0]: tuple_w[1]
+            for tuple_w in normlized_g_weight_y
+        }

         combined_weights[y] = [
             (seg_k, lime_weights_dict[seg_k] * normlime_weights_dict[seg_k])
             for seg_k in lime_weights_dict.keys()
         ]
-        combined_weights[y] = sorted(combined_weights[y],
-                                     key=lambda x: np.abs(x[1]), reverse=True)
+        combined_weights[y] = sorted(
+            combined_weights[y], key=lambda x: np.abs(x[1]), reverse=True)

     return combined_weights
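combine_normlime_and_lime above multiplies, per predicted label, each segment's LIME weight by its NormLIME weight and re-ranks segments by absolute combined weight. A pure-Python sketch with toy weights:

    lime_weights = {0: [(1, 0.5), (2, -0.25)]}  # label -> [(segment, weight)]
    g_weights = {0: [(1, 0.5), (2, 0.8)]}       # NormLIME weights, same layout

    lime_d, norm_d = dict(lime_weights[0]), dict(g_weights[0])
    combined = sorted(((k, lime_d[k] * norm_d[k]) for k in lime_d),
                      key=lambda x: abs(x[1]), reverse=True)
    print(combined)  # [(1, 0.25), (2, -0.2)]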
...@@ -67,7 +74,8 @@ def centroid_using_superpixels(features, segments): ...@@ -67,7 +74,8 @@ def centroid_using_superpixels(features, segments):
regions = regionprops(segments + 1) regions = regionprops(segments + 1)
one_list = np.zeros((len(np.unique(segments)), features.shape[2])) one_list = np.zeros((len(np.unique(segments)), features.shape[2]))
for i, r in enumerate(regions): for i, r in enumerate(regions):
one_list[i] = features[int(r.centroid[0] + 0.5), int(r.centroid[1] + 0.5), :] one_list[i] = features[int(r.centroid[0] + 0.5), int(r.centroid[1] +
0.5), :]
return one_list return one_list
...@@ -80,30 +88,39 @@ def get_feature_for_kmeans(feature_map, segments): ...@@ -80,30 +88,39 @@ def get_feature_for_kmeans(feature_map, segments):
return x return x
def precompute_normlime_weights(list_data_, predict_fn, num_samples=3000, batch_size=50, save_dir='./tmp'): def precompute_normlime_weights(list_data_,
predict_fn,
num_samples=3000,
batch_size=50,
save_dir='./tmp'):
# save lime weights and kmeans cluster labels # save lime weights and kmeans cluster labels
precompute_lime_weights(list_data_, predict_fn, num_samples, batch_size, save_dir) precompute_lime_weights(list_data_, predict_fn, num_samples, batch_size,
save_dir)
# load precomputed results, compute normlime weights and save. # load precomputed results, compute normlime weights and save.
fname_list = glob.glob(os.path.join(save_dir, 'lime_weights_s{}*.npy'.format(num_samples))) fname_list = glob.glob(
os.path.join(save_dir, 'lime_weights_s{}*.npy'.format(num_samples)))
return compute_normlime_weights(fname_list, save_dir, num_samples) return compute_normlime_weights(fname_list, save_dir, num_samples)
def save_one_lime_predict_and_kmean_labels(lime_all_weights, image_pred_labels, cluster_labels, save_path): def save_one_lime_predict_and_kmean_labels(lime_all_weights, image_pred_labels,
cluster_labels, save_path):
lime_weights = {} lime_weights = {}
for label in image_pred_labels: for label in image_pred_labels:
lime_weights[label] = lime_all_weights[label] lime_weights[label] = lime_all_weights[label]
for_normlime_weights = { for_normlime_weights = {
'lime_weights': lime_weights, # a dict: class_label: (seg_label, weight) 'lime_weights':
lime_weights, # a dict: class_label: (seg_label, weight)
'cluster': cluster_labels # a list with segments as indices. 'cluster': cluster_labels # a list with segments as indices.
} }
np.save(save_path, for_normlime_weights) np.save(save_path, for_normlime_weights)
def precompute_lime_weights(list_data_, predict_fn, num_samples, batch_size, save_dir): def precompute_lime_weights(list_data_, predict_fn, num_samples, batch_size,
save_dir):
root_path = gen_user_home() root_path = gen_user_home()
root_path = osp.join(root_path, '.paddlex') root_path = osp.join(root_path, '.paddlex')
h_pre_models = osp.join(root_path, "pre_models") h_pre_models = osp.join(root_path, "pre_models")
...@@ -117,17 +134,24 @@ def precompute_lime_weights(list_data_, predict_fn, num_samples, batch_size, sav ...@@ -117,17 +134,24 @@ def precompute_lime_weights(list_data_, predict_fn, num_samples, batch_size, sav
for data_index, each_data_ in enumerate(list_data_): for data_index, each_data_ in enumerate(list_data_):
if isinstance(each_data_, str): if isinstance(each_data_, str):
save_path = "lime_weights_s{}_{}.npy".format(num_samples, each_data_.split('/')[-1].split('.')[0]) save_path = "lime_weights_s{}_{}.npy".format(
num_samples, each_data_.split('/')[-1].split('.')[0])
save_path = os.path.join(save_dir, save_path) save_path = os.path.join(save_dir, save_path)
else: else:
save_path = "lime_weights_s{}_{}.npy".format(num_samples, data_index) save_path = "lime_weights_s{}_{}.npy".format(num_samples,
data_index)
save_path = os.path.join(save_dir, save_path) save_path = os.path.join(save_dir, save_path)
if os.path.exists(save_path): if os.path.exists(save_path):
logging.info(save_path + ' exists, not computing this one.', use_color=True) logging.info(
save_path + ' exists, not computing this one.', use_color=True)
continue continue
img_file_name = each_data_ if isinstance(each_data_, str) else data_index img_file_name = each_data_ if isinstance(each_data_,
logging.info('processing '+ img_file_name + ' [{}/{}]'.format(data_index, len(list_data_)), use_color=True) str) else data_index
logging.info(
'processing ' + img_file_name + ' [{}/{}]'.format(data_index,
len(list_data_)),
use_color=True)
image_show = read_image(each_data_) image_show = read_image(each_data_)
result = predict_fn(image_show) result = predict_fn(image_show)
@@ -156,20 +180,25 @@ def precompute_lime_weights(list_data_, predict_fn, num_samples, batch_size, save_dir):
         pred_label = pred_label[:top_k]
 
         algo = lime_base.LimeImageInterpreter()
-        interpreter = algo.interpret_instance(image_show[0], predict_fn, pred_label, 0,
-                                              num_samples=num_samples, batch_size=batch_size)
-
-        X = get_feature_for_kmeans(compute_features_for_kmeans(image_show).transpose((1, 2, 0)), interpreter.segments)
+        interpreter = algo.interpret_instance(
+            image_show[0],
+            predict_fn,
+            pred_label,
+            0,
+            num_samples=num_samples,
+            batch_size=batch_size)
+
+        X = get_feature_for_kmeans(
+            compute_features_for_kmeans(image_show).transpose((1, 2, 0)),
+            interpreter.segments)
         try:
             cluster_labels = kmeans_model.predict(X)
         except AttributeError:
             from sklearn.metrics import pairwise_distances_argmin_min
-            cluster_labels, _ = pairwise_distances_argmin_min(X, kmeans_model.cluster_centers_)
+            cluster_labels, _ = pairwise_distances_argmin_min(
+                X, kmeans_model.cluster_centers_)
+
         save_one_lime_predict_and_kmean_labels(
-            interpreter.local_weights, pred_label,
-            cluster_labels,
-            save_path
-        )
+            interpreter.local_weights, pred_label, cluster_labels, save_path)
 
 
 def compute_normlime_weights(a_list_lime_fnames, save_dir, lime_num_samples):
@@ -181,7 +210,8 @@ def compute_normlime_weights(a_list_lime_fnames, save_dir, lime_num_samples):
             lime_weights = lime_weights_and_cluster['lime_weights']
             cluster = lime_weights_and_cluster['cluster']
         except:
-            logging.info('When loading precomputed LIME result, skipping' + str(f))
+            logging.info('When loading precomputed LIME result, skipping ' +
+                         str(f))
             continue
         logging.info('Loading precomputed LIME result, ' + str(f))
 
         pred_labels = lime_weights.keys()
@@ -203,10 +233,12 @@ def compute_normlime_weights(a_list_lime_fnames, save_dir, lime_num_samples):
     for y in normlime_weights_all_labels:
         normlime_weights = normlime_weights_all_labels.get(y, {})
         for k in normlime_weights:
-            normlime_weights[k] = sum(normlime_weights[k]) / len(normlime_weights[k])
+            normlime_weights[k] = sum(normlime_weights[k]) / len(
+                normlime_weights[k])
 
     # check normlime
-    if len(normlime_weights_all_labels.keys()) < max(normlime_weights_all_labels.keys()) + 1:
+    if len(normlime_weights_all_labels.keys()) < max(
+            normlime_weights_all_labels.keys()) + 1:
         logging.info(
             "\n" + \
             "Warning: !!! \n" + \
@@ -218,17 +250,166 @@ def compute_normlime_weights(a_list_lime_fnames, save_dir, lime_num_samples):
         )
 
     n = 0
-    f_out = 'normlime_weights_s{}_samples_{}-{}.npy'.format(lime_num_samples, len(a_list_lime_fnames), n)
-    while os.path.exists(
-            os.path.join(save_dir, f_out)
-    ):
+    f_out = 'normlime_weights_s{}_samples_{}-{}.npy'.format(
+        lime_num_samples, len(a_list_lime_fnames), n)
+    while os.path.exists(os.path.join(save_dir, f_out)):
         n += 1
-        f_out = 'normlime_weights_s{}_samples_{}-{}.npy'.format(lime_num_samples, len(a_list_lime_fnames), n)
+        f_out = 'normlime_weights_s{}_samples_{}-{}.npy'.format(
+            lime_num_samples, len(a_list_lime_fnames), n)
         continue
 
-    np.save(
-        os.path.join(save_dir, f_out),
-        normlime_weights_all_labels
-    )
+    np.save(os.path.join(save_dir, f_out), normlime_weights_all_labels)
     return os.path.join(save_dir, f_out)
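For orientation, the aggregation that `compute_normlime_weights` performs over those per-image files boils down to averaging the collected LIME weights per (class label, cluster) pair, as the averaging lines above show. A toy sketch with invented numbers, glossing over the segment-to-cluster mapping (the rest of this hunk then adds the new `precompute_global_classifier` below):

```python
from collections import defaultdict

# Invented per-image LIME weights, keyed by class label, as
# (cluster index, weight) pairs; only the grouping/averaging mirrors the code.
per_image = [
    {0: [(3, 0.4), (7, 0.1)]},  # image 1
    {0: [(3, 0.2)]},            # image 2
]
collected = defaultdict(lambda: defaultdict(list))
for lime_weights in per_image:
    for label, pairs in lime_weights.items():
        for cluster, w in pairs:
            collected[label][cluster].append(w)
normlime = {
    label: {c: sum(ws) / len(ws) for c, ws in clusters.items()}
    for label, clusters in collected.items()
}
print(normlime[0])  # {3: ~0.3, 7: 0.1}
```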
def precompute_global_classifier(dataset,
predict_fn,
save_path,
batch_size=50,
max_num_samples=1000):
from sklearn.linear_model import LogisticRegression
root_path = gen_user_home()
root_path = osp.join(root_path, '.paddlex')
h_pre_models = osp.join(root_path, "pre_models")
if not osp.exists(h_pre_models):
if not osp.exists(root_path):
os.makedirs(root_path)
url = "https://bj.bcebos.com/paddlex/interpret/pre_models.tar.gz"
pdx.utils.download_and_decompress(url, path=root_path)
h_pre_models_kmeans = osp.join(h_pre_models, "kmeans_model.pkl")
kmeans_model = load_kmeans_model(h_pre_models_kmeans)
image_list = []
for item in dataset.file_list:
image_list.append(item[0])
x_data = []
y_labels = []
num_features = len(kmeans_model.cluster_centers_)
logging.info(
"Initialization for NormLIME: Computing each sample in the test list.",
use_color=True)
for each_data_ in tqdm.tqdm(image_list):
x_data_i = np.zeros((num_features))
image_show = read_image(each_data_)
result = predict_fn(image_show)
result = result[0] # only one image here.
c = compute_features_for_kmeans(image_show).transpose((1, 2, 0))
segments = np.zeros((image_show.shape[1], image_show.shape[2]),
np.int32)
num_blocks = 10
height_per_i = segments.shape[0] // num_blocks + 1
width_per_i = segments.shape[1] // num_blocks + 1
for i in range(segments.shape[0]):
for j in range(segments.shape[1]):
segments[i,
j] = i // height_per_i * num_blocks + j // width_per_i
# segments = quickshift(image_show[0], sigma=1)
X = get_feature_for_kmeans(c, segments)
try:
cluster_labels = kmeans_model.predict(X)
except AttributeError:
from sklearn.metrics import pairwise_distances_argmin_min
cluster_labels, _ = pairwise_distances_argmin_min(
X, kmeans_model.cluster_centers_)
for c in cluster_labels:
x_data_i[c] = 1
# x_data_i /= len(cluster_labels)
pred_y_i = np.argmax(result)
y_labels.append(pred_y_i)
x_data.append(x_data_i)
if len(np.unique(y_labels)) < 2:
logging.info("Warning: The test samples in the dataset is limited.\n \
NormLIME may have no effect on the results.\n \
Try to add more test samples, or see the results of LIME.")
num_classes = np.max(np.unique(y_labels)) + 1
normlime_weights_all_labels = {}
for class_index in range(num_classes):
w = np.ones((num_features)) / num_features
normlime_weights_all_labels[class_index] = {
i: wi
for i, wi in enumerate(w)
}
logging.info("Saving the computed normlime_weights in {}".format(
save_path))
np.save(save_path, normlime_weights_all_labels)
return save_path
clf = LogisticRegression(multi_class='multinomial', max_iter=1000)
clf.fit(x_data, y_labels)
num_classes = np.max(np.unique(y_labels)) + 1
normlime_weights_all_labels = {}
if len(y_labels) / len(np.unique(y_labels)) < 3:
logging.info("Warning: The test samples in the dataset is limited.\n \
NormLIME may have no effect on the results.\n \
Try to add more test samples, or see the results of LIME.")
if len(np.unique(y_labels)) == 2:
# binary: clf.coef_ has shape of [1, num_features]
for class_index in range(num_classes):
if class_index not in clf.classes_:
w = np.ones((num_features)) / num_features
normlime_weights_all_labels[class_index] = {
i: wi
for i, wi in enumerate(w)
}
continue
if clf.classes_[0] == class_index:
w = -clf.coef_[0]
else:
w = clf.coef_[0]
# softmax
w = w - np.max(w)
exp_w = np.exp(w * 10)
w = exp_w / np.sum(exp_w)
normlime_weights_all_labels[class_index] = {
i: wi
for i, wi in enumerate(w)
}
else:
# clf.coef_ has shape of [len(np.unique(y_labels)), num_features]
for class_index in range(num_classes):
if class_index not in clf.classes_:
w = np.ones((num_features)) / num_features
normlime_weights_all_labels[class_index] = {
i: wi
for i, wi in enumerate(w)
}
continue
coef_class_index = np.where(clf.classes_ == class_index)[0][0]
w = clf.coef_[coef_class_index]
# softmax
w = w - np.max(w)
exp_w = np.exp(w * 10)
w = exp_w / np.sum(exp_w)
normlime_weights_all_labels[class_index] = {
i: wi
for i, wi in enumerate(w)
}
logging.info("Saving the computed normlime_weights in {}".format(
save_path))
np.save(save_path, normlime_weights_all_labels)
return save_path
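A note on the `# softmax` blocks above: each class's logistic-regression coefficients are mapped to positive, normalized per-cluster weights via a max-shifted softmax, and the factor of 10 sharpens the distribution so the most predictive clusters dominate. A standalone sketch of that conversion:

```python
import numpy as np

def coef_to_weights(coef_row, sharpen=10.0):
    """Stabilized, sharpened softmax over one class's coefficients, as above."""
    w = coef_row - np.max(coef_row)  # shift so the largest exponent is 0 (stability)
    exp_w = np.exp(w * sharpen)      # multiply before exp to sharpen the distribution
    return exp_w / np.sum(exp_w)     # positive weights that sum to 1

print(coef_to_weights(np.array([0.5, 0.1, -0.3])))
```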
@@ -13,17 +13,26 @@
 # limitations under the License.
 
 import numpy as np
+import cv2
+import copy
 
 
 def interpretation_predict(model, images):
-    model.arrange_transforms(
-        transforms=model.test_transforms, mode='test')
+    images = images.astype('float32')
+    model.arrange_transforms(transforms=model.test_transforms, mode='test')
+    tmp_transforms = copy.deepcopy(model.test_transforms.transforms)
+    model.test_transforms.transforms = model.test_transforms.transforms[-2:]
+
     new_imgs = []
     for i in range(images.shape[0]):
-        img = images[i]
-        new_imgs.append(model.test_transforms(img)[0])
+        images[i] = cv2.cvtColor(images[i], cv2.COLOR_RGB2BGR)
+        new_imgs.append(model.test_transforms(images[i])[0])
+
     new_imgs = np.array(new_imgs)
-    result = model.exe.run(
-        model.test_prog,
-        feed={'image': new_imgs},
-        fetch_list=list(model.interpretation_feats.values()))
-    return result
\ No newline at end of file
+    out = model.exe.run(model.test_prog,
+                        feed={'image': new_imgs},
+                        fetch_list=list(model.interpretation_feats.values()))
+
+    model.test_transforms.transforms = tmp_transforms
+    return out
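The `[-2:]` / `[:-2]` split that appears here and in `lime`/`normlime` below deserves a note: the perturbed samples LIME feeds in are already resized and cropped, so only the tail of the transform pipeline (presumably normalization and tensor arrangement) is re-applied per sample. A toy illustration of the assumed split, with illustrative transform names only:

```python
# Hypothetical transform pipeline; the names are placeholders.
pipeline = ['ResizeByShort', 'CenterCrop', 'Normalize', 'ArrangeClassifier']

head, tail = pipeline[:-2], pipeline[-2:]
print(head)  # run once when the original image is loaded (see lime()/normlime())
print(tail)  # re-run on every perturbed sample inside interpretation_predict()
```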
@@ -20,14 +20,11 @@ import numpy as np
 import paddlex as pdx
 from .interpretation_predict import interpretation_predict
 from .core.interpretation import Interpretation
-from .core.normlime_base import precompute_normlime_weights
+from .core.normlime_base import precompute_global_classifier
 from .core._session_preparation import gen_user_home
 
 
-def lime(img_file,
-         model,
-         num_samples=3000,
-         batch_size=50,
-         save_dir='./'):
+def lime(img_file, model, num_samples=3000, batch_size=50, save_dir='./'):
     """Visualize the interpretability of the model's prediction using the LIME algorithm.
 
     LIME (Local Interpretable Model-agnostic Explanations) can explain any model. The idea of LIME is to take the input sample as the center,
@@ -47,20 +44,21 @@ def lime(img_file,
     assert model.model_type == 'classifier', \
         'Interpretation visualization is only supported for classifiers!'
     if model.status != 'Normal':
-        raise Exception('The interpretation only can deal with the Normal model')
+        raise Exception(
+            'Interpretation can only be applied to a model in Normal status.')
     if not osp.exists(save_dir):
         os.makedirs(save_dir)
-    model.arrange_transforms(
-        transforms=model.test_transforms, mode='test')
+    model.arrange_transforms(transforms=model.test_transforms, mode='test')
     tmp_transforms = copy.deepcopy(model.test_transforms)
     tmp_transforms.transforms = tmp_transforms.transforms[:-2]
     img = tmp_transforms(img_file)[0]
     img = np.around(img).astype('uint8')
     img = np.expand_dims(img, axis=0)
     interpreter = None
-    interpreter = get_lime_interpreter(img, model, num_samples=num_samples, batch_size=batch_size)
+    interpreter = get_lime_interpreter(
+        img, model, num_samples=num_samples, batch_size=batch_size)
     img_name = osp.splitext(osp.split(img_file)[-1])[0]
-    interpreter.interpret(img, save_dir=save_dir)
+    interpreter.interpret(img, save_dir=osp.join(save_dir, img_name))
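A minimal usage sketch of this API; the model directory and image path are hypothetical placeholders (the NormLIME tutorial at the end of this diff follows the same pattern):

```python
import paddlex as pdx

model = pdx.load_model('output/mobilenetv2')  # hypothetical trained classifier
pdx.interpret.lime(
    'images/test.jpg',  # hypothetical test image
    model,
    num_samples=3000,   # perturbed samples for the local linear fit
    batch_size=50,
    save_dir='./')      # the PNG is written under save_dir/<image name>/
```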
 
 
 def normlime(img_file,
@@ -68,7 +66,8 @@ def normlime(img_file,
              dataset=None,
              num_samples=3000,
              batch_size=50,
-             save_dir='./'):
+             save_dir='./',
+             normlime_weights_file=None):
     """Visualize the interpretability of the model's prediction using the NormLIME algorithm.
 
     NormLIME uses a number of samples to derive a global explanation. It computes, ahead of time, a certain number of test
@@ -84,15 +83,16 @@ def normlime(img_file,
         num_samples (int): number of samples LIME uses to fit its linear model. Defaults to 3000.
         batch_size (int): batch size used for prediction. Defaults to 50.
         save_dir (str): directory that stores the visualization result (a PNG file) and intermediate files.
+        normlime_weights_file (str): file name for the NormLIME initialization weights. If the file does not exist, the weights are computed once and saved to this path; if it exists, it is loaded directly.
     """
     assert model.model_type == 'classifier', \
         'Interpretation visualization is only supported for classifiers!'
     if model.status != 'Normal':
-        raise Exception('The interpretation only can deal with the Normal model')
+        raise Exception(
+            'Interpretation can only be applied to a model in Normal status.')
     if not osp.exists(save_dir):
         os.makedirs(save_dir)
-    model.arrange_transforms(
-        transforms=model.test_transforms, mode='test')
+    model.arrange_transforms(transforms=model.test_transforms, mode='test')
     tmp_transforms = copy.deepcopy(model.test_transforms)
     tmp_transforms.transforms = tmp_transforms.transforms[:-2]
     img = tmp_transforms(img_file)[0]
@@ -100,28 +100,30 @@ def normlime(img_file,
     img = np.expand_dims(img, axis=0)
     interpreter = None
     if dataset is None:
-        raise Exception('The dataset is None. Cannot implement this kind of interpretation')
-    interpreter = get_normlime_interpreter(img, model, dataset,
-                                           num_samples=num_samples, batch_size=batch_size,
-                                           save_dir=save_dir)
+        raise Exception(
+            'The dataset is None, so this kind of interpretation cannot be performed.')
+    interpreter = get_normlime_interpreter(
+        img,
+        model,
+        dataset,
+        num_samples=num_samples,
+        batch_size=batch_size,
+        save_dir=save_dir,
+        normlime_weights_file=normlime_weights_file)
     img_name = osp.splitext(osp.split(img_file)[-1])[0]
-    interpreter.interpret(img, save_dir=save_dir)
+    interpreter.interpret(img, save_dir=osp.join(save_dir, img_name))
 
 
 def get_lime_interpreter(img, model, num_samples=3000, batch_size=50):
     def predict_func(image):
-        image = image.astype('float32')
-        for i in range(image.shape[0]):
-            image[i] = cv2.cvtColor(image[i], cv2.COLOR_RGB2BGR)
-        tmp_transforms = copy.deepcopy(model.test_transforms.transforms)
-        model.test_transforms.transforms = model.test_transforms.transforms[-2:]
         out = interpretation_predict(model, image)
-        model.test_transforms.transforms = tmp_transforms
         return out[0]
 
     labels_name = None
     if hasattr(model, 'labels'):
         labels_name = model.labels
-    interpreter = Interpretation('lime',
+    interpreter = Interpretation(
+        'lime',
         predict_func,
         labels_name,
         num_samples=num_samples,
@@ -129,23 +131,17 @@ def get_lime_interpreter(img, model, num_samples=3000, batch_size=50):
     return interpreter
 
 
-def get_normlime_interpreter(img, model, dataset, num_samples=3000, batch_size=50, save_dir='./'):
-    def precompute_predict_func(image):
-        image = image.astype('float32')
-        tmp_transforms = copy.deepcopy(model.test_transforms.transforms)
-        model.test_transforms.transforms = model.test_transforms.transforms[-2:]
-        out = interpretation_predict(model, image)
-        model.test_transforms.transforms = tmp_transforms
-        return out[0]
+def get_normlime_interpreter(img,
+                             model,
+                             dataset,
+                             num_samples=3000,
+                             batch_size=50,
+                             save_dir='./',
+                             normlime_weights_file=None):
 
     def predict_func(image):
-        image = image.astype('float32')
-        for i in range(image.shape[0]):
-            image[i] = cv2.cvtColor(image[i], cv2.COLOR_RGB2BGR)
-        tmp_transforms = copy.deepcopy(model.test_transforms.transforms)
-        model.test_transforms.transforms = model.test_transforms.transforms[-2:]
         out = interpretation_predict(model, image)
-        model.test_transforms.transforms = tmp_transforms
         return out[0]
 
     labels_name = None
     if dataset is not None:
         labels_name = dataset.labels
@@ -157,28 +153,29 @@ def get_normlime_interpreter(img, model, dataset, num_samples=3000, batch_size=50, save_dir='./'):
             os.makedirs(root_path)
         url = "https://bj.bcebos.com/paddlex/interpret/pre_models.tar.gz"
         pdx.utils.download_and_decompress(url, path=root_path)
-    npy_dir = precompute_for_normlime(precompute_predict_func,
-                                      dataset,
-                                      num_samples=num_samples,
-                                      batch_size=batch_size,
-                                      save_dir=save_dir)
-    interpreter = Interpretation('normlime',
-                                 predict_func,
-                                 labels_name,
-                                 num_samples=num_samples,
-                                 batch_size=batch_size,
-                                 normlime_weights=npy_dir)
-    return interpreter
-
-
-def precompute_for_normlime(predict_func, dataset, num_samples=3000, batch_size=50, save_dir='./'):
-    image_list = []
-    for item in dataset.file_list:
-        image_list.append(item[0])
-    return precompute_normlime_weights(
-        image_list,
-        predict_func,
-        num_samples=num_samples,
-        batch_size=batch_size,
-        save_dir=save_dir)
+
+    if osp.exists(osp.join(save_dir, normlime_weights_file)):
+        normlime_weights_file = osp.join(save_dir, normlime_weights_file)
+        try:
+            np.load(normlime_weights_file, allow_pickle=True).item()
+        except:
+            normlime_weights_file = precompute_global_classifier(
+                dataset,
+                predict_func,
+                save_path=normlime_weights_file,
+                batch_size=batch_size)
+    else:
+        normlime_weights_file = precompute_global_classifier(
+            dataset,
+            predict_func,
+            save_path=osp.join(save_dir, normlime_weights_file),
+            batch_size=batch_size)
+
+    interpreter = Interpretation(
+        'normlime',
+        predict_func,
+        labels_name,
+        num_samples=num_samples,
+        batch_size=batch_size,
+        normlime_weights=normlime_weights_file)
+    return interpreter
@@ -31,4 +31,4 @@ def export_quant_model(model,
         batch_size=batch_size,
         batch_num=batch_num,
         save_dir=save_dir,
-        cache_dir='./temp')
+        cache_dir=cache_dir)
@@ -110,7 +110,7 @@ class LabelMe2COCO(X2COCO):
         annotation["segmentation"] = [list(np.asarray(points).flatten())]
         annotation["iscrowd"] = 0
         annotation["image_id"] = image_id + 1
-        annotation["bbox"] = list(map(float, get_bbox(height, width, points)))
+        annotation["bbox"] = list(map(float, self.get_bbox(height, width, points)))
         annotation["area"] = annotation["bbox"][2] * annotation["bbox"][3]
         annotation["category_id"] = label_to_num[label]
         annotation["id"] = object_id + 1
...
@@ -29,13 +29,11 @@ def log(level=2, message="", use_color=False):
     current_time = time.strftime("%Y-%m-%d %H:%M:%S", time_array)
     if paddlex.log_level >= level:
         if use_color:
-            print("\033[1;31;40m{} [{}]\t{}\033[0m".format(
-                current_time, levels[level],
-                message).encode("utf-8").decode("latin1"))
+            print("\033[1;31;40m{} [{}]\t{}\033[0m".format(current_time, levels[
+                level], message).encode("utf-8").decode("latin1"))
         else:
-            print(
-                "{} [{}]\t{}".format(current_time, levels[level],
-                                     message).encode("utf-8").decode("latin1"))
+            print("{} [{}]\t{}".format(current_time, levels[level], message)
+                  .encode("utf-8").decode("latin1"))
         sys.stdout.flush()
@@ -47,9 +45,11 @@ def info(message="", use_color=False):
     log(level=2, message=message, use_color=use_color)
 
 
-def warning(message="", use_color=False):
+def warning(message="", use_color=True):
     log(level=1, message=message, use_color=use_color)
 
 
-def error(message="", use_color=False):
+def error(message="", use_color=True, exit=True):
     log(level=0, message=message, use_color=use_color)
+    if exit:
+        sys.exit(-1)
@@ -19,7 +19,7 @@ long_description = "PaddleX. A end-to-end deeplearning model development toolkit
 setuptools.setup(
     name="paddlex",
-    version='1.0.4',
+    version='1.0.6',
     author="paddlex",
     author_email="paddlex@baidu.com",
     description=long_description,
...
@@ -14,18 +14,22 @@ model_file = 'https://bj.bcebos.com/paddlex/interpret/mini_imagenet_veg_mobilene
 pdx.utils.download_and_decompress(model_file, path='./')
 
 # Load the model
-model = pdx.load_model('mini_imagenet_veg_mobilenetv2')
+model_file = 'mini_imagenet_veg_mobilenetv2'
+model = pdx.load_model(model_file)
 
 # Define the dataset used for testing
+dataset = 'mini_imagenet_veg'
 test_dataset = pdx.datasets.ImageNet(
-    data_dir='mini_imagenet_veg',
-    file_list=osp.join('mini_imagenet_veg', 'test_list.txt'),
-    label_list=osp.join('mini_imagenet_veg', 'labels.txt'),
+    data_dir=dataset,
+    file_list=osp.join(dataset, 'test_list.txt'),
+    label_list=osp.join(dataset, 'labels.txt'),
     transforms=model.test_transforms)
 
 # Interpretability visualization
 pdx.interpret.normlime(
-    'mini_imagenet_veg/mushroom/n07734744_1106.JPEG',
+    test_dataset.file_list[0][0],
     model,
     test_dataset,
-    save_dir='./')
+    save_dir='./',
+    normlime_weights_file='{}_{}.npy'.format(
+        dataset.split('/')[-1], model.model_name))